about | summary | refs | log | tree | commit | diff
path: root/src/reader/parser/inside_declaration.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/reader/parser/inside_declaration.rs')
-rw-r--r-- src/reader/parser/inside_declaration.rs | 137
1 file changed, 83 insertions, 54 deletions
diff --git a/src/reader/parser/inside_declaration.rs b/src/reader/parser/inside_declaration.rs
index af39d10..4ff1427 100644
--- a/src/reader/parser/inside_declaration.rs
+++ b/src/reader/parser/inside_declaration.rs
@@ -1,44 +1,62 @@
-
-use common::XmlVersion;
-
-use reader::events::XmlEvent;
-use reader::lexer::Token;
+use crate::common::{is_whitespace_char, XmlVersion};
+use crate::reader::error::SyntaxError;
+use crate::reader::events::XmlEvent;
+use crate::reader::lexer::Token;
+use crate::util::Encoding;
use super::{
- Result, PullParser, State, DeclarationSubstate, QualifiedNameTarget,
- DEFAULT_VERSION, DEFAULT_ENCODING
+ DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State,
+ DEFAULT_VERSION,
};
impl PullParser {
+ #[inline(never)]
+ fn emit_start_document(&mut self) -> Option<Result> {
+ debug_assert!(self.encountered == Encountered::None);
+ self.encountered = Encountered::Declaration;
+
+ let version = self.data.version;
+ let encoding = self.data.take_encoding();
+ let standalone = self.data.standalone;
+
+ if let Some(new_encoding) = encoding.as_deref() {
+ let new_encoding = match new_encoding.parse() {
+ Ok(e) => e,
+ Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1,
+ Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))),
+ };
+ let current_encoding = self.lexer.encoding();
+ if current_encoding != new_encoding {
+ let set = match (current_encoding, new_encoding) {
+ (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new,
+ (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding,
+ _ if self.config.ignore_invalid_encoding_declarations => current_encoding,
+ _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))),
+ };
+ self.lexer.set_encoding(set);
+ }
+ }
+
+ let current_encoding = self.lexer.encoding();
+ self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
+ version: version.unwrap_or(DEFAULT_VERSION),
+ encoding: encoding.unwrap_or_else(move || current_encoding.to_string()),
+ standalone
+ }))
+ }
+
// TODO: remove redundancy via macros or extra methods
pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
- macro_rules! unexpected_token(
- ($this:expr; $t:expr) => (Some($this.error(format!("Unexpected token inside XML declaration: {}", $t))));
- ($t:expr) => (unexpected_token!(self; $t));
- );
-
- #[inline]
- fn emit_start_document(this: &mut PullParser) -> Option<Result> {
- this.parsed_declaration = true;
- let version = this.data.take_version();
- let encoding = this.data.take_encoding();
- let standalone = this.data.take_standalone();
- this.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
- version: version.unwrap_or(DEFAULT_VERSION),
- encoding: encoding.unwrap_or(DEFAULT_ENCODING.into()),
- standalone: standalone
- }))
- }
match s {
DeclarationSubstate::BeforeVersion => match t {
- Token::Whitespace(_) => None, // continue
Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
- _ => unexpected_token!(t)
+ Token::Character(c) if is_whitespace_char(c) => None, // continue
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- match &name.local_name[..] {
+ match &*name.local_name {
"ersion" if name.namespace.is_none() =>
this.into_state_continue(State::InsideDeclaration(
if token == Token::EqualsSign {
@@ -47,18 +65,18 @@ impl PullParser {
DeclarationSubstate::AfterVersion
}
)),
- _ => unexpected_token!(this; name)
+ _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))),
}
}),
DeclarationSubstate::AfterVersion => match t {
- Token::Whitespace(_) => None,
Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
- _ => unexpected_token!(t)
+ Token::Character(c) if is_whitespace_char(c) => None,
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
- this.data.version = match &value[..] {
+ this.data.version = match &*value {
"1.0" => Some(XmlVersion::Version10),
"1.1" => Some(XmlVersion::Version11),
_ => None
@@ -66,48 +84,60 @@ impl PullParser {
if this.data.version.is_some() {
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
} else {
- Some(self_error!(this; "Unexpected XML version value: {}", value))
+ Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into())))
}
}),
DeclarationSubstate::AfterVersionValue => match t {
- Token::Whitespace(_) => None, // skip whitespace
+ Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
+ Token::ProcessingInstructionEnd => self.emit_start_document(),
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
+ },
+
+ DeclarationSubstate::BeforeEncoding => match t {
Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
- Token::ProcessingInstructionEnd => emit_start_document(self),
- _ => unexpected_token!(t)
+ Token::ProcessingInstructionEnd => self.emit_start_document(),
+ Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- match &name.local_name[..] {
+ match &*name.local_name {
"ncoding" if name.namespace.is_none() =>
this.into_state_continue(State::InsideDeclaration(
if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
)),
- _ => unexpected_token!(this; name)
+ _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into())))
}
}),
DeclarationSubstate::AfterEncoding => match t {
- Token::Whitespace(_) => None,
Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
- _ => unexpected_token!(t)
+ Token::Character(c) if is_whitespace_char(c) => None,
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
this.data.encoding = Some(value);
- this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl))
+ this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
}),
+ DeclarationSubstate::AfterEncodingValue => match t {
+ Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)),
+ Token::ProcessingInstructionEnd => self.emit_start_document(),
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
+ },
+
DeclarationSubstate::BeforeStandaloneDecl => match t {
- Token::Whitespace(_) => None, // skip whitespace
Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
- Token::ProcessingInstructionEnd => emit_start_document(self),
- _ => unexpected_token!(t)
+ Token::ProcessingInstructionEnd => self.emit_start_document(),
+ Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
- match &name.local_name[..] {
+ match &*name.local_name {
"tandalone" if name.namespace.is_none() =>
this.into_state_continue(State::InsideDeclaration(
if token == Token::EqualsSign {
@@ -116,18 +146,18 @@ impl PullParser {
DeclarationSubstate::AfterStandaloneDecl
}
)),
- _ => unexpected_token!(this; name)
+ _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))),
}
}),
DeclarationSubstate::AfterStandaloneDecl => match t {
- Token::Whitespace(_) => None,
Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
- _ => unexpected_token!(t)
+ Token::Character(c) if is_whitespace_char(c) => None,
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
- let standalone = match &value[..] {
+ let standalone = match &*value {
"yes" => Some(true),
"no" => Some(false),
_ => None
@@ -136,16 +166,15 @@ impl PullParser {
this.data.standalone = standalone;
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
} else {
- Some(self_error!(this; "Invalid standalone declaration value: {}", value))
+ Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into())))
}
}),
DeclarationSubstate::AfterStandaloneDeclValue => match t {
- Token::Whitespace(_) => None, // skip whitespace
- Token::ProcessingInstructionEnd => emit_start_document(self),
- _ => unexpected_token!(t)
- }
+ Token::ProcessingInstructionEnd => self.emit_start_document(),
+ Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
+ _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
+ },
}
}
-
}