diff options
Diffstat (limited to 'src/reader/parser/inside_declaration.rs')
-rw-r--r-- | src/reader/parser/inside_declaration.rs | 137 |
1 files changed, 83 insertions, 54 deletions
diff --git a/src/reader/parser/inside_declaration.rs b/src/reader/parser/inside_declaration.rs index af39d10..4ff1427 100644 --- a/src/reader/parser/inside_declaration.rs +++ b/src/reader/parser/inside_declaration.rs @@ -1,44 +1,62 @@ - -use common::XmlVersion; - -use reader::events::XmlEvent; -use reader::lexer::Token; +use crate::common::{is_whitespace_char, XmlVersion}; +use crate::reader::error::SyntaxError; +use crate::reader::events::XmlEvent; +use crate::reader::lexer::Token; +use crate::util::Encoding; use super::{ - Result, PullParser, State, DeclarationSubstate, QualifiedNameTarget, - DEFAULT_VERSION, DEFAULT_ENCODING + DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State, + DEFAULT_VERSION, }; impl PullParser { + #[inline(never)] + fn emit_start_document(&mut self) -> Option<Result> { + debug_assert!(self.encountered == Encountered::None); + self.encountered = Encountered::Declaration; + + let version = self.data.version; + let encoding = self.data.take_encoding(); + let standalone = self.data.standalone; + + if let Some(new_encoding) = encoding.as_deref() { + let new_encoding = match new_encoding.parse() { + Ok(e) => e, + Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1, + Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))), + }; + let current_encoding = self.lexer.encoding(); + if current_encoding != new_encoding { + let set = match (current_encoding, new_encoding) { + (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new, + (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding, + _ if self.config.ignore_invalid_encoding_declarations => current_encoding, + _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))), + }; + self.lexer.set_encoding(set); + } + } + + let current_encoding = self.lexer.encoding(); + self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument { + version: version.unwrap_or(DEFAULT_VERSION), + encoding: encoding.unwrap_or_else(move || current_encoding.to_string()), + standalone + })) + } + // TODO: remove redundancy via macros or extra methods pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> { - macro_rules! unexpected_token( - ($this:expr; $t:expr) => (Some($this.error(format!("Unexpected token inside XML declaration: {}", $t)))); - ($t:expr) => (unexpected_token!(self; $t)); - ); - - #[inline] - fn emit_start_document(this: &mut PullParser) -> Option<Result> { - this.parsed_declaration = true; - let version = this.data.take_version(); - let encoding = this.data.take_encoding(); - let standalone = this.data.take_standalone(); - this.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument { - version: version.unwrap_or(DEFAULT_VERSION), - encoding: encoding.unwrap_or(DEFAULT_ENCODING.into()), - standalone: standalone - })) - } match s { DeclarationSubstate::BeforeVersion => match t { - Token::Whitespace(_) => None, // continue Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)), - _ => unexpected_token!(t) + Token::Character(c) if is_whitespace_char(c) => None, // continue + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), }, DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { - match &name.local_name[..] { + match &*name.local_name { "ersion" if name.namespace.is_none() => this.into_state_continue(State::InsideDeclaration( if token == Token::EqualsSign { @@ -47,18 +65,18 @@ impl PullParser { DeclarationSubstate::AfterVersion } )), - _ => unexpected_token!(this; name) + _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))), } }), DeclarationSubstate::AfterVersion => match t { - Token::Whitespace(_) => None, Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)), - _ => unexpected_token!(t) + Token::Character(c) if is_whitespace_char(c) => None, + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), }, DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| { - this.data.version = match &value[..] { + this.data.version = match &*value { "1.0" => Some(XmlVersion::Version10), "1.1" => Some(XmlVersion::Version11), _ => None @@ -66,48 +84,60 @@ impl PullParser { if this.data.version.is_some() { this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue)) } else { - Some(self_error!(this; "Unexpected XML version value: {}", value)) + Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into()))) } }), DeclarationSubstate::AfterVersionValue => match t { - Token::Whitespace(_) => None, // skip whitespace + Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)), + Token::ProcessingInstructionEnd => self.emit_start_document(), + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), + }, + + DeclarationSubstate::BeforeEncoding => match t { Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)), Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), - Token::ProcessingInstructionEnd => emit_start_document(self), - _ => unexpected_token!(t) + Token::ProcessingInstructionEnd => self.emit_start_document(), + Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), }, DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { - match &name.local_name[..] { + match &*name.local_name { "ncoding" if name.namespace.is_none() => this.into_state_continue(State::InsideDeclaration( if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding } )), - _ => unexpected_token!(this; name) + _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))) } }), DeclarationSubstate::AfterEncoding => match t { - Token::Whitespace(_) => None, Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)), - _ => unexpected_token!(t) + Token::Character(c) if is_whitespace_char(c) => None, + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), }, DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| { this.data.encoding = Some(value); - this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)) + this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue)) }), + DeclarationSubstate::AfterEncodingValue => match t { + Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)), + Token::ProcessingInstructionEnd => self.emit_start_document(), + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), + }, + DeclarationSubstate::BeforeStandaloneDecl => match t { - Token::Whitespace(_) => None, // skip whitespace Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), - Token::ProcessingInstructionEnd => emit_start_document(self), - _ => unexpected_token!(t) + Token::ProcessingInstructionEnd => self.emit_start_document(), + Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), }, DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| { - match &name.local_name[..] { + match &*name.local_name { "tandalone" if name.namespace.is_none() => this.into_state_continue(State::InsideDeclaration( if token == Token::EqualsSign { @@ -116,18 +146,18 @@ impl PullParser { DeclarationSubstate::AfterStandaloneDecl } )), - _ => unexpected_token!(this; name) + _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))), } }), DeclarationSubstate::AfterStandaloneDecl => match t { - Token::Whitespace(_) => None, Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)), - _ => unexpected_token!(t) + Token::Character(c) if is_whitespace_char(c) => None, + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), }, DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| { - let standalone = match &value[..] { + let standalone = match &*value { "yes" => Some(true), "no" => Some(false), _ => None @@ -136,16 +166,15 @@ impl PullParser { this.data.standalone = standalone; this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue)) } else { - Some(self_error!(this; "Invalid standalone declaration value: {}", value)) + Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into()))) } }), DeclarationSubstate::AfterStandaloneDeclValue => match t { - Token::Whitespace(_) => None, // skip whitespace - Token::ProcessingInstructionEnd => emit_start_document(self), - _ => unexpected_token!(t) - } + Token::ProcessingInstructionEnd => self.emit_start_document(), + Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), + }, } } - } |