diff options
Diffstat (limited to 'src/reader/parser/inside_processing_instruction.rs')
-rw-r--r-- | src/reader/parser/inside_processing_instruction.rs | 82 |
1 files changed, 48 insertions, 34 deletions
diff --git a/src/reader/parser/inside_processing_instruction.rs b/src/reader/parser/inside_processing_instruction.rs index 8ddf6b8..96f6753 100644 --- a/src/reader/parser/inside_processing_instruction.rs +++ b/src/reader/parser/inside_processing_instruction.rs @@ -1,18 +1,20 @@ -use common::{ - is_name_start_char, is_name_char, -}; +use crate::reader::error::SyntaxError; +use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; -use reader::events::XmlEvent; -use reader::lexer::Token; +use crate::reader::events::XmlEvent; +use crate::reader::lexer::Token; -use super::{Result, PullParser, State, ProcessingInstructionSubstate, DeclarationSubstate}; +use super::{DeclarationSubstate, ProcessingInstructionSubstate, PullParser, Result, State, Encountered}; impl PullParser { pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> { match s { ProcessingInstructionSubstate::PIInsideName => match t { - Token::Character(c) if !self.buf_has_data() && is_name_start_char(c) || - self.buf_has_data() && is_name_char(c) => self.append_char_continue(c), + Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) || + self.buf_has_data() && is_name_char(c) => { + self.buf.push(c); + None + }, Token::ProcessingInstructionEnd => { // self.buf contains PI name @@ -20,70 +22,83 @@ impl PullParser { // Don't need to check for declaration because it has mandatory attributes // but there is none - match &name[..] { + match &*name { // Name is empty, it is an error - "" => Some(self_error!(self; "Encountered processing instruction without name")), + "" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)), // Found <?xml-like PI not at the beginning of a document, // it is an error - see section 2.6 of XML 1.1 spec - "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML" => - Some(self_error!(self; "Invalid processing instruction: <?{}", name)), + n if "xml".eq_ignore_ascii_case(n) => + Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), // All is ok, emitting event _ => { - self.into_state_emit( - State::OutsideTag, - Ok(XmlEvent::ProcessingInstruction { - name: name, - data: None - }) - ) + debug_assert!(self.next_event.is_none(), "{:?}", self.next_event); + // can't have a PI before `<?xml` + let event1 = self.set_encountered(Encountered::Declaration); + let event2 = Some(Ok(XmlEvent::ProcessingInstruction { + name, + data: None + })); + // emitting two events at once is cumbersome + let event1 = if event1.is_some() { + self.next_event = event2; + event1 + } else { + event2 + }; + self.into_state(State::OutsideTag, event1) } } } - Token::Whitespace(_) => { + Token::Character(c) if is_whitespace_char(c) => { // self.buf contains PI name let name = self.take_buf(); - match &name[..] { + match &*name { // We have not ever encountered an element and have not parsed XML declaration - "xml" if !self.encountered_element && !self.parsed_declaration => + "xml" if self.encountered == Encountered::None => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)), // Found <?xml-like PI after the beginning of a document, // it is an error - see section 2.6 of XML 1.1 spec - "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML" - if self.encountered_element || self.parsed_declaration => - Some(self_error!(self; "Invalid processing instruction: <?{}", name)), + n if "xml".eq_ignore_ascii_case(n) => + Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), // All is ok, starting parsing PI data _ => { - self.lexer.disable_errors(); // data is arbitrary, so disable errors self.data.name = name; - self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData)) + // can't have a PI before `<?xml` + let next_event = self.set_encountered(Encountered::Declaration); + self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event) } - } } - _ => Some(self_error!(self; "Unexpected token: <?{}{}", self.buf, t)) + _ => { + let buf = self.take_buf(); + Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t))) + } }, ProcessingInstructionSubstate::PIInsideData => match t { Token::ProcessingInstructionEnd => { - self.lexer.enable_errors(); let name = self.data.take_name(); let data = self.take_buf(); self.into_state_emit( State::OutsideTag, Ok(XmlEvent::ProcessingInstruction { - name: name, - data: Some(data) - }) + name, + data: Some(data), + }), ) }, + Token::Character(c) if !self.is_valid_xml_char(c) => { + Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) + }, + // Any other token should be treated as plain characters _ => { t.push_to_string(&mut self.buf); @@ -92,5 +107,4 @@ impl PullParser { }, } } - } |