aboutsummaryrefslogtreecommitdiff
path: root/src/reader/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/reader/parser.rs')
-rw-r--r--src/reader/parser.rs53
1 files changed, 28 insertions, 25 deletions
diff --git a/src/reader/parser.rs b/src/reader/parser.rs
index dcdec89..18f073d 100644
--- a/src/reader/parser.rs
+++ b/src/reader/parser.rs
@@ -1,24 +1,19 @@
//! Contains an implementation of pull-based XML parser.
-
-use crate::common::is_xml11_char;
-use crate::common::is_xml10_char;
-use crate::common::is_xml11_char_not_restricted;
-use crate::reader::error::SyntaxError;
-use std::collections::HashMap;
-use std::io::prelude::*;
-
-use crate::attribute::OwnedAttribute;
-use crate::common::{self, is_name_char, is_name_start_char, Position, TextPosition, XmlVersion, is_whitespace_char};
+use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
+use crate::common::{Position, TextPosition, XmlVersion};
use crate::name::OwnedName;
use crate::namespace::NamespaceStack;
-
use crate::reader::config::ParserConfig2;
+use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
+use crate::reader::indexset::AttributesSet;
use crate::reader::lexer::{Lexer, Token};
-
use super::{Error, ErrorKind};
+use std::collections::HashMap;
+use std::io::Read;
+
macro_rules! gen_takes(
($($field:ident -> $method:ident, $t:ty, $def:expr);+) => (
$(
@@ -42,7 +37,7 @@ gen_takes!(
element_name -> take_element_name, Option<OwnedName>, None;
attr_name -> take_attr_name, Option<OwnedName>, None;
- attributes -> take_attributes, Vec<OwnedAttribute>, vec!()
+ attributes -> take_attributes, AttributesSet, AttributesSet::new()
);
mod inside_cdata;
@@ -107,7 +102,7 @@ impl PullParser {
#[inline]
fn new_with_config2(config: ParserConfig2) -> PullParser {
- let mut lexer = Lexer::new();
+ let mut lexer = Lexer::new(&config);
if let Some(enc) = config.override_encoding {
lexer.set_encoding(enc);
}
@@ -133,7 +128,7 @@ impl PullParser {
element_name: None,
quote: None,
attr_name: None,
- attributes: Vec::new(),
+ attributes: AttributesSet::new(),
},
final_result: None,
next_event: None,
@@ -299,7 +294,7 @@ struct MarkupData {
name: String, // used for processing instruction name
ref_data: String, // used for reference content
- version: Option<common::XmlVersion>, // used for XML declaration version
+ version: Option<XmlVersion>, // used for XML declaration version
encoding: Option<String>, // used for XML declaration encoding
standalone: Option<bool>, // used for XML declaration standalone parameter
@@ -307,7 +302,7 @@ struct MarkupData {
quote: Option<QuoteToken>, // used to hold opening quote for attribute value
attr_name: Option<OwnedName>, // used to hold attribute name
- attributes: Vec<OwnedAttribute> // used to hold all accumulated attributes
+ attributes: AttributesSet, // used to hold all accumulated attributes
}
impl PullParser {
@@ -401,7 +396,7 @@ impl PullParser {
fn next_pos(&mut self) {
// unfortunately calls to next_pos will never be perfectly balanced with push_pos,
// at very least because parse errors and EOF can happen unexpectedly without a prior push.
- if self.pos.len() > 0 {
+ if !self.pos.is_empty() {
if self.pos.len() > 1 {
self.pos.remove(0);
} else {
@@ -490,7 +485,7 @@ impl PullParser {
let name = this.take_buf();
match name.parse() {
Ok(name) => on_name(this, t, name),
- Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into())))
+ Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
}
};
@@ -504,6 +499,9 @@ impl PullParser {
Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
self.buf_has_data() && is_name_char(c)) => {
+ if self.buf.len() > self.config.max_name_length {
+ return Some(self.error(SyntaxError::ExceededConfiguredLimit));
+ }
self.buf.push(c);
None
},
@@ -517,7 +515,7 @@ impl PullParser {
Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),
- _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t)))
+ _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
}
}
@@ -529,7 +527,7 @@ impl PullParser {
fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
where F: Fn(&mut PullParser, String) -> Option<Result> {
match t {
- Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
+ Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
None => { // Entered attribute value
@@ -547,6 +545,9 @@ impl PullParser {
return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
}
}
+ if self.buf.len() > self.config.max_attribute_length {
+ return Some(self.error(SyntaxError::ExceededConfiguredLimit));
+ }
t.push_to_string(&mut self.buf);
None
}
@@ -557,8 +558,7 @@ impl PullParser {
self.into_state_continue(State::InsideReference)
},
- Token::OpeningTagStart =>
- Some(self.error(SyntaxError::UnexpectedOpeningTag)),
+ Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),
Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
@@ -566,6 +566,9 @@ impl PullParser {
// Every character except " and ' and < is okay
_ if self.data.quote.is_some() => {
+ if self.buf.len() > self.config.max_attribute_length {
+ return Some(self.error(SyntaxError::ExceededConfiguredLimit));
+ }
t.push_to_string(&mut self.buf);
None
}
@@ -576,11 +579,11 @@ impl PullParser {
fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
let mut name = self.data.take_element_name()?;
- let mut attributes = self.data.take_attributes();
+ let mut attributes = self.data.take_attributes().into_vec();
// check whether the name prefix is bound and fix its namespace
match self.nst.get(name.borrow().prefix_repr()) {
- Some("") => name.namespace = None, // default namespace
+ Some("") => name.namespace = None, // default namespace
Some(ns) => name.namespace = Some(ns.into()),
None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into())))
}