aboutsummaryrefslogtreecommitdiff
path: root/src/yaml.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/yaml.rs')
-rw-r--r--src/yaml.rs739
1 files changed, 739 insertions, 0 deletions
diff --git a/src/yaml.rs b/src/yaml.rs
new file mode 100644
index 0000000..4bb70da
--- /dev/null
+++ b/src/yaml.rs
@@ -0,0 +1,739 @@
+use linked_hash_map::LinkedHashMap;
+use crate::parser::*;
+use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType};
+use std::collections::BTreeMap;
+use std::f64;
+use std::i64;
+use std::mem;
+use std::ops::Index;
+use std::string;
+use std::vec;
+
+/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to
+/// access your YAML document.
+///
+/// All variants hold types that implement `Eq`, `Ord` and `Hash`, which lets the
+/// enum derive those traits and be used as a key of `Hash`.
+///
+/// # Examples
+///
+/// ```
+/// use yaml_rust::Yaml;
+/// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type
+/// assert_eq!(foo.as_i64().unwrap(), -123);
+///
+/// // iterate over an Array
+/// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]);
+/// for v in vec.as_vec().unwrap() {
+///     assert!(v.as_i64().is_some());
+/// }
+/// ```
+#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)]
+pub enum Yaml {
+ /// Float types are stored as their source text (a `String`) and parsed on
+ /// demand by `as_f64` / `into_f64`.
+ /// Note that f64 does NOT implement the `Eq` trait and can NOT be stored in a
+ /// `BTreeMap`, which is why the parsed value is not stored directly.
+ Real(string::String),
+ /// YAML int is stored as i64.
+ Integer(i64),
+ /// YAML scalar.
+ String(string::String),
+ /// YAML bool, e.g. `true` or `false`.
+ Boolean(bool),
+ /// YAML array, can be accessed as a `Vec`.
+ Array(self::Array),
+ /// YAML hash, can be accessed as a `LinkedHashMap`.
+ ///
+ /// Iteration order will match the order of insertion into the map.
+ Hash(self::Hash),
+ /// Alias, not fully supported yet. The `YamlLoader` in this file resolves alias
+ /// events by cloning the anchored node instead of producing this variant.
+ Alias(usize),
+ /// YAML null, e.g. `null` or `~`.
+ Null,
+ /// Accessing a nonexistent node via the Index trait returns `BadValue`. This
+ /// simplifies error handling in the calling code. Invalid type conversion also
+ /// returns `BadValue`.
+ BadValue,
+}
+
+/// Sequence storage used by `Yaml::Array`.
+pub type Array = Vec<Yaml>;
+/// Mapping storage used by `Yaml::Hash`; both keys and values are `Yaml` nodes.
+pub type Hash = LinkedHashMap<Yaml, Yaml>;
+
+// Parse a scalar as an `f64` following the YAML 1.2 Core schema.
+// See: https://github.com/chyh1990/yaml-rust/issues/51
+//
+// Returns `None` when `v` is not a float under that schema. The special values
+// are matched by their exact spellings; everything else is delegated to Rust's
+// float parser, but only when `v` contains at least one ASCII digit.
+fn parse_f64(v: &str) -> Option<f64> {
+    match v {
+        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
+        "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
+        // `.NaN` is the Core schema's mixed-case spelling. The dot-less `NaN` is
+        // not part of the schema but was accepted before, so it stays accepted.
+        ".nan" | ".NaN" | ".NAN" | "NaN" => Some(f64::NAN),
+        // Rust's parser also accepts words such as "inf", "infinity" and "nan",
+        // which are ordinary strings in YAML. Every numeric literal contains a
+        // digit, so requiring one filters those words out.
+        _ if v.bytes().any(|b| b.is_ascii_digit()) => v.parse::<f64>().ok(),
+        _ => None,
+    }
+}
+
+/// Receiver of parser events that assembles them into `Yaml` documents.
+pub struct YamlLoader {
+ // Finished documents; one entry is pushed for every `Event::DocumentEnd`.
+ docs: Vec<Yaml>,
+ // states
+ // (current node, anchor_id) tuple
+ // Holds the containers that are still open, innermost last.
+ doc_stack: Vec<(Yaml, usize)>,
+ // One slot per open mapping. The slot is `Yaml::BadValue` while the next node
+ // is expected to be a key, and holds that key while its value is pending.
+ key_stack: Vec<Yaml>,
+ // Completed anchored nodes by anchor id; cloned when an alias refers to them.
+ anchor_map: BTreeMap<usize, Yaml>,
+}
+
+impl MarkedEventReceiver for YamlLoader {
+    /// Folds a single parser event into the document that is being built.
+    ///
+    /// Container start events open a new node on `doc_stack`; the matching end
+    /// events close it and attach it to its parent. Scalars and aliases are
+    /// attached immediately. The event's position marker is not used.
+    fn on_event(&mut self, ev: Event, _: Marker) {
+        match ev {
+            Event::DocumentStart => {
+                // nothing to prepare
+            }
+            Event::DocumentEnd => {
+                // An empty stack means the document had no root node.
+                let finished = match self.doc_stack.len() {
+                    0 => Yaml::BadValue,
+                    1 => self.doc_stack.pop().unwrap().0,
+                    _ => unreachable!(),
+                };
+                self.docs.push(finished);
+            }
+            Event::SequenceStart(anchor) => {
+                self.doc_stack.push((Yaml::Array(Vec::new()), anchor));
+            }
+            Event::MappingStart(anchor) => {
+                self.doc_stack.push((Yaml::Hash(Hash::new()), anchor));
+                // `BadValue` marks "no key read yet" for the new mapping.
+                self.key_stack.push(Yaml::BadValue);
+            }
+            Event::SequenceEnd => {
+                let finished = self.doc_stack.pop().unwrap();
+                self.insert_new_node(finished);
+            }
+            Event::MappingEnd => {
+                self.key_stack.pop().unwrap();
+                let finished = self.doc_stack.pop().unwrap();
+                self.insert_new_node(finished);
+            }
+            Event::Scalar(text, style, anchor, tag) => {
+                let node = match tag {
+                    // Anything that is not a plain scalar is always a string.
+                    _ if style != TScalarStyle::Plain => Yaml::String(text),
+                    Some(TokenType::Tag(ref handle, ref suffix)) => {
+                        // XXX tag:yaml.org,2002:
+                        if handle == "!!" {
+                            match suffix.as_ref() {
+                                // "true" or "false"
+                                "bool" => text
+                                    .parse::<bool>()
+                                    .map(Yaml::Boolean)
+                                    .unwrap_or(Yaml::BadValue),
+                                "int" => text
+                                    .parse::<i64>()
+                                    .map(Yaml::Integer)
+                                    .unwrap_or(Yaml::BadValue),
+                                "float" => {
+                                    if parse_f64(&text).is_some() {
+                                        Yaml::Real(text)
+                                    } else {
+                                        Yaml::BadValue
+                                    }
+                                }
+                                "null" => {
+                                    if text == "~" || text == "null" {
+                                        Yaml::Null
+                                    } else {
+                                        Yaml::BadValue
+                                    }
+                                }
+                                _ => Yaml::String(text),
+                            }
+                        } else {
+                            Yaml::String(text)
+                        }
+                    }
+                    // Datatype is not specified, or unrecognized
+                    _ => Yaml::from_str(&text),
+                };
+
+                self.insert_new_node((node, anchor));
+            }
+            Event::Alias(id) => {
+                // An alias whose anchor has not been recorded yields `BadValue`.
+                let resolved = self
+                    .anchor_map
+                    .get(&id)
+                    .cloned()
+                    .unwrap_or(Yaml::BadValue);
+                self.insert_new_node((resolved, 0));
+            }
+            _ => { /* ignore */ }
+        }
+    }
+}
+
+impl YamlLoader {
+    /// Attaches a completed node to the innermost open container.
+    ///
+    /// `node` is the value together with its anchor id. A non-zero id also
+    /// records a clone of the value in `anchor_map`. With no open container the
+    /// node becomes the sole entry of `doc_stack`.
+    fn insert_new_node(&mut self, node: (Yaml, usize)) {
+        let (value, anchor_id) = node;
+        // valid anchor id starts from 1
+        if anchor_id > 0 {
+            self.anchor_map.insert(anchor_id, value.clone());
+        }
+        if self.doc_stack.is_empty() {
+            self.doc_stack.push((value, anchor_id));
+            return;
+        }
+        match self.doc_stack.last_mut().unwrap().0 {
+            Yaml::Array(ref mut items) => items.push(value),
+            Yaml::Hash(ref mut map) => {
+                let pending_key = self.key_stack.last_mut().unwrap();
+                if pending_key.is_badvalue() {
+                    // No key stored yet: this node is the key.
+                    *pending_key = value;
+                } else {
+                    // A key is waiting: this node is its value. Resetting the
+                    // slot to `BadValue` makes the next node a key again.
+                    let key = mem::replace(pending_key, Yaml::BadValue);
+                    map.insert(key, value);
+                }
+            }
+            _ => unreachable!(),
+        }
+    }
+
+    /// Parses `source` and returns the documents it contains.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `ScanError` reported by the parser.
+    pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
+        let mut loader = YamlLoader {
+            anchor_map: BTreeMap::new(),
+            key_stack: Vec::new(),
+            doc_stack: Vec::new(),
+            docs: Vec::new(),
+        };
+        let mut parser = Parser::new(source.chars());
+        // NOTE(review): the `true` flag presumably enables multi-document
+        // loading; confirm against `Parser::load`.
+        parser.load(&mut loader, true)?;
+        Ok(loader.docs)
+    }
+}
+
+// Generates `fn $name(&self) -> Option<$t>`, which copies the payload out of
+// the `Yaml::$yt` variant and returns `None` for every other variant.
+macro_rules! define_as (
+    ($name:ident, $t:ident, $yt:ident) => (
+        pub fn $name(&self) -> Option<$t> {
+            if let Yaml::$yt(value) = *self {
+                Some(value)
+            } else {
+                None
+            }
+        }
+    );
+);
+
+// Generates `fn $name(&self) -> Option<$t>`, which borrows the payload of the
+// `Yaml::$yt` variant and returns `None` for every other variant.
+macro_rules! define_as_ref (
+    ($name:ident, $t:ty, $yt:ident) => (
+        pub fn $name(&self) -> Option<$t> {
+            if let Yaml::$yt(ref value) = *self {
+                Some(value)
+            } else {
+                None
+            }
+        }
+    );
+);
+
+// Generates `fn $name(self) -> Option<$t>`, which consumes the node and moves
+// the payload out of the `Yaml::$yt` variant; other variants give `None`.
+macro_rules! define_into (
+    ($name:ident, $t:ty, $yt:ident) => (
+        pub fn $name(self) -> Option<$t> {
+            if let Yaml::$yt(value) = self {
+                Some(value)
+            } else {
+                None
+            }
+        }
+    );
+);
+
+impl Yaml {
+    // Copying accessors: `Some(payload)` for the matching variant, else `None`.
+    define_as!(as_bool, bool, Boolean);
+    define_as!(as_i64, i64, Integer);
+
+    // Borrowing accessors.
+    define_as_ref!(as_str, &str, String);
+    define_as_ref!(as_hash, &Hash, Hash);
+    define_as_ref!(as_vec, &Array, Array);
+
+    // Consuming accessors.
+    define_into!(into_bool, bool, Boolean);
+    define_into!(into_i64, i64, Integer);
+    define_into!(into_string, String, String);
+    define_into!(into_hash, Hash, Hash);
+    define_into!(into_vec, Array, Array);
+
+    /// Returns `true` if this node is `Yaml::Null`.
+    pub fn is_null(&self) -> bool {
+        if let Yaml::Null = *self {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns `true` if this node is `Yaml::BadValue`.
+    pub fn is_badvalue(&self) -> bool {
+        if let Yaml::BadValue = *self {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns `true` if this node is a `Yaml::Array`.
+    pub fn is_array(&self) -> bool {
+        if let Yaml::Array(_) = *self {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Parses the stored text of a `Yaml::Real` node; `None` for any other
+    /// variant or when the text does not parse.
+    pub fn as_f64(&self) -> Option<f64> {
+        if let Yaml::Real(ref text) = *self {
+            parse_f64(text)
+        } else {
+            None
+        }
+    }
+
+    /// Consuming counterpart of `as_f64`, with the same result.
+    pub fn into_f64(self) -> Option<f64> {
+        self.as_f64()
+    }
+}
+
+// The lint is silenced on purpose: `from_str` is an inherent function rather
+// than a `FromStr` impl because resolution has no error case.
+#[cfg_attr(feature = "cargo-clippy", allow(should_implement_trait))]
+impl Yaml {
+    /// Resolves the text of an untagged plain scalar to a `Yaml` node.
+    ///
+    /// Recognised forms, in order: `0x` hexadecimal, `0o` octal and
+    /// `+`-prefixed decimal integers; `~` / `null`; `true` / `false`; decimal
+    /// integers; floats accepted by `parse_f64`. Anything else becomes a
+    /// `Yaml::String`, so the function never fails.
+    pub fn from_str(v: &str) -> Yaml {
+        // (prefix, radix of the digits that follow it)
+        const PREFIXED: [(&str, u32); 3] = [("0x", 16), ("0o", 8), ("+", 10)];
+        for &(prefix, radix) in PREFIXED.iter() {
+            if !v.starts_with(prefix) {
+                continue;
+            }
+            let digits = &v[prefix.len()..];
+            // `from_str_radix` accepts a leading sign of its own. Without this
+            // check "0x-1F", "0o+7", "+-5" and "++5" would all resolve to
+            // integers, although none of them is a valid YAML integer.
+            let signed = digits.starts_with('+') || digits.starts_with('-');
+            if !signed {
+                if let Ok(i) = i64::from_str_radix(digits, radix) {
+                    return Yaml::Integer(i);
+                }
+            }
+        }
+        match v {
+            "~" | "null" => Yaml::Null,
+            "true" => Yaml::Boolean(true),
+            "false" => Yaml::Boolean(false),
+            _ => {
+                // Parse once and reuse the value.
+                if let Ok(i) = v.parse::<i64>() {
+                    Yaml::Integer(i)
+                } else if parse_f64(v).is_some() {
+                    // Floats keep their source text; see `Yaml::Real`.
+                    Yaml::Real(v.to_owned())
+                } else {
+                    Yaml::String(v.to_owned())
+                }
+            }
+        }
+    }
+}
+
+// Shared `BadValue` instance, so the `Index` impls can hand out a reference
+// when a lookup fails.
+static BAD_VALUE: Yaml = Yaml::BadValue;
+impl<'a> Index<&'a str> for Yaml {
+    type Output = Yaml;
+
+    /// Looks up the string key `idx` in a hash node.
+    ///
+    /// Yields a reference to `Yaml::BadValue` when the node is not a hash or
+    /// the key is absent; it never panics.
+    fn index(&self, idx: &'a str) -> &Yaml {
+        // Keys are `Yaml` nodes, so the lookup key has to be built first.
+        let key = Yaml::String(idx.to_owned());
+        if let Yaml::Hash(ref map) = *self {
+            map.get(&key).unwrap_or(&BAD_VALUE)
+        } else {
+            &BAD_VALUE
+        }
+    }
+}
+
+impl Index<usize> for Yaml {
+    type Output = Yaml;
+
+    /// Indexes an array by position, or a hash by the integer key `idx`.
+    ///
+    /// Yields a reference to `Yaml::BadValue` when the position or key does not
+    /// exist, or when the node is neither an array nor a hash; it never panics.
+    fn index(&self, idx: usize) -> &Yaml {
+        match *self {
+            Yaml::Array(ref items) => items.get(idx).unwrap_or(&BAD_VALUE),
+            Yaml::Hash(ref map) => {
+                // Hash keys are `i64`. An index above `i64::MAX` would wrap to a
+                // negative number under `as` and could match an unrelated
+                // negative key, so it is reported as missing instead. Where
+                // `usize` is narrower than `i64` the bound is `usize::MAX` and
+                // the check never fires.
+                if idx > i64::MAX as usize {
+                    return &BAD_VALUE;
+                }
+                let key = Yaml::Integer(idx as i64);
+                map.get(&key).unwrap_or(&BAD_VALUE)
+            }
+            _ => &BAD_VALUE,
+        }
+    }
+}
+
+impl IntoIterator for Yaml {
+    type Item = Yaml;
+    type IntoIter = YamlIter;
+
+    /// Consumes the node and iterates over the elements of an array.
+    ///
+    /// Any variant other than `Yaml::Array` produces an empty iterator.
+    fn into_iter(self) -> Self::IntoIter {
+        let items = match self {
+            Yaml::Array(items) => items,
+            _ => Vec::new(),
+        };
+        YamlIter {
+            yaml: items.into_iter(),
+        }
+    }
+}
+
+/// Owning iterator returned by `Yaml::into_iter`.
+pub struct YamlIter {
+ // The array's elements; empty when the source node was not an array.
+ yaml: vec::IntoIter<Yaml>,
+}
+
+impl Iterator for YamlIter {
+    type Item = Yaml;
+
+    /// Yields the next element of the wrapped vector iterator.
+    fn next(&mut self) -> Option<Yaml> {
+        Iterator::next(&mut self.yaml)
+    }
+}
+
+// Unit tests for `YamlLoader` and the `Yaml` accessors.
+//
+// NOTE(review): in this listing the leading whitespace of every line, including
+// the lines inside the YAML fixtures, appears to be collapsed to a single
+// space. Confirm the fixtures against the repository before relying on them.
+#[cfg(test)]
+mod test {
+ use std::f64;
+ use crate::yaml::*;
+ // Mapping values resolve to integer / float / array, nested indexing works,
+ // and indexing a missing key yields `BadValue` instead of panicking.
+ #[test]
+ fn test_coerce() {
+ let s = "---
+a: 1
+b: 2.2
+c: [1, 2]
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out[0];
+ assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
+ assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
+ assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
+ assert!(doc["d"][0].is_badvalue());
+ }
+
+ // Empty input loads without error; a bare `---` yields one `Null` document.
+ #[test]
+ fn test_empty_doc() {
+ let s: String = "".to_owned();
+ YamlLoader::load_from_str(&s).unwrap();
+ let s: String = "---".to_owned();
+ assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null);
+ }
+
+ // Smoke test over comments, nested containers, quoted scalars and a
+ // non-ASCII plain scalar; only the non-ASCII value is asserted.
+ #[test]
+ fn test_parser() {
+ let s: String = "
+# comment
+a0 bb: val
+a1:
+ b1: 4
+ b2: d
+a2: 4 # i'm comment
+a3: [1, 2, 3]
+a4:
+ - - a1
+ - a2
+ - 2
+a5: 'single_quoted'
+a6: \"double_quoted\"
+a7: 你好
+"
+ .to_owned();
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out[0];
+ assert_eq!(doc["a7"].as_str().unwrap(), "你好");
+ }
+
+ // `---` separators split the stream into separate documents.
+ #[test]
+ fn test_multi_doc() {
+ let s = "
+'a scalar'
+---
+'a scalar'
+---
+'a scalar'
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ assert_eq!(out.len(), 3);
+ }
+
+ // An alias to a completed anchored mapping gives access to that mapping.
+ #[test]
+ fn test_anchor() {
+ let s = "
+a1: &DEFAULT
+ b1: 4
+ b2: d
+a2: *DEFAULT
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out[0];
+ assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4);
+ }
+
+ // An alias used inside the node that carries its own anchor resolves to
+ // `BadValue`.
+ #[test]
+ fn test_bad_anchor() {
+ let s = "
+a1: &DEFAULT
+ b1: 4
+ b2: *DEFAULT
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out[0];
+ assert_eq!(doc["a1"]["b2"], Yaml::BadValue);
+ }
+
+ // An anchor with no content loads as an empty string.
+ #[test]
+ fn test_github_27() {
+ // https://github.com/chyh1990/yaml-rust/issues/27
+ let s = "&a";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out[0];
+ assert_eq!(doc.as_str().unwrap(), "");
+ }
+
+ // Resolution of quoted, plain and `!!`-tagged scalars, checked through the
+ // borrowing accessors. Tagged scalars whose text does not fit the tag are
+ // expected to load as `BadValue`.
+ #[test]
+ fn test_plain_datatype() {
+ let s = "
+- 'string'
+- \"string\"
+- string
+- 123
+- -321
+- 1.23
+- -1e4
+- ~
+- null
+- true
+- false
+- !!str 0
+- !!int 100
+- !!float 2
+- !!null ~
+- !!bool true
+- !!bool false
+- 0xFF
+# bad values
+- !!int string
+- !!float string
+- !!bool null
+- !!null val
+- 0o77
+- [ 0xF, 0xF ]
+- +12345
+- [ true, false ]
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out[0];
+
+ assert_eq!(doc[0].as_str().unwrap(), "string");
+ assert_eq!(doc[1].as_str().unwrap(), "string");
+ assert_eq!(doc[2].as_str().unwrap(), "string");
+ assert_eq!(doc[3].as_i64().unwrap(), 123);
+ assert_eq!(doc[4].as_i64().unwrap(), -321);
+ assert_eq!(doc[5].as_f64().unwrap(), 1.23);
+ assert_eq!(doc[6].as_f64().unwrap(), -1e4);
+ assert!(doc[7].is_null());
+ assert!(doc[8].is_null());
+ assert_eq!(doc[9].as_bool().unwrap(), true);
+ assert_eq!(doc[10].as_bool().unwrap(), false);
+ assert_eq!(doc[11].as_str().unwrap(), "0");
+ assert_eq!(doc[12].as_i64().unwrap(), 100);
+ assert_eq!(doc[13].as_f64().unwrap(), 2.0);
+ assert!(doc[14].is_null());
+ assert_eq!(doc[15].as_bool().unwrap(), true);
+ assert_eq!(doc[16].as_bool().unwrap(), false);
+ assert_eq!(doc[17].as_i64().unwrap(), 255);
+ assert!(doc[18].is_badvalue());
+ assert!(doc[19].is_badvalue());
+ assert!(doc[20].is_badvalue());
+ assert!(doc[21].is_badvalue());
+ assert_eq!(doc[22].as_i64().unwrap(), 63);
+ assert_eq!(doc[23][0].as_i64().unwrap(), 15);
+ assert_eq!(doc[23][1].as_i64().unwrap(), 15);
+ assert_eq!(doc[24].as_i64().unwrap(), 12345);
+ assert!(doc[25][0].as_bool().unwrap());
+ assert!(!doc[25][1].as_bool().unwrap());
+ }
+
+ // Malformed flow mapping must be reported as an error.
+ #[test]
+ fn test_bad_hyphen() {
+ // See: https://github.com/chyh1990/yaml-rust/issues/23
+ let s = "{-";
+ assert!(YamlLoader::load_from_str(&s).is_err());
+ }
+
+ // Regression input from the linked issue; loading must return an error.
+ #[test]
+ fn test_issue_65() {
+ // See: https://github.com/chyh1990/yaml-rust/issues/65
+ let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU";
+ assert!(YamlLoader::load_from_str(&b).is_err());
+ }
+
+ // Text glued to or resembling the `---` document-start marker: loading must
+ // terminate, and `----` is an ordinary string rather than a marker.
+ #[test]
+ fn test_bad_docstart() {
+ assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok());
+ assert_eq!(
+ YamlLoader::load_from_str("----"),
+ Ok(vec![Yaml::String(String::from("----"))])
+ );
+ assert_eq!(
+ YamlLoader::load_from_str("--- #here goes a comment"),
+ Ok(vec![Yaml::Null])
+ );
+ assert_eq!(
+ YamlLoader::load_from_str("---- #here goes a comment"),
+ Ok(vec![Yaml::String(String::from("----"))])
+ );
+ }
+
+ // Same scalar resolution as `test_plain_datatype`, but consuming the nodes
+ // through `IntoIterator` and the `into_*` accessors; also covers the
+ // infinity and NaN spellings.
+ #[test]
+ fn test_plain_datatype_with_into_methods() {
+ let s = "
+- 'string'
+- \"string\"
+- string
+- 123
+- -321
+- 1.23
+- -1e4
+- true
+- false
+- !!str 0
+- !!int 100
+- !!float 2
+- !!bool true
+- !!bool false
+- 0xFF
+- 0o77
+- +12345
+- -.INF
+- .NAN
+- !!float .INF
+";
+ let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter();
+ let mut doc = out.next().unwrap().into_iter();
+
+ assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
+ assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
+ assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
+ assert_eq!(doc.next().unwrap().into_i64().unwrap(), 123);
+ assert_eq!(doc.next().unwrap().into_i64().unwrap(), -321);
+ assert_eq!(doc.next().unwrap().into_f64().unwrap(), 1.23);
+ assert_eq!(doc.next().unwrap().into_f64().unwrap(), -1e4);
+ assert_eq!(doc.next().unwrap().into_bool().unwrap(), true);
+ assert_eq!(doc.next().unwrap().into_bool().unwrap(), false);
+ assert_eq!(doc.next().unwrap().into_string().unwrap(), "0");
+ assert_eq!(doc.next().unwrap().into_i64().unwrap(), 100);
+ assert_eq!(doc.next().unwrap().into_f64().unwrap(), 2.0);
+ assert_eq!(doc.next().unwrap().into_bool().unwrap(), true);
+ assert_eq!(doc.next().unwrap().into_bool().unwrap(), false);
+ assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255);
+ assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63);
+ assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345);
+ assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY);
+ // NaN never compares equal to itself, so only presence is checked.
+ assert!(doc.next().unwrap().into_f64().is_some());
+ assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::INFINITY);
+ }
+
+ // Hash iteration follows document order (b, a, c), not key order.
+ #[test]
+ fn test_hash_order() {
+ let s = "---
+b: ~
+a: ~
+c: ~
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let first = out.into_iter().next().unwrap();
+ let mut iter = first.into_hash().unwrap().into_iter();
+ assert_eq!(
+ Some((Yaml::String("b".to_owned()), Yaml::Null)),
+ iter.next()
+ );
+ assert_eq!(
+ Some((Yaml::String("a".to_owned()), Yaml::Null)),
+ iter.next()
+ );
+ assert_eq!(
+ Some((Yaml::String("c".to_owned()), Yaml::Null)),
+ iter.next()
+ );
+ assert_eq!(None, iter.next());
+ }
+
+ // Indexing a hash with a `usize` looks up the matching integer key.
+ #[test]
+ fn test_integer_key() {
+ let s = "
+0:
+ important: true
+1:
+ important: false
+";
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let first = out.into_iter().next().unwrap();
+ assert_eq!(first[0]["important"].as_bool().unwrap(), true);
+ }
+
+ // The same nested mapping must load identically regardless of indent width.
+ // NOTE(review): the four fixtures below are identical in this listing even
+ // though their names imply different indent widths; the original
+ // indentation was presumably lost when the page was rendered - confirm.
+ #[test]
+ fn test_indentation_equality() {
+ let four_spaces = YamlLoader::load_from_str(
+ r#"
+hash:
+ with:
+ indentations
+"#,
+ )
+ .unwrap()
+ .into_iter()
+ .next()
+ .unwrap();
+
+ let two_spaces = YamlLoader::load_from_str(
+ r#"
+hash:
+ with:
+ indentations
+"#,
+ )
+ .unwrap()
+ .into_iter()
+ .next()
+ .unwrap();
+
+ let one_space = YamlLoader::load_from_str(
+ r#"
+hash:
+ with:
+ indentations
+"#,
+ )
+ .unwrap()
+ .into_iter()
+ .next()
+ .unwrap();
+
+ let mixed_spaces = YamlLoader::load_from_str(
+ r#"
+hash:
+ with:
+ indentations
+"#,
+ )
+ .unwrap()
+ .into_iter()
+ .next()
+ .unwrap();
+
+ assert_eq!(four_spaces, two_spaces);
+ assert_eq!(two_spaces, one_space);
+ assert_eq!(four_spaces, mixed_spaces);
+ }
+
+ // Regression test for the linked clap issue: the three `subcommands*`
+ // entries are expected to load differently (`Null` versus a hash).
+ // NOTE(review): the three fixtures look identical here, presumably because
+ // their distinguishing indentation was collapsed in this listing - confirm.
+ #[test]
+ fn test_two_space_indentations() {
+ // https://github.com/kbknapp/clap-rs/issues/965
+
+ let s = r#"
+subcommands:
+ - server:
+ about: server related commands
+subcommands2:
+ - server:
+ about: server related commands
+subcommands3:
+ - server:
+ about: server related commands
+ "#;
+
+ let out = YamlLoader::load_from_str(&s).unwrap();
+ let doc = &out.into_iter().next().unwrap();
+
+ println!("{:#?}", doc);
+ assert_eq!(doc["subcommands"][0]["server"], Yaml::Null);
+ assert!(doc["subcommands2"][0]["server"].as_hash().is_some());
+ assert!(doc["subcommands3"][0]["server"].as_hash().is_some());
+ }
+
+ // 10,000 nested flow mappings must be rejected with an error.
+ #[test]
+ fn test_recursion_depth_check_objects() {
+ let s = "{a:".repeat(10_000) + &"}".repeat(10_000);
+ assert!(YamlLoader::load_from_str(&s).is_err());
+ }
+
+ // 10,000 nested flow sequences must be rejected with an error.
+ #[test]
+ fn test_recursion_depth_check_arrays() {
+ let s = "[".repeat(10_000) + &"]".repeat(10_000);
+ assert!(YamlLoader::load_from_str(&s).is_err());
+ }
+}