aboutsummaryrefslogtreecommitdiff
path: root/src/analyze.rs
blob: d369d2f0145bb32e9b9a2d6d1832576e531a5626 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#![forbid(unsafe_code)]

extern crate xml;

use std::cmp;
use std::env;
use std::io::{self, Read, Write, BufReader};
use std::fs::File;
use std::collections::HashSet;

use xml::ParserConfig;
use xml::reader::XmlEvent;

/// Terminates the process immediately with the given exit status.
///
/// * `abort!(status)` exits without printing anything.
/// * `abort!(status, fmt, args...)` first writes the formatted message, followed by a
///   newline, to standard error and then exits with `status`.
///
/// The message form expands to `writeln!`, so `std::io::Write` must be in scope at the
/// call site. Failure to write to standard error panics via `unwrap`.
macro_rules! abort {
    ($status:expr) => {
        ::std::process::exit($status)
    };
    ($status:expr, $($message:tt)+) => {{
        writeln!(&mut ::std::io::stderr(), $($message)+).unwrap();
        // Reuse the message-less arm for the actual exit.
        abort!($status)
    }};
}

fn main() {
    let mut file;
    let mut stdin;
    let source: &mut Read = match env::args().nth(1) {
        Some(file_name) => {
            file = File::open(file_name)
                .unwrap_or_else(|e| abort!(1, "Cannot open input file: {}", e));
            &mut file
        }
        None => {
            stdin = io::stdin();
            &mut stdin
        }
    };

    let reader = ParserConfig::new()
        .whitespace_to_characters(true)
        .ignore_comments(false)
        .create_reader(BufReader::new(source));

    let mut processing_instructions = 0;
    let mut elements = 0;
    let mut character_blocks = 0;
    let mut cdata_blocks = 0;
    let mut characters = 0;
    let mut comment_blocks = 0;
    let mut comment_characters = 0;
    let mut namespaces = HashSet::new();
    let mut depth = 0;
    let mut max_depth = 0;

    for e in reader {
        match e {
            Ok(e) => match e {
                XmlEvent::StartDocument { version, encoding, standalone } =>
                    println!(
                        "XML document version {}, encoded in {}, {}standalone",
                        version, encoding, if standalone.unwrap_or(false) { "" } else { "not " }
                    ),
                XmlEvent::EndDocument => println!("Document finished"),
                XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
                XmlEvent::Whitespace(_) => {}  // can't happen due to configuration
                XmlEvent::Characters(s) => {
                    character_blocks += 1;
                    characters += s.len();
                }
                XmlEvent::CData(s) => {
                    cdata_blocks += 1;
                    characters += s.len();
                }
                XmlEvent::Comment(s) => {
                    comment_blocks += 1;
                    comment_characters += s.len();
                }
                XmlEvent::StartElement { namespace, .. } => {
                    depth += 1;
                    max_depth = cmp::max(max_depth, depth);
                    elements += 1;
                    namespaces.extend(namespace.0.into_iter().map(|(_, ns_uri)| ns_uri));
                }
                XmlEvent::EndElement { .. } => {
                    depth -= 1;
                }
            },
            Err(e) => abort!(1, "Error parsing XML document: {}", e)
        }
    }
    namespaces.remove(xml::namespace::NS_EMPTY_URI);
    namespaces.remove(xml::namespace::NS_XMLNS_URI);
    namespaces.remove(xml::namespace::NS_XML_URI);

    println!("Elements: {}, maximum depth: {}", elements, max_depth);
    println!("Namespaces (excluding built-in): {}", namespaces.len());
    println!("Characters: {}, characters blocks: {}, CDATA blocks: {}",
             characters, character_blocks, cdata_blocks);
    println!("Comment blocks: {}, comment characters: {}", comment_blocks, comment_characters);
    println!("Processing instructions (excluding built-in): {}", processing_instructions);
}