aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kevin Saul <kevinsaul@gmail.com> 2023-05-28 21:36:28 +1200
committer Kevin Saul <kevinsaul@gmail.com> 2023-05-28 21:36:28 +1200
commit 98303bbda342433ef3735b13aa902cef4499c5e7 (patch)
tree c71862af10f6bf85abde0cd3dcb376c43067db97
parent e05956094c27117f989d22f25b75633123d72a83 (diff)
download tinyxml2-98303bbda342433ef3735b13aa902cef4499c5e7.tar.gz
add pedantic whitespace mode
-rwxr-xr-x tinyxml2.cpp 22
-rwxr-xr-x tinyxml2.h 5
-rwxr-xr-x xmltest.cpp 172
3 files changed, 192 insertions, 7 deletions
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 4b561b3..901e395 100755
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
}
-char* XMLDocument::Identify( char* p, XMLNode** node )
+char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
{
TIXMLASSERT( node );
TIXMLASSERT( p );
@@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
p += dtdHeaderLen;
}
else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
- returnNode = CreateUnlinkedNode<XMLElement>( _elementPool );
- returnNode->_parseLineNum = _parseCurLineNum;
- p += elementHeaderLen;
+
+ // Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
+ if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
+ returnNode = CreateUnlinkedNode<XMLText>(_textPool);
+ returnNode->_parseLineNum = startLine;
+ p = start; // Back it up, all the text counts.
+ _parseCurLineNum = startLine;
+ }
+ else {
+ returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
+ returnNode->_parseLineNum = _parseCurLineNum;
+ p += elementHeaderLen;
+ }
}
else {
returnNode = CreateUnlinkedNode<XMLText>( _textPool );
@@ -1070,14 +1080,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
if (_document->Error())
return 0;
+ bool first = true;
while( p && *p ) {
XMLNode* node = 0;
- p = _document->Identify( p, &node );
+ p = _document->Identify( p, &node, first );
TIXMLASSERT( p );
if ( node == 0 ) {
break;
}
+ first = false;
const int initialLineNum = node->_parseLineNum;
diff --git a/tinyxml2.h b/tinyxml2.h
index b0c8b6c..45d6980 100755
--- a/tinyxml2.h
+++ b/tinyxml2.h
@@ -1704,7 +1704,8 @@ private:
enum Whitespace {
PRESERVE_WHITESPACE,
- COLLAPSE_WHITESPACE
+ COLLAPSE_WHITESPACE,
+ PEDANTIC_WHITESPACE
};
@@ -1915,7 +1916,7 @@ public:
void DeepCopy(XMLDocument* target) const;
// internal
- char* Identify( char* p, XMLNode** node );
+ char* Identify( char* p, XMLNode** node, bool first );
// internal
void MarkInUse(const XMLNode* const);
diff --git a/xmltest.cpp b/xmltest.cpp
index c3ce079..ae97604 100755
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() );
}
+ // ----------- Preserve Whitespace ------------
+ {
+ const char* xml = "<element>This is &apos; \n\n text &apos;</element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> This \nis &apos; text &apos; </element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> \n This is &apos; text &apos; \n</element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
+ }
+
+ // The following cases cover text that is entirely whitespace, which is intentionally not preserved in this mode
+ {
+ const char* xml = "<element> </element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> </element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element>\n\n</element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> \n</element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> \n \n </element>";
+ XMLDocument doc(true, PRESERVE_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with whitespace preserved", false, doc.Error());
+ XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+ }
+
+ // ----------- Pedantic Whitespace ------------
+ {
+ const char* xml = "<element>This is &apos; \n\n text &apos;</element>";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> This \nis &apos; text &apos; </element>";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> \n This is &apos; text &apos; \n</element>";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
+ }
+
+ // Following cases are for text that is all whitespace which is preserved with pedantic mode
+ {
+ const char* xml = "<element> </element>";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> </element>";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element>\n\n</element>\n";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> \n</element> \n ";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " \n", doc.FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> \n \n </element> ";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " \n \n ", doc.FirstChildElement()->GetText());
+ }
+
+ // Following cases are for checking nested elements are still parsed with pedantic whitespace
+ {
+ const char* xml = "<element>\n\t<a> This is nested text </a>\n</element> ";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> <b> </b> </element>\n";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
+ }
+
+ {
+ const char* xml = "<element> <c attribute=\"test\"/> </element>\n ";
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.Parse(xml);
+ XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+ XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
+ }
+
+ // Check sample xml can be parsed with pedantic mode
+ {
+ XMLDocument doc(true, PEDANTIC_WHITESPACE);
+ doc.LoadFile("resources/dream.xml");
+ XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());
+
+ XMLTest("Dream", "xml version=\"1.0\"",
+ doc.FirstChild()->ToDeclaration()->Value());
+ XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
+ XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
+ doc.FirstChild()->NextSibling()->ToUnknown()->Value());
+ XMLTest("Dream", "And Robin shall restore amends.",
+ doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
+ }
+
{
// An assert should not fire.
const char* xml = "<element/>";