summaryrefslogtreecommitdiff
path: root/tinyxmlparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tinyxmlparser.cpp')
-rw-r--r--tinyxmlparser.cpp214
1 files changed, 140 insertions, 74 deletions
diff --git a/tinyxmlparser.cpp b/tinyxmlparser.cpp
index 67d0a9e..81b7eae 100644
--- a/tinyxmlparser.cpp
+++ b/tinyxmlparser.cpp
@@ -1,6 +1,6 @@
/*
www.sourceforge.net/projects/tinyxml
-Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
+Original code by Lee Thomason (www.grinninglizard.com)
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
@@ -22,16 +22,25 @@ must not be misrepresented as being the original software.
distribution.
*/
-#include "tinyxml.h"
#include <ctype.h>
#include <stddef.h>
+#include "tinyxml.h"
+
//#define DEBUG_PARSER
+#if defined( DEBUG_PARSER )
+# if defined( DEBUG ) && defined( _MSC_VER )
+# include <windows.h>
+# define TIXML_LOG OutputDebugString
+# else
+# define TIXML_LOG printf
+# endif
+#endif
// Note tha "PutString" hardcodes the same list. This
// is less flexible than it appears. Changing the entries
// or order will break putstring.
-TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
+TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
{
{ "&amp;", 5, '&' },
{ "&lt;", 4, '<' },
@@ -165,7 +174,7 @@ class TiXmlParsingData
public:
void Stamp( const char* now, TiXmlEncoding encoding );
- const TiXmlCursor& Cursor() { return cursor; }
+ const TiXmlCursor& Cursor() const { return cursor; }
private:
// Only used by the document!
@@ -277,7 +286,7 @@ void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
if ( encoding == TIXML_ENCODING_UTF8 )
{
// Eat the 1 to 4 byte utf8 character.
- int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
+ int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
if ( step == 0 )
step = 1; // Error case from bad encoding, but handle gracefully.
p += step;
@@ -337,7 +346,7 @@ const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
continue;
}
- if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.
+ if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
++p;
else
break;
@@ -345,7 +354,7 @@ const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
}
else
{
- while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
+ while ( *p && IsWhiteSpace( *p ) )
++p;
}
@@ -353,7 +362,7 @@ const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
}
#ifdef TIXML_USE_STL
-/*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
+/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
{
for( ;; )
{
@@ -368,7 +377,7 @@ const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
}
}
-/*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
+/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
{
//assert( character > 0 && character < 128 ); // else it won't work in utf-8
while ( in->good() )
@@ -386,8 +395,14 @@ const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
}
#endif
+// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
+// "assign" optimization removes over 10% of the execution time.
+//
const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
{
+ // Oddly, not supported on some comilers,
+ //name->clear();
+ // So use this:
*name = "";
assert( p );
@@ -401,6 +416,7 @@ const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncodi
if ( p && *p
&& ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
{
+ const char* start = p;
while( p && *p
&& ( IsAlphaNum( (unsigned char ) *p, encoding )
|| *p == '_'
@@ -408,9 +424,12 @@ const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncodi
|| *p == '.'
|| *p == ':' ) )
{
- (*name) += *p;
+ //(*name) += *p; // expensive
++p;
}
+ if ( p-start > 0 ) {
+ name->assign( start, p-start );
+ }
return p;
}
return 0;
@@ -506,6 +525,8 @@ const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXml
// So it wasn't an entity, its unrecognized, or something like that.
*value = *p; // Don't put back the last one, since we return it!
+ //*length = 1; // Leave unrecognized entities - this doesn't really work.
+ // Just writes strange XML.
return p+1;
}
@@ -610,12 +631,14 @@ const char* TiXmlBase::ReadText( const char* p,
}
}
}
- return p + strlen( endTag );
+ if ( p && *p )
+ p += strlen( endTag );
+ return ( p && *p ) ? p : 0;
}
#ifdef TIXML_USE_STL
-void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
{
// The basic issue with a document is that we don't know what we're
// streaming. Read something presumed to be a tag (and hope), then
@@ -802,7 +825,6 @@ TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
return 0;
}
- TiXmlDocument* doc = GetDocument();
p = SkipWhiteSpace( p, encoding );
if ( !p || !*p )
@@ -873,17 +895,12 @@ TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
// Set the parent, so it can report errors
returnNode->parent = this;
}
- else
- {
- if ( doc )
- doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
- }
return returnNode;
}
#ifdef TIXML_USE_STL
-void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
+void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
{
// We're called with some amount of pre-parsing. That is, some of "this"
// element is in "tag". Go ahead and stream to the closing ">"
@@ -918,6 +935,7 @@ void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
{
// There is more. Could be:
// text
+ // cdata text (which looks like another node)
// closing tag
// another node.
for ( ;; )
@@ -965,6 +983,17 @@ void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
*tag += (char) c;
in->get();
+ // Early out if we find the CDATA id.
+ if ( c == '[' && tag->size() >= 9 )
+ {
+ size_t len = tag->size();
+ const char* start = tag->c_str() + len - 9;
+ if ( strcmp( start, "<![CDATA[" ) == 0 ) {
+ assert( !closingTag );
+ break;
+ }
+ }
+
if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
{
firstCharFound = true;
@@ -1048,7 +1077,6 @@ const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEnc
TIXML_STRING endTag ("</");
endTag += value;
- endTag += ">";
// Check for and read attributes. Also look for an empty
// tag or an end tag.
@@ -1079,14 +1107,28 @@ const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEnc
// elements -- read the end tag, and return.
++p;
p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
- if ( !p || !*p )
+ if ( !p || !*p ) {
+ // We were looking for the end tag, but found nothing.
+ // Fix for [ 1663758 ] Failure to report error on bad XML
+ if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
return 0;
+ }
// We should find the end tag now
+ // note that:
+ // </foo > and
+ // </foo>
+ // are both valid end tags.
if ( StringEqual( p, endTag.c_str(), false, encoding ) )
{
p += endTag.length();
- return p;
+ p = SkipWhiteSpace( p, encoding );
+ if ( p && *p && *p == '>' ) {
+ ++p;
+ return p;
+ }
+ if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
+ return 0;
}
else
{
@@ -1100,12 +1142,11 @@ const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEnc
TiXmlAttribute* attrib = new TiXmlAttribute();
if ( !attrib )
{
- if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
return 0;
}
attrib->SetDocument( document );
- const char* pErr = p;
+ pErr = p;
p = attrib->Parse( p, data, encoding );
if ( !p || !*p )
@@ -1116,10 +1157,14 @@ const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEnc
}
// Handle the strange case of double attributes:
+ #ifdef TIXML_USE_STL
+ TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
+ #else
TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
+ #endif
if ( node )
{
- node->SetValue( attrib->Value() );
+ if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
delete attrib;
return 0;
}
@@ -1148,8 +1193,7 @@ const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXm
if ( !textNode )
{
- if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
- return 0;
+ return 0;
}
if ( TiXmlBase::IsWhiteSpaceCondensed() )
@@ -1204,7 +1248,7 @@ const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXm
#ifdef TIXML_USE_STL
-void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{
@@ -1254,15 +1298,16 @@ const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEnc
if ( !p )
{
- if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
+ if ( document )
+ document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
}
- if ( *p == '>' )
+ if ( p && *p == '>' )
return p+1;
return p;
}
#ifdef TIXML_USE_STL
-void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{
@@ -1306,11 +1351,40 @@ const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEnc
if ( !StringEqual( p, startTag, false, encoding ) )
{
- document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
+ if ( document )
+ document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
return 0;
}
p += strlen( startTag );
- p = ReadText( p, &value, false, endTag, false, encoding );
+
+ // [ 1475201 ] TinyXML parses entities in comments
+ // Oops - ReadText doesn't work, because we don't want to parse the entities.
+ // p = ReadText( p, &value, false, endTag, false, encoding );
+ //
+ // from the XML spec:
+ /*
+ [Definition: Comments may appear anywhere in a document outside other markup; in addition,
+ they may appear within the document type declaration at places allowed by the grammar.
+ They are not part of the document's character data; an XML processor MAY, but need not,
+ make it possible for an application to retrieve the text of comments. For compatibility,
+ the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
+ references MUST NOT be recognized within comments.
+
+ An example of a comment:
+
+ <!-- declarations for <head> & <body> -->
+ */
+
+ value = "";
+ // Keep all the white space.
+ while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
+ {
+ value.append( p, 1 );
+ ++p;
+ }
+ if ( p && *p )
+ p += strlen( endTag );
+
return p;
}
@@ -1320,10 +1394,6 @@ const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlE
p = SkipWhiteSpace( p, encoding );
if ( !p || !*p ) return 0;
- int tabsize = 4;
- if ( document )
- tabsize = document->TabSize();
-
if ( data )
{
data->Stamp( p, encoding );
@@ -1353,17 +1423,19 @@ const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlE
}
const char* end;
+ const char SINGLE_QUOTE = '\'';
+ const char DOUBLE_QUOTE = '\"';
- if ( *p == '\'' )
+ if ( *p == SINGLE_QUOTE )
{
++p;
- end = "\'";
+ end = "\'"; // single quote in string
p = ReadText( p, &value, false, end, false, encoding );
}
- else if ( *p == '"' )
+ else if ( *p == DOUBLE_QUOTE )
{
++p;
- end = "\"";
+ end = "\""; // double quote in string
p = ReadText( p, &value, false, end, false, encoding );
}
else
@@ -1372,10 +1444,17 @@ const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlE
// But this is such a common error that the parser will try
// its best, even without them.
value = "";
- while ( p && *p // existence
- && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace
- && *p != '/' && *p != '>' ) // tag end
+ while ( p && *p // existence
+ && !IsWhiteSpace( *p ) // whitespace
+ && *p != '/' && *p != '>' ) // tag end
{
+ if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
+ // [ 1451649 ] Attribute values with trailing quotes not handled correctly
+ // We did not have an opening quote but seem to have a
+ // closing one. Give up and throw an error.
+ if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
+ return 0;
+ }
value += *p;
++p;
}
@@ -1384,11 +1463,15 @@ const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlE
}
#ifdef TIXML_USE_STL
-void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
{
- if ( cdata )
+ while ( in->good() )
{
- int c = in->get();
+ int c = in->peek();
+ if ( !cdata && (c == '<' ) )
+ {
+ return;
+ }
if ( c <= 0 )
{
TiXmlDocument* document = GetDocument();
@@ -1398,33 +1481,15 @@ void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
}
(*tag) += (char) c;
+ in->get(); // "commits" the peek made above
- if ( c == '>'
- && tag->at( tag->length() - 2 ) == ']'
- && tag->at( tag->length() - 3 ) == ']' )
- {
- // All is well.
- return;
- }
- }
- else
- {
- while ( in->good() )
- {
- int c = in->peek();
- if ( c == '<' )
- return;
- if ( c <= 0 )
- {
- TiXmlDocument* document = GetDocument();
- if ( document )
- document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
+ if ( cdata && c == '>' && tag->size() >= 3 ) {
+ size_t len = tag->size();
+ if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
+ // terminator of cdata.
return;
}
-
- (*tag) += (char) c;
- in->get();
- }
+ }
}
}
#endif
@@ -1449,7 +1514,8 @@ const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncodi
if ( !StringEqual( p, startTag, false, encoding ) )
{
- document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
+ if ( document )
+ document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
return 0;
}
p += strlen( startTag );
@@ -1473,14 +1539,14 @@ const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncodi
const char* end = "<";
p = ReadText( p, &value, ignoreWhite, end, false, encoding );
- if ( p )
+ if ( p && *p )
return p-1; // don't truncate the '<'
return 0;
}
}
#ifdef TIXML_USE_STL
-void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{