diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/expat.h | 15 | ||||
-rw-r--r-- | lib/expat_external.h | 14 | ||||
-rw-r--r-- | lib/internal.h | 22 | ||||
-rw-r--r-- | lib/xmlparse.c | 24 | ||||
-rw-r--r-- | lib/xmlrole.c | 220 | ||||
-rw-r--r-- | lib/xmltok.c | 111 | ||||
-rw-r--r-- | lib/xmltok_impl.c | 216 |
7 files changed, 352 insertions, 270 deletions
diff --git a/lib/expat.h b/lib/expat.h index ec62f140..086e24b3 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE - This handler has been superceded by the EntityDeclHandler above. + This handler has been superseded by the EntityDeclHandler above. It is provided here for backward compatibility. This is called for a declaration of an unparsed (NDATA) entity. @@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model); /* Exposing the memory handling functions used in Expat */ XMLPARSEAPI(void *) +XML_ATTR_MALLOC +XML_ATTR_ALLOC_SIZE(2) XML_MemMalloc(XML_Parser parser, size_t size); XMLPARSEAPI(void *) +XML_ATTR_ALLOC_SIZE(3) XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XMLPARSEAPI(void) @@ -1031,14 +1034,12 @@ XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -/* Expat follows the GNU/Linux convention of odd number minor version for - beta/development releases and even number minor version for stable - releases. Micro is bumped with each release, and set to 0 with each - change to major or minor version. +/* Expat follows the semantic versioning convention. + See http://semver.org. */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 1 -#define XML_MICRO_VERSION 1 +#define XML_MINOR_VERSION 2 +#define XML_MICRO_VERSION 0 #ifdef __cplusplus } diff --git a/lib/expat_external.h b/lib/expat_external.h index 2c03284e..aa08a2f8 100644 --- a/lib/expat_external.h +++ b/lib/expat_external.h @@ -65,12 +65,26 @@ #endif #endif /* not defined XML_STATIC */ +#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4) +#define XMLIMPORT __attribute__ ((visibility ("default"))) +#endif /* If we didn't define it above, define it away: */ #ifndef XMLIMPORT #define XMLIMPORT #endif +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +#define XML_ATTR_MALLOC __attribute__((__malloc__)) +#else +#define XML_ATTR_MALLOC +#endif + +#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +#else +#define XML_ATTR_ALLOC_SIZE(x) +#endif #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL diff --git a/lib/internal.h b/lib/internal.h index dd545483..94cb98e1 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -71,3 +71,25 @@ #define inline #endif #endif + +#ifndef UNUSED_P +# ifdef __GNUC__ +# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__)) +# else +# define UNUSED_P(p) UNUSED_ ## p +# endif +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +void +align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef); + + +#ifdef __cplusplus +} +#endif diff --git a/lib/xmlparse.c b/lib/xmlparse.c index 0655e080..2f4e7258 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -729,7 +729,7 @@ generate_hash_secret_salt(XML_Parser parser) if (sizeof(unsigned long) == 4) { return entropy * 2147483647; } else { - return entropy * 2305843009213693951; + return entropy * (unsigned long)2305843009213693951; } } @@ -1728,14 +1728,17 @@ XML_GetBuffer(XML_Parser parser, int len) } if (len > bufferLim - bufferEnd) { - int neededSize = len + (int)(bufferEnd - bufferPtr); +#ifdef XML_CONTEXT_BYTES + int keep; +#endif /* defined XML_CONTEXT_BYTES */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr)); if (neededSize < 0) { errorCode = XML_ERROR_NO_MEMORY; return NULL; } #ifdef XML_CONTEXT_BYTES - int keep = (int)(bufferPtr - buffer); - + keep = (int)(bufferPtr - buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; neededSize += keep; @@ -1760,7 +1763,8 @@ XML_GetBuffer(XML_Parser parser, int len) if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; + /* Do not invoke signed arithmetic overflow: */ + bufferSize = (int) (2U * (unsigned) bufferSize); } while (bufferSize < neededSize && bufferSize > 0); if (bufferSize <= 0) { errorCode = XML_ERROR_NO_MEMORY; @@ -1887,7 +1891,7 @@ XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser parser) { if (eventPtr) - return parseEndByteIndex - (parseEndPtr - eventPtr); + return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr)); return -1; } @@ -4959,9 +4963,9 @@ internalEntityProcessor(XML_Parser parser, static enum XML_Error PTRCALL errorProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) + const char *UNUSED_P(s), + const char *UNUSED_P(end), + const char **UNUSED_P(nextPtr)) { return errorCode; } @@ -6285,7 +6289,7 @@ poolGrow(STRING_POOL *pool) } if (pool->blocks && pool->start == pool->blocks->s) { BLOCK *temp; - int blockSize = (int)(pool->end - pool->start)*2; + int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); if (blockSize < 0) return XML_FALSE; diff --git a/lib/xmlrole.c b/lib/xmlrole.c index 44772e21..8a68e20f 100644 --- a/lib/xmlrole.c +++ b/lib/xmlrole.c @@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state, static int PTRCALL prolog2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state, static int PTRCALL doctype0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state, static int PTRCALL doctype2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state, static int PTRCALL doctype3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state, static int PTRCALL doctype4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state, static int PTRCALL doctype5(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state, static int PTRCALL entity0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state, static int PTRCALL entity1(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state, static int PTRCALL entity3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state, static int PTRCALL entity4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state, static int PTRCALL entity6(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state, static int PTRCALL entity8(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state, static int PTRCALL entity9(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state, static int PTRCALL entity10(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state, static int PTRCALL notation0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state, static int PTRCALL notation2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state, static int PTRCALL notation3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state, static int PTRCALL notation4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state, static int PTRCALL attlist0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state, static int PTRCALL attlist1(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state, static int PTRCALL attlist3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state, static int PTRCALL attlist4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state, static int PTRCALL attlist5(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state, static int PTRCALL attlist6(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state, static int PTRCALL attlist7(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state, static int PTRCALL attlist9(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state, static int PTRCALL element0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state, static int PTRCALL element3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state, static int PTRCALL element4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state, static int PTRCALL element5(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state, static int PTRCALL element6(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state, static int PTRCALL element7(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state, static int PTRCALL condSect1(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state, static int PTRCALL condSect2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state, static int PTRCALL declClose(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state, } static int PTRCALL -error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +error(PROLOG_STATE *UNUSED_P(state), + int UNUSED_P(tok), + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { return XML_ROLE_NONE; } diff --git a/lib/xmltok.c b/lib/xmltok.c index cb98ce1f..daa35654 100644 --- a/lib/xmltok.c +++ b/lib/xmltok.c @@ -46,7 +46,7 @@ #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) #define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) /* A 2 byte UTF-8 representation splits the characters 11 bits between the bottom 5 and 6 bits of the bytes. We need 8 bits to index into @@ -56,7 +56,7 @@ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + ((((byte)[0]) & 3) << 1) \ + ((((byte)[1]) >> 5) & 1)] \ - & (1 << (((byte)[1]) & 0x1F))) + & (1u << (((byte)[1]) & 0x1F))) /* A 3 byte UTF-8 representation splits the characters 16 bits between the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index @@ -69,7 +69,7 @@ << 3) \ + ((((byte)[1]) & 3) << 1) \ + ((((byte)[2]) >> 5) & 1)] \ - & (1 << (((byte)[2]) & 0x1F))) + & (1u << (((byte)[2]) & 0x1F))) #define UTF8_GET_NAMING(pages, p, n) \ ((n) == 2 \ @@ -122,19 +122,19 @@ ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) static int PTRFASTCALL -isNever(const ENCODING *enc, const char *p) +isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p)) { return 0; } static int PTRFASTCALL -utf8_isName2(const ENCODING *enc, const char *p) +utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isName3(const ENCODING *enc, const char *p) +utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } @@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p) #define utf8_isName4 isNever static int PTRFASTCALL -utf8_isNmstrt2(const ENCODING *enc, const char *p) +utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isNmstrt3(const ENCODING *enc, const char *p) +utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } @@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p) #define utf8_isNmstrt4 isNever static int PTRFASTCALL -utf8_isInvalid2(const ENCODING *enc, const char *p) +utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_INVALID2((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid3(const ENCODING *enc, const char *p) +utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_INVALID3((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid4(const ENCODING *enc, const char *p) +utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_INVALID4((const unsigned char *)p); } @@ -222,6 +222,17 @@ struct normal_encoding { E ## isInvalid3, \ E ## isInvalid4 +#define NULL_VTABLE \ + /* isName2 */ NULL, \ + /* isName3 */ NULL, \ + /* isName4 */ NULL, \ + /* isNmstrt2 */ NULL, \ + /* isNmstrt3 */ NULL, \ + /* isNmstrt4 */ NULL, \ + /* isInvalid2 */ NULL, \ + /* isInvalid3 */ NULL, \ + /* isInvalid4 */ NULL + static int FASTCALL checkCharRefNumber(int); #include "xmltok_impl.h" @@ -318,8 +329,43 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval4 = 0xf0 }; +void +align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef) +{ + const char * fromLim = *fromLimRef; + size_t walked = 0; + for (; fromLim > from; fromLim--, walked++) { + const unsigned char prev = (unsigned char)fromLim[-1]; + if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ + if (walked + 1 >= 4) { + fromLim += 4 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ + if (walked + 1 >= 3) { + fromLim += 3 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ + if (walked + 1 >= 2) { + fromLim += 2 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ + break; + } + } + *fromLimRef = fromLim; +} + static enum XML_Convert_Result PTRCALL -utf8_toUtf8(const ENCODING *enc, +utf8_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, char **toP, const char *toLim) { @@ -329,9 +375,8 @@ utf8_toUtf8(const ENCODING *enc, if (fromLim - *fromP > toLim - *toP) { /* Avoid copying partial characters. */ res = XML_CONVERT_OUTPUT_EXHAUSTED; - for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) - if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) - break; + fromLim = *fromP + (toLim - *toP); + align_limit_to_full_utf8_characters(*fromP, &fromLim); } for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) *to = *from; @@ -449,7 +494,7 @@ static const struct normal_encoding internal_utf8_encoding = { }; static enum XML_Convert_Result PTRCALL -latin1_toUtf8(const ENCODING *enc, +latin1_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, char **toP, const char *toLim) { @@ -474,7 +519,7 @@ latin1_toUtf8(const ENCODING *enc, } static enum XML_Convert_Result PTRCALL -latin1_toUtf16(const ENCODING *enc, +latin1_toUtf16(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { @@ -495,7 +540,7 @@ static const struct normal_encoding latin1_encoding_ns = { #include "asciitab.h" #include "latin1tab.h" }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; #endif @@ -508,11 +553,11 @@ static const struct normal_encoding latin1_encoding = { #undef BT_COLON #include "latin1tab.h" }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; static enum XML_Convert_Result PTRCALL -ascii_toUtf8(const ENCODING *enc, +ascii_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, char **toP, const char *toLim) { @@ -533,7 +578,7 @@ static const struct normal_encoding ascii_encoding_ns = { #include "asciitab.h" /* BT_NONXML == 0 */ }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; #endif @@ -546,7 +591,7 @@ static const struct normal_encoding ascii_encoding = { #undef BT_COLON /* BT_NONXML == 0 */ }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; static int PTRFASTCALL @@ -570,7 +615,7 @@ unicode_byte_type(char hi, char lo) #define DEFINE_UTF16_TO_UTF8(E) \ static enum XML_Convert_Result PTRCALL \ -E ## toUtf8(const ENCODING *enc, \ +E ## toUtf8(const ENCODING *UNUSED_P(enc), \ const char **fromP, const char *fromLim, \ char **toP, const char *toLim) \ { \ @@ -642,7 +687,7 @@ E ## toUtf8(const ENCODING *enc, \ #define DEFINE_UTF16_TO_UTF16(E) \ static enum XML_Convert_Result PTRCALL \ -E ## toUtf16(const ENCODING *enc, \ +E ## toUtf16(const ENCODING *UNUSED_P(enc), \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \ { \ @@ -776,7 +821,7 @@ static const struct normal_encoding little2_encoding_ns = { #include "asciitab.h" #include "latin1tab.h" }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; #endif @@ -795,7 +840,7 @@ static const struct normal_encoding little2_encoding = { #undef BT_COLON #include "latin1tab.h" }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; #if BYTEORDER != 4321 @@ -808,7 +853,7 @@ static const struct normal_encoding internal_little2_encoding_ns = { #include "iasciitab.h" #include "latin1tab.h" }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; #endif @@ -821,7 +866,7 @@ static const struct normal_encoding internal_little2_encoding = { #undef BT_COLON #include "latin1tab.h" }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; #endif @@ -917,7 +962,7 @@ static const struct normal_encoding big2_encoding_ns = { #include "asciitab.h" #include "latin1tab.h" }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; #endif @@ -936,7 +981,7 @@ static const struct normal_encoding big2_encoding = { #undef BT_COLON #include "latin1tab.h" }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; #if BYTEORDER != 1234 @@ -949,7 +994,7 @@ static const struct normal_encoding internal_big2_encoding_ns = { #include "iasciitab.h" #include "latin1tab.h" }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; #endif @@ -962,7 +1007,7 @@ static const struct normal_encoding internal_big2_encoding = { #undef BT_COLON #include "latin1tab.h" }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; #endif @@ -988,7 +1033,7 @@ streqci(const char *s1, const char *s2) } static void PTRCALL -initUpdatePosition(const ENCODING *enc, const char *ptr, +initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, POSITION *pos) { normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c index 6c5a3ba4..5f779c05 100644 --- a/lib/xmltok_impl.c +++ b/lib/xmltok_impl.c @@ -87,27 +87,45 @@ #define PREFIX(ident) ident #endif + +#define HAS_CHARS(enc, ptr, end, count) \ + (end - ptr >= count * MINBPC(enc)) + +#define HAS_CHAR(enc, ptr, end) \ + HAS_CHARS(enc, ptr, end, 1) + +#define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ + } + +#define REQUIRE_CHAR(enc, ptr, end) \ + REQUIRE_CHARS(enc, ptr, end, 1) + + /* ptr points to character following "<!-" */ static int PTRCALL PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr < end) { + if (HAS_CHAR(enc, ptr, end)) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -131,8 +149,7 @@ static int PTRCALL PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: @@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, +PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, int *tokPtr) { int upper = 0; @@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, { int tok; const char *target = ptr; - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; @@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; @@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, +PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, const char **nextTokPtr) { static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ - if (end - ptr < 6 * MINBPC(enc)) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 6); for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; @@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; @@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; @@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, ptr += MINBPC(enc); break; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -383,19 +392,18 @@ static int PTRCALL PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: - for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; @@ -432,7 +440,7 @@ static int PTRCALL PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr < end) { + if (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -464,7 +472,7 @@ static int PTRCALL PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr < end) { + if (HAS_CHAR(enc, ptr, end)) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { @@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; @@ -496,8 +504,7 @@ static int PTRCALL PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_NUM: @@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon = 0; #endif - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, } hadColon = 1; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, int t; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); t = BYTE_TYPE(enc, ptr); if (t == BT_EQUALS) break; @@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #endif for (;;) { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); open = BYTE_TYPE(enc, ptr); if (open == BT_QUOT || open == BT_APOS) break; @@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* in attribute value */ for (;;) { int t; - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); t = BYTE_TYPE(enc, ptr); if (t == open) break; @@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, } } ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: @@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to closing quote */ for (;;) { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, case BT_SOL: sol: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon; #endif - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_EXCL: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, hadColon = 0; #endif /* we have a start-tag */ - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, } hadColon = 1; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: @@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_SOL: sol: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); @@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); break; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_RSQB: - if (ptr + MINBPC(enc) != end) { + if (HAS_CHARS(enc, ptr, end, 2)) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } - if (ptr + 2*MINBPC(enc) != end) { + if (HAS_CHARS(enc, ptr, end, 3)) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; @@ -884,8 +881,7 @@ static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: @@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -913,15 +909,14 @@ static int PTRCALL PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: @@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) @@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, ptr += MINBPC(enc); if (t != open) break; - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { @@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_LT: { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: @@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; @@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: @@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: @@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) @@ -1206,8 +1198,10 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *start; if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) + return XML_TOK_PARTIAL; start = ptr; - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1264,8 +1258,10 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *start; if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) + return XML_TOK_PARTIAL; start = ptr; - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); @@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, } break; case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { @@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr < end; ptr += MINBPC(enc)) { + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, } static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) +PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) { int result = 0; /* skip &# */ @@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) } static int PTRCALL -PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, +PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end) { switch ((end - ptr)/MINBPC(enc)) { @@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) } static int PTRCALL -PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, +PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) + if (end1 - ptr1 < MINBPC(enc)) return 0; if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; @@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *end, POSITION *pos) { - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc, case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr < end && BYTE_TYPE(enc, ptr) == BT_LF) + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (XML_Size)-1; break; |