diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | HTMLparser.c | 442 | ||||
-rw-r--r-- | INSTALL.libxml2 | 58 | ||||
-rw-r--r-- | METADATA | 8 | ||||
-rw-r--r-- | entities.c | 16 | ||||
-rw-r--r-- | fuzz/Makefile.am | 4 | ||||
-rw-r--r-- | fuzz/html.options | 2 | ||||
-rw-r--r-- | fuzz/uri.options | 2 | ||||
-rw-r--r-- | include/libxml/c14n.h | 10 | ||||
-rw-r--r-- | parser.c | 16 | ||||
-rw-r--r-- | tree.c | 31 | ||||
-rw-r--r-- | xinclude.c | 5 | ||||
-rw-r--r-- | xmllint.c | 3 | ||||
-rw-r--r-- | xmlschemastypes.c | 41 | ||||
-rw-r--r-- | xpath.c | 41 |
15 files changed, 443 insertions, 238 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index d6ee9ec7..a437717b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -496,7 +496,7 @@ set_target_properties( VERSION ${PROJECT_VERSION} ) -if(WIN32) +if(MSVC) if(BUILD_SHARED_LIBS) set_target_properties( LibXml2 diff --git a/HTMLparser.c b/HTMLparser.c index c9a64c78..b56363a3 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -1072,102 +1072,266 @@ html40ElementTable[] = { } }; +typedef struct { + const char *oldTag; + const char *newTag; +} htmlStartCloseEntry; + /* * start tags that imply the end of current element */ -static const char * const htmlStartClose[] = { -"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6", - "dl", "ul", "ol", "menu", "dir", "address", "pre", - "listing", "xmp", "head", NULL, -"head", "p", NULL, -"title", "p", NULL, -"body", "head", "style", "link", "title", "p", NULL, -"frameset", "head", "style", "link", "title", "p", NULL, -"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address", - "pre", "listing", "xmp", "head", "li", NULL, -"hr", "p", "head", NULL, -"h1", "p", "head", NULL, -"h2", "p", "head", NULL, -"h3", "p", "head", NULL, -"h4", "p", "head", NULL, -"h5", "p", "head", NULL, -"h6", "p", "head", NULL, -"dir", "p", "head", NULL, -"address", "p", "head", "ul", NULL, -"pre", "p", "head", "ul", NULL, -"listing", "p", "head", NULL, -"xmp", "p", "head", NULL, -"blockquote", "p", "head", NULL, -"dl", "p", "dt", "menu", "dir", "address", "pre", "listing", - "xmp", "head", NULL, -"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", - "head", "dd", NULL, -"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", - "head", "dt", NULL, -"ul", "p", "head", "ol", "menu", "dir", "address", "pre", - "listing", "xmp", NULL, -"ol", "p", "head", "ul", NULL, -"menu", "p", "head", "ul", NULL, -"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, -"div", "p", "head", NULL, -"noscript", "script", NULL, -"center", "font", "b", "i", "p", "head", NULL, -"a", "a", "head", NULL, -"caption", "p", NULL, -"colgroup", "caption", "colgroup", "col", "p", NULL, -"col", "caption", "col", "p", NULL, -"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", - "listing", "xmp", "a", NULL, -"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, -"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, -"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, -"thead", "caption", "col", "colgroup", NULL, -"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", - "tbody", "p", NULL, -"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", - "tfoot", "tbody", "p", NULL, -"optgroup", "option", NULL, -"option", "option", NULL, -"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", - "pre", "listing", "xmp", "a", NULL, -/* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */ -"tt", "head", NULL, -"i", "head", NULL, -"b", "head", NULL, -"u", "head", NULL, -"s", "head", NULL, -"strike", "head", NULL, -"big", "head", NULL, -"small", "head", NULL, - -"em", "head", NULL, -"strong", "head", NULL, -"dfn", "head", NULL, -"code", "head", NULL, -"samp", "head", NULL, -"kbd", "head", NULL, -"var", "head", NULL, -"cite", "head", NULL, -"abbr", "head", NULL, -"acronym", "head", NULL, - -/* "a" */ -"img", "head", NULL, -/* "applet" */ -/* "embed" */ -/* "object" */ -"font", "head", NULL, -/* "basefont" */ -"br", "head", NULL, -/* "script" */ -"map", "head", NULL, -"q", "head", NULL, -"sub", "head", NULL, -"sup", "head", NULL, -"span", "head", NULL, -"bdo", "head", NULL, -"iframe", "head", NULL, -NULL +static const htmlStartCloseEntry htmlStartClose[] = { + { "a", "a" }, + { "a", "fieldset" }, + { "a", "table" }, + { "a", "td" }, + { "a", "th" }, + { "address", "dd" }, + { "address", "dl" }, + { "address", "dt" }, + { "address", "form" }, + { "address", "li" }, + { "address", "ul" }, + { "b", "center" }, + { "b", "p" }, + { "b", "td" }, + { "b", "th" }, + { "big", "p" }, + { "caption", "col" }, + { "caption", "colgroup" }, + { "caption", "tbody" }, + { "caption", "tfoot" }, + { "caption", "thead" }, + { "caption", "tr" }, + { "col", "col" }, + { "col", "colgroup" }, + { "col", "tbody" }, + { "col", "tfoot" }, + { "col", "thead" }, + { "col", "tr" }, + { "colgroup", "colgroup" }, + { "colgroup", "tbody" }, + { "colgroup", "tfoot" }, + { "colgroup", "thead" }, + { "colgroup", "tr" }, + { "dd", "dt" }, + { "dir", "dd" }, + { "dir", "dl" }, + { "dir", "dt" }, + { "dir", "form" }, + { "dir", "ul" }, + { "dl", "form" }, + { "dl", "li" }, + { "dt", "dd" }, + { "dt", "dl" }, + { "font", "center" }, + { "font", "td" }, + { "font", "th" }, + { "form", "form" }, + { "h1", "fieldset" }, + { "h1", "form" }, + { "h1", "li" }, + { "h1", "p" }, + { "h1", "table" }, + { "h2", "fieldset" }, + { "h2", "form" }, + { "h2", "li" }, + { "h2", "p" }, + { "h2", "table" }, + { "h3", "fieldset" }, + { "h3", "form" }, + { "h3", "li" }, + { "h3", "p" }, + { "h3", "table" }, + { "h4", "fieldset" }, + { "h4", "form" }, + { "h4", "li" }, + { "h4", "p" }, + { "h4", "table" }, + { "h5", "fieldset" }, + { "h5", "form" }, + { "h5", "li" }, + { "h5", "p" }, + { "h5", "table" }, + { "h6", "fieldset" }, + { "h6", "form" }, + { "h6", "li" }, + { "h6", "p" }, + { "h6", "table" }, + { "head", "a" }, + { "head", "abbr" }, + { "head", "acronym" }, + { "head", "address" }, + { "head", "b" }, + { "head", "bdo" }, + { "head", "big" }, + { "head", "blockquote" }, + { "head", "body" }, + { "head", "br" }, + { "head", "center" }, + { "head", "cite" }, + { "head", "code" }, + { "head", "dd" }, + { "head", "dfn" }, + { "head", "dir" }, + { "head", "div" }, + { "head", "dl" }, + { "head", "dt" }, + { "head", "em" }, + { "head", "fieldset" }, + { "head", "font" }, + { "head", "form" }, + { "head", "frameset" }, + { "head", "h1" }, + { "head", "h2" }, + { "head", "h3" }, + { "head", "h4" }, + { "head", "h5" }, + { "head", "h6" }, + { "head", "hr" }, + { "head", "i" }, + { "head", "iframe" }, + { "head", "img" }, + { "head", "kbd" }, + { "head", "li" }, + { "head", "listing" }, + { "head", "map" }, + { "head", "menu" }, + { "head", "ol" }, + { "head", "p" }, + { "head", "pre" }, + { "head", "q" }, + { "head", "s" }, + { "head", "samp" }, + { "head", "small" }, + { "head", "span" }, + { "head", "strike" }, + { "head", "strong" }, + { "head", "sub" }, + { "head", "sup" }, + { "head", "table" }, + { "head", "tt" }, + { "head", "u" }, + { "head", "ul" }, + { "head", "var" }, + { "head", "xmp" }, + { "hr", "form" }, + { "i", "center" }, + { "i", "p" }, + { "i", "td" }, + { "i", "th" }, + { "legend", "fieldset" }, + { "li", "li" }, + { "link", "body" }, + { "link", "frameset" }, + { "listing", "dd" }, + { "listing", "dl" }, + { "listing", "dt" }, + { "listing", "fieldset" }, + { "listing", "form" }, + { "listing", "li" }, + { "listing", "table" }, + { "listing", "ul" }, + { "menu", "dd" }, + { "menu", "dl" }, + { "menu", "dt" }, + { "menu", "form" }, + { "menu", "ul" }, + { "ol", "form" }, + { "ol", "ul" }, + { "option", "optgroup" }, + { "option", "option" }, + { "p", "address" }, + { "p", "blockquote" }, + { "p", "body" }, + { "p", "caption" }, + { "p", "center" }, + { "p", "col" }, + { "p", "colgroup" }, + { "p", "dd" }, + { "p", "dir" }, + { "p", "div" }, + { "p", "dl" }, + { "p", "dt" }, + { "p", "fieldset" }, + { "p", "form" }, + { "p", "frameset" }, + { "p", "h1" }, + { "p", "h2" }, + { "p", "h3" }, + { "p", "h4" }, + { "p", "h5" }, + { "p", "h6" }, + { "p", "head" }, + { "p", "hr" }, + { "p", "li" }, + { "p", "listing" }, + { "p", "menu" }, + { "p", "ol" }, + { "p", "p" }, + { "p", "pre" }, + { "p", "table" }, + { "p", "tbody" }, + { "p", "td" }, + { "p", "tfoot" }, + { "p", "th" }, + { "p", "title" }, + { "p", "tr" }, + { "p", "ul" }, + { "p", "xmp" }, + { "pre", "dd" }, + { "pre", "dl" }, + { "pre", "dt" }, + { "pre", "fieldset" }, + { "pre", "form" }, + { "pre", "li" }, + { "pre", "table" }, + { "pre", "ul" }, + { "s", "p" }, + { "script", "noscript" }, + { "small", "p" }, + { "span", "td" }, + { "span", "th" }, + { "strike", "p" }, + { "style", "body" }, + { "style", "frameset" }, + { "tbody", "tbody" }, + { "tbody", "tfoot" }, + { "td", "tbody" }, + { "td", "td" }, + { "td", "tfoot" }, + { "td", "th" }, + { "td", "tr" }, + { "tfoot", "tbody" }, + { "th", "tbody" }, + { "th", "td" }, + { "th", "tfoot" }, + { "th", "th" }, + { "th", "tr" }, + { "thead", "tbody" }, + { "thead", "tfoot" }, + { "title", "body" }, + { "title", "frameset" }, + { "tr", "tbody" }, + { "tr", "tfoot" }, + { "tr", "tr" }, + { "tt", "p" }, + { "u", "p" }, + { "u", "td" }, + { "u", "th" }, + { "ul", "address" }, + { "ul", "form" }, + { "ul", "menu" }, + { "ul", "ol" }, + { "ul", "pre" }, + { "xmp", "dd" }, + { "xmp", "dl" }, + { "xmp", "dt" }, + { "xmp", "fieldset" }, + { "xmp", "form" }, + { "xmp", "li" }, + { "xmp", "table" }, + { "xmp", "ul" } }; /* @@ -1237,9 +1401,6 @@ static const elementPriority htmlEndPriority[] = { {NULL, 100} /* Default priority */ }; -static const char** htmlStartCloseIndex[100]; -static int htmlStartCloseIndexinitialized = 0; - /************************************************************************ * * * functions to handle HTML specific data * @@ -1249,24 +1410,18 @@ static int htmlStartCloseIndexinitialized = 0; /** * htmlInitAutoClose: * - * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. - * This is not reentrant. Call xmlInitParser() once before processing in - * case of use in multithreaded programs. + * This is a no-op now. */ void htmlInitAutoClose(void) { - int indx, i = 0; +} - if (htmlStartCloseIndexinitialized) return; +static int +htmlCompareTags(const void *key, const void *member) { + const xmlChar *tag = (const xmlChar *) key; + const htmlElemDesc *desc = (const htmlElemDesc *) member; - for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL; - indx = 0; - while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) { - htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i]; - while (htmlStartClose[i] != NULL) i++; - i++; - } - htmlStartCloseIndexinitialized = 1; + return(xmlStrcasecmp(tag, BAD_CAST desc->name)); } /** @@ -1279,14 +1434,12 @@ htmlInitAutoClose(void) { */ const htmlElemDesc * htmlTagLookup(const xmlChar *tag) { - unsigned int i; + if (tag == NULL) + return(NULL); - for (i = 0; i < (sizeof(html40ElementTable) / - sizeof(html40ElementTable[0]));i++) { - if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) - return((htmlElemDescPtr) &html40ElementTable[i]); - } - return(NULL); + return((const htmlElemDesc *) bsearch(tag, html40ElementTable, + sizeof(html40ElementTable) / sizeof(htmlElemDesc), + sizeof(htmlElemDesc), htmlCompareTags)); } /** @@ -1307,6 +1460,19 @@ htmlGetEndPriority (const xmlChar *name) { } +static int +htmlCompareStartClose(const void *vkey, const void *member) { + const htmlStartCloseEntry *key = (const htmlStartCloseEntry *) vkey; + const htmlStartCloseEntry *entry = (const htmlStartCloseEntry *) member; + int ret; + + ret = strcmp(key->oldTag, entry->oldTag); + if (ret == 0) + ret = strcmp(key->newTag, entry->newTag); + + return(ret); +} + /** * htmlCheckAutoClose: * @newtag: The new tag name @@ -1314,37 +1480,21 @@ htmlGetEndPriority (const xmlChar *name) { * * Checks whether the new tag is one of the registered valid tags for * closing old. - * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * * Returns 0 if no, 1 if yes. */ static int htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag) { - int i, indx; - const char **closed = NULL; - - if (htmlStartCloseIndexinitialized == 0) - htmlInitAutoClose(); - - /* inefficient, but not a big deal */ - for (indx = 0; indx < 100; indx++) { - closed = htmlStartCloseIndex[indx]; - if (closed == NULL) - return (0); - if (xmlStrEqual(BAD_CAST * closed, newtag)) - break; - } - - i = closed - htmlStartClose; - i++; - while (htmlStartClose[i] != NULL) { - if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) { - return (1); - } - i++; - } - return (0); + htmlStartCloseEntry key; + void *res; + + key.oldTag = (const char *) oldtag; + key.newTag = (const char *) newtag; + res = bsearch(&key, htmlStartClose, + sizeof(htmlStartClose) / sizeof(htmlStartCloseEntry), + sizeof(htmlStartCloseEntry), htmlCompareStartClose); + return(res != NULL); } /** @@ -4057,12 +4207,10 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) * With the exception that the autoclose may have popped stuff out * of the stack. */ - if (!xmlStrEqual(name, ctxt->name)) { - if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) { - htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, - "Opening and ending tag mismatch: %s and %s\n", - name, ctxt->name); - } + if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) { + htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, + "Opening and ending tag mismatch: %s and %s\n", + name, ctxt->name); } /* diff --git a/INSTALL.libxml2 b/INSTALL.libxml2 index 383fb935..ac9211d6 100644 --- a/INSTALL.libxml2 +++ b/INSTALL.libxml2 @@ -1,27 +1,21 @@ -Extracted from the documentation: - http://xmlsoft.org/FAQ.html#Compilatio - See also the generic INSTALL file for configure options Compilation - 1.What is the process to compile libxml ? + 1. What is the process to compile libxml? As most UNIX libraries libxml follows the "standard": - gunzip -c xxx.tar.gz | tar xvf - - - cd libxml-xxxx + gunzip -c xxx.tar.gz | tar xvf - + cd libxml-xxxx - ./configure --help + ./configure --help to see the options, then the compilation/installation proper - ./configure [possible options] - - make - - make install + ./configure [possible options] + make + make install At that point you may have to rerun ldconfig or similar utility to update your list of installed shared libs. @@ -29,33 +23,29 @@ Compilation At this point you can check that the library is properly functioning by running - make tests + make check + + Please report test failures to the mailing list or bug tracker. - 2.What other libraries are needed to compile/install libxml ? + 2. What other libraries are needed to compile/install libxml? - Libxml does not requires any other library, the normal C ANSI API - should be sufficient (please report any violation to this rule you - may find). + Libxml does not require any other libraries. A platform with somewhat + recent POSIX support should be sufficient (please report any violation + to this rule you may find). - However if found at configuration time libxml will detect and use + However if found at configuration time, libxml will detect and use the following libs: - libz: a highly portable and available widely compression library - http://www.info-zip.org/pub/infozip/zlib/ + libz: a highly portable and widely available compression library + https://zlib.net/ + liblzma: another compression library + https://tukaani.org/xz/ iconv: a powerful character encoding conversion library. It's - included by default on recent glibc libraries, so it doesn't - need to be installed specifically on linux. It seems it's - now part of the official UNIX specification. Here is one - implementation of the library which source can be found here. - http://clisp.cons.org/~haible/packages-libiconv.html - ftp://ftp.ilog.fr/pub/Users/haible/gnu/ - - 3.make tests may fail on some platforms - - Sometime the regression tests results don't completely match the - value produced by the parser, and the makefile uses diff to print - the delta. On some platforms the diff return breaks the compilation - process, if the diff is small this is probably not a serious problem + part of POSIX.1-2001, so it doesn't need to be installed + on modern UNIX-like systems, specifically on Linux. + https://www.gnu.org/software/libiconv/ + ICU: Mainly used by Chromium on Windows. Unnecessary on most + systems. Daniel veillard@redhat.com @@ -10,13 +10,13 @@ third_party { } url { type: ARCHIVE - value: "https://github.com/GNOME/libxml2/archive/ce2fbaa89da627a6c6cd6344d5339a76feba94b5.zip" + value: "https://github.com/GNOME/libxml2/archive/7279d236364739a05657a8a614c15990eb08d0c6.zip" } - version: "ce2fbaa89da627a6c6cd6344d5339a76feba94b5" + version: "7279d236364739a05657a8a614c15990eb08d0c6" license_type: BY_EXCEPTION_ONLY last_upgrade_date { year: 2021 - month: 2 - day: 23 + month: 5 + day: 6 } } @@ -704,11 +704,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { } else { /* * We assume we have UTF-8 input. + * It must match either: + * 110xxxxx 10xxxxxx + * 1110xxxx 10xxxxxx 10xxxxxx + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * That is: + * cur[0] is 11xxxxxx + * cur[1] is 10xxxxxx + * cur[2] is 10xxxxxx if cur[0] is 111xxxxx + * cur[3] is 10xxxxxx if cur[0] is 1111xxxx + * cur[0] is not 11111xxx */ char buf[11], *ptr; int val = 0, l = 1; - if (*cur < 0xC0) { + if (((cur[0] & 0xC0) != 0xC0) || + ((cur[1] & 0xC0) != 0x80) || + (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) || + (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) || + (((cur[0] & 0xF8) == 0xF8))) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, "xmlEncodeEntities: input not UTF-8"); if (doc != NULL) diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am index 2bbdbb1f..7d383470 100644 --- a/fuzz/Makefile.am +++ b/fuzz/Makefile.am @@ -74,7 +74,7 @@ fuzz-html: html$(EXEEXT) seed/html.stamp ./html$(EXEEXT) \ -dict=html.dict \ -max_len=1000000 \ - -timeout=20 \ + -timeout=10 \ corpus/html seed/html # Regexp fuzzer @@ -99,7 +99,7 @@ fuzz-uri: uri$(EXEEXT) @mkdir -p corpus/uri ./uri$(EXEEXT) \ -max_len=10000 \ - -timeout=5 \ + -timeout=2 \ corpus/uri $(srcdir)/seed/uri # XML Schema fuzzer diff --git a/fuzz/html.options b/fuzz/html.options index e5d3bbee..1c63f53d 100644 --- a/fuzz/html.options +++ b/fuzz/html.options @@ -1,2 +1,2 @@ [libfuzzer] -timeout = 20 +timeout = 10 diff --git a/fuzz/uri.options b/fuzz/uri.options index ea2a7a23..0676c659 100644 --- a/fuzz/uri.options +++ b/fuzz/uri.options @@ -1,2 +1,2 @@ [libfuzzer] -timeout = 5 +timeout = 2 diff --git a/include/libxml/c14n.h b/include/libxml/c14n.h index d74847df..af93de63 100644 --- a/include/libxml/c14n.h +++ b/include/libxml/c14n.h @@ -16,17 +16,19 @@ */ #ifndef __XML_C14N_H__ #define __XML_C14N_H__ + +#include <libxml/xmlversion.h> + #ifdef LIBXML_C14N_ENABLED #ifdef LIBXML_OUTPUT_ENABLED +#include <libxml/tree.h> +#include <libxml/xpath.h> + #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ -#include <libxml/xmlversion.h> -#include <libxml/tree.h> -#include <libxml/xpath.h> - /* * XML Canonicalization * http://www.w3.org/TR/xml-c14n @@ -2684,8 +2684,10 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; - if (rep == NULL) + if (rep == NULL) { + ent->content[0] = 0; goto int_error; + } current = rep; while (*current != 0) { /* non input consuming loop */ @@ -2740,8 +2742,11 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; - if (rep == NULL) + if (rep == NULL) { + if (ent->content != NULL) + ent->content[0] = 0; goto int_error; + } current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; @@ -6203,6 +6208,8 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, SKIP_BLANKS; cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, depth + 1); + if (cur == NULL) + return(NULL); SKIP_BLANKS; GROW; } else { @@ -6336,6 +6343,11 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, SKIP_BLANKS; last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, depth + 1); + if (last == NULL) { + if (ret != NULL) + xmlFreeDocElementContent(ctxt->myDoc, ret); + return(NULL); + } SKIP_BLANKS; } else { elem = xmlParseName(ctxt); @@ -1901,12 +1901,6 @@ xmlNewPropInternal(xmlNodePtr node, xmlNsPtr ns, if (value != NULL) { xmlNodePtr tmp; - if(!xmlCheckUTF8(value)) { - xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) doc, - NULL); - if (doc != NULL) - doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); - } cur->children = xmlNewDocText(doc, value); cur->last = NULL; tmp = cur->children; @@ -2026,6 +2020,11 @@ xmlNewNsPropEatName(xmlNodePtr node, xmlNsPtr ns, xmlChar *name, * @value: the value of the attribute * * Create a new property carried by a document. + * NOTE: @value is supposed to be a piece of XML CDATA, so it allows entity + * references, but XML special chars need to be escaped first by using + * xmlEncodeEntitiesReentrant(). Use xmlNewProp() if you don't need + * entities support. + * * Returns a pointer to the attribute */ xmlAttrPtr @@ -4894,7 +4893,9 @@ xmlGetNodePath(const xmlNode *node) } next = ((xmlAttrPtr) cur)->parent; } else { - next = cur->parent; + xmlFree(buf); + xmlFree(buffer); + return (NULL); } /* @@ -6589,6 +6590,16 @@ xmlGetPropNodeInternal(const xmlNode *node, const xmlChar *name, attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, elemQName, name, NULL); } + } else if (xmlStrEqual(nsName, XML_XML_NAMESPACE)) { + /* + * The XML namespace must be bound to prefix 'xml'. + */ + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, + elemQName, name, BAD_CAST "xml"); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) { + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, + elemQName, name, BAD_CAST "xml"); + } } else { xmlNsPtr *nsList, *cur; @@ -6935,12 +6946,6 @@ xmlSetNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name, if (value != NULL) { xmlNodePtr tmp; - if(!xmlCheckUTF8(value)) { - xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) node->doc, - NULL); - if (node->doc != NULL) - node->doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); - } prop->children = xmlNewDocText(node->doc, value); prop->last = NULL; tmp = prop->children; @@ -2430,9 +2430,8 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree, ctxt->incTotal++; xmlXIncludePreProcessNode(ctxt, cur); } else if ((cur->children != NULL) && - (cur->children->type != XML_ENTITY_DECL) && - (cur->children->type != XML_XINCLUDE_START) && - (cur->children->type != XML_XINCLUDE_END)) { + ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_ELEMENT_NODE))) { cur = cur->children; continue; } @@ -2213,7 +2213,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) { if (res > 0) { ctxt = htmlCreatePushParserCtxt(NULL, NULL, chars, res, filename, XML_CHAR_ENCODING_NONE); - xmlCtxtUseOptions(ctxt, options); + htmlCtxtUseOptions(ctxt, options); while ((res = fread(chars, 1, pushsize, f)) > 0) { htmlParseChunk(ctxt, chars, res, 0); } @@ -2426,6 +2426,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) { dtd = xmlGetIntSubset(doc); if (dtd != NULL) { xmlUnlinkNode((xmlNodePtr)dtd); + doc->intSubset = NULL; xmlFreeDtd(dtd); } } diff --git a/xmlschemastypes.c b/xmlschemastypes.c index 07b5fd76..9c2dff06 100644 --- a/xmlschemastypes.c +++ b/xmlschemastypes.c @@ -2187,6 +2187,44 @@ xmlSchemaParseUInt(const xmlChar **str, unsigned long *llo, return(ret); } +/* + * xmlSchemaCheckLanguageType + * @value: the value to check + * + * Check that a value conforms to the lexical space of the language datatype. + * Must conform to [a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})* + * + * Returns 1 if this validates, 0 otherwise. + */ +static int +xmlSchemaCheckLanguageType(const xmlChar* value) { + int first = 1, len = 0; + const xmlChar* cur = value; + + if (value == NULL) + return (0); + + while (cur[0] != 0) { + if (!( ((cur[0] >= 'a') && (cur[0] <= 'z')) || ((cur[0] >= 'A') && (cur[0] <= 'Z')) + || (cur[0] == '-') + || ((first == 0) && (xmlIsDigit_ch(cur[0]))) )) + return (0); + if (cur[0] == '-') { + if ((len < 1) || (len > 8)) + return (0); + len = 0; + first = 0; + } + else + len++; + cur++; + } + if ((len < 1) || (len > 8)) + return (0); + + return (1); +} + /** * xmlSchemaValAtomicType: * @type: the predefined type @@ -2704,7 +2742,8 @@ xmlSchemaValAtomicType(xmlSchemaTypePtr type, const xmlChar * value, if (norm != NULL) value = norm; } - if (xmlCheckLanguageID(value) == 1) { + + if (xmlSchemaCheckLanguageType(value) == 1) { if (val != NULL) { v = xmlSchemaNewValue(XML_SCHEMAS_LANGUAGE); if (v != NULL) { @@ -488,14 +488,6 @@ int wrap_cmp( xmlNodePtr x, xmlNodePtr y ); * * ************************************************************************/ -#ifndef INFINITY -#define INFINITY (DBL_MAX * DBL_MAX) -#endif - -#ifndef NAN -#define NAN (INFINITY / INFINITY) -#endif - double xmlXPathNAN; double xmlXPathPINF; double xmlXPathNINF; @@ -505,11 +497,14 @@ double xmlXPathNINF; * * Initialize the XPath environment */ +ATTRIBUTE_NO_SANITIZE("float-divide-by-zero") void xmlXPathInit(void) { - xmlXPathNAN = NAN; - xmlXPathPINF = INFINITY; - xmlXPathNINF = -INFINITY; + /* MSVC doesn't allow division by zero in constant expressions. */ + double zero = 0.0; + xmlXPathNAN = 0.0 / zero; + xmlXPathPINF = 1.0 / zero; + xmlXPathNINF = -xmlXPathPINF; } /** @@ -538,9 +533,9 @@ xmlXPathIsInf(double val) { #ifdef isinf return isinf(val) ? (val > 0 ? 1 : -1) : 0; #else - if (val >= INFINITY) + if (val >= xmlXPathPINF) return 1; - if (val <= -INFINITY) + if (val <= -xmlXPathPINF) return -1; return 0; #endif @@ -5873,10 +5868,10 @@ xmlXPathCastNodeToNumber (xmlNodePtr node) { double ret; if (node == NULL) - return(NAN); + return(xmlXPathNAN); strval = xmlXPathCastNodeToString(node); if (strval == NULL) - return(NAN); + return(xmlXPathNAN); ret = xmlXPathCastStringToNumber(strval); xmlFree(strval); @@ -5897,7 +5892,7 @@ xmlXPathCastNodeSetToNumber (xmlNodeSetPtr ns) { double ret; if (ns == NULL) - return(NAN); + return(xmlXPathNAN); str = xmlXPathCastNodeSetToString(ns); ret = xmlXPathCastStringToNumber(str); xmlFree(str); @@ -5917,13 +5912,13 @@ xmlXPathCastToNumber(xmlXPathObjectPtr val) { double ret = 0.0; if (val == NULL) - return(NAN); + return(xmlXPathNAN); switch (val->type) { case XPATH_UNDEFINED: #ifdef DEBUG_EXPR xmlGenericError(xmlGenericErrorContext, "NUMBER: undefined\n"); #endif - ret = NAN; + ret = xmlXPathNAN; break; case XPATH_NODESET: case XPATH_XSLT_TREE: @@ -5943,7 +5938,7 @@ xmlXPathCastToNumber(xmlXPathObjectPtr val) { case XPATH_RANGE: case XPATH_LOCATIONSET: TODO; - ret = NAN; + ret = xmlXPathNAN; break; } return(ret); @@ -7570,7 +7565,7 @@ xmlXPathModValues(xmlXPathParserContextPtr ctxt) { CHECK_TYPE(XPATH_NUMBER); arg1 = ctxt->value->floatval; if (arg2 == 0) - ctxt->value->floatval = NAN; + ctxt->value->floatval = xmlXPathNAN; else { ctxt->value->floatval = fmod(arg1, arg2); } @@ -10000,7 +9995,7 @@ xmlXPathStringEvalNumber(const xmlChar *str) { if (cur == NULL) return(0); while (IS_BLANK_CH(*cur)) cur++; if ((*cur != '.') && ((*cur < '0') || (*cur > '9')) && (*cur != '-')) { - return(NAN); + return(xmlXPathNAN); } if (*cur == '-') { isneg = 1; @@ -10036,7 +10031,7 @@ xmlXPathStringEvalNumber(const xmlChar *str) { cur++; if (((*cur < '0') || (*cur > '9')) && (!ok)) { - return(NAN); + return(xmlXPathNAN); } while (*cur == '0') { frac = frac + 1; @@ -10069,7 +10064,7 @@ xmlXPathStringEvalNumber(const xmlChar *str) { } } while (IS_BLANK_CH(*cur)) cur++; - if (*cur != 0) return(NAN); + if (*cur != 0) return(xmlXPathNAN); if (isneg) ret = -ret; if (is_exponent_negative) exponent = -exponent; ret *= pow(10.0, (double)exponent); |