summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Duffin <paulduffin@google.com> 2017-03-01 13:48:32 +0000
committer android-build-merger <android-build-merger@google.com> 2017-03-01 13:48:32 +0000
commit cea41540d6f933a456d7ce815fc1c5aff9f69f2f (patch)
tree f4cc4a019c9a6ddfbb5ef4873df40264cdb13d0b
parent a95af47c5fdaccda1c37e36b27949800df8dee02 (diff)
parent 451c00f7c3205ce3c3054b6add79a6d66b5b5746 (diff)
download expat-cea41540d6f933a456d7ce815fc1c5aff9f69f2f.tar.gz
Merge changes from topic 'upgrade-expat-2.2.0' am: d61777ec98 am: 470553711f
am: 451c00f7c3 Change-Id: I9dede9d10c6600918538fda45ed74fabe021ac4d
-rw-r--r-- Changes 51
-rw-r--r-- NOTICE 5
-rw-r--r-- README 4
-rw-r--r-- README.version 8
-rw-r--r-- doc/xmlwf.1 5
-rw-r--r-- doc/xmlwf.xml 440
-rw-r--r-- examples/elements.c 7
-rw-r--r-- examples/outline.c 7
-rw-r--r-- lib/expat.h 15
-rw-r--r-- lib/expat_external.h 14
-rw-r--r-- lib/internal.h 22
-rw-r--r-- lib/xmlparse.c 24
-rw-r--r-- lib/xmlrole.c 220
-rw-r--r-- lib/xmltok.c 111
-rw-r--r-- lib/xmltok_impl.c 216
-rw-r--r-- tests/chardata.c 4
-rw-r--r-- tests/minicheck.c 7
-rw-r--r-- tests/minicheck.h 15
-rw-r--r-- tests/runtests.c 479
-rwxr-xr-x tests/xmltest.sh 8
20 files changed, 1225 insertions, 437 deletions
diff --git a/Changes b/Changes
index db5f7f32..583c8685 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,52 @@
+Release 2.2.0 Tue June 21 2016
+ Security fixes:
+ #537 CVE-2016-0718 -- Fix crash on malformed input
+ CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 /
+ CVE-2015-2716 introduced with Expat 2.1.1
+ #499 CVE-2016-5300 -- Use more entropy for hash initialization
+ than the original fix to CVE-2012-0876
+ #519 CVE-2012-6702 -- Resolve troublesome internal call to srand
+ that was introduced with Expat 2.1.0
+ when addressing CVE-2012-0876 (issue #496)
+
+ Bug fixes:
+ Fix uninitialized reads of size 1
+ (e.g. in little2_updatePosition)
+ Fix detection of UTF-8 character boundaries
+
+ Other changes:
+ #532 Fix compilation for Visual Studio 2010 (keyword "C99")
+ Autotools: Resolve use of "$<" to better support bmake
+ Autotools: Add QA script "qa.sh" (and make target "qa")
+ Autotools: Respect CXXFLAGS if given
+ Autotools: Fix "make run-xmltest"
+ Autotools: Have "make run-xmltest" check for expected output
+ p90 CMake: Fix static build (BUILD_shared=OFF) on Windows
+ #536 CMake: Add soversion, support -DNO_SONAME=yes to bypass
+ #323 CMake: Add suffix "d" to differentiate debug from release
+ CMake: Define WIN32 with CMake on Windows
+ Annotate memory allocators for GCC
+ Address all currently known compile warnings
+ Make sure that API symbols remain visible despite
+ -fvisibility=hidden
+ Remove executable flag from source files
+ Resolve COMPILED_FROM_DSP in favor of WIN32
+
+ Special thanks to:
+ Björn Lindahl
+ Christian Heimes
+ Cristian Rodríguez
+ Daniel Krügler
+ Gustavo Grieco
+ Karl Waclawek
+ László Böszörményi
+ Marco Grassi
+ Pascal Cuoq
+ Sergei Nikulov
+ Thomas Beutlich
+ Warren Young
+ Yann Droneaud
+
Release 2.1.1 Sat March 12 2016
Security fixes:
#582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer
@@ -7,7 +56,7 @@ Release 2.1.1 Sat March 12 2016
#520: Symbol XML_SetHashSalt was not exported
Output of "xmlwf -h" was incomplete
- Other changes
+ Other changes:
#503: Document behavior of calling XML_SetHashSalt with salt 0
Minor improvements to man page xmlwf(1)
Improvements to the experimental CMake build system
diff --git a/NOTICE b/NOTICE
index dcb45064..092c83ba 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,6 +1,5 @@
-Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
- and Clark Cooper
-Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
+Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
+Copyright (c) 2001-2016 Expat maintainers
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/README b/README
index 9ec8d0c6..a7d28450 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
- Expat, Release 2.1.1
+ Expat, Release 2.2.0
This is Expat, a C library for parsing XML, written by James Clark.
Expat is a stream-oriented XML parser. This means that you register
@@ -114,7 +114,7 @@ Note for Solaris users: The "ar" command is usually located in
"/usr/ccs/bin", which is not in the default PATH. You will need to
add this to your path for the "make" command, and probably also switch
to GNU make (the "make" found in /usr/ccs/bin does not seem to work
-properly -- appearantly it does not understand .PHONY directives). If
+properly -- apparently it does not understand .PHONY directives). If
you're using ksh or bash, use this command to build:
PATH=/usr/ccs/bin:$PATH make
diff --git a/README.version b/README.version
index 55146253..7b85695c 100644
--- a/README.version
+++ b/README.version
@@ -1,3 +1,7 @@
-URL: http://sourceforge.net/projects/expat/files/expat/2.1.0/expat-2.1.0.tar.gz/download
-Version: 2.1.0
+URL: https://sourceforge.net/projects/expat/files/expat/2.2.0/expat-2.2.0.tar.bz2/download
+Version: 2.2.0
BugComponent: 24949
+
+Local Changes:
+ Reverted change to switch from COMPILED_FROM_DSP to WIN32. This was needed in order to get it to compile as winconfig.h is not available.
+ Fix cast from pointer to integer of different size
diff --git a/doc/xmlwf.1 b/doc/xmlwf.1
index f45aea9c..06bb84cf 100644
--- a/doc/xmlwf.1
+++ b/doc/xmlwf.1
@@ -177,12 +177,14 @@ data from outside the XML file currently being parsed.
This is an example of an internal entity:
.nf
+
<!ENTITY vers '1.0.2'>
.fi
And here are some examples of external entities:
.nf
+
<!ENTITY header SYSTEM "header\-&vers;.xml"> (parsed)
<!ENTITY logo SYSTEM "logo.png" PNG> (unparsed)
.fi
@@ -193,6 +195,7 @@ Terminates the list of options. This is only needed if a filename
starts with a hyphen. For example:
.nf
+
xmlwf \-\- \-myfile.xml
.fi
@@ -228,6 +231,7 @@ me, I'd like to add this information to this manpage.
Here are some XML validators on the web:
.nf
+
http://www.hcrc.ed.ac.uk/~richard/xml\-check.html
http://www.stg.brown.edu/service/xmlvalid/
http://www.scripting.com/frontier5/xml/code/xmlValidator.html
@@ -235,6 +239,7 @@ http://www.xml.com/pub/a/tools/ruwf/check.html
.fi
.SH "SEE ALSO"
.nf
+
The Expat home page: http://www.libexpat.org/
The W3 XML specification: http://www.w3.org/TR/REC\-xml
.fi
diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml
new file mode 100644
index 00000000..92ea8b59
--- /dev/null
+++ b/doc/xmlwf.xml
@@ -0,0 +1,440 @@
+<!DOCTYPE refentry [
+ <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+ <!ENTITY dhfirstname "<firstname>Scott</firstname>">
+ <!ENTITY dhsurname "<surname>Bronson</surname>">
+ <!-- Please adjust the date whenever revising the manpage. -->
+ <!ENTITY dhdate "<date>March 11, 2016</date>">
+ <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
+ allowed: see man(7), man(1). -->
+ <!ENTITY dhsection "<manvolnum>1</manvolnum>">
+ <!ENTITY dhemail "<email>bronson@rinspin.com</email>">
+ <!ENTITY dhusername "Scott Bronson">
+ <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
+ <!ENTITY dhpackage "xmlwf">
+
+ <!ENTITY debian "<productname>Debian GNU/Linux</productname>">
+ <!ENTITY gnu "<acronym>GNU</acronym>">
+]>
+
+<refentry>
+ <refentryinfo>
+ <address>
+ &dhemail;
+ </address>
+ <author>
+ &dhfirstname;
+ &dhsurname;
+ </author>
+ <copyright>
+ <year>2001</year>
+ <holder>&dhusername;</holder>
+ </copyright>
+ &dhdate;
+ </refentryinfo>
+ <refmeta>
+ &dhucpackage;
+
+ &dhsection;
+ </refmeta>
+ <refnamediv>
+ <refname>&dhpackage;</refname>
+
+ <refpurpose>Determines if an XML document is well-formed</refpurpose>
+ </refnamediv>
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>&dhpackage;</command>
+ <arg><option>-s</option></arg>
+ <arg><option>-n</option></arg>
+ <arg><option>-p</option></arg>
+ <arg><option>-x</option></arg>
+
+ <arg><option>-e <replaceable>encoding</replaceable></option></arg>
+ <arg><option>-w</option></arg>
+
+ <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
+ <arg><option>-c</option></arg>
+ <arg><option>-m</option></arg>
+
+ <arg><option>-r</option></arg>
+ <arg><option>-t</option></arg>
+
+ <arg><option>-v</option></arg>
+
+ <arg>file ...</arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>
+ <command>&dhpackage;</command> uses the Expat library to
+ determine if an XML document is well-formed. It is
+ non-validating.
+ </para>
+
+ <para>
+ If you do not specify any files on the command-line, and you
+ have a recent version of <command>&dhpackage;</command>, the
+ input file will be read from standard input.
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>WELL-FORMED DOCUMENTS</title>
+
+ <para>
+ A well-formed document must adhere to the
+ following rules:
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ The file begins with an XML declaration. For instance,
+ <literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
+ <emphasis>NOTE:</emphasis>
+ <command>&dhpackage;</command> does not currently
+ check for a valid XML declaration.
+ </para></listitem>
+ <listitem><para>
+ Every start tag is either empty (&lt;tag/&gt;)
+ or has a corresponding end tag.
+ </para></listitem>
+ <listitem><para>
+ There is exactly one root element. This element must contain
+ all other elements in the document. Only comments, white
+ space, and processing instructions may come after the close
+ of the root element.
+ </para></listitem>
+ <listitem><para>
+ All elements nest properly.
+ </para></listitem>
+ <listitem><para>
+ All attribute values are enclosed in quotes (either single
+ or double).
+ </para></listitem>
+ </itemizedlist>
+
+ <para>
+ If the document has a DTD, and it strictly complies with that
+ DTD, then the document is also considered <emphasis>valid</emphasis>.
+ <command>&dhpackage;</command> is a non-validating parser --
+ it does not check the DTD. However, it does support
+ external entities (see the <option>-x</option> option).
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+<para>
+When an option includes an argument, you may specify the argument either
+separately ("<option>-d</option> output") or concatenated with the
+option ("<option>-d</option>output"). <command>&dhpackage;</command>
+supports both.
+</para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term><option>-c</option></term>
+ <listitem>
+ <para>
+ If the input file is well-formed and <command>&dhpackage;</command>
+ doesn't encounter any errors, the input file is simply copied to
+ the output directory unchanged.
+ This implies no namespaces (turns off <option>-n</option>) and
+ requires <option>-d</option> to specify an output file.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-d output-dir</option></term>
+ <listitem>
+ <para>
+ Specifies a directory to contain transformed
+ representations of the input files.
+ By default, <option>-d</option> outputs a canonical representation
+ (described below).
+ You can select different output formats using <option>-c</option>
+ and <option>-m</option>.
+ </para>
+ <para>
+ The output filenames will
+ be exactly the same as the input filenames or "STDIN" if the input is
+ coming from standard input. Therefore, you must be careful that the
+ output file does not go into the same directory as the input
+ file. Otherwise, <command>&dhpackage;</command> will delete the
+ input file before it generates the output file (just like running
+ <literal>cat &lt; file &gt; file</literal> in most shells).
+ </para>
+ <para>
+ Two structurally equivalent XML documents have a byte-for-byte
+ identical canonical XML representation.
+ Note that ignorable white space is considered significant and
+ is treated equivalently to data.
+ More on canonical XML can be found at
+ http://www.jclark.com/xml/canonxml.html .
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-e encoding</option></term>
+ <listitem>
+ <para>
+ Specifies the character encoding for the document, overriding
+ any document encoding declaration. <command>&dhpackage;</command>
+ supports four built-in encodings:
+ <literal>US-ASCII</literal>,
+ <literal>UTF-8</literal>,
+ <literal>UTF-16</literal>, and
+ <literal>ISO-8859-1</literal>.
+ Also see the <option>-w</option> option.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-m</option></term>
+ <listitem>
+ <para>
+ Outputs some strange sort of XML file that completely
+ describes the input file, including character positions.
+ Requires <option>-d</option> to specify an output file.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-n</option></term>
+ <listitem>
+ <para>
+ Turns on namespace processing. (describe namespaces)
+ <option>-c</option> disables namespaces.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-p</option></term>
+ <listitem>
+ <para>
+ Tells xmlwf to process external DTDs and parameter
+ entities.
+ </para>
+ <para>
+ Normally <command>&dhpackage;</command> never parses parameter
+ entities. <option>-p</option> tells it to always parse them.
+ <option>-p</option> implies <option>-x</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-r</option></term>
+ <listitem>
+ <para>
+ Normally <command>&dhpackage;</command> memory-maps the XML file
+ before parsing; this can result in faster parsing on many
+ platforms.
+ <option>-r</option> turns off memory-mapping and uses normal file
+ IO calls instead.
+ Of course, memory-mapping is automatically turned off
+ when reading from standard input.
+ </para>
+ <para>
+ Use of memory-mapping can cause some platforms to report
+ substantially higher memory usage for
+ <command>&dhpackage;</command>, but this appears to be a matter of
+ the operating system reporting memory in a strange way; there is
+ not a leak in <command>&dhpackage;</command>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-s</option></term>
+ <listitem>
+ <para>
+ Prints an error if the document is not standalone.
+ A document is standalone if it has no external subset and no
+ references to parameter entities.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-t</option></term>
+ <listitem>
+ <para>
+ Turns on timings. This tells Expat to parse the entire file,
+ but not perform any processing.
+ This gives a fairly accurate idea of the raw speed of Expat itself
+ without client overhead.
+ <option>-t</option> turns off most of the output options
+ (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-v</option></term>
+ <listitem>
+ <para>
+ Prints the version of the Expat library being used, including some
+ information on the compile-time configuration of the library, and
+ then exits.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-w</option></term>
+ <listitem>
+ <para>
+ Enables support for Windows code pages.
+ Normally, <command>&dhpackage;</command> will throw an error if it
+ runs across an encoding that it is not equipped to handle itself. With
+ <option>-w</option>, &dhpackage; will try to use a Windows code
+ page. See also <option>-e</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-x</option></term>
+ <listitem>
+ <para>
+ Turns on parsing external entities.
+ </para>
+<para>
+ Non-validating parsers are not required to resolve external
+ entities, or even expand entities at all.
+ Expat always expands internal entities (?),
+ but external entity parsing must be enabled explicitly.
+ </para>
+ <para>
+ External entities are simply entities that obtain their
+ data from outside the XML file currently being parsed.
+ </para>
+ <para>
+ This is an example of an internal entity:
+<literallayout>
+&lt;!ENTITY vers '1.0.2'&gt;
+</literallayout>
+ </para>
+ <para>
+ And here are some examples of external entities:
+
+<literallayout>
+&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt; (parsed)
+&lt;!ENTITY logo SYSTEM "logo.png" PNG&gt; (unparsed)
+</literallayout>
+
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>--</option></term>
+ <listitem>
+ <para>
+ (Two hyphens.)
+ Terminates the list of options. This is only needed if a filename
+ starts with a hyphen. For example:
+ </para>
+<literallayout>
+&dhpackage; -- -myfile.xml
+</literallayout>
+ <para>
+ will run <command>&dhpackage;</command> on the file
+ <filename>-myfile.xml</filename>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Older versions of <command>&dhpackage;</command> do not support
+ reading from standard input.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>OUTPUT</title>
+ <para>
+ If an input file is not well-formed,
+ <command>&dhpackage;</command> prints a single line describing
+ the problem to standard output. If a file is well formed,
+ <command>&dhpackage;</command> outputs nothing.
+ Note that the result code is <emphasis>not</emphasis> set.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>BUGS</title>
+ <para>
+ <command>&dhpackage;</command> returns a 0 - noerr result,
+ even if the file is not well-formed. There is no good way for
+ a program to use <command>&dhpackage;</command> to quickly
+ check a file -- it must parse <command>&dhpackage;</command>'s
+ standard output.
+ </para>
+ <para>
+ The errors should go to standard error, not standard output.
+ </para>
+ <para>
+ There should be a way to get <option>-d</option> to send its
+ output to standard output rather than forcing the user to send
+ it to a file.
+ </para>
+ <para>
+ I have no idea why anyone would want to use the
+ <option>-d</option>, <option>-c</option>, and
+ <option>-m</option> options. If someone could explain it to
+ me, I'd like to add this information to this manpage.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>ALTERNATIVES</title>
+ <para>
+ Here are some XML validators on the web:
+
+<literallayout>
+http://www.hcrc.ed.ac.uk/~richard/xml-check.html
+http://www.stg.brown.edu/service/xmlvalid/
+http://www.scripting.com/frontier5/xml/code/xmlValidator.html
+http://www.xml.com/pub/a/tools/ruwf/check.html
+</literallayout>
+
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+ <para>
+
+<literallayout>
+The Expat home page: http://www.libexpat.org/
+The W3 XML specification: http://www.w3.org/TR/REC-xml
+</literallayout>
+
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>AUTHOR</title>
+ <para>
+ This manual page was written by &dhusername; &dhemail; for
+ the &debian; system (but may be used by others). Permission is
+ granted to copy, distribute and/or modify this document under
+ the terms of the <acronym>GNU</acronym> Free Documentation
+ License, Version 1.1.
+ </para>
+ </refsect1>
+</refentry>
diff --git a/examples/elements.c b/examples/elements.c
index bc04b6b1..0ca1abd9 100644
--- a/examples/elements.c
+++ b/examples/elements.c
@@ -27,6 +27,8 @@ startElement(void *userData, const char *name, const char **atts)
{
int i;
int *depthPtr = (int *)userData;
+ (void)atts;
+
for (i = 0; i < *depthPtr; i++)
putchar('\t');
puts(name);
@@ -37,6 +39,8 @@ static void XMLCALL
endElement(void *userData, const char *name)
{
int *depthPtr = (int *)userData;
+ (void)name;
+
*depthPtr -= 1;
}
@@ -47,6 +51,9 @@ main(int argc, char *argv[])
XML_Parser parser = XML_ParserCreate(NULL);
int done;
int depth = 0;
+ (void)argc;
+ (void)argv;
+
XML_SetUserData(parser, &depth);
XML_SetElementHandler(parser, startElement, endElement);
do {
diff --git a/examples/outline.c b/examples/outline.c
index 3a3c8385..d9b09176 100644
--- a/examples/outline.c
+++ b/examples/outline.c
@@ -49,6 +49,7 @@ static void XMLCALL
start(void *data, const char *el, const char **attr)
{
int i;
+ (void)data;
for (i = 0; i < Depth; i++)
printf(" ");
@@ -66,6 +67,9 @@ start(void *data, const char *el, const char **attr)
static void XMLCALL
end(void *data, const char *el)
{
+ (void)data;
+ (void)el;
+
Depth--;
}
@@ -73,6 +77,9 @@ int
main(int argc, char *argv[])
{
XML_Parser p = XML_ParserCreate(NULL);
+ (void)argc;
+ (void)argv;
+
if (! p) {
fprintf(stderr, "Couldn't allocate memory for parser\n");
exit(-1);
diff --git a/lib/expat.h b/lib/expat.h
index ec62f140..086e24b3 100644
--- a/lib/expat.h
+++ b/lib/expat.h
@@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE
- This handler has been superceded by the EntityDeclHandler above.
+ This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA) entity.
@@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *)
+XML_ATTR_MALLOC
+XML_ATTR_ALLOC_SIZE(2)
XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *)
+XML_ATTR_ALLOC_SIZE(3)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void)
@@ -1031,14 +1034,12 @@ XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
-/* Expat follows the GNU/Linux convention of odd number minor version for
- beta/development releases and even number minor version for stable
- releases. Micro is bumped with each release, and set to 0 with each
- change to major or minor version.
+/* Expat follows the semantic versioning convention.
+ See http://semver.org.
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 1
-#define XML_MICRO_VERSION 1
+#define XML_MINOR_VERSION 2
+#define XML_MICRO_VERSION 0
#ifdef __cplusplus
}
diff --git a/lib/expat_external.h b/lib/expat_external.h
index 2c03284e..aa08a2f8 100644
--- a/lib/expat_external.h
+++ b/lib/expat_external.h
@@ -65,12 +65,26 @@
#endif
#endif /* not defined XML_STATIC */
+#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
+#define XMLIMPORT __attribute__ ((visibility ("default")))
+#endif
/* If we didn't define it above, define it away: */
#ifndef XMLIMPORT
#define XMLIMPORT
#endif
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define XML_ATTR_MALLOC __attribute__((__malloc__))
+#else
+#define XML_ATTR_MALLOC
+#endif
+
+#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
+#else
+#define XML_ATTR_ALLOC_SIZE(x)
+#endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
diff --git a/lib/internal.h b/lib/internal.h
index dd545483..94cb98e1 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -71,3 +71,25 @@
#define inline
#endif
#endif
+
+#ifndef UNUSED_P
+# ifdef __GNUC__
+# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
+# else
+# define UNUSED_P(p) UNUSED_ ## p
+# endif
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 0655e080..2f4e7258 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -729,7 +729,7 @@ generate_hash_secret_salt(XML_Parser parser)
if (sizeof(unsigned long) == 4) {
return entropy * 2147483647;
} else {
- return entropy * 2305843009213693951;
+ return entropy * (unsigned long)2305843009213693951;
}
}
@@ -1728,14 +1728,17 @@ XML_GetBuffer(XML_Parser parser, int len)
}
if (len > bufferLim - bufferEnd) {
- int neededSize = len + (int)(bufferEnd - bufferPtr);
+#ifdef XML_CONTEXT_BYTES
+ int keep;
+#endif /* defined XML_CONTEXT_BYTES */
+ /* Do not invoke signed arithmetic overflow: */
+ int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
if (neededSize < 0) {
errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
#ifdef XML_CONTEXT_BYTES
- int keep = (int)(bufferPtr - buffer);
-
+ keep = (int)(bufferPtr - buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
neededSize += keep;
@@ -1760,7 +1763,8 @@ XML_GetBuffer(XML_Parser parser, int len)
if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE;
do {
- bufferSize *= 2;
+ /* Do not invoke signed arithmetic overflow: */
+ bufferSize = (int) (2U * (unsigned) bufferSize);
} while (bufferSize < neededSize && bufferSize > 0);
if (bufferSize <= 0) {
errorCode = XML_ERROR_NO_MEMORY;
@@ -1887,7 +1891,7 @@ XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)
{
if (eventPtr)
- return parseEndByteIndex - (parseEndPtr - eventPtr);
+ return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
return -1;
}
@@ -4959,9 +4963,9 @@ internalEntityProcessor(XML_Parser parser,
static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,
- const char *s,
- const char *end,
- const char **nextPtr)
+ const char *UNUSED_P(s),
+ const char *UNUSED_P(end),
+ const char **UNUSED_P(nextPtr))
{
return errorCode;
}
@@ -6285,7 +6289,7 @@ poolGrow(STRING_POOL *pool)
}
if (pool->blocks && pool->start == pool->blocks->s) {
BLOCK *temp;
- int blockSize = (int)(pool->end - pool->start)*2;
+ int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
if (blockSize < 0)
return XML_FALSE;
diff --git a/lib/xmlrole.c b/lib/xmlrole.c
index 44772e21..8a68e20f 100644
--- a/lib/xmlrole.c
+++ b/lib/xmlrole.c
@@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
static int PTRCALL
prolog2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
static int PTRCALL
doctype0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
static int PTRCALL
doctype2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
static int PTRCALL
doctype3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
static int PTRCALL
doctype4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
static int PTRCALL
doctype5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
static int PTRCALL
entity0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
static int PTRCALL
entity1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
static int PTRCALL
entity3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
static int PTRCALL
entity4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
static int PTRCALL
entity6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
static int PTRCALL
entity8(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
static int PTRCALL
entity9(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
static int PTRCALL
entity10(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
static int PTRCALL
notation0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
static int PTRCALL
notation2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
static int PTRCALL
notation3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
static int PTRCALL
notation4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
static int PTRCALL
attlist0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
static int PTRCALL
attlist1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
static int PTRCALL
attlist3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
static int PTRCALL
attlist4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
static int PTRCALL
attlist5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
static int PTRCALL
attlist6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
static int PTRCALL
attlist7(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
static int PTRCALL
attlist9(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
static int PTRCALL
element0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
static int PTRCALL
element3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
static int PTRCALL
element4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
static int PTRCALL
element5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
static int PTRCALL
element6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
static int PTRCALL
element7(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
static int PTRCALL
condSect1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
static int PTRCALL
condSect2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
static int PTRCALL
declClose(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
}
+/* Handler that ignores all five arguments and always returns XML_ROLE_NONE. */
static int PTRCALL
-error(PROLOG_STATE *state,
- int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+error(PROLOG_STATE *UNUSED_P(state),
+ int UNUSED_P(tok),
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
return XML_ROLE_NONE;
}
diff --git a/lib/xmltok.c b/lib/xmltok.c
index cb98ce1f..daa35654 100644
--- a/lib/xmltok.c
+++ b/lib/xmltok.c
@@ -46,7 +46,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
+/* Tests a single bit of namingBitmap: pages[hi] << 3 plus (lo) >> 5 selects
+   the word, and (lo) & 0x1F selects the bit inside that word.  The mask is
+   built from 1u so that shifting into bit 31 stays an unsigned operation. */
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -56,7 +56,7 @@
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
- & (1 << (((byte)[1]) & 0x1F)))
+ & (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
@@ -69,7 +69,7 @@
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
- & (1 << (((byte)[2]) & 0x1F)))
+ & (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \
@@ -122,19 +122,19 @@
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
+/* Predicate that ignores both arguments and always returns 0; utf8_isName4
+   and utf8_isNmstrt4 are #defined to this function. */
static int PTRFASTCALL
-isNever(const ENCODING *enc, const char *p)
+isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{
return 0;
}
+/* Returns UTF8_GET_NAMING2 evaluated over the namePages table for the bytes
+   at p (read as unsigned char); enc is not used. */
static int PTRFASTCALL
-utf8_isName2(const ENCODING *enc, const char *p)
+utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}
+/* Returns UTF8_GET_NAMING3 evaluated over the namePages table for the bytes
+   at p (read as unsigned char); enc is not used. */
static int PTRFASTCALL
-utf8_isName3(const ENCODING *enc, const char *p)
+utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}
@@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
#define utf8_isName4 isNever
+/* Returns UTF8_GET_NAMING2 evaluated over the nmstrtPages table for the
+   bytes at p (read as unsigned char); enc is not used. */
static int PTRFASTCALL
-utf8_isNmstrt2(const ENCODING *enc, const char *p)
+utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}
+/* Returns UTF8_GET_NAMING3 evaluated over the nmstrtPages table for the
+   bytes at p (read as unsigned char); enc is not used. */
static int PTRFASTCALL
-utf8_isNmstrt3(const ENCODING *enc, const char *p)
+utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}
@@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define utf8_isNmstrt4 isNever
+/* Returns the result of UTF8_INVALID2 for the bytes at p (read as unsigned
+   char); enc is not used. */
static int PTRFASTCALL
-utf8_isInvalid2(const ENCODING *enc, const char *p)
+utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID2((const unsigned char *)p);
}
+/* Returns the result of UTF8_INVALID3 for the bytes at p (read as unsigned
+   char); enc is not used. */
static int PTRFASTCALL
-utf8_isInvalid3(const ENCODING *enc, const char *p)
+utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID3((const unsigned char *)p);
}
+/* Returns the result of UTF8_INVALID4 for the bytes at p (read as unsigned
+   char); enc is not used. */
static int PTRFASTCALL
-utf8_isInvalid4(const ENCODING *enc, const char *p)
+utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID4((const unsigned char *)p);
}
@@ -222,6 +222,17 @@ struct normal_encoding {
E ## isInvalid3, \
E ## isInvalid4
+/* Nine NULL initializers, one for each isName / isNmstrt / isInvalid slot
+   (2-, 3- and 4-byte variants).  Written directly after STANDARD_VTABLE(...)
+   in the normal_encoding initializers of this file so those members are
+   initialised explicitly. */
+#define NULL_VTABLE \
+ /* isName2 */ NULL, \
+ /* isName3 */ NULL, \
+ /* isName4 */ NULL, \
+ /* isNmstrt2 */ NULL, \
+ /* isNmstrt3 */ NULL, \
+ /* isNmstrt4 */ NULL, \
+ /* isInvalid2 */ NULL, \
+ /* isInvalid3 */ NULL, \
+ /* isInvalid4 */ NULL
+
static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h"
@@ -318,8 +329,43 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0
};
+/* Moves *fromLimRef back towards `from` so that, for well-formed UTF-8, the
+   range [from, *fromLimRef) no longer ends part-way through a multi-byte
+   sequence.  utf8_toUtf8 calls this after clamping the input limit to the
+   space left in the output buffer.
+
+   from        first byte of the range; the limit is never moved before it.
+   fromLimRef  in/out: candidate end of the range on entry, adjusted end on
+               return.  The result is never past the value passed in.
+
+   The scan runs backwards from the limit one byte at a time and stops at
+   the first byte that proves the range ends on a character boundary. */
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+{
+ const char * fromLim = *fromLimRef;
+ /* Bytes stepped back over since the scan began or since the last reset. */
+ size_t walked = 0;
+ for (; fromLim > from; fromLim--, walked++) {
+ const unsigned char prev = (unsigned char)fromLim[-1];
+ if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
+ if (walked + 1 >= 4) {
+ /* Lead byte plus the bytes already walked cover the whole sequence:
+ fromLim sits just past the lead byte, so skip forward over the
+ three trailing bytes and keep the character. */
+ fromLim += 4 - 1;
+ break;
+ } else {
+ /* Sequence is cut short: leave it out and keep scanning.  The
+ for-increment still runs, so the next byte sees walked == 1. */
+ walked = 0;
+ }
+ } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
+ if (walked + 1 >= 3) {
+ fromLim += 3 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
+ if (walked + 1 >= 2) {
+ fromLim += 2 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
+ break;
+ }
+ /* Any other byte matches none of the tests above and is simply stepped
+ over by the for-increment. */
+ }
+ *fromLimRef = fromLim;
+}
+
static enum XML_Convert_Result PTRCALL
-utf8_toUtf8(const ENCODING *enc,
+utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
@@ -329,9 +375,8 @@ utf8_toUtf8(const ENCODING *enc,
if (fromLim - *fromP > toLim - *toP) {
/* Avoid copying partial characters. */
res = XML_CONVERT_OUTPUT_EXHAUSTED;
- for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
- if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
- break;
+ fromLim = *fromP + (toLim - *toP);
+ align_limit_to_full_utf8_characters(*fromP, &fromLim);
}
for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from;
@@ -449,7 +494,7 @@ static const struct normal_encoding internal_utf8_encoding = {
};
static enum XML_Convert_Result PTRCALL
-latin1_toUtf8(const ENCODING *enc,
+latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
@@ -474,7 +519,7 @@ latin1_toUtf8(const ENCODING *enc,
}
static enum XML_Convert_Result PTRCALL
-latin1_toUtf16(const ENCODING *enc,
+latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
@@ -495,7 +540,7 @@ static const struct normal_encoding latin1_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
#endif
@@ -508,11 +553,11 @@ static const struct normal_encoding latin1_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
static enum XML_Convert_Result PTRCALL
-ascii_toUtf8(const ENCODING *enc,
+ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
@@ -533,7 +578,7 @@ static const struct normal_encoding ascii_encoding_ns = {
#include "asciitab.h"
/* BT_NONXML == 0 */
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
#endif
@@ -546,7 +591,7 @@ static const struct normal_encoding ascii_encoding = {
#undef BT_COLON
/* BT_NONXML == 0 */
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
static int PTRFASTCALL
@@ -570,7 +615,7 @@ unicode_byte_type(char hi, char lo)
#define DEFINE_UTF16_TO_UTF8(E) \
static enum XML_Convert_Result PTRCALL \
-E ## toUtf8(const ENCODING *enc, \
+E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
@@ -642,7 +687,7 @@ E ## toUtf8(const ENCODING *enc, \
#define DEFINE_UTF16_TO_UTF16(E) \
static enum XML_Convert_Result PTRCALL \
-E ## toUtf16(const ENCODING *enc, \
+E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
@@ -776,7 +821,7 @@ static const struct normal_encoding little2_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -795,7 +840,7 @@ static const struct normal_encoding little2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#if BYTEORDER != 4321
@@ -808,7 +853,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#include "iasciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -821,7 +866,7 @@ static const struct normal_encoding internal_little2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -917,7 +962,7 @@ static const struct normal_encoding big2_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -936,7 +981,7 @@ static const struct normal_encoding big2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#if BYTEORDER != 1234
@@ -949,7 +994,7 @@ static const struct normal_encoding internal_big2_encoding_ns = {
#include "iasciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -962,7 +1007,7 @@ static const struct normal_encoding internal_big2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -988,7 +1033,7 @@ streqci(const char *s1, const char *s2)
}
static void PTRCALL
-initUpdatePosition(const ENCODING *enc, const char *ptr,
+initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos)
{
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
index 6c5a3ba4..5f779c05 100644
--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
@@ -87,27 +87,45 @@
#define PREFIX(ident) ident
#endif
+
+/* Bounds checks shared by the scanners in this file.
+
+   HAS_CHARS(enc, ptr, end, count) is true when the distance from ptr to end
+   is at least count * MINBPC(enc) bytes.  Each of ptr, end and count is
+   parenthesised so that an expression argument (for example a count written
+   as "n + 1") keeps its meaning after expansion.
+   HAS_CHAR is HAS_CHARS with a count of 1.
+
+   REQUIRE_CHARS / REQUIRE_CHAR make the *enclosing function* return
+   XML_TOK_PARTIAL when that many units are not available, so they belong
+   only in functions for which returning XML_TOK_PARTIAL is valid. */
+#define HAS_CHARS(enc, ptr, end, count) \
+  ((end) - (ptr) >= (count) * MINBPC(enc))
+
+#define HAS_CHAR(enc, ptr, end) \
+  HAS_CHARS(enc, ptr, end, 1)
+
+#define REQUIRE_CHARS(enc, ptr, end, count) \
+  { \
+    if (! HAS_CHARS(enc, ptr, end, count)) { \
+      return XML_TOK_PARTIAL; \
+    } \
+  }
+
+#define REQUIRE_CHAR(enc, ptr, end) \
+  REQUIRE_CHARS(enc, ptr, end, 1)
+
+
/* ptr points to character following "<!-" */
static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr < end) {
+ if (HAS_CHAR(enc, ptr, end)) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -131,8 +149,7 @@ static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_PERCNT:
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 2);
/* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
@@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
-PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
+PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, int *tokPtr)
{
int upper = 0;
@@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
{
int tok;
const char *target = ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
@@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
@@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
@@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
-PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
+PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, const char **nextTokPtr)
{
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
int i;
/* CDATA[ */
- if (end - ptr < 6 * MINBPC(enc))
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 6);
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr;
@@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
switch (BYTE_TYPE(enc, ptr)) {
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
@@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_CDATA_SECT_CLOSE;
case BT_CR:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
@@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
ptr += MINBPC(enc);
break;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -383,19 +392,18 @@ static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
- for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
break;
@@ -432,7 +440,7 @@ static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr < end) {
+ if (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -464,7 +472,7 @@ static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr < end) {
+ if (HAS_CHAR(enc, ptr, end)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) {
@@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
break;
@@ -496,8 +504,7 @@ static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_NUM:
@@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
@@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
int hadColon = 0;
#endif
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
@@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
hadColon = 1;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
@@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
int t;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS)
break;
@@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#endif
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
@@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* in attribute value */
for (;;) {
int t;
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
@@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
}
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
case BT_CR:
@@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
@@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL:
sol:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
int hadColon;
#endif
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0;
#endif
/* we have a start-tag */
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
@@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
}
hadColon = 1;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
@@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF:
{
ptr += MINBPC(enc);
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
@@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL:
sol:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_CR:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_DATA_NEWLINE;
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
@@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
break;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
case BT_RSQB:
- if (ptr + MINBPC(enc) != end) {
+ if (HAS_CHARS(enc, ptr, end, 2)) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
- if (ptr + 2*MINBPC(enc) != end) {
+ if (HAS_CHARS(enc, ptr, end, 3)) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
@@ -884,8 +881,7 @@ static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
@@ -913,15 +909,14 @@ static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR: case BT_LF: case BT_S:
@@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end,
const char **nextTokPtr)
{
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
int t = BYTE_TYPE(enc, ptr);
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
@@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
ptr += MINBPC(enc);
if (t != open)
break;
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_LITERAL;
*nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) {
@@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LT:
{
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_LF:
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
break;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF:
@@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_BRACKET;
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 2);
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
@@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_PAREN;
case BT_RPAR:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_PAREN;
switch (BYTE_TYPE(enc, ptr)) {
case BT_AST:
@@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_RPAR: case BT_COMMA:
@@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
switch (tok) {
case XML_TOK_NAME:
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
@@ -1206,8 +1198,10 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *start;
if (ptr >= end)
return XML_TOK_NONE;
+ else if (! HAS_CHAR(enc, ptr, end))
+ return XML_TOK_PARTIAL;
start = ptr;
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
@@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -1264,8 +1258,10 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *start;
if (ptr >= end)
return XML_TOK_NONE;
+ else if (! HAS_CHAR(enc, ptr, end))
+ return XML_TOK_PARTIAL;
start = ptr;
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
@@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
end = ptr + n;
}
}
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_LT:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level;
ptr += MINBPC(enc);
@@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
}
break;
case BT_RSQB:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc);
if (level == 0) {
@@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
{
ptr += MINBPC(enc);
end -= MINBPC(enc);
- for (; ptr < end; ptr += MINBPC(enc)) {
+ for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
}
static int PTRFASTCALL
-PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
+PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
{
int result = 0;
/* skip &# */
@@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
}
static int PTRCALL
-PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
+PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end)
{
switch ((end - ptr)/MINBPC(enc)) {
@@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
}
static int PTRCALL
-PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
+PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2)
{
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
- if (ptr1 == end1)
+ if (end1 - ptr1 < MINBPC(enc))
return 0;
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0;
@@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end,
POSITION *pos)
{
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
case BT_CR:
pos->lineNumber++;
ptr += MINBPC(enc);
- if (ptr < end && BYTE_TYPE(enc, ptr) == BT_LF)
+ if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
pos->columnNumber = (XML_Size)-1;
break;
diff --git a/tests/chardata.c b/tests/chardata.c
index 02243e04..012499bb 100644
--- a/tests/chardata.c
+++ b/tests/chardata.c
@@ -47,7 +47,7 @@ CharData_AppendString(CharData *storage, const char *s)
if ((len + storage->count) > maxchars) {
len = (maxchars - storage->count);
}
- if (len + storage->count < sizeof(storage->data)) {
+ if (len + storage->count < (int)sizeof(storage->data)) {
memcpy(storage->data + storage->count, s, len);
storage->count += len;
}
@@ -68,7 +68,7 @@ CharData_AppendXMLChars(CharData *storage, const XML_Char *s, int len)
if ((len + storage->count) > maxchars) {
len = (maxchars - storage->count);
}
- if (len + storage->count < sizeof(storage->data)) {
+ if (len + storage->count < (int)sizeof(storage->data)) {
memcpy(storage->data + storage->count, s,
len * sizeof(storage->data[0]));
storage->count += len;
diff --git a/tests/minicheck.c b/tests/minicheck.c
index d2f4295f..5a1f5ed0 100644
--- a/tests/minicheck.c
+++ b/tests/minicheck.c
@@ -10,10 +10,11 @@
#include <setjmp.h>
#include <assert.h>
+#include "internal.h" /* for UNUSED_P only */
#include "minicheck.h"
Suite *
-suite_create(char *name)
+suite_create(const char *name)
{
Suite *suite = (Suite *) calloc(1, sizeof(Suite));
if (suite != NULL) {
@@ -23,7 +24,7 @@ suite_create(char *name)
}
TCase *
-tcase_create(char *name)
+tcase_create(const char *name)
{
TCase *tc = (TCase *) calloc(1, sizeof(TCase));
if (tc != NULL) {
@@ -156,7 +157,7 @@ srunner_run_all(SRunner *runner, int verbosity)
}
void
-_fail_unless(int condition, const char *file, int line, char *msg)
+_fail_unless(int UNUSED_P(condition), const char *UNUSED_P(file), int UNUSED_P(line), const char *msg)
{
/* Always print the error message so it isn't lost. In this case,
we have a failure, so there's no reason to be quiet about what
diff --git a/tests/minicheck.h b/tests/minicheck.h
index c917c026..9b06f51a 100644
--- a/tests/minicheck.h
+++ b/tests/minicheck.h
@@ -26,6 +26,11 @@ extern "C" {
#define __func__ __FUNCTION__
#endif
+/* ISO C90 does not support '__func__' predefined identifier */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ < 199901)
+# define __func__ "(unknown)"
+#endif
+
#define START_TEST(testname) static void testname(void) { \
_check_set_test_info(__func__, __FILE__, __LINE__); \
{
@@ -48,12 +53,12 @@ struct SRunner {
};
struct Suite {
- char *name;
+ const char *name;
TCase *tests;
};
struct TCase {
- char *name;
+ const char *name;
tcase_setup_function setup;
tcase_teardown_function teardown;
tcase_test_function *tests;
@@ -72,9 +77,9 @@ void _check_set_test_info(char const *function,
* Prototypes for the actual implementation.
*/
-void _fail_unless(int condition, const char *file, int line, char *msg);
-Suite *suite_create(char *name);
-TCase *tcase_create(char *name);
+void _fail_unless(int condition, const char *file, int line, const char *msg);
+Suite *suite_create(const char *name);
+TCase *tcase_create(const char *name);
void suite_add_tcase(Suite *suite, TCase *tc);
void tcase_add_checked_fixture(TCase *,
tcase_setup_function,
diff --git a/tests/runtests.c b/tests/runtests.c
index 614d6b24..c0cdea99 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -13,9 +13,14 @@
#include <stdio.h>
#include <string.h>
#include <stdint.h>
+#include <stddef.h> /* ptrdiff_t */
+#ifndef __cplusplus
+# include <stdbool.h>
+#endif
#include "expat.h"
#include "chardata.h"
+#include "internal.h" /* for UNUSED_P only */
#include "minicheck.h"
#if defined(__amigaos__) && defined(__USE_INLINE__)
@@ -66,13 +71,34 @@ _xml_failure(XML_Parser parser, const char *file, int line)
_fail_unless(0, file, line, buffer);
}
+static enum XML_Status
+_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
+{
+ enum XML_Status res = XML_STATUS_ERROR;
+ int offset = 0;
+
+ if (len == 0) {
+ return XML_Parse(parser, s, len, isFinal);
+ }
+
+ for (; offset < len; offset++) {
+ const int innerIsFinal = (offset == len - 1) && isFinal;
+ const char c = s[offset]; /* to help out-of-bounds detection */
+ res = XML_Parse(parser, &c, sizeof(char), innerIsFinal);
+ if (res != XML_STATUS_OK) {
+ return res;
+ }
+ }
+ return res;
+}
+
#define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
static void
-_expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
- char *file, int lineno)
+_expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
+ const char *file, int lineno)
{
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
/* Hackish use of _fail_unless() macro, but let's us report
the right filename and line number. */
_fail_unless(0, file, lineno, errorMessage);
@@ -89,63 +115,63 @@ _expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
*/
static void XMLCALL
-dummy_start_doctype_handler(void *userData,
- const XML_Char *doctypeName,
- const XML_Char *sysid,
- const XML_Char *pubid,
- int has_internal_subset)
+dummy_start_doctype_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(doctypeName),
+ const XML_Char *UNUSED_P(sysid),
+ const XML_Char *UNUSED_P(pubid),
+ int UNUSED_P(has_internal_subset))
{}
static void XMLCALL
-dummy_end_doctype_handler(void *userData)
+dummy_end_doctype_handler(void *UNUSED_P(userData))
{}
static void XMLCALL
-dummy_entity_decl_handler(void *userData,
- const XML_Char *entityName,
- int is_parameter_entity,
- const XML_Char *value,
- int value_length,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId,
- const XML_Char *notationName)
+dummy_entity_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(entityName),
+ int UNUSED_P(is_parameter_entity),
+ const XML_Char *UNUSED_P(value),
+ int UNUSED_P(value_length),
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId),
+ const XML_Char *UNUSED_P(notationName))
{}
static void XMLCALL
-dummy_notation_decl_handler(void *userData,
- const XML_Char *notationName,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+dummy_notation_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(notationName),
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{}
static void XMLCALL
-dummy_element_decl_handler(void *userData,
- const XML_Char *name,
- XML_Content *model)
+dummy_element_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name),
+ XML_Content *UNUSED_P(model))
{}
static void XMLCALL
-dummy_attlist_decl_handler(void *userData,
- const XML_Char *elname,
- const XML_Char *attname,
- const XML_Char *att_type,
- const XML_Char *dflt,
- int isrequired)
+dummy_attlist_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(elname),
+ const XML_Char *UNUSED_P(attname),
+ const XML_Char *UNUSED_P(att_type),
+ const XML_Char *UNUSED_P(dflt),
+ int UNUSED_P(isrequired))
{}
static void XMLCALL
-dummy_comment_handler(void *userData, const XML_Char *data)
+dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data))
{}
static void XMLCALL
-dummy_pi_handler(void *userData, const XML_Char *target, const XML_Char *data)
+dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data))
{}
static void XMLCALL
-dummy_start_element(void *userData,
- const XML_Char *name, const XML_Char **atts)
+dummy_start_element(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
{}
@@ -158,7 +184,7 @@ START_TEST(test_nul_byte)
char text[] = "<doc>\0</doc>";
/* test that a NUL byte (in US-ASCII data) is an error */
- if (XML_Parse(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
fail("Parser did not report error on NUL-byte.");
if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
@@ -178,9 +204,9 @@ END_TEST
START_TEST(test_bom_utf8)
{
/* This test is really just making sure we don't core on a UTF-8 BOM. */
- char *text = "\357\273\277<e/>";
+ const char *text = "\357\273\277<e/>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -189,7 +215,7 @@ START_TEST(test_bom_utf16_be)
{
char text[] = "\376\377\0<\0e\0/\0>";
- if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -198,7 +224,7 @@ START_TEST(test_bom_utf16_le)
{
char text[] = "\377\376<\0e\0/\0>\0";
- if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -210,7 +236,7 @@ accumulate_characters(void *userData, const XML_Char *s, int len)
}
static void XMLCALL
-accumulate_attribute(void *userData, const XML_Char *name,
+accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
const XML_Char **atts)
{
CharData *storage = (CharData *)userData;
@@ -222,7 +248,7 @@ accumulate_attribute(void *userData, const XML_Char *name,
static void
-_run_character_check(XML_Char *text, XML_Char *expected,
+_run_character_check(const XML_Char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
@@ -230,7 +256,7 @@ _run_character_check(XML_Char *text, XML_Char *expected,
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
@@ -239,7 +265,7 @@ _run_character_check(XML_Char *text, XML_Char *expected,
_run_character_check(text, expected, __FILE__, __LINE__)
static void
-_run_attribute_check(XML_Char *text, XML_Char *expected,
+_run_attribute_check(const XML_Char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
@@ -247,7 +273,7 @@ _run_attribute_check(XML_Char *text, XML_Char *expected,
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, accumulate_attribute);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
@@ -258,7 +284,7 @@ _run_attribute_check(XML_Char *text, XML_Char *expected,
/* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
run_character_check(text,
@@ -270,7 +296,7 @@ END_TEST
/* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
run_character_check(text,
@@ -280,7 +306,7 @@ END_TEST
START_TEST(test_french_charref_decimal)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
run_character_check(text,
@@ -290,7 +316,7 @@ END_TEST
START_TEST(test_french_latin1)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
run_character_check(text,
@@ -300,7 +326,7 @@ END_TEST
START_TEST(test_french_utf8)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='utf-8'?>\n"
"<doc>\xC3\xA9</doc>";
run_character_check(text, "\xC3\xA9");
@@ -314,7 +340,7 @@ END_TEST
*/
START_TEST(test_utf8_false_rejection)
{
- char *text = "<doc>\xEF\xBA\xBF</doc>";
+ const char *text = "<doc>\xEF\xBA\xBF</doc>";
run_character_check(text, "\xEF\xBA\xBF");
}
END_TEST
@@ -331,7 +357,7 @@ START_TEST(test_illegal_utf8)
for (i = 128; i <= 255; ++i) {
sprintf(text, "<e>%ccd</e>", i);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
sprintf(text,
"expected token error for '%c' (ordinal %d) in UTF-8 text",
i, i);
@@ -345,6 +371,68 @@ START_TEST(test_illegal_utf8)
}
END_TEST
+
+/* Examples, not masks: */
+#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
+#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
+#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
+#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
+#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
+
+START_TEST(test_utf8_auto_align)
+{
+ struct TestCase {
+ ptrdiff_t expectedMovementInChars;
+ const char * input;
+ };
+
+ struct TestCase cases[] = {
+ {00, ""},
+
+ {00, UTF8_LEAD_1},
+
+ {-1, UTF8_LEAD_2},
+ {00, UTF8_LEAD_2 UTF8_FOLLOW},
+
+ {-1, UTF8_LEAD_3},
+ {-2, UTF8_LEAD_3 UTF8_FOLLOW},
+ {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
+
+ {-1, UTF8_LEAD_4},
+ {-2, UTF8_LEAD_4 UTF8_FOLLOW},
+ {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
+ {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
+ };
+
+ size_t i = 0;
+ bool success = true;
+ for (; i < sizeof(cases) / sizeof(*cases); i++) {
+ const char * fromLim = cases[i].input + strlen(cases[i].input);
+ const char * const fromLimInitially = fromLim;
+ ptrdiff_t actualMovementInChars;
+
+ align_limit_to_full_utf8_characters(cases[i].input, &fromLim);
+
+ actualMovementInChars = (fromLim - fromLimInitially);
+ if (actualMovementInChars != cases[i].expectedMovementInChars) {
+ size_t j = 0;
+ success = false;
+ printf("[-] UTF-8 case %2lu: Expected movement by %2ld chars"
+ ", actually moved by %2ld chars: \"",
+ i + 1, cases[i].expectedMovementInChars, actualMovementInChars);
+ for (; j < strlen(cases[i].input); j++) {
+ printf("\\x%02x", (unsigned char)cases[i].input[j]);
+ }
+ printf("\"\n");
+ }
+ }
+
+ if (! success) {
+ fail("UTF-8 auto-alignment is not bullet-proof\n");
+ }
+}
+END_TEST
+
START_TEST(test_utf16)
{
/* <?xml version="1.0" encoding="UTF-16"?>
@@ -358,7 +446,7 @@ START_TEST(test_utf16)
"\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'"
"\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/"
"\000d\000o\000c\000>";
- if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -373,12 +461,12 @@ START_TEST(test_utf16_le_epilog_newline)
if (first_chunk_bytes >= sizeof(text) - 1)
fail("bad value of first_chunk_bytes");
- if ( XML_Parse(parser, text, first_chunk_bytes, XML_FALSE)
+ if ( _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE)
== XML_STATUS_ERROR)
xml_failure(parser);
else {
enum XML_Status rc;
- rc = XML_Parse(parser, text + first_chunk_bytes,
+ rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
if (rc == XML_STATUS_ERROR)
xml_failure(parser);
@@ -389,11 +477,11 @@ END_TEST
/* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
" >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
- char *utf8 =
+ const char *utf8 =
"\xC3\xA4 \xC3\xB6 \xC3\xBC "
"\xC3\xA4 \xC3\xB6 \xC3\xBC "
"\xC3\xA4 \xC3\xB6 \xC3\xBC >";
@@ -406,13 +494,13 @@ END_TEST
/* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)
{
- char *text =
+ const char *text =
"<tag>\n"
"\n"
"\n</tag>";
XML_Size lineno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
lineno = XML_GetCurrentLineNumber(parser);
if (lineno != 4) {
@@ -427,10 +515,10 @@ END_TEST
/* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)
{
- char *text = "<tag></tag>";
+ const char *text = "<tag></tag>";
XML_Size colno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
colno = XML_GetCurrentColumnNumber(parser);
if (colno != 11) {
@@ -444,7 +532,7 @@ END_TEST
static void XMLCALL
start_element_event_handler2(void *userData, const XML_Char *name,
- const XML_Char **attr)
+ const XML_Char **UNUSED_P(attr))
{
CharData *storage = (CharData *) userData;
char buffer[100];
@@ -474,7 +562,7 @@ end_element_event_handler2(void *userData, const XML_Char *name)
/* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)
{
- char *text =
+ const char *text =
"<a>\n" /* Unix end-of-line */
" <b>\r\n" /* Windows end-of-line */
" <c/>\r" /* Mac OS end-of-line */
@@ -483,7 +571,7 @@ START_TEST(test_line_and_column_numbers_inside_handlers)
" <f/>\n"
" </d>\n"
"</a>";
- char *expected =
+ const char *expected =
"<a> at col:0 line:1\n"
"<b> at col:2 line:2\n"
"<c> at col:4 line:3\n"
@@ -500,7 +588,7 @@ START_TEST(test_line_and_column_numbers_inside_handlers)
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, start_element_event_handler2);
XML_SetEndElementHandler(parser, end_element_event_handler2);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, expected);
@@ -510,12 +598,12 @@ END_TEST
/* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)
{
- char *text =
+ const char *text =
"<a>\n"
" <b>\n"
" </a>"; /* missing </b> */
XML_Size lineno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
fail("Expected a parse error");
lineno = XML_GetCurrentLineNumber(parser);
@@ -530,12 +618,12 @@ END_TEST
/* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)
{
- char *text =
+ const char *text =
"<a>\n"
" <b>\n"
" </a>"; /* missing </b> */
XML_Size colno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
fail("Expected a parse error");
colno = XML_GetCurrentColumnNumber(parser);
@@ -556,7 +644,7 @@ START_TEST(test_really_long_lines)
really cheesy approach to building the input buffer, because
this avoids writing bugs in buffer-filling code.
*/
- char *text =
+ const char *text =
"<e>"
/* 64 chars */
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
@@ -578,7 +666,7 @@ START_TEST(test_really_long_lines)
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"</e>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -598,14 +686,14 @@ end_element_event_handler(void *userData, const XML_Char *name)
START_TEST(test_end_element_events)
{
- char *text = "<a><b><c/></b><d><f/></d></a>";
- char *expected = "/c/b/f/d/a";
+ const char *text = "<a><b><c/></b><d><f/></d></a>";
+ const char *expected = "/c/b/f/d/a";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetEndElementHandler(parser, end_element_event_handler);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, expected);
}
@@ -679,8 +767,8 @@ testhelper_is_whitespace_normalized(void)
}
static void XMLCALL
-check_attr_contains_normalized_whitespace(void *userData,
- const XML_Char *name,
+check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name),
const XML_Char **atts)
{
int i;
@@ -702,7 +790,7 @@ check_attr_contains_normalized_whitespace(void *userData,
START_TEST(test_attr_whitespace_normalization)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ATTLIST doc\n"
" attr NMTOKENS #REQUIRED\n"
@@ -718,7 +806,7 @@ START_TEST(test_attr_whitespace_normalization)
XML_SetStartElementHandler(parser,
check_attr_contains_normalized_whitespace);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -740,7 +828,7 @@ END_TEST
/* Regression test for SF bug #584832. */
static int XMLCALL
-UnknownEncodingHandler(void *data,const XML_Char *encoding,XML_Encoding *info)
+UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info)
{
if (strcmp(encoding,"unsupported-encoding") == 0) {
int i;
@@ -756,13 +844,13 @@ UnknownEncodingHandler(void *data,const XML_Char *encoding,XML_Encoding *info)
START_TEST(test_unknown_encoding_internal_entity)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='unsupported-encoding'?>\n"
"<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
"<test a='&foo;'/>";
XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -771,14 +859,14 @@ END_TEST
static int XMLCALL
external_entity_loader_set_encoding(XML_Parser parser,
const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{
/* This text says it's an unsupported encoding, but it's really
UTF-8, which we tell Expat using XML_SetEncoding().
*/
- char *text =
+ const char *text =
"<?xml encoding='iso-8859-3'?>"
"\xC3\xA9";
XML_Parser extparser;
@@ -788,7 +876,7 @@ external_entity_loader_set_encoding(XML_Parser parser,
fail("Could not create external entity parser.");
if (!XML_SetEncoding(extparser, "utf-8"))
fail("XML_SetEncoding() ignored for external entity");
- if ( XML_Parse(extparser, text, strlen(text), XML_TRUE)
+ if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
== XML_STATUS_ERROR) {
xml_failure(parser);
return 0;
@@ -798,7 +886,7 @@ external_entity_loader_set_encoding(XML_Parser parser,
START_TEST(test_ext_entity_set_encoding)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n"
"]>\n"
@@ -814,11 +902,11 @@ END_TEST
read an external subset. This was fixed in Expat 1.95.5.
*/
START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
- char *text =
+ const char *text =
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -837,7 +925,7 @@ END_TEST
read an external subset, but have been declared standalone.
*/
START_TEST(test_wfc_undeclared_entity_standalone) {
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
@@ -851,9 +939,9 @@ END_TEST
static int XMLCALL
external_entity_loader(XML_Parser parser,
const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{
char *text = (char *)XML_GetUserData(parser);
XML_Parser extparser;
@@ -861,7 +949,7 @@ external_entity_loader(XML_Parser parser,
extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
if (extparser == NULL)
fail("Could not create external entity parser.");
- if ( XML_Parse(extparser, text, strlen(text), XML_TRUE)
+ if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
== XML_STATUS_ERROR) {
xml_failure(parser);
return XML_STATUS_ERROR;
@@ -873,11 +961,11 @@ external_entity_loader(XML_Parser parser,
an external subset, and standalone is true.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
- char *foo_text =
+ char foo_text[] =
"<!ELEMENT doc (#PCDATA)*>";
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -893,24 +981,24 @@ END_TEST
an external subset, and standalone is false.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset) {
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='us-ascii'?>\n"
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
- char *foo_text =
+ char foo_text[] =
"<!ELEMENT doc (#PCDATA)*>";
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetUserData(parser, foo_text);
XML_SetExternalEntityRefHandler(parser, external_entity_loader);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_wfc_no_recursive_entity_refs)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ENTITY entity '&#38;entity;'>\n"
"]>\n"
@@ -925,7 +1013,7 @@ END_TEST
/* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
"<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n"
"<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n"
@@ -957,12 +1045,12 @@ END_TEST
*/
START_TEST(test_empty_ns_without_namespaces)
{
- char *text =
+ const char *text =
"<doc xmlns:prefix='http://www.example.com/'>\n"
" <e xmlns:prefix=''/>\n"
"</doc>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -973,19 +1061,19 @@ END_TEST
*/
START_TEST(test_ns_in_attribute_default_without_namespaces)
{
- char *text =
+ const char *text =
"<!DOCTYPE e:element [\n"
" <!ATTLIST e:element\n"
" xmlns:e CDATA 'http://example.com/'>\n"
" ]>\n"
"<e:element/>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
-static char *long_character_data_text =
+static const char *long_character_data_text =
"<?xml version='1.0' encoding='iso-8859-1'?><s>"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
@@ -1012,8 +1100,8 @@ static char *long_character_data_text =
static XML_Bool resumable = XML_FALSE;
static void
-clearing_aborting_character_handler(void *userData,
- const XML_Char *s, int len)
+clearing_aborting_character_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(s), int UNUSED_P(len))
{
XML_StopParser(parser, resumable);
XML_SetCharacterDataHandler(parser, NULL);
@@ -1029,11 +1117,11 @@ START_TEST(test_stop_parser_between_char_data_calls)
handler must stop the parser and clear the character data
handler.
*/
- char *text = long_character_data_text;
+ const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
resumable = XML_FALSE;
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
xml_failure(parser);
if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
xml_failure(parser);
@@ -1050,17 +1138,112 @@ START_TEST(test_suspend_parser_between_char_data_calls)
handler must stop the parser and clear the character data
handler.
*/
- char *text = long_character_data_text;
+ const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
resumable = XML_TRUE;
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
xml_failure(parser);
if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
xml_failure(parser);
}
END_TEST
+START_TEST(test_good_cdata_ascii)
+{
+  /* The CDATA section wraps text that looks like markup; the test passes
+     only if the accumulated character data equals that text unchanged. */
+  const char *expected = "<greeting>Hello, world!</greeting>";
+  const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
+  CharData collected;
+
+  /* Route character data into `collected` through the user-data pointer. */
+  CharData_Init(&collected);
+  XML_SetCharacterDataHandler(parser, accumulate_characters);
+  XML_SetUserData(parser, &collected);
+
+  if (XML_STATUS_ERROR == _XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE)) {
+    xml_failure(parser);
+  }
+  CharData_CheckXMLChars(&collected, expected);
+}
+END_TEST
+
+START_TEST(test_good_cdata_utf16)
+{
+ /* Parses a UTF-16 document containing a CDATA section and checks the
+ * character data that was accumulated while parsing.
+ *
+ * Test data is:
+ * <?xml version='1.0' encoding='utf-16'?>
+ * <a><![CDATA[hello]]></a>
+ *
+ * The literal below spells it out byte by byte: every ASCII character is
+ * preceded by a zero byte (big-endian order, no byte order mark). The
+ * \x31 / \x30 escapes and the ""1 / ""6 literal splits keep a digit that
+ * directly follows \0 from being absorbed into the octal escape.
+ */
+ const char text[] =
+ "\0<\0?\0x\0m\0l\0"
+ " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
+ " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
+ "\0?\0>\0\n"
+ "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
+ const char *expected = "hello";
+
+ CharData storage;
+ CharData_Init(&storage);
+ XML_SetUserData(parser, &storage);
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+
+ /* The data begins with a NUL byte, so strlen() cannot measure it;
+ * sizeof(text) - 1 is the array size minus the implicit terminator. */
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
+ xml_failure(parser);
+ CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
+START_TEST(test_bad_cdata)
+{
+  /* Each case pairs a document that is cut off or malformed somewhere
+     around a CDATA section opener with the error code the parser is
+     expected to report for it. */
+  struct CaseData {
+    const char *text;
+    enum XML_Error expectedError;
+  };
+
+  struct CaseData cases[] = {
+    {"<a><", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
+    {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
+
+    {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
+    {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
+    {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
+
+    {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
+    {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
+    {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
+    {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
+    {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
+    {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
+    {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
+
+    {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
+    {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
+    {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
+  };
+
+  size_t i = 0;
+  for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
+    /* Shared by both failure messages; the fixed text plus the longest
+       case string above stays below 100 bytes. */
+    char message[100];
+    const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
+        parser, cases[i].text, strlen(cases[i].text), XML_TRUE);
+    const enum XML_Error actualError = XML_GetErrorCode(parser);
+
+    /* Report a wrong status through fail() rather than assert(): an
+       assert() disappears when NDEBUG is defined, which would let a
+       case that unexpectedly parses go unnoticed. */
+    if (actualStatus != XML_STATUS_ERROR) {
+      sprintf(message, "Expected XML_STATUS_ERROR but got status %d for case %u: \"%s\"\n",
+              (int)actualStatus, (unsigned int)i + 1, cases[i].text);
+      fail(message);
+    }
+
+    if (actualError != cases[i].expectedError) {
+      sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
+              cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text);
+      fail(message);
+    }
+
+    /* The same parser object is reused for every case, so reset it
+       before feeding the next document. */
+    XML_ParserReset(parser, NULL);
+  }
+}
+END_TEST
+
/*
* Namespaces tests.
@@ -1118,17 +1301,17 @@ triplet_end_checker(void *userData, const XML_Char *name)
START_TEST(test_return_ns_triplet)
{
- char *text =
+ const char *text =
"<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n"
" xmlns:bar='http://expat.sf.net/'></foo:e>";
- char *elemstr[] = {
+ const char *elemstr[] = {
"http://expat.sf.net/ e foo",
"http://expat.sf.net/ a bar"
};
XML_SetReturnNSTriplet(parser, XML_TRUE);
XML_SetUserData(parser, elemstr);
XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1158,14 +1341,14 @@ overwrite_end_checker(void *userData, const XML_Char *name)
}
static void
-run_ns_tagname_overwrite_test(char *text, char *result)
+run_ns_tagname_overwrite_test(const char *text, const char *result)
{
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetElementHandler(parser,
overwrite_start_checker, overwrite_end_checker);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, result);
}
@@ -1173,12 +1356,12 @@ run_ns_tagname_overwrite_test(char *text, char *result)
/* Regression test for SF bug #566334. */
START_TEST(test_ns_tagname_overwrite)
{
- char *text =
+ const char *text =
"<n:e xmlns:n='http://xml.libexpat.org/'>\n"
" <n:f n:attr='foo'/>\n"
" <n:g n:attr2='bar'/>\n"
"</n:e>";
- char *result =
+ const char *result =
"start http://xml.libexpat.org/ e\n"
"start http://xml.libexpat.org/ f\n"
"attribute http://xml.libexpat.org/ attr\n"
@@ -1194,12 +1377,12 @@ END_TEST
/* Regression test for SF bug #566334. */
START_TEST(test_ns_tagname_overwrite_triplet)
{
- char *text =
+ const char *text =
"<n:e xmlns:n='http://xml.libexpat.org/'>\n"
" <n:f n:attr='foo'/>\n"
" <n:g n:attr2='bar'/>\n"
"</n:e>";
- char *result =
+ const char *result =
"start http://xml.libexpat.org/ e n\n"
"start http://xml.libexpat.org/ f n\n"
"attribute http://xml.libexpat.org/ attr n\n"
@@ -1216,8 +1399,8 @@ END_TEST
/* Regression test for SF bug #620343. */
static void XMLCALL
-start_element_fail(void *userData,
- const XML_Char *name, const XML_Char **atts)
+start_element_fail(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
{
/* We should never get here. */
fail("should never reach start_element_fail()");
@@ -1225,8 +1408,8 @@ start_element_fail(void *userData,
static void XMLCALL
start_ns_clearing_start_element(void *userData,
- const XML_Char *prefix,
- const XML_Char *uri)
+ const XML_Char *UNUSED_P(prefix),
+ const XML_Char *UNUSED_P(uri))
{
XML_SetStartElementHandler((XML_Parser) userData, NULL);
}
@@ -1237,12 +1420,12 @@ START_TEST(test_start_ns_clears_start_element)
syntax doesn't cause the problematic path through Expat to be
taken.
*/
- char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
+ const char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
XML_SetStartElementHandler(parser, start_element_fail);
XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element);
XML_UseParserAsHandlerArg(parser);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1251,12 +1434,12 @@ END_TEST
static int XMLCALL
external_entity_handler(XML_Parser parser,
const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{
intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser);
- char *text;
+ const char *text;
XML_Parser p2;
if (callno == 1)
@@ -1269,7 +1452,7 @@ external_entity_handler(XML_Parser parser,
XML_SetUserData(parser, (void *) callno);
p2 = XML_ExternalEntityParserCreate(parser, context, NULL);
- if (XML_Parse(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
+ if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
xml_failure(p2);
return 0;
}
@@ -1279,7 +1462,7 @@ external_entity_handler(XML_Parser parser,
START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
{
- char *text =
+ const char *text =
"<?xml version='1.0'?>\n"
"<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n"
" <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n"
@@ -1293,7 +1476,7 @@ START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
/* We actually need to set this handler to tickle this bug. */
XML_SetStartElementHandler(parser, dummy_start_element);
XML_SetUserData(parser, NULL);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1301,7 +1484,7 @@ END_TEST
/* Regression test #1 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_1)
{
- char *text =
+ const char *text =
"<doc xmlns:prefix='http://xml.libexpat.org/'>\n"
" <e xmlns:prefix=''/>\n"
"</doc>";
@@ -1316,7 +1499,7 @@ END_TEST
/* Regression test #2 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_2)
{
- char *text =
+ const char *text =
"<?xml version='1.0'?>\n"
"<docelem xmlns:pre=''/>";
@@ -1329,7 +1512,7 @@ END_TEST
/* Regression test #3 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_3)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ELEMENT doc EMPTY>\n"
" <!ATTLIST doc\n"
@@ -1346,7 +1529,7 @@ END_TEST
/* Regression test #4 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_4)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ELEMENT prefix:doc EMPTY>\n"
" <!ATTLIST prefix:doc\n"
@@ -1356,24 +1539,24 @@ START_TEST(test_ns_prefix_with_empty_uri_4)
/* Packaged info expected by the end element handler;
the weird structuring lets us re-use the triplet_end_checker()
function also used for another test. */
- char *elemstr[] = {
+ const char *elemstr[] = {
"http://xml.libexpat.org/ doc prefix"
};
XML_SetReturnNSTriplet(parser, XML_TRUE);
XML_SetUserData(parser, elemstr);
XML_SetEndElementHandler(parser, triplet_end_checker);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_ns_default_with_empty_uri)
{
- char *text =
+ const char *text =
"<doc xmlns='http://xml.libexpat.org/'>\n"
" <e xmlns=''/>\n"
"</doc>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1381,7 +1564,7 @@ END_TEST
/* Regression test for SF bug #692964: two prefixes for one namespace. */
START_TEST(test_ns_duplicate_attrs_diff_prefixes)
{
- char *text =
+ const char *text =
"<doc xmlns:a='http://xml.libexpat.org/a'\n"
" xmlns:b='http://xml.libexpat.org/a'\n"
" a:a='v' b:a='v' />";
@@ -1394,7 +1577,7 @@ END_TEST
/* Regression test for SF bug #695401: unbound prefix. */
START_TEST(test_ns_unbound_prefix_on_attribute)
{
- char *text = "<doc a:attr=''/>";
+ const char *text = "<doc a:attr=''/>";
expect_failure(text,
XML_ERROR_UNBOUND_PREFIX,
"did not report unbound prefix on attribute");
@@ -1404,7 +1587,7 @@ END_TEST
/* Regression test for SF bug #695401: unbound prefix. */
START_TEST(test_ns_unbound_prefix_on_element)
{
- char *text = "<a:doc/>";
+ const char *text = "<a:doc/>";
expect_failure(text,
XML_ERROR_UNBOUND_PREFIX,
"did not report unbound prefix on element");
@@ -1426,6 +1609,7 @@ make_suite(void)
tcase_add_test(tc_basic, test_bom_utf16_be);
tcase_add_test(tc_basic, test_bom_utf16_le);
tcase_add_test(tc_basic, test_illegal_utf8);
+ tcase_add_test(tc_basic, test_utf8_auto_align);
tcase_add_test(tc_basic, test_utf16);
tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
tcase_add_test(tc_basic, test_latin1_umlauts);
@@ -1461,6 +1645,9 @@ make_suite(void)
tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
+ tcase_add_test(tc_basic, test_good_cdata_ascii);
+ tcase_add_test(tc_basic, test_good_cdata_utf16);
+ tcase_add_test(tc_basic, test_bad_cdata);
suite_add_tcase(s, tc_namespace);
tcase_add_checked_fixture(tc_namespace,
diff --git a/tests/xmltest.sh b/tests/xmltest.sh
index 793a5cc2..64a17eec 100755
--- a/tests/xmltest.sh
+++ b/tests/xmltest.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#! /usr/bin/env bash
# EXPAT TEST SCRIPT FOR W3C XML TEST SUITE
@@ -20,12 +20,14 @@
# produced by xmlwf conforms to an older definition of canonical XML
# and does not generate notation declarations.
+shopt -s nullglob
+
MYDIR="`dirname \"$0\"`"
cd "$MYDIR"
MYDIR="`pwd`"
XMLWF="`dirname \"$MYDIR\"`/xmlwf/xmlwf"
# XMLWF=/usr/local/bin/xmlwf
-TS="$MYDIR/XML-Test-Suite"
+TS="$MYDIR"
# OUTPUT must terminate with the directory separator.
OUTPUT="$TS/out/"
# OUTPUT=/home/tmp/xml-testsuite-out/
@@ -100,7 +102,7 @@ for xmldir in ibm/valid/P* \
RunXmlwfWF "$xmlfile" "$xmldir/"
UpdateStatus $?
done
- rm outfile
+ rm -f outfile
done
cd "$TS/xmlconf/oasis"