aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThe Android Open Source Project <initial-contribution@android.com>2009-03-03 19:30:46 -0800
committerThe Android Open Source Project <initial-contribution@android.com>2009-03-03 19:30:46 -0800
commit41cdf40d933f4029c37cf844f8cf3314114e4e0a (patch)
treecd0b0b7bd9d1346b4cb18678cec0d15d8ea47df7 /src
parentb0e041b91e19d554585bc4423075929013f05f22 (diff)
downloadtagsoup-cupcake.tar.gz
Diffstat (limited to 'src')
-rw-r--r--src/org/ccil/cowan/tagsoup/AttributesImpl.java626
-rw-r--r--src/org/ccil/cowan/tagsoup/AutoDetector.java43
-rw-r--r--src/org/ccil/cowan/tagsoup/CommandLine.java289
-rw-r--r--src/org/ccil/cowan/tagsoup/Element.java203
-rw-r--r--src/org/ccil/cowan/tagsoup/ElementType.java276
-rw-r--r--src/org/ccil/cowan/tagsoup/GOOGLE_README.txt8
-rw-r--r--src/org/ccil/cowan/tagsoup/HTMLModels.java53
-rw-r--r--src/org/ccil/cowan/tagsoup/HTMLScanner.java648
-rw-r--r--src/org/ccil/cowan/tagsoup/HTMLSchema.java2895
-rw-r--r--src/org/ccil/cowan/tagsoup/LICENSE201
-rw-r--r--src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE20
-rw-r--r--src/org/ccil/cowan/tagsoup/PYXScanner.java124
-rw-r--r--src/org/ccil/cowan/tagsoup/PYXWriter.java217
-rw-r--r--src/org/ccil/cowan/tagsoup/Parser.java1114
-rw-r--r--src/org/ccil/cowan/tagsoup/ScanHandler.java119
-rw-r--r--src/org/ccil/cowan/tagsoup/Scanner.java50
-rw-r--r--src/org/ccil/cowan/tagsoup/Schema.java170
-rw-r--r--src/org/ccil/cowan/tagsoup/XMLWriter.java1435
-rw-r--r--src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java54
-rw-r--r--src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java232
-rw-r--r--src/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java114
-rw-r--r--src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java113
22 files changed, 8984 insertions, 0 deletions
diff --git a/src/org/ccil/cowan/tagsoup/AttributesImpl.java b/src/org/ccil/cowan/tagsoup/AttributesImpl.java
new file mode 100644
index 0000000..86f76fc
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/AttributesImpl.java
@@ -0,0 +1,626 @@
+// AttributesImpl.java - default implementation of the SAX2 Attributes interface.
+// Written by David Megginson, david@megginson.com
+// and placed by him into the public domain.
+// Extensively modified by John Cowan for TagSoup.
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+import org.xml.sax.Attributes;
+
+
+/**
+ * Default implementation of the Attributes interface.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>This class provides a default implementation of the SAX2
+ * {@link org.xml.sax.Attributes Attributes} interface, with the
+ * addition of manipulators so that the list can be modified or
+ * reused.</p>
+ *
+ * <p>There are two typical uses of this class:</p>
+ *
+ * <ol>
+ * <li>to take a persistent snapshot of an Attributes object
+ * in a {@link org.xml.sax.ContentHandler#startElement startElement} event; or</li>
+ * <li>to construct or modify an Attributes object in a SAX2 driver or filter.</li>
+ * </ol>
+ *
+ * <p>This class replaces the now-deprecated SAX1 {@link
+ * org.xml.sax.helpers.AttributeListImpl AttributeListImpl}
+ * class; in addition to supporting the updated Attributes
+ * interface rather than the deprecated {@link org.xml.sax.AttributeList
+ * AttributeList} interface, it also includes a much more efficient
+ * implementation using a single array rather than a set of Vectors.</p>
+ *
+ * @since SAX 2.0
+ * @author David Megginson
+ * @version 2.0.1 (sax2r2)
+ */
+public class AttributesImpl implements Attributes
+{
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Constructors.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Construct a new, empty AttributesImpl object.
+ */
+ public AttributesImpl ()
+ {
+ length = 0;
+ data = null; // no array yet; ensureCapacity() or setAttributes() allocates one on demand
+ }
+
+
+ /**
+ * Copy an existing Attributes object.
+ *
+ * <p>This constructor is especially useful inside a
+ * {@link org.xml.sax.ContentHandler#startElement startElement} event.</p>
+ *
+ * @param atts The existing Attributes object.
+ */
+ public AttributesImpl (Attributes atts)
+ {
+ setAttributes(atts);
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Implementation of org.xml.sax.Attributes.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Return the number of attributes in the list.
+ *
+ * @return The number of attributes in the list.
+ * @see org.xml.sax.Attributes#getLength
+ */
+ public int getLength ()
+ {
+ return length;
+ }
+
+
+ /**
+ * Return an attribute's Namespace URI.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The Namespace URI, the empty string if none is
+ * available, or null if the index is out of range.
+ * @see org.xml.sax.Attributes#getURI
+ */
+ public String getURI (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5]; // slot 0 of the attribute's 5-slot record
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's local name.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's local name, the empty string if
+ * none is available, or null if the index is out of range.
+ * @see org.xml.sax.Attributes#getLocalName
+ */
+ public String getLocalName (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+1]; // slot 1 of the attribute's 5-slot record
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's qualified (prefixed) name.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's qualified name, the empty string if
+ * none is available, or null if the index is out of bounds.
+ * @see org.xml.sax.Attributes#getQName
+ */
+ public String getQName (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+2]; // slot 2 of the attribute's 5-slot record
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's type by index.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's type, "CDATA" if the type is unknown, or null
+ * if the index is out of bounds.
+ * @see org.xml.sax.Attributes#getType(int)
+ */
+ public String getType (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+3]; // slot 3 of the attribute's 5-slot record
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's value by index.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's value or null if the index is out of bounds.
+ * @see org.xml.sax.Attributes#getValue(int)
+ */
+ public String getValue (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+4]; // slot 4 of the attribute's 5-slot record
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Look up an attribute's index by Namespace name.
+ *
+ * <p>In many cases, it will be more efficient to look up the name once and
+ * use the index query methods rather than using the name query methods
+ * repeatedly.</p>
+ *
+ * @param uri The attribute's Namespace URI, or the empty
+ * string if none is available.
+ * @param localName The attribute's local name.
+ * @return The attribute's index, or -1 if none matches.
+ * @see org.xml.sax.Attributes#getIndex(java.lang.String,java.lang.String)
+ */
+ public int getIndex (String uri, String localName)
+ {
+ int max = length * 5; // number of array slots in use
+ for (int i = 0; i < max; i += 5) {
+ if (data[i].equals(uri) && data[i+1].equals(localName)) { // NOTE: throws NullPointerException if a stored URI or local name is null
+ return i / 5; // convert the slot offset back to an attribute index
+ }
+ }
+ return -1;
+ }
+
+
+ /**
+ * Look up an attribute's index by qualified (prefixed) name.
+ *
+ * @param qName The qualified name.
+ * @return The attribute's index, or -1 if none matches.
+ * @see org.xml.sax.Attributes#getIndex(java.lang.String)
+ */
+ public int getIndex (String qName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i+2].equals(qName)) { // NOTE: throws NullPointerException if a stored qualified name is null
+ return i / 5;
+ }
+ }
+ return -1;
+ }
+
+
+ /**
+ * Look up an attribute's type by Namespace-qualified name.
+ *
+ * @param uri The Namespace URI, or the empty string for a name
+ * with no explicit Namespace URI.
+ * @param localName The local name.
+ * @return The attribute's type, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getType(java.lang.String,java.lang.String)
+ */
+ public String getType (String uri, String localName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i].equals(uri) && data[i+1].equals(localName)) {
+ return data[i+3];
+ }
+ }
+ return null;
+ }
+
+
+ /**
+ * Look up an attribute's type by qualified (prefixed) name.
+ *
+ * @param qName The qualified name.
+ * @return The attribute's type, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getType(java.lang.String)
+ */
+ public String getType (String qName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i+2].equals(qName)) {
+ return data[i+3];
+ }
+ }
+ return null;
+ }
+
+
+ /**
+ * Look up an attribute's value by Namespace-qualified name.
+ *
+ * @param uri The Namespace URI, or the empty string for a name
+ * with no explicit Namespace URI.
+ * @param localName The local name.
+ * @return The attribute's value, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getValue(java.lang.String,java.lang.String)
+ */
+ public String getValue (String uri, String localName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i].equals(uri) && data[i+1].equals(localName)) {
+ return data[i+4];
+ }
+ }
+ return null;
+ }
+
+
+ /**
+ * Look up an attribute's value by qualified (prefixed) name.
+ *
+ * @param qName The qualified name.
+ * @return The attribute's value, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getValue(java.lang.String)
+ */
+ public String getValue (String qName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i+2].equals(qName)) {
+ return data[i+4];
+ }
+ }
+ return null;
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Manipulators.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Clear the attribute list for reuse.
+ *
+ * <p>Note that little memory is freed by this call:
+ * the current array is kept so it can be
+ * reused.</p>
+ */
+ public void clear ()
+ {
+ if (data != null) {
+ for (int i = 0; i < (length * 5); i++) // only the slots in use are touched
+ data [i] = null; // drop the reference held by this slot
+ }
+ length = 0;
+ }
+
+
+ /**
+ * Copy an entire Attributes object.
+ *
+ * <p>It may be more efficient to reuse an existing object
+ * rather than constantly allocating new ones.</p>
+ *
+ * @param atts The attributes to copy.
+ */
+ public void setAttributes (Attributes atts)
+ {
+ clear();
+ length = atts.getLength();
+ if (length > 0) { // for an empty source the existing (cleared) array is kept
+ data = new String[length*5]; // always a fresh, exactly-sized array; the previous one is discarded
+ for (int i = 0; i < length; i++) {
+ data[i*5] = atts.getURI(i);
+ data[i*5+1] = atts.getLocalName(i);
+ data[i*5+2] = atts.getQName(i);
+ data[i*5+3] = atts.getType(i);
+ data[i*5+4] = atts.getValue(i);
+ }
+ }
+ }
+
+
+ /**
+ * Add an attribute to the end of the list.
+ *
+ * <p>For the sake of speed, this method does no checking
+ * to see if the attribute is already in the list: that is
+ * the responsibility of the application.</p>
+ *
+ * @param uri The Namespace URI, or the empty string if
+ * none is available or Namespace processing is not
+ * being performed.
+ * @param localName The local name, or the empty string if
+ * Namespace processing is not being performed.
+ * @param qName The qualified (prefixed) name, or the empty string
+ * if qualified names are not available.
+ * @param type The attribute type as a string.
+ * @param value The attribute value.
+ */
+ public void addAttribute (String uri, String localName, String qName,
+ String type, String value)
+ {
+ ensureCapacity(length+1); // grow the array first if the new record would not fit
+ data[length*5] = uri;
+ data[length*5+1] = localName;
+ data[length*5+2] = qName;
+ data[length*5+3] = type;
+ data[length*5+4] = value;
+ length++;
+ }
+
+
+ /**
+ * Set an attribute in the list.
+ *
+ * <p>For the sake of speed, this method does no checking
+ * for name conflicts or well-formedness: such checks are the
+ * responsibility of the application.</p>
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param uri The Namespace URI, or the empty string if
+ * none is available or Namespace processing is not
+ * being performed.
+ * @param localName The local name, or the empty string if
+ * Namespace processing is not being performed.
+ * @param qName The qualified name, or the empty string
+ * if qualified names are not available.
+ * @param type The attribute type as a string.
+ * @param value The attribute value.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setAttribute (int index, String uri, String localName,
+ String qName, String type, String value)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5] = uri;
+ data[index*5+1] = localName;
+ data[index*5+2] = qName;
+ data[index*5+3] = type;
+ data[index*5+4] = value;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Remove an attribute from the list.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void removeAttribute (int index)
+ {
+ if (index >= 0 && index < length) {
+ if (index < length - 1) { // nothing to shift when removing the last attribute
+ System.arraycopy(data, (index+1)*5, data, index*5,
+ (length-index-1)*5); // move every following record down by one record (5 slots)
+ }
+ index = (length - 1) * 5; // 'index' is reused as the slot offset of the old last record
+ data [index++] = null;
+ data [index++] = null;
+ data [index++] = null;
+ data [index++] = null;
+ data [index] = null;
+ length--;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the Namespace URI of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param uri The attribute's Namespace URI, or the empty
+ * string for none.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setURI (int index, String uri)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5] = uri;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the local name of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param localName The attribute's local name, or the empty
+ * string for none.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setLocalName (int index, String localName)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+1] = localName;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the qualified name of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param qName The attribute's qualified name, or the empty
+ * string for none.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setQName (int index, String qName)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+2] = qName;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the type of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param type The attribute's type.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setType (int index, String type)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+3] = type;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the value of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param value The attribute's value.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setValue (int index, String value)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+4] = value;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Ensure the internal array's capacity.
+ *
+ * @param n The minimum number of attributes that the array must
+ * be able to hold.
+ */
+ private void ensureCapacity (int n) {
+ if (n <= 0) {
+ return;
+ }
+ int max;
+ if (data == null || data.length == 0) {
+ max = 25; // initial size: room for 5 attributes of 5 slots each
+ }
+ else if (data.length >= n * 5) {
+ return; // already large enough
+ }
+ else {
+ max = data.length;
+ }
+ while (max < n * 5) {
+ max *= 2; // double until n attributes fit
+ }
+
+ String newData[] = new String[max];
+ if (length > 0) {
+ System.arraycopy(data, 0, newData, 0, length*5); // carry over only the slots in use
+ }
+ data = newData;
+ }
+
+
+ /**
+ * Report a bad array index in a manipulator.
+ *
+ * @param index The index to report.
+ * @exception java.lang.ArrayIndexOutOfBoundsException Always.
+ */
+ private void badIndex (int index)
+ throws ArrayIndexOutOfBoundsException
+ {
+ String msg =
+ "Attempt to modify attribute at illegal index: " + index;
+ throw new ArrayIndexOutOfBoundsException(msg);
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal state.
+ ////////////////////////////////////////////////////////////////////
+
+ int length; // number of attributes currently stored (not the size of the array)
+ String data []; // flat array, 5 consecutive slots per attribute: URI, local name, qualified name, type, value
+
+}
+
+// end of AttributesImpl.java
+
diff --git a/src/org/ccil/cowan/tagsoup/AutoDetector.java b/src/org/ccil/cowan/tagsoup/AutoDetector.java
new file mode 100644
index 0000000..eb85d6f
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/AutoDetector.java
@@ -0,0 +1,43 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Interface to objects that translate InputStreams to Readers by auto-detection
+
+package org.ccil.cowan.tagsoup;
+import java.io.Reader;
+import java.io.InputStream;
+
+/**
+Contract for objects that turn a byte-oriented InputStream into a
+character-oriented Reader, working out the character encoding of the
+InputStream for themselves.
+@see java.io.InputStream
+@see java.io.Reader
+*/
+
+public interface AutoDetector {
+
+    /**
+    Wrap an InputStream in a suitable Reader that understands the
+    presumed character encoding of that InputStream.
+    Any bytes consumed from the InputStream while doing so <i>must</i>
+    be pushed back onto the InputStream, so that they can be
+    reinterpreted as characters.
+    @param i The InputStream
+    @return A Reader that reads from the InputStream
+    */
+
+    Reader autoDetectingReader(InputStream i);
+
+    }
diff --git a/src/org/ccil/cowan/tagsoup/CommandLine.java b/src/org/ccil/cowan/tagsoup/CommandLine.java
new file mode 100644
index 0000000..dd0c022
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/CommandLine.java
@@ -0,0 +1,289 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// The TagSoup command line UI
+
+package org.ccil.cowan.tagsoup;
+import java.util.Hashtable;
+import java.util.Enumeration;
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import org.xml.sax.*;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.ext.LexicalHandler;
+
+
+/**
+The stand-alone TagSoup program.
+Parses each source named on the command line (or standard input when no
+source is named) and writes the result through an XMLWriter or a PYXWriter.
+**/
+public class CommandLine {
+
+    // Table of recognized options.  A key ending in "=" takes a value.
+    // Every entry holds Boolean.FALSE until getopts() sees the option on
+    // the command line; it then holds Boolean.TRUE (plain options) or the
+    // value String ("=" options).
+    static Hashtable options = new Hashtable();
+    static {
+        options.put("--nocdata", Boolean.FALSE); // CDATA elements are normal
+        options.put("--files", Boolean.FALSE); // process arguments as separate files
+        options.put("--reuse", Boolean.FALSE); // reuse a single Parser
+        options.put("--nons", Boolean.FALSE); // no namespaces
+        options.put("--nobogons", Boolean.FALSE); // suppress unknown elements
+        options.put("--any", Boolean.FALSE); // unknowns have ANY content model
+        options.put("--emptybogons", Boolean.FALSE); // unknowns have EMPTY content model
+        options.put("--norootbogons", Boolean.FALSE); // unknowns can't be the root
+        options.put("--pyxin", Boolean.FALSE); // input is PYX
+        options.put("--lexical", Boolean.FALSE); // output comments
+        options.put("--pyx", Boolean.FALSE); // output is PYX
+        options.put("--html", Boolean.FALSE); // output is HTML
+        options.put("--method=", Boolean.FALSE); // output method
+        options.put("--doctype-public=", Boolean.FALSE); // override public id
+        options.put("--doctype-system=", Boolean.FALSE); // override system id
+        options.put("--output-encoding=", Boolean.FALSE); // output encoding
+        options.put("--omit-xml-declaration", Boolean.FALSE); // omit XML decl
+        options.put("--encoding=", Boolean.FALSE); // specify encoding
+        options.put("--help", Boolean.FALSE); // display help
+        options.put("--version", Boolean.FALSE); // display version
+        options.put("--nodefaults", Boolean.FALSE); // no default attrs
+        options.put("--nocolons", Boolean.FALSE); // colon to underscore
+        options.put("--norestart", Boolean.FALSE); // no restartable elements
+        options.put("--ignorable", Boolean.FALSE); // return ignorable whitespace
+    }
+
+    /**
+    Main method.  Processes specified files or standard input.
+    With no source arguments, standard input is written to standard output.
+    With --files, each source is written to its own file, named after the
+    source with an ".xhtml" extension (or with "_" appended when the source
+    already ends in ".xhtml"); otherwise every source is written to
+    standard output in turn.
+    @param argv Options, followed by zero or more source system ids
+    @throws IOException If a source or destination cannot be read or written
+    @throws SAXException If the parser or the content handler reports an error
+    **/
+
+    public static void main(String[] argv) throws IOException, SAXException {
+        int optind = getopts(options, argv);
+        if (hasOption(options, "--help")) {
+            doHelp();
+            return;
+        }
+        if (hasOption(options, "--version")) {
+            System.err.println("TagSoup version 1.2");
+            return;
+        }
+        if (argv.length == optind) {
+            process("", System.out);
+        }
+        else if (hasOption(options, "--files")) {
+            for (int i = optind; i < argv.length; i++) {
+                String src = argv[i];
+                String dst;
+                int j = src.lastIndexOf('.');
+                if (j == -1)
+                    dst = src + ".xhtml";
+                else if (src.endsWith(".xhtml"))
+                    dst = src + "_";
+                else
+                    dst = src.substring(0, j) + ".xhtml";
+                System.err.println("src: " + src + " dst: " + dst);
+                OutputStream os = new FileOutputStream(dst);
+                try {
+                    process(src, os);
+                }
+                finally {
+                    // Release the file handle even when parsing fails;
+                    // previously one descriptor leaked per processed file.
+                    os.close();
+                }
+            }
+        }
+        else {
+            for (int i = optind; i < argv.length; i++) {
+                System.err.println("src: " + argv[i]);
+                process(argv[i], System.out);
+            }
+        }
+    }
+
+    // Print the help message: the usage line followed by every key of the
+    // options table, with "?" standing for the value of a "=" option.
+
+    private static void doHelp() {
+        System.err.print("usage: java -jar tagsoup-*.jar ");
+        System.err.print(" [ ");
+        boolean first = true;
+        for (Enumeration e = options.keys(); e.hasMoreElements(); ) {
+            if (!first) {
+                System.err.print("| ");
+            }
+            first = false;
+            String key = (String)(e.nextElement());
+            System.err.print(key);
+            if (key.endsWith("="))
+                System.err.print("?");
+            System.err.print(" ");
+        }
+        System.err.println("]*");
+    }
+
+    private static Parser theParser = null;         // shared Parser, used only with --reuse
+    private static HTMLSchema theSchema = null;     // schema of the document being processed
+    private static String theOutputEncoding = null; // value of --output-encoding=, once seen
+
+    // Process one source onto an output stream.
+    // An empty src means "read standard input"; any other value is used
+    // as the system id of the input.
+
+    private static void process(String src, OutputStream os)
+            throws IOException, SAXException {
+        XMLReader r;
+        if (hasOption(options, "--reuse")) {
+            if (theParser == null) theParser = new Parser();
+            r = theParser;
+        }
+        else {
+            r = new Parser();
+        }
+        theSchema = new HTMLSchema();
+        r.setProperty(Parser.schemaProperty, theSchema);
+
+        if (hasOption(options, "--nocdata")) {
+            r.setFeature(Parser.CDATAElementsFeature, false);
+        }
+
+        if (hasOption(options, "--nons") || hasOption(options, "--html")) {
+            r.setFeature(Parser.namespacesFeature, false);
+        }
+
+        if (hasOption(options, "--nobogons")) {
+            r.setFeature(Parser.ignoreBogonsFeature, true);
+        }
+
+        if (hasOption(options, "--any")) {
+            r.setFeature(Parser.bogonsEmptyFeature, false);
+        }
+        else if (hasOption(options, "--emptybogons")) {
+            r.setFeature(Parser.bogonsEmptyFeature, true);
+        }
+
+        if (hasOption(options, "--norootbogons")) {
+            r.setFeature(Parser.rootBogonsFeature, false);
+        }
+
+        if (hasOption(options, "--nodefaults")) {
+            r.setFeature(Parser.defaultAttributesFeature, false);
+        }
+        if (hasOption(options, "--nocolons")) {
+            r.setFeature(Parser.translateColonsFeature, true);
+        }
+
+        if (hasOption(options, "--norestart")) {
+            r.setFeature(Parser.restartElementsFeature, false);
+        }
+
+        if (hasOption(options, "--ignorable")) {
+            r.setFeature(Parser.ignorableWhitespaceFeature, true);
+        }
+
+        if (hasOption(options, "--pyxin")) {
+            r.setProperty(Parser.scannerProperty, new PYXScanner());
+        }
+
+        // Resolve the output encoding BEFORE the writer is built.  It used
+        // to be assigned only inside chooseContentHandler(), i.e. after the
+        // writer existed, so the first document was encoded in the platform
+        // default while its XML declaration named the requested encoding.
+        // As a consequence --pyx output now honours --output-encoding= too.
+        String requestedEncoding = optionValue("--output-encoding=");
+        if (requestedEncoding != null) {
+            theOutputEncoding = requestedEncoding;
+        }
+
+        Writer w;
+        if (theOutputEncoding == null) {
+            w = new OutputStreamWriter(os);
+        }
+        else {
+            w = new OutputStreamWriter(os, theOutputEncoding);
+        }
+        ContentHandler h = chooseContentHandler(w);
+        r.setContentHandler(h);
+        if (hasOption(options, "--lexical") && h instanceof LexicalHandler) {
+            r.setProperty(Parser.lexicalHandlerProperty, h);
+        }
+        InputSource s = new InputSource();
+        // Compare contents, not references: src != "" only worked because
+        // the caller happened to pass the same interned literal.
+        if (!"".equals(src)) {
+            s.setSystemId(src);
+        }
+        else {
+            s.setByteStream(System.in);
+        }
+        String encoding = optionValue("--encoding=");
+        if (encoding != null) s.setEncoding(encoding);
+        r.parse(s);
+    }
+
+    // Pick a content handler to generate the desired format:
+    // a PYXWriter for --pyx, otherwise an XMLWriter configured from the
+    // output-related options.
+
+    private static ContentHandler chooseContentHandler(Writer w) {
+        XMLWriter x;
+        if (hasOption(options, "--pyx")) {
+            return new PYXWriter(w);
+        }
+
+        x = new XMLWriter(w);
+        if (hasOption(options, "--html")) {
+            x.setOutputProperty(XMLWriter.METHOD, "html");
+            x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
+        }
+        // An explicit --method= is applied after --html and so overrides it.
+        String method = optionValue("--method=");
+        if (method != null) {
+            x.setOutputProperty(XMLWriter.METHOD, method);
+        }
+        String doctype_public = optionValue("--doctype-public=");
+        if (doctype_public != null) {
+            x.setOutputProperty(XMLWriter.DOCTYPE_PUBLIC, doctype_public);
+        }
+        String doctype_system = optionValue("--doctype-system=");
+        if (doctype_system != null) {
+            x.setOutputProperty(XMLWriter.DOCTYPE_SYSTEM, doctype_system);
+        }
+        String outputEncoding = optionValue("--output-encoding=");
+        if (outputEncoding != null) {
+            theOutputEncoding = outputEncoding;
+            x.setOutputProperty(XMLWriter.ENCODING, theOutputEncoding);
+        }
+        if (hasOption(options, "--omit-xml-declaration")) {
+            x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
+        }
+        x.setPrefix(theSchema.getURI(), "");
+        return x;
+    }
+
+    // Options processing.
+    // Consumes leading arguments that start with '-', recording each one in
+    // the options table, and returns the index of the first argument that
+    // is not an option.  An unrecognized option is reported on standard
+    // error and terminates the program with exit status 1.
+
+    private static int getopts(Hashtable options, String[] argv) {
+        int optind;
+        for (optind = 0; optind < argv.length; optind++) {
+            String arg = argv[optind];
+            String value = null;
+            // An empty argument is not an option; testing the length first
+            // avoids a StringIndexOutOfBoundsException from charAt(0).
+            if (arg.length() == 0 || arg.charAt(0) != '-') break;
+            int eqsign = arg.indexOf('=');
+            if (eqsign != -1) {
+                // Split "--name=value"; the key keeps its trailing "=".
+                value = arg.substring(eqsign + 1, arg.length());
+                arg = arg.substring(0, eqsign + 1);
+            }
+            if (options.containsKey(arg)) {
+                if (value == null) options.put(arg, Boolean.TRUE);
+                else options.put(arg, value);
+            }
+            else {
+                System.err.print("Unknown option ");
+                System.err.println(arg);
+                System.exit(1);
+            }
+        }
+        return optind;
+    }
+
+    // Return true if an option exists, i.e. it was given on the command
+    // line or the Java system property of the same name is "true"
+    // (Boolean.getBoolean).  A name missing from the table is not set.
+
+    private static boolean hasOption(Hashtable options, String option) {
+        if (Boolean.getBoolean(option)) return true;
+        Object setting = options.get(option);
+        return setting != null && !Boolean.FALSE.equals(setting);
+    }
+
+    // Return the String value given for a "name=" option, or null when the
+    // option was not given a value on the command line.  The instanceof
+    // test avoids a ClassCastException on the Boolean.FALSE placeholder
+    // when the option is switched on only through a system property.
+
+    private static String optionValue(String option) {
+        Object setting = options.get(option);
+        return (setting instanceof String) ? (String)setting : null;
+    }
+
+    }
diff --git a/src/org/ccil/cowan/tagsoup/Element.java b/src/org/ccil/cowan/tagsoup/Element.java
new file mode 100644
index 0000000..01a9fa7
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Element.java
@@ -0,0 +1,203 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+
+/**
+The internal representation of an actual element (not an element type).
+An Element has an element type, attributes, and a successor Element
+for use in constructing stacks and queues of Elements.
+@see ElementType
+@see AttributesImpl
+*/
+public class Element {
+
+
+ private ElementType theType; // type of element
+ private AttributesImpl theAtts; // attributes of element
+ private Element theNext; // successor of element
+ private boolean preclosed; // this element has been preclosed
+
+ /**
+ Return an Element from a specified ElementType.
+ @param type The element type of the newly constructed element
+ @param defaultAttributes True if default attributes are wanted
+ */
+
+ public Element(ElementType type, boolean defaultAttributes) {
+ theType = type;
+ if (defaultAttributes) theAtts = new AttributesImpl(type.atts());
+ else theAtts = new AttributesImpl();
+ theNext = null;
+ preclosed = false;
+ }
+
+ /**
+ Return the element type.
+ @return The element type.
+ */
+
+ public ElementType type() { return theType; }
+
+ /**
+ Return the attributes as an AttributesImpl object.
+ Returning an AttributesImpl makes the attributes mutable.
+ @return The attributes
+ @see AttributesImpl
+ */
+ public AttributesImpl atts() { return theAtts; }
+
+ /**
+ Return the next element in an element stack or queue.
+ @return The next element
+ */
+
+ public Element next() { return theNext; }
+
+ /**
+ Change the next element in an element stack or queue.
+ @param next The new next element
+ */
+
+ public void setNext(Element next) { theNext = next; }
+
+ /**
+ Return the name of the element's type.
+ Convenience method.
+ @return The element type name
+ */
+
+ public String name() { return theType.name(); }
+
+ /**
+ Return the namespace name of the element's type.
+ Convenience method.
+ @return The element type namespace name
+ */
+
+ public String namespace() { return theType.namespace(); }
+
+ /**
+ Return the local name of the element's type.
+ Convenience method.
+ @return The element type local name
+ */
+
+ public String localName() { return theType.localName(); }
+
+ /**
+ Return the content model vector of the element's type.
+ Convenience method.
+ @return The content model vector
+ */
+
+ public int model() { return theType.model(); }
+
+ /**
+ Return the member-of vector of the element's type.
+ Convenience method.
+ @return The member-of vector
+ */
+
+ public int memberOf() { return theType.memberOf(); }
+
+ /**
+ Return the flags vector of the element's type.
+ Convenience method.
+ @return The flags vector
+ */
+
+ public int flags() { return theType.flags(); }
+
+ /**
+ Return the parent element type of the element's type.
+ Convenience method.
+ @return The parent element type
+ */
+
+ public ElementType parent() { return theType.parent(); }
+
+ /**
+ Return true if the type of this element can contain the type of
+ another element.
+ Convenience method.
+ @param other The other element
+ */
+
+ public boolean canContain(Element other) {
+ return theType.canContain(other.theType);
+ }
+
+
+ /**
+ Set an attribute and its value into this element.
+ @param name The attribute name (Qname)
+ @param type The attribute type
+ @param value The attribute value
+ */
+
+ public void setAttribute(String name, String type, String value) {
+ theType.setAttribute(theAtts, name, type, value);
+ }
+
+ /**
+ Make this element anonymous.
+ Remove any <tt>id</tt> or <tt>name</tt> attribute present
+ in the element's attributes.
+ */
+
+ public void anonymize() {
+ for (int i = theAtts.getLength() - 1; i >= 0; i--) {
+ if (theAtts.getType(i).equals("ID") ||
+ theAtts.getQName(i).equals("name")) {
+ theAtts.removeAttribute(i);
+ }
+ }
+ }
+
+ /**
+ Clean the attributes of this element.
+ Attributes with null name (the name was ill-formed)
+ or null value (the attribute was present in the element type but
+ not in this actual element) are removed.
+ */
+
+ public void clean() {
+ for (int i = theAtts.getLength() - 1; i >= 0; i--) {
+ String name = theAtts.getLocalName(i);
+ if (theAtts.getValue(i) == null || name == null ||
+ name.length() == 0) {
+ theAtts.removeAttribute(i);
+ continue;
+ }
+ }
+ }
+
+ /**
+ Force this element to preclosed status, meaning that an end-tag has
+ been seen but the element cannot yet be closed for structural reasons.
+ */
+
+ public void preclose() {
+ preclosed = true;
+ }
+
+ /**
+ Return true if this element has been preclosed.
+ */
+
+ public boolean isPreclosed() {
+ return preclosed;
+ }
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/ElementType.java b/src/org/ccil/cowan/tagsoup/ElementType.java
new file mode 100644
index 0000000..46ae883
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/ElementType.java
@@ -0,0 +1,276 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+
+/**
+This class represents an element type in the schema.
+An element type has a name, a content model vector, a member-of vector,
+a flags vector, default attributes, and a schema to which it belongs.
+@see Schema
+*/
+
+ public class ElementType {
+
+     private String theName;          // qualified name (Qname) of the element type
+     private String theNamespace;     // namespace name derived from theName
+     private String theLocalName;     // local part of theName
+     private int theModel;            // bit vector: models this type may contain
+     private int theMemberOf;         // bit vector: models this type belongs to
+     private int theFlags;            // bit vector: flags of this type
+     private AttributesImpl theAtts;  // default attributes
+     private ElementType theParent;   // parent element type
+     private Schema theSchema;        // schema that owns this type
+
+     /**
+     Construct an ElementType; Schema.element() is the preferred way
+     to obtain one.
+     The content model, member-of, and flags vectors are specified as ints.
+     The namespace name and local name are derived from the Qname.
+     @param name The element type name
+     @param model ORed-together bits representing the content models
+     allowed in the content of this element type
+     @param memberOf ORed-together bits representing the content models
+     to which this element type belongs
+     @param flags ORed-together bits representing the flags associated
+     with this element type
+     @param schema The schema with which this element type will be
+     associated
+     */
+
+     public ElementType(String name, int model, int memberOf, int flags, Schema schema) {
+         theName = name;
+         theModel = model;
+         theMemberOf = memberOf;
+         theFlags = flags;
+         theAtts = new AttributesImpl();
+         // The schema must be in place before namespace() is called below.
+         theSchema = schema;
+         theNamespace = namespace(name, false);
+         theLocalName = localName(name);
+     }
+
+     /**
+     Return a namespace name from a Qname.
+     For a name without a prefix, an attribute gets the empty namespace
+     name and an element gets the schema's URI.  The prefix <tt>xml</tt>
+     maps to the XML namespace; any other prefix maps to an interned
+     <tt>urn:x-prefix:</tt> name built from the prefix.
+     @param name The Qname
+     @param attribute True if name is an attribute name
+     @return The namespace name
+     **/
+     public String namespace(String name, boolean attribute) {
+         int colon = name.indexOf(':');
+         if (colon == -1) {
+             if (attribute) {
+                 return "";
+             }
+             return theSchema.getURI();
+         }
+         String prefix = name.substring(0, colon);
+         return prefix.equals("xml")
+             ? "http://www.w3.org/XML/1998/namespace"
+             : ("urn:x-prefix:" + prefix).intern();
+     }
+
+     /**
+     Return a local name from a Qname.
+     A name without a colon is returned unchanged; otherwise the
+     interned text after the first colon is returned.
+     @param name The Qname
+     @return The local name
+     **/
+     public String localName(String name) {
+         int colon = name.indexOf(':');
+         return (colon == -1) ? name : name.substring(colon+1).intern();
+     }
+
+     /**
+     Returns the name of this element type.
+     @return The name of the element type
+     */
+
+     public String name() {
+         return theName;
+     }
+
+     /**
+     Returns the namespace name of this element type.
+     @return The namespace name of the element type
+     */
+
+     public String namespace() {
+         return theNamespace;
+     }
+
+     /**
+     Returns the local name of this element type.
+     @return The local name of the element type
+     */
+
+     public String localName() {
+         return theLocalName;
+     }
+
+     /**
+     Returns the content models of this element type.
+     @return The content models of this element type as a vector of bits
+     */
+
+     public int model() {
+         return theModel;
+     }
+
+     /**
+     Returns the content models to which this element type belongs.
+     @return The content models to which this element type belongs as a
+     vector of bits
+     */
+
+     public int memberOf() {
+         return theMemberOf;
+     }
+
+     /**
+     Returns the flags associated with this element type.
+     @return The flags associated with this element type as a vector of bits
+     */
+
+     public int flags() {
+         return theFlags;
+     }
+
+     /**
+     Returns the default attributes associated with this element type.
+     Attributes of type CDATA that don't have default values are
+     typically not included.  Other attributes without default values
+     have an internal value of <tt>null</tt>.
+     The AttributesImpl object itself is returned, so the caller can
+     mutate the attributes.
+     @return The default attributes
+     */
+
+     public AttributesImpl atts() {
+         return theAtts;
+     }
+
+     /**
+     Returns the parent element type of this element type.
+     @return The parent element type
+     */
+
+     public ElementType parent() {
+         return theParent;
+     }
+
+     /**
+     Returns the schema which this element type is associated with.
+     @return The schema
+     */
+
+     public Schema schema() {
+         return theSchema;
+     }
+
+     /**
+     Returns true if this element type can contain another element type.
+     That is, if this type's model vector and the other type's
+     member-of vector have at least one bit in common.
+     @param other The other element type
+     @return True if the other element type may appear inside this one
+     */
+
+     public boolean canContain(ElementType other) {
+         int shared = theModel & other.theMemberOf;
+         return shared != 0;
+     }
+
+     /**
+     Sets an attribute and its value into an AttributesImpl object.
+     Attempts to set a namespace declaration (<tt>xmlns</tt> or
+     <tt>xmlns:...</tt>) are ignored.  A null type means CDATA for a
+     new attribute and "keep the current type" for an existing one.
+     Values of any type other than CDATA are normalized.
+     @param atts The AttributesImpl object
+     @param name The name (Qname) of the attribute
+     @param type The type of the attribute
+     @param value The value of the attribute
+     */
+
+     public void setAttribute(AttributesImpl atts, String name, String type, String value) {
+         boolean isNamespaceDecl = name.equals("xmlns") || name.startsWith("xmlns:");
+         if (isNamespaceDecl) {
+             return;
+         }
+         String namespace = namespace(name, true);
+         String localName = localName(name);
+         int index = atts.getIndex(name);
+         boolean isNew = (index == -1);
+         if (type == null) {
+             type = isNew ? "CDATA" : atts.getType(index);
+         }
+         if (!type.equals("CDATA")) {
+             value = normalize(value);
+         }
+         if (isNew) {
+             atts.addAttribute(namespace, localName, name.intern(), type, value);
+         }
+         else {
+             atts.setAttribute(index, namespace, localName, name, type, value);
+         }
+     }
+
+     /**
+     Normalize an attribute value (ID-style): trim it and collapse
+     every run of space characters to a single space.
+     CDATA-style attribute normalization is already done.
+     @param value The value to normalize; may be null
+     @return The normalized value, or null if value was null
+     **/
+     public static String normalize(String value) {
+         if (value == null) return value;
+         value = value.trim();
+         if (value.indexOf(" ") == -1) return value;
+         int len = value.length();
+         StringBuffer collapsed = new StringBuffer(len);
+         boolean previousWasSpace = false;
+         for (int i = 0; i < len; i++) {
+             char c = value.charAt(i);
+             boolean isSpace = (c == ' ');
+             // keep everything except a space that directly follows a space
+             if (!(isSpace && previousWasSpace)) {
+                 collapsed.append(c);
+             }
+             previousWasSpace = isSpace;
+         }
+         return collapsed.toString();
+     }
+
+     /**
+     Sets an attribute and its value into this element type's
+     default attributes.
+     @param name The name of the attribute
+     @param type The type of the attribute
+     @param value The value of the attribute
+     */
+
+     public void setAttribute(String name, String type, String value) {
+         setAttribute(theAtts, name, type, value);
+     }
+
+     /**
+     Sets the models of this element type.
+     @param model The content models of this element type as a vector of bits
+     */
+
+     public void setModel(int model) {
+         theModel = model;
+     }
+
+     /**
+     Sets the content models to which this element type belongs.
+     @param memberOf The content models to which this element type belongs as a vector of bits
+     */
+
+     public void setMemberOf(int memberOf) {
+         theMemberOf = memberOf;
+     }
+
+     /**
+     Sets the flags of this element type.
+     @param flags The flags associated with this element type as a vector of bits
+     */
+
+     public void setFlags(int flags) {
+         theFlags = flags;
+     }
+
+     /**
+     Sets the parent element type of this element type.
+     @param parent The parent element type
+     */
+
+     public void setParent(ElementType parent) {
+         theParent = parent;
+     }
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/GOOGLE_README.txt b/src/org/ccil/cowan/tagsoup/GOOGLE_README.txt
new file mode 100644
index 0000000..7462b7a
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/GOOGLE_README.txt
@@ -0,0 +1,8 @@
+This is TagSoup 1.2, downloaded from http://home.ccil.org/~cowan/XML/tagsoup/.
+
+To get the java files included here:
+- download tagsoup
+- unzip it
+- cd into the tagsoup directory
+- run ant
+- copy the files in src/java and tmp/src \ No newline at end of file
diff --git a/src/org/ccil/cowan/tagsoup/HTMLModels.java b/src/org/ccil/cowan/tagsoup/HTMLModels.java
new file mode 100644
index 0000000..a6e413c
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/HTMLModels.java
@@ -0,0 +1,53 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Defines models for HTMLSchema
+
+/**
+This interface contains generated constants representing HTML content
+models. Logically, it is part of HTMLSchema, but it is more
+convenient to generate the constants into a separate interface.
+*/
+
+package org.ccil.cowan.tagsoup;
+ public interface HTMLModels {
+
+     // Start of model definitions
+     // Each model occupies its own bit so that models can be ORed together
+     // into a vector.  Interface fields are implicitly public static final.
+     int M_AREA = 1 << 1;
+     int M_BLOCK = 1 << 2;
+     int M_BLOCKINLINE = 1 << 3;
+     int M_BODY = 1 << 4;
+     int M_CELL = 1 << 5;
+     int M_COL = 1 << 6;
+     int M_DEF = 1 << 7;
+     int M_FORM = 1 << 8;
+     int M_FRAME = 1 << 9;
+     int M_HEAD = 1 << 10;
+     int M_HTML = 1 << 11;
+     int M_INLINE = 1 << 12;
+     int M_LEGEND = 1 << 13;
+     int M_LI = 1 << 14;
+     int M_NOLINK = 1 << 15;
+     int M_OPTION = 1 << 16;
+     int M_OPTIONS = 1 << 17;
+     int M_P = 1 << 18;
+     int M_PARAM = 1 << 19;
+     int M_TABLE = 1 << 20;
+     int M_TABULAR = 1 << 21;
+     int M_TR = 1 << 22;
+
+
+     // End of model definitions
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/HTMLScanner.java b/src/org/ccil/cowan/tagsoup/HTMLScanner.java
new file mode 100644
index 0000000..f5980ee
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/HTMLScanner.java
@@ -0,0 +1,648 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.SAXException;
+import org.xml.sax.Locator;
+
+/**
+This class implements a table-driven scanner for HTML, allowing for lots of
+defects. It implements the Scanner interface, which accepts a Reader
+object to fetch characters from and a ScanHandler object to report lexical
+events to.
+*/
+
+public class HTMLScanner implements Scanner, Locator {
+
+ // Start of state table
+ private static final int S_ANAME = 1;
+ private static final int S_APOS = 2;
+ private static final int S_AVAL = 3;
+ private static final int S_BB = 4;
+ private static final int S_BBC = 5;
+ private static final int S_BBCD = 6;
+ private static final int S_BBCDA = 7;
+ private static final int S_BBCDAT = 8;
+ private static final int S_BBCDATA = 9;
+ private static final int S_CDATA = 10;
+ private static final int S_CDATA2 = 11;
+ private static final int S_CDSECT = 12;
+ private static final int S_CDSECT1 = 13;
+ private static final int S_CDSECT2 = 14;
+ private static final int S_COM = 15;
+ private static final int S_COM2 = 16;
+ private static final int S_COM3 = 17;
+ private static final int S_COM4 = 18;
+ private static final int S_DECL = 19;
+ private static final int S_DECL2 = 20;
+ private static final int S_DONE = 21;
+ private static final int S_EMPTYTAG = 22;
+ private static final int S_ENT = 23;
+ private static final int S_EQ = 24;
+ private static final int S_ETAG = 25;
+ private static final int S_GI = 26;
+ private static final int S_NCR = 27;
+ private static final int S_PCDATA = 28;
+ private static final int S_PI = 29;
+ private static final int S_PITARGET = 30;
+ private static final int S_QUOT = 31;
+ private static final int S_STAGC = 32;
+ private static final int S_TAG = 33;
+ private static final int S_TAGWS = 34;
+ private static final int S_XNCR = 35;
+ private static final int A_ADUP = 1;
+ private static final int A_ADUP_SAVE = 2;
+ private static final int A_ADUP_STAGC = 3;
+ private static final int A_ANAME = 4;
+ private static final int A_ANAME_ADUP = 5;
+ private static final int A_ANAME_ADUP_STAGC = 6;
+ private static final int A_AVAL = 7;
+ private static final int A_AVAL_STAGC = 8;
+ private static final int A_CDATA = 9;
+ private static final int A_CMNT = 10;
+ private static final int A_DECL = 11;
+ private static final int A_EMPTYTAG = 12;
+ private static final int A_ENTITY = 13;
+ private static final int A_ENTITY_START = 14;
+ private static final int A_ETAG = 15;
+ private static final int A_GI = 16;
+ private static final int A_GI_STAGC = 17;
+ private static final int A_LT = 18;
+ private static final int A_LT_PCDATA = 19;
+ private static final int A_MINUS = 20;
+ private static final int A_MINUS2 = 21;
+ private static final int A_MINUS3 = 22;
+ private static final int A_PCDATA = 23;
+ private static final int A_PI = 24;
+ private static final int A_PITARGET = 25;
+ private static final int A_PITARGET_PI = 26;
+ private static final int A_SAVE = 27;
+ private static final int A_SKIP = 28;
+ private static final int A_SP = 29;
+ private static final int A_STAGC = 30;
+ private static final int A_UNGET = 31;
+ private static final int A_UNSAVE_PCDATA = 32;
+ private static int[] statetable = {
+ S_ANAME, '/', A_ANAME_ADUP, S_EMPTYTAG,
+ S_ANAME, '=', A_ANAME, S_AVAL,
+ S_ANAME, '>', A_ANAME_ADUP_STAGC, S_PCDATA,
+ S_ANAME, 0, A_SAVE, S_ANAME,
+ S_ANAME, -1, A_ANAME_ADUP_STAGC, S_DONE,
+ S_ANAME, ' ', A_ANAME, S_EQ,
+ S_ANAME, '\n', A_ANAME, S_EQ,
+ S_ANAME, '\t', A_ANAME, S_EQ,
+ S_APOS, '\'', A_AVAL, S_TAGWS,
+ S_APOS, 0, A_SAVE, S_APOS,
+ S_APOS, -1, A_AVAL_STAGC, S_DONE,
+ S_APOS, ' ', A_SP, S_APOS,
+ S_APOS, '\n', A_SP, S_APOS,
+ S_APOS, '\t', A_SP, S_APOS,
+ S_AVAL, '\'', A_SKIP, S_APOS,
+ S_AVAL, '"', A_SKIP, S_QUOT,
+ S_AVAL, '>', A_AVAL_STAGC, S_PCDATA,
+ S_AVAL, 0, A_SAVE, S_STAGC,
+ S_AVAL, -1, A_AVAL_STAGC, S_DONE,
+ S_AVAL, ' ', A_SKIP, S_AVAL,
+ S_AVAL, '\n', A_SKIP, S_AVAL,
+ S_AVAL, '\t', A_SKIP, S_AVAL,
+ S_BB, 'C', A_SKIP, S_BBC,
+ S_BB, 0, A_SKIP, S_DECL,
+ S_BB, -1, A_SKIP, S_DONE,
+ S_BBC, 'D', A_SKIP, S_BBCD,
+ S_BBC, 0, A_SKIP, S_DECL,
+ S_BBC, -1, A_SKIP, S_DONE,
+ S_BBCD, 'A', A_SKIP, S_BBCDA,
+ S_BBCD, 0, A_SKIP, S_DECL,
+ S_BBCD, -1, A_SKIP, S_DONE,
+ S_BBCDA, 'T', A_SKIP, S_BBCDAT,
+ S_BBCDA, 0, A_SKIP, S_DECL,
+ S_BBCDA, -1, A_SKIP, S_DONE,
+ S_BBCDAT, 'A', A_SKIP, S_BBCDATA,
+ S_BBCDAT, 0, A_SKIP, S_DECL,
+ S_BBCDAT, -1, A_SKIP, S_DONE,
+ S_BBCDATA, '[', A_SKIP, S_CDSECT,
+ S_BBCDATA, 0, A_SKIP, S_DECL,
+ S_BBCDATA, -1, A_SKIP, S_DONE,
+ S_CDATA, '<', A_SAVE, S_CDATA2,
+ S_CDATA, 0, A_SAVE, S_CDATA,
+ S_CDATA, -1, A_PCDATA, S_DONE,
+ S_CDATA2, '/', A_UNSAVE_PCDATA, S_ETAG,
+ S_CDATA2, 0, A_SAVE, S_CDATA,
+ S_CDATA2, -1, A_UNSAVE_PCDATA, S_DONE,
+ S_CDSECT, ']', A_SAVE, S_CDSECT1,
+ S_CDSECT, 0, A_SAVE, S_CDSECT,
+ S_CDSECT, -1, A_SKIP, S_DONE,
+ S_CDSECT1, ']', A_SAVE, S_CDSECT2,
+ S_CDSECT1, 0, A_SAVE, S_CDSECT,
+ S_CDSECT1, -1, A_SKIP, S_DONE,
+ S_CDSECT2, '>', A_CDATA, S_PCDATA,
+ S_CDSECT2, 0, A_SAVE, S_CDSECT,
+ S_CDSECT2, -1, A_SKIP, S_DONE,
+ S_COM, '-', A_SKIP, S_COM2,
+ S_COM, 0, A_SAVE, S_COM2,
+ S_COM, -1, A_CMNT, S_DONE,
+ S_COM2, '-', A_SKIP, S_COM3,
+ S_COM2, 0, A_SAVE, S_COM2,
+ S_COM2, -1, A_CMNT, S_DONE,
+ S_COM3, '-', A_SKIP, S_COM4,
+ S_COM3, 0, A_MINUS, S_COM2,
+ S_COM3, -1, A_CMNT, S_DONE,
+ S_COM4, '-', A_MINUS3, S_COM4,
+ S_COM4, '>', A_CMNT, S_PCDATA,
+ S_COM4, 0, A_MINUS2, S_COM2,
+ S_COM4, -1, A_CMNT, S_DONE,
+ S_DECL, '-', A_SKIP, S_COM,
+ S_DECL, '[', A_SKIP, S_BB,
+ S_DECL, '>', A_SKIP, S_PCDATA,
+ S_DECL, 0, A_SAVE, S_DECL2,
+ S_DECL, -1, A_SKIP, S_DONE,
+ S_DECL2, '>', A_DECL, S_PCDATA,
+ S_DECL2, 0, A_SAVE, S_DECL2,
+ S_DECL2, -1, A_SKIP, S_DONE,
+ S_EMPTYTAG, '>', A_EMPTYTAG, S_PCDATA,
+ S_EMPTYTAG, 0, A_SAVE, S_ANAME,
+ S_EMPTYTAG, ' ', A_SKIP, S_TAGWS,
+ S_EMPTYTAG, '\n', A_SKIP, S_TAGWS,
+ S_EMPTYTAG, '\t', A_SKIP, S_TAGWS,
+ S_ENT, 0, A_ENTITY, S_ENT,
+ S_ENT, -1, A_ENTITY, S_DONE,
+ S_EQ, '=', A_SKIP, S_AVAL,
+ S_EQ, '>', A_ADUP_STAGC, S_PCDATA,
+ S_EQ, 0, A_ADUP_SAVE, S_ANAME,
+ S_EQ, -1, A_ADUP_STAGC, S_DONE,
+ S_EQ, ' ', A_SKIP, S_EQ,
+ S_EQ, '\n', A_SKIP, S_EQ,
+ S_EQ, '\t', A_SKIP, S_EQ,
+ S_ETAG, '>', A_ETAG, S_PCDATA,
+ S_ETAG, 0, A_SAVE, S_ETAG,
+ S_ETAG, -1, A_ETAG, S_DONE,
+ S_ETAG, ' ', A_SKIP, S_ETAG,
+ S_ETAG, '\n', A_SKIP, S_ETAG,
+ S_ETAG, '\t', A_SKIP, S_ETAG,
+ S_GI, '/', A_SKIP, S_EMPTYTAG,
+ S_GI, '>', A_GI_STAGC, S_PCDATA,
+ S_GI, 0, A_SAVE, S_GI,
+ S_GI, -1, A_SKIP, S_DONE,
+ S_GI, ' ', A_GI, S_TAGWS,
+ S_GI, '\n', A_GI, S_TAGWS,
+ S_GI, '\t', A_GI, S_TAGWS,
+ S_NCR, 0, A_ENTITY, S_NCR,
+ S_NCR, -1, A_ENTITY, S_DONE,
+ S_PCDATA, '&', A_ENTITY_START, S_ENT,
+ S_PCDATA, '<', A_PCDATA, S_TAG,
+ S_PCDATA, 0, A_SAVE, S_PCDATA,
+ S_PCDATA, -1, A_PCDATA, S_DONE,
+ S_PI, '>', A_PI, S_PCDATA,
+ S_PI, 0, A_SAVE, S_PI,
+ S_PI, -1, A_PI, S_DONE,
+ S_PITARGET, '>', A_PITARGET_PI, S_PCDATA,
+ S_PITARGET, 0, A_SAVE, S_PITARGET,
+ S_PITARGET, -1, A_PITARGET_PI, S_DONE,
+ S_PITARGET, ' ', A_PITARGET, S_PI,
+ S_PITARGET, '\n', A_PITARGET, S_PI,
+ S_PITARGET, '\t', A_PITARGET, S_PI,
+ S_QUOT, '"', A_AVAL, S_TAGWS,
+ S_QUOT, 0, A_SAVE, S_QUOT,
+ S_QUOT, -1, A_AVAL_STAGC, S_DONE,
+ S_QUOT, ' ', A_SP, S_QUOT,
+ S_QUOT, '\n', A_SP, S_QUOT,
+ S_QUOT, '\t', A_SP, S_QUOT,
+ S_STAGC, '>', A_AVAL_STAGC, S_PCDATA,
+ S_STAGC, 0, A_SAVE, S_STAGC,
+ S_STAGC, -1, A_AVAL_STAGC, S_DONE,
+ S_STAGC, ' ', A_AVAL, S_TAGWS,
+ S_STAGC, '\n', A_AVAL, S_TAGWS,
+ S_STAGC, '\t', A_AVAL, S_TAGWS,
+ S_TAG, '!', A_SKIP, S_DECL,
+ S_TAG, '?', A_SKIP, S_PITARGET,
+ S_TAG, '/', A_SKIP, S_ETAG,
+ S_TAG, '<', A_SAVE, S_TAG,
+ S_TAG, 0, A_SAVE, S_GI,
+ S_TAG, -1, A_LT_PCDATA, S_DONE,
+ S_TAG, ' ', A_LT, S_PCDATA,
+ S_TAG, '\n', A_LT, S_PCDATA,
+ S_TAG, '\t', A_LT, S_PCDATA,
+ S_TAGWS, '/', A_SKIP, S_EMPTYTAG,
+ S_TAGWS, '>', A_STAGC, S_PCDATA,
+ S_TAGWS, 0, A_SAVE, S_ANAME,
+ S_TAGWS, -1, A_STAGC, S_DONE,
+ S_TAGWS, ' ', A_SKIP, S_TAGWS,
+ S_TAGWS, '\n', A_SKIP, S_TAGWS,
+ S_TAGWS, '\t', A_SKIP, S_TAGWS,
+ S_XNCR, 0, A_ENTITY, S_XNCR,
+ S_XNCR, -1, A_ENTITY, S_DONE,
+
+ };
+ private static final String[] debug_actionnames = { "", "A_ADUP", "A_ADUP_SAVE", "A_ADUP_STAGC", "A_ANAME", "A_ANAME_ADUP", "A_ANAME_ADUP_STAGC", "A_AVAL", "A_AVAL_STAGC", "A_CDATA", "A_CMNT", "A_DECL", "A_EMPTYTAG", "A_ENTITY", "A_ENTITY_START", "A_ETAG", "A_GI", "A_GI_STAGC", "A_LT", "A_LT_PCDATA", "A_MINUS", "A_MINUS2", "A_MINUS3", "A_PCDATA", "A_PI", "A_PITARGET", "A_PITARGET_PI", "A_SAVE", "A_SKIP", "A_SP", "A_STAGC", "A_UNGET", "A_UNSAVE_PCDATA"};
+ private static final String[] debug_statenames = { "", "S_ANAME", "S_APOS", "S_AVAL", "S_BB", "S_BBC", "S_BBCD", "S_BBCDA", "S_BBCDAT", "S_BBCDATA", "S_CDATA", "S_CDATA2", "S_CDSECT", "S_CDSECT1", "S_CDSECT2", "S_COM", "S_COM2", "S_COM3", "S_COM4", "S_DECL", "S_DECL2", "S_DONE", "S_EMPTYTAG", "S_ENT", "S_EQ", "S_ETAG", "S_GI", "S_NCR", "S_PCDATA", "S_PI", "S_PITARGET", "S_QUOT", "S_STAGC", "S_TAG", "S_TAGWS", "S_XNCR"};
+
+
+ // End of state table
+
+ private String thePublicid; // Locator state: identifiers supplied to resetDocumentLocator()
+ private String theSystemid;
+ private int theLastLine; // position captured by the most recent mark()
+ private int theLastColumn;
+ private int theCurrentLine; // running position, advanced by scan() for each character read
+ private int theCurrentColumn;
+
+ int theState; // Current state
+ int theNextState; // Next state
+ char[] theOutputBuffer = new char[200]; // Output buffer; save() doubles it when it is nearly full outside character data
+ int theSize; // Current buffer size (number of chars in use)
+ int[] theWinMap = { // Windows chars map: replacements for 0x80-0x9F, indexed by (code - 0x80)
+ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178};
+
+ // Compensate for bug in PushbackReader that allows
+ // pushing back EOF.
+ private void unread(PushbackReader r, int c) throws IOException {
+     // End-of-input (-1) is never pushed back; every other value is.
+     if (c == -1) {
+         return;
+     }
+     r.unread(c);
+ }
+
+ // Locator implementation
+
+ public int getLineNumber() { // line saved by the last mark(); 0 right after resetDocumentLocator()
+ return theLastLine;
+ }
+ public int getColumnNumber() { // column saved by the last mark(); 0 right after resetDocumentLocator()
+ return theLastColumn;
+ }
+ public String getPublicId() { // public id most recently passed to resetDocumentLocator()
+ return thePublicid;
+ }
+ public String getSystemId() { // system id most recently passed to resetDocumentLocator()
+ return theSystemid;
+ }
+
+
+ // Scanner implementation
+
+ /**
+ Reset document locator, supplying publicid and systemid.
+ @param publicid Public id
+ @param systemid System id
+ */
+
+ public void resetDocumentLocator(String publicid, String systemid) {
+     // Remember the identifiers reported by getPublicId() and getSystemId().
+     thePublicid = publicid;
+     theSystemid = systemid;
+     // Rewind both the running position and the last marked position.
+     theCurrentColumn = 0;
+     theCurrentLine = 0;
+     theLastColumn = 0;
+     theLastLine = 0;
+ }
+
+ /**
+ Scan HTML source, reporting lexical events.
+ @param r0 Reader that provides characters
+ @param h ScanHandler that accepts lexical events.
+ */
+
+ public void scan(Reader r0, ScanHandler h) throws IOException, SAXException {
+ theState = S_PCDATA; // every scan starts in ordinary character data
+ PushbackReader r; // pushback is needed for CR handling and for entity terminators
+ if (r0 instanceof PushbackReader) {
+ r = (PushbackReader)r0;
+ }
+ else if (r0 instanceof BufferedReader) {
+ r = new PushbackReader(r0);
+ }
+ else {
+ r = new PushbackReader(new BufferedReader(r0, 200));
+ }
+
+ int firstChar = r.read(); // Remove any leading BOM
+ if (firstChar != '\uFEFF') unread(r, firstChar);
+
+ while (theState != S_DONE) {
+ int ch = r.read(); // -1 at end of input
+
+ // Process control characters: 0x80-0x9F are replaced through theWinMap
+ if (ch >= 0x80 && ch <= 0x9F) ch = theWinMap[ch-0x80];
+
+ if (ch == '\r') { // CR and CR LF are both delivered as a single '\n'
+ ch = r.read(); // expect LF next
+ if (ch != '\n') {
+ unread(r, ch); // nope
+ ch = '\n';
+ }
+ }
+
+ if (ch == '\n') {
+ theCurrentLine++;
+ theCurrentColumn = 0;
+ }
+ else {
+ theCurrentColumn++;
+ }
+
+ if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1)) continue; // remaining control characters are dropped (the position was already advanced)
+
+ // Search state table: rows are (state, char, action, next state); a char of 0 is the state's default row
+ int action = 0;
+ for (int i = 0; i < statetable.length; i += 4) {
+ if (theState != statetable[i]) {
+ if (action != 0) break; // past this state's rows with a default in hand
+ continue;
+ }
+ if (statetable[i+1] == 0) { // default row: remember it but keep looking for an exact match
+ action = statetable[i+2];
+ theNextState = statetable[i+3];
+ }
+ else if (statetable[i+1] == ch) { // exact match wins
+ action = statetable[i+2];
+ theNextState = statetable[i+3];
+ break;
+ }
+ }
+// System.err.println("In " + debug_statenames[theState] + " got " + nicechar(ch) + " doing " + debug_actionnames[action] + " then " + debug_statenames[theNextState]);
+ switch (action) {
+ case 0: // no table row was found for theState
+ throw new Error(
+"HTMLScanner can't cope with " + Integer.toString(ch) + " in state " +
+Integer.toString(theState));
+ case A_ADUP:
+ h.adup(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_ADUP_SAVE:
+ h.adup(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ save(ch, h);
+ break;
+ case A_ADUP_STAGC:
+ h.adup(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_ANAME:
+ h.aname(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_ANAME_ADUP:
+ h.aname(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.adup(theOutputBuffer, 0, theSize);
+ break;
+ case A_ANAME_ADUP_STAGC:
+ h.aname(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.adup(theOutputBuffer, 0, theSize);
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_AVAL:
+ h.aval(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_AVAL_STAGC:
+ h.aval(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_CDATA:
+ mark();
+ // suppress the final "]]" in the buffer
+ if (theSize > 1) theSize -= 2;
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_ENTITY_START:
+ h.pcdata(theOutputBuffer, 0, theSize); // flush the text collected before the '&'
+ theSize = 0;
+ save(ch, h); // the '&' itself becomes buffer position 0
+ break;
+ case A_ENTITY:
+ mark();
+ char ch1 = (char)ch;
+// System.out.println("Got " + ch1 + " in state " + ((theState == S_ENT) ? "S_ENT" : ((theState == S_NCR) ? "S_NCR" : "UNK")));
+ if (theState == S_ENT && ch1 == '#') { // numeric character reference
+ theNextState = S_NCR;
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_NCR && (ch1 == 'x' || ch1 == 'X')) { // hexadecimal form
+ theNextState = S_XNCR;
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_ENT && Character.isLetterOrDigit(ch1)) {
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_NCR && Character.isDigit(ch1)) {
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_XNCR && (Character.isDigit(ch1) || "abcdefABCDEF".indexOf(ch1) != -1)) {
+ save(ch, h);
+ break;
+ }
+
+ // The whole entity reference has been collected
+// System.err.println("%%" + new String(theOutputBuffer, 0, theSize));
+ h.entity(theOutputBuffer, 1, theSize - 1); // offset 1 skips the '&' saved by A_ENTITY_START
+ int ent = h.getEntity();
+// System.err.println("%% value = " + ent);
+ if (ent != 0) { // a result of 0 is handled below by keeping the buffered reference text
+ theSize = 0;
+ if (ent >= 0x80 && ent <= 0x9F) {
+ ent = theWinMap[ent-0x80];
+ }
+ if (ent < 0x20) {
+ // Control becomes space
+ ent = 0x20; // NOTE(review): save() is never called on this path, so no space is buffered - confirm intent
+ }
+ else if (ent >= 0xD800 && ent <= 0xDFFF) {
+ // Surrogates get dropped
+ ent = 0;
+ }
+ else if (ent <= 0xFFFF) {
+ // BMP character
+ save(ent, h);
+ }
+ else {
+ // Astral converted to two surrogates
+ ent -= 0x10000;
+ save((ent>>10) + 0xD800, h);
+ save((ent&0x3FF) + 0xDC00, h);
+ }
+ if (ch != ';') { // a terminating ';' is consumed; anything else is read again
+ unread(r, ch);
+ theCurrentColumn--;
+ }
+ }
+ else {
+ unread(r, ch);
+ theCurrentColumn--;
+ }
+ theNextState = S_PCDATA;
+ break;
+ case A_ETAG:
+ h.etag(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_DECL:
+ h.decl(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_GI:
+ h.gi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_GI_STAGC:
+ h.gi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_LT:
+ mark();
+ save('<', h);
+ save(ch, h);
+ break;
+ case A_LT_PCDATA:
+ mark();
+ save('<', h);
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_PCDATA:
+ mark();
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_CMNT:
+ mark();
+ h.cmnt(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_MINUS3:
+ save('-', h);
+ save(' ', h);
+ break;
+ case A_MINUS2:
+ save('-', h);
+ save(' ', h);
+ // fall through into A_MINUS
+ case A_MINUS:
+ save('-', h);
+ save(ch, h);
+ break;
+ case A_PI:
+ mark();
+ h.pi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_PITARGET:
+ h.pitarget(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_PITARGET_PI:
+ h.pitarget(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.pi(theOutputBuffer, 0, theSize);
+ break;
+ case A_SAVE:
+ save(ch, h);
+ break;
+ case A_SKIP:
+ break;
+ case A_SP:
+ save(' ', h);
+ break;
+ case A_STAGC:
+ h.stagc(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_EMPTYTAG:
+ mark();
+// System.err.println("%%% Empty tag seen");
+ if (theSize > 0) h.gi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stage(theOutputBuffer, 0, theSize);
+ break;
+ case A_UNGET:
+ unread(r, ch);
+ theCurrentColumn--;
+ break;
+ case A_UNSAVE_PCDATA:
+ if (theSize > 0) theSize--; // drop the last saved character before reporting the text
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ default:
+ throw new Error("Can't process state " + action); // the value reported here is the action code
+ }
+ theState = theNextState; // a handler may have changed theNextState during the callback (see startCDATA)
+ }
+ h.eof(theOutputBuffer, 0, 0);
+ }
+
+ /**
+ * Mark the current scan position as a "point of interest" - start of a tag,
+ * cdata, processing instruction etc.
+ */
+
+ private void mark() {
+     // Snapshot the running position; the Locator getters report this copy.
+     theLastLine = theCurrentLine;
+     theLastColumn = theCurrentColumn;
+ }
+
+ /**
+ A callback for the ScanHandler that allows it to force
+ the lexer state to CDATA content (no markup is recognized except
+ the end of the element).
+ */
+
+ public void startCDATA() { theNextState = S_CDATA; }
+
+ private void save(int ch, ScanHandler h) throws IOException, SAXException {
+     // Make room first whenever fewer than 20 free slots remain.
+     boolean nearlyFull = theSize >= theOutputBuffer.length - 20;
+     if (nearlyFull) {
+         boolean inCharacterData = (theState == S_PCDATA || theState == S_CDATA);
+         if (!inCharacterData) {
+             // Markup has to stay in one piece, so double the buffer instead.
+             char[] bigger = new char[theOutputBuffer.length * 2];
+             System.arraycopy(theOutputBuffer, 0, bigger, 0, theSize+1);
+             theOutputBuffer = bigger;
+         }
+         else {
+             // Character data may be reported in pieces: hand over what we have.
+             h.pcdata(theOutputBuffer, 0, theSize);
+             theSize = 0;
+         }
+     }
+     theOutputBuffer[theSize++] = (char)ch;
+ }
+
+ /**
+ Test procedure. Reads HTML from the standard input and writes
+ PYX to the standard output.
+ */
+
+ public static void main(String[] argv) throws IOException, SAXException {
+ Scanner s = new HTMLScanner();
+ Reader r = new InputStreamReader(System.in, "UTF-8");
+ Writer w = new OutputStreamWriter(System.out, "UTF-8");
+ PYXWriter pw = new PYXWriter(w);
+ s.scan(r, pw);
+ w.close();
+ }
+
+
+ private static String nicechar(int in) {
+     // Debugging aid: a printable rendering of a character code.
+     if (in == '\n') {
+         return "\\n";
+     }
+     return (in < 32)
+         ? "0x"+Integer.toHexString(in)
+         : "'"+((char)in)+"'";
+ }
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/HTMLSchema.java b/src/org/ccil/cowan/tagsoup/HTMLSchema.java
new file mode 100644
index 0000000..9b46a68
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/HTMLSchema.java
@@ -0,0 +1,2895 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+/**
+This class provides a Schema that has been preinitialized with HTML
+elements, attributes, and character entity declarations. All the declarations
+normally provided with HTML 4.01 are given, plus some that are IE-specific
+and NS4-specific. Attribute declarations of type CDATA with no default
+value are not included.
+*/
+
+package org.ccil.cowan.tagsoup;
+public class HTMLSchema extends Schema implements HTMLModels {
+
+ /**
+ Returns a newly constructed HTMLSchema object independent of
+ any existing ones.
+ */
+
+ public HTMLSchema() {
+ // Start of Schema calls
+ setURI("http://www.w3.org/1999/xhtml");
+ setPrefix("html");
+ elementType("<pcdata>", M_EMPTY, M_PCDATA, 0);
+ elementType("<root>", M_ROOT, M_EMPTY, 0);
+ elementType("a", M_PCDATA|M_NOLINK, M_INLINE, 0);
+ elementType("abbr", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("acronym", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("address", M_PCDATA|M_INLINE|M_P, M_BLOCK, 0);
+ elementType("applet", M_PCDATA|M_PARAM|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0);
+ elementType("area", M_EMPTY, M_AREA, 0);
+ elementType("b", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("base", M_EMPTY, M_HEAD, 0);
+ elementType("basefont", M_EMPTY, M_INLINE|M_NOLINK, 0);
+ elementType("bdo", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("bgsound", M_EMPTY, M_HEAD, 0);
+ elementType("big", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("blink", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("blockquote", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0);
+ elementType("body", M_PCDATA|M_INLINE|M_BLOCK, M_HTML|M_BODY, 0);
+ elementType("br", M_EMPTY, M_INLINE|M_NOLINK, 0);
+ elementType("button", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0);
+ elementType("canvas", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("caption", M_PCDATA|M_INLINE, M_TABULAR, 0);
+ elementType("center", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0);
+ elementType("cite", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("code", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("col", M_EMPTY, M_COL|M_TABULAR, 0);
+ elementType("colgroup", M_COL, M_TABULAR, 0);
+ elementType("comment", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("dd", M_PCDATA|M_INLINE|M_BLOCK, M_DEF, 0);
+ elementType("del", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_BLOCKINLINE|M_BLOCK, F_RESTART);
+ elementType("dfn", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("dir", M_LI, M_BLOCK, 0);
+ elementType("div", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0);
+ elementType("dl", M_DEF, M_BLOCK, 0);
+ elementType("dt", M_PCDATA|M_INLINE, M_DEF, 0);
+ elementType("em", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("fieldset", M_PCDATA|M_LEGEND|M_INLINE|M_BLOCK, M_BLOCK, 0);
+ elementType("font", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("form", M_PCDATA|M_INLINE|M_NOLINK|M_BLOCK|M_TR|M_CELL, M_BLOCK|M_FORM, F_NOFORCE);
+ elementType("frame", M_EMPTY, M_FRAME, 0);
+ elementType("frameset", M_FRAME, M_FRAME|M_HTML, 0);
+ elementType("h1", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("h2", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("h3", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("h4", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("h5", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("h6", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("head", M_HEAD, M_HTML, 0);
+ elementType("hr", M_EMPTY, M_BLOCK, 0);
+ elementType("html", M_HTML, M_ROOT, 0);
+ elementType("i", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("iframe", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0);
+ elementType("img", M_EMPTY, M_INLINE|M_NOLINK, 0);
+ elementType("input", M_EMPTY, M_INLINE|M_NOLINK, 0);
+ elementType("ins", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_BLOCK, F_RESTART);
+ elementType("isindex", M_EMPTY, M_HEAD, 0);
+ elementType("kbd", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("label", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("legend", M_PCDATA|M_INLINE, M_LEGEND, 0);
+ elementType("li", M_PCDATA|M_INLINE|M_BLOCK, M_LI, 0);
+ elementType("link", M_EMPTY, M_HEAD|M_INLINE, 0);
+ elementType("listing", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("map", M_BLOCK|M_AREA, M_INLINE, 0);
+ elementType("marquee", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("menu", M_LI, M_BLOCK, 0);
+ elementType("meta", M_EMPTY, M_HEAD, 0);
+ elementType("nobr", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("noframes", M_BODY|M_BLOCK|M_INLINE, M_BLOCK|M_HTML|M_FRAME, 0);
+ elementType("noscript", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0);
+ elementType("object", M_PCDATA|M_PARAM|M_INLINE|M_BLOCK, M_HEAD|M_INLINE|M_NOLINK, 0);
+ elementType("ol", M_LI, M_BLOCK, 0);
+ elementType("optgroup", M_OPTIONS, M_OPTIONS, 0);
+ elementType("option", M_PCDATA, M_OPTION|M_OPTIONS, 0);
+ elementType("p", M_PCDATA|M_INLINE|M_TABLE, M_BLOCK|M_P, 0);
+ elementType("param", M_EMPTY, M_PARAM, 0);
+ elementType("pre", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ elementType("q", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("rb", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("rbc", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("rp", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("rt", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("rtc", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("ruby", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("s", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("samp", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("script", M_PCDATA, M_ANY & ~M_ROOT, F_CDATA);
+ elementType("select", M_OPTIONS, M_INLINE, 0);
+ elementType("small", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("span", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("strike", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("strong", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("style", M_PCDATA, M_HEAD|M_INLINE, F_CDATA);
+ elementType("sub", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("sup", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("table", M_FORM|M_TABULAR, M_BLOCK|M_TABLE, F_NOFORCE);
+ elementType("tbody", M_TR, M_TABULAR, 0);
+ elementType("td", M_PCDATA|M_INLINE|M_BLOCK, M_CELL, 0);
+ elementType("textarea", M_PCDATA, M_INLINE, 0);
+ elementType("tfoot", M_TR|M_FORM|M_CELL, M_TABULAR, 0);
+ elementType("th", M_PCDATA|M_INLINE|M_BLOCK, M_CELL, 0);
+ elementType("thead", M_TR|M_FORM|M_CELL, M_TABULAR, 0);
+ elementType("title", M_PCDATA, M_HEAD, 0);
+ elementType("tr", M_FORM|M_CELL, M_TR|M_TABULAR, 0);
+ elementType("tt", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("u", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART);
+ elementType("ul", M_LI, M_BLOCK, 0);
+ elementType("var", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0);
+ elementType("wbr", M_EMPTY, M_INLINE|M_NOLINK, 0);
+ elementType("xmp", M_PCDATA|M_INLINE, M_BLOCK, 0);
+ parent("<pcdata>", "body");
+ parent("html", "<root>");
+ parent("a", "body");
+ parent("abbr", "body");
+ parent("acronym", "body");
+ parent("address", "body");
+ parent("applet", "body");
+ parent("area", "map");
+ parent("b", "body");
+ parent("base", "head");
+ parent("basefont", "body");
+ parent("bdo", "body");
+ parent("bgsound", "head");
+ parent("big", "body");
+ parent("blink", "body");
+ parent("blockquote", "body");
+ parent("body", "html");
+ parent("br", "body");
+ parent("button", "form");
+ parent("canvas", "body");
+ parent("caption", "table");
+ parent("center", "body");
+ parent("cite", "body");
+ parent("code", "body");
+ parent("col", "table");
+ parent("colgroup", "table");
+ parent("comment", "body");
+ parent("dd", "dl");
+ parent("del", "body");
+ parent("dfn", "body");
+ parent("dir", "body");
+ parent("div", "body");
+ parent("dl", "body");
+ parent("dt", "dl");
+ parent("em", "body");
+ parent("fieldset", "form");
+ parent("font", "body");
+ parent("form", "body");
+ parent("frame", "frameset");
+ parent("frameset", "html");
+ parent("h1", "body");
+ parent("h2", "body");
+ parent("h3", "body");
+ parent("h4", "body");
+ parent("h5", "body");
+ parent("h6", "body");
+ parent("head", "html");
+ parent("hr", "body");
+ parent("i", "body");
+ parent("iframe", "body");
+ parent("img", "body");
+ parent("input", "form");
+ parent("ins", "body");
+ parent("isindex", "head");
+ parent("kbd", "body");
+ parent("label", "form");
+ parent("legend", "fieldset");
+ parent("li", "ul");
+ parent("link", "head");
+ parent("listing", "body");
+ parent("map", "body");
+ parent("marquee", "body");
+ parent("menu", "body");
+ parent("meta", "head");
+ parent("nobr", "body");
+ parent("noframes", "html");
+ parent("noscript", "body");
+ parent("object", "body");
+ parent("ol", "body");
+ parent("optgroup", "select");
+ parent("option", "select");
+ parent("p", "body");
+ parent("param", "object");
+ parent("pre", "body");
+ parent("q", "body");
+ parent("rb", "body");
+ parent("rbc", "body");
+ parent("rp", "body");
+ parent("rt", "body");
+ parent("rtc", "body");
+ parent("ruby", "body");
+ parent("s", "body");
+ parent("samp", "body");
+ parent("script", "html");
+ parent("select", "form");
+ parent("small", "body");
+ parent("span", "body");
+ parent("strike", "body");
+ parent("strong", "body");
+ parent("style", "head");
+ parent("sub", "body");
+ parent("sup", "body");
+ parent("table", "body");
+ parent("tbody", "table");
+ parent("td", "tr");
+ parent("textarea", "form");
+ parent("tfoot", "table");
+ parent("th", "tr");
+ parent("thead", "table");
+ parent("title", "head");
+ parent("tr", "tbody");
+ parent("tt", "body");
+ parent("u", "body");
+ parent("ul", "body");
+ parent("var", "body");
+ parent("wbr", "body");
+ parent("xmp", "body");
+ attribute("a", "hreflang", "NMTOKEN", null);
+ attribute("a", "shape", "CDATA", "rect");
+ attribute("a", "tabindex", "NMTOKEN", null);
+ attribute("applet", "align", "NMTOKEN", null);
+ attribute("area", "nohref", "BOOLEAN", null);
+ attribute("area", "shape", "CDATA", "rect");
+ attribute("area", "tabindex", "NMTOKEN", null);
+ attribute("br", "clear", "CDATA", "none");
+ attribute("button", "disabled", "BOOLEAN", null);
+ attribute("button", "tabindex", "NMTOKEN", null);
+ attribute("button", "type", "CDATA", "submit");
+ attribute("caption", "align", "NMTOKEN", null);
+ attribute("col", "align", "NMTOKEN", null);
+ attribute("col", "span", "CDATA", "1");
+ attribute("col", "valign", "NMTOKEN", null);
+ attribute("colgroup", "align", "NMTOKEN", null);
+ attribute("colgroup", "span", "CDATA", "1");
+ attribute("colgroup", "valign", "NMTOKEN", null);
+ attribute("dir", "compact", "BOOLEAN", null);
+ attribute("div", "align", "NMTOKEN", null);
+ attribute("dl", "compact", "BOOLEAN", null);
+ attribute("form", "enctype", "CDATA", "application/x-www-form-urlencoded");
+ attribute("form", "method", "CDATA", "get");
+ attribute("frame", "frameborder", "CDATA", "1");
+ attribute("frame", "noresize", "BOOLEAN", null);
+ attribute("frame", "scrolling", "CDATA", "auto");
+ attribute("h1", "align", "NMTOKEN", null);
+ attribute("h2", "align", "NMTOKEN", null);
+ attribute("h3", "align", "NMTOKEN", null);
+ attribute("h4", "align", "NMTOKEN", null);
+ attribute("h5", "align", "NMTOKEN", null);
+ attribute("h6", "align", "NMTOKEN", null);
+ attribute("hr", "align", "NMTOKEN", null);
+ attribute("hr", "noshade", "BOOLEAN", null);
+ attribute("iframe", "align", "NMTOKEN", null);
+ attribute("iframe", "frameborder", "CDATA", "1");
+ attribute("iframe", "scrolling", "CDATA", "auto");
+ attribute("img", "align", "NMTOKEN", null);
+ attribute("img", "ismap", "BOOLEAN", null);
+ attribute("input", "align", "NMTOKEN", null);
+ attribute("input", "checked", "BOOLEAN", null);
+ attribute("input", "disabled", "BOOLEAN", null);
+ attribute("input", "ismap", "BOOLEAN", null);
+ attribute("input", "maxlength", "NMTOKEN", null);
+ attribute("input", "readonly", "BOOLEAN", null);
+ attribute("input", "tabindex", "NMTOKEN", null);
+ attribute("input", "type", "CDATA", "text");
+ attribute("label", "for", "IDREF", null);
+ attribute("legend", "align", "NMTOKEN", null);
+ attribute("li", "value", "NMTOKEN", null);
+ attribute("link", "hreflang", "NMTOKEN", null);
+ attribute("marquee", "width", "NMTOKEN", null);
+ attribute("menu", "compact", "BOOLEAN", null);
+ attribute("meta", "http-equiv", "NMTOKEN", null);
+ attribute("meta", "name", "NMTOKEN", null);
+ attribute("object", "align", "NMTOKEN", null);
+ attribute("object", "declare", "BOOLEAN", null);
+ attribute("object", "tabindex", "NMTOKEN", null);
+ attribute("ol", "compact", "BOOLEAN", null);
+ attribute("ol", "start", "NMTOKEN", null);
+ attribute("optgroup", "disabled", "BOOLEAN", null);
+ attribute("option", "disabled", "BOOLEAN", null);
+ attribute("option", "selected", "BOOLEAN", null);
+ attribute("p", "align", "NMTOKEN", null);
+ attribute("param", "valuetype", "CDATA", "data");
+ attribute("pre", "width", "NMTOKEN", null);
+ attribute("rt", "rbspan", "CDATA", "1");
+ attribute("script", "defer", "BOOLEAN", null);
+ attribute("select", "disabled", "BOOLEAN", null);
+ attribute("select", "multiple", "BOOLEAN", null);
+ attribute("select", "size", "NMTOKEN", null);
+ attribute("select", "tabindex", "NMTOKEN", null);
+ attribute("table", "align", "NMTOKEN", null);
+ attribute("table", "frame", "NMTOKEN", null);
+ attribute("table", "rules", "NMTOKEN", null);
+ attribute("tbody", "align", "NMTOKEN", null);
+ attribute("tbody", "valign", "NMTOKEN", null);
+ attribute("td", "align", "NMTOKEN", null);
+ attribute("td", "colspan", "CDATA", "1");
+ attribute("td", "headers", "IDREFS", null);
+ attribute("td", "nowrap", "BOOLEAN", null);
+ attribute("td", "rowspan", "CDATA", "1");
+ attribute("td", "scope", "NMTOKEN", null);
+ attribute("td", "valign", "NMTOKEN", null);
+ attribute("textarea", "cols", "NMTOKEN", null);
+ attribute("textarea", "disabled", "BOOLEAN", null);
+ attribute("textarea", "readonly", "BOOLEAN", null);
+ attribute("textarea", "rows", "NMTOKEN", null);
+ attribute("textarea", "tabindex", "NMTOKEN", null);
+ attribute("tfoot", "align", "NMTOKEN", null);
+ attribute("tfoot", "valign", "NMTOKEN", null);
+ attribute("th", "align", "NMTOKEN", null);
+ attribute("th", "colspan", "CDATA", "1");
+ attribute("th", "headers", "IDREFS", null);
+ attribute("th", "nowrap", "BOOLEAN", null);
+ attribute("th", "rowspan", "CDATA", "1");
+ attribute("th", "scope", "NMTOKEN", null);
+ attribute("th", "valign", "NMTOKEN", null);
+ attribute("thead", "align", "NMTOKEN", null);
+ attribute("thead", "valign", "NMTOKEN", null);
+ attribute("tr", "align", "NMTOKEN", null);
+ attribute("tr", "valign", "NMTOKEN", null);
+ attribute("ul", "compact", "BOOLEAN", null);
+ attribute("ul", "type", "NMTOKEN", null);
+ attribute("xmp", "width", "NMTOKEN", null);
+ attribute("a", "class", "NMTOKEN", null);
+ attribute("abbr", "class", "NMTOKEN", null);
+ attribute("acronym", "class", "NMTOKEN", null);
+ attribute("address", "class", "NMTOKEN", null);
+ attribute("applet", "class", "NMTOKEN", null);
+ attribute("area", "class", "NMTOKEN", null);
+ attribute("b", "class", "NMTOKEN", null);
+ attribute("base", "class", "NMTOKEN", null);
+ attribute("basefont", "class", "NMTOKEN", null);
+ attribute("bdo", "class", "NMTOKEN", null);
+ attribute("bgsound", "class", "NMTOKEN", null);
+ attribute("big", "class", "NMTOKEN", null);
+ attribute("blink", "class", "NMTOKEN", null);
+ attribute("blockquote", "class", "NMTOKEN", null);
+ attribute("body", "class", "NMTOKEN", null);
+ attribute("br", "class", "NMTOKEN", null);
+ attribute("button", "class", "NMTOKEN", null);
+ attribute("canvas", "class", "NMTOKEN", null);
+ attribute("caption", "class", "NMTOKEN", null);
+ attribute("center", "class", "NMTOKEN", null);
+ attribute("cite", "class", "NMTOKEN", null);
+ attribute("code", "class", "NMTOKEN", null);
+ attribute("col", "class", "NMTOKEN", null);
+ attribute("colgroup", "class", "NMTOKEN", null);
+ attribute("comment", "class", "NMTOKEN", null);
+ attribute("dd", "class", "NMTOKEN", null);
+ attribute("del", "class", "NMTOKEN", null);
+ attribute("dfn", "class", "NMTOKEN", null);
+ attribute("dir", "class", "NMTOKEN", null);
+ attribute("div", "class", "NMTOKEN", null);
+ attribute("dl", "class", "NMTOKEN", null);
+ attribute("dt", "class", "NMTOKEN", null);
+ attribute("em", "class", "NMTOKEN", null);
+ attribute("fieldset", "class", "NMTOKEN", null);
+ attribute("font", "class", "NMTOKEN", null);
+ attribute("form", "class", "NMTOKEN", null);
+ attribute("frame", "class", "NMTOKEN", null);
+ attribute("frameset", "class", "NMTOKEN", null);
+ attribute("h1", "class", "NMTOKEN", null);
+ attribute("h2", "class", "NMTOKEN", null);
+ attribute("h3", "class", "NMTOKEN", null);
+ attribute("h4", "class", "NMTOKEN", null);
+ attribute("h5", "class", "NMTOKEN", null);
+ attribute("h6", "class", "NMTOKEN", null);
+ attribute("head", "class", "NMTOKEN", null);
+ attribute("hr", "class", "NMTOKEN", null);
+ attribute("html", "class", "NMTOKEN", null);
+ attribute("i", "class", "NMTOKEN", null);
+ attribute("iframe", "class", "NMTOKEN", null);
+ attribute("img", "class", "NMTOKEN", null);
+ attribute("input", "class", "NMTOKEN", null);
+ attribute("ins", "class", "NMTOKEN", null);
+ attribute("isindex", "class", "NMTOKEN", null);
+ attribute("kbd", "class", "NMTOKEN", null);
+ attribute("label", "class", "NMTOKEN", null);
+ attribute("legend", "class", "NMTOKEN", null);
+ attribute("li", "class", "NMTOKEN", null);
+ attribute("link", "class", "NMTOKEN", null);
+ attribute("listing", "class", "NMTOKEN", null);
+ attribute("map", "class", "NMTOKEN", null);
+ attribute("marquee", "class", "NMTOKEN", null);
+ attribute("menu", "class", "NMTOKEN", null);
+ attribute("meta", "class", "NMTOKEN", null);
+ attribute("nobr", "class", "NMTOKEN", null);
+ attribute("noframes", "class", "NMTOKEN", null);
+ attribute("noscript", "class", "NMTOKEN", null);
+ attribute("object", "class", "NMTOKEN", null);
+ attribute("ol", "class", "NMTOKEN", null);
+ attribute("optgroup", "class", "NMTOKEN", null);
+ attribute("option", "class", "NMTOKEN", null);
+ attribute("p", "class", "NMTOKEN", null);
+ attribute("param", "class", "NMTOKEN", null);
+ attribute("pre", "class", "NMTOKEN", null);
+ attribute("q", "class", "NMTOKEN", null);
+ attribute("rb", "class", "NMTOKEN", null);
+ attribute("rbc", "class", "NMTOKEN", null);
+ attribute("rp", "class", "NMTOKEN", null);
+ attribute("rt", "class", "NMTOKEN", null);
+ attribute("rtc", "class", "NMTOKEN", null);
+ attribute("ruby", "class", "NMTOKEN", null);
+ attribute("s", "class", "NMTOKEN", null);
+ attribute("samp", "class", "NMTOKEN", null);
+ attribute("script", "class", "NMTOKEN", null);
+ attribute("select", "class", "NMTOKEN", null);
+ attribute("small", "class", "NMTOKEN", null);
+ attribute("span", "class", "NMTOKEN", null);
+ attribute("strike", "class", "NMTOKEN", null);
+ attribute("strong", "class", "NMTOKEN", null);
+ attribute("style", "class", "NMTOKEN", null);
+ attribute("sub", "class", "NMTOKEN", null);
+ attribute("sup", "class", "NMTOKEN", null);
+ attribute("table", "class", "NMTOKEN", null);
+ attribute("tbody", "class", "NMTOKEN", null);
+ attribute("td", "class", "NMTOKEN", null);
+ attribute("textarea", "class", "NMTOKEN", null);
+ attribute("tfoot", "class", "NMTOKEN", null);
+ attribute("th", "class", "NMTOKEN", null);
+ attribute("thead", "class", "NMTOKEN", null);
+ attribute("title", "class", "NMTOKEN", null);
+ attribute("tr", "class", "NMTOKEN", null);
+ attribute("tt", "class", "NMTOKEN", null);
+ attribute("u", "class", "NMTOKEN", null);
+ attribute("ul", "class", "NMTOKEN", null);
+ attribute("var", "class", "NMTOKEN", null);
+ attribute("wbr", "class", "NMTOKEN", null);
+ attribute("xmp", "class", "NMTOKEN", null);
+ attribute("a", "dir", "NMTOKEN", null);
+ attribute("abbr", "dir", "NMTOKEN", null);
+ attribute("acronym", "dir", "NMTOKEN", null);
+ attribute("address", "dir", "NMTOKEN", null);
+ attribute("applet", "dir", "NMTOKEN", null);
+ attribute("area", "dir", "NMTOKEN", null);
+ attribute("b", "dir", "NMTOKEN", null);
+ attribute("base", "dir", "NMTOKEN", null);
+ attribute("basefont", "dir", "NMTOKEN", null);
+ attribute("bdo", "dir", "NMTOKEN", null);
+ attribute("bgsound", "dir", "NMTOKEN", null);
+ attribute("big", "dir", "NMTOKEN", null);
+ attribute("blink", "dir", "NMTOKEN", null);
+ attribute("blockquote", "dir", "NMTOKEN", null);
+ attribute("body", "dir", "NMTOKEN", null);
+ attribute("br", "dir", "NMTOKEN", null);
+ attribute("button", "dir", "NMTOKEN", null);
+ attribute("canvas", "dir", "NMTOKEN", null);
+ attribute("caption", "dir", "NMTOKEN", null);
+ attribute("center", "dir", "NMTOKEN", null);
+ attribute("cite", "dir", "NMTOKEN", null);
+ attribute("code", "dir", "NMTOKEN", null);
+ attribute("col", "dir", "NMTOKEN", null);
+ attribute("colgroup", "dir", "NMTOKEN", null);
+ attribute("comment", "dir", "NMTOKEN", null);
+ attribute("dd", "dir", "NMTOKEN", null);
+ attribute("del", "dir", "NMTOKEN", null);
+ attribute("dfn", "dir", "NMTOKEN", null);
+ attribute("dir", "dir", "NMTOKEN", null);
+ attribute("div", "dir", "NMTOKEN", null);
+ attribute("dl", "dir", "NMTOKEN", null);
+ attribute("dt", "dir", "NMTOKEN", null);
+ attribute("em", "dir", "NMTOKEN", null);
+ attribute("fieldset", "dir", "NMTOKEN", null);
+ attribute("font", "dir", "NMTOKEN", null);
+ attribute("form", "dir", "NMTOKEN", null);
+ attribute("frame", "dir", "NMTOKEN", null);
+ attribute("frameset", "dir", "NMTOKEN", null);
+ attribute("h1", "dir", "NMTOKEN", null);
+ attribute("h2", "dir", "NMTOKEN", null);
+ attribute("h3", "dir", "NMTOKEN", null);
+ attribute("h4", "dir", "NMTOKEN", null);
+ attribute("h5", "dir", "NMTOKEN", null);
+ attribute("h6", "dir", "NMTOKEN", null);
+ attribute("head", "dir", "NMTOKEN", null);
+ attribute("hr", "dir", "NMTOKEN", null);
+ attribute("html", "dir", "NMTOKEN", null);
+ attribute("i", "dir", "NMTOKEN", null);
+ attribute("iframe", "dir", "NMTOKEN", null);
+ attribute("img", "dir", "NMTOKEN", null);
+ attribute("input", "dir", "NMTOKEN", null);
+ attribute("ins", "dir", "NMTOKEN", null);
+ attribute("isindex", "dir", "NMTOKEN", null);
+ attribute("kbd", "dir", "NMTOKEN", null);
+ attribute("label", "dir", "NMTOKEN", null);
+ attribute("legend", "dir", "NMTOKEN", null);
+ attribute("li", "dir", "NMTOKEN", null);
+ attribute("link", "dir", "NMTOKEN", null);
+ attribute("listing", "dir", "NMTOKEN", null);
+ attribute("map", "dir", "NMTOKEN", null);
+ attribute("marquee", "dir", "NMTOKEN", null);
+ attribute("menu", "dir", "NMTOKEN", null);
+ attribute("meta", "dir", "NMTOKEN", null);
+ attribute("nobr", "dir", "NMTOKEN", null);
+ attribute("noframes", "dir", "NMTOKEN", null);
+ attribute("noscript", "dir", "NMTOKEN", null);
+ attribute("object", "dir", "NMTOKEN", null);
+ attribute("ol", "dir", "NMTOKEN", null);
+ attribute("optgroup", "dir", "NMTOKEN", null);
+ attribute("option", "dir", "NMTOKEN", null);
+ attribute("p", "dir", "NMTOKEN", null);
+ attribute("param", "dir", "NMTOKEN", null);
+ attribute("pre", "dir", "NMTOKEN", null);
+ attribute("q", "dir", "NMTOKEN", null);
+ attribute("rb", "dir", "NMTOKEN", null);
+ attribute("rbc", "dir", "NMTOKEN", null);
+ attribute("rp", "dir", "NMTOKEN", null);
+ attribute("rt", "dir", "NMTOKEN", null);
+ attribute("rtc", "dir", "NMTOKEN", null);
+ attribute("ruby", "dir", "NMTOKEN", null);
+ attribute("s", "dir", "NMTOKEN", null);
+ attribute("samp", "dir", "NMTOKEN", null);
+ attribute("script", "dir", "NMTOKEN", null);
+ attribute("select", "dir", "NMTOKEN", null);
+ attribute("small", "dir", "NMTOKEN", null);
+ attribute("span", "dir", "NMTOKEN", null);
+ attribute("strike", "dir", "NMTOKEN", null);
+ attribute("strong", "dir", "NMTOKEN", null);
+ attribute("style", "dir", "NMTOKEN", null);
+ attribute("sub", "dir", "NMTOKEN", null);
+ attribute("sup", "dir", "NMTOKEN", null);
+ attribute("table", "dir", "NMTOKEN", null);
+ attribute("tbody", "dir", "NMTOKEN", null);
+ attribute("td", "dir", "NMTOKEN", null);
+ attribute("textarea", "dir", "NMTOKEN", null);
+ attribute("tfoot", "dir", "NMTOKEN", null);
+ attribute("th", "dir", "NMTOKEN", null);
+ attribute("thead", "dir", "NMTOKEN", null);
+ attribute("title", "dir", "NMTOKEN", null);
+ attribute("tr", "dir", "NMTOKEN", null);
+ attribute("tt", "dir", "NMTOKEN", null);
+ attribute("u", "dir", "NMTOKEN", null);
+ attribute("ul", "dir", "NMTOKEN", null);
+ attribute("var", "dir", "NMTOKEN", null);
+ attribute("wbr", "dir", "NMTOKEN", null);
+ attribute("xmp", "dir", "NMTOKEN", null);
+ attribute("a", "id", "ID", null);
+ attribute("abbr", "id", "ID", null);
+ attribute("acronym", "id", "ID", null);
+ attribute("address", "id", "ID", null);
+ attribute("applet", "id", "ID", null);
+ attribute("area", "id", "ID", null);
+ attribute("b", "id", "ID", null);
+ attribute("base", "id", "ID", null);
+ attribute("basefont", "id", "ID", null);
+ attribute("bdo", "id", "ID", null);
+ attribute("bgsound", "id", "ID", null);
+ attribute("big", "id", "ID", null);
+ attribute("blink", "id", "ID", null);
+ attribute("blockquote", "id", "ID", null);
+ attribute("body", "id", "ID", null);
+ attribute("br", "id", "ID", null);
+ attribute("button", "id", "ID", null);
+ attribute("canvas", "id", "ID", null);
+ attribute("caption", "id", "ID", null);
+ attribute("center", "id", "ID", null);
+ attribute("cite", "id", "ID", null);
+ attribute("code", "id", "ID", null);
+ attribute("col", "id", "ID", null);
+ attribute("colgroup", "id", "ID", null);
+ attribute("comment", "id", "ID", null);
+ attribute("dd", "id", "ID", null);
+ attribute("del", "id", "ID", null);
+ attribute("dfn", "id", "ID", null);
+ attribute("dir", "id", "ID", null);
+ attribute("div", "id", "ID", null);
+ attribute("dl", "id", "ID", null);
+ attribute("dt", "id", "ID", null);
+ attribute("em", "id", "ID", null);
+ attribute("fieldset", "id", "ID", null);
+ attribute("font", "id", "ID", null);
+ attribute("form", "id", "ID", null);
+ attribute("frame", "id", "ID", null);
+ attribute("frameset", "id", "ID", null);
+ attribute("h1", "id", "ID", null);
+ attribute("h2", "id", "ID", null);
+ attribute("h3", "id", "ID", null);
+ attribute("h4", "id", "ID", null);
+ attribute("h5", "id", "ID", null);
+ attribute("h6", "id", "ID", null);
+ attribute("head", "id", "ID", null);
+ attribute("hr", "id", "ID", null);
+ attribute("html", "id", "ID", null);
+ attribute("i", "id", "ID", null);
+ attribute("iframe", "id", "ID", null);
+ attribute("img", "id", "ID", null);
+ attribute("input", "id", "ID", null);
+ attribute("ins", "id", "ID", null);
+ attribute("isindex", "id", "ID", null);
+ attribute("kbd", "id", "ID", null);
+ attribute("label", "id", "ID", null);
+ attribute("legend", "id", "ID", null);
+ attribute("li", "id", "ID", null);
+ attribute("link", "id", "ID", null);
+ attribute("listing", "id", "ID", null);
+ attribute("map", "id", "ID", null);
+ attribute("marquee", "id", "ID", null);
+ attribute("menu", "id", "ID", null);
+ attribute("meta", "id", "ID", null);
+ attribute("nobr", "id", "ID", null);
+ attribute("noframes", "id", "ID", null);
+ attribute("noscript", "id", "ID", null);
+ attribute("object", "id", "ID", null);
+ attribute("ol", "id", "ID", null);
+ attribute("optgroup", "id", "ID", null);
+ attribute("option", "id", "ID", null);
+ attribute("p", "id", "ID", null);
+ attribute("param", "id", "ID", null);
+ attribute("pre", "id", "ID", null);
+ attribute("q", "id", "ID", null);
+ attribute("rb", "id", "ID", null);
+ attribute("rbc", "id", "ID", null);
+ attribute("rp", "id", "ID", null);
+ attribute("rt", "id", "ID", null);
+ attribute("rtc", "id", "ID", null);
+ attribute("ruby", "id", "ID", null);
+ attribute("s", "id", "ID", null);
+ attribute("samp", "id", "ID", null);
+ attribute("script", "id", "ID", null);
+ attribute("select", "id", "ID", null);
+ attribute("small", "id", "ID", null);
+ attribute("span", "id", "ID", null);
+ attribute("strike", "id", "ID", null);
+ attribute("strong", "id", "ID", null);
+ attribute("style", "id", "ID", null);
+ attribute("sub", "id", "ID", null);
+ attribute("sup", "id", "ID", null);
+ attribute("table", "id", "ID", null);
+ attribute("tbody", "id", "ID", null);
+ attribute("td", "id", "ID", null);
+ attribute("textarea", "id", "ID", null);
+ attribute("tfoot", "id", "ID", null);
+ attribute("th", "id", "ID", null);
+ attribute("thead", "id", "ID", null);
+ attribute("title", "id", "ID", null);
+ attribute("tr", "id", "ID", null);
+ attribute("tt", "id", "ID", null);
+ attribute("u", "id", "ID", null);
+ attribute("ul", "id", "ID", null);
+ attribute("var", "id", "ID", null);
+ attribute("wbr", "id", "ID", null);
+ attribute("xmp", "id", "ID", null);
+ attribute("a", "lang", "NMTOKEN", null);
+ attribute("abbr", "lang", "NMTOKEN", null);
+ attribute("acronym", "lang", "NMTOKEN", null);
+ attribute("address", "lang", "NMTOKEN", null);
+ attribute("applet", "lang", "NMTOKEN", null);
+ attribute("area", "lang", "NMTOKEN", null);
+ attribute("b", "lang", "NMTOKEN", null);
+ attribute("base", "lang", "NMTOKEN", null);
+ attribute("basefont", "lang", "NMTOKEN", null);
+ attribute("bdo", "lang", "NMTOKEN", null);
+ attribute("bgsound", "lang", "NMTOKEN", null);
+ attribute("big", "lang", "NMTOKEN", null);
+ attribute("blink", "lang", "NMTOKEN", null);
+ attribute("blockquote", "lang", "NMTOKEN", null);
+ attribute("body", "lang", "NMTOKEN", null);
+ attribute("br", "lang", "NMTOKEN", null);
+ attribute("button", "lang", "NMTOKEN", null);
+ attribute("canvas", "lang", "NMTOKEN", null);
+ attribute("caption", "lang", "NMTOKEN", null);
+ attribute("center", "lang", "NMTOKEN", null);
+ attribute("cite", "lang", "NMTOKEN", null);
+ attribute("code", "lang", "NMTOKEN", null);
+ attribute("col", "lang", "NMTOKEN", null);
+ attribute("colgroup", "lang", "NMTOKEN", null);
+ attribute("comment", "lang", "NMTOKEN", null);
+ attribute("dd", "lang", "NMTOKEN", null);
+ attribute("del", "lang", "NMTOKEN", null);
+ attribute("dfn", "lang", "NMTOKEN", null);
+ attribute("dir", "lang", "NMTOKEN", null);
+ attribute("div", "lang", "NMTOKEN", null);
+ attribute("dl", "lang", "NMTOKEN", null);
+ attribute("dt", "lang", "NMTOKEN", null);
+ attribute("em", "lang", "NMTOKEN", null);
+ attribute("fieldset", "lang", "NMTOKEN", null);
+ attribute("font", "lang", "NMTOKEN", null);
+ attribute("form", "lang", "NMTOKEN", null);
+ attribute("frame", "lang", "NMTOKEN", null);
+ attribute("frameset", "lang", "NMTOKEN", null);
+ attribute("h1", "lang", "NMTOKEN", null);
+ attribute("h2", "lang", "NMTOKEN", null);
+ attribute("h3", "lang", "NMTOKEN", null);
+ attribute("h4", "lang", "NMTOKEN", null);
+ attribute("h5", "lang", "NMTOKEN", null);
+ attribute("h6", "lang", "NMTOKEN", null);
+ attribute("head", "lang", "NMTOKEN", null);
+ attribute("hr", "lang", "NMTOKEN", null);
+ attribute("html", "lang", "NMTOKEN", null);
+ attribute("i", "lang", "NMTOKEN", null);
+ attribute("iframe", "lang", "NMTOKEN", null);
+ attribute("img", "lang", "NMTOKEN", null);
+ attribute("input", "lang", "NMTOKEN", null);
+ attribute("ins", "lang", "NMTOKEN", null);
+ attribute("isindex", "lang", "NMTOKEN", null);
+ attribute("kbd", "lang", "NMTOKEN", null);
+ attribute("label", "lang", "NMTOKEN", null);
+ attribute("legend", "lang", "NMTOKEN", null);
+ attribute("li", "lang", "NMTOKEN", null);
+ attribute("link", "lang", "NMTOKEN", null);
+ attribute("listing", "lang", "NMTOKEN", null);
+ attribute("map", "lang", "NMTOKEN", null);
+ attribute("marquee", "lang", "NMTOKEN", null);
+ attribute("menu", "lang", "NMTOKEN", null);
+ attribute("meta", "lang", "NMTOKEN", null);
+ attribute("nobr", "lang", "NMTOKEN", null);
+ attribute("noframes", "lang", "NMTOKEN", null);
+ attribute("noscript", "lang", "NMTOKEN", null);
+ attribute("object", "lang", "NMTOKEN", null);
+ attribute("ol", "lang", "NMTOKEN", null);
+ attribute("optgroup", "lang", "NMTOKEN", null);
+ attribute("option", "lang", "NMTOKEN", null);
+ attribute("p", "lang", "NMTOKEN", null);
+ attribute("param", "lang", "NMTOKEN", null);
+ attribute("pre", "lang", "NMTOKEN", null);
+ attribute("q", "lang", "NMTOKEN", null);
+ attribute("rb", "lang", "NMTOKEN", null);
+ attribute("rbc", "lang", "NMTOKEN", null);
+ attribute("rp", "lang", "NMTOKEN", null);
+ attribute("rt", "lang", "NMTOKEN", null);
+ attribute("rtc", "lang", "NMTOKEN", null);
+ attribute("ruby", "lang", "NMTOKEN", null);
+ attribute("s", "lang", "NMTOKEN", null);
+ attribute("samp", "lang", "NMTOKEN", null);
+ attribute("script", "lang", "NMTOKEN", null);
+ attribute("select", "lang", "NMTOKEN", null);
+ attribute("small", "lang", "NMTOKEN", null);
+ attribute("span", "lang", "NMTOKEN", null);
+ attribute("strike", "lang", "NMTOKEN", null);
+ attribute("strong", "lang", "NMTOKEN", null);
+ attribute("style", "lang", "NMTOKEN", null);
+ attribute("sub", "lang", "NMTOKEN", null);
+ attribute("sup", "lang", "NMTOKEN", null);
+ attribute("table", "lang", "NMTOKEN", null);
+ attribute("tbody", "lang", "NMTOKEN", null);
+ attribute("td", "lang", "NMTOKEN", null);
+ attribute("textarea", "lang", "NMTOKEN", null);
+ attribute("tfoot", "lang", "NMTOKEN", null);
+ attribute("th", "lang", "NMTOKEN", null);
+ attribute("thead", "lang", "NMTOKEN", null);
+ attribute("title", "lang", "NMTOKEN", null);
+ attribute("tr", "lang", "NMTOKEN", null);
+ attribute("tt", "lang", "NMTOKEN", null);
+ attribute("u", "lang", "NMTOKEN", null);
+ attribute("ul", "lang", "NMTOKEN", null);
+ attribute("var", "lang", "NMTOKEN", null);
+ attribute("wbr", "lang", "NMTOKEN", null);
+ attribute("xmp", "lang", "NMTOKEN", null);
+ entity("aacgr", 0x03AC);
+ entity("Aacgr", 0x0386);
+ entity("aacute", 0x00E1);
+ entity("Aacute", 0x00C1);
+ entity("abreve", 0x0103);
+ entity("Abreve", 0x0102);
+ entity("ac", 0x223E);
+ entity("acd", 0x223F);
+ entity("acirc", 0x00E2);
+ entity("Acirc", 0x00C2);
+ entity("acute", 0x00B4);
+ entity("acy", 0x0430);
+ entity("Acy", 0x0410);
+ entity("aelig", 0x00E6);
+ entity("AElig", 0x00C6);
+ entity("af", 0x2061);
+ entity("afr", 0x1D51E);
+ entity("Afr", 0x1D504);
+ entity("agr", 0x03B1);
+ entity("Agr", 0x0391);
+ entity("agrave", 0x00E0);
+ entity("Agrave", 0x00C0);
+ entity("alefsym", 0x2135);
+ entity("aleph", 0x2135);
+ entity("alpha", 0x03B1);
+ entity("Alpha", 0x0391);
+ entity("amacr", 0x0101);
+ entity("Amacr", 0x0100);
+ entity("amalg", 0x2A3F);
+ entity("amp", 0x0026);
+ entity("and", 0x2227);
+ entity("And", 0x2A53);
+ entity("andand", 0x2A55);
+ entity("andd", 0x2A5C);
+ entity("andslope", 0x2A58);
+ entity("andv", 0x2A5A);
+ entity("ang", 0x2220);
+ entity("ange", 0x29A4);
+ entity("angle", 0x2220);
+ entity("angmsd", 0x2221);
+ entity("angmsdaa", 0x29A8);
+ entity("angmsdab", 0x29A9);
+ entity("angmsdac", 0x29AA);
+ entity("angmsdad", 0x29AB);
+ entity("angmsdae", 0x29AC);
+ entity("angmsdaf", 0x29AD);
+ entity("angmsdag", 0x29AE);
+ entity("angmsdah", 0x29AF);
+ entity("angrt", 0x221F);
+ entity("angrtvb", 0x22BE);
+ entity("angrtvbd", 0x299D);
+ entity("angsph", 0x2222);
+ entity("angst", 0x212B);
+ entity("angzarr", 0x237C);
+ entity("aogon", 0x0105);
+ entity("Aogon", 0x0104);
+ entity("aopf", 0x1D552);
+ entity("Aopf", 0x1D538);
+ entity("ap", 0x2248);
+ entity("apacir", 0x2A6F);
+ entity("ape", 0x224A);
+ entity("apE", 0x2A70);
+ entity("apid", 0x224B);
+ entity("apos", 0x0027);
+ entity("ApplyFunction", 0x2061);
+ entity("approx", 0x2248);
+ entity("approxeq", 0x224A);
+ entity("aring", 0x00E5);
+ entity("Aring", 0x00C5);
+ entity("ascr", 0x1D4B6);
+ entity("Ascr", 0x1D49C);
+ entity("Assign", 0x2254);
+ entity("ast", 0x002A);
+ entity("asymp", 0x2248);
+ entity("asympeq", 0x224D);
+ entity("atilde", 0x00E3);
+ entity("Atilde", 0x00C3);
+ entity("auml", 0x00E4);
+ entity("Auml", 0x00C4);
+ entity("awconint", 0x2233);
+ entity("awint", 0x2A11);
+ entity("b.alpha", 0x1D6C2);
+ entity("b.beta", 0x1D6C3);
+ entity("b.chi", 0x1D6D8);
+ entity("b.delta", 0x1D6C5);
+ entity("b.Delta", 0x1D6AB);
+ entity("b.epsi", 0x1D6C6);
+ entity("b.epsiv", 0x1D6DC);
+ entity("b.eta", 0x1D6C8);
+ entity("b.gamma", 0x1D6C4);
+ entity("b.Gamma", 0x1D6AA);
+ entity("b.gammad", 0x1D7CB);
+ entity("b.Gammad", 0x1D7CA);
+ entity("b.iota", 0x1D6CA);
+ entity("b.kappa", 0x1D6CB);
+ entity("b.kappav", 0x1D6DE);
+ entity("b.lambda", 0x1D6CC);
+ entity("b.Lambda", 0x1D6B2);
+ entity("b.mu", 0x1D6CD);
+ entity("b.nu", 0x1D6CE);
+ entity("b.omega", 0x1D6DA);
+ entity("b.Omega", 0x1D6C0);
+ entity("b.phi", 0x1D6D7);
+ entity("b.Phi", 0x1D6BD);
+ entity("b.phiv", 0x1D6DF);
+ entity("b.pi", 0x1D6D1);
+ entity("b.Pi", 0x1D6B7);
+ entity("b.piv", 0x1D6E1);
+ entity("b.psi", 0x1D6D9);
+ entity("b.Psi", 0x1D6BF);
+ entity("b.rho", 0x1D6D2);
+ entity("b.rhov", 0x1D6E0);
+ entity("b.sigma", 0x1D6D4);
+ entity("b.Sigma", 0x1D6BA);
+ entity("b.sigmav", 0x1D6D3);
+ entity("b.tau", 0x1D6D5);
+ entity("b.Theta", 0x1D6AF);
+ entity("b.thetas", 0x1D6C9);
+ entity("b.thetav", 0x1D6DD);
+ entity("b.upsi", 0x1D6D6);
+ entity("b.Upsi", 0x1D6BC);
+ entity("b.xi", 0x1D6CF);
+ entity("b.Xi", 0x1D6B5);
+ entity("b.zeta", 0x1D6C7);
+ entity("backcong", 0x224C);
+ entity("backepsilon", 0x03F6);
+ entity("backprime", 0x2035);
+ entity("backsim", 0x223D);
+ entity("backsimeq", 0x22CD);
+ entity("Backslash", 0x2216);
+ entity("Barv", 0x2AE7);
+ entity("barvee", 0x22BD);
+ entity("barwed", 0x2305);
+ entity("Barwed", 0x2306);
+ entity("barwedge", 0x2305);
+ entity("bbrk", 0x23B5);
+ entity("bbrktbrk", 0x23B6);
+ entity("bcong", 0x224C);
+ entity("bcy", 0x0431);
+ entity("Bcy", 0x0411);
+ entity("bdquo", 0x201E);
+ entity("becaus", 0x2235);
+ entity("because", 0x2235);
+ entity("bemptyv", 0x29B0);
+ entity("bepsi", 0x03F6);
+ entity("bernou", 0x212C);
+ entity("Bernoullis", 0x212C);
+ entity("beta", 0x03B2);
+ entity("Beta", 0x0392);
+ entity("beth", 0x2136);
+ entity("between", 0x226C);
+ entity("bfr", 0x1D51F);
+ entity("Bfr", 0x1D505);
+ entity("bgr", 0x03B2);
+ entity("Bgr", 0x0392);
+ entity("bigcap", 0x22C2);
+ entity("bigcirc", 0x25EF);
+ entity("bigcup", 0x22C3);
+ entity("bigodot", 0x2A00);
+ entity("bigoplus", 0x2A01);
+ entity("bigotimes", 0x2A02);
+ entity("bigsqcup", 0x2A06);
+ entity("bigstar", 0x2605);
+ entity("bigtriangledown", 0x25BD);
+ entity("bigtriangleup", 0x25B3);
+ entity("biguplus", 0x2A04);
+ entity("bigvee", 0x22C1);
+ entity("bigwedge", 0x22C0);
+ entity("bkarow", 0x290D);
+ entity("blacklozenge", 0x29EB);
+ entity("blacksquare", 0x25AA);
+ entity("blacktriangle", 0x25B4);
+ entity("blacktriangledown", 0x25BE);
+ entity("blacktriangleleft", 0x25C2);
+ entity("blacktriangleright", 0x25B8);
+ entity("blank", 0x2423);
+ entity("blk12", 0x2592);
+ entity("blk14", 0x2591);
+ entity("blk34", 0x2593);
+ entity("block", 0x2588);
+ entity("bnot", 0x2310);
+ entity("bNot", 0x2AED);
+ entity("bopf", 0x1D553);
+ entity("Bopf", 0x1D539);
+ entity("bot", 0x22A5);
+ entity("bottom", 0x22A5);
+ entity("bowtie", 0x22C8);
+ entity("boxbox", 0x29C9);
+ entity("boxdl", 0x2510);
+ entity("boxdL", 0x2555);
+ entity("boxDl", 0x2556);
+ entity("boxDL", 0x2557);
+ entity("boxdr", 0x250C);
+ entity("boxdR", 0x2552);
+ entity("boxDr", 0x2553);
+ entity("boxDR", 0x2554);
+ entity("boxh", 0x2500);
+ entity("boxH", 0x2550);
+ entity("boxhd", 0x252C);
+ entity("boxhD", 0x2565);
+ entity("boxHd", 0x2564);
+ entity("boxHD", 0x2566);
+ entity("boxhu", 0x2534);
+ entity("boxhU", 0x2568);
+ entity("boxHu", 0x2567);
+ entity("boxHU", 0x2569);
+ entity("boxminus", 0x229F);
+ entity("boxplus", 0x229E);
+ entity("boxtimes", 0x22A0);
+ entity("boxul", 0x2518);
+ entity("boxuL", 0x255B);
+ entity("boxUl", 0x255C);
+ entity("boxUL", 0x255D);
+ entity("boxur", 0x2514);
+ entity("boxuR", 0x2558);
+ entity("boxUr", 0x2559);
+ entity("boxUR", 0x255A);
+ entity("boxv", 0x2502);
+ entity("boxV", 0x2551);
+ entity("boxvh", 0x253C);
+ entity("boxvH", 0x256A);
+ entity("boxVh", 0x256B);
+ entity("boxVH", 0x256C);
+ entity("boxvl", 0x2524);
+ entity("boxvL", 0x2561);
+ entity("boxVl", 0x2562);
+ entity("boxVL", 0x2563);
+ entity("boxvr", 0x251C);
+ entity("boxvR", 0x255E);
+ entity("boxVr", 0x255F);
+ entity("boxVR", 0x2560);
+ entity("bprime", 0x2035);
+ entity("breve", 0x02D8);
+ entity("brvbar", 0x00A6);
+ entity("bscr", 0x1D4B7);
+ entity("Bscr", 0x212C);
+ entity("bsemi", 0x204F);
+ entity("bsim", 0x223D);
+ entity("bsime", 0x22CD);
+ entity("bsol", 0x005C);
+ entity("bsolb", 0x29C5);
+ entity("bull", 0x2022);
+ entity("bullet", 0x2022);
+ entity("bump", 0x224E);
+ entity("bumpe", 0x224F);
+ entity("bumpE", 0x2AAE);
+ entity("bumpeq", 0x224F);
+ entity("Bumpeq", 0x224E);
+ entity("cacute", 0x0107);
+ entity("Cacute", 0x0106);
+ entity("cap", 0x2229);
+ entity("Cap", 0x22D2);
+ entity("capand", 0x2A44);
+ entity("capbrcup", 0x2A49);
+ entity("capcap", 0x2A4B);
+ entity("capcup", 0x2A47);
+ entity("capdot", 0x2A40);
+ entity("CapitalDifferentialD", 0x2145);
+ entity("caret", 0x2041);
+ entity("caron", 0x02C7);
+ entity("Cayleys", 0x212D);
+ entity("ccaps", 0x2A4D);
+ entity("ccaron", 0x010D);
+ entity("Ccaron", 0x010C);
+ entity("ccedil", 0x00E7);
+ entity("Ccedil", 0x00C7);
+ entity("ccirc", 0x0109);
+ entity("Ccirc", 0x0108);
+ entity("Cconint", 0x2230);
+ entity("ccups", 0x2A4C);
+ entity("ccupssm", 0x2A50);
+ entity("cdot", 0x010B);
+ entity("Cdot", 0x010A);
+ entity("cedil", 0x00B8);
+ entity("Cedilla", 0x00B8);
+ entity("cemptyv", 0x29B2);
+ entity("cent", 0x00A2);
+ entity("centerdot", 0x00B7);
+ entity("cfr", 0x1D520);
+ entity("Cfr", 0x212D);
+ entity("chcy", 0x0447);
+ entity("CHcy", 0x0427);
+ entity("check", 0x2713);
+ entity("checkmark", 0x2713);
+ entity("chi", 0x03C7);
+ entity("Chi", 0x03A7);
+ entity("cir", 0x25CB);
+ entity("circ", 0x02C6);
+ entity("circeq", 0x2257);
+ entity("circlearrowleft", 0x21BA);
+ entity("circlearrowright", 0x21BB);
+ entity("circledast", 0x229B);
+ entity("circledcirc", 0x229A);
+ entity("circleddash", 0x229D);
+ entity("CircleDot", 0x2299);
+ entity("circledR", 0x00AE);
+ entity("circledS", 0x24C8);
+ entity("CircleMinus", 0x2296);
+ entity("CirclePlus", 0x2295);
+ entity("CircleTimes", 0x2297);
+ entity("cire", 0x2257);
+ entity("cirE", 0x29C3);
+ entity("cirfnint", 0x2A10);
+ entity("cirmid", 0x2AEF);
+ entity("cirscir", 0x29C2);
+ entity("ClockwiseContourIntegral", 0x2232);
+ entity("CloseCurlyDoubleQuote", 0x201D);
+ entity("CloseCurlyQuote", 0x2019);
+ entity("clubs", 0x2663);
+ entity("clubsuit", 0x2663);
+ entity("colon", 0x003A);
+ entity("Colon", 0x2237);
+ entity("colone", 0x2254);
+ entity("Colone", 0x2A74);
+ entity("coloneq", 0x2254);
+ entity("comma", 0x002C);
+ entity("commat", 0x0040);
+ entity("comp", 0x2201);
+ entity("compfn", 0x2218);
+ entity("complement", 0x2201);
+ entity("complexes", 0x2102);
+ entity("cong", 0x2245);
+ entity("congdot", 0x2A6D);
+ entity("Congruent", 0x2261);
+ entity("conint", 0x222E);
+ entity("Conint", 0x222F);
+ entity("ContourIntegral", 0x222E);
+ entity("copf", 0x1D554);
+ entity("Copf", 0x2102);
+ entity("coprod", 0x2210);
+ entity("Coproduct", 0x2210);
+ entity("copy", 0x00A9);
+ entity("copysr", 0x2117);
+ entity("CounterClockwiseContourIntegral", 0x2233);
+ entity("crarr", 0x21B5);
+ entity("cross", 0x2717);
+ entity("Cross", 0x2A2F);
+ entity("cscr", 0x1D4B8);
+ entity("Cscr", 0x1D49E);
+ entity("csub", 0x2ACF);
+ entity("csube", 0x2AD1);
+ entity("csup", 0x2AD0);
+ entity("csupe", 0x2AD2);
+ entity("ctdot", 0x22EF);
+ entity("cudarrl", 0x2938);
+ entity("cudarrr", 0x2935);
+ entity("cuepr", 0x22DE);
+ entity("cuesc", 0x22DF);
+ entity("cularr", 0x21B6);
+ entity("cularrp", 0x293D);
+ entity("cup", 0x222A);
+ entity("Cup", 0x22D3);
+ entity("cupbrcap", 0x2A48);
+ entity("cupcap", 0x2A46);
+ entity("CupCap", 0x224D);
+ entity("cupcup", 0x2A4A);
+ entity("cupdot", 0x228D);
+ entity("cupor", 0x2A45);
+ entity("curarr", 0x21B7);
+ entity("curarrm", 0x293C);
+ entity("curlyeqprec", 0x22DE);
+ entity("curlyeqsucc", 0x22DF);
+ entity("curlyvee", 0x22CE);
+ entity("curlywedge", 0x22CF);
+ entity("curren", 0x00A4);
+ entity("curvearrowleft", 0x21B6);
+ entity("curvearrowright", 0x21B7);
+ entity("cuvee", 0x22CE);
+ entity("cuwed", 0x22CF);
+ entity("cwconint", 0x2232);
+ entity("cwint", 0x2231);
+ entity("cylcty", 0x232D);
+ entity("dagger", 0x2020);
+ entity("Dagger", 0x2021);
+ entity("daleth", 0x2138);
+ entity("darr", 0x2193);
+ entity("dArr", 0x21D3);
+ entity("Darr", 0x21A1);
+ entity("dash", 0x2010);
+ entity("dashv", 0x22A3);
+ entity("Dashv", 0x2AE4);
+ entity("dbkarow", 0x290F);
+ entity("dblac", 0x02DD);
+ entity("dcaron", 0x010F);
+ entity("Dcaron", 0x010E);
+ entity("dcy", 0x0434);
+ entity("Dcy", 0x0414);
+ entity("dd", 0x2146);
+ entity("DD", 0x2145);
+ entity("ddagger", 0x2021);
+ entity("ddarr", 0x21CA);
+ entity("DDotrahd", 0x2911);
+ entity("ddotseq", 0x2A77);
+ entity("deg", 0x00B0);
+ entity("Del", 0x2207);
+ entity("delta", 0x03B4);
+ entity("Delta", 0x0394);
+ entity("demptyv", 0x29B1);
+ entity("dfisht", 0x297F);
+ entity("dfr", 0x1D521);
+ entity("Dfr", 0x1D507);
+ entity("dgr", 0x03B4);
+ entity("Dgr", 0x0394);
+ entity("dHar", 0x2965);
+ entity("dharl", 0x21C3);
+ entity("dharr", 0x21C2);
+ entity("DiacriticalAcute", 0x00B4);
+ entity("DiacriticalDot", 0x02D9);
+ entity("DiacriticalDoubleAcute", 0x02DD);
+ entity("DiacriticalGrave", 0x0060);
+ entity("DiacriticalTilde", 0x02DC);
+ entity("diam", 0x22C4);
+ entity("diamond", 0x22C4);
+ entity("diamondsuit", 0x2666);
+ entity("diams", 0x2666);
+ entity("die", 0x00A8);
+ entity("DifferentialD", 0x2146);
+ entity("digamma", 0x03DD);
+ entity("disin", 0x22F2);
+ entity("div", 0x00F7);
+ entity("divide", 0x00F7);
+ entity("divideontimes", 0x22C7);
+ entity("divonx", 0x22C7);
+ entity("djcy", 0x0452);
+ entity("DJcy", 0x0402);
+ entity("dlcorn", 0x231E);
+ entity("dlcrop", 0x230D);
+ entity("dollar", 0x0024);
+ entity("dopf", 0x1D555);
+ entity("Dopf", 0x1D53B);
+ entity("dot", 0x02D9);
+ entity("Dot", 0x00A8);
+ entity("doteq", 0x2250);
+ entity("doteqdot", 0x2251);
+ entity("DotEqual", 0x2250);
+ entity("dotminus", 0x2238);
+ entity("dotplus", 0x2214);
+ entity("dotsquare", 0x22A1);
+ entity("doublebarwedge", 0x2306);
+ entity("DoubleContourIntegral", 0x222F);
+ entity("DoubleDot", 0x00A8);
+ entity("DoubleDownArrow", 0x21D3);
+ entity("DoubleLeftArrow", 0x21D0);
+ entity("DoubleLeftRightArrow", 0x21D4);
+ entity("DoubleLeftTee", 0x2AE4);
+ entity("DoubleLongLeftArrow", 0x27F8);
+ entity("DoubleLongLeftRightArrow", 0x27FA);
+ entity("DoubleLongRightArrow", 0x27F9);
+ entity("DoubleRightArrow", 0x21D2);
+ entity("DoubleRightTee", 0x22A8);
+ entity("DoubleUpArrow", 0x21D1);
+ entity("DoubleUpDownArrow", 0x21D5);
+ entity("DoubleVerticalBar", 0x2225);
+ entity("downarrow", 0x2193);
+ entity("Downarrow", 0x21D3);
+ entity("DownArrowBar", 0x2913);
+ entity("DownArrowUpArrow", 0x21F5);
+ entity("downdownarrows", 0x21CA);
+ entity("downharpoonleft", 0x21C3);
+ entity("downharpoonright", 0x21C2);
+ entity("DownLeftRightVector", 0x2950);
+ entity("DownLeftTeeVector", 0x295E);
+ entity("DownLeftVector", 0x21BD);
+ entity("DownLeftVectorBar", 0x2956);
+ entity("DownRightTeeVector", 0x295F);
+ entity("DownRightVector", 0x21C1);
+ entity("DownRightVectorBar", 0x2957);
+ entity("DownTee", 0x22A4);
+ entity("DownTeeArrow", 0x21A7);
+ entity("drbkarow", 0x2910);
+ entity("drcorn", 0x231F);
+ entity("drcrop", 0x230C);
+ entity("dscr", 0x1D4B9);
+ entity("Dscr", 0x1D49F);
+ entity("dscy", 0x0455);
+ entity("DScy", 0x0405);
+ entity("dsol", 0x29F6);
+ entity("dstrok", 0x0111);
+ entity("Dstrok", 0x0110);
+ entity("dtdot", 0x22F1);
+ entity("dtri", 0x25BF);
+ entity("dtrif", 0x25BE);
+ entity("duarr", 0x21F5);
+ entity("duhar", 0x296F);
+ entity("dwangle", 0x29A6);
+ entity("dzcy", 0x045F);
+ entity("DZcy", 0x040F);
+ entity("dzigrarr", 0x27FF);
+ entity("eacgr", 0x03AD);
+ entity("Eacgr", 0x0388);
+ entity("eacute", 0x00E9);
+ entity("Eacute", 0x00C9);
+ entity("easter", 0x2A6E);
+ entity("ecaron", 0x011B);
+ entity("Ecaron", 0x011A);
+ entity("ecir", 0x2256);
+ entity("ecirc", 0x00EA);
+ entity("Ecirc", 0x00CA);
+ entity("ecolon", 0x2255);
+ entity("ecy", 0x044D);
+ entity("Ecy", 0x042D);
+ entity("eDDot", 0x2A77);
+ entity("edot", 0x0117);
+ entity("eDot", 0x2251);
+ entity("Edot", 0x0116);
+ entity("ee", 0x2147);
+ entity("eeacgr", 0x03AE);
+ entity("EEacgr", 0x0389);
+ entity("eegr", 0x03B7);
+ entity("EEgr", 0x0397);
+ entity("efDot", 0x2252);
+ entity("efr", 0x1D522);
+ entity("Efr", 0x1D508);
+ entity("eg", 0x2A9A);
+ entity("egr", 0x03B5);
+ entity("Egr", 0x0395);
+ entity("egrave", 0x00E8);
+ entity("Egrave", 0x00C8);
+ entity("egs", 0x2A96);
+ entity("egsdot", 0x2A98);
+ entity("el", 0x2A99);
+ entity("Element", 0x2208);
+ entity("elinters", 0x23E7);
+ entity("ell", 0x2113);
+ entity("els", 0x2A95);
+ entity("elsdot", 0x2A97);
+ entity("emacr", 0x0113);
+ entity("Emacr", 0x0112);
+ entity("empty", 0x2205);
+ entity("emptyset", 0x2205);
+ entity("EmptySmallSquare", 0x25FB);
+ entity("emptyv", 0x2205);
+ entity("EmptyVerySmallSquare", 0x25AB);
+ entity("emsp", 0x2003);
+ entity("emsp13", 0x2004);
+ entity("emsp14", 0x2005);
+ entity("eng", 0x014B);
+ entity("ENG", 0x014A);
+ entity("ensp", 0x2002);
+ entity("eogon", 0x0119);
+ entity("Eogon", 0x0118);
+ entity("eopf", 0x1D556);
+ entity("Eopf", 0x1D53C);
+ entity("epar", 0x22D5);
+ entity("eparsl", 0x29E3);
+ entity("eplus", 0x2A71);
+ entity("epsi", 0x03F5);
+ entity("epsilon", 0x03B5);
+ entity("Epsilon", 0x0395);
+ entity("epsiv", 0x03B5);
+ entity("eqcirc", 0x2256);
+ entity("eqcolon", 0x2255);
+ entity("eqsim", 0x2242);
+ entity("eqslantgtr", 0x2A96);
+ entity("eqslantless", 0x2A95);
+ entity("Equal", 0x2A75);
+ entity("equals", 0x003D);
+ entity("EqualTilde", 0x2242);
+ entity("equest", 0x225F);
+ entity("Equilibrium", 0x21CC);
+ entity("equiv", 0x2261);
+ entity("equivDD", 0x2A78);
+ entity("eqvparsl", 0x29E5);
+ entity("erarr", 0x2971);
+ entity("erDot", 0x2253);
+ entity("escr", 0x212F);
+ entity("Escr", 0x2130);
+ entity("esdot", 0x2250);
+ entity("esim", 0x2242);
+ entity("Esim", 0x2A73);
+ entity("eta", 0x03B7);
+ entity("Eta", 0x0397);
+ entity("eth", 0x00F0);
+ entity("ETH", 0x00D0);
+ entity("euml", 0x00EB);
+ entity("Euml", 0x00CB);
+ entity("euro", 0x20AC);
+ entity("excl", 0x0021);
+ entity("exist", 0x2203);
+ entity("Exists", 0x2203);
+ entity("expectation", 0x2130);
+ entity("exponentiale", 0x2147);
+ entity("fallingdotseq", 0x2252);
+ entity("fcy", 0x0444);
+ entity("Fcy", 0x0424);
+ entity("female", 0x2640);
+ entity("ffilig", 0xFB03);
+ entity("fflig", 0xFB00);
+ entity("ffllig", 0xFB04);
+ entity("ffr", 0x1D523);
+ entity("Ffr", 0x1D509);
+ entity("filig", 0xFB01);
+ entity("FilledSmallSquare", 0x25FC);
+ entity("FilledVerySmallSquare", 0x25AA);
+ entity("flat", 0x266D);
+ entity("fllig", 0xFB02);
+ entity("fltns", 0x25B1);
+ entity("fnof", 0x0192);
+ entity("fopf", 0x1D557);
+ entity("Fopf", 0x1D53D);
+ entity("forall", 0x2200);
+ entity("fork", 0x22D4);
+ entity("forkv", 0x2AD9);
+ entity("Fouriertrf", 0x2131);
+ entity("fpartint", 0x2A0D);
+ entity("frac12", 0x00BD);
+ entity("frac13", 0x2153);
+ entity("frac14", 0x00BC);
+ entity("frac15", 0x2155);
+ entity("frac16", 0x2159);
+ entity("frac18", 0x215B);
+ entity("frac23", 0x2154);
+ entity("frac25", 0x2156);
+ entity("frac34", 0x00BE);
+ entity("frac35", 0x2157);
+ entity("frac38", 0x215C);
+ entity("frac45", 0x2158);
+ entity("frac56", 0x215A);
+ entity("frac58", 0x215D);
+ entity("frac78", 0x215E);
+ entity("frasl", 0x2044);
+ entity("frown", 0x2322);
+ entity("fscr", 0x1D4BB);
+ entity("Fscr", 0x2131);
+ entity("gacute", 0x01F5);
+ entity("gamma", 0x03B3);
+ entity("Gamma", 0x0393);
+ entity("gammad", 0x03DD);
+ entity("Gammad", 0x03DC);
+ entity("gap", 0x2A86);
+ entity("gbreve", 0x011F);
+ entity("Gbreve", 0x011E);
+ entity("Gcedil", 0x0122);
+ entity("gcirc", 0x011D);
+ entity("Gcirc", 0x011C);
+ entity("gcy", 0x0433);
+ entity("Gcy", 0x0413);
+ entity("gdot", 0x0121);
+ entity("Gdot", 0x0120);
+ entity("ge", 0x2265);
+ entity("gE", 0x2267);
+ entity("gel", 0x22DB);
+ entity("gEl", 0x2A8C);
+ entity("geq", 0x2265);
+ entity("geqq", 0x2267);
+ entity("geqslant", 0x2A7E);
+ entity("ges", 0x2A7E);
+ entity("gescc", 0x2AA9);
+ entity("gesdot", 0x2A80);
+ entity("gesdoto", 0x2A82);
+ entity("gesdotol", 0x2A84);
+ entity("gesles", 0x2A94);
+ entity("gfr", 0x1D524);
+ entity("Gfr", 0x1D50A);
+ entity("gg", 0x226B);
+ entity("Gg", 0x22D9);
+ entity("ggg", 0x22D9);
+ entity("ggr", 0x03B3);
+ entity("Ggr", 0x0393);
+ entity("gimel", 0x2137);
+ entity("gjcy", 0x0453);
+ entity("GJcy", 0x0403);
+ entity("gl", 0x2277);
+ entity("gla", 0x2AA5);
+ entity("glE", 0x2A92);
+ entity("glj", 0x2AA4);
+ entity("gnap", 0x2A8A);
+ entity("gnapprox", 0x2A8A);
+ entity("gne", 0x2A88);
+ entity("gnE", 0x2269);
+ entity("gneq", 0x2A88);
+ entity("gneqq", 0x2269);
+ entity("gnsim", 0x22E7);
+ entity("gopf", 0x1D558);
+ entity("Gopf", 0x1D53E);
+ entity("grave", 0x0060);
+ entity("GreaterEqual", 0x2265);
+ entity("GreaterEqualLess", 0x22DB);
+ entity("GreaterFullEqual", 0x2267);
+ entity("GreaterGreater", 0x2AA2);
+ entity("GreaterLess", 0x2277);
+ entity("GreaterSlantEqual", 0x2A7E);
+ entity("GreaterTilde", 0x2273);
+ entity("gscr", 0x210A);
+ entity("Gscr", 0x1D4A2);
+ entity("gsim", 0x2273);
+ entity("gsime", 0x2A8E);
+ entity("gsiml", 0x2A90);
+ entity("gt", 0x003E);
+ entity("Gt", 0x226B);
+ entity("gtcc", 0x2AA7);
+ entity("gtcir", 0x2A7A);
+ entity("gtdot", 0x22D7);
+ entity("gtlPar", 0x2995);
+ entity("gtquest", 0x2A7C);
+ entity("gtrapprox", 0x2A86);
+ entity("gtrarr", 0x2978);
+ entity("gtrdot", 0x22D7);
+ entity("gtreqless", 0x22DB);
+ entity("gtreqqless", 0x2A8C);
+ entity("gtrless", 0x2277);
+ entity("gtrsim", 0x2273);
+ entity("Hacek", 0x02C7);
+ entity("hairsp", 0x200A);
+ entity("half", 0x00BD);
+ entity("hamilt", 0x210B);
+ entity("hardcy", 0x044A);
+ entity("HARDcy", 0x042A);
+ entity("harr", 0x2194);
+ entity("hArr", 0x21D4);
+ entity("harrcir", 0x2948);
+ entity("harrw", 0x21AD);
+ entity("Hat", 0x005E);
+ entity("hbar", 0x210F);
+ entity("hcirc", 0x0125);
+ entity("Hcirc", 0x0124);
+ entity("hearts", 0x2665);
+ entity("heartsuit", 0x2665);
+ entity("hellip", 0x2026);
+ entity("hercon", 0x22B9);
+ entity("hfr", 0x1D525);
+ entity("Hfr", 0x210C);
+ entity("HilbertSpace", 0x210B);
+ entity("hksearow", 0x2925);
+ entity("hkswarow", 0x2926);
+ entity("hoarr", 0x21FF);
+ entity("homtht", 0x223B);
+ entity("hookleftarrow", 0x21A9);
+ entity("hookrightarrow", 0x21AA);
+ entity("hopf", 0x1D559);
+ entity("Hopf", 0x210D);
+ entity("horbar", 0x2015);
+ entity("HorizontalLine", 0x2500);
+ entity("hscr", 0x1D4BD);
+ entity("Hscr", 0x210B);
+ entity("hslash", 0x210F);
+ entity("hstrok", 0x0127);
+ entity("Hstrok", 0x0126);
+ entity("HumpDownHump", 0x224E);
+ entity("HumpEqual", 0x224F);
+ entity("hybull", 0x2043);
+ entity("hyphen", 0x2010);
+ entity("iacgr", 0x03AF);
+ entity("Iacgr", 0x038A);
+ entity("iacute", 0x00ED);
+ entity("Iacute", 0x00CD);
+ entity("ic", 0x2063);
+ entity("icirc", 0x00EE);
+ entity("Icirc", 0x00CE);
+ entity("icy", 0x0438);
+ entity("Icy", 0x0418);
+ entity("idiagr", 0x0390);
+ entity("idigr", 0x03CA);
+ entity("Idigr", 0x03AA);
+ entity("Idot", 0x0130);
+ entity("iecy", 0x0435);
+ entity("IEcy", 0x0415);
+ entity("iexcl", 0x00A1);
+ entity("iff", 0x21D4);
+ entity("ifr", 0x1D526);
+ entity("Ifr", 0x2111);
+ entity("igr", 0x03B9);
+ entity("Igr", 0x0399);
+ entity("igrave", 0x00EC);
+ entity("Igrave", 0x00CC);
+ entity("ii", 0x2148);
+ entity("iiiint", 0x2A0C);
+ entity("iiint", 0x222D);
+ entity("iinfin", 0x29DC);
+ entity("iiota", 0x2129);
+ entity("ijlig", 0x0133);
+ entity("IJlig", 0x0132);
+ entity("Im", 0x2111);
+ entity("imacr", 0x012B);
+ entity("Imacr", 0x012A);
+ entity("image", 0x2111);
+ entity("ImaginaryI", 0x2148);
+ entity("imagline", 0x2110);
+ entity("imagpart", 0x2111);
+ entity("imath", 0x0131);
+ entity("imof", 0x22B7);
+ entity("imped", 0x01B5);
+ entity("Implies", 0x21D2);
+ entity("in", 0x2208);
+ entity("incare", 0x2105);
+ entity("infin", 0x221E);
+ entity("infintie", 0x29DD);
+ entity("inodot", 0x0131);
+ entity("int", 0x222B);
+ entity("Int", 0x222C);
+ entity("intcal", 0x22BA);
+ entity("integers", 0x2124);
+ entity("Integral", 0x222B);
+ entity("intercal", 0x22BA);
+ entity("Intersection", 0x22C2);
+ entity("intlarhk", 0x2A17);
+ entity("intprod", 0x2A3C);
+ entity("InvisibleComma", 0x2063);
+ entity("InvisibleTimes", 0x2062);
+ entity("iocy", 0x0451);
+ entity("IOcy", 0x0401);
+ entity("iogon", 0x012F);
+ entity("Iogon", 0x012E);
+ entity("iopf", 0x1D55A);
+ entity("Iopf", 0x1D540);
+ entity("iota", 0x03B9);
+ entity("Iota", 0x0399);
+ entity("iprod", 0x2A3C);
+ entity("iquest", 0x00BF);
+ entity("iscr", 0x1D4BE);
+ entity("Iscr", 0x2110);
+ entity("isin", 0x2208);
+ entity("isindot", 0x22F5);
+ entity("isinE", 0x22F9);
+ entity("isins", 0x22F4);
+ entity("isinsv", 0x22F3);
+ entity("isinv", 0x2208);
+ entity("it", 0x2062);
+ entity("itilde", 0x0129);
+ entity("Itilde", 0x0128);
+ entity("iukcy", 0x0456);
+ entity("Iukcy", 0x0406);
+ entity("iuml", 0x00EF);
+ entity("Iuml", 0x00CF);
+ entity("jcirc", 0x0135);
+ entity("Jcirc", 0x0134);
+ entity("jcy", 0x0439);
+ entity("Jcy", 0x0419);
+ entity("jfr", 0x1D527);
+ entity("Jfr", 0x1D50D);
+ entity("jmath", 0x0237);
+ entity("jopf", 0x1D55B);
+ entity("Jopf", 0x1D541);
+ entity("jscr", 0x1D4BF);
+ entity("Jscr", 0x1D4A5);
+ entity("jsercy", 0x0458);
+ entity("Jsercy", 0x0408);
+ entity("jukcy", 0x0454);
+ entity("Jukcy", 0x0404);
+ entity("kappa", 0x03BA);
+ entity("Kappa", 0x039A);
+ entity("kappav", 0x03F0);
+ entity("kcedil", 0x0137);
+ entity("Kcedil", 0x0136);
+ entity("kcy", 0x043A);
+ entity("Kcy", 0x041A);
+ entity("kfr", 0x1D528);
+ entity("Kfr", 0x1D50E);
+ entity("kgr", 0x03BA);
+ entity("Kgr", 0x039A);
+ entity("kgreen", 0x0138);
+ entity("khcy", 0x0445);
+ entity("KHcy", 0x0425);
+ entity("khgr", 0x03C7);
+ entity("KHgr", 0x03A7);
+ entity("kjcy", 0x045C);
+ entity("KJcy", 0x040C);
+ entity("kopf", 0x1D55C);
+ entity("Kopf", 0x1D542);
+ entity("kscr", 0x1D4C0);
+ entity("Kscr", 0x1D4A6);
+ entity("lAarr", 0x21DA);
+ entity("lacute", 0x013A);
+ entity("Lacute", 0x0139);
+ entity("laemptyv", 0x29B4);
+ entity("lagran", 0x2112);
+ entity("lambda", 0x03BB);
+ entity("Lambda", 0x039B);
+ entity("lang", 0x2329);
+ entity("Lang", 0x27EA);
+ entity("langd", 0x2991);
+ entity("langle", 0x2329);
+ entity("lap", 0x2A85);
+ entity("Laplacetrf", 0x2112);
+ entity("laquo", 0x00AB);
+ entity("larr", 0x2190);
+ entity("lArr", 0x21D0);
+ entity("Larr", 0x219E);
+ entity("larrb", 0x21E4);
+ entity("larrbfs", 0x291F);
+ entity("larrfs", 0x291D);
+ entity("larrhk", 0x21A9);
+ entity("larrlp", 0x21AB);
+ entity("larrpl", 0x2939);
+ entity("larrsim", 0x2973);
+ entity("larrtl", 0x21A2);
+ entity("lat", 0x2AAB);
+ entity("latail", 0x2919);
+ entity("lAtail", 0x291B);
+ entity("late", 0x2AAD);
+ entity("lbarr", 0x290C);
+ entity("lBarr", 0x290E);
+ entity("lbbrk", 0x2997);
+ entity("lbrace", 0x007B);
+ entity("lbrack", 0x005B);
+ entity("lbrke", 0x298B);
+ entity("lbrksld", 0x298F);
+ entity("lbrkslu", 0x298D);
+ entity("lcaron", 0x013E);
+ entity("Lcaron", 0x013D);
+ entity("lcedil", 0x013C);
+ entity("Lcedil", 0x013B);
+ entity("lceil", 0x2308);
+ entity("lcub", 0x007B);
+ entity("lcy", 0x043B);
+ entity("Lcy", 0x041B);
+ entity("ldca", 0x2936);
+ entity("ldquo", 0x201C);
+ entity("ldquor", 0x201E);
+ entity("ldrdhar", 0x2967);
+ entity("ldrushar", 0x294B);
+ entity("ldsh", 0x21B2);
+ entity("le", 0x2264);
+ entity("lE", 0x2266);
+ entity("LeftAngleBracket", 0x2329);
+ entity("leftarrow", 0x2190);
+ entity("Leftarrow", 0x21D0);
+ entity("LeftArrowBar", 0x21E4);
+ entity("LeftArrowRightArrow", 0x21C6);
+ entity("leftarrowtail", 0x21A2);
+ entity("LeftCeiling", 0x2308);
+ entity("LeftDoubleBracket", 0x27E6);
+ entity("LeftDownTeeVector", 0x2961);
+ entity("LeftDownVector", 0x21C3);
+ entity("LeftDownVectorBar", 0x2959);
+ entity("LeftFloor", 0x230A);
+ entity("leftharpoondown", 0x21BD);
+ entity("leftharpoonup", 0x21BC);
+ entity("leftleftarrows", 0x21C7);
+ entity("leftrightarrow", 0x2194);
+ entity("Leftrightarrow", 0x21D4);
+ entity("leftrightarrows", 0x21C6);
+ entity("leftrightharpoons", 0x21CB);
+ entity("leftrightsquigarrow", 0x21AD);
+ entity("LeftRightVector", 0x294E);
+ entity("LeftTee", 0x22A3);
+ entity("LeftTeeArrow", 0x21A4);
+ entity("LeftTeeVector", 0x295A);
+ entity("leftthreetimes", 0x22CB);
+ entity("LeftTriangle", 0x22B2);
+ entity("LeftTriangleBar", 0x29CF);
+ entity("LeftTriangleEqual", 0x22B4);
+ entity("LeftUpDownVector", 0x2951);
+ entity("LeftUpTeeVector", 0x2960);
+ entity("LeftUpVector", 0x21BF);
+ entity("LeftUpVectorBar", 0x2958);
+ entity("LeftVector", 0x21BC);
+ entity("LeftVectorBar", 0x2952);
+ entity("leg", 0x22DA);
+ entity("lEg", 0x2A8B);
+ entity("leq", 0x2264);
+ entity("leqq", 0x2266);
+ entity("leqslant", 0x2A7D);
+ entity("les", 0x2A7D);
+ entity("lescc", 0x2AA8);
+ entity("lesdot", 0x2A7F);
+ entity("lesdoto", 0x2A81);
+ entity("lesdotor", 0x2A83);
+ entity("lesges", 0x2A93);
+ entity("lessapprox", 0x2A85);
+ entity("lessdot", 0x22D6);
+ entity("lesseqgtr", 0x22DA);
+ entity("lesseqqgtr", 0x2A8B);
+ entity("LessEqualGreater", 0x22DA);
+ entity("LessFullEqual", 0x2266);
+ entity("LessGreater", 0x2276);
+ entity("lessgtr", 0x2276);
+ entity("LessLess", 0x2AA1);
+ entity("lesssim", 0x2272);
+ entity("LessSlantEqual", 0x2A7D);
+ entity("LessTilde", 0x2272);
+ entity("lfisht", 0x297C);
+ entity("lfloor", 0x230A);
+ entity("lfr", 0x1D529);
+ entity("Lfr", 0x1D50F);
+ entity("lg", 0x2276);
+ entity("lgE", 0x2A91);
+ entity("lgr", 0x03BB);
+ entity("Lgr", 0x039B);
+ entity("lHar", 0x2962);
+ entity("lhard", 0x21BD);
+ entity("lharu", 0x21BC);
+ entity("lharul", 0x296A);
+ entity("lhblk", 0x2584);
+ entity("ljcy", 0x0459);
+ entity("LJcy", 0x0409);
+ entity("ll", 0x226A);
+ entity("Ll", 0x22D8);
+ entity("llarr", 0x21C7);
+ entity("llcorner", 0x231E);
+ entity("Lleftarrow", 0x21DA);
+ entity("llhard", 0x296B);
+ entity("lltri", 0x25FA);
+ entity("lmidot", 0x0140);
+ entity("Lmidot", 0x013F);
+ entity("lmoust", 0x23B0);
+ entity("lmoustache", 0x23B0);
+ entity("lnap", 0x2A89);
+ entity("lnapprox", 0x2A89);
+ entity("lne", 0x2A87);
+ entity("lnE", 0x2268);
+ entity("lneq", 0x2A87);
+ entity("lneqq", 0x2268);
+ entity("lnsim", 0x22E6);
+ entity("loang", 0x27EC);
+ entity("loarr", 0x21FD);
+ entity("lobrk", 0x27E6);
+ entity("longleftarrow", 0x27F5);
+ entity("Longleftarrow", 0x27F8);
+ entity("longleftrightarrow", 0x27F7);
+ entity("Longleftrightarrow", 0x27FA);
+ entity("longmapsto", 0x27FC);
+ entity("longrightarrow", 0x27F6);
+ entity("Longrightarrow", 0x27F9);
+ entity("looparrowleft", 0x21AB);
+ entity("looparrowright", 0x21AC);
+ entity("lopar", 0x2985);
+ entity("lopf", 0x1D55D);
+ entity("Lopf", 0x1D543);
+ entity("loplus", 0x2A2D);
+ entity("lotimes", 0x2A34);
+ entity("lowast", 0x2217);
+ entity("lowbar", 0x005F);
+ entity("LowerLeftArrow", 0x2199);
+ entity("LowerRightArrow", 0x2198);
+ entity("loz", 0x25CA);
+ entity("lozenge", 0x25CA);
+ entity("lozf", 0x29EB);
+ entity("lpar", 0x0028);
+ entity("lparlt", 0x2993);
+ entity("lrarr", 0x21C6);
+ entity("lrcorner", 0x231F);
+ entity("lrhar", 0x21CB);
+ entity("lrhard", 0x296D);
+ entity("lrm", 0x200E);
+ entity("lrtri", 0x22BF);
+ entity("lsaquo", 0x2039);
+ entity("lscr", 0x1D4C1);
+ entity("Lscr", 0x2112);
+ entity("lsh", 0x21B0);
+ entity("lsim", 0x2272);
+ entity("lsime", 0x2A8D);
+ entity("lsimg", 0x2A8F);
+ entity("lsqb", 0x005B);
+ entity("lsquo", 0x2018);
+ entity("lsquor", 0x201A);
+ entity("lstrok", 0x0142);
+ entity("Lstrok", 0x0141);
+ entity("lt", 0x003C);
+ entity("Lt", 0x226A);
+ entity("ltcc", 0x2AA6);
+ entity("ltcir", 0x2A79);
+ entity("ltdot", 0x22D6);
+ entity("lthree", 0x22CB);
+ entity("ltimes", 0x22C9);
+ entity("ltlarr", 0x2976);
+ entity("ltquest", 0x2A7B);
+ entity("ltri", 0x25C3);
+ entity("ltrie", 0x22B4);
+ entity("ltrif", 0x25C2);
+ entity("ltrPar", 0x2996);
+ entity("lurdshar", 0x294A);
+ entity("luruhar", 0x2966);
+ entity("macr", 0x00AF);
+ entity("male", 0x2642);
+ entity("malt", 0x2720);
+ entity("maltese", 0x2720);
+ entity("map", 0x21A6);
+ entity("Map", 0x2905);
+ entity("mapsto", 0x21A6);
+ entity("mapstodown", 0x21A7);
+ entity("mapstoleft", 0x21A4);
+ entity("mapstoup", 0x21A5);
+ entity("marker", 0x25AE);
+ entity("mcomma", 0x2A29);
+ entity("mcy", 0x043C);
+ entity("Mcy", 0x041C);
+ entity("mdash", 0x2014);
+ entity("mDDot", 0x223A);
+ entity("measuredangle", 0x2221);
+ entity("MediumSpace", 0x205F);
+ entity("Mellintrf", 0x2133);
+ entity("mfr", 0x1D52A);
+ entity("Mfr", 0x1D510);
+ entity("mgr", 0x03BC);
+ entity("Mgr", 0x039C);
+ entity("mho", 0x2127);
+ entity("micro", 0x00B5);
+ entity("mid", 0x2223);
+ entity("midast", 0x002A);
+ entity("midcir", 0x2AF0);
+ entity("middot", 0x00B7);
+ entity("minus", 0x2212);
+ entity("minusb", 0x229F);
+ entity("minusd", 0x2238);
+ entity("minusdu", 0x2A2A);
+ entity("MinusPlus", 0x2213);
+ entity("mlcp", 0x2ADB);
+ entity("mldr", 0x2026);
+ entity("mnplus", 0x2213);
+ entity("models", 0x22A7);
+ entity("mopf", 0x1D55E);
+ entity("Mopf", 0x1D544);
+ entity("mp", 0x2213);
+ entity("mscr", 0x1D4C2);
+ entity("Mscr", 0x2133);
+ entity("mstpos", 0x223E);
+ entity("mu", 0x03BC);
+ entity("Mu", 0x039C);
+ entity("multimap", 0x22B8);
+ entity("mumap", 0x22B8);
+ entity("nabla", 0x2207);
+ entity("nacute", 0x0144);
+ entity("Nacute", 0x0143);
+ entity("nap", 0x2249);
+ entity("napos", 0x0149);
+ entity("napprox", 0x2249);
+ entity("natur", 0x266E);
+ entity("natural", 0x266E);
+ entity("naturals", 0x2115);
+ entity("nbsp", 0x00A0);
+ entity("ncap", 0x2A43);
+ entity("ncaron", 0x0148);
+ entity("Ncaron", 0x0147);
+ entity("ncedil", 0x0146);
+ entity("Ncedil", 0x0145);
+ entity("ncong", 0x2247);
+ entity("ncup", 0x2A42);
+ entity("ncy", 0x043D);
+ entity("Ncy", 0x041D);
+ entity("ndash", 0x2013);
+ entity("ne", 0x2260);
+ entity("nearhk", 0x2924);
+ entity("nearr", 0x2197);
+ entity("neArr", 0x21D7);
+ entity("nearrow", 0x2197);
+ entity("NegativeMediumSpace", 0x200B);
+ entity("NegativeThickSpace", 0x200B);
+ entity("NegativeThinSpace", 0x200B);
+ entity("NegativeVeryThinSpace", 0x200B);
+ entity("nequiv", 0x2262);
+ entity("nesear", 0x2928);
+ entity("NestedGreaterGreater", 0x226B);
+ entity("NestedLessLess", 0x226A);
+ entity("NewLine", 0x000A);
+ entity("nexist", 0x2204);
+ entity("nexists", 0x2204);
+ entity("nfr", 0x1D52B);
+ entity("Nfr", 0x1D511);
+ entity("nge", 0x2271);
+ entity("ngeq", 0x2271);
+ entity("ngr", 0x03BD);
+ entity("Ngr", 0x039D);
+ entity("ngsim", 0x2275);
+ entity("ngt", 0x226F);
+ entity("ngtr", 0x226F);
+ entity("nharr", 0x21AE);
+ entity("nhArr", 0x21CE);
+ entity("nhpar", 0x2AF2);
+ entity("ni", 0x220B);
+ entity("nis", 0x22FC);
+ entity("nisd", 0x22FA);
+ entity("niv", 0x220B);
+ entity("njcy", 0x045A);
+ entity("NJcy", 0x040A);
+ entity("nlarr", 0x219A);
+ entity("nlArr", 0x21CD);
+ entity("nldr", 0x2025);
+ entity("nle", 0x2270);
+ entity("nleftarrow", 0x219A);
+ entity("nLeftarrow", 0x21CD);
+ entity("nleftrightarrow", 0x21AE);
+ entity("nLeftrightarrow", 0x21CE);
+ entity("nleq", 0x2270);
+ entity("nless", 0x226E);
+ entity("nlsim", 0x2274);
+ entity("nlt", 0x226E);
+ entity("nltri", 0x22EA);
+ entity("nltrie", 0x22EC);
+ entity("nmid", 0x2224);
+ entity("NoBreak", 0x2060);
+ entity("NonBreakingSpace", 0x00A0);
+ entity("nopf", 0x1D55F);
+ entity("Nopf", 0x2115);
+ entity("not", 0x00AC);
+ entity("Not", 0x2AEC);
+ entity("NotCongruent", 0x2262);
+ entity("NotCupCap", 0x226D);
+ entity("NotDoubleVerticalBar", 0x2226);
+ entity("NotElement", 0x2209);
+ entity("NotEqual", 0x2260);
+ entity("NotExists", 0x2204);
+ entity("NotGreater", 0x226F);
+ entity("NotGreaterEqual", 0x2271);
+ entity("NotGreaterLess", 0x2279);
+ entity("NotGreaterTilde", 0x2275);
+ entity("notin", 0x2209);
+ entity("notinva", 0x2209);
+ entity("notinvb", 0x22F7);
+ entity("notinvc", 0x22F6);
+ entity("NotLeftTriangle", 0x22EA);
+ entity("NotLeftTriangleEqual", 0x22EC);
+ entity("NotLess", 0x226E);
+ entity("NotLessEqual", 0x2270);
+ entity("NotLessGreater", 0x2278);
+ entity("NotLessTilde", 0x2274);
+ entity("notni", 0x220C);
+ entity("notniva", 0x220C);
+ entity("notnivb", 0x22FE);
+ entity("notnivc", 0x22FD);
+ entity("NotPrecedes", 0x2280);
+ entity("NotPrecedesSlantEqual", 0x22E0);
+ entity("NotReverseElement", 0x220C);
+ entity("NotRightTriangle", 0x22EB);
+ entity("NotRightTriangleEqual", 0x22ED);
+ entity("NotSquareSubsetEqual", 0x22E2);
+ entity("NotSquareSupersetEqual", 0x22E3);
+ entity("NotSubsetEqual", 0x2288);
+ entity("NotSucceeds", 0x2281);
+ entity("NotSucceedsSlantEqual", 0x22E1);
+ entity("NotSupersetEqual", 0x2289);
+ entity("NotTilde", 0x2241);
+ entity("NotTildeEqual", 0x2244);
+ entity("NotTildeFullEqual", 0x2247);
+ entity("NotTildeTilde", 0x2249);
+ entity("NotVerticalBar", 0x2224);
+ entity("npar", 0x2226);
+ entity("nparallel", 0x2226);
+ entity("npolint", 0x2A14);
+ entity("npr", 0x2280);
+ entity("nprcue", 0x22E0);
+ entity("nprec", 0x2280);
+ entity("nrarr", 0x219B);
+ entity("nrArr", 0x21CF);
+ entity("nrightarrow", 0x219B);
+ entity("nRightarrow", 0x21CF);
+ entity("nrtri", 0x22EB);
+ entity("nrtrie", 0x22ED);
+ entity("nsc", 0x2281);
+ entity("nsccue", 0x22E1);
+ entity("nscr", 0x1D4C3);
+ entity("Nscr", 0x1D4A9);
+ entity("nshortmid", 0x2224);
+ entity("nshortparallel", 0x2226);
+ entity("nsim", 0x2241);
+ entity("nsime", 0x2244);
+ entity("nsimeq", 0x2244);
+ entity("nsmid", 0x2224);
+ entity("nspar", 0x2226);
+ entity("nsqsube", 0x22E2);
+ entity("nsqsupe", 0x22E3);
+ entity("nsub", 0x2284);
+ entity("nsube", 0x2288);
+ entity("nsubseteq", 0x2288);
+ entity("nsucc", 0x2281);
+ entity("nsup", 0x2285);
+ entity("nsupe", 0x2289);
+ entity("nsupseteq", 0x2289);
+ entity("ntgl", 0x2279);
+ entity("ntilde", 0x00F1);
+ entity("Ntilde", 0x00D1);
+ entity("ntlg", 0x2278);
+ entity("ntriangleleft", 0x22EA);
+ entity("ntrianglelefteq", 0x22EC);
+ entity("ntriangleright", 0x22EB);
+ entity("ntrianglerighteq", 0x22ED);
+ entity("nu", 0x03BD);
+ entity("Nu", 0x039D);
+ entity("num", 0x0023);
+ entity("numero", 0x2116);
+ entity("numsp", 0x2007);
+ entity("nvdash", 0x22AC);
+ entity("nvDash", 0x22AD);
+ entity("nVdash", 0x22AE);
+ entity("nVDash", 0x22AF);
+ entity("nvHarr", 0x2904);
+ entity("nvinfin", 0x29DE);
+ entity("nvlArr", 0x2902);
+ entity("nvrArr", 0x2903);
+ entity("nwarhk", 0x2923);
+ entity("nwarr", 0x2196);
+ entity("nwArr", 0x21D6);
+ entity("nwarrow", 0x2196);
+ entity("nwnear", 0x2927);
+ entity("oacgr", 0x03CC);
+ entity("Oacgr", 0x038C);
+ entity("oacute", 0x00F3);
+ entity("Oacute", 0x00D3);
+ entity("oast", 0x229B);
+ entity("ocir", 0x229A);
+ entity("ocirc", 0x00F4);
+ entity("Ocirc", 0x00D4);
+ entity("ocy", 0x043E);
+ entity("Ocy", 0x041E);
+ entity("odash", 0x229D);
+ entity("odblac", 0x0151);
+ entity("Odblac", 0x0150);
+ entity("odiv", 0x2A38);
+ entity("odot", 0x2299);
+ entity("odsold", 0x29BC);
+ entity("oelig", 0x0153);
+ entity("OElig", 0x0152);
+ entity("ofcir", 0x29BF);
+ entity("ofr", 0x1D52C);
+ entity("Ofr", 0x1D512);
+ entity("ogon", 0x02DB);
+ entity("ogr", 0x03BF);
+ entity("Ogr", 0x039F);
+ entity("ograve", 0x00F2);
+ entity("Ograve", 0x00D2);
+ entity("ogt", 0x29C1);
+ entity("ohacgr", 0x03CE);
+ entity("OHacgr", 0x038F);
+ entity("ohbar", 0x29B5);
+ entity("ohgr", 0x03C9);
+ entity("OHgr", 0x03A9);
+ entity("ohm", 0x2126);
+ entity("oint", 0x222E);
+ entity("olarr", 0x21BA);
+ entity("olcir", 0x29BE);
+ entity("olcross", 0x29BB);
+ entity("oline", 0x203E);
+ entity("olt", 0x29C0);
+ entity("omacr", 0x014D);
+ entity("Omacr", 0x014C);
+ entity("omega", 0x03C9);
+ entity("Omega", 0x03A9);
+ entity("omicron", 0x03BF);
+ entity("Omicron", 0x039F);
+ entity("omid", 0x29B6);
+ entity("ominus", 0x2296);
+ entity("oopf", 0x1D560);
+ entity("Oopf", 0x1D546);
+ entity("opar", 0x29B7);
+ entity("OpenCurlyDoubleQuote", 0x201C);
+ entity("OpenCurlyQuote", 0x2018);
+ entity("operp", 0x29B9);
+ entity("oplus", 0x2295);
+ entity("or", 0x2228);
+ entity("Or", 0x2A54);
+ entity("orarr", 0x21BB);
+ entity("ord", 0x2A5D);
+ entity("order", 0x2134);
+ entity("orderof", 0x2134);
+ entity("ordf", 0x00AA);
+ entity("ordm", 0x00BA);
+ entity("origof", 0x22B6);
+ entity("oror", 0x2A56);
+ entity("orslope", 0x2A57);
+ entity("orv", 0x2A5B);
+ entity("oS", 0x24C8);
+ entity("oscr", 0x2134);
+ entity("Oscr", 0x1D4AA);
+ entity("oslash", 0x00F8);
+ entity("Oslash", 0x00D8);
+ entity("osol", 0x2298);
+ entity("otilde", 0x00F5);
+ entity("Otilde", 0x00D5);
+ entity("otimes", 0x2297);
+ entity("Otimes", 0x2A37);
+ entity("otimesas", 0x2A36);
+ entity("ouml", 0x00F6);
+ entity("Ouml", 0x00D6);
+ entity("ovbar", 0x233D);
+ entity("OverBar", 0x00AF);
+ entity("OverBrace", 0xFE37);
+ entity("OverBracket", 0x23B4);
+ entity("OverParenthesis", 0xFE35);
+ entity("par", 0x2225);
+ entity("para", 0x00B6);
+ entity("parallel", 0x2225);
+ entity("parsim", 0x2AF3);
+ entity("parsl", 0x2AFD);
+ entity("part", 0x2202);
+ entity("PartialD", 0x2202);
+ entity("pcy", 0x043F);
+ entity("Pcy", 0x041F);
+ entity("percnt", 0x0025);
+ entity("period", 0x002E);
+ entity("permil", 0x2030);
+ entity("perp", 0x22A5);
+ entity("pertenk", 0x2031);
+ entity("pfr", 0x1D52D);
+ entity("Pfr", 0x1D513);
+ entity("pgr", 0x03C0);
+ entity("Pgr", 0x03A0);
+ entity("phgr", 0x03C6);
+ entity("PHgr", 0x03A6);
+ entity("phi", 0x03D5);
+ entity("Phi", 0x03A6);
+ entity("phiv", 0x03C6);
+ entity("phmmat", 0x2133);
+ entity("phone", 0x260E);
+ entity("pi", 0x03C0);
+ entity("Pi", 0x03A0);
+ entity("pitchfork", 0x22D4);
+ entity("piv", 0x03D6);
+ entity("planck", 0x210F);
+ entity("planckh", 0x210E);
+ entity("plankv", 0x210F);
+ entity("plus", 0x002B);
+ entity("plusacir", 0x2A23);
+ entity("plusb", 0x229E);
+ entity("pluscir", 0x2A22);
+ entity("plusdo", 0x2214);
+ entity("plusdu", 0x2A25);
+ entity("pluse", 0x2A72);
+ entity("PlusMinus", 0x00B1);
+ entity("plusmn", 0x00B1);
+ entity("plussim", 0x2A26);
+ entity("plustwo", 0x2A27);
+ entity("pm", 0x00B1);
+ entity("Poincareplane", 0x210C);
+ entity("pointint", 0x2A15);
+ entity("popf", 0x1D561);
+ entity("Popf", 0x2119);
+ entity("pound", 0x00A3);
+ entity("pr", 0x227A);
+ entity("Pr", 0x2ABB);
+ entity("prap", 0x2AB7);
+ entity("prcue", 0x227C);
+ entity("pre", 0x2AAF);
+ entity("prE", 0x2AB3);
+ entity("prec", 0x227A);
+ entity("precapprox", 0x2AB7);
+ entity("preccurlyeq", 0x227C);
+ entity("Precedes", 0x227A);
+ entity("PrecedesEqual", 0x2AAF);
+ entity("PrecedesSlantEqual", 0x227C);
+ entity("PrecedesTilde", 0x227E);
+ entity("preceq", 0x2AAF);
+ entity("precnapprox", 0x2AB9);
+ entity("precneqq", 0x2AB5);
+ entity("precnsim", 0x22E8);
+ entity("precsim", 0x227E);
+ entity("prime", 0x2032);
+ entity("Prime", 0x2033);
+ entity("primes", 0x2119);
+ entity("prnap", 0x2AB9);
+ entity("prnE", 0x2AB5);
+ entity("prnsim", 0x22E8);
+ entity("prod", 0x220F);
+ entity("Product", 0x220F);
+ entity("profalar", 0x232E);
+ entity("profline", 0x2312);
+ entity("profsurf", 0x2313);
+ entity("prop", 0x221D);
+ entity("Proportion", 0x2237);
+ entity("Proportional", 0x221D);
+ entity("propto", 0x221D);
+ entity("prsim", 0x227E);
+ entity("prurel", 0x22B0);
+ entity("pscr", 0x1D4C5);
+ entity("Pscr", 0x1D4AB);
+ entity("psgr", 0x03C8);
+ entity("PSgr", 0x03A8);
+ entity("psi", 0x03C8);
+ entity("Psi", 0x03A8);
+ entity("puncsp", 0x2008);
+ entity("qfr", 0x1D52E);
+ entity("Qfr", 0x1D514);
+ entity("qint", 0x2A0C);
+ entity("qopf", 0x1D562);
+ entity("Qopf", 0x211A);
+ entity("qprime", 0x2057);
+ entity("qscr", 0x1D4C6);
+ entity("Qscr", 0x1D4AC);
+ entity("quaternions", 0x210D);
+ entity("quatint", 0x2A16);
+ entity("quest", 0x003F);
+ entity("questeq", 0x225F);
+ entity("quot", 0x0022);
+ entity("rAarr", 0x21DB);
+ entity("race", 0x29DA);
+ entity("racute", 0x0155);
+ entity("Racute", 0x0154);
+ entity("radic", 0x221A);
+ entity("raemptyv", 0x29B3);
+ entity("rang", 0x232A);
+ entity("Rang", 0x27EB);
+ entity("rangd", 0x2992);
+ entity("range", 0x29A5);
+ entity("rangle", 0x232A);
+ entity("raquo", 0x00BB);
+ entity("rarr", 0x2192);
+ entity("rArr", 0x21D2);
+ entity("Rarr", 0x21A0);
+ entity("rarrap", 0x2975);
+ entity("rarrb", 0x21E5);
+ entity("rarrbfs", 0x2920);
+ entity("rarrc", 0x2933);
+ entity("rarrfs", 0x291E);
+ entity("rarrhk", 0x21AA);
+ entity("rarrlp", 0x21AC);
+ entity("rarrpl", 0x2945);
+ entity("rarrsim", 0x2974);
+ entity("rarrtl", 0x21A3);
+ entity("Rarrtl", 0x2916);
+ entity("rarrw", 0x219D);
+ entity("ratail", 0x291A);
+ entity("rAtail", 0x291C);
+ entity("ratio", 0x2236);
+ entity("rationals", 0x211A);
+ entity("rbarr", 0x290D);
+ entity("rBarr", 0x290F);
+ entity("RBarr", 0x2910);
+ entity("rbbrk", 0x2998);
+ entity("rbrace", 0x007D);
+ entity("rbrack", 0x005D);
+ entity("rbrke", 0x298C);
+ entity("rbrksld", 0x298E);
+ entity("rbrkslu", 0x2990);
+ entity("rcaron", 0x0159);
+ entity("Rcaron", 0x0158);
+ entity("rcedil", 0x0157);
+ entity("Rcedil", 0x0156);
+ entity("rceil", 0x2309);
+ entity("rcub", 0x007D);
+ entity("rcy", 0x0440);
+ entity("Rcy", 0x0420);
+ entity("rdca", 0x2937);
+ entity("rdldhar", 0x2969);
+ entity("rdquo", 0x201D);
+ entity("rdquor", 0x201D);
+ entity("rdsh", 0x21B3);
+ entity("Re", 0x211C);
+ entity("real", 0x211C);
+ entity("realine", 0x211B);
+ entity("realpart", 0x211C);
+ entity("reals", 0x211D);
+ entity("rect", 0x25AD);
+ entity("reg", 0x00AE);
+ entity("ReverseElement", 0x220B);
+ entity("ReverseEquilibrium", 0x21CB);
+ entity("ReverseUpEquilibrium", 0x296F);
+ entity("rfisht", 0x297D);
+ entity("rfloor", 0x230B);
+ entity("rfr", 0x1D52F);
+ entity("Rfr", 0x211C);
+ entity("rgr", 0x03C1);
+ entity("Rgr", 0x03A1);
+ entity("rHar", 0x2964);
+ entity("rhard", 0x21C1);
+ entity("rharu", 0x21C0);
+ entity("rharul", 0x296C);
+ entity("rho", 0x03C1);
+ entity("Rho", 0x03A1);
+ entity("rhov", 0x03F1);
+ entity("RightAngleBracket", 0x232A);
+ entity("rightarrow", 0x2192);
+ entity("Rightarrow", 0x21D2);
+ entity("RightArrowBar", 0x21E5);
+ entity("RightArrowLeftArrow", 0x21C4);
+ entity("rightarrowtail", 0x21A3);
+ entity("RightCeiling", 0x2309);
+ entity("RightDoubleBracket", 0x27E7);
+ entity("RightDownTeeVector", 0x295D);
+ entity("RightDownVector", 0x21C2);
+ entity("RightDownVectorBar", 0x2955);
+ entity("RightFloor", 0x230B);
+ entity("rightharpoondown", 0x21C1);
+ entity("rightharpoonup", 0x21C0);
+ entity("rightleftarrows", 0x21C4);
+ entity("rightleftharpoons", 0x21CC);
+ entity("rightrightarrows", 0x21C9);
+ entity("rightsquigarrow", 0x219D);
+ entity("RightTee", 0x22A2);
+ entity("RightTeeArrow", 0x21A6);
+ entity("RightTeeVector", 0x295B);
+ entity("rightthreetimes", 0x22CC);
+ entity("RightTriangle", 0x22B3);
+ entity("RightTriangleBar", 0x29D0);
+ entity("RightTriangleEqual", 0x22B5);
+ entity("RightUpDownVector", 0x294F);
+ entity("RightUpTeeVector", 0x295C);
+ entity("RightUpVector", 0x21BE);
+ entity("RightUpVectorBar", 0x2954);
+ entity("RightVector", 0x21C0);
+ entity("RightVectorBar", 0x2953);
+ entity("ring", 0x02DA);
+ entity("risingdotseq", 0x2253);
+ entity("rlarr", 0x21C4);
+ entity("rlhar", 0x21CC);
+ entity("rlm", 0x200F);
+ entity("rmoust", 0x23B1);
+ entity("rmoustache", 0x23B1);
+ entity("rnmid", 0x2AEE);
+ entity("roang", 0x27ED);
+ entity("roarr", 0x21FE);
+ entity("robrk", 0x27E7);
+ entity("ropar", 0x2986);
+ entity("ropf", 0x1D563);
+ entity("Ropf", 0x211D);
+ entity("roplus", 0x2A2E);
+ entity("rotimes", 0x2A35);
+ entity("RoundImplies", 0x2970);
+ entity("rpar", 0x0029);
+ entity("rpargt", 0x2994);
+ entity("rppolint", 0x2A12);
+ entity("rrarr", 0x21C9);
+ entity("Rrightarrow", 0x21DB);
+ entity("rsaquo", 0x203A);
+ entity("rscr", 0x1D4C7);
+ entity("Rscr", 0x211B);
+ entity("rsh", 0x21B1);
+ entity("rsqb", 0x005D);
+ entity("rsquo", 0x2019);
+ entity("rsquor", 0x2019);
+ entity("rthree", 0x22CC);
+ entity("rtimes", 0x22CA);
+ entity("rtri", 0x25B9);
+ entity("rtrie", 0x22B5);
+ entity("rtrif", 0x25B8);
+ entity("rtriltri", 0x29CE);
+ entity("RuleDelayed", 0x29F4);
+ entity("ruluhar", 0x2968);
+ entity("rx", 0x211E);
+ entity("sacute", 0x015B);
+ entity("Sacute", 0x015A);
+ entity("sbquo", 0x201A);
+ entity("sc", 0x227B);
+ entity("Sc", 0x2ABC);
+ entity("scap", 0x2AB8);
+ entity("scaron", 0x0161);
+ entity("Scaron", 0x0160);
+ entity("sccue", 0x227D);
+ entity("sce", 0x2AB0);
+ entity("scE", 0x2AB4);
+ entity("scedil", 0x015F);
+ entity("Scedil", 0x015E);
+ entity("scirc", 0x015D);
+ entity("Scirc", 0x015C);
+ entity("scnap", 0x2ABA);
+ entity("scnE", 0x2AB6);
+ entity("scnsim", 0x22E9);
+ entity("scpolint", 0x2A13);
+ entity("scsim", 0x227F);
+ entity("scy", 0x0441);
+ entity("Scy", 0x0421);
+ entity("sdot", 0x22C5);
+ entity("sdotb", 0x22A1);
+ entity("sdote", 0x2A66);
+ entity("searhk", 0x2925);
+ entity("searr", 0x2198);
+ entity("seArr", 0x21D8);
+ entity("searrow", 0x2198);
+ entity("sect", 0x00A7);
+ entity("semi", 0x003B);
+ entity("seswar", 0x2929);
+ entity("setminus", 0x2216);
+ entity("setmn", 0x2216);
+ entity("sext", 0x2736);
+ entity("sfgr", 0x03C2);
+ entity("sfr", 0x1D530);
+ entity("Sfr", 0x1D516);
+ entity("sfrown", 0x2322);
+ entity("sgr", 0x03C3);
+ entity("Sgr", 0x03A3);
+ entity("sharp", 0x266F);
+ entity("shchcy", 0x0449);
+ entity("SHCHcy", 0x0429);
+ entity("shcy", 0x0448);
+ entity("SHcy", 0x0428);
+ entity("ShortDownArrow", 0x2193);
+ entity("ShortLeftArrow", 0x2190);
+ entity("shortmid", 0x2223);
+ entity("shortparallel", 0x2225);
+ entity("ShortRightArrow", 0x2192);
+ entity("ShortUpArrow", 0x2191);
+ entity("shy", 0x00AD);
+ entity("sigma", 0x03C3);
+ entity("Sigma", 0x03A3);
+ entity("sigmaf", 0x03C2);
+ entity("sigmav", 0x03C2);
+ entity("sim", 0x223C);
+ entity("simdot", 0x2A6A);
+ entity("sime", 0x2243);
+ entity("simeq", 0x2243);
+ entity("simg", 0x2A9E);
+ entity("simgE", 0x2AA0);
+ entity("siml", 0x2A9D);
+ entity("simlE", 0x2A9F);
+ entity("simne", 0x2246);
+ entity("simplus", 0x2A24);
+ entity("simrarr", 0x2972);
+ entity("slarr", 0x2190);
+ entity("SmallCircle", 0x2218);
+ entity("smallsetminus", 0x2216);
+ entity("smashp", 0x2A33);
+ entity("smeparsl", 0x29E4);
+ entity("smid", 0x2223);
+ entity("smile", 0x2323);
+ entity("smt", 0x2AAA);
+ entity("smte", 0x2AAC);
+ entity("softcy", 0x044C);
+ entity("SOFTcy", 0x042C);
+ entity("sol", 0x002F);
+ entity("solb", 0x29C4);
+ entity("solbar", 0x233F);
+ entity("sopf", 0x1D564);
+ entity("Sopf", 0x1D54A);
+ entity("spades", 0x2660);
+ entity("spadesuit", 0x2660);
+ entity("spar", 0x2225);
+ entity("sqcap", 0x2293);
+ entity("sqcup", 0x2294);
+ entity("Sqrt", 0x221A);
+ entity("sqsub", 0x228F);
+ entity("sqsube", 0x2291);
+ entity("sqsubset", 0x228F);
+ entity("sqsubseteq", 0x2291);
+ entity("sqsup", 0x2290);
+ entity("sqsupe", 0x2292);
+ entity("sqsupset", 0x2290);
+ entity("sqsupseteq", 0x2292);
+ entity("squ", 0x25A1);
+ entity("square", 0x25A1);
+ entity("SquareIntersection", 0x2293);
+ entity("SquareSubset", 0x228F);
+ entity("SquareSubsetEqual", 0x2291);
+ entity("SquareSuperset", 0x2290);
+ entity("SquareSupersetEqual", 0x2292);
+ entity("SquareUnion", 0x2294);
+ entity("squarf", 0x25AA);
+ entity("squf", 0x25AA);
+ entity("srarr", 0x2192);
+ entity("sscr", 0x1D4C8);
+ entity("Sscr", 0x1D4AE);
+ entity("ssetmn", 0x2216);
+ entity("ssmile", 0x2323);
+ entity("sstarf", 0x22C6);
+ entity("star", 0x2606);
+ entity("Star", 0x22C6);
+ entity("starf", 0x2605);
+ entity("straightepsilon", 0x03F5);
+ entity("straightphi", 0x03D5);
+ entity("strns", 0x00AF);
+ entity("sub", 0x2282);
+ entity("Sub", 0x22D0);
+ entity("subdot", 0x2ABD);
+ entity("sube", 0x2286);
+ entity("subE", 0x2AC5);
+ entity("subedot", 0x2AC3);
+ entity("submult", 0x2AC1);
+ entity("subne", 0x228A);
+ entity("subnE", 0x2ACB);
+ entity("subplus", 0x2ABF);
+ entity("subrarr", 0x2979);
+ entity("subset", 0x2282);
+ entity("Subset", 0x22D0);
+ entity("subseteq", 0x2286);
+ entity("subseteqq", 0x2AC5);
+ entity("SubsetEqual", 0x2286);
+ entity("subsetneq", 0x228A);
+ entity("subsetneqq", 0x2ACB);
+ entity("subsim", 0x2AC7);
+ entity("subsub", 0x2AD5);
+ entity("subsup", 0x2AD3);
+ entity("succ", 0x227B);
+ entity("succapprox", 0x2AB8);
+ entity("succcurlyeq", 0x227D);
+ entity("Succeeds", 0x227B);
+ entity("SucceedsEqual", 0x2AB0);
+ entity("SucceedsSlantEqual", 0x227D);
+ entity("SucceedsTilde", 0x227F);
+ entity("succeq", 0x2AB0);
+ entity("succnapprox", 0x2ABA);
+ entity("succneqq", 0x2AB6);
+ entity("succnsim", 0x22E9);
+ entity("succsim", 0x227F);
+ entity("SuchThat", 0x220B);
+ entity("sum", 0x2211);
+ entity("sung", 0x266A);
+ entity("sup", 0x2283);
+ entity("Sup", 0x22D1);
+ entity("sup1", 0x00B9);
+ entity("sup2", 0x00B2);
+ entity("sup3", 0x00B3);
+ entity("supdot", 0x2ABE);
+ entity("supdsub", 0x2AD8);
+ entity("supe", 0x2287);
+ entity("supE", 0x2AC6);
+ entity("supedot", 0x2AC4);
+ entity("Superset", 0x2283);
+ entity("SupersetEqual", 0x2287);
+ entity("suphsub", 0x2AD7);
+ entity("suplarr", 0x297B);
+ entity("supmult", 0x2AC2);
+ entity("supne", 0x228B);
+ entity("supnE", 0x2ACC);
+ entity("supplus", 0x2AC0);
+ entity("supset", 0x2283);
+ entity("Supset", 0x22D1);
+ entity("supseteq", 0x2287);
+ entity("supseteqq", 0x2AC6);
+ entity("supsetneq", 0x228B);
+ entity("supsetneqq", 0x2ACC);
+ entity("supsim", 0x2AC8);
+ entity("supsub", 0x2AD4);
+ entity("supsup", 0x2AD6);
+ entity("swarhk", 0x2926);
+ entity("swarr", 0x2199);
+ entity("swArr", 0x21D9);
+ entity("swarrow", 0x2199);
+ entity("swnwar", 0x292A);
+ entity("szlig", 0x00DF);
+ entity("Tab", 0x0009);
+ entity("target", 0x2316);
+ entity("tau", 0x03C4);
+ entity("Tau", 0x03A4);
+ entity("tbrk", 0x23B4);
+ entity("tcaron", 0x0165);
+ entity("Tcaron", 0x0164);
+ entity("tcedil", 0x0163);
+ entity("Tcedil", 0x0162);
+ entity("tcy", 0x0442);
+ entity("Tcy", 0x0422);
+ entity("telrec", 0x2315);
+ entity("tfr", 0x1D531);
+ entity("Tfr", 0x1D517);
+ entity("tgr", 0x03C4);
+ entity("Tgr", 0x03A4);
+ entity("there4", 0x2234);
+ entity("therefore", 0x2234);
+ entity("theta", 0x03B8);
+ entity("Theta", 0x0398);
+ entity("thetasym", 0x03D1);
+ entity("thetav", 0x03D1);
+ entity("thgr", 0x03B8);
+ entity("THgr", 0x0398);
+ entity("thickapprox", 0x2248);
+ entity("thicksim", 0x223C);
+ entity("thinsp", 0x2009);
+ entity("ThinSpace", 0x2009);
+ entity("thkap", 0x2248);
+ entity("thksim", 0x223C);
+ entity("thorn", 0x00FE);
+ entity("THORN", 0x00DE);
+ entity("tilde", 0x02DC);
+ entity("Tilde", 0x223C);
+ entity("TildeEqual", 0x2243);
+ entity("TildeFullEqual", 0x2245);
+ entity("TildeTilde", 0x2248);
+ entity("times", 0x00D7);
+ entity("timesb", 0x22A0);
+ entity("timesbar", 0x2A31);
+ entity("timesd", 0x2A30);
+ entity("tint", 0x222D);
+ entity("toea", 0x2928);
+ entity("top", 0x22A4);
+ entity("topbot", 0x2336);
+ entity("topcir", 0x2AF1);
+ entity("topf", 0x1D565);
+ entity("Topf", 0x1D54B);
+ entity("topfork", 0x2ADA);
+ entity("tosa", 0x2929);
+ entity("tprime", 0x2034);
+ entity("trade", 0x2122);
+ entity("triangle", 0x25B5);
+ entity("triangledown", 0x25BF);
+ entity("triangleleft", 0x25C3);
+ entity("trianglelefteq", 0x22B4);
+ entity("triangleq", 0x225C);
+ entity("triangleright", 0x25B9);
+ entity("trianglerighteq", 0x22B5);
+ entity("tridot", 0x25EC);
+ entity("trie", 0x225C);
+ entity("triminus", 0x2A3A);
+ entity("triplus", 0x2A39);
+ entity("trisb", 0x29CD);
+ entity("tritime", 0x2A3B);
+ entity("trpezium", 0x23E2);
+ entity("tscr", 0x1D4C9);
+ entity("Tscr", 0x1D4AF);
+ entity("tscy", 0x0446);
+ entity("TScy", 0x0426);
+ entity("tshcy", 0x045B);
+ entity("TSHcy", 0x040B);
+ entity("tstrok", 0x0167);
+ entity("Tstrok", 0x0166);
+ entity("twixt", 0x226C);
+ entity("twoheadleftarrow", 0x219E);
+ entity("twoheadrightarrow", 0x21A0);
+ entity("uacgr", 0x03CD);
+ entity("Uacgr", 0x038E);
+ entity("uacute", 0x00FA);
+ entity("Uacute", 0x00DA);
+ entity("uarr", 0x2191);
+ entity("uArr", 0x21D1);
+ entity("Uarr", 0x219F);
+ entity("Uarrocir", 0x2949);
+ entity("ubrcy", 0x045E);
+ entity("Ubrcy", 0x040E);
+ entity("ubreve", 0x016D);
+ entity("Ubreve", 0x016C);
+ entity("ucirc", 0x00FB);
+ entity("Ucirc", 0x00DB);
+ entity("ucy", 0x0443);
+ entity("Ucy", 0x0423);
+ entity("udarr", 0x21C5);
+ entity("udblac", 0x0171);
+ entity("Udblac", 0x0170);
+ entity("udhar", 0x296E);
+ entity("udiagr", 0x03B0);
+ entity("udigr", 0x03CB);
+ entity("Udigr", 0x03AB);
+ entity("ufisht", 0x297E);
+ entity("ufr", 0x1D532);
+ entity("Ufr", 0x1D518);
+ entity("ugr", 0x03C5);
+ entity("Ugr", 0x03A5);
+ entity("ugrave", 0x00F9);
+ entity("Ugrave", 0x00D9);
+ entity("uHar", 0x2963);
+ entity("uharl", 0x21BF);
+ entity("uharr", 0x21BE);
+ entity("uhblk", 0x2580);
+ entity("ulcorn", 0x231C);
+ entity("ulcorner", 0x231C);
+ entity("ulcrop", 0x230F);
+ entity("ultri", 0x25F8);
+ entity("umacr", 0x016B);
+ entity("Umacr", 0x016A);
+ entity("uml", 0x00A8);
+ entity("UnderBrace", 0xFE38);
+ entity("UnderBracket", 0x23B5);
+ entity("UnderParenthesis", 0xFE36);
+ entity("Union", 0x22C3);
+ entity("UnionPlus", 0x228E);
+ entity("uogon", 0x0173);
+ entity("Uogon", 0x0172);
+ entity("uopf", 0x1D566);
+ entity("Uopf", 0x1D54C);
+ entity("uparrow", 0x2191);
+ entity("Uparrow", 0x21D1);
+ entity("UpArrowBar", 0x2912);
+ entity("UpArrowDownArrow", 0x21C5);
+ entity("updownarrow", 0x2195);
+ entity("Updownarrow", 0x21D5);
+ entity("UpEquilibrium", 0x296E);
+ entity("upharpoonleft", 0x21BF);
+ entity("upharpoonright", 0x21BE);
+ entity("uplus", 0x228E);
+ entity("UpperLeftArrow", 0x2196);
+ entity("UpperRightArrow", 0x2197);
+ entity("upsi", 0x03C5);
+ entity("Upsi", 0x03D2);
+ entity("upsih", 0x03D2);
+ entity("upsilon", 0x03C5);
+ entity("Upsilon", 0x03A5);
+ entity("UpTee", 0x22A5);
+ entity("UpTeeArrow", 0x21A5);
+ entity("upuparrows", 0x21C8);
+ entity("urcorn", 0x231D);
+ entity("urcorner", 0x231D);
+ entity("urcrop", 0x230E);
+ entity("uring", 0x016F);
+ entity("Uring", 0x016E);
+ entity("urtri", 0x25F9);
+ entity("uscr", 0x1D4CA);
+ entity("Uscr", 0x1D4B0);
+ entity("utdot", 0x22F0);
+ entity("utilde", 0x0169);
+ entity("Utilde", 0x0168);
+ entity("utri", 0x25B5);
+ entity("utrif", 0x25B4);
+ entity("uuarr", 0x21C8);
+ entity("uuml", 0x00FC);
+ entity("Uuml", 0x00DC);
+ entity("uwangle", 0x29A7);
+ entity("vangrt", 0x299C);
+ entity("varepsilon", 0x03B5);
+ entity("varkappa", 0x03F0);
+ entity("varnothing", 0x2205);
+ entity("varphi", 0x03C6);
+ entity("varpi", 0x03D6);
+ entity("varpropto", 0x221D);
+ entity("varr", 0x2195);
+ entity("vArr", 0x21D5);
+ entity("varrho", 0x03F1);
+ entity("varsigma", 0x03C2);
+ entity("vartheta", 0x03D1);
+ entity("vartriangleleft", 0x22B2);
+ entity("vartriangleright", 0x22B3);
+ entity("vBar", 0x2AE8);
+ entity("Vbar", 0x2AEB);
+ entity("vBarv", 0x2AE9);
+ entity("vcy", 0x0432);
+ entity("Vcy", 0x0412);
+ entity("vdash", 0x22A2);
+ entity("vDash", 0x22A8);
+ entity("Vdash", 0x22A9);
+ entity("VDash", 0x22AB);
+ entity("Vdashl", 0x2AE6);
+ entity("vee", 0x2228);
+ entity("Vee", 0x22C1);
+ entity("veebar", 0x22BB);
+ entity("veeeq", 0x225A);
+ entity("vellip", 0x22EE);
+ entity("verbar", 0x007C);
+ entity("Verbar", 0x2016);
+ entity("vert", 0x007C);
+ entity("Vert", 0x2016);
+ entity("VerticalBar", 0x2223);
+ entity("VerticalLine", 0x007C);
+ entity("VerticalSeparator", 0x2758);
+ entity("VerticalTilde", 0x2240);
+ entity("VeryThinSpace", 0x200A);
+ entity("vfr", 0x1D533);
+ entity("Vfr", 0x1D519);
+ entity("vltri", 0x22B2);
+ entity("vopf", 0x1D567);
+ entity("Vopf", 0x1D54D);
+ entity("vprop", 0x221D);
+ entity("vrtri", 0x22B3);
+ entity("vscr", 0x1D4CB);
+ entity("Vscr", 0x1D4B1);
+ entity("Vvdash", 0x22AA);
+ entity("vzigzag", 0x299A);
+ entity("wcirc", 0x0175);
+ entity("Wcirc", 0x0174);
+ entity("wedbar", 0x2A5F);
+ entity("wedge", 0x2227);
+ entity("Wedge", 0x22C0);
+ entity("wedgeq", 0x2259);
+ entity("weierp", 0x2118);
+ entity("wfr", 0x1D534);
+ entity("Wfr", 0x1D51A);
+ entity("wopf", 0x1D568);
+ entity("Wopf", 0x1D54E);
+ entity("wp", 0x2118);
+ entity("wr", 0x2240);
+ entity("wreath", 0x2240);
+ entity("wscr", 0x1D4CC);
+ entity("Wscr", 0x1D4B2);
+ entity("xcap", 0x22C2);
+ entity("xcirc", 0x25EF);
+ entity("xcup", 0x22C3);
+ entity("xdtri", 0x25BD);
+ entity("xfr", 0x1D535);
+ entity("Xfr", 0x1D51B);
+ entity("xgr", 0x03BE);
+ entity("Xgr", 0x039E);
+ entity("xharr", 0x27F7);
+ entity("xhArr", 0x27FA);
+ entity("xi", 0x03BE);
+ entity("Xi", 0x039E);
+ entity("xlarr", 0x27F5);
+ entity("xlArr", 0x27F8);
+ entity("xmap", 0x27FC);
+ entity("xnis", 0x22FB);
+ entity("xodot", 0x2A00);
+ entity("xopf", 0x1D569);
+ entity("Xopf", 0x1D54F);
+ entity("xoplus", 0x2A01);
+ entity("xotime", 0x2A02);
+ entity("xrarr", 0x27F6);
+ entity("xrArr", 0x27F9);
+ entity("xscr", 0x1D4CD);
+ entity("Xscr", 0x1D4B3);
+ entity("xsqcup", 0x2A06);
+ entity("xuplus", 0x2A04);
+ entity("xutri", 0x25B3);
+ entity("xvee", 0x22C1);
+ entity("xwedge", 0x22C0);
+ entity("yacute", 0x00FD);
+ entity("Yacute", 0x00DD);
+ entity("yacy", 0x044F);
+ entity("YAcy", 0x042F);
+ entity("ycirc", 0x0177);
+ entity("Ycirc", 0x0176);
+ entity("ycy", 0x044B);
+ entity("Ycy", 0x042B);
+ entity("yen", 0x00A5);
+ entity("yfr", 0x1D536);
+ entity("Yfr", 0x1D51C);
+ entity("yicy", 0x0457);
+ entity("YIcy", 0x0407);
+ entity("yopf", 0x1D56A);
+ entity("Yopf", 0x1D550);
+ entity("yscr", 0x1D4CE);
+ entity("Yscr", 0x1D4B4);
+ entity("yucy", 0x044E);
+ entity("YUcy", 0x042E);
+ entity("yuml", 0x00FF);
+ entity("Yuml", 0x0178);
+ entity("zacute", 0x017A);
+ entity("Zacute", 0x0179);
+ entity("zcaron", 0x017E);
+ entity("Zcaron", 0x017D);
+ entity("zcy", 0x0437);
+ entity("Zcy", 0x0417);
+ entity("zdot", 0x017C);
+ entity("Zdot", 0x017B);
+ entity("zeetrf", 0x2128);
+ entity("ZeroWidthSpace", 0x200B);
+ entity("zeta", 0x03B6);
+ entity("Zeta", 0x0396);
+ entity("zfr", 0x1D537);
+ entity("Zfr", 0x2128);
+ entity("zgr", 0x03B6);
+ entity("Zgr", 0x0396);
+ entity("zhcy", 0x0436);
+ entity("ZHcy", 0x0416);
+ entity("zigrarr", 0x21DD);
+ entity("zopf", 0x1D56B);
+ entity("Zopf", 0x2124);
+ entity("zscr", 0x1D4CF);
+ entity("Zscr", 0x1D4B5);
+ entity("zwj", 0x200D);
+ entity("zwnj", 0x200C);
+
+ // End of Schema calls
+ }
+
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/LICENSE b/src/org/ccil/cowan/tagsoup/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2 b/src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2
diff --git a/src/org/ccil/cowan/tagsoup/PYXScanner.java b/src/org/ccil/cowan/tagsoup/PYXScanner.java
new file mode 100644
index 0000000..ebfba26
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/PYXScanner.java
@@ -0,0 +1,124 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// This file is part of TagSoup.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version. You may also distribute
+// and/or modify it under version 2.1 of the Academic Free License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// PYX Scanner
+
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.SAXException;
+
+/**
+A Scanner that accepts PYX format instead of HTML.
+Useful primarily for debugging.
+<p>
+Input is read one line at a time and dispatched on the first character
+of the line:
+<ul>
+<li><code>(</code> start-tag name, reported through <code>gi</code></li>
+<li><code>)</code> end-tag name, reported through <code>etag</code></li>
+<li><code>?</code> processing instruction, reported through <code>pi</code></li>
+<li><code>A</code> attribute "name value", reported through
+    <code>aname</code> and <code>aval</code></li>
+<li><code>-</code> character data, reported through <code>pcdata</code>
+    after decoding the escapes <code>\n</code>, <code>\t</code> and
+    <code>\\</code> (the escapes PYXWriter produces for character data)</li>
+<li><code>E</code> entity, reported through <code>entity</code></li>
+</ul>
+Blank lines and lines starting with any other character are ignored.
+**/
+public class PYXScanner implements Scanner {
+
+	public void resetDocumentLocator(String publicid, String systemid) {
+		// Need this method for interface compatibility, but note
+		// that PYXScanner does not implement Locator.
+	}
+
+	/**
+	Reads PYX lines from the reader and reports them to the handler.
+	A pending start-tag is closed with <code>stagc</code> before any
+	event other than an attribute is reported, and <code>eof</code> is
+	reported once the input is exhausted.
+	@param r source of PYX text
+	@param h handler that receives the scan events
+	@throws IOException if reading from <code>r</code> fails
+	@throws SAXException if the handler raises one
+	**/
+	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
+		BufferedReader br = new BufferedReader(r);
+		String s;
+		char[] buff = null;
+		boolean instag = false;
+		while ((s = br.readLine()) != null) {
+			int size = s.length();
+			if (size == 0) {
+				// A blank line has no type character.  Without this
+				// guard buff[0] is either out of range or left over
+				// from the previous line.
+				continue;
+			}
+			if (buff == null || buff.length < size) {
+				buff = new char[size];
+			}
+			s.getChars(0, size, buff, 0);
+			char kind = buff[0];
+
+			// Every recognized line type except an attribute ends
+			// the start-tag that is currently being assembled.
+			boolean endsStartTag = kind == '(' || kind == ')' || kind == '?'
+				|| kind == '-' || kind == 'E';
+			if (instag && endsStartTag) {
+				h.stagc(buff, 0, 0);
+				instag = false;
+			}
+
+			switch (kind) {
+			case '(':
+				h.gi(buff, 1, size - 1);
+				instag = true;
+				break;
+			case ')':
+				h.etag(buff, 1, size - 1);
+				break;
+			case '?':
+				h.pi(buff, 1, size - 1);
+				break;
+			case 'A':
+				int sp = s.indexOf(' ');
+				if (sp < 0) {
+					// Malformed line with no separator: treat the
+					// whole remainder as a name with an empty value
+					// instead of passing negative lengths on.
+					h.aname(buff, 1, size - 1);
+					h.aval(buff, 0, 0);
+				}
+				else {
+					h.aname(buff, 1, sp - 1);
+					h.aval(buff, sp + 1, size - sp - 1);
+				}
+				break;
+			case '-':
+				int end = decodeEscapes(buff, 1, size);
+				h.pcdata(buff, 1, end - 1);
+				break;
+			case 'E':
+				h.entity(buff, 1, size - 1);
+				break;
+			default:
+				// Unknown line type: silently skipped.
+				break;
+			}
+		}
+		h.eof(buff, 0, 0);
+	}
+
+	/**
+	Decodes the PYX escapes <code>\n</code>, <code>\t</code> and
+	<code>\\</code> in place.  The decoded text is never longer than the
+	encoded text, so it is written back over the same region.  A
+	backslash followed by anything else, or at the very end, is kept
+	literally.
+	@param buff buffer holding the encoded text
+	@param start index of the first encoded character
+	@param end index just past the last encoded character
+	@return index just past the last decoded character
+	**/
+	private static int decodeEscapes(char[] buff, int start, int end) {
+		int out = start;
+		for (int in = start; in < end; in++) {
+			char c = buff[in];
+			if (c == '\\' && in + 1 < end) {
+				char next = buff[in + 1];
+				if (next == 'n') {
+					c = '\n';
+					in++;
+				}
+				else if (next == 't') {
+					c = '\t';
+					in++;
+				}
+				else if (next == '\\') {
+					in++;	// c is already a single backslash
+				}
+			}
+			buff[out++] = c;
+		}
+		return out;
+	}
+
+	public void startCDATA() { }
+
+	/**
+	Reads PYX from standard input (UTF-8) and writes it back out as PYX
+	on standard output (UTF-8) through a PYXWriter.
+	**/
+	public static void main(String[] argv) throws IOException, SAXException {
+		Scanner s = new PYXScanner();
+		Reader r = new InputStreamReader(System.in, "UTF-8");
+		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
+		s.scan(r, new PYXWriter(w));
+	}
+	}
diff --git a/src/org/ccil/cowan/tagsoup/PYXWriter.java b/src/org/ccil/cowan/tagsoup/PYXWriter.java
new file mode 100644
index 0000000..81917dd
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/PYXWriter.java
@@ -0,0 +1,217 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// PYX Writer
+// FIXME: does not do escapes in attribute values
+// FIXME: outputs entities as bare '&' character
+
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.*;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+A ContentHandler that generates PYX format instead of XML.
+Primarily useful for debugging.
+<p>
+The same output can be driven either by scanner events (ScanHandler)
+or by SAX events (ContentHandler / LexicalHandler).  Each PYX line
+starts with a type character: <code>(</code> start-tag, <code>)</code>
+end-tag, <code>A</code> attribute, <code>-</code> character data and
+<code>?</code> processing instruction.
+**/
+public class PYXWriter
+	implements ScanHandler, ContentHandler, LexicalHandler {
+
+	private PrintWriter theWriter;		// destination of all PYX output
+	private static char[] dummy = new char[1];
+	private String attrName;		// name seen by aname(), kept until its value is written
+
+	// Constructor
+
+	/**
+	Creates a writer that sends PYX to <code>w</code>.  A PrintWriter is
+	used directly; any other Writer is wrapped in a new PrintWriter.
+	**/
+	public PYXWriter(Writer w) {
+		theWriter = (w instanceof PrintWriter) ? (PrintWriter)w : new PrintWriter(w);
+	}
+
+	// Shared output helper
+
+	/** Writes one PYX line: the marker, then the buffer slice, then a line end. */
+	private void markedLine(char marker, char[] buff, int offset, int length) {
+		theWriter.print(marker);
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+	}
+
+	// ScanHandler implementation
+
+	/** Finishes an attribute line by using the saved attribute name as its value. */
+	public void adup(char[] buff, int offset, int length) throws SAXException {
+		theWriter.println(attrName);
+		attrName = null;
+	}
+
+	/** Starts an attribute line ("A", the name, a space) and remembers the name. */
+	public void aname(char[] buff, int offset, int length) throws SAXException {
+		theWriter.print('A');
+		theWriter.write(buff, offset, length);
+		theWriter.print(' ');
+		attrName = new String(buff, offset, length);
+	}
+
+	/** Finishes an attribute line with the given value, written unescaped. */
+	public void aval(char[] buff, int offset, int length) throws SAXException {
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+		attrName = null;
+	}
+
+	/** Comments produce no output (the "!" line is currently disabled). */
+	public void cmnt(char [] buff, int offset, int length) throws SAXException {
+	}
+
+	/** Entities produce no output. */
+	public void entity(char[] buff, int offset, int length) throws SAXException {
+	}
+
+	/** Always reports 0. */
+	public int getEntity() {
+		return 0;
+	}
+
+	/** End of input: closes the underlying writer. */
+	public void eof(char[] buff, int offset, int length) throws SAXException {
+		theWriter.close();
+	}
+
+	/** Writes an end-tag line: ")" followed by the element name. */
+	public void etag(char[] buff, int offset, int length) throws SAXException {
+		markedLine(')', buff, offset, length);
+	}
+
+	/** Declarations produce no output. */
+	public void decl(char[] buff, int offset, int length) throws SAXException {
+	}
+
+	/** Writes a start-tag line: "(" followed by the element name. */
+	public void gi(char[] buff, int offset, int length) throws SAXException {
+		markedLine('(', buff, offset, length);
+	}
+
+	/** CDATA section content is written exactly like ordinary character data. */
+	public void cdsect(char[] buff, int offset, int length) throws SAXException {
+		pcdata(buff, offset, length);
+	}
+
+	/**
+	Writes character data as "-" lines.  Every newline in the data
+	becomes a line of its own reading <code>-\n</code>; the text
+	between newlines goes on a "-" line with tab written as
+	<code>\t</code> and backslash as <code>\\</code>.
+	**/
+	public void pcdata(char[] buff, int offset, int length) throws SAXException {
+		if (length == 0) {
+			return;		// nothing to do
+		}
+		final int end = offset + length;
+		boolean lineOpen = false;	// true while a "-" line is unterminated
+		int pos = offset;
+		while (pos < end) {
+			char c = buff[pos++];
+			if (c == '\n') {
+				if (lineOpen) {
+					theWriter.println();
+					lineOpen = false;
+				}
+				theWriter.println("-\\n");
+				continue;
+			}
+			if (!lineOpen) {
+				theWriter.print('-');
+				lineOpen = true;
+			}
+			if (c == '\t') {
+				theWriter.print("\\t");
+			}
+			else if (c == '\\') {
+				theWriter.print("\\\\");
+			}
+			else {
+				theWriter.print(c);
+			}
+		}
+		if (lineOpen) {
+			theWriter.println();
+		}
+	}
+
+	/** Starts a processing-instruction line: "?", the target, a space. */
+	public void pitarget(char[] buff, int offset, int length) throws SAXException {
+		theWriter.print('?');
+		theWriter.write(buff, offset, length);
+		theWriter.write(' ');
+	}
+
+	/** Finishes a processing-instruction line with its data. */
+	public void pi(char[] buff, int offset, int length) throws SAXException {
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+	}
+
+	/** The close of a start-tag produces no output. */
+	public void stagc(char[] buff, int offset, int length) throws SAXException {
+	}
+
+	/** Writes a line holding only "!". */
+	public void stage(char[] buff, int offset, int length) throws SAXException {
+		theWriter.println("!"); // FIXME
+	}
+
+	// SAX ContentHandler implementation
+
+	/** SAX character data is written through pcdata(). */
+	public void characters(char[] buff, int offset, int length) throws SAXException {
+		pcdata(buff, offset, length);
+	}
+
+	/** End of document: closes the underlying writer. */
+	public void endDocument() throws SAXException {
+		theWriter.close();
+	}
+
+	/** Writes an end-tag line, using the local name when the qname is empty. */
+	public void endElement(String uri, String localname, String qname) throws SAXException {
+		String shown = (qname.length() == 0) ? localname : qname;
+		theWriter.print(')');
+		theWriter.println(shown);
+	}
+
+	public void endPrefixMapping(String prefix) throws SAXException {
+	}
+
+	/** Ignorable whitespace is written like any other character data. */
+	public void ignorableWhitespace(char[] buff, int offset, int length) throws SAXException {
+		characters(buff, offset, length);
+	}
+
+	/** Writes a complete processing-instruction line: "?target data". */
+	public void processingInstruction(String target, String data) throws SAXException {
+		theWriter.print('?');
+		theWriter.print(target);
+		theWriter.print(' ');
+		theWriter.println(data);
+	}
+
+	public void setDocumentLocator(Locator locator) {
+	}
+
+	public void skippedEntity(String name) throws SAXException {
+	}
+
+	public void startDocument() throws SAXException {
+	}
+
+	/**
+	Writes a start-tag line followed by one "A" line per attribute.
+	For the element and for each attribute the local name is used
+	when the qname is empty.
+	**/
+	public void startElement(String uri, String localname, String qname,
+			Attributes atts) throws SAXException {
+		String element = (qname.length() == 0) ? localname : qname;
+		theWriter.print('(');
+		theWriter.println(element);
+		final int count = atts.getLength();
+		for (int idx = 0; idx < count; idx++) {
+			String attribute = atts.getQName(idx);
+			if (attribute.length() == 0) {
+				attribute = atts.getLocalName(idx);
+			}
+			theWriter.print('A');
+			theWriter.print(attribute);
+			theWriter.print(' ');
+			theWriter.println(atts.getValue(idx));
+		}
+	}
+
+	public void startPrefixMapping(String prefix, String uri) throws SAXException {
+	}
+
+	// Default LexicalHandler implementation
+
+	/** SAX comments are routed to cmnt(). */
+	public void comment(char[] ch, int start, int length) throws SAXException {
+		cmnt(ch, start, length);
+	}
+
+	public void endCDATA() throws SAXException {
+	}
+
+	public void endDTD() throws SAXException {
+	}
+
+	public void endEntity(String name) throws SAXException {
+	}
+
+	public void startCDATA() throws SAXException {
+	}
+
+	public void startDTD(String name, String publicId, String systemId) throws SAXException {
+	}
+
+	public void startEntity(String name) throws SAXException {
+	}
+	}
diff --git a/src/org/ccil/cowan/tagsoup/Parser.java b/src/org/ccil/cowan/tagsoup/Parser.java
new file mode 100644
index 0000000..0997f23
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Parser.java
@@ -0,0 +1,1114 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// The TagSoup parser
+
+package org.ccil.cowan.tagsoup;
+import java.util.HashMap;
+import java.util.ArrayList;
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import org.xml.sax.*;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.ext.LexicalHandler;
+
+
+/**
+The SAX parser class.
+**/
+public class Parser extends DefaultHandler implements ScanHandler, XMLReader, LexicalHandler {
+
+ // XMLReader implementation
+
+ private ContentHandler theContentHandler = this;
+ private LexicalHandler theLexicalHandler = this;
+ private DTDHandler theDTDHandler = this;
+ private ErrorHandler theErrorHandler = this;
+ private EntityResolver theEntityResolver = this;
+ private Schema theSchema;
+ private Scanner theScanner;
+ private AutoDetector theAutoDetector;
+
+ // Default values for feature flags
+
+ private static boolean DEFAULT_NAMESPACES = true;
+ private static boolean DEFAULT_IGNORE_BOGONS = false;
+ private static boolean DEFAULT_BOGONS_EMPTY = false;
+ private static boolean DEFAULT_ROOT_BOGONS = true;
+ private static boolean DEFAULT_DEFAULT_ATTRIBUTES = true;
+ private static boolean DEFAULT_TRANSLATE_COLONS = false;
+ private static boolean DEFAULT_RESTART_ELEMENTS = true;
+ private static boolean DEFAULT_IGNORABLE_WHITESPACE = false;
+ private static boolean DEFAULT_CDATA_ELEMENTS = true;
+
+ // Feature flags.
+
+ private boolean namespaces = DEFAULT_NAMESPACES;
+ private boolean ignoreBogons = DEFAULT_IGNORE_BOGONS;
+ private boolean bogonsEmpty = DEFAULT_BOGONS_EMPTY;
+ private boolean rootBogons = DEFAULT_ROOT_BOGONS;
+ private boolean defaultAttributes = DEFAULT_DEFAULT_ATTRIBUTES;
+ private boolean translateColons = DEFAULT_TRANSLATE_COLONS;
+ private boolean restartElements = DEFAULT_RESTART_ELEMENTS;
+ private boolean ignorableWhitespace = DEFAULT_IGNORABLE_WHITESPACE;
+ private boolean CDATAElements = DEFAULT_CDATA_ELEMENTS;
+
+ /**
+ A value of "true" indicates namespace URIs and unprefixed local
+ names for element and attribute names will be available.
+ **/
+ public final static String namespacesFeature =
+ "http://xml.org/sax/features/namespaces";
+
+ /**
+ A value of "true" indicates that XML qualified names (with prefixes)
+ and attributes (including xmlns* attributes) will be available.
+ We don't support this value.
+ **/
+ public final static String namespacePrefixesFeature =
+ "http://xml.org/sax/features/namespace-prefixes";
+
+ /**
+ Reports whether this parser processes external general entities
+ (it doesn't).
+ **/
+ public final static String externalGeneralEntitiesFeature =
+ "http://xml.org/sax/features/external-general-entities";
+
+ /**
+ Reports whether this parser processes external parameter entities
+ (it doesn't).
+ **/
+ public final static String externalParameterEntitiesFeature =
+ "http://xml.org/sax/features/external-parameter-entities";
+
+ /**
+ May be examined only during a parse, after the startDocument()
+ callback has been completed; read-only. The value is true if
+ the document specified standalone="yes" in its XML declaration,
+ and otherwise is false. (It's always false.)
+ **/
+ public final static String isStandaloneFeature =
+ "http://xml.org/sax/features/is-standalone";
+
+ /**
+ A value of "true" indicates that the LexicalHandler will report
+ the beginning and end of parameter entities (it won't).
+ **/
+ public final static String lexicalHandlerParameterEntitiesFeature =
+ "http://xml.org/sax/features/lexical-handler/parameter-entities";
+
+ /**
+ A value of "true" indicates that system IDs in declarations will
+ be absolutized (relative to their base URIs) before reporting.
+ (This returns true but doesn't actually do anything.)
+ **/
+ public final static String resolveDTDURIsFeature =
+ "http://xml.org/sax/features/resolve-dtd-uris";
+
+ /**
+ Has a value of "true" if all XML names (for elements,
+ prefixes, attributes, entities, notations, and local
+ names), as well as Namespace URIs, will have been interned
+ using java.lang.String.intern. This supports fast testing of
+ equality/inequality against string constants, rather than forcing
+ slower calls to String.equals(). (We always intern.)
+ **/
+ public final static String stringInterningFeature =
+ "http://xml.org/sax/features/string-interning";
+
+ /**
+ Returns "true" if the Attributes objects passed by this
+ parser in ContentHandler.startElement() implement the
+ org.xml.sax.ext.Attributes2 interface. (They don't.)
+ **/
+
+ public final static String useAttributes2Feature =
+ "http://xml.org/sax/features/use-attributes2";
+
+ /**
+ Returns "true" if the Locator objects passed by this parser
+ in ContentHandler.setDocumentLocator() implement the
+ org.xml.sax.ext.Locator2 interface. (They don't.)
+ **/
+ public final static String useLocator2Feature =
+ "http://xml.org/sax/features/use-locator2";
+
+ /**
+ Returns "true" if, when setEntityResolver is given an object
+ implementing the org.xml.sax.ext.EntityResolver2 interface,
+ those new methods will be used. (They won't be.)
+ **/
+ public final static String useEntityResolver2Feature =
+ "http://xml.org/sax/features/use-entity-resolver2";
+
+ /**
+ Controls whether the parser is reporting all validity errors
+ (We don't report any validity errors.)
+ **/
+ public final static String validationFeature =
+ "http://xml.org/sax/features/validation";
+
+ /**
+ Controls whether the parser reports Unicode normalization
+ errors as described in section 2.13 and Appendix B of the XML
+ 1.1 Recommendation. (We don't normalize.)
+ **/
+ public final static String unicodeNormalizationCheckingFeature =
+"http://xml.org/sax/features/unicode-normalization-checking";
+
+ /**
+ Controls whether, when the namespace-prefixes feature is set,
+ the parser treats namespace declaration attributes as being in
+ the http://www.w3.org/2000/xmlns/ namespace. (It doesn't.)
+ **/
+ public final static String xmlnsURIsFeature =
+ "http://xml.org/sax/features/xmlns-uris";
+
+ /**
+ Returns "true" if the parser supports both XML 1.1 and XML 1.0.
+ (Always false.)
+ **/
+ public final static String XML11Feature =
+ "http://xml.org/sax/features/xml-1.1";
+
+ /**
+ A value of "true" indicates that the parser will ignore
+ unknown elements.
+ **/
+ public final static String ignoreBogonsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons";
+
+ /**
+ A value of "true" indicates that the parser will give unknown
+ elements a content model of EMPTY; a value of "false", a
+ content model of ANY.
+ **/
+ public final static String bogonsEmptyFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/bogons-empty";
+
+ /**
+ A value of "true" indicates that the parser will allow unknown
+ elements to be the root element.
+ **/
+ public final static String rootBogonsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/root-bogons";
+
+ /**
+ A value of "true" indicates that the parser will return default
+ attribute values for missing attributes that have default values.
+ **/
+ public final static String defaultAttributesFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/default-attributes";
+
+ /**
+ A value of "true" indicates that the parser will
+ translate colons into underscores in names.
+ **/
+ public final static String translateColonsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/translate-colons";
+
+ /**
+ A value of "true" indicates that the parser will
+ attempt to restart the restartable elements.
+ **/
+ public final static String restartElementsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/restart-elements";
+
+ /**
+ A value of "true" indicates that the parser will
+ transmit whitespace in element-only content via the SAX
+ ignorableWhitespace callback. Normally this is not done,
+ because HTML is an SGML application and SGML suppresses
+ such whitespace.
+ **/
+ public final static String ignorableWhitespaceFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace";
+
+ /**
+ A value of "true" indicates that the parser will treat CDATA
+ elements specially. Normally true, since the input is by
+ default HTML.
+ **/
+ public final static String CDATAElementsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/cdata-elements";
+
+ /**
+ Used to see some syntax events that are essential in some
+ applications: comments, CDATA delimiters, selected general
+ entity inclusions, and the start and end of the DTD (and
+ declaration of document element name). The Object must implement
+ org.xml.sax.ext.LexicalHandler.
+ **/
+ public final static String lexicalHandlerProperty =
+ "http://xml.org/sax/properties/lexical-handler";
+
+ /**
+ Specifies the Scanner object this Parser uses.
+ **/
+ public final static String scannerProperty =
+ "http://www.ccil.org/~cowan/tagsoup/properties/scanner";
+
+ /**
+ Specifies the Schema object this Parser uses.
+ **/
+ public final static String schemaProperty =
+ "http://www.ccil.org/~cowan/tagsoup/properties/schema";
+
+ /**
+ Specifies the AutoDetector (for encoding detection) this Parser uses.
+ **/
+ public final static String autoDetectorProperty =
+ "http://www.ccil.org/~cowan/tagsoup/properties/auto-detector";
+
+ // Due to sucky Java order of initialization issues, these
+ // entries are maintained separately from the initial values of
+ // the corresponding instance variables, but care must be taken
+ // to keep them in sync.
+
+	// Feature URI -> Boolean for every feature this parser recognizes.
+	// getFeature and setFeature reject any name that is not a key here,
+	// so each public *Feature constant must be registered exactly once.
+	private HashMap theFeatures = new HashMap();
+	{
+		// Standard SAX2 features
+		theFeatures.put(namespacesFeature, truthValue(DEFAULT_NAMESPACES));
+		theFeatures.put(namespacePrefixesFeature, Boolean.FALSE);
+		theFeatures.put(externalGeneralEntitiesFeature, Boolean.FALSE);
+		theFeatures.put(externalParameterEntitiesFeature, Boolean.FALSE);
+		theFeatures.put(isStandaloneFeature, Boolean.FALSE);
+		theFeatures.put(lexicalHandlerParameterEntitiesFeature,
+			Boolean.FALSE);
+		theFeatures.put(resolveDTDURIsFeature, Boolean.TRUE);
+		theFeatures.put(stringInterningFeature, Boolean.TRUE);
+		theFeatures.put(useAttributes2Feature, Boolean.FALSE);
+		theFeatures.put(useLocator2Feature, Boolean.FALSE);
+		theFeatures.put(useEntityResolver2Feature, Boolean.FALSE);
+		theFeatures.put(validationFeature, Boolean.FALSE);
+		// Previously xmlnsURIsFeature was registered twice and this
+		// feature not at all, so it was reported as unknown.
+		theFeatures.put(unicodeNormalizationCheckingFeature, Boolean.FALSE);
+		theFeatures.put(xmlnsURIsFeature, Boolean.FALSE);
+		theFeatures.put(XML11Feature, Boolean.FALSE);
+
+		// TagSoup-specific features; these start from the same
+		// DEFAULT_* values as the corresponding instance flags.
+		theFeatures.put(ignoreBogonsFeature, truthValue(DEFAULT_IGNORE_BOGONS));
+		theFeatures.put(bogonsEmptyFeature, truthValue(DEFAULT_BOGONS_EMPTY));
+		theFeatures.put(rootBogonsFeature, truthValue(DEFAULT_ROOT_BOGONS));
+		theFeatures.put(defaultAttributesFeature, truthValue(DEFAULT_DEFAULT_ATTRIBUTES));
+		theFeatures.put(translateColonsFeature, truthValue(DEFAULT_TRANSLATE_COLONS));
+		theFeatures.put(restartElementsFeature, truthValue(DEFAULT_RESTART_ELEMENTS));
+		theFeatures.put(ignorableWhitespaceFeature, truthValue(DEFAULT_IGNORABLE_WHITESPACE));
+		theFeatures.put(CDATAElementsFeature, truthValue(DEFAULT_CDATA_ELEMENTS));
+	}
+
+ /**
+ Converts a primitive boolean to one of the two shared Boolean
+ constants, so the result is always exactly Boolean.TRUE or
+ Boolean.FALSE.
+ @param b the value to convert
+ @return Boolean.TRUE when b is true, otherwise Boolean.FALSE
+ **/
+ private static Boolean truthValue(boolean b) {
+     if (b) {
+         return Boolean.TRUE;
+     }
+     return Boolean.FALSE;
+ }
+
+
+ /**
+ Reports the current setting of a feature.
+ @param name the URI that identifies the feature
+ @return the current setting
+ @throws SAXNotRecognizedException if the name is not in the feature table
+ **/
+ public boolean getFeature (String name)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     Boolean setting = (Boolean)theFeatures.get(name);
+     if (setting != null) {
+         return setting.booleanValue();
+     }
+     throw new SAXNotRecognizedException("Unknown feature " + name);
+ }
+
+ /**
+ Changes the setting of a feature. The new value is stored in the
+ feature table and, for the features that have one, copied into the
+ matching instance variable.
+ @param name the URI that identifies the feature
+ @param value the new setting
+ @throws SAXNotRecognizedException if the name is not in the feature table
+ **/
+ public void setFeature (String name, boolean value)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     Boolean known = (Boolean)theFeatures.get(name);
+     if (known == null) {
+         throw new SAXNotRecognizedException("Unknown feature " + name);
+     }
+     theFeatures.put(name, value ? Boolean.TRUE : Boolean.FALSE);
+
+     // Features not listed below live only in the table.
+     if (name.equals(namespacesFeature)) {
+         namespaces = value;
+     }
+     else if (name.equals(ignoreBogonsFeature)) {
+         ignoreBogons = value;
+     }
+     else if (name.equals(bogonsEmptyFeature)) {
+         bogonsEmpty = value;
+     }
+     else if (name.equals(rootBogonsFeature)) {
+         rootBogons = value;
+     }
+     else if (name.equals(defaultAttributesFeature)) {
+         defaultAttributes = value;
+     }
+     else if (name.equals(translateColonsFeature)) {
+         translateColons = value;
+     }
+     else if (name.equals(restartElementsFeature)) {
+         restartElements = value;
+     }
+     else if (name.equals(ignorableWhitespaceFeature)) {
+         ignorableWhitespace = value;
+     }
+     else if (name.equals(CDATAElementsFeature)) {
+         CDATAElements = value;
+     }
+ }
+
+ public Object getProperty (String name)
+ throws SAXNotRecognizedException, SAXNotSupportedException {
+ if (name.equals(lexicalHandlerProperty)) {
+ return theLexicalHandler == this ? null : theLexicalHandler;
+ }
+ else if (name.equals(scannerProperty)) {
+ return theScanner;
+ }
+ else if (name.equals(schemaProperty)) {
+ return theSchema;
+ }
+ else if (name.equals(autoDetectorProperty)) {
+ return theAutoDetector;
+ }
+ else {
+ throw new SAXNotRecognizedException("Unknown property " + name);
+ }
+ }
+
+ public void setProperty (String name, Object value)
+ throws SAXNotRecognizedException, SAXNotSupportedException {
+ if (name.equals(lexicalHandlerProperty)) {
+ if (value == null) {
+ theLexicalHandler = this;
+ }
+ else if (value instanceof LexicalHandler) {
+ theLexicalHandler = (LexicalHandler)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your lexical handler is not a LexicalHandler");
+ }
+ }
+ else if (name.equals(scannerProperty)) {
+ if (value instanceof Scanner) {
+ theScanner = (Scanner)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your scanner is not a Scanner");
+ }
+ }
+ else if (name.equals(schemaProperty)) {
+ if (value instanceof Schema) {
+ theSchema = (Schema)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your schema is not a Schema");
+ }
+ }
+ else if (name.equals(autoDetectorProperty)) {
+ if (value instanceof AutoDetector) {
+ theAutoDetector = (AutoDetector)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your auto-detector is not an AutoDetector");
+ }
+ }
+ else {
+ throw new SAXNotRecognizedException("Unknown property " + name);
+ }
+ }
+
+ /**
+ Installs the entity resolver. Passing null makes the Parser act as
+ its own resolver.
+ **/
+ public void setEntityResolver (EntityResolver resolver) {
+     if (resolver == null) {
+         theEntityResolver = this;
+     }
+     else {
+         theEntityResolver = resolver;
+     }
+ }
+
+ /**
+ Returns the installed entity resolver, or null when the Parser is
+ acting as its own resolver.
+ **/
+ public EntityResolver getEntityResolver () {
+     if (theEntityResolver == this) {
+         return null;
+     }
+     return theEntityResolver;
+ }
+
+ /**
+ Installs the DTD handler. Passing null makes the Parser act as its
+ own DTD handler.
+ **/
+ public void setDTDHandler (DTDHandler handler) {
+     if (handler == null) {
+         theDTDHandler = this;
+     }
+     else {
+         theDTDHandler = handler;
+     }
+ }
+
+ /**
+ Returns the installed DTD handler, or null when the Parser is acting
+ as its own DTD handler.
+ **/
+ public DTDHandler getDTDHandler () {
+     if (theDTDHandler == this) {
+         return null;
+     }
+     return theDTDHandler;
+ }
+
+ /**
+ Installs the content handler that receives the document events.
+ Passing null makes the Parser act as its own content handler.
+ **/
+ public void setContentHandler (ContentHandler handler) {
+     if (handler == null) {
+         theContentHandler = this;
+     }
+     else {
+         theContentHandler = handler;
+     }
+ }
+
+ /**
+ Returns the installed content handler, or null when the Parser is
+ acting as its own content handler.
+ **/
+ public ContentHandler getContentHandler () {
+     if (theContentHandler == this) {
+         return null;
+     }
+     return theContentHandler;
+ }
+
+ /**
+ Installs the error handler. Passing null makes the Parser act as its
+ own error handler.
+ **/
+ public void setErrorHandler (ErrorHandler handler) {
+     if (handler == null) {
+         theErrorHandler = this;
+     }
+     else {
+         theErrorHandler = handler;
+     }
+ }
+
+ /**
+ Returns the installed error handler, or null when the Parser is
+ acting as its own error handler.
+ **/
+ public ErrorHandler getErrorHandler () {
+     if (theErrorHandler == this) {
+         return null;
+     }
+     return theErrorHandler;
+ }
+
+ /**
+ Parses the document described by an InputSource, reporting it to the
+ installed handlers.
+ <p>
+ The per-document state is reset first, then a Reader is obtained for
+ the source. When the scanner is also a Locator it is handed to the
+ content handler before startDocument, because SAX requires
+ setDocumentLocator to precede every other ContentHandler event.
+ If the schema has a non-empty URI, its prefix mapping is started
+ before scanning begins.
+ @param input the source to read from
+ @throws IOException if the source cannot be opened or read
+ @throws SAXException if a handler reports an error
+ **/
+ public void parse (InputSource input) throws IOException, SAXException {
+     setup();
+     Reader r = getReader(input);
+     theScanner.resetDocumentLocator(input.getPublicId(), input.getSystemId());
+     if (theScanner instanceof Locator) {
+         // Must come before startDocument() to honour the SAX contract.
+         theContentHandler.setDocumentLocator((Locator)theScanner);
+     }
+     theContentHandler.startDocument();
+     if (!(theSchema.getURI().equals(""))) {
+         theContentHandler.startPrefixMapping(theSchema.getPrefix(),
+             theSchema.getURI());
+     }
+     theScanner.scan(r, this);
+ }
+
+ public void parse (String systemid) throws IOException, SAXException {
+ parse(new InputSource(systemid));
+ }
+
+ // Prepares the Parser for a new document: fills in the schema, scanner
+ // and auto-detector if the application did not supply them, and resets
+ // all per-document parsing state so the same Parser can be reused.
+ private void setup() {
+     if (theSchema == null) theSchema = new HTMLSchema();
+     if (theScanner == null) theScanner = new HTMLScanner();
+     if (theAutoDetector == null) {
+         // Fallback detector: no detection at all, just the
+         // InputStreamReader default decoding.
+         theAutoDetector = new AutoDetector() {
+             public Reader autoDetectingReader(InputStream i) {
+                 return new InputStreamReader(i);
+             }
+         };
+     }
+     theStack = new Element(theSchema.getElementType("<root>"), defaultAttributes);
+     thePCDATA = new Element(theSchema.getElementType("<pcdata>"), defaultAttributes);
+     theNewElement = null;
+     theAttributeName = null;
+     thePITarget = null;
+     theSaved = null;
+     theEntity = 0;
+     virginStack = true;
+     // Forget the previous document's DOCTYPE entirely; without clearing
+     // the flag, decl() would ignore the DOCTYPE of every later document.
+     theDoctypeIsPresent = false;
+     theDoctypeName = theDoctypePublicId = theDoctypeSystemId = null;
+ }
+
+ // Return a Reader for the contents of an InputSource.
+ // The character stream is used when there is one. Otherwise the byte
+ // stream (or, failing that, a stream opened from the system id) is
+ // decoded with the declared encoding, or handed to the auto-detector
+ // when no encoding is declared. An unsupported encoding name falls
+ // back to the InputStreamReader default.
+ // No buffering wrappers are added here.
+ private Reader getReader(InputSource s) throws SAXException, IOException {
+     Reader chars = s.getCharacterStream();
+     InputStream bytes = s.getByteStream();
+     String encoding = s.getEncoding();
+     String publicid = s.getPublicId();
+     String systemid = s.getSystemId();
+     if (chars != null) {
+         return chars;
+     }
+     if (bytes == null) {
+         bytes = getInputStream(publicid, systemid);
+     }
+     if (encoding == null) {
+         return theAutoDetector.autoDetectingReader(bytes);
+     }
+     try {
+         return new InputStreamReader(bytes, encoding);
+     }
+     catch (UnsupportedEncodingException e) {
+         return new InputStreamReader(bytes);
+     }
+ }
+
+ // Get an InputStream based on a publicid and a systemid.
+ // We don't process publicids (who uses them anyhow?); only the systemid
+ // is used. It is resolved against a file: URL for the current working
+ // directory, so a relative systemid names a local file while an
+ // absolute URL is opened as given.
+ // A null systemid is rejected up front with a descriptive
+ // MalformedURLException instead of a message-less one from URL parsing.
+ private InputStream getInputStream(String publicid, String systemid) throws IOException, SAXException {
+     if (systemid == null) {
+         throw new java.net.MalformedURLException(
+             "Cannot open input: no system id was supplied");
+     }
+     URL basis = new URL("file", "", System.getProperty("user.dir") + "/.");
+     URL url = new URL(basis, systemid);
+     URLConnection c = url.openConnection();
+     return c.getInputStream();
+ }
+
+ // ScanHandler implementation
+ //
+ // Per-document state shared by the ScanHandler callbacks below.
+ // setup() resets these fields before each parse.
+
+ // Element created by gi() for the start-tag being read; rectify() pushes it and clears this field.
+ private Element theNewElement = null;
+ // Name recorded by aname(); consumed and cleared by adup() or aval().
+ private String theAttributeName = null;
+ // Set by decl() once a DOCTYPE has been seen, so that later ones are ignored.
+ private boolean theDoctypeIsPresent = false;
+ // DOCTYPE details recorded by decl(); push() passes the ids to the entity resolver.
+ private String theDoctypePublicId = null;
+ private String theDoctypeSystemId = null;
+ private String theDoctypeName = null;
+ // Target recorded by pitarget(); consumed and cleared by pi().
+ private String thePITarget = null;
+ // Top of the stack of open elements; entries are linked through Element.next().
+ private Element theStack = null;
+ // Elements saved by restartablyPop() that restart() may push again.
+ private Element theSaved = null;
+ // Pseudo-element of type "<pcdata>" used to ask whether character data fits here.
+ private Element thePCDATA = null;
+ // Result of the most recent entity() lookup, returned by getEntity().
+ private int theEntity = 0; // needs to support chars past U+FFFF
+
+ public void adup(char[] buff, int offset, int length) throws SAXException {
+ if (theNewElement == null || theAttributeName == null) return;
+ theNewElement.setAttribute(theAttributeName, null, theAttributeName);
+ theAttributeName = null;
+ }
+
+ /**
+ Records the name of the attribute whose value is about to be
+ reported. Ignored when no start-tag element is pending.
+ <p>
+ The name is made XML-safe by makeName and then lowercased here,
+ since we currently don't rely on Schema to canonicalize attribute
+ names. Lowercasing uses a fixed locale so that the result does not
+ depend on the platform default (under a Turkish locale, for example,
+ "ID" would otherwise not become "id").
+ **/
+ public void aname(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement == null) return;
+     theAttributeName = makeName(buff, offset, length)
+         .toLowerCase(java.util.Locale.ENGLISH);
+ }
+
+ /**
+ Stores the value of the pending attribute on the pending start-tag
+ element, after expanding entity references in it. Does nothing
+ unless both an element and an attribute name are pending.
+ **/
+ public void aval(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement == null) {
+         return;
+     }
+     if (theAttributeName == null) {
+         return;
+     }
+     String raw = new String(buff, offset, length);
+     String expanded = expandEntities(raw);
+     theNewElement.setAttribute(theAttributeName, null, expanded);
+     theAttributeName = null;
+ }
+
+ // Expand entity references in attribute values selectively.
+ // Currently we expand a reference iff it is properly terminated
+ // with a semicolon.
+ //
+ // Every source character is first copied to dst. When a ';' closes a
+ // reference that lookupEntity resolves, the copied "&name;" text is
+ // overwritten in place by the character (or surrogate pair) and dstlen
+ // is pulled back. The result is never longer than the input, so dst
+ // can be allocated with the input's length.
+ private String expandEntities(String src) {
+ // Index in dst just after the '&' of the reference being read, or -1
+ // when not inside a reference.
+ int refStart = -1;
+ int len = src.length();
+ char[] dst = new char[len];
+ int dstlen = 0;
+ for (int i = 0; i < len; i++) {
+ char ch = src.charAt(i);
+ dst[dstlen++] = ch;
+// System.err.print("i = " + i + ", d = " + dstlen + ", ch = [" + ch + "] ");
+ if (ch == '&' && refStart == -1) {
+ // start of a ref excluding &
+ refStart = dstlen;
+// System.err.println("start of ref");
+ }
+ else if (refStart == -1) {
+ // not in a ref
+// System.err.println("not in ref");
+ }
+ else if (Character.isLetter(ch) ||
+ Character.isDigit(ch) ||
+ ch == '#') {
+ // valid entity char
+// System.err.println("valid");
+ }
+ else if (ch == ';') {
+ // properly terminated ref
+// System.err.print("got [" + new String(dst, refStart, dstlen-refStart-1) + "]");
+ // The name runs from refStart up to, but not including, the ';' just copied.
+ int ent = lookupEntity(dst, refStart, dstlen - refStart - 1);
+// System.err.println(" = " + ent);
+ if (ent > 0xFFFF) {
+ // Beyond the BMP: write a surrogate pair over the '&' and
+ // the character after it.
+ ent -= 0x10000;
+ dst[refStart - 1] = (char)((ent>>10) + 0xD800);
+ dst[refStart] = (char)((ent&0x3FF) + 0xDC00);
+ dstlen = refStart + 1;
+ }
+ else if (ent != 0) {
+ // Write the single character over the '&'.
+ dst[refStart - 1] = (char)ent;
+ dstlen = refStart;
+ }
+ // ent == 0 means the lookup failed: the text stays as copied.
+ refStart = -1;
+ }
+ else {
+ // improperly terminated ref
+// System.err.println("end of ref");
+ refStart = -1;
+ }
+ }
+ return new String(dst, 0, dstlen);
+ }
+
+ /**
+ Looks up the entity or character reference whose name is in the
+ buffer and remembers the result, which getEntity then returns.
+ **/
+ public void entity(char[] buff, int offset, int length) throws SAXException {
+ theEntity = lookupEntity(buff, offset, length);
+ }
+
+ // Process numeric character references,
+ // deferring to the schema for named ones.
+ //
+ // The buffer holds the reference without its leading '&' and trailing
+ // ';'. "#ddd" is read as decimal, "#xhhh" / "#Xhhh" as hexadecimal.
+ // Returns 0 when the reference is empty, is not a well-formed number,
+ // or names a value outside the Unicode range 0..0x10FFFF (callers treat
+ // 0 as "no such entity"; out-of-range values could not be turned into
+ // valid chars or surrogate pairs anyway).
+ private int lookupEntity(char[] buff, int offset, int length) {
+     if (length < 1) return 0;
+     if (buff[offset] != '#') {
+         // Named reference: the schema knows these.
+         return theSchema.getEntity(new String(buff, offset, length));
+     }
+
+     int radix = 10;
+     int skip = 1; // the '#'
+     if (length > 1 && (buff[offset+1] == 'x'
+             || buff[offset+1] == 'X')) {
+         radix = 16;
+         skip = 2; // the '#' and the 'x'
+     }
+     int codePoint;
+     try {
+         codePoint = Integer.parseInt(
+             new String(buff, offset + skip, length - skip), radix);
+     }
+     catch (NumberFormatException e) {
+         return 0;
+     }
+     if (codePoint < 0 || codePoint > 0x10FFFF) {
+         return 0;
+     }
+     return codePoint;
+ }
+
+ public void eof(char[] buff, int offset, int length) throws SAXException {
+ if (virginStack) rectify(thePCDATA);
+ while (theStack.next() != null) {
+ pop();
+ }
+ if (!(theSchema.getURI().equals("")))
+ theContentHandler.endPrefixMapping(theSchema.getPrefix());
+ theContentHandler.endDocument();
+ }
+
+ public void etag(char[] buff, int offset, int length) throws SAXException {
+ if (etag_cdata(buff, offset, length)) return;
+ etag_basic(buff, offset, length);
+ }
+
+ // The characters of an end-tag: "</" is etagchars[0..1], ">" is etagchars[2].
+ private static char[] etagchars = {'<', '/', '>'};
+
+ /**
+ Handles an end-tag seen while the current element is a CDATA element.
+ If the tag does not match the current element's name (compared
+ without regard to case), or is not properly formed (junk after the
+ name), it is reported as the characters "&lt;/", the tag text and
+ "&gt;", and the scanner is put back into CDATA mode.
+ @return true if the tag was consumed as character data; false if
+ CDATA elements are disabled, the current element is not a CDATA
+ element, or the tag really does close it
+ **/
+ public boolean etag_cdata(char[] buff, int offset, int length) throws SAXException {
+     String currentName = theStack.name();
+     boolean inCDATAElement =
+         CDATAElements && (theStack.flags() & Schema.F_CDATA) != 0;
+     if (!inCDATAElement) {
+         return false;
+     }
+
+     boolean realTag = (length == currentName.length());
+     int i = 0;
+     while (realTag && i < length) {
+         char got = Character.toLowerCase(buff[offset + i]);
+         char want = Character.toLowerCase(currentName.charAt(i));
+         realTag = (got == want);
+         i++;
+     }
+     if (realTag) {
+         return false;
+     }
+
+     theContentHandler.characters(etagchars, 0, 2);
+     theContentHandler.characters(buff, offset, length);
+     theContentHandler.characters(etagchars, 2, 1);
+     theScanner.startCDATA();
+     return true;
+ }
+
+ /**
+ Handles an ordinary end-tag by closing the nearest open element of
+ that name. An empty name (length 0) closes the current element.
+ End-tags whose name is unknown to the schema, or that match nothing
+ on the stack, are ignored.
+ **/
+ public void etag_basic(char[] buff, int offset, int length) throws SAXException {
+ // An end-tag abandons any start-tag that was still pending.
+ theNewElement = null;
+ String name;
+ if (length != 0) {
+ // Canonicalize case of name
+ name = makeName(buff, offset, length);
+// System.err.println("got etag [" + name + "]");
+ ElementType type = theSchema.getElementType(name);
+ if (type == null) return; // mysterious end-tag
+ name = type.name();
+ }
+ else {
+ name = theStack.name();
+ }
+// System.err.println("%% Got end of " + name);
+
+ // Walk down the stack to the nearest element with this name, noting
+ // whether an F_NOFORCE element is passed on the way.
+ Element sp;
+ boolean inNoforce = false;
+ for (sp = theStack; sp != null; sp = sp.next()) {
+ if (sp.name().equals(name)) break;
+ if ((sp.flags() & Schema.F_NOFORCE) != 0) inNoforce = true;
+ }
+
+ if (sp == null) return; // Ignore unknown etags
+ // Never close the bottom two entries of the stack.
+ if (sp.next() == null || sp.next().next() == null) return;
+ if (inNoforce) { // inside an F_NOFORCE element?
+ sp.preclose(); // preclose the matching element
+ }
+ else { // restartably pop everything above us
+ while (theStack != sp) {
+ restartablyPop();
+ }
+ pop();
+ }
+ // pop any preclosed elements now at the top
+ while (theStack.isPreclosed()) {
+ pop();
+ }
+ // Reopen any saved restartable elements that fit here.
+ restart(null);
+ }
+
+ // Re-push saved restartable elements for as long as that is possible:
+ // the current top of stack must be able to contain the saved element,
+ // and the saved element must be able to contain e.
+ // e is the next element to be started, if we know what it is (null
+ // when we don't, in which case only the first condition applies).
+ private void restart(Element e) throws SAXException {
+     while (theSaved != null) {
+         if (!theStack.canContain(theSaved)) {
+             break;
+         }
+         if (e != null && !theSaved.canContain(e)) {
+             break;
+         }
+         Element remaining = theSaved.next();
+         push(theSaved);
+         theSaved = remaining;
+     }
+ }
+
+ // Pop the stack irrevocably
+ // Reports endElement for the element on top of the stack, ends any
+ // prefix mappings that were started for its foreign element or
+ // attribute names, and removes it from the stack.
+ private void pop() throws SAXException {
+ if (theStack == null) return; // empty stack
+ String name = theStack.name();
+ String localName = theStack.localName();
+ String namespace = theStack.namespace();
+ String prefix = prefixOf(name);
+
+// System.err.println("%% Popping " + name);
+ // With namespace processing off, only the qualified name is reported.
+ if (!namespaces) namespace = localName = "";
+ theContentHandler.endElement(namespace, localName, name);
+ if (foreign(prefix, namespace)) {
+ theContentHandler.endPrefixMapping(prefix);
+// System.err.println("%% Unmapping [" + prefix + "] for elements to " + namespace);
+ }
+ // Attribute mappings are ended in the reverse of the order in which
+ // push() started them.
+ Attributes atts = theStack.atts();
+ for (int i = atts.getLength() - 1; i >= 0; i--) {
+ String attNamespace = atts.getURI(i);
+ String attPrefix = prefixOf(atts.getQName(i));
+ if (foreign(attPrefix, attNamespace)) {
+ theContentHandler.endPrefixMapping(attPrefix);
+// System.err.println("%% Unmapping [" + attPrefix + "] for attributes to " + attNamespace);
+ }
+ }
+ theStack = theStack.next();
+ }
+
+ // Pop the stack restartably: the element is closed like any other, but
+ // when restarting is enabled and the element is flagged F_RESTART it is
+ // anonymized and put at the head of the saved list so that restart()
+ // can push it again later.
+ private void restartablyPop() throws SAXException {
+     Element popped = theStack;
+     pop();
+     if (!restartElements) {
+         return;
+     }
+     if ((popped.flags() & Schema.F_RESTART) == 0) {
+         return;
+     }
+     popped.anonymize();
+     popped.setNext(theSaved);
+     theSaved = popped;
+ }
+
+ // Push element onto stack
+ // Starts prefix mappings for any foreign element or attribute names,
+ // reports startElement, and makes e the new top of the stack. If e is
+ // flagged F_CDATA (and CDATA elements are enabled) the scanner is
+ // switched to CDATA mode.
+
+ // True until the first element of the document has been pushed.
+ private boolean virginStack = true;
+ private void push(Element e) throws SAXException {
+ String name = e.name();
+ String localName = e.localName();
+ String namespace = e.namespace();
+ String prefix = prefixOf(name);
+
+// System.err.println("%% Pushing " + name);
+ e.clean();
+ // With namespace processing off, only the qualified name is reported.
+ if (!namespaces) namespace = localName = "";
+ // On the first push, if the element matches the DOCTYPE name, give the
+ // entity resolver a chance to see the DOCTYPE's ids. The resolver's
+ // result is not used here.
+ if (virginStack && localName.equalsIgnoreCase(theDoctypeName)) {
+ try {
+ theEntityResolver.resolveEntity(theDoctypePublicId, theDoctypeSystemId);
+ } catch (IOException ew) { } // Can't be thrown for root I believe.
+ }
+ if (foreign(prefix, namespace)) {
+ theContentHandler.startPrefixMapping(prefix, namespace);
+// System.err.println("%% Mapping [" + prefix + "] for elements to " + namespace);
+ }
+ Attributes atts = e.atts();
+ int len = atts.getLength();
+ for (int i = 0; i < len; i++) {
+ String attNamespace = atts.getURI(i);
+ String attPrefix = prefixOf(atts.getQName(i));
+ if (foreign(attPrefix, attNamespace)) {
+ theContentHandler.startPrefixMapping(attPrefix, attNamespace);
+// System.err.println("%% Mapping [" + attPrefix + "] for attributes to " + attNamespace);
+ }
+ }
+ theContentHandler.startElement(namespace, localName, name, e.atts());
+ e.setNext(theStack);
+ theStack = e;
+ virginStack = false;
+ if (CDATAElements && (theStack.flags() & Schema.F_CDATA) != 0) {
+ theScanner.startCDATA();
+ }
+ }
+
+ // Get the prefix from a QName: the text before the first colon, or the
+ // empty string when the name has no colon.
+ private String prefixOf(String name) {
+     int colon = name.indexOf(':');
+     if (colon == -1) {
+         return "";
+     }
+     return name.substring(0, colon);
+ }
+
+ // Return true if we have a foreign name: one that has a prefix and a
+ // namespace, where the namespace is not the schema's own.
+ private boolean foreign(String prefix, String namespace) {
+     return !prefix.equals("")
+         && !namespace.equals("")
+         && !namespace.equals(theSchema.getURI());
+ }
+
+ /**
+ * Parsing the complete XML Document Type Definition is way too complex,
+ * but for many simple cases we can extract something useful from it.
+ *
+ * doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
+ * DeclSep ::= PEReference | S
+ * intSubset ::= (markupdecl | DeclSep)*
+ * markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
+ * ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
+ *
+ * Only the first DOCTYPE declaration of a document is acted on; it is
+ * reported to the lexical handler as an immediately closed DTD, and its
+ * name and ids are remembered. Any other declaration is ignored.
+ *
+ * NOTE(review): the keywords DOCTYPE, SYSTEM and PUBLIC are matched
+ * case-sensitively, so a lowercase doctype is not recognized here --
+ * confirm whether that is intended.
+ */
+ public void decl(char[] buff, int offset, int length) throws SAXException {
+ String s = new String(buff, offset, length);
+ String name = null;
+ String systemid = null;
+ String publicid = null;
+ // Words of the declaration; quoted phrases stay in one piece.
+ String[] v = split(s);
+ if (v.length > 0 && "DOCTYPE".equals(v[0])) {
+ if (theDoctypeIsPresent) return; // one doctype only!
+ theDoctypeIsPresent = true;
+ if (v.length > 1) {
+ name = v[1];
+ if (v.length>3 && "SYSTEM".equals(v[2])) {
+ systemid = v[3];
+ }
+ else if (v.length > 3 && "PUBLIC".equals(v[2])) {
+ publicid = v[3];
+ if (v.length > 4) {
+ systemid = v[4];
+ }
+ else {
+ systemid = "";
+ }
+ }
+ }
+ }
+ publicid = trimquotes(publicid);
+ systemid = trimquotes(systemid);
+ if (name != null) {
+ publicid = cleanPublicid(publicid);
+ theLexicalHandler.startDTD(name, publicid, systemid);
+ theLexicalHandler.endDTD();
+ theDoctypeName = name;
+ theDoctypePublicId = publicid;
+ if (theScanner instanceof Locator) { // Must resolve systemid
+ // Start from the document's own system id and resolve the
+ // DOCTYPE's system id against it.
+ theDoctypeSystemId = ((Locator)theScanner).getSystemId();
+ try {
+ theDoctypeSystemId = new URL(new URL(theDoctypeSystemId), systemid).toString();
+ // NOTE(review): any failure here is swallowed, which leaves the
+ // document's system id in theDoctypeSystemId -- confirm that this
+ // fallback is intended.
+ } catch (Exception e) {}
+ }
+ }
+ }
+
+ // If the String is quoted, trim the quotes.
+ // A string counts as quoted when it is at least two characters long
+ // and starts and ends with the same quote character (' or ").
+ // Anything else -- including null, the empty string and a lone quote
+ // character -- is returned unchanged.
+ private static String trimquotes(String in) {
+     if (in == null) return in;
+     int length = in.length();
+     // A single character cannot be both the opening and the closing
+     // quote; trimming it would ask for substring(1, 0).
+     if (length < 2) return in;
+     char s = in.charAt(0);
+     char e = in.charAt(length - 1);
+     if (s == e && (s == '\'' || s == '"')) {
+         in = in.substring(1, length - 1);
+     }
+     return in;
+ }
+
+ // Split the supplied String into words or phrases separated by whitespace.
+ // Recognises quotes around a phrase and doesn't split it; the quote
+ // characters stay part of the returned phrase. A quote preceded by a
+ // backslash does not open or close a phrase.
+ // Returns an empty array for an empty or all-blank String.
+ private static String[] split(String val) throws IllegalArgumentException {
+     val = val.trim();
+     if (val.length() == 0) {
+         return new String[0];
+     }
+     ArrayList l = new ArrayList();
+     int s = 0;          // start of the current token, or -1 between tokens
+     int e = 0;
+     boolean sq = false; // inside single quotes
+     boolean dq = false; // inside double quotes
+     char lastc = 0;
+     int len = val.length();
+     for (e = 0; e < len; e++) {
+         char c = val.charAt(e);
+         if (!dq && c == '\'' && lastc != '\\') {
+             sq = !sq;
+             if (s < 0) s = e;
+         }
+         else if (!sq && c == '\"' && lastc != '\\') {
+             dq = !dq;
+             if (s < 0) s = e;
+         }
+         else if (!sq && !dq) {
+             if (Character.isWhitespace(c)) {
+                 if (s >= 0) l.add(val.substring(s, e));
+                 s = -1;
+             }
+             else if (s < 0) {
+                 s = e;
+             }
+         }
+         lastc = c;
+     }
+     // trim() only strips characters up to ' ', so the input can still end
+     // in other whitespace (e.g. U+2028); then no token is open here.
+     if (s >= 0) l.add(val.substring(s, e));
+     return (String[])l.toArray(new String[0]);
+ }
+
+ // Replace junk in publicids with spaces
+
+ // The characters that are kept as they are; everything else (including
+ // whitespace) counts as junk.
+ private static String legal =
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%";
+
+ // Returns src with every run of junk characters collapsed to a single
+ // space, and with leading and trailing junk removed altogether.
+ // A null src yields null.
+ private String cleanPublicid(String src) {
+     if (src == null) {
+         return null;
+     }
+     int len = src.length();
+     StringBuffer cleaned = new StringBuffer(len);
+     // True at the start and after a space has been emitted, so that
+     // further junk adds nothing.
+     boolean suppressSpace = true;
+     for (int i = 0; i < len; i++) {
+         char ch = src.charAt(i);
+         boolean keep = legal.indexOf(ch) != -1;
+         if (keep) {
+             cleaned.append(ch);
+             suppressSpace = false;
+             continue;
+         }
+         if (!suppressSpace) {
+             cleaned.append(' ');
+             suppressSpace = true;
+         }
+     }
+     // trim any final junk whitespace
+     return cleaned.toString().trim();
+ }
+
+
+ /**
+ Handles the element name of a start-tag by creating the pending
+ element for it. Ignored while another start-tag is still pending.
+ <p>
+ A name the schema does not know is either dropped (when ignore-bogons
+ is on) or added to the schema on the fly: with an empty or
+ any-content model depending on bogons-empty, and allowed as a root
+ only when root-bogons is on (otherwise it is given the schema's root
+ element as parent).
+ **/
+ public void gi(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement != null) {
+         return;
+     }
+     String name = makeName(buff, offset, length);
+     if (name == null) {
+         return;
+     }
+     ElementType type = theSchema.getElementType(name);
+     if (type == null) {
+         // Suppress unknown elements if ignore-bogons is on
+         if (ignoreBogons) {
+             return;
+         }
+         int bogonModel;
+         if (bogonsEmpty) {
+             bogonModel = Schema.M_EMPTY;
+         }
+         else {
+             bogonModel = Schema.M_ANY;
+         }
+         int bogonMemberOf;
+         if (rootBogons) {
+             bogonMemberOf = Schema.M_ANY;
+         }
+         else {
+             bogonMemberOf = Schema.M_ANY & ~ Schema.M_ROOT;
+         }
+         theSchema.elementType(name, bogonModel, bogonMemberOf, 0);
+         if (!rootBogons) {
+             theSchema.parent(name, theSchema.rootElementType().name());
+         }
+         type = theSchema.getElementType(name);
+     }
+
+     theNewElement = new Element(type, defaultAttributes);
+ }
+
+ /**
+ Handles the content of a CDATA section: the content is processed as
+ ordinary character data, bracketed by startCDATA and endCDATA events
+ on the lexical handler.
+ **/
+ public void cdsect(char[] buff, int offset, int length) throws SAXException {
+ theLexicalHandler.startCDATA();
+ pcdata(buff, offset, length);
+ theLexicalHandler.endCDATA();
+ }
+ /**
+ Handles character content. Empty content is ignored.
+ <p>
+ Content that is all whitespace and appears where the current element
+ cannot contain character data is reported as ignorable whitespace
+ (or dropped, when the ignorable-whitespace feature is off). All
+ other content is reported through characters(), after the stack has
+ been rectified so that character data is allowed.
+ **/
+ public void pcdata(char[] buff, int offset, int length) throws SAXException {
+     if (length == 0) return;
+     boolean allWhite = true;
+     for (int i = 0; i < length; i++) {
+         if (!Character.isWhitespace(buff[offset+i])) {
+             allWhite = false;
+             break; // one non-blank character settles it
+         }
+     }
+     if (allWhite && !theStack.canContain(thePCDATA)) {
+         if (ignorableWhitespace) {
+             theContentHandler.ignorableWhitespace(buff, offset, length);
+         }
+     }
+     else {
+         rectify(thePCDATA);
+         theContentHandler.characters(buff, offset, length);
+     }
+ }
+
+ public void pitarget(char[] buff, int offset, int length) throws SAXException {
+ if (theNewElement != null) return;
+ thePITarget = makeName(buff, offset, length).replace(':', '_');
+ }
+
+ /**
+ Reports a processing instruction whose target was recorded earlier
+ by pitarget. Ignored while a start-tag is pending or when no target
+ is recorded; instructions whose target is "xml" (in any case) are
+ not reported.
+ <p>
+ A trailing '?' in the data is dropped. The check looks at the last
+ character of the reported range, i.e. at offset + length - 1.
+ **/
+ public void pi(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement != null || thePITarget == null) return;
+     if ("xml".equalsIgnoreCase(thePITarget)) return;
+     if (length > 0 && buff[offset + length - 1] == '?') length--; // remove trailing ?
+     theContentHandler.processingInstruction(thePITarget,
+         new String(buff, offset, length));
+     thePITarget = null;
+ }
+
+ public void stagc(char[] buff, int offset, int length) throws SAXException {
+// System.err.println("%% Start-tag");
+ if (theNewElement == null) return;
+ rectify(theNewElement);
+ if (theStack.model() == Schema.M_EMPTY) {
+ // Force an immediate end tag
+ etag_basic(buff, offset, length);
+ }
+ }
+
+ /**
+ Handles the close of an empty-tag: the pending element is pushed
+ (rectifying the stack as needed) and then ended again at once.
+ Does nothing when no start-tag is pending.
+ **/
+ public void stage(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement != null) {
+         rectify(theNewElement);
+         // Force an immediate end tag
+         etag_basic(buff, offset, length);
+     }
+ }
+
+ // Comment buffer is twice the size of the output buffer
+ // NOTE(review): nothing in the code visible here reads or writes this
+ // buffer; cmnt() forwards the scanner's buffer directly -- confirm it
+ // is still needed.
+ private char[] theCommentBuffer = new char[2000];
+
+ /**
+ Passes a comment straight on to the lexical handler.
+ **/
+ public void cmnt(char[] buff, int offset, int length) throws SAXException {
+ theLexicalHandler.comment(buff, offset, length);
+ }
+
+ // Rectify the stack, pushing and popping as needed
+ // so that the argument can be safely pushed
+ //
+ // Works in three steps: (1) find an open element that can contain e,
+ // wrapping e in elements of its parent type until one is found;
+ // (2) pop, restartably, down to that element; (3) push e and whatever
+ // wrappers were added. Clears theNewElement when done.
+ private void rectify(Element e) throws SAXException {
+ Element sp;
+ while (true) {
+ // Look down the stack for an element that accepts e.
+ for (sp = theStack; sp != null; sp = sp.next()) {
+ if (sp.canContain(e)) break;
+ }
+ if (sp != null) break;
+ // None does: put e inside an element of its parent type and
+ // try again with that.
+ ElementType parentType = e.parent();
+ if (parentType == null) break;
+ Element parent = new Element(parentType, defaultAttributes);
+// System.err.println("%% Ascending from " + e.name() + " to " + parent.name());
+ parent.setNext(e);
+ e = parent;
+ }
+ if (sp == null) return; // don't know what to do
+ while (theStack != sp) {
+ // Never pop the bottom two entries of the stack.
+ if (theStack == null || theStack.next() == null ||
+ theStack.next().next() == null) break;
+ restartablyPop();
+ }
+ // Push the chain outermost first. The "<pcdata>" pseudo-element is
+ // never pushed itself. After each step, saved restartable elements
+ // that fit are pushed again.
+ while (e != null) {
+ Element nexte = e.next();
+ if (!e.name().equals("<pcdata>")) push(e);
+ e = nexte;
+ restart(e);
+ }
+ theNewElement = null;
+ }
+
+ /**
+ Returns the result of the most recent entity() lookup; it is 0 after
+ setup() until an entity has been looked up.
+ **/
+ public int getEntity() {
+ return theEntity;
+ }
+
+ // Return the argument as a valid XML name
+ // This no longer lowercases the result: we depend on Schema to
+ // canonicalize case.
+ //
+ // Letters and '_' are kept. Digits, '-' and '.' are kept too, but get a
+ // '_' put in front when they would otherwise start the name (or the
+ // part after the colon). The first ':' is kept, or turned into '_' when
+ // colons are being translated; later colons and all other characters
+ // are dropped. An empty result, or one ending in ':', gets a '_'
+ // appended. The returned String is interned.
+ private String makeName(char[] buff, int offset, int length) {
+     StringBuffer out = new StringBuffer(length + 2);
+     boolean colonSeen = false;
+     boolean atStart = true;
+     int end = offset + length;
+     for (int i = offset; i < end; i++) {
+         char ch = buff[i];
+         boolean nameStartChar = Character.isLetter(ch) || ch == '_';
+         if (nameStartChar) {
+             atStart = false;
+             out.append(ch);
+             continue;
+         }
+         boolean nameChar = Character.isDigit(ch) || ch == '-' || ch == '.';
+         if (nameChar) {
+             if (atStart) {
+                 out.append('_');
+             }
+             atStart = false;
+             out.append(ch);
+             continue;
+         }
+         if (ch == ':' && !colonSeen) {
+             colonSeen = true;
+             if (atStart) {
+                 out.append('_');
+             }
+             atStart = true;
+             if (translateColons) {
+                 out.append('_');
+             }
+             else {
+                 out.append(ch);
+             }
+         }
+     }
+     int outLength = out.length();
+     boolean needsPadding = (outLength == 0) || (out.charAt(outLength - 1) == ':');
+     if (needsPadding) {
+         out.append('_');
+     }
+     return out.toString().intern();
+ }
+
+ // Default LexicalHandler implementation
+ //
+ // These do nothing. They let the Parser stand in as its own lexical
+ // handler when the application has not supplied one (see setProperty,
+ // which installs "this" when the property is set to null).
+
+ public void comment(char[] ch, int start, int length) throws SAXException { }
+ public void endCDATA() throws SAXException { }
+ public void endDTD() throws SAXException { }
+ public void endEntity(String name) throws SAXException { }
+ public void startCDATA() throws SAXException { }
+ public void startDTD(String name, String publicid, String systemid) throws SAXException { }
+ public void startEntity(String name) throws SAXException { }
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/ScanHandler.java b/src/org/ccil/cowan/tagsoup/ScanHandler.java
new file mode 100644
index 0000000..368569a
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/ScanHandler.java
@@ -0,0 +1,119 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Scanner handler
+
+package org.ccil.cowan.tagsoup;
+import org.xml.sax.SAXException;
+
+/**
+An interface that Scanners use to report events in the input stream.
+**/
+
+public interface ScanHandler {
+ /**
+ Reports an attribute name without a value.
+ **/
+
+ public void adup(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports an attribute name; a value will follow.
+ **/
+
+ public void aname(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports an attribute value.
+ **/
+
+ public void aval(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ * Reports the content of a CDATA section (not a CDATA element)
+ */
+ public void cdsect(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ * Reports a <!....> declaration - typically a DOCTYPE
+ */
+
+ public void decl(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports an entity reference or character reference.
+ **/
+
+ public void entity(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports EOF.
+ **/
+
+ public void eof(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports an end-tag.
+ **/
+
+ public void etag(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports the general identifier (element type name) of a start-tag.
+ **/
+
+ public void gi(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports character content.
+ **/
+
+ public void pcdata(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports the data part of a processing instruction.
+ **/
+
+ public void pi(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports the target part of a processing instruction.
+ **/
+
+ public void pitarget(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports the close of a start-tag.
+ **/
+
+ public void stagc(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports the close of an empty-tag.
+ **/
+
+ public void stage(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Reports a comment.
+ **/
+
+ public void cmnt(char[] buff, int offset, int length) throws SAXException;
+
+ /**
+ Returns the value of the last entity or character reference reported.
+ **/
+
+ public int getEntity();
+ }
diff --git a/src/org/ccil/cowan/tagsoup/Scanner.java b/src/org/ccil/cowan/tagsoup/Scanner.java
new file mode 100644
index 0000000..04c8b97
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Scanner.java
@@ -0,0 +1,50 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Scanner
+
+package org.ccil.cowan.tagsoup;
+import java.io.IOException;
+import java.io.Reader;
+import org.xml.sax.SAXException;
+
+/**
+An interface allowing Parser to invoke scanners.
+**/
+
+public interface Scanner {
+
+ /**
+ Invoke a scanner.
+ @param r A source of characters to scan
+ @param h A ScanHandler to report events to
+ **/
+
+ public void scan(Reader r, ScanHandler h) throws IOException, SAXException;
+
+ /**
+ Reset the embedded locator.
+ @param publicid The publicid of the source
+ @param systemid The systemid of the source
+ **/
+
+ public void resetDocumentLocator(String publicid, String systemid);
+
+ /**
+ Signal to the scanner to start CDATA content mode.
+ **/
+
+ public void startCDATA();
+
+ }
diff --git a/src/org/ccil/cowan/tagsoup/Schema.java b/src/org/ccil/cowan/tagsoup/Schema.java
new file mode 100644
index 0000000..0d99a23
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Schema.java
@@ -0,0 +1,170 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Model of document
+
+package org.ccil.cowan.tagsoup;
+import java.util.HashMap;
+
+/**
+Abstract class representing a TSSL schema.
+Actual TSSL schemas are compiled into concrete subclasses of this class.
+**/
+
+public abstract class Schema {
+
+    // Model bit vectors (see the model and memberOf arguments of elementType).
+    public static final int M_ANY = 0xFFFFFFFF;   // every bit set
+    public static final int M_EMPTY = 0;          // no bit set
+    public static final int M_PCDATA = 1 << 30;
+    public static final int M_ROOT = 1 << 31;     // memberOf value that marks the root element type
+
+
+    // Values for the flags argument of elementType.
+    public static final int F_RESTART = 1;
+    public static final int F_CDATA = 2;
+    public static final int F_NOFORCE = 4;
+
+    private HashMap theEntities =
+        new HashMap();        // String -> Integer (character code of the entity)
+    private HashMap theElementTypes =
+        new HashMap();        // String (lower-cased name) -> ElementType
+
+    private String theURI = "";
+    private String thePrefix = "";
+    private ElementType theRoot = null;
+
+    /**
+    Fold an element name to the form used as a key in theElementTypes.
+    A fixed locale is used so that the result does not depend on the
+    default locale of the JVM: with a Turkish default locale the
+    no-argument toLowerCase() maps "I" to a dotless i, which would make
+    names such as "TITLE" or "IMG" miss their lower-case registrations.
+    @param name Name (Qname) of the element type
+    @return The lower-cased lookup key
+    **/
+
+    private static String canonicalName(String name) {
+        return name.toLowerCase(java.util.Locale.ENGLISH);
+    }
+
+    /**
+    Add or replace an element type for this schema.
+    The type is stored under the lower-cased name, so lookups through
+    getElementType are case-insensitive.  If memberOf is exactly M_ROOT
+    the new type also becomes the root element type.
+    @param name Name (Qname) of the element
+    @param model Models of the element's content as a vector of bits
+    @param memberOf Models the element is a member of as a vector of bits
+    @param flags Flags for the element
+    **/
+
+    public void elementType(String name, int model, int memberOf, int flags) {
+        ElementType e = new ElementType(name, model, memberOf, flags, this);
+        theElementTypes.put(canonicalName(name), e);
+        if (memberOf == M_ROOT) theRoot = e;
+    }
+
+    /**
+    Get the root element of this schema
+    @return The element type most recently registered with memberOf
+    equal to M_ROOT, or null if there is none
+    **/
+
+    public ElementType rootElementType() {
+        return theRoot;
+    }
+
+    /**
+    Add or replace a default attribute for an element type in this schema.
+    Throws an Error if the element type has not been registered.
+    @param elemName Name (Qname) of the element type
+    @param attrName Name (Qname) of the attribute
+    @param type Type of the attribute
+    @param value Default value of the attribute; null if no default
+    **/
+
+    public void attribute(String elemName, String attrName,
+                          String type, String value) {
+        ElementType e = getElementType(elemName);
+        if (e == null) {
+            throw new Error("Attribute " + attrName +
+                " specified for unknown element type " +
+                elemName);
+        }
+        e.setAttribute(attrName, type, value);
+    }
+
+    /**
+    Specify natural parent of an element in this schema.
+    Throws an Error if either element type has not been registered.
+    @param name Name of the child element
+    @param parentName Name of the parent element
+    **/
+
+    public void parent(String name, String parentName) {
+        ElementType child = getElementType(name);
+        ElementType parent = getElementType(parentName);
+        if (child == null) {
+            throw new Error("No child " + name + " for parent " + parentName);
+        }
+        if (parent == null) {
+            throw new Error("No parent " + parentName + " for child " + name);
+        }
+        child.setParent(parent);
+    }
+
+    /**
+    Add to or replace a character entity in this schema.
+    Entity names are stored as given (they are case-sensitive).
+    @param name Name of the entity
+    @param value Value of the entity
+    **/
+
+    public void entity(String name, int value) {
+        theEntities.put(name, new Integer(value));
+    }
+
+    /**
+    Get an ElementType by name.
+    The lookup ignores the case of the name.
+    @param name Name (Qname) of the element type
+    @return The corresponding ElementType, or null if none is registered
+    **/
+
+    public ElementType getElementType(String name) {
+        return (ElementType)(theElementTypes.get(canonicalName(name)));
+    }
+
+    /**
+    Get an entity value by name.
+    @param name Name of the entity
+    @return The corresponding character, or 0 if none
+    **/
+
+    public int getEntity(String name) {
+//        System.err.println("%% Looking up entity " + name);
+        Integer ch = (Integer)theEntities.get(name);
+        if (ch == null) return 0;
+        return ch.intValue();
+    }
+
+    /**
+    Return the URI (namespace name) of this schema.
+    **/
+
+    public String getURI() {
+        return theURI;
+    }
+
+    /**
+    Return the prefix of this schema.
+    **/
+
+    public String getPrefix() {
+        return thePrefix;
+    }
+
+    /**
+    Change the URI (namespace name) of this schema.
+    **/
+
+    public void setURI(String uri) {
+        theURI = uri;
+    }
+
+    /**
+    Change the prefix of this schema.
+    **/
+
+    public void setPrefix(String prefix) {
+        thePrefix = prefix;
+    }
+
+    }
diff --git a/src/org/ccil/cowan/tagsoup/XMLWriter.java b/src/org/ccil/cowan/tagsoup/XMLWriter.java
new file mode 100644
index 0000000..0dc7a03
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/XMLWriter.java
@@ -0,0 +1,1435 @@
+// XMLWriter.java - serialize an XML document.
+// Written by David Megginson, david@megginson.com
+// and placed by him into the public domain.
+// Extensively modified by John Cowan for TagSoup.
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Properties;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.AttributesImpl;
+import org.xml.sax.helpers.NamespaceSupport;
+import org.xml.sax.helpers.XMLFilterImpl;
+import org.xml.sax.ext.LexicalHandler;
+
+
+/**
+ * Filter to write an XML document from a SAX event stream.
+ *
+ * <p>This class can be used by itself or as part of a SAX event
+ * stream: it takes as input a series of SAX2 ContentHandler
+ * events and uses the information in those events to write
+ * an XML document. Since this class is a filter, it can also
+ * pass the events on down a filter chain for further processing
+ * (you can use the XMLWriter to take a snapshot of the current
+ * state at any point in a filter chain), and it can be
+ * used directly as a ContentHandler for a SAX2 XMLReader.</p>
+ *
+ * <p>The client creates a document by invoking the methods for
+ * standard SAX2 events, always beginning with the
+ * {@link #startDocument startDocument} method and ending with
+ * the {@link #endDocument endDocument} method. There are convenience
+ * methods provided so that clients do not have to create empty
+ * attribute lists or provide empty strings as parameters; for
+ * example, the method invocation</p>
+ *
+ * <pre>
+ * w.startElement("foo");
+ * </pre>
+ *
+ * <p>is equivalent to the regular SAX2 ContentHandler method</p>
+ *
+ * <pre>
+ * w.startElement("", "foo", "", new AttributesImpl());
+ * </pre>
+ *
+ * <p>Except that it is more efficient because it does not allocate
+ * a new empty attribute list each time. The following code will send
+ * a simple XML document to standard output:</p>
+ *
+ * <pre>
+ * XMLWriter w = new XMLWriter();
+ *
+ * w.startDocument();
+ * w.startElement("greeting");
+ * w.characters("Hello, world!");
+ * w.endElement("greeting");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;greeting>Hello, world!&lt;/greeting>
+ * </pre>
+ *
+ * <p>In fact, there is an even simpler convenience method,
+ * <var>dataElement</var>, designed for writing elements that
+ * contain only character data, so the code to generate the
+ * document could be shortened to</p>
+ *
+ * <pre>
+ * XMLWriter w = new XMLWriter();
+ *
+ * w.startDocument();
+ * w.dataElement("greeting", "Hello, world!");
+ * w.endDocument();
+ * </pre>
+ *
+ * <h2>Whitespace</h2>
+ *
+ * <p>According to the XML Recommendation, <em>all</em> whitespace
+ * in an XML document is potentially significant to an application,
+ * so this class never adds newlines or indentation. If you
+ * insert three elements in a row, as in</p>
+ *
+ * <pre>
+ * w.dataElement("item", "1");
+ * w.dataElement("item", "2");
+ * w.dataElement("item", "3");
+ * </pre>
+ *
+ * <p>you will end up with</p>
+ *
+ * <pre>
+ * &lt;item>1&lt;/item>&lt;item>2&lt;/item>&lt;item>3&lt;/item>
+ * </pre>
+ *
+ * <p>You need to invoke one of the <var>characters</var> methods
+ * explicitly to add newlines or indentation. Alternatively, you
+ * can use {@link com.megginson.sax.DataWriter DataWriter}, which
+ * is derived from this class -- it is optimized for writing
+ * purely data-oriented (or field-oriented) XML, and does automatic
+ * linebreaks and indentation (but does not support mixed content
+ * properly).</p>
+ *
+ *
+ * <h2>Namespace Support</h2>
+ *
+ * <p>The writer contains extensive support for XML Namespaces, so that
+ * a client application does not have to keep track of prefixes and
+ * supply <var>xmlns</var> attributes. By default, the XML writer will
+ * generate Namespace declarations in the form _NS1, _NS2, etc., wherever
+ * they are needed, as in the following example:</p>
+ *
+ * <pre>
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;_NS1:foo xmlns:_NS1="http://www.foo.com/ns/"/>
+ * </pre>
+ *
+ * <p>In many cases, document authors will prefer to choose their
+ * own prefixes rather than using the (ugly) default names. The
+ * XML writer allows two methods for selecting prefixes:</p>
+ *
+ * <ol>
+ * <li>the qualified name</li>
+ * <li>the {@link #setPrefix setPrefix} method.</li>
+ * </ol>
+ *
+ * <p>Whenever the XML writer finds a new Namespace URI, it checks
+ * to see if a qualified (prefixed) name is also available; if so
+ * it attempts to use the name's prefix (as long as the prefix is
+ * not already in use for another Namespace URI).</p>
+ *
+ * <p>Before writing a document, the client can also pre-map a prefix
+ * to a Namespace URI with the setPrefix method:</p>
+ *
+ * <pre>
+ * w.setPrefix("http://www.foo.com/ns/", "foo");
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;foo:foo xmlns:foo="http://www.foo.com/ns/"/>
+ * </pre>
+ *
+ * <p>The default Namespace simply uses an empty string as the prefix:</p>
+ *
+ * <pre>
+ * w.setPrefix("http://www.foo.com/ns/", "");
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;foo xmlns="http://www.foo.com/ns/"/>
+ * </pre>
+ *
+ * <p>By default, the XML writer will not declare a Namespace until
+ * it is actually used. Sometimes, this approach will create
+ * a large number of Namespace declarations, as in the following
+ * example:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ * &lt;rdf:Description about="http://www.foo.com/ids/books/12345">
+ * &lt;dc:title xmlns:dc="http://www.purl.org/dc/">A Dark Night&lt;/dc:title>
+ * &lt;dc:creator xmlns:dc="http://www.purl.org/dc/">Jane Smith&lt;/dc:creator>
+ * &lt;dc:date xmlns:dc="http://www.purl.org/dc/">2000-09-09&lt;/dc:date>
+ * &lt;/rdf:Description>
+ * &lt;/rdf:RDF>
+ * </pre>
+ *
+ * <p>The "rdf" prefix is declared only once, because the RDF Namespace
+ * is used by the root element and can be inherited by all of its
+ * descendants; the "dc" prefix, on the other hand, is declared three
+ * times, because no higher element uses the Namespace. To solve this
+ * problem, you can instruct the XML writer to predeclare Namespaces
+ * on the root element even if they are not used there:</p>
+ *
+ * <pre>
+ * w.forceNSDecl("http://www.purl.org/dc/");
+ * </pre>
+ *
+ * <p>Now, the "dc" prefix will be declared on the root element even
+ * though it's not needed there, and can be inherited by its
+ * descendants:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ * xmlns:dc="http://www.purl.org/dc/">
+ * &lt;rdf:Description about="http://www.foo.com/ids/books/12345">
+ * &lt;dc:title>A Dark Night&lt;/dc:title>
+ * &lt;dc:creator>Jane Smith&lt;/dc:creator>
+ * &lt;dc:date>2000-09-09&lt;/dc:date>
+ * &lt;/rdf:Description>
+ * &lt;/rdf:RDF>
+ * </pre>
+ *
+ * <p>This approach is also useful for declaring Namespace prefixes
+ * that may be used by qualified names appearing in attribute values or
+ * character data.</p>
+ *
+ * @author David Megginson, david@megginson.com
+ * @version 0.2
+ * @see org.xml.sax.XMLFilter
+ * @see org.xml.sax.ContentHandler
+ */
+public class XMLWriter extends XMLFilterImpl implements LexicalHandler
+{
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Constructors.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Create a new XML writer.
+ *
+ * <p>Write to standard output.</p>
+ */
+    public XMLWriter ()
+    {
+        // A null destination makes setOutput() fall back to System.out.
+        final Writer useStandardOutput = null;
+        init(useStandardOutput);
+    }
+
+
+ /**
+ * Create a new XML writer.
+ *
+ * <p>Write to the writer provided.</p>
+ *
+ * @param writer The output destination, or null to use standard
+ * output.
+ */
+    public XMLWriter (Writer writer)
+    {
+        // init() performs all set-up; a null writer is passed through
+        // unchanged and resolved to standard output by setOutput().
+        this.init(writer);
+    }
+
+
+ /**
+ * Create a new XML writer.
+ *
+ * <p>Use the specified XML reader as the parent.</p>
+ *
+ * @param xmlreader The parent in the filter chain, or null
+ * for no parent.
+ */
+    public XMLWriter (XMLReader xmlreader)
+    {
+        super(xmlreader);
+        // No writer was supplied: a null destination selects System.out.
+        final Writer useStandardOutput = null;
+        init(useStandardOutput);
+    }
+
+
+ /**
+ * Create a new XML writer.
+ *
+ * <p>Use the specified XML reader as the parent, and write
+ * to the specified writer.</p>
+ *
+ * @param xmlreader The parent in the filter chain, or null
+ * for no parent.
+ * @param writer The output destination, or null to use standard
+ * output.
+ */
+    public XMLWriter (XMLReader xmlreader, Writer writer)
+    {
+        // Parent reader goes to the filter base class, the destination to init().
+        super(xmlreader);
+        this.init(writer);
+    }
+
+
+ /**
+ * Internal initialization method.
+ *
+ * <p>All of the public constructors invoke this method.</p>
+ *
+ * @param writer The output destination, or null to use
+ * standard output.
+ */
+    private void init (Writer writer)
+    {
+        // Choose the destination first (null selects standard output).
+        this.setOutput(writer);
+
+        // Fresh per-writer state: Namespace tracking, the three lookup
+        // tables and the output properties all start out empty.
+        this.outputProperties = new Properties();
+        this.doneDeclTable = new Hashtable();
+        this.forcedDeclTable = new Hashtable();
+        this.prefixTable = new Hashtable();
+        this.nsSupport = new NamespaceSupport();
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Public methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Reset the writer.
+ *
+ * <p>This method is especially useful if the writer throws an
+ * exception before it is finished, and you want to reuse the
+ * writer for a new document. It is usually a good idea to
+ * invoke {@link #flush flush} before resetting the writer,
+ * to make sure that no output is lost.</p>
+ *
+ * <p>This method is invoked automatically by the
+ * {@link #startDocument startDocument} method before writing
+ * a new document.</p>
+ *
+ * <p><strong>Note:</strong> this method will <em>not</em>
+ * clear the prefix or URI information in the writer or
+ * the selected output writer.</p>
+ *
+ * @see #flush
+ */
+    public void reset ()
+    {
+        // Back to "outside the root element" with no prefixes generated yet.
+        this.elementLevel = 0;
+        this.prefixCounter = 0;
+        // Forget every Namespace context pushed so far.
+        this.nsSupport.reset();
+    }
+
+
+ /**
+ * Flush the output.
+ *
+ * <p>This method flushes the output stream. It is especially useful
+ * when you need to make certain that the entire document has
+ * been written to output but do not want to close the output
+ * stream.</p>
+ *
+ * <p>This method is invoked automatically by the
+ * {@link #endDocument endDocument} method after writing a
+ * document.</p>
+ *
+ * @see #reset
+ */
+    public void flush () throws IOException
+    {
+        // Delegates to the current destination; the writer stays open.
+        this.output.flush();
+    }
+
+
+ /**
+ * Set a new output destination for the document.
+ *
+ * @param writer The output destination, or null to use
+ * standard output.
+ * @see #flush
+ */
+    public void setOutput (Writer writer)
+    {
+        // A null argument selects standard output, wrapped in a writer that
+        // uses the platform default encoding.  The previous destination is
+        // simply replaced; it is neither flushed nor closed here.
+        output = (writer != null)
+            ? writer
+            : new OutputStreamWriter(System.out);
+    }
+
+
+ /**
+ * Specify a preferred prefix for a Namespace URI.
+ *
+ * <p>Note that this method does not actually force the Namespace
+ * to be declared; to do that, use the {@link
+ * #forceNSDecl(java.lang.String) forceNSDecl} method as well.</p>
+ *
+ * @param uri The Namespace URI.
+ * @param prefix The preferred prefix, or "" to select
+ * the default Namespace.
+ * @see #getPrefix
+ * @see #forceNSDecl(java.lang.String)
+ * @see #forceNSDecl(java.lang.String,java.lang.String)
+ */
+    public void setPrefix (String uri, String prefix)
+    {
+        // Records (or overwrites) the preferred prefix for this URI; nothing
+        // is declared in the output by this call.
+        this.prefixTable.put(uri, prefix);
+    }
+
+
+ /**
+ * Get the current or preferred prefix for a Namespace URI.
+ *
+ * @param uri The Namespace URI.
+ * @return The preferred prefix, "" for the default Namespace, or null if no prefix has been set for the URI.
+ * @see #setPrefix
+ */
+    public String getPrefix (String uri)
+    {
+        // Only the table filled by setPrefix() is consulted; a URI that was
+        // never registered yields null.
+        final Object preferred = prefixTable.get(uri);
+        return (String) preferred;
+    }
+
+
+ /**
+ * Force a Namespace to be declared on the root element.
+ *
+ * <p>By default, the XMLWriter will declare only the Namespaces
+ * needed for an element; as a result, a Namespace may be
+ * declared many places in a document if it is not used on the
+ * root element.</p>
+ *
+ * <p>This method forces a Namespace to be declared on the root
+ * element even if it is not used there, and reduces the number
+ * of xmlns attributes in the document.</p>
+ *
+ * @param uri The Namespace URI to declare.
+ * @see #forceNSDecl(java.lang.String,java.lang.String)
+ * @see #setPrefix
+ */
+    public void forceNSDecl (String uri)
+    {
+        // The table is used as a set of URIs; the stored value is a marker.
+        this.forcedDeclTable.put(uri, Boolean.TRUE);
+    }
+
+
+ /**
+ * Force a Namespace declaration with a preferred prefix.
+ *
+ * <p>This is a convenience method that invokes {@link
+ * #setPrefix setPrefix} then {@link #forceNSDecl(java.lang.String)
+ * forceNSDecl}.</p>
+ *
+ * @param uri The Namespace URI to declare on the root element.
+ * @param prefix The preferred prefix for the Namespace, or ""
+ * for the default Namespace.
+ * @see #setPrefix
+ * @see #forceNSDecl(java.lang.String)
+ */
+    public void forceNSDecl (String uri, String prefix)
+    {
+        // Register the preferred prefix first, then mark the URI as forced.
+        this.setPrefix(uri, prefix);
+        this.forceNSDecl(uri);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Methods from org.xml.sax.ContentHandler.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Write the XML declaration at the beginning of the document.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the XML declaration, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#startDocument
+ */
+    public void startDocument ()
+        throws SAXException
+    {
+        // Start every document from a clean element level / Namespace state.
+        reset();
+        // The declaration is written unless the output property
+        // OMIT_XML_DECLARATION is set to "yes".
+        if (!("yes".equals(outputProperties.getProperty(OMIT_XML_DECLARATION, "no")))) {
+            write("<?xml");
+            if (version == null) {
+                write(" version=\"1.0\"");
+            } else {
+                write(" version=\"");
+                write(version);
+                write("\"");
+            }
+            // Compare by content: a reference comparison against "" lets a
+            // non-interned empty string through and emits encoding="".
+            if (outputEncoding != null && !"".equals(outputEncoding)) {
+                write(" encoding=\"");
+                write(outputEncoding);
+                write("\"");
+            }
+            if (standalone == null) {
+                write(" standalone=\"yes\"");
+            } else {
+                write(" standalone=\"");
+                write(standalone);
+                write("\"");
+            }
+            // Close the declaration on every path.  Previously an explicit
+            // standalone value left the declaration without its "?>".
+            write("?>\n");
+        }
+        super.startDocument();
+    }
+
+
+ /**
+ * Write a newline at the end of the document.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the newline, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#endDocument
+ */
+    public void endDocument () throws SAXException
+    {
+        // Final newline, then let the rest of the filter chain finish.
+        write('\n');
+        super.endDocument();
+
+        // Push everything out to the destination; I/O failures are
+        // reported as SAX exceptions wrapping the original cause.
+        try {
+            this.flush();
+        } catch (IOException flushFailure) {
+            throw new SAXException(flushFailure);
+        }
+    }
+
+
+ /**
+ * Write a start tag.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @param uri The Namespace URI, or the empty string if none
+ * is available.
+ * @param localName The element's local (unprefixed) name (required).
+ * @param qName The element's qualified (prefixed) name, or the
+ * empty string if none is available. This method will
+ * use the qName as a template for generating a prefix
+ * if necessary, but it is not guaranteed to use the
+ * same qName.
+ * @param atts The element's attribute list (must not be null).
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the start tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#startElement
+ */
+    public void startElement (String uri, String localName,
+                              String qName, Attributes atts)
+        throws SAXException
+    {
+        elementLevel += 1;
+        nsSupport.pushContext();
+
+        // Emit a DOCTYPE ahead of the first start tag when one has been
+        // requested and none has been written so far.
+        final boolean dtdPending = forceDTD && !hasOutputDTD;
+        if (dtdPending) {
+            final String rootName = (localName != null) ? localName : qName;
+            startDTD(rootName, "", "");
+        }
+
+        write('<');
+        writeName(uri, localName, qName, true);
+        writeAttributes(atts);
+        // Forced Namespace declarations go on the root element only.
+        final boolean isRoot = (elementLevel == 1);
+        if (isRoot) {
+            forceNSDecls();
+        }
+        writeNSDecls();
+        write('>');
+
+        // In HTML mode the content of script and style elements is written
+        // without escaping (see characters()); the match is on qName.
+        if (htmlMode) {
+            final boolean rawTextElement =
+                qName.equals("script") || qName.equals("style");
+            if (rawTextElement) {
+                cdataElement = true;
+            }
+        }
+        super.startElement(uri, localName, qName, atts);
+    }
+
+
+ /**
+ * Write an end tag.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @param uri The Namespace URI, or the empty string if none
+ * is available.
+ * @param localName The element's local (unprefixed) name (required).
+ * @param qName The element's qualified (prefixed) name, or the
+ * empty string if none is available. This method will
+ * use the qName as a template for generating a prefix
+ * if necessary, but it is not guaranteed to use the
+ * same qName.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the end tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#endElement
+ */
+    public void endElement (String uri, String localName, String qName)
+        throws SAXException
+    {
+        // In HTML mode, elements of the XHTML (or no) Namespace whose qName
+        // is one of the HTML empty elements get no end tag.
+        boolean suppressEndTag = false;
+        if (htmlMode) {
+            final boolean htmlNamespace =
+                uri.equals("http://www.w3.org/1999/xhtml") || uri.equals("");
+            if (htmlNamespace) {
+                suppressEndTag =
+                    qName.equals("area") || qName.equals("base") ||
+                    qName.equals("basefont") || qName.equals("br") ||
+                    qName.equals("col") || qName.equals("frame") ||
+                    qName.equals("hr") || qName.equals("img") ||
+                    qName.equals("input") || qName.equals("isindex") ||
+                    qName.equals("link") || qName.equals("meta") ||
+                    qName.equals("param");
+            }
+        }
+        if (!suppressEndTag) {
+            write("</");
+            writeName(uri, localName, qName, true);
+            write('>');
+        }
+
+        // A newline follows the end of the root element.
+        final boolean closingRoot = (elementLevel == 1);
+        if (closingRoot) {
+            write('\n');
+        }
+
+        // Any end tag switches unescaped (script/style) output off again.
+        cdataElement = false;
+        super.endElement(uri, localName, qName);
+        nsSupport.popContext();
+        elementLevel -= 1;
+    }
+
+
+ /**
+ * Write character data.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @param ch The array of characters to write.
+ * @param start The starting position in the array.
+ * @param len The number of characters to write.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the characters, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#characters
+ */
+    public void characters (char ch[], int start, int len)
+        throws SAXException
+    {
+        if (cdataElement) {
+            // Inside an element flagged by startElement() (script/style in
+            // HTML mode) the text is copied through character by character
+            // without any escaping.
+            final int end = start + len;
+            for (int pos = start; pos < end; pos++) {
+                write(ch[pos]);
+            }
+        } else {
+            writeEsc(ch, start, len, false);
+        }
+        super.characters(ch, start, len);
+    }
+
+
+ /**
+ * Write ignorable whitespace.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @param ch The array of characters to write.
+ * @param start The starting position in the array.
+ * @param length The number of characters to write.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the whitespace, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#ignorableWhitespace
+ */
+    public void ignorableWhitespace (char ch[], int start, int length)
+        throws SAXException
+    {
+        // Written through the same escaping routine as ordinary character
+        // data, then forwarded down the filter chain.
+        this.writeEsc(ch, start, length, false);
+        super.ignorableWhitespace(ch, start, length);
+    }
+
+
+
+ /**
+ * Write a processing instruction.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @param target The PI target.
+ * @param data The PI data.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the PI, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#processingInstruction
+ */
+    public void processingInstruction (String target, String data)
+        throws SAXException
+    {
+        // <?target data?>
+        write("<?");
+        write(target);
+        write(' ');
+        write(data);
+        write("?>");
+
+        // Outside the root element each PI is followed by a newline.
+        final boolean outsideRoot = (elementLevel <= 0);
+        if (outsideRoot) {
+            write('\n');
+        }
+        super.processingInstruction(target, data);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Additional markup.
+ ////////////////////////////////////////////////////////////////////
+
+ /**
+ * Write an empty element.
+ *
+ * This method writes an empty element tag rather than a start tag
+ * followed by an end tag. Both a {@link #startElement
+ * startElement} and an {@link #endElement endElement} event will
+ * be passed on down the filter chain.
+ *
+ * @param uri The element's Namespace URI, or the empty string
+ * if the element has no Namespace or if Namespace
+ * processing is not being performed.
+ * @param localName The element's local name (without prefix). This
+ * parameter must be provided.
+ * @param qName The element's qualified name (with prefix), or
+ * the empty string if none is available. This parameter
+ * is strictly advisory: the writer may or may not use
+ * the prefix attached.
+ * @param atts The element's attribute list.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the empty tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #startElement
+ * @see #endElement
+ */
+    public void emptyElement (String uri, String localName,
+                              String qName, Attributes atts)
+        throws SAXException
+    {
+        nsSupport.pushContext();
+        write('<');
+        writeName(uri, localName, qName, true);
+        writeAttributes(atts);
+        // Unlike startElement(), this method does not increment
+        // elementLevel, so an empty ROOT element is seen here at level 0.
+        // Testing only for level 1 skipped the forced Namespace
+        // declarations in exactly that case; level 1 is still accepted so
+        // that existing output for children of the root is unchanged.
+        if (elementLevel <= 1) {
+            forceNSDecls();
+        }
+        writeNSDecls();
+        write("/>");
+        super.startElement(uri, localName, qName, atts);
+        super.endElement(uri, localName, qName);
+        // Balance the pushContext() above (as endElement() does); without
+        // this every empty element leaked a Namespace context, and its
+        // declarations stayed in scope for following siblings.
+        nsSupport.popContext();
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Convenience methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+
+ /**
+ * Start a new element without a qname or attributes.
+ *
+ * <p>This method will provide a default empty attribute
+ * list and an empty string for the qualified name.
+ * It invokes {@link
+ * #startElement(String, String, String, Attributes)}
+ * directly.</p>
+ *
+ * @param uri The element's Namespace URI.
+ * @param localName The element's local name.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the start tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #startElement(String, String, String, Attributes)
+ */
+    public void startElement (String uri, String localName)
+        throws SAXException
+    {
+        // No qualified name is known here, so "" is passed; the shared
+        // empty attribute list avoids allocating one per call.
+        final String noQName = "";
+        this.startElement(uri, localName, noQName, EMPTY_ATTS);
+    }
+
+
+ /**
+ * Start a new element without a qname, attributes or a Namespace URI.
+ *
+ * <p>This method will provide an empty string for the
+ * Namespace URI, and empty string for the qualified name,
+ * and a default empty attribute list. It invokes
+ * {@link #startElement(String, String, String, Attributes)}
+ * directly.</p>
+ *
+ * @param localName The element's local name.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the start tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #startElement(String, String, String, Attributes)
+ */
+    public void startElement (String localName)
+        throws SAXException
+    {
+        // Neither a Namespace URI nor a qualified name is known: both are "".
+        final String none = "";
+        this.startElement(none, localName, none, EMPTY_ATTS);
+    }
+
+
+ /**
+ * End an element without a qname.
+ *
+ * <p>This method will supply an empty string for the qName.
+ * It invokes {@link #endElement(String, String, String)}
+ * directly.</p>
+ *
+ * @param uri The element's Namespace URI.
+ * @param localName The element's local name.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the end tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #endElement(String, String, String)
+ */
+    public void endElement (String uri, String localName)
+        throws SAXException
+    {
+        // The three-argument form does the work; the qualified name is "".
+        final String noQName = "";
+        this.endElement(uri, localName, noQName);
+    }
+
+
+ /**
+ * End an element without a Namespace URI or qname.
+ *
+ * <p>This method will supply an empty string for the qName
+ * and an empty string for the Namespace URI.
+ * It invokes {@link #endElement(String, String, String)}
+ * directly.</p>
+ *
+ * @param localName The element's local name.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the end tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #endElement(String, String, String)
+ */
+    public void endElement (String localName)
+        throws SAXException
+    {
+        // Neither a Namespace URI nor a qualified name is known: both are "".
+        final String none = "";
+        this.endElement(none, localName, none);
+    }
+
+
+ /**
+ * Add an empty element without a qname or attributes.
+ *
+ * <p>This method will supply an empty string for the qname
+ * and an empty attribute list. It invokes
+ * {@link #emptyElement(String, String, String, Attributes)}
+ * directly.</p>
+ *
+ * @param uri The element's Namespace URI.
+ * @param localName The element's local name.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the empty tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #emptyElement(String, String, String, Attributes)
+ */
+    public void emptyElement (String uri, String localName)
+        throws SAXException
+    {
+        // No qualified name and no attributes: "" and the shared empty list.
+        final String noQName = "";
+        this.emptyElement(uri, localName, noQName, EMPTY_ATTS);
+    }
+
+
+ /**
+ * Add an empty element without a Namespace URI, qname or attributes.
+ *
+ * <p>This method will supply an empty string for the qname,
+ * and empty string for the Namespace URI, and an empty
+ * attribute list. It invokes
+ * {@link #emptyElement(String, String, String, Attributes)}
+ * directly.</p>
+ *
+ * @param localName The element's local name.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the empty tag, or if a handler further down
+ * the filter chain raises an exception.
+ * @see #emptyElement(String, String, String, Attributes)
+ */
+    public void emptyElement (String localName)
+        throws SAXException
+    {
+        // Neither a Namespace URI nor a qualified name is known: both are "".
+        final String none = "";
+        this.emptyElement(none, localName, none, EMPTY_ATTS);
+    }
+
+
+    /**
+     * Write a complete element whose content is character data.
+     *
+     * <p>Convenience method that emits the start tag, the character
+     * data and the end tag in a single call.  It invokes, in order,
+     * {@link #startElement(String, String, String, Attributes)},
+     * {@link #characters(String)} and
+     * {@link #endElement(String, String, String)}.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @param qName The element's default qualified name.
+     * @param atts The element's attributes.
+     * @param content The character data content.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the element, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     * @see #characters(String)
+     * @see #endElement(String, String, String)
+     */
+    public void dataElement (String uri, String localName,
+                             String qName, Attributes atts,
+                             String content)
+        throws SAXException
+    {
+        // 1. start tag (with attributes)
+        startElement(uri, localName, qName, atts);
+
+        // 2. text content, escaped by characters()
+        characters(content);
+
+        // 3. matching end tag
+        endElement(uri, localName, qName);
+    }
+
+
+    /**
+     * Write a complete element with character data content and a
+     * Namespace URI, but without attributes.
+     *
+     * <p>Convenience method that supplies an empty string for the
+     * qname and an empty attribute list, then delegates to
+     * {@link #dataElement(String, String, String, Attributes, String)},
+     * which writes the start tag, the character data and the end
+     * tag.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @param content The character data content.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the element, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     * @see #characters(String)
+     * @see #endElement(String, String, String)
+     */
+    public void dataElement (String uri, String localName, String content)
+        throws SAXException
+    {
+        final String noQName = "";
+        dataElement(uri, localName, noQName, EMPTY_ATTS, content);
+    }
+
+
+    /**
+     * Write a complete element with character data content, using
+     * only a local name (no Namespace URI, qname or attributes).
+     *
+     * <p>Convenience method that supplies an empty string for the
+     * Namespace URI, an empty string for the qualified name and an
+     * empty attribute list, then delegates to
+     * {@link #dataElement(String, String, String, Attributes, String)},
+     * which writes the start tag, the character data and the end
+     * tag.</p>
+     *
+     * @param localName The element's local name.
+     * @param content The character data content.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the element, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     * @see #characters(String)
+     * @see #endElement(String, String, String)
+     */
+    public void dataElement (String localName, String content)
+        throws SAXException
+    {
+        final String noNamespace = "";
+        final String noQName = "";
+        dataElement(noNamespace, localName, noQName, EMPTY_ATTS, content);
+    }
+
+
+    /**
+     * Write a string of character data, with XML escaping.
+     *
+     * <p>Convenience overload: the string is copied into a character
+     * array and the whole array is passed to
+     * {@link #characters(char[], int, int)}.</p>
+     *
+     * @param data The character data.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the string, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #characters(char[], int, int)
+     */
+    public void characters (String data)
+        throws SAXException
+    {
+        final char[] buffer = data.toCharArray();
+        final int offset = 0;
+        final int count = buffer.length;
+        characters(buffer, offset, count);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Force all Namespaces to be declared.
+     *
+     * This method is used on the root element to ensure that
+     * the predeclared Namespaces all appear.  Every key of the
+     * forced-declaration table is handed to
+     * {@link #doPrefix(String, String, boolean)} as the Namespace
+     * URI of an element name, with no preferred qname.
+     */
+    private void forceNSDecls ()
+    {
+        for (Enumeration keys = forcedDeclTable.keys(); keys.hasMoreElements(); ) {
+            String key = (String) keys.nextElement();
+            doPrefix(key, null, true);
+        }
+    }
+
+
+ /**
+ * Determine the prefix for an element or attribute name.
+ *
+ * <p>If no usable prefix is currently bound to the URI, one is
+ * chosen (from the tables, from the qname, or generated as
+ * <code>__NS<i>n</i></code>), declared on the Namespace context
+ * and remembered for later calls.</p>
+ *
+ * TODO: this method probably needs some cleanup.
+ *
+ * @param uri The Namespace URI.
+ * @param qName The qualified name (optional); this will be used
+ * to indicate the preferred prefix if none is currently
+ * bound.
+ * @param isElement true if this is an element name, false
+ * if it is an attribute name (which cannot use the
+ * default Namespace).
+ * @return The prefix to use; the empty string for the default
+ * Namespace, or null when the URI is the empty string
+ * (the name is then written without a prefix).
+ */
+ private String doPrefix (String uri, String qName, boolean isElement)
+ {
+ // URI currently bound to the default (empty) prefix, if any.
+ String defaultNS = nsSupport.getURI("");
+ // Name in no Namespace: it never gets a prefix.  For an element
+ // with a default Namespace in effect, the default is re-declared
+ // as "" so that the unprefixed element is not captured by it.
+ if ("".equals(uri)) {
+ if (isElement && defaultNS != null)
+ nsSupport.declarePrefix("", "");
+ return null;
+ }
+ // First choice: a binding that is already in scope.  Only element
+ // names may use the default Namespace.
+ String prefix;
+ if (isElement && defaultNS != null && uri.equals(defaultNS)) {
+ prefix = "";
+ } else {
+ prefix = nsSupport.getPrefix(uri);
+ }
+ if (prefix != null) {
+ return prefix;
+ }
+ // Second choice: the prefix this method declared for the URI on an
+ // earlier call.  It is rejected when it is the empty prefix and
+ // either this is an attribute or a default Namespace is in effect,
+ // or when the prefix is currently bound to some URI.
+ // (Note that && binds tighter than || in the condition below.)
+ prefix = (String) doneDeclTable.get(uri);
+ if (prefix != null &&
+ ((!isElement || defaultNS != null) &&
+ "".equals(prefix) || nsSupport.getURI(prefix) != null)) {
+ prefix = null;
+ }
+ // Third choice: the prefix table entry for the URI, subject to
+ // exactly the same rejection rules.
+ if (prefix == null) {
+ prefix = (String) prefixTable.get(uri);
+ if (prefix != null &&
+ ((!isElement || defaultNS != null) &&
+ "".equals(prefix) || nsSupport.getURI(prefix) != null)) {
+ prefix = null;
+ }
+ }
+ // Fourth choice: take the hint from the qname.  An unprefixed qname
+ // selects the default Namespace, but only for an element and only
+ // when no default Namespace is in effect yet.
+ if (prefix == null && qName != null && !"".equals(qName)) {
+ int i = qName.indexOf(':');
+ if (i == -1) {
+ if (isElement && defaultNS == null) {
+ prefix = "";
+ }
+ } else {
+ prefix = qName.substring(0, i);
+ }
+ }
+ // Last resort: while there is no candidate, or the candidate is
+ // already bound to a URI, generate "__NS1", "__NS2", ... until an
+ // unbound prefix is found.  The loop body is intentionally empty.
+ for (;
+ prefix == null || nsSupport.getURI(prefix) != null;
+ prefix = "__NS" + ++prefixCounter)
+ ;
+ // Declare the chosen prefix and remember it for this URI.
+ nsSupport.declarePrefix(prefix, uri);
+ doneDeclTable.put(uri, prefix);
+ return prefix;
+ }
+
+
+    /**
+     * Write a single raw (unescaped) character to the output.
+     *
+     * @param c The character to write.
+     * @exception org.xml.sax.SAXException If the underlying writer
+     *            fails; the IOException is rethrown wrapped in a
+     *            SAXException.
+     */
+    private void write (char c)
+        throws SAXException
+    {
+        try {
+            output.write((int) c);
+        } catch (IOException ioe) {
+            SAXException wrapped = new SAXException(ioe);
+            throw wrapped;
+        }
+    }
+
+
+    /**
+     * Write a raw (unescaped) string to the output.
+     *
+     * @param s The string to write.
+     * @exception org.xml.sax.SAXException If the underlying writer
+     *            fails; the IOException is rethrown wrapped in a
+     *            SAXException.
+     */
+    private void write (String s)
+        throws SAXException
+    {
+        try {
+            output.write(s);
+        } catch (IOException ioe) {
+            SAXException wrapped = new SAXException(ioe);
+            throw wrapped;
+        }
+    }
+
+
+    /**
+     * Write out an attribute list, escaping values.
+     *
+     * <p>Each attribute is written as a space, its name (with a
+     * prefix added where needed) and its escaped value in double
+     * quotes.  In HTML mode an attribute that
+     * <code>booleanAttribute</code> recognises is written in
+     * minimized form, i.e. the name only.</p>
+     *
+     * @param atts The attribute list to write.
+     * @exception org.xml.sax.SAXException If there is an error writing
+     *            the attribute list, this method will throw an
+     *            IOException wrapped in a SAXException.
+     */
+    private void writeAttributes (Attributes atts)
+        throws SAXException
+    {
+        int len = atts.getLength();
+        for (int i = 0; i < len; i++) {
+            char ch[] = atts.getValue(i).toCharArray();
+            write(' ');
+            writeName(atts.getURI(i), atts.getLocalName(i),
+                      atts.getQName(i), false);
+            if (htmlMode &&
+                booleanAttribute(atts.getLocalName(i), atts.getQName(i), atts.getValue(i))) {
+                // Minimized boolean attribute: omit only its value and
+                // carry on.  Leaving the loop here would silently drop
+                // every attribute that follows it.
+                continue;
+            }
+            write("=\"");
+            writeEsc(ch, 0, ch.length, true);
+            write('"');
+        }
+    }
+
+
+    // Names of the HTML attributes that may be minimized (written as
+    // the bare name) when the value equals the name.
+    private String[] booleans = {"checked", "compact", "declare", "defer",
+                                 "disabled", "ismap", "multiple",
+                                 "nohref", "noresize", "noshade",
+                                 "nowrap", "readonly", "selected"};
+
+    /**
+     * Return true if the attribute is an HTML boolean from the above
+     * list, i.e. its name is in the list and its value equals its name.
+     *
+     * @param localName The attribute's local name; may be null or empty,
+     *        in which case the name is derived from the qname.
+     * @param qName The attribute's qualified name, used only as a
+     *        fallback when no local name is available.
+     * @param value The attribute's value.
+     * @return true if the attribute can be written in minimized form.
+     */
+    private boolean booleanAttribute (String localName, String qName, String value)
+    {
+        String name = localName;
+        if (name == null || "".equals(name)) {
+            // No local name: use the part of the qname after the colon,
+            // or the whole qname when it has no prefix (indexOf gives -1,
+            // so the substring starts at 0).  This mirrors writeName and
+            // avoids dereferencing a null name below.
+            if (qName == null) return false;
+            name = qName.substring(qName.indexOf(':') + 1);
+        }
+        if (!name.equals(value)) return false;
+        for (int j = 0; j < booleans.length; j++) {
+            if (name.equals(booleans[j])) return true;
+        }
+        return false;
+    }
+
+ /**
+ * Write an array of data characters with escaping.
+ *
+ * @param ch The array of characters.
+ * @param start The starting position.
+ * @param length The number of characters to use.
+ * @param isAttVal true if this is an attribute value literal.
+ * @exception org.xml.SAXException If there is an error writing
+ * the characters, this method will throw an
+ * IOException wrapped in a SAXException.
+ */
+ private void writeEsc (char ch[], int start,
+ int length, boolean isAttVal)
+ throws SAXException
+ {
+ for (int i = start; i < start + length; i++) {
+ switch (ch[i]) {
+ case '&':
+ write("&amp;");
+ break;
+ case '<':
+ write("&lt;");
+ break;
+ case '>':
+ write("&gt;");
+ break;
+ case '\"':
+ if (isAttVal) {
+ write("&quot;");
+ } else {
+ write('\"');
+ }
+ break;
+ default:
+ if (!unicodeMode && ch[i] > '\u007f') {
+ write("&#");
+ write(Integer.toString(ch[i]));
+ write(';');
+ } else {
+ write(ch[i]);
+ }
+ }
+ }
+ }
+
+
+    /**
+     * Write out the Namespace declarations of the current context.
+     *
+     * <p>One <code>xmlns</code> (default Namespace) or
+     * <code>xmlns:prefix</code> attribute is written for every prefix
+     * declared in the current Namespace context; a prefix without a
+     * URI is declared with an empty value.</p>
+     *
+     * @exception org.xml.sax.SAXException This method will throw
+     *            an IOException wrapped in a SAXException if
+     *            there is an error writing the Namespace
+     *            declarations.
+     */
+    private void writeNSDecls ()
+        throws SAXException
+    {
+        for (Enumeration declared = nsSupport.getDeclaredPrefixes();
+             declared.hasMoreElements(); ) {
+            String prefix = (String) declared.nextElement();
+            String boundUri = nsSupport.getURI(prefix);
+            char[] value = (boundUri == null ? "" : boundUri).toCharArray();
+
+            write(' ');
+            if (!"".equals(prefix)) {
+                write("xmlns:");
+                write(prefix);
+                write("=\"");
+            } else {
+                write("xmlns=\"");
+            }
+            writeEsc(value, 0, value.length, true);
+            write('\"');
+        }
+    }
+
+
+    /**
+     * Write an element or attribute name, with its prefix if it has one.
+     *
+     * <p>The prefix is obtained from
+     * {@link #doPrefix(String, String, boolean)}.  The local part is
+     * the local name when one is given, otherwise whatever follows
+     * the colon in the qname (or the whole qname if it has none).</p>
+     *
+     * @param uri The Namespace URI.
+     * @param localName The local name.
+     * @param qName The prefixed name, if available, or the empty string.
+     * @param isElement true if this is an element name, false if it
+     *        is an attribute name.
+     * @exception org.xml.sax.SAXException This method will throw an
+     *            IOException wrapped in a SAXException if there is
+     *            an error writing the name.
+     */
+    private void writeName (String uri, String localName,
+                            String qName, boolean isElement)
+        throws SAXException
+    {
+        String prefix = doPrefix(uri, qName, isElement);
+        boolean hasPrefix = prefix != null && prefix.length() > 0;
+        if (hasPrefix) {
+            write(prefix);
+            write(':');
+        }
+
+        boolean hasLocalName = localName != null && localName.length() > 0;
+        if (!hasLocalName) {
+            // indexOf gives -1 for an unprefixed qname, so this starts at 0.
+            int afterColon = qName.indexOf(':') + 1;
+            write(qName.substring(afterColon));
+            return;
+        }
+        write(localName);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Default LexicalHandler implementation
+ ////////////////////////////////////////////////////////////////////
+
+    /**
+     * Write a comment.
+     *
+     * <p>The text is written between <code>&lt;!--</code> and
+     * <code>--&gt;</code>.  Because a comment may neither contain
+     * <code>--</code> nor end in <code>-</code>, a space is written
+     * after any hyphen that is followed by another hyphen or that is
+     * the last character of the comment.</p>
+     *
+     * @param ch The array holding the comment text.
+     * @param start The starting position in the array.
+     * @param length The number of characters to use.
+     * @exception org.xml.sax.SAXException If there is an error writing
+     *            the comment (an IOException wrapped in a SAXException).
+     */
+    public void comment(char[] ch, int start, int length) throws SAXException
+    {
+        final int end = start + length;
+        write("<!--");
+        for (int i = start; i < end; i++) {
+            write(ch[i]);
+            // Only look at ch[i + 1] while it is still inside the range;
+            // the previous "i + 1 <= end" test read one character past it
+            // (and past the array when the range ends at the array's end).
+            if (ch[i] == '-' && (i + 1 == end || ch[i + 1] == '-'))
+                write(' ');
+        }
+        write("-->");
+    }
+
+ // The following lexical events are accepted but produce no output.
+
+ /** End of a CDATA section: does nothing. */
+ public void endCDATA() throws SAXException { }
+ /** End of the DTD declarations: does nothing. */
+ public void endDTD() throws SAXException { }
+ /** End of an entity: does nothing. */
+ public void endEntity(String name) throws SAXException { }
+ /** Start of a CDATA section: does nothing. */
+ public void startCDATA() throws SAXException { }
+ public void startDTD(String name, String publicid, String systemid) throws SAXException {
+ if (name == null) return; // can't cope
+ if (hasOutputDTD) return; // only one DTD
+ hasOutputDTD = true;
+ write("<!DOCTYPE ");
+ write(name);
+ if (systemid == null) systemid = "";
+ if (overrideSystem != null) systemid = overrideSystem;
+ char sysquote = (systemid.indexOf('"') != -1) ? '\'': '"';
+ if (overridePublic != null) publicid = overridePublic;
+ if (!(publicid == null || "".equals(publicid))) {
+ char pubquote = (publicid.indexOf('"') != -1) ? '\'': '"';
+ write(" PUBLIC ");
+ write(pubquote);
+ write(publicid);
+ write(pubquote);
+ write(' ');
+ }
+ else {
+ write(" SYSTEM ");
+ }
+ write(sysquote);
+ write(systemid);
+ write(sysquote);
+ write(">\n");
+ }
+
+ /** Start of an entity: accepted but produces no output. */
+ public void startEntity(String name) throws SAXException { }
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Output properties
+ ////////////////////////////////////////////////////////////////////
+
+    /**
+     * Look up an output property.
+     *
+     * @param key The property name.
+     * @return The value stored for the key in the output properties,
+     *         as returned by <code>Properties.getProperty</code>.
+     */
+    public String getOutputProperty(String key) {
+        final String stored = outputProperties.getProperty(key);
+        return stored;
+    }
+
+    /**
+     * Set an output property and update the writer state derived from it.
+     *
+     * <p>The value is always stored in the output properties.  In
+     * addition, the recognised keys have these effects:</p>
+     * <ul>
+     * <li>{@link #ENCODING}: records the encoding; Unicode mode is on
+     *     when the name starts with "utf" (case-insensitive).</li>
+     * <li>{@link #METHOD}: HTML mode is on when the value is exactly
+     *     "html".</li>
+     * <li>{@link #DOCTYPE_PUBLIC} / {@link #DOCTYPE_SYSTEM}: records the
+     *     overriding identifier and sets the force-DTD flag.</li>
+     * <li>{@link #VERSION} / {@link #STANDALONE}: records the value.</li>
+     * </ul>
+     *
+     * @param key The property name.
+     * @param value The property value.
+     */
+    public void setOutputProperty(String key, String value) {
+        outputProperties.setProperty(key, value);
+        if (key.equals(ENCODING)) {
+            outputEncoding = value;
+            // regionMatches simply yields false for an encoding name
+            // shorter than three characters, where substring(0, 3)
+            // threw StringIndexOutOfBoundsException.
+            unicodeMode = value.regionMatches(true, 0, "utf", 0, 3);
+        }
+        else if (key.equals(METHOD)) {
+            htmlMode = value.equals("html");
+        }
+        else if (key.equals(DOCTYPE_PUBLIC)) {
+            overridePublic = value;
+            forceDTD = true;
+        }
+        else if (key.equals(DOCTYPE_SYSTEM)) {
+            overrideSystem = value;
+            forceDTD = true;
+        }
+        else if (key.equals(VERSION)) {
+            version = value;
+        }
+        else if (key.equals(STANDALONE)) {
+            standalone = value;
+        }
+    }
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Constants.
+ ////////////////////////////////////////////////////////////////////
+
+ // Shared empty attribute list, passed by the convenience methods
+ // that take no attributes.
+ private final Attributes EMPTY_ATTS = new AttributesImpl();
+
+ // Output property keys for getOutputProperty/setOutputProperty.
+ // setOutputProperty acts on ENCODING, METHOD, DOCTYPE_PUBLIC,
+ // DOCTYPE_SYSTEM, VERSION and STANDALONE; any other key is only stored.
+ public static final String CDATA_SECTION_ELEMENTS =
+ "cdata-section-elements";
+ public static final String DOCTYPE_PUBLIC = "doctype-public";
+ public static final String DOCTYPE_SYSTEM = "doctype-system";
+ public static final String ENCODING = "encoding";
+ public static final String INDENT = "indent"; // currently ignored
+ public static final String MEDIA_TYPE = "media-type"; // currently ignored
+ public static final String METHOD = "method"; // currently html or xml
+ public static final String OMIT_XML_DECLARATION = "omit-xml-declaration";
+ public static final String STANDALONE = "standalone"; // currently ignored
+ public static final String VERSION = "version";
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal state.
+ ////////////////////////////////////////////////////////////////////
+
+ // Namespace URI -> preferred prefix; consulted by doPrefix.
+ private Hashtable prefixTable;
+ // Keys are the Namespace URIs that forceNSDecls declares.
+ private Hashtable forcedDeclTable;
+ // Namespace URI -> prefix most recently declared for it by doPrefix.
+ private Hashtable doneDeclTable;
+ // NOTE(review): presumably the current element nesting depth; it is
+ // not used in this part of the file, so confirm against its users.
+ private int elementLevel = 0;
+ // Destination of all output (see the write methods).
+ private Writer output;
+ // Namespace context used to look up and declare prefixes.
+ private NamespaceSupport nsSupport;
+ // Counter behind the generated "__NSn" prefixes.
+ private int prefixCounter = 0;
+ // Backing store for getOutputProperty/setOutputProperty.
+ private Properties outputProperties;
+ // When false, writeEsc writes characters above U+007F as character references.
+ private boolean unicodeMode = false;
+ // Value last set for the ENCODING output property.
+ private String outputEncoding = "";
+ // True when the METHOD output property is "html"; enables minimized
+ // boolean attributes in writeAttributes.
+ private boolean htmlMode = false;
+ // Set when a DOCTYPE public or system identifier override is supplied.
+ private boolean forceDTD = false;
+ // Set once startDTD has written a DOCTYPE, so that only one is written.
+ private boolean hasOutputDTD = false;
+ // Identifiers that replace those passed to startDTD, when non-null.
+ private String overridePublic = null;
+ private String overrideSystem = null;
+ // Values last set for the VERSION and STANDALONE output properties.
+ private String version = null;
+ private String standalone = null;
+ // NOTE(review): presumably true while inside an element whose content
+ // is written as CDATA; not used in this part of the file, so confirm.
+ private boolean cdataElement = false;
+
+}
+
+// end of XMLWriter.java
diff --git a/src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java b/src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java
new file mode 100644
index 0000000..61f20ad
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java
@@ -0,0 +1,54 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.io.*;
+
+import javax.xml.parsers.*;
+import org.w3c.dom.Document;
+
+/**
+ * Trivial non-robust test class, to show that TagSoup can be accessed using
+ * JAXP interface.
+ *
+ * <p>It selects the TagSoup SAX parser factory through the
+ * <code>javax.xml.parsers.SAXParserFactory</code> system property, parses
+ * the given file with a SAX parser obtained from JAXP, and then builds a
+ * DOM tree from the same file with the default document builder.</p>
+ */
+public class JAXPTest
+{
+    /**
+     * Command-line entry point.
+     *
+     * @param args exactly one argument: the path of the input file.
+     * @throws Exception if parsing fails or the file cannot be read.
+     */
+    public static void main(String[] args)
+        throws Exception
+    {
+        new JAXPTest().test(args);
+    }
+
+    /**
+     * Runs the SAX parse followed by the DOM build, reporting progress
+     * on standard output.  Exits with status 1 after printing a usage
+     * message if the argument count is wrong.
+     */
+    private void test(String[] args)
+        throws Exception
+    {
+        if (args.length != 1) {
+            // getName() gives the bare class name; concatenating the
+            // Class object itself printed "class org.ccil...JAXPTest".
+            System.err.println("Usage: java "+getClass().getName()+" [input-file]");
+            System.exit(1);
+        }
+        File f = new File(args[0]);
+        // Make JAXP hand out the TagSoup factory.
+        System.setProperty("javax.xml.parsers.SAXParserFactory", "org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl");
+
+        SAXParserFactory spf = SAXParserFactory.newInstance();
+        System.out.println("Ok, SAX factory JAXP creates is: "+spf);
+        System.out.println("Let's parse...");
+        spf.newSAXParser().parse(f, new org.xml.sax.helpers.DefaultHandler());
+        System.out.println("Done. And then DOM build:");
+
+        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(f);
+
+        System.out.println("Succesfully built DOM tree from '"+f+"', -> "+doc);
+    }
+}
diff --git a/src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java b/src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java
new file mode 100644
index 0000000..883a3e7
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java
@@ -0,0 +1,232 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.io.IOException;
+import javax.xml.parsers.*;
+
+import org.xml.sax.*;
+
+/**
+ * This is a simpler adapter class that allows using SAX1 interface on top
+ * of basic SAX2 implementation, such as TagSoup.
+ *
+ * <p>Parsing and handler registration are delegated to the wrapped
+ * {@link XMLReader}; SAX2 content events are translated to SAX1 document
+ * events by {@link DocHandlerWrapper}.</p>
+ *
+ * @author Tatu Saloranta (cowtowncoder@yahoo.com)
+ * @deprecated
+ */
+public class SAX1ParserAdapter
+    implements org.xml.sax.Parser
+{
+    /** The SAX2 reader that does the actual work. */
+    final XMLReader xmlReader;
+
+    /**
+     * @param xr the SAX2 reader to expose through the SAX1 interface.
+     */
+    public SAX1ParserAdapter(XMLReader xr)
+    {
+        xmlReader = xr;
+    }
+
+    // Sax1 API impl
+
+    /**
+     * Parses the given input source with the wrapped reader.
+     *
+     * @throws SAXException on a parse error; an IOException raised by
+     *         the reader is rethrown wrapped in a SAXException.
+     */
+    public void parse(InputSource source)
+        throws SAXException
+    {
+        try {
+            xmlReader.parse(source);
+        } catch (IOException ioe) {
+            throw new SAXException(ioe);
+        }
+    }
+
+    /**
+     * Parses the document with the given system identifier with the
+     * wrapped reader.
+     *
+     * @throws SAXException on a parse error; an IOException raised by
+     *         the reader is rethrown wrapped in a SAXException.
+     */
+    public void parse(String systemId)
+        throws SAXException
+    {
+        try {
+            xmlReader.parse(systemId);
+        } catch (IOException ioe) {
+            throw new SAXException(ioe);
+        }
+    }
+
+    /**
+     * Registers a SAX1 document handler by installing a translating
+     * content handler on the wrapped reader.
+     *
+     * @deprecated
+     */
+    public void setDocumentHandler(DocumentHandler h)
+    {
+        xmlReader.setContentHandler(new DocHandlerWrapper(h));
+    }
+
+    public void setDTDHandler(DTDHandler h)
+    {
+        xmlReader.setDTDHandler(h);
+    }
+
+    public void setEntityResolver(EntityResolver r)
+    {
+        xmlReader.setEntityResolver(r);
+    }
+
+    public void setErrorHandler(ErrorHandler h)
+    {
+        xmlReader.setErrorHandler(h);
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws SAXException always (a SAXNotSupportedException).
+     */
+    public void setLocale(java.util.Locale locale)
+        throws SAXException
+    {
+        /* I have no idea what this is supposed to do... so let's
+         * throw an exception
+         */
+        throw new SAXNotSupportedException("TagSoup does not implement setLocale() method");
+    }
+
+    // Helper methods:
+
+    /**
+     * Chooses the name to report through the SAX1 interface: the
+     * qualified name when there is one, otherwise the local name.
+     * SAX2 reports an unavailable qualified name as the empty string,
+     * so both null and "" count as "no qualified name".
+     */
+    private static String sax1Name(String qName, String localName)
+    {
+        return (qName == null || qName.length() == 0) ? localName : qName;
+    }
+
+    // Helper classes:
+
+    /**
+     * We need another helper class to deal with differences between
+     * Sax2 handler (content handler), and Sax1 handler (document handler)
+     * @deprecated
+     */
+    final static class DocHandlerWrapper
+        implements ContentHandler
+    {
+        /** The SAX1 handler that receives the translated events. */
+        final DocumentHandler docHandler;
+
+        /** Reused for every element; re-pointed at the current attributes. */
+        final AttributesWrapper mAttrWrapper = new AttributesWrapper();
+
+        /**
+         * @deprecated
+         */
+        DocHandlerWrapper(DocumentHandler h)
+        {
+            docHandler = h;
+        }
+
+        public void characters(char[] ch, int start, int length)
+            throws SAXException
+        {
+            docHandler.characters(ch, start, length);
+        }
+
+        public void endDocument()
+            throws SAXException
+        {
+            docHandler.endDocument();
+        }
+
+        public void endElement(String uri, String localName, String qName)
+            throws SAXException
+        {
+            docHandler.endElement(sax1Name(qName, localName));
+        }
+
+        public void endPrefixMapping(String prefix)
+        {
+            // no equivalent in SAX1, ignore
+        }
+
+        public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException
+        {
+            docHandler.ignorableWhitespace(ch, start, length);
+        }
+
+        public void processingInstruction(String target, String data)
+            throws SAXException
+        {
+            docHandler.processingInstruction(target, data);
+        }
+
+        public void setDocumentLocator(Locator locator)
+        {
+            docHandler.setDocumentLocator(locator);
+        }
+
+        public void skippedEntity(String name)
+        {
+            // no equivalent in SAX1, ignore
+        }
+
+        public void startDocument()
+            throws SAXException
+        {
+            docHandler.startDocument();
+        }
+
+        public void startElement(String uri, String localName, String qName,
+                                 Attributes attrs)
+            throws SAXException
+        {
+            // Also, need to wrap Attributes to look like AttributeList
+            mAttrWrapper.setAttributes(attrs);
+            docHandler.startElement(sax1Name(qName, localName), mAttrWrapper);
+        }
+
+        public void startPrefixMapping(String prefix, String uri)
+        {
+            // no equivalent in SAX1, ignore
+        }
+    }
+
+    /**
+     * And one more helper to deal with attribute access differences
+     * @deprecated
+     */
+    final static class AttributesWrapper
+        implements AttributeList
+    {
+        /** The SAX2 attributes currently being exposed. */
+        Attributes attrs;
+
+        public AttributesWrapper() { }
+
+        public void setAttributes(Attributes a) {
+            attrs = a;
+        }
+
+        public int getLength()
+        {
+            return attrs.getLength();
+        }
+
+        /**
+         * Returns the qualified name of the attribute, or its local
+         * name when no qualified name is available.
+         */
+        public String getName(int i)
+        {
+            return sax1Name(attrs.getQName(i), attrs.getLocalName(i));
+        }
+
+        public String getType(int i)
+        {
+            return attrs.getType(i);
+        }
+
+        public String getType(String name)
+        {
+            return attrs.getType(name);
+        }
+
+        public String getValue(int i)
+        {
+            return attrs.getValue(i);
+        }
+
+        public String getValue(String name)
+        {
+            return attrs.getValue(name);
+        }
+    }
+}
diff --git a/src/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java b/src/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java
new file mode 100644
index 0000000..780adfe
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java
@@ -0,0 +1,114 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.util.*;
+import javax.xml.parsers.*;
+
+import org.xml.sax.*;
+
+/**
+ * This is a simple implementation of JAXP {@link SAXParserFactory},
+ * to allow easier integration of TagSoup with the default JDK
+ * xml processing stack.
+ *
+ * @author Tatu Saloranta (cowtowncoder@yahoo.com)
+ */
+public class SAXFactoryImpl
+    extends SAXParserFactory
+{
+    /**
+     * The easiest way to test validity of features to set is to use
+     * a prototype object. Currently this is actually not a real prototype,
+     * in the sense that the configuration is actually passed separately
+     * (as opposed to instantiating new readers from this prototype), but
+     * this could be changed in future, if TagSoup parser object allowed
+     * cloning.  Created lazily by {@link #getPrototype()}.
+     */
+    private SAXParserImpl prototypeParser = null;
+
+    /**
+     * This Map contains explicitly set features that can be successfully
+     * set for XMLReader instances. Temporary storage is needed due to
+     * JAXP design: multiple readers can be instantiated from a single
+     * factory, and settings can be changed between instantiations.
+     *<p>
+     * Note that we wouldn't need this map if we could create instances
+     * directly using the prototype instance.
+     *<p>
+     * Keys are feature names, values are <code>Boolean</code>s; the map
+     * stays null until the first feature is set.
+     */
+    private HashMap features = null;
+
+    public SAXFactoryImpl()
+    {
+        super();
+    }
+
+    // // // JAXP API implementation:
+
+    /**
+     * Creates a new instance of <code>SAXParser</code> using the currently
+     * configured factory parameters.
+     *
+     * @throws ParserConfigurationException if one of the stored features
+     *         cannot be applied to the new parser; the underlying
+     *         SAXException is attached as the cause.
+     */
+    public SAXParser newSAXParser()
+        throws ParserConfigurationException
+    {
+        try {
+            return SAXParserImpl.newInstance(features);
+        } catch (SAXException se) {
+            // Translate to ParserConfigurationException.  It has no
+            // constructor taking a cause, so attach the original
+            // exception with initCause to keep its stack trace.
+            ParserConfigurationException pce =
+                new ParserConfigurationException(se.getMessage());
+            pce.initCause(se);
+            throw pce;
+        }
+    }
+
+    /**
+     * Defines that the specified feature is to enabled/disabled (as
+     * per second argument) on reader instances created by this
+     * factory.
+     *
+     * @throws SAXNotRecognizedException if the prototype parser does not
+     *         recognize the feature name.
+     * @throws SAXNotSupportedException if the prototype parser cannot set
+     *         the feature to the requested value.
+     */
+    public void setFeature(String name, boolean value)
+        throws ParserConfigurationException, SAXNotRecognizedException,
+               SAXNotSupportedException
+    {
+        // First, let's see if it's a valid call
+        getPrototype().setFeature(name, value);
+
+        // No exception was thrown, so the setting is valid: remember it.
+        if (features == null) {
+            // Let's retain the ordering as well
+            features = new LinkedHashMap();
+        }
+        features.put(name, value ? Boolean.TRUE : Boolean.FALSE);
+    }
+
+    /**
+     * Returns whether the specified feature will be enabled or disabled
+     * on reader instances constructed by this factory, as reported by
+     * the prototype parser.
+     */
+    public boolean getFeature(String name)
+        throws ParserConfigurationException, SAXNotRecognizedException,
+               SAXNotSupportedException
+    {
+        return getPrototype().getFeature(name);
+    }
+
+    // // // Internal methods
+
+    /**
+     * Returns the prototype parser, creating it on first use.
+     */
+    private SAXParserImpl getPrototype()
+    {
+        if (prototypeParser == null) {
+            prototypeParser = new SAXParserImpl();
+        }
+        return prototypeParser;
+    }
+}
diff --git a/src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java b/src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java
new file mode 100644
index 0000000..75f3df4
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java
@@ -0,0 +1,113 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.io.*;
+import java.util.*;
+import javax.xml.parsers.*;
+
+import org.xml.sax.*;
+
+import org.ccil.cowan.tagsoup.Parser;
+
+/**
+ * This is a simple implementation of JAXP {@link SAXParser},
+ * to allow easier integration of TagSoup with the default JDK
+ * xml processing stack.
+ *
+ * <p>Every operation is delegated to a wrapped TagSoup
+ * {@link org.ccil.cowan.tagsoup.Parser}.</p>
+ *
+ * @author Tatu Saloranta (cowtowncoder@yahoo.com)
+ */
+public class SAXParserImpl
+    extends SAXParser
+{
+    /** The TagSoup parser that does the actual work. */
+    final org.ccil.cowan.tagsoup.Parser parser;
+
+    protected SAXParserImpl() // used by factory, for prototypes
+    {
+        super();
+        parser = new org.ccil.cowan.tagsoup.Parser();
+    }
+
+    /**
+     * Creates a parser and applies the given features to it.
+     *
+     * @param features feature name (String) to Boolean setting, applied
+     *        in the map's iteration order; may be null for none.
+     * @return the configured parser.
+     * @throws SAXException if a feature is not recognized or not supported.
+     */
+    public static SAXParserImpl newInstance(Map features)
+        throws SAXException
+    {
+        SAXParserImpl parser = new SAXParserImpl();
+        if (features != null) {
+            Iterator it = features.entrySet().iterator();
+            while (it.hasNext()) {
+                Map.Entry entry = (Map.Entry) it.next();
+                parser.setFeature((String) entry.getKey(), ((Boolean) entry.getValue()).booleanValue());
+            }
+        }
+        return parser;
+    }
+
+    // // // JAXP API implementation:
+
+    /**
+     * To support SAX1 interface, we'll need to use an adapter.
+     * @deprecated
+     */
+    public org.xml.sax.Parser getParser()
+        throws SAXException
+    {
+        return new SAX1ParserAdapter(parser);
+    }
+
+    public XMLReader getXMLReader() { return parser; }
+
+    /**
+     * Returns the current setting of the parser's namespaces feature.
+     */
+    public boolean isNamespaceAware()
+    {
+        try {
+            return parser.getFeature(Parser.namespacesFeature);
+        } catch (SAXException sex) { // should never happen... so:
+            // Same message as before, but keep the cause for diagnosis.
+            throw new RuntimeException(sex.getMessage(), sex);
+        }
+    }
+
+    /**
+     * Returns the current setting of the parser's validation feature.
+     */
+    public boolean isValidating()
+    {
+        try {
+            return parser.getFeature(Parser.validationFeature);
+        } catch (SAXException sex) { // should never happen... so:
+            // Same message as before, but keep the cause for diagnosis.
+            throw new RuntimeException(sex.getMessage(), sex);
+        }
+    }
+
+    public void setProperty(String name, Object value)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        parser.setProperty(name, value);
+    }
+
+    public Object getProperty(String name)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        return parser.getProperty(name);
+    }
+
+    // // // Additional convenience methods
+
+    /** Sets a feature on the wrapped parser. */
+    public void setFeature(String name, boolean value)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        parser.setFeature(name, value);
+    }
+
+    /** Returns the setting of a feature of the wrapped parser. */
+    public boolean getFeature(String name)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        return parser.getFeature(name);
+    }
+}