From 926d907a2fb69573f1c6337f064645dde18b1e5e Mon Sep 17 00:00:00 2001 From: The Android Open Source Project Date: Tue, 21 Oct 2008 07:00:00 -0700 Subject: Initial Contribution --- src/org/ccil/cowan/tagsoup/AttributesImpl.java | 626 +++++ src/org/ccil/cowan/tagsoup/AutoDetector.java | 43 + src/org/ccil/cowan/tagsoup/CommandLine.java | 289 ++ src/org/ccil/cowan/tagsoup/Element.java | 203 ++ src/org/ccil/cowan/tagsoup/ElementType.java | 276 ++ src/org/ccil/cowan/tagsoup/GOOGLE_README.txt | 8 + src/org/ccil/cowan/tagsoup/HTMLModels.java | 53 + src/org/ccil/cowan/tagsoup/HTMLScanner.java | 648 +++++ src/org/ccil/cowan/tagsoup/HTMLSchema.java | 2895 ++++++++++++++++++++ src/org/ccil/cowan/tagsoup/LICENSE | 201 ++ src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2 | 0 src/org/ccil/cowan/tagsoup/PYXScanner.java | 124 + src/org/ccil/cowan/tagsoup/PYXWriter.java | 217 ++ src/org/ccil/cowan/tagsoup/Parser.java | 1114 ++++++++ src/org/ccil/cowan/tagsoup/ScanHandler.java | 119 + src/org/ccil/cowan/tagsoup/Scanner.java | 50 + src/org/ccil/cowan/tagsoup/Schema.java | 170 ++ src/org/ccil/cowan/tagsoup/XMLWriter.java | 1435 ++++++++++ src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java | 54 + .../ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java | 232 ++ .../ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java | 114 + src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java | 113 + 22 files changed, 8984 insertions(+) create mode 100644 src/org/ccil/cowan/tagsoup/AttributesImpl.java create mode 100644 src/org/ccil/cowan/tagsoup/AutoDetector.java create mode 100644 src/org/ccil/cowan/tagsoup/CommandLine.java create mode 100644 src/org/ccil/cowan/tagsoup/Element.java create mode 100644 src/org/ccil/cowan/tagsoup/ElementType.java create mode 100644 src/org/ccil/cowan/tagsoup/GOOGLE_README.txt create mode 100644 src/org/ccil/cowan/tagsoup/HTMLModels.java create mode 100644 src/org/ccil/cowan/tagsoup/HTMLScanner.java create mode 100644 src/org/ccil/cowan/tagsoup/HTMLSchema.java create mode 100644 
src/org/ccil/cowan/tagsoup/LICENSE create mode 100644 src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2 create mode 100644 src/org/ccil/cowan/tagsoup/PYXScanner.java create mode 100644 src/org/ccil/cowan/tagsoup/PYXWriter.java create mode 100644 src/org/ccil/cowan/tagsoup/Parser.java create mode 100644 src/org/ccil/cowan/tagsoup/ScanHandler.java create mode 100644 src/org/ccil/cowan/tagsoup/Scanner.java create mode 100644 src/org/ccil/cowan/tagsoup/Schema.java create mode 100644 src/org/ccil/cowan/tagsoup/XMLWriter.java create mode 100644 src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java create mode 100644 src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java create mode 100644 src/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java create mode 100644 src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java (limited to 'src/org') diff --git a/src/org/ccil/cowan/tagsoup/AttributesImpl.java b/src/org/ccil/cowan/tagsoup/AttributesImpl.java new file mode 100644 index 0000000..86f76fc --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/AttributesImpl.java @@ -0,0 +1,626 @@ +// AttributesImpl.java - default implementation of the SAX2 Attributes interface. +// Written by David Megginson, david@megginson.com +// and placed by him into the public domain. +// Extensively modified by John Cowan for TagSoup. +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +package org.ccil.cowan.tagsoup; +import org.xml.sax.Attributes; + + +/** + * Default implementation of the Attributes interface. + * + *
+ * This module, both source code and documentation, is in the + * Public Domain, and comes with NO WARRANTY. + * See http://www.saxproject.org + * for further information. + *
+ * + *

This class provides a default implementation of the SAX2 + * {@link org.xml.sax.Attributes Attributes} interface, with the + * addition of manipulators so that the list can be modified or + * reused.

+ * + *

There are two typical uses of this class:

+ * + *
    + *
  1. to take a persistent snapshot of an Attributes object + * in a {@link org.xml.sax.ContentHandler#startElement startElement} event; or
  2. + *
  3. to construct or modify an Attributes object in a SAX2 driver or filter.
  4. + *
+ * + *

This class replaces the now-deprecated SAX1 {@link + * org.xml.sax.helpers.AttributeListImpl AttributeListImpl} + * class; in addition to supporting the updated Attributes + * interface rather than the deprecated {@link org.xml.sax.AttributeList + * AttributeList} interface, it also includes a much more efficient + * implementation using a single array rather than a set of Vectors.

+ * + * @since SAX 2.0 + * @author David Megginson + * @version 2.0.1 (sax2r2) + */ +public class AttributesImpl implements Attributes +{ + + + //////////////////////////////////////////////////////////////////// + // Constructors. + //////////////////////////////////////////////////////////////////// + + + /** + * Construct a new, empty AttributesImpl object. + */ + public AttributesImpl () + { + length = 0; + data = null; + } + + + /** + * Copy an existing Attributes object. + * + *

This constructor is especially useful inside a + * {@link org.xml.sax.ContentHandler#startElement startElement} event.

+ * + * @param atts The existing Attributes object. + */ + public AttributesImpl (Attributes atts) + { + setAttributes(atts); + } + + + + //////////////////////////////////////////////////////////////////// + // Implementation of org.xml.sax.Attributes. + //////////////////////////////////////////////////////////////////// + + + /** + * Return the number of attributes in the list. + * + * @return The number of attributes in the list. + * @see org.xml.sax.Attributes#getLength + */ + public int getLength () + { + return length; + } + + + /** + * Return an attribute's Namespace URI. + * + * @param index The attribute's index (zero-based). + * @return The Namespace URI, the empty string if none is + * available, or null if the index is out of range. + * @see org.xml.sax.Attributes#getURI + */ + public String getURI (int index) + { + if (index >= 0 && index < length) { + return data[index*5]; + } else { + return null; + } + } + + + /** + * Return an attribute's local name. + * + * @param index The attribute's index (zero-based). + * @return The attribute's local name, the empty string if + * none is available, or null if the index if out of range. + * @see org.xml.sax.Attributes#getLocalName + */ + public String getLocalName (int index) + { + if (index >= 0 && index < length) { + return data[index*5+1]; + } else { + return null; + } + } + + + /** + * Return an attribute's qualified (prefixed) name. + * + * @param index The attribute's index (zero-based). + * @return The attribute's qualified name, the empty string if + * none is available, or null if the index is out of bounds. + * @see org.xml.sax.Attributes#getQName + */ + public String getQName (int index) + { + if (index >= 0 && index < length) { + return data[index*5+2]; + } else { + return null; + } + } + + + /** + * Return an attribute's type by index. + * + * @param index The attribute's index (zero-based). 
+ * @return The attribute's type, "CDATA" if the type is unknown, or null + * if the index is out of bounds. + * @see org.xml.sax.Attributes#getType(int) + */ + public String getType (int index) + { + if (index >= 0 && index < length) { + return data[index*5+3]; + } else { + return null; + } + } + + + /** + * Return an attribute's value by index. + * + * @param index The attribute's index (zero-based). + * @return The attribute's value or null if the index is out of bounds. + * @see org.xml.sax.Attributes#getValue(int) + */ + public String getValue (int index) + { + if (index >= 0 && index < length) { + return data[index*5+4]; + } else { + return null; + } + } + + + /** + * Look up an attribute's index by Namespace name. + * + *

In many cases, it will be more efficient to look up the name once and + * use the index query methods rather than using the name query methods + * repeatedly.

+ * + * @param uri The attribute's Namespace URI, or the empty + * string if none is available. + * @param localName The attribute's local name. + * @return The attribute's index, or -1 if none matches. + * @see org.xml.sax.Attributes#getIndex(java.lang.String,java.lang.String) + */ + public int getIndex (String uri, String localName) + { + int max = length * 5; + for (int i = 0; i < max; i += 5) { + if (data[i].equals(uri) && data[i+1].equals(localName)) { + return i / 5; + } + } + return -1; + } + + + /** + * Look up an attribute's index by qualified (prefixed) name. + * + * @param qName The qualified name. + * @return The attribute's index, or -1 if none matches. + * @see org.xml.sax.Attributes#getIndex(java.lang.String) + */ + public int getIndex (String qName) + { + int max = length * 5; + for (int i = 0; i < max; i += 5) { + if (data[i+2].equals(qName)) { + return i / 5; + } + } + return -1; + } + + + /** + * Look up an attribute's type by Namespace-qualified name. + * + * @param uri The Namespace URI, or the empty string for a name + * with no explicit Namespace URI. + * @param localName The local name. + * @return The attribute's type, or null if there is no + * matching attribute. + * @see org.xml.sax.Attributes#getType(java.lang.String,java.lang.String) + */ + public String getType (String uri, String localName) + { + int max = length * 5; + for (int i = 0; i < max; i += 5) { + if (data[i].equals(uri) && data[i+1].equals(localName)) { + return data[i+3]; + } + } + return null; + } + + + /** + * Look up an attribute's type by qualified (prefixed) name. + * + * @param qName The qualified name. + * @return The attribute's type, or null if there is no + * matching attribute. 
+ * @see org.xml.sax.Attributes#getType(java.lang.String) + */ + public String getType (String qName) + { + int max = length * 5; + for (int i = 0; i < max; i += 5) { + if (data[i+2].equals(qName)) { + return data[i+3]; + } + } + return null; + } + + + /** + * Look up an attribute's value by Namespace-qualified name. + * + * @param uri The Namespace URI, or the empty string for a name + * with no explicit Namespace URI. + * @param localName The local name. + * @return The attribute's value, or null if there is no + * matching attribute. + * @see org.xml.sax.Attributes#getValue(java.lang.String,java.lang.String) + */ + public String getValue (String uri, String localName) + { + int max = length * 5; + for (int i = 0; i < max; i += 5) { + if (data[i].equals(uri) && data[i+1].equals(localName)) { + return data[i+4]; + } + } + return null; + } + + + /** + * Look up an attribute's value by qualified (prefixed) name. + * + * @param qName The qualified name. + * @return The attribute's value, or null if there is no + * matching attribute. + * @see org.xml.sax.Attributes#getValue(java.lang.String) + */ + public String getValue (String qName) + { + int max = length * 5; + for (int i = 0; i < max; i += 5) { + if (data[i+2].equals(qName)) { + return data[i+4]; + } + } + return null; + } + + + + //////////////////////////////////////////////////////////////////// + // Manipulators. + //////////////////////////////////////////////////////////////////// + + + /** + * Clear the attribute list for reuse. + * + *

Note that little memory is freed by this call: + * the current array is kept so it can be + * reused.

+ */ + public void clear () + { + if (data != null) { + for (int i = 0; i < (length * 5); i++) + data [i] = null; + } + length = 0; + } + + + /** + * Copy an entire Attributes object. + * + *

It may be more efficient to reuse an existing object + * rather than constantly allocating new ones.

+ * + * @param atts The attributes to copy. + */ + public void setAttributes (Attributes atts) + { + clear(); + length = atts.getLength(); + if (length > 0) { + data = new String[length*5]; + for (int i = 0; i < length; i++) { + data[i*5] = atts.getURI(i); + data[i*5+1] = atts.getLocalName(i); + data[i*5+2] = atts.getQName(i); + data[i*5+3] = atts.getType(i); + data[i*5+4] = atts.getValue(i); + } + } + } + + + /** + * Add an attribute to the end of the list. + * + *

For the sake of speed, this method does no checking + * to see if the attribute is already in the list: that is + * the responsibility of the application.

+ * + * @param uri The Namespace URI, or the empty string if + * none is available or Namespace processing is not + * being performed. + * @param localName The local name, or the empty string if + * Namespace processing is not being performed. + * @param qName The qualified (prefixed) name, or the empty string + * if qualified names are not available. + * @param type The attribute type as a string. + * @param value The attribute value. + */ + public void addAttribute (String uri, String localName, String qName, + String type, String value) + { + ensureCapacity(length+1); + data[length*5] = uri; + data[length*5+1] = localName; + data[length*5+2] = qName; + data[length*5+3] = type; + data[length*5+4] = value; + length++; + } + + + /** + * Set an attribute in the list. + * + *

For the sake of speed, this method does no checking + * for name conflicts or well-formedness: such checks are the + * responsibility of the application.

+ * + * @param index The index of the attribute (zero-based). + * @param uri The Namespace URI, or the empty string if + * none is available or Namespace processing is not + * being performed. + * @param localName The local name, or the empty string if + * Namespace processing is not being performed. + * @param qName The qualified name, or the empty string + * if qualified names are not available. + * @param type The attribute type as a string. + * @param value The attribute value. + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. + */ + public void setAttribute (int index, String uri, String localName, + String qName, String type, String value) + { + if (index >= 0 && index < length) { + data[index*5] = uri; + data[index*5+1] = localName; + data[index*5+2] = qName; + data[index*5+3] = type; + data[index*5+4] = value; + } else { + badIndex(index); + } + } + + + /** + * Remove an attribute from the list. + * + * @param index The index of the attribute (zero-based). + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. + */ + public void removeAttribute (int index) + { + if (index >= 0 && index < length) { + if (index < length - 1) { + System.arraycopy(data, (index+1)*5, data, index*5, + (length-index-1)*5); + } + index = (length - 1) * 5; + data [index++] = null; + data [index++] = null; + data [index++] = null; + data [index++] = null; + data [index] = null; + length--; + } else { + badIndex(index); + } + } + + + /** + * Set the Namespace URI of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param uri The attribute's Namespace URI, or the empty + * string for none. + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. 
+ */ + public void setURI (int index, String uri) + { + if (index >= 0 && index < length) { + data[index*5] = uri; + } else { + badIndex(index); + } + } + + + /** + * Set the local name of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param localName The attribute's local name, or the empty + * string for none. + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. + */ + public void setLocalName (int index, String localName) + { + if (index >= 0 && index < length) { + data[index*5+1] = localName; + } else { + badIndex(index); + } + } + + + /** + * Set the qualified name of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param qName The attribute's qualified name, or the empty + * string for none. + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. + */ + public void setQName (int index, String qName) + { + if (index >= 0 && index < length) { + data[index*5+2] = qName; + } else { + badIndex(index); + } + } + + + /** + * Set the type of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param type The attribute's type. + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. + */ + public void setType (int index, String type) + { + if (index >= 0 && index < length) { + data[index*5+3] = type; + } else { + badIndex(index); + } + } + + + /** + * Set the value of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param value The attribute's value. + * @exception java.lang.ArrayIndexOutOfBoundsException When the + * supplied index does not point to an attribute + * in the list. 
+ */ + public void setValue (int index, String value) + { + if (index >= 0 && index < length) { + data[index*5+4] = value; + } else { + badIndex(index); + } + } + + + + //////////////////////////////////////////////////////////////////// + // Internal methods. + //////////////////////////////////////////////////////////////////// + + + /** + * Ensure the internal array's capacity. + * + * @param n The minimum number of attributes that the array must + * be able to hold. + */ + private void ensureCapacity (int n) { + if (n <= 0) { + return; + } + int max; + if (data == null || data.length == 0) { + max = 25; + } + else if (data.length >= n * 5) { + return; + } + else { + max = data.length; + } + while (max < n * 5) { + max *= 2; + } + + String newData[] = new String[max]; + if (length > 0) { + System.arraycopy(data, 0, newData, 0, length*5); + } + data = newData; + } + + + /** + * Report a bad array index in a manipulator. + * + * @param index The index to report. + * @exception java.lang.ArrayIndexOutOfBoundsException Always. + */ + private void badIndex (int index) + throws ArrayIndexOutOfBoundsException + { + String msg = + "Attempt to modify attribute at illegal index: " + index; + throw new ArrayIndexOutOfBoundsException(msg); + } + + + + //////////////////////////////////////////////////////////////////// + // Internal state. + //////////////////////////////////////////////////////////////////// + + int length; + String data []; + +} + +// end of AttributesImpl.java + diff --git a/src/org/ccil/cowan/tagsoup/AutoDetector.java b/src/org/ccil/cowan/tagsoup/AutoDetector.java new file mode 100644 index 0000000..eb85d6f --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/AutoDetector.java @@ -0,0 +1,43 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . 
You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +// Interface to objects that translate InputStreams to Readers by auto-detection + +package org.ccil.cowan.tagsoup; +import java.io.Reader; +import java.io.InputStream; + +/** +Classes which accept an InputStream and provide a Reader which figures +out the encoding of the InputStream and reads characters from it should +conform to this interface. +@see java.io.InputStream +@see java.io.Reader +*/ + +public interface AutoDetector { + + /** + Given an InputStream, return a suitable Reader that understands + the presumed character encoding of that InputStream. + If bytes are consumed from the InputStream in the process, they + must be pushed back onto the InputStream so that they can be + reinterpreted as characters. + NOTE(review): this contract does not say who closes the stream or + what happens when no encoding can be detected -- confirm against + the implementations. + @param i The InputStream whose character encoding is to be detected + @return A Reader that reads from the InputStream + */ + + public Reader autoDetectingReader(InputStream i); + + } diff --git a/src/org/ccil/cowan/tagsoup/CommandLine.java b/src/org/ccil/cowan/tagsoup/CommandLine.java new file mode 100644 index 0000000..dd0c022 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/CommandLine.java @@ -0,0 +1,289 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. 
+// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +// The TagSoup command line UI + +package org.ccil.cowan.tagsoup; +import java.util.Hashtable; +import java.util.Enumeration; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.ext.LexicalHandler; + + +/** +The stand-alone TagSoup program. +**/ +public class CommandLine { + + static Hashtable options = new Hashtable(); static { + options.put("--nocdata", Boolean.FALSE); // CDATA elements are normal + options.put("--files", Boolean.FALSE); // process arguments as separate files + options.put("--reuse", Boolean.FALSE); // reuse a single Parser + options.put("--nons", Boolean.FALSE); // no namespaces + options.put("--nobogons", Boolean.FALSE); // suppress unknown elements + options.put("--any", Boolean.FALSE); // unknowns have ANY content model + options.put("--emptybogons", Boolean.FALSE); // unknowns have EMPTY content model + options.put("--norootbogons", Boolean.FALSE); // unknowns can't be the root + options.put("--pyxin", Boolean.FALSE); // input is PYX + options.put("--lexical", Boolean.FALSE); // output comments + options.put("--pyx", Boolean.FALSE); // output is PYX + options.put("--html", Boolean.FALSE); // output is HTML + options.put("--method=", Boolean.FALSE); // output method + options.put("--doctype-public=", Boolean.FALSE); // override public id + options.put("--doctype-system=", Boolean.FALSE); // override system id + options.put("--output-encoding=", Boolean.FALSE); // output encoding + options.put("--omit-xml-declaration", Boolean.FALSE); // omit XML decl + options.put("--encoding=", 
Boolean.FALSE); // specify encoding + options.put("--help", Boolean.FALSE); // display help + options.put("--version", Boolean.FALSE); // display version + options.put("--nodefaults", Boolean.FALSE); // no default attrs + options.put("--nocolons", Boolean.FALSE); // colon to underscore + options.put("--norestart", Boolean.FALSE); // no restartable elements + options.put("--ignorable", Boolean.FALSE); // return ignorable whitespace + } + + /** + Main method. Processes specified files or standard input. + **/ + + public static void main(String[] argv) throws IOException, SAXException { + int optind = getopts(options, argv); + if (hasOption(options, "--help")) { + doHelp(); + return; + } + if (hasOption(options, "--version")) { + System.err.println("TagSoup version 1.2"); + return; + } + if (argv.length == optind) { + process("", System.out); + } + else if (hasOption(options, "--files")) { + for (int i = optind; i < argv.length; i++) { + String src = argv[i]; + String dst; + int j = src.lastIndexOf('.'); + if (j == -1) + dst = src + ".xhtml"; + else if (src.endsWith(".xhtml")) + dst = src + "_"; + else + dst = src.substring(0, j) + ".xhtml"; + System.err.println("src: " + src + " dst: " + dst); + OutputStream os = new FileOutputStream(dst); + process(src, os); + } + } + else { + for (int i = optind; i < argv.length; i++) { + System.err.println("src: " + argv[i]); + process(argv[i], System.out); + } + } + } + + // Print the help message + + private static void doHelp() { + System.err.print("usage: java -jar tagsoup-*.jar "); + System.err.print(" [ "); + boolean first = true; + for (Enumeration e = options.keys(); e.hasMoreElements(); ) { + if (!first) { + System.err.print("| "); + } + first = false; + String key = (String)(e.nextElement()); + System.err.print(key); + if (key.endsWith("=")) + System.err.print("?"); + System.err.print(" "); + } + System.err.println("]*"); + } + + private static Parser theParser = null; + private static HTMLSchema theSchema = null; + 
private static String theOutputEncoding = null; + + // Process one source onto an output stream. + + private static void process(String src, OutputStream os) + throws IOException, SAXException { + XMLReader r; + if (hasOption(options, "--reuse")) { + if (theParser == null) theParser = new Parser(); + r = theParser; + } + else { + r = new Parser(); + } + theSchema = new HTMLSchema(); + r.setProperty(Parser.schemaProperty, theSchema); + + if (hasOption(options, "--nocdata")) { + r.setFeature(Parser.CDATAElementsFeature, false); + } + + if (hasOption(options, "--nons") || hasOption(options, "--html")) { + r.setFeature(Parser.namespacesFeature, false); + } + + if (hasOption(options, "--nobogons")) { + r.setFeature(Parser.ignoreBogonsFeature, true); + } + + if (hasOption(options, "--any")) { + r.setFeature(Parser.bogonsEmptyFeature, false); + } + else if (hasOption(options, "--emptybogons")) { + r.setFeature(Parser.bogonsEmptyFeature, true); + } + + if (hasOption(options, "--norootbogons")) { + r.setFeature(Parser.rootBogonsFeature, false); + } + + if (hasOption(options, "--nodefaults")) { + r.setFeature(Parser.defaultAttributesFeature, false); + } + if (hasOption(options, "--nocolons")) { + r.setFeature(Parser.translateColonsFeature, true); + } + + if (hasOption(options, "--norestart")) { + r.setFeature(Parser.restartElementsFeature, false); + } + + if (hasOption(options, "--ignorable")) { + r.setFeature(Parser.ignorableWhitespaceFeature, true); + } + + if (hasOption(options, "--pyxin")) { + r.setProperty(Parser.scannerProperty, new PYXScanner()); + } + + Writer w; + if (theOutputEncoding == null) { + w = new OutputStreamWriter(os); + } + else { + w = new OutputStreamWriter(os, theOutputEncoding); + } + ContentHandler h = chooseContentHandler(w); + r.setContentHandler(h); + if (hasOption(options, "--lexical") && h instanceof LexicalHandler) { + r.setProperty(Parser.lexicalHandlerProperty, h); + } + InputSource s = new InputSource(); + if (src != "") { + 
s.setSystemId(src); + } + else { + s.setByteStream(System.in); + } + if (hasOption(options, "--encoding=")) { +// System.out.println("%% Found --encoding"); + String encoding = (String)options.get("--encoding="); + if (encoding != null) s.setEncoding(encoding); + } + r.parse(s); + } + + // Pick a content handler to generate the desired format. + + private static ContentHandler chooseContentHandler(Writer w) { + XMLWriter x; + if (hasOption(options, "--pyx")) { + return new PYXWriter(w); + } + + x = new XMLWriter(w); + if (hasOption(options, "--html")) { + x.setOutputProperty(XMLWriter.METHOD, "html"); + x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes"); + } + if (hasOption(options, "--method=")) { + String method = (String)options.get("--method="); + if (method != null) { + x.setOutputProperty(XMLWriter.METHOD, method); + } + } + if (hasOption(options, "--doctype-public=")) { + String doctype_public = (String)options.get("--doctype-public="); + if (doctype_public != null) { + x.setOutputProperty(XMLWriter.DOCTYPE_PUBLIC, doctype_public); + } + } + if (hasOption(options, "--doctype-system=")) { + String doctype_system = (String)options.get("--doctype-system="); + if (doctype_system != null) { + x.setOutputProperty(XMLWriter.DOCTYPE_SYSTEM, doctype_system); + } + } + if (hasOption(options, "--output-encoding=")) { + theOutputEncoding = (String)options.get("--output-encoding="); +// System.err.println("%%%% Output encoding is " + theOutputEncoding); + if (theOutputEncoding != null) { + x.setOutputProperty(XMLWriter.ENCODING, theOutputEncoding); + } + } + if (hasOption(options, "--omit-xml-declaration")) { + x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes"); + } + x.setPrefix(theSchema.getURI(), ""); + return x; + } + + // Options processing + + private static int getopts(Hashtable options, String[] argv) { + int optind; + for (optind = 0; optind < argv.length; optind++) { + String arg = argv[optind]; + String value = null; + if (arg.charAt(0) != '-') 
break; + int eqsign = arg.indexOf('='); + if (eqsign != -1) { + value = arg.substring(eqsign + 1, arg.length()); + arg = arg.substring(0, eqsign + 1); + } + if (options.containsKey(arg)) { + if (value == null) options.put(arg, Boolean.TRUE); + else options.put(arg, value); +// System.out.println("%% Parsed [" + arg + "]=[" + value + "]"); + } + else { + System.err.print("Unknown option "); + System.err.println(arg); + System.exit(1); + } + } + return optind; + } + + // Return true if an option exists. + + private static boolean hasOption(Hashtable options, String option) { + if (Boolean.getBoolean(option)) return true; + else if (options.get(option) != Boolean.FALSE) return true; + return false; + } + + } diff --git a/src/org/ccil/cowan/tagsoup/Element.java b/src/org/ccil/cowan/tagsoup/Element.java new file mode 100644 index 0000000..01a9fa7 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/Element.java @@ -0,0 +1,203 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +package org.ccil.cowan.tagsoup; + +/** +The internal representation of an actual element (not an element type). +An Element has an element type, attributes, and a successor Element +for use in constructing stacks and queues of Elements. 
+@see ElementType +@see AttributesImpl +*/ +public class Element { + + + private ElementType theType; // type of element + private AttributesImpl theAtts; // attributes of element + private Element theNext; // successor of element + private boolean preclosed; // this element has been preclosed + + /** + Return an Element from a specified ElementType. + @param type The element type of the newly constructed element + @param defaultAttributes True if default attributes are wanted + */ + + public Element(ElementType type, boolean defaultAttributes) { + theType = type; + if (defaultAttributes) theAtts = new AttributesImpl(type.atts()); + else theAtts = new AttributesImpl(); + theNext = null; + preclosed = false; + } + + /** + Return the element type. + @return The element type. + */ + + public ElementType type() { return theType; } + + /** + Return the attributes as an AttributesImpl object. + Returning an AttributesImpl makes the attributes mutable. + @return The attributes + @see AttributesImpl + */ + public AttributesImpl atts() { return theAtts; } + + /** + Return the next element in an element stack or queue. + @return The next element + */ + + public Element next() { return theNext; } + + /** + Change the next element in an element stack or queue. + @param next The new next element + */ + + public void setNext(Element next) { theNext = next; } + + /** + Return the name of the element's type. + Convenience method. + @return The element type name + */ + + public String name() { return theType.name(); } + + /** + Return the namespace name of the element's type. + Convenience method. + @return The element type namespace name + */ + + public String namespace() { return theType.namespace(); } + + /** + Return the local name of the element's type. + Convenience method. + @return The element type local name + */ + + public String localName() { return theType.localName(); } + + /** + Return the content model vector of the element's type. + Convenience method. 
+ @return The content model vector + */ + + public int model() { return theType.model(); } + + /** + Return the member-of vector of the element's type. + Convenience method. + @return The member-of vector + */ + + public int memberOf() { return theType.memberOf(); } + + /** + Return the flags vector of the element's type. + Convenience method. + @return The flags vector + */ + + public int flags() { return theType.flags(); } + + /** + Return the parent element type of the element's type. + Convenience method. + @return The parent element type + */ + + public ElementType parent() { return theType.parent(); } + + /** + Return true if the type of this element can contain the type of + another element. + Convenience method. + @param other The other element + */ + + public boolean canContain(Element other) { + return theType.canContain(other.theType); + } + + + /** + Set an attribute and its value into this element. + @param name The attribute name (Qname) + @param type The attribute type + @param value The attribute value + */ + + public void setAttribute(String name, String type, String value) { + theType.setAttribute(theAtts, name, type, value); + } + + /** + Make this element anonymous. + Remove any id or name attribute present + in the element's attributes. + */ + + public void anonymize() { + for (int i = theAtts.getLength() - 1; i >= 0; i--) { + if (theAtts.getType(i).equals("ID") || + theAtts.getQName(i).equals("name")) { + theAtts.removeAttribute(i); + } + } + } + + /** + Clean the attributes of this element. + Attributes with null name (the name was ill-formed) + or null value (the attribute was present in the element type but + not in this actual element) are removed. 
+ */ + + public void clean() { + for (int i = theAtts.getLength() - 1; i >= 0; i--) { + String name = theAtts.getLocalName(i); + if (theAtts.getValue(i) == null || name == null || + name.length() == 0) { + theAtts.removeAttribute(i); + continue; + } + } + } + + /** + Force this element to preclosed status, meaning that an end-tag has + been seen but the element cannot yet be closed for structural reasons. + */ + + public void preclose() { + preclosed = true; + } + + /** + Return true if this element has been preclosed. + */ + + public boolean isPreclosed() { + return preclosed; + } + + } diff --git a/src/org/ccil/cowan/tagsoup/ElementType.java b/src/org/ccil/cowan/tagsoup/ElementType.java new file mode 100644 index 0000000..46ae883 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/ElementType.java @@ -0,0 +1,276 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +package org.ccil.cowan.tagsoup; + +/** +This class represents an element type in the schema. +An element type has a name, a content model vector, a member-of vector, +a flags vector, default attributes, and a schema to which it belongs. 
+@see Schema +*/ + +public class ElementType { + + private String theName; // element type name (Qname) + private String theNamespace; // element type namespace name + private String theLocalName; // element type local name + private int theModel; // bitmap: what the element contains + private int theMemberOf; // bitmap: what element is contained in + private int theFlags; // bitmap: element flags + private AttributesImpl theAtts; // default attributes + private ElementType theParent; // parent of this element type + private Schema theSchema; // schema to which this belongs + + /** + Construct an ElementType: + but it's better to use Schema.element() instead. + The content model, member-of, and flags vectors are specified as ints. + @param name The element type name + @param model ORed-together bits representing the content models + allowed in the content of this element type + @param memberOf ORed-together bits representing the content models + to which this element type belongs + @param flags ORed-together bits representing the flags associated + with this element type + @param schema The schema with which this element type will be + associated + */ + + public ElementType(String name, int model, int memberOf, int flags, Schema schema) { + theName = name; + theModel = model; + theMemberOf = memberOf; + theFlags = flags; + theAtts = new AttributesImpl(); + theSchema = schema; + theNamespace = namespace(name, false); + theLocalName = localName(name); + } + + /** + Return a namespace name from a Qname. + The attribute flag tells us whether to return an empty namespace + name if there is no prefix, or use the schema default instead. + @param name The Qname + @param attribute True if name is an attribute name + @return The namespace name + **/ + public String namespace(String name, boolean attribute) { + int colon = name.indexOf(':'); + if (colon == -1) { + return attribute ? 
"" : theSchema.getURI(); + } + String prefix = name.substring(0, colon); + if (prefix.equals("xml")) { + return "http://www.w3.org/XML/1998/namespace"; + } + else { + return ("urn:x-prefix:" + prefix).intern(); + } + } + + /** + Return a local name from a Qname. + @param name The Qname + @return The local name + **/ + public String localName(String name) { + int colon = name.indexOf(':'); + if (colon == -1) { + return name; + } + else { + return name.substring(colon+1).intern(); + } + } + + /** + Returns the name of this element type. + @return The name of the element type + */ + + public String name() { return theName; } + + /** + Returns the namespace name of this element type. + @return The namespace name of the element type + */ + + public String namespace() { return theNamespace; } + + /** + Returns the local name of this element type. + @return The local name of the element type + */ + + public String localName() { return theLocalName; } + + /** + Returns the content models of this element type. + @return The content models of this element type as a vector of bits + */ + + public int model() { return theModel; } + + /** + Returns the content models to which this element type belongs. + @return The content models to which this element type belongs as a + vector of bits + */ + + public int memberOf() { return theMemberOf; } + + /** + Returns the flags associated with this element type. + @return The flags associated with this element type as a vector of bits + */ + + public int flags() { return theFlags; } + + /** + Returns the default attributes associated with this element type. + Attributes of type CDATA that don't have default values are + typically not included. Other attributes without default values + have an internal value of null. + The return value is an AttributesImpl to allow the caller to mutate + the attributes. + */ + + public AttributesImpl atts() {return theAtts;} + + /** + Returns the parent element type of this element type. 
+ @return The parent element type + */ + + public ElementType parent() {return theParent;} + + /** + Returns the schema which this element type is associated with. + @return The schema + */ + + public Schema schema() {return theSchema;} + + + /** + Returns true if this element type can contain another element type. + That is, if any of the models in this element's model vector + match any of the models in the other element type's member-of + vector. + @param other The other element type + */ + + public boolean canContain(ElementType other) { + return (theModel & other.theMemberOf) != 0; + } + + + /** + Sets an attribute and its value into an AttributesImpl object. + Attempts to set a namespace declaration are ignored. + @param atts The AttributesImpl object + @param name The name (Qname) of the attribute + @param type The type of the attribute + @param value The value of the attribute + */ + + public void setAttribute(AttributesImpl atts, String name, String type, String value) { + if (name.equals("xmlns") || name.startsWith("xmlns:")) { + return; + } +; + String namespace = namespace(name, true); + String localName = localName(name); + int i = atts.getIndex(name); + if (i == -1) { + name = name.intern(); + if (type == null) type = "CDATA"; + if (!type.equals("CDATA")) value = normalize(value); + atts.addAttribute(namespace, localName, name, type, value); + } + else { + if (type == null) type = atts.getType(i); + if (!type.equals("CDATA")) value=normalize(value); + atts.setAttribute(i, namespace, localName, name, type, value); + } + } + + /** + Normalize an attribute value (ID-style). + CDATA-style attribute normalization is already done. 
+ @param value The value to normalize + @return The normalized value + **/ + public static String normalize(String value) { + if (value == null) return value; + value = value.trim(); + if (value.indexOf(" ") == -1) return value; + boolean space = false; + int len = value.length(); + StringBuffer b = new StringBuffer(len); + for (int i = 0; i < len; i++) { + char v = value.charAt(i); + if (v == ' ') { + if (!space) b.append(v); + space = true; + } + else { + b.append(v); + space = false; + } + } + return b.toString(); + } + + /** + Sets an attribute and its value into this element type. + @param name The name of the attribute + @param type The type of the attribute + @param value The value of the attribute + */ + + public void setAttribute(String name, String type, String value) { + setAttribute(theAtts, name, type, value); + } + + /** + Sets the models of this element type. + @param model The content models of this element type as a vector of bits + */ + + public void setModel(int model) { theModel = model; } + + /** + Sets the content models to which this element type belongs. + @param memberOf The content models to which this element type belongs as a vector of bits + */ + + public void setMemberOf(int memberOf) { theMemberOf = memberOf; } + + /** + Sets the flags of this element type. + @param flags associated with this element type The flags as a vector of bits + */ + + public void setFlags(int flags) { theFlags = flags; } + + /** + Sets the parent element type of this element type. + @param parent The parent element type + */ + + public void setParent(ElementType parent) { theParent = parent; } + + } diff --git a/src/org/ccil/cowan/tagsoup/GOOGLE_README.txt b/src/org/ccil/cowan/tagsoup/GOOGLE_README.txt new file mode 100644 index 0000000..7462b7a --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/GOOGLE_README.txt @@ -0,0 +1,8 @@ +This is TagSoup 1.2, downloaded from http://home.ccil.org/~cowan/XML/tagsoup/. 
+ +To get the java files included here: +- download tagsoup +- unzip it +- cd into the tagsoup directory +- run ant +- copy the files in src/java and tmp/src \ No newline at end of file diff --git a/src/org/ccil/cowan/tagsoup/HTMLModels.java b/src/org/ccil/cowan/tagsoup/HTMLModels.java new file mode 100644 index 0000000..a6e413c --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/HTMLModels.java @@ -0,0 +1,53 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +// Defines models for HTMLSchema + +/** +This interface contains generated constants representing HTML content +models. Logically, it is part of HTMLSchema, but it is more +convenient to generate the constants into a separate interface. 
+*/ + +package org.ccil.cowan.tagsoup; +public interface HTMLModels { + + // Start of model definitions + public static final int M_AREA = 1 << 1; + public static final int M_BLOCK = 1 << 2; + public static final int M_BLOCKINLINE = 1 << 3; + public static final int M_BODY = 1 << 4; + public static final int M_CELL = 1 << 5; + public static final int M_COL = 1 << 6; + public static final int M_DEF = 1 << 7; + public static final int M_FORM = 1 << 8; + public static final int M_FRAME = 1 << 9; + public static final int M_HEAD = 1 << 10; + public static final int M_HTML = 1 << 11; + public static final int M_INLINE = 1 << 12; + public static final int M_LEGEND = 1 << 13; + public static final int M_LI = 1 << 14; + public static final int M_NOLINK = 1 << 15; + public static final int M_OPTION = 1 << 16; + public static final int M_OPTIONS = 1 << 17; + public static final int M_P = 1 << 18; + public static final int M_PARAM = 1 << 19; + public static final int M_TABLE = 1 << 20; + public static final int M_TABULAR = 1 << 21; + public static final int M_TR = 1 << 22; + + + // End of model definitions + + } diff --git a/src/org/ccil/cowan/tagsoup/HTMLScanner.java b/src/org/ccil/cowan/tagsoup/HTMLScanner.java new file mode 100644 index 0000000..0d73ff7 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/HTMLScanner.java @@ -0,0 +1,648 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. 
+// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +package org.ccil.cowan.tagsoup; +import java.io.*; +import org.xml.sax.SAXException; +import org.xml.sax.Locator; + +/** +This class implements a table-driven scanner for HTML, allowing for lots of +defects. It implements the Scanner interface, which accepts a Reader +object to fetch characters from and a ScanHandler object to report lexical +events to. +*/ + +public class HTMLScanner implements Scanner, Locator { + + // Start of state table + private static final int S_ANAME = 1; + private static final int S_APOS = 2; + private static final int S_AVAL = 3; + private static final int S_BB = 4; + private static final int S_BBC = 5; + private static final int S_BBCD = 6; + private static final int S_BBCDA = 7; + private static final int S_BBCDAT = 8; + private static final int S_BBCDATA = 9; + private static final int S_CDATA = 10; + private static final int S_CDATA2 = 11; + private static final int S_CDSECT = 12; + private static final int S_CDSECT1 = 13; + private static final int S_CDSECT2 = 14; + private static final int S_COM = 15; + private static final int S_COM2 = 16; + private static final int S_COM3 = 17; + private static final int S_COM4 = 18; + private static final int S_DECL = 19; + private static final int S_DECL2 = 20; + private static final int S_DONE = 21; + private static final int S_EMPTYTAG = 22; + private static final int S_ENT = 23; + private static final int S_EQ = 24; + private static final int S_ETAG = 25; + private static final int S_GI = 26; + private static final int S_NCR = 27; + private static final int S_PCDATA = 28; + private static final int S_PI = 29; + private static final int 
S_PITARGET = 30; + private static final int S_QUOT = 31; + private static final int S_STAGC = 32; + private static final int S_TAG = 33; + private static final int S_TAGWS = 34; + private static final int S_XNCR = 35; + private static final int A_ADUP = 1; + private static final int A_ADUP_SAVE = 2; + private static final int A_ADUP_STAGC = 3; + private static final int A_ANAME = 4; + private static final int A_ANAME_ADUP = 5; + private static final int A_ANAME_ADUP_STAGC = 6; + private static final int A_AVAL = 7; + private static final int A_AVAL_STAGC = 8; + private static final int A_CDATA = 9; + private static final int A_CMNT = 10; + private static final int A_DECL = 11; + private static final int A_EMPTYTAG = 12; + private static final int A_ENTITY = 13; + private static final int A_ENTITY_START = 14; + private static final int A_ETAG = 15; + private static final int A_GI = 16; + private static final int A_GI_STAGC = 17; + private static final int A_LT = 18; + private static final int A_LT_PCDATA = 19; + private static final int A_MINUS = 20; + private static final int A_MINUS2 = 21; + private static final int A_MINUS3 = 22; + private static final int A_PCDATA = 23; + private static final int A_PI = 24; + private static final int A_PITARGET = 25; + private static final int A_PITARGET_PI = 26; + private static final int A_SAVE = 27; + private static final int A_SKIP = 28; + private static final int A_SP = 29; + private static final int A_STAGC = 30; + private static final int A_UNGET = 31; + private static final int A_UNSAVE_PCDATA = 32; + private static int[] statetable = { + S_ANAME, '/', A_ANAME_ADUP, S_EMPTYTAG, + S_ANAME, '=', A_ANAME, S_AVAL, + S_ANAME, '>', A_ANAME_ADUP_STAGC, S_PCDATA, + S_ANAME, 0, A_SAVE, S_ANAME, + S_ANAME, -1, A_ANAME_ADUP_STAGC, S_DONE, + S_ANAME, ' ', A_ANAME, S_EQ, + S_ANAME, '\n', A_ANAME, S_EQ, + S_ANAME, '\t', A_ANAME, S_EQ, + S_APOS, '\'', A_AVAL, S_TAGWS, + S_APOS, 0, A_SAVE, S_APOS, + S_APOS, -1, A_AVAL_STAGC, S_DONE, + 
S_APOS, ' ', A_SP, S_APOS, + S_APOS, '\n', A_SP, S_APOS, + S_APOS, '\t', A_SP, S_APOS, + S_AVAL, '\'', A_SKIP, S_APOS, + S_AVAL, '"', A_SKIP, S_QUOT, + S_AVAL, '>', A_AVAL_STAGC, S_PCDATA, + S_AVAL, 0, A_SAVE, S_STAGC, + S_AVAL, -1, A_AVAL_STAGC, S_DONE, + S_AVAL, ' ', A_SKIP, S_AVAL, + S_AVAL, '\n', A_SKIP, S_AVAL, + S_AVAL, '\t', A_SKIP, S_AVAL, + S_BB, 'C', A_SKIP, S_BBC, + S_BB, 0, A_SKIP, S_DECL, + S_BB, -1, A_SKIP, S_DONE, + S_BBC, 'D', A_SKIP, S_BBCD, + S_BBC, 0, A_SKIP, S_DECL, + S_BBC, -1, A_SKIP, S_DONE, + S_BBCD, 'A', A_SKIP, S_BBCDA, + S_BBCD, 0, A_SKIP, S_DECL, + S_BBCD, -1, A_SKIP, S_DONE, + S_BBCDA, 'T', A_SKIP, S_BBCDAT, + S_BBCDA, 0, A_SKIP, S_DECL, + S_BBCDA, -1, A_SKIP, S_DONE, + S_BBCDAT, 'A', A_SKIP, S_BBCDATA, + S_BBCDAT, 0, A_SKIP, S_DECL, + S_BBCDAT, -1, A_SKIP, S_DONE, + S_BBCDATA, '[', A_SKIP, S_CDSECT, + S_BBCDATA, 0, A_SKIP, S_DECL, + S_BBCDATA, -1, A_SKIP, S_DONE, + S_CDATA, '<', A_SAVE, S_CDATA2, + S_CDATA, 0, A_SAVE, S_CDATA, + S_CDATA, -1, A_PCDATA, S_DONE, + S_CDATA2, '/', A_UNSAVE_PCDATA, S_ETAG, + S_CDATA2, 0, A_SAVE, S_CDATA, + S_CDATA2, -1, A_UNSAVE_PCDATA, S_DONE, + S_CDSECT, ']', A_SAVE, S_CDSECT1, + S_CDSECT, 0, A_SAVE, S_CDSECT, + S_CDSECT, -1, A_SKIP, S_DONE, + S_CDSECT1, ']', A_SAVE, S_CDSECT2, + S_CDSECT1, 0, A_SAVE, S_CDSECT, + S_CDSECT1, -1, A_SKIP, S_DONE, + S_CDSECT2, '>', A_CDATA, S_PCDATA, + S_CDSECT2, 0, A_SAVE, S_CDSECT, + S_CDSECT2, -1, A_SKIP, S_DONE, + S_COM, '-', A_SKIP, S_COM2, + S_COM, 0, A_SAVE, S_COM2, + S_COM, -1, A_CMNT, S_DONE, + S_COM2, '-', A_SKIP, S_COM3, + S_COM2, 0, A_SAVE, S_COM2, + S_COM2, -1, A_CMNT, S_DONE, + S_COM3, '-', A_SKIP, S_COM4, + S_COM3, 0, A_MINUS, S_COM2, + S_COM3, -1, A_CMNT, S_DONE, + S_COM4, '-', A_MINUS3, S_COM4, + S_COM4, '>', A_CMNT, S_PCDATA, + S_COM4, 0, A_MINUS2, S_COM2, + S_COM4, -1, A_CMNT, S_DONE, + S_DECL, '-', A_SKIP, S_COM, + S_DECL, '[', A_SKIP, S_BB, + S_DECL, '>', A_SKIP, S_PCDATA, + S_DECL, 0, A_SAVE, S_DECL2, + S_DECL, -1, A_SKIP, S_DONE, + S_DECL2, '>', A_DECL, 
S_PCDATA, + S_DECL2, 0, A_SAVE, S_DECL2, + S_DECL2, -1, A_SKIP, S_DONE, + S_EMPTYTAG, '>', A_EMPTYTAG, S_PCDATA, + S_EMPTYTAG, 0, A_SAVE, S_ANAME, + S_EMPTYTAG, ' ', A_SKIP, S_TAGWS, + S_EMPTYTAG, '\n', A_SKIP, S_TAGWS, + S_EMPTYTAG, '\t', A_SKIP, S_TAGWS, + S_ENT, 0, A_ENTITY, S_ENT, + S_ENT, -1, A_ENTITY, S_DONE, + S_EQ, '=', A_SKIP, S_AVAL, + S_EQ, '>', A_ADUP_STAGC, S_PCDATA, + S_EQ, 0, A_ADUP_SAVE, S_ANAME, + S_EQ, -1, A_ADUP_STAGC, S_DONE, + S_EQ, ' ', A_SKIP, S_EQ, + S_EQ, '\n', A_SKIP, S_EQ, + S_EQ, '\t', A_SKIP, S_EQ, + S_ETAG, '>', A_ETAG, S_PCDATA, + S_ETAG, 0, A_SAVE, S_ETAG, + S_ETAG, -1, A_ETAG, S_DONE, + S_ETAG, ' ', A_SKIP, S_ETAG, + S_ETAG, '\n', A_SKIP, S_ETAG, + S_ETAG, '\t', A_SKIP, S_ETAG, + S_GI, '/', A_SKIP, S_EMPTYTAG, + S_GI, '>', A_GI_STAGC, S_PCDATA, + S_GI, 0, A_SAVE, S_GI, + S_GI, -1, A_SKIP, S_DONE, + S_GI, ' ', A_GI, S_TAGWS, + S_GI, '\n', A_GI, S_TAGWS, + S_GI, '\t', A_GI, S_TAGWS, + S_NCR, 0, A_ENTITY, S_NCR, + S_NCR, -1, A_ENTITY, S_DONE, + S_PCDATA, '&', A_ENTITY_START, S_ENT, + S_PCDATA, '<', A_PCDATA, S_TAG, + S_PCDATA, 0, A_SAVE, S_PCDATA, + S_PCDATA, -1, A_PCDATA, S_DONE, + S_PI, '>', A_PI, S_PCDATA, + S_PI, 0, A_SAVE, S_PI, + S_PI, -1, A_PI, S_DONE, + S_PITARGET, '>', A_PITARGET_PI, S_PCDATA, + S_PITARGET, 0, A_SAVE, S_PITARGET, + S_PITARGET, -1, A_PITARGET_PI, S_DONE, + S_PITARGET, ' ', A_PITARGET, S_PI, + S_PITARGET, '\n', A_PITARGET, S_PI, + S_PITARGET, '\t', A_PITARGET, S_PI, + S_QUOT, '"', A_AVAL, S_TAGWS, + S_QUOT, 0, A_SAVE, S_QUOT, + S_QUOT, -1, A_AVAL_STAGC, S_DONE, + S_QUOT, ' ', A_SP, S_QUOT, + S_QUOT, '\n', A_SP, S_QUOT, + S_QUOT, '\t', A_SP, S_QUOT, + S_STAGC, '>', A_AVAL_STAGC, S_PCDATA, + S_STAGC, 0, A_SAVE, S_STAGC, + S_STAGC, -1, A_AVAL_STAGC, S_DONE, + S_STAGC, ' ', A_AVAL, S_TAGWS, + S_STAGC, '\n', A_AVAL, S_TAGWS, + S_STAGC, '\t', A_AVAL, S_TAGWS, + S_TAG, '!', A_SKIP, S_DECL, + S_TAG, '?', A_SKIP, S_PITARGET, + S_TAG, '/', A_SKIP, S_ETAG, + S_TAG, '<', A_SAVE, S_TAG, + S_TAG, 0, A_SAVE, S_GI, + S_TAG, -1, 
A_LT_PCDATA, S_DONE, + S_TAG, ' ', A_LT, S_PCDATA, + S_TAG, '\n', A_LT, S_PCDATA, + S_TAG, '\t', A_LT, S_PCDATA, + S_TAGWS, '/', A_SKIP, S_EMPTYTAG, + S_TAGWS, '>', A_STAGC, S_PCDATA, + S_TAGWS, 0, A_SAVE, S_ANAME, + S_TAGWS, -1, A_STAGC, S_DONE, + S_TAGWS, ' ', A_SKIP, S_TAGWS, + S_TAGWS, '\n', A_SKIP, S_TAGWS, + S_TAGWS, '\t', A_SKIP, S_TAGWS, + S_XNCR, 0, A_ENTITY, S_XNCR, + S_XNCR, -1, A_ENTITY, S_DONE, + + }; + private static final String[] debug_actionnames = { "", "A_ADUP", "A_ADUP_SAVE", "A_ADUP_STAGC", "A_ANAME", "A_ANAME_ADUP", "A_ANAME_ADUP_STAGC", "A_AVAL", "A_AVAL_STAGC", "A_CDATA", "A_CMNT", "A_DECL", "A_EMPTYTAG", "A_ENTITY", "A_ENTITY_START", "A_ETAG", "A_GI", "A_GI_STAGC", "A_LT", "A_LT_PCDATA", "A_MINUS", "A_MINUS2", "A_MINUS3", "A_PCDATA", "A_PI", "A_PITARGET", "A_PITARGET_PI", "A_SAVE", "A_SKIP", "A_SP", "A_STAGC", "A_UNGET", "A_UNSAVE_PCDATA"}; + private static final String[] debug_statenames = { "", "S_ANAME", "S_APOS", "S_AVAL", "S_BB", "S_BBC", "S_BBCD", "S_BBCDA", "S_BBCDAT", "S_BBCDATA", "S_CDATA", "S_CDATA2", "S_CDSECT", "S_CDSECT1", "S_CDSECT2", "S_COM", "S_COM2", "S_COM3", "S_COM4", "S_DECL", "S_DECL2", "S_DONE", "S_EMPTYTAG", "S_ENT", "S_EQ", "S_ETAG", "S_GI", "S_NCR", "S_PCDATA", "S_PI", "S_PITARGET", "S_QUOT", "S_STAGC", "S_TAG", "S_TAGWS", "S_XNCR"}; + + + // End of state table + + private String thePublicid; // Locator state + private String theSystemid; + private int theLastLine; + private int theLastColumn; + private int theCurrentLine; + private int theCurrentColumn; + + int theState; // Current state + int theNextState; // Next state + char[] theOutputBuffer = new char[200]; // Output buffer + int theSize; // Current buffer size + int[] theWinMap = { // Windows chars map + 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 
0x0178}; + + // Compensate for bug in PushbackReader that allows + // pushing back EOF. + private void unread(PushbackReader r, int c) throws IOException { + if (c != -1) r.unread(c); + } + + // Locator implementation + + public int getLineNumber() { + return theLastLine; + } + public int getColumnNumber() { + return theLastColumn; + } + public String getPublicId() { + return thePublicid; + } + public String getSystemId() { + return theSystemid; + } + + + // Scanner implementation + + /** + Reset document locator, supplying systemid and publicid. + @param systemid System id + @param publicid Public id + */ + + public void resetDocumentLocator(String publicid, String systemid) { + thePublicid = publicid; + theSystemid = systemid; + theLastLine = theLastColumn = theCurrentLine = theCurrentColumn = 0; + } + + /** + Scan HTML source, reporting lexical events. + @param r0 Reader that provides characters + @param h ScanHandler that accepts lexical events. + */ + + public void scan(Reader r0, ScanHandler h) throws IOException, SAXException { + theState = S_PCDATA; + PushbackReader r; + if (r0 instanceof PushbackReader) { + r = (PushbackReader)r0; + } + else if (r0 instanceof BufferedReader) { + r = new PushbackReader(r0); + } + else { + r = new PushbackReader(new BufferedReader(r0)); + } + + int firstChar = r.read(); // Remove any leading BOM + if (firstChar != '\uFEFF') unread(r, firstChar); + + while (theState != S_DONE) { + int ch = r.read(); + + // Process control characters + if (ch >= 0x80 && ch <= 0x9F) ch = theWinMap[ch-0x80]; + + if (ch == '\r') { + ch = r.read(); // expect LF next + if (ch != '\n') { + unread(r, ch); // nope + ch = '\n'; + } + } + + if (ch == '\n') { + theCurrentLine++; + theCurrentColumn = 0; + } + else { + theCurrentColumn++; + } + + if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1)) continue; + + // Search state table + int action = 0; + for (int i = 0; i < statetable.length; i += 4) { + if (theState != statetable[i]) { + if (action 
!= 0) break; + continue; + } + if (statetable[i+1] == 0) { + action = statetable[i+2]; + theNextState = statetable[i+3]; + } + else if (statetable[i+1] == ch) { + action = statetable[i+2]; + theNextState = statetable[i+3]; + break; + } + } +// System.err.println("In " + debug_statenames[theState] + " got " + nicechar(ch) + " doing " + debug_actionnames[action] + " then " + debug_statenames[theNextState]); + switch (action) { + case 0: + throw new Error( +"HTMLScanner can't cope with " + Integer.toString(ch) + " in state " + +Integer.toString(theState)); + case A_ADUP: + h.adup(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_ADUP_SAVE: + h.adup(theOutputBuffer, 0, theSize); + theSize = 0; + save(ch, h); + break; + case A_ADUP_STAGC: + h.adup(theOutputBuffer, 0, theSize); + theSize = 0; + h.stagc(theOutputBuffer, 0, theSize); + break; + case A_ANAME: + h.aname(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_ANAME_ADUP: + h.aname(theOutputBuffer, 0, theSize); + theSize = 0; + h.adup(theOutputBuffer, 0, theSize); + break; + case A_ANAME_ADUP_STAGC: + h.aname(theOutputBuffer, 0, theSize); + theSize = 0; + h.adup(theOutputBuffer, 0, theSize); + h.stagc(theOutputBuffer, 0, theSize); + break; + case A_AVAL: + h.aval(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_AVAL_STAGC: + h.aval(theOutputBuffer, 0, theSize); + theSize = 0; + h.stagc(theOutputBuffer, 0, theSize); + break; + case A_CDATA: + mark(); + // suppress the final "]]" in the buffer + if (theSize > 1) theSize -= 2; + h.pcdata(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_ENTITY_START: + h.pcdata(theOutputBuffer, 0, theSize); + theSize = 0; + save(ch, h); + break; + case A_ENTITY: + mark(); + char ch1 = (char)ch; +// System.out.println("Got " + ch1 + " in state " + ((theState == S_ENT) ? "S_ENT" : ((theState == S_NCR) ? 
"S_NCR" : "UNK"))); + if (theState == S_ENT && ch1 == '#') { + theNextState = S_NCR; + save(ch, h); + break; + } + else if (theState == S_NCR && (ch1 == 'x' || ch1 == 'X')) { + theNextState = S_XNCR; + save(ch, h); + break; + } + else if (theState == S_ENT && Character.isLetterOrDigit(ch1)) { + save(ch, h); + break; + } + else if (theState == S_NCR && Character.isDigit(ch1)) { + save(ch, h); + break; + } + else if (theState == S_XNCR && (Character.isDigit(ch1) || "abcdefABCDEF".indexOf(ch1) != -1)) { + save(ch, h); + break; + } + + // The whole entity reference has been collected +// System.err.println("%%" + new String(theOutputBuffer, 0, theSize)); + h.entity(theOutputBuffer, 1, theSize - 1); + int ent = h.getEntity(); +// System.err.println("%% value = " + ent); + if (ent != 0) { + theSize = 0; + if (ent >= 0x80 && ent <= 0x9F) { + ent = theWinMap[ent-0x80]; + } + if (ent < 0x20) { + // Control becomes space + ent = 0x20; + } + else if (ent >= 0xD800 && ent <= 0xDFFF) { + // Surrogates get dropped + ent = 0; + } + else if (ent <= 0xFFFF) { + // BMP character + save(ent, h); + } + else { + // Astral converted to two surrogates + ent -= 0x10000; + save((ent>>10) + 0xD800, h); + save((ent&0x3FF) + 0xDC00, h); + } + if (ch != ';') { + unread(r, ch); + theCurrentColumn--; + } + } + else { + unread(r, ch); + theCurrentColumn--; + } + theNextState = S_PCDATA; + break; + case A_ETAG: + h.etag(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_DECL: + h.decl(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_GI: + h.gi(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_GI_STAGC: + h.gi(theOutputBuffer, 0, theSize); + theSize = 0; + h.stagc(theOutputBuffer, 0, theSize); + break; + case A_LT: + mark(); + save('<', h); + save(ch, h); + break; + case A_LT_PCDATA: + mark(); + save('<', h); + h.pcdata(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_PCDATA: + mark(); + h.pcdata(theOutputBuffer, 0, theSize); + theSize = 0; + 
break; + case A_CMNT: + mark(); + h.cmnt(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_MINUS3: + save('-', h); + save(' ', h); + break; + case A_MINUS2: + save('-', h); + save(' ', h); + // fall through into A_MINUS + case A_MINUS: + save('-', h); + save(ch, h); + break; + case A_PI: + mark(); + h.pi(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_PITARGET: + h.pitarget(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_PITARGET_PI: + h.pitarget(theOutputBuffer, 0, theSize); + theSize = 0; + h.pi(theOutputBuffer, 0, theSize); + break; + case A_SAVE: + save(ch, h); + break; + case A_SKIP: + break; + case A_SP: + save(' ', h); + break; + case A_STAGC: + h.stagc(theOutputBuffer, 0, theSize); + theSize = 0; + break; + case A_EMPTYTAG: + mark(); +// System.err.println("%%% Empty tag seen"); + if (theSize > 0) h.gi(theOutputBuffer, 0, theSize); + theSize = 0; + h.stage(theOutputBuffer, 0, theSize); + break; + case A_UNGET: + unread(r, ch); + theCurrentColumn--; + break; + case A_UNSAVE_PCDATA: + if (theSize > 0) theSize--; + h.pcdata(theOutputBuffer, 0, theSize); + theSize = 0; + break; + default: + throw new Error("Can't process state " + action); + } + theState = theNextState; + } + h.eof(theOutputBuffer, 0, 0); + } + + /** + * Mark the current scan position as a "point of interest" - start of a tag, + * cdata, processing instruction etc. + */ + + private void mark() { + theLastColumn = theCurrentColumn; + theLastLine = theCurrentLine; + } + + /** + A callback for the ScanHandler that allows it to force + the lexer state to CDATA content (no markup is recognized except + the end of element. 
+    */
+
+    public void startCDATA() { theNextState = S_CDATA; }
+
+    /**
+    Append one character to the output buffer.
+    When fewer than 20 free slots remain, the buffer is first either
+    flushed to the handler as PCDATA (if the scanner is currently in
+    S_PCDATA or S_CDATA) or replaced by one of twice the length.
+    @param ch Character to store; it is narrowed to char
+    @param h ScanHandler that receives a flushed chunk of PCDATA
+    */
+
+    private void save(int ch, ScanHandler h) throws IOException, SAXException {
+        if (theSize >= theOutputBuffer.length - 20) {
+            if (theState == S_PCDATA || theState == S_CDATA) {
+                // Return a buffer-sized chunk of PCDATA
+                h.pcdata(theOutputBuffer, 0, theSize);
+                theSize = 0;
+            }
+            else {
+                // Grow the buffer size; only the first theSize chars
+                // are valid, so copy exactly that many
+                char[] newOutputBuffer = new char[theOutputBuffer.length * 2];
+                System.arraycopy(theOutputBuffer, 0, newOutputBuffer, 0, theSize);
+                theOutputBuffer = newOutputBuffer;
+            }
+        }
+        theOutputBuffer[theSize++] = (char)ch;
+    }
+
+    /**
+    Test procedure.  Reads HTML from the standard input and writes
+    PYX to the standard output.
+    @param argv Command-line arguments (not used)
+    */
+
+    public static void main(String[] argv) throws IOException, SAXException {
+        Scanner s = new HTMLScanner();
+        Reader r = new InputStreamReader(System.in, "UTF-8");
+        Writer w = new OutputStreamWriter(System.out, "UTF-8");
+        try {
+            PYXWriter pw = new PYXWriter(w);
+            s.scan(r, pw);
+        }
+        finally {
+            // Close (and thereby flush) the writer even when the scan
+            // fails, so output produced so far is not lost
+            w.close();
+        }
+    }
+
+
+    /**
+    Render a character code as a readable string.  Newline is shown
+    as a backslash followed by n, any other code below 32 as
+    0x-prefixed hex, and everything else as the character itself
+    between single quotes.
+    @param in Character code to render
+    @return Printable form of the code
+    */
+
+    private static String nicechar(int in) {
+        if (in == '\n') return "\\n";
+        if (in < 32) return "0x"+Integer.toHexString(in);
+        return "'"+((char)in)+"'";
+    }
+
+    }
diff --git a/src/org/ccil/cowan/tagsoup/HTMLSchema.java b/src/org/ccil/cowan/tagsoup/HTMLSchema.java
new file mode 100644
index 0000000..9b46a68
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/HTMLSchema.java
@@ -0,0 +1,2895 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +/** +This class provides a Schema that has been preinitialized with HTML +elements, attributes, and character entity declarations. All the declarations +normally provided with HTML 4.01 are given, plus some that are IE-specific +and NS4-specific. Attribute declarations of type CDATA with no default +value are not included. +*/ + +package org.ccil.cowan.tagsoup; +public class HTMLSchema extends Schema implements HTMLModels { + + /** + Returns a newly constructed HTMLSchema object independent of + any existing ones. + */ + + public HTMLSchema() { + // Start of Schema calls + setURI("http://www.w3.org/1999/xhtml"); + setPrefix("html"); + elementType("", M_EMPTY, M_PCDATA, 0); + elementType("", M_ROOT, M_EMPTY, 0); + elementType("a", M_PCDATA|M_NOLINK, M_INLINE, 0); + elementType("abbr", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("acronym", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("address", M_PCDATA|M_INLINE|M_P, M_BLOCK, 0); + elementType("applet", M_PCDATA|M_PARAM|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0); + elementType("area", M_EMPTY, M_AREA, 0); + elementType("b", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("base", M_EMPTY, M_HEAD, 0); + elementType("basefont", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("bdo", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("bgsound", M_EMPTY, M_HEAD, 0); + elementType("big", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("blink", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("blockquote", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("body", 
M_PCDATA|M_INLINE|M_BLOCK, M_HTML|M_BODY, 0); + elementType("br", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("button", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0); + elementType("canvas", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("caption", M_PCDATA|M_INLINE, M_TABULAR, 0); + elementType("center", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("cite", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("code", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("col", M_EMPTY, M_COL|M_TABULAR, 0); + elementType("colgroup", M_COL, M_TABULAR, 0); + elementType("comment", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("dd", M_PCDATA|M_INLINE|M_BLOCK, M_DEF, 0); + elementType("del", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_BLOCKINLINE|M_BLOCK, F_RESTART); + elementType("dfn", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("dir", M_LI, M_BLOCK, 0); + elementType("div", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("dl", M_DEF, M_BLOCK, 0); + elementType("dt", M_PCDATA|M_INLINE, M_DEF, 0); + elementType("em", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("fieldset", M_PCDATA|M_LEGEND|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("font", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("form", M_PCDATA|M_INLINE|M_NOLINK|M_BLOCK|M_TR|M_CELL, M_BLOCK|M_FORM, F_NOFORCE); + elementType("frame", M_EMPTY, M_FRAME, 0); + elementType("frameset", M_FRAME, M_FRAME|M_HTML, 0); + elementType("h1", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h2", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h3", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h4", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h5", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h6", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("head", M_HEAD, M_HTML, 0); + elementType("hr", M_EMPTY, M_BLOCK, 0); + elementType("html", M_HTML, M_ROOT, 0); + elementType("i", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + 
elementType("iframe", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0); + elementType("img", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("input", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("ins", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_BLOCK, F_RESTART); + elementType("isindex", M_EMPTY, M_HEAD, 0); + elementType("kbd", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("label", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("legend", M_PCDATA|M_INLINE, M_LEGEND, 0); + elementType("li", M_PCDATA|M_INLINE|M_BLOCK, M_LI, 0); + elementType("link", M_EMPTY, M_HEAD|M_INLINE, 0); + elementType("listing", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("map", M_BLOCK|M_AREA, M_INLINE, 0); + elementType("marquee", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("menu", M_LI, M_BLOCK, 0); + elementType("meta", M_EMPTY, M_HEAD, 0); + elementType("nobr", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("noframes", M_BODY|M_BLOCK|M_INLINE, M_BLOCK|M_HTML|M_FRAME, 0); + elementType("noscript", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("object", M_PCDATA|M_PARAM|M_INLINE|M_BLOCK, M_HEAD|M_INLINE|M_NOLINK, 0); + elementType("ol", M_LI, M_BLOCK, 0); + elementType("optgroup", M_OPTIONS, M_OPTIONS, 0); + elementType("option", M_PCDATA, M_OPTION|M_OPTIONS, 0); + elementType("p", M_PCDATA|M_INLINE|M_TABLE, M_BLOCK|M_P, 0); + elementType("param", M_EMPTY, M_PARAM, 0); + elementType("pre", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("q", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rb", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rbc", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rp", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rt", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rtc", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("ruby", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("s", 
M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("samp", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("script", M_PCDATA, M_ANY & ~M_ROOT, F_CDATA); + elementType("select", M_OPTIONS, M_INLINE, 0); + elementType("small", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("span", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("strike", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("strong", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("style", M_PCDATA, M_HEAD|M_INLINE, F_CDATA); + elementType("sub", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("sup", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("table", M_FORM|M_TABULAR, M_BLOCK|M_TABLE, F_NOFORCE); + elementType("tbody", M_TR, M_TABULAR, 0); + elementType("td", M_PCDATA|M_INLINE|M_BLOCK, M_CELL, 0); + elementType("textarea", M_PCDATA, M_INLINE, 0); + elementType("tfoot", M_TR|M_FORM|M_CELL, M_TABULAR, 0); + elementType("th", M_PCDATA|M_INLINE|M_BLOCK, M_CELL, 0); + elementType("thead", M_TR|M_FORM|M_CELL, M_TABULAR, 0); + elementType("title", M_PCDATA, M_HEAD, 0); + elementType("tr", M_FORM|M_CELL, M_TR|M_TABULAR, 0); + elementType("tt", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("u", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("ul", M_LI, M_BLOCK, 0); + elementType("var", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("wbr", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("xmp", M_PCDATA|M_INLINE, M_BLOCK, 0); + parent("", "body"); + parent("html", ""); + parent("a", "body"); + parent("abbr", "body"); + parent("acronym", "body"); + parent("address", "body"); + parent("applet", "body"); + parent("area", "map"); + parent("b", "body"); + parent("base", "head"); + parent("basefont", "body"); + parent("bdo", "body"); + parent("bgsound", "head"); + parent("big", "body"); + parent("blink", "body"); + parent("blockquote", "body"); + 
parent("body", "html"); + parent("br", "body"); + parent("button", "form"); + parent("canvas", "body"); + parent("caption", "table"); + parent("center", "body"); + parent("cite", "body"); + parent("code", "body"); + parent("col", "table"); + parent("colgroup", "table"); + parent("comment", "body"); + parent("dd", "dl"); + parent("del", "body"); + parent("dfn", "body"); + parent("dir", "body"); + parent("div", "body"); + parent("dl", "body"); + parent("dt", "dl"); + parent("em", "body"); + parent("fieldset", "form"); + parent("font", "body"); + parent("form", "body"); + parent("frame", "frameset"); + parent("frameset", "html"); + parent("h1", "body"); + parent("h2", "body"); + parent("h3", "body"); + parent("h4", "body"); + parent("h5", "body"); + parent("h6", "body"); + parent("head", "html"); + parent("hr", "body"); + parent("i", "body"); + parent("iframe", "body"); + parent("img", "body"); + parent("input", "form"); + parent("ins", "body"); + parent("isindex", "head"); + parent("kbd", "body"); + parent("label", "form"); + parent("legend", "fieldset"); + parent("li", "ul"); + parent("link", "head"); + parent("listing", "body"); + parent("map", "body"); + parent("marquee", "body"); + parent("menu", "body"); + parent("meta", "head"); + parent("nobr", "body"); + parent("noframes", "html"); + parent("noscript", "body"); + parent("object", "body"); + parent("ol", "body"); + parent("optgroup", "select"); + parent("option", "select"); + parent("p", "body"); + parent("param", "object"); + parent("pre", "body"); + parent("q", "body"); + parent("rb", "body"); + parent("rbc", "body"); + parent("rp", "body"); + parent("rt", "body"); + parent("rtc", "body"); + parent("ruby", "body"); + parent("s", "body"); + parent("samp", "body"); + parent("script", "html"); + parent("select", "form"); + parent("small", "body"); + parent("span", "body"); + parent("strike", "body"); + parent("strong", "body"); + parent("style", "head"); + parent("sub", "body"); + parent("sup", "body"); + 
parent("table", "body"); + parent("tbody", "table"); + parent("td", "tr"); + parent("textarea", "form"); + parent("tfoot", "table"); + parent("th", "tr"); + parent("thead", "table"); + parent("title", "head"); + parent("tr", "tbody"); + parent("tt", "body"); + parent("u", "body"); + parent("ul", "body"); + parent("var", "body"); + parent("wbr", "body"); + parent("xmp", "body"); + attribute("a", "hreflang", "NMTOKEN", null); + attribute("a", "shape", "CDATA", "rect"); + attribute("a", "tabindex", "NMTOKEN", null); + attribute("applet", "align", "NMTOKEN", null); + attribute("area", "nohref", "BOOLEAN", null); + attribute("area", "shape", "CDATA", "rect"); + attribute("area", "tabindex", "NMTOKEN", null); + attribute("br", "clear", "CDATA", "none"); + attribute("button", "disabled", "BOOLEAN", null); + attribute("button", "tabindex", "NMTOKEN", null); + attribute("button", "type", "CDATA", "submit"); + attribute("caption", "align", "NMTOKEN", null); + attribute("col", "align", "NMTOKEN", null); + attribute("col", "span", "CDATA", "1"); + attribute("col", "valign", "NMTOKEN", null); + attribute("colgroup", "align", "NMTOKEN", null); + attribute("colgroup", "span", "CDATA", "1"); + attribute("colgroup", "valign", "NMTOKEN", null); + attribute("dir", "compact", "BOOLEAN", null); + attribute("div", "align", "NMTOKEN", null); + attribute("dl", "compact", "BOOLEAN", null); + attribute("form", "enctype", "CDATA", "application/x-www-form-urlencoded"); + attribute("form", "method", "CDATA", "get"); + attribute("frame", "frameborder", "CDATA", "1"); + attribute("frame", "noresize", "BOOLEAN", null); + attribute("frame", "scrolling", "CDATA", "auto"); + attribute("h1", "align", "NMTOKEN", null); + attribute("h2", "align", "NMTOKEN", null); + attribute("h3", "align", "NMTOKEN", null); + attribute("h4", "align", "NMTOKEN", null); + attribute("h5", "align", "NMTOKEN", null); + attribute("h6", "align", "NMTOKEN", null); + attribute("hr", "align", "NMTOKEN", null); + attribute("hr", 
"noshade", "BOOLEAN", null); + attribute("iframe", "align", "NMTOKEN", null); + attribute("iframe", "frameborder", "CDATA", "1"); + attribute("iframe", "scrolling", "CDATA", "auto"); + attribute("img", "align", "NMTOKEN", null); + attribute("img", "ismap", "BOOLEAN", null); + attribute("input", "align", "NMTOKEN", null); + attribute("input", "checked", "BOOLEAN", null); + attribute("input", "disabled", "BOOLEAN", null); + attribute("input", "ismap", "BOOLEAN", null); + attribute("input", "maxlength", "NMTOKEN", null); + attribute("input", "readonly", "BOOLEAN", null); + attribute("input", "tabindex", "NMTOKEN", null); + attribute("input", "type", "CDATA", "text"); + attribute("label", "for", "IDREF", null); + attribute("legend", "align", "NMTOKEN", null); + attribute("li", "value", "NMTOKEN", null); + attribute("link", "hreflang", "NMTOKEN", null); + attribute("marquee", "width", "NMTOKEN", null); + attribute("menu", "compact", "BOOLEAN", null); + attribute("meta", "http-equiv", "NMTOKEN", null); + attribute("meta", "name", "NMTOKEN", null); + attribute("object", "align", "NMTOKEN", null); + attribute("object", "declare", "BOOLEAN", null); + attribute("object", "tabindex", "NMTOKEN", null); + attribute("ol", "compact", "BOOLEAN", null); + attribute("ol", "start", "NMTOKEN", null); + attribute("optgroup", "disabled", "BOOLEAN", null); + attribute("option", "disabled", "BOOLEAN", null); + attribute("option", "selected", "BOOLEAN", null); + attribute("p", "align", "NMTOKEN", null); + attribute("param", "valuetype", "CDATA", "data"); + attribute("pre", "width", "NMTOKEN", null); + attribute("rt", "rbspan", "CDATA", "1"); + attribute("script", "defer", "BOOLEAN", null); + attribute("select", "disabled", "BOOLEAN", null); + attribute("select", "multiple", "BOOLEAN", null); + attribute("select", "size", "NMTOKEN", null); + attribute("select", "tabindex", "NMTOKEN", null); + attribute("table", "align", "NMTOKEN", null); + attribute("table", "frame", "NMTOKEN", null); + 
attribute("table", "rules", "NMTOKEN", null); + attribute("tbody", "align", "NMTOKEN", null); + attribute("tbody", "valign", "NMTOKEN", null); + attribute("td", "align", "NMTOKEN", null); + attribute("td", "colspan", "CDATA", "1"); + attribute("td", "headers", "IDREFS", null); + attribute("td", "nowrap", "BOOLEAN", null); + attribute("td", "rowspan", "CDATA", "1"); + attribute("td", "scope", "NMTOKEN", null); + attribute("td", "valign", "NMTOKEN", null); + attribute("textarea", "cols", "NMTOKEN", null); + attribute("textarea", "disabled", "BOOLEAN", null); + attribute("textarea", "readonly", "BOOLEAN", null); + attribute("textarea", "rows", "NMTOKEN", null); + attribute("textarea", "tabindex", "NMTOKEN", null); + attribute("tfoot", "align", "NMTOKEN", null); + attribute("tfoot", "valign", "NMTOKEN", null); + attribute("th", "align", "NMTOKEN", null); + attribute("th", "colspan", "CDATA", "1"); + attribute("th", "headers", "IDREFS", null); + attribute("th", "nowrap", "BOOLEAN", null); + attribute("th", "rowspan", "CDATA", "1"); + attribute("th", "scope", "NMTOKEN", null); + attribute("th", "valign", "NMTOKEN", null); + attribute("thead", "align", "NMTOKEN", null); + attribute("thead", "valign", "NMTOKEN", null); + attribute("tr", "align", "NMTOKEN", null); + attribute("tr", "valign", "NMTOKEN", null); + attribute("ul", "compact", "BOOLEAN", null); + attribute("ul", "type", "NMTOKEN", null); + attribute("xmp", "width", "NMTOKEN", null); + attribute("a", "class", "NMTOKEN", null); + attribute("abbr", "class", "NMTOKEN", null); + attribute("acronym", "class", "NMTOKEN", null); + attribute("address", "class", "NMTOKEN", null); + attribute("applet", "class", "NMTOKEN", null); + attribute("area", "class", "NMTOKEN", null); + attribute("b", "class", "NMTOKEN", null); + attribute("base", "class", "NMTOKEN", null); + attribute("basefont", "class", "NMTOKEN", null); + attribute("bdo", "class", "NMTOKEN", null); + attribute("bgsound", "class", "NMTOKEN", null); + 
attribute("big", "class", "NMTOKEN", null); + attribute("blink", "class", "NMTOKEN", null); + attribute("blockquote", "class", "NMTOKEN", null); + attribute("body", "class", "NMTOKEN", null); + attribute("br", "class", "NMTOKEN", null); + attribute("button", "class", "NMTOKEN", null); + attribute("canvas", "class", "NMTOKEN", null); + attribute("caption", "class", "NMTOKEN", null); + attribute("center", "class", "NMTOKEN", null); + attribute("cite", "class", "NMTOKEN", null); + attribute("code", "class", "NMTOKEN", null); + attribute("col", "class", "NMTOKEN", null); + attribute("colgroup", "class", "NMTOKEN", null); + attribute("comment", "class", "NMTOKEN", null); + attribute("dd", "class", "NMTOKEN", null); + attribute("del", "class", "NMTOKEN", null); + attribute("dfn", "class", "NMTOKEN", null); + attribute("dir", "class", "NMTOKEN", null); + attribute("div", "class", "NMTOKEN", null); + attribute("dl", "class", "NMTOKEN", null); + attribute("dt", "class", "NMTOKEN", null); + attribute("em", "class", "NMTOKEN", null); + attribute("fieldset", "class", "NMTOKEN", null); + attribute("font", "class", "NMTOKEN", null); + attribute("form", "class", "NMTOKEN", null); + attribute("frame", "class", "NMTOKEN", null); + attribute("frameset", "class", "NMTOKEN", null); + attribute("h1", "class", "NMTOKEN", null); + attribute("h2", "class", "NMTOKEN", null); + attribute("h3", "class", "NMTOKEN", null); + attribute("h4", "class", "NMTOKEN", null); + attribute("h5", "class", "NMTOKEN", null); + attribute("h6", "class", "NMTOKEN", null); + attribute("head", "class", "NMTOKEN", null); + attribute("hr", "class", "NMTOKEN", null); + attribute("html", "class", "NMTOKEN", null); + attribute("i", "class", "NMTOKEN", null); + attribute("iframe", "class", "NMTOKEN", null); + attribute("img", "class", "NMTOKEN", null); + attribute("input", "class", "NMTOKEN", null); + attribute("ins", "class", "NMTOKEN", null); + attribute("isindex", "class", "NMTOKEN", null); + attribute("kbd", 
"class", "NMTOKEN", null); + attribute("label", "class", "NMTOKEN", null); + attribute("legend", "class", "NMTOKEN", null); + attribute("li", "class", "NMTOKEN", null); + attribute("link", "class", "NMTOKEN", null); + attribute("listing", "class", "NMTOKEN", null); + attribute("map", "class", "NMTOKEN", null); + attribute("marquee", "class", "NMTOKEN", null); + attribute("menu", "class", "NMTOKEN", null); + attribute("meta", "class", "NMTOKEN", null); + attribute("nobr", "class", "NMTOKEN", null); + attribute("noframes", "class", "NMTOKEN", null); + attribute("noscript", "class", "NMTOKEN", null); + attribute("object", "class", "NMTOKEN", null); + attribute("ol", "class", "NMTOKEN", null); + attribute("optgroup", "class", "NMTOKEN", null); + attribute("option", "class", "NMTOKEN", null); + attribute("p", "class", "NMTOKEN", null); + attribute("param", "class", "NMTOKEN", null); + attribute("pre", "class", "NMTOKEN", null); + attribute("q", "class", "NMTOKEN", null); + attribute("rb", "class", "NMTOKEN", null); + attribute("rbc", "class", "NMTOKEN", null); + attribute("rp", "class", "NMTOKEN", null); + attribute("rt", "class", "NMTOKEN", null); + attribute("rtc", "class", "NMTOKEN", null); + attribute("ruby", "class", "NMTOKEN", null); + attribute("s", "class", "NMTOKEN", null); + attribute("samp", "class", "NMTOKEN", null); + attribute("script", "class", "NMTOKEN", null); + attribute("select", "class", "NMTOKEN", null); + attribute("small", "class", "NMTOKEN", null); + attribute("span", "class", "NMTOKEN", null); + attribute("strike", "class", "NMTOKEN", null); + attribute("strong", "class", "NMTOKEN", null); + attribute("style", "class", "NMTOKEN", null); + attribute("sub", "class", "NMTOKEN", null); + attribute("sup", "class", "NMTOKEN", null); + attribute("table", "class", "NMTOKEN", null); + attribute("tbody", "class", "NMTOKEN", null); + attribute("td", "class", "NMTOKEN", null); + attribute("textarea", "class", "NMTOKEN", null); + attribute("tfoot", "class", 
"NMTOKEN", null); + attribute("th", "class", "NMTOKEN", null); + attribute("thead", "class", "NMTOKEN", null); + attribute("title", "class", "NMTOKEN", null); + attribute("tr", "class", "NMTOKEN", null); + attribute("tt", "class", "NMTOKEN", null); + attribute("u", "class", "NMTOKEN", null); + attribute("ul", "class", "NMTOKEN", null); + attribute("var", "class", "NMTOKEN", null); + attribute("wbr", "class", "NMTOKEN", null); + attribute("xmp", "class", "NMTOKEN", null); + attribute("a", "dir", "NMTOKEN", null); + attribute("abbr", "dir", "NMTOKEN", null); + attribute("acronym", "dir", "NMTOKEN", null); + attribute("address", "dir", "NMTOKEN", null); + attribute("applet", "dir", "NMTOKEN", null); + attribute("area", "dir", "NMTOKEN", null); + attribute("b", "dir", "NMTOKEN", null); + attribute("base", "dir", "NMTOKEN", null); + attribute("basefont", "dir", "NMTOKEN", null); + attribute("bdo", "dir", "NMTOKEN", null); + attribute("bgsound", "dir", "NMTOKEN", null); + attribute("big", "dir", "NMTOKEN", null); + attribute("blink", "dir", "NMTOKEN", null); + attribute("blockquote", "dir", "NMTOKEN", null); + attribute("body", "dir", "NMTOKEN", null); + attribute("br", "dir", "NMTOKEN", null); + attribute("button", "dir", "NMTOKEN", null); + attribute("canvas", "dir", "NMTOKEN", null); + attribute("caption", "dir", "NMTOKEN", null); + attribute("center", "dir", "NMTOKEN", null); + attribute("cite", "dir", "NMTOKEN", null); + attribute("code", "dir", "NMTOKEN", null); + attribute("col", "dir", "NMTOKEN", null); + attribute("colgroup", "dir", "NMTOKEN", null); + attribute("comment", "dir", "NMTOKEN", null); + attribute("dd", "dir", "NMTOKEN", null); + attribute("del", "dir", "NMTOKEN", null); + attribute("dfn", "dir", "NMTOKEN", null); + attribute("dir", "dir", "NMTOKEN", null); + attribute("div", "dir", "NMTOKEN", null); + attribute("dl", "dir", "NMTOKEN", null); + attribute("dt", "dir", "NMTOKEN", null); + attribute("em", "dir", "NMTOKEN", null); + attribute("fieldset", 
"dir", "NMTOKEN", null); + attribute("font", "dir", "NMTOKEN", null); + attribute("form", "dir", "NMTOKEN", null); + attribute("frame", "dir", "NMTOKEN", null); + attribute("frameset", "dir", "NMTOKEN", null); + attribute("h1", "dir", "NMTOKEN", null); + attribute("h2", "dir", "NMTOKEN", null); + attribute("h3", "dir", "NMTOKEN", null); + attribute("h4", "dir", "NMTOKEN", null); + attribute("h5", "dir", "NMTOKEN", null); + attribute("h6", "dir", "NMTOKEN", null); + attribute("head", "dir", "NMTOKEN", null); + attribute("hr", "dir", "NMTOKEN", null); + attribute("html", "dir", "NMTOKEN", null); + attribute("i", "dir", "NMTOKEN", null); + attribute("iframe", "dir", "NMTOKEN", null); + attribute("img", "dir", "NMTOKEN", null); + attribute("input", "dir", "NMTOKEN", null); + attribute("ins", "dir", "NMTOKEN", null); + attribute("isindex", "dir", "NMTOKEN", null); + attribute("kbd", "dir", "NMTOKEN", null); + attribute("label", "dir", "NMTOKEN", null); + attribute("legend", "dir", "NMTOKEN", null); + attribute("li", "dir", "NMTOKEN", null); + attribute("link", "dir", "NMTOKEN", null); + attribute("listing", "dir", "NMTOKEN", null); + attribute("map", "dir", "NMTOKEN", null); + attribute("marquee", "dir", "NMTOKEN", null); + attribute("menu", "dir", "NMTOKEN", null); + attribute("meta", "dir", "NMTOKEN", null); + attribute("nobr", "dir", "NMTOKEN", null); + attribute("noframes", "dir", "NMTOKEN", null); + attribute("noscript", "dir", "NMTOKEN", null); + attribute("object", "dir", "NMTOKEN", null); + attribute("ol", "dir", "NMTOKEN", null); + attribute("optgroup", "dir", "NMTOKEN", null); + attribute("option", "dir", "NMTOKEN", null); + attribute("p", "dir", "NMTOKEN", null); + attribute("param", "dir", "NMTOKEN", null); + attribute("pre", "dir", "NMTOKEN", null); + attribute("q", "dir", "NMTOKEN", null); + attribute("rb", "dir", "NMTOKEN", null); + attribute("rbc", "dir", "NMTOKEN", null); + attribute("rp", "dir", "NMTOKEN", null); + attribute("rt", "dir", "NMTOKEN", 
null); + attribute("rtc", "dir", "NMTOKEN", null); + attribute("ruby", "dir", "NMTOKEN", null); + attribute("s", "dir", "NMTOKEN", null); + attribute("samp", "dir", "NMTOKEN", null); + attribute("script", "dir", "NMTOKEN", null); + attribute("select", "dir", "NMTOKEN", null); + attribute("small", "dir", "NMTOKEN", null); + attribute("span", "dir", "NMTOKEN", null); + attribute("strike", "dir", "NMTOKEN", null); + attribute("strong", "dir", "NMTOKEN", null); + attribute("style", "dir", "NMTOKEN", null); + attribute("sub", "dir", "NMTOKEN", null); + attribute("sup", "dir", "NMTOKEN", null); + attribute("table", "dir", "NMTOKEN", null); + attribute("tbody", "dir", "NMTOKEN", null); + attribute("td", "dir", "NMTOKEN", null); + attribute("textarea", "dir", "NMTOKEN", null); + attribute("tfoot", "dir", "NMTOKEN", null); + attribute("th", "dir", "NMTOKEN", null); + attribute("thead", "dir", "NMTOKEN", null); + attribute("title", "dir", "NMTOKEN", null); + attribute("tr", "dir", "NMTOKEN", null); + attribute("tt", "dir", "NMTOKEN", null); + attribute("u", "dir", "NMTOKEN", null); + attribute("ul", "dir", "NMTOKEN", null); + attribute("var", "dir", "NMTOKEN", null); + attribute("wbr", "dir", "NMTOKEN", null); + attribute("xmp", "dir", "NMTOKEN", null); + attribute("a", "id", "ID", null); + attribute("abbr", "id", "ID", null); + attribute("acronym", "id", "ID", null); + attribute("address", "id", "ID", null); + attribute("applet", "id", "ID", null); + attribute("area", "id", "ID", null); + attribute("b", "id", "ID", null); + attribute("base", "id", "ID", null); + attribute("basefont", "id", "ID", null); + attribute("bdo", "id", "ID", null); + attribute("bgsound", "id", "ID", null); + attribute("big", "id", "ID", null); + attribute("blink", "id", "ID", null); + attribute("blockquote", "id", "ID", null); + attribute("body", "id", "ID", null); + attribute("br", "id", "ID", null); + attribute("button", "id", "ID", null); + attribute("canvas", "id", "ID", null); + 
attribute("caption", "id", "ID", null); + attribute("center", "id", "ID", null); + attribute("cite", "id", "ID", null); + attribute("code", "id", "ID", null); + attribute("col", "id", "ID", null); + attribute("colgroup", "id", "ID", null); + attribute("comment", "id", "ID", null); + attribute("dd", "id", "ID", null); + attribute("del", "id", "ID", null); + attribute("dfn", "id", "ID", null); + attribute("dir", "id", "ID", null); + attribute("div", "id", "ID", null); + attribute("dl", "id", "ID", null); + attribute("dt", "id", "ID", null); + attribute("em", "id", "ID", null); + attribute("fieldset", "id", "ID", null); + attribute("font", "id", "ID", null); + attribute("form", "id", "ID", null); + attribute("frame", "id", "ID", null); + attribute("frameset", "id", "ID", null); + attribute("h1", "id", "ID", null); + attribute("h2", "id", "ID", null); + attribute("h3", "id", "ID", null); + attribute("h4", "id", "ID", null); + attribute("h5", "id", "ID", null); + attribute("h6", "id", "ID", null); + attribute("head", "id", "ID", null); + attribute("hr", "id", "ID", null); + attribute("html", "id", "ID", null); + attribute("i", "id", "ID", null); + attribute("iframe", "id", "ID", null); + attribute("img", "id", "ID", null); + attribute("input", "id", "ID", null); + attribute("ins", "id", "ID", null); + attribute("isindex", "id", "ID", null); + attribute("kbd", "id", "ID", null); + attribute("label", "id", "ID", null); + attribute("legend", "id", "ID", null); + attribute("li", "id", "ID", null); + attribute("link", "id", "ID", null); + attribute("listing", "id", "ID", null); + attribute("map", "id", "ID", null); + attribute("marquee", "id", "ID", null); + attribute("menu", "id", "ID", null); + attribute("meta", "id", "ID", null); + attribute("nobr", "id", "ID", null); + attribute("noframes", "id", "ID", null); + attribute("noscript", "id", "ID", null); + attribute("object", "id", "ID", null); + attribute("ol", "id", "ID", null); + attribute("optgroup", "id", "ID", null); 
+ attribute("option", "id", "ID", null); + attribute("p", "id", "ID", null); + attribute("param", "id", "ID", null); + attribute("pre", "id", "ID", null); + attribute("q", "id", "ID", null); + attribute("rb", "id", "ID", null); + attribute("rbc", "id", "ID", null); + attribute("rp", "id", "ID", null); + attribute("rt", "id", "ID", null); + attribute("rtc", "id", "ID", null); + attribute("ruby", "id", "ID", null); + attribute("s", "id", "ID", null); + attribute("samp", "id", "ID", null); + attribute("script", "id", "ID", null); + attribute("select", "id", "ID", null); + attribute("small", "id", "ID", null); + attribute("span", "id", "ID", null); + attribute("strike", "id", "ID", null); + attribute("strong", "id", "ID", null); + attribute("style", "id", "ID", null); + attribute("sub", "id", "ID", null); + attribute("sup", "id", "ID", null); + attribute("table", "id", "ID", null); + attribute("tbody", "id", "ID", null); + attribute("td", "id", "ID", null); + attribute("textarea", "id", "ID", null); + attribute("tfoot", "id", "ID", null); + attribute("th", "id", "ID", null); + attribute("thead", "id", "ID", null); + attribute("title", "id", "ID", null); + attribute("tr", "id", "ID", null); + attribute("tt", "id", "ID", null); + attribute("u", "id", "ID", null); + attribute("ul", "id", "ID", null); + attribute("var", "id", "ID", null); + attribute("wbr", "id", "ID", null); + attribute("xmp", "id", "ID", null); + attribute("a", "lang", "NMTOKEN", null); + attribute("abbr", "lang", "NMTOKEN", null); + attribute("acronym", "lang", "NMTOKEN", null); + attribute("address", "lang", "NMTOKEN", null); + attribute("applet", "lang", "NMTOKEN", null); + attribute("area", "lang", "NMTOKEN", null); + attribute("b", "lang", "NMTOKEN", null); + attribute("base", "lang", "NMTOKEN", null); + attribute("basefont", "lang", "NMTOKEN", null); + attribute("bdo", "lang", "NMTOKEN", null); + attribute("bgsound", "lang", "NMTOKEN", null); + attribute("big", "lang", "NMTOKEN", null); + 
attribute("blink", "lang", "NMTOKEN", null); + attribute("blockquote", "lang", "NMTOKEN", null); + attribute("body", "lang", "NMTOKEN", null); + attribute("br", "lang", "NMTOKEN", null); + attribute("button", "lang", "NMTOKEN", null); + attribute("canvas", "lang", "NMTOKEN", null); + attribute("caption", "lang", "NMTOKEN", null); + attribute("center", "lang", "NMTOKEN", null); + attribute("cite", "lang", "NMTOKEN", null); + attribute("code", "lang", "NMTOKEN", null); + attribute("col", "lang", "NMTOKEN", null); + attribute("colgroup", "lang", "NMTOKEN", null); + attribute("comment", "lang", "NMTOKEN", null); + attribute("dd", "lang", "NMTOKEN", null); + attribute("del", "lang", "NMTOKEN", null); + attribute("dfn", "lang", "NMTOKEN", null); + attribute("dir", "lang", "NMTOKEN", null); + attribute("div", "lang", "NMTOKEN", null); + attribute("dl", "lang", "NMTOKEN", null); + attribute("dt", "lang", "NMTOKEN", null); + attribute("em", "lang", "NMTOKEN", null); + attribute("fieldset", "lang", "NMTOKEN", null); + attribute("font", "lang", "NMTOKEN", null); + attribute("form", "lang", "NMTOKEN", null); + attribute("frame", "lang", "NMTOKEN", null); + attribute("frameset", "lang", "NMTOKEN", null); + attribute("h1", "lang", "NMTOKEN", null); + attribute("h2", "lang", "NMTOKEN", null); + attribute("h3", "lang", "NMTOKEN", null); + attribute("h4", "lang", "NMTOKEN", null); + attribute("h5", "lang", "NMTOKEN", null); + attribute("h6", "lang", "NMTOKEN", null); + attribute("head", "lang", "NMTOKEN", null); + attribute("hr", "lang", "NMTOKEN", null); + attribute("html", "lang", "NMTOKEN", null); + attribute("i", "lang", "NMTOKEN", null); + attribute("iframe", "lang", "NMTOKEN", null); + attribute("img", "lang", "NMTOKEN", null); + attribute("input", "lang", "NMTOKEN", null); + attribute("ins", "lang", "NMTOKEN", null); + attribute("isindex", "lang", "NMTOKEN", null); + attribute("kbd", "lang", "NMTOKEN", null); + attribute("label", "lang", "NMTOKEN", null); + 
attribute("legend", "lang", "NMTOKEN", null); + attribute("li", "lang", "NMTOKEN", null); + attribute("link", "lang", "NMTOKEN", null); + attribute("listing", "lang", "NMTOKEN", null); + attribute("map", "lang", "NMTOKEN", null); + attribute("marquee", "lang", "NMTOKEN", null); + attribute("menu", "lang", "NMTOKEN", null); + attribute("meta", "lang", "NMTOKEN", null); + attribute("nobr", "lang", "NMTOKEN", null); + attribute("noframes", "lang", "NMTOKEN", null); + attribute("noscript", "lang", "NMTOKEN", null); + attribute("object", "lang", "NMTOKEN", null); + attribute("ol", "lang", "NMTOKEN", null); + attribute("optgroup", "lang", "NMTOKEN", null); + attribute("option", "lang", "NMTOKEN", null); + attribute("p", "lang", "NMTOKEN", null); + attribute("param", "lang", "NMTOKEN", null); + attribute("pre", "lang", "NMTOKEN", null); + attribute("q", "lang", "NMTOKEN", null); + attribute("rb", "lang", "NMTOKEN", null); + attribute("rbc", "lang", "NMTOKEN", null); + attribute("rp", "lang", "NMTOKEN", null); + attribute("rt", "lang", "NMTOKEN", null); + attribute("rtc", "lang", "NMTOKEN", null); + attribute("ruby", "lang", "NMTOKEN", null); + attribute("s", "lang", "NMTOKEN", null); + attribute("samp", "lang", "NMTOKEN", null); + attribute("script", "lang", "NMTOKEN", null); + attribute("select", "lang", "NMTOKEN", null); + attribute("small", "lang", "NMTOKEN", null); + attribute("span", "lang", "NMTOKEN", null); + attribute("strike", "lang", "NMTOKEN", null); + attribute("strong", "lang", "NMTOKEN", null); + attribute("style", "lang", "NMTOKEN", null); + attribute("sub", "lang", "NMTOKEN", null); + attribute("sup", "lang", "NMTOKEN", null); + attribute("table", "lang", "NMTOKEN", null); + attribute("tbody", "lang", "NMTOKEN", null); + attribute("td", "lang", "NMTOKEN", null); + attribute("textarea", "lang", "NMTOKEN", null); + attribute("tfoot", "lang", "NMTOKEN", null); + attribute("th", "lang", "NMTOKEN", null); + attribute("thead", "lang", "NMTOKEN", null); + 
attribute("title", "lang", "NMTOKEN", null); + attribute("tr", "lang", "NMTOKEN", null); + attribute("tt", "lang", "NMTOKEN", null); + attribute("u", "lang", "NMTOKEN", null); + attribute("ul", "lang", "NMTOKEN", null); + attribute("var", "lang", "NMTOKEN", null); + attribute("wbr", "lang", "NMTOKEN", null); + attribute("xmp", "lang", "NMTOKEN", null); + entity("aacgr", 0x03AC); + entity("Aacgr", 0x0386); + entity("aacute", 0x00E1); + entity("Aacute", 0x00C1); + entity("abreve", 0x0103); + entity("Abreve", 0x0102); + entity("ac", 0x223E); + entity("acd", 0x223F); + entity("acirc", 0x00E2); + entity("Acirc", 0x00C2); + entity("acute", 0x00B4); + entity("acy", 0x0430); + entity("Acy", 0x0410); + entity("aelig", 0x00E6); + entity("AElig", 0x00C6); + entity("af", 0x2061); + entity("afr", 0x1D51E); + entity("Afr", 0x1D504); + entity("agr", 0x03B1); + entity("Agr", 0x0391); + entity("agrave", 0x00E0); + entity("Agrave", 0x00C0); + entity("alefsym", 0x2135); + entity("aleph", 0x2135); + entity("alpha", 0x03B1); + entity("Alpha", 0x0391); + entity("amacr", 0x0101); + entity("Amacr", 0x0100); + entity("amalg", 0x2A3F); + entity("amp", 0x0026); + entity("and", 0x2227); + entity("And", 0x2A53); + entity("andand", 0x2A55); + entity("andd", 0x2A5C); + entity("andslope", 0x2A58); + entity("andv", 0x2A5A); + entity("ang", 0x2220); + entity("ange", 0x29A4); + entity("angle", 0x2220); + entity("angmsd", 0x2221); + entity("angmsdaa", 0x29A8); + entity("angmsdab", 0x29A9); + entity("angmsdac", 0x29AA); + entity("angmsdad", 0x29AB); + entity("angmsdae", 0x29AC); + entity("angmsdaf", 0x29AD); + entity("angmsdag", 0x29AE); + entity("angmsdah", 0x29AF); + entity("angrt", 0x221F); + entity("angrtvb", 0x22BE); + entity("angrtvbd", 0x299D); + entity("angsph", 0x2222); + entity("angst", 0x212B); + entity("angzarr", 0x237C); + entity("aogon", 0x0105); + entity("Aogon", 0x0104); + entity("aopf", 0x1D552); + entity("Aopf", 0x1D538); + entity("ap", 0x2248); + entity("apacir", 0x2A6F); + 
entity("ape", 0x224A); + entity("apE", 0x2A70); + entity("apid", 0x224B); + entity("apos", 0x0027); + entity("ApplyFunction", 0x2061); + entity("approx", 0x2248); + entity("approxeq", 0x224A); + entity("aring", 0x00E5); + entity("Aring", 0x00C5); + entity("ascr", 0x1D4B6); + entity("Ascr", 0x1D49C); + entity("Assign", 0x2254); + entity("ast", 0x002A); + entity("asymp", 0x2248); + entity("asympeq", 0x224D); + entity("atilde", 0x00E3); + entity("Atilde", 0x00C3); + entity("auml", 0x00E4); + entity("Auml", 0x00C4); + entity("awconint", 0x2233); + entity("awint", 0x2A11); + entity("b.alpha", 0x1D6C2); + entity("b.beta", 0x1D6C3); + entity("b.chi", 0x1D6D8); + entity("b.delta", 0x1D6C5); + entity("b.Delta", 0x1D6AB); + entity("b.epsi", 0x1D6C6); + entity("b.epsiv", 0x1D6DC); + entity("b.eta", 0x1D6C8); + entity("b.gamma", 0x1D6C4); + entity("b.Gamma", 0x1D6AA); + entity("b.gammad", 0x1D7CB); + entity("b.Gammad", 0x1D7CA); + entity("b.iota", 0x1D6CA); + entity("b.kappa", 0x1D6CB); + entity("b.kappav", 0x1D6DE); + entity("b.lambda", 0x1D6CC); + entity("b.Lambda", 0x1D6B2); + entity("b.mu", 0x1D6CD); + entity("b.nu", 0x1D6CE); + entity("b.omega", 0x1D6DA); + entity("b.Omega", 0x1D6C0); + entity("b.phi", 0x1D6D7); + entity("b.Phi", 0x1D6BD); + entity("b.phiv", 0x1D6DF); + entity("b.pi", 0x1D6D1); + entity("b.Pi", 0x1D6B7); + entity("b.piv", 0x1D6E1); + entity("b.psi", 0x1D6D9); + entity("b.Psi", 0x1D6BF); + entity("b.rho", 0x1D6D2); + entity("b.rhov", 0x1D6E0); + entity("b.sigma", 0x1D6D4); + entity("b.Sigma", 0x1D6BA); + entity("b.sigmav", 0x1D6D3); + entity("b.tau", 0x1D6D5); + entity("b.Theta", 0x1D6AF); + entity("b.thetas", 0x1D6C9); + entity("b.thetav", 0x1D6DD); + entity("b.upsi", 0x1D6D6); + entity("b.Upsi", 0x1D6BC); + entity("b.xi", 0x1D6CF); + entity("b.Xi", 0x1D6B5); + entity("b.zeta", 0x1D6C7); + entity("backcong", 0x224C); + entity("backepsilon", 0x03F6); + entity("backprime", 0x2035); + entity("backsim", 0x223D); + entity("backsimeq", 0x22CD); + 
entity("Backslash", 0x2216); + entity("Barv", 0x2AE7); + entity("barvee", 0x22BD); + entity("barwed", 0x2305); + entity("Barwed", 0x2306); + entity("barwedge", 0x2305); + entity("bbrk", 0x23B5); + entity("bbrktbrk", 0x23B6); + entity("bcong", 0x224C); + entity("bcy", 0x0431); + entity("Bcy", 0x0411); + entity("bdquo", 0x201E); + entity("becaus", 0x2235); + entity("because", 0x2235); + entity("bemptyv", 0x29B0); + entity("bepsi", 0x03F6); + entity("bernou", 0x212C); + entity("Bernoullis", 0x212C); + entity("beta", 0x03B2); + entity("Beta", 0x0392); + entity("beth", 0x2136); + entity("between", 0x226C); + entity("bfr", 0x1D51F); + entity("Bfr", 0x1D505); + entity("bgr", 0x03B2); + entity("Bgr", 0x0392); + entity("bigcap", 0x22C2); + entity("bigcirc", 0x25EF); + entity("bigcup", 0x22C3); + entity("bigodot", 0x2A00); + entity("bigoplus", 0x2A01); + entity("bigotimes", 0x2A02); + entity("bigsqcup", 0x2A06); + entity("bigstar", 0x2605); + entity("bigtriangledown", 0x25BD); + entity("bigtriangleup", 0x25B3); + entity("biguplus", 0x2A04); + entity("bigvee", 0x22C1); + entity("bigwedge", 0x22C0); + entity("bkarow", 0x290D); + entity("blacklozenge", 0x29EB); + entity("blacksquare", 0x25AA); + entity("blacktriangle", 0x25B4); + entity("blacktriangledown", 0x25BE); + entity("blacktriangleleft", 0x25C2); + entity("blacktriangleright", 0x25B8); + entity("blank", 0x2423); + entity("blk12", 0x2592); + entity("blk14", 0x2591); + entity("blk34", 0x2593); + entity("block", 0x2588); + entity("bnot", 0x2310); + entity("bNot", 0x2AED); + entity("bopf", 0x1D553); + entity("Bopf", 0x1D539); + entity("bot", 0x22A5); + entity("bottom", 0x22A5); + entity("bowtie", 0x22C8); + entity("boxbox", 0x29C9); + entity("boxdl", 0x2510); + entity("boxdL", 0x2555); + entity("boxDl", 0x2556); + entity("boxDL", 0x2557); + entity("boxdr", 0x250C); + entity("boxdR", 0x2552); + entity("boxDr", 0x2553); + entity("boxDR", 0x2554); + entity("boxh", 0x2500); + entity("boxH", 0x2550); + entity("boxhd", 0x252C); + 
entity("boxhD", 0x2565); + entity("boxHd", 0x2564); + entity("boxHD", 0x2566); + entity("boxhu", 0x2534); + entity("boxhU", 0x2568); + entity("boxHu", 0x2567); + entity("boxHU", 0x2569); + entity("boxminus", 0x229F); + entity("boxplus", 0x229E); + entity("boxtimes", 0x22A0); + entity("boxul", 0x2518); + entity("boxuL", 0x255B); + entity("boxUl", 0x255C); + entity("boxUL", 0x255D); + entity("boxur", 0x2514); + entity("boxuR", 0x2558); + entity("boxUr", 0x2559); + entity("boxUR", 0x255A); + entity("boxv", 0x2502); + entity("boxV", 0x2551); + entity("boxvh", 0x253C); + entity("boxvH", 0x256A); + entity("boxVh", 0x256B); + entity("boxVH", 0x256C); + entity("boxvl", 0x2524); + entity("boxvL", 0x2561); + entity("boxVl", 0x2562); + entity("boxVL", 0x2563); + entity("boxvr", 0x251C); + entity("boxvR", 0x255E); + entity("boxVr", 0x255F); + entity("boxVR", 0x2560); + entity("bprime", 0x2035); + entity("breve", 0x02D8); + entity("brvbar", 0x00A6); + entity("bscr", 0x1D4B7); + entity("Bscr", 0x212C); + entity("bsemi", 0x204F); + entity("bsim", 0x223D); + entity("bsime", 0x22CD); + entity("bsol", 0x005C); + entity("bsolb", 0x29C5); + entity("bull", 0x2022); + entity("bullet", 0x2022); + entity("bump", 0x224E); + entity("bumpe", 0x224F); + entity("bumpE", 0x2AAE); + entity("bumpeq", 0x224F); + entity("Bumpeq", 0x224E); + entity("cacute", 0x0107); + entity("Cacute", 0x0106); + entity("cap", 0x2229); + entity("Cap", 0x22D2); + entity("capand", 0x2A44); + entity("capbrcup", 0x2A49); + entity("capcap", 0x2A4B); + entity("capcup", 0x2A47); + entity("capdot", 0x2A40); + entity("CapitalDifferentialD", 0x2145); + entity("caret", 0x2041); + entity("caron", 0x02C7); + entity("Cayleys", 0x212D); + entity("ccaps", 0x2A4D); + entity("ccaron", 0x010D); + entity("Ccaron", 0x010C); + entity("ccedil", 0x00E7); + entity("Ccedil", 0x00C7); + entity("ccirc", 0x0109); + entity("Ccirc", 0x0108); + entity("Cconint", 0x2230); + entity("ccups", 0x2A4C); + entity("ccupssm", 0x2A50); + entity("cdot", 
0x010B); + entity("Cdot", 0x010A); + entity("cedil", 0x00B8); + entity("Cedilla", 0x00B8); + entity("cemptyv", 0x29B2); + entity("cent", 0x00A2); + entity("centerdot", 0x00B7); + entity("cfr", 0x1D520); + entity("Cfr", 0x212D); + entity("chcy", 0x0447); + entity("CHcy", 0x0427); + entity("check", 0x2713); + entity("checkmark", 0x2713); + entity("chi", 0x03C7); + entity("Chi", 0x03A7); + entity("cir", 0x25CB); + entity("circ", 0x02C6); + entity("circeq", 0x2257); + entity("circlearrowleft", 0x21BA); + entity("circlearrowright", 0x21BB); + entity("circledast", 0x229B); + entity("circledcirc", 0x229A); + entity("circleddash", 0x229D); + entity("CircleDot", 0x2299); + entity("circledR", 0x00AE); + entity("circledS", 0x24C8); + entity("CircleMinus", 0x2296); + entity("CirclePlus", 0x2295); + entity("CircleTimes", 0x2297); + entity("cire", 0x2257); + entity("cirE", 0x29C3); + entity("cirfnint", 0x2A10); + entity("cirmid", 0x2AEF); + entity("cirscir", 0x29C2); + entity("ClockwiseContourIntegral", 0x2232); + entity("CloseCurlyDoubleQuote", 0x201D); + entity("CloseCurlyQuote", 0x2019); + entity("clubs", 0x2663); + entity("clubsuit", 0x2663); + entity("colon", 0x003A); + entity("Colon", 0x2237); + entity("colone", 0x2254); + entity("Colone", 0x2A74); + entity("coloneq", 0x2254); + entity("comma", 0x002C); + entity("commat", 0x0040); + entity("comp", 0x2201); + entity("compfn", 0x2218); + entity("complement", 0x2201); + entity("complexes", 0x2102); + entity("cong", 0x2245); + entity("congdot", 0x2A6D); + entity("Congruent", 0x2261); + entity("conint", 0x222E); + entity("Conint", 0x222F); + entity("ContourIntegral", 0x222E); + entity("copf", 0x1D554); + entity("Copf", 0x2102); + entity("coprod", 0x2210); + entity("Coproduct", 0x2210); + entity("copy", 0x00A9); + entity("copysr", 0x2117); + entity("CounterClockwiseContourIntegral", 0x2233); + entity("crarr", 0x21B5); + entity("cross", 0x2717); + entity("Cross", 0x2A2F); + entity("cscr", 0x1D4B8); + entity("Cscr", 0x1D49E); + 
entity("csub", 0x2ACF); + entity("csube", 0x2AD1); + entity("csup", 0x2AD0); + entity("csupe", 0x2AD2); + entity("ctdot", 0x22EF); + entity("cudarrl", 0x2938); + entity("cudarrr", 0x2935); + entity("cuepr", 0x22DE); + entity("cuesc", 0x22DF); + entity("cularr", 0x21B6); + entity("cularrp", 0x293D); + entity("cup", 0x222A); + entity("Cup", 0x22D3); + entity("cupbrcap", 0x2A48); + entity("cupcap", 0x2A46); + entity("CupCap", 0x224D); + entity("cupcup", 0x2A4A); + entity("cupdot", 0x228D); + entity("cupor", 0x2A45); + entity("curarr", 0x21B7); + entity("curarrm", 0x293C); + entity("curlyeqprec", 0x22DE); + entity("curlyeqsucc", 0x22DF); + entity("curlyvee", 0x22CE); + entity("curlywedge", 0x22CF); + entity("curren", 0x00A4); + entity("curvearrowleft", 0x21B6); + entity("curvearrowright", 0x21B7); + entity("cuvee", 0x22CE); + entity("cuwed", 0x22CF); + entity("cwconint", 0x2232); + entity("cwint", 0x2231); + entity("cylcty", 0x232D); + entity("dagger", 0x2020); + entity("Dagger", 0x2021); + entity("daleth", 0x2138); + entity("darr", 0x2193); + entity("dArr", 0x21D3); + entity("Darr", 0x21A1); + entity("dash", 0x2010); + entity("dashv", 0x22A3); + entity("Dashv", 0x2AE4); + entity("dbkarow", 0x290F); + entity("dblac", 0x02DD); + entity("dcaron", 0x010F); + entity("Dcaron", 0x010E); + entity("dcy", 0x0434); + entity("Dcy", 0x0414); + entity("dd", 0x2146); + entity("DD", 0x2145); + entity("ddagger", 0x2021); + entity("ddarr", 0x21CA); + entity("DDotrahd", 0x2911); + entity("ddotseq", 0x2A77); + entity("deg", 0x00B0); + entity("Del", 0x2207); + entity("delta", 0x03B4); + entity("Delta", 0x0394); + entity("demptyv", 0x29B1); + entity("dfisht", 0x297F); + entity("dfr", 0x1D521); + entity("Dfr", 0x1D507); + entity("dgr", 0x03B4); + entity("Dgr", 0x0394); + entity("dHar", 0x2965); + entity("dharl", 0x21C3); + entity("dharr", 0x21C2); + entity("DiacriticalAcute", 0x00B4); + entity("DiacriticalDot", 0x02D9); + entity("DiacriticalDoubleAcute", 0x02DD); + 
entity("DiacriticalGrave", 0x0060); + entity("DiacriticalTilde", 0x02DC); + entity("diam", 0x22C4); + entity("diamond", 0x22C4); + entity("diamondsuit", 0x2666); + entity("diams", 0x2666); + entity("die", 0x00A8); + entity("DifferentialD", 0x2146); + entity("digamma", 0x03DD); + entity("disin", 0x22F2); + entity("div", 0x00F7); + entity("divide", 0x00F7); + entity("divideontimes", 0x22C7); + entity("divonx", 0x22C7); + entity("djcy", 0x0452); + entity("DJcy", 0x0402); + entity("dlcorn", 0x231E); + entity("dlcrop", 0x230D); + entity("dollar", 0x0024); + entity("dopf", 0x1D555); + entity("Dopf", 0x1D53B); + entity("dot", 0x02D9); + entity("Dot", 0x00A8); + entity("doteq", 0x2250); + entity("doteqdot", 0x2251); + entity("DotEqual", 0x2250); + entity("dotminus", 0x2238); + entity("dotplus", 0x2214); + entity("dotsquare", 0x22A1); + entity("doublebarwedge", 0x2306); + entity("DoubleContourIntegral", 0x222F); + entity("DoubleDot", 0x00A8); + entity("DoubleDownArrow", 0x21D3); + entity("DoubleLeftArrow", 0x21D0); + entity("DoubleLeftRightArrow", 0x21D4); + entity("DoubleLeftTee", 0x2AE4); + entity("DoubleLongLeftArrow", 0x27F8); + entity("DoubleLongLeftRightArrow", 0x27FA); + entity("DoubleLongRightArrow", 0x27F9); + entity("DoubleRightArrow", 0x21D2); + entity("DoubleRightTee", 0x22A8); + entity("DoubleUpArrow", 0x21D1); + entity("DoubleUpDownArrow", 0x21D5); + entity("DoubleVerticalBar", 0x2225); + entity("downarrow", 0x2193); + entity("Downarrow", 0x21D3); + entity("DownArrowBar", 0x2913); + entity("DownArrowUpArrow", 0x21F5); + entity("downdownarrows", 0x21CA); + entity("downharpoonleft", 0x21C3); + entity("downharpoonright", 0x21C2); + entity("DownLeftRightVector", 0x2950); + entity("DownLeftTeeVector", 0x295E); + entity("DownLeftVector", 0x21BD); + entity("DownLeftVectorBar", 0x2956); + entity("DownRightTeeVector", 0x295F); + entity("DownRightVector", 0x21C1); + entity("DownRightVectorBar", 0x2957); + entity("DownTee", 0x22A4); + entity("DownTeeArrow", 0x21A7); + 
entity("drbkarow", 0x2910); + entity("drcorn", 0x231F); + entity("drcrop", 0x230C); + entity("dscr", 0x1D4B9); + entity("Dscr", 0x1D49F); + entity("dscy", 0x0455); + entity("DScy", 0x0405); + entity("dsol", 0x29F6); + entity("dstrok", 0x0111); + entity("Dstrok", 0x0110); + entity("dtdot", 0x22F1); + entity("dtri", 0x25BF); + entity("dtrif", 0x25BE); + entity("duarr", 0x21F5); + entity("duhar", 0x296F); + entity("dwangle", 0x29A6); + entity("dzcy", 0x045F); + entity("DZcy", 0x040F); + entity("dzigrarr", 0x27FF); + entity("eacgr", 0x03AD); + entity("Eacgr", 0x0388); + entity("eacute", 0x00E9); + entity("Eacute", 0x00C9); + entity("easter", 0x2A6E); + entity("ecaron", 0x011B); + entity("Ecaron", 0x011A); + entity("ecir", 0x2256); + entity("ecirc", 0x00EA); + entity("Ecirc", 0x00CA); + entity("ecolon", 0x2255); + entity("ecy", 0x044D); + entity("Ecy", 0x042D); + entity("eDDot", 0x2A77); + entity("edot", 0x0117); + entity("eDot", 0x2251); + entity("Edot", 0x0116); + entity("ee", 0x2147); + entity("eeacgr", 0x03AE); + entity("EEacgr", 0x0389); + entity("eegr", 0x03B7); + entity("EEgr", 0x0397); + entity("efDot", 0x2252); + entity("efr", 0x1D522); + entity("Efr", 0x1D508); + entity("eg", 0x2A9A); + entity("egr", 0x03B5); + entity("Egr", 0x0395); + entity("egrave", 0x00E8); + entity("Egrave", 0x00C8); + entity("egs", 0x2A96); + entity("egsdot", 0x2A98); + entity("el", 0x2A99); + entity("Element", 0x2208); + entity("elinters", 0x23E7); + entity("ell", 0x2113); + entity("els", 0x2A95); + entity("elsdot", 0x2A97); + entity("emacr", 0x0113); + entity("Emacr", 0x0112); + entity("empty", 0x2205); + entity("emptyset", 0x2205); + entity("EmptySmallSquare", 0x25FB); + entity("emptyv", 0x2205); + entity("EmptyVerySmallSquare", 0x25AB); + entity("emsp", 0x2003); + entity("emsp13", 0x2004); + entity("emsp14", 0x2005); + entity("eng", 0x014B); + entity("ENG", 0x014A); + entity("ensp", 0x2002); + entity("eogon", 0x0119); + entity("Eogon", 0x0118); + entity("eopf", 0x1D556); + 
entity("Eopf", 0x1D53C); + entity("epar", 0x22D5); + entity("eparsl", 0x29E3); + entity("eplus", 0x2A71); + entity("epsi", 0x03F5); + entity("epsilon", 0x03B5); + entity("Epsilon", 0x0395); + entity("epsiv", 0x03B5); + entity("eqcirc", 0x2256); + entity("eqcolon", 0x2255); + entity("eqsim", 0x2242); + entity("eqslantgtr", 0x2A96); + entity("eqslantless", 0x2A95); + entity("Equal", 0x2A75); + entity("equals", 0x003D); + entity("EqualTilde", 0x2242); + entity("equest", 0x225F); + entity("Equilibrium", 0x21CC); + entity("equiv", 0x2261); + entity("equivDD", 0x2A78); + entity("eqvparsl", 0x29E5); + entity("erarr", 0x2971); + entity("erDot", 0x2253); + entity("escr", 0x212F); + entity("Escr", 0x2130); + entity("esdot", 0x2250); + entity("esim", 0x2242); + entity("Esim", 0x2A73); + entity("eta", 0x03B7); + entity("Eta", 0x0397); + entity("eth", 0x00F0); + entity("ETH", 0x00D0); + entity("euml", 0x00EB); + entity("Euml", 0x00CB); + entity("euro", 0x20AC); + entity("excl", 0x0021); + entity("exist", 0x2203); + entity("Exists", 0x2203); + entity("expectation", 0x2130); + entity("exponentiale", 0x2147); + entity("fallingdotseq", 0x2252); + entity("fcy", 0x0444); + entity("Fcy", 0x0424); + entity("female", 0x2640); + entity("ffilig", 0xFB03); + entity("fflig", 0xFB00); + entity("ffllig", 0xFB04); + entity("ffr", 0x1D523); + entity("Ffr", 0x1D509); + entity("filig", 0xFB01); + entity("FilledSmallSquare", 0x25FC); + entity("FilledVerySmallSquare", 0x25AA); + entity("flat", 0x266D); + entity("fllig", 0xFB02); + entity("fltns", 0x25B1); + entity("fnof", 0x0192); + entity("fopf", 0x1D557); + entity("Fopf", 0x1D53D); + entity("forall", 0x2200); + entity("fork", 0x22D4); + entity("forkv", 0x2AD9); + entity("Fouriertrf", 0x2131); + entity("fpartint", 0x2A0D); + entity("frac12", 0x00BD); + entity("frac13", 0x2153); + entity("frac14", 0x00BC); + entity("frac15", 0x2155); + entity("frac16", 0x2159); + entity("frac18", 0x215B); + entity("frac23", 0x2154); + entity("frac25", 0x2156); + 
entity("frac34", 0x00BE); + entity("frac35", 0x2157); + entity("frac38", 0x215C); + entity("frac45", 0x2158); + entity("frac56", 0x215A); + entity("frac58", 0x215D); + entity("frac78", 0x215E); + entity("frasl", 0x2044); + entity("frown", 0x2322); + entity("fscr", 0x1D4BB); + entity("Fscr", 0x2131); + entity("gacute", 0x01F5); + entity("gamma", 0x03B3); + entity("Gamma", 0x0393); + entity("gammad", 0x03DD); + entity("Gammad", 0x03DC); + entity("gap", 0x2A86); + entity("gbreve", 0x011F); + entity("Gbreve", 0x011E); + entity("Gcedil", 0x0122); + entity("gcirc", 0x011D); + entity("Gcirc", 0x011C); + entity("gcy", 0x0433); + entity("Gcy", 0x0413); + entity("gdot", 0x0121); + entity("Gdot", 0x0120); + entity("ge", 0x2265); + entity("gE", 0x2267); + entity("gel", 0x22DB); + entity("gEl", 0x2A8C); + entity("geq", 0x2265); + entity("geqq", 0x2267); + entity("geqslant", 0x2A7E); + entity("ges", 0x2A7E); + entity("gescc", 0x2AA9); + entity("gesdot", 0x2A80); + entity("gesdoto", 0x2A82); + entity("gesdotol", 0x2A84); + entity("gesles", 0x2A94); + entity("gfr", 0x1D524); + entity("Gfr", 0x1D50A); + entity("gg", 0x226B); + entity("Gg", 0x22D9); + entity("ggg", 0x22D9); + entity("ggr", 0x03B3); + entity("Ggr", 0x0393); + entity("gimel", 0x2137); + entity("gjcy", 0x0453); + entity("GJcy", 0x0403); + entity("gl", 0x2277); + entity("gla", 0x2AA5); + entity("glE", 0x2A92); + entity("glj", 0x2AA4); + entity("gnap", 0x2A8A); + entity("gnapprox", 0x2A8A); + entity("gne", 0x2A88); + entity("gnE", 0x2269); + entity("gneq", 0x2A88); + entity("gneqq", 0x2269); + entity("gnsim", 0x22E7); + entity("gopf", 0x1D558); + entity("Gopf", 0x1D53E); + entity("grave", 0x0060); + entity("GreaterEqual", 0x2265); + entity("GreaterEqualLess", 0x22DB); + entity("GreaterFullEqual", 0x2267); + entity("GreaterGreater", 0x2AA2); + entity("GreaterLess", 0x2277); + entity("GreaterSlantEqual", 0x2A7E); + entity("GreaterTilde", 0x2273); + entity("gscr", 0x210A); + entity("Gscr", 0x1D4A2); + entity("gsim", 
0x2273); + entity("gsime", 0x2A8E); + entity("gsiml", 0x2A90); + entity("gt", 0x003E); + entity("Gt", 0x226B); + entity("gtcc", 0x2AA7); + entity("gtcir", 0x2A7A); + entity("gtdot", 0x22D7); + entity("gtlPar", 0x2995); + entity("gtquest", 0x2A7C); + entity("gtrapprox", 0x2A86); + entity("gtrarr", 0x2978); + entity("gtrdot", 0x22D7); + entity("gtreqless", 0x22DB); + entity("gtreqqless", 0x2A8C); + entity("gtrless", 0x2277); + entity("gtrsim", 0x2273); + entity("Hacek", 0x02C7); + entity("hairsp", 0x200A); + entity("half", 0x00BD); + entity("hamilt", 0x210B); + entity("hardcy", 0x044A); + entity("HARDcy", 0x042A); + entity("harr", 0x2194); + entity("hArr", 0x21D4); + entity("harrcir", 0x2948); + entity("harrw", 0x21AD); + entity("Hat", 0x005E); + entity("hbar", 0x210F); + entity("hcirc", 0x0125); + entity("Hcirc", 0x0124); + entity("hearts", 0x2665); + entity("heartsuit", 0x2665); + entity("hellip", 0x2026); + entity("hercon", 0x22B9); + entity("hfr", 0x1D525); + entity("Hfr", 0x210C); + entity("HilbertSpace", 0x210B); + entity("hksearow", 0x2925); + entity("hkswarow", 0x2926); + entity("hoarr", 0x21FF); + entity("homtht", 0x223B); + entity("hookleftarrow", 0x21A9); + entity("hookrightarrow", 0x21AA); + entity("hopf", 0x1D559); + entity("Hopf", 0x210D); + entity("horbar", 0x2015); + entity("HorizontalLine", 0x2500); + entity("hscr", 0x1D4BD); + entity("Hscr", 0x210B); + entity("hslash", 0x210F); + entity("hstrok", 0x0127); + entity("Hstrok", 0x0126); + entity("HumpDownHump", 0x224E); + entity("HumpEqual", 0x224F); + entity("hybull", 0x2043); + entity("hyphen", 0x2010); + entity("iacgr", 0x03AF); + entity("Iacgr", 0x038A); + entity("iacute", 0x00ED); + entity("Iacute", 0x00CD); + entity("ic", 0x2063); + entity("icirc", 0x00EE); + entity("Icirc", 0x00CE); + entity("icy", 0x0438); + entity("Icy", 0x0418); + entity("idiagr", 0x0390); + entity("idigr", 0x03CA); + entity("Idigr", 0x03AA); + entity("Idot", 0x0130); + entity("iecy", 0x0435); + entity("IEcy", 0x0415); + 
entity("iexcl", 0x00A1); + entity("iff", 0x21D4); + entity("ifr", 0x1D526); + entity("Ifr", 0x2111); + entity("igr", 0x03B9); + entity("Igr", 0x0399); + entity("igrave", 0x00EC); + entity("Igrave", 0x00CC); + entity("ii", 0x2148); + entity("iiiint", 0x2A0C); + entity("iiint", 0x222D); + entity("iinfin", 0x29DC); + entity("iiota", 0x2129); + entity("ijlig", 0x0133); + entity("IJlig", 0x0132); + entity("Im", 0x2111); + entity("imacr", 0x012B); + entity("Imacr", 0x012A); + entity("image", 0x2111); + entity("ImaginaryI", 0x2148); + entity("imagline", 0x2110); + entity("imagpart", 0x2111); + entity("imath", 0x0131); + entity("imof", 0x22B7); + entity("imped", 0x01B5); + entity("Implies", 0x21D2); + entity("in", 0x2208); + entity("incare", 0x2105); + entity("infin", 0x221E); + entity("infintie", 0x29DD); + entity("inodot", 0x0131); + entity("int", 0x222B); + entity("Int", 0x222C); + entity("intcal", 0x22BA); + entity("integers", 0x2124); + entity("Integral", 0x222B); + entity("intercal", 0x22BA); + entity("Intersection", 0x22C2); + entity("intlarhk", 0x2A17); + entity("intprod", 0x2A3C); + entity("InvisibleComma", 0x2063); + entity("InvisibleTimes", 0x2062); + entity("iocy", 0x0451); + entity("IOcy", 0x0401); + entity("iogon", 0x012F); + entity("Iogon", 0x012E); + entity("iopf", 0x1D55A); + entity("Iopf", 0x1D540); + entity("iota", 0x03B9); + entity("Iota", 0x0399); + entity("iprod", 0x2A3C); + entity("iquest", 0x00BF); + entity("iscr", 0x1D4BE); + entity("Iscr", 0x2110); + entity("isin", 0x2208); + entity("isindot", 0x22F5); + entity("isinE", 0x22F9); + entity("isins", 0x22F4); + entity("isinsv", 0x22F3); + entity("isinv", 0x2208); + entity("it", 0x2062); + entity("itilde", 0x0129); + entity("Itilde", 0x0128); + entity("iukcy", 0x0456); + entity("Iukcy", 0x0406); + entity("iuml", 0x00EF); + entity("Iuml", 0x00CF); + entity("jcirc", 0x0135); + entity("Jcirc", 0x0134); + entity("jcy", 0x0439); + entity("Jcy", 0x0419); + entity("jfr", 0x1D527); + entity("Jfr", 0x1D50D); + 
entity("jmath", 0x0237); + entity("jopf", 0x1D55B); + entity("Jopf", 0x1D541); + entity("jscr", 0x1D4BF); + entity("Jscr", 0x1D4A5); + entity("jsercy", 0x0458); + entity("Jsercy", 0x0408); + entity("jukcy", 0x0454); + entity("Jukcy", 0x0404); + entity("kappa", 0x03BA); + entity("Kappa", 0x039A); + entity("kappav", 0x03F0); + entity("kcedil", 0x0137); + entity("Kcedil", 0x0136); + entity("kcy", 0x043A); + entity("Kcy", 0x041A); + entity("kfr", 0x1D528); + entity("Kfr", 0x1D50E); + entity("kgr", 0x03BA); + entity("Kgr", 0x039A); + entity("kgreen", 0x0138); + entity("khcy", 0x0445); + entity("KHcy", 0x0425); + entity("khgr", 0x03C7); + entity("KHgr", 0x03A7); + entity("kjcy", 0x045C); + entity("KJcy", 0x040C); + entity("kopf", 0x1D55C); + entity("Kopf", 0x1D542); + entity("kscr", 0x1D4C0); + entity("Kscr", 0x1D4A6); + entity("lAarr", 0x21DA); + entity("lacute", 0x013A); + entity("Lacute", 0x0139); + entity("laemptyv", 0x29B4); + entity("lagran", 0x2112); + entity("lambda", 0x03BB); + entity("Lambda", 0x039B); + entity("lang", 0x2329); + entity("Lang", 0x27EA); + entity("langd", 0x2991); + entity("langle", 0x2329); + entity("lap", 0x2A85); + entity("Laplacetrf", 0x2112); + entity("laquo", 0x00AB); + entity("larr", 0x2190); + entity("lArr", 0x21D0); + entity("Larr", 0x219E); + entity("larrb", 0x21E4); + entity("larrbfs", 0x291F); + entity("larrfs", 0x291D); + entity("larrhk", 0x21A9); + entity("larrlp", 0x21AB); + entity("larrpl", 0x2939); + entity("larrsim", 0x2973); + entity("larrtl", 0x21A2); + entity("lat", 0x2AAB); + entity("latail", 0x2919); + entity("lAtail", 0x291B); + entity("late", 0x2AAD); + entity("lbarr", 0x290C); + entity("lBarr", 0x290E); + entity("lbbrk", 0x2997); + entity("lbrace", 0x007B); + entity("lbrack", 0x005B); + entity("lbrke", 0x298B); + entity("lbrksld", 0x298F); + entity("lbrkslu", 0x298D); + entity("lcaron", 0x013E); + entity("Lcaron", 0x013D); + entity("lcedil", 0x013C); + entity("Lcedil", 0x013B); + entity("lceil", 0x2308); + 
entity("lcub", 0x007B); + entity("lcy", 0x043B); + entity("Lcy", 0x041B); + entity("ldca", 0x2936); + entity("ldquo", 0x201C); + entity("ldquor", 0x201E); + entity("ldrdhar", 0x2967); + entity("ldrushar", 0x294B); + entity("ldsh", 0x21B2); + entity("le", 0x2264); + entity("lE", 0x2266); + entity("LeftAngleBracket", 0x2329); + entity("leftarrow", 0x2190); + entity("Leftarrow", 0x21D0); + entity("LeftArrowBar", 0x21E4); + entity("LeftArrowRightArrow", 0x21C6); + entity("leftarrowtail", 0x21A2); + entity("LeftCeiling", 0x2308); + entity("LeftDoubleBracket", 0x27E6); + entity("LeftDownTeeVector", 0x2961); + entity("LeftDownVector", 0x21C3); + entity("LeftDownVectorBar", 0x2959); + entity("LeftFloor", 0x230A); + entity("leftharpoondown", 0x21BD); + entity("leftharpoonup", 0x21BC); + entity("leftleftarrows", 0x21C7); + entity("leftrightarrow", 0x2194); + entity("Leftrightarrow", 0x21D4); + entity("leftrightarrows", 0x21C6); + entity("leftrightharpoons", 0x21CB); + entity("leftrightsquigarrow", 0x21AD); + entity("LeftRightVector", 0x294E); + entity("LeftTee", 0x22A3); + entity("LeftTeeArrow", 0x21A4); + entity("LeftTeeVector", 0x295A); + entity("leftthreetimes", 0x22CB); + entity("LeftTriangle", 0x22B2); + entity("LeftTriangleBar", 0x29CF); + entity("LeftTriangleEqual", 0x22B4); + entity("LeftUpDownVector", 0x2951); + entity("LeftUpTeeVector", 0x2960); + entity("LeftUpVector", 0x21BF); + entity("LeftUpVectorBar", 0x2958); + entity("LeftVector", 0x21BC); + entity("LeftVectorBar", 0x2952); + entity("leg", 0x22DA); + entity("lEg", 0x2A8B); + entity("leq", 0x2264); + entity("leqq", 0x2266); + entity("leqslant", 0x2A7D); + entity("les", 0x2A7D); + entity("lescc", 0x2AA8); + entity("lesdot", 0x2A7F); + entity("lesdoto", 0x2A81); + entity("lesdotor", 0x2A83); + entity("lesges", 0x2A93); + entity("lessapprox", 0x2A85); + entity("lessdot", 0x22D6); + entity("lesseqgtr", 0x22DA); + entity("lesseqqgtr", 0x2A8B); + entity("LessEqualGreater", 0x22DA); + entity("LessFullEqual", 
0x2266); + entity("LessGreater", 0x2276); + entity("lessgtr", 0x2276); + entity("LessLess", 0x2AA1); + entity("lesssim", 0x2272); + entity("LessSlantEqual", 0x2A7D); + entity("LessTilde", 0x2272); + entity("lfisht", 0x297C); + entity("lfloor", 0x230A); + entity("lfr", 0x1D529); + entity("Lfr", 0x1D50F); + entity("lg", 0x2276); + entity("lgE", 0x2A91); + entity("lgr", 0x03BB); + entity("Lgr", 0x039B); + entity("lHar", 0x2962); + entity("lhard", 0x21BD); + entity("lharu", 0x21BC); + entity("lharul", 0x296A); + entity("lhblk", 0x2584); + entity("ljcy", 0x0459); + entity("LJcy", 0x0409); + entity("ll", 0x226A); + entity("Ll", 0x22D8); + entity("llarr", 0x21C7); + entity("llcorner", 0x231E); + entity("Lleftarrow", 0x21DA); + entity("llhard", 0x296B); + entity("lltri", 0x25FA); + entity("lmidot", 0x0140); + entity("Lmidot", 0x013F); + entity("lmoust", 0x23B0); + entity("lmoustache", 0x23B0); + entity("lnap", 0x2A89); + entity("lnapprox", 0x2A89); + entity("lne", 0x2A87); + entity("lnE", 0x2268); + entity("lneq", 0x2A87); + entity("lneqq", 0x2268); + entity("lnsim", 0x22E6); + entity("loang", 0x27EC); + entity("loarr", 0x21FD); + entity("lobrk", 0x27E6); + entity("longleftarrow", 0x27F5); + entity("Longleftarrow", 0x27F8); + entity("longleftrightarrow", 0x27F7); + entity("Longleftrightarrow", 0x27FA); + entity("longmapsto", 0x27FC); + entity("longrightarrow", 0x27F6); + entity("Longrightarrow", 0x27F9); + entity("looparrowleft", 0x21AB); + entity("looparrowright", 0x21AC); + entity("lopar", 0x2985); + entity("lopf", 0x1D55D); + entity("Lopf", 0x1D543); + entity("loplus", 0x2A2D); + entity("lotimes", 0x2A34); + entity("lowast", 0x2217); + entity("lowbar", 0x005F); + entity("LowerLeftArrow", 0x2199); + entity("LowerRightArrow", 0x2198); + entity("loz", 0x25CA); + entity("lozenge", 0x25CA); + entity("lozf", 0x29EB); + entity("lpar", 0x0028); + entity("lparlt", 0x2993); + entity("lrarr", 0x21C6); + entity("lrcorner", 0x231F); + entity("lrhar", 0x21CB); + entity("lrhard", 
0x296D); + entity("lrm", 0x200E); + entity("lrtri", 0x22BF); + entity("lsaquo", 0x2039); + entity("lscr", 0x1D4C1); + entity("Lscr", 0x2112); + entity("lsh", 0x21B0); + entity("lsim", 0x2272); + entity("lsime", 0x2A8D); + entity("lsimg", 0x2A8F); + entity("lsqb", 0x005B); + entity("lsquo", 0x2018); + entity("lsquor", 0x201A); + entity("lstrok", 0x0142); + entity("Lstrok", 0x0141); + entity("lt", 0x003C); + entity("Lt", 0x226A); + entity("ltcc", 0x2AA6); + entity("ltcir", 0x2A79); + entity("ltdot", 0x22D6); + entity("lthree", 0x22CB); + entity("ltimes", 0x22C9); + entity("ltlarr", 0x2976); + entity("ltquest", 0x2A7B); + entity("ltri", 0x25C3); + entity("ltrie", 0x22B4); + entity("ltrif", 0x25C2); + entity("ltrPar", 0x2996); + entity("lurdshar", 0x294A); + entity("luruhar", 0x2966); + entity("macr", 0x00AF); + entity("male", 0x2642); + entity("malt", 0x2720); + entity("maltese", 0x2720); + entity("map", 0x21A6); + entity("Map", 0x2905); + entity("mapsto", 0x21A6); + entity("mapstodown", 0x21A7); + entity("mapstoleft", 0x21A4); + entity("mapstoup", 0x21A5); + entity("marker", 0x25AE); + entity("mcomma", 0x2A29); + entity("mcy", 0x043C); + entity("Mcy", 0x041C); + entity("mdash", 0x2014); + entity("mDDot", 0x223A); + entity("measuredangle", 0x2221); + entity("MediumSpace", 0x205F); + entity("Mellintrf", 0x2133); + entity("mfr", 0x1D52A); + entity("Mfr", 0x1D510); + entity("mgr", 0x03BC); + entity("Mgr", 0x039C); + entity("mho", 0x2127); + entity("micro", 0x00B5); + entity("mid", 0x2223); + entity("midast", 0x002A); + entity("midcir", 0x2AF0); + entity("middot", 0x00B7); + entity("minus", 0x2212); + entity("minusb", 0x229F); + entity("minusd", 0x2238); + entity("minusdu", 0x2A2A); + entity("MinusPlus", 0x2213); + entity("mlcp", 0x2ADB); + entity("mldr", 0x2026); + entity("mnplus", 0x2213); + entity("models", 0x22A7); + entity("mopf", 0x1D55E); + entity("Mopf", 0x1D544); + entity("mp", 0x2213); + entity("mscr", 0x1D4C2); + entity("Mscr", 0x2133); + entity("mstpos", 
0x223E); + entity("mu", 0x03BC); + entity("Mu", 0x039C); + entity("multimap", 0x22B8); + entity("mumap", 0x22B8); + entity("nabla", 0x2207); + entity("nacute", 0x0144); + entity("Nacute", 0x0143); + entity("nap", 0x2249); + entity("napos", 0x0149); + entity("napprox", 0x2249); + entity("natur", 0x266E); + entity("natural", 0x266E); + entity("naturals", 0x2115); + entity("nbsp", 0x00A0); + entity("ncap", 0x2A43); + entity("ncaron", 0x0148); + entity("Ncaron", 0x0147); + entity("ncedil", 0x0146); + entity("Ncedil", 0x0145); + entity("ncong", 0x2247); + entity("ncup", 0x2A42); + entity("ncy", 0x043D); + entity("Ncy", 0x041D); + entity("ndash", 0x2013); + entity("ne", 0x2260); + entity("nearhk", 0x2924); + entity("nearr", 0x2197); + entity("neArr", 0x21D7); + entity("nearrow", 0x2197); + entity("NegativeMediumSpace", 0x200B); + entity("NegativeThickSpace", 0x200B); + entity("NegativeThinSpace", 0x200B); + entity("NegativeVeryThinSpace", 0x200B); + entity("nequiv", 0x2262); + entity("nesear", 0x2928); + entity("NestedGreaterGreater", 0x226B); + entity("NestedLessLess", 0x226A); + entity("NewLine", 0x000A); + entity("nexist", 0x2204); + entity("nexists", 0x2204); + entity("nfr", 0x1D52B); + entity("Nfr", 0x1D511); + entity("nge", 0x2271); + entity("ngeq", 0x2271); + entity("ngr", 0x03BD); + entity("Ngr", 0x039D); + entity("ngsim", 0x2275); + entity("ngt", 0x226F); + entity("ngtr", 0x226F); + entity("nharr", 0x21AE); + entity("nhArr", 0x21CE); + entity("nhpar", 0x2AF2); + entity("ni", 0x220B); + entity("nis", 0x22FC); + entity("nisd", 0x22FA); + entity("niv", 0x220B); + entity("njcy", 0x045A); + entity("NJcy", 0x040A); + entity("nlarr", 0x219A); + entity("nlArr", 0x21CD); + entity("nldr", 0x2025); + entity("nle", 0x2270); + entity("nleftarrow", 0x219A); + entity("nLeftarrow", 0x21CD); + entity("nleftrightarrow", 0x21AE); + entity("nLeftrightarrow", 0x21CE); + entity("nleq", 0x2270); + entity("nless", 0x226E); + entity("nlsim", 0x2274); + entity("nlt", 0x226E); + 
entity("nltri", 0x22EA); + entity("nltrie", 0x22EC); + entity("nmid", 0x2224); + entity("NoBreak", 0x2060); + entity("NonBreakingSpace", 0x00A0); + entity("nopf", 0x1D55F); + entity("Nopf", 0x2115); + entity("not", 0x00AC); + entity("Not", 0x2AEC); + entity("NotCongruent", 0x2262); + entity("NotCupCap", 0x226D); + entity("NotDoubleVerticalBar", 0x2226); + entity("NotElement", 0x2209); + entity("NotEqual", 0x2260); + entity("NotExists", 0x2204); + entity("NotGreater", 0x226F); + entity("NotGreaterEqual", 0x2271); + entity("NotGreaterLess", 0x2279); + entity("NotGreaterTilde", 0x2275); + entity("notin", 0x2209); + entity("notinva", 0x2209); + entity("notinvb", 0x22F7); + entity("notinvc", 0x22F6); + entity("NotLeftTriangle", 0x22EA); + entity("NotLeftTriangleEqual", 0x22EC); + entity("NotLess", 0x226E); + entity("NotLessEqual", 0x2270); + entity("NotLessGreater", 0x2278); + entity("NotLessTilde", 0x2274); + entity("notni", 0x220C); + entity("notniva", 0x220C); + entity("notnivb", 0x22FE); + entity("notnivc", 0x22FD); + entity("NotPrecedes", 0x2280); + entity("NotPrecedesSlantEqual", 0x22E0); + entity("NotReverseElement", 0x220C); + entity("NotRightTriangle", 0x22EB); + entity("NotRightTriangleEqual", 0x22ED); + entity("NotSquareSubsetEqual", 0x22E2); + entity("NotSquareSupersetEqual", 0x22E3); + entity("NotSubsetEqual", 0x2288); + entity("NotSucceeds", 0x2281); + entity("NotSucceedsSlantEqual", 0x22E1); + entity("NotSupersetEqual", 0x2289); + entity("NotTilde", 0x2241); + entity("NotTildeEqual", 0x2244); + entity("NotTildeFullEqual", 0x2247); + entity("NotTildeTilde", 0x2249); + entity("NotVerticalBar", 0x2224); + entity("npar", 0x2226); + entity("nparallel", 0x2226); + entity("npolint", 0x2A14); + entity("npr", 0x2280); + entity("nprcue", 0x22E0); + entity("nprec", 0x2280); + entity("nrarr", 0x219B); + entity("nrArr", 0x21CF); + entity("nrightarrow", 0x219B); + entity("nRightarrow", 0x21CF); + entity("nrtri", 0x22EB); + entity("nrtrie", 0x22ED); + entity("nsc", 
0x2281); + entity("nsccue", 0x22E1); + entity("nscr", 0x1D4C3); + entity("Nscr", 0x1D4A9); + entity("nshortmid", 0x2224); + entity("nshortparallel", 0x2226); + entity("nsim", 0x2241); + entity("nsime", 0x2244); + entity("nsimeq", 0x2244); + entity("nsmid", 0x2224); + entity("nspar", 0x2226); + entity("nsqsube", 0x22E2); + entity("nsqsupe", 0x22E3); + entity("nsub", 0x2284); + entity("nsube", 0x2288); + entity("nsubseteq", 0x2288); + entity("nsucc", 0x2281); + entity("nsup", 0x2285); + entity("nsupe", 0x2289); + entity("nsupseteq", 0x2289); + entity("ntgl", 0x2279); + entity("ntilde", 0x00F1); + entity("Ntilde", 0x00D1); + entity("ntlg", 0x2278); + entity("ntriangleleft", 0x22EA); + entity("ntrianglelefteq", 0x22EC); + entity("ntriangleright", 0x22EB); + entity("ntrianglerighteq", 0x22ED); + entity("nu", 0x03BD); + entity("Nu", 0x039D); + entity("num", 0x0023); + entity("numero", 0x2116); + entity("numsp", 0x2007); + entity("nvdash", 0x22AC); + entity("nvDash", 0x22AD); + entity("nVdash", 0x22AE); + entity("nVDash", 0x22AF); + entity("nvHarr", 0x2904); + entity("nvinfin", 0x29DE); + entity("nvlArr", 0x2902); + entity("nvrArr", 0x2903); + entity("nwarhk", 0x2923); + entity("nwarr", 0x2196); + entity("nwArr", 0x21D6); + entity("nwarrow", 0x2196); + entity("nwnear", 0x2927); + entity("oacgr", 0x03CC); + entity("Oacgr", 0x038C); + entity("oacute", 0x00F3); + entity("Oacute", 0x00D3); + entity("oast", 0x229B); + entity("ocir", 0x229A); + entity("ocirc", 0x00F4); + entity("Ocirc", 0x00D4); + entity("ocy", 0x043E); + entity("Ocy", 0x041E); + entity("odash", 0x229D); + entity("odblac", 0x0151); + entity("Odblac", 0x0150); + entity("odiv", 0x2A38); + entity("odot", 0x2299); + entity("odsold", 0x29BC); + entity("oelig", 0x0153); + entity("OElig", 0x0152); + entity("ofcir", 0x29BF); + entity("ofr", 0x1D52C); + entity("Ofr", 0x1D512); + entity("ogon", 0x02DB); + entity("ogr", 0x03BF); + entity("Ogr", 0x039F); + entity("ograve", 0x00F2); + entity("Ograve", 0x00D2); + 
entity("ogt", 0x29C1); + entity("ohacgr", 0x03CE); + entity("OHacgr", 0x038F); + entity("ohbar", 0x29B5); + entity("ohgr", 0x03C9); + entity("OHgr", 0x03A9); + entity("ohm", 0x2126); + entity("oint", 0x222E); + entity("olarr", 0x21BA); + entity("olcir", 0x29BE); + entity("olcross", 0x29BB); + entity("oline", 0x203E); + entity("olt", 0x29C0); + entity("omacr", 0x014D); + entity("Omacr", 0x014C); + entity("omega", 0x03C9); + entity("Omega", 0x03A9); + entity("omicron", 0x03BF); + entity("Omicron", 0x039F); + entity("omid", 0x29B6); + entity("ominus", 0x2296); + entity("oopf", 0x1D560); + entity("Oopf", 0x1D546); + entity("opar", 0x29B7); + entity("OpenCurlyDoubleQuote", 0x201C); + entity("OpenCurlyQuote", 0x2018); + entity("operp", 0x29B9); + entity("oplus", 0x2295); + entity("or", 0x2228); + entity("Or", 0x2A54); + entity("orarr", 0x21BB); + entity("ord", 0x2A5D); + entity("order", 0x2134); + entity("orderof", 0x2134); + entity("ordf", 0x00AA); + entity("ordm", 0x00BA); + entity("origof", 0x22B6); + entity("oror", 0x2A56); + entity("orslope", 0x2A57); + entity("orv", 0x2A5B); + entity("oS", 0x24C8); + entity("oscr", 0x2134); + entity("Oscr", 0x1D4AA); + entity("oslash", 0x00F8); + entity("Oslash", 0x00D8); + entity("osol", 0x2298); + entity("otilde", 0x00F5); + entity("Otilde", 0x00D5); + entity("otimes", 0x2297); + entity("Otimes", 0x2A37); + entity("otimesas", 0x2A36); + entity("ouml", 0x00F6); + entity("Ouml", 0x00D6); + entity("ovbar", 0x233D); + entity("OverBar", 0x00AF); + entity("OverBrace", 0xFE37); + entity("OverBracket", 0x23B4); + entity("OverParenthesis", 0xFE35); + entity("par", 0x2225); + entity("para", 0x00B6); + entity("parallel", 0x2225); + entity("parsim", 0x2AF3); + entity("parsl", 0x2AFD); + entity("part", 0x2202); + entity("PartialD", 0x2202); + entity("pcy", 0x043F); + entity("Pcy", 0x041F); + entity("percnt", 0x0025); + entity("period", 0x002E); + entity("permil", 0x2030); + entity("perp", 0x22A5); + entity("pertenk", 0x2031); + entity("pfr", 
0x1D52D); + entity("Pfr", 0x1D513); + entity("pgr", 0x03C0); + entity("Pgr", 0x03A0); + entity("phgr", 0x03C6); + entity("PHgr", 0x03A6); + entity("phi", 0x03D5); + entity("Phi", 0x03A6); + entity("phiv", 0x03C6); + entity("phmmat", 0x2133); + entity("phone", 0x260E); + entity("pi", 0x03C0); + entity("Pi", 0x03A0); + entity("pitchfork", 0x22D4); + entity("piv", 0x03D6); + entity("planck", 0x210F); + entity("planckh", 0x210E); + entity("plankv", 0x210F); + entity("plus", 0x002B); + entity("plusacir", 0x2A23); + entity("plusb", 0x229E); + entity("pluscir", 0x2A22); + entity("plusdo", 0x2214); + entity("plusdu", 0x2A25); + entity("pluse", 0x2A72); + entity("PlusMinus", 0x00B1); + entity("plusmn", 0x00B1); + entity("plussim", 0x2A26); + entity("plustwo", 0x2A27); + entity("pm", 0x00B1); + entity("Poincareplane", 0x210C); + entity("pointint", 0x2A15); + entity("popf", 0x1D561); + entity("Popf", 0x2119); + entity("pound", 0x00A3); + entity("pr", 0x227A); + entity("Pr", 0x2ABB); + entity("prap", 0x2AB7); + entity("prcue", 0x227C); + entity("pre", 0x2AAF); + entity("prE", 0x2AB3); + entity("prec", 0x227A); + entity("precapprox", 0x2AB7); + entity("preccurlyeq", 0x227C); + entity("Precedes", 0x227A); + entity("PrecedesEqual", 0x2AAF); + entity("PrecedesSlantEqual", 0x227C); + entity("PrecedesTilde", 0x227E); + entity("preceq", 0x2AAF); + entity("precnapprox", 0x2AB9); + entity("precneqq", 0x2AB5); + entity("precnsim", 0x22E8); + entity("precsim", 0x227E); + entity("prime", 0x2032); + entity("Prime", 0x2033); + entity("primes", 0x2119); + entity("prnap", 0x2AB9); + entity("prnE", 0x2AB5); + entity("prnsim", 0x22E8); + entity("prod", 0x220F); + entity("Product", 0x220F); + entity("profalar", 0x232E); + entity("profline", 0x2312); + entity("profsurf", 0x2313); + entity("prop", 0x221D); + entity("Proportion", 0x2237); + entity("Proportional", 0x221D); + entity("propto", 0x221D); + entity("prsim", 0x227E); + entity("prurel", 0x22B0); + entity("pscr", 0x1D4C5); + entity("Pscr", 
0x1D4AB); + entity("psgr", 0x03C8); + entity("PSgr", 0x03A8); + entity("psi", 0x03C8); + entity("Psi", 0x03A8); + entity("puncsp", 0x2008); + entity("qfr", 0x1D52E); + entity("Qfr", 0x1D514); + entity("qint", 0x2A0C); + entity("qopf", 0x1D562); + entity("Qopf", 0x211A); + entity("qprime", 0x2057); + entity("qscr", 0x1D4C6); + entity("Qscr", 0x1D4AC); + entity("quaternions", 0x210D); + entity("quatint", 0x2A16); + entity("quest", 0x003F); + entity("questeq", 0x225F); + entity("quot", 0x0022); + entity("rAarr", 0x21DB); + entity("race", 0x29DA); + entity("racute", 0x0155); + entity("Racute", 0x0154); + entity("radic", 0x221A); + entity("raemptyv", 0x29B3); + entity("rang", 0x232A); + entity("Rang", 0x27EB); + entity("rangd", 0x2992); + entity("range", 0x29A5); + entity("rangle", 0x232A); + entity("raquo", 0x00BB); + entity("rarr", 0x2192); + entity("rArr", 0x21D2); + entity("Rarr", 0x21A0); + entity("rarrap", 0x2975); + entity("rarrb", 0x21E5); + entity("rarrbfs", 0x2920); + entity("rarrc", 0x2933); + entity("rarrfs", 0x291E); + entity("rarrhk", 0x21AA); + entity("rarrlp", 0x21AC); + entity("rarrpl", 0x2945); + entity("rarrsim", 0x2974); + entity("rarrtl", 0x21A3); + entity("Rarrtl", 0x2916); + entity("rarrw", 0x219D); + entity("ratail", 0x291A); + entity("rAtail", 0x291C); + entity("ratio", 0x2236); + entity("rationals", 0x211A); + entity("rbarr", 0x290D); + entity("rBarr", 0x290F); + entity("RBarr", 0x2910); + entity("rbbrk", 0x2998); + entity("rbrace", 0x007D); + entity("rbrack", 0x005D); + entity("rbrke", 0x298C); + entity("rbrksld", 0x298E); + entity("rbrkslu", 0x2990); + entity("rcaron", 0x0159); + entity("Rcaron", 0x0158); + entity("rcedil", 0x0157); + entity("Rcedil", 0x0156); + entity("rceil", 0x2309); + entity("rcub", 0x007D); + entity("rcy", 0x0440); + entity("Rcy", 0x0420); + entity("rdca", 0x2937); + entity("rdldhar", 0x2969); + entity("rdquo", 0x201D); + entity("rdquor", 0x201D); + entity("rdsh", 0x21B3); + entity("Re", 0x211C); + entity("real", 
0x211C); + entity("realine", 0x211B); + entity("realpart", 0x211C); + entity("reals", 0x211D); + entity("rect", 0x25AD); + entity("reg", 0x00AE); + entity("ReverseElement", 0x220B); + entity("ReverseEquilibrium", 0x21CB); + entity("ReverseUpEquilibrium", 0x296F); + entity("rfisht", 0x297D); + entity("rfloor", 0x230B); + entity("rfr", 0x1D52F); + entity("Rfr", 0x211C); + entity("rgr", 0x03C1); + entity("Rgr", 0x03A1); + entity("rHar", 0x2964); + entity("rhard", 0x21C1); + entity("rharu", 0x21C0); + entity("rharul", 0x296C); + entity("rho", 0x03C1); + entity("Rho", 0x03A1); + entity("rhov", 0x03F1); + entity("RightAngleBracket", 0x232A); + entity("rightarrow", 0x2192); + entity("Rightarrow", 0x21D2); + entity("RightArrowBar", 0x21E5); + entity("RightArrowLeftArrow", 0x21C4); + entity("rightarrowtail", 0x21A3); + entity("RightCeiling", 0x2309); + entity("RightDoubleBracket", 0x27E7); + entity("RightDownTeeVector", 0x295D); + entity("RightDownVector", 0x21C2); + entity("RightDownVectorBar", 0x2955); + entity("RightFloor", 0x230B); + entity("rightharpoondown", 0x21C1); + entity("rightharpoonup", 0x21C0); + entity("rightleftarrows", 0x21C4); + entity("rightleftharpoons", 0x21CC); + entity("rightrightarrows", 0x21C9); + entity("rightsquigarrow", 0x219D); + entity("RightTee", 0x22A2); + entity("RightTeeArrow", 0x21A6); + entity("RightTeeVector", 0x295B); + entity("rightthreetimes", 0x22CC); + entity("RightTriangle", 0x22B3); + entity("RightTriangleBar", 0x29D0); + entity("RightTriangleEqual", 0x22B5); + entity("RightUpDownVector", 0x294F); + entity("RightUpTeeVector", 0x295C); + entity("RightUpVector", 0x21BE); + entity("RightUpVectorBar", 0x2954); + entity("RightVector", 0x21C0); + entity("RightVectorBar", 0x2953); + entity("ring", 0x02DA); + entity("risingdotseq", 0x2253); + entity("rlarr", 0x21C4); + entity("rlhar", 0x21CC); + entity("rlm", 0x200F); + entity("rmoust", 0x23B1); + entity("rmoustache", 0x23B1); + entity("rnmid", 0x2AEE); + entity("roang", 0x27ED); + 
entity("roarr", 0x21FE); + entity("robrk", 0x27E7); + entity("ropar", 0x2986); + entity("ropf", 0x1D563); + entity("Ropf", 0x211D); + entity("roplus", 0x2A2E); + entity("rotimes", 0x2A35); + entity("RoundImplies", 0x2970); + entity("rpar", 0x0029); + entity("rpargt", 0x2994); + entity("rppolint", 0x2A12); + entity("rrarr", 0x21C9); + entity("Rrightarrow", 0x21DB); + entity("rsaquo", 0x203A); + entity("rscr", 0x1D4C7); + entity("Rscr", 0x211B); + entity("rsh", 0x21B1); + entity("rsqb", 0x005D); + entity("rsquo", 0x2019); + entity("rsquor", 0x2019); + entity("rthree", 0x22CC); + entity("rtimes", 0x22CA); + entity("rtri", 0x25B9); + entity("rtrie", 0x22B5); + entity("rtrif", 0x25B8); + entity("rtriltri", 0x29CE); + entity("RuleDelayed", 0x29F4); + entity("ruluhar", 0x2968); + entity("rx", 0x211E); + entity("sacute", 0x015B); + entity("Sacute", 0x015A); + entity("sbquo", 0x201A); + entity("sc", 0x227B); + entity("Sc", 0x2ABC); + entity("scap", 0x2AB8); + entity("scaron", 0x0161); + entity("Scaron", 0x0160); + entity("sccue", 0x227D); + entity("sce", 0x2AB0); + entity("scE", 0x2AB4); + entity("scedil", 0x015F); + entity("Scedil", 0x015E); + entity("scirc", 0x015D); + entity("Scirc", 0x015C); + entity("scnap", 0x2ABA); + entity("scnE", 0x2AB6); + entity("scnsim", 0x22E9); + entity("scpolint", 0x2A13); + entity("scsim", 0x227F); + entity("scy", 0x0441); + entity("Scy", 0x0421); + entity("sdot", 0x22C5); + entity("sdotb", 0x22A1); + entity("sdote", 0x2A66); + entity("searhk", 0x2925); + entity("searr", 0x2198); + entity("seArr", 0x21D8); + entity("searrow", 0x2198); + entity("sect", 0x00A7); + entity("semi", 0x003B); + entity("seswar", 0x2929); + entity("setminus", 0x2216); + entity("setmn", 0x2216); + entity("sext", 0x2736); + entity("sfgr", 0x03C2); + entity("sfr", 0x1D530); + entity("Sfr", 0x1D516); + entity("sfrown", 0x2322); + entity("sgr", 0x03C3); + entity("Sgr", 0x03A3); + entity("sharp", 0x266F); + entity("shchcy", 0x0449); + entity("SHCHcy", 0x0429); + 
entity("shcy", 0x0448); + entity("SHcy", 0x0428); + entity("ShortDownArrow", 0x2193); + entity("ShortLeftArrow", 0x2190); + entity("shortmid", 0x2223); + entity("shortparallel", 0x2225); + entity("ShortRightArrow", 0x2192); + entity("ShortUpArrow", 0x2191); + entity("shy", 0x00AD); + entity("sigma", 0x03C3); + entity("Sigma", 0x03A3); + entity("sigmaf", 0x03C2); + entity("sigmav", 0x03C2); + entity("sim", 0x223C); + entity("simdot", 0x2A6A); + entity("sime", 0x2243); + entity("simeq", 0x2243); + entity("simg", 0x2A9E); + entity("simgE", 0x2AA0); + entity("siml", 0x2A9D); + entity("simlE", 0x2A9F); + entity("simne", 0x2246); + entity("simplus", 0x2A24); + entity("simrarr", 0x2972); + entity("slarr", 0x2190); + entity("SmallCircle", 0x2218); + entity("smallsetminus", 0x2216); + entity("smashp", 0x2A33); + entity("smeparsl", 0x29E4); + entity("smid", 0x2223); + entity("smile", 0x2323); + entity("smt", 0x2AAA); + entity("smte", 0x2AAC); + entity("softcy", 0x044C); + entity("SOFTcy", 0x042C); + entity("sol", 0x002F); + entity("solb", 0x29C4); + entity("solbar", 0x233F); + entity("sopf", 0x1D564); + entity("Sopf", 0x1D54A); + entity("spades", 0x2660); + entity("spadesuit", 0x2660); + entity("spar", 0x2225); + entity("sqcap", 0x2293); + entity("sqcup", 0x2294); + entity("Sqrt", 0x221A); + entity("sqsub", 0x228F); + entity("sqsube", 0x2291); + entity("sqsubset", 0x228F); + entity("sqsubseteq", 0x2291); + entity("sqsup", 0x2290); + entity("sqsupe", 0x2292); + entity("sqsupset", 0x2290); + entity("sqsupseteq", 0x2292); + entity("squ", 0x25A1); + entity("square", 0x25A1); + entity("SquareIntersection", 0x2293); + entity("SquareSubset", 0x228F); + entity("SquareSubsetEqual", 0x2291); + entity("SquareSuperset", 0x2290); + entity("SquareSupersetEqual", 0x2292); + entity("SquareUnion", 0x2294); + entity("squarf", 0x25AA); + entity("squf", 0x25AA); + entity("srarr", 0x2192); + entity("sscr", 0x1D4C8); + entity("Sscr", 0x1D4AE); + entity("ssetmn", 0x2216); + entity("ssmile", 
0x2323); + entity("sstarf", 0x22C6); + entity("star", 0x2606); + entity("Star", 0x22C6); + entity("starf", 0x2605); + entity("straightepsilon", 0x03F5); + entity("straightphi", 0x03D5); + entity("strns", 0x00AF); + entity("sub", 0x2282); + entity("Sub", 0x22D0); + entity("subdot", 0x2ABD); + entity("sube", 0x2286); + entity("subE", 0x2AC5); + entity("subedot", 0x2AC3); + entity("submult", 0x2AC1); + entity("subne", 0x228A); + entity("subnE", 0x2ACB); + entity("subplus", 0x2ABF); + entity("subrarr", 0x2979); + entity("subset", 0x2282); + entity("Subset", 0x22D0); + entity("subseteq", 0x2286); + entity("subseteqq", 0x2AC5); + entity("SubsetEqual", 0x2286); + entity("subsetneq", 0x228A); + entity("subsetneqq", 0x2ACB); + entity("subsim", 0x2AC7); + entity("subsub", 0x2AD5); + entity("subsup", 0x2AD3); + entity("succ", 0x227B); + entity("succapprox", 0x2AB8); + entity("succcurlyeq", 0x227D); + entity("Succeeds", 0x227B); + entity("SucceedsEqual", 0x2AB0); + entity("SucceedsSlantEqual", 0x227D); + entity("SucceedsTilde", 0x227F); + entity("succeq", 0x2AB0); + entity("succnapprox", 0x2ABA); + entity("succneqq", 0x2AB6); + entity("succnsim", 0x22E9); + entity("succsim", 0x227F); + entity("SuchThat", 0x220B); + entity("sum", 0x2211); + entity("sung", 0x266A); + entity("sup", 0x2283); + entity("Sup", 0x22D1); + entity("sup1", 0x00B9); + entity("sup2", 0x00B2); + entity("sup3", 0x00B3); + entity("supdot", 0x2ABE); + entity("supdsub", 0x2AD8); + entity("supe", 0x2287); + entity("supE", 0x2AC6); + entity("supedot", 0x2AC4); + entity("Superset", 0x2283); + entity("SupersetEqual", 0x2287); + entity("suphsub", 0x2AD7); + entity("suplarr", 0x297B); + entity("supmult", 0x2AC2); + entity("supne", 0x228B); + entity("supnE", 0x2ACC); + entity("supplus", 0x2AC0); + entity("supset", 0x2283); + entity("Supset", 0x22D1); + entity("supseteq", 0x2287); + entity("supseteqq", 0x2AC6); + entity("supsetneq", 0x228B); + entity("supsetneqq", 0x2ACC); + entity("supsim", 0x2AC8); + entity("supsub", 
0x2AD4); + entity("supsup", 0x2AD6); + entity("swarhk", 0x2926); + entity("swarr", 0x2199); + entity("swArr", 0x21D9); + entity("swarrow", 0x2199); + entity("swnwar", 0x292A); + entity("szlig", 0x00DF); + entity("Tab", 0x0009); + entity("target", 0x2316); + entity("tau", 0x03C4); + entity("Tau", 0x03A4); + entity("tbrk", 0x23B4); + entity("tcaron", 0x0165); + entity("Tcaron", 0x0164); + entity("tcedil", 0x0163); + entity("Tcedil", 0x0162); + entity("tcy", 0x0442); + entity("Tcy", 0x0422); + entity("telrec", 0x2315); + entity("tfr", 0x1D531); + entity("Tfr", 0x1D517); + entity("tgr", 0x03C4); + entity("Tgr", 0x03A4); + entity("there4", 0x2234); + entity("therefore", 0x2234); + entity("theta", 0x03B8); + entity("Theta", 0x0398); + entity("thetasym", 0x03D1); + entity("thetav", 0x03D1); + entity("thgr", 0x03B8); + entity("THgr", 0x0398); + entity("thickapprox", 0x2248); + entity("thicksim", 0x223C); + entity("thinsp", 0x2009); + entity("ThinSpace", 0x2009); + entity("thkap", 0x2248); + entity("thksim", 0x223C); + entity("thorn", 0x00FE); + entity("THORN", 0x00DE); + entity("tilde", 0x02DC); + entity("Tilde", 0x223C); + entity("TildeEqual", 0x2243); + entity("TildeFullEqual", 0x2245); + entity("TildeTilde", 0x2248); + entity("times", 0x00D7); + entity("timesb", 0x22A0); + entity("timesbar", 0x2A31); + entity("timesd", 0x2A30); + entity("tint", 0x222D); + entity("toea", 0x2928); + entity("top", 0x22A4); + entity("topbot", 0x2336); + entity("topcir", 0x2AF1); + entity("topf", 0x1D565); + entity("Topf", 0x1D54B); + entity("topfork", 0x2ADA); + entity("tosa", 0x2929); + entity("tprime", 0x2034); + entity("trade", 0x2122); + entity("triangle", 0x25B5); + entity("triangledown", 0x25BF); + entity("triangleleft", 0x25C3); + entity("trianglelefteq", 0x22B4); + entity("triangleq", 0x225C); + entity("triangleright", 0x25B9); + entity("trianglerighteq", 0x22B5); + entity("tridot", 0x25EC); + entity("trie", 0x225C); + entity("triminus", 0x2A3A); + entity("triplus", 0x2A39); + 
entity("trisb", 0x29CD); + entity("tritime", 0x2A3B); + entity("trpezium", 0x23E2); + entity("tscr", 0x1D4C9); + entity("Tscr", 0x1D4AF); + entity("tscy", 0x0446); + entity("TScy", 0x0426); + entity("tshcy", 0x045B); + entity("TSHcy", 0x040B); + entity("tstrok", 0x0167); + entity("Tstrok", 0x0166); + entity("twixt", 0x226C); + entity("twoheadleftarrow", 0x219E); + entity("twoheadrightarrow", 0x21A0); + entity("uacgr", 0x03CD); + entity("Uacgr", 0x038E); + entity("uacute", 0x00FA); + entity("Uacute", 0x00DA); + entity("uarr", 0x2191); + entity("uArr", 0x21D1); + entity("Uarr", 0x219F); + entity("Uarrocir", 0x2949); + entity("ubrcy", 0x045E); + entity("Ubrcy", 0x040E); + entity("ubreve", 0x016D); + entity("Ubreve", 0x016C); + entity("ucirc", 0x00FB); + entity("Ucirc", 0x00DB); + entity("ucy", 0x0443); + entity("Ucy", 0x0423); + entity("udarr", 0x21C5); + entity("udblac", 0x0171); + entity("Udblac", 0x0170); + entity("udhar", 0x296E); + entity("udiagr", 0x03B0); + entity("udigr", 0x03CB); + entity("Udigr", 0x03AB); + entity("ufisht", 0x297E); + entity("ufr", 0x1D532); + entity("Ufr", 0x1D518); + entity("ugr", 0x03C5); + entity("Ugr", 0x03A5); + entity("ugrave", 0x00F9); + entity("Ugrave", 0x00D9); + entity("uHar", 0x2963); + entity("uharl", 0x21BF); + entity("uharr", 0x21BE); + entity("uhblk", 0x2580); + entity("ulcorn", 0x231C); + entity("ulcorner", 0x231C); + entity("ulcrop", 0x230F); + entity("ultri", 0x25F8); + entity("umacr", 0x016B); + entity("Umacr", 0x016A); + entity("uml", 0x00A8); + entity("UnderBrace", 0xFE38); + entity("UnderBracket", 0x23B5); + entity("UnderParenthesis", 0xFE36); + entity("Union", 0x22C3); + entity("UnionPlus", 0x228E); + entity("uogon", 0x0173); + entity("Uogon", 0x0172); + entity("uopf", 0x1D566); + entity("Uopf", 0x1D54C); + entity("uparrow", 0x2191); + entity("Uparrow", 0x21D1); + entity("UpArrowBar", 0x2912); + entity("UpArrowDownArrow", 0x21C5); + entity("updownarrow", 0x2195); + entity("Updownarrow", 0x21D5); + 
entity("UpEquilibrium", 0x296E); + entity("upharpoonleft", 0x21BF); + entity("upharpoonright", 0x21BE); + entity("uplus", 0x228E); + entity("UpperLeftArrow", 0x2196); + entity("UpperRightArrow", 0x2197); + entity("upsi", 0x03C5); + entity("Upsi", 0x03D2); + entity("upsih", 0x03D2); + entity("upsilon", 0x03C5); + entity("Upsilon", 0x03A5); + entity("UpTee", 0x22A5); + entity("UpTeeArrow", 0x21A5); + entity("upuparrows", 0x21C8); + entity("urcorn", 0x231D); + entity("urcorner", 0x231D); + entity("urcrop", 0x230E); + entity("uring", 0x016F); + entity("Uring", 0x016E); + entity("urtri", 0x25F9); + entity("uscr", 0x1D4CA); + entity("Uscr", 0x1D4B0); + entity("utdot", 0x22F0); + entity("utilde", 0x0169); + entity("Utilde", 0x0168); + entity("utri", 0x25B5); + entity("utrif", 0x25B4); + entity("uuarr", 0x21C8); + entity("uuml", 0x00FC); + entity("Uuml", 0x00DC); + entity("uwangle", 0x29A7); + entity("vangrt", 0x299C); + entity("varepsilon", 0x03B5); + entity("varkappa", 0x03F0); + entity("varnothing", 0x2205); + entity("varphi", 0x03C6); + entity("varpi", 0x03D6); + entity("varpropto", 0x221D); + entity("varr", 0x2195); + entity("vArr", 0x21D5); + entity("varrho", 0x03F1); + entity("varsigma", 0x03C2); + entity("vartheta", 0x03D1); + entity("vartriangleleft", 0x22B2); + entity("vartriangleright", 0x22B3); + entity("vBar", 0x2AE8); + entity("Vbar", 0x2AEB); + entity("vBarv", 0x2AE9); + entity("vcy", 0x0432); + entity("Vcy", 0x0412); + entity("vdash", 0x22A2); + entity("vDash", 0x22A8); + entity("Vdash", 0x22A9); + entity("VDash", 0x22AB); + entity("Vdashl", 0x2AE6); + entity("vee", 0x2228); + entity("Vee", 0x22C1); + entity("veebar", 0x22BB); + entity("veeeq", 0x225A); + entity("vellip", 0x22EE); + entity("verbar", 0x007C); + entity("Verbar", 0x2016); + entity("vert", 0x007C); + entity("Vert", 0x2016); + entity("VerticalBar", 0x2223); + entity("VerticalLine", 0x007C); + entity("VerticalSeparator", 0x2758); + entity("VerticalTilde", 0x2240); + entity("VeryThinSpace", 
0x200A); + entity("vfr", 0x1D533); + entity("Vfr", 0x1D519); + entity("vltri", 0x22B2); + entity("vopf", 0x1D567); + entity("Vopf", 0x1D54D); + entity("vprop", 0x221D); + entity("vrtri", 0x22B3); + entity("vscr", 0x1D4CB); + entity("Vscr", 0x1D4B1); + entity("Vvdash", 0x22AA); + entity("vzigzag", 0x299A); + entity("wcirc", 0x0175); + entity("Wcirc", 0x0174); + entity("wedbar", 0x2A5F); + entity("wedge", 0x2227); + entity("Wedge", 0x22C0); + entity("wedgeq", 0x2259); + entity("weierp", 0x2118); + entity("wfr", 0x1D534); + entity("Wfr", 0x1D51A); + entity("wopf", 0x1D568); + entity("Wopf", 0x1D54E); + entity("wp", 0x2118); + entity("wr", 0x2240); + entity("wreath", 0x2240); + entity("wscr", 0x1D4CC); + entity("Wscr", 0x1D4B2); + entity("xcap", 0x22C2); + entity("xcirc", 0x25EF); + entity("xcup", 0x22C3); + entity("xdtri", 0x25BD); + entity("xfr", 0x1D535); + entity("Xfr", 0x1D51B); + entity("xgr", 0x03BE); + entity("Xgr", 0x039E); + entity("xharr", 0x27F7); + entity("xhArr", 0x27FA); + entity("xi", 0x03BE); + entity("Xi", 0x039E); + entity("xlarr", 0x27F5); + entity("xlArr", 0x27F8); + entity("xmap", 0x27FC); + entity("xnis", 0x22FB); + entity("xodot", 0x2A00); + entity("xopf", 0x1D569); + entity("Xopf", 0x1D54F); + entity("xoplus", 0x2A01); + entity("xotime", 0x2A02); + entity("xrarr", 0x27F6); + entity("xrArr", 0x27F9); + entity("xscr", 0x1D4CD); + entity("Xscr", 0x1D4B3); + entity("xsqcup", 0x2A06); + entity("xuplus", 0x2A04); + entity("xutri", 0x25B3); + entity("xvee", 0x22C1); + entity("xwedge", 0x22C0); + entity("yacute", 0x00FD); + entity("Yacute", 0x00DD); + entity("yacy", 0x044F); + entity("YAcy", 0x042F); + entity("ycirc", 0x0177); + entity("Ycirc", 0x0176); + entity("ycy", 0x044B); + entity("Ycy", 0x042B); + entity("yen", 0x00A5); + entity("yfr", 0x1D536); + entity("Yfr", 0x1D51C); + entity("yicy", 0x0457); + entity("YIcy", 0x0407); + entity("yopf", 0x1D56A); + entity("Yopf", 0x1D550); + entity("yscr", 0x1D4CE); + entity("Yscr", 0x1D4B4); + entity("yucy", 
0x044E); + entity("YUcy", 0x042E); + entity("yuml", 0x00FF); + entity("Yuml", 0x0178); + entity("zacute", 0x017A); + entity("Zacute", 0x0179); + entity("zcaron", 0x017E); + entity("Zcaron", 0x017D); + entity("zcy", 0x0437); + entity("Zcy", 0x0417); + entity("zdot", 0x017C); + entity("Zdot", 0x017B); + entity("zeetrf", 0x2128); + entity("ZeroWidthSpace", 0x200B); + entity("zeta", 0x03B6); + entity("Zeta", 0x0396); + entity("zfr", 0x1D537); + entity("Zfr", 0x2128); + entity("zgr", 0x03B6); + entity("Zgr", 0x0396); + entity("zhcy", 0x0436); + entity("ZHcy", 0x0416); + entity("zigrarr", 0x21DD); + entity("zopf", 0x1D56B); + entity("Zopf", 0x2124); + entity("zscr", 0x1D4CF); + entity("Zscr", 0x1D4B5); + entity("zwj", 0x200D); + entity("zwnj", 0x200C); + + // End of Schema calls + } + + + } diff --git a/src/org/ccil/cowan/tagsoup/LICENSE b/src/org/ccil/cowan/tagsoup/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2 b/src/org/ccil/cowan/tagsoup/MODULE_LICENSE_APACHE2 new file mode 100644 index 0000000..e69de29 diff --git a/src/org/ccil/cowan/tagsoup/PYXScanner.java b/src/org/ccil/cowan/tagsoup/PYXScanner.java new file mode 100644 index 0000000..ebfba26 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/PYXScanner.java @@ -0,0 +1,124 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +// This file is part of TagSoup. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
You may also distribute
+// and/or modify it under version 2.1 of the Academic Free License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// PYX Scanner
+
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.SAXException;
+
+/**
+A Scanner that accepts PYX format instead of HTML.
+Useful primarily for debugging.
+**/
+public class PYXScanner implements Scanner {
+
+	public void resetDocumentLocator(String publicid, String systemid) {
+	// Need this method for interface compatibility, but note
+	// that PYXScanner does not implement Locator.
+	}
+
+	/**
+	Reads PYX text from <code>r</code> one line at a time and reports
+	each line to <code>h</code>.  The first character of a line selects
+	the event: <code>(</code> calls gi, <code>)</code> calls etag,
+	<code>?</code> calls pi, <code>A</code> calls aname and aval,
+	<code>-</code> calls pcdata and <code>E</code> calls entity.
+	A pending start-tag is closed with stagc before every event except
+	an attribute.  Empty lines and lines starting with any other
+	character are ignored.  eof is reported once the input is exhausted.
+	@param r the source of PYX text
+	@param h the handler that receives the scan events
+	@throws IOException if reading from <code>r</code> fails
+	@throws SAXException if the handler throws it
+	**/
+	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
+		BufferedReader br = new BufferedReader(r);
+		String s;
+		char[] buff = null;
+		boolean instag = false;
+		while ((s = br.readLine()) != null) {
+			int size = s.length();
+			if (size == 0) {
+				// An empty line has no event character.  Without this
+				// guard buff[0] is either out of bounds (fresh buffer)
+				// or a stale character left over from the previous line.
+				continue;
+			}
+			if (buff == null || buff.length < size) {
+				buff = new char[size];
+			}
+			s.getChars(0, size, buff, 0);
+			switch (buff[0]) {
+			case '(':
+				if (instag) {
+					h.stagc(buff, 0, 0);
+					instag = false;
+				}
+				h.gi(buff, 1, size - 1);
+				instag = true;
+				break;
+			case ')':
+				if (instag) {
+					h.stagc(buff, 0, 0);
+					instag = false;
+				}
+				h.etag(buff, 1, size - 1);
+				break;
+			case '?':
+				if (instag) {
+					h.stagc(buff, 0, 0);
+					instag = false;
+				}
+				h.pi(buff, 1, size - 1);
+				break;
+			case 'A':
+				int sp = s.indexOf(' ');
+				if (sp < 0) {
+					// No name/value separator: report the rest of the
+					// line as the name with an empty value instead of
+					// passing negative lengths to the handler.
+					h.aname(buff, 1, size - 1);
+					h.aval(buff, size, 0);
+				}
+				else {
+					h.aname(buff, 1, sp - 1);
+					h.aval(buff, sp + 1, size - sp - 1);
+				}
+				break;
+			case '-':
+				if (instag) {
+					h.stagc(buff, 0, 0);
+					instag = false;
+				}
+				if (s.equals("-\\n")) {
+					buff[0] = '\n';
+					h.pcdata(buff, 0, 1);
+				}
+				else {
+					// FIXME:
+					// Does not decode \t and \\ in input
+					h.pcdata(buff, 1, size - 1);
+				}
+				break;
+			case 'E':
+				if (instag) {
+					h.stagc(buff, 0, 0);
+					instag = false;
+				}
+				h.entity(buff, 1, size - 1);
+				break;
+			default:
+//				System.err.print("Gotcha ");
+//				System.err.print(s);
+//				System.err.print('\n');
+				break;
+			}
+		}
+		h.eof(buff, 0, 0);
+	}
+
+	public void startCDATA() { }
+
+	/**
+	Reads PYX from standard input as UTF-8 and passes the scan events
+	to a PYXWriter that writes to standard output as UTF-8.
+	**/
+	public static void main(String[] argv) throws IOException, SAXException {
+		Scanner s = new PYXScanner();
+		Reader r = new InputStreamReader(System.in, "UTF-8");
+		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
+		s.scan(r, new PYXWriter(w));
+	}
+}
diff --git a/src/org/ccil/cowan/tagsoup/PYXWriter.java b/src/org/ccil/cowan/tagsoup/PYXWriter.java
new file mode 100644
index 0000000..81917dd
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/PYXWriter.java
@@ -0,0 +1,217 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// PYX Writer
+// FIXME: does not do escapes in attribute values
+// FIXME: outputs entities as bare '&' character
+
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.*;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+A ContentHandler that generates PYX format instead of XML.
+Primarily useful for debugging.
+**/
+public class PYXWriter
+	implements ScanHandler, ContentHandler, LexicalHandler {
+
+	private PrintWriter theWriter;		// where we write to
+	// Not referenced by any method of this class.
+	private static char[] dummy = new char[1];
+	private String attrName;		// saved attribute name
+
+	// ScanHandler implementation
+
+	/**
+	Writes the attribute name saved by aname, ends the line,
+	and clears the saved name.
+	**/
+	public void adup(char[] buff, int offset, int length) throws SAXException {
+		theWriter.println(attrName);
+		attrName = null;
+	}
+
+	/**
+	Starts an attribute line: writes 'A', the name and a space,
+	and saves the name for a later adup call.
+	**/
+	public void aname(char[] buff, int offset, int length) throws SAXException {
+		theWriter.print('A');
+		theWriter.write(buff, offset, length);
+		theWriter.print(' ');
+		attrName = new String(buff, offset, length);
+	}
+
+	/**
+	Writes the attribute value, ends the line, and clears the
+	saved attribute name.
+	**/
+	public void aval(char[] buff, int offset, int length) throws SAXException {
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+		attrName = null;
+	}
+
+	/**
+	Writes nothing; the code that would write the comment is disabled.
+	**/
+	public void cmnt(char [] buff, int offset, int length) throws SAXException {
+//		theWriter.print('!');
+//		theWriter.write(buff, offset, length);
+//		theWriter.println();
+	}
+
+	/** Writes nothing. **/
+	public void entity(char[] buff, int offset, int length) throws SAXException { }
+
+	/** Always returns 0. **/
+	public int getEntity() { return 0; }
+
+	/** Closes the underlying writer. **/
+	public void eof(char[] buff, int offset, int length) throws SAXException {
+		theWriter.close();
+	}
+
+	/** Writes an end-tag line: ')' followed by the name. **/
+	public void etag(char[] buff, int offset, int length) throws SAXException {
+		theWriter.print(')');
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+	}
+
+	/** Writes nothing. **/
+	public void decl(char[] buff, int offset, int length) throws SAXException {
+	}
+
+	/** Writes a start-tag line: '(' followed by the name. **/
+	public void gi(char[] buff, int offset, int length) throws SAXException {
+		theWriter.print('(');
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+	}
+
+	/** Writes a CDATA section exactly as pcdata does. **/
+	public void cdsect(char[] buff, int offset, int length) throws SAXException {
+		pcdata(buff, offset, length);
+	}
+
+	/**
+	Writes character data as one or more lines starting with '-'.
+	Each newline in the data ends the current line, if one is open,
+	and is written as its own line containing "-\n".  A tab is written
+	as "\t" and a backslash as "\\"; every other character is written
+	unchanged.  Nothing is written when length is 0.
+	**/
+	public void pcdata(char[] buff, int offset, int length) throws SAXException {
+		if (length == 0) return;	// nothing to do
+		boolean inProgress = false;
+		// From here on "length" is the exclusive end index, not a count.
+		length += offset;
+		for (int i = offset; i < length; i++) {
+			if (buff[i] == '\n') {
+				if (inProgress) {
+					theWriter.println();
+				}
+				theWriter.println("-\\n");
+				inProgress = false;
+			}
+			else {
+				if (!inProgress) {
+					theWriter.print('-');
+				}
+				switch(buff[i]) {
+				case '\t':
+					theWriter.print("\\t");
+					break;
+				case '\\':
+					theWriter.print("\\\\");
+					break;
+				default:
+					theWriter.print(buff[i]);
+				}
+				inProgress = true;
+			}
+		}
+		if (inProgress) {
+			theWriter.println();
+		}
+	}
+
+	/**
+	Starts a processing-instruction line: writes '?', the target
+	and a space.  The line is ended by pi.
+	**/
+	public void pitarget(char[] buff, int offset, int length) throws SAXException {
+		theWriter.print('?');
+		theWriter.write(buff, offset, length);
+		theWriter.write(' ');
+	}
+
+	/** Writes the given characters and ends the line. **/
+	public void pi(char[] buff, int offset, int length) throws SAXException {
+		theWriter.write(buff, offset, length);
+		theWriter.println();
+	}
+
+	/** Writes nothing; the output statement is disabled. **/
+	public void stagc(char[] buff, int offset, int length) throws SAXException {
+//		theWriter.println("!");			// FIXME
+	}
+
+	/** Writes a line containing only "!". **/
+	public void stage(char[] buff, int offset, int length) throws SAXException {
+		theWriter.println("!");			// FIXME
+	}
+
+	// SAX ContentHandler implementation
+
+	/** Writes the characters exactly as pcdata does. **/
+	public void characters(char[] buff, int offset, int length) throws SAXException {
+		pcdata(buff, offset, length);
+	}
+
+	/** Closes the underlying writer. **/
+	public void endDocument() throws SAXException {
+		theWriter.close();
+	}
+
+	/**
+	Writes an end-tag line: ')' followed by the qname, or by the
+	local name when the qname is empty.
+	**/
+	public void endElement(String uri, String localname, String qname) throws SAXException {
+		if (qname.length() == 0) qname = localname;
+		theWriter.print(')');
+		theWriter.println(qname);
+	}
+
+	/** Writes nothing. **/
+	public void endPrefixMapping(String prefix) throws SAXException { }
+
+	/** Writes the whitespace exactly as characters does. **/
+	public void ignorableWhitespace(char[] buff, int offset, int length) throws SAXException {
+		characters(buff, offset, length);
+	}
+
+	/** Writes a line holding '?', the target, a space and the data. **/
+	public void processingInstruction(String target, String data) throws SAXException {
+		theWriter.print('?');
+		theWriter.print(target);
+		theWriter.print(' ');
+		theWriter.println(data);
+	}
+
+	/** Ignores the locator. **/
+	public void setDocumentLocator(Locator locator) { }
+
+	/** Writes nothing. **/
+	public void skippedEntity(String name) throws SAXException { }
+
+	/** Writes nothing. **/
+	public void startDocument() throws SAXException { }
+
+	/**
+	Writes a start-tag line ('(' followed by the element name) and then
+	one line per attribute holding 'A', the attribute name, a space and
+	the value.  For the element and for each attribute the qname is
+	used, or the local name when the qname is empty.  Attribute values
+	are written without escaping.
+	**/
+	public void startElement(String uri, String localname, String qname,
+			Attributes atts) throws SAXException {
+		if (qname.length() == 0) qname=localname;
+		theWriter.print('(');
+		theWriter.println(qname);
+		int length = atts.getLength();
+		for (int i = 0; i < length; i++) {
+			// qname is reused here for the attribute's name.
+			qname = atts.getQName(i);
+			if (qname.length() == 0) qname = atts.getLocalName(i);
+			theWriter.print('A');
+//			theWriter.print(atts.getType(i));	// DEBUG
+			theWriter.print(qname);
+			theWriter.print(' ');
+			theWriter.println(atts.getValue(i));
+		}
+	}
+
+	/** Writes nothing. **/
+	public void startPrefixMapping(String prefix, String uri) throws SAXException { }
+
+	// Default LexicalHandler implementation
+
+	/** Passes the comment to cmnt. **/
+	public void comment(char[] ch, int start, int length) throws SAXException {
+		cmnt(ch, start, length);
+	}
+	public void endCDATA() throws SAXException { }
+	public void endDTD() throws SAXException { }
+	public void endEntity(String name) throws SAXException { }
+	public void startCDATA() throws SAXException { }
+	public void startDTD(String name, String publicId, String systemId) throws SAXException { }
+	public void startEntity(String name) throws SAXException { }
+
+	// Constructor
+
+	/**
+	Creates a PYXWriter that writes to w.  A PrintWriter is used
+	as given; any other Writer is wrapped in a new PrintWriter.
+	**/
+	public PYXWriter(Writer w) {
+		if (w instanceof PrintWriter) {
+			theWriter = (PrintWriter)w;
+		}
+		else {
+			theWriter = new PrintWriter(w);
+		}
+	}
+}
diff --git a/src/org/ccil/cowan/tagsoup/Parser.java b/src/org/ccil/cowan/tagsoup/Parser.java
new file mode 100644
index 0000000..0997f23
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Parser.java
@@ -0,0 +1,1114 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +// The TagSoup parser + +package org.ccil.cowan.tagsoup; +import java.util.HashMap; +import java.util.ArrayList; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.ext.LexicalHandler; + + +/** +The SAX parser class. +**/ +public class Parser extends DefaultHandler implements ScanHandler, XMLReader, LexicalHandler { + + // XMLReader implementation + + private ContentHandler theContentHandler = this; + private LexicalHandler theLexicalHandler = this; + private DTDHandler theDTDHandler = this; + private ErrorHandler theErrorHandler = this; + private EntityResolver theEntityResolver = this; + private Schema theSchema; + private Scanner theScanner; + private AutoDetector theAutoDetector; + + // Default values for feature flags + + private static boolean DEFAULT_NAMESPACES = true; + private static boolean DEFAULT_IGNORE_BOGONS = false; + private static boolean DEFAULT_BOGONS_EMPTY = false; + private static boolean DEFAULT_ROOT_BOGONS = true; + private static boolean DEFAULT_DEFAULT_ATTRIBUTES = true; + private static boolean DEFAULT_TRANSLATE_COLONS = false; + private static boolean DEFAULT_RESTART_ELEMENTS = true; + private static boolean DEFAULT_IGNORABLE_WHITESPACE = false; + private static boolean DEFAULT_CDATA_ELEMENTS = true; + + // Feature flags. 
+ + private boolean namespaces = DEFAULT_NAMESPACES; + private boolean ignoreBogons = DEFAULT_IGNORE_BOGONS; + private boolean bogonsEmpty = DEFAULT_BOGONS_EMPTY; + private boolean rootBogons = DEFAULT_ROOT_BOGONS; + private boolean defaultAttributes = DEFAULT_DEFAULT_ATTRIBUTES; + private boolean translateColons = DEFAULT_TRANSLATE_COLONS; + private boolean restartElements = DEFAULT_RESTART_ELEMENTS; + private boolean ignorableWhitespace = DEFAULT_IGNORABLE_WHITESPACE; + private boolean CDATAElements = DEFAULT_CDATA_ELEMENTS; + + /** + A value of "true" indicates namespace URIs and unprefixed local + names for element and attribute names will be available. + **/ + public final static String namespacesFeature = + "http://xml.org/sax/features/namespaces"; + + /** + A value of "true" indicates that XML qualified names (with prefixes) + and attributes (including xmlns* attributes) will be available. + We don't support this value. + **/ + public final static String namespacePrefixesFeature = + "http://xml.org/sax/features/namespace-prefixes"; + + /** + Reports whether this parser processes external general entities + (it doesn't). + **/ + public final static String externalGeneralEntitiesFeature = + "http://xml.org/sax/features/external-general-entities"; + + /** + Reports whether this parser processes external parameter entities + (it doesn't). + **/ + public final static String externalParameterEntitiesFeature = + "http://xml.org/sax/features/external-parameter-entities"; + + /** + May be examined only during a parse, after the startDocument() + callback has been completed; read-only. The value is true if + the document specified standalone="yes" in its XML declaration, + and otherwise is false. (It's always false.) + **/ + public final static String isStandaloneFeature = + "http://xml.org/sax/features/is-standalone"; + + /** + A value of "true" indicates that the LexicalHandler will report + the beginning and end of parameter entities (it won't). 
+ **/ + public final static String lexicalHandlerParameterEntitiesFeature = + "http://xml.org/sax/features/lexical-handler/parameter-entities"; + + /** + A value of "true" indicates that system IDs in declarations will + be absolutized (relative to their base URIs) before reporting. + (This returns true but doesn't actually do anything.) + **/ + public final static String resolveDTDURIsFeature = + "http://xml.org/sax/features/resolve-dtd-uris"; + + /** + Has a value of "true" if all XML names (for elements, + prefixes, attributes, entities, notations, and local + names), as well as Namespace URIs, will have been interned + using java.lang.String.intern. This supports fast testing of + equality/inequality against string constants, rather than forcing + slower calls to String.equals(). (We always intern.) + **/ + public final static String stringInterningFeature = + "http://xml.org/sax/features/string-interning"; + + /** + Returns "true" if the Attributes objects passed by this + parser in ContentHandler.startElement() implement the + org.xml.sax.ext.Attributes2 interface. (They don't.) + **/ + + public final static String useAttributes2Feature = + "http://xml.org/sax/features/use-attributes2"; + + /** + Returns "true" if the Locator objects passed by this parser + in ContentHandler.setDocumentLocator() implement the + org.xml.sax.ext.Locator2 interface. (They don't.) + **/ + public final static String useLocator2Feature = + "http://xml.org/sax/features/use-locator2"; + + /** + Returns "true" if, when setEntityResolver is given an object + implementing the org.xml.sax.ext.EntityResolver2 interface, + those new methods will be used. (They won't be.) + **/ + public final static String useEntityResolver2Feature = + "http://xml.org/sax/features/use-entity-resolver2"; + + /** + Controls whether the parser is reporting all validity errors + (We don't report any validity errors.) 
+ **/ + public final static String validationFeature = + "http://xml.org/sax/features/validation"; + + /** + Controls whether the parser reports Unicode normalization + errors as described in section 2.13 and Appendix B of the XML + 1.1 Recommendation. (We don't normalize.) + **/ + public final static String unicodeNormalizationCheckingFeature = +"http://xml.org/sax/features/unicode-normalization-checking"; + + /** + Controls whether, when the namespace-prefixes feature is set, + the parser treats namespace declaration attributes as being in + the http://www.w3.org/2000/xmlns/ namespace. (It doesn't.) + **/ + public final static String xmlnsURIsFeature = + "http://xml.org/sax/features/xmlns-uris"; + + /** + Returns "true" if the parser supports both XML 1.1 and XML 1.0. + (Always false.) + **/ + public final static String XML11Feature = + "http://xml.org/sax/features/xml-1.1"; + + /** + A value of "true" indicates that the parser will ignore + unknown elements. + **/ + public final static String ignoreBogonsFeature = + "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons"; + + /** + A value of "true" indicates that the parser will give unknown + elements a content model of EMPTY; a value of "false", a + content model of ANY. + **/ + public final static String bogonsEmptyFeature = + "http://www.ccil.org/~cowan/tagsoup/features/bogons-empty"; + + /** + A value of "true" indicates that the parser will allow unknown + elements to be the root element. + **/ + public final static String rootBogonsFeature = + "http://www.ccil.org/~cowan/tagsoup/features/root-bogons"; + + /** + A value of "true" indicates that the parser will return default + attribute values for missing attributes that have default values. + **/ + public final static String defaultAttributesFeature = + "http://www.ccil.org/~cowan/tagsoup/features/default-attributes"; + + /** + A value of "true" indicates that the parser will + translate colons into underscores in names. 
+ **/ + public final static String translateColonsFeature = + "http://www.ccil.org/~cowan/tagsoup/features/translate-colons"; + + /** + A value of "true" indicates that the parser will + attempt to restart the restartable elements. + **/ + public final static String restartElementsFeature = + "http://www.ccil.org/~cowan/tagsoup/features/restart-elements"; + + /** + A value of "true" indicates that the parser will + transmit whitespace in element-only content via the SAX + ignorableWhitespace callback. Normally this is not done, + because HTML is an SGML application and SGML suppresses + such whitespace. + **/ + public final static String ignorableWhitespaceFeature = + "http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace"; + + /** + A value of "true" indicates that the parser will treat CDATA + elements specially. Normally true, since the input is by + default HTML. + **/ + public final static String CDATAElementsFeature = + "http://www.ccil.org/~cowan/tagsoup/features/cdata-elements"; + + /** + Used to see some syntax events that are essential in some + applications: comments, CDATA delimiters, selected general + entity inclusions, and the start and end of the DTD (and + declaration of document element name). The Object must implement + org.xml.sax.ext.LexicalHandler. + **/ + public final static String lexicalHandlerProperty = + "http://xml.org/sax/properties/lexical-handler"; + + /** + Specifies the Scanner object this Parser uses. + **/ + public final static String scannerProperty = + "http://www.ccil.org/~cowan/tagsoup/properties/scanner"; + + /** + Specifies the Schema object this Parser uses. + **/ + public final static String schemaProperty = + "http://www.ccil.org/~cowan/tagsoup/properties/schema"; + + /** + Specifies the AutoDetector (for encoding detection) this Parser uses. 
+	**/
+	public final static String autoDetectorProperty =
+		"http://www.ccil.org/~cowan/tagsoup/properties/auto-detector";
+
+	// Due to sucky Java order of initialization issues, these
+	// entries are maintained separately from the initial values of
+	// the corresponding instance variables, but care must be taken
+	// to keep them in sync.
+
+	// Maps each recognized feature name to its current Boolean value.
+	// getFeature and setFeature throw SAXNotRecognizedException for any
+	// name that has no entry here, so every feature constant declared
+	// by this class needs exactly one entry.
+	private HashMap theFeatures = new HashMap();
+	{
+		theFeatures.put(namespacesFeature, truthValue(DEFAULT_NAMESPACES));
+		theFeatures.put(namespacePrefixesFeature, Boolean.FALSE);
+		theFeatures.put(externalGeneralEntitiesFeature, Boolean.FALSE);
+		theFeatures.put(externalParameterEntitiesFeature, Boolean.FALSE);
+		theFeatures.put(isStandaloneFeature, Boolean.FALSE);
+		theFeatures.put(lexicalHandlerParameterEntitiesFeature,
+			Boolean.FALSE);
+		theFeatures.put(resolveDTDURIsFeature, Boolean.TRUE);
+		theFeatures.put(stringInterningFeature, Boolean.TRUE);
+		theFeatures.put(useAttributes2Feature, Boolean.FALSE);
+		theFeatures.put(useLocator2Feature, Boolean.FALSE);
+		theFeatures.put(useEntityResolver2Feature, Boolean.FALSE);
+		theFeatures.put(validationFeature, Boolean.FALSE);
+		// Registered so that the declared unicode-normalization-checking
+		// feature name is recognized like the other standard features.
+		theFeatures.put(unicodeNormalizationCheckingFeature, Boolean.FALSE);
+		theFeatures.put(xmlnsURIsFeature, Boolean.FALSE);
+		theFeatures.put(XML11Feature, Boolean.FALSE);
+		theFeatures.put(ignoreBogonsFeature, truthValue(DEFAULT_IGNORE_BOGONS));
+		theFeatures.put(bogonsEmptyFeature, truthValue(DEFAULT_BOGONS_EMPTY));
+		theFeatures.put(rootBogonsFeature, truthValue(DEFAULT_ROOT_BOGONS));
+		theFeatures.put(defaultAttributesFeature, truthValue(DEFAULT_DEFAULT_ATTRIBUTES));
+		theFeatures.put(translateColonsFeature, truthValue(DEFAULT_TRANSLATE_COLONS));
+		theFeatures.put(restartElementsFeature, truthValue(DEFAULT_RESTART_ELEMENTS));
+		theFeatures.put(ignorableWhitespaceFeature, truthValue(DEFAULT_IGNORABLE_WHITESPACE));
+		theFeatures.put(CDATAElementsFeature, truthValue(DEFAULT_CDATA_ELEMENTS));
+	}
+
+	// Private clone of Boolean.valueOf that is guaranteed to return
+	//
Boolean.TRUE or Boolean.FALSE + private static Boolean truthValue(boolean b) { + return b ? Boolean.TRUE : Boolean.FALSE; + } + + + public boolean getFeature (String name) + throws SAXNotRecognizedException, SAXNotSupportedException { + Boolean b = (Boolean)theFeatures.get(name); + if (b == null) { + throw new SAXNotRecognizedException("Unknown feature " + name); + } + return b.booleanValue(); + } + + public void setFeature (String name, boolean value) + throws SAXNotRecognizedException, SAXNotSupportedException { + Boolean b = (Boolean)theFeatures.get(name); + if (b == null) { + throw new SAXNotRecognizedException("Unknown feature " + name); + } + if (value) theFeatures.put(name, Boolean.TRUE); + else theFeatures.put(name, Boolean.FALSE); + + if (name.equals(namespacesFeature)) namespaces = value; + else if (name.equals(ignoreBogonsFeature)) ignoreBogons = value; + else if (name.equals(bogonsEmptyFeature)) bogonsEmpty = value; + else if (name.equals(rootBogonsFeature)) rootBogons = value; + else if (name.equals(defaultAttributesFeature)) defaultAttributes = value; + else if (name.equals(translateColonsFeature)) translateColons = value; + else if (name.equals(restartElementsFeature)) restartElements = value; + else if (name.equals(ignorableWhitespaceFeature)) ignorableWhitespace = value; + else if (name.equals(CDATAElementsFeature)) CDATAElements = value; + } + + public Object getProperty (String name) + throws SAXNotRecognizedException, SAXNotSupportedException { + if (name.equals(lexicalHandlerProperty)) { + return theLexicalHandler == this ? 
null : theLexicalHandler; + } + else if (name.equals(scannerProperty)) { + return theScanner; + } + else if (name.equals(schemaProperty)) { + return theSchema; + } + else if (name.equals(autoDetectorProperty)) { + return theAutoDetector; + } + else { + throw new SAXNotRecognizedException("Unknown property " + name); + } + } + + public void setProperty (String name, Object value) + throws SAXNotRecognizedException, SAXNotSupportedException { + if (name.equals(lexicalHandlerProperty)) { + if (value == null) { + theLexicalHandler = this; + } + else if (value instanceof LexicalHandler) { + theLexicalHandler = (LexicalHandler)value; + } + else { + throw new SAXNotSupportedException("Your lexical handler is not a LexicalHandler"); + } + } + else if (name.equals(scannerProperty)) { + if (value instanceof Scanner) { + theScanner = (Scanner)value; + } + else { + throw new SAXNotSupportedException("Your scanner is not a Scanner"); + } + } + else if (name.equals(schemaProperty)) { + if (value instanceof Schema) { + theSchema = (Schema)value; + } + else { + throw new SAXNotSupportedException("Your schema is not a Schema"); + } + } + else if (name.equals(autoDetectorProperty)) { + if (value instanceof AutoDetector) { + theAutoDetector = (AutoDetector)value; + } + else { + throw new SAXNotSupportedException("Your auto-detector is not an AutoDetector"); + } + } + else { + throw new SAXNotRecognizedException("Unknown property " + name); + } + } + + public void setEntityResolver (EntityResolver resolver) { + theEntityResolver = (resolver == null) ? this : resolver; + } + + public EntityResolver getEntityResolver () { + return (theEntityResolver == this) ? null : theEntityResolver; + } + + public void setDTDHandler (DTDHandler handler) { + theDTDHandler = (handler == null) ? this : handler; + } + + public DTDHandler getDTDHandler () { + return (theDTDHandler == this) ? 
null : theDTDHandler; + } + + public void setContentHandler (ContentHandler handler) { + theContentHandler = (handler == null) ? this : handler; + } + + public ContentHandler getContentHandler () { + return (theContentHandler == this) ? null : theContentHandler; + } + + public void setErrorHandler (ErrorHandler handler) { + theErrorHandler = (handler == null) ? this : handler; + } + + public ErrorHandler getErrorHandler () { + return (theErrorHandler == this) ? null : theErrorHandler; + } + + public void parse (InputSource input) throws IOException, SAXException { + setup(); + Reader r = getReader(input); + theContentHandler.startDocument(); + theScanner.resetDocumentLocator(input.getPublicId(), input.getSystemId()); + if (theScanner instanceof Locator) { + theContentHandler.setDocumentLocator((Locator)theScanner); + } + if (!(theSchema.getURI().equals(""))) + theContentHandler.startPrefixMapping(theSchema.getPrefix(), + theSchema.getURI()); + theScanner.scan(r, this); + } + + public void parse (String systemid) throws IOException, SAXException { + parse(new InputSource(systemid)); + } + + // Sets up instance variables that haven't been set by setFeature + private void setup() { + if (theSchema == null) theSchema = new HTMLSchema(); + if (theScanner == null) theScanner = new HTMLScanner(); + if (theAutoDetector == null) { + theAutoDetector = new AutoDetector() { + public Reader autoDetectingReader(InputStream i) { + return new InputStreamReader(i); + } + }; + } + theStack = new Element(theSchema.getElementType(""), defaultAttributes); + thePCDATA = new Element(theSchema.getElementType(""), defaultAttributes); + theNewElement = null; + theAttributeName = null; + thePITarget = null; + theSaved = null; + theEntity = 0; + virginStack = true; + theDoctypeName = theDoctypePublicId = theDoctypeSystemId = null; + } + + // Return a Reader based on the contents of an InputSource + // Buffer both the InputStream and the Reader + private Reader getReader(InputSource s) throws 
SAXException, IOException { + Reader r = s.getCharacterStream(); + InputStream i = s.getByteStream(); + String encoding = s.getEncoding(); + String publicid = s.getPublicId(); + String systemid = s.getSystemId(); + if (r == null) { + if (i == null) i = getInputStream(publicid, systemid); +// i = new BufferedInputStream(i); + if (encoding == null) { + r = theAutoDetector.autoDetectingReader(i); + } + else { + try { + r = new InputStreamReader(i, encoding); + } + catch (UnsupportedEncodingException e) { + r = new InputStreamReader(i); + } + } + } +// r = new BufferedReader(r); + return r; + } + + // Get an InputStream based on a publicid and a systemid + private InputStream getInputStream(String publicid, String systemid) throws IOException, SAXException { + URL basis = new URL("file", "", System.getProperty("user.dir") + "/."); + URL url = new URL(basis, systemid); + URLConnection c = url.openConnection(); + return c.getInputStream(); + } + // We don't process publicids (who uses them anyhow?) + + // ScanHandler implementation + + private Element theNewElement = null; + private String theAttributeName = null; + private boolean theDoctypeIsPresent = false; + private String theDoctypePublicId = null; + private String theDoctypeSystemId = null; + private String theDoctypeName = null; + private String thePITarget = null; + private Element theStack = null; + private Element theSaved = null; + private Element thePCDATA = null; + private int theEntity = 0; // needs to support chars past U+FFFF + + public void adup(char[] buff, int offset, int length) throws SAXException { + if (theNewElement == null || theAttributeName == null) return; + theNewElement.setAttribute(theAttributeName, null, theAttributeName); + theAttributeName = null; + } + + public void aname(char[] buff, int offset, int length) throws SAXException { + if (theNewElement == null) return; + // Currently we don't rely on Schema to canonicalize + // attribute names. 
+ theAttributeName = makeName(buff, offset, length).toLowerCase(); +// System.err.println("%% Attribute name " + theAttributeName); + } + + public void aval(char[] buff, int offset, int length) throws SAXException { + if (theNewElement == null || theAttributeName == null) return; + String value = new String(buff, offset, length); +// System.err.println("%% Attribute value [" + value + "]"); + value = expandEntities(value); + theNewElement.setAttribute(theAttributeName, null, value); + theAttributeName = null; +// System.err.println("%% Aval done"); + } + + // Expand entity references in attribute values selectively. + // Currently we expand a reference iff it is properly terminated + // with a semicolon. + private String expandEntities(String src) { + int refStart = -1; + int len = src.length(); + char[] dst = new char[len]; + int dstlen = 0; + for (int i = 0; i < len; i++) { + char ch = src.charAt(i); + dst[dstlen++] = ch; +// System.err.print("i = " + i + ", d = " + dstlen + ", ch = [" + ch + "] "); + if (ch == '&' && refStart == -1) { + // start of a ref excluding & + refStart = dstlen; +// System.err.println("start of ref"); + } + else if (refStart == -1) { + // not in a ref +// System.err.println("not in ref"); + } + else if (Character.isLetter(ch) || + Character.isDigit(ch) || + ch == '#') { + // valid entity char +// System.err.println("valid"); + } + else if (ch == ';') { + // properly terminated ref +// System.err.print("got [" + new String(dst, refStart, dstlen-refStart-1) + "]"); + int ent = lookupEntity(dst, refStart, dstlen - refStart - 1); +// System.err.println(" = " + ent); + if (ent > 0xFFFF) { + ent -= 0x10000; + dst[refStart - 1] = (char)((ent>>10) + 0xD800); + dst[refStart] = (char)((ent&0x3FF) + 0xDC00); + dstlen = refStart + 1; + } + else if (ent != 0) { + dst[refStart - 1] = (char)ent; + dstlen = refStart; + } + refStart = -1; + } + else { + // improperly terminated ref +// System.err.println("end of ref"); + refStart = -1; + } + } + return 
new String(dst, 0, dstlen); + } + + public void entity(char[] buff, int offset, int length) throws SAXException { + theEntity = lookupEntity(buff, offset, length); + } + + // Process numeric character references, + // deferring to the schema for named ones. + private int lookupEntity(char[] buff, int offset, int length) { + int result = 0; + if (length < 1) return result; +// System.err.println("%% Entity at " + offset + " " + length); +// System.err.println("%% Got entity [" + new String(buff, offset, length) + "]"); + if (buff[offset] == '#') { + if (length > 1 && (buff[offset+1] == 'x' + || buff[offset+1] == 'X')) { + try { + return Integer.parseInt(new String(buff, offset + 2, length - 2), 16); + } + catch (NumberFormatException e) { return 0; } + } + try { + return Integer.parseInt(new String(buff, offset + 1, length - 1), 10); + } + catch (NumberFormatException e) { return 0; } + } + return theSchema.getEntity(new String(buff, offset, length)); + } + + public void eof(char[] buff, int offset, int length) throws SAXException { + if (virginStack) rectify(thePCDATA); + while (theStack.next() != null) { + pop(); + } + if (!(theSchema.getURI().equals(""))) + theContentHandler.endPrefixMapping(theSchema.getPrefix()); + theContentHandler.endDocument(); + } + + public void etag(char[] buff, int offset, int length) throws SAXException { + if (etag_cdata(buff, offset, length)) return; + etag_basic(buff, offset, length); + } + + private static char[] etagchars = {'<', '/', '>'}; + public boolean etag_cdata(char[] buff, int offset, int length) throws SAXException { + String currentName = theStack.name(); + // If this is a CDATA element and the tag doesn't match, + // or isn't properly formed (junk after the name), + // restart CDATA mode and process the tag as characters. 
+ if (CDATAElements && (theStack.flags() & Schema.F_CDATA) != 0) { + boolean realTag = (length == currentName.length()); + if (realTag) { + for (int i = 0; i < length; i++) { + if (Character.toLowerCase(buff[offset + i]) != Character.toLowerCase(currentName.charAt(i))) { + realTag = false; + break; + } + } + } + if (!realTag) { + theContentHandler.characters(etagchars, 0, 2); + theContentHandler.characters(buff, offset, length); + theContentHandler.characters(etagchars, 2, 1); + theScanner.startCDATA(); + return true; + } + } + return false; + } + + public void etag_basic(char[] buff, int offset, int length) throws SAXException { + theNewElement = null; + String name; + if (length != 0) { + // Canonicalize case of name + name = makeName(buff, offset, length); +// System.err.println("got etag [" + name + "]"); + ElementType type = theSchema.getElementType(name); + if (type == null) return; // mysterious end-tag + name = type.name(); + } + else { + name = theStack.name(); + } +// System.err.println("%% Got end of " + name); + + Element sp; + boolean inNoforce = false; + for (sp = theStack; sp != null; sp = sp.next()) { + if (sp.name().equals(name)) break; + if ((sp.flags() & Schema.F_NOFORCE) != 0) inNoforce = true; + } + + if (sp == null) return; // Ignore unknown etags + if (sp.next() == null || sp.next().next() == null) return; + if (inNoforce) { // inside an F_NOFORCE element? 
+ sp.preclose(); // preclose the matching element + } + else { // restartably pop everything above us + while (theStack != sp) { + restartablyPop(); + } + pop(); + } + // pop any preclosed elements now at the top + while (theStack.isPreclosed()) { + pop(); + } + restart(null); + } + + // Push restartables on the stack if possible + // e is the next element to be started, if we know what it is + private void restart(Element e) throws SAXException { + while (theSaved != null && theStack.canContain(theSaved) && + (e == null || theSaved.canContain(e))) { + Element next = theSaved.next(); + push(theSaved); + theSaved = next; + } + } + + // Pop the stack irrevocably + private void pop() throws SAXException { + if (theStack == null) return; // empty stack + String name = theStack.name(); + String localName = theStack.localName(); + String namespace = theStack.namespace(); + String prefix = prefixOf(name); + +// System.err.println("%% Popping " + name); + if (!namespaces) namespace = localName = ""; + theContentHandler.endElement(namespace, localName, name); + if (foreign(prefix, namespace)) { + theContentHandler.endPrefixMapping(prefix); +// System.err.println("%% Unmapping [" + prefix + "] for elements to " + namespace); + } + Attributes atts = theStack.atts(); + for (int i = atts.getLength() - 1; i >= 0; i--) { + String attNamespace = atts.getURI(i); + String attPrefix = prefixOf(atts.getQName(i)); + if (foreign(attPrefix, attNamespace)) { + theContentHandler.endPrefixMapping(attPrefix); +// System.err.println("%% Unmapping [" + attPrefix + "] for attributes to " + attNamespace); + } + } + theStack = theStack.next(); + } + + // Pop the stack restartably + private void restartablyPop() throws SAXException { + Element popped = theStack; + pop(); + if (restartElements && (popped.flags() & Schema.F_RESTART) != 0) { + popped.anonymize(); + popped.setNext(theSaved); + theSaved = popped; + } + } + + // Push element onto stack + private boolean virginStack = true; + private 
void push(Element e) throws SAXException {
+        String name = e.name();
+        String localName = e.localName();
+        String namespace = e.namespace();
+        String prefix = prefixOf(name);
+        /* Event order below: optional one-time doctype entity resolution,
+         * startPrefixMapping for the element's foreign prefix, startPrefixMapping
+         * for each foreign attribute prefix, then startElement; only after that is
+         * the element linked onto the stack.
+         */
+//      System.err.println("%% Pushing " + name);
+        e.clean();
+        if (!namespaces) namespace = localName = "";    // with namespaces off only the qualified name is reported
+        if (virginStack && localName.equalsIgnoreCase(theDoctypeName)) {    // first element pushed and it matches the DOCTYPE name
+            try {
+                theEntityResolver.resolveEntity(theDoctypePublicId, theDoctypeSystemId);    // result is discarded; the call only notifies the resolver
+                } catch (IOException ew) { }   // Can't be thrown for root I believe.
+            }
+        if (foreign(prefix, namespace)) {
+            theContentHandler.startPrefixMapping(prefix, namespace);
+//          System.err.println("%% Mapping [" + prefix + "] for elements to " + namespace);
+            }
+        Attributes atts = e.atts();
+        int len = atts.getLength();
+        for (int i = 0; i < len; i++) {
+            String attNamespace = atts.getURI(i);
+            String attPrefix = prefixOf(atts.getQName(i));
+            if (foreign(attPrefix, attNamespace)) {
+                theContentHandler.startPrefixMapping(attPrefix, attNamespace);
+//              System.err.println("%% Mapping [" + attPrefix + "] for attributes to " + attNamespace);
+                }
+            }
+        theContentHandler.startElement(namespace, localName, name, e.atts());
+        e.setNext(theStack);
+        theStack = e;
+        virginStack = false;
+        if (CDATAElements && (theStack.flags() & Schema.F_CDATA) != 0) {
+            theScanner.startCDATA();    // content of an F_CDATA element is scanned in CDATA mode
+            }
+        }
+
+    // Get the prefix from a QName: the text before the first ':', or "" when the name has no colon
+    private String prefixOf(String name) {
+        int i = name.indexOf(':');
+        String prefix = "";
+        if (i != -1) prefix = name.substring(0, i);
+//      System.err.println("%% " + prefix + " is prefix of " + name);
+        return prefix;
+        }
+
+    // Return true if we have a foreign name, i.e. the prefix is non-empty, the namespace is non-empty, and the namespace is not the schema's own URI
+    private boolean foreign(String prefix, String namespace) {
+//      System.err.print("%% Testing " + prefix + " and " + namespace + " for foreignness -- ");
+        boolean foreign = !(prefix.equals("") || namespace.equals("") ||
+            namespace.equals(theSchema.getURI()));
+//      System.err.println(foreign);
+        return foreign;
+        }
+
+    /**
+     * Parsing the complete XML Document Type Definition
is way too complex, + * but for many simple cases we can extract something useful from it. + * + * doctypedecl ::= '' + * DeclSep ::= PEReference | S + * intSubset ::= (markupdecl | DeclSep)* + * markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment + * ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral + */ + public void decl(char[] buff, int offset, int length) throws SAXException { + String s = new String(buff, offset, length); + String name = null; + String systemid = null; + String publicid = null; + String[] v = split(s); + if (v.length > 0 && "DOCTYPE".equals(v[0])) { + if (theDoctypeIsPresent) return; // one doctype only! + theDoctypeIsPresent = true; + if (v.length > 1) { + name = v[1]; + if (v.length>3 && "SYSTEM".equals(v[2])) { + systemid = v[3]; + } + else if (v.length > 3 && "PUBLIC".equals(v[2])) { + publicid = v[3]; + if (v.length > 4) { + systemid = v[4]; + } + else { + systemid = ""; + } + } + } + } + publicid = trimquotes(publicid); + systemid = trimquotes(systemid); + if (name != null) { + publicid = cleanPublicid(publicid); + theLexicalHandler.startDTD(name, publicid, systemid); + theLexicalHandler.endDTD(); + theDoctypeName = name; + theDoctypePublicId = publicid; + if (theScanner instanceof Locator) { // Must resolve systemid + theDoctypeSystemId = ((Locator)theScanner).getSystemId(); + try { + theDoctypeSystemId = new URL(new URL(theDoctypeSystemId), systemid).toString(); + } catch (Exception e) {} + } + } + } + + // If the String is quoted, trim the quotes. + private static String trimquotes(String in) { + if (in == null) return in; + int length = in.length(); + if (length == 0) return in; + char s = in.charAt(0); + char e = in.charAt(length - 1); + if (s == e && (s == '\'' || s == '"')) { + in = in.substring(1, in.length() - 1); + } + return in; + } + + // Split the supplied String into words or phrases seperated by spaces. 
+ // Recognises quotes around a phrase and doesn't split it. + private static String[] split(String val) throws IllegalArgumentException { + val = val.trim(); + if (val.length() == 0) { + return new String[0]; + } + else { + ArrayList l = new ArrayList(); + int s = 0; + int e = 0; + boolean sq = false; // single quote + boolean dq = false; // double quote + char lastc = 0; + int len = val.length(); + for (e=0; e < len; e++) { + char c = val.charAt(e); + if (!dq && c == '\'' && lastc != '\\') { + sq = !sq; + if (s < 0) s = e; + } + else if (!sq && c == '\"' && lastc != '\\') { + dq = !dq; + if (s < 0) s = e; + } + else if (!sq && !dq) { + if (Character.isWhitespace(c)) { + if (s >= 0) l.add(val.substring(s, e)); + s = -1; + } + else if (s < 0 && c != ' ') { + s = e; + } + } + lastc = c; + } + l.add(val.substring(s, e)); + return (String[])l.toArray(new String[0]); + } + } + + // Replace junk in publicids with spaces + private static String legal = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%"; + + private String cleanPublicid(String src) { + if (src == null) return null; + int len = src.length(); + StringBuffer dst = new StringBuffer(len); + boolean suppressSpace = true; + for (int i = 0; i < len; i++) { + char ch = src.charAt(i); + if (legal.indexOf(ch) != -1) { // legal but not whitespace + dst.append(ch); + suppressSpace = false; + } + else if (suppressSpace) { // normalizable whitespace or junk + ; + } + else { + dst.append(' '); + suppressSpace = true; + } + } +// System.err.println("%% Publicid [" + dst.toString().trim() + "]"); + return dst.toString().trim(); // trim any final junk whitespace + } + + + public void gi(char[] buff, int offset, int length) throws SAXException { + if (theNewElement != null) return; + String name = makeName(buff, offset, length); + if (name == null) return; + ElementType type = theSchema.getElementType(name); + if (type == null) { + // Suppress unknown elements if ignore-bogons is on + if 
(ignoreBogons) return; + int bogonModel = bogonsEmpty ? Schema.M_EMPTY : Schema.M_ANY; + int bogonMemberOf = rootBogons ? Schema.M_ANY : (Schema.M_ANY & ~ Schema.M_ROOT); + theSchema.elementType(name, bogonModel, bogonMemberOf, 0); + if (!rootBogons) theSchema.parent(name, theSchema.rootElementType().name()); + type = theSchema.getElementType(name); + } + + theNewElement = new Element(type, defaultAttributes); +// System.err.println("%% Got GI " + theNewElement.name()); + } + + public void cdsect(char[] buff, int offset, int length) throws SAXException { + theLexicalHandler.startCDATA(); + pcdata(buff, offset, length); + theLexicalHandler.endCDATA(); + } + public void pcdata(char[] buff, int offset, int length) throws SAXException { + if (length == 0) return; + boolean allWhite = true; + for (int i = 0; i < length; i++) { + if (!Character.isWhitespace(buff[offset+i])) { + allWhite = false; + } + } + if (allWhite && !theStack.canContain(thePCDATA)) { + if (ignorableWhitespace) { + theContentHandler.ignorableWhitespace(buff, offset, length); + } + } + else { + rectify(thePCDATA); + theContentHandler.characters(buff, offset, length); + } + } + + public void pitarget(char[] buff, int offset, int length) throws SAXException { + if (theNewElement != null) return; + thePITarget = makeName(buff, offset, length).replace(':', '_'); + } + + public void pi(char[] buff, int offset, int length) throws SAXException { + if (theNewElement != null || thePITarget == null) return; + if ("xml".equalsIgnoreCase(thePITarget)) return; +// if (length > 0 && buff[length - 1] == '?') System.err.println("%% Removing ? from PI"); + if (length > 0 && buff[length - 1] == '?') length--; // remove trailing ? 
+        theContentHandler.processingInstruction(thePITarget,
+            new String(buff, offset, length));
+        thePITarget = null;    // the target is consumed; a new pitarget() call is needed before the next pi()
+    }
+    /* Close of a start-tag.  Does nothing when no start-tag is pending; otherwise
+     * the stack is rectified for the pending element and, if the element then on
+     * top of the stack has an empty content model, an end-tag is processed at once
+     * with the same arguments.
+     */
+    public void stagc(char[] buff, int offset, int length) throws SAXException {
+//      System.err.println("%% Start-tag");
+        if (theNewElement == null) return;
+        rectify(theNewElement);
+        if (theStack.model() == Schema.M_EMPTY) {
+            // Force an immediate end tag
+            etag_basic(buff, offset, length);
+        }
+    }
+    /* Close of an empty-tag: like stagc(), but the end-tag is always processed
+     * immediately, whatever the element's content model.
+     */
+    public void stage(char[] buff, int offset, int length) throws SAXException {
+//      System.err.println("%% Empty-tag");
+        if (theNewElement == null) return;
+        rectify(theNewElement);
+        // Force an immediate end tag
+        etag_basic(buff, offset, length);
+    }
+
+    // Comment buffer is twice the size of the output buffer -- NOTE(review): this field is not referenced by any code visible in this part of the file; confirm whether it is still needed
+    private char[] theCommentBuffer = new char[2000];
+    public void cmnt(char[] buff, int offset, int length) throws SAXException {    // comments are passed straight through to the lexical handler
+        theLexicalHandler.comment(buff, offset, length);
+    }
+
+    // Rectify the stack, pushing and popping as needed
+    // so that the argument can be safely pushed
+    private void rectify(Element e) throws SAXException {
+        Element sp;
+        while (true) {    // find the nearest stack element that can contain e, wrapping e in its natural parent each time none can
+            for (sp = theStack; sp != null; sp = sp.next()) {
+                if (sp.canContain(e)) break;
+            }
+            if (sp != null) break;
+            ElementType parentType = e.parent();
+            if (parentType == null) break;    // no natural parent left to try
+            Element parent = new Element(parentType, defaultAttributes);
+//          System.err.println("%% Ascending from " + e.name() + " to " + parent.name());
+            parent.setNext(e);    // the chain runs from the outermost implied parent down to the original element
+            e = parent;
+        }
+        if (sp == null) return;        // don't know what to do
+        while (theStack != sp) {    // restartably close everything above the container, but never the two bottom-most stack entries
+            if (theStack == null || theStack.next() == null ||
+                theStack.next().next() == null) break;
+            restartablyPop();
+        }
+        while (e != null) {    // push the chain outermost-first, skipping elements whose name is empty
+            Element nexte = e.next();    // read before push(), which relinks e onto the stack
+            if (!e.name().equals("")) push(e);
+            e = nexte;
+            restart(e);
+        }
+        theNewElement = null;
+    }
+    /* Returns the current value of the theEntity field. */
+    public int getEntity() {
+        return theEntity;
+    }
+
+    // Return the argument as a valid XML name
+    // This no longer lowercases the result: we depend on Schema to
+    //
canonicalize case.
+    private String makeName(char[] buff, int offset, int length) {
+        StringBuffer dst = new StringBuffer(length + 2);
+        boolean seenColon = false;    // only the first colon is kept; later ones are dropped
+        boolean start = true;    // true at the start of the name and again right after the kept colon
+//      String src = new String(buff, offset, length); // DEBUG
+        for (; length-- > 0; offset++) {    // characters matched by none of the branches below are silently dropped
+            char ch = buff[offset];
+            if (Character.isLetter(ch) || ch == '_') {
+                start = false;
+                dst.append(ch);
+            }
+            else if (Character.isDigit(ch) || ch == '-' || ch == '.') {
+                if (start) dst.append('_');    // these characters are kept, but get a '_' in front when they would begin a name part
+                start = false;
+                dst.append(ch);
+            }
+            else if (ch == ':' && !seenColon) {
+                seenColon = true;
+                if (start) dst.append('_');    // a colon with nothing before it gets a '_' as its prefix
+                start = true;
+                dst.append(translateColons ? '_' : ch);
+            }
+        }
+        int dstLength = dst.length();
+        if (dstLength == 0 || dst.charAt(dstLength - 1) == ':') dst.append('_');    // never return an empty name or one ending in ':'
+//      System.err.println("Made name \"" + dst + "\" from \"" + src + "\"");
+        return dst.toString().intern();
+    }
+
+    // Default LexicalHandler implementation: every method below has an empty body
+
+    public void comment(char[] ch, int start, int length) throws SAXException { }
+    public void endCDATA() throws SAXException { }
+    public void endDTD() throws SAXException { }
+    public void endEntity(String name) throws SAXException { }
+    public void startCDATA() throws SAXException { }
+    public void startDTD(String name, String publicid, String systemid) throws SAXException { }
+    public void startEntity(String name) throws SAXException { }
+
+    }
diff --git a/src/org/ccil/cowan/tagsoup/ScanHandler.java b/src/org/ccil/cowan/tagsoup/ScanHandler.java
new file mode 100644
index 0000000..368569a
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/ScanHandler.java
@@ -0,0 +1,119 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Scanner handler
+
+package org.ccil.cowan.tagsoup;
+import org.xml.sax.SAXException;
+
+/**
+An interface that Scanners use to report events in the input stream.
+Every reporting method takes the same three arguments: a character buffer,
+the offset of the first relevant character in it, and the number of
+relevant characters.  (NOTE(review): this reading of the arguments is taken
+from how the Parser implementation in this package consumes them.)
+**/
+
+public interface ScanHandler {
+    /**
+    Reports an attribute name without a value.
+    **/
+
+    public void adup(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports an attribute name; a value will follow.
+    **/
+
+    public void aname(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports an attribute value.
+    **/
+
+    public void aval(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+      * Reports the content of a CDATA section (not a CDATA element)
+      */
+    public void cdsect(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+     * Reports a declaration - typically a DOCTYPE
+     */
+
+    public void decl(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports an entity reference or character reference.
+    The resulting value is read back through {@link #getEntity}.
+    **/
+
+    public void entity(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports EOF.
+    **/
+
+    public void eof(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports an end-tag.
+    **/
+
+    public void etag(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports the general identifier (element type name) of a start-tag.
+    **/
+
+    public void gi(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports character content.
+    **/
+
+    public void pcdata(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports the data part of a processing instruction.
+    **/
+
+    public void pi(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports the target part of a processing instruction.
+    **/
+
+    public void pitarget(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports the close of a start-tag.
+    **/
+
+    public void stagc(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports the close of an empty-tag.
+    **/
+
+    public void stage(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Reports a comment.
+    **/
+
+    public void cmnt(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Returns the value of the last entity or character reference reported.
+    **/
+
+    public int getEntity();
+    }
diff --git a/src/org/ccil/cowan/tagsoup/Scanner.java b/src/org/ccil/cowan/tagsoup/Scanner.java
new file mode 100644
index 0000000..04c8b97
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Scanner.java
@@ -0,0 +1,50 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Scanner
+
+package org.ccil.cowan.tagsoup;
+import java.io.IOException;
+import java.io.Reader;
+import org.xml.sax.SAXException;
+
+/**
+An interface allowing Parser to invoke scanners.
+**/
+
+public interface Scanner {
+
+    /**
+    Invoke a scanner.
+    @param r A source of characters to scan
+    @param h A ScanHandler to report events to
+    @throws IOException declared for failures reading from r
+    @throws SAXException declared so that exceptions from h can propagate
+    **/
+
+    public void scan(Reader r, ScanHandler h) throws IOException, SAXException;
+
+    /**
+    Reset the embedded locator.
+    @param publicid The publicid of the source
+    @param systemid The systemid of the source
+    **/
+
+    public void resetDocumentLocator(String publicid, String systemid);
+
+    /**
+    Signal to the scanner to start CDATA content mode.
+    Parser calls this after pushing an element whose type has the F_CDATA flag.
+    **/
+
+    public void startCDATA();
+
+    }
diff --git a/src/org/ccil/cowan/tagsoup/Schema.java b/src/org/ccil/cowan/tagsoup/Schema.java
new file mode 100644
index 0000000..0d99a23
--- /dev/null
+++ b/src/org/ccil/cowan/tagsoup/Schema.java
@@ -0,0 +1,170 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Model of document
+
+package org.ccil.cowan.tagsoup;
+import java.util.HashMap;
+
+/**
+Abstract class representing a TSSL schema.
+Actual TSSL schemas are compiled into concrete subclasses of this class.
+**/ + +public abstract class Schema { + + public static final int M_ANY = 0xFFFFFFFF; + public static final int M_EMPTY = 0; + public static final int M_PCDATA = 1 << 30; + public static final int M_ROOT = 1 << 31; + + + public static final int F_RESTART = 1; + public static final int F_CDATA = 2; + public static final int F_NOFORCE = 4; + + private HashMap theEntities = + new HashMap(); // String -> Character + private HashMap theElementTypes = + new HashMap(); // String -> ElementType + + private String theURI = ""; + private String thePrefix = ""; + private ElementType theRoot = null; + + /** + Add or replace an element type for this schema. + @param name Name (Qname) of the element + @param model Models of the element's content as a vector of bits + @param memberOf Models the element is a member of as a vector of bits + @param flags Flags for the element + **/ + + public void elementType(String name, int model, int memberOf, int flags) { + ElementType e = new ElementType(name, model, memberOf, flags, this); + theElementTypes.put(name.toLowerCase(), e); + if (memberOf == M_ROOT) theRoot = e; + } + + /** + Get the root element of this schema + **/ + + public ElementType rootElementType() { + return theRoot; + } + + /** + Add or replace a default attribute for an element type in this schema. + @param elemName Name (Qname) of the element type + @param attrName Name (Qname) of the attribute + @param type Type of the attribute + @param value Default value of the attribute; null if no default + **/ + + public void attribute(String elemName, String attrName, + String type, String value) { + ElementType e = getElementType(elemName); + if (e == null) { + throw new Error("Attribute " + attrName + + " specified for unknown element type " + + elemName); + } + e.setAttribute(attrName, type, value); + } + + /** + Specify natural parent of an element in this schema. 
+ @param name Name of the child element + @param parentName Name of the parent element + **/ + + public void parent(String name, String parentName) { + ElementType child = getElementType(name); + ElementType parent = getElementType(parentName); + if (child == null) { + throw new Error("No child " + name + " for parent " + parentName); + } + if (parent == null) { + throw new Error("No parent " + parentName + " for child " + name); + } + child.setParent(parent); + } + + /** + Add to or replace a character entity in this schema. + @param name Name of the entity + @param value Value of the entity + **/ + + public void entity(String name, int value) { + theEntities.put(name, new Integer(value)); + } + + /** + Get an ElementType by name. + @param name Name (Qname) of the element type + @return The corresponding ElementType + **/ + + public ElementType getElementType(String name) { + return (ElementType)(theElementTypes.get(name.toLowerCase())); + } + + /** + Get an entity value by name. + @param name Name of the entity + @return The corresponding character, or 0 if none + **/ + + public int getEntity(String name) { +// System.err.println("%% Looking up entity " + name); + Integer ch = (Integer)theEntities.get(name); + if (ch == null) return 0; + return ch.intValue(); + } + + /** + Return the URI (namespace name) of this schema. + **/ + + public String getURI() { + return theURI; + } + + /** + Return the prefix of this schema. + **/ + + public String getPrefix() { + return thePrefix; + } + + /** + Change the URI (namespace name) of this schema. + **/ + + public void setURI(String uri) { + theURI = uri; + } + + /** + Change the prefix of this schema. 
+ **/ + + public void setPrefix(String prefix) { + thePrefix = prefix; + } + + } diff --git a/src/org/ccil/cowan/tagsoup/XMLWriter.java b/src/org/ccil/cowan/tagsoup/XMLWriter.java new file mode 100644 index 0000000..0dc7a03 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/XMLWriter.java @@ -0,0 +1,1435 @@ +// XMLWriter.java - serialize an XML document. +// Written by David Megginson, david@megginson.com +// and placed by him into the public domain. +// Extensively modified by John Cowan for TagSoup. +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +package org.ccil.cowan.tagsoup; + +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Properties; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.AttributesImpl; +import org.xml.sax.helpers.NamespaceSupport; +import org.xml.sax.helpers.XMLFilterImpl; +import org.xml.sax.ext.LexicalHandler; + + +/** + * Filter to write an XML document from a SAX event stream. + * + *

This class can be used by itself or as part of a SAX event + * stream: it takes as input a series of SAX2 ContentHandler + * events and uses the information in those events to write + * an XML document. Since this class is a filter, it can also + * pass the events on down a filter chain for further processing + * (you can use the XMLWriter to take a snapshot of the current + * state at any point in a filter chain), and it can be + * used directly as a ContentHandler for a SAX2 XMLReader.

+ * + *

The client creates a document by invoking the methods for + * standard SAX2 events, always beginning with the + * {@link #startDocument startDocument} method and ending with + * the {@link #endDocument endDocument} method. There are convenience + * methods provided so that clients do not have to create empty + * attribute lists or provide empty strings as parameters; for + * example, the method invocation

+ * + *
+ * w.startElement("foo");
+ * 
+ * + *

is equivalent to the regular SAX2 ContentHandler method

+ * + *
+ * w.startElement("", "foo", "", new AttributesImpl());
+ * 
+ * + *

Except that it is more efficient because it does not allocate + * a new empty attribute list each time. The following code will send + * a simple XML document to standard output:

+ * + *
+ * XMLWriter w = new XMLWriter();
+ *
+ * w.startDocument();
+ * w.startElement("greeting");
+ * w.characters("Hello, world!");
+ * w.endElement("greeting");
+ * w.endDocument();
+ * 
+ * + *

The resulting document will look like this:

+ * + *
+ * <?xml version="1.0" standalone="yes"?>
+ *
+ * <greeting>Hello, world!</greeting>
+ * 
+ * + *

In fact, there is an even simpler convenience method, + * dataElement, designed for writing elements that + * contain only character data, so the code to generate the + * document could be shortened to

+ * + *
+ * XMLWriter w = new XMLWriter();
+ *
+ * w.startDocument();
+ * w.dataElement("greeting", "Hello, world!");
+ * w.endDocument();
+ * 
+ * + *

Whitespace

+ * + *

According to the XML Recommendation, all whitespace + * in an XML document is potentially significant to an application, + * so this class never adds newlines or indentation. If you + * insert three elements in a row, as in

+ * + *
+ * w.dataElement("item", "1");
+ * w.dataElement("item", "2");
+ * w.dataElement("item", "3");
+ * 
+ * + *

you will end up with

+ * + *
+ * <item>1</item><item>2</item><item>3</item>
+ * 
+ * + *

You need to invoke one of the characters methods + * explicitly to add newlines or indentation. Alternatively, you + * can use {@link com.megginson.sax.DataWriter DataWriter}, which + * is derived from this class -- it is optimized for writing + * purely data-oriented (or field-oriented) XML, and does automatic + * linebreaks and indentation (but does not support mixed content + * properly).

+ * + * + *

Namespace Support

+ * + *

The writer contains extensive support for XML Namespaces, so that + * a client application does not have to keep track of prefixes and + * supply xmlns attributes. By default, the XML writer will + * generate Namespace declarations in the form _NS1, _NS2, etc., wherever + * they are needed, as in the following example:

+ * + *
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * 
+ * + *

The resulting document will look like this:

+ * + *
+ * <?xml version="1.0" standalone="yes"?>
+ *
+ * <_NS1:foo xmlns:_NS1="http://www.foo.com/ns/"/>
+ * 
+ * + *

In many cases, document authors will prefer to choose their + * own prefixes rather than using the (ugly) default names. The + * XML writer allows two methods for selecting prefixes:

+ * + *
    + *
  1. the qualified name
  2. + *
  3. the {@link #setPrefix setPrefix} method.
  4. + *
+ * + *

Whenever the XML writer finds a new Namespace URI, it checks + * to see if a qualified (prefixed) name is also available; if so + * it attempts to use the name's prefix (as long as the prefix is + * not already in use for another Namespace URI).

+ * + *

Before writing a document, the client can also pre-map a prefix + * to a Namespace URI with the setPrefix method:

+ * + *
+ * w.setPrefix("http://www.foo.com/ns/", "foo");
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * 
+ * + *

The resulting document will look like this:

+ * + *
+ * <?xml version="1.0" standalone="yes"?>
+ *
+ * <foo:foo xmlns:foo="http://www.foo.com/ns/"/>
+ * 
+ * + *

The default Namespace simply uses an empty string as the prefix:

+ * + *
+ * w.setPrefix("http://www.foo.com/ns/", "");
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * 
+ * + *

The resulting document will look like this:

+ * + *
+ * <?xml version="1.0" standalone="yes"?>
+ *
+ * <foo xmlns="http://www.foo.com/ns/"/>
+ * 
+ * + *

By default, the XML writer will not declare a Namespace until + * it is actually used. Sometimes, this approach will create + * a large number of Namespace declarations, as in the following + * example:

+ * + *
+ * <?xml version="1.0" standalone="yes"?>
+ *
+ * <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ *  <rdf:Description about="http://www.foo.com/ids/books/12345">
+ *   <dc:title xmlns:dc="http://www.purl.org/dc/">A Dark Night</dc:title>
+ *   <dc:creator xmlns:dc="http://www.purl.org/dc/">Jane Smith</dc:creator>
+ *   <dc:date xmlns:dc="http://www.purl.org/dc/">2000-09-09</dc:date>
+ *  </rdf:Description>
+ * </rdf:RDF>
+ * 
+ * + *

The "rdf" prefix is declared only once, because the RDF Namespace + * is used by the root element and can be inherited by all of its + * descendants; the "dc" prefix, on the other hand, is declared three + * times, because no higher element uses the Namespace. To solve this + * problem, you can instruct the XML writer to predeclare Namespaces + * on the root element even if they are not used there:

+ * + *
+ * w.forceNSDecl("http://www.purl.org/dc/");
+ * 
+ * + *

Now, the "dc" prefix will be declared on the root element even + * though it's not needed there, and can be inherited by its + * descendants:

+ * + *
+ * <?xml version="1.0" standalone="yes"?>
+ *
+ * <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ *             xmlns:dc="http://www.purl.org/dc/">
+ *  <rdf:Description about="http://www.foo.com/ids/books/12345">
+ *   <dc:title>A Dark Night</dc:title>
+ *   <dc:creator>Jane Smith</dc:creator>
+ *   <dc:date>2000-09-09</dc:date>
+ *  </rdf:Description>
+ * </rdf:RDF>
+ * 
+ * + *

This approach is also useful for declaring Namespace prefixes + * that will be used by qualified names appearing in attribute values or + * character data.

+ * + * @author David Megginson, david@megginson.com + * @version 0.2 + * @see org.xml.sax.XMLFilter + * @see org.xml.sax.ContentHandler + */ +public class XMLWriter extends XMLFilterImpl implements LexicalHandler +{ + + + //////////////////////////////////////////////////////////////////// + // Constructors. + //////////////////////////////////////////////////////////////////// + + + /** + * Create a new XML writer. + * + *

Write to standard output.

+ */ + public XMLWriter () + { + init(null); + } + + + /** + * Create a new XML writer. + * + *

Write to the writer provided.

+ * + * @param writer The output destination, or null to use standard + * output. + */ + public XMLWriter (Writer writer) + { + init(writer); + } + + + /** + * Create a new XML writer. + * + *

Use the specified XML reader as the parent.

+ * + * @param xmlreader The parent in the filter chain, or null + * for no parent. + */ + public XMLWriter (XMLReader xmlreader) + { + super(xmlreader); + init(null); + } + + + /** + * Create a new XML writer. + * + *

Use the specified XML reader as the parent, and write + * to the specified writer.

+ * + * @param xmlreader The parent in the filter chain, or null + * for no parent. + * @param writer The output destination, or null to use standard + * output. + */ + public XMLWriter (XMLReader xmlreader, Writer writer) + { + super(xmlreader); + init(writer); + } + + + /** + * Internal initialization method. + * + *

All of the public constructors invoke this method. + * + * @param writer The output destination, or null to use + * standard output. + */ + private void init (Writer writer) + { + setOutput(writer); + nsSupport = new NamespaceSupport(); + prefixTable = new Hashtable(); + forcedDeclTable = new Hashtable(); + doneDeclTable = new Hashtable(); + outputProperties = new Properties(); + } + + + + //////////////////////////////////////////////////////////////////// + // Public methods. + //////////////////////////////////////////////////////////////////// + + + /** + * Reset the writer. + * + *

This method is especially useful if the writer throws an + * exception before it is finished, and you want to reuse the + * writer for a new document. It is usually a good idea to + * invoke {@link #flush flush} before resetting the writer, + * to make sure that no output is lost.

+ * + *

This method is invoked automatically by the + * {@link #startDocument startDocument} method before writing + * a new document.

+ * + *

Note: this method will not + * clear the prefix or URI information in the writer or + * the selected output writer.

+ * + * @see #flush + */ + public void reset () + { + elementLevel = 0; + prefixCounter = 0; + nsSupport.reset(); + } + + + /** + * Flush the output. + * + *

This method flushes the output stream. It is especially useful + * when you need to make certain that the entire document has + * been written to output but do not want to close the output + * stream.

+ * + *

This method is invoked automatically by the + * {@link #endDocument endDocument} method after writing a + * document.

+ * + * @see #reset + */ + public void flush () + throws IOException + { + output.flush(); + } + + + /** + * Set a new output destination for the document. + * + * @param writer The output destination, or null to use + * standard output. + * @return The current output writer. + * @see #flush + */ + public void setOutput (Writer writer) + { + if (writer == null) { + output = new OutputStreamWriter(System.out); + } else { + output = writer; + } + } + + + /** + * Specify a preferred prefix for a Namespace URI. + * + *

Note that this method does not actually force the Namespace + * to be declared; to do that, use the {@link + * #forceNSDecl(java.lang.String) forceNSDecl} method as well.

+ * + * @param uri The Namespace URI. + * @param prefix The preferred prefix, or "" to select + * the default Namespace. + * @see #getPrefix + * @see #forceNSDecl(java.lang.String) + * @see #forceNSDecl(java.lang.String,java.lang.String) + */ + public void setPrefix (String uri, String prefix) + { + prefixTable.put(uri, prefix); + } + + + /** + * Get the current or preferred prefix for a Namespace URI. + * + * @param uri The Namespace URI. + * @return The preferred prefix, or "" for the default Namespace. + * @see #setPrefix + */ + public String getPrefix (String uri) + { + return (String)prefixTable.get(uri); + } + + + /** + * Force a Namespace to be declared on the root element. + * + *

By default, the XMLWriter will declare only the Namespaces + * needed for an element; as a result, a Namespace may be + * declared many places in a document if it is not used on the + * root element.

+ * + *

This method forces a Namespace to be declared on the root + * element even if it is not used there, and reduces the number + * of xmlns attributes in the document.

+ * + * @param uri The Namespace URI to declare. + * @see #forceNSDecl(java.lang.String,java.lang.String) + * @see #setPrefix + */ + public void forceNSDecl (String uri) + { + forcedDeclTable.put(uri, Boolean.TRUE); + } + + + /** + * Force a Namespace declaration with a preferred prefix. + * + *

This is a convenience method that invokes {@link + * #setPrefix setPrefix} then {@link #forceNSDecl(java.lang.String) + * forceNSDecl}.

+ * + * @param uri The Namespace URI to declare on the root element. + * @param prefix The preferred prefix for the Namespace, or "" + * for the default Namespace. + * @see #setPrefix + * @see #forceNSDecl(java.lang.String) + */ + public void forceNSDecl (String uri, String prefix) + { + setPrefix(uri, prefix); + forceNSDecl(uri); + } + + + + //////////////////////////////////////////////////////////////////// + // Methods from org.xml.sax.ContentHandler. + //////////////////////////////////////////////////////////////////// + + + /** + * Write the XML declaration at the beginning of the document. + * + * Pass the event on down the filter chain for further processing. + * + * @exception org.xml.sax.SAXException If there is an error + * writing the XML declaration, or if a handler further down + * the filter chain raises an exception. + * @see org.xml.sax.ContentHandler#startDocument + */ + public void startDocument () + throws SAXException + { + reset(); + if (!("yes".equals(outputProperties.getProperty(OMIT_XML_DECLARATION, "no")))) { + write("\n"); + } else { + write(" standalone=\""); + write(standalone); + write("\""); + } + } + super.startDocument(); + } + + + /** + * Write a newline at the end of the document. + * + * Pass the event on down the filter chain for further processing. + * + * @exception org.xml.sax.SAXException If there is an error + * writing the newline, or if a handler further down + * the filter chain raises an exception. + * @see org.xml.sax.ContentHandler#endDocument + */ + public void endDocument () + throws SAXException + { + write('\n'); + super.endDocument(); + try { + flush(); + } catch (IOException e) { + throw new SAXException(e); + } + } + + + /** + * Write a start tag. + * + * Pass the event on down the filter chain for further processing. + * + * @param uri The Namespace URI, or the empty string if none + * is available. + * @param localName The element's local (unprefixed) name (required). 
+ * @param qName The element's qualified (prefixed) name, or the + * empty string is none is available. This method will + * use the qName as a template for generating a prefix + * if necessary, but it is not guaranteed to use the + * same qName. + * @param atts The element's attribute list (must not be null). + * @exception org.xml.sax.SAXException If there is an error + * writing the start tag, or if a handler further down + * the filter chain raises an exception. + * @see org.xml.sax.ContentHandler#startElement + */ + public void startElement (String uri, String localName, + String qName, Attributes atts) + throws SAXException + { + elementLevel++; + nsSupport.pushContext(); + if (forceDTD && !hasOutputDTD) startDTD(localName == null ? qName : localName, "", ""); + write('<'); + writeName(uri, localName, qName, true); + writeAttributes(atts); + if (elementLevel == 1) { + forceNSDecls(); + } + writeNSDecls(); + write('>'); +// System.out.println("%%%% startElement [" + qName + "] htmlMode = " + htmlMode); + if (htmlMode && (qName.equals("script") || qName.equals("style"))) { + cdataElement = true; +// System.out.println("%%%% CDATA element"); + } + super.startElement(uri, localName, qName, atts); + } + + + /** + * Write an end tag. + * + * Pass the event on down the filter chain for further processing. + * + * @param uri The Namespace URI, or the empty string if none + * is available. + * @param localName The element's local (unprefixed) name (required). + * @param qName The element's qualified (prefixed) name, or the + * empty string is none is available. This method will + * use the qName as a template for generating a prefix + * if necessary, but it is not guaranteed to use the + * same qName. + * @exception org.xml.sax.SAXException If there is an error + * writing the end tag, or if a handler further down + * the filter chain raises an exception. 
+ * @see org.xml.sax.ContentHandler#endElement + */ + public void endElement (String uri, String localName, String qName) + throws SAXException + { + if (!(htmlMode && + (uri.equals("http://www.w3.org/1999/xhtml") || + uri.equals("")) && + (qName.equals("area") || qName.equals("base") || + qName.equals("basefont") || qName.equals("br") || + qName.equals("col") || qName.equals("frame") || + qName.equals("hr") || qName.equals("img") || + qName.equals("input") || qName.equals("isindex") || + qName.equals("link") || qName.equals("meta") || + qName.equals("param")))) { + write("'); + } + if (elementLevel == 1) { + write('\n'); + } + cdataElement = false; + super.endElement(uri, localName, qName); + nsSupport.popContext(); + elementLevel--; + } + + + /** + * Write character data. + * + * Pass the event on down the filter chain for further processing. + * + * @param ch The array of characters to write. + * @param start The starting position in the array. + * @param length The number of characters to write. + * @exception org.xml.sax.SAXException If there is an error + * writing the characters, or if a handler further down + * the filter chain raises an exception. + * @see org.xml.sax.ContentHandler#characters + */ + public void characters (char ch[], int start, int len) + throws SAXException + { + if (!cdataElement) { + writeEsc(ch, start, len, false); + } + else { + for (int i = start; i < start + len; i++) { + write(ch[i]); + } + } + super.characters(ch, start, len); + } + + + /** + * Write ignorable whitespace. + * + * Pass the event on down the filter chain for further processing. + * + * @param ch The array of characters to write. + * @param start The starting position in the array. + * @param length The number of characters to write. + * @exception org.xml.sax.SAXException If there is an error + * writing the whitespace, or if a handler further down + * the filter chain raises an exception. 
+ * @see org.xml.sax.ContentHandler#ignorableWhitespace + */ + public void ignorableWhitespace (char ch[], int start, int length) + throws SAXException + { + writeEsc(ch, start, length, false); + super.ignorableWhitespace(ch, start, length); + } + + + + /** + * Write a processing instruction. + * + * Pass the event on down the filter chain for further processing. + * + * @param target The PI target. + * @param data The PI data. + * @exception org.xml.sax.SAXException If there is an error + * writing the PI, or if a handler further down + * the filter chain raises an exception. + * @see org.xml.sax.ContentHandler#processingInstruction + */ + public void processingInstruction (String target, String data) + throws SAXException + { + write(""); + if (elementLevel < 1) { + write('\n'); + } + super.processingInstruction(target, data); + } + + + + //////////////////////////////////////////////////////////////////// + // Additional markup. + //////////////////////////////////////////////////////////////////// + + /** + * Write an empty element. + * + * This method writes an empty element tag rather than a start tag + * followed by an end tag. Both a {@link #startElement + * startElement} and an {@link #endElement endElement} event will + * be passed on down the filter chain. + * + * @param uri The element's Namespace URI, or the empty string + * if the element has no Namespace or if Namespace + * processing is not being performed. + * @param localName The element's local name (without prefix). This + * parameter must be provided. + * @param qName The element's qualified name (with prefix), or + * the empty string if none is available. This parameter + * is strictly advisory: the writer may or may not use + * the prefix attached. + * @param atts The element's attribute list. + * @exception org.xml.sax.SAXException If there is an error + * writing the empty tag, or if a handler further down + * the filter chain raises an exception. 
+ * @see #startElement + * @see #endElement + */ + public void emptyElement (String uri, String localName, + String qName, Attributes atts) + throws SAXException + { + nsSupport.pushContext(); + write('<'); + writeName(uri, localName, qName, true); + writeAttributes(atts); + if (elementLevel == 1) { + forceNSDecls(); + } + writeNSDecls(); + write("/>"); + super.startElement(uri, localName, qName, atts); + super.endElement(uri, localName, qName); + } + + + + //////////////////////////////////////////////////////////////////// + // Convenience methods. + //////////////////////////////////////////////////////////////////// + + + + /** + * Start a new element without a qname or attributes. + * + *

This method will provide a default empty attribute + * list and an empty string for the qualified name. + * It invokes {@link + * #startElement(String, String, String, Attributes)} + * directly.

+ * + * @param uri The element's Namespace URI. + * @param localName The element's local name. + * @exception org.xml.sax.SAXException If there is an error + * writing the start tag, or if a handler further down + * the filter chain raises an exception. + * @see #startElement(String, String, String, Attributes) + */ + public void startElement (String uri, String localName) + throws SAXException + { + startElement(uri, localName, "", EMPTY_ATTS); + } + + + /** + * Start a new element without a qname, attributes or a Namespace URI. + * + *

This method will provide an empty string for the + * Namespace URI, an empty string for the qualified name, + * and a default empty attribute list. It invokes + * {@link #startElement(String, String, String, Attributes)} + * directly.

+ * + * @param localName The element's local name. + * @exception org.xml.sax.SAXException If there is an error + * writing the start tag, or if a handler further down + * the filter chain raises an exception. + * @see #startElement(String, String, String, Attributes) + */ + public void startElement (String localName) + throws SAXException + { + startElement("", localName, "", EMPTY_ATTS); + } + + + /** + * End an element without a qname. + * + *

This method will supply an empty string for the qName. + * It invokes {@link #endElement(String, String, String)} + * directly.

+ * + * @param uri The element's Namespace URI. + * @param localName The element's local name. + * @exception org.xml.sax.SAXException If there is an error + * writing the end tag, or if a handler further down + * the filter chain raises an exception. + * @see #endElement(String, String, String) + */ + public void endElement (String uri, String localName) + throws SAXException + { + endElement(uri, localName, ""); + } + + + /** + * End an element without a Namespace URI or qname. + * + *

This method will supply an empty string for the qName + * and an empty string for the Namespace URI. + * It invokes {@link #endElement(String, String, String)} + * directly.

+ * + * @param localName The element's local name. + * @exception org.xml.sax.SAXException If there is an error + * writing the end tag, or if a handler further down + * the filter chain raises an exception. + * @see #endElement(String, String, String) + */ + public void endElement (String localName) + throws SAXException + { + endElement("", localName, ""); + } + + + /** + * Add an empty element without a qname or attributes. + * + *

This method will supply an empty string for the qname + * and an empty attribute list. It invokes + * {@link #emptyElement(String, String, String, Attributes)} + * directly.

+ * + * @param uri The element's Namespace URI. + * @param localName The element's local name. + * @exception org.xml.sax.SAXException If there is an error + * writing the empty tag, or if a handler further down + * the filter chain raises an exception. + * @see #emptyElement(String, String, String, Attributes) + */ + public void emptyElement (String uri, String localName) + throws SAXException + { + emptyElement(uri, localName, "", EMPTY_ATTS); + } + + + /** + * Add an empty element without a Namespace URI, qname or attributes. + * + *

This method will supply an empty string for the qname, + * an empty string for the Namespace URI, and an empty + * attribute list. It invokes + * {@link #emptyElement(String, String, String, Attributes)} + * directly.

+ * + * @param localName The element's local name. + * @exception org.xml.sax.SAXException If there is an error + * writing the empty tag, or if a handler further down + * the filter chain raises an exception. + * @see #emptyElement(String, String, String, Attributes) + */ + public void emptyElement (String localName) + throws SAXException + { + emptyElement("", localName, "", EMPTY_ATTS); + } + + + /** + * Write an element with character data content. + * + *

This is a convenience method to write a complete element + * with character data content, including the start tag + * and end tag.

+ * + *

This method invokes + * {@link #startElement(String, String, String, Attributes)}, + * followed by + * {@link #characters(String)}, followed by + * {@link #endElement(String, String, String)}.

+ * + * @param uri The element's Namespace URI. + * @param localName The element's local name. + * @param qName The element's default qualified name. + * @param atts The element's attributes. + * @param content The character data content. + * @exception org.xml.sax.SAXException If there is an error + * writing the empty tag, or if a handler further down + * the filter chain raises an exception. + * @see #startElement(String, String, String, Attributes) + * @see #characters(String) + * @see #endElement(String, String, String) + */ + public void dataElement (String uri, String localName, + String qName, Attributes atts, + String content) + throws SAXException + { + startElement(uri, localName, qName, atts); + characters(content); + endElement(uri, localName, qName); + } + + + /** + * Write an element with character data content but no attributes. + * + *

This is a convenience method to write a complete element + * with character data content, including the start tag + * and end tag. This method provides an empty string + * for the qname and an empty attribute list.

+ * + *

This method invokes + * {@link #startElement(String, String, String, Attributes)}, + * followed by + * {@link #characters(String)}, followed by + * {@link #endElement(String, String, String)}.

+ * + * @param uri The element's Namespace URI. + * @param localName The element's local name. + * @param content The character data content. + * @exception org.xml.sax.SAXException If there is an error + * writing the empty tag, or if a handler further down + * the filter chain raises an exception. + * @see #startElement(String, String, String, Attributes) + * @see #characters(String) + * @see #endElement(String, String, String) + */ + public void dataElement (String uri, String localName, String content) + throws SAXException + { + dataElement(uri, localName, "", EMPTY_ATTS, content); + } + + + /** + * Write an element with character data content but no attributes or Namespace URI. + * + *

This is a convenience method to write a complete element + * with character data content, including the start tag + * and end tag. The method provides an empty string for the + * Namespace URI, an empty string for the qualified name, + * and an empty attribute list.

+ * + *

This method invokes + * {@link #startElement(String, String, String, Attributes)}, + * followed by + * {@link #characters(String)}, followed by + * {@link #endElement(String, String, String)}.

+ * + * @param localName The element's local name. + * @param content The character data content. + * @exception org.xml.sax.SAXException If there is an error + * writing the empty tag, or if a handler further down + * the filter chain raises an exception. + * @see #startElement(String, String, String, Attributes) + * @see #characters(String) + * @see #endElement(String, String, String) + */ + public void dataElement (String localName, String content) + throws SAXException + { + dataElement("", localName, "", EMPTY_ATTS, content); + } + + + /** + * Write a string of character data, with XML escaping. + * + *

This is a convenience method that takes an XML + * String, converts it to a character array, then invokes + * {@link #characters(char[], int, int)}.

+ * + * @param data The character data. + * @exception org.xml.sax.SAXException If there is an error + * writing the string, or if a handler further down + * the filter chain raises an exception. + * @see #characters(char[], int, int) + */ + public void characters (String data) + throws SAXException + { + char ch[] = data.toCharArray(); + characters(ch, 0, ch.length); + } + + + + //////////////////////////////////////////////////////////////////// + // Internal methods. + //////////////////////////////////////////////////////////////////// + + + /** + * Force all Namespaces to be declared. + * + * This method is used on the root element to ensure that + * the predeclared Namespaces all appear. + */ + private void forceNSDecls () + { + Enumeration prefixes = forcedDeclTable.keys(); + while (prefixes.hasMoreElements()) { + String prefix = (String)prefixes.nextElement(); + doPrefix(prefix, null, true); + } + } + + + /** + * Determine the prefix for an element or attribute name. + * + * TODO: this method probably needs some cleanup. + * + * @param uri The Namespace URI. + * @param qName The qualified name (optional); this will be used + * to indicate the preferred prefix if none is currently + * bound. + * @param isElement true if this is an element name, false + * if it is an attribute name (which cannot use the + * default Namespace). 
+ */ + private String doPrefix (String uri, String qName, boolean isElement) + { + String defaultNS = nsSupport.getURI(""); + if ("".equals(uri)) { + if (isElement && defaultNS != null) + nsSupport.declarePrefix("", ""); + return null; + } + String prefix; + if (isElement && defaultNS != null && uri.equals(defaultNS)) { + prefix = ""; + } else { + prefix = nsSupport.getPrefix(uri); + } + if (prefix != null) { + return prefix; + } + prefix = (String) doneDeclTable.get(uri); + if (prefix != null && + ((!isElement || defaultNS != null) && + "".equals(prefix) || nsSupport.getURI(prefix) != null)) { + prefix = null; + } + if (prefix == null) { + prefix = (String) prefixTable.get(uri); + if (prefix != null && + ((!isElement || defaultNS != null) && + "".equals(prefix) || nsSupport.getURI(prefix) != null)) { + prefix = null; + } + } + if (prefix == null && qName != null && !"".equals(qName)) { + int i = qName.indexOf(':'); + if (i == -1) { + if (isElement && defaultNS == null) { + prefix = ""; + } + } else { + prefix = qName.substring(0, i); + } + } + for (; + prefix == null || nsSupport.getURI(prefix) != null; + prefix = "__NS" + ++prefixCounter) + ; + nsSupport.declarePrefix(prefix, uri); + doneDeclTable.put(uri, prefix); + return prefix; + } + + + /** + * Write a raw character. + * + * @param c The character to write. + * @exception org.xml.sax.SAXException If there is an error writing + * the character, this method will throw an IOException + * wrapped in a SAXException. + */ + private void write (char c) + throws SAXException + { + try { + output.write(c); + } catch (IOException e) { + throw new SAXException(e); + } + } + + + /** + * Write a raw string. 
+ * + * @param s + * @exception org.xml.sax.SAXException If there is an error writing + * the string, this method will throw an IOException + * wrapped in a SAXException + */ + private void write (String s) + throws SAXException + { + try { + output.write(s); + } catch (IOException e) { + throw new SAXException(e); + } + } + + + /** + * Write out an attribute list, escaping values. + * + * The names will have prefixes added to them. + * + * @param atts The attribute list to write. + * @exception org.xml.SAXException If there is an error writing + * the attribute list, this method will throw an + * IOException wrapped in a SAXException. + */ + private void writeAttributes (Attributes atts) + throws SAXException + { + int len = atts.getLength(); + for (int i = 0; i < len; i++) { + char ch[] = atts.getValue(i).toCharArray(); + write(' '); + writeName(atts.getURI(i), atts.getLocalName(i), + atts.getQName(i), false); + if (htmlMode && + booleanAttribute(atts.getLocalName(i), atts.getQName(i), atts.getValue(i))) break; + write("=\""); + writeEsc(ch, 0, ch.length, true); + write('"'); + } + } + + + private String[] booleans = {"checked", "compact", "declare", "defer", + "disabled", "ismap", "multiple", + "nohref", "noresize", "noshade", + "nowrap", "readonly", "selected"}; + + // Return true if the attribute is an HTML boolean from the above list. + private boolean booleanAttribute (String localName, String qName, String value) + { + String name = localName; + if (name == null) { + int i = qName.indexOf(':'); + if (i != -1) name = qName.substring(i + 1, qName.length()); + } + if (!name.equals(value)) return false; + for (int j = 0; j < booleans.length; j++) { + if (name.equals(booleans[j])) return true; + } + return false; + } + + /** + * Write an array of data characters with escaping. + * + * @param ch The array of characters. + * @param start The starting position. + * @param length The number of characters to use. 
+ * @param isAttVal true if this is an attribute value literal. + * @exception org.xml.SAXException If there is an error writing + * the characters, this method will throw an + * IOException wrapped in a SAXException. + */ + private void writeEsc (char ch[], int start, + int length, boolean isAttVal) + throws SAXException + { + for (int i = start; i < start + length; i++) { + switch (ch[i]) { + case '&': + write("&"); + break; + case '<': + write("<"); + break; + case '>': + write(">"); + break; + case '\"': + if (isAttVal) { + write("""); + } else { + write('\"'); + } + break; + default: + if (!unicodeMode && ch[i] > '\u007f') { + write("&#"); + write(Integer.toString(ch[i])); + write(';'); + } else { + write(ch[i]); + } + } + } + } + + + /** + * Write out the list of Namespace declarations. + * + * @exception org.xml.sax.SAXException This method will throw + * an IOException wrapped in a SAXException if + * there is an error writing the Namespace + * declarations. + */ + private void writeNSDecls () + throws SAXException + { + Enumeration prefixes = nsSupport.getDeclaredPrefixes(); + while (prefixes.hasMoreElements()) { + String prefix = (String) prefixes.nextElement(); + String uri = nsSupport.getURI(prefix); + if (uri == null) { + uri = ""; + } + char ch[] = uri.toCharArray(); + write(' '); + if ("".equals(prefix)) { + write("xmlns=\""); + } else { + write("xmlns:"); + write(prefix); + write("=\""); + } + writeEsc(ch, 0, ch.length, true); + write('\"'); + } + } + + + /** + * Write an element or attribute name. + * + * @param uri The Namespace URI. + * @param localName The local name. + * @param qName The prefixed name, if available, or the empty string. + * @param isElement true if this is an element name, false if it + * is an attribute name. + * @exception org.xml.sax.SAXException This method will throw an + * IOException wrapped in a SAXException if there is + * an error writing the name. 
+ */ + private void writeName (String uri, String localName, + String qName, boolean isElement) + throws SAXException + { + String prefix = doPrefix(uri, qName, isElement); + if (prefix != null && !"".equals(prefix)) { + write(prefix); + write(':'); + } + if (localName != null && !"".equals(localName)) { + write(localName); + } else { + int i = qName.indexOf(':'); + write(qName.substring(i + 1, qName.length())); + } + } + + + + //////////////////////////////////////////////////////////////////// + // Default LexicalHandler implementation + //////////////////////////////////////////////////////////////////// + + public void comment(char[] ch, int start, int length) throws SAXException + { + write(""); + } + + public void endCDATA() throws SAXException { } + public void endDTD() throws SAXException { } + public void endEntity(String name) throws SAXException { } + public void startCDATA() throws SAXException { } + public void startDTD(String name, String publicid, String systemid) throws SAXException { + if (name == null) return; // can't cope + if (hasOutputDTD) return; // only one DTD + hasOutputDTD = true; + write("\n"); + } + + public void startEntity(String name) throws SAXException { } + + + //////////////////////////////////////////////////////////////////// + // Output properties + //////////////////////////////////////////////////////////////////// + + public String getOutputProperty(String key) { + return outputProperties.getProperty(key); + } + + public void setOutputProperty(String key, String value) { + outputProperties.setProperty(key, value); +// System.out.println("%%%% key = [" + key + "] value = [" + value +"]"); + if (key.equals(ENCODING)) { + outputEncoding = value; + unicodeMode = value.substring(0, 3).equalsIgnoreCase("utf"); +// System.out.println("%%%% unicodeMode = " + unicodeMode); + } + else if (key.equals(METHOD)) { + htmlMode = value.equals("html"); + } + else if (key.equals(DOCTYPE_PUBLIC)) { + overridePublic = value; + forceDTD = true; 
+ } + else if (key.equals(DOCTYPE_SYSTEM)) { + overrideSystem = value; + forceDTD = true; + } + else if (key.equals(VERSION)) { + version = value; + } + else if (key.equals(STANDALONE)) { + standalone = value; + } +// System.out.println("%%%% htmlMode = " + htmlMode); + } + + + //////////////////////////////////////////////////////////////////// + // Constants. + //////////////////////////////////////////////////////////////////// + + private final Attributes EMPTY_ATTS = new AttributesImpl(); + public static final String CDATA_SECTION_ELEMENTS = + "cdata-section-elements"; + public static final String DOCTYPE_PUBLIC = "doctype-public"; + public static final String DOCTYPE_SYSTEM = "doctype-system"; + public static final String ENCODING = "encoding"; + public static final String INDENT = "indent"; // currently ignored + public static final String MEDIA_TYPE = "media-type"; // currently ignored + public static final String METHOD = "method"; // currently html or xml + public static final String OMIT_XML_DECLARATION = "omit-xml-declaration"; + public static final String STANDALONE = "standalone"; // currently ignored + public static final String VERSION = "version"; + + + + //////////////////////////////////////////////////////////////////// + // Internal state. 
+ //////////////////////////////////////////////////////////////////// + + private Hashtable prefixTable; + private Hashtable forcedDeclTable; + private Hashtable doneDeclTable; + private int elementLevel = 0; + private Writer output; + private NamespaceSupport nsSupport; + private int prefixCounter = 0; + private Properties outputProperties; + private boolean unicodeMode = false; + private String outputEncoding = ""; + private boolean htmlMode = false; + private boolean forceDTD = false; + private boolean hasOutputDTD = false; + private String overridePublic = null; + private String overrideSystem = null; + private String version = null; + private String standalone = null; + private boolean cdataElement = false; + +} + +// end of XMLWriter.java diff --git a/src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java b/src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java new file mode 100644 index 0000000..61f20ad --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java @@ -0,0 +1,54 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +package org.ccil.cowan.tagsoup.jaxp; + +import java.io.*; + +import javax.xml.parsers.*; +import org.w3c.dom.Document; + +/** + * Trivial non-robust test class, to show that TagSoup can be accessed using + * JAXP interface. 
+ */ +public class JAXPTest +{ + public static void main(String[] args) + throws Exception + { + new JAXPTest().test(args); + } + + private void test(String[] args) + throws Exception + { + if (args.length != 1) { + System.err.println("Usage: java "+getClass()+" [input-file]"); + System.exit(1); + } + File f = new File(args[0]); + //System.setProperty("javax.xml.parsers.SAXParserFactory", SAXFactoryImpl.class.toString()); + System.setProperty("javax.xml.parsers.SAXParserFactory", "org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl"); + + SAXParserFactory spf = SAXParserFactory.newInstance(); + System.out.println("Ok, SAX factory JAXP creates is: "+spf); + System.out.println("Let's parse..."); + spf.newSAXParser().parse(f, new org.xml.sax.helpers.DefaultHandler()); + System.out.println("Done. And then DOM build:"); + + Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(f); + + System.out.println("Succesfully built DOM tree from '"+f+"', -> "+doc); + } +} diff --git a/src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java b/src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java new file mode 100644 index 0000000..883a3e7 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java @@ -0,0 +1,232 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * Simple adapter that exposes the SAX1 {@link org.xml.sax.Parser} interface
 * on top of a basic SAX2 {@link XMLReader} implementation, such as TagSoup.
 *
 * <p>Every call is delegated to the wrapped reader. SAX2 content events are
 * translated back to SAX1 document events by {@link DocHandlerWrapper}, and
 * SAX2 attributes are presented as a SAX1 attribute list by
 * {@link AttributesWrapper}.
 *
 * @author Tatu Saloranta (cowtowncoder@yahoo.com)
 * @deprecated The SAX1 interfaces implemented here are themselves deprecated
 *             in favour of SAX2.
 */
public class SAX1ParserAdapter
    implements org.xml.sax.Parser
{
    /** The SAX2 reader all calls are forwarded to. */
    final XMLReader xmlReader;

    /**
     * @param xr the SAX2 reader to drive through the SAX1 interface
     */
    public SAX1ParserAdapter(XMLReader xr)
    {
        xmlReader = xr;
    }

    // Sax1 API impl

    /**
     * Parses the given input source with the wrapped reader.
     *
     * @throws SAXException on any parse error; an {@link IOException} raised
     *         by the reader is wrapped (as the cause) in a SAXException
     */
    public void parse(InputSource source)
        throws SAXException
    {
        try {
            xmlReader.parse(source);
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Parses the document identified by the given system id with the
     * wrapped reader.
     *
     * @throws SAXException on any parse error; an {@link IOException} raised
     *         by the reader is wrapped (as the cause) in a SAXException
     */
    public void parse(String systemId)
        throws SAXException
    {
        try {
            xmlReader.parse(systemId);
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Installs a SAX1 document handler by wrapping it in a SAX2 content
     * handler and registering that wrapper on the reader.
     *
     * @deprecated
     */
    public void setDocumentHandler(DocumentHandler h)
    {
        xmlReader.setContentHandler(new DocHandlerWrapper(h));
    }

    public void setDTDHandler(DTDHandler h)
    {
        xmlReader.setDTDHandler(h);
    }

    public void setEntityResolver(EntityResolver r)
    {
        xmlReader.setEntityResolver(r);
    }

    public void setErrorHandler(ErrorHandler h)
    {
        xmlReader.setErrorHandler(h);
    }

    /**
     * Always fails: the wrapped reader offers nothing to map a locale
     * request onto, so the request is rejected explicitly.
     *
     * @throws SAXNotSupportedException always
     */
    public void setLocale(java.util.Locale locale)
        throws SAXException
    {
        throw new SAXNotSupportedException("TagSoup does not implement setLocale() method");
    }

    /**
     * Picks the name to report through SAX1. SAX2 reports an unavailable
     * qualified name as the empty string (and some sources use null), so
     * both cases fall back to the local name.
     */
    private static String sax1Name(String qName, String localName)
    {
        return (qName == null || qName.length() == 0) ? localName : qName;
    }

    // Helper classes:

    /**
     * Bridges the differences between the SAX2 handler (content handler)
     * and the SAX1 handler (document handler): events that exist in both
     * are forwarded, namespace-only events are dropped.
     *
     * @deprecated
     */
    final static class DocHandlerWrapper
        implements ContentHandler
    {
        /** The SAX1 handler receiving the translated events. */
        final DocumentHandler docHandler;

        /**
         * Single wrapper instance re-pointed at the current attributes on
         * every start tag, so the list handed to the SAX1 handler is only
         * meaningful for the duration of that callback.
         */
        final AttributesWrapper mAttrWrapper = new AttributesWrapper();

        /**
         * @deprecated
         */
        DocHandlerWrapper(DocumentHandler h)
        {
            docHandler = h;
        }

        public void characters(char[] ch, int start, int length)
            throws SAXException
        {
            docHandler.characters(ch, start, length);
        }

        public void endDocument()
            throws SAXException
        {
            docHandler.endDocument();
        }

        public void endElement(String uri, String localName, String qName)
            throws SAXException
        {
            docHandler.endElement(sax1Name(qName, localName));
        }

        public void endPrefixMapping(String prefix)
        {
            // no equivalent in SAX1, ignore
        }

        public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException
        {
            docHandler.ignorableWhitespace(ch, start, length);
        }

        public void processingInstruction(String target, String data)
            throws SAXException
        {
            docHandler.processingInstruction(target, data);
        }

        public void setDocumentLocator(Locator locator)
        {
            docHandler.setDocumentLocator(locator);
        }

        public void skippedEntity(String name)
        {
            // no equivalent in SAX1, ignore
        }

        public void startDocument()
            throws SAXException
        {
            docHandler.startDocument();
        }

        public void startElement(String uri, String localName, String qName,
                                 Attributes attrs)
            throws SAXException
        {
            // Also, need to wrap Attributes to look like AttributeList
            mAttrWrapper.setAttributes(attrs);
            docHandler.startElement(sax1Name(qName, localName), mAttrWrapper);
        }

        public void startPrefixMapping(String prefix, String uri)
        {
            // no equivalent in SAX1, ignore
        }
    }

    /**
     * Presents a SAX2 {@link Attributes} object through the SAX1
     * {@link AttributeList} interface.
     *
     * @deprecated
     */
    final static class AttributesWrapper
        implements AttributeList
    {
        /** Attributes currently exposed; must be set before any getter is used. */
        Attributes attrs;

        public AttributesWrapper() { }

        public void setAttributes(Attributes a) {
            attrs = a;
        }

        public int getLength()
        {
            return attrs.getLength();
        }

        /**
         * Returns the qualified name of attribute {@code i}, or its local
         * name when no qualified name is available.
         */
        public String getName(int i)
        {
            return sax1Name(attrs.getQName(i), attrs.getLocalName(i));
        }

        public String getType(int i)
        {
            return attrs.getType(i);
        }

        public String getType(String name)
        {
            return attrs.getType(name);
        }

        public String getValue(int i)
        {
            return attrs.getValue(i);
        }

        public String getValue(String name)
        {
            return attrs.getValue(name);
        }
    }
}
+ +package org.ccil.cowan.tagsoup.jaxp; + +import java.util.*; +import javax.xml.parsers.*; + +import org.xml.sax.*; + +/** + * This is a simple implementation of JAXP {@link SAXParserFactory}, + * to allow easier integration of TagSoup with the default JDK + * xml processing stack. + * + * @author Tatu Saloranta (cowtowncoder@yahoo.com) + */ +public class SAXFactoryImpl + extends SAXParserFactory +{ + /** + * The easiest way to test validity of features to set is to use + * a prototype object. Currently this is actually not a real prototype, + * in the sense that the configuration is actually passed separately + * (as opposed to instantiating new readers from this prototype), but + * this could be changed in future, if TagSoup parser object allowed + * cloning. + */ + private SAXParserImpl prototypeParser = null; + + /** + * This Map contains explicitly set features that can be succesfully + * set for XMLReader instances. Temporary storage is needed due to + * JAXP design: multiple readers can be instantiated from a single + * factory, and settings can be changed between instantiations. + *

+ * Note that we wouldn't need this map if we could create instances + * directly using the prototype instance. + */ + private HashMap features = null; + + public SAXFactoryImpl() + { + super(); + } + + // // // JAXP API implementation: + + /** + * Creates a new instance of SAXParser using the currently + * configured factory parameters. + */ + public SAXParser newSAXParser() + throws ParserConfigurationException + { + try { + return SAXParserImpl.newInstance(features); + } catch (SAXException se) { + // Translate to ParserConfigurationException + throw new ParserConfigurationException(se.getMessage()); + } + } + + /** + * Defines that the specified feature is to enabled/disabled (as + * per second argument) on reader instances created by this + * factory. + */ + public void setFeature(String name, boolean value) + throws ParserConfigurationException, SAXNotRecognizedException, + SAXNotSupportedException + { + // First, let's see if it's a valid call + getPrototype().setFeature(name, value); + + // If not, exception was thrown: so we are good now: + if (features == null) { + // Let's retain the ordering as well + features = new LinkedHashMap(); + } + features.put(name, value ? Boolean.TRUE : Boolean.FALSE); + } + + /** + * Returns whether the specified property will be enabled or disabled + * on reader instances constructed by this factory. 
+ */ + public boolean getFeature(String name) + throws ParserConfigurationException, SAXNotRecognizedException, + SAXNotSupportedException + { + return getPrototype().getFeature(name); + } + + // // // Internal methods + + private SAXParserImpl getPrototype() + { + if (prototypeParser == null) { + prototypeParser = new SAXParserImpl(); + } + return prototypeParser; + } +} diff --git a/src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java b/src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java new file mode 100644 index 0000000..75f3df4 --- /dev/null +++ b/src/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java @@ -0,0 +1,113 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +package org.ccil.cowan.tagsoup.jaxp; + +import java.io.*; +import java.util.*; +import javax.xml.parsers.*; + +import org.xml.sax.*; + +import org.ccil.cowan.tagsoup.Parser; + +/** + * This is a simple implementation of JAXP {@link SAXParser}, + * to allow easier integration of TagSoup with the default JDK + * xml processing stack. 
+ * + * @author Tatu Saloranta (cowtowncoder@yahoo.com) + */ +public class SAXParserImpl + extends SAXParser +{ + final org.ccil.cowan.tagsoup.Parser parser; + + protected SAXParserImpl() // used by factory, for prototypes + { + super(); + parser = new org.ccil.cowan.tagsoup.Parser(); + } + + public static SAXParserImpl newInstance(Map features) + throws SAXException + { + SAXParserImpl parser = new SAXParserImpl(); + if (features != null) { + Iterator it = features.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry entry = (Map.Entry) it.next(); + parser.setFeature((String) entry.getKey(), ((Boolean) entry.getValue()).booleanValue()); + } + } + return parser; + } + + // // // JAXP API implementation: + + /** + * To support SAX1 interface, we'll need to use an adapter. + * @deprecated + */ + public org.xml.sax.Parser getParser() + throws SAXException + { + return new SAX1ParserAdapter(parser); + } + + public XMLReader getXMLReader() { return parser; } + + public boolean isNamespaceAware() + { + try { + return parser.getFeature(Parser.namespacesFeature); + } catch (SAXException sex) { // should never happen... so: + throw new RuntimeException(sex.getMessage()); + } + } + + public boolean isValidating() + { + try { + return parser.getFeature(Parser.validationFeature); + } catch (SAXException sex) { // should never happen... 
so: + throw new RuntimeException(sex.getMessage()); + } + } + + public void setProperty(String name, Object value) + throws SAXNotRecognizedException, SAXNotSupportedException + { + parser.setProperty(name, value); + } + + public Object getProperty(String name) + throws SAXNotRecognizedException, SAXNotSupportedException + { + return parser.getProperty(name); + } + + // // // Additional convenience methods + + public void setFeature(String name, boolean value) + throws SAXNotRecognizedException, SAXNotSupportedException + { + parser.setFeature(name, value); + } + + public boolean getFeature(String name) + throws SAXNotRecognizedException, SAXNotSupportedException + { + return parser.getFeature(name); + } +} -- cgit v1.2.3