aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUpstream <upstream-import@none>1970-01-12 13:46:40 +0000
committerUpstream <upstream-import@none>1970-01-12 13:46:40 +0000
commit70e83658cac1d0d766e93853e3698921af269a37 (patch)
treef2dbc24614858517bc61f8811143d878002f800d
downloadtagsoup-70e83658cac1d0d766e93853e3698921af269a37.tar.gz
external/tagsoup 1.2upstream/1.2nougat-mr1-arc
-rw-r--r--CHANGES303
-rw-r--r--LICENSE201
-rw-r--r--README357
-rw-r--r--TODO14
-rw-r--r--build.xml160
-rw-r--r--etc/build/build.properties1
-rw-r--r--etc/build/taskdefs.txt0
-rw-r--r--index.html407
-rw-r--r--src/definitions/html.stml249
-rw-r--r--src/definitions/html.tssl2762
-rw-r--r--src/java/org/ccil/cowan/tagsoup/AttributesImpl.java626
-rw-r--r--src/java/org/ccil/cowan/tagsoup/AutoDetector.java43
-rw-r--r--src/java/org/ccil/cowan/tagsoup/CommandLine.java289
-rw-r--r--src/java/org/ccil/cowan/tagsoup/Element.java203
-rw-r--r--src/java/org/ccil/cowan/tagsoup/ElementType.java276
-rw-r--r--src/java/org/ccil/cowan/tagsoup/PYXScanner.java124
-rw-r--r--src/java/org/ccil/cowan/tagsoup/PYXWriter.java217
-rw-r--r--src/java/org/ccil/cowan/tagsoup/Parser.java1114
-rw-r--r--src/java/org/ccil/cowan/tagsoup/ScanHandler.java119
-rw-r--r--src/java/org/ccil/cowan/tagsoup/Scanner.java50
-rw-r--r--src/java/org/ccil/cowan/tagsoup/Schema.java170
-rw-r--r--src/java/org/ccil/cowan/tagsoup/XMLWriter.java1435
-rw-r--r--src/java/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java54
-rw-r--r--src/java/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java232
-rw-r--r--src/java/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java114
-rw-r--r--src/java/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java113
-rw-r--r--src/templates/org/ccil/cowan/tagsoup/HTMLModels.java31
-rw-r--r--src/templates/org/ccil/cowan/tagsoup/HTMLScanner.java427
-rw-r--r--src/templates/org/ccil/cowan/tagsoup/HTMLSchema.java38
-rw-r--r--stml/stml.rnc49
-rw-r--r--stml/stml.xslt150
-rw-r--r--tagsoup.1183
-rw-r--r--tagsoup.txt160
-rw-r--r--tssl/tssl-models.xslt47
-rw-r--r--tssl/tssl-validate.xslt40
-rw-r--r--tssl/tssl.rnc75
-rw-r--r--tssl/tssl.xslt220
37 files changed, 11053 insertions, 0 deletions
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..94a0a38
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,303 @@
+Changes from 1.1.3 to 1.2
+=========================
+Changed license to Apache 2.0
+Bogon default model is now ANY, not EMPTY
+Support new DOCTYPE output switches --doctype-system and --doctype-public
+Support new XML declaration output switches --standalone and --version
+New --norootbogons switch makes bogons children of the root
+Don't resolve entity references in attribute values unless semicolon-terminated
+Support character entities above U+FFFF
+Add character entities from the 2007-12-14 draft of xml-entity-names
+Call SAX events startPrefixMapping and endPrefixMapping to report prefixes
+Clean up newline processing, shrinking html.stml considerably
+Allow link elements in the body as well as the head, to avoid excess bodies
+Allow tables inside paragraphs
+Allow cells and forms in thead and tfoot elements without intervening tr element
+The span element is no longer restartable
+Support non-standard elements bgsound, blink, canvas, comment, listing,
+ marquee, nobr, ruby, rbc, rtc, rb, rt, rp, wbr, xmp
+In HTML mode, boolean attributes like checked are output in minimized form
+Correctly handle runs of less-than characters
+Suppress all but the first DOCTYPE declaration
+Modify PI targets containing colons to have underscores instead
+The case of element tags is now canonicalized to the schema
+PI targets are no longer forced to lower case
+
+Changes from 1.1.2 to 1.1.3
+===========================
+Allow Parser.set* methods to accept null
+Allow setting the LexicalHandler feature to be null
+ (null in both cases means "use default behavior")
+
+Changes from 1.1.1 to 1.1.2
+===========================
+Setting CDATAElementsFeature didn't really set CDATAElements instance variable
+
+Changes from 1.1 to 1.1.1
+=========================
+Removed lexical handler calls to startCDATA/endCDATA from CDATA element handling
+Added lexical handler calls to startCDATA/endCDATA from CDATA section handling
+Added CDATAElementsFeature, the programmatic equivalent of the --nocdata switch
+
+Changes from 1.0.5 to 1.1
+=========================
+Add Tatu Saloranta's JAXP support package
+
+Changes from 1.0.4 to 1.0.5
+===========================
+Major repairs to comment scanning
+Skip leading BOM
+Comment out debugging code in PYXWriter
+Allow &#X as well as &#x
+Add net.sf.saxon to list of supported XSLT engines
+
+Changes from 1.0.3 to 1.0.4
+===========================
+Certain options were mutually exclusive that should not have been
+Blocked XML declaration from specifying an encoding of ""
+--method=html was not doing the right thing
+
+Changes from 1.0.2 to 1.0.3
+===========================
+Fixed build file to use Java target version 1.4
+Fixed --version switch to print the right thing
+
+Changes from 1.0.1 to 1.0.2
+===========================
+Version attribute default value removed from html element
+Leading and trailing hyphens now trimmed properly from comments
+Added --output-encoding switch to control encoding
+If output encoding is Unicode, don't generate character references
+Whitespace compressed and junk stripped from public identifiers
+
+Changes from 1.0 to 1.0.1
+=========================
+Added ignorableWhitespaceFeature and --ignorable to report ignorable whitespace
+ Patch due to David Pashley
+Insert spaces to break up -- in comments
+Change bogus chars in publicids to spaces
+--lexical switch now outputs DOCTYPE if there is one
+Remove unnecessary blank line after XML declaration
+
+Changes from 1.0rc9 to 1.0
+==========================
+Added feature to control restartability
+ Patch due to Nikita Zhuk
+Added corresponding --norestart switch in CommandLine
+Made translate-colons feature actually work
+
+Changes from 1.0rc8 to 1.0rc9
+=============================
+If there is a publicid but no systemid, set systemid to ""
+
+Changes from 1.0rc7 to 1.0rc8
+=============================
+Fixed paper-bag bug (source didn't match binary in release)
+
+Changes from 1.0rc6 to 1.0rc7
+=============================
+LexicalHandler now gets DOCTYPE information (publicid and systemid)
+ Patch due to Mike Bremford
+HTMLScanner now reports more useful debug output when not commented out
+ Patch due to Mike Bremford
+Change "<memberOfAny>" to exclude "<root>" pseudo-element
+ This prevents "script" from being output as a root
+The shared HTMLParser object has been eliminated
+
+Changes from 1.0rc5 to 1.0rc6
+=============================
+If namespaceFeature is false, uri and localname are passed as empty strings
+The namespacePrefixesFeature is now always false
+Command line switch --nons no longer affects namespacePrefixesFeature
+Command line switch --html now implies --nons
+XMLWriter is now told directly to use the schema's URI as default namespace
+XMLWriter now takes the element name from the qname if localname is empty
+
+Changes from 1.0rc4 to 1.0rc5
+=============================
+The --nodefaults switch now removes only default attributes, not all of them
+Added --nocolons switch and translate-colons feature to convert ":"
+ in names to "_" (thus suppressing namespaces other than the basic one)
+The root element can be unknown without problem
+Empty <script/> and <style/> tags now work
+Added all standard SAX2 features to feature hashtable
+Reimplemented namespacePrefixes feature (broken since 1.0rc3)
+
+Changes from 1.0rc3 to 1.0rc4
+=============================
+Remove trailing ? from processing instructions (in case the input is XHTML)
+Added Javadocs for all SAX standard and TagSoup-specific features and properties
+Fixed termination conditions for entity/character references
+Fixed EOF-pushback bug that was generating bogus &#x65535; references
+Added Parser feature and --nodefaults switch to ignore default attribute values
+Added support for SAX Locator
+Updated AFL license to version 3.0
+Scanner buffer size increases as needed, allowing large attribute values
+Look for various XSLT implementations as available (still fails in raw 5.0)
+Clean up handling of XML empty tags and SGML minimized end-tags
+Support proper options and help message internally
+Use Hashtable in CommandLine class instead of HashMap
+Do proper buffering of InputStream and Reader
+Clean up content model of noframes element
+Removed htmlMode in XMLWriter
+Added support for XSLT output options METHOD=html and OMIT_XML_DECLARATION=yes
+Command line option --html sets both of these
+Wrote simple validator for TSSL schemas (tssl/tssl-validate.xslt)
+Removed various validity problems in html.tssl
+When processing a start-tag, don't restart elements that aren't in the new
+ element's content model
+Remove bogus double param in tssl.xslt
+
+Changes from 1.0rc2 to 1.0rc3
+=============================
+Convert CR and CRLF to LF in comments and PIs
+Force empty elements to close immediately
+Match close tags of CDATA elements more precisely (but case-blind)
+Process switches on the command line
+Man page available
+
+Changes from 1.0rc1 to 1.0rc2
+=============================
+Isolated & and &# now don't crash parser
+TagSoup no longer depends on /dev/stdin existing
+Refactored Parser class, removing main method to new CommandLine class
+Changes to content models of form, button, table, and tr elements in html.tssl
+'</scr' + 'ipt>' in a script element no longer terminates it
+Introduced "uncloseability" of form and table elements
+"pyxin" property specifies that input is in PYX format
+Correctly cope with unexpected characters around colons, also with multiple colons
+Correctly output comments with "--" in them (by adding a space)
+
+Changes from 0.10.2 to 1.0rc1
+=============================
+Script can now appear anywhere
+Switch -nocdata correctly implemented
+Eliminated useless M_n constants in Schema
+Introduced <memberOfAny> and <isRoot> as alternatives to
+ <memberOf> in TSSL
+Allow prefixes in element names
+Attributes are now normalized
+Expanded public API for Element and ElementType
+Javadoc improved
+
+Changes from 0.10.1 to 0.10.2
+=============================
+Removed misfeature whereby > terminated a tag even inside quotes
+Added licensing language to XSLT scripts, RELAX NG schemas
+Removed long-standing mishandling of entity references in attributes
+Cleaned up logic for converting junky strings to proper XML Names
+Correctly handle empty tag that has no whitespace or attributes
+Restore correct 0.9.3 handling of an apparent end-tag in a CDATA element
+Added script element to content model of head element
+
+Changes from 0.9.7 to 0.10.1 (there is no 0.10.0):
+==================================================
+Convert to XSLT configuration exclusively;
+ Perl code and tab-separated tables are gone
+Remove xmlns:* attributes
+Append "_" to attribute names ending in ":"
+Don't prepend "_" to an attribute name starting in "_"
+Handle namespace prefixes in attributes:
+ "xml" prefix is handled correctly
+ other prefixes are mapped to "urn:x-prefix:foo"
+Ignore XML declarations
+-Dnocdata=true turns off F_CDATA on script and style elements
+Fixed off-by-one errors in character references that made them uninterpreted
+Start-tags ending in a minimized attribute are no longer being dropped
+XML empty tags are now supported (though slashes are still allowed in
+ unquoted attribute values)
+
+Changes from 0.9.6 to 0.9.7:
+============================
+Upgraded AFL to version 2.1
+Passed through newlines in character content (very old bug)
+
+Changes from 0.9.5 to 0.9.6:
+============================
+Script element can appear directly in body
+">" terminates a start-tag even inside a quoted attribute,
+ to protect against unbalanced quotes
+"_" is prepended to attributes that don't begin with a letter
+Remove "xmlns" attributes from the input
+All standard features can now be set
+ (although there is no effect from doing so)
+New "bogons-empty" feature can be set to false to give bogons
+ content model of ANY rather than EMPTY;
+ -Dany switch sets this feature to false
+TSSL now has an explicit group element to declare an element group
+STML is a new XML format for modeling state-table changes
+License updated to AFL 2.1
+
+Changes from 0.9.4 to 0.9.5:
+============================
+S in the statetable now means \r and \n and \t as well as space
+ (as was always intended; brain fart!)
+Ins and del elements are now allowed everywhere
+TSSL now correctly supports attributes that are legal on all elements
+
+Changes from 0.9.3 to 0.9.4:
+============================
+Fixed paper-bag bug that revealed attribute type BOOLEAN to applications.
+Obsolete ABSTRACT removed in favor of README.
+Improved implementation of CDATA restart after bogus end-tag.
+Allowed hyphen, underscore, and period in names as well as colon.
+First cut at TagSoup Schema Language -- doesn't do anything yet.
+Support CDATA sections on input.
+Don't generate built-in entities within CDATA elements.
+
+Changes from 0.9.2 to 0.9.3:
+============================
+Convenience main program "tagsoup" in bin directory.
+Begin to integrate tests.
+Introduced BOOLEAN type (currently just converted to NMTOKEN).
+Features that actually work are now named constants in Parser.
+Double root elements are really gone now.
+ID attributes weren't being removed from restarted elements.
+Fixed a bug that made unknown elements disappear in some cases.
+Parser is now safely reusable.
+PYXWriter and XMLWriter now implement LexicalHandler.
+Parser reports comments, startCDATA, and endCDATA events to a LexicalHandler.
+ScanHandler methods now throw only SAXException, not also IOException.
+-Dlexical=true switch sets the ContentHandler as a LexicalHandler as well
+ (XMLWriter prints comments, ignores CDATA sections; PYXWriter ignores all).
+-Dreuse=true switch reuses a single Parser object (no great speed gain).
+We now disallow an a element as the child of another a element.
+An empty input is now treated as zero-length character content.
+HTMLWriter is gone in favor of an extended XMLWriter with get/setHTMLMode methods.
+CDATA elements only terminate with matching end-tags (thanks to Sebastien Bardoux).
+
+Changes from 0.9.1 to 0.9.2:
+============================
+No longer inserts bogus ; after unknown entity reference without ;.
+Consecutive entity references now work correctly.
+Setting namespaces and namespace-prefixes methods now works.
+-Dnons=true option turns off namespace and prefix.
+New feature "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons"
+ suppresses unknown start-tags (any end-tag will be automatically ignored).
+-Dnobogons=true option turns ignore-bogons on.
+Suppress unknown and/or empty initial start-tag always
+ (prevents double root element).
+Schema now allows style as an inline element, like script.
+Schema now allows tr as a child of table to avoid problems with embedded tables.
+Clear Parser instance variables to make Parsers properly reusable.
+
+Changes from 0.9 to 0.9.1:
+==========================
+Incorporated patch for -jar support by Joseph Walton.
+Incorporated patch for Megginson XMLWriter support by Joseph Walton.
+Changed existing XMLWriter to HTMLWriter.
+Rewrote Parser.main for better features, removed Tester class.
+-Dnewline=true removed, now implied by -DHTML=true.
+-Dfiles=true now used to generate separate outputs (old Tester behavior)
+ with extension xhtml (removing any old extension).
+Fixed nasty bug in HTMLScanner that was failing to fix unusual entities.
+Don't attempt to smash whitespace to spaces any more.
+
+Changes from 0.8 to 0.9:
+========================
+Ant-ified by Martin Rademacher.
+Don't suppress colons in element names.
+Entity problems fixed (I hope).
+Can now set namespace and namespace-prefixes features (without effect).
+Properly templatize HTMLModels.java.
+Attributes are no longer in the HTML namespace.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README b/README
new file mode 100644
index 0000000..1e71819
--- /dev/null
+++ b/README
@@ -0,0 +1,357 @@
+ TagSoup - Just Keep On Truckin'
+
+ Introduction
+
+ This is the home page of TagSoup, a SAX-compliant parser written in
+ Java that, instead of parsing well-formed or valid XML, parses HTML as
+ it is found in the wild: [1]poor, nasty and brutish, though quite often
+ far from short. TagSoup is designed for people who have to process this
+ stuff using some semblance of a rational application design. By
+ providing a SAX interface, it allows standard XML tools to be applied
+ to even the worst HTML. TagSoup also includes a command-line processor
+ that reads HTML files and can generate either clean HTML or well-formed
+ XML that is a close approximation to XHTML.
+
+ This is also the README file packaged with TagSoup.
+
+ TagSoup is free and Open Source software. As of version 1.2, it is
+ licensed under the [2]Apache License, Version 2.0, which allows
+ proprietary re-use as well as use with GPL 3.0 or GPL 2.0-or-later
+ projects. (If anyone needs a GPL 2.0 license for a GPL 2.0-only
+ project, feel free to ask.)
+
+ Warning: TagSoup will not build on stock Java 5.x or 6.x!
+
+ Due to a bug in the versions of Xalan shipped with Java 5.x and 6.x,
+ TagSoup will not build out of the box. You need to retrieve [3]Saxon
+ 6.5.5, which does not have the bug. Unpack the zipfile in an empty
+ directory and copy the saxon.jar and saxon-xml-apis.jar files to
+ $ANT_HOME/lib. The Ant build process for TagSoup will then notice that
+ Saxon is available and use it instead.
+
+ TagSoup 1.2 released
+
+ There are a great many changes, most of them fixes for long-standing
+ bugs, in this release. Only the most important are listed here; for the
+ rest, see the CHANGES file in the source distribution. Very special
+ thanks to Jojo Dijamco, whose intensive efforts at debugging made this
+ release a usable upgrade rather than a useless mass of undetected bugs.
+ * As noted above, I have changed the license to Apache 2.0.
+ * The default content model for bogons (unknown elements) is now ANY
+ rather than EMPTY. This is a breaking change, which I have done
+ only because there was so much demand for it. It can be undone on
+ the command line with the --emptybogons switch, or programmatically
+ with parser.setFeature(Parser.emptyBogonsFeature, true).
+ * The processing of entity references in attribute values has finally
+ been fixed to do what browsers do. That is, a reference is only
+ recognized if it is properly terminated by a semicolon; otherwise
+ it is treated as plain text. This means that URIs like
+ foo?cdown=32&cup=42 are no longer seen as containing an instance of
+ the ∪ (U+222A) character (whose name happens to be cup).
+ * Several new switches have been added:
+ + --doctype-system and --doctype-public force a DOCTYPE
+ declaration to be output and allow setting the system and
+ public identifiers.
+ + --standalone and --version allow control of the XML
+ declaration that is output. (Note that TagSoup's XML output is
+ always version 1.0, even if you use --version=1.1.)
+ + --norootbogons causes unknown elements not to be allowed as
+ the document root element. Instead, they are made children of
+ the default root element (the html element for HTML).
+ * The TagSoup core now supports character entities with values above
+ U+FFFF. As a consequence, the HTML schema now supports all 2,210
+ standard character entities from the [4]2007-12-14 draft of XML
+ Entity Definitions for Characters, except the 94 which require more
+ than one Unicode character to represent.
+ * The SAX events startPrefixMapping and endPrefixMapping are now
+ being reported for all cases of foreign elements and attributes.
+ * All bugs around newline processing on Windows should now be gone.
+ * A number of content models have been loosened to allow elements to
+ appear in new and non-standard (but commonly found) places. In
+ particular, tables are now allowed inside paragraphs, against the
+ letter of the W3C specification.
+ * Since the span element is intended for fine control of appearance
+ using CSS, it should never have been a restartable element. This
+ very long-standing bug has now been fixed.
+ * The following non-standard elements are now at least partly
+ supported: bgsound, blink, canvas, comment, listing, marquee, nobr,
+ rbc, rb, rp, rtc, rt, ruby, wbr, xmp.
+ * In HTML output mode, boolean attributes like checked are now output
+ as such, rather than in XML style as checked="checked".
+ * Runs of < characters such as << and <<< are now handled correctly
+ in text rather than being transformed into extremely bogus
+ start-tags.
+
+ [5]Download the TagSoup 1.2 jar file here. It's about 87K long.
+ [6]Download the full TagSoup 1.2 source here. If you don't have zip,
+ you can use jar to unpack it.
+ [7]Download the current CHANGES file here.
+
+ TagSoup 1.1 released
+
+ TagSoup 1.1 adds Tatu Saloranta's JAXP support for TagSoup. To use
+ TagSoup within the JAXP framework (which is not something I necessarily
+ recommend, but it is part of the Java XML platform), you can create a
+ SAXParser by calling
+ org.ccil.cowan.tagsoup.jaxp.SAXParserImpl.newInstance(). You can also
+ set the system property javax.xml.parsers.SAXParserFactory to
+ org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl, but be aware that doing
+ this will cause all JAXP-based XML parsing to go through TagSoup, which
+ is a Bad Thing if your application also reads XML documents.
+
+ What TagSoup does
+
+ TagSoup is designed as a parser, not a whole application; it isn't
+ intended to permanently clean up bad HTML, as [8]HTML Tidy does, only
+ to parse it on the fly. Therefore, it does not convert presentation
+ HTML to CSS or anything similar. It does guarantee well-structured
+ results: tags will wind up properly nested, default attributes will
+ appear appropriately, and so on.
+
+ The semantics of TagSoup are as far as practical those of actual HTML
+ browsers. In particular, never, never will it throw any sort of syntax
+ error: the TagSoup motto is [9]"Just Keep On Truckin'". But there's
+ much, much more. For example, if the first tag is LI, it will supply
+ the application with enclosing HTML, BODY, and UL tags. Why UL? Because
+ that's what browsers assume in this situation. For the same reason,
+ overlapping tags are correctly restarted whenever possible: text like:
+This is <B>bold, <I>bold italic, </b>italic, </i>normal text
+
+ gets correctly rewritten as:
+This is <b>bold, <i>bold italic, </i></b><i>italic, </i>normal text.
+
+ By intention, TagSoup is small and fast. It does not depend on the
+ existence of any framework other than SAX, and should be able to work
+ with any framework that can accept SAX parsers. In particular, [10]XOM
+ is known to work.
+
+ You can replace the low-level HTML scanner with one based on Sean
+ McGrath's [11]PYX format (very close to James Clark's ESIS format). You
+ can also supply an AutoDetector that peeks at the incoming byte stream
+ and guesses a character encoding for it. Otherwise, the platform
+ default is used. If you need an autodetector of character sets,
+ consider trying to adapt the [12]Mozilla one; if you succeed, let me
+ know.
+
+ Note: TagSoup in Java 1.1
+
+ If you go through the TagSoup source and replace all references to
+ HashMap with Hashtable and recompile, TagSoup will work fine in Java
+ 1.1 VMs. Thanks to Thorbjørn Vinne for this discovery.
+
+ The TSaxon XSLT-for-HTML processor
+
+ [13]I am also distributing [14]TSaxon, a repackaging of version 6.5.5
+ of Michael Kay's Saxon XSLT version 1.0 implementation that includes
+ TagSoup. TSaxon is a drop-in replacement for Saxon, and can be used to
+ process either HTML or XML documents with XSLT stylesheets.
+
+ TagSoup as a stand-alone program
+
+ It is possible to run TagSoup as a program by saying java -jar
+ tagsoup-1.2.jar [option ...] [file ...]. Files mentioned on the command
+ line will be parsed individually. If no files are specified, the
+ standard input is read.
+
+ The following options are understood:
+
+ --files
+ Output into individual files, with html extensions changed to
+ xhtml. Otherwise, all output is sent to the standard output.
+
+ --html
+ Output is in clean HTML: the XML declaration is suppressed, as
+ are end-tags for the known empty elements.
+
+ --omit-xml-declaration
+ The XML declaration is suppressed.
+
+ --method=html
+ End-tags for the known empty HTML elements are suppressed.
+
+ --doctype-system=systemid
+ Forces the output of a DOCTYPE declaration with the specified
+ systemid.
+
+ --doctype-public=publicid
+ Forces the output of a DOCTYPE declaration with the specified
+ publicid.
+
+ --version=version
+ Sets the version string in the XML declaration.
+
+ --standalone=[yes|no]
+ Sets the standalone declaration to yes or no.
+
+ --pyx
+ Output is in PYX format.
+
+ --pyxin
+ Input is in PYXoid format (need not be well-formed).
+
+ --nons
+ Namespaces are suppressed. Normally, all elements are in the
+ XHTML 1.x namespace, and all attributes are in no namespace.
+
+ --nobogons
+ Bogons (unknown elements) are suppressed.
+
+ --nodefaults
+ suppress default attribute values
+
+ --nocolons
+ change explicit colons in element and attribute names to
+ underscores
+
+ --norestart
+ don't restart any normally restartable elements
+
+ --ignorable
+ output whitespace in elements with element-only content
+
+ --emptybogons
+ Bogons are given a content model of EMPTY rather than ANY.
+
+ --any
+ Bogons are given a content model of ANY rather than EMPTY
+ (default).
+
+ --norootbogons
+ Don't allow bogons to be root elements; make them subordinate to
+ the root.
+
+ --lexical
+ Pass through HTML comments and DOCTYPE declarations. Has no
+ effect when output is in PYX format.
+
+ --reuse
+ Reuse a single instance of TagSoup parser throughout. Normally,
+ a new one is instantiated for each input file.
+
+ --nocdata
+ Change the content models of the script and style elements to
+ treat them as ordinary #PCDATA (text-only) elements, as in
+ XHTML, rather than with the special CDATA content model.
+
+ --encoding=encoding
+ Specify the input encoding. The default is the Java platform
+ default.
+
+ --output-encoding=encoding
+ Specify the output encoding. The default is the Java platform
+ default.
+
+ --help
+ Print help.
+
+ --version
+ Print the version number.
+
+ SAX features and properties
+
+ TagSoup supports the following SAX features in addition to the standard
+ ones:
+
+ http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons
+ A value of "true" indicates that the parser will ignore unknown
+ elements.
+
+ http://www.ccil.org/~cowan/tagsoup/features/bogons-empty
+ A value of "true" indicates that the parser will give unknown
+ elements a content model of EMPTY; a value of "false", a content
+ model of ANY.
+
+ http://www.ccil.org/~cowan/tagsoup/features/root-bogons
+ A value of "true" indicates that the parser will allow unknown
+ elements to be the root of the output document.
+
+ http://www.ccil.org/~cowan/tagsoup/features/default-attributes
+ A value of "true" indicates that the parser will return default
+ attribute values for missing attributes that have default
+ values.
+
+ http://www.ccil.org/~cowan/tagsoup/features/translate-colons
+ A value of "true" indicates that the parser will translate
+ colons into underscores in names.
+
+ http://www.ccil.org/~cowan/tagsoup/features/restart-elements
+ A value of "true" indicates that the parser will attempt to
+ restart the restartable elements.
+
+ http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace
+ A value of "true" indicates that the parser will transmit
+ whitespace in element-only content via the SAX
+ ignorableWhitespace callback. Normally this is not done, because
+ HTML is an SGML application and SGML suppresses such whitespace.
+
+ http://www.ccil.org/~cowan/tagsoup/features/cdata-elements
+ A value of "true" indicates that the parser will process the
+ script and style elements (or any elements with type='cdata' in
+ the TSSL schema) as SGML CDATA elements (that is, no markup is
+ recognized except the matching end-tag).
+
+ TagSoup supports the following SAX properties in addition to the
+ standard ones:
+
+ http://www.ccil.org/~cowan/tagsoup/properties/scanner
+ Specifies the Scanner object this parser uses.
+
+ http://www.ccil.org/~cowan/tagsoup/properties/schema
+ Specifies the Schema object this parser uses.
+
+ http://www.ccil.org/~cowan/tagsoup/properties/auto-detector
+ Specifies the AutoDetector (for encoding detection) this parser
+ uses.
+
+ More information
+
+ I gave a presentation (a nocturne, so it's not on the schedule) at
+ [15]Extreme Markup Languages 2004 about TagSoup, updated from the one
+ presented in 2002 at the New York City XML SIG and at XML 2002. This is
+ the main high-level documentation about how TagSoup works. Formats:
+ [16]OpenDocument [17]Powerpoint [18]PDF.
+
+ I also had people add [19]"evil" HTML to a large poster so that I could
+ [20]clean it up; View Source is probably more useful than ordinary
+ browsing. The original instructions were:
+
+ SOUPE DE BALISES (BE EVIL)!
+ Ecritez une balise ouvrante (sans attributs)
+ ou fermante HTML ici, s.v.p.
+
+ There is a [21]tagsoup-friends mailing list hosted at [22]Yahoo Groups.
+ You can [23]join via the Web, or by sending a blank email to
+ [24]tagsoup-friends-subscribe@yahoogroups.com. The [25]archives are
+ open to all.
+
+ Online TagSoup processing for publicly accessible HTML documents is now
+ [26]available courtesy of Leigh Dodds.
+
+References
+
+ 1. http://oregonstate.edu/instruct/phl302/texts/hobbes/leviathan-c.html
+ 2. http://opensource.org/licenses/apache2.0.php
+ 3. http://prdownloads.sourceforge.net/saxon/saxon6-5-5.zip
+ 4. http://www.w3.org/TR/2007/WD-xml-entity-names-20071214
+ 5. http://home.ccil.org/~cowan/XML/tagsoup/tagsoup-1.2.jar
+ 6. http://home.ccil.org/~cowan/XML/tagsoup/tagsoup-1.2-src.zip
+ 7. http://home.ccil.org/~cowan/XML/tagsoup/CHANGES
+ 8. http://tidy.sf.net/
+ 9. http://www.crumbmuseum.com/truckin.html
+ 10. http://www.cafeconleche.org/XOM
+ 11. http://gnosis.cx/publish/programming/xml_matters_17.html
+ 12. http://jchardet.sourceforge.net/
+ 13. http://www.ccil.org/~cowan
+ 14. http://home.ccil.org/~cowan/XML/tagsoup/tsaxon
+ 15. http://www.extrememarkup.com/extreme/2004
+ 16. http://home.ccil.org/~cowan/XML/tagsoup/tagsoup.odp
+ 17. http://home.ccil.org/~cowan/XML/tagsoup/tagsoup.ppt
+ 18. http://home.ccil.org/~cowan/XML/tagsoup/tagsoup.pdf
+ 19. http://home.ccil.org/~cowan/XML/tagsoup/extreme.html
+ 20. http://home.ccil.org/~cowan/XML/tagsoup/extreme.xhtml
+ 21. http://groups.yahoo.com/group/tagsoup-friends
+ 22. http://groups.yahoo.com/
+ 23. http://groups.yahoo.com/group/tagsoup-friends/join
+ 24. mailto:tagsoup-friends-subscribe@yahoogroups.com
+ 25. http://groups.yahoo.com/group/tagsoup-friends/messages
+ 26. http://xmlarmyknife.org/docs/xhtml/tagsoup/
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..ccb59fd
--- /dev/null
+++ b/TODO
@@ -0,0 +1,14 @@
+Notice: It's quite possible that none of these features will ever be
+implemented. If you'd like to implement one yourself, feel free to send
+me a patch.
+
+Flag added parent elements with a special attribute
+Allow case sensitivity as a standard feature
+Don't see </script> tag when in pseudo-comment in script element
+Don't just chuck out namespace declarations
+Allow inline cruft in table bodies and rows
+ (Don't break up tables, ever?)
+ (Don't break up forms, ever, either?)
+Suppress start-tags for restartable elements if the element
+ is already on the stack (<b><b> is the same as <b>)
+Combine consecutive body elements
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..1aa21d4
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,160 @@
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<project name="tagsoup" default="dist">
+
+ <!-- generic properties: build.properties supplies tagsoup.version -->
+ <property file="etc/build/build.properties"/>
+ <!-- additional tasks (taskdefs.txt is an empty file in this import) -->
+ <taskdef file="etc/build/taskdefs.txt" classpath="bin"/>
+
+ <available property="transformer.factory"
+ classname="com.icl.saxon.TransformerFactoryImpl"
+ value="com.icl.saxon.TransformerFactoryImpl"/>
+ <available property="transformer.factory"
+ classname="net.sf.saxon.TransformerFactoryImpl"
+ value="net.sf.saxon.TransformerFactoryImpl"/>
+ <available property="transformer.factory"
+ classname="org.apache.xalan.processor.TransformerFactoryImpl"
+ value="org.apache.xalan.processor.TransformerFactoryImpl"/>
+ <available property="transformer.factory"
+ classname="com.sun.org.apache.xalan.processor.TransformerFactoryImpl"
+ value="com.sun.org.apache.xalan.processor.TransformerFactoryImpl"/> <!-- transformer.factory is echoed and passed to every xslt call in prepare-parser -->
+
+ <!-- folder settings: source, intermediate (tmp) and output directories used by the targets below -->
+ <property name="bin" value="bin"/>
+ <property name="src" value="src"/>
+ <property name="build" value="build"/>
+ <property name="dist" value="dist"/>
+ <property name="docs" value="docs"/>
+ <property name="tmp" value="tmp"/>
+
+
+ <!-- initialize project: currently only runs tstamp; most other targets depend on this -->
+ <target name="init" description="Init project.">
+ <tstamp/>
+ </target>
+
+
+ <!-- ensure the compile output folder (${build}) and the scratch folder (${tmp}) exist -->
+ <target name="prepare" description="Set up folders.">
+ <mkdir dir="${build}"/>
+ <mkdir dir="${tmp}"/>
+ </target>
+
+ <!-- Build a distribution jar file: compile, then hand the ${build} folder to jar-release as buildDir -->
+ <target name="dist" depends="init,compile"
+ description="Build a binary distribution file.">
+ <antcall target="jar-release">
+ <param name="buildDir" value="${build}"/>
+ <param name="version" value="${tagsoup.version}"/>
+ </antcall>
+ </target>
+
+
+ <target name="jar-release" depends="init"
+ description="Build a release jar file.">
+ <mkdir dir="${dist}/lib" /> <!-- buildDir (the jar's basedir below) is supplied as an antcall param by the dist target -->
+ <jar jarfile="${dist}/lib/tagsoup-${tagsoup.version}.jar" basedir="${buildDir}">
+ <manifest>
+ <attribute name="Version" value="${tagsoup.version}"/>
+ <attribute name="Main-Class" value="org.ccil.cowan.tagsoup.CommandLine"/>
+ </manifest>
+ </jar>
+ </target>
+
+
+ <!-- compile java sources: the hand-written ones in ${src}/java plus the generated ones in ${tmp}/src -->
+ <target name="compile" depends="init,prepare,build-parser"
+ description="Compile java classes.">
+ <javac source="1.4" target="1.4" srcdir="${src}/java" destdir="${build}" deprecation="on" verbose="off" debug="on">
+ <src path="${src}/java"/>
+ <src path="${tmp}/src"/>
+ </javac>
+ </target>
+
+<!-- prepare generation of the parser classes: XSLT turns the definition files into ${tmp}/HTML*.i fragments -->
+ <target depends="init,prepare" description="Prepare generation of parser classes." name="prepare-parser">
+
+ <echo>
+ Using ${transformer.factory} as the TransformerFactory
+ </echo>
+
+ <xslt in="${src}/definitions/html.tssl" out="${tmp}/HTMLModels.i"
+style="tssl/tssl-models.xslt">
+ <factory name="${transformer.factory}"/>
+ </xslt>
+ <xslt in="${src}/definitions/html.tssl" out="${tmp}/HTMLSchema.i"
+style="tssl/tssl.xslt">
+ <factory name="${transformer.factory}"/>
+ </xslt>
+ <xslt in="${src}/definitions/html.stml" out="${tmp}/HTMLScanner.i"
+style="stml/stml.xslt">
+ <factory name="${transformer.factory}"/>
+ </xslt>
+ </target>
+
+
+
+ <!-- patch the parser class files: call patch-file once per template, pairing each file-pref with its placeholder token -->
+ <target name="build-parser" depends="prepare-parser"
+ description="Generate parser class files.">
+ <property name="parser.pkg-path" value="org/ccil/cowan/tagsoup"/>
+ <mkdir dir="${tmp}/src/${parser.pkg-path}"/>
+ <antcall target="patch-file">
+ <param name="file-pref" value="HTMLModels"/>
+ <param name="token" value="MODEL_DEFINITIONS"/>
+ </antcall>
+ <antcall target="patch-file">
+ <param name="file-pref" value="HTMLSchema"/>
+ <param name="token" value="SCHEMA_CALLS"/>
+ </antcall>
+ <antcall target="patch-file">
+ <param name="file-pref" value="HTMLScanner"/>
+ <param name="token" value="STATE_TABLE"/>
+ </antcall>
+ </target>
+
+
+ <!-- patch one parser class file: copy the template named by file-pref, then replace its @@token@@ marker with the contents of ${tmp}/file-pref.i -->
+ <target name="patch-file" depends="" description="Patch a parser class file.">
+ <copy file="${src}/templates/${parser.pkg-path}/${file-pref}.java" toDir="${tmp}/src/${parser.pkg-path}"/>
+ <loadfile property="patch" srcFile="${tmp}/${file-pref}.i"/>
+ <replace file="${tmp}/src/${parser.pkg-path}/${file-pref}.java" token="@@${token}@@" value="${patch}"/>
+ </target>
+
+ <!-- clean up the mess: delete every generated folder (build, tmp, docs, dist) -->
+ <target name="clean" description="Clean up folders.">
+ <delete dir="${build}"/>
+ <delete dir="${tmp}"/>
+ <delete dir="${docs}"/>
+ <delete dir="${dist}"/>
+ </target>
+
+
+ <!-- generate javadoc for the java classes under ${src}/java into ${docs}/api; the page footer names the project license -->
+ <target name="docs-api" depends="init"
+ description="Generate javadoc documentation.">
+ <mkdir dir="${docs}/api"/>
+ <javadoc packagenames="org.*"
+ sourcepath="${src}/java" destdir="${docs}/api"
+ use="true"
+ windowtitle="TagSoup ${tagsoup.version} API">
+ <doctitle><![CDATA[<h1>TagSoup Package Documentation</h1>]]></doctitle>
+ <bottom><![CDATA[<em>Licence</em>: <strong>Apache License, Version 2.0</strong>]]></bottom>
+ </javadoc>
+ </target>
+
+</project>
diff --git a/etc/build/build.properties b/etc/build/build.properties
new file mode 100644
index 0000000..e372092
--- /dev/null
+++ b/etc/build/build.properties
@@ -0,0 +1 @@
+tagsoup.version = 1.2
diff --git a/etc/build/taskdefs.txt b/etc/build/taskdefs.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/etc/build/taskdefs.txt
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..1936ace
--- /dev/null
+++ b/index.html
@@ -0,0 +1,407 @@
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<html><head><title>TagSoup home page</title></head><body>
+<h1>TagSoup - Just Keep On Truckin'</h1>
+
+<h3>Introduction</h3>
+<p>This is the home page of TagSoup, a SAX-compliant parser written in Java
+that, instead of parsing well-formed or valid XML, parses HTML as it is
+found in the wild:
+<a href="http://oregonstate.edu/instruct/phl302/texts/hobbes/leviathan-c.html">
+poor, nasty and brutish</a>, though quite often far from short.
+TagSoup is designed for people who have to process this stuff using some
+semblance of a rational application design. By providing a SAX interface,
+it allows standard XML tools to be applied to even the worst HTML.
+TagSoup also includes a command-line processor that reads HTML files
+and can generate either clean HTML or well-formed XML that is a
+close approximation to XHTML.</p>
+
+<p>This is also the README file packaged with TagSoup.</p>
+
+<p>TagSoup is free and Open Source software. As of version 1.2, it
+is licensed under the
+<a href="http://opensource.org/licenses/apache2.0.php">
+Apache License, Version 2.0</a>, which allows proprietary re-use as well
+as use with GPL 3.0 or GPL 2.0-or-later projects. (If anyone needs a
+GPL 2.0 license for a GPL 2.0-only project, feel free to ask.)</p>
+
+<h3><i>Warning:</i> TagSoup will not build on stock Java 5.x or 6.x!</h3>
+
+<p>Due to a bug in the versions of Xalan shipped with Java 5.x and
+6.x, TagSoup will not build out of the box. You need to retrieve
+<a href="http://prdownloads.sourceforge.net/saxon/saxon6-5-5.zip">
+Saxon 6.5.5</a>, which does not have the bug. Unpack the
+zipfile in an empty directory and copy the <tt>saxon.jar</tt> and
+<tt>saxon-xml-apis.jar</tt> files to <tt>$ANT_HOME/lib</tt>. The Ant
+build process for TagSoup will then notice that Saxon is available and
+use it instead.</p>
+
+<h3>TagSoup 1.2 released</h3>
+
+<p>There are a great many changes, most of them fixes for long-standing
+bugs, in this release. Only the most important are listed here; for
+the rest, see the CHANGES file in the source distribution. Very special
+thanks to Jojo Dijamco, whose intensive efforts at debugging made this
+release a usable upgrade rather than a useless mass of undetected bugs.</p>
+
+<ul>
+
+<li><p>As noted above, I have changed the license to Apache 2.0.</p></li>
+
+<li><p>The default content model for bogons (unknown elements) is now
+ANY rather than EMPTY. <b>This is a breaking change</b>, which I have
+done only because there was so much demand for it. It can be undone
+on the command line with the <code>--emptybogons</code> switch, or
+programmatically with <code>parser.setFeature(Parser.emptyBogonsFeature,
+true)</code>.</p></li>
+
+<li><p>The processing of entity references in attribute values has
+finally been fixed to do what browsers do. That is, a reference
+is only recognized if it is properly terminated by a semicolon;
+otherwise it is treated as plain text. This means that URIs
+like <code>foo?cdown=32&amp;cup=42</code> are no longer seen as
+containing an instance of the &cup; character (whose name happens to
+be <code>cup</code>).</p></li>
+
+<li><p>Several new switches have been added:</p>
+
+<ul>
+
+<li><p><code>--doctype-system</code> and <code>--doctype-public</code>
+force a <code>DOCTYPE</code> declaration to be output and allow setting
+the system and public identifiers.</p></li>
+
+<li><p><code>--standalone</code> and <code>--version</code> allow control
+of the XML declaration that is output. (Note that TagSoup's XML output
+is always version 1.0, even if you use <code>--version=1.1</code>.)</p></li>
+
+<li><p><code>--norootbogons</code> causes unknown elements not to be allowed
+as the document root element. Instead, they are made children of the
+default root element (the <code>html</code> element for HTML).</p></li>
+
+</ul></li>
+<li><p>The TagSoup core now supports character entities with values
+above U+FFFF. As a consequence, the HTML schema now supports all
+2,210 standard character entities from the
+<a href="http://www.w3.org/TR/2007/WD-xml-entity-names-20071214">
+2007-12-14 draft of XML Entity Definitions for Characters</a>, except the
+94 which require more than one Unicode character to represent.</p></li>
+
+<li><p>The SAX events <code>startPrefixMapping</code> and
+<code>endPrefixMapping</code> are now being reported for all cases of
+foreign elements and attributes.</p></li>
+
+<li><p>All bugs around newline processing on Windows should now be gone.</p></li>
+
+<li><p>A number of content models have been loosened to allow elements
+to appear in new and non-standard (but commonly found) places.
+In particular, tables are now allowed inside paragraphs, against the
+letter of the W3C specification.</p></li>
+
+<li><p>Since the <code>span</code> element is intended for fine
+control of appearance using CSS, it should never have been a
+restartable element. This very long-standing bug has now been
+fixed.</p></li>
+
+<li><p>The following non-standard elements are now at least partly
+supported: <code>bgsound</code>, <code>blink</code>, <code>canvas</code>,
+<code>comment</code>, <code>listing</code>, <code>marquee</code>,
+<code>nobr</code>, <code>rbc</code>, <code>rb</code>, <code>rp</code>,
+<code>rtc</code>, <code>rt</code>, <code>ruby</code>, <code>wbr</code>,
+<code>xmp</code>.</p></li>
+
+<li><p>In HTML output mode, boolean attributes like <code>checked</code>
+are now output as such, rather than in XML style as
+<code>checked="checked"</code>.</p></li>
+
+<li><p>Runs of &lt; characters such as &lt;&lt; and &lt;&lt;&lt; are now
+handled correctly in text rather than being transformed into extremely
+bogus start-tags.</p></li>
+
+</ul>
+
+<p><a href="tagsoup-1.2.jar">Download</a> the TagSoup 1.2 jar
+file here.
+It's about 87K long.<br/>
+<a href="tagsoup-1.2-src.zip">Download</a> the full TagSoup 1.2 source
+here. If you don't have zip, you can use jar to unpack it. <br/>
+<a href="CHANGES">Download</a> the current CHANGES file here.</p>
+
+<h3>TagSoup 1.1 released</h3>
+
+<p>TagSoup 1.1 adds Tatu Saloranta's JAXP support for TagSoup.
+To use TagSoup within the JAXP framework (which is not
+something I necessarily recommend, but it is part of the Java
+XML platform), you can create a <tt>SAXParser</tt> by calling
+<tt>org.ccil.cowan.tagsoup.jaxp.SAXParserImpl.newInstance()</tt>. You can
+also set the system property <tt>javax.xml.parsers.SAXParserFactory</tt>
+to <tt>org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl</tt>, but <i>be
+aware</i> that doing this will cause all JAXP-based XML parsing to go
+through TagSoup, which is a Bad Thing if your application also reads
+XML documents.</p>
+
+<h3>What TagSoup does</h3>
+<p>TagSoup is designed as a parser, not a whole application; it isn't
+intended to permanently clean up bad HTML, as
+<a href="http://tidy.sf.net">HTML Tidy</a> does, only to
+parse it on the fly. Therefore, it does not convert presentation HTML
+to CSS or anything similar. It does guarantee well-structured results:
+tags will wind up properly nested, default attributes will appear
+appropriately, and so on.</p>
+
+<p>The semantics of TagSoup are as far as practical those of actual HTML
+browsers. In particular, never, never will it throw any sort of syntax
+error: the TagSoup motto is
+<a href="http://www.crumbmuseum.com/truckin.html">
+"Just Keep On Truckin'"</a>. But there's much,
+much more. For example, if the first tag is LI, it will supply the
+application with enclosing HTML, BODY, and UL tags. Why UL? Because
+that's what browsers assume in this situation. For the same reason,
+overlapping tags are correctly restarted whenever possible: text like:</p>
+
+<pre>This is &lt;B>bold, &lt;I>bold italic, &lt;/b>italic, &lt;/i>normal text
+</pre>
+
+<p>gets correctly rewritten as:</p>
+
+<pre>This is &lt;b>bold, &lt;i>bold italic, &lt;/i>&lt;/b>&lt;i>italic, &lt;/i>normal text.
+</pre>
+
+<p>By intention, TagSoup is small and fast. It does not
+depend on the existence of any framework other than SAX, and should be
+able to work with any framework that can accept SAX parsers.
+In particular, <a href="http://www.cafeconleche.org/XOM">XOM</a>
+is known to work.</p>
+
+<p>You can replace the low-level HTML scanner with one based on Sean McGrath's
+<a href="http://gnosis.cx/publish/programming/xml_matters_17.html">PYX</a>
+format (very close to James Clark's ESIS format). You can also supply
+an AutoDetector that peeks at the incoming byte stream and guesses a
+character encoding for it. Otherwise, the platform default is used.
+If you need an autodetector of character sets, consider trying to
+adapt the <a href="http://jchardet.sourceforge.net/">Mozilla one</a>;
+if you succeed, let me know.</p>
+
+<h3>Note: TagSoup in Java 1.1</h3>
+
+<p>If you go through the TagSoup source and replace all references to
+<code>HashMap</code> with <code>Hashtable</code> and recompile,
+TagSoup will work fine in Java 1.1 VMs. Thanks to Thorbj&oslash;rn
+Vinne for this discovery.</p>
+
+<h3>The TSaxon XSLT-for-HTML processor</h3>
+<p><a href="http://www.ccil.org/~cowan">I</a> am also distributing
+<a href="tsaxon">TSaxon</a>, a repackaging of version 6.5.5 of Michael
+Kay's Saxon XSLT version 1.0 implementation that includes TagSoup.
+TSaxon is a drop-in replacement for Saxon, and can be used to process
+either HTML or XML documents with XSLT stylesheets.</p>
+
+
+<h3>TagSoup as a stand-alone program</h3>
+<p>It is possible to run TagSoup as a program by saying <code>java
+-jar tagsoup-1.2.jar [<i>option ...</i>] [<i>file ...</i>]</code>.
+Files mentioned on the command line will be parsed individually. If no
+files are specified, the standard input is read.</p>
+
+<p>The following options are understood:</p>
+
+<dl>
+<dt><code>--files</code></dt>
+<dd>Output into individual files, with <code>html</code> extensions changed
+to <code>xhtml</code>. Otherwise, all output is sent to the standard output.</dd>
+
+<dt><code>--html</code></dt>
+<dd>Output is in clean HTML: the XML declaration is suppressed, as are end-tags
+for the known empty elements.</dd>
+
+<dt><code>--omit-xml-declaration</code></dt>
+<dd>The XML declaration is suppressed.</dd>
+
+<dt><code>--method=html</code></dt>
+<dd>End-tags for the known empty HTML elements are suppressed.</dd>
+
+<dt><code>--doctype-system=<i>systemid</i></code></dt>
+<dd>Forces the output of a <code>DOCTYPE</code> declaration with the specified systemid.</dd>
+
+<dt><code>--doctype-public=<i>publicid</i></code></dt>
+<dd>Forces the output of a <code>DOCTYPE</code> declaration with the specified publicid.</dd>
+
+<dt><code>--version=<i>version</i></code></dt>
+<dd>Sets the version string in the XML declaration.</dd>
+
+<dt><code>--standalone=</code>[<code>yes</code>|<code>no</code>]</dt>
+<dd>Sets the standalone declaration to yes or no.</dd>
+
+<dt><code>--pyx</code></dt>
+<dd>Output is in PYX format.</dd>
+
+<dt><code>--pyxin</code></dt>
+<dd>Input is in PYXoid format (need not be well-formed).</dd>
+
+<dt><code>--nons</code></dt>
+<dd>Namespaces are suppressed. Normally, all elements are in the XHTML 1.x
+namespace, and all attributes are in no namespace.</dd>
+
+<dt><code>--nobogons</code></dt>
+<dd>Bogons (unknown elements) are suppressed.</dd>
+
+<dt><code>--nodefaults</code></dt>
+<dd>Default attribute values are suppressed.</dd>
+
+<dt><code>--nocolons</code></dt>
+<dd>Explicit colons in element and attribute names are changed to underscores.</dd>
+
+<dt><code>--norestart</code></dt>
+<dd>Normally restartable elements are not restarted.</dd>
+
+<dt><code>--ignorable</code></dt>
+<dd>Whitespace in elements with element-only content is output.</dd>
+
+<dt><code>--emptybogons</code></dt>
+<dd>Bogons are given a content model of EMPTY rather than ANY.</dd>
+
+<dt><code>--any</code></dt>
+<dd>Bogons are given a content model of ANY rather than EMPTY (default).</dd>
+
+<dt><code>--norootbogons</code></dt>
+<dd>Don't allow bogons to be root elements; make them subordinate to the root.</dd>
+
+<dt><code>--lexical</code></dt>
+<dd>Pass through HTML comments and DOCTYPE declarations. Has no effect
+when output is in PYX format.</dd>
+
+<dt><code>--reuse</code></dt>
+<dd>Reuse a single instance of TagSoup parser throughout. Normally, a new one
+is instantiated for each input file.</dd>
+
+<dt><code>--nocdata</code></dt>
+<dd>Change the content models of the <code>script</code> and <code>style</code> elements
+to treat them as ordinary #PCDATA (text-only) elements, as in XHTML, rather than
+with the special CDATA content model.</dd>
+
+<dt><code>--encoding=</code><i>encoding</i></dt>
+<dd>Specify the input encoding. The default is the Java platform default.</dd>
+
+<dt><code>--output-encoding=</code><i>encoding</i></dt>
+<dd>Specify the output encoding. The default is the Java platform default.</dd>
+
+<dt><code>--help</code></dt>
+<dd>Print help.</dd>
+
+<dt><code>--version</code></dt>
+<dd>Print the version number.</dd>
+
+</dl>
+
+<a name="properties"></a><h3>SAX features and properties</h3>
+
+<p>TagSoup supports the following SAX features in addition to the
+standard ones:</p>
+
+<dl>
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons</tt></dt>
+<dd>A value of "true" indicates that the parser will ignore
+unknown elements.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/bogons-empty</tt></dt>
+<dd>A value of "true" indicates that the parser will give unknown
+elements a content model of EMPTY; a value of "false", a
+content model of ANY.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/root-bogons</tt></dt>
+<dd>A value of "true" indicates that the parser will allow unknown
+elements to be the root of the output document.</dd>
+
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/default-attributes</tt></dt>
+<dd>A value of "true" indicates that the parser will return default
+attribute values for missing attributes that have default values.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/translate-colons</tt></dt>
+<dd>A value of "true" indicates that the parser will
+translate colons into underscores in names.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/restart-elements</tt></dt>
+<dd>A value of "true" indicates that the parser will
+attempt to restart the restartable elements.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace</tt></dt>
+<dd>A value of "true" indicates that the parser will
+transmit whitespace in element-only content via the SAX
+ignorableWhitespace callback. Normally this is not done,
+because HTML is an SGML application and SGML suppresses
+such whitespace.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/features/cdata-elements</tt></dt>
+<dd>A value of "true" indicates that the parser will
+process the <tt>script</tt> and <tt>style</tt> elements
+(or any elements with <tt>type='cdata'</tt> in the TSSL schema)
+as SGML CDATA elements (that is, no markup is recognized except
+the matching end-tag).</dd>
+
+</dl>
+
+<p>TagSoup supports the following SAX properties in addition to
+the standard ones:</p>
+
+<dl>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/properties/scanner</tt></dt>
+<dd>Specifies the Scanner object this parser uses.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/properties/schema</tt></dt>
+<dd>Specifies the Schema object this parser uses.</dd>
+
+<dt><tt>http://www.ccil.org/~cowan/tagsoup/properties/auto-detector</tt></dt>
+<dd>Specifies the AutoDetector (for encoding detection) this parser uses.</dd>
+
+</dl>
+
+<h3>More information</h3>
+<p>I gave a presentation (a nocturne, so it's not on the schedule) at
+<a href="http://www.extrememarkup.com/extreme/2004">Extreme Markup Languages 2004</a>
+about TagSoup, updated from the one
+presented in 2002 at the New York City XML SIG and at XML 2002.
+This is the main high-level documentation about how TagSoup works.
+Formats:
+<a href="tagsoup.odp">OpenDocument</a>,
+<a href="tagsoup.ppt">PowerPoint</a>,
+<a href="tagsoup.pdf">PDF</a>.</p>
+
+<p>I also had people add <a href="extreme.html">"evil" HTML</a> to a large
+poster so that I could <a href="extreme.xhtml">clean it up</a>;
+View Source is probably more useful than ordinary browsing.
+The original instructions were:</p>
+
+<p align="center">SOUPE DE BALISES (BE EVIL)!<br/>
+Ecritez une balise ouvrante (sans attributs)<br/> ou fermante HTML ici, s.v.p.</p>
+
+
+<p>There is a <a href="http://groups.yahoo.com/group/tagsoup-friends">
+tagsoup-friends</a> mailing list hosted at <a href="http://groups.yahoo.com">
+Yahoo Groups</a>. You can
+<a href="http://groups.yahoo.com/group/tagsoup-friends/join">join</a>
+via the Web, or by sending a blank email to
+<a href="mailto:tagsoup-friends-subscribe@yahoogroups.com"><i>
+tagsoup-friends-subscribe@yahoogroups.com</i></a>.
+The <a href="http://groups.yahoo.com/group/tagsoup-friends/messages">
+archives</a> are open to all.</p>
+
+<p>Online TagSoup processing for publicly accessible HTML documents
+is now <a href="http://xmlarmyknife.org/docs/xhtml/tagsoup/">available</a>
+courtesy of Leigh Dodds.</p>
diff --git a/src/definitions/html.stml b/src/definitions/html.stml
new file mode 100644
index 0000000..4cab973
--- /dev/null
+++ b/src/definitions/html.stml
@@ -0,0 +1,249 @@
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<statetable xmlns='http://www.ccil.org/~cowan/XML/tagsoup/stml'
+ version='1.0'>
+
+ <symbol id='EOF'/>
+ <symbol id='S'/>
+ <symbol id='default'/>
+
+ <action id='A_ADUP'/>
+ <action id='A_ADUP_SAVE'/>
+ <action id='A_ADUP_STAGC'/>
+ <action id='A_ANAME'/>
+ <action id='A_ANAME_ADUP_STAGC'/>
+ <action id='A_AVAL'/>
+ <action id='A_AVAL_STAGC'/>
+ <action id='A_CDATA'/>
+ <action id='A_CMNT'/>
+ <action id='A_DECL'/>
+ <action id='A_ENTITY'/>
+ <action id='A_ENTITY_START'/>
+ <action id='A_ETAG'/>
+ <action id='A_EMPTYTAG'/>
+ <action id='A_ANAME_ADUP'/>
+ <action id='A_GI'/>
+ <action id='A_GI_STAGC'/>
+ <action id='A_LT'/>
+ <action id='A_LT_PCDATA'/>
+ <action id='A_MINUS'/>
+ <action id='A_MINUS2'/>
+ <action id='A_MINUS3'/>
+ <action id='A_PCDATA'/>
+ <action id='A_PI'/>
+ <action id='A_PITARGET'/>
+ <action id='A_PITARGET_PI'/>
+ <action id='A_SAVE'/>
+ <action id='A_SKIP'/>
+ <action id='A_SP'/>
+ <action id='A_STAGC'/>
+ <action id='A_UNGET'/>
+ <action id='A_UNSAVE_PCDATA'/>
+
+ <state id='S_ANAME'>
+ <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
+ <tr char='=' action='A_ANAME' newstate='S_AVAL'/>
+ <tr char='>' action='A_ANAME_ADUP_STAGC' newstate='S_PCDATA'/>
+ <tr char='/' action='A_ANAME_ADUP' newstate='S_EMPTYTAG'/>
+ <tr symbol='EOF' action='A_ANAME_ADUP_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_ANAME' newstate='S_EQ'/>
+ </state>
+ <state id='S_APOS'>
+ <tr symbol='default' action='A_SAVE' newstate='S_APOS'/>
+ <tr char='&apos;' action='A_AVAL' newstate='S_TAGWS'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SP' newstate='S_APOS'/>
+ </state>
+ <state id='S_AVAL'>
+ <tr symbol='default' action='A_SAVE' newstate='S_STAGC'/>
+ <tr char='"' action='A_SKIP' newstate='S_QUOT'/>
+ <tr char='&apos;' action='A_SKIP' newstate='S_APOS'/>
+ <tr char='>' action='A_AVAL_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_AVAL'/>
+ </state>
+ <state id='S_CDATA'>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDATA'/>
+ <tr char='&lt;' action='A_SAVE' newstate='S_CDATA2'/>
+ <tr symbol='EOF' action='A_PCDATA' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDATA2'>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDATA'/>
+ <tr char='/' action='A_UNSAVE_PCDATA' newstate='S_ETAG'/>
+ <tr symbol='EOF' action='A_UNSAVE_PCDATA' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM'>
+ <tr symbol='default' action='A_SAVE' newstate='S_COM2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM2'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM2'>
+ <tr symbol='default' action='A_SAVE' newstate='S_COM2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM3'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM3'>
+ <tr symbol='default' action='A_MINUS' newstate='S_COM2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM4'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM4'>
+ <tr symbol='default' action='A_MINUS2' newstate='S_COM2'/>
+ <tr char='-' action='A_MINUS3' newstate='S_COM4'/>
+ <tr char='>' action='A_CMNT' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_DECL'>
+ <tr symbol='default' action='A_SAVE' newstate='S_DECL2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM'/>
+ <tr char='[' action='A_SKIP' newstate='S_BB'/>
+ <tr char='>' action='A_SKIP' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_DECL2'>
+ <tr symbol='default' action='A_SAVE' newstate='S_DECL2'/>
+ <tr char='>' action='A_DECL' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_ENT'>
+ <tr symbol='default' action='A_ENTITY' newstate='S_ENT'/>
+ <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
+ </state>
+ <state id='S_EQ'>
+ <tr symbol='default' action='A_ADUP_SAVE' newstate='S_ANAME'/>
+ <tr char='=' action='A_SKIP' newstate='S_AVAL'/>
+ <tr char='>' action='A_ADUP_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_ADUP_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_EQ'/>
+ </state>
+ <state id='S_ETAG'>
+ <tr symbol='default' action='A_SAVE' newstate='S_ETAG'/>
+ <tr char='>' action='A_ETAG' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_ETAG' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_ETAG'/>
+ </state>
+ <state id='S_GI'>
+ <tr symbol='default' action='A_SAVE' newstate='S_GI'/>
+ <tr char='/' action='A_SKIP' newstate='S_EMPTYTAG'/>
+ <tr char='>' action='A_GI_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ <tr symbol='S' action='A_GI' newstate='S_TAGWS'/>
+ </state>
+ <state id='S_NCR'>
+ <tr symbol='default' action='A_ENTITY' newstate='S_NCR'/>
+ <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
+ </state>
+ <state id='S_XNCR'>
+ <tr symbol='default' action='A_ENTITY' newstate='S_XNCR'/>
+ <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
+ </state>
+ <state id='S_PCDATA'>
+ <tr symbol='default' action='A_SAVE' newstate='S_PCDATA'/>
+ <tr char='&amp;' action='A_ENTITY_START' newstate='S_ENT'/>
+ <tr char='&lt;' action='A_PCDATA' newstate='S_TAG'/>
+ <tr symbol='EOF' action='A_PCDATA' newstate='S_DONE'/>
+ </state>
+ <state id='S_PI'>
+ <tr symbol='default' action='A_SAVE' newstate='S_PI'/>
+ <tr char='>' action='A_PI' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_PI' newstate='S_DONE'/>
+ </state>
+ <state id='S_PITARGET'>
+ <tr symbol='default' action='A_SAVE' newstate='S_PITARGET'/>
+ <tr char='>' action='A_PITARGET_PI' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_PITARGET_PI' newstate='S_DONE'/>
+ <tr symbol='S' action='A_PITARGET' newstate='S_PI'/>
+ </state>
+ <state id='S_QUOT'>
+ <tr symbol='default' action='A_SAVE' newstate='S_QUOT'/>
+ <tr char='"' action='A_AVAL' newstate='S_TAGWS'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SP' newstate='S_QUOT'/>
+ </state>
+ <state id='S_STAGC'>
+ <tr symbol='default' action='A_SAVE' newstate='S_STAGC'/>
+ <tr char='>' action='A_AVAL_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_AVAL' newstate='S_TAGWS'/>
+ </state>
+ <state id='S_TAG'>
+ <tr symbol='default' action='A_SAVE' newstate='S_GI'/>
+ <tr char='!' action='A_SKIP' newstate='S_DECL'/>
+ <tr char='/' action='A_SKIP' newstate='S_ETAG'/>
+ <tr char='?' action='A_SKIP' newstate='S_PITARGET'/>
+ <tr char='&lt;' action='A_SAVE' newstate='S_TAG'/>
+ <tr symbol='EOF' action='A_LT_PCDATA' newstate='S_DONE'/>
+ <tr symbol='S' action='A_LT' newstate='S_PCDATA'/>
+ </state>
+ <state id='S_TAGWS'>
+ <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
+ <tr char='/' action='A_SKIP' newstate='S_EMPTYTAG'/>
+ <tr char='>' action='A_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_TAGWS'/>
+ </state>
+ <state id='S_EMPTYTAG'>
+ <tr symbol='S' action='A_SKIP' newstate='S_TAGWS'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
+ <tr char='>' action='A_EMPTYTAG' newstate='S_PCDATA'/>
+ </state>
+ <state id='S_BB'>
+ <tr char='C' action='A_SKIP' newstate='S_BBC'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBC'>
+ <tr char='D' action='A_SKIP' newstate='S_BBCD'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCD'>
+ <tr char='A' action='A_SKIP' newstate='S_BBCDA'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCDA'>
+ <tr char='T' action='A_SKIP' newstate='S_BBCDAT'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCDAT'>
+ <tr char='A' action='A_SKIP' newstate='S_BBCDATA'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCDATA'>
+ <tr char='[' action='A_SKIP' newstate='S_CDSECT'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDSECT'>
+ <tr char=']' action='A_SAVE' newstate='S_CDSECT1'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDSECT1'>
+ <tr char=']' action='A_SAVE' newstate='S_CDSECT2'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDSECT2'>
+ <tr char='>' action='A_CDATA' newstate='S_PCDATA'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_DONE'/>
+</statetable>
diff --git a/src/definitions/html.tssl b/src/definitions/html.tssl
new file mode 100644
index 0000000..7207862
--- /dev/null
+++ b/src/definitions/html.tssl
@@ -0,0 +1,2762 @@
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<schema xmlns='http://www.ccil.org/~cowan/XML/tagsoup/tssl'
+ ns='http://www.w3.org/1999/xhtml' name='html'
+ prefix='html' version='1.0'>
+
+ <entity name='Aacgr' codepoint='0386'/>
+ <entity name='aacgr' codepoint='03AC'/>
+ <entity name='Aacute' codepoint='00C1'/>
+ <entity name='aacute' codepoint='00E1'/>
+ <entity name='Abreve' codepoint='0102'/>
+ <entity name='abreve' codepoint='0103'/>
+ <entity name='ac' codepoint='223E'/>
+ <entity name='acd' codepoint='223F'/>
+ <entity name='Acirc' codepoint='00C2'/>
+ <entity name='acirc' codepoint='00E2'/>
+ <entity name='acute' codepoint='00B4'/>
+ <entity name='Acy' codepoint='0410'/>
+ <entity name='acy' codepoint='0430'/>
+ <entity name='AElig' codepoint='00C6'/>
+ <entity name='aelig' codepoint='00E6'/>
+ <entity name='af' codepoint='2061'/>
+ <entity name='Afr' codepoint='1D504'/>
+ <entity name='afr' codepoint='1D51E'/>
+ <entity name='Agr' codepoint='0391'/>
+ <entity name='agr' codepoint='03B1'/>
+ <entity name='Agrave' codepoint='00C0'/>
+ <entity name='agrave' codepoint='00E0'/>
+ <entity name='alefsym' codepoint='2135'/>
+ <entity name='aleph' codepoint='2135'/>
+ <entity name='Alpha' codepoint='0391'/>
+ <entity name='alpha' codepoint='03B1'/>
+ <entity name='Amacr' codepoint='0100'/>
+ <entity name='amacr' codepoint='0101'/>
+ <entity name='amalg' codepoint='2A3F'/>
+ <entity name='amp' codepoint='0026'/>
+ <entity name='and' codepoint='2227'/>
+ <entity name='And' codepoint='2A53'/>
+ <entity name='andand' codepoint='2A55'/>
+ <entity name='andd' codepoint='2A5C'/>
+ <entity name='andslope' codepoint='2A58'/>
+ <entity name='andv' codepoint='2A5A'/>
+ <entity name='ang' codepoint='2220'/>
+ <entity name='ange' codepoint='29A4'/>
+ <entity name='angle' codepoint='2220'/>
+ <entity name='angmsd' codepoint='2221'/>
+ <entity name='angmsdaa' codepoint='29A8'/>
+ <entity name='angmsdab' codepoint='29A9'/>
+ <entity name='angmsdac' codepoint='29AA'/>
+ <entity name='angmsdad' codepoint='29AB'/>
+ <entity name='angmsdae' codepoint='29AC'/>
+ <entity name='angmsdaf' codepoint='29AD'/>
+ <entity name='angmsdag' codepoint='29AE'/>
+ <entity name='angmsdah' codepoint='29AF'/>
+ <entity name='angrt' codepoint='221F'/>
+ <entity name='angrtvb' codepoint='22BE'/>
+ <entity name='angrtvbd' codepoint='299D'/>
+ <entity name='angsph' codepoint='2222'/>
+ <entity name='angst' codepoint='212B'/>
+ <entity name='angzarr' codepoint='237C'/>
+ <entity name='Aogon' codepoint='0104'/>
+ <entity name='aogon' codepoint='0105'/>
+ <entity name='Aopf' codepoint='1D538'/>
+ <entity name='aopf' codepoint='1D552'/>
+ <entity name='ap' codepoint='2248'/>
+ <entity name='apacir' codepoint='2A6F'/>
+ <entity name='ape' codepoint='224A'/>
+ <entity name='apE' codepoint='2A70'/>
+ <entity name='apid' codepoint='224B'/>
+ <entity name='apos' codepoint='0027'/>
+ <entity name='ApplyFunction' codepoint='2061'/>
+ <entity name='approx' codepoint='2248'/>
+ <entity name='approxeq' codepoint='224A'/>
+ <entity name='Aring' codepoint='00C5'/>
+ <entity name='aring' codepoint='00E5'/>
+ <entity name='Ascr' codepoint='1D49C'/>
+ <entity name='ascr' codepoint='1D4B6'/>
+ <entity name='Assign' codepoint='2254'/>
+ <entity name='ast' codepoint='002A'/>
+ <entity name='asymp' codepoint='2248'/>
+ <entity name='asympeq' codepoint='224D'/>
+ <entity name='Atilde' codepoint='00C3'/>
+ <entity name='atilde' codepoint='00E3'/>
+ <entity name='Auml' codepoint='00C4'/>
+ <entity name='auml' codepoint='00E4'/>
+ <entity name='awconint' codepoint='2233'/>
+ <entity name='awint' codepoint='2A11'/>
+ <entity name='b.alpha' codepoint='1D6C2'/>
+ <entity name='b.beta' codepoint='1D6C3'/>
+ <entity name='b.chi' codepoint='1D6D8'/>
+ <entity name='b.Delta' codepoint='1D6AB'/>
+ <entity name='b.delta' codepoint='1D6C5'/>
+ <entity name='b.epsi' codepoint='1D6C6'/>
+ <entity name='b.epsiv' codepoint='1D6DC'/>
+ <entity name='b.eta' codepoint='1D6C8'/>
+ <entity name='b.Gamma' codepoint='1D6AA'/>
+ <entity name='b.gamma' codepoint='1D6C4'/>
+ <entity name='b.Gammad' codepoint='1D7CA'/>
+ <entity name='b.gammad' codepoint='1D7CB'/>
+ <entity name='b.iota' codepoint='1D6CA'/>
+ <entity name='b.kappa' codepoint='1D6CB'/>
+ <entity name='b.kappav' codepoint='1D6DE'/>
+ <entity name='b.Lambda' codepoint='1D6B2'/>
+ <entity name='b.lambda' codepoint='1D6CC'/>
+ <entity name='b.mu' codepoint='1D6CD'/>
+ <entity name='b.nu' codepoint='1D6CE'/>
+ <entity name='b.Omega' codepoint='1D6C0'/>
+ <entity name='b.omega' codepoint='1D6DA'/>
+ <entity name='b.Phi' codepoint='1D6BD'/>
+ <entity name='b.phi' codepoint='1D6D7'/>
+ <entity name='b.phiv' codepoint='1D6DF'/>
+ <entity name='b.Pi' codepoint='1D6B7'/>
+ <entity name='b.pi' codepoint='1D6D1'/>
+ <entity name='b.piv' codepoint='1D6E1'/>
+ <entity name='b.Psi' codepoint='1D6BF'/>
+ <entity name='b.psi' codepoint='1D6D9'/>
+ <entity name='b.rho' codepoint='1D6D2'/>
+ <entity name='b.rhov' codepoint='1D6E0'/>
+ <entity name='b.Sigma' codepoint='1D6BA'/>
+ <entity name='b.sigma' codepoint='1D6D4'/>
+ <entity name='b.sigmav' codepoint='1D6D3'/>
+ <entity name='b.tau' codepoint='1D6D5'/>
+ <entity name='b.Theta' codepoint='1D6AF'/>
+ <entity name='b.thetas' codepoint='1D6C9'/>
+ <entity name='b.thetav' codepoint='1D6DD'/>
+ <entity name='b.Upsi' codepoint='1D6BC'/>
+ <entity name='b.upsi' codepoint='1D6D6'/>
+ <entity name='b.Xi' codepoint='1D6B5'/>
+ <entity name='b.xi' codepoint='1D6CF'/>
+ <entity name='b.zeta' codepoint='1D6C7'/>
+ <entity name='backcong' codepoint='224C'/>
+ <entity name='backepsilon' codepoint='03F6'/>
+ <entity name='backprime' codepoint='2035'/>
+ <entity name='backsim' codepoint='223D'/>
+ <entity name='backsimeq' codepoint='22CD'/>
+ <entity name='Backslash' codepoint='2216'/>
+ <entity name='Barv' codepoint='2AE7'/>
+ <entity name='barvee' codepoint='22BD'/>
+ <entity name='barwed' codepoint='2305'/>
+ <entity name='Barwed' codepoint='2306'/>
+ <entity name='barwedge' codepoint='2305'/>
+ <entity name='bbrk' codepoint='23B5'/>
+ <entity name='bbrktbrk' codepoint='23B6'/>
+ <entity name='bcong' codepoint='224C'/>
+ <entity name='Bcy' codepoint='0411'/>
+ <entity name='bcy' codepoint='0431'/>
+ <entity name='bdquo' codepoint='201E'/>
+ <entity name='becaus' codepoint='2235'/>
+ <entity name='because' codepoint='2235'/>
+ <entity name='bemptyv' codepoint='29B0'/>
+ <entity name='bepsi' codepoint='03F6'/>
+ <entity name='bernou' codepoint='212C'/>
+ <entity name='Bernoullis' codepoint='212C'/>
+ <entity name='Beta' codepoint='0392'/>
+ <entity name='beta' codepoint='03B2'/>
+ <entity name='beth' codepoint='2136'/>
+ <entity name='between' codepoint='226C'/>
+ <entity name='Bfr' codepoint='1D505'/>
+ <entity name='bfr' codepoint='1D51F'/>
+ <entity name='Bgr' codepoint='0392'/>
+ <entity name='bgr' codepoint='03B2'/>
+ <entity name='bigcap' codepoint='22C2'/>
+ <entity name='bigcirc' codepoint='25EF'/>
+ <entity name='bigcup' codepoint='22C3'/>
+ <entity name='bigodot' codepoint='2A00'/>
+ <entity name='bigoplus' codepoint='2A01'/>
+ <entity name='bigotimes' codepoint='2A02'/>
+ <entity name='bigsqcup' codepoint='2A06'/>
+ <entity name='bigstar' codepoint='2605'/>
+ <entity name='bigtriangledown' codepoint='25BD'/>
+ <entity name='bigtriangleup' codepoint='25B3'/>
+ <entity name='biguplus' codepoint='2A04'/>
+ <entity name='bigvee' codepoint='22C1'/>
+ <entity name='bigwedge' codepoint='22C0'/>
+ <entity name='bkarow' codepoint='290D'/>
+ <entity name='blacklozenge' codepoint='29EB'/>
+ <entity name='blacksquare' codepoint='25AA'/>
+ <entity name='blacktriangle' codepoint='25B4'/>
+ <entity name='blacktriangledown' codepoint='25BE'/>
+ <entity name='blacktriangleleft' codepoint='25C2'/>
+ <entity name='blacktriangleright' codepoint='25B8'/>
+ <entity name='blank' codepoint='2423'/>
+ <entity name='blk12' codepoint='2592'/>
+ <entity name='blk14' codepoint='2591'/>
+ <entity name='blk34' codepoint='2593'/>
+ <entity name='block' codepoint='2588'/>
+ <entity name='bnot' codepoint='2310'/>
+ <entity name='bNot' codepoint='2AED'/>
+ <entity name='Bopf' codepoint='1D539'/>
+ <entity name='bopf' codepoint='1D553'/>
+ <entity name='bot' codepoint='22A5'/>
+ <entity name='bottom' codepoint='22A5'/>
+ <entity name='bowtie' codepoint='22C8'/>
+ <entity name='boxbox' codepoint='29C9'/>
+ <entity name='boxdl' codepoint='2510'/>
+ <entity name='boxdL' codepoint='2555'/>
+ <entity name='boxDl' codepoint='2556'/>
+ <entity name='boxDL' codepoint='2557'/>
+ <entity name='boxdr' codepoint='250C'/>
+ <entity name='boxdR' codepoint='2552'/>
+ <entity name='boxDr' codepoint='2553'/>
+ <entity name='boxDR' codepoint='2554'/>
+ <entity name='boxh' codepoint='2500'/>
+ <entity name='boxH' codepoint='2550'/>
+ <entity name='boxhd' codepoint='252C'/>
+ <entity name='boxHd' codepoint='2564'/>
+ <entity name='boxhD' codepoint='2565'/>
+ <entity name='boxHD' codepoint='2566'/>
+ <entity name='boxhu' codepoint='2534'/>
+ <entity name='boxHu' codepoint='2567'/>
+ <entity name='boxhU' codepoint='2568'/>
+ <entity name='boxHU' codepoint='2569'/>
+ <entity name='boxminus' codepoint='229F'/>
+ <entity name='boxplus' codepoint='229E'/>
+ <entity name='boxtimes' codepoint='22A0'/>
+ <entity name='boxul' codepoint='2518'/>
+ <entity name='boxuL' codepoint='255B'/>
+ <entity name='boxUl' codepoint='255C'/>
+ <entity name='boxUL' codepoint='255D'/>
+ <entity name='boxur' codepoint='2514'/>
+ <entity name='boxuR' codepoint='2558'/>
+ <entity name='boxUr' codepoint='2559'/>
+ <entity name='boxUR' codepoint='255A'/>
+ <entity name='boxv' codepoint='2502'/>
+ <entity name='boxV' codepoint='2551'/>
+ <entity name='boxvh' codepoint='253C'/>
+ <entity name='boxvH' codepoint='256A'/>
+ <entity name='boxVh' codepoint='256B'/>
+ <entity name='boxVH' codepoint='256C'/>
+ <entity name='boxvl' codepoint='2524'/>
+ <entity name='boxvL' codepoint='2561'/>
+ <entity name='boxVl' codepoint='2562'/>
+ <entity name='boxVL' codepoint='2563'/>
+ <entity name='boxvr' codepoint='251C'/>
+ <entity name='boxvR' codepoint='255E'/>
+ <entity name='boxVr' codepoint='255F'/>
+ <entity name='boxVR' codepoint='2560'/>
+ <entity name='bprime' codepoint='2035'/>
+ <entity name='breve' codepoint='02D8'/>
+ <entity name='brvbar' codepoint='00A6'/>
+ <entity name='Bscr' codepoint='212C'/>
+ <entity name='bscr' codepoint='1D4B7'/>
+ <entity name='bsemi' codepoint='204F'/>
+ <entity name='bsim' codepoint='223D'/>
+ <entity name='bsime' codepoint='22CD'/>
+ <entity name='bsol' codepoint='005C'/>
+ <entity name='bsolb' codepoint='29C5'/>
+ <entity name='bull' codepoint='2022'/>
+ <entity name='bullet' codepoint='2022'/>
+ <entity name='bump' codepoint='224E'/>
+ <entity name='bumpe' codepoint='224F'/>
+ <entity name='bumpE' codepoint='2AAE'/>
+ <entity name='Bumpeq' codepoint='224E'/>
+ <entity name='bumpeq' codepoint='224F'/>
+ <entity name='Cacute' codepoint='0106'/>
+ <entity name='cacute' codepoint='0107'/>
+ <entity name='cap' codepoint='2229'/>
+ <entity name='Cap' codepoint='22D2'/>
+ <entity name='capand' codepoint='2A44'/>
+ <entity name='capbrcup' codepoint='2A49'/>
+ <entity name='capcap' codepoint='2A4B'/>
+ <entity name='capcup' codepoint='2A47'/>
+ <entity name='capdot' codepoint='2A40'/>
+ <entity name='CapitalDifferentialD' codepoint='2145'/>
+ <entity name='caret' codepoint='2041'/>
+ <entity name='caron' codepoint='02C7'/>
+ <entity name='Cayleys' codepoint='212D'/>
+ <entity name='ccaps' codepoint='2A4D'/>
+ <entity name='Ccaron' codepoint='010C'/>
+ <entity name='ccaron' codepoint='010D'/>
+ <entity name='Ccedil' codepoint='00C7'/>
+ <entity name='ccedil' codepoint='00E7'/>
+ <entity name='Ccirc' codepoint='0108'/>
+ <entity name='ccirc' codepoint='0109'/>
+ <entity name='Cconint' codepoint='2230'/>
+ <entity name='ccups' codepoint='2A4C'/>
+ <entity name='ccupssm' codepoint='2A50'/>
+ <entity name='Cdot' codepoint='010A'/>
+ <entity name='cdot' codepoint='010B'/>
+ <entity name='cedil' codepoint='00B8'/>
+ <entity name='Cedilla' codepoint='00B8'/>
+ <entity name='cemptyv' codepoint='29B2'/>
+ <entity name='cent' codepoint='00A2'/>
+ <entity name='centerdot' codepoint='00B7'/>
+ <entity name='Cfr' codepoint='212D'/>
+ <entity name='cfr' codepoint='1D520'/>
+ <entity name='CHcy' codepoint='0427'/>
+ <entity name='chcy' codepoint='0447'/>
+ <entity name='check' codepoint='2713'/>
+ <entity name='checkmark' codepoint='2713'/>
+ <entity name='Chi' codepoint='03A7'/>
+ <entity name='chi' codepoint='03C7'/>
+ <entity name='cir' codepoint='25CB'/>
+ <entity name='circ' codepoint='02C6'/>
+ <entity name='circeq' codepoint='2257'/>
+ <entity name='circlearrowleft' codepoint='21BA'/>
+ <entity name='circlearrowright' codepoint='21BB'/>
+ <entity name='circledast' codepoint='229B'/>
+ <entity name='circledcirc' codepoint='229A'/>
+ <entity name='circleddash' codepoint='229D'/>
+ <entity name='CircleDot' codepoint='2299'/>
+ <entity name='circledR' codepoint='00AE'/>
+ <entity name='circledS' codepoint='24C8'/>
+ <entity name='CircleMinus' codepoint='2296'/>
+ <entity name='CirclePlus' codepoint='2295'/>
+ <entity name='CircleTimes' codepoint='2297'/>
+ <entity name='cire' codepoint='2257'/>
+ <entity name='cirE' codepoint='29C3'/>
+ <entity name='cirfnint' codepoint='2A10'/>
+ <entity name='cirmid' codepoint='2AEF'/>
+ <entity name='cirscir' codepoint='29C2'/>
+ <entity name='ClockwiseContourIntegral' codepoint='2232'/>
+ <entity name='CloseCurlyDoubleQuote' codepoint='201D'/>
+ <entity name='CloseCurlyQuote' codepoint='2019'/>
+ <entity name='clubs' codepoint='2663'/>
+ <entity name='clubsuit' codepoint='2663'/>
+ <entity name='colon' codepoint='003A'/>
+ <entity name='Colon' codepoint='2237'/>
+ <entity name='colone' codepoint='2254'/>
+ <entity name='Colone' codepoint='2A74'/>
+ <entity name='coloneq' codepoint='2254'/>
+ <entity name='comma' codepoint='002C'/>
+ <entity name='commat' codepoint='0040'/>
+ <entity name='comp' codepoint='2201'/>
+ <entity name='compfn' codepoint='2218'/>
+ <entity name='complement' codepoint='2201'/>
+ <entity name='complexes' codepoint='2102'/>
+ <entity name='cong' codepoint='2245'/>
+ <entity name='congdot' codepoint='2A6D'/>
+ <entity name='Congruent' codepoint='2261'/>
+ <entity name='conint' codepoint='222E'/>
+ <entity name='Conint' codepoint='222F'/>
+ <entity name='ContourIntegral' codepoint='222E'/>
+ <entity name='Copf' codepoint='2102'/>
+ <entity name='copf' codepoint='1D554'/>
+ <entity name='coprod' codepoint='2210'/>
+ <entity name='Coproduct' codepoint='2210'/>
+ <entity name='copy' codepoint='00A9'/>
+ <entity name='copysr' codepoint='2117'/>
+ <entity name='CounterClockwiseContourIntegral' codepoint='2233'/>
+ <entity name='crarr' codepoint='21B5'/>
+ <entity name='cross' codepoint='2717'/>
+ <entity name='Cross' codepoint='2A2F'/>
+ <entity name='Cscr' codepoint='1D49E'/>
+ <entity name='cscr' codepoint='1D4B8'/>
+ <entity name='csub' codepoint='2ACF'/>
+ <entity name='csube' codepoint='2AD1'/>
+ <entity name='csup' codepoint='2AD0'/>
+ <entity name='csupe' codepoint='2AD2'/>
+ <entity name='ctdot' codepoint='22EF'/>
+ <entity name='cudarrl' codepoint='2938'/>
+ <entity name='cudarrr' codepoint='2935'/>
+ <entity name='cuepr' codepoint='22DE'/>
+ <entity name='cuesc' codepoint='22DF'/>
+ <entity name='cularr' codepoint='21B6'/>
+ <entity name='cularrp' codepoint='293D'/>
+ <entity name='cup' codepoint='222A'/>
+ <entity name='Cup' codepoint='22D3'/>
+ <entity name='cupbrcap' codepoint='2A48'/>
+ <entity name='CupCap' codepoint='224D'/>
+ <entity name='cupcap' codepoint='2A46'/>
+ <entity name='cupcup' codepoint='2A4A'/>
+ <entity name='cupdot' codepoint='228D'/>
+ <entity name='cupor' codepoint='2A45'/>
+ <entity name='curarr' codepoint='21B7'/>
+ <entity name='curarrm' codepoint='293C'/>
+ <entity name='curlyeqprec' codepoint='22DE'/>
+ <entity name='curlyeqsucc' codepoint='22DF'/>
+ <entity name='curlyvee' codepoint='22CE'/>
+ <entity name='curlywedge' codepoint='22CF'/>
+ <entity name='curren' codepoint='00A4'/>
+ <entity name='curvearrowleft' codepoint='21B6'/>
+ <entity name='curvearrowright' codepoint='21B7'/>
+ <entity name='cuvee' codepoint='22CE'/>
+ <entity name='cuwed' codepoint='22CF'/>
+ <entity name='cwconint' codepoint='2232'/>
+ <entity name='cwint' codepoint='2231'/>
+ <entity name='cylcty' codepoint='232D'/>
+ <entity name='dagger' codepoint='2020'/>
+ <entity name='Dagger' codepoint='2021'/>
+ <entity name='daleth' codepoint='2138'/>
+ <entity name='darr' codepoint='2193'/>
+ <entity name='Darr' codepoint='21A1'/>
+ <entity name='dArr' codepoint='21D3'/>
+ <entity name='dash' codepoint='2010'/>
+ <entity name='dashv' codepoint='22A3'/>
+ <entity name='Dashv' codepoint='2AE4'/>
+ <entity name='dbkarow' codepoint='290F'/>
+ <entity name='dblac' codepoint='02DD'/>
+ <entity name='Dcaron' codepoint='010E'/>
+ <entity name='dcaron' codepoint='010F'/>
+ <entity name='Dcy' codepoint='0414'/>
+ <entity name='dcy' codepoint='0434'/>
+ <entity name='DD' codepoint='2145'/>
+ <entity name='dd' codepoint='2146'/>
+ <entity name='ddagger' codepoint='2021'/>
+ <entity name='ddarr' codepoint='21CA'/>
+ <entity name='DDotrahd' codepoint='2911'/>
+ <entity name='ddotseq' codepoint='2A77'/>
+ <entity name='deg' codepoint='00B0'/>
+ <entity name='Del' codepoint='2207'/>
+ <entity name='Delta' codepoint='0394'/>
+ <entity name='delta' codepoint='03B4'/>
+ <entity name='demptyv' codepoint='29B1'/>
+ <entity name='dfisht' codepoint='297F'/>
+ <entity name='Dfr' codepoint='1D507'/>
+ <entity name='dfr' codepoint='1D521'/>
+ <entity name='Dgr' codepoint='0394'/>
+ <entity name='dgr' codepoint='03B4'/>
+ <entity name='dHar' codepoint='2965'/>
+ <entity name='dharl' codepoint='21C3'/>
+ <entity name='dharr' codepoint='21C2'/>
+ <entity name='DiacriticalAcute' codepoint='00B4'/>
+ <entity name='DiacriticalDot' codepoint='02D9'/>
+ <entity name='DiacriticalDoubleAcute' codepoint='02DD'/>
+ <entity name='DiacriticalGrave' codepoint='0060'/>
+ <entity name='DiacriticalTilde' codepoint='02DC'/>
+ <entity name='diam' codepoint='22C4'/>
+ <entity name='diamond' codepoint='22C4'/>
+ <entity name='diamondsuit' codepoint='2666'/>
+ <entity name='diams' codepoint='2666'/>
+ <entity name='die' codepoint='00A8'/>
+ <entity name='DifferentialD' codepoint='2146'/>
+ <entity name='digamma' codepoint='03DD'/>
+ <entity name='disin' codepoint='22F2'/>
+ <entity name='div' codepoint='00F7'/>
+ <entity name='divide' codepoint='00F7'/>
+ <entity name='divideontimes' codepoint='22C7'/>
+ <entity name='divonx' codepoint='22C7'/>
+ <entity name='DJcy' codepoint='0402'/>
+ <entity name='djcy' codepoint='0452'/>
+ <entity name='dlcorn' codepoint='231E'/>
+ <entity name='dlcrop' codepoint='230D'/>
+ <entity name='dollar' codepoint='0024'/>
+ <entity name='Dopf' codepoint='1D53B'/>
+ <entity name='dopf' codepoint='1D555'/>
+ <entity name='Dot' codepoint='00A8'/>
+ <entity name='dot' codepoint='02D9'/>
+ <entity name='doteq' codepoint='2250'/>
+ <entity name='doteqdot' codepoint='2251'/>
+ <entity name='DotEqual' codepoint='2250'/>
+ <entity name='dotminus' codepoint='2238'/>
+ <entity name='dotplus' codepoint='2214'/>
+ <entity name='dotsquare' codepoint='22A1'/>
+ <entity name='doublebarwedge' codepoint='2306'/>
+ <entity name='DoubleContourIntegral' codepoint='222F'/>
+ <entity name='DoubleDot' codepoint='00A8'/>
+ <entity name='DoubleDownArrow' codepoint='21D3'/>
+ <entity name='DoubleLeftArrow' codepoint='21D0'/>
+ <entity name='DoubleLeftRightArrow' codepoint='21D4'/>
+ <entity name='DoubleLeftTee' codepoint='2AE4'/>
+ <entity name='DoubleLongLeftArrow' codepoint='27F8'/>
+ <entity name='DoubleLongLeftRightArrow' codepoint='27FA'/>
+ <entity name='DoubleLongRightArrow' codepoint='27F9'/>
+ <entity name='DoubleRightArrow' codepoint='21D2'/>
+ <entity name='DoubleRightTee' codepoint='22A8'/>
+ <entity name='DoubleUpArrow' codepoint='21D1'/>
+ <entity name='DoubleUpDownArrow' codepoint='21D5'/>
+ <entity name='DoubleVerticalBar' codepoint='2225'/>
+ <entity name='downarrow' codepoint='2193'/>
+ <entity name='Downarrow' codepoint='21D3'/>
+ <entity name='DownArrowBar' codepoint='2913'/>
+ <entity name='DownArrowUpArrow' codepoint='21F5'/>
+ <entity name='downdownarrows' codepoint='21CA'/>
+ <entity name='downharpoonleft' codepoint='21C3'/>
+ <entity name='downharpoonright' codepoint='21C2'/>
+ <entity name='DownLeftRightVector' codepoint='2950'/>
+ <entity name='DownLeftTeeVector' codepoint='295E'/>
+ <entity name='DownLeftVector' codepoint='21BD'/>
+ <entity name='DownLeftVectorBar' codepoint='2956'/>
+ <entity name='DownRightTeeVector' codepoint='295F'/>
+ <entity name='DownRightVector' codepoint='21C1'/>
+ <entity name='DownRightVectorBar' codepoint='2957'/>
+ <entity name='DownTee' codepoint='22A4'/>
+ <entity name='DownTeeArrow' codepoint='21A7'/>
+ <entity name='drbkarow' codepoint='2910'/>
+ <entity name='drcorn' codepoint='231F'/>
+ <entity name='drcrop' codepoint='230C'/>
+ <entity name='Dscr' codepoint='1D49F'/>
+ <entity name='dscr' codepoint='1D4B9'/>
+ <entity name='DScy' codepoint='0405'/>
+ <entity name='dscy' codepoint='0455'/>
+ <entity name='dsol' codepoint='29F6'/>
+ <entity name='Dstrok' codepoint='0110'/>
+ <entity name='dstrok' codepoint='0111'/>
+ <entity name='dtdot' codepoint='22F1'/>
+ <entity name='dtri' codepoint='25BF'/>
+ <entity name='dtrif' codepoint='25BE'/>
+ <entity name='duarr' codepoint='21F5'/>
+ <entity name='duhar' codepoint='296F'/>
+ <entity name='dwangle' codepoint='29A6'/>
+ <entity name='DZcy' codepoint='040F'/>
+ <entity name='dzcy' codepoint='045F'/>
+ <entity name='dzigrarr' codepoint='27FF'/>
+ <entity name='Eacgr' codepoint='0388'/>
+ <entity name='eacgr' codepoint='03AD'/>
+ <entity name='Eacute' codepoint='00C9'/>
+ <entity name='eacute' codepoint='00E9'/>
+ <entity name='easter' codepoint='2A6E'/>
+ <entity name='Ecaron' codepoint='011A'/>
+ <entity name='ecaron' codepoint='011B'/>
+ <entity name='ecir' codepoint='2256'/>
+ <entity name='Ecirc' codepoint='00CA'/>
+ <entity name='ecirc' codepoint='00EA'/>
+ <entity name='ecolon' codepoint='2255'/>
+ <entity name='Ecy' codepoint='042D'/>
+ <entity name='ecy' codepoint='044D'/>
+ <entity name='eDDot' codepoint='2A77'/>
+ <entity name='Edot' codepoint='0116'/>
+ <entity name='edot' codepoint='0117'/>
+ <entity name='eDot' codepoint='2251'/>
+ <entity name='ee' codepoint='2147'/>
+ <entity name='EEacgr' codepoint='0389'/>
+ <entity name='eeacgr' codepoint='03AE'/>
+ <entity name='EEgr' codepoint='0397'/>
+ <entity name='eegr' codepoint='03B7'/>
+ <entity name='efDot' codepoint='2252'/>
+ <entity name='Efr' codepoint='1D508'/>
+ <entity name='efr' codepoint='1D522'/>
+ <entity name='eg' codepoint='2A9A'/>
+ <entity name='Egr' codepoint='0395'/>
+ <entity name='egr' codepoint='03B5'/>
+ <entity name='Egrave' codepoint='00C8'/>
+ <entity name='egrave' codepoint='00E8'/>
+ <entity name='egs' codepoint='2A96'/>
+ <entity name='egsdot' codepoint='2A98'/>
+ <entity name='el' codepoint='2A99'/>
+ <entity name='Element' codepoint='2208'/>
+ <entity name='elinters' codepoint='23E7'/>
+ <entity name='ell' codepoint='2113'/>
+ <entity name='els' codepoint='2A95'/>
+ <entity name='elsdot' codepoint='2A97'/>
+ <entity name='Emacr' codepoint='0112'/>
+ <entity name='emacr' codepoint='0113'/>
+ <entity name='empty' codepoint='2205'/>
+ <entity name='emptyset' codepoint='2205'/>
+ <entity name='EmptySmallSquare' codepoint='25FB'/>
+ <entity name='emptyv' codepoint='2205'/>
+ <entity name='EmptyVerySmallSquare' codepoint='25AB'/>
+ <entity name='emsp' codepoint='2003'/>
+ <entity name='emsp13' codepoint='2004'/>
+ <entity name='emsp14' codepoint='2005'/>
+ <entity name='ENG' codepoint='014A'/>
+ <entity name='eng' codepoint='014B'/>
+ <entity name='ensp' codepoint='2002'/>
+ <entity name='Eogon' codepoint='0118'/>
+ <entity name='eogon' codepoint='0119'/>
+ <entity name='Eopf' codepoint='1D53C'/>
+ <entity name='eopf' codepoint='1D556'/>
+ <entity name='epar' codepoint='22D5'/>
+ <entity name='eparsl' codepoint='29E3'/>
+ <entity name='eplus' codepoint='2A71'/>
+ <entity name='epsi' codepoint='03F5'/>
+ <entity name='Epsilon' codepoint='0395'/>
+ <entity name='epsilon' codepoint='03B5'/>
+ <entity name='epsiv' codepoint='03B5'/>
+ <entity name='eqcirc' codepoint='2256'/>
+ <entity name='eqcolon' codepoint='2255'/>
+ <entity name='eqsim' codepoint='2242'/>
+ <entity name='eqslantgtr' codepoint='2A96'/>
+ <entity name='eqslantless' codepoint='2A95'/>
+ <entity name='Equal' codepoint='2A75'/>
+ <entity name='equals' codepoint='003D'/>
+ <entity name='EqualTilde' codepoint='2242'/>
+ <entity name='equest' codepoint='225F'/>
+ <entity name='Equilibrium' codepoint='21CC'/>
+ <entity name='equiv' codepoint='2261'/>
+ <entity name='equivDD' codepoint='2A78'/>
+ <entity name='eqvparsl' codepoint='29E5'/>
+ <entity name='erarr' codepoint='2971'/>
+ <entity name='erDot' codepoint='2253'/>
+ <entity name='escr' codepoint='212F'/>
+ <entity name='Escr' codepoint='2130'/>
+ <entity name='esdot' codepoint='2250'/>
+ <entity name='esim' codepoint='2242'/>
+ <entity name='Esim' codepoint='2A73'/>
+ <entity name='Eta' codepoint='0397'/>
+ <entity name='eta' codepoint='03B7'/>
+ <entity name='ETH' codepoint='00D0'/>
+ <entity name='eth' codepoint='00F0'/>
+ <entity name='Euml' codepoint='00CB'/>
+ <entity name='euml' codepoint='00EB'/>
+ <entity name='euro' codepoint='20AC'/>
+ <entity name='excl' codepoint='0021'/>
+ <entity name='exist' codepoint='2203'/>
+ <entity name='Exists' codepoint='2203'/>
+ <entity name='expectation' codepoint='2130'/>
+ <entity name='exponentiale' codepoint='2147'/>
+ <entity name='fallingdotseq' codepoint='2252'/>
+ <entity name='Fcy' codepoint='0424'/>
+ <entity name='fcy' codepoint='0444'/>
+ <entity name='female' codepoint='2640'/>
+ <entity name='ffilig' codepoint='FB03'/>
+ <entity name='fflig' codepoint='FB00'/>
+ <entity name='ffllig' codepoint='FB04'/>
+ <entity name='Ffr' codepoint='1D509'/>
+ <entity name='ffr' codepoint='1D523'/>
+ <entity name='filig' codepoint='FB01'/>
+ <entity name='FilledSmallSquare' codepoint='25FC'/>
+ <entity name='FilledVerySmallSquare' codepoint='25AA'/>
+ <entity name='flat' codepoint='266D'/>
+ <entity name='fllig' codepoint='FB02'/>
+ <entity name='fltns' codepoint='25B1'/>
+ <entity name='fnof' codepoint='0192'/>
+ <entity name='Fopf' codepoint='1D53D'/>
+ <entity name='fopf' codepoint='1D557'/>
+ <entity name='forall' codepoint='2200'/>
+ <entity name='fork' codepoint='22D4'/>
+ <entity name='forkv' codepoint='2AD9'/>
+ <entity name='Fouriertrf' codepoint='2131'/>
+ <entity name='fpartint' codepoint='2A0D'/>
+ <entity name='frac12' codepoint='00BD'/>
+ <entity name='frac13' codepoint='2153'/>
+ <entity name='frac14' codepoint='00BC'/>
+ <entity name='frac15' codepoint='2155'/>
+ <entity name='frac16' codepoint='2159'/>
+ <entity name='frac18' codepoint='215B'/>
+ <entity name='frac23' codepoint='2154'/>
+ <entity name='frac25' codepoint='2156'/>
+ <entity name='frac34' codepoint='00BE'/>
+ <entity name='frac35' codepoint='2157'/>
+ <entity name='frac38' codepoint='215C'/>
+ <entity name='frac45' codepoint='2158'/>
+ <entity name='frac56' codepoint='215A'/>
+ <entity name='frac58' codepoint='215D'/>
+ <entity name='frac78' codepoint='215E'/>
+ <entity name='frasl' codepoint='2044'/>
+ <entity name='frown' codepoint='2322'/>
+ <entity name='Fscr' codepoint='2131'/>
+ <entity name='fscr' codepoint='1D4BB'/>
+ <entity name='gacute' codepoint='01F5'/>
+ <entity name='Gamma' codepoint='0393'/>
+ <entity name='gamma' codepoint='03B3'/>
+ <entity name='Gammad' codepoint='03DC'/>
+ <entity name='gammad' codepoint='03DD'/>
+ <entity name='gap' codepoint='2A86'/>
+ <entity name='Gbreve' codepoint='011E'/>
+ <entity name='gbreve' codepoint='011F'/>
+ <entity name='Gcedil' codepoint='0122'/>
+ <entity name='Gcirc' codepoint='011C'/>
+ <entity name='gcirc' codepoint='011D'/>
+ <entity name='Gcy' codepoint='0413'/>
+ <entity name='gcy' codepoint='0433'/>
+ <entity name='Gdot' codepoint='0120'/>
+ <entity name='gdot' codepoint='0121'/>
+ <entity name='ge' codepoint='2265'/>
+ <entity name='gE' codepoint='2267'/>
+ <entity name='gel' codepoint='22DB'/>
+ <entity name='gEl' codepoint='2A8C'/>
+ <entity name='geq' codepoint='2265'/>
+ <entity name='geqq' codepoint='2267'/>
+ <entity name='geqslant' codepoint='2A7E'/>
+ <entity name='ges' codepoint='2A7E'/>
+ <entity name='gescc' codepoint='2AA9'/>
+ <entity name='gesdot' codepoint='2A80'/>
+ <entity name='gesdoto' codepoint='2A82'/>
+ <entity name='gesdotol' codepoint='2A84'/>
+ <entity name='gesles' codepoint='2A94'/>
+ <entity name='Gfr' codepoint='1D50A'/>
+ <entity name='gfr' codepoint='1D524'/>
+ <entity name='gg' codepoint='226B'/>
+ <entity name='Gg' codepoint='22D9'/>
+ <entity name='ggg' codepoint='22D9'/>
+ <entity name='Ggr' codepoint='0393'/>
+ <entity name='ggr' codepoint='03B3'/>
+ <entity name='gimel' codepoint='2137'/>
+ <entity name='GJcy' codepoint='0403'/>
+ <entity name='gjcy' codepoint='0453'/>
+ <entity name='gl' codepoint='2277'/>
+ <entity name='gla' codepoint='2AA5'/>
+ <entity name='glE' codepoint='2A92'/>
+ <entity name='glj' codepoint='2AA4'/>
+ <entity name='gnap' codepoint='2A8A'/>
+ <entity name='gnapprox' codepoint='2A8A'/>
+ <entity name='gnE' codepoint='2269'/>
+ <entity name='gne' codepoint='2A88'/>
+ <entity name='gneq' codepoint='2A88'/>
+ <entity name='gneqq' codepoint='2269'/>
+ <entity name='gnsim' codepoint='22E7'/>
+ <entity name='Gopf' codepoint='1D53E'/>
+ <entity name='gopf' codepoint='1D558'/>
+ <entity name='grave' codepoint='0060'/>
+ <entity name='GreaterEqual' codepoint='2265'/>
+ <entity name='GreaterEqualLess' codepoint='22DB'/>
+ <entity name='GreaterFullEqual' codepoint='2267'/>
+ <entity name='GreaterGreater' codepoint='2AA2'/>
+ <entity name='GreaterLess' codepoint='2277'/>
+ <entity name='GreaterSlantEqual' codepoint='2A7E'/>
+ <entity name='GreaterTilde' codepoint='2273'/>
+ <entity name='gscr' codepoint='210A'/>
+ <entity name='Gscr' codepoint='1D4A2'/>
+ <entity name='gsim' codepoint='2273'/>
+ <entity name='gsime' codepoint='2A8E'/>
+ <entity name='gsiml' codepoint='2A90'/>
+ <entity name='gt' codepoint='003E'/>
+ <entity name='Gt' codepoint='226B'/>
+ <entity name='gtcc' codepoint='2AA7'/>
+ <entity name='gtcir' codepoint='2A7A'/>
+ <entity name='gtdot' codepoint='22D7'/>
+ <entity name='gtlPar' codepoint='2995'/>
+ <entity name='gtquest' codepoint='2A7C'/>
+ <entity name='gtrapprox' codepoint='2A86'/>
+ <entity name='gtrarr' codepoint='2978'/>
+ <entity name='gtrdot' codepoint='22D7'/>
+ <entity name='gtreqless' codepoint='22DB'/>
+ <entity name='gtreqqless' codepoint='2A8C'/>
+ <entity name='gtrless' codepoint='2277'/>
+ <entity name='gtrsim' codepoint='2273'/>
+ <entity name='Hacek' codepoint='02C7'/>
+ <entity name='hairsp' codepoint='200A'/>
+ <entity name='half' codepoint='00BD'/>
+ <entity name='hamilt' codepoint='210B'/>
+ <entity name='HARDcy' codepoint='042A'/>
+ <entity name='hardcy' codepoint='044A'/>
+ <entity name='harr' codepoint='2194'/>
+ <entity name='hArr' codepoint='21D4'/>
+ <entity name='harrcir' codepoint='2948'/>
+ <entity name='harrw' codepoint='21AD'/>
+ <entity name='Hat' codepoint='005E'/>
+ <entity name='hbar' codepoint='210F'/>
+ <entity name='Hcirc' codepoint='0124'/>
+ <entity name='hcirc' codepoint='0125'/>
+ <entity name='hearts' codepoint='2665'/>
+ <entity name='heartsuit' codepoint='2665'/>
+ <entity name='hellip' codepoint='2026'/>
+ <entity name='hercon' codepoint='22B9'/>
+ <entity name='Hfr' codepoint='210C'/>
+ <entity name='hfr' codepoint='1D525'/>
+ <entity name='HilbertSpace' codepoint='210B'/>
+ <entity name='hksearow' codepoint='2925'/>
+ <entity name='hkswarow' codepoint='2926'/>
+ <entity name='hoarr' codepoint='21FF'/>
+ <entity name='homtht' codepoint='223B'/>
+ <entity name='hookleftarrow' codepoint='21A9'/>
+ <entity name='hookrightarrow' codepoint='21AA'/>
+ <entity name='Hopf' codepoint='210D'/>
+ <entity name='hopf' codepoint='1D559'/>
+ <entity name='horbar' codepoint='2015'/>
+ <entity name='HorizontalLine' codepoint='2500'/>
+ <entity name='Hscr' codepoint='210B'/>
+ <entity name='hscr' codepoint='1D4BD'/>
+ <entity name='hslash' codepoint='210F'/>
+ <entity name='Hstrok' codepoint='0126'/>
+ <entity name='hstrok' codepoint='0127'/>
+ <entity name='HumpDownHump' codepoint='224E'/>
+ <entity name='HumpEqual' codepoint='224F'/>
+ <entity name='hybull' codepoint='2043'/>
+ <entity name='hyphen' codepoint='2010'/>
+ <entity name='Iacgr' codepoint='038A'/>
+ <entity name='iacgr' codepoint='03AF'/>
+ <entity name='Iacute' codepoint='00CD'/>
+ <entity name='iacute' codepoint='00ED'/>
+ <entity name='ic' codepoint='2063'/>
+ <entity name='Icirc' codepoint='00CE'/>
+ <entity name='icirc' codepoint='00EE'/>
+ <entity name='Icy' codepoint='0418'/>
+ <entity name='icy' codepoint='0438'/>
+ <entity name='idiagr' codepoint='0390'/>
+ <entity name='Idigr' codepoint='03AA'/>
+ <entity name='idigr' codepoint='03CA'/>
+ <entity name='Idot' codepoint='0130'/>
+ <entity name='IEcy' codepoint='0415'/>
+ <entity name='iecy' codepoint='0435'/>
+ <entity name='iexcl' codepoint='00A1'/>
+ <entity name='iff' codepoint='21D4'/>
+ <entity name='Ifr' codepoint='2111'/>
+ <entity name='ifr' codepoint='1D526'/>
+ <entity name='Igr' codepoint='0399'/>
+ <entity name='igr' codepoint='03B9'/>
+ <entity name='Igrave' codepoint='00CC'/>
+ <entity name='igrave' codepoint='00EC'/>
+ <entity name='ii' codepoint='2148'/>
+ <entity name='iiiint' codepoint='2A0C'/>
+ <entity name='iiint' codepoint='222D'/>
+ <entity name='iinfin' codepoint='29DC'/>
+ <entity name='iiota' codepoint='2129'/>
+ <entity name='IJlig' codepoint='0132'/>
+ <entity name='ijlig' codepoint='0133'/>
+ <entity name='Im' codepoint='2111'/>
+ <entity name='Imacr' codepoint='012A'/>
+ <entity name='imacr' codepoint='012B'/>
+ <entity name='image' codepoint='2111'/>
+ <entity name='ImaginaryI' codepoint='2148'/>
+ <entity name='imagline' codepoint='2110'/>
+ <entity name='imagpart' codepoint='2111'/>
+ <entity name='imath' codepoint='0131'/>
+ <entity name='imof' codepoint='22B7'/>
+ <entity name='imped' codepoint='01B5'/>
+ <entity name='Implies' codepoint='21D2'/>
+ <entity name='in' codepoint='2208'/>
+ <entity name='incare' codepoint='2105'/>
+ <entity name='infin' codepoint='221E'/>
+ <entity name='infintie' codepoint='29DD'/>
+ <entity name='inodot' codepoint='0131'/>
+ <entity name='int' codepoint='222B'/>
+ <entity name='Int' codepoint='222C'/>
+ <entity name='intcal' codepoint='22BA'/>
+ <entity name='integers' codepoint='2124'/>
+ <entity name='Integral' codepoint='222B'/>
+ <entity name='intercal' codepoint='22BA'/>
+ <entity name='Intersection' codepoint='22C2'/>
+ <entity name='intlarhk' codepoint='2A17'/>
+ <entity name='intprod' codepoint='2A3C'/>
+ <entity name='InvisibleComma' codepoint='2063'/>
+ <entity name='InvisibleTimes' codepoint='2062'/>
+ <entity name='IOcy' codepoint='0401'/>
+ <entity name='iocy' codepoint='0451'/>
+ <entity name='Iogon' codepoint='012E'/>
+ <entity name='iogon' codepoint='012F'/>
+ <entity name='Iopf' codepoint='1D540'/>
+ <entity name='iopf' codepoint='1D55A'/>
+ <entity name='Iota' codepoint='0399'/>
+ <entity name='iota' codepoint='03B9'/>
+ <entity name='iprod' codepoint='2A3C'/>
+ <entity name='iquest' codepoint='00BF'/>
+ <entity name='Iscr' codepoint='2110'/>
+ <entity name='iscr' codepoint='1D4BE'/>
+ <entity name='isin' codepoint='2208'/>
+ <entity name='isindot' codepoint='22F5'/>
+ <entity name='isinE' codepoint='22F9'/>
+ <entity name='isins' codepoint='22F4'/>
+ <entity name='isinsv' codepoint='22F3'/>
+ <entity name='isinv' codepoint='2208'/>
+ <entity name='it' codepoint='2062'/>
+ <entity name='Itilde' codepoint='0128'/>
+ <entity name='itilde' codepoint='0129'/>
+ <entity name='Iukcy' codepoint='0406'/>
+ <entity name='iukcy' codepoint='0456'/>
+ <entity name='Iuml' codepoint='00CF'/>
+ <entity name='iuml' codepoint='00EF'/>
+ <entity name='Jcirc' codepoint='0134'/>
+ <entity name='jcirc' codepoint='0135'/>
+ <entity name='Jcy' codepoint='0419'/>
+ <entity name='jcy' codepoint='0439'/>
+ <entity name='Jfr' codepoint='1D50D'/>
+ <entity name='jfr' codepoint='1D527'/>
+ <entity name='jmath' codepoint='0237'/>
+ <entity name='Jopf' codepoint='1D541'/>
+ <entity name='jopf' codepoint='1D55B'/>
+ <entity name='Jscr' codepoint='1D4A5'/>
+ <entity name='jscr' codepoint='1D4BF'/>
+ <entity name='Jsercy' codepoint='0408'/>
+ <entity name='jsercy' codepoint='0458'/>
+ <entity name='Jukcy' codepoint='0404'/>
+ <entity name='jukcy' codepoint='0454'/>
+ <entity name='Kappa' codepoint='039A'/>
+ <entity name='kappa' codepoint='03BA'/>
+ <entity name='kappav' codepoint='03F0'/>
+ <entity name='Kcedil' codepoint='0136'/>
+ <entity name='kcedil' codepoint='0137'/>
+ <entity name='Kcy' codepoint='041A'/>
+ <entity name='kcy' codepoint='043A'/>
+ <entity name='Kfr' codepoint='1D50E'/>
+ <entity name='kfr' codepoint='1D528'/>
+ <entity name='Kgr' codepoint='039A'/>
+ <entity name='kgr' codepoint='03BA'/>
+ <entity name='kgreen' codepoint='0138'/>
+ <entity name='KHcy' codepoint='0425'/>
+ <entity name='khcy' codepoint='0445'/>
+ <entity name='KHgr' codepoint='03A7'/>
+ <entity name='khgr' codepoint='03C7'/>
+ <entity name='KJcy' codepoint='040C'/>
+ <entity name='kjcy' codepoint='045C'/>
+ <entity name='Kopf' codepoint='1D542'/>
+ <entity name='kopf' codepoint='1D55C'/>
+ <entity name='Kscr' codepoint='1D4A6'/>
+ <entity name='kscr' codepoint='1D4C0'/>
+ <entity name='lAarr' codepoint='21DA'/>
+ <entity name='Lacute' codepoint='0139'/>
+ <entity name='lacute' codepoint='013A'/>
+ <entity name='laemptyv' codepoint='29B4'/>
+ <entity name='lagran' codepoint='2112'/>
+ <entity name='Lambda' codepoint='039B'/>
+ <entity name='lambda' codepoint='03BB'/>
+ <entity name='lang' codepoint='2329'/>
+ <entity name='Lang' codepoint='27EA'/>
+ <entity name='langd' codepoint='2991'/>
+ <entity name='langle' codepoint='2329'/>
+ <entity name='lap' codepoint='2A85'/>
+ <entity name='Laplacetrf' codepoint='2112'/>
+ <entity name='laquo' codepoint='00AB'/>
+ <entity name='larr' codepoint='2190'/>
+ <entity name='Larr' codepoint='219E'/>
+ <entity name='lArr' codepoint='21D0'/>
+ <entity name='larrb' codepoint='21E4'/>
+ <entity name='larrbfs' codepoint='291F'/>
+ <entity name='larrfs' codepoint='291D'/>
+ <entity name='larrhk' codepoint='21A9'/>
+ <entity name='larrlp' codepoint='21AB'/>
+ <entity name='larrpl' codepoint='2939'/>
+ <entity name='larrsim' codepoint='2973'/>
+ <entity name='larrtl' codepoint='21A2'/>
+ <entity name='lat' codepoint='2AAB'/>
+ <entity name='latail' codepoint='2919'/>
+ <entity name='lAtail' codepoint='291B'/>
+ <entity name='late' codepoint='2AAD'/>
+ <entity name='lbarr' codepoint='290C'/>
+ <entity name='lBarr' codepoint='290E'/>
+ <entity name='lbbrk' codepoint='2997'/>
+ <entity name='lbrace' codepoint='007B'/>
+ <entity name='lbrack' codepoint='005B'/>
+ <entity name='lbrke' codepoint='298B'/>
+ <entity name='lbrksld' codepoint='298F'/>
+ <entity name='lbrkslu' codepoint='298D'/>
+ <entity name='Lcaron' codepoint='013D'/>
+ <entity name='lcaron' codepoint='013E'/>
+ <entity name='Lcedil' codepoint='013B'/>
+ <entity name='lcedil' codepoint='013C'/>
+ <entity name='lceil' codepoint='2308'/>
+ <entity name='lcub' codepoint='007B'/>
+ <entity name='Lcy' codepoint='041B'/>
+ <entity name='lcy' codepoint='043B'/>
+ <entity name='ldca' codepoint='2936'/>
+ <entity name='ldquo' codepoint='201C'/>
+ <entity name='ldquor' codepoint='201E'/>
+ <entity name='ldrdhar' codepoint='2967'/>
+ <entity name='ldrushar' codepoint='294B'/>
+ <entity name='ldsh' codepoint='21B2'/>
+ <entity name='le' codepoint='2264'/>
+ <entity name='lE' codepoint='2266'/>
+ <entity name='LeftAngleBracket' codepoint='2329'/>
+ <entity name='leftarrow' codepoint='2190'/>
+ <entity name='Leftarrow' codepoint='21D0'/>
+ <entity name='LeftArrowBar' codepoint='21E4'/>
+ <entity name='LeftArrowRightArrow' codepoint='21C6'/>
+ <entity name='leftarrowtail' codepoint='21A2'/>
+ <entity name='LeftCeiling' codepoint='2308'/>
+ <entity name='LeftDoubleBracket' codepoint='27E6'/>
+ <entity name='LeftDownTeeVector' codepoint='2961'/>
+ <entity name='LeftDownVector' codepoint='21C3'/>
+ <entity name='LeftDownVectorBar' codepoint='2959'/>
+ <entity name='LeftFloor' codepoint='230A'/>
+ <entity name='leftharpoondown' codepoint='21BD'/>
+ <entity name='leftharpoonup' codepoint='21BC'/>
+ <entity name='leftleftarrows' codepoint='21C7'/>
+ <entity name='leftrightarrow' codepoint='2194'/>
+ <entity name='Leftrightarrow' codepoint='21D4'/>
+ <entity name='leftrightarrows' codepoint='21C6'/>
+ <entity name='leftrightharpoons' codepoint='21CB'/>
+ <entity name='leftrightsquigarrow' codepoint='21AD'/>
+ <entity name='LeftRightVector' codepoint='294E'/>
+ <entity name='LeftTee' codepoint='22A3'/>
+ <entity name='LeftTeeArrow' codepoint='21A4'/>
+ <entity name='LeftTeeVector' codepoint='295A'/>
+ <entity name='leftthreetimes' codepoint='22CB'/>
+ <entity name='LeftTriangle' codepoint='22B2'/>
+ <entity name='LeftTriangleBar' codepoint='29CF'/>
+ <entity name='LeftTriangleEqual' codepoint='22B4'/>
+ <entity name='LeftUpDownVector' codepoint='2951'/>
+ <entity name='LeftUpTeeVector' codepoint='2960'/>
+ <entity name='LeftUpVector' codepoint='21BF'/>
+ <entity name='LeftUpVectorBar' codepoint='2958'/>
+ <entity name='LeftVector' codepoint='21BC'/>
+ <entity name='LeftVectorBar' codepoint='2952'/>
+ <entity name='leg' codepoint='22DA'/>
+ <entity name='lEg' codepoint='2A8B'/>
+ <entity name='leq' codepoint='2264'/>
+ <entity name='leqq' codepoint='2266'/>
+ <entity name='leqslant' codepoint='2A7D'/>
+ <entity name='les' codepoint='2A7D'/>
+ <entity name='lescc' codepoint='2AA8'/>
+ <entity name='lesdot' codepoint='2A7F'/>
+ <entity name='lesdoto' codepoint='2A81'/>
+ <entity name='lesdotor' codepoint='2A83'/>
+ <entity name='lesges' codepoint='2A93'/>
+ <entity name='lessapprox' codepoint='2A85'/>
+ <entity name='lessdot' codepoint='22D6'/>
+ <entity name='lesseqgtr' codepoint='22DA'/>
+ <entity name='lesseqqgtr' codepoint='2A8B'/>
+ <entity name='LessEqualGreater' codepoint='22DA'/>
+ <entity name='LessFullEqual' codepoint='2266'/>
+ <entity name='LessGreater' codepoint='2276'/>
+ <entity name='lessgtr' codepoint='2276'/>
+ <entity name='LessLess' codepoint='2AA1'/>
+ <entity name='lesssim' codepoint='2272'/>
+ <entity name='LessSlantEqual' codepoint='2A7D'/>
+ <entity name='LessTilde' codepoint='2272'/>
+ <entity name='lfisht' codepoint='297C'/>
+ <entity name='lfloor' codepoint='230A'/>
+ <entity name='Lfr' codepoint='1D50F'/>
+ <entity name='lfr' codepoint='1D529'/>
+ <entity name='lg' codepoint='2276'/>
+ <entity name='lgE' codepoint='2A91'/>
+ <entity name='Lgr' codepoint='039B'/>
+ <entity name='lgr' codepoint='03BB'/>
+ <entity name='lHar' codepoint='2962'/>
+ <entity name='lhard' codepoint='21BD'/>
+ <entity name='lharu' codepoint='21BC'/>
+ <entity name='lharul' codepoint='296A'/>
+ <entity name='lhblk' codepoint='2584'/>
+ <entity name='LJcy' codepoint='0409'/>
+ <entity name='ljcy' codepoint='0459'/>
+ <entity name='ll' codepoint='226A'/>
+ <entity name='Ll' codepoint='22D8'/>
+ <entity name='llarr' codepoint='21C7'/>
+ <entity name='llcorner' codepoint='231E'/>
+ <entity name='Lleftarrow' codepoint='21DA'/>
+ <entity name='llhard' codepoint='296B'/>
+ <entity name='lltri' codepoint='25FA'/>
+ <entity name='Lmidot' codepoint='013F'/>
+ <entity name='lmidot' codepoint='0140'/>
+ <entity name='lmoust' codepoint='23B0'/>
+ <entity name='lmoustache' codepoint='23B0'/>
+ <entity name='lnap' codepoint='2A89'/>
+ <entity name='lnapprox' codepoint='2A89'/>
+ <entity name='lnE' codepoint='2268'/>
+ <entity name='lne' codepoint='2A87'/>
+ <entity name='lneq' codepoint='2A87'/>
+ <entity name='lneqq' codepoint='2268'/>
+ <entity name='lnsim' codepoint='22E6'/>
+ <entity name='loang' codepoint='27EC'/>
+ <entity name='loarr' codepoint='21FD'/>
+ <entity name='lobrk' codepoint='27E6'/>
+ <entity name='longleftarrow' codepoint='27F5'/>
+ <entity name='Longleftarrow' codepoint='27F8'/>
+ <entity name='longleftrightarrow' codepoint='27F7'/>
+ <entity name='Longleftrightarrow' codepoint='27FA'/>
+ <entity name='longmapsto' codepoint='27FC'/>
+ <entity name='longrightarrow' codepoint='27F6'/>
+ <entity name='Longrightarrow' codepoint='27F9'/>
+ <entity name='looparrowleft' codepoint='21AB'/>
+ <entity name='looparrowright' codepoint='21AC'/>
+ <entity name='lopar' codepoint='2985'/>
+ <entity name='Lopf' codepoint='1D543'/>
+ <entity name='lopf' codepoint='1D55D'/>
+ <entity name='loplus' codepoint='2A2D'/>
+ <entity name='lotimes' codepoint='2A34'/>
+ <entity name='lowast' codepoint='2217'/>
+ <entity name='lowbar' codepoint='005F'/>
+ <entity name='LowerLeftArrow' codepoint='2199'/>
+ <entity name='LowerRightArrow' codepoint='2198'/>
+ <entity name='loz' codepoint='25CA'/>
+ <entity name='lozenge' codepoint='25CA'/>
+ <entity name='lozf' codepoint='29EB'/>
+ <entity name='lpar' codepoint='0028'/>
+ <entity name='lparlt' codepoint='2993'/>
+ <entity name='lrarr' codepoint='21C6'/>
+ <entity name='lrcorner' codepoint='231F'/>
+ <entity name='lrhar' codepoint='21CB'/>
+ <entity name='lrhard' codepoint='296D'/>
+ <entity name='lrm' codepoint='200E'/>
+ <entity name='lrtri' codepoint='22BF'/>
+ <entity name='lsaquo' codepoint='2039'/>
+ <entity name='Lscr' codepoint='2112'/>
+ <entity name='lscr' codepoint='1D4C1'/>
+ <entity name='lsh' codepoint='21B0'/>
+ <entity name='lsim' codepoint='2272'/>
+ <entity name='lsime' codepoint='2A8D'/>
+ <entity name='lsimg' codepoint='2A8F'/>
+ <entity name='lsqb' codepoint='005B'/>
+ <entity name='lsquo' codepoint='2018'/>
+ <entity name='lsquor' codepoint='201A'/>
+ <entity name='Lstrok' codepoint='0141'/>
+ <entity name='lstrok' codepoint='0142'/>
+ <entity name='lt' codepoint='003C'/>
+ <entity name='Lt' codepoint='226A'/>
+ <entity name='ltcc' codepoint='2AA6'/>
+ <entity name='ltcir' codepoint='2A79'/>
+ <entity name='ltdot' codepoint='22D6'/>
+ <entity name='lthree' codepoint='22CB'/>
+ <entity name='ltimes' codepoint='22C9'/>
+ <entity name='ltlarr' codepoint='2976'/>
+ <entity name='ltquest' codepoint='2A7B'/>
+ <entity name='ltri' codepoint='25C3'/>
+ <entity name='ltrie' codepoint='22B4'/>
+ <entity name='ltrif' codepoint='25C2'/>
+ <entity name='ltrPar' codepoint='2996'/>
+ <entity name='lurdshar' codepoint='294A'/>
+ <entity name='luruhar' codepoint='2966'/>
+ <entity name='macr' codepoint='00AF'/>
+ <entity name='male' codepoint='2642'/>
+ <entity name='malt' codepoint='2720'/>
+ <entity name='maltese' codepoint='2720'/>
+ <entity name='map' codepoint='21A6'/>
+ <entity name='Map' codepoint='2905'/>
+ <entity name='mapsto' codepoint='21A6'/>
+ <entity name='mapstodown' codepoint='21A7'/>
+ <entity name='mapstoleft' codepoint='21A4'/>
+ <entity name='mapstoup' codepoint='21A5'/>
+ <entity name='marker' codepoint='25AE'/>
+ <entity name='mcomma' codepoint='2A29'/>
+ <entity name='Mcy' codepoint='041C'/>
+ <entity name='mcy' codepoint='043C'/>
+ <entity name='mdash' codepoint='2014'/>
+ <entity name='mDDot' codepoint='223A'/>
+ <entity name='measuredangle' codepoint='2221'/>
+ <entity name='MediumSpace' codepoint='205F'/>
+ <entity name='Mellintrf' codepoint='2133'/>
+ <entity name='Mfr' codepoint='1D510'/>
+ <entity name='mfr' codepoint='1D52A'/>
+ <entity name='Mgr' codepoint='039C'/>
+ <entity name='mgr' codepoint='03BC'/>
+ <entity name='mho' codepoint='2127'/>
+ <entity name='micro' codepoint='00B5'/>
+ <entity name='mid' codepoint='2223'/>
+ <entity name='midast' codepoint='002A'/>
+ <entity name='midcir' codepoint='2AF0'/>
+ <entity name='middot' codepoint='00B7'/>
+ <entity name='minus' codepoint='2212'/>
+ <entity name='minusb' codepoint='229F'/>
+ <entity name='minusd' codepoint='2238'/>
+ <entity name='minusdu' codepoint='2A2A'/>
+ <entity name='MinusPlus' codepoint='2213'/>
+ <entity name='mlcp' codepoint='2ADB'/>
+ <entity name='mldr' codepoint='2026'/>
+ <entity name='mnplus' codepoint='2213'/>
+ <entity name='models' codepoint='22A7'/>
+ <entity name='Mopf' codepoint='1D544'/>
+ <entity name='mopf' codepoint='1D55E'/>
+ <entity name='mp' codepoint='2213'/>
+ <entity name='Mscr' codepoint='2133'/>
+ <entity name='mscr' codepoint='1D4C2'/>
+ <entity name='mstpos' codepoint='223E'/>
+ <entity name='Mu' codepoint='039C'/>
+ <entity name='mu' codepoint='03BC'/>
+ <entity name='multimap' codepoint='22B8'/>
+ <entity name='mumap' codepoint='22B8'/>
+ <entity name='nabla' codepoint='2207'/>
+ <entity name='Nacute' codepoint='0143'/>
+ <entity name='nacute' codepoint='0144'/>
+ <entity name='nap' codepoint='2249'/>
+ <entity name='napos' codepoint='0149'/>
+ <entity name='napprox' codepoint='2249'/>
+ <entity name='natur' codepoint='266E'/>
+ <entity name='natural' codepoint='266E'/>
+ <entity name='naturals' codepoint='2115'/>
+ <entity name='nbsp' codepoint='00A0'/>
+ <entity name='ncap' codepoint='2A43'/>
+ <entity name='Ncaron' codepoint='0147'/>
+ <entity name='ncaron' codepoint='0148'/>
+ <entity name='Ncedil' codepoint='0145'/>
+ <entity name='ncedil' codepoint='0146'/>
+ <entity name='ncong' codepoint='2247'/>
+ <entity name='ncup' codepoint='2A42'/>
+ <entity name='Ncy' codepoint='041D'/>
+ <entity name='ncy' codepoint='043D'/>
+ <entity name='ndash' codepoint='2013'/>
+ <entity name='ne' codepoint='2260'/>
+ <entity name='nearhk' codepoint='2924'/>
+ <entity name='nearr' codepoint='2197'/>
+ <entity name='neArr' codepoint='21D7'/>
+ <entity name='nearrow' codepoint='2197'/>
+ <entity name='NegativeMediumSpace' codepoint='200B'/>
+ <entity name='NegativeThickSpace' codepoint='200B'/>
+ <entity name='NegativeThinSpace' codepoint='200B'/>
+ <entity name='NegativeVeryThinSpace' codepoint='200B'/>
+ <entity name='nequiv' codepoint='2262'/>
+ <entity name='nesear' codepoint='2928'/>
+ <entity name='NestedGreaterGreater' codepoint='226B'/>
+ <entity name='NestedLessLess' codepoint='226A'/>
+ <entity name='NewLine' codepoint='000A'/>
+ <entity name='nexist' codepoint='2204'/>
+ <entity name='nexists' codepoint='2204'/>
+ <entity name='Nfr' codepoint='1D511'/>
+ <entity name='nfr' codepoint='1D52B'/>
+ <entity name='nge' codepoint='2271'/>
+ <entity name='ngeq' codepoint='2271'/>
+ <entity name='Ngr' codepoint='039D'/>
+ <entity name='ngr' codepoint='03BD'/>
+ <entity name='ngsim' codepoint='2275'/>
+ <entity name='ngt' codepoint='226F'/>
+ <entity name='ngtr' codepoint='226F'/>
+ <entity name='nharr' codepoint='21AE'/>
+ <entity name='nhArr' codepoint='21CE'/>
+ <entity name='nhpar' codepoint='2AF2'/>
+ <entity name='ni' codepoint='220B'/>
+ <entity name='nis' codepoint='22FC'/>
+ <entity name='nisd' codepoint='22FA'/>
+ <entity name='niv' codepoint='220B'/>
+ <entity name='NJcy' codepoint='040A'/>
+ <entity name='njcy' codepoint='045A'/>
+ <entity name='nlarr' codepoint='219A'/>
+ <entity name='nlArr' codepoint='21CD'/>
+ <entity name='nldr' codepoint='2025'/>
+ <entity name='nle' codepoint='2270'/>
+ <entity name='nleftarrow' codepoint='219A'/>
+ <entity name='nLeftarrow' codepoint='21CD'/>
+ <entity name='nleftrightarrow' codepoint='21AE'/>
+ <entity name='nLeftrightarrow' codepoint='21CE'/>
+ <entity name='nleq' codepoint='2270'/>
+ <entity name='nless' codepoint='226E'/>
+ <entity name='nlsim' codepoint='2274'/>
+ <entity name='nlt' codepoint='226E'/>
+ <entity name='nltri' codepoint='22EA'/>
+ <entity name='nltrie' codepoint='22EC'/>
+ <entity name='nmid' codepoint='2224'/>
+ <entity name='NoBreak' codepoint='2060'/>
+ <entity name='NonBreakingSpace' codepoint='00A0'/>
+ <entity name='Nopf' codepoint='2115'/>
+ <entity name='nopf' codepoint='1D55F'/>
+ <entity name='not' codepoint='00AC'/>
+ <entity name='Not' codepoint='2AEC'/>
+ <entity name='NotCongruent' codepoint='2262'/>
+ <entity name='NotCupCap' codepoint='226D'/>
+ <entity name='NotDoubleVerticalBar' codepoint='2226'/>
+ <entity name='NotElement' codepoint='2209'/>
+ <entity name='NotEqual' codepoint='2260'/>
+ <entity name='NotExists' codepoint='2204'/>
+ <entity name='NotGreater' codepoint='226F'/>
+ <entity name='NotGreaterEqual' codepoint='2271'/>
+ <entity name='NotGreaterLess' codepoint='2279'/>
+ <entity name='NotGreaterTilde' codepoint='2275'/>
+ <entity name='notin' codepoint='2209'/>
+ <entity name='notinva' codepoint='2209'/>
+ <entity name='notinvb' codepoint='22F7'/>
+ <entity name='notinvc' codepoint='22F6'/>
+ <entity name='NotLeftTriangle' codepoint='22EA'/>
+ <entity name='NotLeftTriangleEqual' codepoint='22EC'/>
+ <entity name='NotLess' codepoint='226E'/>
+ <entity name='NotLessEqual' codepoint='2270'/>
+ <entity name='NotLessGreater' codepoint='2278'/>
+ <entity name='NotLessTilde' codepoint='2274'/>
+ <entity name='notni' codepoint='220C'/>
+ <entity name='notniva' codepoint='220C'/>
+ <entity name='notnivb' codepoint='22FE'/>
+ <entity name='notnivc' codepoint='22FD'/>
+ <entity name='NotPrecedes' codepoint='2280'/>
+ <entity name='NotPrecedesSlantEqual' codepoint='22E0'/>
+ <entity name='NotReverseElement' codepoint='220C'/>
+ <entity name='NotRightTriangle' codepoint='22EB'/>
+ <entity name='NotRightTriangleEqual' codepoint='22ED'/>
+ <entity name='NotSquareSubsetEqual' codepoint='22E2'/>
+ <entity name='NotSquareSupersetEqual' codepoint='22E3'/>
+ <entity name='NotSubsetEqual' codepoint='2288'/>
+ <entity name='NotSucceeds' codepoint='2281'/>
+ <entity name='NotSucceedsSlantEqual' codepoint='22E1'/>
+ <entity name='NotSupersetEqual' codepoint='2289'/>
+ <entity name='NotTilde' codepoint='2241'/>
+ <entity name='NotTildeEqual' codepoint='2244'/>
+ <entity name='NotTildeFullEqual' codepoint='2247'/>
+ <entity name='NotTildeTilde' codepoint='2249'/>
+ <entity name='NotVerticalBar' codepoint='2224'/>
+ <entity name='npar' codepoint='2226'/>
+ <entity name='nparallel' codepoint='2226'/>
+ <entity name='npolint' codepoint='2A14'/>
+ <entity name='npr' codepoint='2280'/>
+ <entity name='nprcue' codepoint='22E0'/>
+ <entity name='nprec' codepoint='2280'/>
+ <entity name='nrarr' codepoint='219B'/>
+ <entity name='nrArr' codepoint='21CF'/>
+ <entity name='nrightarrow' codepoint='219B'/>
+ <entity name='nRightarrow' codepoint='21CF'/>
+ <entity name='nrtri' codepoint='22EB'/>
+ <entity name='nrtrie' codepoint='22ED'/>
+ <entity name='nsc' codepoint='2281'/>
+ <entity name='nsccue' codepoint='22E1'/>
+ <entity name='Nscr' codepoint='1D4A9'/>
+ <entity name='nscr' codepoint='1D4C3'/>
+ <entity name='nshortmid' codepoint='2224'/>
+ <entity name='nshortparallel' codepoint='2226'/>
+ <entity name='nsim' codepoint='2241'/>
+ <entity name='nsime' codepoint='2244'/>
+ <entity name='nsimeq' codepoint='2244'/>
+ <entity name='nsmid' codepoint='2224'/>
+ <entity name='nspar' codepoint='2226'/>
+ <entity name='nsqsube' codepoint='22E2'/>
+ <entity name='nsqsupe' codepoint='22E3'/>
+ <entity name='nsub' codepoint='2284'/>
+ <entity name='nsube' codepoint='2288'/>
+ <entity name='nsubseteq' codepoint='2288'/>
+ <entity name='nsucc' codepoint='2281'/>
+ <entity name='nsup' codepoint='2285'/>
+ <entity name='nsupe' codepoint='2289'/>
+ <entity name='nsupseteq' codepoint='2289'/>
+ <entity name='ntgl' codepoint='2279'/>
+ <entity name='Ntilde' codepoint='00D1'/>
+ <entity name='ntilde' codepoint='00F1'/>
+ <entity name='ntlg' codepoint='2278'/>
+ <entity name='ntriangleleft' codepoint='22EA'/>
+ <entity name='ntrianglelefteq' codepoint='22EC'/>
+ <entity name='ntriangleright' codepoint='22EB'/>
+ <entity name='ntrianglerighteq' codepoint='22ED'/>
+ <entity name='Nu' codepoint='039D'/>
+ <entity name='nu' codepoint='03BD'/>
+ <entity name='num' codepoint='0023'/>
+ <entity name='numero' codepoint='2116'/>
+ <entity name='numsp' codepoint='2007'/>
+ <entity name='nvdash' codepoint='22AC'/>
+ <entity name='nvDash' codepoint='22AD'/>
+ <entity name='nVdash' codepoint='22AE'/>
+ <entity name='nVDash' codepoint='22AF'/>
+ <entity name='nvHarr' codepoint='2904'/>
+ <entity name='nvinfin' codepoint='29DE'/>
+ <entity name='nvlArr' codepoint='2902'/>
+ <entity name='nvrArr' codepoint='2903'/>
+ <entity name='nwarhk' codepoint='2923'/>
+ <entity name='nwarr' codepoint='2196'/>
+ <entity name='nwArr' codepoint='21D6'/>
+ <entity name='nwarrow' codepoint='2196'/>
+ <entity name='nwnear' codepoint='2927'/>
+ <entity name='Oacgr' codepoint='038C'/>
+ <entity name='oacgr' codepoint='03CC'/>
+ <entity name='Oacute' codepoint='00D3'/>
+ <entity name='oacute' codepoint='00F3'/>
+ <entity name='oast' codepoint='229B'/>
+ <entity name='ocir' codepoint='229A'/>
+ <entity name='Ocirc' codepoint='00D4'/>
+ <entity name='ocirc' codepoint='00F4'/>
+ <entity name='Ocy' codepoint='041E'/>
+ <entity name='ocy' codepoint='043E'/>
+ <entity name='odash' codepoint='229D'/>
+ <entity name='Odblac' codepoint='0150'/>
+ <entity name='odblac' codepoint='0151'/>
+ <entity name='odiv' codepoint='2A38'/>
+ <entity name='odot' codepoint='2299'/>
+ <entity name='odsold' codepoint='29BC'/>
+ <entity name='OElig' codepoint='0152'/>
+ <entity name='oelig' codepoint='0153'/>
+ <entity name='ofcir' codepoint='29BF'/>
+ <entity name='Ofr' codepoint='1D512'/>
+ <entity name='ofr' codepoint='1D52C'/>
+ <entity name='ogon' codepoint='02DB'/>
+ <entity name='Ogr' codepoint='039F'/>
+ <entity name='ogr' codepoint='03BF'/>
+ <entity name='Ograve' codepoint='00D2'/>
+ <entity name='ograve' codepoint='00F2'/>
+ <entity name='ogt' codepoint='29C1'/>
+ <entity name='OHacgr' codepoint='038F'/>
+ <entity name='ohacgr' codepoint='03CE'/>
+ <entity name='ohbar' codepoint='29B5'/>
+ <entity name='OHgr' codepoint='03A9'/>
+ <entity name='ohgr' codepoint='03C9'/>
+ <entity name='ohm' codepoint='2126'/>
+ <entity name='oint' codepoint='222E'/>
+ <entity name='olarr' codepoint='21BA'/>
+ <entity name='olcir' codepoint='29BE'/>
+ <entity name='olcross' codepoint='29BB'/>
+ <entity name='oline' codepoint='203E'/>
+ <entity name='olt' codepoint='29C0'/>
+ <entity name='Omacr' codepoint='014C'/>
+ <entity name='omacr' codepoint='014D'/>
+ <entity name='Omega' codepoint='03A9'/>
+ <entity name='omega' codepoint='03C9'/>
+ <entity name='Omicron' codepoint='039F'/>
+ <entity name='omicron' codepoint='03BF'/>
+ <entity name='omid' codepoint='29B6'/>
+ <entity name='ominus' codepoint='2296'/>
+ <entity name='Oopf' codepoint='1D546'/>
+ <entity name='oopf' codepoint='1D560'/>
+ <entity name='opar' codepoint='29B7'/>
+ <entity name='OpenCurlyDoubleQuote' codepoint='201C'/>
+ <entity name='OpenCurlyQuote' codepoint='2018'/>
+ <entity name='operp' codepoint='29B9'/>
+ <entity name='oplus' codepoint='2295'/>
+ <entity name='or' codepoint='2228'/>
+ <entity name='Or' codepoint='2A54'/>
+ <entity name='orarr' codepoint='21BB'/>
+ <entity name='ord' codepoint='2A5D'/>
+ <entity name='order' codepoint='2134'/>
+ <entity name='orderof' codepoint='2134'/>
+ <entity name='ordf' codepoint='00AA'/>
+ <entity name='ordm' codepoint='00BA'/>
+ <entity name='origof' codepoint='22B6'/>
+ <entity name='oror' codepoint='2A56'/>
+ <entity name='orslope' codepoint='2A57'/>
+ <entity name='orv' codepoint='2A5B'/>
+ <entity name='oS' codepoint='24C8'/>
+ <entity name='oscr' codepoint='2134'/>
+ <entity name='Oscr' codepoint='1D4AA'/>
+ <entity name='Oslash' codepoint='00D8'/>
+ <entity name='oslash' codepoint='00F8'/>
+ <entity name='osol' codepoint='2298'/>
+ <entity name='Otilde' codepoint='00D5'/>
+ <entity name='otilde' codepoint='00F5'/>
+ <entity name='otimes' codepoint='2297'/>
+ <entity name='Otimes' codepoint='2A37'/>
+ <entity name='otimesas' codepoint='2A36'/>
+ <entity name='Ouml' codepoint='00D6'/>
+ <entity name='ouml' codepoint='00F6'/>
+ <entity name='ovbar' codepoint='233D'/>
+ <entity name='OverBar' codepoint='00AF'/>
+ <entity name='OverBrace' codepoint='FE37'/>
+ <entity name='OverBracket' codepoint='23B4'/>
+ <entity name='OverParenthesis' codepoint='FE35'/>
+ <entity name='par' codepoint='2225'/>
+ <entity name='para' codepoint='00B6'/>
+ <entity name='parallel' codepoint='2225'/>
+ <entity name='parsim' codepoint='2AF3'/>
+ <entity name='parsl' codepoint='2AFD'/>
+ <entity name='part' codepoint='2202'/>
+ <entity name='PartialD' codepoint='2202'/>
+ <entity name='Pcy' codepoint='041F'/>
+ <entity name='pcy' codepoint='043F'/>
+ <entity name='percnt' codepoint='0025'/>
+ <entity name='period' codepoint='002E'/>
+ <entity name='permil' codepoint='2030'/>
+ <entity name='perp' codepoint='22A5'/>
+ <entity name='pertenk' codepoint='2031'/>
+ <entity name='Pfr' codepoint='1D513'/>
+ <entity name='pfr' codepoint='1D52D'/>
+ <entity name='Pgr' codepoint='03A0'/>
+ <entity name='pgr' codepoint='03C0'/>
+ <entity name='PHgr' codepoint='03A6'/>
+ <entity name='phgr' codepoint='03C6'/>
+ <entity name='Phi' codepoint='03A6'/>
+ <entity name='phi' codepoint='03D5'/>
+ <entity name='phiv' codepoint='03C6'/>
+ <entity name='phmmat' codepoint='2133'/>
+ <entity name='phone' codepoint='260E'/>
+ <entity name='Pi' codepoint='03A0'/>
+ <entity name='pi' codepoint='03C0'/>
+ <entity name='pitchfork' codepoint='22D4'/>
+ <entity name='piv' codepoint='03D6'/>
+ <entity name='planck' codepoint='210F'/>
+ <entity name='planckh' codepoint='210E'/>
+ <entity name='plankv' codepoint='210F'/>
+ <entity name='plus' codepoint='002B'/>
+ <entity name='plusacir' codepoint='2A23'/>
+ <entity name='plusb' codepoint='229E'/>
+ <entity name='pluscir' codepoint='2A22'/>
+ <entity name='plusdo' codepoint='2214'/>
+ <entity name='plusdu' codepoint='2A25'/>
+ <entity name='pluse' codepoint='2A72'/>
+ <entity name='PlusMinus' codepoint='00B1'/>
+ <entity name='plusmn' codepoint='00B1'/>
+ <entity name='plussim' codepoint='2A26'/>
+ <entity name='plustwo' codepoint='2A27'/>
+ <entity name='pm' codepoint='00B1'/>
+ <entity name='Poincareplane' codepoint='210C'/>
+ <entity name='pointint' codepoint='2A15'/>
+ <entity name='Popf' codepoint='2119'/>
+ <entity name='popf' codepoint='1D561'/>
+ <entity name='pound' codepoint='00A3'/>
+ <entity name='pr' codepoint='227A'/>
+ <entity name='Pr' codepoint='2ABB'/>
+ <entity name='prap' codepoint='2AB7'/>
+ <entity name='prcue' codepoint='227C'/>
+ <entity name='pre' codepoint='2AAF'/>
+ <entity name='prE' codepoint='2AB3'/>
+ <entity name='prec' codepoint='227A'/>
+ <entity name='precapprox' codepoint='2AB7'/>
+ <entity name='preccurlyeq' codepoint='227C'/>
+ <entity name='Precedes' codepoint='227A'/>
+ <entity name='PrecedesEqual' codepoint='2AAF'/>
+ <entity name='PrecedesSlantEqual' codepoint='227C'/>
+ <entity name='PrecedesTilde' codepoint='227E'/>
+ <entity name='preceq' codepoint='2AAF'/>
+ <entity name='precnapprox' codepoint='2AB9'/>
+ <entity name='precneqq' codepoint='2AB5'/>
+ <entity name='precnsim' codepoint='22E8'/>
+ <entity name='precsim' codepoint='227E'/>
+ <entity name='prime' codepoint='2032'/>
+ <entity name='Prime' codepoint='2033'/>
+ <entity name='primes' codepoint='2119'/>
+ <entity name='prnap' codepoint='2AB9'/>
+ <entity name='prnE' codepoint='2AB5'/>
+ <entity name='prnsim' codepoint='22E8'/>
+ <entity name='prod' codepoint='220F'/>
+ <entity name='Product' codepoint='220F'/>
+ <entity name='profalar' codepoint='232E'/>
+ <entity name='profline' codepoint='2312'/>
+ <entity name='profsurf' codepoint='2313'/>
+ <entity name='prop' codepoint='221D'/>
+ <entity name='Proportion' codepoint='2237'/>
+ <entity name='Proportional' codepoint='221D'/>
+ <entity name='propto' codepoint='221D'/>
+ <entity name='prsim' codepoint='227E'/>
+ <entity name='prurel' codepoint='22B0'/>
+ <entity name='Pscr' codepoint='1D4AB'/>
+ <entity name='pscr' codepoint='1D4C5'/>
+ <entity name='PSgr' codepoint='03A8'/>
+ <entity name='psgr' codepoint='03C8'/>
+ <entity name='Psi' codepoint='03A8'/>
+ <entity name='psi' codepoint='03C8'/>
+ <entity name='puncsp' codepoint='2008'/>
+ <entity name='Qfr' codepoint='1D514'/>
+ <entity name='qfr' codepoint='1D52E'/>
+ <entity name='qint' codepoint='2A0C'/>
+ <entity name='Qopf' codepoint='211A'/>
+ <entity name='qopf' codepoint='1D562'/>
+ <entity name='qprime' codepoint='2057'/>
+ <entity name='Qscr' codepoint='1D4AC'/>
+ <entity name='qscr' codepoint='1D4C6'/>
+ <entity name='quaternions' codepoint='210D'/>
+ <entity name='quatint' codepoint='2A16'/>
+ <entity name='quest' codepoint='003F'/>
+ <entity name='questeq' codepoint='225F'/>
+ <entity name='quot' codepoint='0022'/>
+ <entity name='rAarr' codepoint='21DB'/>
+ <entity name='race' codepoint='29DA'/>
+ <entity name='Racute' codepoint='0154'/>
+ <entity name='racute' codepoint='0155'/>
+ <entity name='radic' codepoint='221A'/>
+ <entity name='raemptyv' codepoint='29B3'/>
+ <entity name='rang' codepoint='232A'/>
+ <entity name='Rang' codepoint='27EB'/>
+ <entity name='rangd' codepoint='2992'/>
+ <entity name='range' codepoint='29A5'/>
+ <entity name='rangle' codepoint='232A'/>
+ <entity name='raquo' codepoint='00BB'/>
+ <entity name='rarr' codepoint='2192'/>
+ <entity name='Rarr' codepoint='21A0'/>
+ <entity name='rArr' codepoint='21D2'/>
+ <entity name='rarrap' codepoint='2975'/>
+ <entity name='rarrb' codepoint='21E5'/>
+ <entity name='rarrbfs' codepoint='2920'/>
+ <entity name='rarrc' codepoint='2933'/>
+ <entity name='rarrfs' codepoint='291E'/>
+ <entity name='rarrhk' codepoint='21AA'/>
+ <entity name='rarrlp' codepoint='21AC'/>
+ <entity name='rarrpl' codepoint='2945'/>
+ <entity name='rarrsim' codepoint='2974'/>
+ <entity name='rarrtl' codepoint='21A3'/>
+ <entity name='Rarrtl' codepoint='2916'/>
+ <entity name='rarrw' codepoint='219D'/>
+ <entity name='ratail' codepoint='291A'/>
+ <entity name='rAtail' codepoint='291C'/>
+ <entity name='ratio' codepoint='2236'/>
+ <entity name='rationals' codepoint='211A'/>
+ <entity name='rbarr' codepoint='290D'/>
+ <entity name='rBarr' codepoint='290F'/>
+ <entity name='RBarr' codepoint='2910'/>
+ <entity name='rbbrk' codepoint='2998'/>
+ <entity name='rbrace' codepoint='007D'/>
+ <entity name='rbrack' codepoint='005D'/>
+ <entity name='rbrke' codepoint='298C'/>
+ <entity name='rbrksld' codepoint='298E'/>
+ <entity name='rbrkslu' codepoint='2990'/>
+ <entity name='Rcaron' codepoint='0158'/>
+ <entity name='rcaron' codepoint='0159'/>
+ <entity name='Rcedil' codepoint='0156'/>
+ <entity name='rcedil' codepoint='0157'/>
+ <entity name='rceil' codepoint='2309'/>
+ <entity name='rcub' codepoint='007D'/>
+ <entity name='Rcy' codepoint='0420'/>
+ <entity name='rcy' codepoint='0440'/>
+ <entity name='rdca' codepoint='2937'/>
+ <entity name='rdldhar' codepoint='2969'/>
+ <entity name='rdquo' codepoint='201D'/>
+ <entity name='rdquor' codepoint='201D'/>
+ <entity name='rdsh' codepoint='21B3'/>
+ <entity name='Re' codepoint='211C'/>
+ <entity name='real' codepoint='211C'/>
+ <entity name='realine' codepoint='211B'/>
+ <entity name='realpart' codepoint='211C'/>
+ <entity name='reals' codepoint='211D'/>
+ <entity name='rect' codepoint='25AD'/>
+ <entity name='reg' codepoint='00AE'/>
+ <entity name='ReverseElement' codepoint='220B'/>
+ <entity name='ReverseEquilibrium' codepoint='21CB'/>
+ <entity name='ReverseUpEquilibrium' codepoint='296F'/>
+ <entity name='rfisht' codepoint='297D'/>
+ <entity name='rfloor' codepoint='230B'/>
+ <entity name='Rfr' codepoint='211C'/>
+ <entity name='rfr' codepoint='1D52F'/>
+ <entity name='Rgr' codepoint='03A1'/>
+ <entity name='rgr' codepoint='03C1'/>
+ <entity name='rHar' codepoint='2964'/>
+ <entity name='rhard' codepoint='21C1'/>
+ <entity name='rharu' codepoint='21C0'/>
+ <entity name='rharul' codepoint='296C'/>
+ <entity name='Rho' codepoint='03A1'/>
+ <entity name='rho' codepoint='03C1'/>
+ <entity name='rhov' codepoint='03F1'/>
+ <entity name='RightAngleBracket' codepoint='232A'/>
+ <entity name='rightarrow' codepoint='2192'/>
+ <entity name='Rightarrow' codepoint='21D2'/>
+ <entity name='RightArrowBar' codepoint='21E5'/>
+ <entity name='RightArrowLeftArrow' codepoint='21C4'/>
+ <entity name='rightarrowtail' codepoint='21A3'/>
+ <entity name='RightCeiling' codepoint='2309'/>
+ <entity name='RightDoubleBracket' codepoint='27E7'/>
+ <entity name='RightDownTeeVector' codepoint='295D'/>
+ <entity name='RightDownVector' codepoint='21C2'/>
+ <entity name='RightDownVectorBar' codepoint='2955'/>
+ <entity name='RightFloor' codepoint='230B'/>
+ <entity name='rightharpoondown' codepoint='21C1'/>
+ <entity name='rightharpoonup' codepoint='21C0'/>
+ <entity name='rightleftarrows' codepoint='21C4'/>
+ <entity name='rightleftharpoons' codepoint='21CC'/>
+ <entity name='rightrightarrows' codepoint='21C9'/>
+ <entity name='rightsquigarrow' codepoint='219D'/>
+ <entity name='RightTee' codepoint='22A2'/>
+ <entity name='RightTeeArrow' codepoint='21A6'/>
+ <entity name='RightTeeVector' codepoint='295B'/>
+ <entity name='rightthreetimes' codepoint='22CC'/>
+ <entity name='RightTriangle' codepoint='22B3'/>
+ <entity name='RightTriangleBar' codepoint='29D0'/>
+ <entity name='RightTriangleEqual' codepoint='22B5'/>
+ <entity name='RightUpDownVector' codepoint='294F'/>
+ <entity name='RightUpTeeVector' codepoint='295C'/>
+ <entity name='RightUpVector' codepoint='21BE'/>
+ <entity name='RightUpVectorBar' codepoint='2954'/>
+ <entity name='RightVector' codepoint='21C0'/>
+ <entity name='RightVectorBar' codepoint='2953'/>
+ <entity name='ring' codepoint='02DA'/>
+ <entity name='risingdotseq' codepoint='2253'/>
+ <entity name='rlarr' codepoint='21C4'/>
+ <entity name='rlhar' codepoint='21CC'/>
+ <entity name='rlm' codepoint='200F'/>
+ <entity name='rmoust' codepoint='23B1'/>
+ <entity name='rmoustache' codepoint='23B1'/>
+ <entity name='rnmid' codepoint='2AEE'/>
+ <entity name='roang' codepoint='27ED'/>
+ <entity name='roarr' codepoint='21FE'/>
+ <entity name='robrk' codepoint='27E7'/>
+ <entity name='ropar' codepoint='2986'/>
+ <entity name='Ropf' codepoint='211D'/>
+ <entity name='ropf' codepoint='1D563'/>
+ <entity name='roplus' codepoint='2A2E'/>
+ <entity name='rotimes' codepoint='2A35'/>
+ <entity name='RoundImplies' codepoint='2970'/>
+ <entity name='rpar' codepoint='0029'/>
+ <entity name='rpargt' codepoint='2994'/>
+ <entity name='rppolint' codepoint='2A12'/>
+ <entity name='rrarr' codepoint='21C9'/>
+ <entity name='Rrightarrow' codepoint='21DB'/>
+ <entity name='rsaquo' codepoint='203A'/>
+ <entity name='Rscr' codepoint='211B'/>
+ <entity name='rscr' codepoint='1D4C7'/>
+ <entity name='rsh' codepoint='21B1'/>
+ <entity name='rsqb' codepoint='005D'/>
+ <entity name='rsquo' codepoint='2019'/>
+ <entity name='rsquor' codepoint='2019'/>
+ <entity name='rthree' codepoint='22CC'/>
+ <entity name='rtimes' codepoint='22CA'/>
+ <entity name='rtri' codepoint='25B9'/>
+ <entity name='rtrie' codepoint='22B5'/>
+ <entity name='rtrif' codepoint='25B8'/>
+ <entity name='rtriltri' codepoint='29CE'/>
+ <entity name='RuleDelayed' codepoint='29F4'/>
+ <entity name='ruluhar' codepoint='2968'/>
+ <entity name='rx' codepoint='211E'/>
+ <entity name='Sacute' codepoint='015A'/>
+ <entity name='sacute' codepoint='015B'/>
+ <entity name='sbquo' codepoint='201A'/>
+ <entity name='sc' codepoint='227B'/>
+ <entity name='Sc' codepoint='2ABC'/>
+ <entity name='scap' codepoint='2AB8'/>
+ <entity name='Scaron' codepoint='0160'/>
+ <entity name='scaron' codepoint='0161'/>
+ <entity name='sccue' codepoint='227D'/>
+ <entity name='sce' codepoint='2AB0'/>
+ <entity name='scE' codepoint='2AB4'/>
+ <entity name='Scedil' codepoint='015E'/>
+ <entity name='scedil' codepoint='015F'/>
+ <entity name='Scirc' codepoint='015C'/>
+ <entity name='scirc' codepoint='015D'/>
+ <entity name='scnap' codepoint='2ABA'/>
+ <entity name='scnE' codepoint='2AB6'/>
+ <entity name='scnsim' codepoint='22E9'/>
+ <entity name='scpolint' codepoint='2A13'/>
+ <entity name='scsim' codepoint='227F'/>
+ <entity name='Scy' codepoint='0421'/>
+ <entity name='scy' codepoint='0441'/>
+ <entity name='sdot' codepoint='22C5'/>
+ <entity name='sdotb' codepoint='22A1'/>
+ <entity name='sdote' codepoint='2A66'/>
+ <entity name='searhk' codepoint='2925'/>
+ <entity name='searr' codepoint='2198'/>
+ <entity name='seArr' codepoint='21D8'/>
+ <entity name='searrow' codepoint='2198'/>
+ <entity name='sect' codepoint='00A7'/>
+ <entity name='semi' codepoint='003B'/>
+ <entity name='seswar' codepoint='2929'/>
+ <entity name='setminus' codepoint='2216'/>
+ <entity name='setmn' codepoint='2216'/>
+ <entity name='sext' codepoint='2736'/>
+ <entity name='sfgr' codepoint='03C2'/>
+ <entity name='Sfr' codepoint='1D516'/>
+ <entity name='sfr' codepoint='1D530'/>
+ <entity name='sfrown' codepoint='2322'/>
+ <entity name='Sgr' codepoint='03A3'/>
+ <entity name='sgr' codepoint='03C3'/>
+ <entity name='sharp' codepoint='266F'/>
+ <entity name='SHCHcy' codepoint='0429'/>
+ <entity name='shchcy' codepoint='0449'/>
+ <entity name='SHcy' codepoint='0428'/>
+ <entity name='shcy' codepoint='0448'/>
+ <entity name='ShortDownArrow' codepoint='2193'/>
+ <entity name='ShortLeftArrow' codepoint='2190'/>
+ <entity name='shortmid' codepoint='2223'/>
+ <entity name='shortparallel' codepoint='2225'/>
+ <entity name='ShortRightArrow' codepoint='2192'/>
+ <entity name='ShortUpArrow' codepoint='2191'/>
+ <entity name='shy' codepoint='00AD'/>
+ <entity name='Sigma' codepoint='03A3'/>
+ <entity name='sigma' codepoint='03C3'/>
+ <entity name='sigmaf' codepoint='03C2'/>
+ <entity name='sigmav' codepoint='03C2'/>
+ <entity name='sim' codepoint='223C'/>
+ <entity name='simdot' codepoint='2A6A'/>
+ <entity name='sime' codepoint='2243'/>
+ <entity name='simeq' codepoint='2243'/>
+ <entity name='simg' codepoint='2A9E'/>
+ <entity name='simgE' codepoint='2AA0'/>
+ <entity name='siml' codepoint='2A9D'/>
+ <entity name='simlE' codepoint='2A9F'/>
+ <entity name='simne' codepoint='2246'/>
+ <entity name='simplus' codepoint='2A24'/>
+ <entity name='simrarr' codepoint='2972'/>
+ <entity name='slarr' codepoint='2190'/>
+ <entity name='SmallCircle' codepoint='2218'/>
+ <entity name='smallsetminus' codepoint='2216'/>
+ <entity name='smashp' codepoint='2A33'/>
+ <entity name='smeparsl' codepoint='29E4'/>
+ <entity name='smid' codepoint='2223'/>
+ <entity name='smile' codepoint='2323'/>
+ <entity name='smt' codepoint='2AAA'/>
+ <entity name='smte' codepoint='2AAC'/>
+ <entity name='SOFTcy' codepoint='042C'/>
+ <entity name='softcy' codepoint='044C'/>
+ <entity name='sol' codepoint='002F'/>
+ <entity name='solb' codepoint='29C4'/>
+ <entity name='solbar' codepoint='233F'/>
+ <entity name='Sopf' codepoint='1D54A'/>
+ <entity name='sopf' codepoint='1D564'/>
+ <entity name='spades' codepoint='2660'/>
+ <entity name='spadesuit' codepoint='2660'/>
+ <entity name='spar' codepoint='2225'/>
+ <entity name='sqcap' codepoint='2293'/>
+ <entity name='sqcup' codepoint='2294'/>
+ <entity name='Sqrt' codepoint='221A'/>
+ <entity name='sqsub' codepoint='228F'/>
+ <entity name='sqsube' codepoint='2291'/>
+ <entity name='sqsubset' codepoint='228F'/>
+ <entity name='sqsubseteq' codepoint='2291'/>
+ <entity name='sqsup' codepoint='2290'/>
+ <entity name='sqsupe' codepoint='2292'/>
+ <entity name='sqsupset' codepoint='2290'/>
+ <entity name='sqsupseteq' codepoint='2292'/>
+ <entity name='squ' codepoint='25A1'/>
+ <entity name='square' codepoint='25A1'/>
+ <entity name='SquareIntersection' codepoint='2293'/>
+ <entity name='SquareSubset' codepoint='228F'/>
+ <entity name='SquareSubsetEqual' codepoint='2291'/>
+ <entity name='SquareSuperset' codepoint='2290'/>
+ <entity name='SquareSupersetEqual' codepoint='2292'/>
+ <entity name='SquareUnion' codepoint='2294'/>
+ <entity name='squarf' codepoint='25AA'/>
+ <entity name='squf' codepoint='25AA'/>
+ <entity name='srarr' codepoint='2192'/>
+ <entity name='Sscr' codepoint='1D4AE'/>
+ <entity name='sscr' codepoint='1D4C8'/>
+ <entity name='ssetmn' codepoint='2216'/>
+ <entity name='ssmile' codepoint='2323'/>
+ <entity name='sstarf' codepoint='22C6'/>
+ <entity name='Star' codepoint='22C6'/>
+ <entity name='star' codepoint='2606'/>
+ <entity name='starf' codepoint='2605'/>
+ <entity name='straightepsilon' codepoint='03F5'/>
+ <entity name='straightphi' codepoint='03D5'/>
+ <entity name='strns' codepoint='00AF'/>
+ <entity name='sub' codepoint='2282'/>
+ <entity name='Sub' codepoint='22D0'/>
+ <entity name='subdot' codepoint='2ABD'/>
+ <entity name='sube' codepoint='2286'/>
+ <entity name='subE' codepoint='2AC5'/>
+ <entity name='subedot' codepoint='2AC3'/>
+ <entity name='submult' codepoint='2AC1'/>
+ <entity name='subne' codepoint='228A'/>
+ <entity name='subnE' codepoint='2ACB'/>
+ <entity name='subplus' codepoint='2ABF'/>
+ <entity name='subrarr' codepoint='2979'/>
+ <entity name='subset' codepoint='2282'/>
+ <entity name='Subset' codepoint='22D0'/>
+ <entity name='subseteq' codepoint='2286'/>
+ <entity name='subseteqq' codepoint='2AC5'/>
+ <entity name='SubsetEqual' codepoint='2286'/>
+ <entity name='subsetneq' codepoint='228A'/>
+ <entity name='subsetneqq' codepoint='2ACB'/>
+ <entity name='subsim' codepoint='2AC7'/>
+ <entity name='subsub' codepoint='2AD5'/>
+ <entity name='subsup' codepoint='2AD3'/>
+ <entity name='succ' codepoint='227B'/>
+ <entity name='succapprox' codepoint='2AB8'/>
+ <entity name='succcurlyeq' codepoint='227D'/>
+ <entity name='Succeeds' codepoint='227B'/>
+ <entity name='SucceedsEqual' codepoint='2AB0'/>
+ <entity name='SucceedsSlantEqual' codepoint='227D'/>
+ <entity name='SucceedsTilde' codepoint='227F'/>
+ <entity name='succeq' codepoint='2AB0'/>
+ <entity name='succnapprox' codepoint='2ABA'/>
+ <entity name='succneqq' codepoint='2AB6'/>
+ <entity name='succnsim' codepoint='22E9'/>
+ <entity name='succsim' codepoint='227F'/>
+ <entity name='SuchThat' codepoint='220B'/>
+ <entity name='sum' codepoint='2211'/>
+ <entity name='sung' codepoint='266A'/>
+ <entity name='sup' codepoint='2283'/>
+ <entity name='Sup' codepoint='22D1'/>
+ <entity name='sup1' codepoint='00B9'/>
+ <entity name='sup2' codepoint='00B2'/>
+ <entity name='sup3' codepoint='00B3'/>
+ <entity name='supdot' codepoint='2ABE'/>
+ <entity name='supdsub' codepoint='2AD8'/>
+ <entity name='supe' codepoint='2287'/>
+ <entity name='supE' codepoint='2AC6'/>
+ <entity name='supedot' codepoint='2AC4'/>
+ <entity name='Superset' codepoint='2283'/>
+ <entity name='SupersetEqual' codepoint='2287'/>
+ <entity name='suphsub' codepoint='2AD7'/>
+ <entity name='suplarr' codepoint='297B'/>
+ <entity name='supmult' codepoint='2AC2'/>
+ <entity name='supne' codepoint='228B'/>
+ <entity name='supnE' codepoint='2ACC'/>
+ <entity name='supplus' codepoint='2AC0'/>
+ <entity name='supset' codepoint='2283'/>
+ <entity name='Supset' codepoint='22D1'/>
+ <entity name='supseteq' codepoint='2287'/>
+ <entity name='supseteqq' codepoint='2AC6'/>
+ <entity name='supsetneq' codepoint='228B'/>
+ <entity name='supsetneqq' codepoint='2ACC'/>
+ <entity name='supsim' codepoint='2AC8'/>
+ <entity name='supsub' codepoint='2AD4'/>
+ <entity name='supsup' codepoint='2AD6'/>
+ <entity name='swarhk' codepoint='2926'/>
+ <entity name='swarr' codepoint='2199'/>
+ <entity name='swArr' codepoint='21D9'/>
+ <entity name='swarrow' codepoint='2199'/>
+ <entity name='swnwar' codepoint='292A'/>
+ <entity name='szlig' codepoint='00DF'/>
+ <entity name='Tab' codepoint='0009'/>
+ <entity name='target' codepoint='2316'/>
+ <entity name='Tau' codepoint='03A4'/>
+ <entity name='tau' codepoint='03C4'/>
+ <entity name='tbrk' codepoint='23B4'/>
+ <entity name='Tcaron' codepoint='0164'/>
+ <entity name='tcaron' codepoint='0165'/>
+ <entity name='Tcedil' codepoint='0162'/>
+ <entity name='tcedil' codepoint='0163'/>
+ <entity name='Tcy' codepoint='0422'/>
+ <entity name='tcy' codepoint='0442'/>
+ <entity name='telrec' codepoint='2315'/>
+ <entity name='Tfr' codepoint='1D517'/>
+ <entity name='tfr' codepoint='1D531'/>
+ <entity name='Tgr' codepoint='03A4'/>
+ <entity name='tgr' codepoint='03C4'/>
+ <entity name='there4' codepoint='2234'/>
+ <entity name='therefore' codepoint='2234'/>
+ <entity name='Theta' codepoint='0398'/>
+ <entity name='theta' codepoint='03B8'/>
+ <entity name='thetasym' codepoint='03D1'/>
+ <entity name='thetav' codepoint='03D1'/>
+ <entity name='THgr' codepoint='0398'/>
+ <entity name='thgr' codepoint='03B8'/>
+ <entity name='thickapprox' codepoint='2248'/>
+ <entity name='thicksim' codepoint='223C'/>
+ <entity name='thinsp' codepoint='2009'/>
+ <entity name='ThinSpace' codepoint='2009'/>
+ <entity name='thkap' codepoint='2248'/>
+ <entity name='thksim' codepoint='223C'/>
+ <entity name='THORN' codepoint='00DE'/>
+ <entity name='thorn' codepoint='00FE'/>
+ <entity name='tilde' codepoint='02DC'/>
+ <entity name='Tilde' codepoint='223C'/>
+ <entity name='TildeEqual' codepoint='2243'/>
+ <entity name='TildeFullEqual' codepoint='2245'/>
+ <entity name='TildeTilde' codepoint='2248'/>
+ <entity name='times' codepoint='00D7'/>
+ <entity name='timesb' codepoint='22A0'/>
+ <entity name='timesbar' codepoint='2A31'/>
+ <entity name='timesd' codepoint='2A30'/>
+ <entity name='tint' codepoint='222D'/>
+ <entity name='toea' codepoint='2928'/>
+ <entity name='top' codepoint='22A4'/>
+ <entity name='topbot' codepoint='2336'/>
+ <entity name='topcir' codepoint='2AF1'/>
+ <entity name='Topf' codepoint='1D54B'/>
+ <entity name='topf' codepoint='1D565'/>
+ <entity name='topfork' codepoint='2ADA'/>
+ <entity name='tosa' codepoint='2929'/>
+ <entity name='tprime' codepoint='2034'/>
+ <entity name='trade' codepoint='2122'/>
+ <entity name='triangle' codepoint='25B5'/>
+ <entity name='triangledown' codepoint='25BF'/>
+ <entity name='triangleleft' codepoint='25C3'/>
+ <entity name='trianglelefteq' codepoint='22B4'/>
+ <entity name='triangleq' codepoint='225C'/>
+ <entity name='triangleright' codepoint='25B9'/>
+ <entity name='trianglerighteq' codepoint='22B5'/>
+ <entity name='tridot' codepoint='25EC'/>
+ <entity name='trie' codepoint='225C'/>
+ <entity name='triminus' codepoint='2A3A'/>
+ <entity name='triplus' codepoint='2A39'/>
+ <entity name='trisb' codepoint='29CD'/>
+ <entity name='tritime' codepoint='2A3B'/>
+ <entity name='trpezium' codepoint='23E2'/>
+ <entity name='Tscr' codepoint='1D4AF'/>
+ <entity name='tscr' codepoint='1D4C9'/>
+ <entity name='TScy' codepoint='0426'/>
+ <entity name='tscy' codepoint='0446'/>
+ <entity name='TSHcy' codepoint='040B'/>
+ <entity name='tshcy' codepoint='045B'/>
+ <entity name='Tstrok' codepoint='0166'/>
+ <entity name='tstrok' codepoint='0167'/>
+ <entity name='twixt' codepoint='226C'/>
+ <entity name='twoheadleftarrow' codepoint='219E'/>
+ <entity name='twoheadrightarrow' codepoint='21A0'/>
+ <entity name='Uacgr' codepoint='038E'/>
+ <entity name='uacgr' codepoint='03CD'/>
+ <entity name='Uacute' codepoint='00DA'/>
+ <entity name='uacute' codepoint='00FA'/>
+ <entity name='uarr' codepoint='2191'/>
+ <entity name='Uarr' codepoint='219F'/>
+ <entity name='uArr' codepoint='21D1'/>
+ <entity name='Uarrocir' codepoint='2949'/>
+ <entity name='Ubrcy' codepoint='040E'/>
+ <entity name='ubrcy' codepoint='045E'/>
+ <entity name='Ubreve' codepoint='016C'/>
+ <entity name='ubreve' codepoint='016D'/>
+ <entity name='Ucirc' codepoint='00DB'/>
+ <entity name='ucirc' codepoint='00FB'/>
+ <entity name='Ucy' codepoint='0423'/>
+ <entity name='ucy' codepoint='0443'/>
+ <entity name='udarr' codepoint='21C5'/>
+ <entity name='Udblac' codepoint='0170'/>
+ <entity name='udblac' codepoint='0171'/>
+ <entity name='udhar' codepoint='296E'/>
+ <entity name='udiagr' codepoint='03B0'/>
+ <entity name='Udigr' codepoint='03AB'/>
+ <entity name='udigr' codepoint='03CB'/>
+ <entity name='ufisht' codepoint='297E'/>
+ <entity name='Ufr' codepoint='1D518'/>
+ <entity name='ufr' codepoint='1D532'/>
+ <entity name='Ugr' codepoint='03A5'/>
+ <entity name='ugr' codepoint='03C5'/>
+ <entity name='Ugrave' codepoint='00D9'/>
+ <entity name='ugrave' codepoint='00F9'/>
+ <entity name='uHar' codepoint='2963'/>
+ <entity name='uharl' codepoint='21BF'/>
+ <entity name='uharr' codepoint='21BE'/>
+ <entity name='uhblk' codepoint='2580'/>
+ <entity name='ulcorn' codepoint='231C'/>
+ <entity name='ulcorner' codepoint='231C'/>
+ <entity name='ulcrop' codepoint='230F'/>
+ <entity name='ultri' codepoint='25F8'/>
+ <entity name='Umacr' codepoint='016A'/>
+ <entity name='umacr' codepoint='016B'/>
+ <entity name='uml' codepoint='00A8'/>
+ <entity name='UnderBrace' codepoint='FE38'/>
+ <entity name='UnderBracket' codepoint='23B5'/>
+ <entity name='UnderParenthesis' codepoint='FE36'/>
+ <entity name='Union' codepoint='22C3'/>
+ <entity name='UnionPlus' codepoint='228E'/>
+ <entity name='Uogon' codepoint='0172'/>
+ <entity name='uogon' codepoint='0173'/>
+ <entity name='Uopf' codepoint='1D54C'/>
+ <entity name='uopf' codepoint='1D566'/>
+ <entity name='uparrow' codepoint='2191'/>
+ <entity name='Uparrow' codepoint='21D1'/>
+ <entity name='UpArrowBar' codepoint='2912'/>
+ <entity name='UpArrowDownArrow' codepoint='21C5'/>
+ <entity name='updownarrow' codepoint='2195'/>
+ <entity name='Updownarrow' codepoint='21D5'/>
+ <entity name='UpEquilibrium' codepoint='296E'/>
+ <entity name='upharpoonleft' codepoint='21BF'/>
+ <entity name='upharpoonright' codepoint='21BE'/>
+ <entity name='uplus' codepoint='228E'/>
+ <entity name='UpperLeftArrow' codepoint='2196'/>
+ <entity name='UpperRightArrow' codepoint='2197'/>
+ <entity name='upsi' codepoint='03C5'/>
+ <entity name='Upsi' codepoint='03D2'/>
+ <entity name='upsih' codepoint='03D2'/>
+ <entity name='Upsilon' codepoint='03A5'/>
+ <entity name='upsilon' codepoint='03C5'/>
+ <entity name='UpTee' codepoint='22A5'/>
+ <entity name='UpTeeArrow' codepoint='21A5'/>
+ <entity name='upuparrows' codepoint='21C8'/>
+ <entity name='urcorn' codepoint='231D'/>
+ <entity name='urcorner' codepoint='231D'/>
+ <entity name='urcrop' codepoint='230E'/>
+ <entity name='Uring' codepoint='016E'/>
+ <entity name='uring' codepoint='016F'/>
+ <entity name='urtri' codepoint='25F9'/>
+ <entity name='Uscr' codepoint='1D4B0'/>
+ <entity name='uscr' codepoint='1D4CA'/>
+ <entity name='utdot' codepoint='22F0'/>
+ <entity name='Utilde' codepoint='0168'/>
+ <entity name='utilde' codepoint='0169'/>
+ <entity name='utri' codepoint='25B5'/>
+ <entity name='utrif' codepoint='25B4'/>
+ <entity name='uuarr' codepoint='21C8'/>
+ <entity name='Uuml' codepoint='00DC'/>
+ <entity name='uuml' codepoint='00FC'/>
+ <entity name='uwangle' codepoint='29A7'/>
+ <entity name='vangrt' codepoint='299C'/>
+ <entity name='varepsilon' codepoint='03B5'/>
+ <entity name='varkappa' codepoint='03F0'/>
+ <entity name='varnothing' codepoint='2205'/>
+ <entity name='varphi' codepoint='03C6'/>
+ <entity name='varpi' codepoint='03D6'/>
+ <entity name='varpropto' codepoint='221D'/>
+ <entity name='varr' codepoint='2195'/>
+ <entity name='vArr' codepoint='21D5'/>
+ <entity name='varrho' codepoint='03F1'/>
+ <entity name='varsigma' codepoint='03C2'/>
+ <entity name='vartheta' codepoint='03D1'/>
+ <entity name='vartriangleleft' codepoint='22B2'/>
+ <entity name='vartriangleright' codepoint='22B3'/>
+ <entity name='vBar' codepoint='2AE8'/>
+ <entity name='Vbar' codepoint='2AEB'/>
+ <entity name='vBarv' codepoint='2AE9'/>
+ <entity name='Vcy' codepoint='0412'/>
+ <entity name='vcy' codepoint='0432'/>
+ <entity name='vdash' codepoint='22A2'/>
+ <entity name='vDash' codepoint='22A8'/>
+ <entity name='Vdash' codepoint='22A9'/>
+ <entity name='VDash' codepoint='22AB'/>
+ <entity name='Vdashl' codepoint='2AE6'/>
+ <entity name='vee' codepoint='2228'/>
+ <entity name='Vee' codepoint='22C1'/>
+ <entity name='veebar' codepoint='22BB'/>
+ <entity name='veeeq' codepoint='225A'/>
+ <entity name='vellip' codepoint='22EE'/>
+ <entity name='verbar' codepoint='007C'/>
+ <entity name='Verbar' codepoint='2016'/>
+ <entity name='vert' codepoint='007C'/>
+ <entity name='Vert' codepoint='2016'/>
+ <entity name='VerticalBar' codepoint='2223'/>
+ <entity name='VerticalLine' codepoint='007C'/>
+ <entity name='VerticalSeparator' codepoint='2758'/>
+ <entity name='VerticalTilde' codepoint='2240'/>
+ <entity name='VeryThinSpace' codepoint='200A'/>
+ <entity name='Vfr' codepoint='1D519'/>
+ <entity name='vfr' codepoint='1D533'/>
+ <entity name='vltri' codepoint='22B2'/>
+ <entity name='Vopf' codepoint='1D54D'/>
+ <entity name='vopf' codepoint='1D567'/>
+ <entity name='vprop' codepoint='221D'/>
+ <entity name='vrtri' codepoint='22B3'/>
+ <entity name='Vscr' codepoint='1D4B1'/>
+ <entity name='vscr' codepoint='1D4CB'/>
+ <entity name='Vvdash' codepoint='22AA'/>
+ <entity name='vzigzag' codepoint='299A'/>
+ <entity name='Wcirc' codepoint='0174'/>
+ <entity name='wcirc' codepoint='0175'/>
+ <entity name='wedbar' codepoint='2A5F'/>
+ <entity name='wedge' codepoint='2227'/>
+ <entity name='Wedge' codepoint='22C0'/>
+ <entity name='wedgeq' codepoint='2259'/>
+ <entity name='weierp' codepoint='2118'/>
+ <entity name='Wfr' codepoint='1D51A'/>
+ <entity name='wfr' codepoint='1D534'/>
+ <entity name='Wopf' codepoint='1D54E'/>
+ <entity name='wopf' codepoint='1D568'/>
+ <entity name='wp' codepoint='2118'/>
+ <entity name='wr' codepoint='2240'/>
+ <entity name='wreath' codepoint='2240'/>
+ <entity name='Wscr' codepoint='1D4B2'/>
+ <entity name='wscr' codepoint='1D4CC'/>
+ <entity name='xcap' codepoint='22C2'/>
+ <entity name='xcirc' codepoint='25EF'/>
+ <entity name='xcup' codepoint='22C3'/>
+ <entity name='xdtri' codepoint='25BD'/>
+ <entity name='Xfr' codepoint='1D51B'/>
+ <entity name='xfr' codepoint='1D535'/>
+ <entity name='Xgr' codepoint='039E'/>
+ <entity name='xgr' codepoint='03BE'/>
+ <entity name='xharr' codepoint='27F7'/>
+ <entity name='xhArr' codepoint='27FA'/>
+ <entity name='Xi' codepoint='039E'/>
+ <entity name='xi' codepoint='03BE'/>
+ <entity name='xlarr' codepoint='27F5'/>
+ <entity name='xlArr' codepoint='27F8'/>
+ <entity name='xmap' codepoint='27FC'/>
+ <entity name='xnis' codepoint='22FB'/>
+ <entity name='xodot' codepoint='2A00'/>
+ <entity name='Xopf' codepoint='1D54F'/>
+ <entity name='xopf' codepoint='1D569'/>
+ <entity name='xoplus' codepoint='2A01'/>
+ <entity name='xotime' codepoint='2A02'/>
+ <entity name='xrarr' codepoint='27F6'/>
+ <entity name='xrArr' codepoint='27F9'/>
+ <entity name='Xscr' codepoint='1D4B3'/>
+ <entity name='xscr' codepoint='1D4CD'/>
+ <entity name='xsqcup' codepoint='2A06'/>
+ <entity name='xuplus' codepoint='2A04'/>
+ <entity name='xutri' codepoint='25B3'/>
+ <entity name='xvee' codepoint='22C1'/>
+ <entity name='xwedge' codepoint='22C0'/>
+ <entity name='Yacute' codepoint='00DD'/>
+ <entity name='yacute' codepoint='00FD'/>
+ <entity name='YAcy' codepoint='042F'/>
+ <entity name='yacy' codepoint='044F'/>
+ <entity name='Ycirc' codepoint='0176'/>
+ <entity name='ycirc' codepoint='0177'/>
+ <entity name='Ycy' codepoint='042B'/>
+ <entity name='ycy' codepoint='044B'/>
+ <entity name='yen' codepoint='00A5'/>
+ <entity name='Yfr' codepoint='1D51C'/>
+ <entity name='yfr' codepoint='1D536'/>
+ <entity name='YIcy' codepoint='0407'/>
+ <entity name='yicy' codepoint='0457'/>
+ <entity name='Yopf' codepoint='1D550'/>
+ <entity name='yopf' codepoint='1D56A'/>
+ <entity name='Yscr' codepoint='1D4B4'/>
+ <entity name='yscr' codepoint='1D4CE'/>
+ <entity name='YUcy' codepoint='042E'/>
+ <entity name='yucy' codepoint='044E'/>
+ <entity name='yuml' codepoint='00FF'/>
+ <entity name='Yuml' codepoint='0178'/>
+ <entity name='Zacute' codepoint='0179'/>
+ <entity name='zacute' codepoint='017A'/>
+ <entity name='Zcaron' codepoint='017D'/>
+ <entity name='zcaron' codepoint='017E'/>
+ <entity name='Zcy' codepoint='0417'/>
+ <entity name='zcy' codepoint='0437'/>
+ <entity name='Zdot' codepoint='017B'/>
+ <entity name='zdot' codepoint='017C'/>
+ <entity name='zeetrf' codepoint='2128'/>
+ <entity name='ZeroWidthSpace' codepoint='200B'/>
+ <entity name='Zeta' codepoint='0396'/>
+ <entity name='zeta' codepoint='03B6'/>
+ <entity name='Zfr' codepoint='2128'/>
+ <entity name='zfr' codepoint='1D537'/>
+ <entity name='Zgr' codepoint='0396'/>
+ <entity name='zgr' codepoint='03B6'/>
+ <entity name='ZHcy' codepoint='0416'/>
+ <entity name='zhcy' codepoint='0436'/>
+ <entity name='zigrarr' codepoint='21DD'/>
+ <entity name='Zopf' codepoint='2124'/>
+ <entity name='zopf' codepoint='1D56B'/>
+ <entity name='Zscr' codepoint='1D4B5'/>
+ <entity name='zscr' codepoint='1D4CF'/>
+ <entity name='zwj' codepoint='200D'/>
+ <entity name='zwnj' codepoint='200C'/>
+
+ <group id='M_AREA'/>
+ <group id='M_BLOCK'/>
+ <group id='M_BLOCKINLINE'/>
+ <group id='M_BODY'/>
+ <group id='M_CELL'/>
+ <group id='M_COL'/>
+ <group id='M_DEF'/>
+ <group id='M_FORM'/>
+ <group id='M_FRAME'/>
+ <group id='M_HEAD'/>
+ <group id='M_HTML'/>
+ <group id='M_INLINE'/>
+ <group id='M_LEGEND'/>
+ <group id='M_LI'/>
+ <group id='M_NOLINK'/>
+ <group id='M_OPTION'/>
+ <group id='M_OPTIONS'/>
+ <group id='M_P'/>
+ <group id='M_PARAM'/>
+ <group id='M_TABLE'/>
+ <group id='M_TABULAR'/>
+ <group id='M_TR'/>
+ <element name='html' type='element'>
+ <isRoot/>
+ <contains group='M_HTML'/>
+ <element name='body' type='mixed' text-parent='true'>
+ <memberOf group='M_HTML'/>
+ <memberOf group='M_BODY'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <element name='a' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <contains group='M_NOLINK'/>
+ <attribute name='hreflang' type='NMTOKEN' />
+ <attribute name='shape' default='rect'/>
+ <attribute name='tabindex' type='NMTOKEN' />
+ </element>
+ <element name='abbr' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='acronym' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='address' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_P'/>
+ </element>
+ <element name='applet' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_PARAM'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='b' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='basefont' type='empty'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ </element>
+ <element name='bdo' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='big' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='blink' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='blockquote' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ </element>
+ <element name='br' type='empty'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <attribute name='clear' default='none'/>
+ </element>
+ <element name='canvas' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='center' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ </element>
+ <element name='cite' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='code' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='comment' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='del' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_BLOCKINLINE'/>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ </element>
+ <element name='dfn' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='dir' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_LI'/>
+ <attribute name='compact' type='BOOLEAN' />
+ </element>
+ <element name='div' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='dl' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_DEF'/>
+ <attribute name='compact' type='BOOLEAN' />
+ <element name='dd' type='mixed'>
+ <memberOf group='M_DEF'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ </element>
+ <element name='dt' type='mixed'>
+ <memberOf group='M_DEF'/>
+ <contains group='M_INLINE'/>
+ </element>
+ </element>
+ <element name='em' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='font' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='form' closeMode='unclosable' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <memberOf group='M_FORM'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_NOLINK'/>
+ <contains group='M_BLOCK'/>
+ <contains group='M_TR'/>
+ <contains group='M_CELL'/>
+ <attribute name='enctype' default='application/x-www-form-urlencoded'/>
+ <attribute name='method' default='get'/>
+ <element name='button' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='disabled' type='BOOLEAN' />
+ <attribute name='tabindex' type='NMTOKEN' />
+ <attribute name='type' default='submit'/>
+ </element>
+ <element name='fieldset' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_LEGEND'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <element name='legend' type='mixed'>
+ <memberOf group='M_LEGEND'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ </element>
+ <element name='input' type='empty'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='checked' type='BOOLEAN' />
+ <attribute name='disabled' type='BOOLEAN' />
+ <attribute name='ismap' type='BOOLEAN' />
+ <attribute name='maxlength' type='NMTOKEN' />
+ <attribute name='readonly' type='BOOLEAN' />
+ <attribute name='tabindex' type='NMTOKEN' />
+ <attribute name='type' default='text'/>
+ </element>
+ <element name='label' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='for' type='IDREF' />
+ </element>
+ <element name='select' type='element'>
+ <memberOf group='M_INLINE'/>
+ <contains group='M_OPTIONS'/>
+ <attribute name='disabled' type='BOOLEAN' />
+ <attribute name='multiple' type='BOOLEAN' />
+ <attribute name='size' type='NMTOKEN' />
+ <attribute name='tabindex' type='NMTOKEN' />
+ <element name='optgroup' type='element'>
+ <memberOf group='M_OPTIONS'/>
+ <contains group='M_OPTIONS'/>
+ <attribute name='disabled' type='BOOLEAN' />
+ </element>
+ <element name='option' type='string'>
+ <memberOf group='M_OPTION'/>
+ <memberOf group='M_OPTIONS'/>
+ <attribute name='disabled' type='BOOLEAN' />
+ <attribute name='selected' type='BOOLEAN' />
+ </element>
+ </element>
+ <element name='textarea' type='string'>
+ <memberOf group='M_INLINE'/>
+ <attribute name='cols' type='NMTOKEN' />
+ <attribute name='disabled' type='BOOLEAN' />
+ <attribute name='readonly' type='BOOLEAN' />
+ <attribute name='rows' type='NMTOKEN' />
+ <attribute name='tabindex' type='NMTOKEN' />
+ </element>
+ </element>
+ <element name='h1' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='h2' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='h3' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='h4' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='h5' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='h6' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='hr' type='empty'>
+ <memberOf group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='noshade' type='BOOLEAN' />
+ </element>
+ <element name='i' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='iframe' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='frameborder' default='1'/>
+ <attribute name='scrolling' default='auto'/>
+ </element>
+ <element name='img' type='empty'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='ismap' type='BOOLEAN' />
+ </element>
+ <element name='ins' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ </element>
+ <element name='kbd' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='map' type='element'>
+ <memberOf group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <contains group='M_AREA'/>
+ <element name='area' type='empty'>
+ <memberOf group='M_AREA'/>
+ <attribute name='nohref' type='BOOLEAN' />
+ <attribute name='shape' default='rect'/>
+ <attribute name='tabindex' type='NMTOKEN' />
+ </element>
+ </element>
+ <element name='menu' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_LI'/>
+ <attribute name='compact' type='BOOLEAN' />
+ </element>
+ <element name='marquee' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='width' type='NMTOKEN' />
+ </element>
+ <element name='nobr' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='wbr' type='empty'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ </element>
+ <element name='noscript' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ </element>
+ <element name='object' type='mixed'>
+ <memberOf group='M_HEAD'/>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_PARAM'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='declare' type='BOOLEAN' />
+ <attribute name='tabindex' type='NMTOKEN' />
+ <element name='param' type='empty'>
+ <memberOf group='M_PARAM'/>
+ <attribute name='valuetype' default='data'/>
+ </element>
+ </element>
+ <element name='ol' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_LI'/>
+ <attribute name='compact' type='BOOLEAN' />
+ <attribute name='start' type='NMTOKEN' />
+ </element>
+ <element name='p' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <memberOf group='M_P'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_TABLE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='pre' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='width' type='NMTOKEN' />
+ </element>
+ <element name='listing' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='xmp' type='mixed'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='width' type='NMTOKEN' />
+ </element>
+ <element name='q' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='ruby' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='rbc' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='rtc' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='rb' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='rt' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ <attribute name='rbspan' default='1'/>
+ </element>
+ <element name='rp' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='s' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='samp' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='small' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='span' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='strike' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='strong' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='sub' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='sup' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='table' closeMode='unclosable' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <memberOf group='M_TABLE'/>
+ <contains group='M_FORM'/>
+ <contains group='M_TABULAR'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='frame' type='NMTOKEN' />
+ <attribute name='rules' type='NMTOKEN' />
+ <element name='caption' type='mixed'>
+ <memberOf group='M_TABULAR'/>
+ <contains group='M_INLINE'/>
+ <attribute name='align' type='NMTOKEN' />
+ </element>
+ <element name='col' type='empty'>
+ <memberOf group='M_COL'/>
+ <memberOf group='M_TABULAR'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='span' default='1'/>
+ <attribute name='valign' type='NMTOKEN' />
+ </element>
+ <element name='colgroup' type='element'>
+ <memberOf group='M_TABULAR'/>
+ <contains group='M_COL'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='span' default='1'/>
+ <attribute name='valign' type='NMTOKEN' />
+ </element>
+ <element name='tbody' type='element'>
+ <memberOf group='M_TABULAR'/>
+ <contains group='M_TR'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='valign' type='NMTOKEN' />
+ <element name='tr' type='element'>
+ <memberOf group='M_TR'/>
+ <memberOf group='M_TABULAR'/>
+ <contains group='M_FORM'/>
+ <contains group='M_CELL'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='valign' type='NMTOKEN' />
+ <element name='td' type='mixed'>
+ <memberOf group='M_CELL'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='colspan' default='1'/>
+ <attribute name='headers' type='IDREFS' />
+ <attribute name='nowrap' type='BOOLEAN' />
+ <attribute name='rowspan' default='1'/>
+ <attribute name='scope' type='NMTOKEN' />
+ <attribute name='valign' type='NMTOKEN' />
+ </element>
+ <element name='th' type='mixed'>
+ <memberOf group='M_CELL'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='colspan' default='1'/>
+ <attribute name='headers' type='IDREFS' />
+ <attribute name='nowrap' type='BOOLEAN' />
+ <attribute name='rowspan' default='1'/>
+ <attribute name='scope' type='NMTOKEN' />
+ <attribute name='valign' type='NMTOKEN' />
+ </element>
+ </element>
+ </element>
+ <element name='tfoot' type='element'>
+ <memberOf group='M_TABULAR'/>
+ <contains group='M_TR'/>
+ <contains group='M_FORM'/>
+ <contains group='M_CELL'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='valign' type='NMTOKEN' />
+ </element>
+ <element name='thead' type='element'>
+ <memberOf group='M_TABULAR'/>
+ <contains group='M_TR'/>
+ <contains group='M_FORM'/>
+ <contains group='M_CELL'/>
+ <attribute name='align' type='NMTOKEN' />
+ <attribute name='valign' type='NMTOKEN' />
+ </element>
+ </element>
+ <element name='tt' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='u' closeMode='restartable' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='ul' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <contains group='M_LI'/>
+ <attribute name='compact' type='BOOLEAN' />
+ <attribute name='type' type='NMTOKEN' />
+ <element name='li' type='mixed'>
+ <memberOf group='M_LI'/>
+ <contains group='M_INLINE'/>
+ <contains group='M_BLOCK'/>
+ <attribute name='value' type='NMTOKEN' />
+ </element>
+ </element>
+ <element name='var' type='mixed'>
+ <memberOf group='M_INLINE'/>
+ <memberOf group='M_NOLINK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ </element>
+ <element name='frameset' type='element'>
+ <memberOf group='M_FRAME'/>
+ <memberOf group='M_HTML'/>
+ <contains group='M_FRAME'/>
+ <element name='frame' type='empty'>
+ <memberOf group='M_FRAME'/>
+ <attribute name='frameborder' default='1'/>
+ <attribute name='noresize' type='BOOLEAN' />
+ <attribute name='scrolling' default='auto'/>
+ </element>
+ </element>
+ <element name='head' type='element'>
+ <memberOf group='M_HTML'/>
+ <contains group='M_HEAD'/>
+ <element name='base' type='empty'>
+ <memberOf group='M_HEAD'/>
+ </element>
+ <element name='isindex' type='empty'>
+ <memberOf group='M_HEAD'/>
+ </element>
+ <element name='link' type='empty'>
+ <memberOf group='M_HEAD'/>
+ <memberOf group='M_INLINE'/>
+ <attribute name='hreflang' type='NMTOKEN' />
+ </element>
+ <element name='meta' type='empty'>
+ <memberOf group='M_HEAD'/>
+ <attribute name='http-equiv' type='NMTOKEN' />
+ <attribute name='name' type='NMTOKEN' />
+ </element>
+ <element name='style' type='cdata'>
+ <memberOf group='M_HEAD'/>
+ <memberOf group='M_INLINE'/>
+ </element>
+ <element name='title' type='string'>
+ <memberOf group='M_HEAD'/>
+ </element>
+ <element name='bgsound' type='empty'>
+ <memberOf group='M_HEAD'/>
+ </element>
+ </element>
+ <element name='noframes' type='element'>
+ <memberOf group='M_BLOCK'/>
+ <memberOf group='M_HTML'/>
+ <memberOf group='M_FRAME'/>
+ <contains group='M_BODY'/>
+ <contains group='M_BLOCK'/>
+ <contains group='M_INLINE'/>
+ </element>
+ <element name='script' type='cdata'>
+ <memberOfAny/>
+ <attribute name='defer' type='BOOLEAN' />
+ </element>
+ </element>
+ <attribute name='class' type='NMTOKEN' />
+ <attribute name='dir' type='NMTOKEN' />
+ <attribute name='id' type='ID' />
+ <attribute name='lang' type='NMTOKEN' />
+</schema>
diff --git a/src/java/org/ccil/cowan/tagsoup/AttributesImpl.java b/src/java/org/ccil/cowan/tagsoup/AttributesImpl.java
new file mode 100644
index 0000000..86f76fc
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/AttributesImpl.java
@@ -0,0 +1,626 @@
+// AttributesImpl.java - default implementation of the SAX2 Attributes interface.
+// Written by David Megginson, david@megginson.com
+// and placed by him into the public domain.
+// Extensively modified by John Cowan for TagSoup.
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+import org.xml.sax.Attributes;
+
+
+/**
+ * Default implementation of the Attributes interface.
+ *
+ * <blockquote>
+ * <em>This module, both source code and documentation, is in the
+ * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ * for further information.
+ * </blockquote>
+ *
+ * <p>This class provides a default implementation of the SAX2
+ * {@link org.xml.sax.Attributes Attributes} interface, with the
+ * addition of manipulators so that the list can be modified or
+ * reused.</p>
+ *
+ * <p>There are two typical uses of this class:</p>
+ *
+ * <ol>
+ * <li>to take a persistent snapshot of an Attributes object
+ * in a {@link org.xml.sax.ContentHandler#startElement startElement} event; or</li>
+ * <li>to construct or modify an Attributes object in a SAX2 driver or filter.</li>
+ * </ol>
+ *
+ * <p>This class replaces the now-deprecated SAX1 {@link
+ * org.xml.sax.helpers.AttributeListImpl AttributeListImpl}
+ * class; in addition to supporting the updated Attributes
+ * interface rather than the deprecated {@link org.xml.sax.AttributeList
+ * AttributeList} interface, it also includes a much more efficient
+ * implementation using a single array rather than a set of Vectors.</p>
+ *
+ * @since SAX 2.0
+ * @author David Megginson
+ * @version 2.0.1 (sax2r2)
+ */
+public class AttributesImpl implements Attributes
+{
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Constructors.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Construct a new, empty AttributesImpl object.
+ */
+ public AttributesImpl ()
+ {
+ length = 0;
+ data = null;
+ }
+
+
+ /**
+ * Copy an existing Attributes object.
+ *
+ * <p>This constructor is especially useful inside a
+ * {@link org.xml.sax.ContentHandler#startElement startElement} event.</p>
+ *
+ * @param atts The existing Attributes object.
+ */
+ public AttributesImpl (Attributes atts)
+ {
+ setAttributes(atts);
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Implementation of org.xml.sax.Attributes.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Return the number of attributes in the list.
+ *
+ * @return The number of attributes in the list.
+ * @see org.xml.sax.Attributes#getLength
+ */
+ public int getLength ()
+ {
+ return length;
+ }
+
+
+ /**
+ * Return an attribute's Namespace URI.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The Namespace URI, the empty string if none is
+ * available, or null if the index is out of range.
+ * @see org.xml.sax.Attributes#getURI
+ */
+ public String getURI (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5];
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's local name.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's local name, the empty string if
+ * none is available, or null if the index is out of range.
+ * @see org.xml.sax.Attributes#getLocalName
+ */
+ public String getLocalName (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+1];
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's qualified (prefixed) name.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's qualified name, the empty string if
+ * none is available, or null if the index is out of bounds.
+ * @see org.xml.sax.Attributes#getQName
+ */
+ public String getQName (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+2];
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's type by index.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's type, "CDATA" if the type is unknown, or null
+ * if the index is out of bounds.
+ * @see org.xml.sax.Attributes#getType(int)
+ */
+ public String getType (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+3];
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Return an attribute's value by index.
+ *
+ * @param index The attribute's index (zero-based).
+ * @return The attribute's value or null if the index is out of bounds.
+ * @see org.xml.sax.Attributes#getValue(int)
+ */
+ public String getValue (int index)
+ {
+ if (index >= 0 && index < length) {
+ return data[index*5+4];
+ } else {
+ return null;
+ }
+ }
+
+
+ /**
+ * Look up an attribute's index by Namespace name.
+ *
+ * <p>In many cases, it will be more efficient to look up the name once and
+ * use the index query methods rather than using the name query methods
+ * repeatedly.</p>
+ *
+ * @param uri The attribute's Namespace URI, or the empty
+ * string if none is available.
+ * @param localName The attribute's local name.
+ * @return The attribute's index, or -1 if none matches.
+ * @see org.xml.sax.Attributes#getIndex(java.lang.String,java.lang.String)
+ */
+ public int getIndex (String uri, String localName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i].equals(uri) && data[i+1].equals(localName)) {
+ return i / 5;
+ }
+ }
+ return -1;
+ }
+
+
+ /**
+ * Look up an attribute's index by qualified (prefixed) name.
+ *
+ * @param qName The qualified name.
+ * @return The attribute's index, or -1 if none matches.
+ * @see org.xml.sax.Attributes#getIndex(java.lang.String)
+ */
+ public int getIndex (String qName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i+2].equals(qName)) {
+ return i / 5;
+ }
+ }
+ return -1;
+ }
+
+
+ /**
+ * Look up an attribute's type by Namespace-qualified name.
+ *
+ * @param uri The Namespace URI, or the empty string for a name
+ * with no explicit Namespace URI.
+ * @param localName The local name.
+ * @return The attribute's type, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getType(java.lang.String,java.lang.String)
+ */
+ public String getType (String uri, String localName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i].equals(uri) && data[i+1].equals(localName)) {
+ return data[i+3];
+ }
+ }
+ return null;
+ }
+
+
+ /**
+ * Look up an attribute's type by qualified (prefixed) name.
+ *
+ * @param qName The qualified name.
+ * @return The attribute's type, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getType(java.lang.String)
+ */
+ public String getType (String qName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i+2].equals(qName)) {
+ return data[i+3];
+ }
+ }
+ return null;
+ }
+
+
+ /**
+ * Look up an attribute's value by Namespace-qualified name.
+ *
+ * @param uri The Namespace URI, or the empty string for a name
+ * with no explicit Namespace URI.
+ * @param localName The local name.
+ * @return The attribute's value, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getValue(java.lang.String,java.lang.String)
+ */
+ public String getValue (String uri, String localName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i].equals(uri) && data[i+1].equals(localName)) {
+ return data[i+4];
+ }
+ }
+ return null;
+ }
+
+
+ /**
+ * Look up an attribute's value by qualified (prefixed) name.
+ *
+ * @param qName The qualified name.
+ * @return The attribute's value, or null if there is no
+ * matching attribute.
+ * @see org.xml.sax.Attributes#getValue(java.lang.String)
+ */
+ public String getValue (String qName)
+ {
+ int max = length * 5;
+ for (int i = 0; i < max; i += 5) {
+ if (data[i+2].equals(qName)) {
+ return data[i+4];
+ }
+ }
+ return null;
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Manipulators.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Clear the attribute list for reuse.
+ *
+ * <p>Note that little memory is freed by this call:
+ * the current array is kept so it can be
+ * reused.</p>
+ */
+ public void clear ()
+ {
+ if (data != null) {
+ for (int i = 0; i < (length * 5); i++)
+ data [i] = null;
+ }
+ length = 0;
+ }
+
+
+ /**
+ * Copy an entire Attributes object.
+ *
+ * <p>It may be more efficient to reuse an existing object
+ * rather than constantly allocating new ones.</p>
+ *
+ * @param atts The attributes to copy (may be this object itself).
+ */
+ public void setAttributes (Attributes atts)
+ {
+ int count = atts.getLength();
+ if (count == 0) { clear(); return; }
+ // Build the copy first so that atts may safely be this object.
+ String[] copy = new String[count*5];
+ for (int i = 0; i < count; i++) {
+ copy[i*5] = atts.getURI(i);
+ copy[i*5+1] = atts.getLocalName(i);
+ copy[i*5+2] = atts.getQName(i);
+ copy[i*5+3] = atts.getType(i);
+ copy[i*5+4] = atts.getValue(i);
+ }
+ data = copy; length = count;
+ }
+
+
+ /**
+ * Add an attribute to the end of the list.
+ *
+ * <p>For the sake of speed, this method does no checking
+ * to see if the attribute is already in the list: that is
+ * the responsibility of the application.</p>
+ *
+ * @param uri The Namespace URI, or the empty string if
+ * none is available or Namespace processing is not
+ * being performed.
+ * @param localName The local name, or the empty string if
+ * Namespace processing is not being performed.
+ * @param qName The qualified (prefixed) name, or the empty string
+ * if qualified names are not available.
+ * @param type The attribute type as a string.
+ * @param value The attribute value.
+ */
+ public void addAttribute (String uri, String localName, String qName,
+ String type, String value)
+ {
+ ensureCapacity(length+1);
+ data[length*5] = uri;
+ data[length*5+1] = localName;
+ data[length*5+2] = qName;
+ data[length*5+3] = type;
+ data[length*5+4] = value;
+ length++;
+ }
+
+
+ /**
+ * Set an attribute in the list.
+ *
+ * <p>For the sake of speed, this method does no checking
+ * for name conflicts or well-formedness: such checks are the
+ * responsibility of the application.</p>
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param uri The Namespace URI, or the empty string if
+ * none is available or Namespace processing is not
+ * being performed.
+ * @param localName The local name, or the empty string if
+ * Namespace processing is not being performed.
+ * @param qName The qualified name, or the empty string
+ * if qualified names are not available.
+ * @param type The attribute type as a string.
+ * @param value The attribute value.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setAttribute (int index, String uri, String localName,
+ String qName, String type, String value)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5] = uri;
+ data[index*5+1] = localName;
+ data[index*5+2] = qName;
+ data[index*5+3] = type;
+ data[index*5+4] = value;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Remove an attribute from the list.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void removeAttribute (int index)
+ {
+ if (index >= 0 && index < length) {
+ if (index < length - 1) {
+ System.arraycopy(data, (index+1)*5, data, index*5,
+ (length-index-1)*5);
+ }
+ index = (length - 1) * 5;
+ data [index++] = null;
+ data [index++] = null;
+ data [index++] = null;
+ data [index++] = null;
+ data [index] = null;
+ length--;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the Namespace URI of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param uri The attribute's Namespace URI, or the empty
+ * string for none.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setURI (int index, String uri)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5] = uri;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the local name of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param localName The attribute's local name, or the empty
+ * string for none.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setLocalName (int index, String localName)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+1] = localName;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the qualified name of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param qName The attribute's qualified name, or the empty
+ * string for none.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setQName (int index, String qName)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+2] = qName;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the type of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param type The attribute's type.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setType (int index, String type)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+3] = type;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+ /**
+ * Set the value of a specific attribute.
+ *
+ * @param index The index of the attribute (zero-based).
+ * @param value The attribute's value.
+ * @exception java.lang.ArrayIndexOutOfBoundsException When the
+ * supplied index does not point to an attribute
+ * in the list.
+ */
+ public void setValue (int index, String value)
+ {
+ if (index >= 0 && index < length) {
+ data[index*5+4] = value;
+ } else {
+ badIndex(index);
+ }
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Ensure the internal array's capacity.
+ *
+ * @param n The minimum number of attributes that the array must
+ * be able to hold.
+ */
+ private void ensureCapacity (int n) {
+ if (n <= 0) {
+ return;
+ }
+ int max;
+ if (data == null || data.length == 0) {
+ max = 25; // initial size: room for five attributes
+ }
+ else if (data.length >= n * 5) {
+ return;
+ }
+ else {
+ max = data.length;
+ }
+ while (max < n * 5) {
+ max *= 2; // grow by doubling until n attributes fit
+ }
+
+ String newData[] = new String[max];
+ if (length > 0) {
+ System.arraycopy(data, 0, newData, 0, length*5);
+ }
+ data = newData;
+ }
+
+
+ /**
+ * Report a bad array index in a manipulator.
+ *
+ * @param index The index to report.
+ * @exception java.lang.ArrayIndexOutOfBoundsException Always.
+ */
+ private void badIndex (int index)
+ throws ArrayIndexOutOfBoundsException
+ {
+ String msg =
+ "Attempt to modify attribute at illegal index: " + index;
+ throw new ArrayIndexOutOfBoundsException(msg);
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal state.
+ ////////////////////////////////////////////////////////////////////
+
+ int length; // number of attributes currently stored
+ String data []; // 5 slots per attribute: URI, local name, qName, type, value
+
+}
+
+// end of AttributesImpl.java
+
diff --git a/src/java/org/ccil/cowan/tagsoup/AutoDetector.java b/src/java/org/ccil/cowan/tagsoup/AutoDetector.java
new file mode 100644
index 0000000..eb85d6f
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/AutoDetector.java
@@ -0,0 +1,43 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Interface to objects that translate InputStreams to Readers by auto-detection
+
+package org.ccil.cowan.tagsoup;
+import java.io.Reader;
+import java.io.InputStream;
+
+/**
+Classes which accept an InputStream and provide a Reader which figures
+out the encoding of the InputStream and reads characters from it should
+conform to this interface.
+@see java.io.InputStream
+@see java.io.Reader
+*/
+
+public interface AutoDetector {
+
+ /**
+ Wrap an InputStream in a Reader that decodes it using the
+ character encoding the implementation presumes it to have.
+ Any bytes the implementation consumes from the InputStream while
+ deciding <i>must</i> be pushed back onto it, so that they can
+ still be reinterpreted as characters.
+ @param i The InputStream
+ @return A Reader that reads from the InputStream
+ */
+
+ Reader autoDetectingReader(InputStream i);
+
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/CommandLine.java b/src/java/org/ccil/cowan/tagsoup/CommandLine.java
new file mode 100644
index 0000000..dd0c022
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/CommandLine.java
@@ -0,0 +1,289 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// The TagSoup command line UI
+
+package org.ccil.cowan.tagsoup;
+import java.util.Hashtable;
+import java.util.Enumeration;
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import org.xml.sax.*;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.ext.LexicalHandler;
+
+
+/**
+The stand-alone TagSoup program.
+**/
+public class CommandLine {
+
+ static Hashtable options = new Hashtable(); static {
+ options.put("--nocdata", Boolean.FALSE); // CDATA elements are normal
+ options.put("--files", Boolean.FALSE); // process arguments as separate files
+ options.put("--reuse", Boolean.FALSE); // reuse a single Parser
+ options.put("--nons", Boolean.FALSE); // no namespaces
+ options.put("--nobogons", Boolean.FALSE); // suppress unknown elements
+ options.put("--any", Boolean.FALSE); // unknowns have ANY content model
+ options.put("--emptybogons", Boolean.FALSE); // unknowns have EMPTY content model
+ options.put("--norootbogons", Boolean.FALSE); // unknowns can't be the root
+ options.put("--pyxin", Boolean.FALSE); // input is PYX
+ options.put("--lexical", Boolean.FALSE); // output comments
+ options.put("--pyx", Boolean.FALSE); // output is PYX
+ options.put("--html", Boolean.FALSE); // output is HTML
+ options.put("--method=", Boolean.FALSE); // output method
+ options.put("--doctype-public=", Boolean.FALSE); // override public id
+ options.put("--doctype-system=", Boolean.FALSE); // override system id
+ options.put("--output-encoding=", Boolean.FALSE); // output encoding
+ options.put("--omit-xml-declaration", Boolean.FALSE); // omit XML decl
+ options.put("--encoding=", Boolean.FALSE); // specify encoding
+ options.put("--help", Boolean.FALSE); // display help
+ options.put("--version", Boolean.FALSE); // display version
+ options.put("--nodefaults", Boolean.FALSE); // no default attrs
+ options.put("--nocolons", Boolean.FALSE); // colon to underscore
+ options.put("--norestart", Boolean.FALSE); // no restartable elements
+ options.put("--ignorable", Boolean.FALSE); // return ignorable whitespace
+ }
+
+ /**
+ Main method. Processes specified files or standard input.
+ **/
+
+ public static void main(String[] argv) throws IOException, SAXException {
+ int optind = getopts(options, argv);
+ if (hasOption(options, "--help")) {
+ doHelp();
+ return;
+ }
+ if (hasOption(options, "--version")) {
+ System.err.println("TagSoup version 1.2");
+ return;
+ }
+ if (argv.length == optind) {
+ process("", System.out);
+ }
+ else if (hasOption(options, "--files")) {
+ for (int i = optind; i < argv.length; i++) {
+ String src = argv[i];
+ String dst;
+ int j = src.lastIndexOf('.');
+ if (j == -1) dst = src + ".xhtml";
+ else if (src.endsWith(".xhtml")) dst = src + "_";
+ else dst = src.substring(0, j) + ".xhtml";
+ System.err.println("src: " + src + " dst: " + dst);
+ OutputStream os = new FileOutputStream(dst);
+ // Close each output file as soon as it is written, even when
+ // parsing fails, instead of leaking one descriptor per input.
+ try { process(src, os); }
+ finally { os.close(); }
+ }
+ }
+ else {
+ for (int i = optind; i < argv.length; i++) {
+ System.err.println("src: " + argv[i]);
+ process(argv[i], System.out);
+ }
+ }
+ }
+
+ // Print the help message
+
+ private static void doHelp() {
+ System.err.print("usage: java -jar tagsoup-*.jar ");
+ System.err.print(" [ ");
+ boolean first = true;
+ for (Enumeration e = options.keys(); e.hasMoreElements(); ) {
+ if (!first) {
+ System.err.print("| ");
+ }
+ first = false;
+ String key = (String)(e.nextElement());
+ System.err.print(key);
+ if (key.endsWith("="))
+ System.err.print("?");
+ System.err.print(" ");
+ }
+ System.err.println("]*");
+ }
+
+ private static Parser theParser = null;
+ private static HTMLSchema theSchema = null;
+ private static String theOutputEncoding = null;
+
+ // Process one source onto an output stream; an empty src means standard input.
+
+ private static void process(String src, OutputStream os)
+ throws IOException, SAXException {
+ XMLReader r;
+ if (hasOption(options, "--reuse")) {
+ if (theParser == null) theParser = new Parser();
+ r = theParser;
+ }
+ else {
+ r = new Parser();
+ }
+ theSchema = new HTMLSchema();
+ r.setProperty(Parser.schemaProperty, theSchema);
+
+ if (hasOption(options, "--nocdata")) {
+ r.setFeature(Parser.CDATAElementsFeature, false);
+ }
+
+ if (hasOption(options, "--nons") || hasOption(options, "--html")) {
+ r.setFeature(Parser.namespacesFeature, false);
+ }
+
+ if (hasOption(options, "--nobogons")) {
+ r.setFeature(Parser.ignoreBogonsFeature, true);
+ }
+
+ if (hasOption(options, "--any")) {
+ r.setFeature(Parser.bogonsEmptyFeature, false);
+ }
+ else if (hasOption(options, "--emptybogons")) {
+ r.setFeature(Parser.bogonsEmptyFeature, true);
+ }
+
+ if (hasOption(options, "--norootbogons")) {
+ r.setFeature(Parser.rootBogonsFeature, false);
+ }
+
+ if (hasOption(options, "--nodefaults")) {
+ r.setFeature(Parser.defaultAttributesFeature, false);
+ }
+ if (hasOption(options, "--nocolons")) {
+ r.setFeature(Parser.translateColonsFeature, true);
+ }
+
+ if (hasOption(options, "--norestart")) {
+ r.setFeature(Parser.restartElementsFeature, false);
+ }
+
+ if (hasOption(options, "--ignorable")) {
+ r.setFeature(Parser.ignorableWhitespaceFeature, true);
+ }
+
+ if (hasOption(options, "--pyxin")) {
+ r.setProperty(Parser.scannerProperty, new PYXScanner());
+ }
+
+ // Resolve the output encoding before the Writer is built;
+ // chooseContentHandler() reads the option only afterwards.
+ Object enc = options.get("--output-encoding=");
+ if (enc instanceof String) theOutputEncoding = (String)enc;
+ Writer w = (theOutputEncoding == null)
+ ? new OutputStreamWriter(os)
+ : new OutputStreamWriter(os, theOutputEncoding);
+ ContentHandler h = chooseContentHandler(w);
+ r.setContentHandler(h);
+ if (hasOption(options, "--lexical") && h instanceof LexicalHandler) {
+ r.setProperty(Parser.lexicalHandlerProperty, h);
+ }
+ InputSource s = new InputSource();
+ if (src.length() != 0) {
+ s.setSystemId(src);
+ }
+ else {
+ s.setByteStream(System.in);
+ }
+ if (hasOption(options, "--encoding=")) {
+// System.out.println("%% Found --encoding");
+ String encoding = (String)options.get("--encoding=");
+ if (encoding != null) s.setEncoding(encoding);
+ }
+ r.parse(s);
+ }
+
+ // Pick a content handler to generate the desired format.
+
+ private static ContentHandler chooseContentHandler(Writer w) {
+ XMLWriter x;
+ if (hasOption(options, "--pyx")) {
+ return new PYXWriter(w);
+ }
+
+ x = new XMLWriter(w);
+ if (hasOption(options, "--html")) {
+ x.setOutputProperty(XMLWriter.METHOD, "html");
+ x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
+ }
+ if (hasOption(options, "--method=")) {
+ String method = (String)options.get("--method=");
+ if (method != null) {
+ x.setOutputProperty(XMLWriter.METHOD, method);
+ }
+ }
+ if (hasOption(options, "--doctype-public=")) {
+ String doctype_public = (String)options.get("--doctype-public=");
+ if (doctype_public != null) {
+ x.setOutputProperty(XMLWriter.DOCTYPE_PUBLIC, doctype_public);
+ }
+ }
+ if (hasOption(options, "--doctype-system=")) {
+ String doctype_system = (String)options.get("--doctype-system=");
+ if (doctype_system != null) {
+ x.setOutputProperty(XMLWriter.DOCTYPE_SYSTEM, doctype_system);
+ }
+ }
+ if (hasOption(options, "--output-encoding=")) {
+ theOutputEncoding = (String)options.get("--output-encoding=");
+// System.err.println("%%%% Output encoding is " + theOutputEncoding);
+ if (theOutputEncoding != null) {
+ x.setOutputProperty(XMLWriter.ENCODING, theOutputEncoding);
+ }
+ }
+ if (hasOption(options, "--omit-xml-declaration")) {
+ x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
+ }
+ x.setPrefix(theSchema.getURI(), "");
+ return x;
+ }
+
+ // Options processing: returns the index of the first non-option argument.
+
+ private static int getopts(Hashtable options, String[] argv) {
+ int optind;
+ for (optind = 0; optind < argv.length; optind++) {
+ String arg = argv[optind];
+ String value = null;
+ if (arg.length() == 0 || arg.charAt(0) != '-') break;
+ int eqsign = arg.indexOf('=');
+ if (eqsign != -1) {
+ value = arg.substring(eqsign + 1, arg.length());
+ arg = arg.substring(0, eqsign + 1);
+ }
+ if (options.containsKey(arg)) {
+ if (value == null) options.put(arg, Boolean.TRUE);
+ else options.put(arg, value);
+// System.out.println("%% Parsed [" + arg + "]=[" + value + "]");
+ }
+ else {
+ System.err.print("Unknown option ");
+ System.err.println(arg);
+ System.exit(1);
+ }
+ }
+ return optind;
+ }
+
+ // Return true if an option exists.
+
+ private static boolean hasOption(Hashtable options, String option) {
+ if (Boolean.getBoolean(option)) return true; // system property of the same name set to "true"
+ else if (options.get(option) != Boolean.FALSE) return true; // table entry changed from its FALSE default
+ return false;
+ }
+
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/Element.java b/src/java/org/ccil/cowan/tagsoup/Element.java
new file mode 100644
index 0000000..01a9fa7
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/Element.java
@@ -0,0 +1,203 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+
+/**
+The internal representation of an actual element (not an element type).
+An Element has an element type, attributes, and a successor Element
+for use in constructing stacks and queues of Elements.
+@see ElementType
+@see AttributesImpl
+*/
+public class Element {
+
+
+ private ElementType theType; // the element type this element instantiates
+ private AttributesImpl theAtts; // this element's own attribute list
+ private Element theNext; // link to the following element, or null
+ private boolean preclosed; // becomes true once preclose() is called
+
+ /**
+ Construct an Element of the given ElementType.
+ @param type The element type of the newly constructed element
+ @param defaultAttributes True to start from a copy of the type's attributes
+ */
+
+ public Element(ElementType type, boolean defaultAttributes) {
+ theType = type;
+ theNext = null;
+ preclosed = false;
+ theAtts = defaultAttributes ? new AttributesImpl(type.atts())
+ : new AttributesImpl();
+ }
+
+ /**
+ Get this element's type.
+ @return The element type.
+ */
+
+ public ElementType type() { return this.theType; }
+
+ /**
+ Get this element's attributes as an AttributesImpl object.
+ Because an AttributesImpl is handed back, callers can modify them.
+ @return The attributes
+ @see AttributesImpl
+ */
+ public AttributesImpl atts() { return this.theAtts; }
+
+ /**
+ Get the element that follows this one in an element stack or queue.
+ @return The next element
+ */
+
+ public Element next() { return this.theNext; }
+
+ /**
+ Replace the element that follows this one in an element stack or queue.
+ @param next The new next element
+ */
+
+ public void setNext(Element next) { this.theNext = next; }
+
+ /**
+ Get the name of this element's type.
+ Shortcut that delegates to the element type.
+ @return The element type name
+ */
+
+ public String name() { return this.theType.name(); }
+
+ /**
+ Get the namespace name of this element's type.
+ Shortcut that delegates to the element type.
+ @return The element type namespace name
+ */
+
+ public String namespace() { return this.theType.namespace(); }
+
+ /**
+ Get the local name of this element's type.
+ Shortcut that delegates to the element type.
+ @return The element type local name
+ */
+
+ public String localName() { return this.theType.localName(); }
+
+ /**
+ Get the content model vector of this element's type.
+ Shortcut that delegates to the element type.
+ @return The content model vector
+ */
+
+ public int model() { return this.theType.model(); }
+
+ /**
+ Get the member-of vector of this element's type.
+ Shortcut that delegates to the element type.
+ @return The member-of vector
+ */
+
+ public int memberOf() { return this.theType.memberOf(); }
+
+ /**
+ Get the flags vector of this element's type.
+ Shortcut that delegates to the element type.
+ @return The flags vector
+ */
+
+ public int flags() { return this.theType.flags(); }
+
+ /**
+ Get the parent element type of this element's type.
+ Shortcut that delegates to the element type.
+ @return The parent element type
+ */
+
+ public ElementType parent() { return this.theType.parent(); }
+
+ /**
+ Test whether this element's type can contain another element's type.
+ Shortcut that delegates to the element type.
+ @param other The other element
+ @return The answer given by this element's type
+ */
+
+ public boolean canContain(Element other) {
+ return this.theType.canContain(other.theType);
+ }
+
+
+ /**
+ Store an attribute and its value in this element.
+ @param name The attribute name (Qname)
+ @param type The attribute type
+ @param value The attribute value
+ */
+
+ public void setAttribute(String name, String type, String value) {
+ this.theType.setAttribute(this.theAtts, name, type, value);
+ }
+
+ /**
+ Make this element anonymous.
+ Every attribute whose type is <tt>ID</tt> or whose qualified name
+ is <tt>name</tt> is removed from the element's attributes.
+ */
+
+ public void anonymize() {
+ // Walk backwards so removals do not shift unvisited indexes.
+ int i = theAtts.getLength();
+ while (--i >= 0) {
+ boolean isId = theAtts.getType(i).equals("ID");
+ if (isId || theAtts.getQName(i).equals("name")) theAtts.removeAttribute(i);
+ }
+ }
+
+ /**
+ Clean the attributes of this element.
+ An attribute is removed when its local name is null or empty
+ (the name was ill-formed) or when its value is null (the attribute
+ was present in the element type but not in this actual element).
+ */
+
+ public void clean() {
+ // Walk backwards so removals do not shift unvisited indexes.
+ int i = theAtts.getLength();
+ while (--i >= 0) {
+ String local = theAtts.getLocalName(i);
+ boolean noValue = theAtts.getValue(i) == null;
+ boolean noName = local == null || local.length() == 0;
+ if (noValue || noName) theAtts.removeAttribute(i);
+ }
+ }
+
+ /**
+ Mark this element as preclosed, meaning that an end-tag has been
+ seen but the element cannot yet be closed for structural reasons.
+ */
+
+ public void preclose() {
+ this.preclosed = true;
+ }
+
+ /**
+ Report whether this element has been marked as preclosed.
+ */
+
+ public boolean isPreclosed() {
+ return this.preclosed;
+ }
+
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/ElementType.java b/src/java/org/ccil/cowan/tagsoup/ElementType.java
new file mode 100644
index 0000000..46ae883
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/ElementType.java
@@ -0,0 +1,276 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+
+/**
+This class represents an element type in the schema.
+An element type has a name, a content model vector, a member-of vector,
+a flags vector, default attributes, and a schema to which it belongs.
+@see Schema
+*/
+
+public class ElementType {
+
+ private String theName; // element type name (Qname)
+ private String theNamespace; // element type namespace name
+ private String theLocalName; // element type local name
+ private int theModel; // bitmap: what the element contains
+ private int theMemberOf; // bitmap: what element is contained in
+ private int theFlags; // bitmap: element flags
+ private AttributesImpl theAtts; // default attributes
+ private ElementType theParent; // parent of this element type
+ private Schema theSchema; // schema to which this belongs
+
+ /**
+ Construct an ElementType:
+ but it's better to use Schema.element() instead.
+ The content model, member-of, and flags vectors are specified as ints.
+ @param name The element type name
+ @param model ORed-together bits representing the content models
+ allowed in the content of this element type
+ @param memberOf ORed-together bits representing the content models
+ to which this element type belongs
+ @param flags ORed-together bits representing the flags associated
+ with this element type
+ @param schema The schema with which this element type will be
+ associated
+ */
+
+ public ElementType(String name, int model, int memberOf, int flags, Schema schema) {
+     this.theSchema = schema;    // stored first: namespace() reads it for unprefixed names
+     this.theAtts = new AttributesImpl();
+     this.theName = name;
+     this.theModel = model;
+     this.theMemberOf = memberOf;
+     this.theFlags = flags;
+     this.theNamespace = namespace(name, false);
+     this.theLocalName = localName(name);
+ }
+
+ /**
+ Derive the namespace name that corresponds to a Qname.
+ An unprefixed attribute name has the empty namespace name; an
+ unprefixed element name takes the schema's URI. The <tt>xml</tt>
+ prefix maps to the XML namespace, and any other prefix yields an
+ interned <tt>urn:x-prefix:</tt> name.
+ @param name The Qname
+ @param attribute True if name is an attribute name
+ @return The namespace name
+ **/
+ public String namespace(String name, boolean attribute) {
+     int sep = name.indexOf(':');
+     if (sep < 0) {
+         if (attribute) return "";
+         return theSchema.getURI();
+     }
+     String prefix = name.substring(0, sep);
+     return "xml".equals(prefix)
+         ? "http://www.w3.org/XML/1998/namespace"
+         : ("urn:x-prefix:" + prefix).intern();
+ }
+
+ /**
+ Extract the local part of a Qname.
+ A name without a colon is returned unchanged; otherwise the text
+ after the first colon is returned, interned.
+ @param name The Qname
+ @return The local name
+ **/
+ public String localName(String name) {
+     int sep = name.indexOf(':');
+     if (sep < 0)
+         return name;
+     String local = name.substring(sep + 1);
+     return local.intern();
+ }
+
+ /**
+ Returns the name of this element type.
+ @return The name of the element type
+ */
+
+ public String name() { return theName; }
+
+ /**
+ Returns the namespace name of this element type.
+ @return The namespace name of the element type
+ */
+
+ public String namespace() { return theNamespace; }
+
+ /**
+ Returns the local name of this element type.
+ @return The local name of the element type
+ */
+
+ public String localName() { return theLocalName; }
+
+ /**
+ Returns the content models of this element type.
+ @return The content models of this element type as a vector of bits
+ */
+
+ public int model() { return theModel; }
+
+ /**
+ Returns the content models to which this element type belongs.
+ @return The content models to which this element type belongs as a
+ vector of bits
+ */
+
+ public int memberOf() { return theMemberOf; }
+
+ /**
+ Returns the flags associated with this element type.
+ @return The flags associated with this element type as a vector of bits
+ */
+
+ public int flags() { return theFlags; }
+
+ /**
+ Returns the default attributes associated with this element type.
+ Attributes of type CDATA that don't have default values are
+ typically not included. Other attributes without default values
+ have an internal value of <tt>null</tt>.
+ The return value is an AttributesImpl to allow the caller to mutate
+ the attributes.
+ */
+
+ public AttributesImpl atts() {return theAtts;}
+
+ /**
+ Returns the parent element type of this element type.
+ @return The parent element type
+ */
+
+ public ElementType parent() {return theParent;}
+
+ /**
+ Returns the schema which this element type is associated with.
+ @return The schema
+ */
+
+ public Schema schema() {return theSchema;}
+
+
+ /**
+ Tests whether another element type may appear inside this one.
+ Containment is allowed when this type's model vector and the other
+ type's member-of vector have at least one bit in common.
+ @param other The other element type
+ @return true if this element type can contain the other
+ */
+
+ public boolean canContain(ElementType other) {
+     return 0 != (other.theMemberOf & theModel);
+ }
+
+
+ /**
+ Stores an attribute and its value in an AttributesImpl object, adding
+ it or overwriting the entry found for the same name; xmlns declarations are skipped.
+ @param atts The AttributesImpl object
+ @param name The name (Qname) of the attribute
+ @param type The type of the attribute (null selects a default)
+ @param value The value of the attribute
+ */
+
+ public void setAttribute(AttributesImpl atts, String name, String type, String value) {
+     // Namespace declarations are never stored as attributes.
+     boolean declaresNamespace = name.equals("xmlns") || name.startsWith("xmlns:");
+     if (declaresNamespace) return;
+
+     String uri = namespace(name, true);
+     String local = localName(name);
+     int index = atts.getIndex(name);
+     boolean isNew = (index == -1);
+     if (isNew) name = name.intern();
+     // A missing type defaults to CDATA for a new attribute and to the
+     // previously recorded type when an existing one is overwritten.
+     if (type == null) type = isNew ? "CDATA" : atts.getType(index);
+     if (!type.equals("CDATA")) value = normalize(value);
+     if (isNew) {
+         atts.addAttribute(uri, local, name, type, value);
+     } else {
+         atts.setAttribute(index, uri, local, name, type, value);
+     }
+ }
+
+ /**
+ Normalize an attribute value in the manner used for non-CDATA
+ (ID-style) attributes: surrounding whitespace is trimmed and each
+ run of space characters is collapsed to a single space.
+ A null value is returned as null.
+ CDATA-style attribute normalization is assumed to be done already.
+ @param value The value to normalize
+ @return The normalized value
+ **/
+ public static String normalize(String value) {
+     if (value == null) return null;
+     String trimmed = value.trim();
+     // Fast path: nothing to collapse.
+     if (trimmed.indexOf(" ") == -1) return trimmed;
+     StringBuffer out = new StringBuffer(trimmed.length());
+     char previous = 0;
+     for (int pos = 0; pos < trimmed.length(); pos++) {
+         char current = trimmed.charAt(pos);
+         // Emit a space only when it opens a run of spaces.
+         if (current != ' ' || previous != ' ') {
+             out.append(current);
+         }
+         previous = current;
+     }
+     return out.toString();
+ }
+
+ /**
+ Sets an attribute and its value into this element type.
+ @param name The name of the attribute
+ @param type The type of the attribute
+ @param value The value of the attribute
+ */
+
+ public void setAttribute(String name, String type, String value) {
+ setAttribute(theAtts, name, type, value);
+ }
+
+ /**
+ Sets the models of this element type.
+ @param model The content models of this element type as a vector of bits
+ */
+
+ public void setModel(int model) { theModel = model; }
+
+ /**
+ Sets the content models to which this element type belongs.
+ @param memberOf The content models to which this element type belongs as a vector of bits
+ */
+
+ public void setMemberOf(int memberOf) { theMemberOf = memberOf; }
+
+ /**
+ Sets the flags of this element type.
+ @param flags The flags associated with this element type as a vector of bits
+ */
+
+ public void setFlags(int flags) { theFlags = flags; }
+
+ /**
+ Sets the parent element type of this element type.
+ @param parent The parent element type
+ */
+
+ public void setParent(ElementType parent) { theParent = parent; }
+
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/PYXScanner.java b/src/java/org/ccil/cowan/tagsoup/PYXScanner.java
new file mode 100644
index 0000000..ebfba26
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/PYXScanner.java
@@ -0,0 +1,124 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// This file is part of TagSoup.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version. You may also distribute
+// and/or modify it under version 2.1 of the Academic Free License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// PYX Scanner
+
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.SAXException;
+
+/**
+A Scanner that accepts PYX format instead of HTML.
+Useful primarily for debugging.
+**/
+public class PYXScanner implements Scanner {
+
+ public void resetDocumentLocator(String publicid, String systemid) {
+ // Need this method for interface compatibility, but note
+ // that PyxScanner does not implement Locator.
+ }
+
+ /**
+ Read PYX events line by line from <code>r</code> and report each to <code>h</code>.
+ Blank lines and lines with an unrecognized event code are skipped.
+ **/
+ public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
+     BufferedReader br = new BufferedReader(r);
+     String s;
+     char[] buff = null;
+     boolean instag = false;
+     while ((s = br.readLine()) != null) {
+         int size = s.length();
+         if (size == 0) continue;    // blank line: buff[0] would be out of bounds or stale
+         if (buff == null || buff.length < size) buff = new char[size];
+         s.getChars(0, size, buff, 0);
+         switch (buff[0]) {
+         case '(':
+             if (instag) {
+                 h.stagc(buff, 0, 0);
+                 instag = false;
+             }
+             h.gi(buff, 1, size - 1);
+             instag = true;
+             break;
+         case ')':
+             if (instag) {
+                 h.stagc(buff, 0, 0);
+                 instag = false;
+             }
+             h.etag(buff, 1, size - 1);
+             break;
+         case '?':
+             if (instag) {
+                 h.stagc(buff, 0, 0);
+                 instag = false;
+             }
+             h.pi(buff, 1, size - 1);
+             break;
+         case 'A':
+             int sp = s.indexOf(' ');
+             if (sp == -1) sp = size;    // no separator: whole line is the name, value is empty
+             h.aname(buff, 1, sp - 1);
+             h.aval(buff, Math.min(sp + 1, size), Math.max(size - sp - 1, 0));
+             break;
+         case '-':
+             if (instag) {
+                 h.stagc(buff, 0, 0);
+                 instag = false;
+             }
+             if (s.equals("-\\n")) {
+                 buff[0] = '\n';
+                 h.pcdata(buff, 0, 1);
+             } else {
+                 // FIXME: Does not decode \t and \\ in input
+                 h.pcdata(buff, 1, size - 1);
+             }
+             break;
+         case 'E':
+             if (instag) {
+                 h.stagc(buff, 0, 0);
+                 instag = false;
+             }
+             h.entity(buff, 1, size - 1);
+             break;
+         default:
+             // Unrecognized event code: the line is ignored.
+             break;
+         }
+     }
+     h.eof(buff, 0, 0);
+ }
+
+ public void startCDATA() { }
+
+ // Filter: PYX read from standard input is re-emitted as PYX on standard output (UTF-8).
+ public static void main(String[] argv) throws IOException, SAXException {
+     Writer out = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
+     Reader in = new InputStreamReader(System.in, "UTF-8");
+     new PYXScanner().scan(in, new PYXWriter(out));
+ }
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/PYXWriter.java b/src/java/org/ccil/cowan/tagsoup/PYXWriter.java
new file mode 100644
index 0000000..81917dd
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/PYXWriter.java
@@ -0,0 +1,217 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// PYX Writer
+// FIXME: does not do escapes in attribute values
+// FIXME: outputs entities as bare '&' character
+
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.*;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+A ContentHandler that generates PYX format instead of XML.
+Primarily useful for debugging.
+**/
+public class PYXWriter
+ implements ScanHandler, ContentHandler, LexicalHandler {
+
+ private PrintWriter theWriter; // where we write to
+ private static char[] dummy = new char[1];
+ private String attrName; // saved attribute name
+
+ // ScanHandler implementation
+
+ public void adup(char[] buff, int offset, int length) throws SAXException {
+ theWriter.println(attrName);
+ attrName = null;
+ }
+
+ public void aname(char[] buff, int offset, int length) throws SAXException {
+ theWriter.print('A');
+ theWriter.write(buff, offset, length);
+ theWriter.print(' ');
+ attrName = new String(buff, offset, length);
+ }
+
+ public void aval(char[] buff, int offset, int length) throws SAXException {
+ theWriter.write(buff, offset, length);
+ theWriter.println();
+ attrName = null;
+ }
+
+ public void cmnt(char [] buff, int offset, int length) throws SAXException {
+// theWriter.print('!');
+// theWriter.write(buff, offset, length);
+// theWriter.println();
+ }
+
+ public void entity(char[] buff, int offset, int length) throws SAXException { }
+
+ public int getEntity() { return 0; }
+
+ public void eof(char[] buff, int offset, int length) throws SAXException {
+ theWriter.close();
+ }
+
+ public void etag(char[] buff, int offset, int length) throws SAXException {
+ theWriter.print(')');
+ theWriter.write(buff, offset, length);
+ theWriter.println();
+ }
+
+ public void decl(char[] buff, int offset, int length) throws SAXException {
+ }
+
+ public void gi(char[] buff, int offset, int length) throws SAXException {
+ theWriter.print('(');
+ theWriter.write(buff, offset, length);
+ theWriter.println();
+ }
+
+ public void cdsect(char[] buff, int offset, int length) throws SAXException {
+ pcdata(buff, offset, length);
+ }
+
+ /**
+ Write character data as PYX <tt>-</tt> lines. Each newline becomes
+ its own <tt>-\n</tt> line; tab and backslash are written as
+ <tt>\t</tt> and <tt>\\</tt>.
+ **/
+ public void pcdata(char[] buff, int offset, int length) throws SAXException {
+     if (length == 0) return;
+     final int end = offset + length;
+     boolean lineOpen = false;   // true while a "-" line awaits its terminator
+     for (int pos = offset; pos < end; pos++) {
+         char c = buff[pos];
+         if (c == '\n') {
+             if (lineOpen) theWriter.println();
+             theWriter.println("-\\n");
+             lineOpen = false;
+             continue;
+         }
+         if (!lineOpen) {
+             theWriter.print('-');
+             lineOpen = true;
+         }
+         if (c == '\t') {
+             theWriter.print("\\t");
+         }
+         else if (c == '\\') {
+             theWriter.print("\\\\");
+         }
+         else {
+             theWriter.print(c);
+         }
+     }
+     if (lineOpen) theWriter.println();
+ }
+
+ public void pitarget(char[] buff, int offset, int length) throws SAXException {
+ theWriter.print('?');
+ theWriter.write(buff, offset, length);
+ theWriter.write(' ');
+ }
+
+ public void pi(char[] buff, int offset, int length) throws SAXException {
+ theWriter.write(buff, offset, length);
+ theWriter.println();
+ }
+
+ public void stagc(char[] buff, int offset, int length) throws SAXException {
+// theWriter.println("!"); // FIXME
+ }
+
+ public void stage(char[] buff, int offset, int length) throws SAXException {
+ theWriter.println("!"); // FIXME
+ }
+
+ // SAX ContentHandler implementation
+
+ public void characters(char[] buff, int offset, int length) throws SAXException {
+ pcdata(buff, offset, length);
+ }
+
+ public void endDocument() throws SAXException {
+ theWriter.close();
+ }
+
+ public void endElement(String uri, String localname, String qname) throws SAXException {
+     String elementName = (qname.length() == 0) ? localname : qname;   // local name if no Qname
+     theWriter.print(')');
+     theWriter.println(elementName);
+ }
+
+ public void endPrefixMapping(String prefix) throws SAXException { }
+
+ public void ignorableWhitespace(char[] buff, int offset, int length) throws SAXException {
+ characters(buff, offset, length);
+ }
+
+ public void processingInstruction(String target, String data) throws SAXException {
+ theWriter.print('?');
+ theWriter.print(target);
+ theWriter.print(' ');
+ theWriter.println(data);
+ }
+
+ public void setDocumentLocator(Locator locator) { }
+
+ public void skippedEntity(String name) throws SAXException { }
+
+ public void startDocument() throws SAXException { }
+
+ public void startElement(String uri, String localname, String qname,
+         Attributes atts) throws SAXException {
+     // Report the element by Qname, or by local name when no Qname is supplied.
+     String elementName = (qname.length() == 0) ? localname : qname;
+     theWriter.print('(');
+     theWriter.println(elementName);
+     final int count = atts.getLength();
+     for (int index = 0; index < count; index++) {
+         String attName = atts.getQName(index);
+         if (attName.length() == 0) attName = atts.getLocalName(index);
+         theWriter.print('A');
+         theWriter.print(attName);
+         theWriter.print(' ');
+         theWriter.println(atts.getValue(index));
+     }
+ }
+
+ public void startPrefixMapping(String prefix, String uri) throws SAXException { }
+
+ // Default LexicalHandler implementation
+
+ public void comment(char[] ch, int start, int length) throws SAXException {
+ cmnt(ch, start, length);
+ }
+ public void endCDATA() throws SAXException { }
+ public void endDTD() throws SAXException { }
+ public void endEntity(String name) throws SAXException { }
+ public void startCDATA() throws SAXException { }
+ public void startDTD(String name, String publicId, String systemId) throws SAXException { }
+ public void startEntity(String name) throws SAXException { }
+
+ /**
+ Create a PYXWriter that sends its output to the given Writer.
+ A PrintWriter is used as-is; any other Writer is wrapped in one.
+ @param w The destination for PYX output
+ **/
+ public PYXWriter(Writer w) {
+     theWriter = (w instanceof PrintWriter)
+         ? (PrintWriter) w
+         : new PrintWriter(w);
+ }
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/Parser.java b/src/java/org/ccil/cowan/tagsoup/Parser.java
new file mode 100644
index 0000000..0997f23
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/Parser.java
@@ -0,0 +1,1114 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// The TagSoup parser
+
+package org.ccil.cowan.tagsoup;
+import java.util.HashMap;
+import java.util.ArrayList;
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import org.xml.sax.*;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.ext.LexicalHandler;
+
+
+/**
+The SAX parser class.
+**/
+public class Parser extends DefaultHandler implements ScanHandler, XMLReader, LexicalHandler {
+
+ // XMLReader implementation
+
+ private ContentHandler theContentHandler = this;
+ private LexicalHandler theLexicalHandler = this;
+ private DTDHandler theDTDHandler = this;
+ private ErrorHandler theErrorHandler = this;
+ private EntityResolver theEntityResolver = this;
+ private Schema theSchema;
+ private Scanner theScanner;
+ private AutoDetector theAutoDetector;
+
+ // Default values for feature flags
+
+ private static boolean DEFAULT_NAMESPACES = true;
+ private static boolean DEFAULT_IGNORE_BOGONS = false;
+ private static boolean DEFAULT_BOGONS_EMPTY = false;
+ private static boolean DEFAULT_ROOT_BOGONS = true;
+ private static boolean DEFAULT_DEFAULT_ATTRIBUTES = true;
+ private static boolean DEFAULT_TRANSLATE_COLONS = false;
+ private static boolean DEFAULT_RESTART_ELEMENTS = true;
+ private static boolean DEFAULT_IGNORABLE_WHITESPACE = false;
+ private static boolean DEFAULT_CDATA_ELEMENTS = true;
+
+ // Feature flags.
+
+ private boolean namespaces = DEFAULT_NAMESPACES;
+ private boolean ignoreBogons = DEFAULT_IGNORE_BOGONS;
+ private boolean bogonsEmpty = DEFAULT_BOGONS_EMPTY;
+ private boolean rootBogons = DEFAULT_ROOT_BOGONS;
+ private boolean defaultAttributes = DEFAULT_DEFAULT_ATTRIBUTES;
+ private boolean translateColons = DEFAULT_TRANSLATE_COLONS;
+ private boolean restartElements = DEFAULT_RESTART_ELEMENTS;
+ private boolean ignorableWhitespace = DEFAULT_IGNORABLE_WHITESPACE;
+ private boolean CDATAElements = DEFAULT_CDATA_ELEMENTS;
+
+ /**
+ A value of "true" indicates namespace URIs and unprefixed local
+ names for element and attribute names will be available.
+ **/
+ public final static String namespacesFeature =
+ "http://xml.org/sax/features/namespaces";
+
+ /**
+ A value of "true" indicates that XML qualified names (with prefixes)
+ and attributes (including xmlns* attributes) will be available.
+ We don't support this value.
+ **/
+ public final static String namespacePrefixesFeature =
+ "http://xml.org/sax/features/namespace-prefixes";
+
+ /**
+ Reports whether this parser processes external general entities
+ (it doesn't).
+ **/
+ public final static String externalGeneralEntitiesFeature =
+ "http://xml.org/sax/features/external-general-entities";
+
+ /**
+ Reports whether this parser processes external parameter entities
+ (it doesn't).
+ **/
+ public final static String externalParameterEntitiesFeature =
+ "http://xml.org/sax/features/external-parameter-entities";
+
+ /**
+ May be examined only during a parse, after the startDocument()
+ callback has been completed; read-only. The value is true if
+ the document specified standalone="yes" in its XML declaration,
+ and otherwise is false. (It's always false.)
+ **/
+ public final static String isStandaloneFeature =
+ "http://xml.org/sax/features/is-standalone";
+
+ /**
+ A value of "true" indicates that the LexicalHandler will report
+ the beginning and end of parameter entities (it won't).
+ **/
+ public final static String lexicalHandlerParameterEntitiesFeature =
+ "http://xml.org/sax/features/lexical-handler/parameter-entities";
+
+ /**
+ A value of "true" indicates that system IDs in declarations will
+ be absolutized (relative to their base URIs) before reporting.
+ (This returns true but doesn't actually do anything.)
+ **/
+ public final static String resolveDTDURIsFeature =
+ "http://xml.org/sax/features/resolve-dtd-uris";
+
+ /**
+ Has a value of "true" if all XML names (for elements,
+ prefixes, attributes, entities, notations, and local
+ names), as well as Namespace URIs, will have been interned
+ using java.lang.String.intern. This supports fast testing of
+ equality/inequality against string constants, rather than forcing
+ slower calls to String.equals(). (We always intern.)
+ **/
+ public final static String stringInterningFeature =
+ "http://xml.org/sax/features/string-interning";
+
+ /**
+ Returns "true" if the Attributes objects passed by this
+ parser in ContentHandler.startElement() implement the
+ org.xml.sax.ext.Attributes2 interface. (They don't.)
+ **/
+
+ public final static String useAttributes2Feature =
+ "http://xml.org/sax/features/use-attributes2";
+
+ /**
+ Returns "true" if the Locator objects passed by this parser
+ in ContentHandler.setDocumentLocator() implement the
+ org.xml.sax.ext.Locator2 interface. (They don't.)
+ **/
+ public final static String useLocator2Feature =
+ "http://xml.org/sax/features/use-locator2";
+
+ /**
+ Returns "true" if, when setEntityResolver is given an object
+ implementing the org.xml.sax.ext.EntityResolver2 interface,
+ those new methods will be used. (They won't be.)
+ **/
+ public final static String useEntityResolver2Feature =
+ "http://xml.org/sax/features/use-entity-resolver2";
+
+ /**
+ Controls whether the parser is reporting all validity errors.
+ (We don't report any validity errors.)
+ **/
+ public final static String validationFeature =
+ "http://xml.org/sax/features/validation";
+
+ /**
+ Controls whether the parser reports Unicode normalization
+ errors as described in section 2.13 and Appendix B of the XML
+ 1.1 Recommendation. (We don't normalize.)
+ **/
+ public final static String unicodeNormalizationCheckingFeature =
+"http://xml.org/sax/features/unicode-normalization-checking";
+
+ /**
+ Controls whether, when the namespace-prefixes feature is set,
+ the parser treats namespace declaration attributes as being in
+ the http://www.w3.org/2000/xmlns/ namespace. (It doesn't.)
+ **/
+ public final static String xmlnsURIsFeature =
+ "http://xml.org/sax/features/xmlns-uris";
+
+ /**
+ Returns "true" if the parser supports both XML 1.1 and XML 1.0.
+ (Always false.)
+ **/
+ public final static String XML11Feature =
+ "http://xml.org/sax/features/xml-1.1";
+
+ /**
+ A value of "true" indicates that the parser will ignore
+ unknown elements.
+ **/
+ public final static String ignoreBogonsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons";
+
+ /**
+ A value of "true" indicates that the parser will give unknown
+ elements a content model of EMPTY; a value of "false", a
+ content model of ANY.
+ **/
+ public final static String bogonsEmptyFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/bogons-empty";
+
+ /**
+ A value of "true" indicates that the parser will allow unknown
+ elements to be the root element.
+ **/
+ public final static String rootBogonsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/root-bogons";
+
+ /**
+ A value of "true" indicates that the parser will return default
+ attribute values for missing attributes that have default values.
+ **/
+ public final static String defaultAttributesFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/default-attributes";
+
+ /**
+ A value of "true" indicates that the parser will
+ translate colons into underscores in names.
+ **/
+ public final static String translateColonsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/translate-colons";
+
+ /**
+ A value of "true" indicates that the parser will
+ attempt to restart the restartable elements.
+ **/
+ public final static String restartElementsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/restart-elements";
+
+ /**
+ A value of "true" indicates that the parser will
+ transmit whitespace in element-only content via the SAX
+ ignorableWhitespace callback. Normally this is not done,
+ because HTML is an SGML application and SGML suppresses
+ such whitespace.
+ **/
+ public final static String ignorableWhitespaceFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace";
+
+ /**
+ A value of "true" indicates that the parser will treat CDATA
+ elements specially. Normally true, since the input is by
+ default HTML.
+ **/
+ public final static String CDATAElementsFeature =
+ "http://www.ccil.org/~cowan/tagsoup/features/cdata-elements";
+
+ /**
+ Used to see some syntax events that are essential in some
+ applications: comments, CDATA delimiters, selected general
+ entity inclusions, and the start and end of the DTD (and
+ declaration of document element name). The Object must implement
+ org.xml.sax.ext.LexicalHandler.
+ **/
+ public final static String lexicalHandlerProperty =
+ "http://xml.org/sax/properties/lexical-handler";
+
+ /**
+ Specifies the Scanner object this Parser uses.
+ **/
+ public final static String scannerProperty =
+ "http://www.ccil.org/~cowan/tagsoup/properties/scanner";
+
+ /**
+ Specifies the Schema object this Parser uses.
+ **/
+ public final static String schemaProperty =
+ "http://www.ccil.org/~cowan/tagsoup/properties/schema";
+
+ /**
+ Specifies the AutoDetector (for encoding detection) this Parser uses.
+ **/
+ public final static String autoDetectorProperty =
+ "http://www.ccil.org/~cowan/tagsoup/properties/auto-detector";
+
+ // Due to sucky Java order of initialization issues, these
+ // entries are maintained separately from the initial values of
+ // the corresponding instance variables, but care must be taken
+ // to keep them in sync.
+
+ private HashMap theFeatures = new HashMap();
+ {
+     // Each feature name must be registered here: getFeature/setFeature reject unknown names.
+     theFeatures.put(namespacesFeature, truthValue(DEFAULT_NAMESPACES));
+     theFeatures.put(namespacePrefixesFeature, Boolean.FALSE);
+     theFeatures.put(externalGeneralEntitiesFeature, Boolean.FALSE);
+     theFeatures.put(externalParameterEntitiesFeature, Boolean.FALSE);
+     theFeatures.put(isStandaloneFeature, Boolean.FALSE);
+     theFeatures.put(lexicalHandlerParameterEntitiesFeature, Boolean.FALSE);
+     theFeatures.put(resolveDTDURIsFeature, Boolean.TRUE);
+     theFeatures.put(stringInterningFeature, Boolean.TRUE);
+     theFeatures.put(useAttributes2Feature, Boolean.FALSE);
+     theFeatures.put(useLocator2Feature, Boolean.FALSE);
+     theFeatures.put(useEntityResolver2Feature, Boolean.FALSE);
+     theFeatures.put(validationFeature, Boolean.FALSE);
+     theFeatures.put(unicodeNormalizationCheckingFeature, Boolean.FALSE);
+     theFeatures.put(xmlnsURIsFeature, Boolean.FALSE);
+     theFeatures.put(XML11Feature, Boolean.FALSE);
+     theFeatures.put(ignoreBogonsFeature, truthValue(DEFAULT_IGNORE_BOGONS));
+     theFeatures.put(bogonsEmptyFeature, truthValue(DEFAULT_BOGONS_EMPTY));
+     theFeatures.put(rootBogonsFeature, truthValue(DEFAULT_ROOT_BOGONS));
+     theFeatures.put(defaultAttributesFeature, truthValue(DEFAULT_DEFAULT_ATTRIBUTES));
+     theFeatures.put(translateColonsFeature, truthValue(DEFAULT_TRANSLATE_COLONS));
+     theFeatures.put(restartElementsFeature, truthValue(DEFAULT_RESTART_ELEMENTS));
+     theFeatures.put(ignorableWhitespaceFeature, truthValue(DEFAULT_IGNORABLE_WHITESPACE));
+     theFeatures.put(CDATAElementsFeature, truthValue(DEFAULT_CDATA_ELEMENTS));
+ }
+
+ // Maps a boolean onto the canonical Boolean.TRUE / Boolean.FALSE instances.
+ private static Boolean truthValue(boolean b) {
+     if (b) return Boolean.TRUE;
+     return Boolean.FALSE;
+ }
+
+
+ // Report the current state of a feature; names absent from theFeatures
+ // are rejected with SAXNotRecognizedException.
+ public boolean getFeature (String name)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     Object state = theFeatures.get(name);
+     if (state == null) throw new SAXNotRecognizedException("Unknown feature " + name);
+     return ((Boolean) state).booleanValue();
+ }
+
+ // Record a new feature state and, for the features this parser acts on,
+ // update the matching flag field. Unknown names are rejected.
+ public void setFeature (String name, boolean value)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     if (theFeatures.get(name) == null) {
+         throw new SAXNotRecognizedException("Unknown feature " + name);
+     }
+     theFeatures.put(name, value ? Boolean.TRUE : Boolean.FALSE);
+
+     if (namespacesFeature.equals(name)) namespaces = value;
+     else if (ignoreBogonsFeature.equals(name)) ignoreBogons = value;
+     else if (bogonsEmptyFeature.equals(name)) bogonsEmpty = value;
+     else if (rootBogonsFeature.equals(name)) rootBogons = value;
+     else if (defaultAttributesFeature.equals(name)) defaultAttributes = value;
+     else if (translateColonsFeature.equals(name)) translateColons = value;
+     else if (restartElementsFeature.equals(name)) restartElements = value;
+     else if (ignorableWhitespaceFeature.equals(name)) ignorableWhitespace = value;
+     else if (CDATAElementsFeature.equals(name)) CDATAElements = value;
+ }
+
+ // Return the object currently bound to a property name.
+ public Object getProperty (String name)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     if (name.equals(lexicalHandlerProperty)) {
+         // The parser is its own default handler; report that as "none".
+         return (theLexicalHandler == this) ? null : theLexicalHandler;
+     }
+     if (name.equals(scannerProperty)) {
+         return theScanner;
+     }
+     if (name.equals(schemaProperty)) {
+         return theSchema;
+     }
+     if (name.equals(autoDetectorProperty)) {
+         return theAutoDetector;
+     }
+     throw new SAXNotRecognizedException("Unknown property " + name);
+ }
+
+ public void setProperty (String name, Object value)
+ throws SAXNotRecognizedException, SAXNotSupportedException {
+ if (name.equals(lexicalHandlerProperty)) {
+ if (value == null) {
+ theLexicalHandler = this;
+ }
+ else if (value instanceof LexicalHandler) {
+ theLexicalHandler = (LexicalHandler)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your lexical handler is not a LexicalHandler");
+ }
+ }
+ else if (name.equals(scannerProperty)) {
+ if (value instanceof Scanner) {
+ theScanner = (Scanner)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your scanner is not a Scanner");
+ }
+ }
+ else if (name.equals(schemaProperty)) {
+ if (value instanceof Schema) {
+ theSchema = (Schema)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your schema is not a Schema");
+ }
+ }
+ else if (name.equals(autoDetectorProperty)) {
+ if (value instanceof AutoDetector) {
+ theAutoDetector = (AutoDetector)value;
+ }
+ else {
+ throw new SAXNotSupportedException("Your auto-detector is not an AutoDetector");
+ }
+ }
+ else {
+ throw new SAXNotRecognizedException("Unknown property " + name);
+ }
+ }
+
+ /** Install an entity resolver; null makes the parser itself the resolver. */
+ public void setEntityResolver (EntityResolver resolver) {
+     if (resolver == null) {
+         theEntityResolver = this;
+     }
+     else {
+         theEntityResolver = resolver;
+     }
+ }
+
+ /** Return the installed entity resolver, or null when the parser itself is the resolver. */
+ public EntityResolver getEntityResolver () {
+     if (theEntityResolver == this) {
+         return null;
+     }
+     return theEntityResolver;
+ }
+
+ /** Install a DTD handler; null makes the parser itself the handler. */
+ public void setDTDHandler (DTDHandler handler) {
+     if (handler == null) {
+         theDTDHandler = this;
+     }
+     else {
+         theDTDHandler = handler;
+     }
+ }
+
+ /** Return the installed DTD handler, or null when the parser itself is the handler. */
+ public DTDHandler getDTDHandler () {
+     if (theDTDHandler == this) {
+         return null;
+     }
+     return theDTDHandler;
+ }
+
+ /** Install a content handler; null makes the parser itself the handler. */
+ public void setContentHandler (ContentHandler handler) {
+     if (handler == null) {
+         theContentHandler = this;
+     }
+     else {
+         theContentHandler = handler;
+     }
+ }
+
+ /** Return the installed content handler, or null when the parser itself is the handler. */
+ public ContentHandler getContentHandler () {
+     if (theContentHandler == this) {
+         return null;
+     }
+     return theContentHandler;
+ }
+
+ public void setErrorHandler (ErrorHandler handler) {
+ theErrorHandler = (handler == null) ? this : handler;
+ }
+
+ public ErrorHandler getErrorHandler () {
+ return (theErrorHandler == this) ? null : theErrorHandler;
+ }
+
+ /**
+  * Parse a document, reporting it to the installed handlers.
+  *
+  * The document locator is reset and handed to the content handler
+  * before startDocument() is fired: SAX requires setDocumentLocator()
+  * to precede every other ContentHandler callback.
+  *
+  * @param input Source of the document; a character stream is used if
+  *              present, otherwise the byte stream or system id
+  * @throws IOException if the input cannot be opened or read
+  * @throws SAXException if a handler reports an error
+  */
+ public void parse (InputSource input) throws IOException, SAXException {
+     setup();
+     Reader r = getReader(input);
+     theScanner.resetDocumentLocator(input.getPublicId(), input.getSystemId());
+     if (theScanner instanceof Locator) {
+         theContentHandler.setDocumentLocator((Locator)theScanner);
+     }
+     theContentHandler.startDocument();
+     // Only schemas with a non-empty namespace URI get a prefix mapping.
+     if (!(theSchema.getURI().equals(""))) {
+         theContentHandler.startPrefixMapping(theSchema.getPrefix(),
+             theSchema.getURI());
+     }
+     theScanner.scan(r, this);
+ }
+
+ // Convenience overload: wraps the system id in an InputSource and
+ // delegates to parse(InputSource).
+ public void parse (String systemid) throws IOException, SAXException {
+ parse(new InputSource(systemid));
+ }
+
+ // Sets up instance variables that haven't been set by setFeature
+ /**
+  * Prepare the parser for a new document.
+  *
+  * Supplies defaults for the schema, scanner and auto-detector if none
+  * were set through setProperty, then resets all per-document state.
+  * That includes the "DOCTYPE already seen" flag, so that a reused
+  * Parser honours the DOCTYPE of each document it parses.
+  */
+ private void setup() {
+     if (theSchema == null) theSchema = new HTMLSchema();
+     if (theScanner == null) theScanner = new HTMLScanner();
+     if (theAutoDetector == null) {
+         // Default detector: decode with the platform default charset.
+         theAutoDetector = new AutoDetector() {
+             public Reader autoDetectingReader(InputStream i) {
+                 return new InputStreamReader(i);
+             }
+         };
+     }
+     theStack = new Element(theSchema.getElementType("<root>"), defaultAttributes);
+     thePCDATA = new Element(theSchema.getElementType("<pcdata>"), defaultAttributes);
+     theNewElement = null;
+     theAttributeName = null;
+     thePITarget = null;
+     theSaved = null;
+     theEntity = 0;
+     virginStack = true;
+     theDoctypeIsPresent = false;
+     theDoctypeName = theDoctypePublicId = theDoctypeSystemId = null;
+ }
+
+ // Return a Reader based on the contents of an InputSource
+ // Buffer both the InputStream and the Reader
+ /**
+  * Obtain a Reader for an InputSource.
+  *
+  * Preference order: the source's character stream; otherwise its byte
+  * stream (or one opened from the system id), decoded with the declared
+  * encoding, or via the auto-detector when no encoding is declared.  An
+  * unsupported declared encoding falls back to the platform default.
+  * No buffering is added here.
+  */
+ private Reader getReader(InputSource s) throws SAXException, IOException {
+     Reader chars = s.getCharacterStream();
+     InputStream bytes = s.getByteStream();
+     String encoding = s.getEncoding();
+     String publicid = s.getPublicId();
+     String systemid = s.getSystemId();
+     if (chars != null) {
+         return chars;
+     }
+     if (bytes == null) {
+         bytes = getInputStream(publicid, systemid);
+     }
+     if (encoding == null) {
+         return theAutoDetector.autoDetectingReader(bytes);
+     }
+     try {
+         return new InputStreamReader(bytes, encoding);
+     }
+     catch (UnsupportedEncodingException e) {
+         return new InputStreamReader(bytes);
+     }
+ }
+
+ // Get an InputStream based on a publicid and a systemid
+ /**
+  * Open a byte stream for a system id, resolving it as a URL relative
+  * to the current working directory.  The public id is not used.
+  */
+ private InputStream getInputStream(String publicid, String systemid) throws IOException, SAXException {
+     String workingDir = System.getProperty("user.dir");
+     URL base = new URL("file", "", workingDir + "/.");
+     URL target = new URL(base, systemid);
+     return target.openConnection().getInputStream();
+ }
+ // We don't process publicids (who uses them anyhow?)
+
+ // ScanHandler implementation
+
+ // Element being assembled from the current start-tag: set by gi(),
+ // cleared by rectify() and etag_basic().
+ private Element theNewElement = null;
+ // Name of the attribute awaiting a value: set by aname(), cleared by adup()/aval().
+ private String theAttributeName = null;
+ // True once decl() has accepted a DOCTYPE; later DOCTYPEs are ignored.
+ private boolean theDoctypeIsPresent = false;
+ // DOCTYPE details recorded by decl() and consulted by push().
+ private String theDoctypePublicId = null;
+ private String theDoctypeSystemId = null;
+ private String theDoctypeName = null;
+ // Target of the processing instruction in progress: set by pitarget(), consumed by pi().
+ private String thePITarget = null;
+ // Top of the stack of open elements; entries are chained through Element.next().
+ private Element theStack = null;
+ // Chain of restartable elements saved by restartablyPop() and re-pushed by restart().
+ private Element theSaved = null;
+ // Placeholder element of type "<pcdata>", created in setup().
+ private Element thePCDATA = null;
+ // Result of the most recent entity() lookup, returned by getEntity().
+ private int theEntity = 0; // needs to support chars past U+FFFF
+
+ public void adup(char[] buff, int offset, int length) throws SAXException {
+ if (theNewElement == null || theAttributeName == null) return;
+ theNewElement.setAttribute(theAttributeName, null, theAttributeName);
+ theAttributeName = null;
+ }
+
+ /**
+  * Record the name of an attribute whose value will follow.
+  * The name is cleaned by makeName() and lower-cased here; ignored when
+  * no element is being built.
+  */
+ public void aname(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement == null) return;
+     // Currently we don't rely on Schema to canonicalize
+     // attribute names.
+     // Lower-case with a fixed locale: under the default locale a Turkish
+     // system would turn "ID" into "ıd" (dotless i).
+     theAttributeName = makeName(buff, offset, length).toLowerCase(java.util.Locale.ENGLISH);
+ }
+
+ /**
+  * Handle an attribute value: entity references in the value are
+  * expanded and the result is stored on the element being built under
+  * the pending attribute name.  Ignored when no element or no attribute
+  * name is pending.
+  */
+ public void aval(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement != null && theAttributeName != null) {
+         String raw = new String(buff, offset, length);
+         String expanded = expandEntities(raw);
+         theNewElement.setAttribute(theAttributeName, null, expanded);
+         theAttributeName = null;
+     }
+ }
+
+ // Expand entity references in attribute values selectively.
+ // Currently we expand a reference iff it is properly terminated
+ // with a semicolon.
+ // Returns a copy of src in which every "&name;" / "&#n;" reference that
+ // lookupEntity() recognises is replaced by the character it denotes.
+ // Unrecognised or unterminated references are left untouched.
+ private String expandEntities(String src) {
+ // Index in dst just past the '&' of the reference in progress; -1 when not in a reference
+ int refStart = -1;
+ int len = src.length();
+ char[] dst = new char[len];
+ int dstlen = 0;
+ for (int i = 0; i < len; i++) {
+ char ch = src.charAt(i);
+ // Copy first; a successful expansion below rewinds dstlen over the reference
+ dst[dstlen++] = ch;
+// System.err.print("i = " + i + ", d = " + dstlen + ", ch = [" + ch + "] ");
+ if (ch == '&' && refStart == -1) {
+ // start of a ref excluding &
+ refStart = dstlen;
+// System.err.println("start of ref");
+ }
+ else if (refStart == -1) {
+ // not in a ref
+// System.err.println("not in ref");
+ }
+ else if (Character.isLetter(ch) ||
+ Character.isDigit(ch) ||
+ ch == '#') {
+ // valid entity char
+// System.err.println("valid");
+ }
+ else if (ch == ';') {
+ // properly terminated ref
+// System.err.print("got [" + new String(dst, refStart, dstlen-refStart-1) + "]");
+ // The name lies between the '&' and the ';' just copied
+ int ent = lookupEntity(dst, refStart, dstlen - refStart - 1);
+// System.err.println(" = " + ent);
+ if (ent > 0xFFFF) {
+ // Outside the BMP: overwrite "&x" with a surrogate pair
+ ent -= 0x10000;
+ dst[refStart - 1] = (char)((ent>>10) + 0xD800);
+ dst[refStart] = (char)((ent&0x3FF) + 0xDC00);
+ dstlen = refStart + 1;
+ }
+ else if (ent != 0) {
+ // Overwrite the '&' with the character and drop the rest of the reference
+ dst[refStart - 1] = (char)ent;
+ dstlen = refStart;
+ }
+ // ent == 0: unknown entity, the reference text stays as copied
+ refStart = -1;
+ }
+ else {
+ // improperly terminated ref
+// System.err.println("end of ref");
+ refStart = -1;
+ }
+ }
+ return new String(dst, 0, dstlen);
+ }
+
+ // Looks up the reported entity or character reference and stores the
+ // result (0 if unknown) for retrieval through getEntity().
+ public void entity(char[] buff, int offset, int length) throws SAXException {
+ theEntity = lookupEntity(buff, offset, length);
+ }
+
+ // Process numeric character references,
+ // deferring to the schema for named ones.
+ /**
+  * Resolve an entity name or numeric character reference (without the
+  * leading '&' and trailing ';') to a code point.
+  *
+  * "#ddd" is read as decimal and "#xhhh" / "#Xhhh" as hexadecimal;
+  * anything else is looked up in the schema.  Numeric values that are
+  * not Unicode code points (negative or above U+10FFFF) are rejected,
+  * because callers split values above U+FFFF into a surrogate pair and
+  * would otherwise emit garbage.
+  *
+  * @return the code point, or 0 if the reference is empty, malformed,
+  *         out of range, or unknown to the schema
+  */
+ private int lookupEntity(char[] buff, int offset, int length) {
+     if (length < 1) return 0;
+     if (buff[offset] != '#') {
+         return theSchema.getEntity(new String(buff, offset, length));
+     }
+     int radix = 10;
+     int skip = 1;       // characters to skip before the digits: "#"
+     if (length > 1 && (buff[offset+1] == 'x'
+             || buff[offset+1] == 'X')) {
+         radix = 16;
+         skip = 2;       // "#x"
+     }
+     int value;
+     try {
+         value = Integer.parseInt(new String(buff, offset + skip, length - skip), radix);
+     }
+     catch (NumberFormatException e) { return 0; }
+     if (value < 0 || value > 0x10FFFF) return 0;
+     return value;
+ }
+
+ public void eof(char[] buff, int offset, int length) throws SAXException {
+ if (virginStack) rectify(thePCDATA);
+ while (theStack.next() != null) {
+ pop();
+ }
+ if (!(theSchema.getURI().equals("")))
+ theContentHandler.endPrefixMapping(theSchema.getPrefix());
+ theContentHandler.endDocument();
+ }
+
+ /**
+  * Handle an end-tag: give CDATA-element processing the first chance,
+  * and fall back to ordinary end-tag processing if it declines.
+  */
+ public void etag(char[] buff, int offset, int length) throws SAXException {
+     if (!etag_cdata(buff, offset, length)) {
+         etag_basic(buff, offset, length);
+     }
+ }
+
+ // Pieces of an end-tag, used to re-emit a non-matching tag as text
+ private static char[] etagchars = {'<', '/', '>'};
+
+ /**
+  * End-tag handling inside a CDATA element.
+  *
+  * When the open element is a CDATA element (and the feature is on) and
+  * the tag name does not equal that element's name, ignoring case, the
+  * tag is reported as character data and the scanner is put back into
+  * CDATA mode.
+  *
+  * @return true if the tag was consumed as text, false if it should be
+  *         processed as a normal end-tag
+  */
+ public boolean etag_cdata(char[] buff, int offset, int length) throws SAXException {
+     String currentName = theStack.name();
+     boolean inCdataElement = CDATAElements && (theStack.flags() & Schema.F_CDATA) != 0;
+     if (!inCdataElement) {
+         return false;
+     }
+     boolean matches = (length == currentName.length());
+     for (int i = 0; matches && i < length; i++) {
+         char got = Character.toLowerCase(buff[offset + i]);
+         char want = Character.toLowerCase(currentName.charAt(i));
+         matches = (got == want);
+     }
+     if (matches) {
+         return false;
+     }
+     // Not the closing tag of this element: pass "</" name ">" through as text
+     theContentHandler.characters(etagchars, 0, 2);
+     theContentHandler.characters(buff, offset, length);
+     theContentHandler.characters(etagchars, 2, 1);
+     theScanner.startCDATA();
+     return true;
+ }
+
+ // Ordinary end-tag processing: closes the nearest open element with the
+ // given name (or the top element when the name is empty), then restarts
+ // any saved restartable elements.
+ public void etag_basic(char[] buff, int offset, int length) throws SAXException {
+ // Any start-tag still being assembled is abandoned
+ theNewElement = null;
+ String name;
+ if (length != 0) {
+ // Canonicalize case of name
+ name = makeName(buff, offset, length);
+// System.err.println("got etag [" + name + "]");
+ ElementType type = theSchema.getElementType(name);
+ if (type == null) return; // mysterious end-tag
+ name = type.name();
+ }
+ else {
+ // Empty name: close whatever is on top of the stack
+ name = theStack.name();
+ }
+// System.err.println("%% Got end of " + name);
+
+ // Search down the stack for the element to close, noting whether an
+ // F_NOFORCE element lies above it
+ Element sp;
+ boolean inNoforce = false;
+ for (sp = theStack; sp != null; sp = sp.next()) {
+ if (sp.name().equals(name)) break;
+ if ((sp.flags() & Schema.F_NOFORCE) != 0) inNoforce = true;
+ }
+
+ if (sp == null) return; // Ignore unknown etags
+ // Never close the two bottom-most entries of the stack from an end-tag
+ if (sp.next() == null || sp.next().next() == null) return;
+ if (inNoforce) { // inside an F_NOFORCE element?
+ sp.preclose(); // preclose the matching element
+ }
+ else { // restartably pop everything above us
+ while (theStack != sp) {
+ restartablyPop();
+ }
+ pop();
+ }
+ // pop any preclosed elements now at the top
+ while (theStack.isPreclosed()) {
+ pop();
+ }
+ restart(null);
+ }
+
+ // Push restartables on the stack if possible
+ // e is the next element to be started, if we know what it is
+ /**
+  * Re-push saved restartable elements for as long as the element on top
+  * of the stack can contain the next saved one and, when e is given,
+  * that saved element can in turn contain e.
+  * @param e The next element to be started, or null if unknown
+  */
+ private void restart(Element e) throws SAXException {
+     while (theSaved != null) {
+         if (!theStack.canContain(theSaved)) break;
+         if (e != null && !theSaved.canContain(e)) break;
+         // Read the link first: push() re-links the element onto the stack
+         Element following = theSaved.next();
+         push(theSaved);
+         theSaved = following;
+     }
+ }
+
+ // Pop the stack irrevocably
+ // Reports endElement for the top of the stack, ends any prefix mappings
+ // that were opened for its element name and attributes, and removes it.
+ private void pop() throws SAXException {
+ if (theStack == null) return; // empty stack
+ String name = theStack.name();
+ String localName = theStack.localName();
+ String namespace = theStack.namespace();
+ String prefix = prefixOf(name);
+
+// System.err.println("%% Popping " + name);
+ // With namespaces off, only the qualified name is reported
+ if (!namespaces) namespace = localName = "";
+ theContentHandler.endElement(namespace, localName, name);
+ if (foreign(prefix, namespace)) {
+ theContentHandler.endPrefixMapping(prefix);
+// System.err.println("%% Unmapping [" + prefix + "] for elements to " + namespace);
+ }
+ // Attribute prefixes are unmapped in the reverse of the order push() mapped them
+ Attributes atts = theStack.atts();
+ for (int i = atts.getLength() - 1; i >= 0; i--) {
+ String attNamespace = atts.getURI(i);
+ String attPrefix = prefixOf(atts.getQName(i));
+ if (foreign(attPrefix, attNamespace)) {
+ theContentHandler.endPrefixMapping(attPrefix);
+// System.err.println("%% Unmapping [" + attPrefix + "] for attributes to " + attNamespace);
+ }
+ }
+ theStack = theStack.next();
+ }
+
+ // Pop the stack restartably
+ /**
+  * Pop the top element and, if the restart-elements feature is on and
+  * the element is flagged F_RESTART, anonymize it and put it at the head
+  * of the saved chain so that restart() can re-open it later.
+  */
+ private void restartablyPop() throws SAXException {
+     Element top = theStack;
+     pop();
+     if (!restartElements) return;
+     if ((top.flags() & Schema.F_RESTART) == 0) return;
+     top.anonymize();
+     top.setNext(theSaved);
+     theSaved = top;
+ }
+
+ // Push element onto stack
+ // True until the first element has been pushed for the current document
+ private boolean virginStack = true;
+ // Starts any prefix mappings needed for the element name and attributes,
+ // reports startElement, and makes e the new top of the stack.
+ private void push(Element e) throws SAXException {
+ String name = e.name();
+ String localName = e.localName();
+ String namespace = e.namespace();
+ String prefix = prefixOf(name);
+
+// System.err.println("%% Pushing " + name);
+ e.clean();
+ // With namespaces off, only the qualified name is reported
+ if (!namespaces) namespace = localName = "";
+ // First element matches the DOCTYPE name: offer the DOCTYPE ids to the entity resolver
+ if (virginStack && localName.equalsIgnoreCase(theDoctypeName)) {
+ try {
+ theEntityResolver.resolveEntity(theDoctypePublicId, theDoctypeSystemId);
+ } catch (IOException ew) { } // Can't be thrown for root I believe.
+ }
+ if (foreign(prefix, namespace)) {
+ theContentHandler.startPrefixMapping(prefix, namespace);
+// System.err.println("%% Mapping [" + prefix + "] for elements to " + namespace);
+ }
+ Attributes atts = e.atts();
+ int len = atts.getLength();
+ for (int i = 0; i < len; i++) {
+ String attNamespace = atts.getURI(i);
+ String attPrefix = prefixOf(atts.getQName(i));
+ if (foreign(attPrefix, attNamespace)) {
+ theContentHandler.startPrefixMapping(attPrefix, attNamespace);
+// System.err.println("%% Mapping [" + attPrefix + "] for attributes to " + attNamespace);
+ }
+ }
+ theContentHandler.startElement(namespace, localName, name, e.atts());
+ e.setNext(theStack);
+ theStack = e;
+ virginStack = false;
+ // Entering a CDATA element switches the scanner to CDATA content mode
+ if (CDATAElements && (theStack.flags() & Schema.F_CDATA) != 0) {
+ theScanner.startCDATA();
+ }
+ }
+
+ // Get the prefix from a QName
+ /**
+  * Extract the prefix of a qualified name.
+  * @param name A QName
+  * @return The text before the first colon, or "" if there is no colon
+  */
+ private String prefixOf(String name) {
+     int colon = name.indexOf(':');
+     return (colon == -1) ? "" : name.substring(0, colon);
+ }
+
+ // Return true if we have a foreign name
+ /**
+  * Decide whether a name is "foreign": it has a non-empty prefix and a
+  * non-empty namespace that differs from the schema's own URI.
+  */
+ private boolean foreign(String prefix, String namespace) {
+     return !prefix.equals("")
+         && !namespace.equals("")
+         && !namespace.equals(theSchema.getURI());
+ }
+
+ /**
+ * Parsing the complete XML Document Type Definition is way too complex,
+ * but for many simple cases we can extract something useful from it.
+ *
+ * doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
+ * DeclSep ::= PEReference | S
+ * intSubset ::= (markupdecl | DeclSep)*
+ * markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
+ * ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
+ */
+ // Handles a <!...> declaration.  Only the first DOCTYPE is acted upon:
+ // its name, public id and system id are reported through
+ // startDTD/endDTD and remembered for push().
+ public void decl(char[] buff, int offset, int length) throws SAXException {
+ String s = new String(buff, offset, length);
+ String name = null;
+ String systemid = null;
+ String publicid = null;
+ // v[0] is the keyword, v[1] the name, v[2] SYSTEM or PUBLIC, then the literals
+ String[] v = split(s);
+ // NOTE(review): the keyword comparisons are case-sensitive, so a
+ // lower-case "doctype" is not recognised here - confirm this is intended
+ if (v.length > 0 && "DOCTYPE".equals(v[0])) {
+ if (theDoctypeIsPresent) return; // one doctype only!
+ theDoctypeIsPresent = true;
+ if (v.length > 1) {
+ name = v[1];
+ if (v.length>3 && "SYSTEM".equals(v[2])) {
+ systemid = v[3];
+ }
+ else if (v.length > 3 && "PUBLIC".equals(v[2])) {
+ publicid = v[3];
+ if (v.length > 4) {
+ systemid = v[4];
+ }
+ else {
+ systemid = "";
+ }
+ }
+ }
+ }
+ publicid = trimquotes(publicid);
+ systemid = trimquotes(systemid);
+ if (name != null) {
+ publicid = cleanPublicid(publicid);
+ theLexicalHandler.startDTD(name, publicid, systemid);
+ theLexicalHandler.endDTD();
+ theDoctypeName = name;
+ theDoctypePublicId = publicid;
+ if (theScanner instanceof Locator) { // Must resolve systemid
+ theDoctypeSystemId = ((Locator)theScanner).getSystemId();
+ // Best effort: if resolution fails, the document's own system id is kept
+ try {
+ theDoctypeSystemId = new URL(new URL(theDoctypeSystemId), systemid).toString();
+ } catch (Exception e) {}
+ }
+ }
+ }
+
+ // If the String is quoted, trim the quotes.
+ /**
+  * Strip one pair of matching quotes (single or double) from around a
+  * string.
+  * @param in The string to examine; may be null
+  * @return in without its surrounding quotes, or in unchanged when it is
+  *         null, shorter than two characters, or not quoted
+  */
+ private static String trimquotes(String in) {
+     if (in == null) return in;
+     int length = in.length();
+     // A lone quote character is not a quoted string; without this guard
+     // substring(1, 0) would throw.
+     if (length < 2) return in;
+     char first = in.charAt(0);
+     char last = in.charAt(length - 1);
+     if (first == last && (first == '\'' || first == '"')) {
+         in = in.substring(1, length - 1);
+     }
+     return in;
+ }
+
+ // Split the supplied String into words or phrases separated by whitespace.
+ // Recognises quotes around a phrase and doesn't split it.
+ /**
+  * Break a string into whitespace-separated tokens, keeping a quoted
+  * phrase (single or double quotes, quotes included) as one token.
+  * A quote preceded by a backslash does not open or close a phrase.
+  * @param val The string to split
+  * @return The tokens in order; an empty array if val is blank
+  */
+ private static String[] split(String val) throws IllegalArgumentException {
+     val = val.trim();
+     if (val.length() == 0) {
+         return new String[0];
+     }
+     ArrayList l = new ArrayList();
+     int s = 0;              // start of the current token, -1 between tokens
+     int e = 0;
+     boolean sq = false;     // inside single quotes
+     boolean dq = false;     // inside double quotes
+     char lastc = 0;
+     int len = val.length();
+     for (e = 0; e < len; e++) {
+         char c = val.charAt(e);
+         if (!dq && c == '\'' && lastc != '\\') {
+             sq = !sq;
+             if (s < 0) s = e;
+         }
+         else if (!sq && c == '\"' && lastc != '\\') {
+             dq = !dq;
+             if (s < 0) s = e;
+         }
+         else if (!sq && !dq) {
+             if (Character.isWhitespace(c)) {
+                 if (s >= 0) l.add(val.substring(s, e));
+                 s = -1;
+             }
+             else if (s < 0) {
+                 s = e;
+             }
+         }
+         lastc = c;
+     }
+     // trim() only strips characters up to U+0020, so the text can still
+     // end in other whitespace (e.g. U+2028); then no token is pending.
+     if (s >= 0) l.add(val.substring(s, e));
+     return (String[])l.toArray(new String[0]);
+ }
+
+ // Replace junk in publicids with spaces
+ // Characters copied through unchanged by cleanPublicid()
+ private static String legal =
+     "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%";
+
+ /**
+  * Normalize a public identifier: characters listed in legal are kept,
+  * each run of any other characters collapses to a single space, and
+  * leading and trailing runs are removed.
+  * @param src The raw public id; may be null
+  * @return The cleaned public id, or null if src is null
+  */
+ private String cleanPublicid(String src) {
+     if (src == null) return null;
+     int n = src.length();
+     StringBuffer out = new StringBuffer(n);
+     boolean lastWasSpace = true;    // true at the start so leading junk is dropped
+     for (int pos = 0; pos < n; pos++) {
+         char c = src.charAt(pos);
+         if (legal.indexOf(c) != -1) {
+             out.append(c);
+             lastWasSpace = false;
+             continue;
+         }
+         if (!lastWasSpace) {
+             out.append(' ');
+             lastWasSpace = true;
+         }
+     }
+     return out.toString().trim();
+ }
+
+
+ // Handles the element name of a start-tag: creates the Element that the
+ // following attribute callbacks will fill in.  Element types unknown to
+ // the schema are either dropped or registered on the fly, depending on
+ // the bogon features.
+ public void gi(char[] buff, int offset, int length) throws SAXException {
+ // A start-tag is already being assembled: ignore this name
+ if (theNewElement != null) return;
+ String name = makeName(buff, offset, length);
+ if (name == null) return;
+ ElementType type = theSchema.getElementType(name);
+ if (type == null) {
+ // Suppress unknown elements if ignore-bogons is on
+ if (ignoreBogons) return;
+ // Register the unknown element: empty or any content, and a member
+ // of every model (excluding the root model unless root-bogons is on)
+ int bogonModel = bogonsEmpty ? Schema.M_EMPTY : Schema.M_ANY;
+ int bogonMemberOf = rootBogons ? Schema.M_ANY : (Schema.M_ANY & ~ Schema.M_ROOT);
+ theSchema.elementType(name, bogonModel, bogonMemberOf, 0);
+ if (!rootBogons) theSchema.parent(name, theSchema.rootElementType().name());
+ type = theSchema.getElementType(name);
+ }
+
+ theNewElement = new Element(type, defaultAttributes);
+// System.err.println("%% Got GI " + theNewElement.name());
+ }
+
+ // Handles a CDATA section: its content is processed as ordinary
+ // character data, bracketed by startCDATA/endCDATA lexical events.
+ public void cdsect(char[] buff, int offset, int length) throws SAXException {
+ theLexicalHandler.startCDATA();
+ pcdata(buff, offset, length);
+ theLexicalHandler.endCDATA();
+ }
+ public void pcdata(char[] buff, int offset, int length) throws SAXException {
+ if (length == 0) return;
+ boolean allWhite = true;
+ for (int i = 0; i < length; i++) {
+ if (!Character.isWhitespace(buff[offset+i])) {
+ allWhite = false;
+ }
+ }
+ if (allWhite && !theStack.canContain(thePCDATA)) {
+ if (ignorableWhitespace) {
+ theContentHandler.ignorableWhitespace(buff, offset, length);
+ }
+ }
+ else {
+ rectify(thePCDATA);
+ theContentHandler.characters(buff, offset, length);
+ }
+ }
+
+ public void pitarget(char[] buff, int offset, int length) throws SAXException {
+ if (theNewElement != null) return;
+ thePITarget = makeName(buff, offset, length).replace(':', '_');
+ }
+
+ /**
+  * Report a processing instruction using the target recorded by
+  * pitarget().  Instructions whose target is "xml" (any case) are
+  * suppressed, and a trailing '?' is removed from the data.  Ignored
+  * while a start-tag is being assembled or when no target is pending.
+  */
+ public void pi(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement != null || thePITarget == null) return;
+     if ("xml".equalsIgnoreCase(thePITarget)) return;
+     // The data occupies buff[offset .. offset+length-1], so its last
+     // character is at offset + length - 1, not length - 1.
+     if (length > 0 && buff[offset + length - 1] == '?') length--; // remove trailing ?
+     theContentHandler.processingInstruction(thePITarget,
+         new String(buff, offset, length));
+     thePITarget = null;
+ }
+
+ public void stagc(char[] buff, int offset, int length) throws SAXException {
+// System.err.println("%% Start-tag");
+ if (theNewElement == null) return;
+ rectify(theNewElement);
+ if (theStack.model() == Schema.M_EMPTY) {
+ // Force an immediate end tag
+ etag_basic(buff, offset, length);
+ }
+ }
+
+ /**
+  * Handle the close of an empty-tag: the pending element is placed on
+  * the stack via rectify() and then closed at once.
+  */
+ public void stage(char[] buff, int offset, int length) throws SAXException {
+     if (theNewElement != null) {
+         rectify(theNewElement);
+         // Force an immediate end tag
+         etag_basic(buff, offset, length);
+     }
+ }
+
+ // Comment buffer is twice the size of the output buffer
+ // NOTE(review): not referenced by cmnt() below - check whether this
+ // buffer is still used anywhere
+ private char[] theCommentBuffer = new char[2000];
+ // Passes a comment straight through to the lexical handler.
+ public void cmnt(char[] buff, int offset, int length) throws SAXException {
+ theLexicalHandler.comment(buff, offset, length);
+ }
+
+ // Rectify the stack, pushing and popping as needed
+ // so that the argument can be safely pushed
+ // On return theNewElement is null, whether or not e could be placed.
+ private void rectify(Element e) throws SAXException {
+ Element sp;
+ // Phase 1: find an open element (sp) that can contain e.  If none can,
+ // wrap e in its natural parent and try again with that parent,
+ // building a chain parent -> ... -> e linked through next().
+ while (true) {
+ for (sp = theStack; sp != null; sp = sp.next()) {
+ if (sp.canContain(e)) break;
+ }
+ if (sp != null) break;
+ ElementType parentType = e.parent();
+ if (parentType == null) break;
+ Element parent = new Element(parentType, defaultAttributes);
+// System.err.println("%% Ascending from " + e.name() + " to " + parent.name());
+ parent.setNext(e);
+ e = parent;
+ }
+ if (sp == null) return; // don't know what to do
+ // Phase 2: close everything above sp, but never the two bottom-most
+ // entries of the stack
+ while (theStack != sp) {
+ if (theStack == null || theStack.next() == null ||
+ theStack.next().next() == null) break;
+ restartablyPop();
+ }
+ // Phase 3: push the chain outermost-first, giving saved restartable
+ // elements a chance to reopen before each following element.  The
+ // "<pcdata>" placeholder itself is never pushed.
+ while (e != null) {
+ Element nexte = e.next();
+ if (!e.name().equals("<pcdata>")) push(e);
+ e = nexte;
+ restart(e);
+ }
+ theNewElement = null;
+ }
+
+ // Returns the value stored by the most recent entity() call
+ // (reset to 0 by setup()).
+ public int getEntity() {
+ return theEntity;
+ }
+
+ // Return the argument as a valid XML name
+ // This no longer lowercases the result: we depend on Schema to
+ // canonicalize case.
+ // Characters that are not letters, digits, '_', '-', '.' or the first
+ // colon are dropped.  The result is interned.
+ private String makeName(char[] buff, int offset, int length) {
+ StringBuffer dst = new StringBuffer(length + 2);
+ boolean seenColon = false;
+ // True at the start of the name and again right after the colon
+ boolean start = true;
+// String src = new String(buff, offset, length); // DEBUG
+ for (; length-- > 0; offset++) {
+ char ch = buff[offset];
+ if (Character.isLetter(ch) || ch == '_') {
+ start = false;
+ dst.append(ch);
+ }
+ else if (Character.isDigit(ch) || ch == '-' || ch == '.') {
+ // Not allowed to begin a name part: prefix with '_'
+ if (start) dst.append('_');
+ start = false;
+ dst.append(ch);
+ }
+ else if (ch == ':' && !seenColon) {
+ // Only the first colon is kept; an empty prefix becomes "_"
+ seenColon = true;
+ if (start) dst.append('_');
+ start = true;
+ dst.append(translateColons ? '_' : ch);
+ }
+ }
+ // An empty name, or one ending in the colon, gets '_' appended
+ int dstLength = dst.length();
+ if (dstLength == 0 || dst.charAt(dstLength - 1) == ':') dst.append('_');
+// System.err.println("Made name \"" + dst + "\" from \"" + src + "\"");
+ return dst.toString().intern();
+ }
+
+ // Default LexicalHandler implementation
+
+ // Each method below is deliberately empty: these are the callbacks that
+ // run when the parser itself is the lexical handler (see setProperty).
+ public void comment(char[] ch, int start, int length) throws SAXException { }
+ public void endCDATA() throws SAXException { }
+ public void endDTD() throws SAXException { }
+ public void endEntity(String name) throws SAXException { }
+ public void startCDATA() throws SAXException { }
+ public void startDTD(String name, String publicid, String systemid) throws SAXException { }
+ public void startEntity(String name) throws SAXException { }
+
+ }
diff --git a/src/java/org/ccil/cowan/tagsoup/ScanHandler.java b/src/java/org/ccil/cowan/tagsoup/ScanHandler.java
new file mode 100644
index 0000000..368569a
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/ScanHandler.java
@@ -0,0 +1,119 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Scanner handler
+
+package org.ccil.cowan.tagsoup;
+import org.xml.sax.SAXException;
+
+/**
+An interface that Scanners use to report events in the input stream.
+**/
+
+public interface ScanHandler {
+
+    /**
+    Called for an attribute name that has no value.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void adup(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for an attribute name whose value will be reported next.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void aname(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for an attribute value.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void aval(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for the content of a CDATA section (as opposed to a CDATA element).
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void cdsect(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for a &lt;!....&gt; declaration, typically a DOCTYPE.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void decl(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for an entity reference or character reference.
+    The resolved value is available from {@link #getEntity} afterwards.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void entity(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called at end of input.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void eof(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for an end-tag.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void etag(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for the general identifier (element type name) of a start-tag.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void gi(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for character content.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void pcdata(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for the data part of a processing instruction.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void pi(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for the target part of a processing instruction.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void pitarget(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called when a start-tag is closed.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void stagc(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called when an empty-tag is closed.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void stage(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Called for a comment.
+    @param buff Buffer holding the reported characters
+    @param offset Index of the first reported character in buff
+    @param length Number of reported characters
+    **/
+    void cmnt(char[] buff, int offset, int length) throws SAXException;
+
+    /**
+    Returns the value of the last entity or character reference reported.
+    @return The value of the most recently reported reference
+    **/
+    int getEntity();
+
+}
diff --git a/src/java/org/ccil/cowan/tagsoup/Scanner.java b/src/java/org/ccil/cowan/tagsoup/Scanner.java
new file mode 100644
index 0000000..04c8b97
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/Scanner.java
@@ -0,0 +1,50 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Scanner
+
+package org.ccil.cowan.tagsoup;
+import java.io.IOException;
+import java.io.Reader;
+import org.xml.sax.SAXException;
+
+/**
+An interface allowing Parser to invoke scanners.
+**/
+
+public interface Scanner {
+
+    /**
+    Run the scanner over a character source, reporting what it finds.
+    @param r A source of characters to scan
+    @param h A ScanHandler to report events to
+    @throws IOException if reading from r fails
+    @throws SAXException if the handler reports an error
+    **/
+    void scan(Reader r, ScanHandler h) throws IOException, SAXException;
+
+    /**
+    Reset the embedded locator for a new source.
+    @param publicid The publicid of the source
+    @param systemid The systemid of the source
+    **/
+    void resetDocumentLocator(String publicid, String systemid);
+
+    /**
+    Tell the scanner to switch to CDATA content mode.
+    **/
+    void startCDATA();
+
+}
diff --git a/src/java/org/ccil/cowan/tagsoup/Schema.java b/src/java/org/ccil/cowan/tagsoup/Schema.java
new file mode 100644
index 0000000..0d99a23
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/Schema.java
@@ -0,0 +1,170 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Model of document
+
+package org.ccil.cowan.tagsoup;
+import java.util.HashMap;
+
+/**
+Abstract class representing a TSSL schema.
+Actual TSSL schemas are compiled into concrete subclasses of this class.
+**/
+
+public abstract class Schema {
+
+    public static final int M_ANY = 0xFFFFFFFF;
+    public static final int M_EMPTY = 0;
+    public static final int M_PCDATA = 1 << 30;
+    public static final int M_ROOT = 1 << 31;
+
+
+    public static final int F_RESTART = 1;
+    public static final int F_CDATA = 2;
+    public static final int F_NOFORCE = 4;
+
+    private HashMap theEntities =
+        new HashMap();      // String -> Integer
+    private HashMap theElementTypes =
+        new HashMap();      // lower-cased String -> ElementType
+
+    private String theURI = "";
+    private String thePrefix = "";
+    private ElementType theRoot = null;
+
+    /**
+    Compute the key under which an element type is stored.
+    Lower-casing uses a fixed locale so that lookups do not depend on the
+    platform default (under a Turkish locale "TITLE" would otherwise
+    become "t\u0131tle" and never match).
+    **/
+
+    private static String elementKey(String name) {
+        return name.toLowerCase(java.util.Locale.ENGLISH);
+    }
+
+    /**
+    Add or replace an element type for this schema.
+    An element type whose memberOf is exactly M_ROOT becomes the root
+    element type.
+    @param name Name (Qname) of the element
+    @param model Models of the element's content as a vector of bits
+    @param memberOf Models the element is a member of as a vector of bits
+    @param flags Flags for the element
+    **/
+
+    public void elementType(String name, int model, int memberOf, int flags) {
+        ElementType e = new ElementType(name, model, memberOf, flags, this);
+        theElementTypes.put(elementKey(name), e);
+        if (memberOf == M_ROOT) theRoot = e;
+    }
+
+    /**
+    Get the root element of this schema
+    @return The root element type, or null if none has been defined
+    **/
+
+    public ElementType rootElementType() {
+        return theRoot;
+    }
+
+    /**
+    Add or replace a default attribute for an element type in this schema.
+    @param elemName Name (Qname) of the element type
+    @param attrName Name (Qname) of the attribute
+    @param type Type of the attribute
+    @param value Default value of the attribute; null if no default
+    @throws Error if the element type is unknown
+    **/
+
+    public void attribute(String elemName, String attrName,
+                String type, String value) {
+        ElementType e = getElementType(elemName);
+        if (e == null) {
+            throw new Error("Attribute " + attrName +
+                " specified for unknown element type " +
+                elemName);
+        }
+        e.setAttribute(attrName, type, value);
+    }
+
+    /**
+    Specify natural parent of an element in this schema.
+    @param name Name of the child element
+    @param parentName Name of the parent element
+    @throws Error if either element type is unknown
+    **/
+
+    public void parent(String name, String parentName) {
+        ElementType child = getElementType(name);
+        ElementType parent = getElementType(parentName);
+        if (child == null) {
+            throw new Error("No child " + name + " for parent " + parentName);
+        }
+        if (parent == null) {
+            throw new Error("No parent " + parentName + " for child " + name);
+        }
+        child.setParent(parent);
+    }
+
+    /**
+    Add to or replace a character entity in this schema.
+    @param name Name of the entity (case-sensitive)
+    @param value Value of the entity
+    **/
+
+    public void entity(String name, int value) {
+        theEntities.put(name, new Integer(value));
+    }
+
+    /**
+    Get an ElementType by name, ignoring case.
+    @param name Name (Qname) of the element type
+    @return The corresponding ElementType, or null if none
+    **/
+
+    public ElementType getElementType(String name) {
+        return (ElementType)(theElementTypes.get(elementKey(name)));
+    }
+
+    /**
+    Get an entity value by name.
+    @param name Name of the entity (case-sensitive)
+    @return The corresponding character, or 0 if none
+    **/
+
+    public int getEntity(String name) {
+        Integer ch = (Integer)theEntities.get(name);
+        if (ch == null) return 0;
+        return ch.intValue();
+    }
+
+    /**
+    Return the URI (namespace name) of this schema.
+    **/
+
+    public String getURI() {
+        return theURI;
+    }
+
+    /**
+    Return the prefix of this schema.
+    **/
+
+    public String getPrefix() {
+        return thePrefix;
+    }
+
+    /**
+    Change the URI (namespace name) of this schema.
+    **/
+
+    public void setURI(String uri) {
+        theURI = uri;
+    }
+
+    /**
+    Change the prefix of this schema.
+    **/
+
+    public void setPrefix(String prefix) {
+        thePrefix = prefix;
+    }
+
+}
diff --git a/src/java/org/ccil/cowan/tagsoup/XMLWriter.java b/src/java/org/ccil/cowan/tagsoup/XMLWriter.java
new file mode 100644
index 0000000..0dc7a03
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/XMLWriter.java
@@ -0,0 +1,1435 @@
+// XMLWriter.java - serialize an XML document.
+// Written by David Megginson, david@megginson.com
+// and placed by him into the public domain.
+// Extensively modified by John Cowan for TagSoup.
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Properties;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.AttributesImpl;
+import org.xml.sax.helpers.NamespaceSupport;
+import org.xml.sax.helpers.XMLFilterImpl;
+import org.xml.sax.ext.LexicalHandler;
+
+
+/**
+ * Filter to write an XML document from a SAX event stream.
+ *
+ * <p>This class can be used by itself or as part of a SAX event
+ * stream: it takes as input a series of SAX2 ContentHandler
+ * events and uses the information in those events to write
+ * an XML document. Since this class is a filter, it can also
+ * pass the events on down a filter chain for further processing
+ * (you can use the XMLWriter to take a snapshot of the current
+ * state at any point in a filter chain), and it can be
+ * used directly as a ContentHandler for a SAX2 XMLReader.</p>
+ *
+ * <p>The client creates a document by invoking the methods for
+ * standard SAX2 events, always beginning with the
+ * {@link #startDocument startDocument} method and ending with
+ * the {@link #endDocument endDocument} method. There are convenience
+ * methods provided so that clients do not have to create empty
+ * attribute lists or provide empty strings as parameters; for
+ * example, the method invocation</p>
+ *
+ * <pre>
+ * w.startElement("foo");
+ * </pre>
+ *
+ * <p>is equivalent to the regular SAX2 ContentHandler method</p>
+ *
+ * <pre>
+ * w.startElement("", "foo", "", new AttributesImpl());
+ * </pre>
+ *
+ * <p>Except that it is more efficient because it does not allocate
+ * a new empty attribute list each time. The following code will send
+ * a simple XML document to standard output:</p>
+ *
+ * <pre>
+ * XMLWriter w = new XMLWriter();
+ *
+ * w.startDocument();
+ * w.startElement("greeting");
+ * w.characters("Hello, world!");
+ * w.endElement("greeting");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;greeting>Hello, world!&lt;/greeting>
+ * </pre>
+ *
+ * <p>In fact, there is an even simpler convenience method,
+ * <var>dataElement</var>, designed for writing elements that
+ * contain only character data, so the code to generate the
+ * document could be shortened to</p>
+ *
+ * <pre>
+ * XMLWriter w = new XMLWriter();
+ *
+ * w.startDocument();
+ * w.dataElement("greeting", "Hello, world!");
+ * w.endDocument();
+ * </pre>
+ *
+ * <h2>Whitespace</h2>
+ *
+ * <p>According to the XML Recommendation, <em>all</em> whitespace
+ * in an XML document is potentially significant to an application,
+ * so this class never adds newlines or indentation. If you
+ * insert three elements in a row, as in</p>
+ *
+ * <pre>
+ * w.dataElement("item", "1");
+ * w.dataElement("item", "2");
+ * w.dataElement("item", "3");
+ * </pre>
+ *
+ * <p>you will end up with</p>
+ *
+ * <pre>
+ * &lt;item>1&lt;/item>&lt;item>2&lt;/item>&lt;item>3&lt;/item>
+ * </pre>
+ *
+ * <p>You need to invoke one of the <var>characters</var> methods
+ * explicitly to add newlines or indentation. Alternatively, you
+ * can use {@link com.megginson.sax.DataWriter DataWriter}, which
+ * is derived from this class -- it is optimized for writing
+ * purely data-oriented (or field-oriented) XML, and does automatic
+ * linebreaks and indentation (but does not support mixed content
+ * properly).</p>
+ *
+ *
+ * <h2>Namespace Support</h2>
+ *
+ * <p>The writer contains extensive support for XML Namespaces, so that
+ * a client application does not have to keep track of prefixes and
+ * supply <var>xmlns</var> attributes. By default, the XML writer will
+ * generate Namespace prefixes of the form __NS1, __NS2, etc., and declare
+ * them wherever they are needed, as in the following example:</p>
+ *
+ * <pre>
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;__NS1:foo xmlns:__NS1="http://www.foo.com/ns/"/>
+ * </pre>
+ *
+ * <p>In many cases, document authors will prefer to choose their
+ * own prefixes rather than using the (ugly) default names. The
+ * XML writer allows two methods for selecting prefixes:</p>
+ *
+ * <ol>
+ * <li>the qualified name</li>
+ * <li>the {@link #setPrefix setPrefix} method.</li>
+ * </ol>
+ *
+ * <p>Whenever the XML writer finds a new Namespace URI, it checks
+ * to see if a qualified (prefixed) name is also available; if so
+ * it attempts to use the name's prefix (as long as the prefix is
+ * not already in use for another Namespace URI).</p>
+ *
+ * <p>Before writing a document, the client can also pre-map a prefix
+ * to a Namespace URI with the setPrefix method:</p>
+ *
+ * <pre>
+ * w.setPrefix("http://www.foo.com/ns/", "foo");
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;foo:foo xmlns:foo="http://www.foo.com/ns/"/>
+ * </pre>
+ *
+ * <p>The default Namespace simply uses an empty string as the prefix:</p>
+ *
+ * <pre>
+ * w.setPrefix("http://www.foo.com/ns/", "");
+ * w.startDocument();
+ * w.emptyElement("http://www.foo.com/ns/", "foo");
+ * w.endDocument();
+ * </pre>
+ *
+ * <p>The resulting document will look like this:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;foo xmlns="http://www.foo.com/ns/"/>
+ * </pre>
+ *
+ * <p>By default, the XML writer will not declare a Namespace until
+ * it is actually used. Sometimes, this approach will create
+ * a large number of Namespace declarations, as in the following
+ * example:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ * &lt;rdf:Description about="http://www.foo.com/ids/books/12345">
+ * &lt;dc:title xmlns:dc="http://www.purl.org/dc/">A Dark Night&lt;/dc:title>
+ * &lt;dc:creator xmlns:dc="http://www.purl.org/dc/">Jane Smith&lt;/dc:creator>
+ * &lt;dc:date xmlns:dc="http://www.purl.org/dc/">2000-09-09&lt;/dc:date>
+ * &lt;/rdf:Description>
+ * &lt;/rdf:RDF>
+ * </pre>
+ *
+ * <p>The "rdf" prefix is declared only once, because the RDF Namespace
+ * is used by the root element and can be inherited by all of its
+ * descendants; the "dc" prefix, on the other hand, is declared three
+ * times, because no higher element uses the Namespace. To solve this
+ * problem, you can instruct the XML writer to predeclare Namespaces
+ * on the root element even if they are not used there:</p>
+ *
+ * <pre>
+ * w.forceNSDecl("http://www.purl.org/dc/");
+ * </pre>
+ *
+ * <p>Now, the "dc" prefix will be declared on the root element even
+ * though it's not needed there, and can be inherited by its
+ * descendants:</p>
+ *
+ * <pre>
+ * &lt;?xml version="1.0" standalone="yes"?>
+ *
+ * &lt;rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ * xmlns:dc="http://www.purl.org/dc/">
+ * &lt;rdf:Description about="http://www.foo.com/ids/books/12345">
+ * &lt;dc:title>A Dark Night&lt;/dc:title>
+ * &lt;dc:creator>Jane Smith&lt;/dc:creator>
+ * &lt;dc:date>2000-09-09&lt;/dc:date>
+ * &lt;/rdf:Description>
+ * &lt;/rdf:RDF>
+ * </pre>
+ *
+ * <p>This approach is also useful for declaring Namespace prefixes
+ * that will be used by qualified names appearing in attribute values or
+ * character data.</p>
+ *
+ * @author David Megginson, david@megginson.com
+ * @version 0.2
+ * @see org.xml.sax.XMLFilter
+ * @see org.xml.sax.ContentHandler
+ */
+public class XMLWriter extends XMLFilterImpl implements LexicalHandler
+{
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Constructors.
+ ////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Create a new XML writer with no parent reader.
+     *
+     * <p>Output goes to standard output.</p>
+     */
+    public XMLWriter ()
+    {
+        // Delegate to the Writer constructor; a null writer selects
+        // standard output.  The cast picks the Writer overload.
+        this((Writer) null);
+    }
+
+
+    /**
+     * Create a new XML writer with no parent reader.
+     *
+     * <p>Output goes to the writer provided.</p>
+     *
+     * @param writer The output destination, or null to use standard
+     *        output.
+     */
+    public XMLWriter (Writer writer)
+    {
+        super();
+        init(writer);
+    }
+
+
+    /**
+     * Create a new XML writer as a filter on an XML reader.
+     *
+     * <p>The specified XML reader becomes the parent; output goes to
+     * standard output.</p>
+     *
+     * @param xmlreader The parent in the filter chain, or null
+     *        for no parent.
+     */
+    public XMLWriter (XMLReader xmlreader)
+    {
+        // Delegate to the two-argument constructor; a null writer
+        // selects standard output.
+        this(xmlreader, null);
+    }
+
+
+    /**
+     * Create a new XML writer as a filter on an XML reader, with an
+     * explicit output destination.
+     *
+     * @param xmlreader The parent in the filter chain, or null
+     *        for no parent.
+     * @param writer The output destination, or null to use standard
+     *        output.
+     */
+    public XMLWriter (XMLReader xmlreader, Writer writer)
+    {
+        super(xmlreader);
+        this.init(writer);
+    }
+
+
+    /**
+     * Shared initialization used by the public constructors.
+     *
+     * <p>Selects the output destination and creates fresh, empty
+     * Namespace-tracking tables and output properties.</p>
+     *
+     * @param writer The output destination, or null to use
+     *        standard output.
+     */
+    private void init (Writer writer)
+    {
+        setOutput(writer);
+        this.nsSupport = new NamespaceSupport();
+        this.prefixTable = new Hashtable();        // URI -> preferred prefix
+        this.forcedDeclTable = new Hashtable();    // URIs to declare on the root
+        this.doneDeclTable = new Hashtable();      // URI -> prefix already used
+        this.outputProperties = new Properties();
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Public methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Reset the writer so that it can be used for a new document.
+     *
+     * <p>This is useful when the writer has thrown an exception part
+     * way through a document.  It is usually a good idea to invoke
+     * {@link #flush flush} first, so that no output is lost.</p>
+     *
+     * <p>{@link #startDocument startDocument} invokes this method
+     * automatically before writing a new document.</p>
+     *
+     * <p><strong>Note:</strong> only the element depth, the counter
+     * used for generated prefixes and the Namespace context stack are
+     * reset; the preferred-prefix and forced-declaration tables and
+     * the selected output writer are left untouched.</p>
+     *
+     * @see #flush
+     */
+    public void reset ()
+    {
+        nsSupport.reset();
+        prefixCounter = 0;
+        elementLevel = 0;
+    }
+
+
+    /**
+     * Flush the output writer.
+     *
+     * <p>Use this to make certain that everything written so far has
+     * reached the output without closing it.</p>
+     *
+     * <p>{@link #endDocument endDocument} invokes this method
+     * automatically after writing a document.</p>
+     *
+     * @exception java.io.IOException If the underlying writer fails
+     *            to flush.
+     * @see #reset
+     */
+    public void flush ()
+        throws IOException
+    {
+        this.output.flush();
+    }
+
+
+    /**
+     * Set a new output destination for the document.
+     *
+     * <p>A null argument selects standard output, wrapped in a new
+     * {@link java.io.OutputStreamWriter}.</p>
+     *
+     * @param writer The output destination, or null to use
+     *        standard output.
+     * @see #flush
+     */
+    public void setOutput (Writer writer)
+    {
+        output = (writer != null)
+            ? writer
+            : new OutputStreamWriter(System.out);
+    }
+
+
+    /**
+     * Record the preferred prefix for a Namespace URI.
+     *
+     * <p>This only registers a preference; it does not force the
+     * Namespace to be declared.  To do that, use the {@link
+     * #forceNSDecl(java.lang.String) forceNSDecl} method as well.</p>
+     *
+     * @param uri The Namespace URI.
+     * @param prefix The preferred prefix, or "" to select
+     *        the default Namespace.
+     * @see #getPrefix
+     * @see #forceNSDecl(java.lang.String)
+     * @see #forceNSDecl(java.lang.String,java.lang.String)
+     */
+    public void setPrefix (String uri, String prefix)
+    {
+        this.prefixTable.put(uri, prefix);
+    }
+
+
+    /**
+     * Get the preferred prefix registered for a Namespace URI.
+     *
+     * @param uri The Namespace URI.
+     * @return The prefix registered through {@link #setPrefix setPrefix}
+     *         ("" stands for the default Namespace), or null if no
+     *         prefix has been registered for the URI.
+     * @see #setPrefix
+     */
+    public String getPrefix (String uri)
+    {
+        final Object preferred = prefixTable.get(uri);
+        return (String) preferred;
+    }
+
+
+    /**
+     * Force a Namespace to be declared on the root element.
+     *
+     * <p>By default the XMLWriter declares a Namespace only on the
+     * elements that need it, so a Namespace that is not used on the
+     * root element may end up declared in many places.  Registering
+     * the URI here makes the writer declare it on the root element
+     * even if it is not used there, which reduces the number of
+     * xmlns attributes in the document.</p>
+     *
+     * @param uri The Namespace URI to declare.
+     * @see #forceNSDecl(java.lang.String,java.lang.String)
+     * @see #setPrefix
+     */
+    public void forceNSDecl (String uri)
+    {
+        // Only the key matters; the value is a placeholder.
+        this.forcedDeclTable.put(uri, Boolean.TRUE);
+    }
+
+
+    /**
+     * Force a Namespace declaration with a preferred prefix.
+     *
+     * <p>Convenience method: first registers the prefix with {@link
+     * #setPrefix setPrefix}, then forces the declaration with {@link
+     * #forceNSDecl(java.lang.String) forceNSDecl}.</p>
+     *
+     * @param uri The Namespace URI to declare on the root element.
+     * @param prefix The preferred prefix for the Namespace, or ""
+     *        for the default Namespace.
+     * @see #setPrefix
+     * @see #forceNSDecl(java.lang.String)
+     */
+    public void forceNSDecl (String uri, String prefix)
+    {
+        this.setPrefix(uri, prefix);
+        this.forceNSDecl(uri);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Methods from org.xml.sax.ContentHandler.
+ ////////////////////////////////////////////////////////////////////
+
+
+    /**
+     * Write the XML declaration at the beginning of the document.
+     *
+     * <p>The writer is {@link #reset reset} first.  Unless the
+     * OMIT_XML_DECLARATION output property is "yes", an XML declaration
+     * is written: the version defaults to "1.0", the encoding
+     * pseudo-attribute is written only when a non-empty output encoding
+     * is set, and standalone defaults to "yes".  The declaration is
+     * always terminated with "?&gt;" and a newline.</p>
+     *
+     * Pass the event on down the filter chain for further processing.
+     *
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the XML declaration, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see org.xml.sax.ContentHandler#startDocument
+     */
+    public void startDocument ()
+        throws SAXException
+    {
+        reset();
+        if (!("yes".equals(outputProperties.getProperty(OMIT_XML_DECLARATION, "no")))) {
+            write("<?xml");
+            write(" version=\"");
+            write(version == null ? "1.0" : version);
+            write("\"");
+            // Compare by value: a reference comparison against "" lets a
+            // non-interned empty string through and emits encoding="".
+            if (outputEncoding != null && !"".equals(outputEncoding)) {
+                write(" encoding=\"");
+                write(outputEncoding);
+                write("\"");
+            }
+            write(" standalone=\"");
+            write(standalone == null ? "yes" : standalone);
+            // Close the declaration on every path; previously an explicit
+            // standalone value left it without the terminating "?>".
+            write("\"?>\n");
+        }
+        super.startDocument();
+    }
+
+
+    /**
+     * Write a newline at the end of the document, then flush the output.
+     *
+     * Pass the event on down the filter chain for further processing.
+     *
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the newline or flushing the output, or if a
+     *            handler further down the filter chain raises an exception.
+     * @see org.xml.sax.ContentHandler#endDocument
+     */
+    public void endDocument ()
+        throws SAXException
+    {
+        write('\n');
+        super.endDocument();
+        try {
+            this.flush();
+        } catch (IOException flushFailure) {
+            // Report I/O failures through the SAX exception channel.
+            throw new SAXException(flushFailure);
+        }
+    }
+
+
+    /**
+     * Write a start tag.
+     *
+     * <p>A new Namespace context is pushed and the element depth is
+     * incremented.  If a DTD is being forced and none has been output
+     * yet, a DOCTYPE is started first, named after the local name (or
+     * the qName when the local name is null).  On the outermost element
+     * the forced Namespace declarations are added.  In HTML mode, a
+     * qName of "script" or "style" switches on CDATA-element handling,
+     * under which character data is written without escaping.</p>
+     *
+     * Pass the event on down the filter chain for further processing.
+     *
+     * @param uri The Namespace URI, or the empty string if none
+     *        is available.
+     * @param localName The element's local (unprefixed) name (required).
+     * @param qName The element's qualified (prefixed) name, or the
+     *        empty string if none is available.  This method will
+     *        use the qName as a template for generating a prefix
+     *        if necessary, but it is not guaranteed to use the
+     *        same qName.
+     * @param atts The element's attribute list (must not be null).
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the start tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see org.xml.sax.ContentHandler#startElement
+     */
+    public void startElement (String uri, String localName,
+                              String qName, Attributes atts)
+        throws SAXException
+    {
+        elementLevel++;
+        nsSupport.pushContext();
+        if (forceDTD && !hasOutputDTD) {
+            final String doctypeName = (localName != null) ? localName : qName;
+            startDTD(doctypeName, "", "");
+        }
+        write('<');
+        writeName(uri, localName, qName, true);
+        writeAttributes(atts);
+        final boolean outermost = (elementLevel == 1);
+        if (outermost) {
+            forceNSDecls();
+        }
+        writeNSDecls();
+        write('>');
+        if (htmlMode) {
+            if (qName.equals("script") || qName.equals("style")) {
+                cdataElement = true;
+            }
+        }
+        super.startElement(uri, localName, qName, atts);
+    }
+
+
+    /**
+     * Write an end tag.
+     *
+     * <p>In HTML mode no end tag is written for the elements area,
+     * base, basefont, br, col, frame, hr, img, input, isindex, link,
+     * meta and param (matched on qName) when the Namespace URI is the
+     * XHTML Namespace or empty.  A newline is written after the
+     * outermost element.  CDATA-element handling is switched off, the
+     * Namespace context is popped and the element depth decremented.</p>
+     *
+     * Pass the event on down the filter chain for further processing.
+     *
+     * @param uri The Namespace URI, or the empty string if none
+     *        is available.
+     * @param localName The element's local (unprefixed) name (required).
+     * @param qName The element's qualified (prefixed) name, or the
+     *        empty string if none is available.  This method will
+     *        use the qName as a template for generating a prefix
+     *        if necessary, but it is not guaranteed to use the
+     *        same qName.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the end tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see org.xml.sax.ContentHandler#endElement
+     */
+    public void endElement (String uri, String localName, String qName)
+        throws SAXException
+    {
+        // Short-circuit order matters: uri and qName are only examined
+        // when HTML mode is on.
+        final boolean omitEndTag =
+            htmlMode
+            && (uri.equals("http://www.w3.org/1999/xhtml") || uri.equals(""))
+            && (qName.equals("area") || qName.equals("base")
+                || qName.equals("basefont") || qName.equals("br")
+                || qName.equals("col") || qName.equals("frame")
+                || qName.equals("hr") || qName.equals("img")
+                || qName.equals("input") || qName.equals("isindex")
+                || qName.equals("link") || qName.equals("meta")
+                || qName.equals("param"));
+        if (!omitEndTag) {
+            write("</");
+            writeName(uri, localName, qName, true);
+            write('>');
+        }
+        final boolean outermost = (elementLevel == 1);
+        if (outermost) {
+            write('\n');
+        }
+        cdataElement = false;
+        super.endElement(uri, localName, qName);
+        nsSupport.popContext();
+        elementLevel--;
+    }
+
+
+ /**
+ * Write character data.
+ *
+ * Pass the event on down the filter chain for further processing.
+ *
+ * @param ch The array of characters to write.
+ * @param start The starting position in the array.
+ * @param length The number of characters to write.
+ * @exception org.xml.sax.SAXException If there is an error
+ * writing the characters, or if a handler further down
+ * the filter chain raises an exception.
+ * @see org.xml.sax.ContentHandler#characters
+ */
+ public void characters (char ch[], int start, int len)
+ throws SAXException
+ {
+ if (!cdataElement) {
+ writeEsc(ch, start, len, false);
+ }
+ else {
+ for (int i = start; i < start + len; i++) {
+ write(ch[i]);
+ }
+ }
+ super.characters(ch, start, len);
+ }
+
+
+    /**
+     * Write ignorable whitespace.
+     *
+     * <p>The characters go through the same escaping routine that is
+     * used for ordinary character data.</p>
+     *
+     * Pass the event on down the filter chain for further processing.
+     *
+     * @param ch The array of characters to write.
+     * @param start The starting position in the array.
+     * @param length The number of characters to write.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the whitespace, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see org.xml.sax.ContentHandler#ignorableWhitespace
+     */
+    public void ignorableWhitespace (char ch[], int start, int length)
+        throws SAXException
+    {
+        final boolean isAttributeValue = false;
+        writeEsc(ch, start, length, isAttributeValue);
+        super.ignorableWhitespace(ch, start, length);
+    }
+
+
+
+    /**
+     * Write a processing instruction.
+     *
+     * <p>The target and the data are written as given, separated by a
+     * single space.  A newline follows the instruction when it occurs
+     * outside any element.</p>
+     *
+     * Pass the event on down the filter chain for further processing.
+     *
+     * @param target The PI target.
+     * @param data The PI data.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the PI, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see org.xml.sax.ContentHandler#processingInstruction
+     */
+    public void processingInstruction (String target, String data)
+        throws SAXException
+    {
+        write("<?");
+        write(target);
+        write(' ');
+        write(data);
+        write("?>");
+        final boolean outsideElements = (elementLevel <= 0);
+        if (outsideElements) {
+            write('\n');
+        }
+        super.processingInstruction(target, data);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Additional markup.
+ ////////////////////////////////////////////////////////////////////
+
+    /**
+     * Write an empty element.
+     *
+     * This method writes an empty element tag rather than a start tag
+     * followed by an end tag.  Both a {@link #startElement
+     * startElement} and an {@link #endElement endElement} event will
+     * be passed on down the filter chain.
+     *
+     * <p>The Namespace context pushed for the element is popped again
+     * before returning, so declarations made on the empty element do
+     * not remain in scope for the elements that follow it.</p>
+     *
+     * @param uri The element's Namespace URI, or the empty string
+     *        if the element has no Namespace or if Namespace
+     *        processing is not being performed.
+     * @param localName The element's local name (without prefix).  This
+     *        parameter must be provided.
+     * @param qName The element's qualified name (with prefix), or
+     *        the empty string if none is available.  This parameter
+     *        is strictly advisory: the writer may or may not use
+     *        the prefix attached.
+     * @param atts The element's attribute list.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the empty tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement
+     * @see #endElement
+     */
+    public void emptyElement (String uri, String localName,
+                              String qName, Attributes atts)
+        throws SAXException
+    {
+        nsSupport.pushContext();
+        write('<');
+        writeName(uri, localName, qName, true);
+        writeAttributes(atts);
+        // NOTE(review): elementLevel is not incremented in this method, so
+        // this test is true for empty children of the outermost element
+        // rather than for an empty outermost element -- confirm intent.
+        if (elementLevel == 1) {
+            forceNSDecls();
+        }
+        writeNSDecls();
+        write("/>");
+        super.startElement(uri, localName, qName, atts);
+        super.endElement(uri, localName, qName);
+        // Balance the pushContext() above.  Without it the declarations of
+        // this element stay in scope for later siblings (which then omit
+        // their own xmlns attributes), and every subsequent endElement()
+        // pops the wrong context.
+        nsSupport.popContext();
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Convenience methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+
+    /**
+     * Start a new element without a qname or attributes.
+     *
+     * <p>Supplies an empty string for the qualified name and the
+     * shared empty attribute list, then invokes {@link
+     * #startElement(String, String, String, Attributes)}
+     * directly.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the start tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     */
+    public void startElement (String uri, String localName)
+        throws SAXException
+    {
+        final String noQName = "";
+        startElement(uri, localName, noQName, EMPTY_ATTS);
+    }
+
+
+    /**
+     * Start a new element without a qname, attributes or a Namespace URI.
+     *
+     * <p>Supplies an empty string for the Namespace URI, an empty
+     * string for the qualified name and the shared empty attribute
+     * list, then invokes {@link
+     * #startElement(String, String, String, Attributes)}
+     * directly.</p>
+     *
+     * @param localName The element's local name.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the start tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     */
+    public void startElement (String localName)
+        throws SAXException
+    {
+        final String noNamespace = "";
+        final String noQName = "";
+        startElement(noNamespace, localName, noQName, EMPTY_ATTS);
+    }
+
+
+    /**
+     * End an element without a qname.
+     *
+     * <p>Supplies an empty string for the qName, then invokes
+     * {@link #endElement(String, String, String)} directly.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the end tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #endElement(String, String, String)
+     */
+    public void endElement (String uri, String localName)
+        throws SAXException
+    {
+        final String noQName = "";
+        endElement(uri, localName, noQName);
+    }
+
+
+    /**
+     * End an element without a Namespace URI or qname.
+     *
+     * <p>Supplies an empty string for both the Namespace URI and the
+     * qName, then invokes
+     * {@link #endElement(String, String, String)} directly.</p>
+     *
+     * @param localName The element's local name.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the end tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #endElement(String, String, String)
+     */
+    public void endElement (String localName)
+        throws SAXException
+    {
+        final String noNamespace = "";
+        final String noQName = "";
+        endElement(noNamespace, localName, noQName);
+    }
+
+
+    /**
+     * Add an empty element without a qname or attributes.
+     *
+     * <p>Supplies an empty string for the qname and the shared empty
+     * attribute list, then invokes
+     * {@link #emptyElement(String, String, String, Attributes)}
+     * directly.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the empty tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #emptyElement(String, String, String, Attributes)
+     */
+    public void emptyElement (String uri, String localName)
+        throws SAXException
+    {
+        final String noQName = "";
+        emptyElement(uri, localName, noQName, EMPTY_ATTS);
+    }
+
+
+    /**
+     * Add an empty element without a Namespace URI, qname or attributes.
+     *
+     * <p>Supplies an empty string for the Namespace URI, an empty
+     * string for the qname and the shared empty attribute list, then
+     * invokes
+     * {@link #emptyElement(String, String, String, Attributes)}
+     * directly.</p>
+     *
+     * @param localName The element's local name.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the empty tag, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #emptyElement(String, String, String, Attributes)
+     */
+    public void emptyElement (String localName)
+        throws SAXException
+    {
+        final String noNamespace = "";
+        final String noQName = "";
+        emptyElement(noNamespace, localName, noQName, EMPTY_ATTS);
+    }
+
+
+    /**
+     * Write an element with character data content.
+     *
+     * <p>Convenience method that writes a complete element: start
+     * tag, character data, end tag.</p>
+     *
+     * <p>This method invokes, in order,
+     * {@link #startElement(String, String, String, Attributes)},
+     * {@link #characters(String)} and
+     * {@link #endElement(String, String, String)}.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @param qName The element's default qualified name.
+     * @param atts The element's attributes.
+     * @param content The character data content.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the element, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     * @see #characters(String)
+     * @see #endElement(String, String, String)
+     */
+    public void dataElement (String uri, String localName,
+                             String qName, Attributes atts,
+                             String content)
+        throws SAXException
+    {
+        this.startElement(uri, localName, qName, atts);
+        this.characters(content);
+        this.endElement(uri, localName, qName);
+    }
+
+
+    /**
+     * Write an element with character data content but no attributes.
+     *
+     * <p>Convenience method that writes a complete element: start
+     * tag, character data, end tag.  An empty string is supplied for
+     * the qname and the shared empty attribute list for the
+     * attributes.</p>
+     *
+     * <p>The work is delegated to
+     * {@link #dataElement(String, String, String, Attributes, String)}.</p>
+     *
+     * @param uri The element's Namespace URI.
+     * @param localName The element's local name.
+     * @param content The character data content.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the element, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     * @see #characters(String)
+     * @see #endElement(String, String, String)
+     */
+    public void dataElement (String uri, String localName, String content)
+        throws SAXException
+    {
+        final String noQName = "";
+        dataElement(uri, localName, noQName, EMPTY_ATTS, content);
+    }
+
+
+    /**
+     * Write an element with character data content but no attributes
+     * or Namespace URI.
+     *
+     * <p>Convenience method that writes a complete element: start
+     * tag, character data, end tag.  An empty string is supplied for
+     * the Namespace URI and for the qualified name, and the shared
+     * empty attribute list for the attributes.</p>
+     *
+     * <p>The work is delegated to
+     * {@link #dataElement(String, String, String, Attributes, String)}.</p>
+     *
+     * @param localName The element's local name.
+     * @param content The character data content.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the element, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #startElement(String, String, String, Attributes)
+     * @see #characters(String)
+     * @see #endElement(String, String, String)
+     */
+    public void dataElement (String localName, String content)
+        throws SAXException
+    {
+        final String noNamespace = "";
+        final String noQName = "";
+        dataElement(noNamespace, localName, noQName, EMPTY_ATTS, content);
+    }
+
+
+    /**
+     * Write a string of character data.
+     *
+     * <p>Convenience method: the string is converted to a character
+     * array and handed, in full, to
+     * {@link #characters(char[], int, int)}.</p>
+     *
+     * @param data The character data.
+     * @exception org.xml.sax.SAXException If there is an error
+     *            writing the string, or if a handler further down
+     *            the filter chain raises an exception.
+     * @see #characters(char[], int, int)
+     */
+    public void characters (String data)
+        throws SAXException
+    {
+        final char[] buffer = data.toCharArray();
+        characters(buffer, 0, buffer.length);
+    }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal methods.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /**
+ * Force all Namespaces to be declared.
+ *
+ * This method is used on the root element to ensure that
+ * the predeclared Namespaces all appear.
+ */
+ private void forceNSDecls ()
+ {
+ Enumeration prefixes = forcedDeclTable.keys();
+ while (prefixes.hasMoreElements()) {
+ String prefix = (String)prefixes.nextElement();
+ doPrefix(prefix, null, true);
+ }
+ }
+
+
+ /**
+ * Determine the prefix for an element or attribute name, declaring
+ * it in the current Namespace context when it is not yet bound.
+ *
+ * Candidates are tried in this order: a prefix already in scope for
+ * the URI; the prefix recorded the last time this URI was declared;
+ * the preferred prefix from the prefix table; the prefix of the
+ * qName; and finally a generated prefix of the form "__NS" followed
+ * by a counter.
+ *
+ * TODO: this method probably needs some cleanup.
+ *
+ * @param uri The Namespace URI.
+ * @param qName The qualified name (optional); this will be used
+ * to indicate the preferred prefix if none is currently
+ * bound.
+ * @param isElement true if this is an element name, false
+ * if it is an attribute name (which cannot use the
+ * default Namespace).
+ * @return The prefix to use ("" for the default Namespace), or null
+ * when the URI is the empty string.
+ */
+ private String doPrefix (String uri, String qName, boolean isElement)
+ {
+ // URI currently bound to the default (empty) prefix, or null if none.
+ String defaultNS = nsSupport.getURI("");
+ if ("".equals(uri)) {
+ // No Namespace: for an element, undeclare any default Namespace
+ // that is in scope; no prefix is needed either way.
+ if (isElement && defaultNS != null)
+ nsSupport.declarePrefix("", "");
+ return null;
+ }
+ String prefix;
+ if (isElement && defaultNS != null && uri.equals(defaultNS)) {
+ // Element in the current default Namespace: use the empty prefix.
+ prefix = "";
+ } else {
+ prefix = nsSupport.getPrefix(uri);
+ }
+ if (prefix != null) {
+ // Already bound in the current context; nothing to declare.
+ return prefix;
+ }
+ // Try the prefix recorded the last time this URI was declared.
+ prefix = (String) doneDeclTable.get(uri);
+ // Reject the candidate when it is the empty prefix and either this is an
+ // attribute or a default Namespace is already in scope, or when the
+ // candidate is currently bound to some URI.  (&& binds tighter than ||.)
+ if (prefix != null &&
+ ((!isElement || defaultNS != null) &&
+ "".equals(prefix) || nsSupport.getURI(prefix) != null)) {
+ prefix = null;
+ }
+ if (prefix == null) {
+ // Next candidate: the preferred prefix, subject to the same test.
+ prefix = (String) prefixTable.get(uri);
+ if (prefix != null &&
+ ((!isElement || defaultNS != null) &&
+ "".equals(prefix) || nsSupport.getURI(prefix) != null)) {
+ prefix = null;
+ }
+ }
+ if (prefix == null && qName != null && !"".equals(qName)) {
+ // Next candidate: the prefix part of the qName.
+ int i = qName.indexOf(':');
+ if (i == -1) {
+ // Unprefixed qName: only an element with no default Namespace in
+ // scope may claim the empty prefix.
+ if (isElement && defaultNS == null) {
+ prefix = "";
+ }
+ } else {
+ prefix = qName.substring(0, i);
+ }
+ }
+ // Last resort: generate "__NS1", "__NS2", ... until the candidate is
+ // not bound to any URI in the current context.
+ for (;
+ prefix == null || nsSupport.getURI(prefix) != null;
+ prefix = "__NS" + ++prefixCounter)
+ ;
+ // Bind the chosen prefix and remember it for this URI.
+ nsSupport.declarePrefix(prefix, uri);
+ doneDeclTable.put(uri, prefix);
+ return prefix;
+ }
+
+
+ /**
+  * Send one character to the output writer without any escaping.
+  *
+  * @param c The character to write.
+  * @exception org.xml.sax.SAXException Wraps the IOException raised
+  *            by the underlying writer, if any.
+  */
+ private void write (char c)
+     throws SAXException
+ {
+     try {
+         output.write(c);
+     }
+     catch (IOException ioFailure) {
+         // Callers only deal in SAXException, so tunnel the I/O error.
+         throw new SAXException(ioFailure);
+     }
+ }
+
+
+ /**
+  * Send a string to the output writer without any escaping.
+  *
+  * @param s The string to write.
+  * @exception org.xml.sax.SAXException Wraps the IOException raised
+  *            by the underlying writer, if any.
+  */
+ private void write (String s)
+     throws SAXException
+ {
+     try {
+         output.write(s);
+     }
+     catch (IOException ioFailure) {
+         // Callers only deal in SAXException, so tunnel the I/O error.
+         throw new SAXException(ioFailure);
+     }
+ }
+
+
+ /**
+  * Write out an attribute list, escaping values.
+  *
+  * <p>The names will have prefixes added to them.  In HTML mode an
+  * attribute accepted by booleanAttribute is written in minimized
+  * form (name only, no value part).</p>
+  *
+  * @param atts The attribute list to write.
+  * @exception org.xml.sax.SAXException If there is an error writing
+  *            the attribute list, this method will throw an
+  *            IOException wrapped in a SAXException.
+  */
+ private void writeAttributes (Attributes atts)
+     throws SAXException
+ {
+     int len = atts.getLength();
+     for (int i = 0; i < len; i++) {
+         char ch[] = atts.getValue(i).toCharArray();
+         write(' ');
+         writeName(atts.getURI(i), atts.getLocalName(i),
+                   atts.getQName(i), false);
+         // A minimized boolean attribute has no ="value" part, but the
+         // attributes that follow it must still be written: move on to
+         // the next attribute instead of leaving the loop.
+         if (htmlMode &&
+             booleanAttribute(atts.getLocalName(i), atts.getQName(i), atts.getValue(i))) continue;
+         write("=\"");
+         writeEsc(ch, 0, ch.length, true);
+         write('"');
+     }
+ }
+
+
+ private String[] booleans = {"checked", "compact", "declare", "defer",
+                              "disabled", "ismap", "multiple",
+                              "nohref", "noresize", "noshade",
+                              "nowrap", "readonly", "selected"};
+
+ /**
+  * Return true if the attribute is an HTML boolean from the above list,
+  * i.e. its name is in the list and its value equals its name.
+  *
+  * @param localName The local name; may be null or empty, in which
+  *        case the part of qName after any colon is used instead.
+  * @param qName The qualified name, used only as a fallback.
+  * @param value The attribute value.
+  * @return true if the attribute may be written in minimized form.
+  */
+ private boolean booleanAttribute (String localName, String qName, String value)
+ {
+     String name = localName;
+     if (name == null || "".equals(name)) {
+         // No usable local name: fall back to the qualified name, with
+         // or without a prefix (indexOf gives -1 when there is no colon,
+         // so the whole qName is kept).
+         if (qName == null) return false;
+         name = qName.substring(qName.indexOf(':') + 1);
+     }
+     if (!name.equals(value)) return false;
+     for (int j = 0; j < booleans.length; j++) {
+         if (name.equals(booleans[j])) return true;
+     }
+     return false;
+ }
+
+ /**
+  * Write an array of data characters with escaping.
+  *
+  * <p>The characters &amp;, &lt; and &gt; are always written as entity
+  * references; a double quote is escaped only inside attribute values.
+  * When unicodeMode is off, every character above U+007F is written as
+  * a decimal character reference.  A high surrogate followed by a low
+  * surrogate is combined into a single reference to the supplementary
+  * code point, because a reference to a lone surrogate value is not
+  * well-formed XML.</p>
+  *
+  * @param ch The array of characters.
+  * @param start The starting position.
+  * @param length The number of characters to use.
+  * @param isAttVal true if this is an attribute value literal.
+  * @exception org.xml.sax.SAXException If there is an error writing
+  *            the characters, this method will throw an
+  *            IOException wrapped in a SAXException.
+  */
+ private void writeEsc (char ch[], int start,
+                        int length, boolean isAttVal)
+     throws SAXException
+ {
+     int end = start + length;
+     for (int i = start; i < end; i++) {
+         char c = ch[i];
+         switch (c) {
+         case '&':
+             write("&amp;");
+             break;
+         case '<':
+             write("&lt;");
+             break;
+         case '>':
+             write("&gt;");
+             break;
+         case '\"':
+             if (isAttVal) {
+                 write("&quot;");
+             } else {
+                 write('\"');
+             }
+             break;
+         default:
+             if (!unicodeMode && c > '\u007f') {
+                 int code = c;
+                 // Surrogate pair inside the span: emit one reference to
+                 // the code point and skip the low surrogate.
+                 if (c >= 0xD800 && c <= 0xDBFF && i + 1 < end
+                     && ch[i + 1] >= 0xDC00 && ch[i + 1] <= 0xDFFF) {
+                     code = 0x10000 + ((c - 0xD800) << 10) + (ch[i + 1] - 0xDC00);
+                     i++;
+                 }
+                 write("&#");
+                 write(Integer.toString(code));
+                 write(';');
+             } else {
+                 write(c);
+             }
+         }
+     }
+ }
+
+
+ /**
+  * Write an xmlns attribute for every prefix declared in the current
+  * Namespace context.
+  *
+  * @exception org.xml.sax.SAXException This method will throw
+  *            an IOException wrapped in a SAXException if
+  *            there is an error writing the Namespace
+  *            declarations.
+  */
+ private void writeNSDecls ()
+     throws SAXException
+ {
+     for (Enumeration declared = nsSupport.getDeclaredPrefixes();
+          declared.hasMoreElements(); ) {
+         String pfx = (String) declared.nextElement();
+         String bound = nsSupport.getURI(pfx);
+         // A prefix with no URI is written with an empty value.
+         char[] value = (bound == null ? "" : bound).toCharArray();
+         write(' ');
+         if (!"".equals(pfx)) {
+             write("xmlns:");
+             write(pfx);
+             write("=\"");
+         } else {
+             write("xmlns=\"");
+         }
+         writeEsc(value, 0, value.length, true);
+         write('\"');
+     }
+ }
+
+
+ /**
+ * Write an element or attribute name.
+ *
+ * @param uri The Namespace URI.
+ * @param localName The local name.
+ * @param qName The prefixed name, if available, or the empty string.
+ * @param isElement true if this is an element name, false if it
+ * is an attribute name.
+ * @exception org.xml.sax.SAXException This method will throw an
+ * IOException wrapped in a SAXException if there is
+ * an error writing the name.
+ */
+ private void writeName (String uri, String localName,
+ String qName, boolean isElement)
+ throws SAXException
+ {
+ String prefix = doPrefix(uri, qName, isElement);
+ if (prefix != null && !"".equals(prefix)) {
+ write(prefix);
+ write(':');
+ }
+ if (localName != null && !"".equals(localName)) {
+ write(localName);
+ } else {
+ int i = qName.indexOf(':');
+ write(qName.substring(i + 1, qName.length()));
+ }
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Default LexicalHandler implementation
+ ////////////////////////////////////////////////////////////////////
+
+ public void comment(char[] ch, int start, int length) throws SAXException
+ {
+ write("<!--");
+ for (int i = start; i < start + length; i++) {
+ write(ch[i]);
+ if (ch[i] == '-' && i + 1 <= start + length && ch[i+1] == '-')
+ write(' ');
+ }
+ write("-->");
+ }
+
+ // The following LexicalHandler callbacks are deliberately empty:
+ // nothing is written for CDATA section boundaries, the end of the
+ // DTD, or the end of an entity.
+ public void endCDATA() throws SAXException { }
+ public void endDTD() throws SAXException { }
+ public void endEntity(String name) throws SAXException { }
+ public void startCDATA() throws SAXException { }
+ /**
+  * Write a DOCTYPE declaration, at most once per writer.
+  *
+  * <p>Nothing is written when name is null or a DOCTYPE has already
+  * been output.  overridePublic and overrideSystem, when set, replace
+  * the identifiers passed in.  An identifier containing a double quote
+  * is delimited with single quotes.</p>
+  *
+  * @param name The document type name.
+  * @param publicid The public identifier, or null.
+  * @param systemid The system identifier, or null.
+  * @exception org.xml.sax.SAXException If there is an error writing
+  *            the declaration.
+  */
+ public void startDTD(String name, String publicid, String systemid) throws SAXException {
+     // can't cope without a name; only one DTD
+     if (name == null || hasOutputDTD) return;
+     hasOutputDTD = true;
+
+     String sysId = (overrideSystem != null)
+         ? overrideSystem
+         : (systemid == null ? "" : systemid);
+     String pubId = (overridePublic != null) ? overridePublic : publicid;
+
+     write("<!DOCTYPE ");
+     write(name);
+     if (pubId != null && !"".equals(pubId)) {
+         char pubDelim = (pubId.indexOf('"') == -1) ? '"' : '\'';
+         write(" PUBLIC ");
+         write(pubDelim);
+         write(pubId);
+         write(pubDelim);
+         write(' ');
+     }
+     else {
+         write(" SYSTEM ");
+     }
+     char sysDelim = (sysId.indexOf('"') == -1) ? '"' : '\'';
+     write(sysDelim);
+     write(sysId);
+     write(sysDelim);
+     write(">\n");
+ }
+
+ // Deliberately empty: nothing is written at the start of an entity.
+ public void startEntity(String name) throws SAXException { }
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Output properties
+ ////////////////////////////////////////////////////////////////////
+
+ /**
+  * Look up an output property.
+  *
+  * @param key The property name, e.g. one of the constants of this class.
+  * @return Whatever outputProperties.getProperty returns for the key.
+  */
+ public String getOutputProperty(String key) {
+     final String current = outputProperties.getProperty(key);
+     return current;
+ }
+
+ /**
+  * Set an output property and update the writer state derived from it.
+  *
+  * <p>The value is always stored in outputProperties.  In addition:
+  * ENCODING sets outputEncoding and turns unicodeMode on when the name
+  * starts with "utf" (case-insensitively); METHOD turns htmlMode on
+  * when the value is exactly "html"; DOCTYPE_PUBLIC and DOCTYPE_SYSTEM
+  * set the respective override and forceDTD; VERSION and STANDALONE
+  * are recorded.</p>
+  *
+  * @param key The property name.
+  * @param value The property value.
+  */
+ public void setOutputProperty(String key, String value) {
+     outputProperties.setProperty(key, value);
+     if (key.equals(ENCODING)) {
+         outputEncoding = value;
+         // regionMatches is false for names shorter than three
+         // characters, where substring(0, 3) would throw.
+         unicodeMode = value.regionMatches(true, 0, "utf", 0, 3);
+     }
+     else if (key.equals(METHOD)) {
+         htmlMode = value.equals("html");
+     }
+     else if (key.equals(DOCTYPE_PUBLIC)) {
+         overridePublic = value;
+         forceDTD = true;
+     }
+     else if (key.equals(DOCTYPE_SYSTEM)) {
+         overrideSystem = value;
+         forceDTD = true;
+     }
+     else if (key.equals(VERSION)) {
+         version = value;
+     }
+     else if (key.equals(STANDALONE)) {
+         standalone = value;
+     }
+ }
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Constants.
+ ////////////////////////////////////////////////////////////////////
+
+ private final Attributes EMPTY_ATTS = new AttributesImpl();
+ public static final String CDATA_SECTION_ELEMENTS =
+ "cdata-section-elements";
+ public static final String DOCTYPE_PUBLIC = "doctype-public";
+ public static final String DOCTYPE_SYSTEM = "doctype-system";
+ public static final String ENCODING = "encoding";
+ public static final String INDENT = "indent"; // currently ignored
+ public static final String MEDIA_TYPE = "media-type"; // currently ignored
+ public static final String METHOD = "method"; // currently html or xml
+ public static final String OMIT_XML_DECLARATION = "omit-xml-declaration";
+ public static final String STANDALONE = "standalone"; // currently ignored
+ public static final String VERSION = "version";
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal state.
+ ////////////////////////////////////////////////////////////////////
+
+ private Hashtable prefixTable;
+ private Hashtable forcedDeclTable;
+ private Hashtable doneDeclTable;
+ private int elementLevel = 0;
+ private Writer output;
+ private NamespaceSupport nsSupport;
+ private int prefixCounter = 0;
+ private Properties outputProperties;
+ private boolean unicodeMode = false;
+ private String outputEncoding = "";
+ private boolean htmlMode = false;
+ private boolean forceDTD = false;
+ private boolean hasOutputDTD = false;
+ private String overridePublic = null;
+ private String overrideSystem = null;
+ private String version = null;
+ private String standalone = null;
+ private boolean cdataElement = false;
+
+}
+
+// end of XMLWriter.java
diff --git a/src/java/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java b/src/java/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java
new file mode 100644
index 0000000..61f20ad
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/jaxp/JAXPTest.java
@@ -0,0 +1,54 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.io.*;
+
+import javax.xml.parsers.*;
+import org.w3c.dom.Document;
+
+/**
+ * Trivial non-robust test class, to show that TagSoup can be accessed using
+ * JAXP interface.
+ *
+ * <p>It selects the TagSoup SAX factory through the
+ * javax.xml.parsers.SAXParserFactory system property, parses the file
+ * named on the command line with a do-nothing handler, and then builds
+ * a DOM from the same file with the default DocumentBuilderFactory.</p>
+ */
+public class JAXPTest
+{
+    public static void main(String[] args)
+        throws Exception
+    {
+        new JAXPTest().test(args);
+    }
+
+    /**
+     * Run the SAX parse and the DOM build on the single input file.
+     *
+     * @param args Command-line arguments; exactly one file name is expected,
+     *             otherwise a usage message is printed and the JVM exits
+     *             with status 1.
+     */
+    private void test(String[] args)
+        throws Exception
+    {
+        if (args.length != 1) {
+            // getName() gives the bare class name; concatenating the Class
+            // object itself would print "class org.ccil...".
+            System.err.println("Usage: java "+getClass().getName()+" [input-file]");
+            System.exit(1);
+        }
+        File f = new File(args[0]);
+        //System.setProperty("javax.xml.parsers.SAXParserFactory", SAXFactoryImpl.class.toString());
+        System.setProperty("javax.xml.parsers.SAXParserFactory", "org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl");
+
+        SAXParserFactory spf = SAXParserFactory.newInstance();
+        System.out.println("Ok, SAX factory JAXP creates is: "+spf);
+        System.out.println("Let's parse...");
+        spf.newSAXParser().parse(f, new org.xml.sax.helpers.DefaultHandler());
+        System.out.println("Done. And then DOM build:");
+
+        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(f);
+
+        System.out.println("Succesfully built DOM tree from '"+f+"', -> "+doc);
+    }
+}
diff --git a/src/java/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java b/src/java/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java
new file mode 100644
index 0000000..883a3e7
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/jaxp/SAX1ParserAdapter.java
@@ -0,0 +1,232 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.io.IOException;
+import javax.xml.parsers.*;
+
+import org.xml.sax.*;
+
+/**
+ * This is a simpler adapter class that allows using SAX1 interface on top
+ * of basic SAX2 implementation, such as TagSoup.
+ *
+ * @author Tatu Saloranta (cowtowncoder@yahoo.com)
+ * @deprecated
+ */
+public class SAX1ParserAdapter
+ implements org.xml.sax.Parser
+{
+ final XMLReader xmlReader;
+
+ public SAX1ParserAdapter(XMLReader xr)
+ {
+ xmlReader = xr;
+ }
+
+ // Sax1 API impl
+
+ public void parse(InputSource source)
+ throws SAXException
+ {
+ try {
+ xmlReader.parse(source);
+ } catch (IOException ioe) {
+ throw new SAXException(ioe);
+ }
+ }
+
+ public void parse(String systemId)
+ throws SAXException
+ {
+ try {
+ xmlReader.parse(systemId);
+ } catch (IOException ioe) {
+ throw new SAXException(ioe);
+ }
+ }
+
+ /**
+ * @deprecated
+ */
+ public void setDocumentHandler(DocumentHandler h)
+ {
+ xmlReader.setContentHandler(new DocHandlerWrapper(h));
+ }
+
+ public void setDTDHandler(DTDHandler h)
+ {
+ xmlReader.setDTDHandler(h);
+ }
+
+ public void setEntityResolver(EntityResolver r)
+ {
+ xmlReader.setEntityResolver(r);
+ }
+
+ public void setErrorHandler(ErrorHandler h)
+ {
+ xmlReader.setErrorHandler(h);
+ }
+
+ public void setLocale(java.util.Locale locale)
+ throws SAXException
+ {
+ /* I have no idea what this is supposed to do... so let's
+ * throw an exception
+ */
+ throw new SAXNotSupportedException("TagSoup does not implement setLocale() method");
+ }
+
+ // Helper classes:
+
+ /**
+ * We need another helper class to deal with differences between
+ * Sax2 handler (content handler), and Sax1 handler (document handler)
+ * @deprecated
+ */
+ final static class DocHandlerWrapper
+ implements ContentHandler
+ {
+ final DocumentHandler docHandler;
+
+ final AttributesWrapper mAttrWrapper = new AttributesWrapper();
+
+ /**
+ * @deprecated
+ */
+ DocHandlerWrapper(DocumentHandler h)
+ {
+ docHandler = h;
+ }
+
+ public void characters(char[] ch, int start, int length)
+ throws SAXException
+ {
+ docHandler.characters(ch, start, length);
+ }
+
+ public void endDocument()
+ throws SAXException
+ {
+ docHandler.endDocument();
+ }
+
+ public void endElement(String uri, String localName, String qName)
+ throws SAXException
+ {
+ if (qName == null) {
+ qName = localName;
+ }
+ docHandler.endElement(qName);
+ }
+
+ public void endPrefixMapping(String prefix)
+ {
+ // no equivalent in SAX1, ignore
+ }
+
+ public void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException
+ {
+ docHandler.ignorableWhitespace(ch, start, length);
+ }
+
+ public void processingInstruction(String target, String data)
+ throws SAXException
+ {
+ docHandler.processingInstruction(target, data);
+ }
+
+ public void setDocumentLocator(Locator locator)
+ {
+ docHandler.setDocumentLocator(locator);
+ }
+
+ public void skippedEntity(String name)
+ {
+ // no equivalent in SAX1, ignore
+ }
+
+ public void startDocument()
+ throws SAXException
+ {
+ docHandler.startDocument();
+ }
+
+ public void startElement(String uri, String localName, String qName,
+ Attributes attrs)
+ throws SAXException
+ {
+ if (qName == null) {
+ qName = localName;
+ }
+ // Also, need to wrap Attributes to look like AttributeLost
+ mAttrWrapper.setAttributes(attrs);
+ docHandler.startElement(qName, mAttrWrapper);
+ }
+
+ public void startPrefixMapping(String prefix, String uri)
+ {
+ // no equivalent in SAX1, ignore
+ }
+ }
+
+ /**
+ * And one more helper to deal with attribute access differences
+ * @deprecated
+ */
+ final static class AttributesWrapper
+ implements AttributeList
+ {
+ Attributes attrs;
+
+ public AttributesWrapper() { }
+
+ public void setAttributes(Attributes a) {
+ attrs = a;
+ }
+
+ public int getLength()
+ {
+ return attrs.getLength();
+ }
+
+ public String getName(int i)
+ {
+ String n = attrs.getQName(i);
+ return (n == null) ? attrs.getLocalName(i) : n;
+ }
+
+ public String getType(int i)
+ {
+ return attrs.getType(i);
+ }
+
+ public String getType(String name)
+ {
+ return attrs.getType(name);
+ }
+
+ public String getValue(int i)
+ {
+ return attrs.getValue(i);
+ }
+
+ public String getValue(String name)
+ {
+ return attrs.getValue(name);
+ }
+ }
+}
diff --git a/src/java/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java b/src/java/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java
new file mode 100644
index 0000000..780adfe
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/jaxp/SAXFactoryImpl.java
@@ -0,0 +1,114 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.util.*;
+import javax.xml.parsers.*;
+
+import org.xml.sax.*;
+
+/**
+ * This is a simple implementation of JAXP {@link SAXParserFactory},
+ * to allow easier integration of TagSoup with the default JDK
+ * xml processing stack.
+ *
+ * @author Tatu Saloranta (cowtowncoder@yahoo.com)
+ */
+public class SAXFactoryImpl
+    extends SAXParserFactory
+{
+    /**
+     * The easiest way to test validity of features to set is to use
+     * a prototype object. Currently this is actually not a real prototype,
+     * in the sense that the configuration is actually passed separately
+     * (as opposed to instantiating new readers from this prototype), but
+     * this could be changed in future, if TagSoup parser object allowed
+     * cloning.
+     */
+    private SAXParserImpl prototypeParser = null;
+
+    /**
+     * This Map contains explicitly set features that can be successfully
+     * set for XMLReader instances. Temporary storage is needed due to
+     * JAXP design: multiple readers can be instantiated from a single
+     * factory, and settings can be changed between instantiations.
+     *<p>
+     * Note that we wouldn't need this map if we could create instances
+     * directly using the prototype instance.
+     */
+    private HashMap features = null;
+
+    public SAXFactoryImpl()
+    {
+        super();
+    }
+
+    // // // JAXP API implementation:
+
+    /**
+     * Creates a new instance of <code>SAXParser</code> using the currently
+     * configured factory parameters.
+     *
+     * @exception ParserConfigurationException If applying a stored
+     *            feature fails; the SAXException is kept as the cause.
+     */
+    public SAXParser newSAXParser()
+        throws ParserConfigurationException
+    {
+        try {
+            return SAXParserImpl.newInstance(features);
+        } catch (SAXException se) {
+            // Translate to ParserConfigurationException.  It has no
+            // constructor taking a cause, so chain it explicitly.
+            ParserConfigurationException pce =
+                new ParserConfigurationException(se.getMessage());
+            pce.initCause(se);
+            throw pce;
+        }
+    }
+
+    /**
+     * Defines that the specified feature is to be enabled/disabled (as
+     * per second argument) on reader instances created by this
+     * factory.
+     */
+    public void setFeature(String name, boolean value)
+        throws ParserConfigurationException, SAXNotRecognizedException,
+               SAXNotSupportedException
+    {
+        // First, let's see if it's a valid call
+        getPrototype().setFeature(name, value);
+
+        // No exception was thrown, so the feature is good; record it:
+        if (features == null) {
+            // Let's retain the ordering as well
+            features = new LinkedHashMap();
+        }
+        features.put(name, value ? Boolean.TRUE : Boolean.FALSE);
+    }
+
+    /**
+     * Returns whether the specified feature will be enabled or disabled
+     * on reader instances constructed by this factory.  The answer is
+     * read from the prototype parser.
+     */
+    public boolean getFeature(String name)
+        throws ParserConfigurationException, SAXNotRecognizedException,
+               SAXNotSupportedException
+    {
+        return getPrototype().getFeature(name);
+    }
+
+    // // // Internal methods
+
+    /** Returns the prototype parser, creating it on first use. */
+    private SAXParserImpl getPrototype()
+    {
+        if (prototypeParser == null) {
+            prototypeParser = new SAXParserImpl();
+        }
+        return prototypeParser;
+    }
+}
diff --git a/src/java/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java b/src/java/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java
new file mode 100644
index 0000000..75f3df4
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/jaxp/SAXParserImpl.java
@@ -0,0 +1,113 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+package org.ccil.cowan.tagsoup.jaxp;
+
+import java.io.*;
+import java.util.*;
+import javax.xml.parsers.*;
+
+import org.xml.sax.*;
+
+import org.ccil.cowan.tagsoup.Parser;
+
+/**
+ * This is a simple implementation of JAXP {@link SAXParser},
+ * to allow easier integration of TagSoup with the default JDK
+ * xml processing stack.
+ *
+ * @author Tatu Saloranta (cowtowncoder@yahoo.com)
+ */
+public class SAXParserImpl
+    extends SAXParser
+{
+    /** The TagSoup parser every call is delegated to. */
+    final org.ccil.cowan.tagsoup.Parser parser;
+
+    protected SAXParserImpl() // used by factory, for prototypes
+    {
+        super();
+        parser = new org.ccil.cowan.tagsoup.Parser();
+    }
+
+    /**
+     * Create a parser and apply the given features to it.
+     *
+     * @param features Map from feature name (String) to Boolean, applied
+     *        in the map's iteration order; may be null for no features.
+     * @return The configured parser.
+     * @exception SAXException If a feature is rejected by the parser.
+     */
+    public static SAXParserImpl newInstance(Map features)
+        throws SAXException
+    {
+        SAXParserImpl impl = new SAXParserImpl();
+        if (features != null) {
+            Iterator it = features.entrySet().iterator();
+            while (it.hasNext()) {
+                Map.Entry entry = (Map.Entry) it.next();
+                impl.setFeature((String) entry.getKey(), ((Boolean) entry.getValue()).booleanValue());
+            }
+        }
+        return impl;
+    }
+
+    // // // JAXP API implementation:
+
+    /**
+     * To support SAX1 interface, we'll need to use an adapter.
+     * @deprecated
+     */
+    public org.xml.sax.Parser getParser()
+        throws SAXException
+    {
+        return new SAX1ParserAdapter(parser);
+    }
+
+    public XMLReader getXMLReader() { return parser; }
+
+    public boolean isNamespaceAware()
+    {
+        try {
+            return parser.getFeature(Parser.namespacesFeature);
+        } catch (SAXException sex) { // should never happen... so:
+            // Same message as before, with the original exception as cause.
+            throw new RuntimeException(sex.getMessage(), sex);
+        }
+    }
+
+    public boolean isValidating()
+    {
+        try {
+            return parser.getFeature(Parser.validationFeature);
+        } catch (SAXException sex) { // should never happen... so:
+            // Same message as before, with the original exception as cause.
+            throw new RuntimeException(sex.getMessage(), sex);
+        }
+    }
+
+    public void setProperty(String name, Object value)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        parser.setProperty(name, value);
+    }
+
+    public Object getProperty(String name)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        return parser.getProperty(name);
+    }
+
+    // // // Additional convenience methods
+
+    public void setFeature(String name, boolean value)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        parser.setFeature(name, value);
+    }
+
+    public boolean getFeature(String name)
+        throws SAXNotRecognizedException, SAXNotSupportedException
+    {
+        return parser.getFeature(name);
+    }
+}
diff --git a/src/templates/org/ccil/cowan/tagsoup/HTMLModels.java b/src/templates/org/ccil/cowan/tagsoup/HTMLModels.java
new file mode 100644
index 0000000..e982bcf
--- /dev/null
+++ b/src/templates/org/ccil/cowan/tagsoup/HTMLModels.java
@@ -0,0 +1,31 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Defines models for HTMLSchema
+
+package org.ccil.cowan.tagsoup;
+
+/**
+This interface contains generated constants representing HTML content
+models.  Logically, it is part of HTMLSchema, but it is more
+convenient to generate the constants into a separate interface.
+*/
+public interface HTMLModels {
+
+ // Start of model definitions
+ // NOTE(review): the token below is a template placeholder, not Java;
+ // presumably the build replaces it with the generated constants
+ // (see tssl/tssl-models.xslt) -- confirm against build.xml.
+ @@MODEL_DEFINITIONS@@
+
+ // End of model definitions
+
+ }
diff --git a/src/templates/org/ccil/cowan/tagsoup/HTMLScanner.java b/src/templates/org/ccil/cowan/tagsoup/HTMLScanner.java
new file mode 100644
index 0000000..568493a
--- /dev/null
+++ b/src/templates/org/ccil/cowan/tagsoup/HTMLScanner.java
@@ -0,0 +1,427 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+package org.ccil.cowan.tagsoup;
+import java.io.*;
+import org.xml.sax.SAXException;
+import org.xml.sax.Locator;
+
+/**
+This class implements a table-driven scanner for HTML, allowing for lots of
+defects. It implements the Scanner interface, which accepts a Reader
+object to fetch characters from and a ScanHandler object to report lexical
+events to.
+*/
+
+public class HTMLScanner implements Scanner, Locator {
+
+ // Start of state table
+ @@STATE_TABLE@@
+ // End of state table
+
+ private String thePublicid; // Locator state
+ private String theSystemid;
+ private int theLastLine;
+ private int theLastColumn;
+ private int theCurrentLine;
+ private int theCurrentColumn;
+
+ int theState; // Current state
+ int theNextState; // Next state
+ char[] theOutputBuffer = new char[200]; // Output buffer
+ int theSize; // Current buffer size
+ int[] theWinMap = { // Windows chars map
+ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178};
+
+ /**
+  * Push a character back onto the reader, ignoring end-of-file.
+  * Compensates for a bug in PushbackReader that allows pushing
+  * back EOF.
+  *
+  * @param r The reader to push back onto.
+  * @param c The character read, or -1 for end-of-file (not pushed back).
+  */
+ private void unread(PushbackReader r, int c) throws IOException {
+     if (c == -1) {
+         return;
+     }
+     r.unread(c);
+ }
+
+ // Locator implementation
+ // The getters report theLastLine/theLastColumn rather than the current
+ // counters that scan() advances per character.
+ // NOTE(review): presumably the "last" pair is copied from the current
+ // pair by mark(), which is outside this view -- confirm.
+
+ public int getLineNumber() {
+ return theLastLine;
+ }
+ public int getColumnNumber() {
+ return theLastColumn;
+ }
+ // Public and system ids are whatever resetDocumentLocator() stored.
+ public String getPublicId() {
+ return thePublicid;
+ }
+ public String getSystemId() {
+ return theSystemid;
+ }
+
+
+ // Scanner implementation
+
+ /**
+ Reset document locator: store the given ids and set the last and
+ current line and column counters back to zero.
+ @param publicid Public id
+ @param systemid System id
+ */
+
+ public void resetDocumentLocator(String publicid, String systemid) {
+     thePublicid = publicid;
+     theSystemid = systemid;
+     theCurrentColumn = 0;
+     theCurrentLine = 0;
+     theLastColumn = 0;
+     theLastLine = 0;
+ }
+
+ /**
+ Scan HTML source, reporting lexical events.
+ @param r0 Reader that provides characters
+ @param h ScanHandler that accepts lexical events.
+ */
+
+ public void scan(Reader r0, ScanHandler h) throws IOException, SAXException {
+ theState = S_PCDATA;
+ PushbackReader r;
+ if (r0 instanceof PushbackReader) {
+ r = (PushbackReader)r0;
+ }
+ else if (r0 instanceof BufferedReader) {
+ r = new PushbackReader(r0);
+ }
+ else {
+ r = new PushbackReader(new BufferedReader(r0));
+ }
+
+ int firstChar = r.read(); // Remove any leading BOM
+ if (firstChar != '\uFEFF') unread(r, firstChar);
+
+ while (theState != S_DONE) {
+ int ch = r.read();
+
+ // Process control characters
+ if (ch >= 0x80 && ch <= 0x9F) ch = theWinMap[ch-0x80];
+
+ if (ch == '\r') {
+ ch = r.read(); // expect LF next
+ if (ch != '\n') {
+ unread(r, ch); // nope
+ ch = '\n';
+ }
+ }
+
+ if (ch == '\n') {
+ theCurrentLine++;
+ theCurrentColumn = 0;
+ }
+ else {
+ theCurrentColumn++;
+ }
+
+ if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1)) continue;
+
+ // Search state table
+ int action = 0;
+ for (int i = 0; i < statetable.length; i += 4) {
+ if (theState != statetable[i]) {
+ if (action != 0) break;
+ continue;
+ }
+ if (statetable[i+1] == 0) {
+ action = statetable[i+2];
+ theNextState = statetable[i+3];
+ }
+ else if (statetable[i+1] == ch) {
+ action = statetable[i+2];
+ theNextState = statetable[i+3];
+ break;
+ }
+ }
+// System.err.println("In " + debug_statenames[theState] + " got " + nicechar(ch) + " doing " + debug_actionnames[action] + " then " + debug_statenames[theNextState]);
+ switch (action) {
+ case 0:
+ throw new Error(
+"HTMLScanner can't cope with " + Integer.toString(ch) + " in state " +
+Integer.toString(theState));
+ case A_ADUP:
+ h.adup(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_ADUP_SAVE:
+ h.adup(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ save(ch, h);
+ break;
+ case A_ADUP_STAGC:
+ h.adup(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_ANAME:
+ h.aname(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_ANAME_ADUP:
+ h.aname(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.adup(theOutputBuffer, 0, theSize);
+ break;
+ case A_ANAME_ADUP_STAGC:
+ h.aname(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.adup(theOutputBuffer, 0, theSize);
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_AVAL:
+ h.aval(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_AVAL_STAGC:
+ h.aval(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_CDATA:
+ mark();
+ // suppress the final "]]" in the buffer
+ if (theSize > 1) theSize -= 2;
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_ENTITY_START:
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ save(ch, h);
+ break;
+ case A_ENTITY:
+ mark();
+ char ch1 = (char)ch;
+// System.out.println("Got " + ch1 + " in state " + ((theState == S_ENT) ? "S_ENT" : ((theState == S_NCR) ? "S_NCR" : "UNK")));
+ if (theState == S_ENT && ch1 == '#') {
+ theNextState = S_NCR;
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_NCR && (ch1 == 'x' || ch1 == 'X')) {
+ theNextState = S_XNCR;
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_ENT && Character.isLetterOrDigit(ch1)) {
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_NCR && Character.isDigit(ch1)) {
+ save(ch, h);
+ break;
+ }
+ else if (theState == S_XNCR && (Character.isDigit(ch1) || "abcdefABCDEF".indexOf(ch1) != -1)) {
+ save(ch, h);
+ break;
+ }
+
+ // The whole entity reference has been collected
+// System.err.println("%%" + new String(theOutputBuffer, 0, theSize));
+ h.entity(theOutputBuffer, 1, theSize - 1);
+ int ent = h.getEntity();
+// System.err.println("%% value = " + ent);
+ if (ent != 0) {
+ theSize = 0;
+ if (ent >= 0x80 && ent <= 0x9F) {
+ ent = theWinMap[ent-0x80];
+ }
+ if (ent < 0x20) {
+ // Control becomes space
+ ent = 0x20;
+ }
+ else if (ent >= 0xD800 && ent <= 0xDFFF) {
+ // Surrogates get dropped
+ ent = 0;
+ }
+ else if (ent <= 0xFFFF) {
+ // BMP character
+ save(ent, h);
+ }
+ else {
+ // Astral converted to two surrogates
+ ent -= 0x10000;
+ save((ent>>10) + 0xD800, h);
+ save((ent&0x3FF) + 0xDC00, h);
+ }
+ if (ch != ';') {
+ unread(r, ch);
+ theCurrentColumn--;
+ }
+ }
+ else {
+ unread(r, ch);
+ theCurrentColumn--;
+ }
+ theNextState = S_PCDATA;
+ break;
+ case A_ETAG:
+ h.etag(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_DECL:
+ h.decl(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_GI:
+ h.gi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_GI_STAGC:
+ h.gi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stagc(theOutputBuffer, 0, theSize);
+ break;
+ case A_LT:
+ mark();
+ save('<', h);
+ save(ch, h);
+ break;
+ case A_LT_PCDATA:
+ mark();
+ save('<', h);
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_PCDATA:
+ mark();
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_CMNT:
+ mark();
+ h.cmnt(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_MINUS3:
+ save('-', h);
+ save(' ', h);
+ break;
+ case A_MINUS2:
+ save('-', h);
+ save(' ', h);
+ // fall through into A_MINUS
+ case A_MINUS:
+ save('-', h);
+ save(ch, h);
+ break;
+ case A_PI:
+ mark();
+ h.pi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_PITARGET:
+ h.pitarget(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_PITARGET_PI:
+ h.pitarget(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.pi(theOutputBuffer, 0, theSize);
+ break;
+ case A_SAVE:
+ save(ch, h);
+ break;
+ case A_SKIP:
+ break;
+ case A_SP:
+ save(' ', h);
+ break;
+ case A_STAGC:
+ h.stagc(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ case A_EMPTYTAG:
+ mark();
+// System.err.println("%%% Empty tag seen");
+ if (theSize > 0) h.gi(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ h.stage(theOutputBuffer, 0, theSize);
+ break;
+ case A_UNGET:
+ unread(r, ch);
+ theCurrentColumn--;
+ break;
+ case A_UNSAVE_PCDATA:
+ if (theSize > 0) theSize--;
+ h.pcdata(theOutputBuffer, 0, theSize);
+ theSize = 0;
+ break;
+ default:
+ throw new Error("Can't process state " + action);
+ }
+ theState = theNextState;
+ }
+ h.eof(theOutputBuffer, 0, 0);
+ }
+
+ /**
+ * Remember the scanner's current line and column as the most recent
+ * "point of interest" (the start of a tag, CDATA section,
+ * processing instruction, etc.).
+ */
+
+ private void mark() {
+     theLastLine = theCurrentLine;
+     theLastColumn = theCurrentColumn;
+ }
+
+ /**
+ A callback for the ScanHandler that lets it force the lexer into
+ the CDATA content state (no markup is recognized except
+ the end of the element). Only the next state is changed here.
+ */
+
+ public void startCDATA() {
+     theNextState = S_CDATA;
+ }
+
+ /**
+ Append one character to the output buffer, making room first when
+ fewer than 20 free slots would otherwise remain.
+ In the S_PCDATA and S_CDATA states the buffered text is passed to
+ the handler as a chunk of PCDATA and the buffer is emptied; in every
+ other state the buffer is replaced by one twice as large holding the
+ same contents.
+ @param ch the character to store (narrowed to a char)
+ @param h the handler that receives any flushed PCDATA
+ */
+
+ private void save(int ch, ScanHandler h) throws IOException, SAXException {
+     if (theSize >= theOutputBuffer.length - 20) {
+         if (theState == S_PCDATA || theState == S_CDATA) {
+             // Return a buffer-sized chunk of PCDATA
+             h.pcdata(theOutputBuffer, 0, theSize);
+             theSize = 0;
+         }
+         else {
+             // Grow the buffer size. Only the first theSize chars hold
+             // data, so copy exactly that many (not theSize+1).
+             char[] newOutputBuffer = new char[theOutputBuffer.length * 2];
+             System.arraycopy(theOutputBuffer, 0, newOutputBuffer, 0, theSize);
+             theOutputBuffer = newOutputBuffer;
+         }
+     }
+     theOutputBuffer[theSize++] = (char)ch;
+ }
+
+ /**
+ Test driver. Scans HTML read from the standard input (decoded as
+ UTF-8) and writes the result through a PYXWriter to the standard
+ output (encoded as UTF-8), closing the output writer afterwards.
+ */
+
+ public static void main(String[] argv) throws IOException, SAXException {
+     Scanner scanner = new HTMLScanner();
+     Reader in = new InputStreamReader(System.in, "UTF-8");
+     Writer out = new OutputStreamWriter(System.out, "UTF-8");
+     scanner.scan(in, new PYXWriter(out));
+     out.close();
+ }
+
+
+ /**
+ Render a character code readably (referenced by the commented-out
+ trace statement in the scan loop). A newline is shown as backslash-n,
+ any other value below 32 as "0x" followed by its hex digits, and
+ everything else as the character enclosed in single quotes.
+ */
+ private static String nicechar(int in) {
+     if (in == '\n') {
+         return "\\n";
+     }
+     else if (in < 32) {
+         return "0x" + Integer.toHexString(in);
+     }
+     else {
+         return "'" + ((char)in) + "'";
+     }
+ }
+
+ }
diff --git a/src/templates/org/ccil/cowan/tagsoup/HTMLSchema.java b/src/templates/org/ccil/cowan/tagsoup/HTMLSchema.java
new file mode 100644
index 0000000..8def657
--- /dev/null
+++ b/src/templates/org/ccil/cowan/tagsoup/HTMLSchema.java
@@ -0,0 +1,38 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+package org.ccil.cowan.tagsoup;
+
+/**
+This class provides a Schema that has been preinitialized with HTML
+elements, attributes, and character entity declarations. All the declarations
+normally provided with HTML 4.01 are given, plus some that are IE-specific
+and NS4-specific. Attribute declarations of type CDATA with no default
+value are not included.
+*/
+public class HTMLSchema extends Schema implements HTMLModels {
+
+ /**
+ Returns a newly constructed HTMLSchema object independent of
+ any existing ones.
+ */
+
+ public HTMLSchema() {
+ // NOTE(review): the @@SCHEMA_CALLS@@ marker below looks like a build-time
+ // placeholder; presumably it is replaced by the Java code that
+ // tssl/tssl.xslt generates for insertion into this file (confirm in build.xml).
+ // Start of Schema calls
+ @@SCHEMA_CALLS@@
+ // End of Schema calls
+ }
+
+
+ }
diff --git a/stml/stml.rnc b/stml/stml.rnc
new file mode 100644
index 0000000..b767640
--- /dev/null
+++ b/stml/stml.rnc
@@ -0,0 +1,49 @@
+# This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+#
+# TagSoup is licensed under the Apache License,
+# Version 2.0. You may obtain a copy of this license at
+# http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+# additional legal rights not granted by this license.
+#
+# TagSoup is distributed in the hope that it will be useful, but
+# unless required by applicable law or agreed to in writing, TagSoup
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+# OF ANY KIND, either express or implied; not even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+# State Table Markup Language
+# Grammar for the XML description of a state machine: a list of symbols,
+# a list of actions, and a list of states with their transitions.
+default namespace = "http://www.ccil.org/~cowan/XML/tagsoup/stml"
+start = statetable
+
+## Root element: any number of symbols, then at least one action,
+## then at least one state, in that order
+statetable = element statetable {
+ attribute version {"1.0"},
+ attribute id {xsd:ID}?,
+ symbol*,
+ action+,
+ state+
+ }
+
+## A symbol specifies a non-character input to the state machine
+# (stml/stml.xslt gives special treatment to the ids EOF, LF, S and default)
+symbol = element symbol {
+ attribute id {xsd:ID}
+ }
+
+## An action is the name for what the state machine does
+action = element action {
+ attribute id {xsd:ID}
+ }
+
+## A state specifies an internal state of the machine
+## and holds the transitions that leave it
+state = element state {
+ attribute id {xsd:ID},
+ tr*
+ }
+
+## A tr specifies a state transition: the input is given by exactly one
+## of symbol (a reference to a symbol id) or char (a string of length 1);
+## action and newstate are ID references
+tr = element tr {
+ (attribute symbol {xsd:IDREF} |
+ attribute char {xsd:string {length = "1"}}),
+ attribute action {xsd:IDREF},
+ attribute newstate {xsd:IDREF}
+ }
diff --git a/stml/stml.xslt b/stml/stml.xslt
new file mode 100644
index 0000000..4cc6c9d
--- /dev/null
+++ b/stml/stml.xslt
@@ -0,0 +1,150 @@
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:stml="http://www.ccil.org/~cowan/XML/tagsoup/stml"
+ version="1.0">
+
+ <xsl:output method="text"/>
+
+ <xsl:strip-space elements="*"/>
+
+ <!-- The main template. Generates declarations for states and
+ actions, then the statetable itself, and then the debug_actionnames
+ and debug_statenames string arrays. States and actions are numbered
+ from 1 in the order of their sorted ids; each debug array begins with
+ an empty string, so entry N of the array is the name numbered N. -->
+ <xsl:template match="stml:statetable">
+ <xsl:apply-templates select="stml:state">
+ <xsl:sort select="@id"/>
+ </xsl:apply-templates>
+
+ <xsl:apply-templates select="stml:action">
+ <xsl:sort select="@id"/>
+ </xsl:apply-templates>
+
+ <xsl:text>&#x9;private static int[] statetable = {&#xA;</xsl:text>
+ <xsl:apply-templates select="stml:state/stml:tr">
+ <xsl:sort select="../@id"/>
+ <xsl:sort select="@symbol"/>
+ <xsl:sort select="@char"/>
+ </xsl:apply-templates>
+ <xsl:text>&#xA;&#x9;};&#xA;</xsl:text>
+
+ <xsl:text>&#x9;private static final String[] debug_actionnames = { ""</xsl:text>
+ <xsl:apply-templates select="stml:action" mode="debug">
+ <xsl:sort select="@id"/>
+ </xsl:apply-templates>
+ <xsl:text>};&#xA;</xsl:text>
+
+ <xsl:text>&#x9;private static final String[] debug_statenames = { ""</xsl:text>
+ <xsl:apply-templates select="stml:state" mode="debug">
+ <xsl:sort select="@id"/>
+ </xsl:apply-templates>
+ <xsl:text>};&#xA;</xsl:text>
+
+ <xsl:text>&#xA;</xsl:text>
+ </xsl:template>
+
+ <!-- Emit one "private static final int" constant for a state; its
+ value is the state's position in the list being processed. -->
+ <xsl:template match="stml:state">
+   <xsl:value-of select="concat('&#x9;private static final int ', @id, ' = ', position(), ';&#xA;')"/>
+ </xsl:template>
+
+ <!-- Emit one "private static final int" constant for an action; its
+ value is the action's position in the list being processed. -->
+ <xsl:template match="stml:action">
+   <xsl:value-of select="concat('&#x9;private static final int ', @id, ' = ', position(), ';&#xA;')"/>
+ </xsl:template>
+
+ <!-- Generate the statetable row(s) for a single transition.
+ The symbols EOF, LF and default are written as -1, '\n' and 0, and
+ the symbol S expands to three rows (space, newline, tab). Any other
+ transition is written as a Java char literal built from @char; the
+ apostrophe and the backslash are escaped so that the literal is
+ valid Java. -->
+ <xsl:template match="stml:tr">
+   <xsl:choose>
+     <xsl:when test="@symbol = 'EOF'">
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;-1&quot;"/>
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:when test="@symbol = 'LF'">
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;'\n'&quot;"/>
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:when test="@symbol = 'default'">
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;0&quot;"/>
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:when test="@char = &quot;&apos;&quot;">
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;'\''&quot;"/>
+       </xsl:call-template>
+     </xsl:when>
+     <!-- A bare backslash would otherwise produce the invalid literal '\' -->
+     <xsl:when test="@char = '\'">
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;'\\'&quot;"/>
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:when test="@symbol = 'S'">
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;' '&quot;"/>
+       </xsl:call-template>
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;'\n'&quot;"/>
+       </xsl:call-template>
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char" select="&quot;'\t'&quot;"/>
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:call-template name="dump-tr">
+         <xsl:with-param name="char"
+           select="concat(&quot;'&quot;, @char, &quot;'&quot;)"/>
+       </xsl:call-template>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Subroutine that prints one statetable row for the current tr:
+ the id of the enclosing state, the supplied character expression,
+ the action and the new state, separated by commas. -->
+ <xsl:template name="dump-tr">
+   <xsl:param name="char"/>
+   <xsl:value-of select="concat('&#x9;&#x9;', ../@id, ', ', $char, ', ', @action, ', ', @newstate, ',&#xA;')"/>
+ </xsl:template>
+
+ <!-- Generate one quoted action name, preceded by a comma, as an
+ entry of the debug_actionnames array. The mode keeps XSLT from
+ confusing this with the regular action template that produces
+ the action declarations. -->
+ <xsl:template match="stml:action" mode="debug">
+   <xsl:value-of select="concat(', &quot;', @id, '&quot;')"/>
+ </xsl:template>
+
+ <!-- Generate one quoted state name, preceded by a comma, as an
+ entry of the debug_statenames array. -->
+ <xsl:template match="stml:state" mode="debug">
+   <xsl:value-of select="concat(', &quot;', @id, '&quot;')"/>
+ </xsl:template>
+
+</xsl:transform>
diff --git a/tagsoup.1 b/tagsoup.1
new file mode 100644
index 0000000..fba6ae4
--- /dev/null
+++ b/tagsoup.1
@@ -0,0 +1,183 @@
+.\" This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+.\"
+.\" TagSoup is licensed under the Apache License,
+.\" Version 2.0. You may obtain a copy of this license at
+.\" http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+.\" additional legal rights not granted by this license.
+.\"
+.\" TagSoup is distributed in the hope that it will be useful, but
+.\" unless required by applicable law or agreed to in writing, TagSoup
+.\" is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+.\" OF ANY KIND, either express or implied; not even the implied warranty
+.\" of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+.\"
+.TH TAGSOUP "1" "January 2008" "TagSoup 1.2" "User Commands"
+.SH NAME
+tagsoup \- convert nasty, ugly HTML to clean XHTML
+.SH SYNOPSIS
+.B java -jar tagsoup-1.2.jar
+[
+.I options
+] [
+.I files
+]
+.SH DESCRIPTION
+.\" Add any additional description here
+.PP
+Rectify arbitrary HTML into clean XHTML,
+using a tailored description of HTML.
+The output will be well-formed XML, but not necessarily
+.I valid
+XHTML.
+.PP
+.TP
+.B --files
+multiple input
+.I files
+should be processed into corresponding output files
+.TP
+.BI --encoding= encoding
+specifies the encoding of input files
+.TP
+.BI --output-encoding= encoding
+specifies the encoding of the output
+(if the encoding name begins with ``utf'',
+the output will not contain character entities;
+otherwise, all non-ASCII characters are
+represented as entities)
+.TP
+.B --html
+output rectified HTML rather than XML,
+omitting the XML declaration
+and any namespace declarations
+.TP
+.B --method=html
+output rectified HTML rather than XML
+(end-tags are omitted for empty elements,
+and no character escaping is done in
+script and style elements)
+.TP
+.B --omit-xml-declaration
+omit the XML declaration
+.TP
+.B --lexical
+output lexical features (specifically comments and any DOCTYPE declaration)
+.TP
+.B --nons
+suppress namespaces in output
+.TP
+.B --nobogons
+suppress unknown non-HTML elements in output
+.TP
+.B --nodefaults
+suppress default attribute values
+.TP
+.B --nocolons
+change explicit colons
+in element and attribute names
+to underscores
+.TP
+.B --norestart
+don't restart any restartable elements
+.TP
+.B --ignorable
+pass through ignorable whitespace
+(whitespace in element-only content)
+via SAX method handler ignorableWhitespace
+.TP
+.B --any
+treat unknown non-HTML elements as allowing any content (default)
+.TP
+.B --emptybogons
+treat unknown non-HTML elements as empty elements
+.TP
+.B --norootbogons
+don't allow unknown non-HTML elements to be root elements
+.TP
+.BI --doctype-system= system-id
+force DOCTYPE declaration to be output with specified system identifier
+.TP
+.BI --doctype-public= public-id
+force DOCTYPE declaration to be output with specified public identifier
+.TP
+.B --standalone=[yes|no]
+specify standalone pseudo-attribute in output XML declaration
+.TP
+.BI --version= version
+specify version pseudo-attribute in output XML declaration
+(does not affect actual version of XML output)
+.TP
+.B --nocdata
+treat the CDATA-content elements
+.I script
+and
+.I style
+as ordinary elements
+(mostly for testing)
+.TP
+.B --pyx
+output PYX format rather than XML
+(mostly for testing)
+.TP
+.B --pyxin
+input is PYX-format HTML
+(mostly for testing)
+.TP
+.B --reuse
+reuse the same Parser object internally
+(for testing only)
+.TP
+.B --help
+output basic help
+.TP
+.B --version
+output version number
+.PP
+.B TagSoup
+is a parser and reformatter for nasty, ugly HTML.
+Its normal processing mode is to accept HTML files on the command line,
+or from the standard input if none are given, and output them
+as clean XML
+to the standard output. By default, the encoding is assumed to be the
+platform-local encoding on input and UTF-8 on output; the
+.B --encoding
+and
+.B --output-encoding
+options override these defaults.
+.PP
+When the
+.B --files
+option is given, each input file is processed into an output file of the
+corresponding name, with the extension changed to
+.IR xhtml .
+If the extension is already
+.IR xhtml ,
+it is changed to
+.IR xhtml_ .
+.PP
+TagSoup will repair, by whatever means necessary,
+violations of XML well-formedness. In particular, it will fix up
+malformed attribute names and supply missing attribute-value quotation marks.
+More significantly, it supplies end-tags where HTML allows them
+to be omitted, and sometimes where it doesn't. It will even supply
+start-tags where necessary; for example, if a document begins with a
+<li> tag, TagSoup will automatically prefix it with <html><body><ul>.
+.PP
+.SH BUGS
+TagSoup can be fooled by missing close quotes after attribute values, and by
+incorrect character encodings (it does not contain an encoding guesser).
+.PP
+TagSoup doesn't understand namespace declarations, which are not properly
+part of HTML. Instead, any element or attribute name beginning
+.IR foo :
+will be put into the artificial namespace
+.RI urn:x-prefix: foo .
+.PP
+For the same reasons, namespace-qualified attributes like
+xml:space
+can't be returned as default values,
+though an explicit attribute in the xml namespace
+will be returned with the proper namespace URI.
+.SH AUTHOR
+John Cowan <cowan@ccil.org>
+.SH COPYRIGHT
+Copyright \(co 2002-2008 John Cowan
+.br
+TagSoup is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/tagsoup.txt b/tagsoup.txt
new file mode 100644
index 0000000..91b5090
--- /dev/null
+++ b/tagsoup.txt
@@ -0,0 +1,160 @@
+This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+
+TagSoup is licensed under the Apache License,
+Version 2.0. You may obtain a copy of this license at
+http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+additional legal rights not granted by this license.
+
+TagSoup is distributed in the hope that it will be useful, but
+unless required by applicable law or agreed to in writing, TagSoup
+is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+OF ANY KIND, either express or implied; not even the implied warranty
+of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+TAGSOUP(1) User Commands TAGSOUP(1)
+
+
+
+NAME
+ tagsoup - convert nasty, ugly HTML to clean XHTML
+
+SYNOPSIS
+ java -jar tagsoup-1.2.jar [ options ] [ files ]
+
+DESCRIPTION
+ Rectify arbitrary HTML into clean XHTML, using a tailored description
+ of HTML. The output will be well-formed XML, but not necessarily valid
+ XHTML.
+
+
+ --files
+ multiple input files should be processed into corresponding out‐
+ put files
+
+ --encoding=encoding
+ specifies the encoding of input files
+
+ --output-encoding=encoding
+ specifies the encoding of the output (if the encoding name
+ begins with ‘‘utf’’, the output will not contain character enti‐
+ ties; otherwise, all non-ASCII characters are represented as
+ entities)
+
+ --html output rectified HTML rather than XML, omitting the XML declara‐
+ tion and any namespace declarations
+
+ --method=html
+ output rectified HTML rather than XML (end-tags are omitted for
+ empty elements, and no character escaping is done in script and
+ style elements)
+
+ --omit-xml-declaration
+ omit the XML declaration
+
+ --lexical
+ output lexical features (specifically comments and any DOCTYPE
+ declaration)
+
+ --nons suppress namespaces in output
+
+ --nobogons
+ suppress unknown non-HTML elements in output
+
+ --nodefaults
+ suppress default attribute values
+
+ --nocolons
+ change explicit colons in element and attribute names to under‐
+ scores
+
+ --norestart
+ don’t restart any restartable elements
+
+ --ignorable
+ pass through ignorable whitespace (whitespace in element-only
+ content) via SAX method handler ignorableWhitespace
+
+ --any treat unknown non-HTML elements as allowing any content
+ (default)
+
+ --emptybogons
+ treat unknown non-HTML elements as empty elements
+
+ --norootbogons
+ don’t allow unknown non-HTML elements to be root elements
+
+ --doctype-system=system-id
+ force DOCTYPE declaration to be output with specified system
+ identifier
+
+ --doctype-public=public-id
+ force DOCTYPE declaration to be output with specified public
+ identifier
+
+ --standalone=[yes|no]
+ specify standalone pseudo-attribute in output XML declaration
+
+ --version=version
+ specify version pseudo-attribute in output XML declaration (does
+ not affect actual version of XML output)
+
+ --nocdata
+ treat the CDATA-content elements script and style as ordinary
+ elements (mostly for testing)
+
+ --pyx output PYX format rather than XML (mostly for testing)
+
+ --pyxin
+ input is PYX-format HTML (mostly for testing)
+
+ --reuse
+ reuse the same Parser object internally (for testing only)
+
+ --help output basic help
+
+ --version
+ output version number
+
+ TagSoup is a parser and reformatter for nasty, ugly HTML. Its normal
+ processing mode is to accept HTML files on the command line, or from
+ the standard input if none are given, and output them as clean XML to
+ the standard output. By default, the encoding is assumed to be the
+ platform-local encoding on input and UTF-8 on output; the --encoding
+ and --output-encoding options override these defaults.
+
+ When the --files option is given, each input file is processed into an
+ output file of the corresponding name, with the extension changed to
+ xhtml. If the extension is already xhtml, it is changed to xhtml_.
+
+ TagSoup will repair, by whatever means necessary, violations of XML
+ well-formedness. In particular, it will fix up malformed attribute
+ names and supply missing attribute-value quotation marks. More signif‐
+ icantly, it supplies end-tags where HTML allows them to be omitted, and
+ sometimes where it doesn’t. It will even supply start-tags where nec‐
+ essary; for example, if a document begins with a <li> tag, TagSoup will
+ automatically prefix it with <html><body><ul>.
+
+
+BUGS
+ TagSoup can be fooled by missing close quotes after attribute values,
+ and by incorrect character encodings (it does not contain an encoding
+ guesser).
+
+ TagSoup doesn’t understand namespace declarations, which are not prop‐
+ erly part of HTML. Instead, any element or attribute name beginning
+ foo: will be put into the artificial namespace urn:x-prefix:foo.
+
+ For the same reasons, namespace-qualified attributes like xml:space
+ can’t be returned as default values, though an explicit attribute in
+ the xml namespace will be returned with the proper namespace URI.
+
+AUTHOR
+ John Cowan <cowan@ccil.org>
+
+COPYRIGHT
+ Copyright © 2002-2008 John Cowan
+ TagSoup is free software; see the source for copying conditions. There
+ is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICU‐
+ LAR PURPOSE.
+
+
+
+TagSoup 1.2 January 2008 TAGSOUP(1)
diff --git a/tssl/tssl-models.xslt b/tssl/tssl-models.xslt
new file mode 100644
index 0000000..6a983b4
--- /dev/null
+++ b/tssl/tssl-models.xslt
@@ -0,0 +1,47 @@
+<!-- Generate Java code to be inserted into HTMLModels.java. -->
+
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:tssl="http://www.ccil.org/~cowan/XML/tagsoup/tssl"
+ version="1.0">
+
+ <xsl:output method="text"/>
+
+ <xsl:strip-space elements="*"/>
+
+ <!-- Entry point. Visits the groups of the schema in order of their
+ ids and generates a Java constant definition for each of them. -->
+ <xsl:template match="tssl:schema">
+   <xsl:for-each select="tssl:group">
+     <xsl:sort select="@id"/>
+     <xsl:call-template name="tssl:group"/>
+   </xsl:for-each>
+ </xsl:template>
+
+ <!-- Generate the declaration for a single group: a public int
+ constant named after the group whose value is 1 shifted left by the
+ group's number (by default its position in the current list).
+ NOTE(review): position() starts at 1, so bit 0 is never assigned, and
+ a Java int shift cannot give distinct values to more than 31 groups. -->
+ <xsl:template match="tssl:group" name="tssl:group">
+   <xsl:param name="id" select="@id"/>
+   <xsl:param name="number" select="position()"/>
+   <xsl:value-of select="concat('&#x9;public static final int ', $id, ' = 1 &lt;&lt; ', $number, ';&#xA;')"/>
+ </xsl:template>
+
+</xsl:transform>
diff --git a/tssl/tssl-validate.xslt b/tssl/tssl-validate.xslt
new file mode 100644
index 0000000..81faab5
--- /dev/null
+++ b/tssl/tssl-validate.xslt
@@ -0,0 +1,40 @@
+<!-- Generate complaints if the schema is invalid in some way. -->
+
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:tssl="http://www.ccil.org/~cowan/XML/tagsoup/tssl"
+ version="1.0">
+
+ <xsl:output method="text"/>
+
+ <xsl:strip-space elements="*"/>
+
+ <!-- For every element nested inside another element, print a
+ complaint unless it has a memberOfAny child or belongs to at least
+ one of the groups that its parent element contains; then continue
+ with the element's own children. -->
+ <xsl:template match="tssl:element/tssl:element">
+   <xsl:if test="not(tssl:memberOfAny or tssl:memberOf/@group = ../tssl:contains/@group)">
+     <xsl:value-of select="concat(@name, ' is not in the content model of ', ../@name, '&#xA;')"/>
+   </xsl:if>
+   <xsl:apply-templates/>
+ </xsl:template>
+
+
+
+</xsl:transform>
diff --git a/tssl/tssl.rnc b/tssl/tssl.rnc
new file mode 100644
index 0000000..4443073
--- /dev/null
+++ b/tssl/tssl.rnc
@@ -0,0 +1,75 @@
+# This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+#
+# TagSoup is licensed under the Apache License,
+# Version 2.0. You may obtain a copy of this license at
+# http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+# additional legal rights not granted by this license.
+#
+# TagSoup is distributed in the hope that it will be useful, but
+# unless required by applicable law or agreed to in writing, TagSoup
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+# OF ANY KIND, either express or implied; not even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+default namespace = "http://www.ccil.org/~cowan/XML/tagsoup/tssl"
+
+start = schema
+
+## A TSSL schema has a namespace, which is applied to the elements by default,
+## and a name, which is used solely for documentation. It contains entity
+## definitions, one or more group declarations, a root element, and
+## attributes to be applied to all elements.
+schema = element schema {
+ attribute ns {xsd:anyURI},
+ attribute prefix {xsd:NCName},
+ attribute name {xsd:NCName},
+ attribute version {"1.0"},
+ entity*,
+ group+,
+ \element,
+ \attribute*
+ }
+
+## An entity has a name and a Unicode codepoint in hex.
+entity = element entity {
+ attribute name {xsd:NCName},
+ attribute codepoint {xsd:string}
+ }
+
+## A group is a named group of elements. Every element belongs to one
+## or more groups and has a content model consisting of one or more groups.
+group = element group {
+ attribute id {xsd:ID}
+ }
+
+## An element has a name and a namespace (currently ignored).
+## It can have any of several types of content and can be restartable
+## or not. The element is also a member of one or more model groups
+## (with arbitrary names), and can contain as children zero or more
+## model groups. Elements also have attributes and "natural" children.
+# Membership is given by exactly one of: one or more memberOf children,
+# a single isRoot child, or a single memberOfAny child.
+\element = element element {
+ attribute ns {xsd:anyURI}?,
+ attribute name {xsd:NCName},
+ attribute type {type},
+ attribute closeMode { "unclosable" | "restartable" }?,
+ attribute text-parent { "true" | "false" }?,
+ (element memberOf { attribute group {xsd:IDREF}}+ |
+ element isRoot { empty} |
+ element memberOfAny { empty }),
+ element contains { attribute group {xsd:IDREF}}*,
+ \attribute*,
+ \element*
+ }
+
+## Here are the content types allowed as the value of an element's
+## type attribute:
+
+type = "element" | "any" | "empty" | "mixed" | "string" | "cdata"
+
+## An attribute has a name and a namespace (currently not supported).
+## It also has an optional type and an optional default value.
+\attribute = element attribute {
+ attribute ns {xsd:anyURI}?,
+ attribute name {xsd:NCName},
+ attribute type {"ID" | "IDREF" | "IDREFS" | "NMTOKEN" | "BOOLEAN"}?,
+ attribute default {xsd:string}?
+ }
+
diff --git a/tssl/tssl.xslt b/tssl/tssl.xslt
new file mode 100644
index 0000000..a25be10
--- /dev/null
+++ b/tssl/tssl.xslt
@@ -0,0 +1,220 @@
+<!-- Generate Java code to be inserted into HTMLSchema.java. -->
+
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:tssl="http://www.ccil.org/~cowan/XML/tagsoup/tssl"
+ version="1.0">
+
+ <!-- The result is Java source text, not XML. -->
+ <xsl:output method="text"/>
+
+ <!-- Drop whitespace-only text nodes from every element of the input. -->
+ <xsl:strip-space elements="*"/>
+
+ <!-- The main template. This generates calls on the Schema routines
+ setURI(), setPrefix(), elementType(), parent(), attribute(),
+ and entity() in that order. Several special cases are
+ handled by template calls. All literal output is kept inside
+ xsl:text; each generated line starts with two tabs and ends
+ with a newline. -->
+ <xsl:template match="tssl:schema">
+ <!-- setURI(): argument is the schema's ns attribute -->
+ <xsl:text>&#x9;&#x9;setURI("</xsl:text>
+ <xsl:value-of select="@ns"/>
+ <xsl:text>");&#xA;</xsl:text>
+ <!-- setPrefix(): argument is the schema's prefix attribute -->
+ <xsl:text>&#x9;&#x9;setPrefix("</xsl:text>
+ <xsl:value-of select="@prefix"/>
+ <xsl:text>");&#xA;</xsl:text>
+ <!-- elementType() special cases: the synthetic "<pcdata>" and "<root>"
+ element types. The second argument for "<root>" is whatever the
+ tssl:isRoot template emits for the top-level element. -->
+ <xsl:text>&#x9;&#x9;elementType("&lt;pcdata>", M_EMPTY, M_PCDATA, 0);&#xA;</xsl:text>
+ <xsl:text>&#x9;&#x9;elementType("&lt;root>", </xsl:text>
+ <xsl:apply-templates select="tssl:element/tssl:isRoot"/>
+ <xsl:text>, M_EMPTY, 0);&#xA;</xsl:text>
+ <!-- elementType() main loop: every element at any depth, sorted by name -->
+ <xsl:apply-templates select="//tssl:element">
+ <xsl:sort select="@name"/>
+ </xsl:apply-templates>
+ <!-- parent() special cases: "<pcdata>" is parented by the element marked
+ text-parent='true' (xsl:value-of uses only the first such element if
+ there are several), and the top-level element is parented by
+ "<root>". -->
+ <xsl:call-template name="parent">
+ <xsl:with-param name="elem" select="'&lt;pcdata>'"/>
+ <xsl:with-param name="parent" select="//tssl:element[@text-parent='true']/@name"/>
+ </xsl:call-template>
+ <xsl:call-template name="parent">
+ <xsl:with-param name="elem" select="tssl:element/@name"/>
+ <xsl:with-param name="parent" select="'&lt;root>'"/>
+ </xsl:call-template>
+ <!-- parent() main loop: every element nested inside another element -->
+ <xsl:apply-templates select="//tssl:element/tssl:element" mode="parent">
+ <xsl:sort select="@name"/>
+ </xsl:apply-templates>
+ <!-- attribute() main loop: attributes declared on individual elements,
+ sorted by owning element name and then by attribute name -->
+ <xsl:apply-templates select="//tssl:element/tssl:attribute">
+ <xsl:sort select="../@name"/>
+ <xsl:sort select="@name"/>
+ </xsl:apply-templates>
+ <!-- attribute() loop for schema-level attributes; the matching template
+ expands each one into a call for every element -->
+ <xsl:apply-templates select="tssl:attribute">
+ <xsl:sort select="@name"/>
+ </xsl:apply-templates>
+ <!-- entity() main loop -->
+ <xsl:apply-templates select="tssl:entity">
+ <xsl:sort select="@name"/>
+ </xsl:apply-templates>
+ </xsl:template>
+
+  <!-- Generates a single call to elementType(). The arguments are, in order:
+       the element name; the content-model expression derived from @type
+       and the tssl:contains children; the membership expression produced
+       by the tssl:memberOf, tssl:memberOfAny or tssl:isRoot children; and
+       one of F_CDATA, F_RESTART, F_NOFORCE or 0. For the last argument the
+       first matching case wins, so the constants are never combined. -->
+  <xsl:template match="tssl:element">
+    <xsl:text>&#x9;&#x9;elementType("</xsl:text>
+    <xsl:value-of select="@name"/>
+    <xsl:text>", </xsl:text>
+    <xsl:choose>
+      <xsl:when test="@type = 'element'">
+        <xsl:apply-templates select="tssl:contains"/>
+        <!-- The schema permits zero tssl:contains children; emit 0 then,
+             so the generated call never has an empty argument. -->
+        <xsl:if test="not(tssl:contains)">
+          <xsl:text>0</xsl:text>
+        </xsl:if>
+      </xsl:when>
+      <xsl:when test="@type = 'mixed'">
+        <xsl:text>M_PCDATA</xsl:text>
+        <!-- Emit the operator only when a group follows it; otherwise the
+             generated expression would end in a dangling "|". -->
+        <xsl:if test="tssl:contains">
+          <xsl:text>|</xsl:text>
+        </xsl:if>
+        <xsl:apply-templates select="tssl:contains"/>
+      </xsl:when>
+      <!-- string and cdata content both use M_PCDATA as the model; cdata
+           is distinguished only by the F_CDATA constant below. -->
+      <xsl:when test="@type = 'string' or @type = 'cdata'">
+        <xsl:text>M_PCDATA</xsl:text>
+      </xsl:when>
+      <xsl:when test="@type = 'empty'">
+        <xsl:text>M_EMPTY</xsl:text>
+      </xsl:when>
+      <xsl:when test="@type = 'any'">
+        <xsl:text>M_ANY</xsl:text>
+      </xsl:when>
+    </xsl:choose>
+    <xsl:text>, </xsl:text>
+    <!-- The schema allows only one kind of membership child per element,
+         so at most one of these three produces output. -->
+    <xsl:apply-templates select="tssl:memberOf"/>
+    <xsl:apply-templates select="tssl:memberOfAny"/>
+    <xsl:apply-templates select="tssl:isRoot"/>
+    <xsl:text>, </xsl:text>
+    <xsl:choose>
+      <xsl:when test="@type = 'cdata'">
+        <xsl:text>F_CDATA</xsl:text>
+      </xsl:when>
+      <xsl:when test="@closeMode = 'restartable'">
+        <xsl:text>F_RESTART</xsl:text>
+      </xsl:when>
+      <xsl:when test="@closeMode = 'unclosable'">
+        <xsl:text>F_NOFORCE</xsl:text>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:text>0</xsl:text>
+      </xsl:otherwise>
+    </xsl:choose>
+    <xsl:text>);&#xA;</xsl:text>
+  </xsl:template>
+
+  <!-- Applied from tssl:element to generate the contains argument and the
+       memberOf argument: the @group values of the selected nodes, joined
+       by "|". One rule serves both element names. -->
+  <xsl:template match="tssl:contains | tssl:memberOf">
+    <!-- Every node after the first is preceded by the separator. -->
+    <xsl:if test="position() &gt; 1">
+      <xsl:text>|</xsl:text>
+    </xsl:if>
+    <xsl:value-of select="@group"/>
+  </xsl:template>
+
+  <!-- Applied from tssl:element to handle memberOfAny: emits the Java
+       expression for M_ANY with M_ROOT masked out. -->
+  <xsl:template match="tssl:memberOfAny">
+    <xsl:value-of select="'M_ANY &amp; ~M_ROOT'"/>
+  </xsl:template>
+
+  <!-- Applied from tssl:element to handle isRoot: emits the constant
+       name M_ROOT. -->
+  <xsl:template match="tssl:isRoot">
+    <xsl:value-of select="'M_ROOT'"/>
+  </xsl:template>
+
+  <!-- Generates a single call to parent(). It is used in two ways: as a
+       match template in mode "parent" for an element nested inside another
+       element, where the parameters default to the names of the child and
+       of the enclosing element, and as the named template "parent" with
+       explicit parameters. The mode keeps nested elements from being
+       handled by the elementType() template instead. -->
+  <xsl:template match="tssl:element/tssl:element" name="parent" mode="parent">
+    <xsl:param name="elem" select="@name"/>
+    <xsl:param name="parent" select="../@name"/>
+    <!-- Build the whole line (two tabs, the call, a newline) in one go. -->
+    <xsl:value-of select="concat('&#x9;&#x9;parent(&quot;', $elem, '&quot;, &quot;', $parent, '&quot;);&#xA;')"/>
+  </xsl:template>
+
+  <!-- Generates a single call to attribute(). As a match template the
+       parameters default to the attribute's own @name, @type and @default
+       and to the name of its parent element; as the named template
+       "tssl:attribute" the caller supplies them. A missing type is emitted
+       as "CDATA" and a missing default as null. Values are copied verbatim:
+       no Java string escaping is applied. -->
+  <xsl:template match="tssl:element/tssl:attribute" name="tssl:attribute">
+    <xsl:param name="elem" select="../@name"/>
+    <xsl:param name="attr" select="@name"/>
+    <xsl:param name="type" select="@type"/>
+    <xsl:param name="default" select="@default"/>
+    <xsl:value-of select="concat('&#x9;&#x9;attribute(&quot;', $elem, '&quot;, &quot;', $attr, '&quot;, &quot;')"/>
+    <xsl:choose>
+      <xsl:when test="$type">
+        <xsl:value-of select="$type"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:text>CDATA</xsl:text>
+      </xsl:otherwise>
+    </xsl:choose>
+    <xsl:text>", </xsl:text>
+    <xsl:choose>
+      <xsl:when test="$default">
+        <xsl:value-of select="concat('&quot;', $default, '&quot;')"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:text>null</xsl:text>
+      </xsl:otherwise>
+    </xsl:choose>
+    <xsl:text>);&#xA;</xsl:text>
+  </xsl:template>
+
+  <!-- Expands one schema-level attribute definition into a call to
+       attribute() for every element in the document, in element-name
+       order, by invoking the named template "tssl:attribute". -->
+  <xsl:template match="tssl:schema/tssl:attribute">
+    <!-- Capture the definition first: inside xsl:for-each the context node
+         is the element, not this attribute. -->
+    <xsl:variable name="global-name" select="@name"/>
+    <xsl:variable name="global-type" select="@type"/>
+    <xsl:variable name="global-default" select="@default"/>
+    <xsl:for-each select="//tssl:element">
+      <xsl:sort select="@name"/>
+      <xsl:call-template name="tssl:attribute">
+        <xsl:with-param name="default" select="$global-default"/>
+        <xsl:with-param name="type" select="$global-type"/>
+        <xsl:with-param name="attr" select="$global-name"/>
+        <xsl:with-param name="elem" select="@name"/>
+      </xsl:call-template>
+    </xsl:for-each>
+  </xsl:template>
+
+  <!-- Generates a single call to entity(): the entity name as a string
+       literal, then the codepoint attribute prefixed with "0x". -->
+  <xsl:template match="tssl:entity">
+    <xsl:value-of select="concat('&#x9;&#x9;entity(&quot;', @name, '&quot;, 0x', @codepoint, ');&#xA;')"/>
+  </xsl:template>
+
+</xsl:transform>