path: root/src/java/org/ccil/cowan/tagsoup/CommandLine.java
diff options
Diffstat (limited to 'src/java/org/ccil/cowan/tagsoup/CommandLine.java')
1 files changed, 289 insertions, 0 deletions
diff --git a/src/java/org/ccil/cowan/tagsoup/CommandLine.java b/src/java/org/ccil/cowan/tagsoup/CommandLine.java
new file mode 100644
index 0000000..dd0c022
--- /dev/null
+++ b/src/java/org/ccil/cowan/tagsoup/CommandLine.java
@@ -0,0 +1,289 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+// TagSoup is distributed in the hope that it will be useful, but
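+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of merchantability or fitness for a particular purpose.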
+// The TagSoup command line UI
+package org.ccil.cowan.tagsoup;
+import java.util.Hashtable;
+import java.util.Enumeration;
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import org.xml.sax.*;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.ext.LexicalHandler;
+/**
+The stand-alone TagSoup program.
+**/
+public class CommandLine {
+ // Table of recognized command-line options. Every option starts out mapped
+ // to Boolean.FALSE ("not given"). Names ending in '=' are the ones that
+ // carry a value on the command line.
+ static Hashtable options = new Hashtable();
+ static {
+     // Registration order is kept exactly as listed here.
+     String[] names = {
+         "--nocdata",              // CDATA elements are normal
+         "--files",                // process arguments as separate files
+         "--reuse",                // reuse a single Parser
+         "--nons",                 // no namespaces
+         "--nobogons",             // suppress unknown elements
+         "--any",                  // unknowns have ANY content model
+         "--emptybogons",          // unknowns have EMPTY content model
+         "--norootbogons",         // unknowns can't be the root
+         "--pyxin",                // input is PYX
+         "--lexical",              // output comments
+         "--pyx",                  // output is PYX
+         "--html",                 // output is HTML
+         "--method=",              // output method
+         "--doctype-public=",      // override public id
+         "--doctype-system=",      // override system id
+         "--output-encoding=",     // output encoding
+         "--omit-xml-declaration", // omit XML decl
+         "--encoding=",            // specify encoding
+         "--help",                 // display help
+         "--version",              // display version
+         "--nodefaults",           // no default attrs
+         "--nocolons",             // colon to underscore
+         "--norestart",            // no restartable elements
+         "--ignorable"             // return ignorable whitespace
+     };
+     for (int i = 0; i < names.length; i++) {
+         options.put(names[i], Boolean.FALSE);
+     }
+ }
+ /**
+ Main method. Processes specified files or standard input.
+ With no source arguments, standard input is converted onto standard output.
+ With --files, each source is converted into its own output file (see
+ outputNameFor); otherwise every source is converted onto standard output.
+ --help and --version print their message on standard error and return
+ without processing anything.
+ @param argv options, followed by the names of the sources to process
+ @throws IOException propagated from opening or closing an output file, or from process()
+ @throws SAXException propagated from process()
+ **/
+ public static void main(String[] argv) throws IOException, SAXException {
+     int optind = getopts(options, argv);
+     if (hasOption(options, "--help")) {
+         doHelp();
+         return;
+     }
+     if (hasOption(options, "--version")) {
+         System.err.println("TagSoup version 1.2");
+         return;
+     }
+     if (argv.length == optind) {
+         // No sources named: the empty string tells process() to read System.in.
+         process("", System.out);
+     }
+     else if (hasOption(options, "--files")) {
+         for (int i = optind; i < argv.length; i++) {
+             String src = argv[i];
+             String dst = outputNameFor(src);
+             System.err.println("src: " + src + " dst: " + dst);
+             OutputStream os = new FileOutputStream(dst);
+             try {
+                 process(src, os);
+             }
+             finally {
+                 // Release the file handle for every source, even if parsing fails.
+                 os.close();
+             }
+         }
+     }
+     else {
+         for (int i = optind; i < argv.length; i++) {
+             System.err.println("src: " + argv[i]);
+             process(argv[i], System.out);
+         }
+     }
+ }
+ // Derive the --files output name for a source: replace the file name's
+ // extension with ".xhtml", append ".xhtml" if it has none, or append "_"
+ // if the name already ends in ".xhtml" (so the input is not overwritten).
+ private static String outputNameFor(String src) {
+     int dot = src.lastIndexOf('.');
+     int sep = Math.max(src.lastIndexOf('/'), src.lastIndexOf(File.separatorChar));
+     // A dot before the last separator belongs to a directory name
+     // (e.g. "./page" or "dir.d/page"), not to the file's extension.
+     if (dot < sep) dot = -1;
+     if (dot == -1) return src + ".xhtml";
+     if (src.endsWith(".xhtml")) return src + "_";
+     return src.substring(0, dot) + ".xhtml";
+ }
+ // Print the help message
+ private static void doHelp() {
+ System.err.print("usage: java -jar tagsoup-*.jar ");
+ System.err.print(" [ ");
+ boolean first = true;
+ for (Enumeration e = options.keys(); e.hasMoreElements(); ) {
+ if (!first) {
+ System.err.print("| ");
+ }
+ first = false;
+ String key = (String)(e.nextElement());
+ System.err.print(key);
+ if (key.endsWith("="))
+ System.err.print("?");
+ System.err.print(" ");
+ }
+ System.err.println("]*");
+ }
+ // Parser kept between calls to process() when --reuse is given; created on first use.
+ private static Parser theParser = null;
+ // Schema most recently installed on the parser; process() assigns a new HTMLSchema on every call.
+ private static HTMLSchema theSchema = null;
+ // Output encoding name taken from --output-encoding=; while null, process() builds
+ // its OutputStreamWriter without an explicit encoding.
+ private static String theOutputEncoding = null;
+ // Process one source onto an output stream.
+ // src is handed to the parser as the input's system identifier; the empty
+ // string means "read System.in" instead. os receives the converted output.
+ // Each call installs a new HTMLSchema and sets the parser features and
+ // properties selected by the command-line options before parsing.
+ // Throws IOException or SAXException as raised while configuring or running the parse.
+ private static void process(String src, OutputStream os)
+         throws IOException, SAXException {
+     XMLReader r;
+     if (hasOption(options, "--reuse")) {
+         if (theParser == null) theParser = new Parser();
+         r = theParser;
+     }
+     else {
+         r = new Parser();
+     }
+     theSchema = new HTMLSchema();
+     r.setProperty(Parser.schemaProperty, theSchema);
+     if (hasOption(options, "--nocdata")) {
+         r.setFeature(Parser.CDATAElementsFeature, false);
+     }
+     if (hasOption(options, "--nons") || hasOption(options, "--html")) {
+         r.setFeature(Parser.namespacesFeature, false);
+     }
+     if (hasOption(options, "--nobogons")) {
+         r.setFeature(Parser.ignoreBogonsFeature, true);
+     }
+     // --any takes precedence over --emptybogons when both are given.
+     if (hasOption(options, "--any")) {
+         r.setFeature(Parser.bogonsEmptyFeature, false);
+     }
+     else if (hasOption(options, "--emptybogons")) {
+         r.setFeature(Parser.bogonsEmptyFeature, true);
+     }
+     if (hasOption(options, "--norootbogons")) {
+         r.setFeature(Parser.rootBogonsFeature, false);
+     }
+     if (hasOption(options, "--nodefaults")) {
+         r.setFeature(Parser.defaultAttributesFeature, false);
+     }
+     if (hasOption(options, "--nocolons")) {
+         r.setFeature(Parser.translateColonsFeature, true);
+     }
+     if (hasOption(options, "--norestart")) {
+         r.setFeature(Parser.restartElementsFeature, false);
+     }
+     if (hasOption(options, "--ignorable")) {
+         r.setFeature(Parser.ignorableWhitespaceFeature, true);
+     }
+     if (hasOption(options, "--pyxin")) {
+         r.setProperty(Parser.scannerProperty, new PYXScanner());
+     }
+     // Resolve the requested output encoding BEFORE creating the Writer.
+     // Otherwise the first (or only) source is written in the platform
+     // default encoding, because the field is still null at this point.
+     if (hasOption(options, "--output-encoding=")) {
+         Object requested = options.get("--output-encoding=");
+         if (requested instanceof String) theOutputEncoding = (String)requested;
+     }
+     Writer w;
+     if (theOutputEncoding == null) {
+         w = new OutputStreamWriter(os);
+     }
+     else {
+         w = new OutputStreamWriter(os, theOutputEncoding);
+     }
+     ContentHandler h = chooseContentHandler(w);
+     r.setContentHandler(h);
+     // Comments are only forwarded if the chosen handler can receive lexical events.
+     if (hasOption(options, "--lexical") && h instanceof LexicalHandler) {
+         r.setProperty(Parser.lexicalHandlerProperty, h);
+     }
+     InputSource s = new InputSource();
+     // Compare by content, not by reference: an empty string that is not the
+     // interned literal must also select standard input.
+     if (!"".equals(src)) {
+         s.setSystemId(src);
+     }
+     else {
+         s.setByteStream(System.in);
+     }
+     if (hasOption(options, "--encoding=")) {
+         String encoding = (String)options.get("--encoding=");
+         if (encoding != null) s.setEncoding(encoding);
+     }
+     r.parse(s);
+ }
+ // Build the content handler for the requested output format: a PYXWriter
+ // for --pyx, otherwise an XMLWriter configured from the output options.
+ // Also records the --output-encoding= value in theOutputEncoding.
+ private static ContentHandler chooseContentHandler(Writer w) {
+     if (hasOption(options, "--pyx")) {
+         return new PYXWriter(w);
+     }
+     XMLWriter writer = new XMLWriter(w);
+     if (hasOption(options, "--html")) {
+         writer.setOutputProperty(XMLWriter.METHOD, "html");
+         writer.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
+     }
+     // Options whose value is copied straight into an output property,
+     // applied in this order.
+     String[][] valued = {
+         { "--method=", XMLWriter.METHOD },
+         { "--doctype-public=", XMLWriter.DOCTYPE_PUBLIC },
+         { "--doctype-system=", XMLWriter.DOCTYPE_SYSTEM },
+     };
+     for (int k = 0; k < valued.length; k++) {
+         String name = valued[k][0];
+         if (!hasOption(options, name)) continue;
+         String setting = (String)options.get(name);
+         if (setting != null) {
+             writer.setOutputProperty(valued[k][1], setting);
+         }
+     }
+     if (hasOption(options, "--output-encoding=")) {
+         theOutputEncoding = (String)options.get("--output-encoding=");
+         if (theOutputEncoding != null) {
+             writer.setOutputProperty(XMLWriter.ENCODING, theOutputEncoding);
+         }
+     }
+     if (hasOption(options, "--omit-xml-declaration")) {
+         writer.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
+     }
+     // Bind the schema's namespace URI to the empty prefix.
+     writer.setPrefix(theSchema.getURI(), "");
+     return writer;
+ }
+ // Options processing.
+ // Scans argv from the front, stopping at the first argument that does not
+ // start with '-'. "--name" stores Boolean.TRUE under "--name";
+ // "--name=value" stores the string value under "--name=". An option that
+ // is not already a key of the table prints "Unknown option" on standard
+ // error and exits with status 1.
+ // Returns the index of the first non-option argument (argv.length if none).
+ private static int getopts(Hashtable options, String[] argv) {
+     int optind;
+     for (optind = 0; optind < argv.length; optind++) {
+         String arg = argv[optind];
+         String value = null;
+         // startsWith also copes with an empty argument, where charAt(0)
+         // would throw StringIndexOutOfBoundsException.
+         if (!arg.startsWith("-")) break;
+         int eqsign = arg.indexOf('=');
+         if (eqsign != -1) {
+             // Keep the '=' on the name so it matches the table's "--name=" keys.
+             value = arg.substring(eqsign + 1, arg.length());
+             arg = arg.substring(0, eqsign + 1);
+         }
+         if (options.containsKey(arg)) {
+             if (value == null) options.put(arg, Boolean.TRUE);
+             else options.put(arg, value);
+         }
+         else {
+             System.err.print("Unknown option ");
+             System.err.println(arg);
+             System.exit(1);
+         }
+     }
+     return optind;
+ }
+ // Return true if an option has been given.
+ // An option counts as given when the system property of the same name is
+ // "true" (Boolean.getBoolean), or when the table holds anything other than
+ // Boolean.FALSE for it. A name that is not in the table is not given.
+ private static boolean hasOption(Hashtable options, String option) {
+     if (Boolean.getBoolean(option)) return true;
+     Object value = options.get(option);
+     // Compare by value rather than by reference, and treat a missing
+     // entry (null) as "not given".
+     return value != null && !Boolean.FALSE.equals(value);
+ }
+ }