diff options
Diffstat (limited to 'src/java/org/ccil/cowan/tagsoup/CommandLine.java')
-rw-r--r-- | src/java/org/ccil/cowan/tagsoup/CommandLine.java | 289 |
1 files changed, 289 insertions, 0 deletions
diff --git a/src/java/org/ccil/cowan/tagsoup/CommandLine.java b/src/java/org/ccil/cowan/tagsoup/CommandLine.java new file mode 100644 index 0000000..dd0c022 --- /dev/null +++ b/src/java/org/ccil/cowan/tagsoup/CommandLine.java @@ -0,0 +1,289 @@ +// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. +// +// TagSoup is licensed under the Apache License, +// Version 2.0. You may obtain a copy of this license at +// http://www.apache.org/licenses/LICENSE-2.0 . You may also have +// additional legal rights not granted by this license. +// +// TagSoup is distributed in the hope that it will be useful, but +// unless required by applicable law or agreed to in writing, TagSoup +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, either express or implied; not even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// +// The TagSoup command line UI + +package org.ccil.cowan.tagsoup; +import java.util.Hashtable; +import java.util.Enumeration; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.ext.LexicalHandler; + + +/** +The stand-alone TagSoup program. +**/ +public class CommandLine { + + static Hashtable options = new Hashtable(); static { + options.put("--nocdata", Boolean.FALSE); // CDATA elements are normal + options.put("--files", Boolean.FALSE); // process arguments as separate files + options.put("--reuse", Boolean.FALSE); // reuse a single Parser + options.put("--nons", Boolean.FALSE); // no namespaces + options.put("--nobogons", Boolean.FALSE); // suppress unknown elements + options.put("--any", Boolean.FALSE); // unknowns have ANY content model + options.put("--emptybogons", Boolean.FALSE); // unknowns have EMPTY content model + options.put("--norootbogons", Boolean.FALSE); // unknowns can't be the root + options.put("--pyxin", Boolean.FALSE); // input is PYX + options.put("--lexical", Boolean.FALSE); // output comments + options.put("--pyx", Boolean.FALSE); // output is PYX + options.put("--html", Boolean.FALSE); // output is HTML + options.put("--method=", Boolean.FALSE); // output method + options.put("--doctype-public=", Boolean.FALSE); // override public id + options.put("--doctype-system=", Boolean.FALSE); // override system id + options.put("--output-encoding=", Boolean.FALSE); // output encoding + options.put("--omit-xml-declaration", Boolean.FALSE); // omit XML decl + options.put("--encoding=", Boolean.FALSE); // specify encoding + options.put("--help", Boolean.FALSE); // display help + options.put("--version", Boolean.FALSE); // display version + options.put("--nodefaults", Boolean.FALSE); // no default attrs + options.put("--nocolons", Boolean.FALSE); // colon to underscore + options.put("--norestart", Boolean.FALSE); // no restartable elements + options.put("--ignorable", Boolean.FALSE); // return ignorable whitespace + } + + /** + Main method. Processes specified files or standard input. + **/ + + public static void main(String[] argv) throws IOException, SAXException { + int optind = getopts(options, argv); + if (hasOption(options, "--help")) { + doHelp(); + return; + } + if (hasOption(options, "--version")) { + System.err.println("TagSoup version 1.2"); + return; + } + if (argv.length == optind) { + process("", System.out); + } + else if (hasOption(options, "--files")) { + for (int i = optind; i < argv.length; i++) { + String src = argv[i]; + String dst; + int j = src.lastIndexOf('.'); + if (j == -1) + dst = src + ".xhtml"; + else if (src.endsWith(".xhtml")) + dst = src + "_"; + else + dst = src.substring(0, j) + ".xhtml"; + System.err.println("src: " + src + " dst: " + dst); + OutputStream os = new FileOutputStream(dst); + process(src, os); + } + } + else { + for (int i = optind; i < argv.length; i++) { + System.err.println("src: " + argv[i]); + process(argv[i], System.out); + } + } + } + + // Print the help message + + private static void doHelp() { + System.err.print("usage: java -jar tagsoup-*.jar "); + System.err.print(" [ "); + boolean first = true; + for (Enumeration e = options.keys(); e.hasMoreElements(); ) { + if (!first) { + System.err.print("| "); + } + first = false; + String key = (String)(e.nextElement()); + System.err.print(key); + if (key.endsWith("=")) + System.err.print("?"); + System.err.print(" "); + } + System.err.println("]*"); + } + + private static Parser theParser = null; + private static HTMLSchema theSchema = null; + private static String theOutputEncoding = null; + + // Process one source onto an output stream. + + private static void process(String src, OutputStream os) + throws IOException, SAXException { + XMLReader r; + if (hasOption(options, "--reuse")) { + if (theParser == null) theParser = new Parser(); + r = theParser; + } + else { + r = new Parser(); + } + theSchema = new HTMLSchema(); + r.setProperty(Parser.schemaProperty, theSchema); + + if (hasOption(options, "--nocdata")) { + r.setFeature(Parser.CDATAElementsFeature, false); + } + + if (hasOption(options, "--nons") || hasOption(options, "--html")) { + r.setFeature(Parser.namespacesFeature, false); + } + + if (hasOption(options, "--nobogons")) { + r.setFeature(Parser.ignoreBogonsFeature, true); + } + + if (hasOption(options, "--any")) { + r.setFeature(Parser.bogonsEmptyFeature, false); + } + else if (hasOption(options, "--emptybogons")) { + r.setFeature(Parser.bogonsEmptyFeature, true); + } + + if (hasOption(options, "--norootbogons")) { + r.setFeature(Parser.rootBogonsFeature, false); + } + + if (hasOption(options, "--nodefaults")) { + r.setFeature(Parser.defaultAttributesFeature, false); + } + if (hasOption(options, "--nocolons")) { + r.setFeature(Parser.translateColonsFeature, true); + } + + if (hasOption(options, "--norestart")) { + r.setFeature(Parser.restartElementsFeature, false); + } + + if (hasOption(options, "--ignorable")) { + r.setFeature(Parser.ignorableWhitespaceFeature, true); + } + + if (hasOption(options, "--pyxin")) { + r.setProperty(Parser.scannerProperty, new PYXScanner()); + } + + Writer w; + if (theOutputEncoding == null) { + w = new OutputStreamWriter(os); + } + else { + w = new OutputStreamWriter(os, theOutputEncoding); + } + ContentHandler h = chooseContentHandler(w); + r.setContentHandler(h); + if (hasOption(options, "--lexical") && h instanceof LexicalHandler) { + r.setProperty(Parser.lexicalHandlerProperty, h); + } + InputSource s = new InputSource(); + if (src != "") { + s.setSystemId(src); + } + else { + s.setByteStream(System.in); + } + if (hasOption(options, "--encoding=")) { +// System.out.println("%% Found --encoding"); + String encoding = (String)options.get("--encoding="); + if (encoding != null) s.setEncoding(encoding); + } + r.parse(s); + } + + // Pick a content handler to generate the desired format. + + private static ContentHandler chooseContentHandler(Writer w) { + XMLWriter x; + if (hasOption(options, "--pyx")) { + return new PYXWriter(w); + } + + x = new XMLWriter(w); + if (hasOption(options, "--html")) { + x.setOutputProperty(XMLWriter.METHOD, "html"); + x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes"); + } + if (hasOption(options, "--method=")) { + String method = (String)options.get("--method="); + if (method != null) { + x.setOutputProperty(XMLWriter.METHOD, method); + } + } + if (hasOption(options, "--doctype-public=")) { + String doctype_public = (String)options.get("--doctype-public="); + if (doctype_public != null) { + x.setOutputProperty(XMLWriter.DOCTYPE_PUBLIC, doctype_public); + } + } + if (hasOption(options, "--doctype-system=")) { + String doctype_system = (String)options.get("--doctype-system="); + if (doctype_system != null) { + x.setOutputProperty(XMLWriter.DOCTYPE_SYSTEM, doctype_system); + } + } + if (hasOption(options, "--output-encoding=")) { + theOutputEncoding = (String)options.get("--output-encoding="); +// System.err.println("%%%% Output encoding is " + theOutputEncoding); + if (theOutputEncoding != null) { + x.setOutputProperty(XMLWriter.ENCODING, theOutputEncoding); + } + } + if (hasOption(options, "--omit-xml-declaration")) { + x.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes"); + } + x.setPrefix(theSchema.getURI(), ""); + return x; + } + + // Options processing + + private static int getopts(Hashtable options, String[] argv) { + int optind; + for (optind = 0; optind < argv.length; optind++) { + String arg = argv[optind]; + String value = null; + if (arg.charAt(0) != '-') break; + int eqsign = arg.indexOf('='); + if (eqsign != -1) { + value = arg.substring(eqsign + 1, arg.length()); + arg = arg.substring(0, eqsign + 1); + } + if (options.containsKey(arg)) { + if (value == null) options.put(arg, Boolean.TRUE); + else options.put(arg, value); +// System.out.println("%% Parsed [" + arg + "]=[" + value + "]"); + } + else { + System.err.print("Unknown option "); + System.err.println(arg); + System.exit(1); + } + } + return optind; + } + + // Return true if an option exists. + + private static boolean hasOption(Hashtable options, String option) { + if (Boolean.getBoolean(option)) return true; + else if (options.get(option) != Boolean.FALSE) return true; + return false; + } + + } |