aboutsummaryrefslogtreecommitdiff
path: root/src/org/ccil/cowan/tagsoup/PYXScanner.java
blob: ebfba26a0f94f35478868bc743d38789ec8dae53 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
//
// TagSoup is licensed under the Apache License,
// Version 2.0.  You may obtain a copy of this license at
// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
// additional legal rights not granted by this license.
//
// TagSoup is distributed in the hope that it will be useful, but
// unless required by applicable law or agreed to in writing, TagSoup
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, either express or implied; not even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// 
// 
// This file is part of TagSoup.
// 
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.  You may also distribute
// and/or modify it under version 2.1 of the Academic Free License.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
// 
// 
// PYX Scanner

package org.ccil.cowan.tagsoup;
import java.io.*;
import org.xml.sax.SAXException;

/**
A Scanner that accepts PYX format instead of HTML.
Useful primarily for debugging.

<p>Each input line is one event, selected by the first character of the line:
<code>(</code> element name, <code>A</code> attribute (name, a space, then the
value), <code>)</code> end-tag name, <code>?</code> processing instruction,
<code>-</code> character data and <code>E</code> entity.  Empty lines and
lines starting with any other character are ignored.
**/
public class PYXScanner implements Scanner {

	public void resetDocumentLocator(String publicid, String systemid) {
		// Need this method for interface compatibility, but note
		// that PYXScanner does not implement Locator.
	}

	/**
	Reads PYX lines from the reader and reports each one to the handler.

	<p>A start tag is left "open" after a <code>(</code> line so that any
	following <code>A</code> lines can add attributes to it; the next line
	of any other recognized kind closes it with <code>stagc</code> before
	being reported itself.  After the last line <code>eof</code> is reported.
	The buffer passed to <code>eof</code> is null when the input contained
	no non-empty lines.

	@param r source of PYX text; it is wrapped in a BufferedReader
	@param h handler that receives the scan events
	@throws IOException if reading from <code>r</code> fails
	@throws SAXException if the handler reports an error
	**/
	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
		BufferedReader br = new BufferedReader(r);
		String s;
		char[] buff = null;
		boolean instag = false;
		while ((s = br.readLine()) != null) {
			int size = s.length();
			if (size == 0) {
				// No type character to dispatch on.  Without this check
				// buff[0] would either be out of bounds or still hold
				// the first character of the previous line.
				continue;
			}
			if (buff == null || buff.length < size) {
				buff = new char[size];
			}
			s.getChars(0, size, buff, 0);
			switch (buff[0]) {
			case '(':
				closeStartTag(h, buff, instag);
				h.gi(buff, 1, size - 1);
				instag = true;
				break;
			case ')':
				closeStartTag(h, buff, instag);
				instag = false;
				h.etag(buff, 1, size - 1);
				break;
			case '?':
				closeStartTag(h, buff, instag);
				instag = false;
				h.pi(buff, 1, size - 1);
				break;
			case 'A': {
				// Attributes belong to the pending start tag, so the
				// tag is deliberately not closed here.
				int sp = s.indexOf(' ');
				if (sp < 0) {
					// No separator: the rest of the line is the name
					// and the value is empty.
					h.aname(buff, 1, size - 1);
					h.aval(buff, size, 0);
				}
				else {
					h.aname(buff, 1, sp - 1);
					h.aval(buff, sp + 1, size - sp - 1);
				}
				break;
			}
			case '-': {
				closeStartTag(h, buff, instag);
				instag = false;
				// Decoding happens in place; the result is never
				// longer than the escaped text, so buff is big enough.
				int decoded = decodeEscapes(buff, 1, size - 1);
				h.pcdata(buff, 1, decoded);
				break;
			}
			case 'E':
				closeStartTag(h, buff, instag);
				instag = false;
				h.entity(buff, 1, size - 1);
				break;
			default:
				// Unrecognized line type: ignored.
				break;
			}
		}
		h.eof(buff, 0, 0);
	}

	/**
	Reports the end of a pending start tag, if there is one.
	The caller is responsible for clearing its own "in tag" flag.
	**/
	private static void closeStartTag(ScanHandler h, char[] buff, boolean instag)
			throws IOException, SAXException {
		if (instag) {
			h.stagc(buff, 0, 0);
		}
	}

	/**
	Replaces the escapes <code>\n</code>, <code>\t</code> and <code>\\</code>
	in <code>buff[offset .. offset+length)</code> with newline, tab and
	backslash, compacting the text in place.  Any other backslash sequence,
	and a lone trailing backslash, is copied through unchanged.

	@return the number of characters in the decoded text
	**/
	private static int decodeEscapes(char[] buff, int offset, int length) {
		int end = offset + length;
		int out = offset;
		for (int in = offset; in < end; in++) {
			char c = buff[in];
			if (c == '\\' && in + 1 < end) {
				switch (buff[in + 1]) {
				case 'n':
					c = '\n';
					in++;
					break;
				case 't':
					c = '\t';
					in++;
					break;
				case '\\':
					// c is already a backslash; just consume the second one.
					in++;
					break;
				default:
					// Not an escape this scanner decodes; keep the backslash.
					break;
				}
			}
			buff[out++] = c;
		}
		return out - offset;
	}

	// Empty implementation; presumably required by the Scanner interface.
	public void startCDATA() { }

	/**
	Reads PYX from standard input as UTF-8, scans it, and hands the events
	to a PYXWriter wrapped around standard output (also UTF-8).
	**/
	public static void main(String[] argv) throws IOException, SAXException {
		Scanner s = new PYXScanner();
		Reader r = new InputStreamReader(System.in, "UTF-8");
		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
		s.scan(r, new PYXWriter(w));
	}
}