diff options
Diffstat (limited to 'tool/src/main/java/org/antlr/tool/GrammarSpelunker.java')
-rw-r--r-- | tool/src/main/java/org/antlr/tool/GrammarSpelunker.java | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/tool/src/main/java/org/antlr/tool/GrammarSpelunker.java b/tool/src/main/java/org/antlr/tool/GrammarSpelunker.java new file mode 100644 index 0000000..d7986ec --- /dev/null +++ b/tool/src/main/java/org/antlr/tool/GrammarSpelunker.java @@ -0,0 +1,250 @@ +/* + * [The "BSD license"] + * Copyright (c) 2010 Terence Parr + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.tool; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** Load a grammar file and scan it just until we learn a few items + * of interest. Currently: name, type, imports, tokenVocab, language option. + * + * GrammarScanner (at bottom of this class) converts grammar to stuff like: + * + * grammar Java ; options { backtrack true memoize true } + * import JavaDecl JavaAnnotations JavaExpr ; + * ... : ... + * + * First ':' or '@' indicates we can stop looking for imports/options. + * + * Then we just grab interesting grammar properties. + */ +public class GrammarSpelunker { + protected String grammarFileName; + protected String token; + protected Scanner scanner; + + // grammar info / properties + protected String grammarModifier; + protected String grammarName; + protected String tokenVocab; + protected String language = "Java"; // default + protected String inputDirectory; + protected List<String> importedGrammars; + + public GrammarSpelunker(String inputDirectory, String grammarFileName) { + this.inputDirectory = inputDirectory; + this.grammarFileName = grammarFileName; + } + + void consume() throws IOException { token = scanner.nextToken(); } + + protected void match(String expecting) throws IOException { + //System.out.println("match "+expecting+"; is "+token); + if ( token.equals(expecting) ) consume(); + else throw new Error("Error parsing "+grammarFileName+": '"+token+ + "' not expected '"+expecting+"'"); + } + + public void parse() throws IOException { + Reader r = new FileReader((inputDirectory != null ? inputDirectory + File.separator : "") + grammarFileName); + BufferedReader br = new BufferedReader(r); + try { + scanner = new Scanner(br); + consume(); + grammarHeader(); + // scan until imports or options + while ( token!=null && !token.equals("@") && !token.equals(":") && + !token.equals("import") && !token.equals("options") ) + { + consume(); + } + if ( token.equals("options") ) options(); + // scan until options or first rule + while ( token!=null && !token.equals("@") && !token.equals(":") && + !token.equals("import") ) + { + consume(); + } + if ( token.equals("import") ) imports(); + // ignore rest of input; close up shop + } + finally { + if ( br!=null ) br.close(); + } + } + + protected void grammarHeader() throws IOException { + if ( token==null ) return; + if ( token.equals("tree") || token.equals("parser") || token.equals("lexer") ) { + grammarModifier=token; + consume(); + } + match("grammar"); + grammarName = token; + consume(); // move beyond name + } + + // looks like "options { backtrack true ; tokenVocab MyTokens ; }" + protected void options() throws IOException { + match("options"); + match("{"); + while ( token!=null && !token.equals("}") ) { + String name = token; + consume(); + String value = token; + consume(); + match(";"); + if ( name.equals("tokenVocab") ) tokenVocab = value; + if ( name.equals("language") ) language = value; + } + match("}"); + } + + // looks like "import JavaDecl JavaAnnotations JavaExpr ;" + protected void imports() throws IOException { + match("import"); + importedGrammars = new ArrayList<String>(); + while ( token!=null && !token.equals(";") ) { + importedGrammars.add(token); + consume(); + } + match(";"); + if ( importedGrammars.size()==0 ) importedGrammars = null; + } + + public String getGrammarModifier() { return grammarModifier; } + + public String getGrammarName() { return grammarName; } + + public String getTokenVocab() { return tokenVocab; } + + public String getLanguage() { return language; } + + public List<String> getImportedGrammars() { return importedGrammars; } + + /** Strip comments and then return stream of words and + * tokens {';', ':', '{', '}'} + */ + public static class Scanner { + public static final int EOF = -1; + Reader input; + int c; + + public Scanner(Reader input) throws IOException { + this.input = input; + consume(); + } + + boolean isDIGIT() { return c>='0'&&c<='9'; } + boolean isID_START() { return c>='a'&&c<='z' || c>='A'&&c<='Z'; } + boolean isID_LETTER() { return isID_START() || c>='0'&&c<='9' || c=='_'; } + + void consume() throws IOException { c = input.read(); } + + public String nextToken() throws IOException { + while ( c!=EOF ) { + //System.out.println("check "+(char)c); + switch ( c ) { + case ';' : consume(); return ";"; + case '{' : consume(); return "{"; + case '}' : consume(); return "}"; + case ':' : consume(); return ":"; + case '@' : consume(); return "@"; + case '/' : COMMENT(); break; + case '\'': return STRING(); + default: + if ( isID_START() ) return ID(); + else if ( isDIGIT() ) return INT(); + consume(); // ignore anything else + } + } + return null; + } + + /** NAME : LETTER+ ; // NAME is sequence of >=1 letter */ + String ID() throws IOException { + StringBuffer buf = new StringBuffer(); + while ( c!=EOF && isID_LETTER() ) { buf.append((char)c); consume(); } + return buf.toString(); + } + + String INT() throws IOException { + StringBuffer buf = new StringBuffer(); + while ( c!=EOF && isDIGIT() ) { buf.append((char)c); consume(); } + return buf.toString(); + } + + String STRING() throws IOException { + StringBuffer buf = new StringBuffer(); + consume(); + while ( c!=EOF && c!='\'' ) { + if ( c=='\\' ) { + buf.append((char)c); + consume(); + } + buf.append((char)c); + consume(); + } + consume(); // scan past ' + return buf.toString(); + } + + void COMMENT() throws IOException { + if ( c=='/' ) { + consume(); + if ( c=='*' ) { + consume(); + scarf: + while ( true ) { + if ( c=='*' ) { + consume(); + if ( c=='/' ) { consume(); break scarf; } + } + else { + while ( c!=EOF && c!='*' ) consume(); + } + } + } + else if ( c=='/' ) { + while ( c!=EOF && c!='\n' ) consume(); + } + } + } + } + + /** Tester; Give grammar filename as arg */ + public static void main(String[] args) throws IOException { + GrammarSpelunker g = new GrammarSpelunker(".", args[0]); + g.parse(); + System.out.println(g.grammarModifier+" grammar "+g.grammarName); + System.out.println("language="+g.language); + System.out.println("tokenVocab="+g.tokenVocab); + System.out.println("imports="+g.importedGrammars); + } +} |