aboutsummaryrefslogtreecommitdiff
path: root/tool/src/main/java/org/antlr/tool/Interpreter.java
diff options
context:
space:
mode:
Diffstat (limited to 'tool/src/main/java/org/antlr/tool/Interpreter.java')
-rw-r--r--tool/src/main/java/org/antlr/tool/Interpreter.java453
1 files changed, 453 insertions, 0 deletions
diff --git a/tool/src/main/java/org/antlr/tool/Interpreter.java b/tool/src/main/java/org/antlr/tool/Interpreter.java
new file mode 100644
index 0000000..fe4e95c
--- /dev/null
+++ b/tool/src/main/java/org/antlr/tool/Interpreter.java
@@ -0,0 +1,453 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2010 Terence Parr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.antlr.tool;
+
+import org.antlr.analysis.DFA;
+import org.antlr.analysis.*;
+import org.antlr.misc.IntervalSet;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.BlankDebugEventListener;
+import org.antlr.runtime.debug.DebugEventListener;
+import org.antlr.runtime.debug.ParseTreeBuilder;
+import org.antlr.runtime.tree.ParseTree;
+
+import java.util.List;
+import java.util.Stack;
+
+/** The recognition interpreter/engine for grammars. Separated
+ * out of Grammar as it's related, but technically not a Grammar function.
+ * You create an interpreter for a grammar and an input stream. This object
+ * can act as a TokenSource so that you can hook up two grammars (via
+ * a CommonTokenStream) to lex/parse. Being a token source only makes sense
+ * for a lexer grammar of course.
+ */
+public class Interpreter implements TokenSource {
+ protected Grammar grammar;
+ protected IntStream input;
+
+ /** A lexer listener that just creates token objects as they
+ * are matched. scan() use this listener to get a single object.
+ * To get a stream of tokens, you must call scan() multiple times,
+ * recording the token object result after each call.
+ */
+ class LexerActionGetTokenType extends BlankDebugEventListener {
+ public CommonToken token;
+ Grammar g;
+ public LexerActionGetTokenType(Grammar g) {
+ this.g = g;
+ }
+
+ public void exitRule(String grammarFileName, String ruleName) {
+ if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ){
+ int type = g.getTokenType(ruleName);
+ int channel = Token.DEFAULT_CHANNEL;
+ token = new CommonToken((CharStream)input,type,channel,0,0);
+ }
+ }
+ }
+
+ public Interpreter(Grammar grammar, IntStream input) {
+ this.grammar = grammar;
+ this.input = input;
+ }
+
+ public Token nextToken() {
+ if ( grammar.type!=Grammar.LEXER ) {
+ return null;
+ }
+ if ( input.LA(1)==CharStream.EOF ) {
+ return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,input.index(),input.index());
+ }
+ int start = input.index();
+ int charPos = ((CharStream)input).getCharPositionInLine();
+ CommonToken token = null;
+ loop:
+ while (input.LA(1)!=CharStream.EOF) {
+ try {
+ token = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
+ break;
+ }
+ catch (RecognitionException re) {
+ // report a problem and try for another
+ reportScanError(re);
+ continue loop;
+ }
+ }
+ // the scan can only set type
+ // we must set the line, and other junk here to make it a complete token
+ int stop = input.index()-1;
+ if ( token==null ) {
+ return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,start,start);
+ }
+ token.setLine(((CharStream)input).getLine());
+ token.setStartIndex(start);
+ token.setStopIndex(stop);
+ token.setCharPositionInLine(charPos);
+ return token;
+ }
+
+ /** For a given input char stream, try to match against the NFA
+ * starting at startRule. This is a deterministic parse even though
+ * it is using an NFA because it uses DFAs at each decision point to
+ * predict which alternative will succeed. This is exactly what the
+ * generated parser will do.
+ *
+ * This only does lexer grammars.
+ *
+ * Return the token type associated with the final rule end state.
+ */
+ public void scan(String startRule,
+ DebugEventListener actions,
+ List visitedStates)
+ throws RecognitionException
+ {
+ if ( grammar.type!=Grammar.LEXER ) {
+ return;
+ }
+ CharStream in = (CharStream)this.input;
+ //System.out.println("scan("+startRule+",'"+in.substring(in.index(),in.size()-1)+"')");
+ // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
+ if ( grammar.getRuleStartState(startRule)==null ) {
+ grammar.buildNFA();
+ }
+
+ if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
+ // Create the DFA predictors for each decision
+ grammar.createLookaheadDFAs();
+ }
+
+ // do the parse
+ Stack ruleInvocationStack = new Stack();
+ NFAState start = grammar.getRuleStartState(startRule);
+ NFAState stop = grammar.getRuleStopState(startRule);
+ parseEngine(startRule, start, stop, in, ruleInvocationStack,
+ actions, visitedStates);
+ }
+
+ public CommonToken scan(String startRule)
+ throws RecognitionException
+ {
+ return scan(startRule, null);
+ }
+
+ public CommonToken scan(String startRule,
+ List visitedStates)
+ throws RecognitionException
+ {
+ LexerActionGetTokenType actions = new LexerActionGetTokenType(grammar);
+ scan(startRule, actions, visitedStates);
+ return actions.token;
+ }
+
+ public void parse(String startRule,
+ DebugEventListener actions,
+ List visitedStates)
+ throws RecognitionException
+ {
+ //System.out.println("parse("+startRule+")");
+ // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
+ if ( grammar.getRuleStartState(startRule)==null ) {
+ grammar.buildNFA();
+ }
+ if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
+ // Create the DFA predictors for each decision
+ grammar.createLookaheadDFAs();
+ }
+ // do the parse
+ Stack ruleInvocationStack = new Stack();
+ NFAState start = grammar.getRuleStartState(startRule);
+ NFAState stop = grammar.getRuleStopState(startRule);
+ parseEngine(startRule, start, stop, input, ruleInvocationStack,
+ actions, visitedStates);
+ }
+
+ public ParseTree parse(String startRule)
+ throws RecognitionException
+ {
+ return parse(startRule, null);
+ }
+
+ public ParseTree parse(String startRule, List visitedStates)
+ throws RecognitionException
+ {
+ ParseTreeBuilder actions = new ParseTreeBuilder(grammar.name);
+ try {
+ parse(startRule, actions, visitedStates);
+ }
+ catch (RecognitionException re) {
+ // Errors are tracked via the ANTLRDebugInterface
+ // Exceptions are used just to blast out of the parse engine
+ // The error will be in the parse tree.
+ }
+ return actions.getTree();
+ }
+
+ /** Fill a list of all NFA states visited during the parse */
+ protected void parseEngine(String startRule,
+ NFAState start,
+ NFAState stop,
+ IntStream input,
+ Stack ruleInvocationStack,
+ DebugEventListener actions,
+ List visitedStates)
+ throws RecognitionException
+ {
+ NFAState s = start;
+ if ( actions!=null ) {
+ actions.enterRule(s.nfa.grammar.getFileName(), start.enclosingRule.name);
+ }
+ int t = input.LA(1);
+ while ( s!=stop ) {
+ if ( visitedStates!=null ) {
+ visitedStates.add(s);
+ }
+ /*
+ System.out.println("parse state "+s.stateNumber+" input="+
+ s.nfa.grammar.getTokenDisplayName(t));
+ */
+ // CASE 1: decision state
+ if ( s.getDecisionNumber()>0 && s.nfa.grammar.getNumberOfAltsForDecisionNFA(s)>1 ) {
+ // decision point, must predict and jump to alt
+ DFA dfa = s.nfa.grammar.getLookaheadDFA(s.getDecisionNumber());
+ /*
+ if ( s.nfa.grammar.type!=Grammar.LEXER ) {
+ System.out.println("decision: "+
+ dfa.getNFADecisionStartState().getDescription()+
+ " input="+s.nfa.grammar.getTokenDisplayName(t));
+ }
+ */
+ int m = input.mark();
+ int predictedAlt = predict(dfa);
+ if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
+ String description = dfa.getNFADecisionStartState().getDescription();
+ NoViableAltException nvae =
+ new NoViableAltException(description,
+ dfa.getDecisionNumber(),
+ s.stateNumber,
+ input);
+ if ( actions!=null ) {
+ actions.recognitionException(nvae);
+ }
+ input.consume(); // recover
+ throw nvae;
+ }
+ input.rewind(m);
+ int parseAlt =
+ s.translateDisplayAltToWalkAlt(predictedAlt);
+ /*
+ if ( s.nfa.grammar.type!=Grammar.LEXER ) {
+ System.out.println("predicted alt "+predictedAlt+", parseAlt "+
+ parseAlt);
+ }
+ */
+ NFAState alt;
+ if ( parseAlt > s.nfa.grammar.getNumberOfAltsForDecisionNFA(s) ) {
+ // implied branch of loop etc...
+ alt = s.nfa.grammar.nfa.getState( s.endOfBlockStateNumber );
+ }
+ else {
+ alt = s.nfa.grammar.getNFAStateForAltOfDecision(s, parseAlt);
+ }
+ s = (NFAState)alt.transition[0].target;
+ continue;
+ }
+
+ // CASE 2: finished matching a rule
+ if ( s.isAcceptState() ) { // end of rule node
+ if ( actions!=null ) {
+ actions.exitRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
+ }
+ if ( ruleInvocationStack.empty() ) {
+ // done parsing. Hit the start state.
+ //System.out.println("stack empty in stop state for "+s.getEnclosingRule());
+ break;
+ }
+ // pop invoking state off the stack to know where to return to
+ NFAState invokingState = (NFAState)ruleInvocationStack.pop();
+ RuleClosureTransition invokingTransition =
+ (RuleClosureTransition)invokingState.transition[0];
+ // move to node after state that invoked this rule
+ s = invokingTransition.followState;
+ continue;
+ }
+
+ Transition trans = s.transition[0];
+ Label label = trans.label;
+ if ( label.isSemanticPredicate() ) {
+ FailedPredicateException fpe =
+ new FailedPredicateException(input,
+ s.enclosingRule.name,
+ "can't deal with predicates yet");
+ if ( actions!=null ) {
+ actions.recognitionException(fpe);
+ }
+ }
+
+ // CASE 3: epsilon transition
+ if ( label.isEpsilon() ) {
+ // CASE 3a: rule invocation state
+ if ( trans instanceof RuleClosureTransition ) {
+ ruleInvocationStack.push(s);
+ s = (NFAState)trans.target;
+ //System.out.println("call "+s.enclosingRule.name+" from "+s.nfa.grammar.getFileName());
+ if ( actions!=null ) {
+ actions.enterRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
+ }
+ // could be jumping to new grammar, make sure DFA created
+ if ( !s.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
+ s.nfa.grammar.createLookaheadDFAs();
+ }
+ }
+ // CASE 3b: plain old epsilon transition, just move
+ else {
+ s = (NFAState)trans.target;
+ }
+ }
+
+ // CASE 4: match label on transition
+ else if ( label.matches(t) ) {
+ if ( actions!=null ) {
+ if ( s.nfa.grammar.type == Grammar.PARSER ||
+ s.nfa.grammar.type == Grammar.COMBINED )
+ {
+ actions.consumeToken(((TokenStream)input).LT(1));
+ }
+ }
+ s = (NFAState)s.transition[0].target;
+ input.consume();
+ t = input.LA(1);
+ }
+
+ // CASE 5: error condition; label is inconsistent with input
+ else {
+ if ( label.isAtom() ) {
+ MismatchedTokenException mte =
+ new MismatchedTokenException(label.getAtom(), input);
+ if ( actions!=null ) {
+ actions.recognitionException(mte);
+ }
+ input.consume(); // recover
+ throw mte;
+ }
+ else if ( label.isSet() ) {
+ MismatchedSetException mse =
+ new MismatchedSetException(((IntervalSet)label.getSet()).toRuntimeBitSet(),
+ input);
+ if ( actions!=null ) {
+ actions.recognitionException(mse);
+ }
+ input.consume(); // recover
+ throw mse;
+ }
+ else if ( label.isSemanticPredicate() ) {
+ FailedPredicateException fpe =
+ new FailedPredicateException(input,
+ s.enclosingRule.name,
+ label.getSemanticContext().toString());
+ if ( actions!=null ) {
+ actions.recognitionException(fpe);
+ }
+ input.consume(); // recover
+ throw fpe;
+ }
+ else {
+ throw new RecognitionException(input); // unknown error
+ }
+ }
+ }
+ //System.out.println("hit stop state for "+stop.getEnclosingRule());
+ if ( actions!=null ) {
+ actions.exitRule(s.nfa.grammar.getFileName(), stop.enclosingRule.name);
+ }
+ }
+
+ /** Given an input stream, return the unique alternative predicted by
+ * matching the input. Upon error, return NFA.INVALID_ALT_NUMBER
+ * The first symbol of lookahead is presumed to be primed; that is,
+ * input.lookahead(1) must point at the input symbol you want to start
+ * predicting with.
+ */
+ public int predict(DFA dfa) {
+ DFAState s = dfa.startState;
+ int c = input.LA(1);
+ Transition eotTransition = null;
+ dfaLoop:
+ while ( !s.isAcceptState() ) {
+ /*
+ System.out.println("DFA.predict("+s.getStateNumber()+", "+
+ dfa.getNFA().getGrammar().getTokenName(c)+")");
+ */
+ // for each edge of s, look for intersection with current char
+ for (int i=0; i<s.getNumberOfTransitions(); i++) {
+ Transition t = s.transition(i);
+ // special case: EOT matches any char
+ if ( t.label.matches(c) ) {
+ // take transition i
+ s = (DFAState)t.target;
+ input.consume();
+ c = input.LA(1);
+ continue dfaLoop;
+ }
+ if ( t.label.getAtom()==Label.EOT ) {
+ eotTransition = t;
+ }
+ }
+ if ( eotTransition!=null ) {
+ s = (DFAState)eotTransition.target;
+ continue dfaLoop;
+ }
+ /*
+ ErrorManager.error(ErrorManager.MSG_NO_VIABLE_DFA_ALT,
+ s,
+ dfa.nfa.grammar.getTokenName(c));
+ */
+ return NFA.INVALID_ALT_NUMBER;
+ }
+ // woohoo! We know which alt to predict
+ // nothing emanates from a stop state; must terminate anyway
+ /*
+ System.out.println("DFA stop state "+s.getStateNumber()+" predicts "+
+ s.getUniquelyPredictedAlt());
+ */
+ return s.getUniquelyPredictedAlt();
+ }
+
+ public void reportScanError(RecognitionException re) {
+ CharStream cs = (CharStream)input;
+ // print as good of a message as we can, given that we do not have
+ // a Lexer object and, hence, cannot call the routine to get a
+ // decent error message.
+ System.err.println("problem matching token at "+
+ cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
+ }
+
+ public String getSourceName() {
+ return input.getSourceName();
+ }
+
+}