1 files changed, 453 insertions, 0 deletions
diff --git a/tool/src/main/java/org/antlr/tool/Interpreter.java b/tool/src/main/java/org/antlr/tool/Interpreter.java
new file mode 100644
index 0000000..fe4e95c
--- /dev/null
+++ b/tool/src/main/java/org/antlr/tool/Interpreter.java
@@ -0,0 +1,453 @@
+/*
+ * [The "BSD license"]
+ *  Copyright (c) 2010 Terence Parr
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.antlr.tool;
+
+import org.antlr.analysis.DFA;
+import org.antlr.analysis.*;
+import org.antlr.misc.IntervalSet;
+import org.antlr.runtime.*;
+import org.antlr.runtime.debug.BlankDebugEventListener;
+import org.antlr.runtime.debug.DebugEventListener;
+import org.antlr.runtime.debug.ParseTreeBuilder;
+import org.antlr.runtime.tree.ParseTree;
+
+import java.util.List;
+import java.util.Stack;
+
+/** The recognition interpreter/engine for grammars.  Separated
+ *  out of Grammar as it's related, but technically not a Grammar function.
+ *  You create an interpreter for a grammar and an input stream.  This object
+ *  can act as a TokenSource so that you can hook up two grammars (via
+ *  a CommonTokenStream) to lex/parse.  Being a token source only makes sense
+ *  for a lexer grammar of course.
+ */
+public class Interpreter implements TokenSource {
+	protected Grammar grammar;
+	protected IntStream input;
+
+	/** A lexer listener that just creates token objects as they
+	 *  are matched.  scan() use this listener to get a single object.
+	 *  To get a stream of tokens, you must call scan() multiple times,
+	 *  recording the token object result after each call.
+	 */
+	class LexerActionGetTokenType extends BlankDebugEventListener {
+		public CommonToken token;
+		Grammar g;
+		public LexerActionGetTokenType(Grammar g) {
+			this.g = g;
+		}
+
+		public void exitRule(String grammarFileName, String ruleName) {
+			if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ){
+				int type = g.getTokenType(ruleName);
+				int channel = Token.DEFAULT_CHANNEL;
+				token = new CommonToken((CharStream)input,type,channel,0,0);
+			}
+		}
+	}
+
+	public Interpreter(Grammar grammar, IntStream input) {
+		this.grammar = grammar;
+		this.input = input;
+	}
+
+	public Token nextToken() {
+		if ( grammar.type!=Grammar.LEXER ) {
+			return null;
+		}
+		if ( input.LA(1)==CharStream.EOF ) {
+            return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,input.index(),input.index());
+		}
+		int start = input.index();
+		int charPos = ((CharStream)input).getCharPositionInLine();
+		CommonToken token = null;
+		loop:
+		while (input.LA(1)!=CharStream.EOF) {
+			try {
+				token = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
+				break;
+			}
+			catch (RecognitionException re) {
+				// report a problem and try for another
+				reportScanError(re);
+				continue loop;
+			}
+		}
+		// the scan can only set type
+		// we must set the line, and other junk here to make it a complete token
+		int stop = input.index()-1;
+		if ( token==null ) {
+            return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,start,start);
+		}
+		token.setLine(((CharStream)input).getLine());
+		token.setStartIndex(start);
+		token.setStopIndex(stop);
+		token.setCharPositionInLine(charPos);
+		return token;
+	}
+
+	/** For a given input char stream, try to match against the NFA
+	 *  starting at startRule.  This is a deterministic parse even though
+	 *  it is using an NFA because it uses DFAs at each decision point to
+	 *  predict which alternative will succeed.  This is exactly what the
+	 *  generated parser will do.
+	 *
+	 *  This only does lexer grammars.
+	 *
+	 *  Return the token type associated with the final rule end state.
+	 */
+	public void scan(String startRule,
+					 DebugEventListener actions,
+					 List visitedStates)
+		throws RecognitionException
+	{
+		if ( grammar.type!=Grammar.LEXER ) {
+			return;
+		}
+		CharStream in = (CharStream)this.input;
+		//System.out.println("scan("+startRule+",'"+in.substring(in.index(),in.size()-1)+"')");
+		// Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
+		if ( grammar.getRuleStartState(startRule)==null ) {
+			grammar.buildNFA();
+		}
+
+		if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
+			// Create the DFA predictors for each decision
+			grammar.createLookaheadDFAs();
+		}
+
+		// do the parse
+		Stack ruleInvocationStack = new Stack();
+		NFAState start = grammar.getRuleStartState(startRule);
+		NFAState stop = grammar.getRuleStopState(startRule);
+		parseEngine(startRule, start, stop, in, ruleInvocationStack,
+					actions, visitedStates);
+	}
+
+	public CommonToken scan(String startRule)
+		throws RecognitionException
+	{
+		return scan(startRule, null);
+	}
+
+	public CommonToken scan(String startRule,
+							List visitedStates)
+		throws RecognitionException
+	{
+		LexerActionGetTokenType actions = new LexerActionGetTokenType(grammar);
+		scan(startRule, actions, visitedStates);
+		return actions.token;
+	}
+
+	public void parse(String startRule,
+					  DebugEventListener actions,
+					  List visitedStates)
+		throws RecognitionException
+	{
+		//System.out.println("parse("+startRule+")");
+		// Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
+		if ( grammar.getRuleStartState(startRule)==null ) {
+			grammar.buildNFA();
+		}
+		if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
+			// Create the DFA predictors for each decision
+			grammar.createLookaheadDFAs();
+		}
+		// do the parse
+		Stack ruleInvocationStack = new Stack();
+		NFAState start = grammar.getRuleStartState(startRule);
+		NFAState stop = grammar.getRuleStopState(startRule);
+		parseEngine(startRule, start, stop, input, ruleInvocationStack,
+					actions, visitedStates);
+	}
+
+	public ParseTree parse(String startRule)
+		throws RecognitionException
+	{
+		return parse(startRule, null);
+	}
+
+	public ParseTree parse(String startRule, List visitedStates)
+		throws RecognitionException
+	{
+		ParseTreeBuilder actions = new ParseTreeBuilder(grammar.name);
+		try {
+			parse(startRule, actions, visitedStates);
+		}
+		catch (RecognitionException re) {
+			// Errors are tracked via the ANTLRDebugInterface
+			// Exceptions are used just to blast out of the parse engine
+			// The error will be in the parse tree.
+		}
+		return actions.getTree();
+	}
+
+	/** Fill a list of all NFA states visited during the parse */
+	protected void parseEngine(String startRule,
+							   NFAState start,
+							   NFAState stop,
+							   IntStream input,
+							   Stack ruleInvocationStack,
+							   DebugEventListener actions,
+							   List visitedStates)
+		throws RecognitionException
+	{
+		NFAState s = start;
+		if ( actions!=null ) {
+			actions.enterRule(s.nfa.grammar.getFileName(), start.enclosingRule.name);
+		}
+		int t = input.LA(1);
+		while ( s!=stop ) {
+			if ( visitedStates!=null ) {
+				visitedStates.add(s);
+			}
+			/*
+			System.out.println("parse state "+s.stateNumber+" input="+
+				s.nfa.grammar.getTokenDisplayName(t));
+				*/
+			// CASE 1: decision state
+			if ( s.getDecisionNumber()>0 && s.nfa.grammar.getNumberOfAltsForDecisionNFA(s)>1 ) {
+				// decision point, must predict and jump to alt
+				DFA dfa = s.nfa.grammar.getLookaheadDFA(s.getDecisionNumber());
+				/*
+				if ( s.nfa.grammar.type!=Grammar.LEXER ) {
+					System.out.println("decision: "+
+								   dfa.getNFADecisionStartState().getDescription()+
+								   " input="+s.nfa.grammar.getTokenDisplayName(t));
+				}
+				*/
+				int m = input.mark();
+				int predictedAlt = predict(dfa);
+				if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
+					String description = dfa.getNFADecisionStartState().getDescription();
+					NoViableAltException nvae =
+						new NoViableAltException(description,
+													  dfa.getDecisionNumber(),
+													  s.stateNumber,
+													  input);
+					if ( actions!=null ) {
+						actions.recognitionException(nvae);
+					}
+					input.consume(); // recover
+					throw nvae;
+				}
+				input.rewind(m);
+				int parseAlt =
+					s.translateDisplayAltToWalkAlt(predictedAlt);
+				/*
+				if ( s.nfa.grammar.type!=Grammar.LEXER ) {
+					System.out.println("predicted alt "+predictedAlt+", parseAlt "+
+									   parseAlt);
+				}
+				*/
+				NFAState alt;
+				if ( parseAlt > s.nfa.grammar.getNumberOfAltsForDecisionNFA(s) ) {
+					// implied branch of loop etc...
+					alt = s.nfa.grammar.nfa.getState( s.endOfBlockStateNumber );
+				}
+				else {
+					alt = s.nfa.grammar.getNFAStateForAltOfDecision(s, parseAlt);
+				}
+				s = (NFAState)alt.transition[0].target;
+				continue;
+			}
+
+			// CASE 2: finished matching a rule
+			if ( s.isAcceptState() ) { // end of rule node
+				if ( actions!=null ) {
+					actions.exitRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
+				}
+				if ( ruleInvocationStack.empty() ) {
+					// done parsing.  Hit the start state.
+					//System.out.println("stack empty in stop state for "+s.getEnclosingRule());
+					break;
+				}
+				// pop invoking state off the stack to know where to return to
+				NFAState invokingState = (NFAState)ruleInvocationStack.pop();
+				RuleClosureTransition invokingTransition =
+						(RuleClosureTransition)invokingState.transition[0];
+				// move to node after state that invoked this rule
+				s = invokingTransition.followState;
+				continue;
+			}
+
+			Transition trans = s.transition[0];
+			Label label = trans.label;
+			if ( label.isSemanticPredicate() ) {
+				FailedPredicateException fpe =
+					new FailedPredicateException(input,
+												 s.enclosingRule.name,
+												 "can't deal with predicates yet");
+				if ( actions!=null ) {
+					actions.recognitionException(fpe);
+				}
+			}
+
+			// CASE 3: epsilon transition
+			if ( label.isEpsilon() ) {
+				// CASE 3a: rule invocation state
+				if ( trans instanceof RuleClosureTransition ) {
+					ruleInvocationStack.push(s);
+					s = (NFAState)trans.target;
+					//System.out.println("call "+s.enclosingRule.name+" from "+s.nfa.grammar.getFileName());
+					if ( actions!=null ) {
+						actions.enterRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
+					}
+					// could be jumping to new grammar, make sure DFA created
+					if ( !s.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
+						s.nfa.grammar.createLookaheadDFAs();
+					}
+				}
+				// CASE 3b: plain old epsilon transition, just move
+				else {
+					s = (NFAState)trans.target;
+				}
+			}
+
+			// CASE 4: match label on transition
+			else if ( label.matches(t) ) {
+				if ( actions!=null ) {
+					if ( s.nfa.grammar.type == Grammar.PARSER ||
+						 s.nfa.grammar.type == Grammar.COMBINED )
+					{
+						actions.consumeToken(((TokenStream)input).LT(1));
+					}
+				}
+				s = (NFAState)s.transition[0].target;
+				input.consume();
+				t = input.LA(1);
+			}
+
+			// CASE 5: error condition; label is inconsistent with input
+			else {
+				if ( label.isAtom() ) {
+					MismatchedTokenException mte =
+						new MismatchedTokenException(label.getAtom(), input);
+					if ( actions!=null ) {
+						actions.recognitionException(mte);
+					}
+					input.consume(); // recover
+					throw mte;
+				}
+				else if ( label.isSet() ) {
+					MismatchedSetException mse =
+						new MismatchedSetException(((IntervalSet)label.getSet()).toRuntimeBitSet(),
+												   input);
+					if ( actions!=null ) {
+						actions.recognitionException(mse);
+					}
+					input.consume(); // recover
+					throw mse;
+				}
+				else if ( label.isSemanticPredicate() ) {
+					FailedPredicateException fpe =
+						new FailedPredicateException(input,
+													 s.enclosingRule.name,
+													 label.getSemanticContext().toString());
+					if ( actions!=null ) {
+						actions.recognitionException(fpe);
+					}
+					input.consume(); // recover
+					throw fpe;
+				}
+				else {
+					throw new RecognitionException(input); // unknown error
+				}
+			}
+		}
+		//System.out.println("hit stop state for "+stop.getEnclosingRule());
+		if ( actions!=null ) {
+			actions.exitRule(s.nfa.grammar.getFileName(), stop.enclosingRule.name);
+		}
+	}
+
+	/** Given an input stream, return the unique alternative predicted by
+	 *  matching the input.  Upon error, return NFA.INVALID_ALT_NUMBER
+	 *  The first symbol of lookahead is presumed to be primed; that is,
+	 *  input.lookahead(1) must point at the input symbol you want to start
+	 *  predicting with.
+	 */
+	public int predict(DFA dfa) {
+		DFAState s = dfa.startState;
+		int c = input.LA(1);
+		Transition eotTransition = null;
+	dfaLoop:
+		while ( !s.isAcceptState() ) {
+			/*
+			System.out.println("DFA.predict("+s.getStateNumber()+", "+
+					dfa.getNFA().getGrammar().getTokenName(c)+")");
+			*/
+			// for each edge of s, look for intersection with current char
+			for (int i=0; i<s.getNumberOfTransitions(); i++) {
+				Transition t = s.transition(i);
+				// special case: EOT matches any char
+				if ( t.label.matches(c) ) {
+					// take transition i
+					s = (DFAState)t.target;
+					input.consume();
+					c = input.LA(1);
+					continue dfaLoop;
+				}
+				if ( t.label.getAtom()==Label.EOT ) {
+					eotTransition = t;
+				}
+			}
+			if ( eotTransition!=null ) {
+				s = (DFAState)eotTransition.target;
+				continue dfaLoop;
+			}
+			/*
+			ErrorManager.error(ErrorManager.MSG_NO_VIABLE_DFA_ALT,
+							   s,
+							   dfa.nfa.grammar.getTokenName(c));
+			*/
+			return NFA.INVALID_ALT_NUMBER;
+		}
+		// woohoo!  We know which alt to predict
+		// nothing emanates from a stop state; must terminate anyway
+		/*
+		System.out.println("DFA stop state "+s.getStateNumber()+" predicts "+
+				s.getUniquelyPredictedAlt());
+		*/
+		return s.getUniquelyPredictedAlt();
+	}
+
+	public void reportScanError(RecognitionException re) {
+		CharStream cs = (CharStream)input;
+		// print as good of a message as we can, given that we do not have
+		// a Lexer object and, hence, cannot call the routine to get a
+		// decent error message.
+		System.err.println("problem matching token at "+
+			cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
+	}
+
+	public String getSourceName() {
+		return input.getSourceName();
+	}
+
+}