aboutsummaryrefslogtreecommitdiff
path: root/antlr-3.4/runtime/Java/src/main/java/org/antlr/runtime/BaseRecognizer.java
diff options
context:
space:
mode:
Diffstat (limited to 'antlr-3.4/runtime/Java/src/main/java/org/antlr/runtime/BaseRecognizer.java')
-rw-r--r--antlr-3.4/runtime/Java/src/main/java/org/antlr/runtime/BaseRecognizer.java886
1 files changed, 886 insertions, 0 deletions
diff --git a/antlr-3.4/runtime/Java/src/main/java/org/antlr/runtime/BaseRecognizer.java b/antlr-3.4/runtime/Java/src/main/java/org/antlr/runtime/BaseRecognizer.java
new file mode 100644
index 0000000..667664d
--- /dev/null
+++ b/antlr-3.4/runtime/Java/src/main/java/org/antlr/runtime/BaseRecognizer.java
@@ -0,0 +1,886 @@
+/*
+ [The "BSD license"]
+ Copyright (c) 2005-2009 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.antlr.runtime;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** A generic recognizer that can handle recognizers generated from
+ * lexer, parser, and tree grammars. This is all the parsing
+ * support code essentially; most of it is error recovery stuff and
+ * backtracking.
+ */
public abstract class BaseRecognizer {
	// sentinel values returned by the rule memoization cache;
	// real stop-token indexes are always >= 0
	public static final int MEMO_RULE_FAILED = -2;
	public static final int MEMO_RULE_UNKNOWN = -1;
	// initial size of the following[] stack; pushFollow() doubles it on demand
	public static final int INITIAL_FOLLOW_STACK_SIZE = 100;

	// copies from Token object for convenience in actions
	public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
	public static final int HIDDEN = Token.HIDDEN_CHANNEL;

	// method name filtered out of rule invocation stack traces
	// (the lexer entry point is not a grammar rule)
	public static final String NEXT_TOKEN_RULE_NAME = "nextToken";

	/** State of a lexer, parser, or tree parser is collected into a state
	 *  object so the state can be shared.  This sharing is needed to
	 *  have one grammar import others and share same error variables
	 *  and other state variables.  It's a kind of explicit multiple
	 *  inheritance via delegation of methods and shared state.
	 */
	protected RecognizerSharedState state;

	/** Create a recognizer with its own fresh shared state. */
	public BaseRecognizer() {
		state = new RecognizerSharedState();
	}

	/** Create a recognizer using the given shared state (composite
	 *  grammars share one state object); null gets a fresh state instead.
	 */
	public BaseRecognizer(RecognizerSharedState state) {
		if ( state==null ) {
			state = new RecognizerSharedState();
		}
		this.state = state;
	}
+
+ /** reset the parser's state; subclasses must rewinds the input stream */
+ public void reset() {
+ // wack everything related to error recovery
+ if ( state==null ) {
+ return; // no shared state work to do
+ }
+ state._fsp = -1;
+ state.errorRecovery = false;
+ state.lastErrorIndex = -1;
+ state.failed = false;
+ state.syntaxErrors = 0;
+ // wack everything related to backtracking and memoization
+ state.backtracking = 0;
+ for (int i = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) { // wipe cache
+ state.ruleMemo[i] = null;
+ }
+ }
+
+
	/** Match current input symbol against ttype. Attempt
	 *  single token insertion or deletion error recovery. If
	 *  that fails, throw MismatchedTokenException.
	 *
	 *  To turn off single token insertion or deletion error
	 *  recovery, override recoverFromMismatchedToken() and have it
	 *  throw an exception. See TreeParser.recoverFromMismatchedToken().
	 *  This way any error in a rule will cause an exception and
	 *  immediate exit from rule. Rule would recover by resynchronizing
	 *  to the set of symbols that can follow rule ref.
	 */
	public Object match(IntStream input, int ttype, BitSet follow)
		throws RecognitionException
	{
		//System.out.println("match "+((TokenStream)input).LT(1));
		Object matchedSymbol = getCurrentInputSymbol(input);
		if ( input.LA(1)==ttype ) {
			input.consume();
			// a successful match takes the recognizer out of error-recovery mode
			state.errorRecovery = false;
			state.failed = false;
			return matchedSymbol;
		}
		if ( state.backtracking>0 ) {
			// while guessing, never attempt recovery; just flag the failure
			state.failed = true;
			return matchedSymbol;
		}
		// try single-token deletion/insertion; throws if it cannot recover
		matchedSymbol = recoverFromMismatchedToken(input, ttype, follow);
		return matchedSymbol;
	}
+
	/** Match the wildcard: in a symbol */
	public void matchAny(IntStream input) {
		// the wildcard always matches, so this also ends error-recovery mode
		state.errorRecovery = false;
		state.failed = false;
		input.consume();
	}
+
	/** True iff deleting the current token would let the parser resume,
	 *  i.e., the NEXT lookahead token is the one we were expecting.
	 */
	public boolean mismatchIsUnwantedToken(IntStream input, int ttype) {
		return input.LA(2)==ttype;
	}
+
	/** True iff the current input symbol is consistent with what could
	 *  come after the expected token, meaning error recovery is free to
	 *  "insert" the missing token and resume.  A null follow set means
	 *  we have no information, so answer false.
	 */
	public boolean mismatchIsMissingToken(IntStream input, BitSet follow) {
		if ( follow==null ) {
			// we have no information about the follow; we can only consume
			// a single token and hope for the best
			return false;
		}
		// compute what can follow this grammar element reference
		if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
			BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
			follow = follow.or(viableTokensFollowingThisRule);
			if ( state._fsp>=0 ) { // remove EOR if we're not the start symbol
				follow.remove(Token.EOR_TOKEN_TYPE);
			}
		}
		// if current token is consistent with what could come after set
		// then we know we're missing a token; error recovery is free to
		// "insert" the missing token

		//System.out.println("viable tokens="+follow.toString(getTokenNames()));
		//System.out.println("LT(1)="+((TokenStream)input).LT(1));

		// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
		// in follow set to indicate that the fall of the start symbol is
		// in the set (EOF can follow).
		if ( follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) {
			//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
			return true;
		}
		return false;
	}
+
	/** Report a recognition problem.
	 *
	 *  This method sets errorRecovery to indicate the parser is recovering
	 *  not parsing.  Once in recovery mode, no errors are generated.
	 *  To get out of recovery mode, the parser must successfully match
	 *  a token (after a resync).  So it will go:
	 *
	 *  1. error occurs
	 *  2. enter recovery mode, report error
	 *  3. consume until token found in resynch set
	 *  4. try to resume parsing
	 *  5. next match() will reset errorRecovery mode
	 *
	 *  If you override, make sure to update syntaxErrors if you care about that.
	 */
	public void reportError(RecognitionException e) {
		// if we've already reported an error and have not matched a token
		// yet successfully, don't report any errors.
		if ( state.errorRecovery ) {
			//System.err.print("[SPURIOUS] ");
			return;
		}
		state.syntaxErrors++; // don't count spurious
		state.errorRecovery = true;

		displayRecognitionError(this.getTokenNames(), e);
	}
+
+ public void displayRecognitionError(String[] tokenNames,
+ RecognitionException e)
+ {
+ String hdr = getErrorHeader(e);
+ String msg = getErrorMessage(e, tokenNames);
+ emitErrorMessage(hdr+" "+msg);
+ }
+
+ /** What error message should be generated for the various
+ * exception types?
+ *
+ * Not very object-oriented code, but I like having all error message
+ * generation within one method rather than spread among all of the
+ * exception classes. This also makes it much easier for the exception
+ * handling because the exception classes do not have to have pointers back
+ * to this object to access utility routines and so on. Also, changing
+ * the message for an exception type would be difficult because you
+ * would have to subclassing exception, but then somehow get ANTLR
+ * to make those kinds of exception objects instead of the default.
+ * This looks weird, but trust me--it makes the most sense in terms
+ * of flexibility.
+ *
+ * For grammar debugging, you will want to override this to add
+ * more information such as the stack frame with
+ * getRuleInvocationStack(e, this.getClass().getName()) and,
+ * for no viable alts, the decision description and state etc...
+ *
+ * Override this to change the message generated for one or more
+ * exception types.
+ */
+ public String getErrorMessage(RecognitionException e, String[] tokenNames) {
+ String msg = e.getMessage();
+ if ( e instanceof UnwantedTokenException ) {
+ UnwantedTokenException ute = (UnwantedTokenException)e;
+ String tokenName="<unknown>";
+ if ( ute.expecting== Token.EOF ) {
+ tokenName = "EOF";
+ }
+ else {
+ tokenName = tokenNames[ute.expecting];
+ }
+ msg = "extraneous input "+getTokenErrorDisplay(ute.getUnexpectedToken())+
+ " expecting "+tokenName;
+ }
+ else if ( e instanceof MissingTokenException ) {
+ MissingTokenException mte = (MissingTokenException)e;
+ String tokenName="<unknown>";
+ if ( mte.expecting== Token.EOF ) {
+ tokenName = "EOF";
+ }
+ else {
+ tokenName = tokenNames[mte.expecting];
+ }
+ msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token);
+ }
+ else if ( e instanceof MismatchedTokenException ) {
+ MismatchedTokenException mte = (MismatchedTokenException)e;
+ String tokenName="<unknown>";
+ if ( mte.expecting== Token.EOF ) {
+ tokenName = "EOF";
+ }
+ else {
+ tokenName = tokenNames[mte.expecting];
+ }
+ msg = "mismatched input "+getTokenErrorDisplay(e.token)+
+ " expecting "+tokenName;
+ }
+ else if ( e instanceof MismatchedTreeNodeException ) {
+ MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
+ String tokenName="<unknown>";
+ if ( mtne.expecting==Token.EOF ) {
+ tokenName = "EOF";
+ }
+ else {
+ tokenName = tokenNames[mtne.expecting];
+ }
+ msg = "mismatched tree node: "+mtne.node+
+ " expecting "+tokenName;
+ }
+ else if ( e instanceof NoViableAltException ) {
+ //NoViableAltException nvae = (NoViableAltException)e;
+ // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
+ // and "(decision="+nvae.decisionNumber+") and
+ // "state "+nvae.stateNumber
+ msg = "no viable alternative at input "+getTokenErrorDisplay(e.token);
+ }
+ else if ( e instanceof EarlyExitException ) {
+ //EarlyExitException eee = (EarlyExitException)e;
+ // for development, can add "(decision="+eee.decisionNumber+")"
+ msg = "required (...)+ loop did not match anything at input "+
+ getTokenErrorDisplay(e.token);
+ }
+ else if ( e instanceof MismatchedSetException ) {
+ MismatchedSetException mse = (MismatchedSetException)e;
+ msg = "mismatched input "+getTokenErrorDisplay(e.token)+
+ " expecting set "+mse.expecting;
+ }
+ else if ( e instanceof MismatchedNotSetException ) {
+ MismatchedNotSetException mse = (MismatchedNotSetException)e;
+ msg = "mismatched input "+getTokenErrorDisplay(e.token)+
+ " expecting set "+mse.expecting;
+ }
+ else if ( e instanceof FailedPredicateException ) {
+ FailedPredicateException fpe = (FailedPredicateException)e;
+ msg = "rule "+fpe.ruleName+" failed predicate: {"+
+ fpe.predicateText+"}?";
+ }
+ return msg;
+ }
+
	/** Get number of recognition errors (lexer, parser, tree parser).  Each
	 *  recognizer tracks its own number.  So parser and lexer each have
	 *  separate count.  Does not count the spurious errors found between
	 *  an error and next valid token match.
	 *
	 *  See also reportError().
	 */
	public int getNumberOfSyntaxErrors() {
		return state.syntaxErrors;
	}
+
+ /** What is the error header, normally line/character position information? */
+ public String getErrorHeader(RecognitionException e) {
+ if ( getSourceName()!=null )
+ return getSourceName()+" line "+e.line+":"+e.charPositionInLine;
+
+ return "line "+e.line+":"+e.charPositionInLine;
+ }
+
	/** How should a token be displayed in an error message? The default
	 *  is to display just the text, but during development you might
	 *  want to have a lot of information spit out.  Override in that case
	 *  to use t.toString() (which, for CommonToken, dumps everything about
	 *  the token). This is better than forcing you to override a method in
	 *  your token objects because you don't have to go modify your lexer
	 *  so that it creates a new Java type.
	 */
	public String getTokenErrorDisplay(Token t) {
		String s = t.getText();
		if ( s==null ) {
			// no text available; show the type instead, e.g. "<4>"
			if ( t.getType()==Token.EOF ) {
				s = "<EOF>";
			}
			else {
				s = "<"+t.getType()+">";
			}
		}
		// escape whitespace so the token text stays on one message line;
		// "\\\\n" is the regex replacement for a literal backslash-n
		s = s.replaceAll("\n","\\\\n");
		s = s.replaceAll("\r","\\\\r");
		s = s.replaceAll("\t","\\\\t");
		return "'"+s+"'";
	}
+
	/** Override this method to change where error messages go; the default
	 *  writes the fully formatted message to stderr.
	 */
	public void emitErrorMessage(String msg) {
		System.err.println(msg);
	}
+
	/** Recover from an error found on the input stream.  This is
	 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
	 *  single token insertion and deletion, this will usually not
	 *  handle mismatched symbol exceptions but there could be a mismatched
	 *  token that the match() routine could not recover from.
	 */
	public void recover(IntStream input, RecognitionException re) {
		if ( state.lastErrorIndex==input.index() ) {
			// uh oh, another error at same token index; must be a case
			// where LT(1) is in the recovery token set so nothing is
			// consumed; consume a single token so at least to prevent
			// an infinite loop; this is a failsafe.
			input.consume();
		}
		state.lastErrorIndex = input.index();
		// resync: skip ahead until LA(1) is in the combined follow sets
		BitSet followSet = computeErrorRecoverySet();
		beginResync();
		consumeUntil(input, followSet);
		endResync();
	}
+
	/** A hook to listen in on the token consumption during error recovery.
	 *  The DebugParser subclasses this to fire events to the listener.
	 */
	public void beginResync() {
	}

	/** Matching hook called when resynchronization is finished. */
	public void endResync() {
	}
+
	/** Compute the error recovery set for the current rule. During
+ * rule invocation, the parser pushes the set of tokens that can
+ * follow that rule reference on the stack; this amounts to
+ * computing FIRST of what follows the rule reference in the
+ * enclosing rule. This local follow set only includes tokens
+ * from within the rule; i.e., the FIRST computation done by
+ * ANTLR stops at the end of a rule.
+ *
+ * EXAMPLE
+ *
+ * When you find a "no viable alt exception", the input is not
+ * consistent with any of the alternatives for rule r. The best
+ * thing to do is to consume tokens until you see something that
+ * can legally follow a call to r *or* any rule that called r.
+ * You don't want the exact set of viable next tokens because the
+ * input might just be missing a token--you might consume the
+ * rest of the input looking for one of the missing tokens.
+ *
+ * Consider grammar:
+ *
+ * a : '[' b ']'
+ * | '(' b ')'
+ * ;
+ * b : c '^' INT ;
+ * c : ID
+ * | INT
+ * ;
+ *
+ * At each rule invocation, the set of tokens that could follow
+ * that rule is pushed on a stack. Here are the various "local"
+ * follow sets:
+ *
+ * FOLLOW(b1_in_a) = FIRST(']') = ']'
+ * FOLLOW(b2_in_a) = FIRST(')') = ')'
+ * FOLLOW(c_in_b) = FIRST('^') = '^'
+ *
+ * Upon erroneous input "[]", the call chain is
+ *
+ * a -> b -> c
+ *
+ * and, hence, the follow context stack is:
+ *
+ * depth local follow set after call to rule
+ * 0 <EOF> a (from main())
+ * 1 ']' b
+ * 3 '^' c
+ *
+ * Notice that ')' is not included, because b would have to have
+ * been called from a different context in rule a for ')' to be
+ * included.
+ *
+ * For error recovery, we cannot consider FOLLOW(c)
+ * (context-sensitive or otherwise). We need the combined set of
+ * all context-sensitive FOLLOW sets--the set of all tokens that
+ * could follow any reference in the call chain. We need to
+ * resync to one of those tokens. Note that FOLLOW(c)='^' and if
+ * we resync'd to that token, we'd consume until EOF. We need to
+ * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
+ * In this case, for input "[]", LA(1) is in this set so we would
+ * not consume anything and after printing an error rule c would
+ * return normally. It would not find the required '^' though.
+ * At this point, it gets a mismatched token error and throws an
+ * exception (since LA(1) is not in the viable following token
+ * set). The rule exception handler tries to recover, but finds
+ * the same recovery set and doesn't consume anything. Rule b
+ * exits normally returning to rule a. Now it finds the ']' (and
+ * with the successful match exits errorRecovery mode).
+ *
	 * So, you can see that the parser walks up the call chain looking
+ * for the token that was a member of the recovery set.
+ *
+ * Errors are not generated in errorRecovery mode.
+ *
+ * ANTLR's error recovery mechanism is based upon original ideas:
+ *
+ * "Algorithms + Data Structures = Programs" by Niklaus Wirth
+ *
+ * and
+ *
+ * "A note on error recovery in recursive descent parsers":
+ * http://portal.acm.org/citation.cfm?id=947902.947905
+ *
+ * Later, Josef Grosch had some good ideas:
+ *
+ * "Efficient and Comfortable Error Recovery in Recursive Descent
+ * Parsers":
+ * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
+ *
+ * Like Grosch I implemented local FOLLOW sets that are combined
+ * at run-time upon error to avoid overhead during parsing.
+ */
	protected BitSet computeErrorRecoverySet() {
		// union of ALL follow sets on the stack (exact=false); see comment above
		return combineFollows(false);
	}
+
+ /** Compute the context-sensitive FOLLOW set for current rule.
+ * This is set of token types that can follow a specific rule
+ * reference given a specific call chain. You get the set of
+ * viable tokens that can possibly come next (lookahead depth 1)
+ * given the current call chain. Contrast this with the
+ * definition of plain FOLLOW for rule r:
+ *
+ * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
+ *
+ * where x in T* and alpha, beta in V*; T is set of terminals and
+ * V is the set of terminals and nonterminals. In other words,
+ * FOLLOW(r) is the set of all tokens that can possibly follow
+ * references to r in *any* sentential form (context). At
+ * runtime, however, we know precisely which context applies as
+ * we have the call chain. We may compute the exact (rather
+ * than covering superset) set of following tokens.
+ *
+ * For example, consider grammar:
+ *
+ * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
+ * | "return" expr '.'
+ * ;
+ * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
+ * atom : INT // FOLLOW(atom)=={'+',')',';','.'}
+ * | '(' expr ')'
+ * ;
+ *
+ * The FOLLOW sets are all inclusive whereas context-sensitive
+ * FOLLOW sets are precisely what could follow a rule reference.
	 * For input "i=(3);", here is the derivation:
+ *
+ * stat => ID '=' expr ';'
+ * => ID '=' atom ('+' atom)* ';'
+ * => ID '=' '(' expr ')' ('+' atom)* ';'
+ * => ID '=' '(' atom ')' ('+' atom)* ';'
+ * => ID '=' '(' INT ')' ('+' atom)* ';'
+ * => ID '=' '(' INT ')' ';'
+ *
+ * At the "3" token, you'd have a call chain of
+ *
+ * stat -> expr -> atom -> expr -> atom
+ *
+ * What can follow that specific nested ref to atom? Exactly ')'
+ * as you can see by looking at the derivation of this specific
+ * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
+ *
+ * You want the exact viable token set when recovering from a
+ * token mismatch. Upon token mismatch, if LA(1) is member of
+ * the viable next token set, then you know there is most likely
+ * a missing token in the input stream. "Insert" one by just not
+ * throwing an exception.
+ */
	protected BitSet computeContextSensitiveRuleFOLLOW() {
		// exact=true: stop combining at the first set that cannot see EOR
		return combineFollows(true);
	}
+
+ // what is exact? it seems to only add sets from above on stack
+ // if EOR is in set i. When it sees a set w/o EOR, it stops adding.
+ // Why would we ever want them all? Maybe no viable alt instead of
+ // mismatched token?
	/** Union the local follow sets on the stack, top down.  With exact=true,
	 *  stop combining at the first set that cannot see end-of-rule (EOR),
	 *  and keep EOR in the result only if the bottom-of-stack (start rule)
	 *  set contributed it, which signals that EOF can follow.
	 */
	protected BitSet combineFollows(boolean exact) {
		int top = state._fsp;
		BitSet followSet = new BitSet();
		for (int i=top; i>=0; i--) {
			BitSet localFollowSet = (BitSet)state.following[i];
			/*
			System.out.println("local follow depth "+i+"="+
							   localFollowSet.toString(getTokenNames())+")");
			*/
			followSet.orInPlace(localFollowSet);
			if ( exact ) {
				// can we see end of rule?
				if ( localFollowSet.member(Token.EOR_TOKEN_TYPE) ) {
					// Only leave EOR in set if at top (start rule); this lets
					// us know if have to include follow(start rule); i.e., EOF
					if ( i>0 ) {
						followSet.remove(Token.EOR_TOKEN_TYPE);
					}
				}
				else { // can't see end of rule, quit
					break;
				}
			}
		}
		return followSet;
	}
+
+ /** Attempt to recover from a single missing or extra token.
+ *
+ * EXTRA TOKEN
+ *
+ * LA(1) is not what we are looking for. If LA(2) has the right token,
+ * however, then assume LA(1) is some extra spurious token. Delete it
+ * and LA(2) as if we were doing a normal match(), which advances the
+ * input.
+ *
+ * MISSING TOKEN
+ *
+ * If current token is consistent with what could come after
+ * ttype then it is ok to "insert" the missing token, else throw
+ * exception For example, Input "i=(3;" is clearly missing the
+ * ')'. When the parser returns from the nested call to expr, it
+ * will have call chain:
+ *
+ * stat -> expr -> atom
+ *
+ * and it will be trying to match the ')' at this point in the
+ * derivation:
+ *
+ * => ID '=' '(' INT ')' ('+' atom)* ';'
+ * ^
+ * match() will see that ';' doesn't match ')' and report a
+ * mismatched token error. To recover, it sees that LA(1)==';'
+ * is in the set of tokens that can follow the ')' token
+ * reference in rule atom. It can assume that you forgot the ')'.
+ */
	protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
		throws RecognitionException
	{
		RecognitionException e = null;
		// if next token is what we are looking for then "delete" this token
		if ( mismatchIsUnwantedToken(input, ttype) ) {
			e = new UnwantedTokenException(ttype, input);
			/*
			System.err.println("recoverFromMismatchedToken deleting "+
							   ((TokenStream)input).LT(1)+
							   " since "+((TokenStream)input).LT(2)+" is what we want");
			*/
			beginResync();
			input.consume(); // simply delete extra token
			endResync();
			reportError(e);  // report after consuming so AW sees the token in the exception
			// we want to return the token we're actually matching
			Object matchedSymbol = getCurrentInputSymbol(input);
			input.consume(); // move past ttype token as if all were ok
			return matchedSymbol;
		}
		// can't recover with single token deletion, try insertion
		if ( mismatchIsMissingToken(input, follow) ) {
			// note: e is still null here; getMissingSymbol implementations
			// must tolerate a null exception argument
			Object inserted = getMissingSymbol(input, e, ttype, follow);
			e = new MissingTokenException(ttype, input, inserted);
			reportError(e); // report after inserting so AW sees the token in the exception
			return inserted;
		}
		// even that didn't work; must throw the exception
		e = new MismatchedTokenException(ttype, input);
		throw e;
	}
+
	/** Not currently used.  Attempts missing-token recovery for a set
	 *  mismatch; otherwise rethrows the supplied exception.
	 */
	public Object recoverFromMismatchedSet(IntStream input,
										   RecognitionException e,
										   BitSet follow)
		throws RecognitionException
	{
		if ( mismatchIsMissingToken(input, follow) ) {
			// System.out.println("missing token");
			reportError(e);
			// we don't know how to conjure up a token for sets yet
			return getMissingSymbol(input, e, Token.INVALID_TOKEN_TYPE, follow);
		}
		// TODO do single token deletion like above for Token mismatch
		throw e;
	}
+
	/** Match needs to return the current input symbol, which gets put
	 *  into the label for the associated token ref; e.g., x=ID.  Token
	 *  and tree parsers need to return different objects.  Rather than test
	 *  for input stream type or change the IntStream interface, I use
	 *  a simple method to ask the recognizer to tell me what the current
	 *  input symbol is.
	 *
	 *  This is ignored for lexers.  Subclasses override; base returns null.
	 */
	protected Object getCurrentInputSymbol(IntStream input) { return null; }
+
	/** Conjure up a missing token during error recovery.
	 *
	 *  The recognizer attempts to recover from single missing
	 *  symbols. But, actions might refer to that missing symbol.
	 *  For example, x=ID {f($x);}. The action clearly assumes
	 *  that there has been an identifier matched previously and that
	 *  $x points at that token. If that token is missing, but
	 *  the next token in the stream is what we want we assume that
	 *  this token is missing and we keep going. Because we
	 *  have to return some token to replace the missing token,
	 *  we have to conjure one up. This method gives the user control
	 *  over the tokens returned for missing tokens. Mostly,
	 *  you will want to create something special for identifier
	 *  tokens. For literals such as '{' and ',', the default
	 *  action in the parser or tree parser works. It simply creates
	 *  a CommonToken of the appropriate type. The text will be the token.
	 *  If you change what tokens must be created by the lexer,
	 *  override this method to create the appropriate tokens.
	 */
	protected Object getMissingSymbol(IntStream input,
									  RecognitionException e,
									  int expectedTokenType,
									  BitSet follow)
	{
		return null; // base implementation conjures nothing; subclasses override
	}
+
+ public void consumeUntil(IntStream input, int tokenType) {
+ //System.out.println("consumeUntil "+tokenType);
+ int ttype = input.LA(1);
+ while (ttype != Token.EOF && ttype != tokenType) {
+ input.consume();
+ ttype = input.LA(1);
+ }
+ }
+
+ /** Consume tokens until one matches the given token set */
+ public void consumeUntil(IntStream input, BitSet set) {
+ //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
+ int ttype = input.LA(1);
+ while (ttype != Token.EOF && !set.member(ttype) ) {
+ //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
+ input.consume();
+ ttype = input.LA(1);
+ }
+ }
+
+ /** Push a rule's follow set using our own hardcoded stack */
+ protected void pushFollow(BitSet fset) {
+ if ( (state._fsp +1)>=state.following.length ) {
+ BitSet[] f = new BitSet[state.following.length*2];
+ System.arraycopy(state.following, 0, f, 0, state.following.length);
+ state.following = f;
+ }
+ state.following[++state._fsp] = fset;
+ }
+
	/** Return List&lt;String&gt; of the rules in your parser instance
	 *  leading up to a call to this method.  You could override if
	 *  you want more details such as the file/line info of where
	 *  in the parser java code a rule is invoked.
	 *
	 *  This is very useful for error messages and for context-sensitive
	 *  error recovery.
	 */
	public List getRuleInvocationStack() {
		String parserClassName = getClass().getName();
		// a fresh Throwable captures the current call stack for inspection
		return getRuleInvocationStack(new Throwable(), parserClassName);
	}
+
+ /** A more general version of getRuleInvocationStack where you can
+ * pass in, for example, a RecognitionException to get it's rule
+ * stack trace. This routine is shared with all recognizers, hence,
+ * static.
+ *
+ * TODO: move to a utility class or something; weird having lexer call this
+ */
+ public static List getRuleInvocationStack(Throwable e,
+ String recognizerClassName)
+ {
+ List rules = new ArrayList();
+ StackTraceElement[] stack = e.getStackTrace();
+ int i = 0;
+ for (i=stack.length-1; i>=0; i--) {
+ StackTraceElement t = stack[i];
+ if ( t.getClassName().startsWith("org.antlr.runtime.") ) {
+ continue; // skip support code such as this method
+ }
+ if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) {
+ continue;
+ }
+ if ( !t.getClassName().equals(recognizerClassName) ) {
+ continue; // must not be part of this parser
+ }
+ rules.add(t.getMethodName());
+ }
+ return rules;
+ }
+
	/** Current depth of nested backtracking guesses; 0 means not guessing. */
	public int getBacktrackingLevel() { return state.backtracking; }

	/** Force the backtracking depth; used by generated code and debuggers. */
	public void setBacktrackingLevel(int n) { state.backtracking = n; }

	/** Return whether or not a backtracking attempt failed. */
	public boolean failed() { return state.failed; }
+
	/** Used to print out token names like ID during debugging and
	 *  error reporting.  The generated parsers implement a method
	 *  that overrides this to point to their String[] tokenNames.
	 */
	public String[] getTokenNames() {
		return null; // base implementation has no token table
	}

	/** For debugging and other purposes, might want the grammar name.
	 *  Have ANTLR generate an implementation for this method.
	 */
	public String getGrammarFileName() {
		return null; // base implementation has no grammar name
	}

	/** Name describing where input symbols come from; used by
	 *  getErrorHeader() to prefix error messages.  Generated/concrete
	 *  recognizers must supply this (may return null).
	 */
	public abstract String getSourceName();
+
+ /** A convenience method for use most often with template rewrites.
+ * Convert a List<Token> to List<String>
+ */
+ public List toStrings(List tokens) {
+ if ( tokens==null ) return null;
+ List strings = new ArrayList(tokens.size());
+ for (int i=0; i<tokens.size(); i++) {
+ strings.add(((Token)tokens.get(i)).getText());
+ }
+ return strings;
+ }
+
	/** Given a rule number and a start token index number, return
	 *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
	 *  start index.  If this rule has parsed input starting from the
	 *  start index before, then return where the rule stopped parsing.
	 *  It returns the index of the last token matched by the rule.
	 *
	 *  For now we use a hashtable and just the slow Object-based one.
	 *  Later, we can make a special one for ints and also one that
	 *  tosses out data after we commit past input position i.
	 */
	public int getRuleMemoization(int ruleIndex, int ruleStartIndex) {
		// side effect: lazily create the per-rule map on first query
		if ( state.ruleMemo[ruleIndex]==null ) {
			state.ruleMemo[ruleIndex] = new HashMap();
		}
		Integer stopIndexI =
			(Integer)state.ruleMemo[ruleIndex].get(new Integer(ruleStartIndex));
		if ( stopIndexI==null ) {
			return MEMO_RULE_UNKNOWN;
		}
		return stopIndexI.intValue();
	}
+
	/** Has this rule already parsed input at the current index in the
	 *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
	 *  If we attempted but failed to parse properly before, return
	 *  MEMO_RULE_FAILED.
	 *
	 *  This method has a side-effect: if we have seen this input for
	 *  this rule and successfully parsed before, then seek ahead to
	 *  1 past the stop token matched for this rule last time.
	 */
	public boolean alreadyParsedRule(IntStream input, int ruleIndex) {
		int stopIndex = getRuleMemoization(ruleIndex, input.index());
		if ( stopIndex==MEMO_RULE_UNKNOWN ) {
			return false; // never tried this rule at this position
		}
		if ( stopIndex==MEMO_RULE_FAILED ) {
			//System.out.println("rule "+ruleIndex+" will never succeed");
			state.failed=true;
		}
		else {
			//System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
			input.seek(stopIndex+1); // jump to one past stop token
		}
		return true;
	}
+
+ /** Record whether or not this rule parsed the input at this position
+ * successfully. Use a standard java hashtable for now.
+ */
+ public void memoize(IntStream input,
+ int ruleIndex,
+ int ruleStartIndex)
+ {
+ int stopTokenIndex = state.failed?MEMO_RULE_FAILED:input.index()-1;
+ if ( state.ruleMemo==null ) {
+ System.err.println("!!!!!!!!! memo array is null for "+ getGrammarFileName());
+ }
+ if ( ruleIndex >= state.ruleMemo.length ) {
+ System.err.println("!!!!!!!!! memo size is "+state.ruleMemo.length+", but rule index is "+ruleIndex);
+ }
+ if ( state.ruleMemo[ruleIndex]!=null ) {
+ state.ruleMemo[ruleIndex].put(
+ new Integer(ruleStartIndex), new Integer(stopTokenIndex)
+ );
+ }
+ }
+
+ /** return how many rule/input-index pairs there are in total.
+ * TODO: this includes synpreds. :(
+ */
+ public int getRuleMemoizationCacheSize() {
+ int n = 0;
+ for (int i = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) {
+ Map ruleMap = state.ruleMemo[i];
+ if ( ruleMap!=null ) {
+ n += ruleMap.size(); // how many input indexes are recorded?
+ }
+ }
+ return n;
+ }
+
	// Tracing hook fired on rule entry; prints rule name, the current input
	// symbol, and the backtracking depth when guessing.
	public void traceIn(String ruleName, int ruleIndex, Object inputSymbol) {
		System.out.print("enter "+ruleName+" "+inputSymbol);
		if ( state.backtracking>0 ) {
			System.out.print(" backtracking="+state.backtracking);
		}
		System.out.println();
	}

	// Tracing hook fired on rule exit; when backtracking, also reports
	// whether the guess failed or succeeded (from state.failed).
	public void traceOut(String ruleName,
						 int ruleIndex,
						 Object inputSymbol)
	{
		System.out.print("exit "+ruleName+" "+inputSymbol);
		if ( state.backtracking>0 ) {
			System.out.print(" backtracking="+state.backtracking);
			if ( state.failed ) System.out.print(" failed");
			else System.out.print(" succeeded");
		}
		System.out.println();
	}
+
+}