aboutsummaryrefslogtreecommitdiff
path: root/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs')
-rw-r--r--runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs1046
1 files changed, 1046 insertions, 0 deletions
diff --git a/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs b/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs
new file mode 100644
index 0000000..3d71c23
--- /dev/null
+++ b/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs
@@ -0,0 +1,1046 @@
+/*
+ * [The "BSD licence"]
+ * Copyright (c) 2005-2008 Terence Parr
+ * All rights reserved.
+ *
+ * Conversion to C#:
+ * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+namespace Antlr.Runtime {
+ using System.Collections.Generic;
+
+ using ArgumentNullException = System.ArgumentNullException;
+ using Array = System.Array;
+ using Conditional = System.Diagnostics.ConditionalAttribute;
+ using Exception = System.Exception;
+ using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
+ using MethodBase = System.Reflection.MethodBase;
+ using NotSupportedException = System.NotSupportedException;
+ using Regex = System.Text.RegularExpressions.Regex;
+ using StackFrame = System.Diagnostics.StackFrame;
+ using StackTrace = System.Diagnostics.StackTrace;
+ using TextWriter = System.IO.TextWriter;
+ using Type = System.Type;
+
+ /** <summary>
+ * A generic recognizer that can handle recognizers generated from
+ * lexer, parser, and tree grammars. This is all the parsing
+ * support code essentially; most of it is error recovery stuff and
+ * backtracking.
+ * </summary>
+ */
+ public abstract class BaseRecognizer {
+ public const int MemoRuleFailed = -2;
+ public const int MemoRuleUnknown = -1;
+ public const int InitialFollowStackSize = 100;
+
+ // copies from Token object for convenience in actions
+ public const int DefaultTokenChannel = TokenChannels.Default;
+ public const int Hidden = TokenChannels.Hidden;
+
+ public const string NextTokenRuleName = "nextToken";
+
+ /** <summary>
+ * State of a lexer, parser, or tree parser are collected into a state
+ * object so the state can be shared. This sharing is needed to
+ * have one grammar import others and share same error variables
+ * and other state variables. It's a kind of explicit multiple
+ * inheritance via delegation of methods and shared state.
+ * </summary>
+ */
+ protected internal RecognizerSharedState state;
+
+ public BaseRecognizer()
+ : this(new RecognizerSharedState()) {
+ }
+
+ public BaseRecognizer(RecognizerSharedState state) {
+ if (state == null) {
+ state = new RecognizerSharedState();
+ }
+ this.state = state;
+ InitDFAs();
+ }
+
+ public TextWriter TraceDestination {
+ get;
+ set;
+ }
+
+ protected virtual void InitDFAs() {
+ }
+
+ /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */
+ public virtual void Reset() {
+ // wack everything related to error recovery
+ if (state == null) {
+ return; // no shared state work to do
+ }
+ state._fsp = -1;
+ state.errorRecovery = false;
+ state.lastErrorIndex = -1;
+ state.failed = false;
+ state.syntaxErrors = 0;
+ // wack everything related to backtracking and memoization
+ state.backtracking = 0;
+ for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache
+ state.ruleMemo[i] = null;
+ }
+ }
+
+
+ /** <summary>
+ * Match current input symbol against ttype. Attempt
+ * single token insertion or deletion error recovery. If
+ * that fails, throw MismatchedTokenException.
+ * </summary>
+ *
+ * <remarks>
+ * To turn off single token insertion or deletion error
+ * recovery, override recoverFromMismatchedToken() and have it
+ * throw an exception. See TreeParser.recoverFromMismatchedToken().
+ * This way any error in a rule will cause an exception and
+ * immediate exit from rule. Rule would recover by resynchronizing
+ * to the set of symbols that can follow rule ref.
+ * </remarks>
+ */
+ public virtual object Match(IIntStream input, int ttype, BitSet follow) {
+ //System.out.println("match "+((TokenStream)input).LT(1));
+ object matchedSymbol = GetCurrentInputSymbol(input);
+ if (input.LA(1) == ttype) {
+ input.Consume();
+ state.errorRecovery = false;
+ state.failed = false;
+ return matchedSymbol;
+ }
+ if (state.backtracking > 0) {
+ state.failed = true;
+ return matchedSymbol;
+ }
+ matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow);
+ return matchedSymbol;
+ }
+
+ /** <summary>Match the wildcard: in a symbol</summary> */
+ public virtual void MatchAny(IIntStream input) {
+ state.errorRecovery = false;
+ state.failed = false;
+ input.Consume();
+ }
+
+ public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) {
+ return input.LA(2) == ttype;
+ }
+
+ public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) {
+ if (follow == null) {
+ // we have no information about the follow; we can only consume
+ // a single token and hope for the best
+ return false;
+ }
+ // compute what can follow this grammar element reference
+ if (follow.Member(TokenTypes.EndOfRule)) {
+ BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
+ follow = follow.Or(viableTokensFollowingThisRule);
+ if (state._fsp >= 0) { // remove EOR if we're not the start symbol
+ follow.Remove(TokenTypes.EndOfRule);
+ }
+ }
+ // if current token is consistent with what could come after set
+ // then we know we're missing a token; error recovery is free to
+ // "insert" the missing token
+
+ //System.out.println("viable tokens="+follow.toString(getTokenNames()));
+ //System.out.println("LT(1)="+((TokenStream)input).LT(1));
+
+ // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
+ // in follow set to indicate that the fall of the start symbol is
+ // in the set (EOF can follow).
+ if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) {
+ //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
+ return true;
+ }
+ return false;
+ }
+
+ /** <summary>Report a recognition problem.</summary>
+ *
+ * <remarks>
+ * This method sets errorRecovery to indicate the parser is recovering
+ * not parsing. Once in recovery mode, no errors are generated.
+ * To get out of recovery mode, the parser must successfully match
+ * a token (after a resync). So it will go:
+ *
+ * 1. error occurs
+ * 2. enter recovery mode, report error
+ * 3. consume until token found in resynch set
+ * 4. try to resume parsing
+ * 5. next match() will reset errorRecovery mode
+ *
+ * If you override, make sure to update syntaxErrors if you care about that.
+ * </remarks>
+ */
+ public virtual void ReportError(RecognitionException e) {
+ // if we've already reported an error and have not matched a token
+ // yet successfully, don't report any errors.
+ if (state.errorRecovery) {
+ //System.err.print("[SPURIOUS] ");
+ return;
+ }
+ state.syntaxErrors++; // don't count spurious
+ state.errorRecovery = true;
+
+ DisplayRecognitionError(this.TokenNames, e);
+ }
+
+ public virtual void DisplayRecognitionError(string[] tokenNames,
+ RecognitionException e) {
+ string hdr = GetErrorHeader(e);
+ string msg = GetErrorMessage(e, tokenNames);
+ EmitErrorMessage(hdr + " " + msg);
+ }
+
+ /** <summary>What error message should be generated for the various exception types?</summary>
+ *
+ * <remarks>
+ * Not very object-oriented code, but I like having all error message
+ * generation within one method rather than spread among all of the
+ * exception classes. This also makes it much easier for the exception
+ * handling because the exception classes do not have to have pointers back
+ * to this object to access utility routines and so on. Also, changing
+ * the message for an exception type would be difficult because you
+ * would have to subclassing exception, but then somehow get ANTLR
+ * to make those kinds of exception objects instead of the default.
+ * This looks weird, but trust me--it makes the most sense in terms
+ * of flexibility.
+ *
+ * For grammar debugging, you will want to override this to add
+ * more information such as the stack frame with
+ * getRuleInvocationStack(e, this.getClass().getName()) and,
+ * for no viable alts, the decision description and state etc...
+ *
+ * Override this to change the message generated for one or more
+ * exception types.
+ * </remarks>
+ */
+ public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) {
+ string msg = e.Message;
+ if (e is UnwantedTokenException) {
+ UnwantedTokenException ute = (UnwantedTokenException)e;
+ string tokenName = "<unknown>";
+ if (ute.Expecting == TokenTypes.EndOfFile) {
+ tokenName = "EndOfFile";
+ } else {
+ tokenName = tokenNames[ute.Expecting];
+ }
+ msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) +
+ " expecting " + tokenName;
+ } else if (e is MissingTokenException) {
+ MissingTokenException mte = (MissingTokenException)e;
+ string tokenName = "<unknown>";
+ if (mte.Expecting == TokenTypes.EndOfFile) {
+ tokenName = "EndOfFile";
+ } else {
+ tokenName = tokenNames[mte.Expecting];
+ }
+ msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token);
+ } else if (e is MismatchedTokenException) {
+ MismatchedTokenException mte = (MismatchedTokenException)e;
+ string tokenName = "<unknown>";
+ if (mte.Expecting == TokenTypes.EndOfFile) {
+ tokenName = "EndOfFile";
+ } else {
+ tokenName = tokenNames[mte.Expecting];
+ }
+ msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
+ " expecting " + tokenName;
+ } else if (e is MismatchedTreeNodeException) {
+ MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
+ string tokenName = "<unknown>";
+ if (mtne.Expecting == TokenTypes.EndOfFile) {
+ tokenName = "EndOfFile";
+ } else {
+ tokenName = tokenNames[mtne.Expecting];
+ }
+ // workaround for a .NET framework bug (NullReferenceException)
+ string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
+ msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
+ } else if (e is NoViableAltException) {
+ //NoViableAltException nvae = (NoViableAltException)e;
+ // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
+ // and "(decision="+nvae.decisionNumber+") and
+ // "state "+nvae.stateNumber
+ msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token);
+ } else if (e is EarlyExitException) {
+ //EarlyExitException eee = (EarlyExitException)e;
+ // for development, can add "(decision="+eee.decisionNumber+")"
+ msg = "required (...)+ loop did not match anything at input " +
+ GetTokenErrorDisplay(e.Token);
+ } else if (e is MismatchedSetException) {
+ MismatchedSetException mse = (MismatchedSetException)e;
+ msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
+ " expecting set " + mse.Expecting;
+ } else if (e is MismatchedNotSetException) {
+ MismatchedNotSetException mse = (MismatchedNotSetException)e;
+ msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
+ " expecting set " + mse.Expecting;
+ } else if (e is FailedPredicateException) {
+ FailedPredicateException fpe = (FailedPredicateException)e;
+ msg = "rule " + fpe.RuleName + " failed predicate: {" +
+ fpe.PredicateText + "}?";
+ }
+ return msg;
+ }
+
+ /** <summary>
+ * Get number of recognition errors (lexer, parser, tree parser). Each
+ * recognizer tracks its own number. So parser and lexer each have
+ * separate count. Does not count the spurious errors found between
+ * an error and next valid token match
+ * </summary>
+ *
+ * <seealso cref="reportError()"/>
+ */
+ public virtual int NumberOfSyntaxErrors {
+ get {
+ return state.syntaxErrors;
+ }
+ }
+
+ /** <summary>What is the error header, normally line/character position information?</summary> */
+ public virtual string GetErrorHeader(RecognitionException e) {
+ return "line " + e.Line + ":" + (e.CharPositionInLine + 1);
+ }
+
+ /** <summary>
+ * How should a token be displayed in an error message? The default
+ * is to display just the text, but during development you might
+ * want to have a lot of information spit out. Override in that case
+ * to use t.ToString() (which, for CommonToken, dumps everything about
+ * the token). This is better than forcing you to override a method in
+ * your token objects because you don't have to go modify your lexer
+ * so that it creates a new Java type.
+ * </summary>
+ */
+ public virtual string GetTokenErrorDisplay(IToken t) {
+ string s = t.Text;
+ if (s == null) {
+ if (t.Type == TokenTypes.EndOfFile) {
+ s = "<EOF>";
+ } else {
+ s = "<" + t.Type + ">";
+ }
+ }
+ s = Regex.Replace(s, "\n", "\\\\n");
+ s = Regex.Replace(s, "\r", "\\\\r");
+ s = Regex.Replace(s, "\t", "\\\\t");
+ return "'" + s + "'";
+ }
+
+ /** <summary>Override this method to change where error messages go</summary> */
+ public virtual void EmitErrorMessage(string msg) {
+ if (TraceDestination != null)
+ TraceDestination.WriteLine(msg);
+ }
+
+ /** <summary>
+ * Recover from an error found on the input stream. This is
+ * for NoViableAlt and mismatched symbol exceptions. If you enable
+ * single token insertion and deletion, this will usually not
+ * handle mismatched symbol exceptions but there could be a mismatched
+ * token that the match() routine could not recover from.
+ * </summary>
+ */
+ public virtual void Recover(IIntStream input, RecognitionException re) {
+ if (state.lastErrorIndex == input.Index) {
+ // uh oh, another error at same token index; must be a case
+ // where LT(1) is in the recovery token set so nothing is
+ // consumed; consume a single token so at least to prevent
+ // an infinite loop; this is a failsafe.
+ input.Consume();
+ }
+ state.lastErrorIndex = input.Index;
+ BitSet followSet = ComputeErrorRecoverySet();
+ BeginResync();
+ ConsumeUntil(input, followSet);
+ EndResync();
+ }
+
+ /** <summary>
+ * A hook to listen in on the token consumption during error recovery.
+ * The DebugParser subclasses this to fire events to the listenter.
+ * </summary>
+ */
+ public virtual void BeginResync() {
+ }
+
+ public virtual void EndResync() {
+ }
+
+ /* Compute the error recovery set for the current rule. During
+ * rule invocation, the parser pushes the set of tokens that can
+ * follow that rule reference on the stack; this amounts to
+ * computing FIRST of what follows the rule reference in the
+ * enclosing rule. This local follow set only includes tokens
+ * from within the rule; i.e., the FIRST computation done by
+ * ANTLR stops at the end of a rule.
+ *
+ * EXAMPLE
+ *
+ * When you find a "no viable alt exception", the input is not
+ * consistent with any of the alternatives for rule r. The best
+ * thing to do is to consume tokens until you see something that
+ * can legally follow a call to r *or* any rule that called r.
+ * You don't want the exact set of viable next tokens because the
+ * input might just be missing a token--you might consume the
+ * rest of the input looking for one of the missing tokens.
+ *
+ * Consider grammar:
+ *
+ * a : '[' b ']'
+ * | '(' b ')'
+ * ;
+ * b : c '^' INT ;
+ * c : ID
+ * | INT
+ * ;
+ *
+ * At each rule invocation, the set of tokens that could follow
+ * that rule is pushed on a stack. Here are the various "local"
+ * follow sets:
+ *
+ * FOLLOW(b1_in_a) = FIRST(']') = ']'
+ * FOLLOW(b2_in_a) = FIRST(')') = ')'
+ * FOLLOW(c_in_b) = FIRST('^') = '^'
+ *
+ * Upon erroneous input "[]", the call chain is
+ *
+ * a -> b -> c
+ *
+ * and, hence, the follow context stack is:
+ *
+ * depth local follow set after call to rule
+ * 0 <EOF> a (from main())
+ * 1 ']' b
+ * 3 '^' c
+ *
+ * Notice that ')' is not included, because b would have to have
+ * been called from a different context in rule a for ')' to be
+ * included.
+ *
+ * For error recovery, we cannot consider FOLLOW(c)
+ * (context-sensitive or otherwise). We need the combined set of
+ * all context-sensitive FOLLOW sets--the set of all tokens that
+ * could follow any reference in the call chain. We need to
+ * resync to one of those tokens. Note that FOLLOW(c)='^' and if
+ * we resync'd to that token, we'd consume until EOF. We need to
+ * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
+ * In this case, for input "[]", LA(1) is in this set so we would
+ * not consume anything and after printing an error rule c would
+ * return normally. It would not find the required '^' though.
+ * At this point, it gets a mismatched token error and throws an
+ * exception (since LA(1) is not in the viable following token
+ * set). The rule exception handler tries to recover, but finds
+ * the same recovery set and doesn't consume anything. Rule b
+ * exits normally returning to rule a. Now it finds the ']' (and
+ * with the successful match exits errorRecovery mode).
+ *
+ * So, you cna see that the parser walks up call chain looking
+ * for the token that was a member of the recovery set.
+ *
+ * Errors are not generated in errorRecovery mode.
+ *
+ * ANTLR's error recovery mechanism is based upon original ideas:
+ *
+ * "Algorithms + Data Structures = Programs" by Niklaus Wirth
+ *
+ * and
+ *
+ * "A note on error recovery in recursive descent parsers":
+ * http://portal.acm.org/citation.cfm?id=947902.947905
+ *
+ * Later, Josef Grosch had some good ideas:
+ *
+ * "Efficient and Comfortable Error Recovery in Recursive Descent
+ * Parsers":
+ * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
+ *
+ * Like Grosch I implemented local FOLLOW sets that are combined
+ * at run-time upon error to avoid overhead during parsing.
+ */
+ protected virtual BitSet ComputeErrorRecoverySet() {
+ return CombineFollows(false);
+ }
+
+ /** <summary>
+ * Compute the context-sensitive FOLLOW set for current rule.
+ * This is set of token types that can follow a specific rule
+ * reference given a specific call chain. You get the set of
+ * viable tokens that can possibly come next (lookahead depth 1)
+ * given the current call chain. Contrast this with the
+ * definition of plain FOLLOW for rule r:
+ * </summary>
+ *
+ * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
+ *
+ * where x in T* and alpha, beta in V*; T is set of terminals and
+ * V is the set of terminals and nonterminals. In other words,
+ * FOLLOW(r) is the set of all tokens that can possibly follow
+ * references to r in *any* sentential form (context). At
+ * runtime, however, we know precisely which context applies as
+ * we have the call chain. We may compute the exact (rather
+ * than covering superset) set of following tokens.
+ *
+ * For example, consider grammar:
+ *
+ * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
+ * | "return" expr '.'
+ * ;
+ * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
+ * atom : INT // FOLLOW(atom)=={'+',')',';','.'}
+ * | '(' expr ')'
+ * ;
+ *
+ * The FOLLOW sets are all inclusive whereas context-sensitive
+ * FOLLOW sets are precisely what could follow a rule reference.
+ * For input input "i=(3);", here is the derivation:
+ *
+ * stat => ID '=' expr ';'
+ * => ID '=' atom ('+' atom)* ';'
+ * => ID '=' '(' expr ')' ('+' atom)* ';'
+ * => ID '=' '(' atom ')' ('+' atom)* ';'
+ * => ID '=' '(' INT ')' ('+' atom)* ';'
+ * => ID '=' '(' INT ')' ';'
+ *
+ * At the "3" token, you'd have a call chain of
+ *
+ * stat -> expr -> atom -> expr -> atom
+ *
+ * What can follow that specific nested ref to atom? Exactly ')'
+ * as you can see by looking at the derivation of this specific
+ * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
+ *
+ * You want the exact viable token set when recovering from a
+ * token mismatch. Upon token mismatch, if LA(1) is member of
+ * the viable next token set, then you know there is most likely
+ * a missing token in the input stream. "Insert" one by just not
+ * throwing an exception.
+ */
+ protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() {
+ return CombineFollows(true);
+ }
+
+ // what is exact? it seems to only add sets from above on stack
+ // if EOR is in set i. When it sees a set w/o EOR, it stops adding.
+ // Why would we ever want them all? Maybe no viable alt instead of
+ // mismatched token?
+ protected virtual BitSet CombineFollows(bool exact) {
+ int top = state._fsp;
+ BitSet followSet = new BitSet();
+ for (int i = top; i >= 0; i--) {
+ BitSet localFollowSet = (BitSet)state.following[i];
+ /*
+ System.out.println("local follow depth "+i+"="+
+ localFollowSet.toString(getTokenNames())+")");
+ */
+ followSet.OrInPlace(localFollowSet);
+ if (exact) {
+ // can we see end of rule?
+ if (localFollowSet.Member(TokenTypes.EndOfRule)) {
+ // Only leave EOR in set if at top (start rule); this lets
+ // us know if have to include follow(start rule); i.e., EOF
+ if (i > 0) {
+ followSet.Remove(TokenTypes.EndOfRule);
+ }
+ } else { // can't see end of rule, quit
+ break;
+ }
+ }
+ }
+ return followSet;
+ }
+
+ /** <summary>Attempt to recover from a single missing or extra token.</summary>
+ *
+ * EXTRA TOKEN
+ *
+ * LA(1) is not what we are looking for. If LA(2) has the right token,
+ * however, then assume LA(1) is some extra spurious token. Delete it
+ * and LA(2) as if we were doing a normal match(), which advances the
+ * input.
+ *
+ * MISSING TOKEN
+ *
+ * If current token is consistent with what could come after
+ * ttype then it is ok to "insert" the missing token, else throw
+ * exception For example, Input "i=(3;" is clearly missing the
+ * ')'. When the parser returns from the nested call to expr, it
+ * will have call chain:
+ *
+ * stat -> expr -> atom
+ *
+ * and it will be trying to match the ')' at this point in the
+ * derivation:
+ *
+ * => ID '=' '(' INT ')' ('+' atom)* ';'
+ * ^
+ * match() will see that ';' doesn't match ')' and report a
+ * mismatched token error. To recover, it sees that LA(1)==';'
+ * is in the set of tokens that can follow the ')' token
+ * reference in rule atom. It can assume that you forgot the ')'.
+ */
+ protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) {
+ RecognitionException e = null;
+ // if next token is what we are looking for then "delete" this token
+ if (MismatchIsUnwantedToken(input, ttype)) {
+ e = new UnwantedTokenException(ttype, input, TokenNames);
+ /*
+ System.err.println("recoverFromMismatchedToken deleting "+
+ ((TokenStream)input).LT(1)+
+ " since "+((TokenStream)input).LT(2)+" is what we want");
+ */
+ BeginResync();
+ input.Consume(); // simply delete extra token
+ EndResync();
+ ReportError(e); // report after consuming so AW sees the token in the exception
+ // we want to return the token we're actually matching
+ object matchedSymbol = GetCurrentInputSymbol(input);
+ input.Consume(); // move past ttype token as if all were ok
+ return matchedSymbol;
+ }
+ // can't recover with single token deletion, try insertion
+ if (MismatchIsMissingToken(input, follow)) {
+ object inserted = GetMissingSymbol(input, e, ttype, follow);
+ e = new MissingTokenException(ttype, input, inserted);
+ ReportError(e); // report after inserting so AW sees the token in the exception
+ return inserted;
+ }
+ // even that didn't work; must throw the exception
+ e = new MismatchedTokenException(ttype, input, TokenNames);
+ throw e;
+ }
+
+ /** Not currently used */
+ public virtual object RecoverFromMismatchedSet(IIntStream input,
+ RecognitionException e,
+ BitSet follow) {
+ if (MismatchIsMissingToken(input, follow)) {
+ // System.out.println("missing token");
+ ReportError(e);
+ // we don't know how to conjure up a token for sets yet
+ return GetMissingSymbol(input, e, TokenTypes.Invalid, follow);
+ }
+ // TODO do single token deletion like above for Token mismatch
+ throw e;
+ }
+
+ /** <summary>
+ * Match needs to return the current input symbol, which gets put
+ * into the label for the associated token ref; e.g., x=ID. Token
+ * and tree parsers need to return different objects. Rather than test
+ * for input stream type or change the IntStream interface, I use
+ * a simple method to ask the recognizer to tell me what the current
+ * input symbol is.
+ * </summary>
+ *
+ * <remarks>This is ignored for lexers.</remarks>
+ */
+ protected virtual object GetCurrentInputSymbol(IIntStream input) {
+ return null;
+ }
+
+ /** <summary>Conjure up a missing token during error recovery.</summary>
+ *
+ * <remarks>
+ * The recognizer attempts to recover from single missing
+ * symbols. But, actions might refer to that missing symbol.
+ * For example, x=ID {f($x);}. The action clearly assumes
+ * that there has been an identifier matched previously and that
+ * $x points at that token. If that token is missing, but
+ * the next token in the stream is what we want we assume that
+ * this token is missing and we keep going. Because we
+ * have to return some token to replace the missing token,
+ * we have to conjure one up. This method gives the user control
+ * over the tokens returned for missing tokens. Mostly,
+ * you will want to create something special for identifier
+ * tokens. For literals such as '{' and ',', the default
+ * action in the parser or tree parser works. It simply creates
+ * a CommonToken of the appropriate type. The text will be the token.
+ * If you change what tokens must be created by the lexer,
+ * override this method to create the appropriate tokens.
+ * </remarks>
+ */
+ protected virtual object GetMissingSymbol(IIntStream input,
+ RecognitionException e,
+ int expectedTokenType,
+ BitSet follow) {
+ return null;
+ }
+
+ public virtual void ConsumeUntil(IIntStream input, int tokenType) {
+ //System.out.println("consumeUntil "+tokenType);
+ int ttype = input.LA(1);
+ while (ttype != TokenTypes.EndOfFile && ttype != tokenType) {
+ input.Consume();
+ ttype = input.LA(1);
+ }
+ }
+
+ /** <summary>Consume tokens until one matches the given token set</summary> */
+ public virtual void ConsumeUntil(IIntStream input, BitSet set) {
+ //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
+ int ttype = input.LA(1);
+ while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) {
+ //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
+ input.Consume();
+ ttype = input.LA(1);
+ }
+ }
+
+ /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
+ protected void PushFollow(BitSet fset) {
+ if ((state._fsp + 1) >= state.following.Length) {
+ Array.Resize(ref state.following, state.following.Length * 2);
+ }
+ state.following[++state._fsp] = fset;
+ }
+
+ protected void PopFollow() {
+ state._fsp--;
+ }
+
+ /** <summary>
+ * Return List<String> of the rules in your parser instance
+ * leading up to a call to this method. You could override if
+ * you want more details such as the file/line info of where
+ * in the parser java code a rule is invoked.
+ * </summary>
+ *
+ * <remarks>
+ * This is very useful for error messages and for context-sensitive
+ * error recovery.
+ * </remarks>
+ */
+ public virtual IList<string> GetRuleInvocationStack() {
+ return GetRuleInvocationStack(new StackTrace(true));
+ }
+
+ /** <summary>
+ * A more general version of GetRuleInvocationStack where you can
+ * pass in the StackTrace of, for example, a RecognitionException
+ * to get it's rule stack trace.
+ * </summary>
+ */
+ public static IList<string> GetRuleInvocationStack(StackTrace trace) {
+ if (trace == null)
+ throw new ArgumentNullException("trace");
+
+ List<string> rules = new List<string>();
+ StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
+
+ for (int i = stack.Length - 1; i >= 0; i--) {
+ StackFrame frame = stack[i];
+ MethodBase method = frame.GetMethod();
+ GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
+ if (attributes != null && attributes.Length > 0)
+ rules.Add(attributes[0].Name);
+ }
+
+ return rules;
+ }
+
+ public virtual int BacktrackingLevel {
+ get {
+ return state.backtracking;
+ }
+ set {
+ state.backtracking = value;
+ }
+ }
+
+ /** <summary>Return whether or not a backtracking attempt failed.</summary> */
+ public virtual bool Failed {
+ get {
+ return state.failed;
+ }
+ }
+
+ /** <summary>
+ * Used to print out token names like ID during debugging and
+ * error reporting. The generated parsers implement a method
+ * that overrides this to point to their String[] tokenNames.
+ * </summary>
+ */
+ public virtual string[] TokenNames {
+ get {
+ return null;
+ }
+ }
+
+ /** <summary>
+ * For debugging and other purposes, might want the grammar name.
+ * Have ANTLR generate an implementation for this method.
+ * </summary>
+ */
+ public virtual string GrammarFileName {
+ get {
+ return null;
+ }
+ }
+
+ public abstract string SourceName {
+ get;
+ }
+
+ /** <summary>
+ * A convenience method for use most often with template rewrites.
+ * Convert a List<Token> to List<String>
+ * </summary>
+ */
+ public virtual List<string> ToStrings(ICollection<IToken> tokens) {
+ if (tokens == null)
+ return null;
+
+ List<string> strings = new List<string>(tokens.Count);
+ foreach (IToken token in tokens) {
+ strings.Add(token.Text);
+ }
+
+ return strings;
+ }
+
+ /** <summary>
+ * Given a rule number and a start token index number, return
+ * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
+ * start index. If this rule has parsed input starting from the
+ * start index before, then return where the rule stopped parsing.
+ * It returns the index of the last token matched by the rule.
+ * </summary>
+ *
+ * <remarks>
+ * For now we use a hashtable and just the slow Object-based one.
+ * Later, we can make a special one for ints and also one that
+ * tosses out data after we commit past input position i.
+ * </remarks>
+ */
+ public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) {
+ if (state.ruleMemo[ruleIndex] == null) {
+ state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
+ }
+
+ int stopIndex;
+ if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex))
+ return MemoRuleUnknown;
+
+ return stopIndex;
+ }
+
+ /** <summary>
+ * Has this rule already parsed input at the current index in the
+ * input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
+ * If we attempted but failed to parse properly before, return
+ * MEMO_RULE_FAILED.
+ * </summary>
+ *
+ * <remarks>
+ * This method has a side-effect: if we have seen this input for
+ * this rule and successfully parsed before, then seek ahead to
+ * 1 past the stop token matched for this rule last time.
+ * </remarks>
+ */
+ public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) {
+ int stopIndex = GetRuleMemoization(ruleIndex, input.Index);
+ if (stopIndex == MemoRuleUnknown) {
+ return false;
+ }
+ if (stopIndex == MemoRuleFailed) {
+ //System.out.println("rule "+ruleIndex+" will never succeed");
+ state.failed = true;
+ } else {
+ //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
+ input.Seek(stopIndex + 1); // jump to one past stop token
+ }
+ return true;
+ }
+
+ /** <summary>
+ * Record whether or not this rule parsed the input at this position
+ * successfully. Use a standard java hashtable for now.
+ * </summary>
+ */
+ public virtual void Memoize(IIntStream input,
+ int ruleIndex,
+ int ruleStartIndex) {
+ int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
+ if (state.ruleMemo == null) {
+ if (TraceDestination != null)
+ TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName);
+ }
+ if (ruleIndex >= state.ruleMemo.Length) {
+ if (TraceDestination != null)
+ TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
+ }
+ if (state.ruleMemo[ruleIndex] != null) {
+ state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
+ }
+ }
+
+ /** <summary>return how many rule/input-index pairs there are in total.</summary>
+ * TODO: this includes synpreds. :(
+ */
+ public virtual int GetRuleMemoizationCacheSize() {
+ int n = 0;
+ for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) {
+ var ruleMap = state.ruleMemo[i];
+ if (ruleMap != null) {
+ n += ruleMap.Count; // how many input indexes are recorded?
+ }
+ }
+ return n;
+ }
+
+ [Conditional("ANTLR_TRACE")]
+ public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) {
+ if (TraceDestination == null)
+ return;
+
+ TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
+ if (state.backtracking > 0) {
+ TraceDestination.Write(" backtracking=" + state.backtracking);
+ }
+ TraceDestination.WriteLine();
+ }
+
+ [Conditional("ANTLR_TRACE")]
+ public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) {
+ if (TraceDestination == null)
+ return;
+
+ TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
+ if (state.backtracking > 0) {
+ TraceDestination.Write(" backtracking=" + state.backtracking);
+ if (state.failed)
+ TraceDestination.Write(" failed");
+ else
+ TraceDestination.Write(" succeeded");
+ }
+ TraceDestination.WriteLine();
+ }
+
+ #region Debugging support
+ public virtual IDebugEventListener DebugListener {
+ get {
+ return null;
+ }
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugEnterRule(string grammarFileName, string ruleName) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.EnterRule(grammarFileName, ruleName);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugExitRule(string grammarFileName, string ruleName) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.ExitRule(grammarFileName, ruleName);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugEnterSubRule(int decisionNumber) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.EnterSubRule(decisionNumber);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugExitSubRule(int decisionNumber) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.ExitSubRule(decisionNumber);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugEnterAlt(int alt) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.EnterAlt(alt);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.EnterDecision(decisionNumber, couldBacktrack);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugExitDecision(int decisionNumber) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.ExitDecision(decisionNumber);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugLocation(int line, int charPositionInLine) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.Location(line, charPositionInLine);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugSemanticPredicate(bool result, string predicate) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.SemanticPredicate(result, predicate);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugBeginBacktrack(int level) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.BeginBacktrack(level);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugEndBacktrack(int level, bool successful) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.EndBacktrack(level, successful);
+ }
+
+ [Conditional("ANTLR_DEBUG")]
+ protected virtual void DebugRecognitionException(RecognitionException ex) {
+ IDebugEventListener dbg = DebugListener;
+ if (dbg != null)
+ dbg.RecognitionException(ex);
+ }
+ #endregion
+ }
+}