diff options
Diffstat (limited to 'runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs')
-rw-r--r-- | runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs | 1046 |
1 files changed, 1046 insertions, 0 deletions
diff --git a/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs b/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs new file mode 100644 index 0000000..3d71c23 --- /dev/null +++ b/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs @@ -0,0 +1,1046 @@ +/* + * [The "BSD licence"] + * Copyright (c) 2005-2008 Terence Parr + * All rights reserved. + * + * Conversion to C#: + * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +namespace Antlr.Runtime { + using System.Collections.Generic; + + using ArgumentNullException = System.ArgumentNullException; + using Array = System.Array; + using Conditional = System.Diagnostics.ConditionalAttribute; + using Exception = System.Exception; + using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener; + using MethodBase = System.Reflection.MethodBase; + using NotSupportedException = System.NotSupportedException; + using Regex = System.Text.RegularExpressions.Regex; + using StackFrame = System.Diagnostics.StackFrame; + using StackTrace = System.Diagnostics.StackTrace; + using TextWriter = System.IO.TextWriter; + using Type = System.Type; + + /** <summary> + * A generic recognizer that can handle recognizers generated from + * lexer, parser, and tree grammars. This is all the parsing + * support code essentially; most of it is error recovery stuff and + * backtracking. + * </summary> + */ + public abstract class BaseRecognizer { + public const int MemoRuleFailed = -2; + public const int MemoRuleUnknown = -1; + public const int InitialFollowStackSize = 100; + + // copies from Token object for convenience in actions + public const int DefaultTokenChannel = TokenChannels.Default; + public const int Hidden = TokenChannels.Hidden; + + public const string NextTokenRuleName = "nextToken"; + + /** <summary> + * State of a lexer, parser, or tree parser are collected into a state + * object so the state can be shared. This sharing is needed to + * have one grammar import others and share same error variables + * and other state variables. It's a kind of explicit multiple + * inheritance via delegation of methods and shared state. + * </summary> + */ + protected internal RecognizerSharedState state; + + public BaseRecognizer() + : this(new RecognizerSharedState()) { + } + + public BaseRecognizer(RecognizerSharedState state) { + if (state == null) { + state = new RecognizerSharedState(); + } + this.state = state; + InitDFAs(); + } + + public TextWriter TraceDestination { + get; + set; + } + + protected virtual void InitDFAs() { + } + + /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */ + public virtual void Reset() { + // wack everything related to error recovery + if (state == null) { + return; // no shared state work to do + } + state._fsp = -1; + state.errorRecovery = false; + state.lastErrorIndex = -1; + state.failed = false; + state.syntaxErrors = 0; + // wack everything related to backtracking and memoization + state.backtracking = 0; + for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache + state.ruleMemo[i] = null; + } + } + + + /** <summary> + * Match current input symbol against ttype. Attempt + * single token insertion or deletion error recovery. If + * that fails, throw MismatchedTokenException. + * </summary> + * + * <remarks> + * To turn off single token insertion or deletion error + * recovery, override recoverFromMismatchedToken() and have it + * throw an exception. See TreeParser.recoverFromMismatchedToken(). + * This way any error in a rule will cause an exception and + * immediate exit from rule. Rule would recover by resynchronizing + * to the set of symbols that can follow rule ref. + * </remarks> + */ + public virtual object Match(IIntStream input, int ttype, BitSet follow) { + //System.out.println("match "+((TokenStream)input).LT(1)); + object matchedSymbol = GetCurrentInputSymbol(input); + if (input.LA(1) == ttype) { + input.Consume(); + state.errorRecovery = false; + state.failed = false; + return matchedSymbol; + } + if (state.backtracking > 0) { + state.failed = true; + return matchedSymbol; + } + matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow); + return matchedSymbol; + } + + /** <summary>Match the wildcard: in a symbol</summary> */ + public virtual void MatchAny(IIntStream input) { + state.errorRecovery = false; + state.failed = false; + input.Consume(); + } + + public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) { + return input.LA(2) == ttype; + } + + public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) { + if (follow == null) { + // we have no information about the follow; we can only consume + // a single token and hope for the best + return false; + } + // compute what can follow this grammar element reference + if (follow.Member(TokenTypes.EndOfRule)) { + BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW(); + follow = follow.Or(viableTokensFollowingThisRule); + if (state._fsp >= 0) { // remove EOR if we're not the start symbol + follow.Remove(TokenTypes.EndOfRule); + } + } + // if current token is consistent with what could come after set + // then we know we're missing a token; error recovery is free to + // "insert" the missing token + + //System.out.println("viable tokens="+follow.toString(getTokenNames())); + //System.out.println("LT(1)="+((TokenStream)input).LT(1)); + + // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR + // in follow set to indicate that the fall of the start symbol is + // in the set (EOF can follow). + if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) { + //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); + return true; + } + return false; + } + + /** <summary>Report a recognition problem.</summary> + * + * <remarks> + * This method sets errorRecovery to indicate the parser is recovering + * not parsing. Once in recovery mode, no errors are generated. + * To get out of recovery mode, the parser must successfully match + * a token (after a resync). So it will go: + * + * 1. error occurs + * 2. enter recovery mode, report error + * 3. consume until token found in resynch set + * 4. try to resume parsing + * 5. next match() will reset errorRecovery mode + * + * If you override, make sure to update syntaxErrors if you care about that. + * </remarks> + */ + public virtual void ReportError(RecognitionException e) { + // if we've already reported an error and have not matched a token + // yet successfully, don't report any errors. + if (state.errorRecovery) { + //System.err.print("[SPURIOUS] "); + return; + } + state.syntaxErrors++; // don't count spurious + state.errorRecovery = true; + + DisplayRecognitionError(this.TokenNames, e); + } + + public virtual void DisplayRecognitionError(string[] tokenNames, + RecognitionException e) { + string hdr = GetErrorHeader(e); + string msg = GetErrorMessage(e, tokenNames); + EmitErrorMessage(hdr + " " + msg); + } + + /** <summary>What error message should be generated for the various exception types?</summary> + * + * <remarks> + * Not very object-oriented code, but I like having all error message + * generation within one method rather than spread among all of the + * exception classes. This also makes it much easier for the exception + * handling because the exception classes do not have to have pointers back + * to this object to access utility routines and so on. Also, changing + * the message for an exception type would be difficult because you + * would have to subclassing exception, but then somehow get ANTLR + * to make those kinds of exception objects instead of the default. + * This looks weird, but trust me--it makes the most sense in terms + * of flexibility. + * + * For grammar debugging, you will want to override this to add + * more information such as the stack frame with + * getRuleInvocationStack(e, this.getClass().getName()) and, + * for no viable alts, the decision description and state etc... + * + * Override this to change the message generated for one or more + * exception types. + * </remarks> + */ + public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) { + string msg = e.Message; + if (e is UnwantedTokenException) { + UnwantedTokenException ute = (UnwantedTokenException)e; + string tokenName = "<unknown>"; + if (ute.Expecting == TokenTypes.EndOfFile) { + tokenName = "EndOfFile"; + } else { + tokenName = tokenNames[ute.Expecting]; + } + msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) + + " expecting " + tokenName; + } else if (e is MissingTokenException) { + MissingTokenException mte = (MissingTokenException)e; + string tokenName = "<unknown>"; + if (mte.Expecting == TokenTypes.EndOfFile) { + tokenName = "EndOfFile"; + } else { + tokenName = tokenNames[mte.Expecting]; + } + msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token); + } else if (e is MismatchedTokenException) { + MismatchedTokenException mte = (MismatchedTokenException)e; + string tokenName = "<unknown>"; + if (mte.Expecting == TokenTypes.EndOfFile) { + tokenName = "EndOfFile"; + } else { + tokenName = tokenNames[mte.Expecting]; + } + msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + + " expecting " + tokenName; + } else if (e is MismatchedTreeNodeException) { + MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; + string tokenName = "<unknown>"; + if (mtne.Expecting == TokenTypes.EndOfFile) { + tokenName = "EndOfFile"; + } else { + tokenName = tokenNames[mtne.Expecting]; + } + // workaround for a .NET framework bug (NullReferenceException) + string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty; + msg = "mismatched tree node: " + nodeText + " expecting " + tokenName; + } else if (e is NoViableAltException) { + //NoViableAltException nvae = (NoViableAltException)e; + // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" + // and "(decision="+nvae.decisionNumber+") and + // "state "+nvae.stateNumber + msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token); + } else if (e is EarlyExitException) { + //EarlyExitException eee = (EarlyExitException)e; + // for development, can add "(decision="+eee.decisionNumber+")" + msg = "required (...)+ loop did not match anything at input " + + GetTokenErrorDisplay(e.Token); + } else if (e is MismatchedSetException) { + MismatchedSetException mse = (MismatchedSetException)e; + msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + + " expecting set " + mse.Expecting; + } else if (e is MismatchedNotSetException) { + MismatchedNotSetException mse = (MismatchedNotSetException)e; + msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + + " expecting set " + mse.Expecting; + } else if (e is FailedPredicateException) { + FailedPredicateException fpe = (FailedPredicateException)e; + msg = "rule " + fpe.RuleName + " failed predicate: {" + + fpe.PredicateText + "}?"; + } + return msg; + } + + /** <summary> + * Get number of recognition errors (lexer, parser, tree parser). Each + * recognizer tracks its own number. So parser and lexer each have + * separate count. Does not count the spurious errors found between + * an error and next valid token match + * </summary> + * + * <seealso cref="reportError()"/> + */ + public virtual int NumberOfSyntaxErrors { + get { + return state.syntaxErrors; + } + } + + /** <summary>What is the error header, normally line/character position information?</summary> */ + public virtual string GetErrorHeader(RecognitionException e) { + return "line " + e.Line + ":" + (e.CharPositionInLine + 1); + } + + /** <summary> + * How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.ToString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. + * </summary> + */ + public virtual string GetTokenErrorDisplay(IToken t) { + string s = t.Text; + if (s == null) { + if (t.Type == TokenTypes.EndOfFile) { + s = "<EOF>"; + } else { + s = "<" + t.Type + ">"; + } + } + s = Regex.Replace(s, "\n", "\\\\n"); + s = Regex.Replace(s, "\r", "\\\\r"); + s = Regex.Replace(s, "\t", "\\\\t"); + return "'" + s + "'"; + } + + /** <summary>Override this method to change where error messages go</summary> */ + public virtual void EmitErrorMessage(string msg) { + if (TraceDestination != null) + TraceDestination.WriteLine(msg); + } + + /** <summary> + * Recover from an error found on the input stream. This is + * for NoViableAlt and mismatched symbol exceptions. If you enable + * single token insertion and deletion, this will usually not + * handle mismatched symbol exceptions but there could be a mismatched + * token that the match() routine could not recover from. + * </summary> + */ + public virtual void Recover(IIntStream input, RecognitionException re) { + if (state.lastErrorIndex == input.Index) { + // uh oh, another error at same token index; must be a case + // where LT(1) is in the recovery token set so nothing is + // consumed; consume a single token so at least to prevent + // an infinite loop; this is a failsafe. + input.Consume(); + } + state.lastErrorIndex = input.Index; + BitSet followSet = ComputeErrorRecoverySet(); + BeginResync(); + ConsumeUntil(input, followSet); + EndResync(); + } + + /** <summary> + * A hook to listen in on the token consumption during error recovery. + * The DebugParser subclasses this to fire events to the listenter. + * </summary> + */ + public virtual void BeginResync() { + } + + public virtual void EndResync() { + } + + /* Compute the error recovery set for the current rule. During + * rule invocation, the parser pushes the set of tokens that can + * follow that rule reference on the stack; this amounts to + * computing FIRST of what follows the rule reference in the + * enclosing rule. This local follow set only includes tokens + * from within the rule; i.e., the FIRST computation done by + * ANTLR stops at the end of a rule. + * + * EXAMPLE + * + * When you find a "no viable alt exception", the input is not + * consistent with any of the alternatives for rule r. The best + * thing to do is to consume tokens until you see something that + * can legally follow a call to r *or* any rule that called r. + * You don't want the exact set of viable next tokens because the + * input might just be missing a token--you might consume the + * rest of the input looking for one of the missing tokens. + * + * Consider grammar: + * + * a : '[' b ']' + * | '(' b ')' + * ; + * b : c '^' INT ; + * c : ID + * | INT + * ; + * + * At each rule invocation, the set of tokens that could follow + * that rule is pushed on a stack. Here are the various "local" + * follow sets: + * + * FOLLOW(b1_in_a) = FIRST(']') = ']' + * FOLLOW(b2_in_a) = FIRST(')') = ')' + * FOLLOW(c_in_b) = FIRST('^') = '^' + * + * Upon erroneous input "[]", the call chain is + * + * a -> b -> c + * + * and, hence, the follow context stack is: + * + * depth local follow set after call to rule + * 0 <EOF> a (from main()) + * 1 ']' b + * 3 '^' c + * + * Notice that ')' is not included, because b would have to have + * been called from a different context in rule a for ')' to be + * included. + * + * For error recovery, we cannot consider FOLLOW(c) + * (context-sensitive or otherwise). We need the combined set of + * all context-sensitive FOLLOW sets--the set of all tokens that + * could follow any reference in the call chain. We need to + * resync to one of those tokens. Note that FOLLOW(c)='^' and if + * we resync'd to that token, we'd consume until EOF. We need to + * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + * In this case, for input "[]", LA(1) is in this set so we would + * not consume anything and after printing an error rule c would + * return normally. It would not find the required '^' though. + * At this point, it gets a mismatched token error and throws an + * exception (since LA(1) is not in the viable following token + * set). The rule exception handler tries to recover, but finds + * the same recovery set and doesn't consume anything. Rule b + * exits normally returning to rule a. Now it finds the ']' (and + * with the successful match exits errorRecovery mode). + * + * So, you cna see that the parser walks up call chain looking + * for the token that was a member of the recovery set. + * + * Errors are not generated in errorRecovery mode. + * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implemented local FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + protected virtual BitSet ComputeErrorRecoverySet() { + return CombineFollows(false); + } + + /** <summary> + * Compute the context-sensitive FOLLOW set for current rule. + * This is set of token types that can follow a specific rule + * reference given a specific call chain. You get the set of + * viable tokens that can possibly come next (lookahead depth 1) + * given the current call chain. Contrast this with the + * definition of plain FOLLOW for rule r: + * </summary> + * + * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} + * + * where x in T* and alpha, beta in V*; T is set of terminals and + * V is the set of terminals and nonterminals. In other words, + * FOLLOW(r) is the set of all tokens that can possibly follow + * references to r in *any* sentential form (context). At + * runtime, however, we know precisely which context applies as + * we have the call chain. We may compute the exact (rather + * than covering superset) set of following tokens. + * + * For example, consider grammar: + * + * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} + * | "return" expr '.' + * ; + * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} + * atom : INT // FOLLOW(atom)=={'+',')',';','.'} + * | '(' expr ')' + * ; + * + * The FOLLOW sets are all inclusive whereas context-sensitive + * FOLLOW sets are precisely what could follow a rule reference. + * For input input "i=(3);", here is the derivation: + * + * stat => ID '=' expr ';' + * => ID '=' atom ('+' atom)* ';' + * => ID '=' '(' expr ')' ('+' atom)* ';' + * => ID '=' '(' atom ')' ('+' atom)* ';' + * => ID '=' '(' INT ')' ('+' atom)* ';' + * => ID '=' '(' INT ')' ';' + * + * At the "3" token, you'd have a call chain of + * + * stat -> expr -> atom -> expr -> atom + * + * What can follow that specific nested ref to atom? Exactly ')' + * as you can see by looking at the derivation of this specific + * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. + * + * You want the exact viable token set when recovering from a + * token mismatch. Upon token mismatch, if LA(1) is member of + * the viable next token set, then you know there is most likely + * a missing token in the input stream. "Insert" one by just not + * throwing an exception. + */ + protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() { + return CombineFollows(true); + } + + // what is exact? it seems to only add sets from above on stack + // if EOR is in set i. When it sees a set w/o EOR, it stops adding. + // Why would we ever want them all? Maybe no viable alt instead of + // mismatched token? + protected virtual BitSet CombineFollows(bool exact) { + int top = state._fsp; + BitSet followSet = new BitSet(); + for (int i = top; i >= 0; i--) { + BitSet localFollowSet = (BitSet)state.following[i]; + /* + System.out.println("local follow depth "+i+"="+ + localFollowSet.toString(getTokenNames())+")"); + */ + followSet.OrInPlace(localFollowSet); + if (exact) { + // can we see end of rule? + if (localFollowSet.Member(TokenTypes.EndOfRule)) { + // Only leave EOR in set if at top (start rule); this lets + // us know if have to include follow(start rule); i.e., EOF + if (i > 0) { + followSet.Remove(TokenTypes.EndOfRule); + } + } else { // can't see end of rule, quit + break; + } + } + } + return followSet; + } + + /** <summary>Attempt to recover from a single missing or extra token.</summary> + * + * EXTRA TOKEN + * + * LA(1) is not what we are looking for. If LA(2) has the right token, + * however, then assume LA(1) is some extra spurious token. Delete it + * and LA(2) as if we were doing a normal match(), which advances the + * input. + * + * MISSING TOKEN + * + * If current token is consistent with what could come after + * ttype then it is ok to "insert" the missing token, else throw + * exception For example, Input "i=(3;" is clearly missing the + * ')'. When the parser returns from the nested call to expr, it + * will have call chain: + * + * stat -> expr -> atom + * + * and it will be trying to match the ')' at this point in the + * derivation: + * + * => ID '=' '(' INT ')' ('+' atom)* ';' + * ^ + * match() will see that ';' doesn't match ')' and report a + * mismatched token error. To recover, it sees that LA(1)==';' + * is in the set of tokens that can follow the ')' token + * reference in rule atom. It can assume that you forgot the ')'. + */ + protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) { + RecognitionException e = null; + // if next token is what we are looking for then "delete" this token + if (MismatchIsUnwantedToken(input, ttype)) { + e = new UnwantedTokenException(ttype, input, TokenNames); + /* + System.err.println("recoverFromMismatchedToken deleting "+ + ((TokenStream)input).LT(1)+ + " since "+((TokenStream)input).LT(2)+" is what we want"); + */ + BeginResync(); + input.Consume(); // simply delete extra token + EndResync(); + ReportError(e); // report after consuming so AW sees the token in the exception + // we want to return the token we're actually matching + object matchedSymbol = GetCurrentInputSymbol(input); + input.Consume(); // move past ttype token as if all were ok + return matchedSymbol; + } + // can't recover with single token deletion, try insertion + if (MismatchIsMissingToken(input, follow)) { + object inserted = GetMissingSymbol(input, e, ttype, follow); + e = new MissingTokenException(ttype, input, inserted); + ReportError(e); // report after inserting so AW sees the token in the exception + return inserted; + } + // even that didn't work; must throw the exception + e = new MismatchedTokenException(ttype, input, TokenNames); + throw e; + } + + /** Not currently used */ + public virtual object RecoverFromMismatchedSet(IIntStream input, + RecognitionException e, + BitSet follow) { + if (MismatchIsMissingToken(input, follow)) { + // System.out.println("missing token"); + ReportError(e); + // we don't know how to conjure up a token for sets yet + return GetMissingSymbol(input, e, TokenTypes.Invalid, follow); + } + // TODO do single token deletion like above for Token mismatch + throw e; + } + + /** <summary> + * Match needs to return the current input symbol, which gets put + * into the label for the associated token ref; e.g., x=ID. Token + * and tree parsers need to return different objects. Rather than test + * for input stream type or change the IntStream interface, I use + * a simple method to ask the recognizer to tell me what the current + * input symbol is. + * </summary> + * + * <remarks>This is ignored for lexers.</remarks> + */ + protected virtual object GetCurrentInputSymbol(IIntStream input) { + return null; + } + + /** <summary>Conjure up a missing token during error recovery.</summary> + * + * <remarks> + * The recognizer attempts to recover from single missing + * symbols. But, actions might refer to that missing symbol. + * For example, x=ID {f($x);}. The action clearly assumes + * that there has been an identifier matched previously and that + * $x points at that token. If that token is missing, but + * the next token in the stream is what we want we assume that + * this token is missing and we keep going. Because we + * have to return some token to replace the missing token, + * we have to conjure one up. This method gives the user control + * over the tokens returned for missing tokens. Mostly, + * you will want to create something special for identifier + * tokens. For literals such as '{' and ',', the default + * action in the parser or tree parser works. It simply creates + * a CommonToken of the appropriate type. The text will be the token. + * If you change what tokens must be created by the lexer, + * override this method to create the appropriate tokens. + * </remarks> + */ + protected virtual object GetMissingSymbol(IIntStream input, + RecognitionException e, + int expectedTokenType, + BitSet follow) { + return null; + } + + public virtual void ConsumeUntil(IIntStream input, int tokenType) { + //System.out.println("consumeUntil "+tokenType); + int ttype = input.LA(1); + while (ttype != TokenTypes.EndOfFile && ttype != tokenType) { + input.Consume(); + ttype = input.LA(1); + } + } + + /** <summary>Consume tokens until one matches the given token set</summary> */ + public virtual void ConsumeUntil(IIntStream input, BitSet set) { + //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); + int ttype = input.LA(1); + while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) { + //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); + input.Consume(); + ttype = input.LA(1); + } + } + + /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */ + protected void PushFollow(BitSet fset) { + if ((state._fsp + 1) >= state.following.Length) { + Array.Resize(ref state.following, state.following.Length * 2); + } + state.following[++state._fsp] = fset; + } + + protected void PopFollow() { + state._fsp--; + } + + /** <summary> + * Return List<String> of the rules in your parser instance + * leading up to a call to this method. You could override if + * you want more details such as the file/line info of where + * in the parser java code a rule is invoked. + * </summary> + * + * <remarks> + * This is very useful for error messages and for context-sensitive + * error recovery. + * </remarks> + */ + public virtual IList<string> GetRuleInvocationStack() { + return GetRuleInvocationStack(new StackTrace(true)); + } + + /** <summary> + * A more general version of GetRuleInvocationStack where you can + * pass in the StackTrace of, for example, a RecognitionException + * to get it's rule stack trace. + * </summary> + */ + public static IList<string> GetRuleInvocationStack(StackTrace trace) { + if (trace == null) + throw new ArgumentNullException("trace"); + + List<string> rules = new List<string>(); + StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0]; + + for (int i = stack.Length - 1; i >= 0; i--) { + StackFrame frame = stack[i]; + MethodBase method = frame.GetMethod(); + GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true); + if (attributes != null && attributes.Length > 0) + rules.Add(attributes[0].Name); + } + + return rules; + } + + public virtual int BacktrackingLevel { + get { + return state.backtracking; + } + set { + state.backtracking = value; + } + } + + /** <summary>Return whether or not a backtracking attempt failed.</summary> */ + public virtual bool Failed { + get { + return state.failed; + } + } + + /** <summary> + * Used to print out token names like ID during debugging and + * error reporting. The generated parsers implement a method + * that overrides this to point to their String[] tokenNames. + * </summary> + */ + public virtual string[] TokenNames { + get { + return null; + } + } + + /** <summary> + * For debugging and other purposes, might want the grammar name. + * Have ANTLR generate an implementation for this method. + * </summary> + */ + public virtual string GrammarFileName { + get { + return null; + } + } + + public abstract string SourceName { + get; + } + + /** <summary> + * A convenience method for use most often with template rewrites. + * Convert a List<Token> to List<String> + * </summary> + */ + public virtual List<string> ToStrings(ICollection<IToken> tokens) { + if (tokens == null) + return null; + + List<string> strings = new List<string>(tokens.Count); + foreach (IToken token in tokens) { + strings.Add(token.Text); + } + + return strings; + } + + /** <summary> + * Given a rule number and a start token index number, return + * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from + * start index. If this rule has parsed input starting from the + * start index before, then return where the rule stopped parsing. + * It returns the index of the last token matched by the rule. + * </summary> + * + * <remarks> + * For now we use a hashtable and just the slow Object-based one. + * Later, we can make a special one for ints and also one that + * tosses out data after we commit past input position i. + * </remarks> + */ + public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) { + if (state.ruleMemo[ruleIndex] == null) { + state.ruleMemo[ruleIndex] = new Dictionary<int, int>(); + } + + int stopIndex; + if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex)) + return MemoRuleUnknown; + + return stopIndex; + } + + /** <summary> + * Has this rule already parsed input at the current index in the + * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. + * If we attempted but failed to parse properly before, return + * MEMO_RULE_FAILED. + * </summary> + * + * <remarks> + * This method has a side-effect: if we have seen this input for + * this rule and successfully parsed before, then seek ahead to + * 1 past the stop token matched for this rule last time. + * </remarks> + */ + public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) { + int stopIndex = GetRuleMemoization(ruleIndex, input.Index); + if (stopIndex == MemoRuleUnknown) { + return false; + } + if (stopIndex == MemoRuleFailed) { + //System.out.println("rule "+ruleIndex+" will never succeed"); + state.failed = true; + } else { + //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed); + input.Seek(stopIndex + 1); // jump to one past stop token + } + return true; + } + + /** <summary> + * Record whether or not this rule parsed the input at this position + * successfully. Use a standard java hashtable for now. + * </summary> + */ + public virtual void Memoize(IIntStream input, + int ruleIndex, + int ruleStartIndex) { + int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1; + if (state.ruleMemo == null) { + if (TraceDestination != null) + TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName); + } + if (ruleIndex >= state.ruleMemo.Length) { + if (TraceDestination != null) + TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex); + } + if (state.ruleMemo[ruleIndex] != null) { + state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; + } + } + + /** <summary>return how many rule/input-index pairs there are in total.</summary> + * TODO: this includes synpreds. :( + */ + public virtual int GetRuleMemoizationCacheSize() { + int n = 0; + for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { + var ruleMap = state.ruleMemo[i]; + if (ruleMap != null) { + n += ruleMap.Count; // how many input indexes are recorded? + } + } + return n; + } + + [Conditional("ANTLR_TRACE")] + public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) { + if (TraceDestination == null) + return; + + TraceDestination.Write("enter " + ruleName + " " + inputSymbol); + if (state.backtracking > 0) { + TraceDestination.Write(" backtracking=" + state.backtracking); + } + TraceDestination.WriteLine(); + } + + [Conditional("ANTLR_TRACE")] + public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) { + if (TraceDestination == null) + return; + + TraceDestination.Write("exit " + ruleName + " " + inputSymbol); + if (state.backtracking > 0) { + TraceDestination.Write(" backtracking=" + state.backtracking); + if (state.failed) + TraceDestination.Write(" failed"); + else + TraceDestination.Write(" succeeded"); + } + TraceDestination.WriteLine(); + } + + #region Debugging support + public virtual IDebugEventListener DebugListener { + get { + return null; + } + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugEnterRule(string grammarFileName, string ruleName) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.EnterRule(grammarFileName, ruleName); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugExitRule(string grammarFileName, string ruleName) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.ExitRule(grammarFileName, ruleName); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugEnterSubRule(int decisionNumber) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.EnterSubRule(decisionNumber); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugExitSubRule(int decisionNumber) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.ExitSubRule(decisionNumber); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugEnterAlt(int alt) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.EnterAlt(alt); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.EnterDecision(decisionNumber, couldBacktrack); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugExitDecision(int decisionNumber) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.ExitDecision(decisionNumber); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugLocation(int line, int charPositionInLine) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.Location(line, charPositionInLine); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugSemanticPredicate(bool result, string predicate) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.SemanticPredicate(result, predicate); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugBeginBacktrack(int level) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.BeginBacktrack(level); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugEndBacktrack(int level, bool successful) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.EndBacktrack(level, successful); + } + + [Conditional("ANTLR_DEBUG")] + protected virtual void DebugRecognitionException(RecognitionException ex) { + IDebugEventListener dbg = DebugListener; + if (dbg != null) + dbg.RecognitionException(ex); + } + #endregion + } +} |