Diffstat (limited to 'antlr-3.4/runtime/Python/antlr3/recognizers.py')
-rw-r--r-- | antlr-3.4/runtime/Python/antlr3/recognizers.py | 1485
1 files changed, 1485 insertions, 0 deletions
diff --git a/antlr-3.4/runtime/Python/antlr3/recognizers.py b/antlr-3.4/runtime/Python/antlr3/recognizers.py
new file mode 100644
index 0000000..d48280a
--- /dev/null
+++ b/antlr-3.4/runtime/Python/antlr3/recognizers.py
@@ -0,0 +1,1485 @@
+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+import sys
+import inspect
+
+from antlr3 import compatible_api_versions
+from antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \
+    EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE
+from antlr3.exceptions import RecognitionException, MismatchedTokenException, \
+    MismatchedRangeException, MismatchedTreeNodeException, \
+    NoViableAltException, EarlyExitException, MismatchedSetException, \
+    MismatchedNotSetException, FailedPredicateException, \
+    BacktrackingFailed, UnwantedTokenException, MissingTokenException
+from antlr3.tokens import CommonToken, SKIP_TOKEN
+from antlr3.compat import set, frozenset, reversed
+
+
+class RecognizerSharedState(object):
+    """
+    The set of fields needed by an abstract recognizer to recognize input
+    and recover from errors etc... As a separate state object, it can be
+    shared among multiple grammars; e.g., when one grammar imports another.
+
+    These fields are publicly visible but the actual state pointer per
+    parser is protected.
+    """
+
+    def __init__(self):
+        # Track the set of token types that can follow any rule invocation.
+        # Stack grows upwards.
+        self.following = []
+
+        # This is true when we see an error and before having successfully
+        # matched a token. Prevents generation of more than one error message
+        # per error.
+        self.errorRecovery = False
+
+        # The index into the input stream where the last error occurred.
+        # This is used to prevent infinite loops where an error is found
+        # but no token is consumed during recovery...another error is found,
+        # ad nauseam. This is a failsafe mechanism to guarantee that at least
+        # one token/tree node is consumed for two errors.
+        self.lastErrorIndex = -1
+
+        # If 0, no backtracking is going on. Safe to exec actions etc...
+        # If >0 then it's the level of backtracking.
+        self.backtracking = 0
+
+        # An array[size num rules] of Map<Integer,Integer> that tracks
+        # the stop token index for each rule. ruleMemo[ruleIndex] is
+        # the memoization table for ruleIndex. For key ruleStartIndex, you
+        # get back the stop token for associated rule or MEMO_RULE_FAILED.
+        #
+        # This is only used if rule memoization is on (which it is by default).
+        self.ruleMemo = None
+
+        ## Did the recognizer encounter a syntax error? Track how many.
+        self.syntaxErrors = 0
+
+
+        # LEXER FIELDS (must be in same state object to avoid casting
+        # constantly in generated code and Lexer object) :(
+
+
+        ## The goal of all lexer rules/methods is to create a token object.
+        # This is an instance variable as multiple rules may collaborate to
+        # create a single token. nextToken will return this object after
+        # matching lexer rule(s). If you subclass to allow multiple token
+        # emissions, then set this to the last token to be matched or
+        # something nonnull so that the auto token emit mechanism will not
+        # emit another token.
+        self.token = None
+
+        ## What character index in the stream did the current token start at?
+        # Needed, for example, to get the text for current token. Set at
+        # the start of nextToken.
+        self.tokenStartCharIndex = -1
+
+        ## The line on which the first character of the token resides
+        self.tokenStartLine = None
+
+        ## The character position of first character within the line
+        self.tokenStartCharPositionInLine = None
+
+        ## The channel number for the current token
+        self.channel = None
+
+        ## The token type for the current token
+        self.type = None
+
+        ## You can set the text for the current token to override what is in
+        # the input char buffer. Use setText() or you can set this instance
+        # variable directly.
+        self.text = None
+
+
+class BaseRecognizer(object):
+    """
+    @brief Common recognizer functionality.
+
+    A generic recognizer that can handle recognizers generated from
+    lexer, parser, and tree grammars. This is all the parsing
+    support code essentially; most of it is error recovery stuff and
+    backtracking.
+    """
+
+    MEMO_RULE_FAILED = -2
+    MEMO_RULE_UNKNOWN = -1
+
+    # copies from Token object for convenience in actions
+    DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
+
+    # for convenience in actions
+    HIDDEN = HIDDEN_CHANNEL
+
+    # overridden by generated subclasses
+    tokenNames = None
+
+    # The api_version attribute was introduced in 3.3. If it is not
+    # overwritten in the generated recognizer, we assume a default of v0.
+    api_version = 0
+
+    def __init__(self, state=None):
+        # Input stream of the recognizer. Must be initialized by a subclass.
+        self.input = None
+
+        ## The state of a lexer, parser, or tree parser is collected into a
+        # state object so the state can be shared. This sharing is needed to
+        # have one grammar import others and share the same error variables
+        # and other state variables. It's a kind of explicit multiple
+        # inheritance via delegation of methods and shared state.
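+        # Illustrative only (not part of this module): a composite grammar
+        # and its imported delegates can share a single state object, e.g.
+        # with hypothetical generated classes OuterParser/OuterParser_Inner:
+        #
+        #   state = RecognizerSharedState()
+        #   outer = OuterParser(token_stream, state=state)
+        #   inner = OuterParser_Inner(token_stream, state)
+        #
+        # so that error flags and the backtracking level stay consistent
+        # across all delegates.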
+        if state is None:
+            state = RecognizerSharedState()
+        self._state = state
+
+        if self.api_version not in compatible_api_versions:
+            raise RuntimeError(
+                ("ANTLR version mismatch: "
+                 "The recognizer has been generated with API V%s, "
+                 "but this runtime does not support this.")
+                % self.api_version)
+
+    # this one only exists to shut up pylint :(
+    def setInput(self, input):
+        self.input = input
+
+
+    def reset(self):
+        """
+        Reset the parser's state; subclasses must rewind the input stream.
+        """
+
+        # wack everything related to error recovery
+        if self._state is None:
+            # no shared state work to do
+            return
+
+        self._state.following = []
+        self._state.errorRecovery = False
+        self._state.lastErrorIndex = -1
+        self._state.syntaxErrors = 0
+        # wack everything related to backtracking and memoization
+        self._state.backtracking = 0
+        if self._state.ruleMemo is not None:
+            self._state.ruleMemo = {}
+
+
+    def match(self, input, ttype, follow):
+        """
+        Match current input symbol against ttype. Attempt
+        single token insertion or deletion error recovery. If
+        that fails, throw MismatchedTokenException.
+
+        To turn off single token insertion or deletion error
+        recovery, override recoverFromMismatchedToken() and have it
+        throw an exception. See TreeParser.recoverFromMismatchedToken().
+        This way any error in a rule will cause an exception and
+        immediate exit from rule. The rule would recover by resynchronizing
+        to the set of symbols that can follow the rule ref.
+        """
+
+        matchedSymbol = self.getCurrentInputSymbol(input)
+        if self.input.LA(1) == ttype:
+            self.input.consume()
+            self._state.errorRecovery = False
+            return matchedSymbol
+
+        if self._state.backtracking > 0:
+            # FIXME: need to return matchedSymbol here as well. damn!!
+            raise BacktrackingFailed
+
+        matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow)
+        return matchedSymbol
+
+
+    def matchAny(self, input):
+        """Match the wildcard: in a symbol"""
+
+        self._state.errorRecovery = False
+        self.input.consume()
+
+
+    def mismatchIsUnwantedToken(self, input, ttype):
+        return input.LA(2) == ttype
+
+
+    def mismatchIsMissingToken(self, input, follow):
+        if follow is None:
+            # we have no information about the follow; we can only consume
+            # a single token and hope for the best
+            return False
+
+        # compute what can follow this grammar element reference
+        if EOR_TOKEN_TYPE in follow:
+            viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW()
+            follow = follow | viableTokensFollowingThisRule
+
+            if len(self._state.following) > 0:
+                # remove EOR if we're not the start symbol
+                follow = follow - set([EOR_TOKEN_TYPE])
+
+        # if current token is consistent with what could come after the set,
+        # then we know we're missing a token; error recovery is free to
+        # "insert" the missing token
+        if input.LA(1) in follow or EOR_TOKEN_TYPE in follow:
+            return True
+
+        return False
+
+
+    def reportError(self, e):
+        """Report a recognition problem.
+
+        This method sets errorRecovery to indicate the parser is recovering,
+        not parsing. Once in recovery mode, no errors are generated.
+        To get out of recovery mode, the parser must successfully match
+        a token (after a resync). So it will go:
+
+        1. error occurs
+        2. enter recovery mode, report error
+        3. consume until token found in resynch set
+        4. try to resume parsing
+        5. next match() will reset errorRecovery mode
+
+        If you override, make sure to update syntaxErrors if you care about
+        that.
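+
+        As an illustration (not part of the original documentation), an
+        application that wants errors on a logger instead of stderr would
+        typically leave this method alone and override emitErrorMessage();
+        'log' below is a hypothetical logger object:
+
+            def emitErrorMessage(self, msg):
+                log.warning(msg)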
+ + """ + + # if we've already reported an error and have not matched a token + # yet successfully, don't report any errors. + if self._state.errorRecovery: + return + + self._state.syntaxErrors += 1 # don't count spurious + self._state.errorRecovery = True + + self.displayRecognitionError(self.tokenNames, e) + + + def displayRecognitionError(self, tokenNames, e): + hdr = self.getErrorHeader(e) + msg = self.getErrorMessage(e, tokenNames) + self.emitErrorMessage(hdr+" "+msg) + + + def getErrorMessage(self, e, tokenNames): + """ + What error message should be generated for the various + exception types? + + Not very object-oriented code, but I like having all error message + generation within one method rather than spread among all of the + exception classes. This also makes it much easier for the exception + handling because the exception classes do not have to have pointers back + to this object to access utility routines and so on. Also, changing + the message for an exception type would be difficult because you + would have to subclassing exception, but then somehow get ANTLR + to make those kinds of exception objects instead of the default. + This looks weird, but trust me--it makes the most sense in terms + of flexibility. + + For grammar debugging, you will want to override this to add + more information such as the stack frame with + getRuleInvocationStack(e, this.getClass().getName()) and, + for no viable alts, the decision description and state etc... + + Override this to change the message generated for one or more + exception types. + """ + + if isinstance(e, UnwantedTokenException): + tokenName = "<unknown>" + if e.expecting == EOF: + tokenName = "EOF" + + else: + tokenName = self.tokenNames[e.expecting] + + msg = "extraneous input %s expecting %s" % ( + self.getTokenErrorDisplay(e.getUnexpectedToken()), + tokenName + ) + + elif isinstance(e, MissingTokenException): + tokenName = "<unknown>" + if e.expecting == EOF: + tokenName = "EOF" + + else: + tokenName = self.tokenNames[e.expecting] + + msg = "missing %s at %s" % ( + tokenName, self.getTokenErrorDisplay(e.token) + ) + + elif isinstance(e, MismatchedTokenException): + tokenName = "<unknown>" + if e.expecting == EOF: + tokenName = "EOF" + else: + tokenName = self.tokenNames[e.expecting] + + msg = "mismatched input " \ + + self.getTokenErrorDisplay(e.token) \ + + " expecting " \ + + tokenName + + elif isinstance(e, MismatchedTreeNodeException): + tokenName = "<unknown>" + if e.expecting == EOF: + tokenName = "EOF" + else: + tokenName = self.tokenNames[e.expecting] + + msg = "mismatched tree node: %s expecting %s" \ + % (e.node, tokenName) + + elif isinstance(e, NoViableAltException): + msg = "no viable alternative at input " \ + + self.getTokenErrorDisplay(e.token) + + elif isinstance(e, EarlyExitException): + msg = "required (...)+ loop did not match anything at input " \ + + self.getTokenErrorDisplay(e.token) + + elif isinstance(e, MismatchedSetException): + msg = "mismatched input " \ + + self.getTokenErrorDisplay(e.token) \ + + " expecting set " \ + + repr(e.expecting) + + elif isinstance(e, MismatchedNotSetException): + msg = "mismatched input " \ + + self.getTokenErrorDisplay(e.token) \ + + " expecting set " \ + + repr(e.expecting) + + elif isinstance(e, FailedPredicateException): + msg = "rule " \ + + e.ruleName \ + + " failed predicate: {" \ + + e.predicateText \ + + "}?" 
+
+        else:
+            msg = str(e)
+
+        return msg
+
+
+    def getNumberOfSyntaxErrors(self):
+        """
+        Get number of recognition errors (lexer, parser, tree parser). Each
+        recognizer tracks its own number, so parser and lexer each have
+        a separate count. Does not count the spurious errors found between
+        an error and the next valid token match.
+
+        See also reportError()
+        """
+        return self._state.syntaxErrors
+
+
+    def getErrorHeader(self, e):
+        """
+        What is the error header, normally line/character position information?
+        """
+
+        source_name = self.getSourceName()
+        if source_name is not None:
+            return "%s line %d:%d" % (source_name, e.line, e.charPositionInLine)
+        return "line %d:%d" % (e.line, e.charPositionInLine)
+
+
+    def getTokenErrorDisplay(self, t):
+        """
+        How should a token be displayed in an error message? The default
+        is to display just the text, but during development you might
+        want to have a lot of information spit out. Override in that case
+        to use t.toString() (which, for CommonToken, dumps everything about
+        the token). This is better than forcing you to override a method in
+        your token objects because you don't have to go modify your lexer
+        so that it creates a new token type.
+        """
+
+        s = t.text
+        if s is None:
+            if t.type == EOF:
+                s = "<EOF>"
+            else:
+                s = "<"+t.type+">"
+
+        return repr(s)
+
+
+    def emitErrorMessage(self, msg):
+        """Override this method to change where error messages go"""
+        sys.stderr.write(msg + '\n')
+
+
+    def recover(self, input, re):
+        """
+        Recover from an error found on the input stream. This is
+        for NoViableAlt and mismatched symbol exceptions. If you enable
+        single token insertion and deletion, this will usually not
+        handle mismatched symbol exceptions but there could be a mismatched
+        token that the match() routine could not recover from.
+        """
+
+        # PROBLEM? what if input stream is not the same as last time
+        # perhaps make lastErrorIndex a member of input
+        if self._state.lastErrorIndex == input.index():
+            # uh oh, another error at same token index; must be a case
+            # where LT(1) is in the recovery token set so nothing is
+            # consumed; consume a single token, at least, to prevent
+            # an infinite loop; this is a failsafe.
+            input.consume()
+
+        self._state.lastErrorIndex = input.index()
+        followSet = self.computeErrorRecoverySet()
+
+        self.beginResync()
+        self.consumeUntil(input, followSet)
+        self.endResync()
+
+
+    def beginResync(self):
+        """
+        A hook to listen in on the token consumption during error recovery.
+        The DebugParser subclasses this to fire events to the listener.
+        """
+
+        pass
+
+
+    def endResync(self):
+        """
+        A hook to listen in on the token consumption during error recovery.
+        The DebugParser subclasses this to fire events to the listener.
+        """
+
+        pass
+
+
+    def computeErrorRecoverySet(self):
+        """
+        Compute the error recovery set for the current rule. During
+        rule invocation, the parser pushes the set of tokens that can
+        follow that rule reference on the stack; this amounts to
+        computing FIRST of what follows the rule reference in the
+        enclosing rule. This local follow set only includes tokens
+        from within the rule; i.e., the FIRST computation done by
+        ANTLR stops at the end of a rule.
+
+        EXAMPLE
+
+        When you find a "no viable alt exception", the input is not
+        consistent with any of the alternatives for rule r. The best
+        thing to do is to consume tokens until you see something that
+        can legally follow a call to r *or* any rule that called r.
+        You don't want the exact set of viable next tokens because the
+        input might just be missing a token--you might consume the
+        rest of the input looking for one of the missing tokens.
+
+        Consider grammar:
+
+          a : '[' b ']'
+            | '(' b ')'
+            ;
+          b : c '^' INT ;
+          c : ID
+            | INT
+            ;
+
+        At each rule invocation, the set of tokens that could follow
+        that rule is pushed on a stack. Here are the various "local"
+        follow sets:
+
+          FOLLOW(b1_in_a) = FIRST(']') = ']'
+          FOLLOW(b2_in_a) = FIRST(')') = ')'
+          FOLLOW(c_in_b)  = FIRST('^') = '^'
+
+        Upon erroneous input "[]", the call chain is
+
+          a -> b -> c
+
+        and, hence, the follow context stack is:
+
+          depth  local follow set  after call to rule
+            0    \<EOF>            a (from main())
+            1    ']'               b
+            2    '^'               c
+
+        Notice that ')' is not included, because b would have to have
+        been called from a different context in rule a for ')' to be
+        included.
+
+        For error recovery, we cannot consider FOLLOW(c)
+        (context-sensitive or otherwise). We need the combined set of
+        all context-sensitive FOLLOW sets--the set of all tokens that
+        could follow any reference in the call chain. We need to
+        resync to one of those tokens. Note that FOLLOW(c)='^' and if
+        we resync'd to that token, we'd consume until EOF. We need to
+        sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
+        In this case, for input "[]", LA(1) is in this set so we would
+        not consume anything, and after printing an error, rule c would
+        return normally. It would not find the required '^' though.
+        At this point, it gets a mismatched token error and throws an
+        exception (since LA(1) is not in the viable following token
+        set). The rule exception handler tries to recover, but finds
+        the same recovery set and doesn't consume anything. Rule b
+        exits normally returning to rule a. Now it finds the ']' (and
+        with the successful match exits errorRecovery mode).
+
+        So, you can see that the parser walks up the call chain looking
+        for the token that was a member of the recovery set.
+
+        Errors are not generated in errorRecovery mode.
+
+        ANTLR's error recovery mechanism is based upon original ideas:
+
+        "Algorithms + Data Structures = Programs" by Niklaus Wirth
+
+        and
+
+        "A note on error recovery in recursive descent parsers":
+        http://portal.acm.org/citation.cfm?id=947902.947905
+
+        Later, Josef Grosch had some good ideas:
+
+        "Efficient and Comfortable Error Recovery in Recursive Descent
+        Parsers":
+        ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
+
+        Like Grosch I implemented local FOLLOW sets that are combined
+        at run-time upon error to avoid overhead during parsing.
+        """
+
+        return self.combineFollows(False)
+
+
+    def computeContextSensitiveRuleFOLLOW(self):
+        """
+        Compute the context-sensitive FOLLOW set for current rule.
+        This is the set of token types that can follow a specific rule
+        reference given a specific call chain. You get the set of
+        viable tokens that can possibly come next (lookahead depth 1)
+        given the current call chain. Contrast this with the
+        definition of plain FOLLOW for rule r:
+
+         FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
+
+        where x in T* and alpha, beta in V*; T is the set of terminals and
+        V is the set of terminals and nonterminals. In other words,
+        FOLLOW(r) is the set of all tokens that can possibly follow
+        references to r in *any* sentential form (context). At
+        runtime, however, we know precisely which context applies as
+        we have the call chain. We may compute the exact (rather
+        than covering superset) set of following tokens.
+
+        For example, consider grammar:
+
+          stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
+               | "return" expr '.'
+               ;
+          expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
+          atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
+               | '(' expr ')'
+               ;
+
+        The FOLLOW sets are all inclusive whereas context-sensitive
+        FOLLOW sets are precisely what could follow a rule reference.
+        For input "i=(3);", here is the derivation:
+
+          stat => ID '=' expr ';'
+               => ID '=' atom ('+' atom)* ';'
+               => ID '=' '(' expr ')' ('+' atom)* ';'
+               => ID '=' '(' atom ')' ('+' atom)* ';'
+               => ID '=' '(' INT ')' ('+' atom)* ';'
+               => ID '=' '(' INT ')' ';'
+
+        At the "3" token, you'd have a call chain of
+
+          stat -> expr -> atom -> expr -> atom
+
+        What can follow that specific nested ref to atom? Exactly ')'
+        as you can see by looking at the derivation of this specific
+        input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
+
+        You want the exact viable token set when recovering from a
+        token mismatch. Upon token mismatch, if LA(1) is a member of
+        the viable next token set, then you know there is most likely
+        a missing token in the input stream. "Insert" one by just not
+        throwing an exception.
+        """
+
+        return self.combineFollows(True)
+
+
+    def combineFollows(self, exact):
+        followSet = set()
+        for idx, localFollowSet in reversed(list(enumerate(self._state.following))):
+            followSet |= localFollowSet
+            if exact:
+                # can we see end of rule?
+                if EOR_TOKEN_TYPE in localFollowSet:
+                    # Only leave EOR in set if at top (start rule); this lets
+                    # us know if we have to include follow(start rule);
+                    # i.e., EOF
+                    if idx > 0:
+                        followSet.remove(EOR_TOKEN_TYPE)
+
+                else:
+                    # can't see end of rule, quit
+                    break
+
+        return followSet
+
+
+    def recoverFromMismatchedToken(self, input, ttype, follow):
+        """Attempt to recover from a single missing or extra token.
+
+        EXTRA TOKEN
+
+        LA(1) is not what we are looking for. If LA(2) has the right token,
+        however, then assume LA(1) is some extra spurious token. Delete it
+        and use LA(2) as if we were doing a normal match(), which advances
+        the input.
+
+        MISSING TOKEN
+
+        If the current token is consistent with what could come after
+        ttype then it is ok to 'insert' the missing token, else throw
+        an exception. For example, input 'i=(3;' is clearly missing the
+        ')'. When the parser returns from the nested call to expr, it
+        will have the call chain:
+
+          stat -> expr -> atom
+
+        and it will be trying to match the ')' at this point in the
+        derivation:
+
+          => ID '=' '(' INT ')' ('+' atom)* ';'
+                             ^
+        match() will see that ';' doesn't match ')' and report a
+        mismatched token error. To recover, it sees that LA(1)==';'
+        is in the set of tokens that can follow the ')' token
+        reference in rule atom. It can assume that you forgot the ')'.
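+
+        To turn this recovery off entirely (see match()), a subclass can
+        override this method to raise immediately, for example:
+
+            def recoverFromMismatchedToken(self, input, ttype, follow):
+                raise MismatchedTokenException(ttype, input)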
+ """ + + e = None + + # if next token is what we are looking for then "delete" this token + if self.mismatchIsUnwantedToken(input, ttype): + e = UnwantedTokenException(ttype, input) + + self.beginResync() + input.consume() # simply delete extra token + self.endResync() + + # report after consuming so AW sees the token in the exception + self.reportError(e) + + # we want to return the token we're actually matching + matchedSymbol = self.getCurrentInputSymbol(input) + + # move past ttype token as if all were ok + input.consume() + return matchedSymbol + + # can't recover with single token deletion, try insertion + if self.mismatchIsMissingToken(input, follow): + inserted = self.getMissingSymbol(input, e, ttype, follow) + e = MissingTokenException(ttype, input, inserted) + + # report after inserting so AW sees the token in the exception + self.reportError(e) + return inserted + + # even that didn't work; must throw the exception + e = MismatchedTokenException(ttype, input) + raise e + + + def recoverFromMismatchedSet(self, input, e, follow): + """Not currently used""" + + if self.mismatchIsMissingToken(input, follow): + self.reportError(e) + # we don't know how to conjure up a token for sets yet + return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow) + + # TODO do single token deletion like above for Token mismatch + raise e + + + def getCurrentInputSymbol(self, input): + """ + Match needs to return the current input symbol, which gets put + into the label for the associated token ref; e.g., x=ID. Token + and tree parsers need to return different objects. Rather than test + for input stream type or change the IntStream interface, I use + a simple method to ask the recognizer to tell me what the current + input symbol is. + + This is ignored for lexers. + """ + + return None + + + def getMissingSymbol(self, input, e, expectedTokenType, follow): + """Conjure up a missing token during error recovery. + + The recognizer attempts to recover from single missing + symbols. But, actions might refer to that missing symbol. + For example, x=ID {f($x);}. The action clearly assumes + that there has been an identifier matched previously and that + $x points at that token. If that token is missing, but + the next token in the stream is what we want we assume that + this token is missing and we keep going. Because we + have to return some token to replace the missing token, + we have to conjure one up. This method gives the user control + over the tokens returned for missing tokens. Mostly, + you will want to create something special for identifier + tokens. For literals such as '{' and ',', the default + action in the parser or tree parser works. It simply creates + a CommonToken of the appropriate type. The text will be the token. + If you change what tokens must be created by the lexer, + override this method to create the appropriate tokens. + """ + + return None + + +## def recoverFromMissingElement(self, input, e, follow): +## """ +## This code is factored out from mismatched token and mismatched set +## recovery. It handles "single token insertion" error recovery for +## both. No tokens are consumed to recover from insertions. Return +## true if recovery was possible else return false. 
+## """ + +## if self.mismatchIsMissingToken(input, follow): +## self.reportError(e) +## return True + +## # nothing to do; throw exception +## return False + + + def consumeUntil(self, input, tokenTypes): + """ + Consume tokens until one matches the given token or token set + + tokenTypes can be a single token type or a set of token types + + """ + + if not isinstance(tokenTypes, (set, frozenset)): + tokenTypes = frozenset([tokenTypes]) + + ttype = input.LA(1) + while ttype != EOF and ttype not in tokenTypes: + input.consume() + ttype = input.LA(1) + + + def getRuleInvocationStack(self): + """ + Return List<String> of the rules in your parser instance + leading up to a call to this method. You could override if + you want more details such as the file/line info of where + in the parser java code a rule is invoked. + + This is very useful for error messages and for context-sensitive + error recovery. + + You must be careful, if you subclass a generated recognizers. + The default implementation will only search the module of self + for rules, but the subclass will not contain any rules. + You probably want to override this method to look like + + def getRuleInvocationStack(self): + return self._getRuleInvocationStack(<class>.__module__) + + where <class> is the class of the generated recognizer, e.g. + the superclass of self. + """ + + return self._getRuleInvocationStack(self.__module__) + + + def _getRuleInvocationStack(cls, module): + """ + A more general version of getRuleInvocationStack where you can + pass in, for example, a RecognitionException to get it's rule + stack trace. This routine is shared with all recognizers, hence, + static. + + TODO: move to a utility class or something; weird having lexer call + this + """ + + # mmmhhh,... perhaps look at the first argument + # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of + # requested recognizer... + + rules = [] + for frame in reversed(inspect.stack()): + code = frame[0].f_code + codeMod = inspect.getmodule(code) + if codeMod is None: + continue + + # skip frames not in requested module + if codeMod.__name__ != module: + continue + + # skip some unwanted names + if code.co_name in ('nextToken', '<module>'): + continue + + rules.append(code.co_name) + + return rules + + _getRuleInvocationStack = classmethod(_getRuleInvocationStack) + + + def getBacktrackingLevel(self): + return self._state.backtracking + + def setBacktrackingLevel(self, n): + self._state.backtracking = n + + + def getGrammarFileName(self): + """For debugging and other purposes, might want the grammar name. + + Have ANTLR generate an implementation for this method. + """ + + return self.grammarFileName + + + def getSourceName(self): + raise NotImplementedError + + + def toStrings(self, tokens): + """A convenience method for use most often with template rewrites. + + Convert a List<Token> to List<String> + """ + + if tokens is None: + return None + + return [token.text for token in tokens] + + + def getRuleMemoization(self, ruleIndex, ruleStartIndex): + """ + Given a rule number and a start token index number, return + MEMO_RULE_UNKNOWN if the rule has not parsed input starting from + start index. If this rule has parsed input starting from the + start index before, then return where the rule stopped parsing. + It returns the index of the last token matched by the rule. 
+ """ + + if ruleIndex not in self._state.ruleMemo: + self._state.ruleMemo[ruleIndex] = {} + + return self._state.ruleMemo[ruleIndex].get( + ruleStartIndex, self.MEMO_RULE_UNKNOWN + ) + + + def alreadyParsedRule(self, input, ruleIndex): + """ + Has this rule already parsed input at the current index in the + input stream? Return the stop token index or MEMO_RULE_UNKNOWN. + If we attempted but failed to parse properly before, return + MEMO_RULE_FAILED. + + This method has a side-effect: if we have seen this input for + this rule and successfully parsed before, then seek ahead to + 1 past the stop token matched for this rule last time. + """ + + stopIndex = self.getRuleMemoization(ruleIndex, input.index()) + if stopIndex == self.MEMO_RULE_UNKNOWN: + return False + + if stopIndex == self.MEMO_RULE_FAILED: + raise BacktrackingFailed + + else: + input.seek(stopIndex + 1) + + return True + + + def memoize(self, input, ruleIndex, ruleStartIndex, success): + """ + Record whether or not this rule parsed the input at this position + successfully. + """ + + if success: + stopTokenIndex = input.index() - 1 + else: + stopTokenIndex = self.MEMO_RULE_FAILED + + if ruleIndex in self._state.ruleMemo: + self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex + + + def traceIn(self, ruleName, ruleIndex, inputSymbol): + sys.stdout.write("enter %s %s" % (ruleName, inputSymbol)) + + if self._state.backtracking > 0: + sys.stdout.write(" backtracking=%s" % self._state.backtracking) + + sys.stdout.write('\n') + + + def traceOut(self, ruleName, ruleIndex, inputSymbol): + sys.stdout.write("exit %s %s" % (ruleName, inputSymbol)) + + if self._state.backtracking > 0: + sys.stdout.write(" backtracking=%s" % self._state.backtracking) + + # mmmm... we use BacktrackingFailed exceptions now. So how could we + # get that information here? + #if self._state.failed: + # sys.stdout.write(" failed") + #else: + # sys.stdout.write(" succeeded") + + sys.stdout.write('\n') + + +class TokenSource(object): + """ + @brief Abstract baseclass for token producers. + + A source of tokens must provide a sequence of tokens via nextToken() + and also must reveal it's source of characters; CommonToken's text is + computed from a CharStream; it only store indices into the char stream. + + Errors from the lexer are never passed to the parser. Either you want + to keep going or you do not upon token recognition error. If you do not + want to continue lexing then you do not want to continue parsing. Just + throw an exception not under RecognitionException and Java will naturally + toss you all the way out of the recognizers. If you want to continue + lexing then you should not throw an exception to the parser--it has already + requested a token. Keep lexing until you get a valid one. Just report + errors and keep going, looking for a valid token. + """ + + def nextToken(self): + """Return a Token object from your input stream (usually a CharStream). + + Do not fail/return upon lexing error; keep chewing on the characters + until you get a good one; errors are not passed through to the parser. + """ + + raise NotImplementedError + + + def __iter__(self): + """The TokenSource is an interator. + + The iteration will not include the final EOF token, see also the note + for the next() method. + + """ + + return self + + + def next(self): + """Return next token or raise StopIteration. + + Note that this will raise StopIteration when hitting the EOF token, + so EOF will not be part of the iteration. 
+ + """ + + token = self.nextToken() + if token is None or token.type == EOF: + raise StopIteration + return token + + +class Lexer(BaseRecognizer, TokenSource): + """ + @brief Baseclass for generated lexer classes. + + A lexer is recognizer that draws input symbols from a character stream. + lexer grammars result in a subclass of this object. A Lexer object + uses simplified match() and error recovery mechanisms in the interest + of speed. + """ + + def __init__(self, input, state=None): + BaseRecognizer.__init__(self, state) + TokenSource.__init__(self) + + # Where is the lexer drawing characters from? + self.input = input + + + def reset(self): + BaseRecognizer.reset(self) # reset all recognizer state variables + + if self.input is not None: + # rewind the input + self.input.seek(0) + + if self._state is None: + # no shared state work to do + return + + # wack Lexer state variables + self._state.token = None + self._state.type = INVALID_TOKEN_TYPE + self._state.channel = DEFAULT_CHANNEL + self._state.tokenStartCharIndex = -1 + self._state.tokenStartLine = -1 + self._state.tokenStartCharPositionInLine = -1 + self._state.text = None + + + def makeEOFToken(self): + eof = CommonToken( + type=EOF, channel=DEFAULT_CHANNEL, + input=self.input, + start=self.input.index(), stop=self.input.index()) + eof.line = self.input.line + eof.charPositionInLine = self.input.charPositionInLine + return eof + + def nextToken(self): + """ + Return a token from this source; i.e., match a token on the char + stream. + """ + + while 1: + self._state.token = None + self._state.channel = DEFAULT_CHANNEL + self._state.tokenStartCharIndex = self.input.index() + self._state.tokenStartCharPositionInLine = self.input.charPositionInLine + self._state.tokenStartLine = self.input.line + self._state.text = None + if self.input.LA(1) == EOF: + return self.makeEOFToken() + + try: + self.mTokens() + + if self._state.token is None: + self.emit() + + elif self._state.token == SKIP_TOKEN: + continue + + return self._state.token + + except NoViableAltException, re: + self.reportError(re) + self.recover(re) # throw out current char and try again + + except RecognitionException, re: + self.reportError(re) + # match() routine has already called recover() + + + def skip(self): + """ + Instruct the lexer to skip creating a token for current lexer rule + and look for another token. nextToken() knows to keep looking when + a lexer rule finishes with token set to SKIP_TOKEN. Recall that + if token==null at end of any token rule, it creates one for you + and emits it. + """ + + self._state.token = SKIP_TOKEN + + + def mTokens(self): + """This is the lexer entry point that sets instance var 'token'""" + + # abstract method + raise NotImplementedError + + + def setCharStream(self, input): + """Set the char stream and reset the lexer""" + self.input = None + self.reset() + self.input = input + + + def getSourceName(self): + return self.input.getSourceName() + + + def emit(self, token=None): + """ + The standard method called to automatically emit a token at the + outermost lexical rule. The token object should point into the + char buffer start..stop. If there is a text override in 'text', + use that to set the token's text. Override this method to emit + custom Token objects. + + If you are building trees, then you should also override + Parser or TreeParser.getMissingSymbol(). 
+ """ + + if token is None: + token = CommonToken( + input=self.input, + type=self._state.type, + channel=self._state.channel, + start=self._state.tokenStartCharIndex, + stop=self.getCharIndex()-1 + ) + token.line = self._state.tokenStartLine + token.text = self._state.text + token.charPositionInLine = self._state.tokenStartCharPositionInLine + + self._state.token = token + + return token + + + def match(self, s): + if isinstance(s, basestring): + for c in s: + if self.input.LA(1) != ord(c): + if self._state.backtracking > 0: + raise BacktrackingFailed + + mte = MismatchedTokenException(c, self.input) + self.recover(mte) + raise mte + + self.input.consume() + + else: + if self.input.LA(1) != s: + if self._state.backtracking > 0: + raise BacktrackingFailed + + mte = MismatchedTokenException(unichr(s), self.input) + self.recover(mte) # don't really recover; just consume in lexer + raise mte + + self.input.consume() + + + def matchAny(self): + self.input.consume() + + + def matchRange(self, a, b): + if self.input.LA(1) < a or self.input.LA(1) > b: + if self._state.backtracking > 0: + raise BacktrackingFailed + + mre = MismatchedRangeException(unichr(a), unichr(b), self.input) + self.recover(mre) + raise mre + + self.input.consume() + + + def getLine(self): + return self.input.line + + + def getCharPositionInLine(self): + return self.input.charPositionInLine + + + def getCharIndex(self): + """What is the index of the current character of lookahead?""" + + return self.input.index() + + + def getText(self): + """ + Return the text matched so far for the current token or any + text override. + """ + if self._state.text is not None: + return self._state.text + + return self.input.substring( + self._state.tokenStartCharIndex, + self.getCharIndex()-1 + ) + + + def setText(self, text): + """ + Set the complete text of this token; it wipes any previous + changes to the text. + """ + self._state.text = text + + + text = property(getText, setText) + + + def reportError(self, e): + ## TODO: not thought about recovery in lexer yet. + + ## # if we've already reported an error and have not matched a token + ## # yet successfully, don't report any errors. + ## if self.errorRecovery: + ## #System.err.print("[SPURIOUS] "); + ## return; + ## + ## self.errorRecovery = True + + self.displayRecognitionError(self.tokenNames, e) + + + def getErrorMessage(self, e, tokenNames): + msg = None + + if isinstance(e, MismatchedTokenException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting " \ + + self.getCharErrorDisplay(e.expecting) + + elif isinstance(e, NoViableAltException): + msg = "no viable alternative at character " \ + + self.getCharErrorDisplay(e.c) + + elif isinstance(e, EarlyExitException): + msg = "required (...)+ loop did not match anything at character " \ + + self.getCharErrorDisplay(e.c) + + elif isinstance(e, MismatchedNotSetException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting set " \ + + repr(e.expecting) + + elif isinstance(e, MismatchedSetException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting set " \ + + repr(e.expecting) + + elif isinstance(e, MismatchedRangeException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting set " \ + + self.getCharErrorDisplay(e.a) \ + + ".." 
+                  + self.getCharErrorDisplay(e.b)
+
+        else:
+            msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)
+
+        return msg
+
+
+    def getCharErrorDisplay(self, c):
+        if c == EOF:
+            c = '<EOF>'
+        return repr(c)
+
+
+    def recover(self, re):
+        """
+        Lexers can normally match any char in their vocabulary after matching
+        a token, so do the easy thing and just kill a character and hope
+        it all works out. You can instead use the rule invocation stack
+        to do sophisticated error recovery if you are in a fragment rule.
+        """
+
+        self.input.consume()
+
+
+    def traceIn(self, ruleName, ruleIndex):
+        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
+                                         self.getLine(),
+                                         self.getCharPositionInLine()
+                                         )
+
+        BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol)
+
+
+    def traceOut(self, ruleName, ruleIndex):
+        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
+                                         self.getLine(),
+                                         self.getCharPositionInLine()
+                                         )
+
+        BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol)
+
+
+
+class Parser(BaseRecognizer):
+    """
+    @brief Baseclass for generated parser classes.
+    """
+
+    def __init__(self, lexer, state=None):
+        BaseRecognizer.__init__(self, state)
+
+        self.input = lexer
+
+
+    def reset(self):
+        BaseRecognizer.reset(self) # reset all recognizer state variables
+        if self.input is not None:
+            self.input.seek(0) # rewind the input
+
+
+    def getCurrentInputSymbol(self, input):
+        return input.LT(1)
+
+
+    def getMissingSymbol(self, input, e, expectedTokenType, follow):
+        if expectedTokenType == EOF:
+            tokenText = "<missing EOF>"
+        else:
+            tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"
+        t = CommonToken(type=expectedTokenType, text=tokenText)
+        current = input.LT(1)
+        if current.type == EOF:
+            current = input.LT(-1)
+
+        if current is not None:
+            t.line = current.line
+            t.charPositionInLine = current.charPositionInLine
+        t.channel = DEFAULT_CHANNEL
+        return t
+
+
+    def setTokenStream(self, input):
+        """Set the token stream and reset the parser"""
+
+        self.input = None
+        self.reset()
+        self.input = input
+
+
+    def getTokenStream(self):
+        return self.input
+
+
+    def getSourceName(self):
+        return self.input.getSourceName()
+
+
+    def traceIn(self, ruleName, ruleIndex):
+        BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))
+
+
+    def traceOut(self, ruleName, ruleIndex):
+        BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
+
+
+class RuleReturnScope(object):
+    """
+    Rules can return start/stop info as well as possible trees and templates.
+    """
+
+    def getStart(self):
+        """Return the start token or tree."""
+        return None
+
+
+    def getStop(self):
+        """Return the stop token or tree."""
+        return None
+
+
+    def getTree(self):
+        """Has a value potentially if output=AST."""
+        return None
+
+
+    def getTemplate(self):
+        """Has a value potentially if output=template."""
+        return None
+
+
+class ParserRuleReturnScope(RuleReturnScope):
+    """
+    Rules that return more than a single value must return an object
+    containing all the values. Besides the properties defined in
+    RuleLabelScope.predefinedRulePropertiesScope there may be user-defined
+    return values. This class simply defines the minimum properties that
+    are always defined and methods to access the others that might be
+    available depending on output option such as template and tree.
+
+    Note text is not an actual property of the return value, it is computed
+    from start and stop using the input stream's toString() method.
+    I could add a ctor to this so that we can pass in and store the input
+    stream, but I'm not sure we want to do that. It would seem to be
+    undefined to get the .text property anyway if the rule matches tokens
+    from multiple input streams.
+
+    I do not use getters for fields of objects that are used simply to
+    group values such as this aggregate. The getters/setters are there to
+    satisfy the superclass interface.
+    """
+
+    def __init__(self):
+        self.start = None
+        self.stop = None
+        self.tree = None  # only used when output=AST
+
+
+    def getStart(self):
+        return self.start
+
+
+    def getStop(self):
+        return self.stop
+
+
+    def getTree(self):
+        return self.tree
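+
+
+# A minimal end-to-end sketch (illustrative; TLexer and TParser stand in
+# for classes ANTLR would generate from a grammar T with start rule
+# 'prog'):
+#
+#   from antlr3 import ANTLRStringStream, CommonTokenStream
+#
+#   char_stream = ANTLRStringStream('x = (3);')
+#   lexer = TLexer(char_stream)
+#   tokens = CommonTokenStream(lexer)
+#   parser = TParser(tokens)
+#   parser.prog()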