diff options
Diffstat (limited to 'lib/python2.7/site-packages/sepolgen/lex.py')
-rw-r--r-- | lib/python2.7/site-packages/sepolgen/lex.py | 872 |
1 files changed, 872 insertions, 0 deletions
diff --git a/lib/python2.7/site-packages/sepolgen/lex.py b/lib/python2.7/site-packages/sepolgen/lex.py new file mode 100644 index 0000000..c13acef --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/lex.py @@ -0,0 +1,872 @@ +#----------------------------------------------------------------------------- +# ply: lex.py +# +# Author: David M. Beazley (dave@dabeaz.com) +# +# Copyright (C) 2001-2006, David M. Beazley +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +#----------------------------------------------------------------------------- + +__version__ = "2.2" + +import re, sys, types + +from . import util +import collections + + +# Regular expression used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Available instance types. This is used when parsers are defined by a class. +# In Python3 the InstanceType and ObjectType are no more, they've passed, ceased +# to be, they are ex-classes along with old-style classes + +try: + _INSTANCETYPE = (types.InstanceType, types.ObjectType) +except AttributeError: + _INSTANCETYPE = object + +# Exception thrown when invalid token encountered and no default error +# handler is defined. 
class LexError(Exception):
    """Error raised when the lexer cannot tokenize its input.

    ``args`` carries the message.  ``text`` carries the value passed as *s*;
    Lexer.token() passes the not-yet-scanned remainder of the input.
    """
    def __init__(self,message,s):
        self.args = (message,)
        self.text = s

# Token class
class LexToken(object):
    """A single token.

    Lexer.token() fills in the attributes type, value, lineno, lexpos and
    lexer before handing the token to a rule function or to the caller.
    """
    def __str__(self):
        return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
    def __repr__(self):
        return str(self)
    def skip(self,n):
        # Convenience wrapper: advance the owning lexer by n characters.
        self.lexer.skip(n)

# -----------------------------------------------------------------------------
# Lexer class
#
# This class encapsulates all of the methods and data associated with a lexer.
#
#    input()          -  Store a new string in the lexer
#    token()          -  Get the next token
# -----------------------------------------------------------------------------

class Lexer:
    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re,findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstate = "INITIAL"     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ""           # Ignored characters
        self.lexliterals = ""         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexdebug = 0             # Debugging mode
        self.lexoptimize = 0          # Optimized mode

    def clone(self,object=None):
        """Return a new Lexer that shares this lexer's tables and position.

        If *object* is given, every rule function and error function is
        re-looked-up by name on *object* (via getattr) so that the clone
        calls *object*'s methods, and *object* becomes the clone's lexmodule.

        NOTE: the tables and the state stack list are shared by reference
        with the original lexer, not copied.
        """
        c = Lexer()
        c.lexstatere = self.lexstatere
        c.lexstateinfo = self.lexstateinfo
        c.lexstateretext = self.lexstateretext
        c.lexstate = self.lexstate
        c.lexstatestack = self.lexstatestack
        c.lexstateignore = self.lexstateignore
        c.lexstateerrorf = self.lexstateerrorf
        c.lexreflags = self.lexreflags
        c.lexdata = self.lexdata
        c.lexpos = self.lexpos
        c.lexlen = self.lexlen
        c.lextokens = self.lextokens
        c.lexdebug = self.lexdebug
        c.lineno = self.lineno
        c.lexoptimize = self.lexoptimize
        c.lexliterals = self.lexliterals
        c.lexmodule = self.lexmodule

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object.  In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.

        if object:
            newtab = { }
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        # Entries without a function (None or (None, type))
                        # have nothing to rebind.
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object,f[0].__name__),f[1]))
                    newre.append((cre,newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = { }
            for key, ef in self.lexstateerrorf.items():
                # A state may have no error function (None); keep it as None
                # instead of failing on None.__name__.
                if ef:
                    c.lexstateerrorf[key] = getattr(object,ef.__name__)
                else:
                    c.lexstateerrorf[key] = None
            c.lexmodule = object

        # Set up other attributes
        c.begin(c.lexstate)
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self,tabfile):
        """Write the lexer tables to the Python source file ``tabfile + ".py"``.

        Functions are stored by name (see _funcs_to_names) so the file can be
        re-loaded with readtab().
        """
        tabre = { }
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1])))
            tabre[key] = titem

        taberr = { }
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None

        # "with" guarantees the file is closed even if a write fails.
        with open(tabfile+".py","w") as tf:
            tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
            tf.write("_lextokens    = %s\n" % repr(self.lextokens))
            tf.write("_lexreflags   = %s\n" % repr(self.lexreflags))
            tf.write("_lexliterals  = %s\n" % repr(self.lexliterals))
            tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
            tf.write("_lexstatere   = %s\n" % repr(tabre))
            tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
            tf.write("_lexstateerrorf = %s\n" % repr(taberr))

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self,tabfile,fdict):
        """Load lexer tables from the importable module named *tabfile*.

        *fdict* maps rule-function names to the actual callables; it is used
        to turn the names stored in the table back into functions.

        Raises ImportError if the table module cannot be imported.
        """
        # exec("import ... as lextab") does not create a usable local name
        # inside a function on Python 3, so import the module explicitly.
        import importlib
        lextab = importlib.import_module(tabfile)

        self.lextokens      = lextab._lextokens
        self.lexreflags     = lextab._lexreflags
        self.lexliterals    = lextab._lexliterals
        self.lexstateinfo   = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere     = { }
        self.lexstateretext = { }
        for key,lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = { }
        for key,ef in lextab._lexstateerrorf.items():
            # writetab() stores None for states without an error function.
            if ef:
                self.lexstateerrorf[key] = fdict[ef]
            else:
                self.lexstateerrorf[key] = None
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self,s):
        """Set *s* as the text to tokenize and rewind to position 0.

        Raises ValueError if *s* is neither util.bytes_type nor
        util.string_type.
        """
        if not (isinstance(s,util.bytes_type) or isinstance(s, util.string_type)):
            raise ValueError("Expected a string")
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self,state):
        """Switch to lexing state *state*; raises ValueError if undefined."""
        if state not in self.lexstatere:
            raise ValueError("Undefined state")
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state,"")
        self.lexerrorf = self.lexstateerrorf.get(state,None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self,state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self,n):
        self.lexpos += n

    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible.  Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        """Return the next LexToken, or None when the input is exhausted.

        Raises LexError on an unmatched character when no error function
        handles it (or the error function does not advance the position),
        and when a rule function returns a token whose type is not in
        lextokens (checked only when lexoptimize is false).  Raises
        RuntimeError if no input has been supplied.
        """
        # Make local copies of frequently referenced attributes
        lexpos    = self.lexpos
        lexlen    = self.lexlen
        lexignore = self.lexignore
        lexdata   = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre,lexindexfunc in self.lexre:
                m = lexre.match(lexdata,lexpos)
                if not m: continue

                # Set last match in lexer so that rules can access it if they want
                self.lexmatch = m

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos
                tok.lexer = self

                lexpos = m.end()
                i = m.lastindex
                func,tok.type = lexindexfunc[i]
                self.lexpos = lexpos

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type: return tok
                    break

                # if func not callable, it means it's an ignored token.
                # (The builtin callable() is used because collections.Callable
                # no longer exists on Python 3.10+.)
                if not callable(func):
                    break

                # If token is processed by a function, call it
                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos        # This is here in case user has updated lexpos.
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if newtok.type not in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func.__code__.co_filename, func.__code__.co_firstlineno,
                            func.__name__, newtok.type),lexdata[lexpos:])

                return newtok
            else:
                # The for-loop finished without "break": no rule matched.
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.lexer = self
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError("No input string given with input()")
        return None

# -----------------------------------------------------------------------------
# _validate_file()
#
# This checks to see if there are duplicated t_rulename() functions or strings
# in the parser input file.  This is done using a simple regular expression
# match on each line in the filename.
#
# Returns 1 if the file looks fine (or cannot be checked) and 0 if a rule
# name is defined more than once; each redefinition is reported with print().
# -----------------------------------------------------------------------------

def _validate_file(filename):
    import os.path
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1        # No idea what the file is. Return OK

    try:
        # "with" closes the file even if reading fails part-way.
        with open(filename) as f:
            lines = f.readlines()
    except IOError:
        return 1                     # Oh well

    fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
    sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
    counthash = { }                  # rule name -> line of first definition
    noerror = 1
    for linen, l in enumerate(lines, 1):
        m = fre.match(l)
        if not m:
            m = sre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                print("%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev))
                noerror = 0
    return noerror

# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------

def _funcs_to_names(funclist):
    result = []
    for f in funclist:
        if f and f[0]:
            # (function, tokentype) -> (function name, tokentype)
            result.append((f[0].__name__,f[1]))
        else:
            result.append(f)
    return result

# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------

def _names_to_funcs(namelist,fdict):
    result = []
    for n in namelist:
        if n and n[0]:
            # (function name, tokentype) -> (function, tokentype)
            result.append((fdict[n[0]],n[1]))
        else:
            result.append(n)
    return result

# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression.  Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
#
# Arguments:
#    relist  - list of regex strings, each of the form "(?P<rulename>...)"
#    reflags - extra re flags, OR'ed with re.VERBOSE
#    ldict   - dictionary mapping rule names to their definitions
#              (functions/methods or strings)
#
# Returns [] for an empty relist.  Otherwise returns a pair
# (list of (compiled_re, lexindexfunc), list of regex strings), where
# lexindexfunc maps a regex group number to (function_or_None, tokentype).
# -----------------------------------------------------------------------------

def _form_master_re(relist,reflags,ldict):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex,re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if isinstance(handle, (types.FunctionType, types.MethodType)):
                # Token type is the function name without its "t_" prefix
                lexindexfunc[i] = (handle,handle.__name__[2:])
            elif handle is not None:
                # Rule was specified as a string: there is no function to
                # call, only a token type (or nothing for ignore_ rules)
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                    print("IGNORE", f)
                else:
                    lexindexfunc[i] = (None, f[2:])

        return [(lexre,lexindexfunc)],[regex]
    except Exception:
        # Splitting only helps when there is more than one component.  A
        # single component that fails would otherwise be "split" into itself
        # and recurse until the interpreter's recursion limit is hit, so
        # report the real error instead.
        if len(relist) == 1:
            raise
        m = len(relist) // 2
        llist, lre = _form_master_re(relist[:m],reflags,ldict)
        rlist, rre = _form_master_re(relist[m:],reflags,ldict)
        return llist+rlist, lre+rre

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.
# For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
#
# The special state name 'ANY' expands to every state in names.  When no
# state prefix is present the rule belongs to the 'INITIAL' state.
# -----------------------------------------------------------------------------

def _statetoken(s,names):
    parts = s.split("_")
    # Find the first component (after the leading "t") that is not a state name
    for i in range(1,len(parts)):
        if parts[i] not in names and parts[i] != 'ANY': break
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names.keys())

    tokenname = "_".join(parts[i:])
    return (states,tokenname)

# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0):
    """Build and return a Lexer from t_* rule definitions.

    module/object -- module or instance holding 'tokens', optional 'states'
                     and 'literals', and the t_* rules.  If neither is given
                     the caller's global namespace is used.  'object' takes
                     precedence over 'module'.
    debug         -- if true, print the tokens, rules and master regexes.
    optimize      -- if true, skip rule validation; the tables are read from
                     the 'lextab' module when it can be imported and written
                     to it after a fresh build.
    lextab        -- name of the table module used in optimize mode.
    reflags       -- extra re flags, OR'ed with re.VERBOSE.
    nowarn        -- if true, suppress warning messages.

    Also rebinds the module-level names 'lexer', 'token' and 'input' to the
    new lexer and its bound methods.

    Raises ValueError if module is neither a module nor an instance, and
    SyntaxError if the rule set is invalid.
    """
    global lexer, token, input
    ldict = None
    stateinfo  = { 'INITIAL' : 'inclusive'}
    error = 0
    files = { }
    lexobj = Lexer()
    lexobj.lexdebug = debug
    lexobj.lexoptimize = optimize

    if nowarn: warn = 0
    else: warn = 1

    if object: module = object

    if module:
        # User supplied a module object.
        if isinstance(module, types.ModuleType):
            ldict = module.__dict__
        elif isinstance(module, _INSTANCETYPE):
            ldict = dict((k,getattr(module,k)) for k in dir(module))
        else:
            raise ValueError("Expected a module or instance")
        lexobj.lexmodule = module

    else:
        # No module given.  We might be able to get information from the caller.
        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back           # Walk out to our calling function
            ldict = f.f_globals    # Grab its globals dictionary

    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            # No usable table module yet: fall through and build from scratch
            pass

    # Get the tokens, states, and literals variables (if any)
    if (module and isinstance(module,_INSTANCETYPE)):
        tokens   = getattr(module,"tokens",None)
        states   = getattr(module,"states",None)
        literals = getattr(module,"literals","")
    else:
        tokens   = ldict.get("tokens",None)
        states   = ldict.get("states",None)
        literals = ldict.get("literals","")

    if not tokens:
        raise SyntaxError("lex: module does not define 'tokens'")
    if not (isinstance(tokens,list) or isinstance(tokens,tuple)):
        raise SyntaxError("lex: tokens must be a list or tuple.")

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    if not optimize:
        for n in tokens:
            if not _is_identifier.match(n):
                print("lex: Bad token name '%s'" % n)
                error = 1
            if warn and n in lexobj.lextokens:
                print("lex: Warning. Token '%s' multiply defined." % n)
            lexobj.lextokens[n] = None
    else:
        for n in tokens: lexobj.lextokens[n] = None

    if debug:
        print("lex: tokens = '%s'" % list(lexobj.lextokens.keys()))

    try:
        for c in literals:
            if not (isinstance(c,util.bytes_type) or isinstance(c, util.string_type)) or len(c) > 1:
                print("lex: Invalid literal %s. Must be a single character" % repr(c))
                error = 1
                continue

    except TypeError:
        print("lex: Invalid literals specification. literals must be a sequence of characters.")
        error = 1

    lexobj.lexliterals = literals

    # Build statemap
    if states:
        if not (isinstance(states,tuple) or isinstance(states,list)):
            print("lex: states must be defined as a tuple or list.")
            error = 1
        else:
            for s in states:
                if not isinstance(s,tuple) or len(s) != 2:
                    print("lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s))
                    error = 1
                    continue
                name, statetype = s
                # Remember the name as given so the checks and the message
                # below work even when it is not a text string.
                original_name = name
                if isinstance(name, util.string_type):
                    name = util.encode_input(name)
                if not isinstance(name,util.bytes_type) or len(original_name) != len(name):
                    print("lex: state name %s must be a byte string" % repr(original_name))
                    error = 1
                    continue
                if not (statetype == 'inclusive' or statetype == 'exclusive'):
                    print("lex: state type for state %s must be 'inclusive' or 'exclusive'" % name)
                    error = 1
                    continue
                if name in stateinfo:
                    print("lex: state '%s' already defined." % name)
                    error = 1
                    continue
                stateinfo[name] = statetype

    # Get a list of symbols with the t_ or s_ prefix
    tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ]

    # Now build up a list of functions and a list of strings

    funcsym =  { }        # Symbols defined as functions
    strsym =   { }        # Symbols defined as strings
    toknames = { }        # Mapping of symbols to token names

    for s in stateinfo.keys():
        funcsym[s] = []
        strsym[s] = []

    ignore   = { }        # Ignore strings by state
    errorf   = { }        # Error functions by state

    if len(tsymbols) == 0:
        raise SyntaxError("lex: no rules of the form t_rulename are defined.")

    for f in tsymbols:
        t = ldict[f]
        states, tokname = _statetoken(f,stateinfo)
        toknames[f] = tokname

        # The builtin callable() is used because collections.Callable no
        # longer exists on Python 3.10+.
        if callable(t):
            for s in states: funcsym[s].append((f,t))
        elif (isinstance(t, util.bytes_type) or isinstance(t,util.string_type)):
            for s in states: strsym[s].append((f,t))
        else:
            print("lex: %s not defined as a function or string" % f)
            error = 1

    # Sort the functions by line number
    for f in funcsym.values():
        f.sort(key=lambda x: x[1].__code__.co_firstlineno)

    # Sort the strings by decreasing regular expression length, so that
    # longer patterns are tried before shorter ones in the master regex
    # (e.g. '==' before '=').
    for s in strsym.values():
        s.sort(key=lambda x: len(x[1]), reverse=True)

    regexs = { }

    # Build the master regular expressions
    for state in stateinfo.keys():
        regex_list = []

        # Add rules defined by functions first
        for fname, f in funcsym[state]:
            line = f.__code__.co_firstlineno
            file = f.__code__.co_filename
            files[file] = None
            tokname = toknames[fname]

            ismethod = isinstance(f, types.MethodType)

            if not optimize:
                nargs = f.__code__.co_argcount
                if ismethod:
                    reqargs = 2
                else:
                    reqargs = 1
                if nargs > reqargs:
                    print("%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__))
                    error = 1
                    continue

                if nargs < reqargs:
                    print("%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__))
                    error = 1
                    continue

                if tokname == 'ignore':
                    print("%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__))
                    error = 1
                    continue

            if tokname == 'error':
                errorf[state] = f
                continue

            if f.__doc__:
                if not optimize:
                    try:
                        c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags)
                        if c.match(""):
                            print("%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__))
                            error = 1
                            continue
                    except re.error as e:
                        print("%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e))
                        if '#' in f.__doc__:
                            print("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__))
                        error = 1
                        continue

                    if debug:
                        print("lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state))

                # Okay. The regular expression seemed okay.  Let's append it to the master regular
                # expression we're building

                regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__))
            else:
                print("%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__))

        # Now add all of the simple rules
        for name,r in strsym[state]:
            tokname = toknames[name]

            if tokname == 'ignore':
                ignore[state] = r
                continue

            if not optimize:
                if tokname == 'error':
                    raise SyntaxError("lex: Rule '%s' must be defined as a function" % name)

                if tokname not in lexobj.lextokens and tokname.find("ignore_") < 0:
                    print("lex: Rule '%s' defined for an unspecified token %s." % (name,tokname))
                    error = 1
                    continue
                try:
                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags)
                    if (c.match("")):
                        print("lex: Regular expression for rule '%s' matches empty string." % name)
                        error = 1
                        continue
                except re.error as e:
                    print("lex: Invalid regular expression for rule '%s'. %s" % (name,e))
                    if '#' in r:
                        print("lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name)

                    error = 1
                    continue
                if debug:
                    print("lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state))

            regex_list.append("(?P<%s>%s)" % (name,r))

        if not regex_list:
            print("lex: No rules defined for state '%s'" % state)
            error = 1

        regexs[state] = regex_list


    if not optimize:
        for f in files.keys():
            if not _validate_file(f):
                error = 1

    if error:
        raise SyntaxError("lex: Unable to build lexer.")

    # From this point forward, we're reasonably confident that we can build the lexer.
    # No more errors will be generated, but there might be some warning messages.

    # Build the master regular expressions

    for state in regexs.keys():
        lexre, re_text = _form_master_re(regexs[state],reflags,ldict)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        if debug:
            for i in range(len(re_text)):
                print("lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i]))

    # For inclusive states, we need to add the INITIAL state
    for state,type in stateinfo.items():
        if state != "INITIAL" and type == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]

    # Set up ignore variables
    lexobj.lexstateignore = ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = errorf
    lexobj.lexerrorf = errorf.get("INITIAL",None)
    if warn and not lexobj.lexerrorf:
        print("lex: Warning. no t_error rule is defined.")

    # Check state information for ignore and error rules
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if warn and s not in errorf:
                print("lex: Warning. no error rule is defined for exclusive state '%s'" % s)
            if warn and s not in ignore and lexobj.lexignore:
                print("lex: Warning. no ignore rule is defined for exclusive state '%s'" % s)
        elif stype == 'inclusive':
            # Inclusive states inherit the INITIAL error/ignore rules
            if s not in errorf:
                errorf[s] = errorf.get("INITIAL",None)
            if s not in ignore:
                ignore[s] = ignore.get("INITIAL","")


    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab)

    return lexobj

# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
#
# If data is not given, it is read from the file named by sys.argv[1], or from
# standard input when no argument was supplied.  If lexer is not given, the
# module-level input/token names are used.  Each token is printed as
# (type,value,lineno,lexpos).
# -----------------------------------------------------------------------------

def runmain(lexer=None,data=None):
    if not data:
        try:
            filename = sys.argv[1]
            # "with" closes the file even if reading fails.
            with open(filename) as f:
                data = f.read()
        except IndexError:
            print("Reading from standard input (type EOF to end):")
            data = sys.stdin.read()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while 1:
        tok = _token()
        if not tok: break
        print("(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos))


# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regex expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------

def TOKEN(r):
    def set_doc(f):
        f.__doc__ = r
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN