summaryrefslogtreecommitdiff
path: root/lib/python2.7/lib2to3/pgen2
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/lib2to3/pgen2')
-rw-r--r--lib/python2.7/lib2to3/pgen2/__init__.py4
-rw-r--r--lib/python2.7/lib2to3/pgen2/conv.py257
-rw-r--r--lib/python2.7/lib2to3/pgen2/driver.py157
-rw-r--r--lib/python2.7/lib2to3/pgen2/grammar.py184
-rw-r--r--lib/python2.7/lib2to3/pgen2/literals.py60
-rw-r--r--lib/python2.7/lib2to3/pgen2/parse.py201
-rw-r--r--lib/python2.7/lib2to3/pgen2/pgen.py386
-rwxr-xr-xlib/python2.7/lib2to3/pgen2/token.py82
-rw-r--r--lib/python2.7/lib2to3/pgen2/tokenize.py500
9 files changed, 0 insertions, 1831 deletions
diff --git a/lib/python2.7/lib2to3/pgen2/__init__.py b/lib/python2.7/lib2to3/pgen2/__init__.py
deleted file mode 100644
index af39048..0000000
--- a/lib/python2.7/lib2to3/pgen2/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""The pgen2 package."""
diff --git a/lib/python2.7/lib2to3/pgen2/conv.py b/lib/python2.7/lib2to3/pgen2/conv.py
deleted file mode 100644
index 28fbb0b..0000000
--- a/lib/python2.7/lib2to3/pgen2/conv.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""Convert graminit.[ch] spit out by pgen to Python code.
-
-Pgen is the Python parser generator. It is useful to quickly create a
-parser from a grammar file in Python's grammar notation. But I don't
-want my parsers to be written in C (yet), so I'm translating the
-parsing tables to Python data structures and writing a Python parse
-engine.
-
-Note that the token numbers are constants determined by the standard
-Python tokenizer. The standard token module defines these numbers and
-their names (the names are not used much). The token numbers are
-hardcoded into the Python tokenizer and into pgen. A Python
-implementation of the Python tokenizer is also available, in the
-standard tokenize module.
-
-On the other hand, symbol numbers (representing the grammar's
-non-terminals) are assigned by pgen based on the actual grammar
-input.
-
-Note: this module is pretty much obsolete; the pgen module generates
-equivalent grammar tables directly from the Grammar.txt input file
-without having to invoke the Python pgen C program.
-
-"""
-
-# Python imports
-import re
-
-# Local imports
-from pgen2 import grammar, token
-
-
-class Converter(grammar.Grammar):
- """Grammar subclass that reads classic pgen output files.
-
- The run() method reads the tables as produced by the pgen parser
- generator, typically contained in two C files, graminit.h and
- graminit.c. The other methods are for internal use only.
-
- See the base class for more documentation.
-
- """
-
- def run(self, graminit_h, graminit_c):
- """Load the grammar tables from the text files written by pgen."""
- self.parse_graminit_h(graminit_h)
- self.parse_graminit_c(graminit_c)
- self.finish_off()
-
- def parse_graminit_h(self, filename):
- """Parse the .h file written by pgen. (Internal)
-
- This file is a sequence of #define statements defining the
- nonterminals of the grammar as numbers. We build two tables
- mapping the numbers to names and back.
-
- """
- try:
- f = open(filename)
- except IOError, err:
- print "Can't open %s: %s" % (filename, err)
- return False
- self.symbol2number = {}
- self.number2symbol = {}
- lineno = 0
- for line in f:
- lineno += 1
- mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
- if not mo and line.strip():
- print "%s(%s): can't parse %s" % (filename, lineno,
- line.strip())
- else:
- symbol, number = mo.groups()
- number = int(number)
- assert symbol not in self.symbol2number
- assert number not in self.number2symbol
- self.symbol2number[symbol] = number
- self.number2symbol[number] = symbol
- return True
-
- def parse_graminit_c(self, filename):
- """Parse the .c file written by pgen. (Internal)
-
- The file looks as follows. The first two lines are always this:
-
- #include "pgenheaders.h"
- #include "grammar.h"
-
- After that come four blocks:
-
- 1) one or more state definitions
- 2) a table defining dfas
- 3) a table defining labels
- 4) a struct defining the grammar
-
- A state definition has the following form:
- - one or more arc arrays, each of the form:
- static arc arcs_<n>_<m>[<k>] = {
- {<i>, <j>},
- ...
- };
- - followed by a state array, of the form:
- static state states_<s>[<t>] = {
- {<k>, arcs_<n>_<m>},
- ...
- };
-
- """
- try:
- f = open(filename)
- except IOError, err:
- print "Can't open %s: %s" % (filename, err)
- return False
- # The code below essentially uses f's iterator-ness!
- lineno = 0
-
- # Expect the two #include lines
- lineno, line = lineno+1, f.next()
- assert line == '#include "pgenheaders.h"\n', (lineno, line)
- lineno, line = lineno+1, f.next()
- assert line == '#include "grammar.h"\n', (lineno, line)
-
- # Parse the state definitions
- lineno, line = lineno+1, f.next()
- allarcs = {}
- states = []
- while line.startswith("static arc "):
- while line.startswith("static arc "):
- mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
- line)
- assert mo, (lineno, line)
- n, m, k = map(int, mo.groups())
- arcs = []
- for _ in range(k):
- lineno, line = lineno+1, f.next()
- mo = re.match(r"\s+{(\d+), (\d+)},$", line)
- assert mo, (lineno, line)
- i, j = map(int, mo.groups())
- arcs.append((i, j))
- lineno, line = lineno+1, f.next()
- assert line == "};\n", (lineno, line)
- allarcs[(n, m)] = arcs
- lineno, line = lineno+1, f.next()
- mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
- assert mo, (lineno, line)
- s, t = map(int, mo.groups())
- assert s == len(states), (lineno, line)
- state = []
- for _ in range(t):
- lineno, line = lineno+1, f.next()
- mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
- assert mo, (lineno, line)
- k, n, m = map(int, mo.groups())
- arcs = allarcs[n, m]
- assert k == len(arcs), (lineno, line)
- state.append(arcs)
- states.append(state)
- lineno, line = lineno+1, f.next()
- assert line == "};\n", (lineno, line)
- lineno, line = lineno+1, f.next()
- self.states = states
-
- # Parse the dfas
- dfas = {}
- mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
- assert mo, (lineno, line)
- ndfas = int(mo.group(1))
- for i in range(ndfas):
- lineno, line = lineno+1, f.next()
- mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
- line)
- assert mo, (lineno, line)
- symbol = mo.group(2)
- number, x, y, z = map(int, mo.group(1, 3, 4, 5))
- assert self.symbol2number[symbol] == number, (lineno, line)
- assert self.number2symbol[number] == symbol, (lineno, line)
- assert x == 0, (lineno, line)
- state = states[z]
- assert y == len(state), (lineno, line)
- lineno, line = lineno+1, f.next()
- mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
- assert mo, (lineno, line)
- first = {}
- rawbitset = eval(mo.group(1))
- for i, c in enumerate(rawbitset):
- byte = ord(c)
- for j in range(8):
- if byte & (1<<j):
- first[i*8 + j] = 1
- dfas[number] = (state, first)
- lineno, line = lineno+1, f.next()
- assert line == "};\n", (lineno, line)
- self.dfas = dfas
-
- # Parse the labels
- labels = []
- lineno, line = lineno+1, f.next()
- mo = re.match(r"static label labels\[(\d+)\] = {$", line)
- assert mo, (lineno, line)
- nlabels = int(mo.group(1))
- for i in range(nlabels):
- lineno, line = lineno+1, f.next()
- mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
- assert mo, (lineno, line)
- x, y = mo.groups()
- x = int(x)
- if y == "0":
- y = None
- else:
- y = eval(y)
- labels.append((x, y))
- lineno, line = lineno+1, f.next()
- assert line == "};\n", (lineno, line)
- self.labels = labels
-
- # Parse the grammar struct
- lineno, line = lineno+1, f.next()
- assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
- lineno, line = lineno+1, f.next()
- mo = re.match(r"\s+(\d+),$", line)
- assert mo, (lineno, line)
- ndfas = int(mo.group(1))
- assert ndfas == len(self.dfas)
- lineno, line = lineno+1, f.next()
- assert line == "\tdfas,\n", (lineno, line)
- lineno, line = lineno+1, f.next()
- mo = re.match(r"\s+{(\d+), labels},$", line)
- assert mo, (lineno, line)
- nlabels = int(mo.group(1))
- assert nlabels == len(self.labels), (lineno, line)
- lineno, line = lineno+1, f.next()
- mo = re.match(r"\s+(\d+)$", line)
- assert mo, (lineno, line)
- start = int(mo.group(1))
- assert start in self.number2symbol, (lineno, line)
- self.start = start
- lineno, line = lineno+1, f.next()
- assert line == "};\n", (lineno, line)
- try:
- lineno, line = lineno+1, f.next()
- except StopIteration:
- pass
- else:
- assert 0, (lineno, line)
-
- def finish_off(self):
- """Create additional useful structures. (Internal)."""
- self.keywords = {} # map from keyword strings to arc labels
- self.tokens = {} # map from numeric token values to arc labels
- for ilabel, (type, value) in enumerate(self.labels):
- if type == token.NAME and value is not None:
- self.keywords[value] = ilabel
- elif value is None:
- self.tokens[type] = ilabel
diff --git a/lib/python2.7/lib2to3/pgen2/driver.py b/lib/python2.7/lib2to3/pgen2/driver.py
deleted file mode 100644
index 39dafb9..0000000
--- a/lib/python2.7/lib2to3/pgen2/driver.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-# Modifications:
-# Copyright 2006 Google, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""Parser driver.
-
-This provides a high-level interface to parse a file into a syntax tree.
-
-"""
-
-__author__ = "Guido van Rossum <guido@python.org>"
-
-__all__ = ["Driver", "load_grammar"]
-
-# Python imports
-import codecs
-import os
-import logging
-import StringIO
-import sys
-
-# Pgen imports
-from . import grammar, parse, token, tokenize, pgen
-
-
-class Driver(object):
-
- def __init__(self, grammar, convert=None, logger=None):
- self.grammar = grammar
- if logger is None:
- logger = logging.getLogger()
- self.logger = logger
- self.convert = convert
-
- def parse_tokens(self, tokens, debug=False):
- """Parse a series of tokens and return the syntax tree."""
- # XXX Move the prefix computation into a wrapper around tokenize.
- p = parse.Parser(self.grammar, self.convert)
- p.setup()
- lineno = 1
- column = 0
- type = value = start = end = line_text = None
- prefix = u""
- for quintuple in tokens:
- type, value, start, end, line_text = quintuple
- if start != (lineno, column):
- assert (lineno, column) <= start, ((lineno, column), start)
- s_lineno, s_column = start
- if lineno < s_lineno:
- prefix += "\n" * (s_lineno - lineno)
- lineno = s_lineno
- column = 0
- if column < s_column:
- prefix += line_text[column:s_column]
- column = s_column
- if type in (tokenize.COMMENT, tokenize.NL):
- prefix += value
- lineno, column = end
- if value.endswith("\n"):
- lineno += 1
- column = 0
- continue
- if type == token.OP:
- type = grammar.opmap[value]
- if debug:
- self.logger.debug("%s %r (prefix=%r)",
- token.tok_name[type], value, prefix)
- if p.addtoken(type, value, (prefix, start)):
- if debug:
- self.logger.debug("Stop.")
- break
- prefix = ""
- lineno, column = end
- if value.endswith("\n"):
- lineno += 1
- column = 0
- else:
- # We never broke out -- EOF is too soon (how can this happen???)
- raise parse.ParseError("incomplete input",
- type, value, (prefix, start))
- return p.rootnode
-
- def parse_stream_raw(self, stream, debug=False):
- """Parse a stream and return the syntax tree."""
- tokens = tokenize.generate_tokens(stream.readline)
- return self.parse_tokens(tokens, debug)
-
- def parse_stream(self, stream, debug=False):
- """Parse a stream and return the syntax tree."""
- return self.parse_stream_raw(stream, debug)
-
- def parse_file(self, filename, encoding=None, debug=False):
- """Parse a file and return the syntax tree."""
- stream = codecs.open(filename, "r", encoding)
- try:
- return self.parse_stream(stream, debug)
- finally:
- stream.close()
-
- def parse_string(self, text, debug=False):
- """Parse a string and return the syntax tree."""
- tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
- return self.parse_tokens(tokens, debug)
-
-
-def load_grammar(gt="Grammar.txt", gp=None,
- save=True, force=False, logger=None):
- """Load the grammar (maybe from a pickle)."""
- if logger is None:
- logger = logging.getLogger()
- if gp is None:
- head, tail = os.path.splitext(gt)
- if tail == ".txt":
- tail = ""
- gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
- if force or not _newer(gp, gt):
- logger.info("Generating grammar tables from %s", gt)
- g = pgen.generate_grammar(gt)
- if save:
- logger.info("Writing grammar tables to %s", gp)
- try:
- g.dump(gp)
- except IOError, e:
- logger.info("Writing failed:"+str(e))
- else:
- g = grammar.Grammar()
- g.load(gp)
- return g
-
-
-def _newer(a, b):
- """Inquire whether file a was written since file b."""
- if not os.path.exists(a):
- return False
- if not os.path.exists(b):
- return True
- return os.path.getmtime(a) >= os.path.getmtime(b)
-
-
-def main(*args):
- """Main program, when run as a script: produce grammar pickle files.
-
- Calls load_grammar for each argument, a path to a grammar text file.
- """
- if not args:
- args = sys.argv[1:]
- logging.basicConfig(level=logging.INFO, stream=sys.stdout,
- format='%(message)s')
- for gt in args:
- load_grammar(gt, save=True, force=True)
- return True
-
-if __name__ == "__main__":
- sys.exit(int(not main()))
diff --git a/lib/python2.7/lib2to3/pgen2/grammar.py b/lib/python2.7/lib2to3/pgen2/grammar.py
deleted file mode 100644
index 1aa5c43..0000000
--- a/lib/python2.7/lib2to3/pgen2/grammar.py
+++ /dev/null
@@ -1,184 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""This module defines the data structures used to represent a grammar.
-
-These are a bit arcane because they are derived from the data
-structures used by Python's 'pgen' parser generator.
-
-There's also a table here mapping operators to their names in the
-token module; the Python tokenize module reports all operators as the
-fallback token code OP, but the parser needs the actual token code.
-
-"""
-
-# Python imports
-import pickle
-
-# Local imports
-from . import token, tokenize
-
-
-class Grammar(object):
- """Pgen parsing tables conversion class.
-
- Once initialized, this class supplies the grammar tables for the
- parsing engine implemented by parse.py. The parsing engine
- accesses the instance variables directly. The class here does not
- provide initialization of the tables; several subclasses exist to
- do this (see the conv and pgen modules).
-
- The load() method reads the tables from a pickle file, which is
- much faster than the other ways offered by subclasses. The pickle
- file is written by calling dump() (after loading the grammar
- tables using a subclass). The report() method prints a readable
- representation of the tables to stdout, for debugging.
-
- The instance variables are as follows:
-
- symbol2number -- a dict mapping symbol names to numbers. Symbol
- numbers are always 256 or higher, to distinguish
- them from token numbers, which are between 0 and
- 255 (inclusive).
-
- number2symbol -- a dict mapping numbers to symbol names;
- these two are each other's inverse.
-
- states -- a list of DFAs, where each DFA is a list of
- states, each state is a list of arcs, and each
- arc is a (i, j) pair where i is a label and j is
- a state number. The DFA number is the index into
- this list. (This name is slightly confusing.)
- Final states are represented by a special arc of
- the form (0, j) where j is its own state number.
-
- dfas -- a dict mapping symbol numbers to (DFA, first)
- pairs, where DFA is an item from the states list
- above, and first is a set of tokens that can
- begin this grammar rule (represented by a dict
- whose values are always 1).
-
- labels -- a list of (x, y) pairs where x is either a token
- number or a symbol number, and y is either None
- or a string; the strings are keywords. The label
- number is the index in this list; label numbers
- are used to mark state transitions (arcs) in the
- DFAs.
-
- start -- the number of the grammar's start symbol.
-
- keywords -- a dict mapping keyword strings to arc labels.
-
- tokens -- a dict mapping token numbers to arc labels.
-
- """
-
- def __init__(self):
- self.symbol2number = {}
- self.number2symbol = {}
- self.states = []
- self.dfas = {}
- self.labels = [(0, "EMPTY")]
- self.keywords = {}
- self.tokens = {}
- self.symbol2label = {}
- self.start = 256
-
- def dump(self, filename):
- """Dump the grammar tables to a pickle file."""
- f = open(filename, "wb")
- pickle.dump(self.__dict__, f, 2)
- f.close()
-
- def load(self, filename):
- """Load the grammar tables from a pickle file."""
- f = open(filename, "rb")
- d = pickle.load(f)
- f.close()
- self.__dict__.update(d)
-
- def copy(self):
- """
- Copy the grammar.
- """
- new = self.__class__()
- for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
- "tokens", "symbol2label"):
- setattr(new, dict_attr, getattr(self, dict_attr).copy())
- new.labels = self.labels[:]
- new.states = self.states[:]
- new.start = self.start
- return new
-
- def report(self):
- """Dump the grammar tables to standard output, for debugging."""
- from pprint import pprint
- print "s2n"
- pprint(self.symbol2number)
- print "n2s"
- pprint(self.number2symbol)
- print "states"
- pprint(self.states)
- print "dfas"
- pprint(self.dfas)
- print "labels"
- pprint(self.labels)
- print "start", self.start
-
-
-# Map from operator to number (since tokenize doesn't do this)
-
-opmap_raw = """
-( LPAR
-) RPAR
-[ LSQB
-] RSQB
-: COLON
-, COMMA
-; SEMI
-+ PLUS
-- MINUS
-* STAR
-/ SLASH
-| VBAR
-& AMPER
-< LESS
-> GREATER
-= EQUAL
-. DOT
-% PERCENT
-` BACKQUOTE
-{ LBRACE
-} RBRACE
-@ AT
-== EQEQUAL
-!= NOTEQUAL
-<> NOTEQUAL
-<= LESSEQUAL
->= GREATEREQUAL
-~ TILDE
-^ CIRCUMFLEX
-<< LEFTSHIFT
->> RIGHTSHIFT
-** DOUBLESTAR
-+= PLUSEQUAL
--= MINEQUAL
-*= STAREQUAL
-/= SLASHEQUAL
-%= PERCENTEQUAL
-&= AMPEREQUAL
-|= VBAREQUAL
-^= CIRCUMFLEXEQUAL
-<<= LEFTSHIFTEQUAL
->>= RIGHTSHIFTEQUAL
-**= DOUBLESTAREQUAL
-// DOUBLESLASH
-//= DOUBLESLASHEQUAL
--> RARROW
-"""
-
-opmap = {}
-for line in opmap_raw.splitlines():
- if line:
- op, name = line.split()
- opmap[op] = getattr(token, name)
diff --git a/lib/python2.7/lib2to3/pgen2/literals.py b/lib/python2.7/lib2to3/pgen2/literals.py
deleted file mode 100644
index 0b3948a..0000000
--- a/lib/python2.7/lib2to3/pgen2/literals.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""Safely evaluate Python string literals without using eval()."""
-
-import re
-
-simple_escapes = {"a": "\a",
- "b": "\b",
- "f": "\f",
- "n": "\n",
- "r": "\r",
- "t": "\t",
- "v": "\v",
- "'": "'",
- '"': '"',
- "\\": "\\"}
-
-def escape(m):
- all, tail = m.group(0, 1)
- assert all.startswith("\\")
- esc = simple_escapes.get(tail)
- if esc is not None:
- return esc
- if tail.startswith("x"):
- hexes = tail[1:]
- if len(hexes) < 2:
- raise ValueError("invalid hex string escape ('\\%s')" % tail)
- try:
- i = int(hexes, 16)
- except ValueError:
- raise ValueError("invalid hex string escape ('\\%s')" % tail)
- else:
- try:
- i = int(tail, 8)
- except ValueError:
- raise ValueError("invalid octal string escape ('\\%s')" % tail)
- return chr(i)
-
-def evalString(s):
- assert s.startswith("'") or s.startswith('"'), repr(s[:1])
- q = s[0]
- if s[:3] == q*3:
- q = q*3
- assert s.endswith(q), repr(s[-len(q):])
- assert len(s) >= 2*len(q)
- s = s[len(q):-len(q)]
- return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
-
-def test():
- for i in range(256):
- c = chr(i)
- s = repr(c)
- e = evalString(s)
- if e != c:
- print i, c, s, e
-
-
-if __name__ == "__main__":
- test()
diff --git a/lib/python2.7/lib2to3/pgen2/parse.py b/lib/python2.7/lib2to3/pgen2/parse.py
deleted file mode 100644
index 6bebdbb..0000000
--- a/lib/python2.7/lib2to3/pgen2/parse.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""Parser engine for the grammar tables generated by pgen.
-
-The grammar table must be loaded first.
-
-See Parser/parser.c in the Python distribution for additional info on
-how this parsing engine works.
-
-"""
-
-# Local imports
-from . import token
-
-class ParseError(Exception):
- """Exception to signal the parser is stuck."""
-
- def __init__(self, msg, type, value, context):
- Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
- (msg, type, value, context))
- self.msg = msg
- self.type = type
- self.value = value
- self.context = context
-
-class Parser(object):
- """Parser engine.
-
- The proper usage sequence is:
-
- p = Parser(grammar, [converter]) # create instance
- p.setup([start]) # prepare for parsing
- <for each input token>:
- if p.addtoken(...): # parse a token; may raise ParseError
- break
- root = p.rootnode # root of abstract syntax tree
-
- A Parser instance may be reused by calling setup() repeatedly.
-
- A Parser instance contains state pertaining to the current token
- sequence, and should not be used concurrently by different threads
- to parse separate token sequences.
-
- See driver.py for how to get input tokens by tokenizing a file or
- string.
-
- Parsing is complete when addtoken() returns True; the root of the
- abstract syntax tree can then be retrieved from the rootnode
- instance variable. When a syntax error occurs, addtoken() raises
- the ParseError exception. There is no error recovery; the parser
- cannot be used after a syntax error was reported (but it can be
- reinitialized by calling setup()).
-
- """
-
- def __init__(self, grammar, convert=None):
- """Constructor.
-
- The grammar argument is a grammar.Grammar instance; see the
- grammar module for more information.
-
- The parser is not ready yet for parsing; you must call the
- setup() method to get it started.
-
- The optional convert argument is a function mapping concrete
- syntax tree nodes to abstract syntax tree nodes. If not
- given, no conversion is done and the syntax tree produced is
- the concrete syntax tree. If given, it must be a function of
- two arguments, the first being the grammar (a grammar.Grammar
- instance), and the second being the concrete syntax tree node
- to be converted. The syntax tree is converted from the bottom
- up.
-
- A concrete syntax tree node is a (type, value, context, nodes)
- tuple, where type is the node type (a token or symbol number),
- value is None for symbols and a string for tokens, context is
- None or an opaque value used for error reporting (typically a
- (lineno, offset) pair), and nodes is a list of children for
- symbols, and None for tokens.
-
- An abstract syntax tree node may be anything; this is entirely
- up to the converter function.
-
- """
- self.grammar = grammar
- self.convert = convert or (lambda grammar, node: node)
-
- def setup(self, start=None):
- """Prepare for parsing.
-
- This *must* be called before starting to parse.
-
- The optional argument is an alternative start symbol; it
- defaults to the grammar's start symbol.
-
- You can use a Parser instance to parse any number of programs;
- each time you call setup() the parser is reset to an initial
- state determined by the (implicit or explicit) start symbol.
-
- """
- if start is None:
- start = self.grammar.start
- # Each stack entry is a tuple: (dfa, state, node).
- # A node is a tuple: (type, value, context, children),
- # where children is a list of nodes or None, and context may be None.
- newnode = (start, None, None, [])
- stackentry = (self.grammar.dfas[start], 0, newnode)
- self.stack = [stackentry]
- self.rootnode = None
- self.used_names = set() # Aliased to self.rootnode.used_names in pop()
-
- def addtoken(self, type, value, context):
- """Add a token; return True iff this is the end of the program."""
- # Map from token to label
- ilabel = self.classify(type, value, context)
- # Loop until the token is shifted; may raise exceptions
- while True:
- dfa, state, node = self.stack[-1]
- states, first = dfa
- arcs = states[state]
- # Look for a state with this label
- for i, newstate in arcs:
- t, v = self.grammar.labels[i]
- if ilabel == i:
- # Look it up in the list of labels
- assert t < 256
- # Shift a token; we're done with it
- self.shift(type, value, newstate, context)
- # Pop while we are in an accept-only state
- state = newstate
- while states[state] == [(0, state)]:
- self.pop()
- if not self.stack:
- # Done parsing!
- return True
- dfa, state, node = self.stack[-1]
- states, first = dfa
- # Done with this token
- return False
- elif t >= 256:
- # See if it's a symbol and if we're in its first set
- itsdfa = self.grammar.dfas[t]
- itsstates, itsfirst = itsdfa
- if ilabel in itsfirst:
- # Push a symbol
- self.push(t, self.grammar.dfas[t], newstate, context)
- break # To continue the outer while loop
- else:
- if (0, state) in arcs:
- # An accepting state, pop it and try something else
- self.pop()
- if not self.stack:
- # Done parsing, but another token is input
- raise ParseError("too much input",
- type, value, context)
- else:
- # No success finding a transition
- raise ParseError("bad input", type, value, context)
-
- def classify(self, type, value, context):
- """Turn a token into a label. (Internal)"""
- if type == token.NAME:
- # Keep a listing of all used names
- self.used_names.add(value)
- # Check for reserved words
- ilabel = self.grammar.keywords.get(value)
- if ilabel is not None:
- return ilabel
- ilabel = self.grammar.tokens.get(type)
- if ilabel is None:
- raise ParseError("bad token", type, value, context)
- return ilabel
-
- def shift(self, type, value, newstate, context):
- """Shift a token. (Internal)"""
- dfa, state, node = self.stack[-1]
- newnode = (type, value, context, None)
- newnode = self.convert(self.grammar, newnode)
- if newnode is not None:
- node[-1].append(newnode)
- self.stack[-1] = (dfa, newstate, node)
-
- def push(self, type, newdfa, newstate, context):
- """Push a nonterminal. (Internal)"""
- dfa, state, node = self.stack[-1]
- newnode = (type, None, context, [])
- self.stack[-1] = (dfa, newstate, node)
- self.stack.append((newdfa, 0, newnode))
-
- def pop(self):
- """Pop a nonterminal. (Internal)"""
- popdfa, popstate, popnode = self.stack.pop()
- newnode = self.convert(self.grammar, popnode)
- if newnode is not None:
- if self.stack:
- dfa, state, node = self.stack[-1]
- node[-1].append(newnode)
- else:
- self.rootnode = newnode
- self.rootnode.used_names = self.used_names
diff --git a/lib/python2.7/lib2to3/pgen2/pgen.py b/lib/python2.7/lib2to3/pgen2/pgen.py
deleted file mode 100644
index 63084a4..0000000
--- a/lib/python2.7/lib2to3/pgen2/pgen.py
+++ /dev/null
@@ -1,386 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-# Pgen imports
-from . import grammar, token, tokenize
-
-class PgenGrammar(grammar.Grammar):
- pass
-
-class ParserGenerator(object):
-
- def __init__(self, filename, stream=None):
- close_stream = None
- if stream is None:
- stream = open(filename)
- close_stream = stream.close
- self.filename = filename
- self.stream = stream
- self.generator = tokenize.generate_tokens(stream.readline)
- self.gettoken() # Initialize lookahead
- self.dfas, self.startsymbol = self.parse()
- if close_stream is not None:
- close_stream()
- self.first = {} # map from symbol name to set of tokens
- self.addfirstsets()
-
- def make_grammar(self):
- c = PgenGrammar()
- names = self.dfas.keys()
- names.sort()
- names.remove(self.startsymbol)
- names.insert(0, self.startsymbol)
- for name in names:
- i = 256 + len(c.symbol2number)
- c.symbol2number[name] = i
- c.number2symbol[i] = name
- for name in names:
- dfa = self.dfas[name]
- states = []
- for state in dfa:
- arcs = []
- for label, next in state.arcs.iteritems():
- arcs.append((self.make_label(c, label), dfa.index(next)))
- if state.isfinal:
- arcs.append((0, dfa.index(state)))
- states.append(arcs)
- c.states.append(states)
- c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
- c.start = c.symbol2number[self.startsymbol]
- return c
-
- def make_first(self, c, name):
- rawfirst = self.first[name]
- first = {}
- for label in rawfirst:
- ilabel = self.make_label(c, label)
- ##assert ilabel not in first # XXX failed on <> ... !=
- first[ilabel] = 1
- return first
-
- def make_label(self, c, label):
- # XXX Maybe this should be a method on a subclass of converter?
- ilabel = len(c.labels)
- if label[0].isalpha():
- # Either a symbol name or a named token
- if label in c.symbol2number:
- # A symbol name (a non-terminal)
- if label in c.symbol2label:
- return c.symbol2label[label]
- else:
- c.labels.append((c.symbol2number[label], None))
- c.symbol2label[label] = ilabel
- return ilabel
- else:
- # A named token (NAME, NUMBER, STRING)
- itoken = getattr(token, label, None)
- assert isinstance(itoken, int), label
- assert itoken in token.tok_name, label
- if itoken in c.tokens:
- return c.tokens[itoken]
- else:
- c.labels.append((itoken, None))
- c.tokens[itoken] = ilabel
- return ilabel
- else:
- # Either a keyword or an operator
- assert label[0] in ('"', "'"), label
- value = eval(label)
- if value[0].isalpha():
- # A keyword
- if value in c.keywords:
- return c.keywords[value]
- else:
- c.labels.append((token.NAME, value))
- c.keywords[value] = ilabel
- return ilabel
- else:
- # An operator (any non-numeric token)
- itoken = grammar.opmap[value] # Fails if unknown token
- if itoken in c.tokens:
- return c.tokens[itoken]
- else:
- c.labels.append((itoken, None))
- c.tokens[itoken] = ilabel
- return ilabel
-
- def addfirstsets(self):
- names = self.dfas.keys()
- names.sort()
- for name in names:
- if name not in self.first:
- self.calcfirst(name)
- #print name, self.first[name].keys()
-
- def calcfirst(self, name):
- dfa = self.dfas[name]
- self.first[name] = None # dummy to detect left recursion
- state = dfa[0]
- totalset = {}
- overlapcheck = {}
- for label, next in state.arcs.iteritems():
- if label in self.dfas:
- if label in self.first:
- fset = self.first[label]
- if fset is None:
- raise ValueError("recursion for rule %r" % name)
- else:
- self.calcfirst(label)
- fset = self.first[label]
- totalset.update(fset)
- overlapcheck[label] = fset
- else:
- totalset[label] = 1
- overlapcheck[label] = {label: 1}
- inverse = {}
- for label, itsfirst in overlapcheck.iteritems():
- for symbol in itsfirst:
- if symbol in inverse:
- raise ValueError("rule %s is ambiguous; %s is in the"
- " first sets of %s as well as %s" %
- (name, symbol, label, inverse[symbol]))
- inverse[symbol] = label
- self.first[name] = totalset
-
- def parse(self):
- dfas = {}
- startsymbol = None
- # MSTART: (NEWLINE | RULE)* ENDMARKER
- while self.type != token.ENDMARKER:
- while self.type == token.NEWLINE:
- self.gettoken()
- # RULE: NAME ':' RHS NEWLINE
- name = self.expect(token.NAME)
- self.expect(token.OP, ":")
- a, z = self.parse_rhs()
- self.expect(token.NEWLINE)
- #self.dump_nfa(name, a, z)
- dfa = self.make_dfa(a, z)
- #self.dump_dfa(name, dfa)
- oldlen = len(dfa)
- self.simplify_dfa(dfa)
- newlen = len(dfa)
- dfas[name] = dfa
- #print name, oldlen, newlen
- if startsymbol is None:
- startsymbol = name
- return dfas, startsymbol
-
- def make_dfa(self, start, finish):
- # To turn an NFA into a DFA, we define the states of the DFA
- # to correspond to *sets* of states of the NFA. Then do some
- # state reduction. Let's represent sets as dicts with 1 for
- # values.
- assert isinstance(start, NFAState)
- assert isinstance(finish, NFAState)
- def closure(state):
- base = {}
- addclosure(state, base)
- return base
- def addclosure(state, base):
- assert isinstance(state, NFAState)
- if state in base:
- return
- base[state] = 1
- for label, next in state.arcs:
- if label is None:
- addclosure(next, base)
- states = [DFAState(closure(start), finish)]
- for state in states: # NB states grows while we're iterating
- arcs = {}
- for nfastate in state.nfaset:
- for label, next in nfastate.arcs:
- if label is not None:
- addclosure(next, arcs.setdefault(label, {}))
- for label, nfaset in arcs.iteritems():
- for st in states:
- if st.nfaset == nfaset:
- break
- else:
- st = DFAState(nfaset, finish)
- states.append(st)
- state.addarc(st, label)
- return states # List of DFAState instances; first one is start
-
- def dump_nfa(self, name, start, finish):
- print "Dump of NFA for", name
- todo = [start]
- for i, state in enumerate(todo):
- print " State", i, state is finish and "(final)" or ""
- for label, next in state.arcs:
- if next in todo:
- j = todo.index(next)
- else:
- j = len(todo)
- todo.append(next)
- if label is None:
- print " -> %d" % j
- else:
- print " %s -> %d" % (label, j)
-
- def dump_dfa(self, name, dfa):
- print "Dump of DFA for", name
- for i, state in enumerate(dfa):
- print " State", i, state.isfinal and "(final)" or ""
- for label, next in state.arcs.iteritems():
- print " %s -> %d" % (label, dfa.index(next))
-
- def simplify_dfa(self, dfa):
- # This is not theoretically optimal, but works well enough.
- # Algorithm: repeatedly look for two states that have the same
- # set of arcs (same labels pointing to the same nodes) and
- # unify them, until things stop changing.
-
- # dfa is a list of DFAState instances
- changes = True
- while changes:
- changes = False
- for i, state_i in enumerate(dfa):
- for j in range(i+1, len(dfa)):
- state_j = dfa[j]
- if state_i == state_j:
- #print " unify", i, j
- del dfa[j]
- for state in dfa:
- state.unifystate(state_j, state_i)
- changes = True
- break
-
- def parse_rhs(self):
- # RHS: ALT ('|' ALT)*
- a, z = self.parse_alt()
- if self.value != "|":
- return a, z
- else:
- aa = NFAState()
- zz = NFAState()
- aa.addarc(a)
- z.addarc(zz)
- while self.value == "|":
- self.gettoken()
- a, z = self.parse_alt()
- aa.addarc(a)
- z.addarc(zz)
- return aa, zz
-
- def parse_alt(self):
- # ALT: ITEM+
- a, b = self.parse_item()
- while (self.value in ("(", "[") or
- self.type in (token.NAME, token.STRING)):
- c, d = self.parse_item()
- b.addarc(c)
- b = d
- return a, b
-
- def parse_item(self):
- # ITEM: '[' RHS ']' | ATOM ['+' | '*']
- if self.value == "[":
- self.gettoken()
- a, z = self.parse_rhs()
- self.expect(token.OP, "]")
- a.addarc(z)
- return a, z
- else:
- a, z = self.parse_atom()
- value = self.value
- if value not in ("+", "*"):
- return a, z
- self.gettoken()
- z.addarc(a)
- if value == "+":
- return a, z
- else:
- return a, a
-
- def parse_atom(self):
- # ATOM: '(' RHS ')' | NAME | STRING
- if self.value == "(":
- self.gettoken()
- a, z = self.parse_rhs()
- self.expect(token.OP, ")")
- return a, z
- elif self.type in (token.NAME, token.STRING):
- a = NFAState()
- z = NFAState()
- a.addarc(z, self.value)
- self.gettoken()
- return a, z
- else:
- self.raise_error("expected (...) or NAME or STRING, got %s/%s",
- self.type, self.value)
-
- def expect(self, type, value=None):
- if self.type != type or (value is not None and self.value != value):
- self.raise_error("expected %s/%s, got %s/%s",
- type, value, self.type, self.value)
- value = self.value
- self.gettoken()
- return value
-
- def gettoken(self):
- tup = self.generator.next()
- while tup[0] in (tokenize.COMMENT, tokenize.NL):
- tup = self.generator.next()
- self.type, self.value, self.begin, self.end, self.line = tup
- #print token.tok_name[self.type], repr(self.value)
-
- def raise_error(self, msg, *args):
- if args:
- try:
- msg = msg % args
- except:
- msg = " ".join([msg] + map(str, args))
- raise SyntaxError(msg, (self.filename, self.end[0],
- self.end[1], self.line))
-
-class NFAState(object):
-
- def __init__(self):
- self.arcs = [] # list of (label, NFAState) pairs
-
- def addarc(self, next, label=None):
- assert label is None or isinstance(label, str)
- assert isinstance(next, NFAState)
- self.arcs.append((label, next))
-
-class DFAState(object):
-
- def __init__(self, nfaset, final):
- assert isinstance(nfaset, dict)
- assert isinstance(iter(nfaset).next(), NFAState)
- assert isinstance(final, NFAState)
- self.nfaset = nfaset
- self.isfinal = final in nfaset
- self.arcs = {} # map from label to DFAState
-
- def addarc(self, next, label):
- assert isinstance(label, str)
- assert label not in self.arcs
- assert isinstance(next, DFAState)
- self.arcs[label] = next
-
- def unifystate(self, old, new):
- for label, next in self.arcs.iteritems():
- if next is old:
- self.arcs[label] = new
-
- def __eq__(self, other):
- # Equality test -- ignore the nfaset instance variable
- assert isinstance(other, DFAState)
- if self.isfinal != other.isfinal:
- return False
- # Can't just return self.arcs == other.arcs, because that
- # would invoke this method recursively, with cycles...
- if len(self.arcs) != len(other.arcs):
- return False
- for label, next in self.arcs.iteritems():
- if next is not other.arcs.get(label):
- return False
- return True
-
- __hash__ = None # For Py3 compatibility.
-
-def generate_grammar(filename="Grammar.txt"):
- p = ParserGenerator(filename)
- return p.make_grammar()
diff --git a/lib/python2.7/lib2to3/pgen2/token.py b/lib/python2.7/lib2to3/pgen2/token.py
deleted file mode 100755
index 61468b3..0000000
--- a/lib/python2.7/lib2to3/pgen2/token.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#! /usr/bin/env python
-
-"""Token constants (from "token.h")."""
-
-# Taken from Python (r53757) and modified to include some tokens
-# originally monkeypatched in by pgen2.tokenize
-
-#--start constants--
-ENDMARKER = 0
-NAME = 1
-NUMBER = 2
-STRING = 3
-NEWLINE = 4
-INDENT = 5
-DEDENT = 6
-LPAR = 7
-RPAR = 8
-LSQB = 9
-RSQB = 10
-COLON = 11
-COMMA = 12
-SEMI = 13
-PLUS = 14
-MINUS = 15
-STAR = 16
-SLASH = 17
-VBAR = 18
-AMPER = 19
-LESS = 20
-GREATER = 21
-EQUAL = 22
-DOT = 23
-PERCENT = 24
-BACKQUOTE = 25
-LBRACE = 26
-RBRACE = 27
-EQEQUAL = 28
-NOTEQUAL = 29
-LESSEQUAL = 30
-GREATEREQUAL = 31
-TILDE = 32
-CIRCUMFLEX = 33
-LEFTSHIFT = 34
-RIGHTSHIFT = 35
-DOUBLESTAR = 36
-PLUSEQUAL = 37
-MINEQUAL = 38
-STAREQUAL = 39
-SLASHEQUAL = 40
-PERCENTEQUAL = 41
-AMPEREQUAL = 42
-VBAREQUAL = 43
-CIRCUMFLEXEQUAL = 44
-LEFTSHIFTEQUAL = 45
-RIGHTSHIFTEQUAL = 46
-DOUBLESTAREQUAL = 47
-DOUBLESLASH = 48
-DOUBLESLASHEQUAL = 49
-AT = 50
-OP = 51
-COMMENT = 52
-NL = 53
-RARROW = 54
-ERRORTOKEN = 55
-N_TOKENS = 56
-NT_OFFSET = 256
-#--end constants--
-
-tok_name = {}
-for _name, _value in globals().items():
- if type(_value) is type(0):
- tok_name[_value] = _name
-
-
-def ISTERMINAL(x):
- return x < NT_OFFSET
-
-def ISNONTERMINAL(x):
- return x >= NT_OFFSET
-
-def ISEOF(x):
- return x == ENDMARKER
diff --git a/lib/python2.7/lib2to3/pgen2/tokenize.py b/lib/python2.7/lib2to3/pgen2/tokenize.py
deleted file mode 100644
index e090aa9..0000000
--- a/lib/python2.7/lib2to3/pgen2/tokenize.py
+++ /dev/null
@@ -1,500 +0,0 @@
-# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
-# All rights reserved.
-
-"""Tokenization help for Python programs.
-
-generate_tokens(readline) is a generator that breaks a stream of
-text into Python tokens. It accepts a readline-like method which is called
-repeatedly to get the next line of input (or "" for EOF). It generates
-5-tuples with these members:
-
- the token type (see token.py)
- the token (a string)
- the starting (row, column) indices of the token (a 2-tuple of ints)
- the ending (row, column) indices of the token (a 2-tuple of ints)
- the original line (string)
-
-It is designed to match the working of the Python tokenizer exactly, except
-that it produces COMMENT tokens for comments and gives type OP for all
-operators
-
-Older entry points
- tokenize_loop(readline, tokeneater)
- tokenize(readline, tokeneater=printtoken)
-are the same, except instead of generating tokens, tokeneater is a callback
-function to which the 5 fields described above are passed as 5 arguments,
-each time a new token is found."""
-
-__author__ = 'Ka-Ping Yee <ping@lfw.org>'
-__credits__ = \
- 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
-
-import string, re
-from codecs import BOM_UTF8, lookup
-from lib2to3.pgen2.token import *
-
-from . import token
-__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
- "generate_tokens", "untokenize"]
-del token
-
-try:
- bytes
-except NameError:
- # Support bytes type in Python <= 2.5, so 2to3 turns itself into
- # valid Python 3 code.
- bytes = str
-
-def group(*choices): return '(' + '|'.join(choices) + ')'
-def any(*choices): return group(*choices) + '*'
-def maybe(*choices): return group(*choices) + '?'
-
-Whitespace = r'[ \f\t]*'
-Comment = r'#[^\r\n]*'
-Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'[a-zA-Z_]\w*'
-
-Binnumber = r'0[bB][01]*'
-Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
-Octnumber = r'0[oO]?[0-7]*[lL]?'
-Decnumber = r'[1-9]\d*[lL]?'
-Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
-Exponent = r'[eE][-+]?\d+'
-Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
-Expfloat = r'\d+' + Exponent
-Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
-Number = group(Imagnumber, Floatnumber, Intnumber)
-
-# Tail end of ' string.
-Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
-# Tail end of " string.
-Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
-# Tail end of ''' string.
-Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
-# Tail end of """ string.
-Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
-# Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
- r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
-
-# Because of leftmost-then-longest match semantics, be sure to put the
-# longest operators first (e.g., if = came before ==, == would get
-# recognized as two instances of =).
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
- r"//=?", r"->",
- r"[+\-*/%&|^=<>]=?",
- r"~")
-
-Bracket = '[][(){}]'
-Special = group(r'\r?\n', r'[:;.,`@]')
-Funny = group(Operator, Bracket, Special)
-
-PlainToken = group(Number, Funny, String, Name)
-Token = Ignore + PlainToken
-
-# First (or only) line of ' or " string.
-ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
- group("'", r'\\\r?\n'),
- r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
- group('"', r'\\\r?\n'))
-PseudoExtras = group(r'\\\r?\n', Comment, Triple)
-PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
-
-tokenprog, pseudoprog, single3prog, double3prog = map(
- re.compile, (Token, PseudoToken, Single3, Double3))
-endprogs = {"'": re.compile(Single), '"': re.compile(Double),
- "'''": single3prog, '"""': double3prog,
- "r'''": single3prog, 'r"""': double3prog,
- "u'''": single3prog, 'u"""': double3prog,
- "b'''": single3prog, 'b"""': double3prog,
- "ur'''": single3prog, 'ur"""': double3prog,
- "br'''": single3prog, 'br"""': double3prog,
- "R'''": single3prog, 'R"""': double3prog,
- "U'''": single3prog, 'U"""': double3prog,
- "B'''": single3prog, 'B"""': double3prog,
- "uR'''": single3prog, 'uR"""': double3prog,
- "Ur'''": single3prog, 'Ur"""': double3prog,
- "UR'''": single3prog, 'UR"""': double3prog,
- "bR'''": single3prog, 'bR"""': double3prog,
- "Br'''": single3prog, 'Br"""': double3prog,
- "BR'''": single3prog, 'BR"""': double3prog,
- 'r': None, 'R': None,
- 'u': None, 'U': None,
- 'b': None, 'B': None}
-
-triple_quoted = {}
-for t in ("'''", '"""',
- "r'''", 'r"""', "R'''", 'R"""',
- "u'''", 'u"""', "U'''", 'U"""',
- "b'''", 'b"""', "B'''", 'B"""',
- "ur'''", 'ur"""', "Ur'''", 'Ur"""',
- "uR'''", 'uR"""', "UR'''", 'UR"""',
- "br'''", 'br"""', "Br'''", 'Br"""',
- "bR'''", 'bR"""', "BR'''", 'BR"""',):
- triple_quoted[t] = t
-single_quoted = {}
-for t in ("'", '"',
- "r'", 'r"', "R'", 'R"',
- "u'", 'u"', "U'", 'U"',
- "b'", 'b"', "B'", 'B"',
- "ur'", 'ur"', "Ur'", 'Ur"',
- "uR'", 'uR"', "UR'", 'UR"',
- "br'", 'br"', "Br'", 'Br"',
- "bR'", 'bR"', "BR'", 'BR"', ):
- single_quoted[t] = t
-
-tabsize = 8
-
-class TokenError(Exception): pass
-
-class StopTokenizing(Exception): pass
-
-def printtoken(type, token, start, end, line): # for testing
- (srow, scol) = start
- (erow, ecol) = end
- print "%d,%d-%d,%d:\t%s\t%s" % \
- (srow, scol, erow, ecol, tok_name[type], repr(token))
-
-def tokenize(readline, tokeneater=printtoken):
- """
- The tokenize() function accepts two parameters: one representing the
- input stream, and one providing an output mechanism for tokenize().
-
- The first parameter, readline, must be a callable object which provides
- the same interface as the readline() method of built-in file objects.
- Each call to the function should return one line of input as a string.
-
- The second parameter, tokeneater, must also be a callable object. It is
- called once for each token, with five arguments, corresponding to the
- tuples generated by generate_tokens().
- """
- try:
- tokenize_loop(readline, tokeneater)
- except StopTokenizing:
- pass
-
-# backwards compatible interface
-def tokenize_loop(readline, tokeneater):
- for token_info in generate_tokens(readline):
- tokeneater(*token_info)
-
-class Untokenizer:
-
- def __init__(self):
- self.tokens = []
- self.prev_row = 1
- self.prev_col = 0
-
- def add_whitespace(self, start):
- row, col = start
- assert row <= self.prev_row
- col_offset = col - self.prev_col
- if col_offset:
- self.tokens.append(" " * col_offset)
-
- def untokenize(self, iterable):
- for t in iterable:
- if len(t) == 2:
- self.compat(t, iterable)
- break
- tok_type, token, start, end, line = t
- self.add_whitespace(start)
- self.tokens.append(token)
- self.prev_row, self.prev_col = end
- if tok_type in (NEWLINE, NL):
- self.prev_row += 1
- self.prev_col = 0
- return "".join(self.tokens)
-
- def compat(self, token, iterable):
- startline = False
- indents = []
- toks_append = self.tokens.append
- toknum, tokval = token
- if toknum in (NAME, NUMBER):
- tokval += ' '
- if toknum in (NEWLINE, NL):
- startline = True
- for tok in iterable:
- toknum, tokval = tok[:2]
-
- if toknum in (NAME, NUMBER):
- tokval += ' '
-
- if toknum == INDENT:
- indents.append(tokval)
- continue
- elif toknum == DEDENT:
- indents.pop()
- continue
- elif toknum in (NEWLINE, NL):
- startline = True
- elif startline and indents:
- toks_append(indents[-1])
- startline = False
- toks_append(tokval)
-
-cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
-
-def _get_normal_name(orig_enc):
- """Imitates get_normal_name in tokenizer.c."""
- # Only care about the first 12 characters.
- enc = orig_enc[:12].lower().replace("_", "-")
- if enc == "utf-8" or enc.startswith("utf-8-"):
- return "utf-8"
- if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
- enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
- return "iso-8859-1"
- return orig_enc
-
-def detect_encoding(readline):
- """
- The detect_encoding() function is used to detect the encoding that should
- be used to decode a Python source file. It requires one argment, readline,
- in the same way as the tokenize() generator.
-
- It will call readline a maximum of twice, and return the encoding used
- (as a string) and a list of any lines (left as bytes) it has read
- in.
-
- It detects the encoding from the presence of a utf-8 bom or an encoding
- cookie as specified in pep-0263. If both a bom and a cookie are present, but
- disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
- charset, raise a SyntaxError. Note that if a utf-8 bom is found,
- 'utf-8-sig' is returned.
-
- If no encoding is specified, then the default of 'utf-8' will be returned.
- """
- bom_found = False
- encoding = None
- default = 'utf-8'
- def read_or_stop():
- try:
- return readline()
- except StopIteration:
- return bytes()
-
- def find_cookie(line):
- try:
- line_string = line.decode('ascii')
- except UnicodeDecodeError:
- return None
-
- matches = cookie_re.findall(line_string)
- if not matches:
- return None
- encoding = _get_normal_name(matches[0])
- try:
- codec = lookup(encoding)
- except LookupError:
- # This behaviour mimics the Python interpreter
- raise SyntaxError("unknown encoding: " + encoding)
-
- if bom_found:
- if codec.name != 'utf-8':
- # This behaviour mimics the Python interpreter
- raise SyntaxError('encoding problem: utf-8')
- encoding += '-sig'
- return encoding
-
- first = read_or_stop()
- if first.startswith(BOM_UTF8):
- bom_found = True
- first = first[3:]
- default = 'utf-8-sig'
- if not first:
- return default, []
-
- encoding = find_cookie(first)
- if encoding:
- return encoding, [first]
-
- second = read_or_stop()
- if not second:
- return default, [first]
-
- encoding = find_cookie(second)
- if encoding:
- return encoding, [first, second]
-
- return default, [first, second]
-
-def untokenize(iterable):
- """Transform tokens back into Python source code.
-
- Each element returned by the iterable must be a token sequence
- with at least two elements, a token number and token value. If
- only two tokens are passed, the resulting output is poor.
-
- Round-trip invariant for full input:
- Untokenized source will match input source exactly
-
- Round-trip invariant for limited intput:
- # Output text will tokenize the back to the input
- t1 = [tok[:2] for tok in generate_tokens(f.readline)]
- newcode = untokenize(t1)
- readline = iter(newcode.splitlines(1)).next
- t2 = [tok[:2] for tokin generate_tokens(readline)]
- assert t1 == t2
- """
- ut = Untokenizer()
- return ut.untokenize(iterable)
-
-def generate_tokens(readline):
- """
- The generate_tokens() generator requires one argment, readline, which
- must be a callable object which provides the same interface as the
- readline() method of built-in file objects. Each call to the function
- should return one line of input as a string. Alternately, readline
- can be a callable function terminating with StopIteration:
- readline = open(myfile).next # Example of alternate readline
-
- The generator produces 5-tuples with these members: the token type; the
- token string; a 2-tuple (srow, scol) of ints specifying the row and
- column where the token begins in the source; a 2-tuple (erow, ecol) of
- ints specifying the row and column where the token ends in the source;
- and the line on which the token was found. The line passed is the
- logical line; continuation lines are included.
- """
- lnum = parenlev = continued = 0
- namechars, numchars = string.ascii_letters + '_', '0123456789'
- contstr, needcont = '', 0
- contline = None
- indents = [0]
-
- while 1: # loop over lines in stream
- try:
- line = readline()
- except StopIteration:
- line = ''
- lnum = lnum + 1
- pos, max = 0, len(line)
-
- if contstr: # continued string
- if not line:
- raise TokenError, ("EOF in multi-line string", strstart)
- endmatch = endprog.match(line)
- if endmatch:
- pos = end = endmatch.end(0)
- yield (STRING, contstr + line[:end],
- strstart, (lnum, end), contline + line)
- contstr, needcont = '', 0
- contline = None
- elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
- yield (ERRORTOKEN, contstr + line,
- strstart, (lnum, len(line)), contline)
- contstr = ''
- contline = None
- continue
- else:
- contstr = contstr + line
- contline = contline + line
- continue
-
- elif parenlev == 0 and not continued: # new statement
- if not line: break
- column = 0
- while pos < max: # measure leading whitespace
- if line[pos] == ' ': column = column + 1
- elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
- elif line[pos] == '\f': column = 0
- else: break
- pos = pos + 1
- if pos == max: break
-
- if line[pos] in '#\r\n': # skip comments or blank lines
- if line[pos] == '#':
- comment_token = line[pos:].rstrip('\r\n')
- nl_pos = pos + len(comment_token)
- yield (COMMENT, comment_token,
- (lnum, pos), (lnum, pos + len(comment_token)), line)
- yield (NL, line[nl_pos:],
- (lnum, nl_pos), (lnum, len(line)), line)
- else:
- yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
- (lnum, pos), (lnum, len(line)), line)
- continue
-
- if column > indents[-1]: # count indents or dedents
- indents.append(column)
- yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
- while column < indents[-1]:
- if column not in indents:
- raise IndentationError(
- "unindent does not match any outer indentation level",
- ("<tokenize>", lnum, pos, line))
- indents = indents[:-1]
- yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
-
- else: # continued statement
- if not line:
- raise TokenError, ("EOF in multi-line statement", (lnum, 0))
- continued = 0
-
- while pos < max:
- pseudomatch = pseudoprog.match(line, pos)
- if pseudomatch: # scan for tokens
- start, end = pseudomatch.span(1)
- spos, epos, pos = (lnum, start), (lnum, end), end
- token, initial = line[start:end], line[start]
-
- if initial in numchars or \
- (initial == '.' and token != '.'): # ordinary number
- yield (NUMBER, token, spos, epos, line)
- elif initial in '\r\n':
- newline = NEWLINE
- if parenlev > 0:
- newline = NL
- yield (newline, token, spos, epos, line)
- elif initial == '#':
- assert not token.endswith("\n")
- yield (COMMENT, token, spos, epos, line)
- elif token in triple_quoted:
- endprog = endprogs[token]
- endmatch = endprog.match(line, pos)
- if endmatch: # all on one line
- pos = endmatch.end(0)
- token = line[start:pos]
- yield (STRING, token, spos, (lnum, pos), line)
- else:
- strstart = (lnum, start) # multiple lines
- contstr = line[start:]
- contline = line
- break
- elif initial in single_quoted or \
- token[:2] in single_quoted or \
- token[:3] in single_quoted:
- if token[-1] == '\n': # continued string
- strstart = (lnum, start)
- endprog = (endprogs[initial] or endprogs[token[1]] or
- endprogs[token[2]])
- contstr, needcont = line[start:], 1
- contline = line
- break
- else: # ordinary string
- yield (STRING, token, spos, epos, line)
- elif initial in namechars: # ordinary name
- yield (NAME, token, spos, epos, line)
- elif initial == '\\': # continued stmt
- # This yield is new; needed for better idempotency:
- yield (NL, token, spos, (lnum, pos), line)
- continued = 1
- else:
- if initial in '([{': parenlev = parenlev + 1
- elif initial in ')]}': parenlev = parenlev - 1
- yield (OP, token, spos, epos, line)
- else:
- yield (ERRORTOKEN, line[pos],
- (lnum, pos), (lnum, pos+1), line)
- pos = pos + 1
-
- for indent in indents[1:]: # pop remaining indent levels
- yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
- yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
-
-if __name__ == '__main__': # testing
- import sys
- if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
- else: tokenize(sys.stdin.readline)