Diffstat (limited to 'python/helpers/coverage/parser.py')
-rw-r--r--  python/helpers/coverage/parser.py  442
1 file changed, 171 insertions, 271 deletions
diff --git a/python/helpers/coverage/parser.py b/python/helpers/coverage/parser.py
index cbbb5a6a2ec1..7a145a2a5346 100644
--- a/python/helpers/coverage/parser.py
+++ b/python/helpers/coverage/parser.py
@@ -1,9 +1,11 @@
"""Code parsing for Coverage."""
-import glob, opcode, os, re, sys, token, tokenize
+import dis, re, sys, token, tokenize
from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
-from coverage.backward import open_source
+from coverage.backward import open_source, range # pylint: disable=W0622
+from coverage.backward import reversed # pylint: disable=W0622
+from coverage.backward import bytes_to_ints
from coverage.bytecode import ByteCodes, CodeObjects
from coverage.misc import nice_pair, expensive, join_regex
from coverage.misc import CoverageException, NoSource, NotPython
@@ -32,9 +34,13 @@ class CodeParser(object):
except IOError:
_, err, _ = sys.exc_info()
raise NoSource(
- "No source for code: %r: %s" % (self.filename, err)
+ "No source for code: '%s': %s" % (self.filename, err)
)
+ # Scrap the BOM if it exists.
+ if self.text and ord(self.text[0]) == 0xfeff:
+ self.text = self.text[1:]
+
self.exclude = exclude
self.show_tokens = False
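For context, a minimal sketch (not part of this patch) of what the new BOM
check handles: a file saved as UTF-8-with-BOM decodes to text whose first
character is U+FEFF, which the tokenizer reports as an error token.

    raw = b'\xef\xbb\xbfx = 1\n'      # UTF-8 BOM, then source
    text = raw.decode('utf-8')
    assert ord(text[0]) == 0xfeff     # the check CodeParser now makes
    text = text[1:]                   # ...and the scrubbing it applies
    assert text == 'x = 1\n'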
@@ -102,9 +108,9 @@ class CodeParser(object):
first_line = None
empty = True
- tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
+ tokgen = generate_tokens(self.text)
for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
- if self.show_tokens: # pragma: no cover
+ if self.show_tokens: # pragma: not covered
print("%10s %5s %-20r %r" % (
tokenize.tok_name.get(toktype, toktype),
nice_pair((slineno, elineno)), ttext, ltext
@@ -130,8 +136,7 @@ class CodeParser(object):
# (a trick from trace.py in the stdlib.) This works for
# 99.9999% of cases. For the rest (!) see:
# http://stackoverflow.com/questions/1769332/x/1769794#1769794
- for i in range(slineno, elineno+1):
- self.docstrings.add(i)
+ self.docstrings.update(range(slineno, elineno+1))
elif toktype == token.NEWLINE:
if first_line is not None and elineno != first_line:
# We're at the end of a line, and we've ended on a
@@ -170,16 +175,18 @@ class CodeParser(object):
first_line = line
return first_line
- def first_lines(self, lines, ignore=None):
+ def first_lines(self, lines, *ignores):
"""Map the line numbers in `lines` to the correct first line of the
statement.
- Skip any line mentioned in `ignore`.
+ Skip any line mentioned in any of the sequences in `ignores`.
- Returns a sorted list of the first lines.
+ Returns a set of the first lines.
"""
- ignore = ignore or []
+ ignore = set()
+ for ign in ignores:
+ ignore.update(ign)
lset = set()
for l in lines:
if l in ignore:
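As a toy stand-in (names illustrative, the first_line normalization elided),
the new variadic signature merges any number of ignore collections itself,
so callers such as parse_source in the next hunk no longer concatenate them
by hand:

    def first_lines(lines, *ignores):
        ignore = set()
        for ign in ignores:
            ignore.update(ign)
        return set(l for l in lines if l not in ignore)

    assert first_lines([1, 4, 7], set([4]), [7]) == set([1])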
@@ -187,23 +194,34 @@ class CodeParser(object):
new_l = self.first_line(l)
if new_l not in ignore:
lset.add(new_l)
- return sorted(lset)
+ return lset
def parse_source(self):
"""Parse source text to find executable lines, excluded lines, etc.
- Return values are 1) a sorted list of executable line numbers, and
- 2) a sorted list of excluded line numbers.
+ Return values are 1) a set of executable line numbers, and 2) a set of
+ excluded line numbers.
Reported line numbers are normalized to the first line of multi-line
statements.
"""
- self._raw_parse()
+ try:
+ self._raw_parse()
+ except (tokenize.TokenError, IndentationError):
+ _, tokerr, _ = sys.exc_info()
+ msg, lineno = tokerr.args
+ raise NotPython(
+ "Couldn't parse '%s' as Python source: '%s' at %s" %
+ (self.filename, msg, lineno)
+ )
excluded_lines = self.first_lines(self.excluded)
- ignore = excluded_lines + list(self.docstrings)
- lines = self.first_lines(self.statement_starts, ignore)
+ lines = self.first_lines(
+ self.statement_starts,
+ excluded_lines,
+ self.docstrings
+ )
return lines, excluded_lines
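A hedged usage sketch of the reworked parse_source (modern except-as syntax
for brevity; the surrounding code targets older Pythons via sys.exc_info()):

    from coverage.parser import CodeParser
    from coverage.misc import NotPython

    parser = CodeParser(text="a = 1\nif a:\n    b = 2\n",
                        exclude=r"no\s*cover")
    try:
        statements, excluded = parser.parse_source()
    except NotPython as err:   # now raised for TokenError/IndentationError
        print(err)
    else:
        # Both results are now sets, not sorted lists.
        assert isinstance(statements, set) and isinstance(excluded, set)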
@@ -258,8 +276,8 @@ class CodeParser(object):
## Opcodes that guide the ByteParser.
def _opcode(name):
- """Return the opcode by name from the opcode module."""
- return opcode.opmap[name]
+ """Return the opcode by name from the dis module."""
+ return dis.opmap[name]
def _opcode_set(*names):
"""Return a set of opcodes by the names in `names`."""
@@ -297,7 +315,7 @@ OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
# Opcodes that have a jump destination, but aren't really a jump.
-OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_NO_JUMP = OPS_PUSH_BLOCK
# Individual opcodes we need below.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
@@ -314,6 +332,7 @@ class ByteParser(object):
def __init__(self, code=None, text=None, filename=None):
if code:
self.code = code
+ self.text = text
else:
if not text:
assert filename, "If no code or text, need a filename"
@@ -322,6 +341,7 @@ class ByteParser(object):
text = sourcef.read()
finally:
sourcef.close()
+ self.text = text
try:
# Python 2.3 and 2.4 don't like partial last lines, so be sure
@@ -350,69 +370,54 @@ class ByteParser(object):
The iteration includes `self` as its first value.
"""
- return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
-
- # Getting numbers from the lnotab value changed in Py3.0.
- if sys.version_info >= (3, 0):
- def _lnotab_increments(self, lnotab):
- """Return a list of ints from the lnotab bytes in 3.x"""
- return list(lnotab)
- else:
- def _lnotab_increments(self, lnotab):
- """Return a list of ints from the lnotab string in 2.x"""
- return [ord(c) for c in lnotab]
+ children = CodeObjects(self.code)
+ return [ByteParser(code=c, text=self.text) for c in children]
def _bytes_lines(self):
"""Map byte offsets to line numbers in `code`.
Uses co_lnotab described in Python/compile.c to map byte offsets to
- line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
+ line numbers. Produces a sequence: (b0, l0), (b1, l1), ...
+
+ Only byte offsets that correspond to line numbers are included in the
+ results.
"""
# Adapted from dis.py in the standard library.
- byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
- line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
+ byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
+ line_increments = bytes_to_ints(self.code.co_lnotab[1::2])
- bytes_lines = []
last_line_num = None
line_num = self.code.co_firstlineno
byte_num = 0
for byte_incr, line_incr in zip(byte_increments, line_increments):
if byte_incr:
if line_num != last_line_num:
- bytes_lines.append((byte_num, line_num))
+ yield (byte_num, line_num)
last_line_num = line_num
byte_num += byte_incr
line_num += line_incr
if line_num != last_line_num:
- bytes_lines.append((byte_num, line_num))
- return bytes_lines
+ yield (byte_num, line_num)
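As an aside, a standalone sketch of the decoding _bytes_lines performs,
written against Python 3 bytes (so no bytes_to_ints shim) and assuming
pre-3.6 co_lnotab semantics; interpreters that offer co_lines() should
prefer it:

    def lnotab_pairs(code):
        """Yield (byte_offset, line_number) pairs from code.co_lnotab."""
        byte_num, line_num = 0, code.co_firstlineno
        last_line = None
        for byte_incr, line_incr in zip(code.co_lnotab[0::2],
                                        code.co_lnotab[1::2]):
            if byte_incr:
                if line_num != last_line:
                    yield byte_num, line_num
                    last_line = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line:
            yield byte_num, line_num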
def _find_statements(self):
"""Find the statements in `self.code`.
- Return a set of line numbers that start statements. Recurses into all
- code objects reachable from `self.code`.
+ Produce a sequence of line numbers that start statements. Recurses
+ into all code objects reachable from `self.code`.
"""
- stmts = set()
for bp in self.child_parsers():
# Get all of the lineno information from this code.
for _, l in bp._bytes_lines():
- stmts.add(l)
- return stmts
-
- def _disassemble(self): # pragma: no cover
- """Disassemble code, for ad-hoc experimenting."""
-
- import dis
-
- for bp in self.child_parsers():
- print("\n%s: " % bp.code)
- dis.dis(bp.code)
- print("Bytes lines: %r" % bp._bytes_lines())
+ yield l
- print("")
+ def _block_stack_repr(self, block_stack):
+ """Get a string version of `block_stack`, for debugging."""
+ blocks = ", ".join(
+ ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+ )
+ return "[" + blocks + "]"
def _split_into_chunks(self):
"""Split the code object into a list of `Chunk` objects.
@@ -423,10 +428,11 @@ class ByteParser(object):
Returns a list of `Chunk` objects.
"""
-
# The list of chunks so far, and the one we're working on.
chunks = []
chunk = None
+
+ # A dict mapping byte offsets of line starts to the line numbers.
bytes_lines_map = dict(self._bytes_lines())
# The block stack: loops and try blocks get pushed here for the
@@ -441,24 +447,38 @@ class ByteParser(object):
# We have to handle the last two bytecodes specially.
ult = penult = None
- for bc in ByteCodes(self.code.co_code):
+ # Get a set of all of the jump-to points.
+ jump_to = set()
+ bytecodes = list(ByteCodes(self.code.co_code))
+ for bc in bytecodes:
+ if bc.jump_to >= 0:
+ jump_to.add(bc.jump_to)
+
+ chunk_lineno = 0
+
+ # Walk the byte codes building chunks.
+ for bc in bytecodes:
# Maybe have to start a new chunk
+ start_new_chunk = False
+ first_chunk = False
if bc.offset in bytes_lines_map:
# Start a new chunk for each source line number.
- if chunk:
- chunk.exits.add(bc.offset)
- chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
- chunks.append(chunk)
+ start_new_chunk = True
+ chunk_lineno = bytes_lines_map[bc.offset]
+ first_chunk = True
+ elif bc.offset in jump_to:
+ # To make chunks have a single entrance, we have to make a new
+ # chunk when we get to a place some bytecode jumps to.
+ start_new_chunk = True
elif bc.op in OPS_CHUNK_BEGIN:
# Jumps deserve their own unnumbered chunk. This fixes
# problems with jumps to jumps getting confused.
+ start_new_chunk = True
+
+ if not chunk or start_new_chunk:
if chunk:
chunk.exits.add(bc.offset)
- chunk = Chunk(bc.offset)
- chunks.append(chunk)
-
- if not chunk:
- chunk = Chunk(bc.offset)
+ chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
chunks.append(chunk)
# Look at the opcode
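For a rough feel of the three boundary kinds established above -- source-line
starts, jump targets, and OPS_CHUNK_BEGIN opcodes -- the dis module can show
the first two directly (offsets vary with the CPython version):

    import dis

    def sample(x):
        if x:
            return 1
        return 2

    line_starts = dict(dis.findlinestarts(sample.__code__))
    jump_targets = set(dis.findlabels(sample.__code__.co_code))
    print(sorted(line_starts))    # offsets starting a new, numbered chunk
    print(sorted(jump_targets))   # offsets forcing an unnumbered split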
@@ -487,15 +507,11 @@ class ByteParser(object):
chunk.exits.add(block_stack[-1][1])
chunk = None
if bc.op == OP_END_FINALLY:
- if block_stack:
- # A break that goes through a finally will jump to whatever
- # block is on top of the stack.
- chunk.exits.add(block_stack[-1][1])
# For the finally clause we need to find the closest exception
# block, and use its jump target as an exit.
- for iblock in range(len(block_stack)-1, -1, -1):
- if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
- chunk.exits.add(block_stack[iblock][1])
+ for block in reversed(block_stack):
+ if block[0] in OPS_EXCEPT_BLOCKS:
+ chunk.exits.add(block[1])
break
if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
# This is an except clause. We want to overlook the next
@@ -521,23 +537,33 @@ class ByteParser(object):
last_chunk = chunks[-1]
last_chunk.exits.remove(ex)
last_chunk.exits.add(penult.offset)
- chunk = Chunk(penult.offset)
+ chunk = Chunk(
+ penult.offset, last_chunk.line, False
+ )
chunk.exits.add(ex)
chunks.append(chunk)
# Give all the chunks a length.
- chunks[-1].length = bc.next_offset - chunks[-1].byte
+ chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
for i in range(len(chunks)-1):
chunks[i].length = chunks[i+1].byte - chunks[i].byte
+ #self.validate_chunks(chunks)
return chunks
+ def validate_chunks(self, chunks):
+ """Validate the rule that chunks have a single entrance."""
+ # `starts` is the set of entrances to the chunks.
+ starts = set([ch.byte for ch in chunks])
+ for ch in chunks:
+ assert all([(ex in starts or ex < 0) for ex in ch.exits])
+
def _arcs(self):
"""Find the executable arcs in the code.
- Returns a set of pairs, (from,to). From and to are integer line
- numbers. If from is < 0, then the arc is an entrance into the code
- object. If to is < 0, the arc is an exit from the code object.
+ Yields pairs: (from,to). From and to are integer line numbers. If
+ from is < 0, then the arc is an entrance into the code object. If to
+ is < 0, the arc is an exit from the code object.
"""
chunks = self._split_into_chunks()
@@ -545,65 +571,43 @@ class ByteParser(object):
# A map from byte offsets to chunks jumped into.
byte_chunks = dict([(c.byte, c) for c in chunks])
- # Build a map from byte offsets to actual lines reached.
- byte_lines = {}
- bytes_to_add = set([c.byte for c in chunks])
+ # There's always an entrance at the first chunk.
+ yield (-1, byte_chunks[0].line)
- while bytes_to_add:
- byte_to_add = bytes_to_add.pop()
- if byte_to_add in byte_lines or byte_to_add < 0:
+ # Traverse from the first chunk in each line, and yield arcs where
+ # the trace function will be invoked.
+ for chunk in chunks:
+ if not chunk.first:
continue
- # Which lines does this chunk lead to?
- bytes_considered = set()
- bytes_to_consider = [byte_to_add]
- lines = set()
-
- while bytes_to_consider:
- byte = bytes_to_consider.pop()
- bytes_considered.add(byte)
-
- # Find chunk for byte
- try:
- ch = byte_chunks[byte]
- except KeyError:
- for ch in chunks:
- if ch.byte <= byte < ch.byte+ch.length:
- break
- else:
- # No chunk for this byte!
- raise Exception("Couldn't find chunk @ %d" % byte)
- byte_chunks[byte] = ch
-
- if ch.line:
- lines.add(ch.line)
- else:
- for ex in ch.exits:
- if ex < 0:
- lines.add(ex)
- elif ex not in bytes_considered:
- bytes_to_consider.append(ex)
-
- bytes_to_add.update(ch.exits)
-
- byte_lines[byte_to_add] = lines
-
- # Figure out for each chunk where the exits go.
- arcs = set()
- for chunk in chunks:
- if chunk.line:
- for ex in chunk.exits:
+ chunks_considered = set()
+ chunks_to_consider = [chunk]
+ while chunks_to_consider:
+ # Get the chunk we're considering, and make sure we don't
+ # consider it again
+ this_chunk = chunks_to_consider.pop()
+ chunks_considered.add(this_chunk)
+
+ # For each exit, add the line number if the trace function
+ # would be triggered, or add the chunk to those being
+ # considered if not.
+ for ex in this_chunk.exits:
if ex < 0:
- exit_lines = [ex]
+ yield (chunk.line, ex)
else:
- exit_lines = byte_lines[ex]
- for exit_line in exit_lines:
- if chunk.line != exit_line:
- arcs.add((chunk.line, exit_line))
- for line in byte_lines[0]:
- arcs.add((-1, line))
-
- return arcs
+ next_chunk = byte_chunks[ex]
+ if next_chunk in chunks_considered:
+ continue
+
+ # The trace function is invoked if visiting the first
+ # bytecode in a line, or if the transition is a
+ # backward jump.
+ backward_jump = next_chunk.byte < this_chunk.byte
+ if next_chunk.first or backward_jump:
+ if next_chunk.line != chunk.line:
+ yield (chunk.line, next_chunk.line)
+ else:
+ chunks_to_consider.append(next_chunk)
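A hedged peek at what the generator ultimately produces, assuming the
private ByteParser helpers keep the names used in this patch; the exact
arcs depend on the interpreter's bytecode:

    from coverage.parser import ByteParser

    src = "a = 1\nif a:\n    b = 2\nelse:\n    b = 3\n"
    bp = ByteParser(text=src)
    print(sorted(bp._all_arcs()))
    # Pairs of line numbers; -1 on the left marks an entrance into the
    # code object, a negative number on the right marks an exit.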
def _all_chunks(self):
"""Returns a list of `Chunk` objects for this code and its children.
@@ -631,11 +635,11 @@ class ByteParser(object):
class Chunk(object):
- """A sequence of bytecodes with a single entrance.
+ """A sequence of byte codes with a single entrance.
To analyze byte code, we have to divide it into chunks, sequences of byte
- codes such that each basic block has only one entrance, the first
- instruction in the block.
+ codes such that each chunk has only one entrance, the first instruction in
+ the block.
This is almost the CS concept of `basic block`_, except that we're willing
to have many exits from a chunk, and "basic block" is a more cumbersome
@@ -643,158 +647,54 @@ class Chunk(object):
.. _basic block: http://en.wikipedia.org/wiki/Basic_block
+ `line` is the source line number containing this chunk.
+
+ `first` is true if this is the first chunk in the source line.
+
An exit < 0 means the chunk can leave the code (return). The exit is
the negative of the starting line number of the code block.
"""
- def __init__(self, byte, line=0):
+ def __init__(self, byte, line, first):
self.byte = byte
self.line = line
+ self.first = first
self.length = 0
self.exits = set()
def __repr__(self):
- return "<%d+%d @%d %r>" % (
- self.byte, self.length, self.line, list(self.exits)
+ if self.first:
+ bang = "!"
+ else:
+ bang = ""
+ return "<%d+%d @%d%s %r>" % (
+ self.byte, self.length, self.line, bang, list(self.exits)
)
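A hypothetical construction under the new three-argument signature, showing
how `first` surfaces in the repr:

    ch = Chunk(byte=0, line=1, first=True)
    ch.exits.add(6)
    print(ch)     # <0+0 @1! [6]> -- the "!" marks a line's first chunk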
-class AdHocMain(object): # pragma: no cover
- """An ad-hoc main for code parsing experiments."""
+class CachedTokenizer(object):
+ """A one-element cache around tokenize.generate_tokens.
- def main(self, args):
- """A main function for trying the code from the command line."""
+ When reporting, coverage.py tokenizes files twice, once to find the
+ structure of the file, and once to syntax-color it. Tokenizing is
+ expensive, and easily cached.
- from optparse import OptionParser
+ This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+ actually tokenize twice.
- parser = OptionParser()
- parser.add_option(
- "-c", action="store_true", dest="chunks",
- help="Show basic block chunks"
- )
- parser.add_option(
- "-d", action="store_true", dest="dis",
- help="Disassemble"
- )
- parser.add_option(
- "-R", action="store_true", dest="recursive",
- help="Recurse to find source files"
- )
- parser.add_option(
- "-s", action="store_true", dest="source",
- help="Show analyzed source"
- )
- parser.add_option(
- "-t", action="store_true", dest="tokens",
- help="Show tokens"
+ """
+ def __init__(self):
+ self.last_text = None
+ self.last_tokens = None
+
+ def generate_tokens(self, text):
+ """A stand-in for `tokenize.generate_tokens`."""
+ if text != self.last_text:
+ self.last_text = text
+ self.last_tokens = list(
+ tokenize.generate_tokens(StringIO(text).readline)
)
+ return self.last_tokens
- options, args = parser.parse_args()
- if options.recursive:
- if args:
- root = args[0]
- else:
- root = "."
- for root, _, _ in os.walk(root):
- for f in glob.glob(root + "/*.py"):
- self.adhoc_one_file(options, f)
- else:
- self.adhoc_one_file(options, args[0])
-
- def adhoc_one_file(self, options, filename):
- """Process just one file."""
-
- if options.dis or options.chunks:
- try:
- bp = ByteParser(filename=filename)
- except CoverageException:
- _, err, _ = sys.exc_info()
- print("%s" % (err,))
- return
-
- if options.dis:
- print("Main code:")
- bp._disassemble()
-
- if options.chunks:
- chunks = bp._all_chunks()
- if options.recursive:
- print("%6d: %s" % (len(chunks), filename))
- else:
- print("Chunks: %r" % chunks)
- arcs = bp._all_arcs()
- print("Arcs: %r" % sorted(arcs))
-
- if options.source or options.tokens:
- cp = CodeParser(filename=filename, exclude=r"no\s*cover")
- cp.show_tokens = options.tokens
- cp._raw_parse()
-
- if options.source:
- if options.chunks:
- arc_width, arc_chars = self.arc_ascii_art(arcs)
- else:
- arc_width, arc_chars = 0, {}
-
- exit_counts = cp.exit_counts()
-
- for i, ltext in enumerate(cp.lines):
- lineno = i+1
- m0 = m1 = m2 = m3 = a = ' '
- if lineno in cp.statement_starts:
- m0 = '-'
- exits = exit_counts.get(lineno, 0)
- if exits > 1:
- m1 = str(exits)
- if lineno in cp.docstrings:
- m2 = '"'
- if lineno in cp.classdefs:
- m2 = 'C'
- if lineno in cp.excluded:
- m3 = 'x'
- a = arc_chars.get(lineno, '').ljust(arc_width)
- print("%4d %s%s%s%s%s %s" %
- (lineno, m0, m1, m2, m3, a, ltext)
- )
-
- def arc_ascii_art(self, arcs):
- """Draw arcs as ascii art.
-
- Returns a width of characters needed to draw all the arcs, and a
- dictionary mapping line numbers to ascii strings to draw for that line.
-
- """
- arc_chars = {}
- for lfrom, lto in sorted(arcs):
- if lfrom < 0:
- arc_chars[lto] = arc_chars.get(lto, '') + 'v'
- elif lto < 0:
- arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
- else:
- if lfrom == lto - 1:
- # Don't show obvious arcs.
- continue
- if lfrom < lto:
- l1, l2 = lfrom, lto
- else:
- l1, l2 = lto, lfrom
- w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
- for l in range(l1, l2+1):
- if l == lfrom:
- ch = '<'
- elif l == lto:
- ch = '>'
- else:
- ch = '|'
- arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch
- arc_width = 0
-
- if arc_chars:
- arc_width = max([len(a) for a in arc_chars.values()])
- else:
- arc_width = 0
-
- return arc_width, arc_chars
-
-if __name__ == '__main__':
- AdHocMain().main(sys.argv[1:])
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
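A minimal check of the cache contract: tokenizing the same text twice
returns the very same list object, so the reporter's second pass is free
(any other text evicts the single cached entry):

    src = "x = 1\nprint(x)\n"
    toks_a = generate_tokens(src)
    toks_b = generate_tokens(src)
    assert toks_a is toks_b       # one-element cache hit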