summaryrefslogtreecommitdiff
path: root/lib/python2.7/lib2to3/btm_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/lib2to3/btm_utils.py')
-rw-r--r--lib/python2.7/lib2to3/btm_utils.py283
1 files changed, 0 insertions, 283 deletions
diff --git a/lib/python2.7/lib2to3/btm_utils.py b/lib/python2.7/lib2to3/btm_utils.py
deleted file mode 100644
index 2276dc9..0000000
--- a/lib/python2.7/lib2to3/btm_utils.py
+++ /dev/null
@@ -1,283 +0,0 @@
-"Utility functions used by the btm_matcher module"
-
-from . import pytree
-from .pgen2 import grammar, token
-from .pygram import pattern_symbols, python_symbols
-
-syms = pattern_symbols
-pysyms = python_symbols
-tokens = grammar.opmap
-token_labels = token
-
-TYPE_ANY = -1
-TYPE_ALTERNATIVES = -2
-TYPE_GROUP = -3
-
-class MinNode(object):
- """This class serves as an intermediate representation of the
- pattern tree during the conversion to sets of leaf-to-root
- subpatterns"""
-
- def __init__(self, type=None, name=None):
- self.type = type
- self.name = name
- self.children = []
- self.leaf = False
- self.parent = None
- self.alternatives = []
- self.group = []
-
- def __repr__(self):
- return str(self.type) + ' ' + str(self.name)
-
- def leaf_to_root(self):
- """Internal method. Returns a characteristic path of the
- pattern tree. This method must be run for all leaves until the
- linear subpatterns are merged into a single"""
- node = self
- subp = []
- while node:
- if node.type == TYPE_ALTERNATIVES:
- node.alternatives.append(subp)
- if len(node.alternatives) == len(node.children):
- #last alternative
- subp = [tuple(node.alternatives)]
- node.alternatives = []
- node = node.parent
- continue
- else:
- node = node.parent
- subp = None
- break
-
- if node.type == TYPE_GROUP:
- node.group.append(subp)
- #probably should check the number of leaves
- if len(node.group) == len(node.children):
- subp = get_characteristic_subpattern(node.group)
- node.group = []
- node = node.parent
- continue
- else:
- node = node.parent
- subp = None
- break
-
- if node.type == token_labels.NAME and node.name:
- #in case of type=name, use the name instead
- subp.append(node.name)
- else:
- subp.append(node.type)
-
- node = node.parent
- return subp
-
- def get_linear_subpattern(self):
- """Drives the leaf_to_root method. The reason that
- leaf_to_root must be run multiple times is because we need to
- reject 'group' matches; for example the alternative form
- (a | b c) creates a group [b c] that needs to be matched. Since
- matching multiple linear patterns overcomes the automaton's
- capabilities, leaf_to_root merges each group into a single
- choice based on 'characteristic'ity,
-
- i.e. (a|b c) -> (a|b) if b more characteristic than c
-
- Returns: The most 'characteristic'(as defined by
- get_characteristic_subpattern) path for the compiled pattern
- tree.
- """
-
- for l in self.leaves():
- subp = l.leaf_to_root()
- if subp:
- return subp
-
- def leaves(self):
- "Generator that returns the leaves of the tree"
- for child in self.children:
- for x in child.leaves():
- yield x
- if not self.children:
- yield self
-
-def reduce_tree(node, parent=None):
- """
- Internal function. Reduces a compiled pattern tree to an
- intermediate representation suitable for feeding the
- automaton. This also trims off any optional pattern elements(like
- [a], a*).
- """
-
- new_node = None
- #switch on the node type
- if node.type == syms.Matcher:
- #skip
- node = node.children[0]
-
- if node.type == syms.Alternatives :
- #2 cases
- if len(node.children) <= 2:
- #just a single 'Alternative', skip this node
- new_node = reduce_tree(node.children[0], parent)
- else:
- #real alternatives
- new_node = MinNode(type=TYPE_ALTERNATIVES)
- #skip odd children('|' tokens)
- for child in node.children:
- if node.children.index(child)%2:
- continue
- reduced = reduce_tree(child, new_node)
- if reduced is not None:
- new_node.children.append(reduced)
- elif node.type == syms.Alternative:
- if len(node.children) > 1:
-
- new_node = MinNode(type=TYPE_GROUP)
- for child in node.children:
- reduced = reduce_tree(child, new_node)
- if reduced:
- new_node.children.append(reduced)
- if not new_node.children:
- # delete the group if all of the children were reduced to None
- new_node = None
-
- else:
- new_node = reduce_tree(node.children[0], parent)
-
- elif node.type == syms.Unit:
- if (isinstance(node.children[0], pytree.Leaf) and
- node.children[0].value == '('):
- #skip parentheses
- return reduce_tree(node.children[1], parent)
- if ((isinstance(node.children[0], pytree.Leaf) and
- node.children[0].value == '[')
- or
- (len(node.children)>1 and
- hasattr(node.children[1], "value") and
- node.children[1].value == '[')):
- #skip whole unit if its optional
- return None
-
- leaf = True
- details_node = None
- alternatives_node = None
- has_repeater = False
- repeater_node = None
- has_variable_name = False
-
- for child in node.children:
- if child.type == syms.Details:
- leaf = False
- details_node = child
- elif child.type == syms.Repeater:
- has_repeater = True
- repeater_node = child
- elif child.type == syms.Alternatives:
- alternatives_node = child
- if hasattr(child, 'value') and child.value == '=': # variable name
- has_variable_name = True
-
- #skip variable name
- if has_variable_name:
- #skip variable name, '='
- name_leaf = node.children[2]
- if hasattr(name_leaf, 'value') and name_leaf.value == '(':
- # skip parenthesis
- name_leaf = node.children[3]
- else:
- name_leaf = node.children[0]
-
- #set node type
- if name_leaf.type == token_labels.NAME:
- #(python) non-name or wildcard
- if name_leaf.value == 'any':
- new_node = MinNode(type=TYPE_ANY)
- else:
- if hasattr(token_labels, name_leaf.value):
- new_node = MinNode(type=getattr(token_labels, name_leaf.value))
- else:
- new_node = MinNode(type=getattr(pysyms, name_leaf.value))
-
- elif name_leaf.type == token_labels.STRING:
- #(python) name or character; remove the apostrophes from
- #the string value
- name = name_leaf.value.strip("'")
- if name in tokens:
- new_node = MinNode(type=tokens[name])
- else:
- new_node = MinNode(type=token_labels.NAME, name=name)
- elif name_leaf.type == syms.Alternatives:
- new_node = reduce_tree(alternatives_node, parent)
-
- #handle repeaters
- if has_repeater:
- if repeater_node.children[0].value == '*':
- #reduce to None
- new_node = None
- elif repeater_node.children[0].value == '+':
- #reduce to a single occurence i.e. do nothing
- pass
- else:
- #TODO: handle {min, max} repeaters
- raise NotImplementedError
- pass
-
- #add children
- if details_node and new_node is not None:
- for child in details_node.children[1:-1]:
- #skip '<', '>' markers
- reduced = reduce_tree(child, new_node)
- if reduced is not None:
- new_node.children.append(reduced)
- if new_node:
- new_node.parent = parent
- return new_node
-
-
-def get_characteristic_subpattern(subpatterns):
- """Picks the most characteristic from a list of linear patterns
- Current order used is:
- names > common_names > common_chars
- """
- if not isinstance(subpatterns, list):
- return subpatterns
- if len(subpatterns)==1:
- return subpatterns[0]
-
- # first pick out the ones containing variable names
- subpatterns_with_names = []
- subpatterns_with_common_names = []
- common_names = ['in', 'for', 'if' , 'not', 'None']
- subpatterns_with_common_chars = []
- common_chars = "[]().,:"
- for subpattern in subpatterns:
- if any(rec_test(subpattern, lambda x: type(x) is str)):
- if any(rec_test(subpattern,
- lambda x: isinstance(x, str) and x in common_chars)):
- subpatterns_with_common_chars.append(subpattern)
- elif any(rec_test(subpattern,
- lambda x: isinstance(x, str) and x in common_names)):
- subpatterns_with_common_names.append(subpattern)
-
- else:
- subpatterns_with_names.append(subpattern)
-
- if subpatterns_with_names:
- subpatterns = subpatterns_with_names
- elif subpatterns_with_common_names:
- subpatterns = subpatterns_with_common_names
- elif subpatterns_with_common_chars:
- subpatterns = subpatterns_with_common_chars
- # of the remaining subpatterns pick out the longest one
- return max(subpatterns, key=len)
-
-def rec_test(sequence, test_func):
- """Tests test_func on all items of sequence and items of included
- sub-iterables"""
- for x in sequence:
- if isinstance(x, (list, tuple)):
- for y in rec_test(x, test_func):
- yield y
- else:
- yield test_func(x)