diff options
Diffstat (limited to 'runtime/Java/src/main/java/org/antlr/runtime/tree/TreeWizard.java')
-rw-r--r-- | runtime/Java/src/main/java/org/antlr/runtime/tree/TreeWizard.java | 531 |
1 files changed, 531 insertions, 0 deletions
diff --git a/runtime/Java/src/main/java/org/antlr/runtime/tree/TreeWizard.java b/runtime/Java/src/main/java/org/antlr/runtime/tree/TreeWizard.java new file mode 100644 index 0000000..666cfd6 --- /dev/null +++ b/runtime/Java/src/main/java/org/antlr/runtime/tree/TreeWizard.java @@ -0,0 +1,531 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.runtime.tree; + +import org.antlr.runtime.Token; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Build and navigate trees with this object. Must know about the names + * of tokens so you have to pass in a map or array of token names (from which + * this class can build the map). I.e., Token DECL means nothing unless the + * class can translate it to a token type. + * + * In order to create nodes and navigate, this class needs a TreeAdaptor. + * + * This class can build a token type -> node index for repeated use or for + * iterating over the various nodes with a particular type. + * + * This class works in conjunction with the TreeAdaptor rather than moving + * all this functionality into the adaptor. An adaptor helps build and + * navigate trees using methods. This class helps you do it with string + * patterns like "(A B C)". You can create a tree from that pattern or + * match subtrees against it. + */ +public class TreeWizard { + protected TreeAdaptor adaptor; + protected Map tokenNameToTypeMap; + + public interface ContextVisitor { + // TODO: should this be called visit or something else? + public void visit(Object t, Object parent, int childIndex, Map labels); + } + + public static abstract class Visitor implements ContextVisitor { + public void visit(Object t, Object parent, int childIndex, Map labels) { + visit(t); + } + public abstract void visit(Object t); + } + + /** When using %label:TOKENNAME in a tree for parse(), we must + * track the label. + */ + public static class TreePattern extends CommonTree { + public String label; + public boolean hasTextArg; + public TreePattern(Token payload) { + super(payload); + } + public String toString() { + if ( label!=null ) { + return "%"+label+":"+super.toString(); + } + else { + return super.toString(); + } + } + } + + public static class WildcardTreePattern extends TreePattern { + public WildcardTreePattern(Token payload) { + super(payload); + } + } + + /** This adaptor creates TreePattern objects for use during scan() */ + public static class TreePatternTreeAdaptor extends CommonTreeAdaptor { + public Object create(Token payload) { + return new TreePattern(payload); + } + } + + // TODO: build indexes for the wizard + + /** During fillBuffer(), we can make a reverse index from a set + * of token types of interest to the list of indexes into the + * node stream. This lets us convert a node pointer to a + * stream index semi-efficiently for a list of interesting + * nodes such as function definition nodes (you'll want to seek + * to their bodies for an interpreter). Also useful for doing + * dynamic searches; i.e., go find me all PLUS nodes. + protected Map tokenTypeToStreamIndexesMap; + + /** If tokenTypesToReverseIndex set to INDEX_ALL then indexing + * occurs for all token types. + public static final Set INDEX_ALL = new HashSet(); + + /** A set of token types user would like to index for faster lookup. + * If this is INDEX_ALL, then all token types are tracked. If null, + * then none are indexed. + protected Set tokenTypesToReverseIndex = null; + */ + + public TreeWizard(TreeAdaptor adaptor) { + this.adaptor = adaptor; + } + + public TreeWizard(TreeAdaptor adaptor, Map tokenNameToTypeMap) { + this.adaptor = adaptor; + this.tokenNameToTypeMap = tokenNameToTypeMap; + } + + public TreeWizard(TreeAdaptor adaptor, String[] tokenNames) { + this.adaptor = adaptor; + this.tokenNameToTypeMap = computeTokenTypes(tokenNames); + } + + public TreeWizard(String[] tokenNames) { + this(new CommonTreeAdaptor(), tokenNames); + } + + /** Compute a Map<String, Integer> that is an inverted index of + * tokenNames (which maps int token types to names). + */ + public Map computeTokenTypes(String[] tokenNames) { + Map m = new HashMap(); + if ( tokenNames==null ) { + return m; + } + for (int ttype = Token.MIN_TOKEN_TYPE; ttype < tokenNames.length; ttype++) { + String name = tokenNames[ttype]; + m.put(name, new Integer(ttype)); + } + return m; + } + + /** Using the map of token names to token types, return the type. */ + public int getTokenType(String tokenName) { + if ( tokenNameToTypeMap==null ) { + return Token.INVALID_TOKEN_TYPE; + } + Integer ttypeI = (Integer)tokenNameToTypeMap.get(tokenName); + if ( ttypeI!=null ) { + return ttypeI.intValue(); + } + return Token.INVALID_TOKEN_TYPE; + } + + /** Walk the entire tree and make a node name to nodes mapping. + * For now, use recursion but later nonrecursive version may be + * more efficient. Returns Map<Integer, List> where the List is + * of your AST node type. The Integer is the token type of the node. + * + * TODO: save this index so that find and visit are faster + */ + public Map index(Object t) { + Map m = new HashMap(); + _index(t, m); + return m; + } + + /** Do the work for index */ + protected void _index(Object t, Map m) { + if ( t==null ) { + return; + } + int ttype = adaptor.getType(t); + List elements = (List)m.get(new Integer(ttype)); + if ( elements==null ) { + elements = new ArrayList(); + m.put(new Integer(ttype), elements); + } + elements.add(t); + int n = adaptor.getChildCount(t); + for (int i=0; i<n; i++) { + Object child = adaptor.getChild(t, i); + _index(child, m); + } + } + + /** Return a List of tree nodes with token type ttype */ + public List find(Object t, int ttype) { + final List nodes = new ArrayList(); + visit(t, ttype, new TreeWizard.Visitor() { + public void visit(Object t) { + nodes.add(t); + } + }); + return nodes; + } + + /** Return a List of subtrees matching pattern. */ + public List find(Object t, String pattern) { + final List subtrees = new ArrayList(); + // Create a TreePattern from the pattern + TreePatternLexer tokenizer = new TreePatternLexer(pattern); + TreePatternParser parser = + new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor()); + final TreePattern tpattern = (TreePattern)parser.pattern(); + // don't allow invalid patterns + if ( tpattern==null || + tpattern.isNil() || + tpattern.getClass()==WildcardTreePattern.class ) + { + return null; + } + int rootTokenType = tpattern.getType(); + visit(t, rootTokenType, new TreeWizard.ContextVisitor() { + public void visit(Object t, Object parent, int childIndex, Map labels) { + if ( _parse(t, tpattern, null) ) { + subtrees.add(t); + } + } + }); + return subtrees; + } + + public Object findFirst(Object t, int ttype) { + return null; + } + + public Object findFirst(Object t, String pattern) { + return null; + } + + /** Visit every ttype node in t, invoking the visitor. This is a quicker + * version of the general visit(t, pattern) method. The labels arg + * of the visitor action method is never set (it's null) since using + * a token type rather than a pattern doesn't let us set a label. + */ + public void visit(Object t, int ttype, ContextVisitor visitor) { + _visit(t, null, 0, ttype, visitor); + } + + /** Do the recursive work for visit */ + protected void _visit(Object t, Object parent, int childIndex, int ttype, ContextVisitor visitor) { + if ( t==null ) { + return; + } + if ( adaptor.getType(t)==ttype ) { + visitor.visit(t, parent, childIndex, null); + } + int n = adaptor.getChildCount(t); + for (int i=0; i<n; i++) { + Object child = adaptor.getChild(t, i); + _visit(child, t, i, ttype, visitor); + } + } + + /** For all subtrees that match the pattern, execute the visit action. + * The implementation uses the root node of the pattern in combination + * with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. + * Patterns with wildcard roots are also not allowed. + */ + public void visit(Object t, final String pattern, final ContextVisitor visitor) { + // Create a TreePattern from the pattern + TreePatternLexer tokenizer = new TreePatternLexer(pattern); + TreePatternParser parser = + new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor()); + final TreePattern tpattern = (TreePattern)parser.pattern(); + // don't allow invalid patterns + if ( tpattern==null || + tpattern.isNil() || + tpattern.getClass()==WildcardTreePattern.class ) + { + return; + } + final Map labels = new HashMap(); // reused for each _parse + int rootTokenType = tpattern.getType(); + visit(t, rootTokenType, new TreeWizard.ContextVisitor() { + public void visit(Object t, Object parent, int childIndex, Map unusedlabels) { + // the unusedlabels arg is null as visit on token type doesn't set. + labels.clear(); + if ( _parse(t, tpattern, labels) ) { + visitor.visit(t, parent, childIndex, labels); + } + } + }); + } + + /** Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels + * on the various nodes and '.' (dot) as the node/subtree wildcard, + * return true if the pattern matches and fill the labels Map with + * the labels pointing at the appropriate nodes. Return false if + * the pattern is malformed or the tree does not match. + * + * If a node specifies a text arg in pattern, then that must match + * for that node in t. + * + * TODO: what's a better way to indicate bad pattern? Exceptions are a hassle + */ + public boolean parse(Object t, String pattern, Map labels) { + TreePatternLexer tokenizer = new TreePatternLexer(pattern); + TreePatternParser parser = + new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor()); + TreePattern tpattern = (TreePattern)parser.pattern(); + /* + System.out.println("t="+((Tree)t).toStringTree()); + System.out.println("scant="+tpattern.toStringTree()); + */ + boolean matched = _parse(t, tpattern, labels); + return matched; + } + + public boolean parse(Object t, String pattern) { + return parse(t, pattern, null); + } + + /** Do the work for parse. Check to see if the t2 pattern fits the + * structure and token types in t1. Check text if the pattern has + * text arguments on nodes. Fill labels map with pointers to nodes + * in tree matched against nodes in pattern with labels. + */ + protected boolean _parse(Object t1, TreePattern tpattern, Map labels) { + // make sure both are non-null + if ( t1==null || tpattern==null ) { + return false; + } + // check roots (wildcard matches anything) + if ( tpattern.getClass() != WildcardTreePattern.class ) { + if ( adaptor.getType(t1) != tpattern.getType() ) return false; + // if pattern has text, check node text + if ( tpattern.hasTextArg && !adaptor.getText(t1).equals(tpattern.getText()) ) { + return false; + } + } + if ( tpattern.label!=null && labels!=null ) { + // map label in pattern to node in t1 + labels.put(tpattern.label, t1); + } + // check children + int n1 = adaptor.getChildCount(t1); + int n2 = tpattern.getChildCount(); + if ( n1 != n2 ) { + return false; + } + for (int i=0; i<n1; i++) { + Object child1 = adaptor.getChild(t1, i); + TreePattern child2 = (TreePattern)tpattern.getChild(i); + if ( !_parse(child1, child2, labels) ) { + return false; + } + } + return true; + } + + /** Create a tree or node from the indicated tree pattern that closely + * follows ANTLR tree grammar tree element syntax: + * + * (root child1 ... child2). + * + * You can also just pass in a node: ID + * + * Any node can have a text argument: ID[foo] + * (notice there are no quotes around foo--it's clear it's a string). + * + * nil is a special name meaning "give me a nil node". Useful for + * making lists: (nil A B C) is a list of A B C. + */ + public Object create(String pattern) { + TreePatternLexer tokenizer = new TreePatternLexer(pattern); + TreePatternParser parser = new TreePatternParser(tokenizer, this, adaptor); + Object t = parser.pattern(); + return t; + } + + /** Compare t1 and t2; return true if token types/text, structure match exactly. + * The trees are examined in their entirety so that (A B) does not match + * (A B C) nor (A (B C)). + // TODO: allow them to pass in a comparator + * TODO: have a version that is nonstatic so it can use instance adaptor + * + * I cannot rely on the tree node's equals() implementation as I make + * no constraints at all on the node types nor interface etc... + */ + public static boolean equals(Object t1, Object t2, TreeAdaptor adaptor) { + return _equals(t1, t2, adaptor); + } + + /** Compare type, structure, and text of two trees, assuming adaptor in + * this instance of a TreeWizard. + */ + public boolean equals(Object t1, Object t2) { + return _equals(t1, t2, adaptor); + } + + protected static boolean _equals(Object t1, Object t2, TreeAdaptor adaptor) { + // make sure both are non-null + if ( t1==null || t2==null ) { + return false; + } + // check roots + if ( adaptor.getType(t1) != adaptor.getType(t2) ) { + return false; + } + if ( !adaptor.getText(t1).equals(adaptor.getText(t2)) ) { + return false; + } + // check children + int n1 = adaptor.getChildCount(t1); + int n2 = adaptor.getChildCount(t2); + if ( n1 != n2 ) { + return false; + } + for (int i=0; i<n1; i++) { + Object child1 = adaptor.getChild(t1, i); + Object child2 = adaptor.getChild(t2, i); + if ( !_equals(child1, child2, adaptor) ) { + return false; + } + } + return true; + } + + // TODO: next stuff taken from CommonTreeNodeStream + + /** Given a node, add this to the reverse index tokenTypeToStreamIndexesMap. + * You can override this method to alter how indexing occurs. The + * default is to create a + * + * Map<Integer token type,ArrayList<Integer stream index>> + * + * This data structure allows you to find all nodes with type INT in order. + * + * If you really need to find a node of type, say, FUNC quickly then perhaps + * + * Map<Integertoken type,Map<Object tree node,Integer stream index>> + * + * would be better for you. The interior maps map a tree node to + * the index so you don't have to search linearly for a specific node. + * + * If you change this method, you will likely need to change + * getNodeIndex(), which extracts information. + protected void fillReverseIndex(Object node, int streamIndex) { + //System.out.println("revIndex "+node+"@"+streamIndex); + if ( tokenTypesToReverseIndex==null ) { + return; // no indexing if this is empty (nothing of interest) + } + if ( tokenTypeToStreamIndexesMap==null ) { + tokenTypeToStreamIndexesMap = new HashMap(); // first indexing op + } + int tokenType = adaptor.getType(node); + Integer tokenTypeI = new Integer(tokenType); + if ( !(tokenTypesToReverseIndex==INDEX_ALL || + tokenTypesToReverseIndex.contains(tokenTypeI)) ) + { + return; // tokenType not of interest + } + Integer streamIndexI = new Integer(streamIndex); + ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI); + if ( indexes==null ) { + indexes = new ArrayList(); // no list yet for this token type + indexes.add(streamIndexI); // not there yet, add + tokenTypeToStreamIndexesMap.put(tokenTypeI, indexes); + } + else { + if ( !indexes.contains(streamIndexI) ) { + indexes.add(streamIndexI); // not there yet, add + } + } + } + + /** Track the indicated token type in the reverse index. Call this + * repeatedly for each type or use variant with Set argument to + * set all at once. + * @param tokenType + public void reverseIndex(int tokenType) { + if ( tokenTypesToReverseIndex==null ) { + tokenTypesToReverseIndex = new HashSet(); + } + else if ( tokenTypesToReverseIndex==INDEX_ALL ) { + return; + } + tokenTypesToReverseIndex.add(new Integer(tokenType)); + } + + /** Track the indicated token types in the reverse index. Set + * to INDEX_ALL to track all token types. + public void reverseIndex(Set tokenTypes) { + tokenTypesToReverseIndex = tokenTypes; + } + + /** Given a node pointer, return its index into the node stream. + * This is not its Token stream index. If there is no reverse map + * from node to stream index or the map does not contain entries + * for node's token type, a linear search of entire stream is used. + * + * Return -1 if exact node pointer not in stream. + public int getNodeIndex(Object node) { + //System.out.println("get "+node); + if ( tokenTypeToStreamIndexesMap==null ) { + return getNodeIndexLinearly(node); + } + int tokenType = adaptor.getType(node); + Integer tokenTypeI = new Integer(tokenType); + ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI); + if ( indexes==null ) { + //System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node)); + return getNodeIndexLinearly(node); + } + for (int i = 0; i < indexes.size(); i++) { + Integer streamIndexI = (Integer)indexes.get(i); + Object n = get(streamIndexI.intValue()); + if ( n==node ) { + //System.out.println("found in index; stream index = "+streamIndexI); + return streamIndexI.intValue(); // found it! + } + } + return -1; + } + + */ +} |