diff options
Diffstat (limited to 'runtime/CSharp3/Sources/Antlr3.Runtime/Tree/TreeWizard.cs')
-rw-r--r-- | runtime/CSharp3/Sources/Antlr3.Runtime/Tree/TreeWizard.cs | 736 |
1 files changed, 736 insertions, 0 deletions
diff --git a/runtime/CSharp3/Sources/Antlr3.Runtime/Tree/TreeWizard.cs b/runtime/CSharp3/Sources/Antlr3.Runtime/Tree/TreeWizard.cs new file mode 100644 index 0000000..98c759b --- /dev/null +++ b/runtime/CSharp3/Sources/Antlr3.Runtime/Tree/TreeWizard.cs @@ -0,0 +1,736 @@ +/* + * [The "BSD licence"] + * Copyright (c) 2005-2008 Terence Parr + * All rights reserved. + * + * Conversion to C#: + * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// TODO: build indexes for wizard +//#define BUILD_INDEXES + +namespace Antlr.Runtime.Tree +{ + using System.Collections.Generic; + + using IList = System.Collections.IList; +#if BUILD_INDEXES + using IDictionary = System.Collections.IDictionary; +#endif + + /** <summary> + * Build and navigate trees with this object. Must know about the names + * of tokens so you have to pass in a map or array of token names (from which + * this class can build the map). I.e., Token DECL means nothing unless the + * class can translate it to a token type. + * </summary> + * + * <remarks> + * In order to create nodes and navigate, this class needs a TreeAdaptor. + * + * This class can build a token type -> node index for repeated use or for + * iterating over the various nodes with a particular type. + * + * This class works in conjunction with the TreeAdaptor rather than moving + * all this functionality into the adaptor. An adaptor helps build and + * navigate trees using methods. This class helps you do it with string + * patterns like "(A B C)". You can create a tree from that pattern or + * match subtrees against it. + * </remarks> + */ + public class TreeWizard + { + protected ITreeAdaptor adaptor; + protected IDictionary<string, int> tokenNameToTypeMap; + + public interface IContextVisitor + { + // TODO: should this be called visit or something else? + void Visit( object t, object parent, int childIndex, IDictionary<string, object> labels ); + } + + public abstract class Visitor : IContextVisitor + { + public virtual void Visit( object t, object parent, int childIndex, IDictionary<string, object> labels ) + { + Visit( t ); + } + public abstract void Visit( object t ); + } + + class ActionVisitor : Visitor + { + System.Action<object> _action; + + public ActionVisitor( System.Action<object> action ) + { + _action = action; + } + + public override void Visit( object t ) + { + _action( t ); + } + } + + /** <summary> + * When using %label:TOKENNAME in a tree for parse(), we must + * track the label. + * </summary> + */ + public class TreePattern : CommonTree + { + public string label; + public bool hasTextArg; + public TreePattern( IToken payload ) : + base( payload ) + { + } + public override string ToString() + { + if ( label != null ) + { + return "%" + label + ":"; //+ base.ToString(); + } + else + { + return base.ToString(); + } + } + } + + public class WildcardTreePattern : TreePattern + { + public WildcardTreePattern( IToken payload ) : + base( payload ) + { + } + } + + /** <summary>This adaptor creates TreePattern objects for use during scan()</summary> */ + public class TreePatternTreeAdaptor : CommonTreeAdaptor + { + public override object Create( IToken payload ) + { + return new TreePattern( payload ); + } + } + +#if BUILD_INDEXES + // TODO: build indexes for the wizard + + /** <summary> + * During fillBuffer(), we can make a reverse index from a set + * of token types of interest to the list of indexes into the + * node stream. This lets us convert a node pointer to a + * stream index semi-efficiently for a list of interesting + * nodes such as function definition nodes (you'll want to seek + * to their bodies for an interpreter). Also useful for doing + * dynamic searches; i.e., go find me all PLUS nodes. + * </summary> + */ + protected IDictionary<int, IList<int>> tokenTypeToStreamIndexesMap; + + /** <summary> + * If tokenTypesToReverseIndex set to INDEX_ALL then indexing + * occurs for all token types. + * </summary> + */ + public static readonly HashSet<int> INDEX_ALL = new HashSet<int>(); + + /** <summary> + * A set of token types user would like to index for faster lookup. + * If this is INDEX_ALL, then all token types are tracked. If null, + * then none are indexed. + * </summary> + */ + protected HashSet<int> tokenTypesToReverseIndex = null; +#endif + + public TreeWizard( ITreeAdaptor adaptor ) + { + this.adaptor = adaptor; + } + + public TreeWizard( ITreeAdaptor adaptor, IDictionary<string, int> tokenNameToTypeMap ) + { + this.adaptor = adaptor; + this.tokenNameToTypeMap = tokenNameToTypeMap; + } + + public TreeWizard( ITreeAdaptor adaptor, string[] tokenNames ) + { + this.adaptor = adaptor; + this.tokenNameToTypeMap = ComputeTokenTypes( tokenNames ); + } + + public TreeWizard( string[] tokenNames ) + : this( new CommonTreeAdaptor(), tokenNames ) + { + } + + /** <summary> + * Compute a Map<String, Integer> that is an inverted index of + * tokenNames (which maps int token types to names). + * </summary> + */ + public virtual IDictionary<string, int> ComputeTokenTypes( string[] tokenNames ) + { + IDictionary<string, int> m = new Dictionary<string, int>(); + if ( tokenNames == null ) + { + return m; + } + for ( int ttype = TokenTypes.Min; ttype < tokenNames.Length; ttype++ ) + { + string name = tokenNames[ttype]; + m[name] = ttype; + } + return m; + } + + /** <summary>Using the map of token names to token types, return the type.</summary> */ + public virtual int GetTokenType( string tokenName ) + { + if ( tokenNameToTypeMap == null ) + { + return TokenTypes.Invalid; + } + + int value; + if ( tokenNameToTypeMap.TryGetValue( tokenName, out value ) ) + return value; + + return TokenTypes.Invalid; + } + + /** <summary> + * Walk the entire tree and make a node name to nodes mapping. + * For now, use recursion but later nonrecursive version may be + * more efficient. Returns Map<Integer, List> where the List is + * of your AST node type. The Integer is the token type of the node. + * </summary> + * + * <remarks> + * TODO: save this index so that find and visit are faster + * </remarks> + */ + public IDictionary<int, IList> Index( object t ) + { + IDictionary<int, IList> m = new Dictionary<int, IList>(); + IndexCore( t, m ); + return m; + } + + /** <summary>Do the work for index</summary> */ + protected virtual void IndexCore( object t, IDictionary<int, IList> m ) + { + if ( t == null ) + { + return; + } + int ttype = adaptor.GetType( t ); + IList elements; + if ( !m.TryGetValue( ttype, out elements ) || elements == null ) + { + elements = new List<object>(); + m[ttype] = elements; + } + elements.Add( t ); + int n = adaptor.GetChildCount( t ); + for ( int i = 0; i < n; i++ ) + { + object child = adaptor.GetChild( t, i ); + IndexCore( child, m ); + } + } + + class FindTreeWizardVisitor : TreeWizard.Visitor + { + IList _nodes; + public FindTreeWizardVisitor( IList nodes ) + { + _nodes = nodes; + } + public override void Visit( object t ) + { + _nodes.Add( t ); + } + } + class FindTreeWizardContextVisitor : TreeWizard.IContextVisitor + { + TreeWizard _outer; + TreePattern _tpattern; + IList _subtrees; + public FindTreeWizardContextVisitor( TreeWizard outer, TreePattern tpattern, IList subtrees ) + { + _outer = outer; + _tpattern = tpattern; + _subtrees = subtrees; + } + + public void Visit( object t, object parent, int childIndex, IDictionary<string, object> labels ) + { + if ( _outer.ParseCore( t, _tpattern, null ) ) + { + _subtrees.Add( t ); + } + } + } + + /** <summary>Return a List of tree nodes with token type ttype</summary> */ + public virtual IList Find( object t, int ttype ) + { + IList nodes = new List<object>(); + Visit( t, ttype, new FindTreeWizardVisitor( nodes ) ); + return nodes; + } + + /** <summary>Return a List of subtrees matching pattern.</summary> */ + public virtual IList Find( object t, string pattern ) + { + IList subtrees = new List<object>(); + // Create a TreePattern from the pattern + TreePatternLexer tokenizer = new TreePatternLexer( pattern ); + TreePatternParser parser = + new TreePatternParser( tokenizer, this, new TreePatternTreeAdaptor() ); + TreePattern tpattern = (TreePattern)parser.Pattern(); + // don't allow invalid patterns + if ( tpattern == null || + tpattern.IsNil || + tpattern.GetType() == typeof( WildcardTreePattern ) ) + { + return null; + } + int rootTokenType = tpattern.Type; + Visit( t, rootTokenType, new FindTreeWizardContextVisitor( this, tpattern, subtrees ) ); + return subtrees; + } + + public virtual object FindFirst( object t, int ttype ) + { + return null; + } + + public virtual object FindFirst( object t, string pattern ) + { + return null; + } + + /** <summary> + * Visit every ttype node in t, invoking the visitor. This is a quicker + * version of the general visit(t, pattern) method. The labels arg + * of the visitor action method is never set (it's null) since using + * a token type rather than a pattern doesn't let us set a label. + * </summary> + */ + public void Visit( object t, int ttype, IContextVisitor visitor ) + { + VisitCore( t, null, 0, ttype, visitor ); + } + + public void Visit( object t, int ttype, System.Action<object> action ) + { + Visit( t, ttype, new ActionVisitor( action ) ); + } + + /** <summary>Do the recursive work for visit</summary> */ + protected virtual void VisitCore( object t, object parent, int childIndex, int ttype, IContextVisitor visitor ) + { + if ( t == null ) + { + return; + } + if ( adaptor.GetType( t ) == ttype ) + { + visitor.Visit( t, parent, childIndex, null ); + } + int n = adaptor.GetChildCount( t ); + for ( int i = 0; i < n; i++ ) + { + object child = adaptor.GetChild( t, i ); + VisitCore( child, t, i, ttype, visitor ); + } + } + + class VisitTreeWizardContextVisitor : TreeWizard.IContextVisitor + { + TreeWizard _outer; + IContextVisitor _visitor; + IDictionary<string, object> _labels; + TreePattern _tpattern; + + public VisitTreeWizardContextVisitor( TreeWizard outer, IContextVisitor visitor, IDictionary<string, object> labels, TreePattern tpattern ) + { + _outer = outer; + _visitor = visitor; + _labels = labels; + _tpattern = tpattern; + } + + public void Visit( object t, object parent, int childIndex, IDictionary<string, object> unusedlabels ) + { + // the unusedlabels arg is null as visit on token type doesn't set. + _labels.Clear(); + if ( _outer.ParseCore( t, _tpattern, _labels ) ) + { + _visitor.Visit( t, parent, childIndex, _labels ); + } + } + } + + /** <summary> + * For all subtrees that match the pattern, execute the visit action. + * The implementation uses the root node of the pattern in combination + * with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. + * Patterns with wildcard roots are also not allowed. + * </summary> + */ + public void Visit( object t, string pattern, IContextVisitor visitor ) + { + // Create a TreePattern from the pattern + TreePatternLexer tokenizer = new TreePatternLexer( pattern ); + TreePatternParser parser = + new TreePatternParser( tokenizer, this, new TreePatternTreeAdaptor() ); + TreePattern tpattern = (TreePattern)parser.Pattern(); + // don't allow invalid patterns + if ( tpattern == null || + tpattern.IsNil || + tpattern.GetType() == typeof( WildcardTreePattern ) ) + { + return; + } + IDictionary<string, object> labels = new Dictionary<string, object>(); // reused for each _parse + int rootTokenType = tpattern.Type; + Visit( t, rootTokenType, new VisitTreeWizardContextVisitor( this, visitor, labels, tpattern ) ); + } + + /** <summary> + * Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels + * on the various nodes and '.' (dot) as the node/subtree wildcard, + * return true if the pattern matches and fill the labels Map with + * the labels pointing at the appropriate nodes. Return false if + * the pattern is malformed or the tree does not match. + * </summary> + * + * <remarks> + * If a node specifies a text arg in pattern, then that must match + * for that node in t. + * + * TODO: what's a better way to indicate bad pattern? Exceptions are a hassle + * </remarks> + */ + public bool Parse( object t, string pattern, IDictionary<string, object> labels ) + { + TreePatternLexer tokenizer = new TreePatternLexer( pattern ); + TreePatternParser parser = + new TreePatternParser( tokenizer, this, new TreePatternTreeAdaptor() ); + TreePattern tpattern = (TreePattern)parser.Pattern(); + /* + System.out.println("t="+((Tree)t).toStringTree()); + System.out.println("scant="+tpattern.toStringTree()); + */ + bool matched = ParseCore( t, tpattern, labels ); + return matched; + } + + public bool Parse( object t, string pattern ) + { + return Parse( t, pattern, null ); + } + + /** <summary> + * Do the work for parse. Check to see if the t2 pattern fits the + * structure and token types in t1. Check text if the pattern has + * text arguments on nodes. Fill labels map with pointers to nodes + * in tree matched against nodes in pattern with labels. + * </summary> + */ + protected virtual bool ParseCore( object t1, TreePattern tpattern, IDictionary<string, object> labels ) + { + // make sure both are non-null + if ( t1 == null || tpattern == null ) + { + return false; + } + // check roots (wildcard matches anything) + if ( tpattern.GetType() != typeof( WildcardTreePattern ) ) + { + if ( adaptor.GetType( t1 ) != tpattern.Type ) + { + return false; + } + // if pattern has text, check node text + if ( tpattern.hasTextArg && !adaptor.GetText( t1 ).Equals( tpattern.Text ) ) + { + return false; + } + } + if ( tpattern.label != null && labels != null ) + { + // map label in pattern to node in t1 + labels[tpattern.label] = t1; + } + // check children + int n1 = adaptor.GetChildCount( t1 ); + int n2 = tpattern.ChildCount; + if ( n1 != n2 ) + { + return false; + } + for ( int i = 0; i < n1; i++ ) + { + object child1 = adaptor.GetChild( t1, i ); + TreePattern child2 = (TreePattern)tpattern.GetChild( i ); + if ( !ParseCore( child1, child2, labels ) ) + { + return false; + } + } + return true; + } + + /** <summary> + * Create a tree or node from the indicated tree pattern that closely + * follows ANTLR tree grammar tree element syntax: + * + * (root child1 ... child2). + * </summary> + * + * <remarks> + * You can also just pass in a node: ID + * + * Any node can have a text argument: ID[foo] + * (notice there are no quotes around foo--it's clear it's a string). + * + * nil is a special name meaning "give me a nil node". Useful for + * making lists: (nil A B C) is a list of A B C. + * </remarks> + */ + public virtual object Create( string pattern ) + { + TreePatternLexer tokenizer = new TreePatternLexer( pattern ); + TreePatternParser parser = new TreePatternParser( tokenizer, this, adaptor ); + object t = parser.Pattern(); + return t; + } + + /** <summary> + * Compare t1 and t2; return true if token types/text, structure match exactly. + * The trees are examined in their entirety so that (A B) does not match + * (A B C) nor (A (B C)). + * </summary> + * + * <remarks> + * TODO: allow them to pass in a comparator + * TODO: have a version that is nonstatic so it can use instance adaptor + * + * I cannot rely on the tree node's equals() implementation as I make + * no constraints at all on the node types nor interface etc... + * </remarks> + */ + public static bool Equals( object t1, object t2, ITreeAdaptor adaptor ) + { + return EqualsCore( t1, t2, adaptor ); + } + + /** <summary> + * Compare type, structure, and text of two trees, assuming adaptor in + * this instance of a TreeWizard. + * </summary> + */ + public new bool Equals( object t1, object t2 ) + { + return EqualsCore( t1, t2, adaptor ); + } + + protected static bool EqualsCore( object t1, object t2, ITreeAdaptor adaptor ) + { + // make sure both are non-null + if ( t1 == null || t2 == null ) + { + return false; + } + // check roots + if ( adaptor.GetType( t1 ) != adaptor.GetType( t2 ) ) + { + return false; + } + if ( !adaptor.GetText( t1 ).Equals( adaptor.GetText( t2 ) ) ) + { + return false; + } + // check children + int n1 = adaptor.GetChildCount( t1 ); + int n2 = adaptor.GetChildCount( t2 ); + if ( n1 != n2 ) + { + return false; + } + for ( int i = 0; i < n1; i++ ) + { + object child1 = adaptor.GetChild( t1, i ); + object child2 = adaptor.GetChild( t2, i ); + if ( !EqualsCore( child1, child2, adaptor ) ) + { + return false; + } + } + return true; + } + +#if BUILD_INDEXES + // TODO: next stuff taken from CommonTreeNodeStream + + /** <summary> + * Given a node, add this to the reverse index tokenTypeToStreamIndexesMap. + * You can override this method to alter how indexing occurs. The + * default is to create a + * + * Map<Integer token type,ArrayList<Integer stream index>> + * </summary> + * + * <remarks> + * This data structure allows you to find all nodes with type INT in order. + * + * If you really need to find a node of type, say, FUNC quickly then perhaps + * + * Map<Integertoken type,Map<Object tree node,Integer stream index>> + * + * would be better for you. The interior maps map a tree node to + * the index so you don't have to search linearly for a specific node. + * + * If you change this method, you will likely need to change + * getNodeIndex(), which extracts information. + * </remarks> + */ + protected void fillReverseIndex( object node, int streamIndex ) + { + //System.out.println("revIndex "+node+"@"+streamIndex); + if ( tokenTypesToReverseIndex == null ) + { + return; // no indexing if this is empty (nothing of interest) + } + if ( tokenTypeToStreamIndexesMap == null ) + { + tokenTypeToStreamIndexesMap = new Dictionary<int, IList<int>>(); // first indexing op + } + int tokenType = adaptor.getType( node ); + if ( !( tokenTypesToReverseIndex == INDEX_ALL || + tokenTypesToReverseIndex.Contains( tokenType ) ) ) + { + return; // tokenType not of interest + } + IList<int> indexes; + + if ( !tokenTypeToStreamIndexesMap.TryGetValue( tokenType, out indexes ) || indexes == null ) + { + indexes = new List<int>(); // no list yet for this token type + indexes.Add( streamIndex ); // not there yet, add + tokenTypeToStreamIndexesMap[tokenType] = indexes; + } + else + { + if ( !indexes.Contains( streamIndex ) ) + { + indexes.Add( streamIndex ); // not there yet, add + } + } + } + + /** <summary> + * Track the indicated token type in the reverse index. Call this + * repeatedly for each type or use variant with Set argument to + * set all at once. + * </summary> + * + * <param name="tokenType" /> + */ + public void reverseIndex( int tokenType ) + { + if ( tokenTypesToReverseIndex == null ) + { + tokenTypesToReverseIndex = new HashSet<int>(); + } + else if ( tokenTypesToReverseIndex == INDEX_ALL ) + { + return; + } + tokenTypesToReverseIndex.add( tokenType ); + } + + /** <summary> + * Track the indicated token types in the reverse index. Set + * to INDEX_ALL to track all token types. + * </summary> + */ + public void reverseIndex( HashSet<int> tokenTypes ) + { + tokenTypesToReverseIndex = tokenTypes; + } + + /** <summary> + * Given a node pointer, return its index into the node stream. + * This is not its Token stream index. If there is no reverse map + * from node to stream index or the map does not contain entries + * for node's token type, a linear search of entire stream is used. + * </summary> + * + * <remarks> + * Return -1 if exact node pointer not in stream. + * </remarks> + */ + public int getNodeIndex( object node ) + { + //System.out.println("get "+node); + if ( tokenTypeToStreamIndexesMap == null ) + { + return getNodeIndexLinearly( node ); + } + int tokenType = adaptor.getType( node ); + IList<int> indexes; + if ( !tokenTypeToStreamIndexesMap.TryGetValue( tokenType, out indexes ) || indexes == null ) + { + //System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node)); + return getNodeIndexLinearly( node ); + } + for ( int i = 0; i < indexes.size(); i++ ) + { + int streamIndex = indexes[i]; + object n = get( streamIndex ); + if ( n == node ) + { + //System.out.println("found in index; stream index = "+streamIndexI); + return streamIndex; // found it! + } + } + return -1; + } +#endif + + } +} |