/* * [The "BSD licence"] * Copyright (c) 2005-2008 Terence Parr * All rights reserved. * * Conversion to C#: * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // TODO: build indexes for wizard //#define BUILD_INDEXES namespace Antlr.Runtime.Tree { using System.Collections.Generic; using IList = System.Collections.IList; #if BUILD_INDEXES using IDictionary = System.Collections.IDictionary; #endif /** * Build and navigate trees with this object. Must know about the names * of tokens so you have to pass in a map or array of token names (from which * this class can build the map). I.e., Token DECL means nothing unless the * class can translate it to a token type. * * * * In order to create nodes and navigate, this class needs a TreeAdaptor. * * This class can build a token type -> node index for repeated use or for * iterating over the various nodes with a particular type. * * This class works in conjunction with the TreeAdaptor rather than moving * all this functionality into the adaptor. An adaptor helps build and * navigate trees using methods. This class helps you do it with string * patterns like "(A B C)". You can create a tree from that pattern or * match subtrees against it. * */ public class TreeWizard { protected ITreeAdaptor adaptor; protected IDictionary tokenNameToTypeMap; public interface IContextVisitor { // TODO: should this be called visit or something else? void Visit( object t, object parent, int childIndex, IDictionary labels ); } public abstract class Visitor : IContextVisitor { public virtual void Visit( object t, object parent, int childIndex, IDictionary labels ) { Visit( t ); } public abstract void Visit( object t ); } class ActionVisitor : Visitor { System.Action _action; public ActionVisitor( System.Action action ) { _action = action; } public override void Visit( object t ) { _action( t ); } } /** * When using %label:TOKENNAME in a tree for parse(), we must * track the label. * */ public class TreePattern : CommonTree { public string label; public bool hasTextArg; public TreePattern( IToken payload ) : base( payload ) { } public override string ToString() { if ( label != null ) { return "%" + label + ":"; //+ base.ToString(); } else { return base.ToString(); } } } public class WildcardTreePattern : TreePattern { public WildcardTreePattern( IToken payload ) : base( payload ) { } } /** This adaptor creates TreePattern objects for use during scan() */ public class TreePatternTreeAdaptor : CommonTreeAdaptor { public override object Create( IToken payload ) { return new TreePattern( payload ); } } #if BUILD_INDEXES // TODO: build indexes for the wizard /** * During fillBuffer(), we can make a reverse index from a set * of token types of interest to the list of indexes into the * node stream. This lets us convert a node pointer to a * stream index semi-efficiently for a list of interesting * nodes such as function definition nodes (you'll want to seek * to their bodies for an interpreter). Also useful for doing * dynamic searches; i.e., go find me all PLUS nodes. * */ protected IDictionary> tokenTypeToStreamIndexesMap; /** * If tokenTypesToReverseIndex set to INDEX_ALL then indexing * occurs for all token types. * */ public static readonly HashSet INDEX_ALL = new HashSet(); /** * A set of token types user would like to index for faster lookup. * If this is INDEX_ALL, then all token types are tracked. If null, * then none are indexed. * */ protected HashSet tokenTypesToReverseIndex = null; #endif public TreeWizard( ITreeAdaptor adaptor ) { this.adaptor = adaptor; } public TreeWizard( ITreeAdaptor adaptor, IDictionary tokenNameToTypeMap ) { this.adaptor = adaptor; this.tokenNameToTypeMap = tokenNameToTypeMap; } public TreeWizard( ITreeAdaptor adaptor, string[] tokenNames ) { this.adaptor = adaptor; this.tokenNameToTypeMap = ComputeTokenTypes( tokenNames ); } public TreeWizard( string[] tokenNames ) : this( new CommonTreeAdaptor(), tokenNames ) { } /** * Compute a Map<String, Integer> that is an inverted index of * tokenNames (which maps int token types to names). * */ public virtual IDictionary ComputeTokenTypes( string[] tokenNames ) { IDictionary m = new Dictionary(); if ( tokenNames == null ) { return m; } for ( int ttype = TokenTypes.Min; ttype < tokenNames.Length; ttype++ ) { string name = tokenNames[ttype]; m[name] = ttype; } return m; } /** Using the map of token names to token types, return the type. */ public virtual int GetTokenType( string tokenName ) { if ( tokenNameToTypeMap == null ) { return TokenTypes.Invalid; } int value; if ( tokenNameToTypeMap.TryGetValue( tokenName, out value ) ) return value; return TokenTypes.Invalid; } /** * Walk the entire tree and make a node name to nodes mapping. * For now, use recursion but later nonrecursive version may be * more efficient. Returns Map<Integer, List> where the List is * of your AST node type. The Integer is the token type of the node. * * * * TODO: save this index so that find and visit are faster * */ public IDictionary Index( object t ) { IDictionary m = new Dictionary(); IndexCore( t, m ); return m; } /** Do the work for index */ protected virtual void IndexCore( object t, IDictionary m ) { if ( t == null ) { return; } int ttype = adaptor.GetType( t ); IList elements; if ( !m.TryGetValue( ttype, out elements ) || elements == null ) { elements = new List(); m[ttype] = elements; } elements.Add( t ); int n = adaptor.GetChildCount( t ); for ( int i = 0; i < n; i++ ) { object child = adaptor.GetChild( t, i ); IndexCore( child, m ); } } class FindTreeWizardVisitor : TreeWizard.Visitor { IList _nodes; public FindTreeWizardVisitor( IList nodes ) { _nodes = nodes; } public override void Visit( object t ) { _nodes.Add( t ); } } class FindTreeWizardContextVisitor : TreeWizard.IContextVisitor { TreeWizard _outer; TreePattern _tpattern; IList _subtrees; public FindTreeWizardContextVisitor( TreeWizard outer, TreePattern tpattern, IList subtrees ) { _outer = outer; _tpattern = tpattern; _subtrees = subtrees; } public void Visit( object t, object parent, int childIndex, IDictionary labels ) { if ( _outer.ParseCore( t, _tpattern, null ) ) { _subtrees.Add( t ); } } } /** Return a List of tree nodes with token type ttype */ public virtual IList Find( object t, int ttype ) { IList nodes = new List(); Visit( t, ttype, new FindTreeWizardVisitor( nodes ) ); return nodes; } /** Return a List of subtrees matching pattern. */ public virtual IList Find( object t, string pattern ) { IList subtrees = new List(); // Create a TreePattern from the pattern TreePatternLexer tokenizer = new TreePatternLexer( pattern ); TreePatternParser parser = new TreePatternParser( tokenizer, this, new TreePatternTreeAdaptor() ); TreePattern tpattern = (TreePattern)parser.Pattern(); // don't allow invalid patterns if ( tpattern == null || tpattern.IsNil || tpattern.GetType() == typeof( WildcardTreePattern ) ) { return null; } int rootTokenType = tpattern.Type; Visit( t, rootTokenType, new FindTreeWizardContextVisitor( this, tpattern, subtrees ) ); return subtrees; } public virtual object FindFirst( object t, int ttype ) { return null; } public virtual object FindFirst( object t, string pattern ) { return null; } /** * Visit every ttype node in t, invoking the visitor. This is a quicker * version of the general visit(t, pattern) method. The labels arg * of the visitor action method is never set (it's null) since using * a token type rather than a pattern doesn't let us set a label. * */ public void Visit( object t, int ttype, IContextVisitor visitor ) { VisitCore( t, null, 0, ttype, visitor ); } public void Visit( object t, int ttype, System.Action action ) { Visit( t, ttype, new ActionVisitor( action ) ); } /** Do the recursive work for visit */ protected virtual void VisitCore( object t, object parent, int childIndex, int ttype, IContextVisitor visitor ) { if ( t == null ) { return; } if ( adaptor.GetType( t ) == ttype ) { visitor.Visit( t, parent, childIndex, null ); } int n = adaptor.GetChildCount( t ); for ( int i = 0; i < n; i++ ) { object child = adaptor.GetChild( t, i ); VisitCore( child, t, i, ttype, visitor ); } } class VisitTreeWizardContextVisitor : TreeWizard.IContextVisitor { TreeWizard _outer; IContextVisitor _visitor; IDictionary _labels; TreePattern _tpattern; public VisitTreeWizardContextVisitor( TreeWizard outer, IContextVisitor visitor, IDictionary labels, TreePattern tpattern ) { _outer = outer; _visitor = visitor; _labels = labels; _tpattern = tpattern; } public void Visit( object t, object parent, int childIndex, IDictionary unusedlabels ) { // the unusedlabels arg is null as visit on token type doesn't set. _labels.Clear(); if ( _outer.ParseCore( t, _tpattern, _labels ) ) { _visitor.Visit( t, parent, childIndex, _labels ); } } } /** * For all subtrees that match the pattern, execute the visit action. * The implementation uses the root node of the pattern in combination * with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. * Patterns with wildcard roots are also not allowed. * */ public void Visit( object t, string pattern, IContextVisitor visitor ) { // Create a TreePattern from the pattern TreePatternLexer tokenizer = new TreePatternLexer( pattern ); TreePatternParser parser = new TreePatternParser( tokenizer, this, new TreePatternTreeAdaptor() ); TreePattern tpattern = (TreePattern)parser.Pattern(); // don't allow invalid patterns if ( tpattern == null || tpattern.IsNil || tpattern.GetType() == typeof( WildcardTreePattern ) ) { return; } IDictionary labels = new Dictionary(); // reused for each _parse int rootTokenType = tpattern.Type; Visit( t, rootTokenType, new VisitTreeWizardContextVisitor( this, visitor, labels, tpattern ) ); } /** * Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels * on the various nodes and '.' (dot) as the node/subtree wildcard, * return true if the pattern matches and fill the labels Map with * the labels pointing at the appropriate nodes. Return false if * the pattern is malformed or the tree does not match. * * * * If a node specifies a text arg in pattern, then that must match * for that node in t. * * TODO: what's a better way to indicate bad pattern? Exceptions are a hassle * */ public bool Parse( object t, string pattern, IDictionary labels ) { TreePatternLexer tokenizer = new TreePatternLexer( pattern ); TreePatternParser parser = new TreePatternParser( tokenizer, this, new TreePatternTreeAdaptor() ); TreePattern tpattern = (TreePattern)parser.Pattern(); /* System.out.println("t="+((Tree)t).toStringTree()); System.out.println("scant="+tpattern.toStringTree()); */ bool matched = ParseCore( t, tpattern, labels ); return matched; } public bool Parse( object t, string pattern ) { return Parse( t, pattern, null ); } /** * Do the work for parse. Check to see if the t2 pattern fits the * structure and token types in t1. Check text if the pattern has * text arguments on nodes. Fill labels map with pointers to nodes * in tree matched against nodes in pattern with labels. * */ protected virtual bool ParseCore( object t1, TreePattern tpattern, IDictionary labels ) { // make sure both are non-null if ( t1 == null || tpattern == null ) { return false; } // check roots (wildcard matches anything) if ( tpattern.GetType() != typeof( WildcardTreePattern ) ) { if ( adaptor.GetType( t1 ) != tpattern.Type ) { return false; } // if pattern has text, check node text if ( tpattern.hasTextArg && !adaptor.GetText( t1 ).Equals( tpattern.Text ) ) { return false; } } if ( tpattern.label != null && labels != null ) { // map label in pattern to node in t1 labels[tpattern.label] = t1; } // check children int n1 = adaptor.GetChildCount( t1 ); int n2 = tpattern.ChildCount; if ( n1 != n2 ) { return false; } for ( int i = 0; i < n1; i++ ) { object child1 = adaptor.GetChild( t1, i ); TreePattern child2 = (TreePattern)tpattern.GetChild( i ); if ( !ParseCore( child1, child2, labels ) ) { return false; } } return true; } /** * Create a tree or node from the indicated tree pattern that closely * follows ANTLR tree grammar tree element syntax: * * (root child1 ... child2). * * * * You can also just pass in a node: ID * * Any node can have a text argument: ID[foo] * (notice there are no quotes around foo--it's clear it's a string). * * nil is a special name meaning "give me a nil node". Useful for * making lists: (nil A B C) is a list of A B C. * */ public virtual object Create( string pattern ) { TreePatternLexer tokenizer = new TreePatternLexer( pattern ); TreePatternParser parser = new TreePatternParser( tokenizer, this, adaptor ); object t = parser.Pattern(); return t; } /** * Compare t1 and t2; return true if token types/text, structure match exactly. * The trees are examined in their entirety so that (A B) does not match * (A B C) nor (A (B C)). * * * * TODO: allow them to pass in a comparator * TODO: have a version that is nonstatic so it can use instance adaptor * * I cannot rely on the tree node's equals() implementation as I make * no constraints at all on the node types nor interface etc... * */ public static bool Equals( object t1, object t2, ITreeAdaptor adaptor ) { return EqualsCore( t1, t2, adaptor ); } /** * Compare type, structure, and text of two trees, assuming adaptor in * this instance of a TreeWizard. * */ public new bool Equals( object t1, object t2 ) { return EqualsCore( t1, t2, adaptor ); } protected static bool EqualsCore( object t1, object t2, ITreeAdaptor adaptor ) { // make sure both are non-null if ( t1 == null || t2 == null ) { return false; } // check roots if ( adaptor.GetType( t1 ) != adaptor.GetType( t2 ) ) { return false; } if ( !adaptor.GetText( t1 ).Equals( adaptor.GetText( t2 ) ) ) { return false; } // check children int n1 = adaptor.GetChildCount( t1 ); int n2 = adaptor.GetChildCount( t2 ); if ( n1 != n2 ) { return false; } for ( int i = 0; i < n1; i++ ) { object child1 = adaptor.GetChild( t1, i ); object child2 = adaptor.GetChild( t2, i ); if ( !EqualsCore( child1, child2, adaptor ) ) { return false; } } return true; } #if BUILD_INDEXES // TODO: next stuff taken from CommonTreeNodeStream /** * Given a node, add this to the reverse index tokenTypeToStreamIndexesMap. * You can override this method to alter how indexing occurs. The * default is to create a * * Map<Integer token type,ArrayList<Integer stream index>> * * * * This data structure allows you to find all nodes with type INT in order. * * If you really need to find a node of type, say, FUNC quickly then perhaps * * Map<Integertoken type,Map<Object tree node,Integer stream index>> * * would be better for you. The interior maps map a tree node to * the index so you don't have to search linearly for a specific node. * * If you change this method, you will likely need to change * getNodeIndex(), which extracts information. * */ protected void fillReverseIndex( object node, int streamIndex ) { //System.out.println("revIndex "+node+"@"+streamIndex); if ( tokenTypesToReverseIndex == null ) { return; // no indexing if this is empty (nothing of interest) } if ( tokenTypeToStreamIndexesMap == null ) { tokenTypeToStreamIndexesMap = new Dictionary>(); // first indexing op } int tokenType = adaptor.getType( node ); if ( !( tokenTypesToReverseIndex == INDEX_ALL || tokenTypesToReverseIndex.Contains( tokenType ) ) ) { return; // tokenType not of interest } IList indexes; if ( !tokenTypeToStreamIndexesMap.TryGetValue( tokenType, out indexes ) || indexes == null ) { indexes = new List(); // no list yet for this token type indexes.Add( streamIndex ); // not there yet, add tokenTypeToStreamIndexesMap[tokenType] = indexes; } else { if ( !indexes.Contains( streamIndex ) ) { indexes.Add( streamIndex ); // not there yet, add } } } /** * Track the indicated token type in the reverse index. Call this * repeatedly for each type or use variant with Set argument to * set all at once. * * * */ public void reverseIndex( int tokenType ) { if ( tokenTypesToReverseIndex == null ) { tokenTypesToReverseIndex = new HashSet(); } else if ( tokenTypesToReverseIndex == INDEX_ALL ) { return; } tokenTypesToReverseIndex.add( tokenType ); } /** * Track the indicated token types in the reverse index. Set * to INDEX_ALL to track all token types. * */ public void reverseIndex( HashSet tokenTypes ) { tokenTypesToReverseIndex = tokenTypes; } /** * Given a node pointer, return its index into the node stream. * This is not its Token stream index. If there is no reverse map * from node to stream index or the map does not contain entries * for node's token type, a linear search of entire stream is used. * * * * Return -1 if exact node pointer not in stream. * */ public int getNodeIndex( object node ) { //System.out.println("get "+node); if ( tokenTypeToStreamIndexesMap == null ) { return getNodeIndexLinearly( node ); } int tokenType = adaptor.getType( node ); IList indexes; if ( !tokenTypeToStreamIndexesMap.TryGetValue( tokenType, out indexes ) || indexes == null ) { //System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node)); return getNodeIndexLinearly( node ); } for ( int i = 0; i < indexes.size(); i++ ) { int streamIndex = indexes[i]; object n = get( streamIndex ); if ( n == node ) { //System.out.println("found in index; stream index = "+streamIndexI); return streamIndex; // found it! } } return -1; } #endif } }