diff options
Diffstat (limited to 'antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java')
-rw-r--r-- | antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java b/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java new file mode 100644 index 0000000..3a8a976 --- /dev/null +++ b/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java @@ -0,0 +1,382 @@ +/* + * [The "BSD license"] + * Copyright (c) 2010 Terence Parr + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.analysis; + +import org.antlr.misc.IntSet; +import org.antlr.misc.IntervalSet; +import org.antlr.tool.Grammar; + +/** A state machine transition label. A label can be either a simple + * label such as a token or character. A label can be a set of char or + * tokens. It can be an epsilon transition. It can be a semantic predicate + * (which assumes an epsilon transition) or a tree of predicates (in a DFA). + * Special label types have to be < 0 to avoid conflict with char. + */ +public class Label implements Comparable, Cloneable { + public static final int INVALID = -7; + + public static final int ACTION = -6; + + public static final int EPSILON = -5; + + public static final String EPSILON_STR = "<EPSILON>"; + + /** label is a semantic predicate; implies label is epsilon also */ + public static final int SEMPRED = -4; + + /** label is a set of tokens or char */ + public static final int SET = -3; + + /** End of Token is like EOF for lexer rules. It implies that no more + * characters are available and that NFA conversion should terminate + * for this path. For example + * + * A : 'a' 'b' | 'a' ; + * + * yields a DFA predictor: + * + * o-a->o-b->1 predict alt 1 + * | + * |-EOT->o predict alt 2 + * + * To generate code for EOT, treat it as the "default" path, which + * implies there is no way to mismatch a char for the state from + * which the EOT emanates. + */ + public static final int EOT = -2; + + public static final int EOF = -1; + + /** We have labels like EPSILON that are below 0; it's hard to + * store them in an array with negative index so use this + * constant as an index shift when accessing arrays based upon + * token type. If real token type is i, then array index would be + * NUM_FAUX_LABELS + i. + */ + public static final int NUM_FAUX_LABELS = -INVALID; + + /** Anything at this value or larger can be considered a simple atom int + * for easy comparison during analysis only; faux labels are not used + * during parse time for real token types or char values. + */ + public static final int MIN_ATOM_VALUE = EOT; + + // TODO: is 0 a valid unicode char? max is FFFF -1, right? + public static final int MIN_CHAR_VALUE = '\u0000'; + public static final int MAX_CHAR_VALUE = '\uFFFF'; + + /** End of rule token type; imaginary token type used only for + * local, partial FOLLOW sets to indicate that the local FOLLOW + * hit the end of rule. During error recovery, the local FOLLOW + * of a token reference may go beyond the end of the rule and have + * to use FOLLOW(rule). I have to just shift the token types to 2..n + * rather than 1..n to accommodate this imaginary token in my bitsets. + * If I didn't use a bitset implementation for runtime sets, I wouldn't + * need this. EOF is another candidate for a run time token type for + * parsers. Follow sets are not computed for lexers so we do not have + * this issue. + */ + public static final int EOR_TOKEN_TYPE = + org.antlr.runtime.Token.EOR_TOKEN_TYPE; + + public static final int DOWN = org.antlr.runtime.Token.DOWN; + public static final int UP = org.antlr.runtime.Token.UP; + + /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */ + public static final int MIN_TOKEN_TYPE = + org.antlr.runtime.Token.MIN_TOKEN_TYPE; + + /** The wildcard '.' char atom implies all valid characters==UNICODE */ + //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE); + + /** The token type or character value; or, signifies special label. */ + protected int label; + + /** A set of token types or character codes if label==SET */ + // TODO: try IntervalSet for everything + protected IntSet labelSet; + + public Label(int label) { + this.label = label; + } + + /** Make a set label */ + public Label(IntSet labelSet) { + if ( labelSet==null ) { + this.label = SET; + this.labelSet = IntervalSet.of(INVALID); + return; + } + int singleAtom = labelSet.getSingleElement(); + if ( singleAtom!=INVALID ) { + // convert back to a single atomic element if |labelSet|==1 + label = singleAtom; + return; + } + this.label = SET; + this.labelSet = labelSet; + } + + public Object clone() { + Label l; + try { + l = (Label)super.clone(); + l.label = this.label; + l.labelSet = new IntervalSet(); + l.labelSet.addAll(this.labelSet); + } + catch (CloneNotSupportedException e) { + throw new InternalError(); + } + return l; + } + + public void add(Label a) { + if ( isAtom() ) { + labelSet = IntervalSet.of(label); + label=SET; + if ( a.isAtom() ) { + labelSet.add(a.getAtom()); + } + else if ( a.isSet() ) { + labelSet.addAll(a.getSet()); + } + else { + throw new IllegalStateException("can't add element to Label of type "+label); + } + return; + } + if ( isSet() ) { + if ( a.isAtom() ) { + labelSet.add(a.getAtom()); + } + else if ( a.isSet() ) { + labelSet.addAll(a.getSet()); + } + else { + throw new IllegalStateException("can't add element to Label of type "+label); + } + return; + } + throw new IllegalStateException("can't add element to Label of type "+label); + } + + public boolean isAtom() { + return label>=MIN_ATOM_VALUE; + } + + public boolean isEpsilon() { + return label==EPSILON; + } + + public boolean isSemanticPredicate() { + return false; + } + + public boolean isAction() { + return false; + } + + public boolean isSet() { + return label==SET; + } + + /** return the single atom label or INVALID if not a single atom */ + public int getAtom() { + if ( isAtom() ) { + return label; + } + return INVALID; + } + + public IntSet getSet() { + if ( label!=SET ) { + // convert single element to a set if they ask for it. + return IntervalSet.of(label); + } + return labelSet; + } + + public void setSet(IntSet set) { + label=SET; + labelSet = set; + } + + public SemanticContext getSemanticContext() { + return null; + } + + public boolean matches(int atom) { + if ( label==atom ) { + return true; // handle the single atom case efficiently + } + if ( isSet() ) { + return labelSet.member(atom); + } + return false; + } + + public boolean matches(IntSet set) { + if ( isAtom() ) { + return set.member(getAtom()); + } + if ( isSet() ) { + // matches if intersection non-nil + return !getSet().and(set).isNil(); + } + return false; + } + + + public boolean matches(Label other) { + if ( other.isSet() ) { + return matches(other.getSet()); + } + if ( other.isAtom() ) { + return matches(other.getAtom()); + } + return false; + } + + public int hashCode() { + if (label==SET) { + return labelSet.hashCode(); + } + else { + return label; + } + } + + // TODO: do we care about comparing set {A} with atom A? Doesn't now. + public boolean equals(Object o) { + if ( o==null ) { + return false; + } + if ( this == o ) { + return true; // equals if same object + } + // labels must be the same even if epsilon or set or sempred etc... + if ( label!=((Label)o).label ) { + return false; + } + if ( label==SET ) { + return this.labelSet.equals(((Label)o).labelSet); + } + return true; // label values are same, so true + } + + public int compareTo(Object o) { + return this.label-((Label)o).label; + } + + /** Predicates are lists of AST nodes from the NFA created from the + * grammar, but the same predicate could be cut/paste into multiple + * places in the grammar. I must compare the text of all the + * predicates to truly answer whether {p1,p2} .equals {p1,p2}. + * Unfortunately, I cannot rely on the AST.equals() to work properly + * so I must do a brute force O(n^2) nested traversal of the Set + * doing a String compare. + * + * At this point, Labels are not compared for equals when they are + * predicates, but here's the code for future use. + */ + /* + protected boolean predicatesEquals(Set others) { + Iterator iter = semanticContext.iterator(); + while (iter.hasNext()) { + AST predAST = (AST) iter.next(); + Iterator inner = semanticContext.iterator(); + while (inner.hasNext()) { + AST otherPredAST = (AST) inner.next(); + if ( !predAST.getText().equals(otherPredAST.getText()) ) { + return false; + } + } + } + return true; + } + */ + + public String toString() { + switch (label) { + case SET : + return labelSet.toString(); + default : + return String.valueOf(label); + } + } + + public String toString(Grammar g) { + switch (label) { + case SET : + return labelSet.toString(g); + default : + return g.getTokenDisplayName(label); + } + } + + /* + public String predicatesToString() { + if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) { + return "!other preds"; + } + StringBuffer buf = new StringBuffer(); + Iterator iter = semanticContext.iterator(); + while (iter.hasNext()) { + AST predAST = (AST) iter.next(); + buf.append(predAST.getText()); + if ( iter.hasNext() ) { + buf.append("&"); + } + } + return buf.toString(); + } + */ + + public static boolean intersect(Label label, Label edgeLabel) { + boolean hasIntersection = false; + boolean labelIsSet = label.isSet(); + boolean edgeIsSet = edgeLabel.isSet(); + if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) { + hasIntersection = true; + } + else if ( labelIsSet && edgeIsSet && + !edgeLabel.getSet().and(label.getSet()).isNil() ) { + hasIntersection = true; + } + else if ( labelIsSet && !edgeIsSet && + label.getSet().member(edgeLabel.label) ) { + hasIntersection = true; + } + else if ( !labelIsSet && edgeIsSet && + edgeLabel.getSet().member(label.label) ) { + hasIntersection = true; + } + return hasIntersection; + } +} |