aboutsummaryrefslogtreecommitdiff
path: root/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java
diff options
context:
space:
mode:
Diffstat (limited to 'antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java')
-rw-r--r--antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java382
1 files changed, 382 insertions, 0 deletions
diff --git a/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java b/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java
new file mode 100644
index 0000000..3a8a976
--- /dev/null
+++ b/antlr-3.4/tool/src/main/java/org/antlr/analysis/Label.java
@@ -0,0 +1,382 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2010 Terence Parr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.antlr.analysis;
+
+import org.antlr.misc.IntSet;
+import org.antlr.misc.IntervalSet;
+import org.antlr.tool.Grammar;
+
+/** A state machine transition label. A label can be either a simple
+ * label such as a token or character. A label can be a set of char or
+ * tokens. It can be an epsilon transition. It can be a semantic predicate
+ * (which assumes an epsilon transition) or a tree of predicates (in a DFA).
+ * Special label types have to be < 0 to avoid conflict with char.
+ */
+public class Label implements Comparable, Cloneable {
+ public static final int INVALID = -7;
+
+ public static final int ACTION = -6;
+
+ public static final int EPSILON = -5;
+
+ public static final String EPSILON_STR = "<EPSILON>";
+
+ /** label is a semantic predicate; implies label is epsilon also */
+ public static final int SEMPRED = -4;
+
+ /** label is a set of tokens or char */
+ public static final int SET = -3;
+
+ /** End of Token is like EOF for lexer rules. It implies that no more
+ * characters are available and that NFA conversion should terminate
+ * for this path. For example
+ *
+ * A : 'a' 'b' | 'a' ;
+ *
+ * yields a DFA predictor:
+ *
+ * o-a->o-b->1 predict alt 1
+ * |
+ * |-EOT->o predict alt 2
+ *
+ * To generate code for EOT, treat it as the "default" path, which
+ * implies there is no way to mismatch a char for the state from
+ * which the EOT emanates.
+ */
+ public static final int EOT = -2;
+
+ public static final int EOF = -1;
+
+ /** We have labels like EPSILON that are below 0; it's hard to
+ * store them in an array with negative index so use this
+ * constant as an index shift when accessing arrays based upon
+ * token type. If real token type is i, then array index would be
+ * NUM_FAUX_LABELS + i.
+ */
+ public static final int NUM_FAUX_LABELS = -INVALID;
+
+ /** Anything at this value or larger can be considered a simple atom int
+ * for easy comparison during analysis only; faux labels are not used
+ * during parse time for real token types or char values.
+ */
+ public static final int MIN_ATOM_VALUE = EOT;
+
+ // TODO: is 0 a valid unicode char? max is FFFF -1, right?
+ public static final int MIN_CHAR_VALUE = '\u0000';
+ public static final int MAX_CHAR_VALUE = '\uFFFF';
+
+ /** End of rule token type; imaginary token type used only for
+ * local, partial FOLLOW sets to indicate that the local FOLLOW
+ * hit the end of rule. During error recovery, the local FOLLOW
+ * of a token reference may go beyond the end of the rule and have
+ * to use FOLLOW(rule). I have to just shift the token types to 2..n
+ * rather than 1..n to accommodate this imaginary token in my bitsets.
+ * If I didn't use a bitset implementation for runtime sets, I wouldn't
+ * need this. EOF is another candidate for a run time token type for
+ * parsers. Follow sets are not computed for lexers so we do not have
+ * this issue.
+ */
+ public static final int EOR_TOKEN_TYPE =
+ org.antlr.runtime.Token.EOR_TOKEN_TYPE;
+
+ public static final int DOWN = org.antlr.runtime.Token.DOWN;
+ public static final int UP = org.antlr.runtime.Token.UP;
+
+ /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */
+ public static final int MIN_TOKEN_TYPE =
+ org.antlr.runtime.Token.MIN_TOKEN_TYPE;
+
+ /** The wildcard '.' char atom implies all valid characters==UNICODE */
+ //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE);
+
+ /** The token type or character value; or, signifies special label. */
+ protected int label;
+
+ /** A set of token types or character codes if label==SET */
+ // TODO: try IntervalSet for everything
+ protected IntSet labelSet;
+
+ public Label(int label) {
+ this.label = label;
+ }
+
+ /** Make a set label */
+ public Label(IntSet labelSet) {
+ if ( labelSet==null ) {
+ this.label = SET;
+ this.labelSet = IntervalSet.of(INVALID);
+ return;
+ }
+ int singleAtom = labelSet.getSingleElement();
+ if ( singleAtom!=INVALID ) {
+ // convert back to a single atomic element if |labelSet|==1
+ label = singleAtom;
+ return;
+ }
+ this.label = SET;
+ this.labelSet = labelSet;
+ }
+
+ public Object clone() {
+ Label l;
+ try {
+ l = (Label)super.clone();
+ l.label = this.label;
+ l.labelSet = new IntervalSet();
+ l.labelSet.addAll(this.labelSet);
+ }
+ catch (CloneNotSupportedException e) {
+ throw new InternalError();
+ }
+ return l;
+ }
+
+ public void add(Label a) {
+ if ( isAtom() ) {
+ labelSet = IntervalSet.of(label);
+ label=SET;
+ if ( a.isAtom() ) {
+ labelSet.add(a.getAtom());
+ }
+ else if ( a.isSet() ) {
+ labelSet.addAll(a.getSet());
+ }
+ else {
+ throw new IllegalStateException("can't add element to Label of type "+label);
+ }
+ return;
+ }
+ if ( isSet() ) {
+ if ( a.isAtom() ) {
+ labelSet.add(a.getAtom());
+ }
+ else if ( a.isSet() ) {
+ labelSet.addAll(a.getSet());
+ }
+ else {
+ throw new IllegalStateException("can't add element to Label of type "+label);
+ }
+ return;
+ }
+ throw new IllegalStateException("can't add element to Label of type "+label);
+ }
+
+ public boolean isAtom() {
+ return label>=MIN_ATOM_VALUE;
+ }
+
+ public boolean isEpsilon() {
+ return label==EPSILON;
+ }
+
+ public boolean isSemanticPredicate() {
+ return false;
+ }
+
+ public boolean isAction() {
+ return false;
+ }
+
+ public boolean isSet() {
+ return label==SET;
+ }
+
+ /** return the single atom label or INVALID if not a single atom */
+ public int getAtom() {
+ if ( isAtom() ) {
+ return label;
+ }
+ return INVALID;
+ }
+
+ public IntSet getSet() {
+ if ( label!=SET ) {
+ // convert single element to a set if they ask for it.
+ return IntervalSet.of(label);
+ }
+ return labelSet;
+ }
+
+ public void setSet(IntSet set) {
+ label=SET;
+ labelSet = set;
+ }
+
+ public SemanticContext getSemanticContext() {
+ return null;
+ }
+
+ public boolean matches(int atom) {
+ if ( label==atom ) {
+ return true; // handle the single atom case efficiently
+ }
+ if ( isSet() ) {
+ return labelSet.member(atom);
+ }
+ return false;
+ }
+
+ public boolean matches(IntSet set) {
+ if ( isAtom() ) {
+ return set.member(getAtom());
+ }
+ if ( isSet() ) {
+ // matches if intersection non-nil
+ return !getSet().and(set).isNil();
+ }
+ return false;
+ }
+
+
+ public boolean matches(Label other) {
+ if ( other.isSet() ) {
+ return matches(other.getSet());
+ }
+ if ( other.isAtom() ) {
+ return matches(other.getAtom());
+ }
+ return false;
+ }
+
+ public int hashCode() {
+ if (label==SET) {
+ return labelSet.hashCode();
+ }
+ else {
+ return label;
+ }
+ }
+
+ // TODO: do we care about comparing set {A} with atom A? Doesn't now.
+ public boolean equals(Object o) {
+ if ( o==null ) {
+ return false;
+ }
+ if ( this == o ) {
+ return true; // equals if same object
+ }
+ // labels must be the same even if epsilon or set or sempred etc...
+ if ( label!=((Label)o).label ) {
+ return false;
+ }
+ if ( label==SET ) {
+ return this.labelSet.equals(((Label)o).labelSet);
+ }
+ return true; // label values are same, so true
+ }
+
+ public int compareTo(Object o) {
+ return this.label-((Label)o).label;
+ }
+
+ /** Predicates are lists of AST nodes from the NFA created from the
+ * grammar, but the same predicate could be cut/paste into multiple
+ * places in the grammar. I must compare the text of all the
+ * predicates to truly answer whether {p1,p2} .equals {p1,p2}.
+ * Unfortunately, I cannot rely on the AST.equals() to work properly
+ * so I must do a brute force O(n^2) nested traversal of the Set
+ * doing a String compare.
+ *
+ * At this point, Labels are not compared for equals when they are
+ * predicates, but here's the code for future use.
+ */
+ /*
+ protected boolean predicatesEquals(Set others) {
+ Iterator iter = semanticContext.iterator();
+ while (iter.hasNext()) {
+ AST predAST = (AST) iter.next();
+ Iterator inner = semanticContext.iterator();
+ while (inner.hasNext()) {
+ AST otherPredAST = (AST) inner.next();
+ if ( !predAST.getText().equals(otherPredAST.getText()) ) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+ */
+
+ public String toString() {
+ switch (label) {
+ case SET :
+ return labelSet.toString();
+ default :
+ return String.valueOf(label);
+ }
+ }
+
+ public String toString(Grammar g) {
+ switch (label) {
+ case SET :
+ return labelSet.toString(g);
+ default :
+ return g.getTokenDisplayName(label);
+ }
+ }
+
+ /*
+ public String predicatesToString() {
+ if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) {
+ return "!other preds";
+ }
+ StringBuffer buf = new StringBuffer();
+ Iterator iter = semanticContext.iterator();
+ while (iter.hasNext()) {
+ AST predAST = (AST) iter.next();
+ buf.append(predAST.getText());
+ if ( iter.hasNext() ) {
+ buf.append("&");
+ }
+ }
+ return buf.toString();
+ }
+ */
+
+ public static boolean intersect(Label label, Label edgeLabel) {
+ boolean hasIntersection = false;
+ boolean labelIsSet = label.isSet();
+ boolean edgeIsSet = edgeLabel.isSet();
+ if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) {
+ hasIntersection = true;
+ }
+ else if ( labelIsSet && edgeIsSet &&
+ !edgeLabel.getSet().and(label.getSet()).isNil() ) {
+ hasIntersection = true;
+ }
+ else if ( labelIsSet && !edgeIsSet &&
+ label.getSet().member(edgeLabel.label) ) {
+ hasIntersection = true;
+ }
+ else if ( !labelIsSet && edgeIsSet &&
+ edgeLabel.getSet().member(label.label) ) {
+ hasIntersection = true;
+ }
+ return hasIntersection;
+ }
+}