package org.unicode.cldr.draft; import java.text.ParsePosition; import java.util.Arrays; import java.util.List; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; public class StateMachine { static boolean SHOW_STATE_TRANSITIONS = false; // Utility.getProperty("transitions", false); private static final short START = 0; static final short EXIT = -1; static final short POP = -2; static final short ERROR = -3; static final short UNDEFINED = -4; private final UnicodeMap[] stateToData; private final StateObjectBuilderFactory factory; private String[] stateNames; private String[] actionNames; StateMachine(List stateToData, StateObjectBuilderFactory factory, List stateNames, List actionNames) { this.stateToData = stateToData.toArray(new UnicodeMap[stateToData.size()]); this.stateNames = stateNames.toArray(new String[stateNames.size()]); this.actionNames = actionNames.toArray(new String[actionNames.size()]); this.factory = factory; } /** * Immutable internal object that contains the row of a state machine */ public static class StateAction { boolean advanceToNextCodePoint = false; short nextState = StateMachine.UNDEFINED; short pushState = StateMachine.UNDEFINED; short action = -1; public boolean equals(Object other) { StateAction that = (StateAction) other; return advanceToNextCodePoint == that.advanceToNextCodePoint && nextState == that.nextState && pushState == that.pushState && action == that.action; } } public interface StateObjectBuilderFactory { public StateObjectBuilder getInstance(); } public static class StateObjectBuilder { protected CharSequence string; private StateMachine stateMachine; private short[] stateStack = new short[100]; private int stackSize = 0; private final void push(short state) { stateStack[stackSize++] = state; } private final short pop() { return stateStack[--stackSize]; } protected void init(CharSequence string, StateMachine stateMachine, int start) { this.string = string; this.stateMachine = stateMachine; } protected T getResult() { return null; } protected String getActionName(short action) { return stateMachine.getActionName(action); } protected void handle(int position, StateAction action) { } public String toString() { StringBuilder result = new StringBuilder("["); for (int i = stackSize - 1; i >= 0; --i) { if (i != stackSize - 1) { result.append(", "); } result.append(stateMachine.getStateName(stateStack[i])); } return result.append("]").toString(); } } public String toString(StateAction action) { return "{" + (action.advanceToNextCodePoint ? "+" : "") + getStateName(action.nextState) + (action.pushState == StateMachine.UNDEFINED ? "" : " ^" + getStateName(action.pushState)) + (action.action < 0 ? "" : " " + getActionName(action.action)) + "}"; } private String getStateName(short nextState) { switch (nextState) { case POP: return "pop"; case EXIT: return "exit"; case ERROR: return "errorDeath"; default: return (stateNames == null ? String.valueOf(nextState) : stateNames[nextState]); } } private String getActionName(short action) { return (actionNames == null ? String.valueOf(action) : actionNames[action]); } public List getActionNames() { return Arrays.asList(actionNames); } public String toString() { StringBuffer output = new StringBuffer(); int i = 0; for (UnicodeMap unicodeMap : stateToData) { String stateName = stateNames == null ? String.valueOf(i) : stateNames[i]; output.append(stateName).append(":\n"); if (unicodeMap == null) { output.append("\tnull\n"); } else { for (Object action : unicodeMap.getAvailableValues()) { UnicodeSet sources = unicodeMap.keySet(action); output.append("\t" + sources.toPattern(false) + "\t" + toString((StateAction) action) + "\n"); } } i++; } return output.toString(); } public T parse(CharSequence string, ParsePosition parsePosition) { int i = parsePosition.getIndex(); if (i < 0 || i >= string.length()) { throw new StringIndexOutOfBoundsException(i); } int cp; short state = START; StateObjectBuilder stateObject = factory.getInstance(); stateObject.init(string, this, i); cp = Character.codePointAt(string, 0); if (SHOW_STATE_TRANSITIONS) { System.out.println("@Fetched: " + UTF16.valueOf(cp)); } while (true) { StateAction action = (StateAction) stateToData[state].getValue(cp); if (action.pushState >= 0) { stateObject.push(action.pushState); if (SHOW_STATE_TRANSITIONS) { System.out.println("\t@Pushed " + stateObject); } } if (action.action >= 0) { stateObject.handle(i, action); } switch (state = action.nextState) { default: if (SHOW_STATE_TRANSITIONS) { System.out.println("\t@NextState " + getStateName(state)); } break; case POP: if (SHOW_STATE_TRANSITIONS) { System.out.println("\t@Popping " + stateObject); } state = stateObject.pop(); break; case EXIT: parsePosition.setIndex(i); return stateObject.getResult(); case ERROR: parsePosition.setErrorIndex(i); throw new IllegalArgumentException(getActionName(action.action)); } if (action.advanceToNextCodePoint) { i += UTF16.getCharCount(cp); cp = i < string.length() ? Character.codePointAt(string, i) : 0xFFFF; if (SHOW_STATE_TRANSITIONS) { System.out.println("@Fetched: " + UTF16.valueOf(cp)); } } } } }