1 files changed, 400 insertions, 0 deletions
diff --git a/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
new file mode 100644
index 0000000..c3e463b
--- /dev/null
+++ b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.common.jimfs;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * Translates globs to regex patterns.
+ *
+ * @author Colin Decker
+ */
+final class GlobToRegex {
+
+  /**
+   * Converts the given glob to a regular expression pattern. The given separators determine what
+   * characters the resulting expression breaks on for glob expressions such as * which should not
+   * cross directory boundaries.
+   *
+   * <p>Basic conversions (assuming / as only separator):
+   *
+   * <pre>{@code
+   * ?        = [^/]
+   * *        = [^/]*
+   * **       = .*
+   * [a-z]    = [[^/]&&[a-z]]
+   * [!a-z]   = [[^/]&&[^a-z]]
+   * {a,b,c}  = (a|b|c)
+   * }</pre>
+   */
+  public static String toRegex(String glob, String separators) {
+    return new GlobToRegex(glob, separators).convert();
+  }
+
+  private static final InternalCharMatcher REGEX_RESERVED =
+      InternalCharMatcher.anyOf("^$.?+*\\[]{}()"); // chars appendNormal prefixes with a backslash
+
+  private final String glob; // the glob being converted
+  private final String separators; // the separator characters, as passed to toRegex
+  private final InternalCharMatcher separatorMatcher; // built from separators via anyOf
+
+  private final StringBuilder builder = new StringBuilder(); // the regex accumulates here
+  private final Deque<State> states = new ArrayDeque<>(); // state stack; head is the current state
+  private int index; // position in glob of the char being processed; reported in syntax errors
+
+  private GlobToRegex(String glob, String separators) { // neither argument may be null
+    this.glob = checkNotNull(glob);
+    this.separators = checkNotNull(separators); // reject null up front, consistent with glob
+    this.separatorMatcher = InternalCharMatcher.anyOf(separators);
+  }
+
+  /**
+   * Feeds the glob, one character at a time, to the state on top of the {@code states} stack, which
+   * may push or pop states as it goes, then finishes every state left open and returns the regex.
+   */
+  private String convert() {
+    pushState(NORMAL);
+    for (index = 0; index < glob.length(); index++) {
+      currentState().process(this, glob.charAt(index));
+    }
+    // Top-down, not just the top: STAR finishes quietly and could hide an unclosed { beneath it.
+    for (State open : states) {
+      open.finish(this);
+    }
+    return builder.toString();
+  }
+
+  /** Makes {@code state} the current state by placing it on top of the state stack. */
+  private void pushState(State state) {
+    states.addFirst(state);
+  }
+
+  /** Discards the current state, making the state beneath it current again. */
+  private void popState() {
+    states.removeFirst();
+  }
+
+  /** Returns the state on top of the stack without removing it. */
+  private State currentState() {
+    return states.peekFirst();
+  }
+
+  /** Always throws a {@link PatternSyntaxException} for the glob at the current index. */
+  private PatternSyntaxException syntaxError(String desc) {
+    throw new PatternSyntaxException(desc, this.glob, this.index);
+  }
+
+  /** Writes {@code c} to the regex unchanged, with no escaping applied. */
+  private void appendExact(char c) {
+    this.builder.append(c);
+  }
+
+  /** Appends the regex for one glob character that is either a separator or a plain character. */
+  private void append(char c) {
+    if (!separatorMatcher.matches(c)) {
+      appendNormal(c);
+      return;
+    }
+    appendSeparator();
+  }
+
+  /**
+   * Appends {@code c} so that the regex matches it literally: a backslash is written first when
+   * {@code c} is one of the {@code REGEX_RESERVED} characters.
+   */
+  private void appendNormal(char c) {
+    builder.append(REGEX_RESERVED.matches(c) ? "\\" : "").append(c);
+  }
+
+  /** Appends a regex matching one separator: a literal if there is only one, else a class. */
+  private void appendSeparator() {
+    if (separators.length() == 1) {
+      appendNormal(separators.charAt(0));
+      return;
+    }
+    builder.append("[");
+    for (char separator : separators.toCharArray()) {
+      appendInBracket(separator);
+    }
+    builder.append(']');
+  }
+
+  /** Appends a negated character class, i.e. one matching any single non-separator character. */
+  private void appendNonSeparator() {
+    builder.append('[').append('^');
+    for (char separator : separators.toCharArray()) {
+      appendInBracket(separator);
+    }
+    builder.append("]");
+  }
+
+  /** Appends the regex for a glob {@code ?}: any one character that is not a separator. */
+  private void appendQuestionMark() {
+    this.appendNonSeparator();
+  }
+
+  /** Appends the regex for a single glob {@code *}: zero or more non-separator characters. */
+  private void appendStar() {
+    appendNonSeparator();
+    builder.append("*");
+  }
+
+  /** Appends the regex for a glob {@code **}, written as {@code .*}. */
+  private void appendStarStar() {
+    builder.append('.').append('*');
+  }
+
+  /** Opens a glob {@code [} section as a non-separator class intersected with a nested class. */
+  private void appendBracketStart() {
+    builder.append("[");
+    appendNonSeparator();
+    builder.append("&&").append('[');
+  }
+
+  /** Closes a glob {@code [} section, ending both the nested class and the enclosing one. */
+  private void appendBracketEnd() {
+    builder.append(']').append(']');
+  }
+
+  /** Appends a glob [] section character (or a separator) inside a regex character class. */
+  private void appendInBracket(char c) {
+    // Inside a regex class, \ starts an escape, [ opens a nested class, a leading ^ negates and
+    // && intersects; a glob [] section gives them no such meaning, so escape all four characters.
+    if (c == '\\' || c == '[' || c == '^' || c == '&') {
+      builder.append('\\');
+    }
+    builder.append(c);
+  }
+
+  /** Opens the regex group that stands for a glob {} section. */
+  private void appendCurlyBraceStart() {
+    builder.append("(");
+  }
+
+  /** Appends the regex alternation bar for a comma inside a glob {} section. */
+  private void appendSubpatternSeparator() {
+    builder.append("|");
+  }
+
+  /** Closes the regex group that stands for a glob {} section. */
+  private void appendCurlyBraceEnd() {
+    builder.append(")");
+  }
+
+  /** A state of the converter; the constants declared below are its implementations. */
+  private abstract static class State {
+    /**
+     * Handles {@code c}, the next character of the glob, on behalf of {@code converter}; may push
+     * or pop converter states as a result.
+     */
+    abstract void process(GlobToRegex converter, char c);
+
+    /** Called after all characters have been read; does nothing unless overridden. */
+    void finish(GlobToRegex converter) {}
+  }
+
+  /** State for glob text outside of any [] or {} section; the state conversion starts in. */
+  private static final State NORMAL =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          switch (c) {
+            case '\\':
+              converter.pushState(ESCAPE);
+              break;
+            case '*':
+              converter.pushState(STAR);
+              break;
+            case '?':
+              converter.appendQuestionMark();
+              break;
+            case '[':
+              converter.appendBracketStart();
+              converter.pushState(BRACKET_FIRST_CHAR);
+              break;
+            case '{':
+              converter.appendCurlyBraceStart();
+              converter.pushState(CURLY_BRACE);
+              break;
+            default:
+              converter.append(c);
+          }
+        }
+
+        @Override
+        public String toString() {
+          return "NORMAL";
+        }
+      };
+
+  /** State entered after a lone backslash; the next character loses any special glob meaning. */
+  private static final State ESCAPE =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          converter.popState();
+          converter.append(c);
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Hanging escape (\\) at end of pattern");
+        }
+
+        @Override
+        public String toString() {
+          return "ESCAPE";
+        }
+      };
+
+  /** State following the reading of a single *; the next character decides between * and **. */
+  private static final State STAR =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          // This state lasts for exactly one character, so leave it before emitting anything.
+          converter.popState();
+          if (c == '*') {
+            converter.appendStarStar();
+          } else {
+            converter.appendStar();
+            converter.currentState().process(converter, c);
+          }
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          converter.appendStar();
+        }
+
+        @Override
+        public String toString() {
+          return "STAR";
+        }
+      };
+
+  /** State for the first character after a [, where ! negates and ] is rejected. */
+  private static final State BRACKET_FIRST_CHAR =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c == ']') {
+            // A glob such as "[]]" or "[]q]" is apparently fine in Unix (with ls, for instance)
+            // but is rejected by the default java.nio.file implementations. For "[]]" they give:
+            // java.util.regex.PatternSyntaxException: Unclosed character class near index 13
+            // ^[[^/]&&[]]\]$
+            //              ^
+            // The error thrown here differs slightly, but supporting such globs would take some
+            // kind of lookahead and spoil the simplicity of char-by-char conversion, so a ']'
+            // directly after '[' is simply treated as an empty, and therefore invalid, class.
+            // Escaping the ']' is no way around this: inside brackets a backslash is appended
+            // as a literal backslash (see appendInBracket), so the ']' following it would still
+            // close the class.
+            throw converter.syntaxError("Empty []");
+          }
+          if (c == '!' || c == '-') {
+            converter.appendExact(c == '!' ? '^' : '-');
+          } else {
+            converter.appendInBracket(c);
+          }
+          converter.popState();
+          converter.pushState(BRACKET);
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed [");
+        }
+
+        @Override
+        public String toString() {
+          return "BRACKET_FIRST_CHAR";
+        }
+      };
+
+  /** State for every character of a [] section after the first; a ] ends the section. */
+  private static final State BRACKET =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c != ']') {
+            converter.appendInBracket(c);
+            return;
+          }
+          converter.appendBracketEnd();
+          converter.popState();
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed [");
+        }
+
+        @Override
+        public String toString() {
+          return "BRACKET";
+        }
+      };
+
+  /** State for the contents of a {} section: comma-separated subpatterns, with no nested {. */
+  private static final State CURLY_BRACE =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          switch (c) {
+            case ',':
+              converter.appendSubpatternSeparator();
+              return;
+            case '}':
+              converter.appendCurlyBraceEnd();
+              converter.popState();
+              return;
+            case '{':
+              throw converter.syntaxError("{ not allowed in subpattern group");
+            case '\\':
+              converter.pushState(ESCAPE);
+              return;
+            case '*':
+              converter.pushState(STAR);
+              return;
+            case '?':
+              converter.appendQuestionMark();
+              return;
+            case '[':
+              converter.appendBracketStart();
+              converter.pushState(BRACKET_FIRST_CHAR);
+              return;
+            default:
+              converter.append(c);
+          }
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed {");
+        }
+
+        @Override
+        public String toString() {
+          return "CURLY_BRACE";
+        }
+      };
+}