aboutsummaryrefslogtreecommitdiff
path: root/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
diff options
context:
space:
mode:
Diffstat (limited to 'jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java')
-rw-r--r--jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java400
1 files changed, 400 insertions, 0 deletions
diff --git a/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
new file mode 100644
index 0000000..c3e463b
--- /dev/null
+++ b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.common.jimfs;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * Translates globs to regex patterns.
+ *
+ * <p>The glob is read one character at a time by a small state machine; each state appends the
+ * matching regex fragment to a shared builder. Malformed globs are reported with a {@link
+ * PatternSyntaxException}.
+ *
+ * @author Colin Decker
+ */
+final class GlobToRegex {
+
+  /**
+   * Converts the given glob to a regular expression pattern. The given separators determine what
+   * characters the resulting expression breaks on for glob expressions such as * which should not
+   * cross directory boundaries.
+   *
+   * <p>Basic conversions (assuming / as only separator):
+   *
+   * <pre>{@code
+   * ?          = [^/]
+   * *          = [^/]*
+   * **         = .*
+   * [a-z]      = [[^/]&&[a-z]]
+   * [!a-z]     = [[^/]&&[^a-z]]
+   * {a,b,c}    = (a|b|c)
+   * }</pre>
+   *
+   * @param glob the glob expression to translate
+   * @param separators the characters that act as name separators for the path type
+   * @return the regex equivalent of {@code glob}
+   * @throws NullPointerException if {@code glob} or {@code separators} is null
+   * @throws PatternSyntaxException if the glob has a hanging escape, an empty or unclosed
+   *     {@code []} section, an unclosed {@code {}} section or a nested <code>{</code>
+   */
+  public static String toRegex(String glob, String separators) {
+    return new GlobToRegex(glob, separators).convert();
+  }
+
+  /**
+   * Characters that have a special meaning in a regex outside of a character class and therefore
+   * must be escaped when they appear as literal characters in the glob. This includes {@code |},
+   * which would otherwise turn a literal glob such as {@code a|b} into a regex alternation.
+   */
+  private static final InternalCharMatcher REGEX_RESERVED =
+      InternalCharMatcher.anyOf("^$.?+*\\[]{}()|");
+
+  /** The glob being converted. */
+  private final String glob;
+
+  /** The separator characters for the path type. */
+  private final String separators;
+
+  /** Matches any one of the characters in {@link #separators}. */
+  private final InternalCharMatcher separatorMatcher;
+
+  /** Receives the regex as it is produced. */
+  private final StringBuilder builder = new StringBuilder();
+
+  /** Stack of converter states; the head of the deque is the current state. */
+  private final Deque<State> states = new ArrayDeque<>();
+
+  /** Index in {@link #glob} of the character being processed; used when reporting errors. */
+  private int index;
+
+  private GlobToRegex(String glob, String separators) {
+    this.glob = checkNotNull(glob);
+    this.separators = checkNotNull(separators);
+    this.separatorMatcher = InternalCharMatcher.anyOf(separators);
+  }
+
+  /**
+   * Converts the glob to a regex one character at a time. A state stack (states) is maintained,
+   * with the state at the top of the stack being the current state at any given time. The current
+   * state is always used to process the next character. When a state processes a character, it may
+   * pop the current state or push a new state as the current state. The resulting regex is written
+   * to {@code builder}.
+   *
+   * <p>Once the input is exhausted, every state still on the stack is finished, innermost first.
+   * Finishing only the top state would let a trailing {@code *} hide an enclosing unclosed
+   * <code>{</code> (as in <code>{a*</code>) and produce an invalid regex instead of an error.
+   */
+  private String convert() {
+    pushState(NORMAL);
+    for (index = 0; index < glob.length(); index++) {
+      currentState().process(this, glob.charAt(index));
+    }
+    while (!states.isEmpty()) {
+      states.pop().finish(this);
+    }
+    return builder.toString();
+  }
+
+  /** Enters the given state. The current state becomes the previous state. */
+  private void pushState(State state) {
+    states.push(state);
+  }
+
+  /** Returns to the previous state. */
+  private void popState() {
+    states.pop();
+  }
+
+  /** Returns the current state. */
+  private State currentState() {
+    return states.peek();
+  }
+
+  /**
+   * Creates a {@link PatternSyntaxException} with the given description, the glob and the current
+   * index. The exception is returned rather than thrown so that callers can write {@code throw
+   * converter.syntaxError(...)}, which lets the compiler see that control flow ends there.
+   */
+  private PatternSyntaxException syntaxError(String desc) {
+    return new PatternSyntaxException(desc, glob, index);
+  }
+
+  /** Appends the given character as-is to the regex. */
+  private void appendExact(char c) {
+    builder.append(c);
+  }
+
+  /** Appends the regex form of the given normal character or separator from the glob. */
+  private void append(char c) {
+    if (separatorMatcher.matches(c)) {
+      appendSeparator();
+    } else {
+      appendNormal(c);
+    }
+  }
+
+  /**
+   * Appends the regex form of the given normal character from the glob, preceded by a backslash if
+   * the character is one of {@link #REGEX_RESERVED}.
+   */
+  private void appendNormal(char c) {
+    if (REGEX_RESERVED.matches(c)) {
+      builder.append('\\');
+    }
+    builder.append(c);
+  }
+
+  /**
+   * Appends the regex form matching the separators for the path type: the single separator itself
+   * if there is only one, otherwise a character class containing all of them.
+   */
+  private void appendSeparator() {
+    if (separators.length() == 1) {
+      appendNormal(separators.charAt(0));
+    } else {
+      builder.append('[');
+      for (int i = 0; i < separators.length(); i++) {
+        appendInBracket(separators.charAt(i));
+      }
+      builder.append("]");
+    }
+  }
+
+  /** Appends the regex form that matches anything except the separators for the path type. */
+  private void appendNonSeparator() {
+    builder.append("[^");
+    for (int i = 0; i < separators.length(); i++) {
+      appendInBracket(separators.charAt(i));
+    }
+    builder.append(']');
+  }
+
+  /** Appends the regex form of the glob ? character. */
+  private void appendQuestionMark() {
+    appendNonSeparator();
+  }
+
+  /** Appends the regex form of the glob * character. */
+  private void appendStar() {
+    appendNonSeparator();
+    builder.append('*');
+  }
+
+  /** Appends the regex form of the glob ** pattern. */
+  private void appendStarStar() {
+    builder.append(".*");
+  }
+
+  /**
+   * Appends the regex form of the start of a glob [] section: a character class that intersects
+   * "not a separator" with the class whose contents follow.
+   */
+  private void appendBracketStart() {
+    builder.append('[');
+    appendNonSeparator();
+    builder.append("&&[");
+  }
+
+  /** Appends the regex form of the end of a glob [] section. */
+  private void appendBracketEnd() {
+    builder.append("]]");
+  }
+
+  /** Appends the regex form of the given character within a glob [] section. */
+  private void appendInBracket(char c) {
+    // Inside a regex character class \ starts an escape, [ opens a nested class and && is the
+    // intersection operator. Escape all three so that they are matched literally; a backslash
+    // before a non-alphabetic character is always a valid regex escape.
+    if (c == '\\' || c == '[' || c == '&') {
+      builder.append('\\');
+    }
+
+    builder.append(c);
+  }
+
+  /** Appends the regex form of the start of a glob {} section. */
+  private void appendCurlyBraceStart() {
+    builder.append('(');
+  }
+
+  /** Appends the regex form of the separator (,) within a glob {} section. */
+  private void appendSubpatternSeparator() {
+    builder.append('|');
+  }
+
+  /** Appends the regex form of the end of a glob {} section. */
+  private void appendCurlyBraceEnd() {
+    builder.append(')');
+  }
+
+  /** Converter state. */
+  private abstract static class State {
+    /**
+     * Process the next character with the current state, transitioning the converter to a new state
+     * if necessary.
+     */
+    abstract void process(GlobToRegex converter, char c);
+
+    /**
+     * Called after all characters have been read, for each state still on the stack. Does nothing
+     * by default; states that must not be open at the end of the glob throw a syntax error.
+     */
+    void finish(GlobToRegex converter) {}
+  }
+
+  /** Normal state. */
+  private static final State NORMAL =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          switch (c) {
+            case '?':
+              converter.appendQuestionMark();
+              return;
+            case '[':
+              converter.appendBracketStart();
+              converter.pushState(BRACKET_FIRST_CHAR);
+              return;
+            case '{':
+              converter.appendCurlyBraceStart();
+              converter.pushState(CURLY_BRACE);
+              return;
+            case '*':
+              // Nothing is appended yet: the next character decides between * and **.
+              converter.pushState(STAR);
+              return;
+            case '\\':
+              converter.pushState(ESCAPE);
+              return;
+            default:
+              converter.append(c);
+          }
+        }
+
+        @Override
+        public String toString() {
+          return "NORMAL";
+        }
+      };
+
+  /** State following the reading of a single \. */
+  private static final State ESCAPE =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          // The escaped character is taken literally, whatever it means in a glob.
+          converter.append(c);
+          converter.popState();
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Hanging escape (\\) at end of pattern");
+        }
+
+        @Override
+        public String toString() {
+          return "ESCAPE";
+        }
+      };
+
+  /** State following the reading of a single *. */
+  private static final State STAR =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c == '*') {
+            converter.appendStarStar();
+            converter.popState();
+          } else {
+            // A lone *: emit it, then let the state that was current before the * handle c.
+            converter.appendStar();
+            converter.popState();
+            converter.currentState().process(converter, c);
+          }
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          // The glob ended right after a single *.
+          converter.appendStar();
+        }
+
+        @Override
+        public String toString() {
+          return "STAR";
+        }
+      };
+
+  /** State immediately following the reading of a [. */
+  private static final State BRACKET_FIRST_CHAR =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c == ']') {
+            // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for
+            // example) but doesn't work for the default java.nio.file implementations. In the cases
+            // of "[]]" it produces:
+            // java.util.regex.PatternSyntaxException: Unclosed character class near index 13
+            // ^[[^/]&&[]]\]$
+            //              ^
+            // The error here is slightly different, but trying to make this work would require some
+            // kind of lookahead and break the simplicity of char-by-char conversion here.
+            throw converter.syntaxError("Empty []");
+          }
+          if (c == '!') {
+            // A leading ! negates the glob class; the regex equivalent is ^.
+            converter.appendExact('^');
+          } else if (c == '-') {
+            converter.appendExact(c);
+          } else {
+            converter.appendInBracket(c);
+          }
+          // Any further characters are no longer "first": replace this state with BRACKET.
+          converter.popState();
+          converter.pushState(BRACKET);
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed [");
+        }
+
+        @Override
+        public String toString() {
+          return "BRACKET_FIRST_CHAR";
+        }
+      };
+
+  /** State inside [brackets], but not at the first character inside the brackets. */
+  private static final State BRACKET =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c == ']') {
+            converter.appendBracketEnd();
+            converter.popState();
+          } else {
+            converter.appendInBracket(c);
+          }
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed [");
+        }
+
+        @Override
+        public String toString() {
+          return "BRACKET";
+        }
+      };
+
+  /** State inside {curly braces}. */
+  private static final State CURLY_BRACE =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          switch (c) {
+            case '?':
+              converter.appendQuestionMark();
+              break;
+            case '[':
+              converter.appendBracketStart();
+              converter.pushState(BRACKET_FIRST_CHAR);
+              break;
+            case '{':
+              throw converter.syntaxError("{ not allowed in subpattern group");
+            case '*':
+              converter.pushState(STAR);
+              break;
+            case '\\':
+              converter.pushState(ESCAPE);
+              break;
+            case '}':
+              converter.appendCurlyBraceEnd();
+              converter.popState();
+              break;
+            case ',':
+              converter.appendSubpatternSeparator();
+              break;
+            default:
+              converter.append(c);
+          }
+        }
+
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed {");
+        }
+
+        @Override
+        public String toString() {
+          return "CURLY_BRACE";
+        }
+      };
+}