path: root/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
diff options
author Yuexi Ma <yuexima@google.com> 2020-03-04 18:43:18 -0800
committer Yuexi Ma <yuexima@google.com> 2020-03-05 03:04:14 +0000
commit cef92d673c81daab4b0dad931591947c86e0dc8a (patch)
tree f8cbc802a806455c5fdfeab7f696c63641b9f9f5 /jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
parent 68591711a9034281d5fe11fc7a30e535bbce125c (diff)
parent 93a6c6782a9fdf1365face2461876b8644b2a404 (diff)
Test: n/a Bug: 150784654 Change-Id: I6fb223f1bd657a6a3d0be1492f63a7774e21943e
Diffstat (limited to 'jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java')
1 files changed, 400 insertions, 0 deletions
diff --git a/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
new file mode 100644
index 0000000..c3e463b
--- /dev/null
+++ b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.common.jimfs;
+import static com.google.common.base.Preconditions.checkNotNull;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.regex.PatternSyntaxException;
+/**
+ * Translates globs to regex patterns.
+ *
+ * @author Colin Decker
+ */
+final class GlobToRegex {
+ /**
+ * Converts the given glob to a regular expression pattern. The given separators determine what
+ * characters the resulting expression breaks on for glob expressions such as * which should not
+ * cross directory boundaries.
+ *
+ * <p>Basic conversions (assuming / as only separator):
+ *
+ * <pre>{@code
+ * ? = [^/]
+ * * = [^/]*
+ * ** = .*
+ * [a-z] = [[^/]&&[a-z]]
+ * [!a-z] = [[^/]&&[^a-z]]
+ * {a,b,c} = (a|b|c)
+ * }</pre>
+ */
+ public static String toRegex(String glob, String separators) {
+ return new GlobToRegex(glob, separators).convert();
+ }
+ private static final InternalCharMatcher REGEX_RESERVED =
+ InternalCharMatcher.anyOf("^$.?+*\\[]{}()"); // characters appendNormal prefixes with a backslash
+ private final String glob; // the glob pattern being converted
+ private final String separators; // separator characters for the path type
+ private final InternalCharMatcher separatorMatcher; // built from separators via anyOf
+ private final StringBuilder builder = new StringBuilder(); // accumulates the resulting regex
+ private final Deque<State> states = new ArrayDeque<>(); // state stack; the head is the current state
+ private int index; // position in glob of the character being processed; reported by syntaxError
+  /**
+   * Creates a converter for one glob. Instances are single-use: {@link #convert()} consumes the
+   * glob and fills the internal builder.
+   *
+   * @param glob the glob pattern to convert; must not be null
+   * @param separators the separator characters for the path type; must not be null
+   */
+  private GlobToRegex(String glob, String separators) {
+    this.glob = checkNotNull(glob);
+    // Validate separators the same way as glob so a null fails here, at construction, rather
+    // than wherever the string is first dereferenced.
+    this.separators = checkNotNull(separators);
+    this.separatorMatcher = InternalCharMatcher.anyOf(this.separators);
+  }
+  /**
+   * Runs the conversion, feeding the glob to the state machine one character at a time. The
+   * state on top of the {@code states} stack is the current state and handles the next
+   * character; while handling it, a state may pop itself or push a new state. Once the input is
+   * exhausted the current state's {@code finish} hook is invoked. Output accumulates in {@code
+   * builder}.
+   *
+   * @return the regex text accumulated in {@code builder}
+   */
+  private String convert() {
+    pushState(NORMAL);
+    index = 0;
+    while (index < glob.length()) {
+      char next = glob.charAt(index);
+      currentState().process(this, next);
+      index++;
+    }
+    // index now equals glob.length(), which is the position finish() errors are reported at.
+    State last = currentState();
+    last.finish(this);
+    return builder.toString();
+  }
+  /** Makes {@code state} the current state by placing it on top of the state stack. */
+  private void pushState(State state) {
+    states.addFirst(state);
+  }
+  /** Discards the current state so that the state beneath it becomes current again. */
+  private void popState() {
+    states.removeFirst();
+  }
+  /** Returns the state on top of the state stack, or {@code null} if the stack is empty. */
+  private State currentState() {
+    return states.peekFirst();
+  }
+ /** Throws a {@link PatternSyntaxException}. */
+ private PatternSyntaxException syntaxError(String desc) {
+ throw new PatternSyntaxException(desc, glob, index);
+ }
+  /** Writes {@code c} to the regex verbatim, with no escaping of any kind. */
+  private void appendExact(char c) {
+    builder.append(c);
+  }
+  /**
+   * Writes the regex form of a literal glob character: a separator character becomes the
+   * separator expression, anything else is written as a normal (escaped if needed) character.
+   */
+  private void append(char c) {
+    if (!separatorMatcher.matches(c)) {
+      appendNormal(c);
+      return;
+    }
+    appendSeparator();
+  }
+  /**
+   * Writes a non-separator glob character to the regex, preceded by a backslash when the
+   * character is one of {@code REGEX_RESERVED}.
+   */
+  private void appendNormal(char c) {
+    boolean reserved = REGEX_RESERVED.matches(c);
+    if (reserved) {
+      builder.append('\\').append(c);
+    } else {
+      builder.append(c);
+    }
+  }
+  /**
+   * Writes the expression that matches a separator of the path type: the single separator itself
+   * when there is exactly one, otherwise a character class listing all of them.
+   */
+  private void appendSeparator() {
+    if (separators.length() != 1) {
+      builder.append('[');
+      for (char separator : separators.toCharArray()) {
+        appendInBracket(separator);
+      }
+      builder.append("]");
+      return;
+    }
+    appendNormal(separators.charAt(0));
+  }
+  /** Writes a negated character class that excludes every separator of the path type. */
+  private void appendNonSeparator() {
+    builder.append("[^");
+    for (char separator : separators.toCharArray()) {
+      appendInBracket(separator);
+    }
+    builder.append(']');
+  }
+  /** Writes the translation of glob {@code ?}: exactly one non-separator character. */
+  private void appendQuestionMark() {
+    appendNonSeparator();
+  }
+  /** Writes the translation of glob {@code *}: the non-separator class followed by {@code *}. */
+  private void appendStar() {
+    appendNonSeparator();
+    builder.append('*');
+  }
+  /** Writes the translation of glob {@code **}, which is {@code .*}. */
+  private void appendStarStar() {
+    builder.append(".*");
+  }
+  /**
+   * Opens the translation of a glob {@code [...]} section: an outer class that intersects the
+   * non-separator class with a nested class holding the bracket contents.
+   */
+  private void appendBracketStart() {
+    builder.append('[');
+    appendNonSeparator();
+    builder.append("&&[");
+  }
+  /** Closes both the nested and the outer class opened by {@code appendBracketStart}. */
+  private void appendBracketEnd() {
+    builder.append("]]");
+  }
+  /**
+   * Appends the regex form of the given character within a glob [] section (also used for the
+   * separator characters written inside the generated character classes).
+   *
+   * <p>Characters that carry meaning inside a Java regex character class but are plain literals
+   * in a glob bracket expression are escaped with a backslash:
+   *
+   * <ul>
+   *   <li>{@code \} would otherwise start a regex escape;
+   *   <li>{@code [} would otherwise open a nested character class;
+   *   <li>{@code &} is escaped so that {@code &&} is never read as class intersection.
+   * </ul>
+   *
+   * @param c the character to append
+   */
+  private void appendInBracket(char c) {
+    switch (c) {
+      case '\\':
+      case '[':
+      case '&':
+        // A backslash before a non-alphabetic character is always a valid regex escape.
+        builder.append('\\');
+        break;
+      default:
+        break;
+    }
+    builder.append(c);
+  }
+  /** Opens the regex group that stands for a glob {@code {...}} section. */
+  private void appendCurlyBraceStart() {
+    builder.append('(');
+  }
+  /** Writes the alternation bar that stands for a {@code ,} inside a glob {@code {...}} section. */
+  private void appendSubpatternSeparator() {
+    builder.append('|');
+  }
+  /** Closes the regex group that stands for a glob {@code {...}} section. */
+  private void appendCurlyBraceEnd() {
+    builder.append(')');
+  }
+ /** Converter state. */
+ private abstract static class State {
+ /**
+ * Process the next character with the current state, transitioning the converter to a new state
+ * if necessary.
+ */
+ abstract void process(GlobToRegex converter, char c);
+ /** Called after all characters have been read. */
+ void finish(GlobToRegex converter) {}
+ }
+  /** Base state: outside of any bracket, brace, escape or star sequence. */
+  private static final State NORMAL =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          switch (c) {
+            case '?':
+              converter.appendQuestionMark();
+              break;
+            case '[':
+              // Open the character class; the first char inside gets special handling.
+              converter.appendBracketStart();
+              converter.pushState(BRACKET_FIRST_CHAR);
+              break;
+            case '{':
+              converter.appendCurlyBraceStart();
+              converter.pushState(CURLY_BRACE);
+              break;
+            case '*':
+              // Nothing is written yet: STAR decides between * and ** on the next char.
+              converter.pushState(STAR);
+              break;
+            case '\\':
+              converter.pushState(ESCAPE);
+              break;
+            default:
+              converter.append(c);
+              break;
+          }
+        }
+        @Override
+        public String toString() {
+          return "NORMAL";
+        }
+      };
+ /** State following the reading of a single \. */
+ private static final State ESCAPE =
+ new State() {
+ @Override
+ void process(GlobToRegex converter, char c) {
+ converter.append(c);
+ converter.popState();
+ }
+ @Override
+ void finish(GlobToRegex converter) {
+ throw converter.syntaxError("Hanging escape (\\) at end of pattern");
+ }
+ @Override
+ public String toString() {
+ return "ESCAPE";
+ }
+ };
+  /**
+   * State following the reading of a single *. Nothing has been written for that star yet: the
+   * next character decides whether it is part of {@code **} or a lone {@code *}.
+   */
+  private static final State STAR =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c == '*') {
+            converter.appendStarStar();
+            converter.popState();
+          } else {
+            // Lone star: emit it, then let the enclosing state handle the current character.
+            converter.appendStar();
+            converter.popState();
+            converter.currentState().process(converter, c);
+          }
+        }
+        /**
+         * The glob ended on a lone star. Emits it, then hands end-of-input to the enclosing
+         * state so that, for example, a glob ending inside an unclosed { still reports its
+         * syntax error instead of silently yielding an unbalanced regex.
+         */
+        @Override
+        void finish(GlobToRegex converter) {
+          converter.appendStar();
+          converter.popState();
+          converter.currentState().finish(converter);
+        }
+        @Override
+        public String toString() {
+          return "STAR";
+        }
+      };
+  /**
+   * State immediately following the reading of a [. The first character inside the brackets is
+   * special: {@code ]} is rejected as an empty class, {@code !} negates the class and {@code -}
+   * is written as-is. Whatever the character, the state is then replaced by {@code BRACKET}.
+   */
+  private static final State BRACKET_FIRST_CHAR =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c == ']') {
+            // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for
+            // example) but doesn't work for the default java.nio.file implementations. In the cases
+            // of "[]]" it produces:
+            // java.util.regex.PatternSyntaxException: Unclosed character class near index 13
+            // ^[[^/]&&[]]\]$
+            //              ^
+            // The error here is slightly different, but trying to make this work would require some
+            // kind of lookahead and break the simplicity of char-by-char conversion here. Also, if
+            // someone wants to include a ']' inside a character class, they should escape it.
+            throw converter.syntaxError("Empty []");
+          }
+          if (c == '!') {
+            // Glob negation maps to regex negation.
+            converter.appendExact('^');
+          } else if (c == '-') {
+            converter.appendExact(c);
+          } else {
+            converter.appendInBracket(c);
+          }
+          // Swap this state for the regular in-bracket state.
+          converter.popState();
+          converter.pushState(BRACKET);
+        }
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed [");
+        }
+        @Override
+        public String toString() {
+          // The return statement was missing, leaving the method without a result.
+          return "BRACKET_FIRST_CHAR";
+        }
+      };
+  /** State for every character inside [brackets] after the first one. */
+  private static final State BRACKET =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          if (c != ']') {
+            converter.appendInBracket(c);
+            return;
+          }
+          // Closing bracket: finish the class and go back to the enclosing state.
+          converter.appendBracketEnd();
+          converter.popState();
+        }
+        /** The glob ended before the closing bracket. */
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed [");
+        }
+        @Override
+        public String toString() {
+          return "BRACKET";
+        }
+      };
+  /** State while reading the alternatives of a {curly brace} group. */
+  private static final State CURLY_BRACE =
+      new State() {
+        @Override
+        void process(GlobToRegex converter, char c) {
+          switch (c) {
+            case '?':
+              converter.appendQuestionMark();
+              return;
+            case '[':
+              converter.appendBracketStart();
+              converter.pushState(BRACKET_FIRST_CHAR);
+              return;
+            case '{':
+              // Groups cannot be nested.
+              throw converter.syntaxError("{ not allowed in subpattern group");
+            case '*':
+              converter.pushState(STAR);
+              return;
+            case '\\':
+              converter.pushState(ESCAPE);
+              return;
+            case '}':
+              // End of the group: close it and resume the enclosing state.
+              converter.appendCurlyBraceEnd();
+              converter.popState();
+              return;
+            case ',':
+              converter.appendSubpatternSeparator();
+              return;
+            default:
+              converter.append(c);
+              return;
+          }
+        }
+        /** The glob ended before the closing brace. */
+        @Override
+        void finish(GlobToRegex converter) {
+          throw converter.syntaxError("Unclosed {");
+        }
+        @Override
+        public String toString() {
+          return "CURLY_BRACE";
+        }
+      };