diff options
Diffstat (limited to 'jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java')
-rw-r--r-- | jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java | 400 |
1 files changed, 400 insertions, 0 deletions
diff --git a/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java new file mode 100644 index 0000000..c3e463b --- /dev/null +++ b/jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java @@ -0,0 +1,400 @@ +/* + * Copyright 2013 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.common.jimfs; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.regex.PatternSyntaxException; + +/** + * Translates globs to regex patterns. + * + * @author Colin Decker + */ +final class GlobToRegex { + + /** + * Converts the given glob to a regular expression pattern. The given separators determine what + * characters the resulting expression breaks on for glob expressions such as * which should not + * cross directory boundaries. + * + * <p>Basic conversions (assuming / as only separator): + * + * <pre>{@code + * ? = [^/] + * * = [^/]* + * ** = .* + * [a-z] = [[^/]&&[a-z]] + * [!a-z] = [[^/]&&[^a-z]] + * {a,b,c} = (a|b|c) + * }</pre> + */ + public static String toRegex(String glob, String separators) { + return new GlobToRegex(glob, separators).convert(); + } + + private static final InternalCharMatcher REGEX_RESERVED = + InternalCharMatcher.anyOf("^$.?+*\\[]{}()"); + + private final String glob; + private final String separators; + private final InternalCharMatcher separatorMatcher; + + private final StringBuilder builder = new StringBuilder(); + private final Deque<State> states = new ArrayDeque<>(); + private int index; + + private GlobToRegex(String glob, String separators) { + this.glob = checkNotNull(glob); + this.separators = separators; + this.separatorMatcher = InternalCharMatcher.anyOf(separators); + } + + /** + * Converts the glob to a regex one character at a time. A state stack (states) is maintained, + * with the state at the top of the stack being the current state at any given time. The current + * state is always used to process the next character. When a state processes a character, it may + * pop the current state or push a new state as the current state. The resulting regex is written + * to {@code builder}. + */ + private String convert() { + pushState(NORMAL); + for (index = 0; index < glob.length(); index++) { + currentState().process(this, glob.charAt(index)); + } + currentState().finish(this); + return builder.toString(); + } + + /** Enters the given state. The current state becomes the previous state. */ + private void pushState(State state) { + states.push(state); + } + + /** Returns to the previous state. */ + private void popState() { + states.pop(); + } + + /** Returns the current state. */ + private State currentState() { + return states.peek(); + } + + /** Throws a {@link PatternSyntaxException}. */ + private PatternSyntaxException syntaxError(String desc) { + throw new PatternSyntaxException(desc, glob, index); + } + + /** Appends the given character as-is to the regex. */ + private void appendExact(char c) { + builder.append(c); + } + + /** Appends the regex form of the given normal character or separator from the glob. */ + private void append(char c) { + if (separatorMatcher.matches(c)) { + appendSeparator(); + } else { + appendNormal(c); + } + } + + /** Appends the regex form of the given normal character from the glob. */ + private void appendNormal(char c) { + if (REGEX_RESERVED.matches(c)) { + builder.append('\\'); + } + builder.append(c); + } + + /** Appends the regex form matching the separators for the path type. */ + private void appendSeparator() { + if (separators.length() == 1) { + appendNormal(separators.charAt(0)); + } else { + builder.append('['); + for (int i = 0; i < separators.length(); i++) { + appendInBracket(separators.charAt(i)); + } + builder.append("]"); + } + } + + /** Appends the regex form that matches anything except the separators for the path type. */ + private void appendNonSeparator() { + builder.append("[^"); + for (int i = 0; i < separators.length(); i++) { + appendInBracket(separators.charAt(i)); + } + builder.append(']'); + } + + /** Appends the regex form of the glob ? character. */ + private void appendQuestionMark() { + appendNonSeparator(); + } + + /** Appends the regex form of the glob * character. */ + private void appendStar() { + appendNonSeparator(); + builder.append('*'); + } + + /** Appends the regex form of the glob ** pattern. */ + private void appendStarStar() { + builder.append(".*"); + } + + /** Appends the regex form of the start of a glob [] section. */ + private void appendBracketStart() { + builder.append('['); + appendNonSeparator(); + builder.append("&&["); + } + + /** Appends the regex form of the end of a glob [] section. */ + private void appendBracketEnd() { + builder.append("]]"); + } + + /** Appends the regex form of the given character within a glob [] section. */ + private void appendInBracket(char c) { + // escape \ in regex character class + if (c == '\\') { + builder.append('\\'); + } + + builder.append(c); + } + + /** Appends the regex form of the start of a glob {} section. */ + private void appendCurlyBraceStart() { + builder.append('('); + } + + /** Appends the regex form of the separator (,) within a glob {} section. */ + private void appendSubpatternSeparator() { + builder.append('|'); + } + + /** Appends the regex form of the end of a glob {} section. */ + private void appendCurlyBraceEnd() { + builder.append(')'); + } + + /** Converter state. */ + private abstract static class State { + /** + * Process the next character with the current state, transitioning the converter to a new state + * if necessary. + */ + abstract void process(GlobToRegex converter, char c); + + /** Called after all characters have been read. */ + void finish(GlobToRegex converter) {} + } + + /** Normal state. */ + private static final State NORMAL = + new State() { + @Override + void process(GlobToRegex converter, char c) { + switch (c) { + case '?': + converter.appendQuestionMark(); + return; + case '[': + converter.appendBracketStart(); + converter.pushState(BRACKET_FIRST_CHAR); + return; + case '{': + converter.appendCurlyBraceStart(); + converter.pushState(CURLY_BRACE); + return; + case '*': + converter.pushState(STAR); + return; + case '\\': + converter.pushState(ESCAPE); + return; + default: + converter.append(c); + } + } + + @Override + public String toString() { + return "NORMAL"; + } + }; + + /** State following the reading of a single \. */ + private static final State ESCAPE = + new State() { + @Override + void process(GlobToRegex converter, char c) { + converter.append(c); + converter.popState(); + } + + @Override + void finish(GlobToRegex converter) { + throw converter.syntaxError("Hanging escape (\\) at end of pattern"); + } + + @Override + public String toString() { + return "ESCAPE"; + } + }; + + /** State following the reading of a single *. */ + private static final State STAR = + new State() { + @Override + void process(GlobToRegex converter, char c) { + if (c == '*') { + converter.appendStarStar(); + converter.popState(); + } else { + converter.appendStar(); + converter.popState(); + converter.currentState().process(converter, c); + } + } + + @Override + void finish(GlobToRegex converter) { + converter.appendStar(); + } + + @Override + public String toString() { + return "STAR"; + } + }; + + /** State immediately following the reading of a [. */ + private static final State BRACKET_FIRST_CHAR = + new State() { + @Override + void process(GlobToRegex converter, char c) { + if (c == ']') { + // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for + // example) but doesn't work for the default java.nio.file implementations. In the cases + // of "[]]" it produces: + // java.util.regex.PatternSyntaxException: Unclosed character class near index 13 + // ^[[^/]&&[]]\]$ + // ^ + // The error here is slightly different, but trying to make this work would require some + // kind of lookahead and break the simplicity of char-by-char conversion here. Also, if + // someone wants to include a ']' inside a character class, they should escape it. + throw converter.syntaxError("Empty []"); + } + if (c == '!') { + converter.appendExact('^'); + } else if (c == '-') { + converter.appendExact(c); + } else { + converter.appendInBracket(c); + } + converter.popState(); + converter.pushState(BRACKET); + } + + @Override + void finish(GlobToRegex converter) { + throw converter.syntaxError("Unclosed ["); + } + + @Override + public String toString() { + return "BRACKET_FIRST_CHAR"; + } + }; + + /** State inside [brackets], but not at the first character inside the brackets. */ + private static final State BRACKET = + new State() { + @Override + void process(GlobToRegex converter, char c) { + if (c == ']') { + converter.appendBracketEnd(); + converter.popState(); + } else { + converter.appendInBracket(c); + } + } + + @Override + void finish(GlobToRegex converter) { + throw converter.syntaxError("Unclosed ["); + } + + @Override + public String toString() { + return "BRACKET"; + } + }; + + /** State inside {curly braces}. */ + private static final State CURLY_BRACE = + new State() { + @Override + void process(GlobToRegex converter, char c) { + switch (c) { + case '?': + converter.appendQuestionMark(); + break; + case '[': + converter.appendBracketStart(); + converter.pushState(BRACKET_FIRST_CHAR); + break; + case '{': + throw converter.syntaxError("{ not allowed in subpattern group"); + case '*': + converter.pushState(STAR); + break; + case '\\': + converter.pushState(ESCAPE); + break; + case '}': + converter.appendCurlyBraceEnd(); + converter.popState(); + break; + case ',': + converter.appendSubpatternSeparator(); + break; + default: + converter.append(c); + } + } + + @Override + void finish(GlobToRegex converter) { + throw converter.syntaxError("Unclosed {"); + } + + @Override + public String toString() { + return "CURLY_BRACE"; + } + }; +} |