diff options
Diffstat (limited to 'src/main/java/com/google/escapevelocity/Parser.java')
-rw-r--r-- | src/main/java/com/google/escapevelocity/Parser.java | 214 |
1 files changed, 158 insertions, 56 deletions
diff --git a/src/main/java/com/google/escapevelocity/Parser.java b/src/main/java/com/google/escapevelocity/Parser.java index 9982be3..0beaf18 100644 --- a/src/main/java/com/google/escapevelocity/Parser.java +++ b/src/main/java/com/google/escapevelocity/Parser.java @@ -11,6 +11,25 @@ * or implied. See the License for the specific language governing permissions and limitations under * the License. */ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package com.google.escapevelocity; import com.google.escapevelocity.DirectiveNode.SetNode; @@ -29,14 +48,16 @@ import com.google.escapevelocity.TokenNode.ForEachTokenNode; import com.google.escapevelocity.TokenNode.IfTokenNode; import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode; import com.google.escapevelocity.TokenNode.NestedTokenNode; +import com.google.common.base.CharMatcher; +import com.google.common.base.Verify; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.Iterables; +import com.google.common.primitives.Chars; +import com.google.common.primitives.Ints; import java.io.IOException; import java.io.LineNumberReader; import java.io.Reader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; /** * A parser that reads input from the given {@link Reader} and parses it to produce a @@ -53,12 +74,20 @@ class Parser { /** * The invariant of this parser is that {@code c} is always the next character of interest. - * This means that we never have to "unget" a character by reading too far. For example, after - * we parse an integer, {@code c} will be the first character after the integer, which is exactly - * the state we will be in when there are no more digits. + * This means that we almost never have to "unget" a character by reading too far. For example, + * after we parse an integer, {@code c} will be the first character after the integer, which is + * exactly the state we will be in when there are no more digits. + * + * <p>Sometimes we need to read two characters ahead, and in that case we use {@link #pushback}. */ private int c; + /** + * A single character of pushback. If this is not negative, the {@link #next()} method will + * return it instead of reading a character. + */ + private int pushback = -1; + Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener) throws IOException { this.reader = new LineNumberReader(reader); @@ -127,11 +156,29 @@ class Parser { */ private void next() throws IOException { if (c != EOF) { - c = reader.read(); + if (pushback < 0) { + c = reader.read(); + } else { + c = pushback; + pushback = -1; + } } } /** + * Saves the current character {@code c} to be read again, and sets {@code c} to the given + * {@code c1}. Suppose the text contains {@code xy} and we have just read {@code y}. + * So {@code c == 'y'}. Now if we execute {@code pushback('x')}, we will have + * {@code c == 'x'} and the next call to {@link #next()} will set {@code c == 'y'}. Subsequent + * calls to {@code next()} will continue reading from {@link #reader}. So the pushback + * essentially puts us back in the state we were in before we read {@code y}. + */ + private void pushback(int c1) { + pushback = c; + c = c1; + } + + /** * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or * there are no more characters. */ @@ -174,17 +221,24 @@ class Parser { private Node parseNode() throws IOException { if (c == '#') { next(); - if (c == '#') { - return parseComment(); - } else if (isAsciiLetter(c) || c == '{') { - return parseDirective(); - } else if (c == '[') { - return parseHashSquare(); - } else { - // For consistency with Velocity, we treat # not followed by # or a letter as a plain - // character, and we treat #$foo as a literal # followed by the reference $foo. - // But the # is its own ConstantExpressionNode; we don't try to merge it with adjacent text. - return new ConstantExpressionNode(resourceName, lineNumber(), "#"); + switch (c) { + case '#': + return parseLineComment(); + case '*': + return parseBlockComment(); + case '[': + return parseHashSquare(); + case '{': + return parseDirective(); + default: + if (isAsciiLetter(c)) { + return parseDirective(); + } else { + // For consistency with Velocity, we treat # not followed by a letter or one of the + // characters above as a plain character, and we treat #$foo as a literal # followed by + // the reference $foo. + return parsePlainText('#'); + } } } if (c == EOF) { @@ -200,13 +254,15 @@ class Parser { assert c == '['; next(); if (c != '[') { - return new ConstantExpressionNode(resourceName, lineNumber(), "#["); + return parsePlainText(new StringBuilder("#[")); } + int startLine = lineNumber(); next(); StringBuilder sb = new StringBuilder(); while (true) { if (c == EOF) { - throw parseException("Unterminated #[[ - did not see matching ]]#"); + throw new ParseException( + "Unterminated #[[ - did not see matching ]]#", resourceName, startLine); } if (c == '#') { // This might be the last character of ]]# or it might just be a random #. @@ -458,10 +514,10 @@ class Parser { } /** - * Parses and discards a comment, which is {@code ##} followed by any number of characters up to - * and including the next newline. + * Parses and discards a line comment, which is {@code ##} followed by any number of characters + * up to and including the next newline. */ - private Node parseComment() throws IOException { + private Node parseLineComment() throws IOException { int lineNumber = lineNumber(); while (c != '\n' && c != EOF) { next(); @@ -471,6 +527,27 @@ class Parser { } /** + * Parses and discards a block comment, which is {@code #*} followed by everything up to and + * including the next {@code *#}. + */ + private Node parseBlockComment() throws IOException { + assert c == '*'; + int startLine = lineNumber(); + int lastC = '\0'; + next(); + while (!(lastC == '*' && c == '#')) { + if (c == EOF) { + throw new ParseException( + "Unterminated #* - did not see matching *#", resourceName, startLine); + } + lastC = c; + next(); + } + next(); + return new CommentTokenNode(resourceName, startLine); + } + + /** * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given * {@code firstChar} is the first character of the plain text, and {@link #c} is the second * (if the plain text is more than one character). @@ -478,7 +555,10 @@ class Parser { private Node parsePlainText(int firstChar) throws IOException { StringBuilder sb = new StringBuilder(); sb.appendCodePoint(firstChar); + return parsePlainText(sb); + } + private Node parsePlainText(StringBuilder sb) throws IOException { literal: while (true) { switch (c) { @@ -508,7 +588,27 @@ class Parser { * * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}. */ - private ReferenceNode parseReference() throws IOException { + private Node parseReference() throws IOException { + if (c == '{') { + next(); + if (!isAsciiLetter(c)) { + return parsePlainText(new StringBuilder("${")); + } + ReferenceNode node = parseReferenceNoBrace(); + expect('}'); + return node; + } else { + return parseReferenceNoBrace(); + } + } + + /** + * Same as {@link #parseReference()}, except it really must be a reference. A {@code $} in + * normal text doesn't start a reference if it is not followed by an identifier. But in an + * expression, for example in {@code #if ($x == 23)}, {@code $} must be followed by an + * identifier. + */ + private ReferenceNode parseRequiredReference() throws IOException { if (c == '{') { next(); ReferenceNode node = parseReferenceNoBrace(); @@ -568,6 +668,11 @@ class Parser { private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException { assert c == '.'; next(); + if (!isAsciiLetter(c)) { + // We've seen something like `$foo.!`, so it turns out it's not a member after all. + pushback('.'); + return lhs; + } String id = parseId("Member"); ReferenceNode reference; if (c == '(') { @@ -670,19 +775,15 @@ class Parser { * Maps a code point to the operators that begin with that code point. For example, maps * {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}. */ - private static final Map<Integer, List<Operator>> CODE_POINT_TO_OPERATORS; + private static final ImmutableListMultimap<Integer, Operator> CODE_POINT_TO_OPERATORS; static { - Map<Integer, List<Operator>> map = new HashMap<>(); + ImmutableListMultimap.Builder<Integer, Operator> builder = ImmutableListMultimap.builder(); for (Operator operator : Operator.values()) { if (operator != Operator.STOP) { - Integer key = operator.symbol.codePointAt(0); - if (!map.containsKey(key)) { - map.put(key, new ArrayList<Operator>()); - } - map.get(key).add(operator); + builder.put((int) operator.symbol.charAt(0), operator); } } - CODE_POINT_TO_OPERATORS = Collections.unmodifiableMap(map); + CODE_POINT_TO_OPERATORS = builder.build(); } /** @@ -753,17 +854,17 @@ class Parser { */ private void nextOperator() throws IOException { skipSpace(); - List<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c); - if (possibleOperators == null) { + ImmutableList<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c); + if (possibleOperators.isEmpty()) { currentOperator = Operator.STOP; return; } - int firstChar = c; + char firstChar = Chars.checkedCast(c); next(); Operator operator = null; for (Operator possibleOperator : possibleOperators) { if (possibleOperator.symbol.length() == 1) { - assert operator == null; + Verify.verify(operator == null); operator = possibleOperator; } else if (possibleOperator.symbol.charAt(1) == c) { next(); @@ -771,7 +872,8 @@ class Parser { } } if (operator == null) { - throw parseException("Expected " + possibleOperators.get(0) + ", not just " + firstChar); + throw parseException( + "Expected " + Iterables.getOnlyElement(possibleOperators) + ", not just " + firstChar); } currentOperator = operator; } @@ -818,7 +920,7 @@ class Parser { ExpressionNode node; if (c == '$') { next(); - node = parseReference(); + node = parseRequiredReference(); } else if (c == '"') { node = parseStringLiteral(); } else if (c == '-') { @@ -869,10 +971,8 @@ class Parser { sb.appendCodePoint(c); next(); } - int value; - try { - value = Integer.parseInt(sb.toString()); - } catch (NumberFormatException e) { + Integer value = Ints.tryParse(sb.toString()); + if (value == null) { throw parseException("Invalid integer: " + sb); } return new ConstantExpressionNode(resourceName, lineNumber(), value); @@ -896,29 +996,31 @@ class Parser { return new ConstantExpressionNode(resourceName, lineNumber(), value); } - private static final ImmutableAsciiSet ASCII_LETTER = - ImmutableAsciiSet.ofRange('A', 'Z') - .union(ImmutableAsciiSet.ofRange('a', 'z')); + private static final CharMatcher ASCII_LETTER = + CharMatcher.inRange('A', 'Z') + .or(CharMatcher.inRange('a', 'z')) + .precomputed(); - private static final ImmutableAsciiSet ASCII_DIGIT = - ImmutableAsciiSet.ofRange('0', '9'); + private static final CharMatcher ASCII_DIGIT = + CharMatcher.inRange('0', '9') + .precomputed(); - private static final ImmutableAsciiSet ID_CHAR = + private static final CharMatcher ID_CHAR = ASCII_LETTER - .union(ASCII_DIGIT) - .union(ImmutableAsciiSet.of('-')) - .union(ImmutableAsciiSet.of('_')); + .or(ASCII_DIGIT) + .or(CharMatcher.anyOf("-_")) + .precomputed(); private static boolean isAsciiLetter(int c) { - return ASCII_LETTER.contains(c); + return (char) c == c && ASCII_LETTER.matches((char) c); } private static boolean isAsciiDigit(int c) { - return ASCII_DIGIT.contains(c); + return (char) c == c && ASCII_DIGIT.matches((char) c); } private static boolean isIdChar(int c) { - return ID_CHAR.contains(c); + return (char) c == c && ID_CHAR.matches((char) c); } /** |