aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/com/google/escapevelocity/Parser.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/com/google/escapevelocity/Parser.java')
-rw-r--r-- src/main/java/com/google/escapevelocity/Parser.java 1094
1 files changed, 1094 insertions, 0 deletions
diff --git a/src/main/java/com/google/escapevelocity/Parser.java b/src/main/java/com/google/escapevelocity/Parser.java
new file mode 100644
index 0000000..4416c48
--- /dev/null
+++ b/src/main/java/com/google/escapevelocity/Parser.java
@@ -0,0 +1,1094 @@
+/*
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.escapevelocity;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.base.Verify;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableListMultimap;
+import com.google.common.collect.Iterables;
+import com.google.common.primitives.Chars;
+import com.google.common.primitives.Ints;
+import com.google.escapevelocity.DirectiveNode.SetNode;
+import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode;
+import com.google.escapevelocity.ExpressionNode.NotExpressionNode;
+import com.google.escapevelocity.ReferenceNode.IndexReferenceNode;
+import com.google.escapevelocity.ReferenceNode.MemberReferenceNode;
+import com.google.escapevelocity.ReferenceNode.MethodReferenceNode;
+import com.google.escapevelocity.ReferenceNode.PlainReferenceNode;
+import com.google.escapevelocity.TokenNode.CommentTokenNode;
+import com.google.escapevelocity.TokenNode.ElseIfTokenNode;
+import com.google.escapevelocity.TokenNode.ElseTokenNode;
+import com.google.escapevelocity.TokenNode.EndTokenNode;
+import com.google.escapevelocity.TokenNode.EofNode;
+import com.google.escapevelocity.TokenNode.ForEachTokenNode;
+import com.google.escapevelocity.TokenNode.IfTokenNode;
+import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode;
+import com.google.escapevelocity.TokenNode.NestedTokenNode;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Reader;
+
+/**
+ * A parser that reads input from the given {@link Reader} and parses it to produce a
+ * {@link Template}.
+ *
+ * @author emcmanus@google.com (Éamonn McManus)
+ */
+class Parser {
+ /** Value that {@link #c} holds once the reader has no more characters to deliver. */
+ private static final int EOF = -1;
+
+ /** Source of the template characters; its line counter supplies {@link #lineNumber()}. */
+ private final LineNumberReader reader;
+ /** Name of the resource being parsed, passed to the nodes and exceptions created here. */
+ private final String resourceName;
+ /** Used by {@code #parse} handling to open the resource named in the directive. */
+ private final Template.ResourceOpener resourceOpener;
+
+ /**
+ * The invariant of this parser is that {@code c} is always the next character of interest.
+ * This means that we almost never have to "unget" a character by reading too far. For example,
+ * after we parse an integer, {@code c} will be the first character after the integer, which is
+ * exactly the state we will be in when there are no more digits.
+ *
+ * <p>Sometimes we need to read two characters ahead, and in that case we use {@link #pushback}.
+ */
+ private int c;
+
+ /**
+ * A single character of pushback. If this is not negative, the {@link #next()} method will
+ * return it instead of reading a character, and will then reset this field to -1.
+ */
+ private int pushback = -1;
+
+ /**
+  * Creates a parser over {@code reader} and reads the first character into {@link #c}, so that
+  * the "next character of interest" invariant holds as soon as construction finishes.
+  *
+  * @param reader source of the template text; it is wrapped in a {@link LineNumberReader} whose
+  *     numbering is set to start at 1
+  * @param resourceName name recorded in the nodes and exceptions this parser creates
+  * @param resourceOpener used to open the resources named by {@code #parse} directives
+  * @throws IOException if reading the first character fails
+  */
+ Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)
+     throws IOException {
+   LineNumberReader numbered = new LineNumberReader(reader);
+   numbered.setLineNumber(1);
+   this.reader = numbered;
+   this.resourceName = resourceName;
+   this.resourceOpener = resourceOpener;
+   next();
+ }
+
+ /**
+  * Parses the whole input and returns the resulting {@link Template}.
+  *
+  * <p>The work is split into two phases. The first phase turns the input into a flat sequence of
+  * "tokens". A token may be a complete reference such as <pre>
+  *    ${x.foo()[23]}
+  * </pre>or a complete directive such as <pre>
+  *    #set ($x = $y + $z)
+  * </pre>but a token never spans a compound construct. For instance <pre>
+  *    #if ($x == $y) something #end
+  * </pre>produces three tokens: <pre>
+  *    #if ($x == $y)
+  *    (literal text " something ")
+  *    #end
+  * </pre>
+  *
+  * <p>The second phase walks that token sequence and builds the parse tree. Tokens like the
+  * reference and the {@code #set} above go into the tree unchanged, whereas the whole
+  * {@code #if ... #end} sequence is folded into a single IfNode.
+  *
+  * <p>This design exists because Velocity has two lexical contexts. At the top level the input
+  * is arbitrary literal text mixed with references like <code>${x.foo()}</code> and directives
+  * like {@code #if} or {@code #set}. Inside the parentheses of a directive there is no literal
+  * text and there are no directives, only expressions, so <pre>
+  *    #if ($x == $a + $b)
+  * </pre>has to be read as the five tokens "$x", "==", "$a", "+", "$b". Instead of a classical
+  * lexer that switches between those two modes, the first phase is an ad-hoc parser that plays
+  * the lexer's role, and the second phase is a simple parser over the tokens it produced.
+  *
+  * @throws IOException if reading from the underlying reader fails
+  */
+ Template parse() throws IOException {
+   return new Reparser(parseTokens()).reparse();
+ }
+
+ /**
+  * Runs the first parsing phase: repeatedly calls {@link #parseNode()} and collects the results.
+  * The returned list always ends with the {@link EofNode} that stopped the loop.
+  */
+ private ImmutableList<Node> parseTokens() throws IOException {
+   ImmutableList.Builder<Node> collected = ImmutableList.builder();
+   while (true) {
+     Node node = parseNode();
+     collected.add(node);
+     if (node instanceof EofNode) {
+       return collected.build();
+     }
+   }
+ }
+
+ /**
+ * Returns the line number currently reported by {@link #reader}. This reflects how far the
+ * reader has been consumed, which includes the character currently held in {@code c}.
+ */
+ private int lineNumber() {
+ return reader.getLineNumber();
+ }
+
+ /**
+  * Advances {@code c} to the next character. A pending {@link #pushback} character is used
+  * first (and cleared); otherwise a character is read from the reader. Once {@code c} is
+  * {@link #EOF} it stays there and nothing more is read.
+  */
+ private void next() throws IOException {
+   if (c == EOF) {
+     return;
+   }
+   if (pushback >= 0) {
+     c = pushback;
+     pushback = -1;
+   } else {
+     c = reader.read();
+   }
+ }
+
+ /**
+ * Saves the current character {@code c} to be read again, and sets {@code c} to the given
+ * {@code c1}. Suppose the text contains {@code xy} and we have just read {@code y}.
+ * So {@code c == 'y'}. Now if we execute {@code pushback('x')}, we will have
+ * {@code c == 'x'} and the next call to {@link #next()} will set {@code c == 'y'}. Subsequent
+ * calls to {@code next()} will continue reading from {@link #reader}. So the pushback
+ * essentially puts us back in the state we were in before we read {@code y}.
+ *
+ * <p>Only one character can be held: a second call before {@link #next()} has consumed the
+ * first saved character overwrites it.
+ *
+ * @param c1 the character that becomes the new value of {@code c}
+ */
+ private void pushback(int c1) {
+ // Order matters: the old value of c must be stashed before it is overwritten.
+ pushback = c;
+ c = c1;
+ }
+
+ /**
+  * Advances past whitespace: on return {@code c} is the first character, starting from the
+  * current one, for which {@link Character#isWhitespace(int)} is false. That may be the
+  * end-of-input marker.
+  */
+ private void skipSpace() throws IOException {
+   while (true) {
+     if (!Character.isWhitespace(c)) {
+       return;
+     }
+     next();
+   }
+ }
+
+ /**
+  * Discards the current character unconditionally, then discards any whitespace that follows,
+  * leaving {@code c} on the first non-whitespace character after the one that was current.
+  */
+ private void nextNonSpace() throws IOException {
+   next();
+   while (Character.isWhitespace(c)) {
+     next();
+   }
+ }
+
+ /**
+  * Skips whitespace and then requires the current character to be {@code expected}. On success
+  * that character is consumed, so {@code c} is the character right after it.
+  *
+  * @param expected the character that must come next, ignoring leading whitespace
+  * @throws ParseException if some other character (or end of input) is found instead
+  */
+ private void expect(char expected) throws IOException {
+   skipSpace();
+   if (c != expected) {
+     throw parseException("Expected " + expected);
+   }
+   next();
+ }
+
+ /**
+  * Parses one first-phase node, choosing the kind from the current character.
+  * <pre>{@code
+  * <template> -> <empty> |
+  *               <directive> <template> |
+  *               <non-directive> <template>
+  * }</pre>
+  *
+  * <p>End of input yields an {@link EofNode}. A {@code #} starts a line comment ({@code ##}), a
+  * block comment ({@code #*}), a possible quoted block ({@code #[}), or a directive ({@code #{}
+  * or {@code #} plus a letter). Anything else goes to {@link #parseNonDirective()}.
+  */
+ private Node parseNode() throws IOException {
+   if (c == EOF) {
+     return new EofNode(resourceName, lineNumber());
+   }
+   if (c != '#') {
+     return parseNonDirective();
+   }
+   next();
+   if (c == '#') {
+     return parseLineComment();
+   }
+   if (c == '*') {
+     return parseBlockComment();
+   }
+   if (c == '[') {
+     return parseHashSquare();
+   }
+   if (c == '{' || isAsciiLetter(c)) {
+     return parseDirective();
+   }
+   // For consistency with Velocity, a # that is not followed by a letter or by one of the
+   // characters handled above is an ordinary character, and #$foo is a literal # followed by
+   // the reference $foo.
+   return parsePlainText('#');
+ }
+
+ /**
+  * Handles input that starts with {@code #[}. If another {@code [} follows, this is a quoted
+  * block {@code #[[...]]#} whose contents are returned verbatim as a constant. Otherwise the
+  * {@code #[} is literal text and is returned together with the plain text that follows it.
+  *
+  * <p>On entry {@code c} is the first {@code [}.
+  *
+  * @throws ParseException if a quoted block is opened but end of input arrives before the
+  *     closing {@code ]]#}; the exception reports the line where the block started
+  */
+ private Node parseHashSquare() throws IOException {
+   assert c == '[';
+   next();
+   if (c != '[') {
+     return parsePlainText(new StringBuilder("#["));
+   }
+   int startLine = lineNumber();
+   next();
+   StringBuilder text = new StringBuilder();
+   while (true) {
+     if (c == EOF) {
+       throw new ParseException(
+           "Unterminated #[[ - did not see matching ]]#", resourceName, startLine);
+     }
+     // A # ends the block only when the two characters collected just before it are "]]";
+     // any other # is ordinary content.
+     int length = text.length();
+     boolean closing =
+         c == '#'
+             && length > 1
+             && text.charAt(length - 1) == ']'
+             && text.charAt(length - 2) == ']';
+     if (closing) {
+       break;
+     }
+     text.append((char) c);
+     next();
+   }
+   next();
+   // Drop the "]]" that was collected as part of the terminator.
+   text.setLength(text.length() - 2);
+   return new ConstantExpressionNode(resourceName, lineNumber(), text.toString());
+ }
+
+ /**
+  * Parses one node that is not a directive.
+  * <pre>{@code
+  * <non-directive> -> <reference> |
+  *                    <text containing neither $ nor #>
+  * }</pre>
+  *
+  * <p>A {@code $} begins a reference only when it is followed by an ASCII letter or {@code {};
+  * otherwise the {@code $} is the first character of plain text.
+  */
+ private Node parseNonDirective() throws IOException {
+   if (c != '$') {
+     int first = c;
+     next();
+     return parsePlainText(first);
+   }
+   next();
+   boolean startsReference = isAsciiLetter(c) || c == '{';
+   if (startsReference) {
+     return parseReference();
+   }
+   return parsePlainText('$');
+ }
+
+ /**
+  * Parses one directive token. A directive name may be written bare or in braces, for example
+  * {@code #if} or {@code #{if}}; the productions below show only the bare spelling:
+  * <pre>{@code
+  * <directive> -> <if-token> |
+  *                <else-token> |
+  *                <elseif-token> |
+  *                <end-token> |
+  *                <foreach-token> |
+  *                <set-token> |
+  *                <parse-token> |
+  *                <macro-token> |
+  *                <macro-call> |
+  *                <comment>
+  * }</pre>
+  *
+  * <p>On entry {@code c} is the character after the {@code #}. A single newline immediately
+  * after the directive is consumed.
+  */
+ private Node parseDirective() throws IOException {
+   boolean braced = (c == '{');
+   if (braced) {
+     next();
+   }
+   String directive = parseId(braced ? "Directive inside #{...}" : "Directive");
+   if (braced) {
+     expect('}');
+   }
+   Node node = parseDirectiveBody(directive);
+   // Velocity skips a newline after any directive.
+   // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented.
+   if (c == '\n') {
+     next();
+   }
+   return node;
+ }
+
+ /**
+  * Parses whatever follows the directive name {@code directive}. Names that are not built-in
+  * directives are treated as macro calls.
+  */
+ private Node parseDirectiveBody(String directive) throws IOException {
+   switch (directive) {
+     case "end":
+       return new EndTokenNode(resourceName, lineNumber());
+     case "if":
+     case "elseif":
+       return parseIfOrElseIf(directive);
+     case "else":
+       return new ElseTokenNode(resourceName, lineNumber());
+     case "foreach":
+       return parseForEach();
+     case "set":
+       return parseSet();
+     case "parse":
+       return parseParse();
+     case "macro":
+       return parseMacroDefinition();
+     default:
+       return parsePossibleMacroCall(directive);
+   }
+ }
+
+ /**
+  * Parses the parenthesized condition of an {@code #if} or {@code #elseif}.
+  * <pre>{@code
+  * <if-token> -> #if ( <condition> )
+  * <elseif-token> -> #elseif ( <condition> )
+  * }</pre>
+  *
+  * @param directive either {@code "if"} or {@code "elseif"}; any value other than {@code "if"}
+  *     produces an {@link ElseIfTokenNode}
+  */
+ private Node parseIfOrElseIf(String directive) throws IOException {
+   expect('(');
+   ExpressionNode test = parseExpression();
+   expect(')');
+   if (directive.equals("if")) {
+     return new IfTokenNode(test);
+   }
+   return new ElseIfTokenNode(test);
+ }
+
+ /**
+  * Parses what follows {@code #foreach}. <pre>{@code
+  * <foreach-token> -> #foreach ( $<id> in <expression> )
+  * }</pre>
+  *
+  * @throws ParseException if the loop variable is not followed by the keyword {@code in}
+  */
+ private Node parseForEach() throws IOException {
+   expect('(');
+   expect('$');
+   String loopVariable = parseId("For-each variable");
+   skipSpace();
+   // Match the two letters of "in" one at a time; a failure is reported at whichever
+   // character did not match.
+   if (c != 'i') {
+     throw parseException("Expected 'in' for #foreach");
+   }
+   next();
+   if (c != 'n') {
+     throw parseException("Expected 'in' for #foreach");
+   }
+   next();
+   ExpressionNode iterated = parseExpression();
+   expect(')');
+   return new ForEachTokenNode(loopVariable, iterated);
+ }
+
+ /**
+  * Parses what follows {@code #set} and produces the finished {@link SetNode}. <pre>{@code
+  * <set-token> -> #set ( $<id> = <expression>)
+  * }</pre>
+  */
+ private Node parseSet() throws IOException {
+   expect('(');
+   expect('$');
+   String target = parseId("#set variable");
+   expect('=');
+   ExpressionNode assigned = parseExpression();
+   expect(')');
+   return new SetNode(target, assigned);
+ }
+
+ /**
+  * Parses what follows {@code #parse} and tokenizes the named resource on the spot. <pre>{@code
+  * <parse-token> -> #parse ( <string-literal> )
+  * }</pre>
+  *
+  * <p>This deliberately differs from Velocity. Velocity evaluates {@code #parse} when it is
+  * reached during template evaluation, so its argument can be a variable and the directive can
+  * be guarded by an {@code #if}. Neither is possible in EscapeVelocity: the nested resource's
+  * tokens are spliced into the enclosing template much as if they had been written inline. A
+  * consequence is that EscapeVelocity allows forward references to macros defined inside a
+  * {@code #parse}d resource, which Velocity does not.
+  *
+  * @throws ParseException if the argument is not a quoted string literal
+  */
+ private Node parseParse() throws IOException {
+   expect('(');
+   skipSpace();
+   boolean quoted = (c == '"' || c == '\'');
+   if (!quoted) {
+     throw parseException("#parse only supported with string literal argument");
+   }
+   // References are not expanded in the name, so it can be evaluated without a context.
+   ExpressionNode nameExpression = parseStringLiteral(c, false);
+   String nestedName = nameExpression.evaluate(null).toString();
+   expect(')');
+   try (Reader nested = resourceOpener.openResource(nestedName)) {
+     ImmutableList<Node> nestedTokens =
+         new Parser(nested, nestedName, resourceOpener).parseTokens();
+     return new NestedTokenNode(nestedName, nestedTokens);
+   }
+ }
+
+ /**
+  * Parses what follows {@code #macro}. <pre>{@code
+  * <macro-token> -> #macro ( <id> <macro-parameter-list> )
+  * <macro-parameter-list> -> <empty> |
+  *                           $<id> <macro-parameter-list>
+  * }</pre>
+  *
+  * <p>A comma may optionally appear before each parameter.
+  *
+  * @throws ParseException if something other than {@code $name} appears where a parameter is
+  *     expected
+  */
+ private Node parseMacroDefinition() throws IOException {
+   expect('(');
+   skipSpace();
+   String macroName = parseId("Macro name");
+   ImmutableList.Builder<String> parameters = ImmutableList.builder();
+   skipSpace();
+   while (c != ')') {
+     if (c == ',') {
+       nextNonSpace();
+     }
+     if (c != '$') {
+       throw parseException("Macro parameters should look like $name");
+     }
+     next();
+     parameters.add(parseId("Macro parameter name"));
+     skipSpace();
+   }
+   next();
+   return new MacroDefinitionTokenNode(resourceName, lineNumber(), macroName, parameters.build());
+ }
+
+ /**
+  * Parses what follows a {@code #name} that is not a built-in directive, on the assumption that
+  * it is a call to a macro defined in the template. Macro definitions are collected during the
+  * second parsing phase rather than during evaluation, so a macro may be called before the
+  * point where it is defined.
+  * <pre>{@code
+  * <macro-call> -> # <id> ( <expression-list> )
+  * <expression-list> -> <empty> |
+  *                      <expression> <optional-comma> <expression-list>
+  * <optional-comma> -> <empty> | ,
+  * }</pre>
+  *
+  * @param directive the identifier that followed {@code #}
+  * @throws ParseException if the identifier is not followed by {@code (}
+  */
+ private Node parsePossibleMacroCall(String directive) throws IOException {
+   skipSpace();
+   if (c != '(') {
+     throw parseException("Unrecognized directive #" + directive);
+   }
+   nextNonSpace();
+   ImmutableList.Builder<Node> arguments = ImmutableList.builder();
+   while (c != ')') {
+     arguments.add(parsePrimary());
+     if (c == ',') {
+       // The documentation doesn't say so, but you can apparently have an optional comma in
+       // macro calls.
+       next();
+     }
+     skipSpace();
+   }
+   next();
+   return new DirectiveNode.MacroCallNode(
+       resourceName, lineNumber(), directive, arguments.build());
+ }
+
+ /**
+  * Consumes a {@code ##} line comment: everything up to and including the next newline, or up
+  * to end of input. The returned node carries the line number seen on entry.
+  */
+ private Node parseLineComment() throws IOException {
+   int startLine = lineNumber();
+   while (!(c == '\n' || c == EOF)) {
+     next();
+   }
+   // Step over the newline; at end of input this call changes nothing.
+   next();
+   return new CommentTokenNode(resourceName, startLine);
+ }
+
+ /**
+  * Consumes a {@code #*} block comment: everything up to and including the next {@code *#}.
+  * On entry {@code c} is the {@code *} that opened the comment; that {@code *} is never treated
+  * as the first half of the terminator.
+  */
+ private Node parseBlockComment() throws IOException {
+   assert c == '*';
+   int startLine = lineNumber();
+   int previous = '\0';
+   next();
+   // Consistently with Velocity, we do not make it an error if a #* comment is not closed.
+   while (c != EOF) {
+     if (previous == '*' && c == '#') {
+       break;
+     }
+     previous = c;
+     next();
+   }
+   next(); // this may read EOF twice, which works
+   return new CommentTokenNode(resourceName, startLine);
+ }
+
+ /**
+  * Parses plain text, meaning text up to the next {@code $}, {@code #} or end of input.
+  *
+  * @param firstChar the already-consumed first character of the text; {@link #c} is the
+  *     character after it
+  */
+ private Node parsePlainText(int firstChar) throws IOException {
+   return parsePlainText(new StringBuilder().appendCodePoint(firstChar));
+ }
+
+ /**
+  * Appends characters to {@code sb} until {@code c} is {@code $}, {@code #} or end of input,
+  * then returns the accumulated text as a constant node.
+  *
+  * @param sb text already recognised as plain; it is extended in place
+  */
+ private Node parsePlainText(StringBuilder sb) throws IOException {
+   while (c != EOF && c != '$' && c != '#') {
+     sb.appendCodePoint(c);
+     next();
+   }
+   return new ConstantExpressionNode(resourceName, lineNumber(), sb.toString());
+ }
+
+ /**
+  * Parses a reference, that is, anything that can begin with {@code $}. The reference may be
+  * wrapped in braces, so {@code $x} and {@code ${x}} mean the same thing. Braces matter when the
+  * following text would otherwise be absorbed into the reference: {@code ${x}y} is the variable
+  * {@code $x} followed by the text {@code y}, while {@code $xy} is the variable {@code $xy}.
+  * <pre>{@code
+  * <reference> -> $<reference-no-brace> |
+  *                ${<reference-no-brace>}
+  * }</pre>
+  *
+  * <p>On entry {@link #c} is the character immediately after the {@code $}. If that is a
+  * {@code {} not followed by an ASCII letter, the result is plain text beginning with
+  * {@code ${} rather than a reference.
+  */
+ private Node parseReference() throws IOException {
+   if (c != '{') {
+     return parseReferenceNoBrace();
+   }
+   next();
+   if (!isAsciiLetter(c)) {
+     return parsePlainText(new StringBuilder("${"));
+   }
+   ReferenceNode braced = parseReferenceNoBrace();
+   expect('}');
+   return braced;
+ }
+
+ /**
+  * Like {@link #parseReference()}, but the input really must be a reference. In ordinary text
+  * a {@code $} that is not followed by an identifier is just a character, whereas inside an
+  * expression, as in {@code #if ($x == 23)}, a {@code $} has to introduce an identifier, so
+  * anything else is a parse error.
+  */
+ private ReferenceNode parseRequiredReference() throws IOException {
+   if (c != '{') {
+     return parseReferenceNoBrace();
+   }
+   next();
+   ReferenceNode braced = parseReferenceNoBrace();
+   expect('}');
+   return braced;
+ }
+
+ /**
+  * Parses a reference written without braces: an identifier followed by any suffixes.
+  * <pre>{@code
+  * <reference-no-brace> -> <id><reference-suffix>
+  * }</pre>
+  */
+ private ReferenceNode parseReferenceNoBrace() throws IOException {
+   // The identifier is read before the node is built, so the node's line number is the one
+   // in effect after the identifier has been consumed.
+   String name = parseId("Reference");
+   return parseReferenceSuffix(new PlainReferenceNode(resourceName, lineNumber(), name));
+ }
+
+ /**
+  * Parses whatever modifiers follow a reference.
+  * <pre>{@code
+  * <reference-suffix> -> <empty> |
+  *                       <reference-member> |
+  *                       <reference-index>
+  * }</pre>
+  *
+  * @param lhs the reference parsed so far: the {@code $x} in {@code $x.foo} or
+  *     {@code $x.foo()}, or later the {@code $x.y} in {@code $x.y.z}
+  * @return {@code lhs} itself when no suffix follows, otherwise the reference including its
+  *     suffixes
+  */
+ private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException {
+   if (c == '.') {
+     return parseReferenceMember(lhs);
+   }
+   if (c == '[') {
+     return parseReferenceIndex(lhs);
+   }
+   return lhs;
+ }
+
+ /**
+  * Parses a member suffix: either a property access like {@code $x.y} or a method call like
+  * {@code $x.y($z)}.
+  * <pre>{@code
+  * <reference-member> -> .<reference-property-or-method><reference-suffix>
+  * <reference-property-or-method> -> <id> |
+  *                                   <id> ( <method-parameter-list> )
+  * }</pre>
+  *
+  * <p>If the dot is not followed by an ASCII letter it is not part of the reference; the dot is
+  * restored as the current character and {@code lhs} is returned unchanged.
+  *
+  * @param lhs the reference to the left of the dot, like the {@code $x} in {@code $x.foo} or
+  *     {@code $x.foo()}
+  */
+ private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException {
+   assert c == '.';
+   next();
+   if (!isAsciiLetter(c)) {
+     // We've seen something like `$foo.!`, so it turns out it's not a member after all.
+     pushback('.');
+     return lhs;
+   }
+   String memberName = parseId("Member");
+   ReferenceNode member =
+       (c == '(')
+           ? parseReferenceMethodParams(lhs, memberName)
+           : new MemberReferenceNode(lhs, memberName);
+   return parseReferenceSuffix(member);
+ }
+
+ /**
+  * Parses the parenthesized arguments of a method reference such as {@code $foo.bar($a, $b)}.
+  * <pre>{@code
+  * <method-parameter-list> -> <empty> |
+  *                            <non-empty-method-parameter-list>
+  * <non-empty-method-parameter-list> -> <expression> |
+  *                                      <expression> , <non-empty-method-parameter-list>
+  * }</pre>
+  *
+  * <p>On entry {@code c} is the opening parenthesis.
+  *
+  * @param lhs the reference to the left of the dot, like the {@code $x} in {@code $x.foo()}
+  * @param id the method name
+  * @throws ParseException if the argument list is not closed by {@code )}
+  */
+ private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id)
+     throws IOException {
+   assert c == '(';
+   nextNonSpace();
+   ImmutableList.Builder<ExpressionNode> arguments = ImmutableList.builder();
+   if (c != ')') {
+     while (true) {
+       arguments.add(parseExpression());
+       if (c != ',') {
+         break;
+       }
+       nextNonSpace();
+     }
+     if (c != ')') {
+       throw parseException("Expected )");
+     }
+   }
+   assert c == ')';
+   next();
+   return new MethodReferenceNode(lhs, id, arguments.build());
+ }
+
+ /**
+  * Parses an index suffix of a reference, like the {@code [$i]} in {@code $x[$i]}.
+  * <pre>{@code
+  * <reference-index> -> [ <expression> ]
+  * }</pre>
+  *
+  * <p>On entry {@code c} is the opening bracket.
+  *
+  * @param lhs the reference to the left of the bracket, like the {@code $x} in {@code $x[$i]}
+  * @throws ParseException if the index expression is not followed by {@code ]}
+  */
+ private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException {
+   assert c == '[';
+   next();
+   ExpressionNode subscript = parseExpression();
+   if (c != ']') {
+     throw parseException("Expected ]");
+   }
+   next();
+   return parseReferenceSuffix(new IndexReferenceNode(lhs, subscript));
+ }
+
+ /**
+ * The binary operators understood in expressions, each with its source spelling and its
+ * precedence. A larger precedence number binds more tightly.
+ */
+ enum Operator {
+ /**
+ * A dummy operator with low precedence. When parsing subexpressions, we always stop when we
+ * reach an operator of lower precedence than the "current precedence". For example, when
+ * parsing {@code 1 + 2 * 3 + 4}, we'll stop parsing the subexpression {@code * 3 + 4} when
+ * we reach the {@code +} because it has lower precedence than {@code *}. This dummy operator,
+ * then, behaves like {@code +} when the minimum precedence is {@code *}. We also return it
+ * if we're looking for an operator and don't find one. If this operator is {@code ⊙}, it's as
+ * if our expressions are bracketed with it, like {@code ⊙ 1 + 2 * 3 + 4 ⊙}.
+ */
+ STOP("", 0),
+
+ // If a one-character operator is a prefix of a two-character operator, like < and <=, then
+ // the one-character operator must come first. The operator scanner relies on this order:
+ // it verifies that no operator has been chosen yet when it reaches a one-character symbol.
+ OR("||", 1),
+ AND("&&", 2),
+ EQUAL("==", 3), NOT_EQUAL("!=", 3),
+ LESS("<", 4), LESS_OR_EQUAL("<=", 4), GREATER(">", 4), GREATER_OR_EQUAL(">=", 4),
+ PLUS("+", 5), MINUS("-", 5),
+ TIMES("*", 6), DIVIDE("/", 6), REMAINDER("%", 6);
+
+ /** The operator as written in a template; empty for {@link #STOP}. */
+ final String symbol;
+ /** Binding strength; {@link #STOP} has the lowest value, 0. */
+ final int precedence;
+
+ Operator(String symbol, int precedence) {
+ this.symbol = symbol;
+ this.precedence = precedence;
+ }
+
+ /** Returns {@link #symbol}, so an operator prints as it is written in a template. */
+ @Override
+ public String toString() {
+ return symbol;
+ }
+ }
+
+ /**
+  * Lookup from a character to every operator whose symbol starts with it, in the declaration
+  * order of {@link Operator}. For example {@code <} maps to {@code LESS} followed by
+  * {@code LESS_OR_EQUAL}. {@link Operator#STOP} has an empty symbol and is left out.
+  */
+ private static final ImmutableListMultimap<Integer, Operator> CODE_POINT_TO_OPERATORS;
+ static {
+   ImmutableListMultimap.Builder<Integer, Operator> byFirstChar = ImmutableListMultimap.builder();
+   for (Operator candidate : Operator.values()) {
+     if (candidate == Operator.STOP) {
+       continue;
+     }
+     int firstCodePoint = candidate.symbol.charAt(0);
+     byFirstChar.put(firstCodePoint, candidate);
+   }
+   CODE_POINT_TO_OPERATORS = byFirstChar.build();
+ }
+
+ /**
+ * Parses an expression, which can occur within a directive like {@code #if} or {@code #set},
+ * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}.
+ * <pre>{@code
+ * <expression> -> <and-expression> |
+ * <expression> || <and-expression>
+ * <and-expression> -> <equality-expression> |
+ * <and-expression> && <equality-expression>
+ * <equality-expression> -> <relational-expression> |
+ * <equality-expression> <equality-op> <relational-expression>
+ * <equality-op> -> == | !=
+ * <relational-expression> -> <additive-expression> |
+ * <relational-expression> <relation> <additive-expression>
+ * <relation> -> < | <= | > | >=
+ * <additive-expression> -> <multiplicative-expression> |
+ * <additive-expression> <add-op> <multiplicative-expression>
+ * <add-op> -> + | -
+ * <multiplicative-expression> -> <unary-expression> |
+ * <multiplicative-expression> <mult-op> <unary-expression>
+ * <mult-op> -> * | / | %
+ * }</pre>
+ */
+ private ExpressionNode parseExpression() throws IOException {
+ // The left operand must be parsed first: constructing the OperatorParser immediately scans
+ // the input for the operator that follows it.
+ ExpressionNode lhs = parseUnaryExpression();
+ // 1 is the lowest precedence of any real operator, so every operator is considered.
+ return new OperatorParser().parse(lhs, 1);
+ }
+
+ /**
+ * An operator-precedence parser for the binary operations we understand. It implements an
+ * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia
+ * that uses recursion rather than having an explicit stack of operators and values.
+ *
+ * <p>This is an inner class because it reads input through the enclosing parser's {@code c},
+ * {@code next()} and {@code skipSpace()}.
+ */
+ private class OperatorParser {
+ /**
+ * The operator we have just scanned, in the same way that {@link #c} is the character we have
+ * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
+ */
+ private Operator currentOperator;
+
+ /** Scans the first operator, so the left operand must already have been consumed. */
+ OperatorParser() throws IOException {
+ nextOperator();
+ }
+
+ /**
+ * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider
+ * operators with precedence at least {@code minPrecedence}.
+ *
+ * @param lhs the operand that has already been parsed
+ * @param minPrecedence operators with a lower precedence than this end the subexpression
+ * @return the parsed subexpression
+ */
+ ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
+ while (currentOperator.precedence >= minPrecedence) {
+ Operator operator = currentOperator;
+ ExpressionNode rhs = parseUnaryExpression();
+ nextOperator();
+ // A tighter-binding operator after rhs claims rhs as its own left operand. Operators of
+ // equal precedence do not, so they end up grouping left to right in the outer loop.
+ while (currentOperator.precedence > operator.precedence) {
+ rhs = parse(rhs, currentOperator.precedence);
+ }
+ lhs = new BinaryExpressionNode(lhs, operator, rhs);
+ }
+ return lhs;
+ }
+
+ /**
+ * Updates {@link #currentOperator} to be an operator read from the input,
+ * or {@link Operator#STOP} if there is none. Leading whitespace is skipped first. When an
+ * operator is found its characters are consumed.
+ *
+ * @throws ParseException if the input holds only the first character of a two-character
+ * operator that has no one-character form, such as a lone {@code =}
+ */
+ private void nextOperator() throws IOException {
+ skipSpace();
+ ImmutableList<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
+ if (possibleOperators.isEmpty()) {
+ currentOperator = Operator.STOP;
+ return;
+ }
+ char firstChar = Chars.checkedCast(c);
+ next();
+ Operator operator = null;
+ // Candidates are in declaration order, so a one-character operator is seen before a longer
+ // one sharing its first character, and the longer one replaces it if its second character
+ // matches.
+ for (Operator possibleOperator : possibleOperators) {
+ if (possibleOperator.symbol.length() == 1) {
+ Verify.verify(operator == null);
+ operator = possibleOperator;
+ } else if (possibleOperator.symbol.charAt(1) == c) {
+ next();
+ operator = possibleOperator;
+ }
+ }
+ if (operator == null) {
+ throw parseException(
+ "Expected " + Iterables.getOnlyElement(possibleOperators) + ", not just " + firstChar);
+ }
+ currentOperator = operator;
+ }
+ }
+
+ /**
+  * Parses an expression that has no binary operators at its top level (they may appear inside
+  * parentheses).
+  * <pre>{@code
+  * <unary-expression> -> <primary> |
+  *                       ( <expression> ) |
+  *                       ! <unary-expression>
+  * }</pre>
+  *
+  * <p>Whitespace before the expression is skipped, and so is whitespace after a parenthesized
+  * or negated expression.
+  */
+ private ExpressionNode parseUnaryExpression() throws IOException {
+   skipSpace();
+   if (c == '(') {
+     nextNonSpace();
+     ExpressionNode inner = parseExpression();
+     expect(')');
+     skipSpace();
+     return inner;
+   }
+   if (c == '!') {
+     next();
+     ExpressionNode negated = new NotExpressionNode(parseUnaryExpression());
+     skipSpace();
+     return negated;
+   }
+   return parsePrimary();
+ }
+
+ /**
+  * Parses an expression consisting of a single literal or reference, and skips any whitespace
+  * after it.
+  * <pre>{@code
+  * <primary> -> <reference> |
+  *              <string-literal> |
+  *              <integer-literal> |
+  *              <boolean-literal>
+  * }</pre>
+  *
+  * @throws ParseException if the current character cannot start any of these
+  */
+ private ExpressionNode parsePrimary() throws IOException {
+   ExpressionNode primary;
+   switch (c) {
+     case '$':
+       next();
+       primary = parseRequiredReference();
+       break;
+     case '"':
+       // Only double-quoted strings have references expanded.
+       primary = parseStringLiteral(c, true);
+       break;
+     case '\'':
+       primary = parseStringLiteral(c, false);
+       break;
+     case '-':
+       // Velocity does not have a negation operator. If we see '-' it must be the start of a
+       // negative integer literal.
+       next();
+       primary = parseIntLiteral("-");
+       break;
+     default:
+       if (isAsciiDigit(c)) {
+         primary = parseIntLiteral("");
+       } else if (isAsciiLetter(c)) {
+         primary = parseBooleanLiteral();
+       } else {
+         throw parseException("Expected an expression");
+       }
+   }
+   skipSpace();
+   return primary;
+ }
+
+ /**
+  * Parses a string literal, which may contain references to be expanded. Examples are
+  * {@code "foo"} or {@code "foo${bar}baz"}.
+  * <pre>{@code
+  * <string-literal> -> <double-quote-literal> | <single-quote-literal>
+  * <double-quote-literal> -> " <double-quote-string-contents> "
+  * <double-quote-string-contents> -> <empty> |
+  *                                   <reference> <double-quote-string-contents> |
+  *                                   <character-other-than-"> <double-quote-string-contents>
+  * <single-quote-literal> -> ' <single-quote-string-contents> '
+  * <single-quote-string-contents> -> <empty> |
+  *                                   <character-other-than-'> <single-quote-string-contents>
+  * }</pre>
+  *
+  * <p>On entry {@code c} is the opening quote.
+  *
+  * @param quote the quote character that opened the literal and must close it
+  * @param allowReferences whether a {@code $} inside the literal starts a reference; when
+  *     false, {@code $} is an ordinary character
+  * @throws ParseException if a newline or end of input arrives before the closing quote, or if
+  *     the literal contains a backslash
+  */
+ private ExpressionNode parseStringLiteral(int quote, boolean allowReferences)
+     throws IOException {
+   assert c == quote;
+   next();
+   ImmutableList.Builder<Node> pieces = ImmutableList.builder();
+   StringBuilder pending = new StringBuilder();
+   while (c != quote) {
+     if (c == '\n' || c == EOF) {
+       throw parseException("Unterminated string constant");
+     }
+     if (c == '\\') {
+       throw parseException(
+           "Escapes in string constants are not currently supported");
+     }
+     if (c == '$' && allowReferences) {
+       // Flush the literal text gathered so far so that pieces stay in source order.
+       if (pending.length() > 0) {
+         pieces.add(new ConstantExpressionNode(resourceName, lineNumber(), pending.toString()));
+         pending.setLength(0);
+       }
+       next();
+       // NOTE(review): for "${" not followed by a letter, parseReference() falls back to
+       // parsePlainText, which stops only at $, # or end of input and so can read past the
+       // closing quote - confirm whether that is intended.
+       pieces.add(parseReference());
+     } else {
+       pending.appendCodePoint(c);
+       next();
+     }
+   }
+   next();
+   if (pending.length() > 0) {
+     pieces.add(new ConstantExpressionNode(resourceName, lineNumber(), pending.toString()));
+   }
+   return new StringLiteralNode(resourceName, lineNumber(), pieces.build());
+ }
+
+ /**
+  * Expression node for a string literal. It holds the literal's pieces in source order and
+  * evaluates to the concatenation of their values.
+  */
+ private static class StringLiteralNode extends ExpressionNode {
+   private final ImmutableList<Node> nodes;
+
+   StringLiteralNode(String resourceName, int lineNumber, ImmutableList<Node> nodes) {
+     super(resourceName, lineNumber);
+     this.nodes = nodes;
+   }
+
+   /** Evaluates every piece against {@code context} and joins the results into one string. */
+   @Override
+   Object evaluate(EvaluationContext context) {
+     StringBuilder joined = new StringBuilder();
+     for (int i = 0; i < nodes.size(); i++) {
+       joined.append(nodes.get(i).evaluate(context));
+     }
+     return joined.toString();
+   }
+ }
+
+ /**
+  * Parses a run of ASCII digits as an integer constant.
+  *
+  * @param prefix text placed in front of the digits before conversion; callers pass
+  *     {@code "-"} for a negative literal and {@code ""} otherwise
+  * @throws ParseException if prefix plus digits is not a valid {@code int}, for example when
+  *     there are no digits or the value does not fit
+  */
+ private ExpressionNode parseIntLiteral(String prefix) throws IOException {
+   StringBuilder digits = new StringBuilder(prefix);
+   while (isAsciiDigit(c)) {
+     digits.appendCodePoint(c);
+     next();
+   }
+   Integer parsed = Ints.tryParse(digits.toString());
+   if (parsed != null) {
+     return new ConstantExpressionNode(resourceName, lineNumber(), parsed);
+   }
+   throw parseException("Invalid integer: " + digits);
+ }
+
+ /**
+  * Parses a boolean literal, either {@code true} or {@code false}.
+  * <pre>{@code
+  * <boolean-literal> -> true |
+  *                      false
+  * }</pre>
+  *
+  * @throws ParseException if the identifier found is neither {@code true} nor {@code false}
+  */
+ private ExpressionNode parseBooleanLiteral() throws IOException {
+   String word = parseId("Identifier without $");
+   boolean isTrue = word.equals("true");
+   if (!isTrue && !word.equals("false")) {
+     throw parseException("Identifier in expression must be preceded by $ or be true or false");
+   }
+   return new ConstantExpressionNode(resourceName, lineNumber(), isTrue);
+ }
+
+ /** Matches the ASCII letters {@code A-Z} and {@code a-z}. */
+ private static final CharMatcher ASCII_LETTER =
+ CharMatcher.inRange('A', 'Z')
+ .or(CharMatcher.inRange('a', 'z'))
+ .precomputed();
+
+ /** Matches the ASCII digits {@code 0-9}. */
+ private static final CharMatcher ASCII_DIGIT =
+ CharMatcher.inRange('0', '9')
+ .precomputed();
+
+ /** Matches characters allowed inside an identifier: ASCII letters and digits, {@code -}, {@code _}. */
+ private static final CharMatcher ID_CHAR =
+ ASCII_LETTER
+ .or(ASCII_DIGIT)
+ .or(CharMatcher.anyOf("-_"))
+ .precomputed();
+
+ /** Tells whether {@code c} fits in a {@code char} (so is not EOF) and is an ASCII letter. */
+ private static boolean isAsciiLetter(int c) {
+   boolean fitsInChar = c >= Character.MIN_VALUE && c <= Character.MAX_VALUE;
+   return fitsInChar && ASCII_LETTER.matches((char) c);
+ }
+
+ /** Tells whether {@code c} fits in a {@code char} (so is not EOF) and is an ASCII digit. */
+ private static boolean isAsciiDigit(int c) {
+   boolean fitsInChar = c >= Character.MIN_VALUE && c <= Character.MAX_VALUE;
+   return fitsInChar && ASCII_DIGIT.matches((char) c);
+ }
+
+ /** Tells whether {@code c} fits in a {@code char} (so is not EOF) and may appear in an identifier. */
+ private static boolean isIdChar(int c) {
+   boolean fitsInChar = c >= Character.MIN_VALUE && c <= Character.MAX_VALUE;
+   return fitsInChar && ID_CHAR.matches((char) c);
+ }
+
+ /**
+  * Parses an identifier as specified by the
+  * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL
+  * reference guide</a>. Identifiers are ASCII: a letter, followed by any number of letters,
+  * digits, {@code -} and {@code _}.
+  *
+  * @param what description of the thing being parsed, used as the start of the error message
+  * @throws ParseException if the current character is not an ASCII letter
+  */
+ private String parseId(String what) throws IOException {
+   if (!isAsciiLetter(c)) {
+     throw parseException(what + " should start with an ASCII letter");
+   }
+   StringBuilder name = new StringBuilder();
+   // The first character is a letter, which is also an identifier character, so it is always
+   // taken.
+   do {
+     name.appendCodePoint(c);
+     next();
+   } while (isIdChar(c));
+   return name.toString();
+ }
+
+ /**
+  * Returns an exception to be thrown describing a parse error with the given message, and
+  * including information about where it occurred.
+  *
+  * <p>The location information is the line number in effect when this method is called, plus
+  * up to 20 characters of upcoming input as context ({@code "EOF"} if there is none, with
+  * {@code "..."} appended when more input follows). Gathering the context consumes input, so
+  * the parser cannot continue after calling this method.
+  *
+  * @param message description of what went wrong
+  * @return the exception for the caller to throw
+  * @throws IOException if reading the context characters fails
+  */
+ private ParseException parseException(String message) throws IOException {
+   // Capture the line before reading ahead: the context may span newlines, and each one read
+   // advances the reader's line counter, which would make the reported line too high.
+   int errorLine = lineNumber();
+   StringBuilder context = new StringBuilder();
+   if (c == EOF) {
+     context.append("EOF");
+   } else {
+     int count = 0;
+     while (c != EOF && count < 20) {
+       context.appendCodePoint(c);
+       next();
+       count++;
+     }
+     if (c != EOF) {
+       context.append("...");
+     }
+   }
+   return new ParseException(message, resourceName, errorLine, context.toString());
+ }
+}