aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/com/google/escapevelocity/Parser.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/com/google/escapevelocity/Parser.java')
-rw-r--r--src/main/java/com/google/escapevelocity/Parser.java963
1 files changed, 963 insertions, 0 deletions
diff --git a/src/main/java/com/google/escapevelocity/Parser.java b/src/main/java/com/google/escapevelocity/Parser.java
new file mode 100644
index 0000000..9982be3
--- /dev/null
+++ b/src/main/java/com/google/escapevelocity/Parser.java
@@ -0,0 +1,963 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.escapevelocity;
+
+import com.google.escapevelocity.DirectiveNode.SetNode;
+import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode;
+import com.google.escapevelocity.ExpressionNode.NotExpressionNode;
+import com.google.escapevelocity.ReferenceNode.IndexReferenceNode;
+import com.google.escapevelocity.ReferenceNode.MemberReferenceNode;
+import com.google.escapevelocity.ReferenceNode.MethodReferenceNode;
+import com.google.escapevelocity.ReferenceNode.PlainReferenceNode;
+import com.google.escapevelocity.TokenNode.CommentTokenNode;
+import com.google.escapevelocity.TokenNode.ElseIfTokenNode;
+import com.google.escapevelocity.TokenNode.ElseTokenNode;
+import com.google.escapevelocity.TokenNode.EndTokenNode;
+import com.google.escapevelocity.TokenNode.EofNode;
+import com.google.escapevelocity.TokenNode.ForEachTokenNode;
+import com.google.escapevelocity.TokenNode.IfTokenNode;
+import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode;
+import com.google.escapevelocity.TokenNode.NestedTokenNode;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A parser that reads input from the given {@link Reader} and parses it to produce a
+ * {@link Template}.
+ *
+ * @author emcmanus@google.com (Éamonn McManus)
+ */
+class Parser {
+ private static final int EOF = -1;
+
+ private final LineNumberReader reader;
+ private final String resourceName;
+ private final Template.ResourceOpener resourceOpener;
+
+ /**
+ * The invariant of this parser is that {@code c} is always the next character of interest.
+ * This means that we never have to "unget" a character by reading too far. For example, after
+ * we parse an integer, {@code c} will be the first character after the integer, which is exactly
+ * the state we will be in when there are no more digits.
+ */
+ private int c;
+
+ Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)
+ throws IOException {
+ this.reader = new LineNumberReader(reader);
+ this.reader.setLineNumber(1);
+ next();
+ this.resourceName = resourceName;
+ this.resourceOpener = resourceOpener;
+ }
+
+ /**
+ * Parse the input completely to produce a {@link Template}.
+ *
+ * <p>Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include
+ * entire references such as <pre>
+ * ${x.foo()[23]}
+ * </pre>or entire directives such as<pre>
+ * #set ($x = $y + $z)
+ * </pre>But tokens do not span complex constructs. For example,<pre>
+ * #if ($x == $y) something #end
+ * </pre>is three tokens:<pre>
+ * #if ($x == $y)
+ * (literal text " something ")
+ * #end
+ * </pre>
+ *
+ * <p>The second phase then takes the sequence of tokens and constructs a parse tree out of it.
+ * Some nodes in the parse tree will be unchanged from the token sequence, such as the <pre>
+ * ${x.foo()[23]}
+ * #set ($x = $y + $z)
+ * </pre> examples above. But a construct such as the {@code #if ... #end} mentioned above will
+ * become a single IfNode in the parse tree in the second phase.
+ *
+ * <p>The main reason for this approach is that Velocity has two kinds of lexical contexts. At the
+ * top level, there can be arbitrary literal text; references like <code>${x.foo()}</code>; and
+ * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however,
+ * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize
+ * the inside of <pre>
+ * #if ($x == $a + $b)
+ * </pre> as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical
+ * parser/lexer combination, where the lexer would need to switch between these two modes, we
+ * replace the lexer with an ad-hoc parser that is the first phase described above, and we
+ * define a simple parser over the resultant tokens that is the second phase.
+ */
+ Template parse() throws IOException {
+ ImmutableList<Node> tokens = parseTokens();
+ return new Reparser(tokens).reparse();
+ }
+
+ private ImmutableList<Node> parseTokens() throws IOException {
+ ImmutableList.Builder<Node> tokens = ImmutableList.builder();
+ Node token;
+ do {
+ token = parseNode();
+ tokens.add(token);
+ } while (!(token instanceof EofNode));
+ return tokens.build();
+ }
+
+ private int lineNumber() {
+ return reader.getLineNumber();
+ }
+
+ /**
+ * Gets the next character from the reader and assigns it to {@code c}. If there are no more
+ * characters, sets {@code c} to {@link #EOF} if it is not already.
+ */
+ private void next() throws IOException {
+ if (c != EOF) {
+ c = reader.read();
+ }
+ }
+
+ /**
+ * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or
+ * there are no more characters.
+ */
+ private void skipSpace() throws IOException {
+ while (Character.isWhitespace(c)) {
+ next();
+ }
+ }
+
+ /**
+ * Gets the next character from the reader, and if it is a space character, keeps reading until
+ * a non-space character is found.
+ */
+ private void nextNonSpace() throws IOException {
+ next();
+ skipSpace();
+ }
+
+ /**
+ * Skips any space in the reader, and then throws an exception if the first non-space character
+ * found is not the expected one. Sets {@code c} to the first character after that expected one.
+ */
+ private void expect(char expected) throws IOException {
+ skipSpace();
+ if (c == expected) {
+ next();
+ } else {
+ throw parseException("Expected " + expected);
+ }
+ }
+
+ /**
+ * Parses a single node from the reader, as part of the first parsing phase.
+ * <pre>{@code
+ * <template> -> <empty> |
+ * <directive> <template> |
+ * <non-directive> <template>
+ * }</pre>
+ */
+ private Node parseNode() throws IOException {
+ if (c == '#') {
+ next();
+ if (c == '#') {
+ return parseComment();
+ } else if (isAsciiLetter(c) || c == '{') {
+ return parseDirective();
+ } else if (c == '[') {
+ return parseHashSquare();
+ } else {
+ // For consistency with Velocity, we treat # not followed by # or a letter as a plain
+ // character, and we treat #$foo as a literal # followed by the reference $foo.
+ // But the # is its own ConstantExpressionNode; we don't try to merge it with adjacent text.
+ return new ConstantExpressionNode(resourceName, lineNumber(), "#");
+ }
+ }
+ if (c == EOF) {
+ return new EofNode(resourceName, lineNumber());
+ }
+ return parseNonDirective();
+ }
+
+ private Node parseHashSquare() throws IOException {
+ // We've just seen #[ which might be the start of a #[[quoted block]]#. If the next character
+ // is not another [ then it's not a quoted block, but it *is* a literal #[ followed by whatever
+ // that next character is.
+ assert c == '[';
+ next();
+ if (c != '[') {
+ return new ConstantExpressionNode(resourceName, lineNumber(), "#[");
+ }
+ next();
+ StringBuilder sb = new StringBuilder();
+ while (true) {
+ if (c == EOF) {
+ throw parseException("Unterminated #[[ - did not see matching ]]#");
+ }
+ if (c == '#') {
+ // This might be the last character of ]]# or it might just be a random #.
+ int len = sb.length();
+ if (len > 1 && sb.charAt(len - 1) == ']' && sb.charAt(len - 2) == ']') {
+ next();
+ break;
+ }
+ }
+ sb.append((char) c);
+ next();
+ }
+ String quoted = sb.substring(0, sb.length() - 2);
+ return new ConstantExpressionNode(resourceName, lineNumber(), quoted);
+ }
+
+ /**
+ * Parses a single non-directive node from the reader.
+ * <pre>{@code
+ * <non-directive> -> <reference> |
+ * <text containing neither $ nor #>
+ * }</pre>
+ */
+ private Node parseNonDirective() throws IOException {
+ if (c == '$') {
+ next();
+ if (isAsciiLetter(c) || c == '{') {
+ return parseReference();
+ } else {
+ return parsePlainText('$');
+ }
+ } else {
+ int firstChar = c;
+ next();
+ return parsePlainText(firstChar);
+ }
+ }
+
+ /**
+ * Parses a single directive token from the reader. Directives can be spelled with or without
+ * braces, for example {@code #if} or {@code #{if}}. We omit the brace spelling in the productions
+ * here: <pre>{@code
+ * <directive> -> <if-token> |
+ * <else-token> |
+ * <elseif-token> |
+ * <end-token> |
+ * <foreach-token> |
+ * <set-token> |
+ * <parse-token> |
+ * <macro-token> |
+ * <macro-call> |
+ * <comment>
+ * }</pre>
+ */
+ private Node parseDirective() throws IOException {
+ String directive;
+ if (c == '{') {
+ next();
+ directive = parseId("Directive inside #{...}");
+ expect('}');
+ } else {
+ directive = parseId("Directive");
+ }
+ Node node;
+ switch (directive) {
+ case "end":
+ node = new EndTokenNode(resourceName, lineNumber());
+ break;
+ case "if":
+ case "elseif":
+ node = parseIfOrElseIf(directive);
+ break;
+ case "else":
+ node = new ElseTokenNode(resourceName, lineNumber());
+ break;
+ case "foreach":
+ node = parseForEach();
+ break;
+ case "set":
+ node = parseSet();
+ break;
+ case "parse":
+ node = parseParse();
+ break;
+ case "macro":
+ node = parseMacroDefinition();
+ break;
+ default:
+ node = parsePossibleMacroCall(directive);
+ }
+ // Velocity skips a newline after any directive.
+ // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented.
+ if (c == '\n') {
+ next();
+ }
+ return node;
+ }
+
+ /**
+ * Parses the condition following {@code #if} or {@code #elseif}.
+ * <pre>{@code
+ * <if-token> -> #if ( <condition> )
+ * <elseif-token> -> #elseif ( <condition> )
+ * }</pre>
+ *
+ * @param directive either {@code "if"} or {@code "elseif"}.
+ */
+ private Node parseIfOrElseIf(String directive) throws IOException {
+ expect('(');
+ ExpressionNode condition = parseExpression();
+ expect(')');
+ return directive.equals("if") ? new IfTokenNode(condition) : new ElseIfTokenNode(condition);
+ }
+
+ /**
+ * Parses a {@code #foreach} token from the reader. <pre>{@code
+ * <foreach-token> -> #foreach ( $<id> in <expression> )
+ * }</pre>
+ */
+ private Node parseForEach() throws IOException {
+ expect('(');
+ expect('$');
+ String var = parseId("For-each variable");
+ skipSpace();
+ boolean bad = false;
+ if (c != 'i') {
+ bad = true;
+ } else {
+ next();
+ if (c != 'n') {
+ bad = true;
+ }
+ }
+ if (bad) {
+ throw parseException("Expected 'in' for #foreach");
+ }
+ next();
+ ExpressionNode collection = parseExpression();
+ expect(')');
+ return new ForEachTokenNode(var, collection);
+ }
+
+ /**
+ * Parses a {@code #set} token from the reader. <pre>{@code
+ * <set-token> -> #set ( $<id> = <expression>)
+ * }</pre>
+ */
+ private Node parseSet() throws IOException {
+ expect('(');
+ expect('$');
+ String var = parseId("#set variable");
+ expect('=');
+ ExpressionNode expression = parseExpression();
+ expect(')');
+ return new SetNode(var, expression);
+ }
+
+ /**
+ * Parses a {@code #parse} token from the reader. <pre>{@code
+ * <parse-token> -> #parse ( <string-literal> )
+ * }</pre>
+ *
+ * <p>The way this works is inconsistent with Velocity. In Velocity, the {@code #parse} directive
+ * is evaluated when it is encountered during template evaluation. That means that the argument
+ * can be a variable, and it also means that you can use {@code #if} to choose whether or not
+ * to do the {@code #parse}. Neither of those is true in EscapeVelocity. The contents of the
+ * {@code #parse} are integrated into the containing template pretty much as if they had been
+ * written inline. That also means that EscapeVelocity allows forward references to macros
+ * inside {@code #parse} directives, which Velocity does not.
+ */
+ private Node parseParse() throws IOException {
+ expect('(');
+ skipSpace();
+ if (c != '"') {
+ throw parseException("#parse only supported with string literal argument");
+ }
+ String nestedResourceName = readStringLiteral();
+ expect(')');
+ try (Reader nestedReader = resourceOpener.openResource(nestedResourceName)) {
+ Parser nestedParser = new Parser(nestedReader, nestedResourceName, resourceOpener);
+ ImmutableList<Node> nestedTokens = nestedParser.parseTokens();
+ return new NestedTokenNode(nestedResourceName, nestedTokens);
+ }
+ }
+
+ /**
+ * Parses a {@code #macro} token from the reader. <pre>{@code
+ * <macro-token> -> #macro ( <id> <macro-parameter-list> )
+ * <macro-parameter-list> -> <empty> |
+ * $<id> <macro-parameter-list>
+ * }</pre>
+ *
+ * <p>Macro parameters are not separated by commas, though method-reference parameters are.
+ */
+ private Node parseMacroDefinition() throws IOException {
+ expect('(');
+ skipSpace();
+ String name = parseId("Macro name");
+ ImmutableList.Builder<String> parameterNames = ImmutableList.builder();
+ while (true) {
+ skipSpace();
+ if (c == ')') {
+ next();
+ break;
+ }
+ if (c != '$') {
+ throw parseException("Macro parameters should look like $name");
+ }
+ next();
+ parameterNames.add(parseId("Macro parameter name"));
+ }
+ return new MacroDefinitionTokenNode(resourceName, lineNumber(), name, parameterNames.build());
+ }
+
+ /**
+ * Parses an identifier after {@code #} that is not one of the standard directives. The assumption
+ * is that it is a call of a macro that is defined in the template. Macro definitions are
+ * extracted from the template during the second parsing phase (and not during evaluation of the
+ * template as you might expect). This means that a macro can be called before it is defined.
+ * <pre>{@code
+ * <macro-call> -> # <id> ( <expression-list> )
+ * <expression-list> -> <empty> |
+ * <expression> <optional-comma> <expression-list>
+ * <optional-comma> -> <empty> | ,
+ * }</pre>
+ */
+ private Node parsePossibleMacroCall(String directive) throws IOException {
+ skipSpace();
+ if (c != '(') {
+ throw parseException("Unrecognized directive #" + directive);
+ }
+ next();
+ ImmutableList.Builder<Node> parameterNodes = ImmutableList.builder();
+ while (true) {
+ skipSpace();
+ if (c == ')') {
+ next();
+ break;
+ }
+ parameterNodes.add(parsePrimary());
+ if (c == ',') {
+ // The documentation doesn't say so, but you can apparently have an optional comma in
+ // macro calls.
+ next();
+ }
+ }
+ return new DirectiveNode.MacroCallNode(
+ resourceName, lineNumber(), directive, parameterNodes.build());
+ }
+
+ /**
+ * Parses and discards a comment, which is {@code ##} followed by any number of characters up to
+ * and including the next newline.
+ */
+ private Node parseComment() throws IOException {
+ int lineNumber = lineNumber();
+ while (c != '\n' && c != EOF) {
+ next();
+ }
+ next();
+ return new CommentTokenNode(resourceName, lineNumber);
+ }
+
+ /**
+ * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given
+ * {@code firstChar} is the first character of the plain text, and {@link #c} is the second
+ * (if the plain text is more than one character).
+ */
+ private Node parsePlainText(int firstChar) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ sb.appendCodePoint(firstChar);
+
+ literal:
+ while (true) {
+ switch (c) {
+ case EOF:
+ case '$':
+ case '#':
+ break literal;
+ default:
+ // Just some random character.
+ }
+ sb.appendCodePoint(c);
+ next();
+ }
+ return new ConstantExpressionNode(resourceName, lineNumber(), sb.toString());
+ }
+
+ /**
+ * Parses a reference, which is everything that can start with a {@code $}. References can
+ * optionally be enclosed in braces, so {@code $x} and {@code ${x}} are the same. Braces are
+ * useful when text after the reference would otherwise be parsed as part of it. For example,
+ * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y}.
+ * Of course {@code $xy} would be a reference to the variable {@code $xy}.
+ * <pre>{@code
+ * <reference> -> $<reference-no-brace> |
+ * ${<reference-no-brace>}
+ * }</pre>
+ *
+ * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}.
+ */
+ private ReferenceNode parseReference() throws IOException {
+ if (c == '{') {
+ next();
+ ReferenceNode node = parseReferenceNoBrace();
+ expect('}');
+ return node;
+ } else {
+ return parseReferenceNoBrace();
+ }
+ }
+
+ /**
+ * Parses a reference, in the simple form without braces.
+ * <pre>{@code
+ * <reference-no-brace> -> <id><reference-suffix>
+ * }</pre>
+ */
+ private ReferenceNode parseReferenceNoBrace() throws IOException {
+ String id = parseId("Reference");
+ ReferenceNode lhs = new PlainReferenceNode(resourceName, lineNumber(), id);
+ return parseReferenceSuffix(lhs);
+ }
+
+ /**
+ * Parses the modifiers that can appear at the tail of a reference.
+ * <pre>{@code
+ * <reference-suffix> -> <empty> |
+ * <reference-member> |
+ * <reference-index>
+ * }</pre>
+ *
+ * @param lhs the reference node representing the first part of the reference
+ * {@code $x} in {@code $x.foo} or {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}.
+ */
+ private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException {
+ switch (c) {
+ case '.':
+ return parseReferenceMember(lhs);
+ case '[':
+ return parseReferenceIndex(lhs);
+ default:
+ return lhs;
+ }
+ }
+
+ /**
+ * Parses a reference member, which is either a property reference like {@code $x.y} or a method
+ * call like {@code $x.y($z)}.
+ * <pre>{@code
+ * <reference-member> -> .<id><reference-property-or-method><reference-suffix>
+ * <reference-property-or-method> -> <id> |
+ * <id> ( <method-parameter-list> )
+ * }</pre>
+ *
+ * @param lhs the reference node representing what appears to the left of the dot, like the
+ * {@code $x} in {@code $x.foo} or {@code $x.foo()}.
+ */
+ private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException {
+ assert c == '.';
+ next();
+ String id = parseId("Member");
+ ReferenceNode reference;
+ if (c == '(') {
+ reference = parseReferenceMethodParams(lhs, id);
+ } else {
+ reference = new MemberReferenceNode(lhs, id);
+ }
+ return parseReferenceSuffix(reference);
+ }
+
+ /**
+ * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}.
+ * <pre>{@code
+ * <method-parameter-list> -> <empty> |
+ * <non-empty-method-parameter-list>
+ * <non-empty-method-parameter-list> -> <expression> |
+ * <expression> , <non-empty-method-parameter-list>
+ * }</pre>
+ *
+ * @param lhs the reference node representing what appears to the left of the dot, like the
+ * {@code $x} in {@code $x.foo()}.
+ */
+ private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id)
+ throws IOException {
+ assert c == '(';
+ nextNonSpace();
+ ImmutableList.Builder<ExpressionNode> args = ImmutableList.builder();
+ if (c != ')') {
+ args.add(parseExpression());
+ while (c == ',') {
+ nextNonSpace();
+ args.add(parseExpression());
+ }
+ if (c != ')') {
+ throw parseException("Expected )");
+ }
+ }
+ assert c == ')';
+ next();
+ return new MethodReferenceNode(lhs, id, args.build());
+ }
+
+ /**
+ * Parses an index suffix to a method, like {@code $x[$i]}.
+ * <pre>{@code
+ * <reference-index> -> [ <expression> ]
+ * }</pre>
+ *
+ * @param lhs the reference node representing what appears to the left of the dot, like the
+ * {@code $x} in {@code $x[$i]}.
+ */
+ private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException {
+ assert c == '[';
+ next();
+ ExpressionNode index = parseExpression();
+ if (c != ']') {
+ throw parseException("Expected ]");
+ }
+ next();
+ ReferenceNode reference = new IndexReferenceNode(lhs, index);
+ return parseReferenceSuffix(reference);
+ }
+
+ enum Operator {
+ /**
+ * A dummy operator with low precedence. When parsing subexpressions, we always stop when we
+ * reach an operator of lower precedence than the "current precedence". For example, when
+ * parsing {@code 1 + 2 * 3 + 4}, we'll stop parsing the subexpression {@code * 3 + 4} when
+ * we reach the {@code +} because it has lower precedence than {@code *}. This dummy operator,
+ * then, behaves like {@code +} when the minimum precedence is {@code *}. We also return it
+ * if we're looking for an operator and don't find one. If this operator is {@code ⊙}, it's as
+ * if our expressions are bracketed with it, like {@code ⊙ 1 + 2 * 3 + 4 ⊙}.
+ */
+ STOP("", 0),
+
+ // If a one-character operator is a prefix of a two-character operator, like < and <=, then
+ // the one-character operator must come first.
+ OR("||", 1),
+ AND("&&", 2),
+ EQUAL("==", 3), NOT_EQUAL("!=", 3),
+ LESS("<", 4), LESS_OR_EQUAL("<=", 4), GREATER(">", 4), GREATER_OR_EQUAL(">=", 4),
+ PLUS("+", 5), MINUS("-", 5),
+ TIMES("*", 6), DIVIDE("/", 6), REMAINDER("%", 6);
+
+ final String symbol;
+ final int precedence;
+
+ Operator(String symbol, int precedence) {
+ this.symbol = symbol;
+ this.precedence = precedence;
+ }
+
+ @Override
+ public String toString() {
+ return symbol;
+ }
+ }
+
+ /**
+ * Maps a code point to the operators that begin with that code point. For example, maps
+ * {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}.
+ */
+ private static final Map<Integer, List<Operator>> CODE_POINT_TO_OPERATORS;
+ static {
+ Map<Integer, List<Operator>> map = new HashMap<>();
+ for (Operator operator : Operator.values()) {
+ if (operator != Operator.STOP) {
+ Integer key = operator.symbol.codePointAt(0);
+ if (!map.containsKey(key)) {
+ map.put(key, new ArrayList<Operator>());
+ }
+ map.get(key).add(operator);
+ }
+ }
+ CODE_POINT_TO_OPERATORS = Collections.unmodifiableMap(map);
+ }
+
+ /**
+ * Parses an expression, which can occur within a directive like {@code #if} or {@code #set},
+ * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}.
+ * <pre>{@code
+ * <expression> -> <and-expression> |
+ * <expression> || <and-expression>
+ * <and-expression> -> <relational-expression> |
+ * <and-expression> && <relational-expression>
+ * <equality-exression> -> <relational-expression> |
+ * <equality-expression> <equality-op> <relational-expression>
+ * <equality-op> -> == | !=
+ * <relational-expression> -> <additive-expression> |
+ * <relational-expression> <relation> <additive-expression>
+ * <relation> -> < | <= | > | >=
+ * <additive-expression> -> <multiplicative-expression> |
+ * <additive-expression> <add-op> <multiplicative-expression>
+ * <add-op> -> + | -
+ * <multiplicative-expression> -> <unary-expression> |
+ * <multiplicative-expression> <mult-op> <unary-expression>
+ * <mult-op> -> * | / | %
+ * }</pre>
+ */
+ private ExpressionNode parseExpression() throws IOException {
+ ExpressionNode lhs = parseUnaryExpression();
+ return new OperatorParser().parse(lhs, 1);
+ }
+
+ /**
+ * An operator-precedence parser for the binary operations we understand. It implements an
+ * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia
+ * that uses recursion rather than having an explicit stack of operators and values.
+ */
+ private class OperatorParser {
+ /**
+ * The operator we have just scanned, in the same way that {@link #c} is the character we have
+ * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
+ */
+ private Operator currentOperator;
+
+ OperatorParser() throws IOException {
+ nextOperator();
+ }
+
+ /**
+ * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider
+ * operators with precedence at least {@code minPrecedence}.
+ *
+ * @return the parsed subexpression
+ */
+ ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
+ while (currentOperator.precedence >= minPrecedence) {
+ Operator operator = currentOperator;
+ ExpressionNode rhs = parseUnaryExpression();
+ nextOperator();
+ while (currentOperator.precedence > operator.precedence) {
+ rhs = parse(rhs, currentOperator.precedence);
+ }
+ lhs = new BinaryExpressionNode(lhs, operator, rhs);
+ }
+ return lhs;
+ }
+
+ /**
+ * Updates {@link #currentOperator} to be an operator read from the input,
+ * or {@link Operator#STOP} if there is none.
+ */
+ private void nextOperator() throws IOException {
+ skipSpace();
+ List<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
+ if (possibleOperators == null) {
+ currentOperator = Operator.STOP;
+ return;
+ }
+ int firstChar = c;
+ next();
+ Operator operator = null;
+ for (Operator possibleOperator : possibleOperators) {
+ if (possibleOperator.symbol.length() == 1) {
+ assert operator == null;
+ operator = possibleOperator;
+ } else if (possibleOperator.symbol.charAt(1) == c) {
+ next();
+ operator = possibleOperator;
+ }
+ }
+ if (operator == null) {
+ throw parseException("Expected " + possibleOperators.get(0) + ", not just " + firstChar);
+ }
+ currentOperator = operator;
+ }
+ }
+
+ /**
+ * Parses an expression not containing any operators (except inside parentheses).
+ * <pre>{@code
+ * <unary-expression> -> <primary> |
+ * ( <expression> ) |
+ * ! <unary-expression>
+ * }</pre>
+ */
+ private ExpressionNode parseUnaryExpression() throws IOException {
+ skipSpace();
+ ExpressionNode node;
+ if (c == '(') {
+ nextNonSpace();
+ node = parseExpression();
+ expect(')');
+ skipSpace();
+ return node;
+ } else if (c == '!') {
+ next();
+ node = new NotExpressionNode(parseUnaryExpression());
+ skipSpace();
+ return node;
+ } else {
+ return parsePrimary();
+ }
+ }
+
+
+ /**
+ * Parses an expression containing only literals or references.
+ * <pre>{@code
+ * <primary> -> <reference> |
+ * <string-literal> |
+ * <integer-literal> |
+ * <boolean-literal>
+ * }</pre>
+ */
+ private ExpressionNode parsePrimary() throws IOException {
+ ExpressionNode node;
+ if (c == '$') {
+ next();
+ node = parseReference();
+ } else if (c == '"') {
+ node = parseStringLiteral();
+ } else if (c == '-') {
+ // Velocity does not have a negation operator. If we see '-' it must be the start of a
+ // negative integer literal.
+ next();
+ node = parseIntLiteral("-");
+ } else if (isAsciiDigit(c)) {
+ node = parseIntLiteral("");
+ } else if (isAsciiLetter(c)) {
+ node = parseBooleanLiteral();
+ } else {
+ throw parseException("Expected an expression");
+ }
+ skipSpace();
+ return node;
+ }
+
+ private ExpressionNode parseStringLiteral() throws IOException {
+ return new ConstantExpressionNode(resourceName, lineNumber(), readStringLiteral());
+ }
+
+ private String readStringLiteral() throws IOException {
+ assert c == '"';
+ StringBuilder sb = new StringBuilder();
+ next();
+ while (c != '"') {
+ if (c == '\n' || c == EOF) {
+ throw parseException("Unterminated string constant");
+ }
+ if (c == '$' || c == '\\') {
+ // In real Velocity, you can have a $ reference expanded inside a "" string literal.
+ // There are also '' string literals where that is not so. We haven't needed that yet
+ // so it's not supported.
+ throw parseException(
+ "Escapes or references in string constants are not currently supported");
+ }
+ sb.appendCodePoint(c);
+ next();
+ }
+ next();
+ return sb.toString();
+ }
+
+ private ExpressionNode parseIntLiteral(String prefix) throws IOException {
+ StringBuilder sb = new StringBuilder(prefix);
+ while (isAsciiDigit(c)) {
+ sb.appendCodePoint(c);
+ next();
+ }
+ int value;
+ try {
+ value = Integer.parseInt(sb.toString());
+ } catch (NumberFormatException e) {
+ throw parseException("Invalid integer: " + sb);
+ }
+ return new ConstantExpressionNode(resourceName, lineNumber(), value);
+ }
+
+ /**
+ * Parses a boolean literal, either {@code true} or {@code false}.
+ * <boolean-literal> -> true |
+ * false
+ */
+ private ExpressionNode parseBooleanLiteral() throws IOException {
+ String s = parseId("Identifier without $");
+ boolean value;
+ if (s.equals("true")) {
+ value = true;
+ } else if (s.equals("false")) {
+ value = false;
+ } else {
+ throw parseException("Identifier in expression must be preceded by $ or be true or false");
+ }
+ return new ConstantExpressionNode(resourceName, lineNumber(), value);
+ }
+
+ private static final ImmutableAsciiSet ASCII_LETTER =
+ ImmutableAsciiSet.ofRange('A', 'Z')
+ .union(ImmutableAsciiSet.ofRange('a', 'z'));
+
+ private static final ImmutableAsciiSet ASCII_DIGIT =
+ ImmutableAsciiSet.ofRange('0', '9');
+
+ private static final ImmutableAsciiSet ID_CHAR =
+ ASCII_LETTER
+ .union(ASCII_DIGIT)
+ .union(ImmutableAsciiSet.of('-'))
+ .union(ImmutableAsciiSet.of('_'));
+
+ private static boolean isAsciiLetter(int c) {
+ return ASCII_LETTER.contains(c);
+ }
+
+ private static boolean isAsciiDigit(int c) {
+ return ASCII_DIGIT.contains(c);
+ }
+
+ private static boolean isIdChar(int c) {
+ return ID_CHAR.contains(c);
+ }
+
+ /**
+ * Parse an identifier as specified by the
+ * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL
+ * </a>. Identifiers are ASCII: starts with a letter, then letters, digits, {@code -} and
+ * {@code _}.
+ */
+ private String parseId(String what) throws IOException {
+ if (!isAsciiLetter(c)) {
+ throw parseException(what + " should start with an ASCII letter");
+ }
+ StringBuilder id = new StringBuilder();
+ while (isIdChar(c)) {
+ id.appendCodePoint(c);
+ next();
+ }
+ return id.toString();
+ }
+
+ /**
+ * Returns an exception to be thrown describing a parse error with the given message, and
+ * including information about where it occurred.
+ */
+ private ParseException parseException(String message) throws IOException {
+ StringBuilder context = new StringBuilder();
+ if (c == EOF) {
+ context.append("EOF");
+ } else {
+ int count = 0;
+ while (c != EOF && count < 20) {
+ context.appendCodePoint(c);
+ next();
+ count++;
+ }
+ if (c != EOF) {
+ context.append("...");
+ }
+ }
+ return new ParseException(message, resourceName, lineNumber(), context.toString());
+ }
+}