1 files changed, 963 insertions, 0 deletions
diff --git a/src/main/java/com/google/escapevelocity/Parser.java b/src/main/java/com/google/escapevelocity/Parser.java
new file mode 100644
index 0000000..9982be3
--- /dev/null
+++ b/src/main/java/com/google/escapevelocity/Parser.java
@@ -0,0 +1,963 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.escapevelocity;
+
+import com.google.escapevelocity.DirectiveNode.SetNode;
+import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode;
+import com.google.escapevelocity.ExpressionNode.NotExpressionNode;
+import com.google.escapevelocity.ReferenceNode.IndexReferenceNode;
+import com.google.escapevelocity.ReferenceNode.MemberReferenceNode;
+import com.google.escapevelocity.ReferenceNode.MethodReferenceNode;
+import com.google.escapevelocity.ReferenceNode.PlainReferenceNode;
+import com.google.escapevelocity.TokenNode.CommentTokenNode;
+import com.google.escapevelocity.TokenNode.ElseIfTokenNode;
+import com.google.escapevelocity.TokenNode.ElseTokenNode;
+import com.google.escapevelocity.TokenNode.EndTokenNode;
+import com.google.escapevelocity.TokenNode.EofNode;
+import com.google.escapevelocity.TokenNode.ForEachTokenNode;
+import com.google.escapevelocity.TokenNode.IfTokenNode;
+import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode;
+import com.google.escapevelocity.TokenNode.NestedTokenNode;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A parser that reads input from the given {@link Reader} and parses it to produce a
+ * {@link Template}.
+ *
+ * @author emcmanus@google.com (Éamonn McManus)
+ */
+class Parser {
+  private static final int EOF = -1; // the value Reader.read() returns at end of input
+
+  private final LineNumberReader reader; // wraps the caller's Reader so lines can be counted
+  private final String resourceName; // recorded in nodes and in parse errors
+  private final Template.ResourceOpener resourceOpener; // opens the target of a #parse
+
+  /**
+   * The invariant of this parser is that {@code c} is always the next character of interest.
+   * This means that we never have to "unget" a character by reading too far. For example, after
+   * we parse an integer, {@code c} will be the first character after the integer, which is exactly
+   * the state we will be in when there are no more digits.
+   */
+  private int c;
+
+  Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)
+      throws IOException {
+    this.resourceName = resourceName;
+    this.resourceOpener = resourceOpener;
+    this.reader = new LineNumberReader(reader);
+    this.reader.setLineNumber(1); // LineNumberReader starts at 0; we report 1-based lines
+    next(); // prime c so that it holds the first character of the input
+  }
+
+  /**
+   * Parse the input completely to produce a {@link Template}.
+   *
+   * <p>Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include
+   * entire references such as <pre>
+   *    ${x.foo()[23]}
+   * </pre>or entire directives such as<pre>
+   *    #set ($x = $y + $z)
+   * </pre>But tokens do not span complex constructs. For example,<pre>
+   *    #if ($x == $y) something #end
+   * </pre>is three tokens:<pre>
+   *    #if ($x == $y)
+   *    (literal text " something ")
+   *    #end
+   * </pre>
+   *
+   * <p>The second phase then takes the sequence of tokens and constructs a parse tree out of it.
+   * Some nodes in the parse tree will be unchanged from the token sequence, such as the <pre>
+   *    ${x.foo()[23]}
+   *    #set ($x = $y + $z)
+   * </pre> examples above. But a construct such as the {@code #if ... #end} mentioned above will
+   * become a single IfNode in the parse tree in the second phase.
+   *
+   * <p>The main reason for this approach is that Velocity has two kinds of lexical contexts. At the
+   * top level, there can be arbitrary literal text; references like <code>${x.foo()}</code>; and
+   * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however,
+   * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize
+   * the inside of <pre>
+   *    #if ($x == $a + $b)
+   * </pre> as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical
+   * parser/lexer combination, where the lexer would need to switch between these two modes, we
+   * replace the lexer with an ad-hoc parser that is the first phase described above, and we
+   * define a simple parser over the resultant tokens that is the second phase.
+   */
+  Template parse() throws IOException {
+    // Phase one yields the flat token list; phase two (Reparser) builds the template from it.
+    return new Reparser(parseTokens()).reparse();
+  }
+
+  private ImmutableList<Node> parseTokens() throws IOException {
+    ImmutableList.Builder<Node> builder = ImmutableList.builder();
+    Node node = parseNode();
+    for (; !(node instanceof EofNode); node = parseNode()) {
+      builder.add(node);
+    }
+    builder.add(node); // the terminating EofNode is part of the token list
+    return builder.build();
+  }
+
+  private int lineNumber() {
+    return reader.getLineNumber(); // 1-based, since the constructor calls setLineNumber(1)
+  }
+
+  /**
+   * Reads the next character into {@code c}. Once {@code c} is {@link #EOF} it stays that way.
+   */
+  private void next() throws IOException {
+    if (c == EOF) {
+      return; // already at end of input: do not touch the reader again
+    }
+    c = reader.read();
+  }
+
+  /** Advances {@code c} past any whitespace, stopping at a non-space character or at EOF. */
+  private void skipSpace() throws IOException {
+    while (true) {
+      if (!Character.isWhitespace(c)) {
+        return;
+      }
+      next();
+    }
+  }
+
+  /**
+   * Consumes the current character unconditionally, then skips any whitespace that follows it,
+   * so that {@code c} ends up on a non-space character (or at the end of the input).
+   */
+  private void nextNonSpace() throws IOException {
+    next();
+    skipSpace();
+  }
+
+  /**
+   * Requires that the next non-space character be {@code expected}. Leading whitespace is
+   * skipped first; on a match the character is consumed, leaving {@code c} on whatever follows
+   * it. On a mismatch a parse exception is thrown.
+   */
+  private void expect(char expected) throws IOException {
+    skipSpace();
+    if (c != expected) {
+      throw parseException("Expected " + expected);
+    }
+    next();
+  }
+
+  /**
+   * Reads one first-phase token starting at {@code c} and returns it.
+   * <pre>{@code
+   * <template> -> <empty> |
+   *               <directive> <template> |
+   *               <non-directive> <template>
+   * }</pre>
+   */
+  private Node parseNode() throws IOException {
+    if (c == EOF) {
+      return new EofNode(resourceName, lineNumber());
+    }
+    if (c != '#') {
+      return parseNonDirective();
+    }
+    next();
+    if (c == '#') {
+      return parseComment();
+    }
+    if (c == '[') {
+      return parseHashSquare();
+    }
+    if (c == '{' || isAsciiLetter(c)) {
+      return parseDirective();
+    }
+    // As in Velocity, any other # is a plain character, so #$foo is a literal # followed by the
+    // reference $foo. The # becomes its own node; we don't try to merge it with adjacent text.
+    return new ConstantExpressionNode(resourceName, lineNumber(), "#");
+  }
+
+  /** Parses what follows {@code #[}: a {@code #[[quoted block]]#} or just a literal #[. */
+  private Node parseHashSquare() throws IOException {
+    // The caller has consumed # and c is [. Only #[[ opens a quoted block, which then runs to
+    // the matching ]]#. If the next character is not another [ then this is the literal text
+    // #[, and that next character is left in c to be parsed as part of whatever follows.
+    assert c == '[';
+    next();
+    if (c != '[') {
+      return new ConstantExpressionNode(resourceName, lineNumber(), "#[");
+    }
+    next();
+    StringBuilder text = new StringBuilder();
+    while (true) {
+      int n = text.length();
+      // A # closes the block only when the two characters buffered just before it are ]].
+      // Any other # is ordinary content.
+      if (c == '#' && n >= 2 && text.charAt(n - 1) == ']' && text.charAt(n - 2) == ']') {
+        break;
+      }
+      if (c == EOF) {
+        throw parseException("Unterminated #[[ - did not see matching ]]#");
+      }
+      text.append((char) c);
+      next();
+    }
+    next(); // consume the # of the closing ]]#
+    text.setLength(text.length() - 2); // the closing ]] was buffered as if it were content
+    return new ConstantExpressionNode(resourceName, lineNumber(), text.toString());
+  }
+
+  /**
+   * Parses a single non-directive node from the reader. On entry, {@code c} is the first
+   * character of the node. A {@code $} begins a reference only when a letter or an opening
+   * brace follows it; any other {@code $} is ordinary text.
+   * <pre>{@code
+   * <non-directive> -> <reference> |
+   *                    <text containing neither $ nor #>
+   * }</pre>
+   */
+  private Node parseNonDirective() throws IOException {
+    int firstChar = c;
+    next();
+    boolean startsReference = firstChar == '$' && (isAsciiLetter(c) || c == '{');
+    if (startsReference) {
+      // parseReference() expects c to be the character just after the $.
+      return parseReference();
+    }
+    // Either text that does not start with $, or a $ that cannot start a reference. In both
+    // cases the first character has been consumed already, so it is passed along explicitly.
+    return parsePlainText(firstChar);
+  }
+
+  /**
+   * Parses a single directive token from the reader. On entry {@code c} is the character after
+   * the {@code #}. Directives can be spelled with or without braces, for example {@code #if} or
+   * {@code #{if}}. We omit the brace spelling in the productions here: <pre>{@code
+   * <directive> -> <if-token> |
+   *                <else-token> |
+   *                <elseif-token> |
+   *                <end-token> |
+   *                <foreach-token> |
+   *                <set-token> |
+   *                <parse-token> |
+   *                <macro-token> |
+   *                <macro-call> |
+   *                <comment>
+   * }</pre>
+   */
+  private Node parseDirective() throws IOException {
+    String directive;
+    if (c != '{') {
+      directive = parseId("Directive");
+    } else {
+      next();
+      directive = parseId("Directive inside #{...}");
+      expect('}');
+    }
+    Node node = parseDirectiveNamed(directive);
+    // Velocity skips a newline after any directive.
+    // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented.
+    if (c == '\n') {
+      next();
+    }
+    return node;
+  }
+
+  /**
+   * Parses whatever follows the name of a directive, choosing the production by that name. A
+   * name that is not a built-in directive is assumed to be a macro call.
+   */
+  private Node parseDirectiveNamed(String directive) throws IOException {
+    switch (directive) {
+      case "end":
+        return new EndTokenNode(resourceName, lineNumber());
+      case "if":
+      case "elseif":
+        return parseIfOrElseIf(directive);
+      case "else":
+        return new ElseTokenNode(resourceName, lineNumber());
+      case "foreach":
+        return parseForEach();
+      case "set":
+        return parseSet();
+      case "parse":
+        return parseParse();
+      case "macro":
+        return parseMacroDefinition();
+      default:
+        return parsePossibleMacroCall(directive);
+    }
+  }
+
+  /**
+   * Parses the parenthesized condition of an {@code #if} or {@code #elseif}. <pre>{@code
+   * <if-token> -> #if ( <condition> )
+   * <elseif-token> -> #elseif ( <condition> )
+   * }</pre>
+   */
+  private Node parseIfOrElseIf(String directive) throws IOException {
+    expect('(');
+    ExpressionNode condition = parseExpression();
+    expect(')');
+    if (directive.equals("if")) {
+      return new IfTokenNode(condition);
+    }
+    return new ElseIfTokenNode(condition); // the only other value callers pass is "elseif"
+  }
+
+  /**
+   * Parses a {@code #foreach} token from the reader, starting just after the directive name.
+   * The loop variable is recorded by name, without its {@code $}.
+   * <pre>{@code
+   * <foreach-token> -> #foreach ( $<id> in <expression> )
+   * }</pre>
+   */
+  private Node parseForEach() throws IOException {
+    expect('(');
+    expect('$');
+    String loopVariable = parseId("For-each variable");
+    skipSpace();
+    // Match the keyword "in" one character at a time. Nothing checks what comes right after
+    // the n, so the collection expression may follow it without any intervening space.
+    boolean sawIn = false;
+    if (c == 'i') {
+      next();
+      sawIn = (c == 'n');
+    }
+    if (!sawIn) {
+      throw parseException("Expected 'in' for #foreach");
+    }
+    next();
+    ExpressionNode collection = parseExpression();
+    expect(')');
+    return new ForEachTokenNode(loopVariable, collection);
+  }
+
+  /**
+   * Parses a {@code #set} token, starting just after the directive name. <pre>{@code
+   * <set-token> -> #set ( $<id> = <expression>)
+   * }</pre>
+   */
+  private Node parseSet() throws IOException {
+    expect('(');
+    expect('$');
+    String target = parseId("#set variable");
+    expect('=');
+    ExpressionNode value = parseExpression();
+    expect(')');
+    return new SetNode(target, value);
+  }
+
+  /**
+   * Parses a {@code #parse} token from the reader. <pre>{@code
+   * <parse-token> -> #parse ( <string-literal> )
+   * }</pre>
+   *
+   * <p>This deliberately differs from Velocity. Velocity evaluates a {@code #parse} directive
+   * when it is reached during template evaluation, so its argument can be a variable and an
+   * enclosing {@code #if} can decide whether the {@code #parse} happens at all. Neither of those
+   * is true in EscapeVelocity: the nested resource is read here, at parse time, and its contents
+   * are integrated into the containing template pretty much as if they had been written
+   * inline. That also means that EscapeVelocity allows forward references to macros inside
+   * {@code #parse} directives, which Velocity does not.
+   */
+  private Node parseParse() throws IOException {
+    expect('(');
+    skipSpace();
+    if (c != '"') {
+      throw parseException("#parse only supported with string literal argument");
+    }
+    String included = readStringLiteral();
+    expect(')');
+    try (Reader in = resourceOpener.openResource(included)) {
+      // Only the token phase is run on the nested resource; its tokens are wrapped in one node.
+      ImmutableList<Node> tokens = new Parser(in, included, resourceOpener).parseTokens();
+      return new NestedTokenNode(included, tokens);
+    }
+  }
+
+  /**
+   * Parses a {@code #macro} token from the reader. <pre>{@code
+   * <macro-token> -> #macro ( <id> <macro-parameter-list> )
+   * <macro-parameter-list> -> <empty> |
+   *                           $<id> <macro-parameter-list>
+   * }</pre>
+   *
+   * <p>Macro parameters are not separated by commas, though method-reference parameters are.
+   * Only the macro's name and parameter names are read here, not its body.
+   */
+  private Node parseMacroDefinition() throws IOException {
+    expect('(');
+    skipSpace();
+    String name = parseId("Macro name");
+    ImmutableList.Builder<String> params = ImmutableList.builder();
+    // Each pass through the loop consumes one $parameter. Whitespace is skipped before every
+    // check, and the closing parenthesis ends the list.
+    for (skipSpace(); c != ')'; skipSpace()) {
+      if (c != '$') {
+        throw parseException("Macro parameters should look like $name");
+      }
+      next();
+      params.add(parseId("Macro parameter name"));
+    }
+    next(); // consume the )
+    return new MacroDefinitionTokenNode(
+        resourceName, lineNumber(), name, params.build());
+  }
+
+  /**
+   * Parses what follows an identifier after {@code #} that is not one of the standard
+   * directives. The assumption is that it is a call of a macro defined in the template. Macro
+   * definitions are extracted from the template during the second parsing phase (and not during
+   * evaluation of the template as you might expect), so a macro can be called before it is
+   * defined. Each argument is read with {@link #parsePrimary}, so an argument is a single
+   * reference or literal rather than an expression with operators.
+   * <pre>{@code
+   * <macro-call> -> # <id> ( <expression-list> )
+   * <expression-list> -> <empty> |
+   *                      <expression> <optional-comma> <expression-list>
+   * <optional-comma> -> <empty> | ,
+   * }</pre>
+   *
+   * @param directive the identifier that followed the {@code #}, taken to be the macro's name
+   */
+  private Node parsePossibleMacroCall(String directive) throws IOException {
+    skipSpace();
+    if (c != '(') {
+      throw parseException("Unrecognized directive #" + directive);
+    }
+    next();
+    ImmutableList.Builder<Node> arguments = ImmutableList.builder();
+    for (skipSpace(); c != ')'; skipSpace()) {
+      arguments.add(parsePrimary());
+      if (c == ',') {
+        // The documentation doesn't say so, but you can apparently have an optional comma in
+        // macro calls.
+        next();
+      }
+    }
+    next(); // consume the )
+    return new DirectiveNode.MacroCallNode(
+        resourceName, lineNumber(), directive, arguments.build());
+  }
+
+  /**
+   * Consumes a {@code ##} comment: everything up to and including the next newline, or up to
+   * the end of the input if that comes first. The comment text itself is discarded.
+   */
+  private Node parseComment() throws IOException {
+    int startLine = lineNumber();
+    while (!(c == '\n' || c == EOF)) {
+      next();
+    }
+    next(); // step over the newline; does nothing if we stopped at EOF
+    return new CommentTokenNode(resourceName, startLine);
+  }
+
+  /**
+   * Parses plain text: literal characters up to, but not including, the next {@code $} or
+   * {@code #}, or up to the end of the input. The given {@code firstChar} has already been
+   * consumed by the caller and always becomes the first character of the text, even if it is
+   * itself a {@code $}. On entry, {@link #c} is the character that follows {@code firstChar}
+   * (if the plain text is more than one character).
+   */
+  private Node parsePlainText(int firstChar) throws IOException {
+    StringBuilder text = new StringBuilder();
+    text.appendCodePoint(firstChar);
+
+    // The terminating character is left in c so that the next parseNode() call can decide
+    // whether it starts a reference, a directive, or more literal text.
+    while (!endsPlainText(c)) {
+      text.appendCodePoint(c);
+      next();
+    }
+    return new ConstantExpressionNode(resourceName, lineNumber(), text.toString());
+  }
+
+  /** Returns true if {@code ch} stops a run of plain text: {@code $}, {@code #}, or EOF. */
+  private static boolean endsPlainText(int ch) {
+    return ch == EOF || ch == '$' || ch == '#';
+  }
+
+  /**
+   * Parses a reference, which is everything that can start with a {@code $}. A reference may
+   * optionally be wrapped in braces, so {@code $x} and {@code ${x}} mean the same thing. Braces
+   * matter when the text after the reference would otherwise be read as part of it. For example,
+   * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y},
+   * whereas {@code $xy} is a reference to the variable {@code $xy}.
+   * <pre>{@code
+   * <reference> -> $<reference-no-brace> |
+   *                ${<reference-no-brace>}
+   * }</pre>
+   *
+   * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}.
+   */
+  private ReferenceNode parseReference() throws IOException {
+    if (c != '{') {
+      return parseReferenceNoBrace();
+    }
+    // Braced form: the closing brace is required once the reference itself has been read.
+    next();
+    ReferenceNode node = parseReferenceNoBrace();
+    expect('}');
+    return node;
+  }
+
+  /**
+   * Parses a reference in the simple form without braces: an identifier and then any suffixes.
+   * On entry, {@code c} is the first character of the identifier.
+   * <pre>{@code
+   * <reference-no-brace> -> <id><reference-suffix>
+   * }</pre>
+   */
+  private ReferenceNode parseReferenceNoBrace() throws IOException {
+    String name = parseId("Reference");
+    return parseReferenceSuffix(new PlainReferenceNode(resourceName, lineNumber(), name));
+  }
+
+  /**
+   * Parses the modifiers that can appear at the tail of a reference. If {@code c} does not
+   * start a suffix, {@code lhs} is returned unchanged and nothing is consumed.
+   * <pre>{@code
+   * <reference-suffix> -> <empty> |
+   *                       <reference-member> |
+   *                       <reference-index>
+   * }</pre>
+   *
+   * @param lhs the reference parsed so far, like {@code $x} in {@code $x.foo}, or later
+   *     {@code $x.y} in {@code $x.y.z}
+   */
+  private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException {
+    if (c == '.') {
+      return parseReferenceMember(lhs);
+    }
+    if (c == '[') {
+      return parseReferenceIndex(lhs);
+    }
+    return lhs;
+  }
+
+  /**
+   * Parses a reference member, which is either a property reference like {@code $x.y} or a method
+   * call like {@code $x.y($z)}. As in Velocity, a dot that no letter follows is not part of
+   * the reference: {@code $x.} is the reference {@code $x} and then a literal dot. <pre>{@code
+   * <reference-member> -> .<reference-property-or-method><reference-suffix>
+   * <reference-property-or-method> -> <id> |
+   *                                   <id> ( <method-parameter-list> )
+   * }</pre>
+   * @param lhs the reference to the left of the dot, like the {@code $x} in {@code $x.foo}
+   */
+  private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException {
+    assert c == '.';
+    reader.mark(1); // peek: if no member name follows, the dot must stay unconsumed in c
+    int afterDot = reader.read();
+    if (!isAsciiLetter(afterDot)) {
+      reader.reset();
+      return lhs;
+    }
+    c = afterDot;
+    String id = parseId("Member");
+    ReferenceNode member =
+        (c == '(') ? parseReferenceMethodParams(lhs, id) : new MemberReferenceNode(lhs, id);
+    return parseReferenceSuffix(member);
+  }
+
+  /**
+   * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}. On entry
+   * {@code c} is the opening parenthesis; on return the closing one has been consumed.
+   * <pre>{@code
+   * <method-parameter-list> -> <empty> |
+   *                            <non-empty-method-parameter-list>
+   * <non-empty-method-parameter-list> -> <expression> |
+   *                                      <expression> , <non-empty-method-parameter-list>
+   * }</pre>
+   * @param lhs the reference to the left of the dot, like the {@code $x} in {@code $x.foo()}
+   * @param id the method name, like the {@code foo} in {@code $x.foo()}
+   */
+  private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id)
+      throws IOException {
+    assert c == '(';
+    nextNonSpace();
+    ImmutableList.Builder<ExpressionNode> arguments = ImmutableList.builder();
+    boolean more = (c != ')');
+    while (more) {
+      arguments.add(parseExpression());
+      more = (c == ',');
+      if (more) {
+        nextNonSpace();
+      }
+    }
+    if (c != ')') {
+      throw parseException("Expected )");
+    }
+    next();
+    return new MethodReferenceNode(lhs, id, arguments.build());
+  }
+
+  /**
+   * Parses an index suffix to a reference, like the {@code [$i]} in {@code $x[$i]}. Further
+   * suffixes may follow the closing bracket, as in {@code $x[$i].foo}.
+   * <pre>{@code
+   * <reference-index> -> [ <expression> ]
+   * }</pre>
+   *
+   * @param lhs the reference node representing what appears to the left of the bracket, like
+   *     the {@code $x} in {@code $x[$i]}.
+   */
+  private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException {
+    assert c == '[';
+    next();
+    ExpressionNode index = parseExpression();
+    // parseExpression() always stops on a non-space character, because scanning for the next
+    // operator skips whitespace first. So the whitespace skipping inside expect() has nothing
+    // to do here, and a mismatch produces the message "Expected ]".
+    expect(']');
+    return parseReferenceSuffix(new IndexReferenceNode(lhs, index));
+  }
+
+  enum Operator {
+    /**
+     * A dummy operator with low precedence. When parsing subexpressions, we always stop when we
+     * reach an operator of lower precedence than the "current precedence". For example, when
+     * parsing {@code 1 + 2 * 3 + 4}, we'll stop parsing the subexpression {@code * 3 + 4} when
+     * we reach the {@code +} because it has lower precedence than {@code *}. This dummy operator,
+     * then, behaves like {@code +} when the minimum precedence is {@code *}. We also return it
+     * if we're looking for an operator and don't find one. If this operator is {@code ⊙}, it's as
+     * if our expressions are bracketed with it, like {@code ⊙ 1 + 2 * 3 + 4 ⊙}.
+     */
+    STOP("", 0),
+
+    // Declaration order matters: when a one-character operator is a prefix of a two-character
+    // one, like < and <=, it must come first (nextOperator() asserts this while scanning).
+    OR("||", 1),
+    AND("&&", 2),
+    EQUAL("==", 3), NOT_EQUAL("!=", 3),
+    LESS("<", 4), LESS_OR_EQUAL("<=", 4), GREATER(">", 4), GREATER_OR_EQUAL(">=", 4),
+    PLUS("+", 5), MINUS("-", 5),
+    TIMES("*", 6), DIVIDE("/", 6), REMAINDER("%", 6);
+
+    final String symbol; // the operator as written in a template; empty for STOP
+    final int precedence; // larger values bind more tightly; STOP has the smallest
+
+    Operator(String symbol, int precedence) {
+      this.symbol = symbol;
+      this.precedence = precedence;
+    }
+
+    @Override
+    public String toString() {
+      return symbol;
+    }
+  }
+
+  /** Maps a code point to the operators whose symbols begin with that code point. */
+  private static final Map<Integer, List<Operator>> CODE_POINT_TO_OPERATORS;
+  static {
+    // For example, < maps to LESS and LESS_OR_EQUAL, in their declaration order.
+    Map<Integer, List<Operator>> byFirstCodePoint = new HashMap<>();
+    for (Operator operator : Operator.values()) {
+      if (operator != Operator.STOP) {
+        Integer first = operator.symbol.codePointAt(0);
+        List<Operator> sameStart = byFirstCodePoint.get(first);
+        if (sameStart == null) {
+          sameStart = new ArrayList<Operator>();
+        }
+        sameStart.add(operator);
+        byFirstCodePoint.put(first, sameStart);
+      }
+    }
+    CODE_POINT_TO_OPERATORS = Collections.unmodifiableMap(byFirstCodePoint);
+  }
+
+  /**
+   * Parses an expression, which can occur within a directive like {@code #if} or {@code #set},
+   * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}.
+   * <pre>{@code
+   * <expression> -> <and-expression> |
+   *                 <expression> || <and-expression>
+   * <and-expression> -> <equality-expression> |
+   *                     <and-expression> && <equality-expression>
+   * <equality-expression> -> <relational-expression> |
+   *                          <equality-expression> <equality-op> <relational-expression>
+   * <equality-op> -> == | !=
+   * <relational-expression> -> <additive-expression> |
+   *                            <relational-expression> <relation> <additive-expression>
+   * <relation> -> < | <= | > | >=
+   * <additive-expression> -> <multiplicative-expression> |
+   *                          <additive-expression> <add-op> <multiplicative-expression>
+   * <add-op> -> + | -
+   * <multiplicative-expression> -> <unary-expression> |
+   *                                <multiplicative-expression> <mult-op> <unary-expression>
+   * <mult-op> -> * | / | %
+   * }</pre>
+   */
+  private ExpressionNode parseExpression() throws IOException {
+    ExpressionNode lhs = parseUnaryExpression(); // first: the OperatorParser constructor reads on
+    return new OperatorParser().parse(lhs, 1); // 1 is the precedence of ||, the lowest real one
+  }
+
+  /**
+   * An operator-precedence parser for the binary operations we understand. It implements an
+   * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia
+   * that uses recursion rather than having an explicit stack of operators and values.
+   */
+  private class OperatorParser {
+    /**
+     * The operator we have just scanned, in the same way that {@link #c} is the character we have
+     * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
+     */
+    private Operator currentOperator;
+
+    OperatorParser() throws IOException {
+      nextOperator();
+    }
+
+    /**
+     * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider
+     * operators with precedence at least {@code minPrecedence}.
+     *
+     * @return the parsed subexpression
+     */
+    ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
+      while (currentOperator.precedence >= minPrecedence) {
+        Operator operator = currentOperator;
+        ExpressionNode rhs = parseUnaryExpression();
+        nextOperator();
+        while (currentOperator.precedence > operator.precedence) {
+          rhs = parse(rhs, currentOperator.precedence);
+        }
+        lhs = new BinaryExpressionNode(lhs, operator, rhs);
+      }
+      return lhs;
+    }
+
+    /**
+     * Updates {@link #currentOperator} to be an operator read from the input,
+     * or {@link Operator#STOP} if there is none.
+     */
+    private void nextOperator() throws IOException {
+      skipSpace();
+      List<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
+      if (possibleOperators == null) {
+        currentOperator = Operator.STOP;
+        return;
+      }
+      int firstChar = c;
+      next();
+      Operator operator = null;
+      for (Operator possibleOperator : possibleOperators) {
+        if (possibleOperator.symbol.length() == 1) {
+          assert operator == null;
+          operator = possibleOperator;
+        } else if (possibleOperator.symbol.charAt(1) == c) {
+          next();
+          operator = possibleOperator;
+        }
+      }
+      if (operator == null) {
+        throw parseException("Expected " + possibleOperators.get(0) + ", not just " + firstChar);
+      }
+      currentOperator = operator;
+    }
+  }
+
+  /**
+   * Parses one operand of a binary operator: a primary, a parenthesized expression, or a
+   * negation. Whitespace before the operand is skipped, and so is whitespace after it.
+   * <pre>{@code
+   * <unary-expression> -> <primary> |
+   *                       ( <expression> ) |
+   *                       ! <unary-expression>
+   * }</pre>
+   */
+  private ExpressionNode parseUnaryExpression() throws IOException {
+    skipSpace();
+    if (c == '(') {
+      nextNonSpace();
+      ExpressionNode inner = parseExpression();
+      expect(')');
+      skipSpace();
+      return inner;
+    }
+    if (c == '!') {
+      next();
+      ExpressionNode negated = new NotExpressionNode(parseUnaryExpression());
+      skipSpace();
+      return negated;
+    }
+    return parsePrimary();
+  }
+
+
+  /**
+   * Parses a literal or a reference, and then skips any whitespace that follows it.
+   * <pre>{@code
+   * <primary> -> <reference> |
+   *              <string-literal> |
+   *              <integer-literal> |
+   *              <boolean-literal>
+   * }</pre>
+   */
+  private ExpressionNode parsePrimary() throws IOException {
+    ExpressionNode primary;
+    if (isAsciiDigit(c)) {
+      primary = parseIntLiteral("");
+    } else if (c == '-') {
+      // Velocity has no negation operator, so a '-' at the start of a primary can only be the
+      // sign of a negative integer literal.
+      next();
+      primary = parseIntLiteral("-");
+    } else if (c == '"') {
+      primary = parseStringLiteral();
+    } else if (c == '$') {
+      next();
+      primary = parseReference();
+    } else if (isAsciiLetter(c)) {
+      primary = parseBooleanLiteral();
+    } else {
+      throw parseException("Expected an expression");
+    }
+    skipSpace();
+    return primary;
+  }
+
+  private ExpressionNode parseStringLiteral() throws IOException { // c is the opening quote
+    return new ConstantExpressionNode(resourceName, lineNumber(), readStringLiteral());
+  }
+
+  /** Reads a double-quoted string literal, returning its contents without the quotes. */
+  private String readStringLiteral() throws IOException {
+    assert c == '"';
+    StringBuilder contents = new StringBuilder();
+    // The initial next() steps over the opening quote.
+    for (next(); c != '"'; next()) {
+      if (c == '\n' || c == EOF) {
+        throw parseException("Unterminated string constant");
+      }
+      if (c == '$' || c == '\\') {
+        // In real Velocity, you can have a $ reference expanded inside a "" string literal.
+        // There are also '' string literals where that is not so. We haven't needed that yet
+        // so it's not supported.
+        throw parseException(
+            "Escapes or references in string constants are not currently supported");
+      }
+      contents.appendCodePoint(c);
+    }
+    next(); // consume the closing quote
+    return contents.toString();
+  }
+
+  private ExpressionNode parseIntLiteral(String prefix) throws IOException {
+    StringBuilder digits = new StringBuilder(prefix);
+    for (; isAsciiDigit(c); next()) {
+      digits.appendCodePoint(c);
+    }
+    String text = digits.toString();
+    int value;
+    try {
+      value = Integer.parseInt(text);
+    } catch (NumberFormatException e) {
+      throw parseException("Invalid integer: " + text);
+    }
+    return new ConstantExpressionNode(resourceName, lineNumber(), value);
+  }
+
+  /**
+   * Parses a boolean literal, either {@code true} or {@code false}.
+   * <pre>{@code
+   * <boolean-literal> -> true |
+   *                      false
+   * }</pre>
+   */
+  private ExpressionNode parseBooleanLiteral() throws IOException {
+    String word = parseId("Identifier without $");
+    boolean isTrue = word.equals("true");
+    // A bare identifier in an expression can only be one of the two boolean literals; any
+    // other word is presumably a reference whose $ was forgotten.
+    if (!isTrue && !word.equals("false")) {
+      throw parseException("Identifier in expression must be preceded by $ or be true or false");
+    }
+    return new ConstantExpressionNode(resourceName, lineNumber(), isTrue);
+  }
+
+  private static final ImmutableAsciiSet ASCII_LETTER =
+      ImmutableAsciiSet.ofRange('A', 'Z')
+          .union(ImmutableAsciiSet.ofRange('a', 'z')); // upper- and lower-case ASCII letters
+
+  private static final ImmutableAsciiSet ASCII_DIGIT =
+      ImmutableAsciiSet.ofRange('0', '9'); // decimal digits only
+
+  private static final ImmutableAsciiSet ID_CHAR = // what may follow an identifier's first letter
+      ASCII_LETTER
+          .union(ASCII_DIGIT)
+          .union(ImmutableAsciiSet.of('-'))
+          .union(ImmutableAsciiSet.of('_'));
+
+  private static boolean isAsciiLetter(int c) { // membership in ASCII_LETTER
+    return ASCII_LETTER.contains(c);
+  }
+
+  private static boolean isAsciiDigit(int c) { // membership in ASCII_DIGIT
+    return ASCII_DIGIT.contains(c);
+  }
+
+  private static boolean isIdChar(int c) { // a letter, a digit, '-' or '_'; see ID_CHAR
+    return ID_CHAR.contains(c);
+  }
+
+  /**
+   * Parses an identifier as specified by the
+   * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL
+   * reference guide</a>. Identifiers are ASCII: a letter, then any number of letters, digits,
+   * {@code -} and {@code _}. If {@code c} is not a letter on entry, the exception message
+   * begins with {@code what}.
+   */
+  private String parseId(String what) throws IOException {
+    if (!isAsciiLetter(c)) {
+      throw parseException(what + " should start with an ASCII letter");
+    }
+    StringBuilder id = new StringBuilder();
+    for (; isIdChar(c); next()) {
+      id.appendCodePoint(c);
+    }
+    return id.toString();
+  }
+
+  /**
+   * Returns an exception to be thrown describing a parse error with the given message, the line
+   * where it was detected, and up to 20 characters of the remaining input as context.
+   */
+  private ParseException parseException(String message) throws IOException {
+    // Capture the line first: reading the context below may run past one or more newlines.
+    int errorLine = lineNumber();
+    StringBuilder context = new StringBuilder();
+    if (c == EOF) {
+      context.append("EOF");
+    } else {
+      for (int count = 0; c != EOF && count < 20; count++) {
+        context.appendCodePoint(c);
+        next();
+      }
+      if (c != EOF) {
+        context.append("...");
+      }
+    }
+    return new ParseException(message, resourceName, errorLine, context.toString());
+  }
+}