diff options
Diffstat (limited to 'velocity-engine-core/src/main/parser/Parser.jjt')
-rw-r--r-- | velocity-engine-core/src/main/parser/Parser.jjt | 2642 |
1 files changed, 2642 insertions, 0 deletions
diff --git a/velocity-engine-core/src/main/parser/Parser.jjt b/velocity-engine-core/src/main/parser/Parser.jjt new file mode 100644 index 00000000..593d044b --- /dev/null +++ b/velocity-engine-core/src/main/parser/Parser.jjt @@ -0,0 +1,2642 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * NOTE : please see documentation at bottom of this file. (It was placed there its tiring + * to always have to page past it... :) + */ +options +{ + /** The default package for this parser kit. This is now done from Maven. + NODE_PACKAGE="org.apache.velocity.runtime.parser"; + */ + + /** A source file will be generated for each non-terminal */ + MULTI=true; + + /** + * Each node will have access to the parser, I did this so + * some global information can be shared via the parser. I + * think this will come in handly keeping track of + * context, and being able to push changes back into + * the context when nodes make modifications to the + * context by setting properties, variables and + * what not. 
+ */ + NODE_USES_PARSER=true; + + /** + * The parser must be non-static in order for the + * above option to work, otherwise the parser value + * is passed in as null, which isn't all the useful ;) + */ + STATIC=false; + + /** + * Enables the use of a visitor that each of nodes + * will accept. This way we can separate the logic + * of node processing in a visitor and out of the + * nodes themselves. If processing changes then + * the nothing has to change in the node code. + */ + VISITOR=true; + + /** + * Declare that we are accepting unicode input and + * that we are using a custom character stream class + * Note that the char stream class is really a slightly + * modified ASCII_CharStream, as it appears we are safe + * because we only deal with pre-encoding-converted + * Readers rather than raw input streams. + */ + UNICODE_INPUT=true; + USER_CHAR_STREAM=true; + + /** + * for debugging purposes. Those are now handled from within javacc-maven-plugin debugging flags in pom.xml + DEBUG_PARSER = true; + DEBUG_LOOKAHEAD = true; + DEBUG_TOKEN_MANAGER = true; + */ + + +} + +PARSER_BEGIN(${parser.basename}Parser) +package ${parser.package}; + +import java.io.*; +import java.util.*; +import org.apache.velocity.Template; +import org.apache.velocity.exception.VelocityException; +import org.apache.velocity.runtime.RuntimeServices; +import org.apache.velocity.runtime.parser.*; +import org.apache.velocity.runtime.parser.node.*; +import org.apache.velocity.runtime.directive.*; +import org.apache.velocity.runtime.directive.MacroParseException; +import org.apache.velocity.runtime.RuntimeConstants; +import static org.apache.velocity.runtime.RuntimeConstants.SpaceGobbling; + +import org.slf4j.Logger; + +/** + * This class is responsible for parsing a Velocity + * template. This class was generated by JavaCC using + * the JJTree extension to produce an Abstract + * Syntax Tree (AST) of the template. 
+ * + * Please look at the Parser.jjt file which is + * what controls the generation of this class. + * + * @author <a href="mailto:jvanzyl@apache.org">Jason van Zyl</a> + * @author <a href="mailto:geirm@optonline.net">Geir Magnusson Jr.</a> + * @author <a href="hps@intermeta.de">Henning P. Schmiedehausen</a> + * @version $Id$ +*/ +public class ${parser.basename}Parser implements Parser +{ + /** + * Parser debugging flag. + * When debug is active, javacc Parser will contain (among other things) + * a trace_call() method. So we use the presence of this method to + * initialize our flag. + */ + private static boolean debugParser; + static + { + try + { + ${parser.basename}Parser.class.getDeclaredMethod("trace_call", String.class); + debugParser = true; + } + catch(NoSuchMethodException nsfe) + { + debugParser = false; + } + } + + /** + * Our own trace method. Use sparsingly in production, since each + * and every call will introduce an execution branch and slow down parsing. + */ + public static void trace(String message) + { + if (debugParser) System.out.println(message); + } + + /** + * Keep track of defined macros, used for escape processing + */ + private Map macroNames = new HashMap(); + + /** + * Current template we are parsing. Passed to us in parse() + */ + public Template currentTemplate = null; + + /** + * Set to true if the property + * RuntimeConstants.RUNTIME_REFERENCES_STRICT_ESCAPE is set to true + */ + public boolean strictEscape = false; + + /** + * Set to true if the propoerty + * RuntimeConstants.PARSER_HYPHEN_ALLOWED is set to true + */ + public boolean hyphenAllowedInIdentifiers = false; + + VelocityCharStream velcharstream = null; + + private RuntimeServices rsvc = null; + + @Override + public RuntimeServices getRuntimeServices() + { + return rsvc; + } + + private Logger log = null; + + /** + * This constructor was added to allow the re-use of parsers. + * The normal constructor takes a single argument which + * an InputStream. 
This simply creates a re-usable parser + * object, we satisfy the requirement of an InputStream + * by using a newline character as an input stream. + */ + public ${parser.basename}Parser( RuntimeServices rs) + { + /* + * need to call the CTOR first thing. + */ + + this( new VelocityCharStream( + new ByteArrayInputStream("\n".getBytes()), 1, 1 )); + + /* + * then initialize logger + */ + + log = rs.getLog("parser"); + + + /* + * now setup a VCS for later use + */ + velcharstream = new VelocityCharStream( + new ByteArrayInputStream("\n".getBytes()), 1, 1 ); + + + strictEscape = + rs.getBoolean(RuntimeConstants.RUNTIME_REFERENCES_STRICT_ESCAPE, false); + + hyphenAllowedInIdentifiers = + rs.getBoolean(RuntimeConstants.PARSER_HYPHEN_ALLOWED, false); + + /* + * and save the RuntimeServices + */ + rsvc = rs; + + /* + * then initialize customizable characters + */ + dollar = '${parser.char.dollar}'; + hash = '${parser.char.hash}'; + at = '${parser.char.at}'; + asterisk = '${parser.char.asterisk}'; + } + + /** + * This was also added to allow parsers to be + * re-usable. Normal JavaCC use entails passing an + * input stream to the constructor and the parsing + * process is carried out once. We want to be able + * to re-use parsers: we do this by adding this + * method and re-initializing the lexer with + * the new stream that we want parsed. + */ + @Override + public SimpleNode parse( Reader reader, Template template ) + throws ParseException + { + SimpleNode sn = null; + + currentTemplate = template; + + try + { + token_source.clearStateVars(); + + /* + * reinitialize the VelocityCharStream + * with the new reader + */ + velcharstream.ReInit( reader, 1, 1 ); + + /* + * now reinit the Parser with this CharStream + */ + ReInit( velcharstream ); + + /* + * do that voodoo... 
+ */ + sn = process(); + } + catch (MacroParseException mee) + { + /* + * thrown by the Macro class when something is amiss in the + * Macro specification + */ + log.error("{}: {}", template.getName(), mee.getMessage(), mee); + throw mee; + } + catch (ParseException pe) + { + log.error("{}: {}", currentTemplate.getName(), pe.getMessage()); + throw new TemplateParseException (pe.currentToken, + pe.expectedTokenSequences, pe.tokenImage, currentTemplate.getName()); + } + catch (TokenMgrError tme) + { + throw new ParseException("Lexical error: " + tme.toString()); + } + catch (Exception e) + { + String msg = template.getName() + ": " + e.getMessage(); + log.error(msg, e); + throw new VelocityException(msg, e, getRuntimeServices().getLogContext().getStackTrace()); + } + + currentTemplate = null; + + return sn; + } + + /** + * This method gets a Directive from the directives Hashtable + */ + @Override + public Directive getDirective(String directive) + { + return (Directive) rsvc.getDirective(directive); + } + + /** + * This method finds out of the directive exists in the directives Map. + */ + @Override + public boolean isDirective(String directive) + { + return rsvc.getDirective(directive) != null; + } + + + /** + * Produces a processed output for an escaped control or + * pluggable directive + */ + private String escapedDirective( String strImage ) + { + int iLast = strImage.lastIndexOf("\\"); + + String strDirective = strImage.substring(iLast + 1); + + boolean bRecognizedDirective = false; + + // we don't have to call substring method all the time in this method + String dirTag = strDirective.substring(1); + if (dirTag.charAt(0) == '{') + { + dirTag = dirTag.substring(1, dirTag.length() - 1); + } + + /* + * If this is a predefined derective or if we detect + * a macro definition (this is aproximate at best) then + * we absorb the forward slash. If in strict reference + * mode then we always absord the forward slash regardless + * if the derective is defined or not. 
+ */ + + if (strictEscape + || isDirective(dirTag) + || macroNames.containsKey(dirTag) + || rsvc.isVelocimacro(dirTag, currentTemplate)) + { + bRecognizedDirective = true; + } + else + { + /* order for speed? */ + + if ( dirTag.equals("if") + || dirTag.equals("end") + || dirTag.equals("set") + || dirTag.equals("else") + || dirTag.equals("elseif") + ) + { + bRecognizedDirective = true; + } + } + + /* + * if so, make the proper prefix string (let the escapes do their thing..) + * otherwise, just return what it is.. + */ + + if (bRecognizedDirective) + return ( strImage.substring(0,iLast/2) + strDirective); + else + return ( strImage ); + } + + /** + * Check whether there is a left parenthesis with leading optional + * whitespaces. This method is used in the semantic look ahead of + * Directive method. This is done in code instead of as a production + * for simplicity and efficiency. + */ + private boolean isLeftParenthesis() + { + char c; + int no = 0; + try { + while(true) + { + /** + * Read a character + */ + c = velcharstream.readChar(); + no++; + if (c == '(') + { + return true; + } + /** + * if not a white space return + */ + else if (c != ' ' && c != '\n' && c != '\r' && c != '\t') + { + return false; + } + } + } + catch(IOException e) + { + } + finally + { + /** + * Backup the stream to the initial state + */ + velcharstream.backup(no); + } + return false; + } + + /** + * Check whether there is a right parenthesis with leading optional + * whitespaces. This method is used in the semantic look ahead of + * Directive method. This is done in code instead of as a production + * for simplicity and efficiency. 
+ */ + private boolean isRightParenthesis() + { + char c; + int no = -1; + try { + while(true) + { + /** + * Read a character + */ + if (no == -1) + { + switch (getToken(1).kind) + { + case RPAREN: + return true; + case WHITESPACE: + case NEWLINE: + no = 0; + break; + default: + return false; + } + } + c = velcharstream.readChar(); + no++; + if (c == ')') + { + return true; + } + /** + * if not a white space return + */ + else if (c != ' ' && c != '\n' && c != '\r' && c != '\t') + { + return false; + } + } + } + catch(IOException e) + { + } + finally + { + /** + * Backup the stream to the initial state + */ + if (no > 0) velcharstream.backup(no); + } + return false; + } + + /** + * We use this method in a lookahead to determine if we are in a macro + * default value assignment. The standard lookahead is not smart enough. + * here we look for the equals after the reference. + */ + private boolean isAssignment() + { + // Basically if the last character read was not '$' then false + if (token_source.getCurrentLexicalState() != REFERENCE) return false; + + char c = ' '; + int backup = 0; + try + { + // Read through any white space + while(Character.isWhitespace(c)) + { + c = velcharstream.readChar(); + backup++; + } + + // This is what we are ultimately looking for + if (c != '=') return false; + } + catch (IOException e) + { + } + finally + { + velcharstream.backup(backup); + } + + return true; + } + + @Override + public Template getCurrentTemplate() + { + return currentTemplate; + } + + @Override + public void resetCurrentTemplate() + { + currentTemplate = null; + } + + @Override + public char dollar() + { + return dollar; + } + + @Override + public char hash() + { + return hash; + } + + @Override + public char at() + { + return at; + } + + @Override + public char asterisk() + { + return asterisk; + } + + private char dollar = '$'; + private char hash = '#'; + private char at = '@'; + private char asterisk = '*'; +} + +PARSER_END(${parser.basename}Parser) + 
+TOKEN_MGR_DECLS: +{ + private int fileDepth = 0; + + private int lparen = 0; + private int rparen = 0; + private int curlyLevel = 0; + List stateStack = new ArrayList(50); + + private boolean inComment; + private boolean inSet; + + /** + * Our own trace method. Use sparsingly in production, since each + * and every call will introduce an execution branch and slow down parsing. + */ + public static void trace(String message) + { + ${parser.basename}Parser.trace(message); + } + + /** + * Switches to a new state (add some log to the default method) + */ + public void switchTo(int lexState) + { + trace(" switch to " + lexStateNames[lexState]); + SwitchTo(lexState); + } + + public int getCurrentLexicalState() + { + return curLexState; + } + + /** + * pops a state off the stack, and restores paren counts + * + * @return boolean : success of operation + */ + public boolean stateStackPop() + { + ParserState s; + try + { + s = (ParserState) stateStack.remove(stateStack.size() - 1); // stack.pop + } + catch(IndexOutOfBoundsException e) + { + // empty stack + lparen=0; + switchTo(DEFAULT); + return false; + } + + trace(" stack pop (" + stateStack.size() + ")"); + lparen = s.lparen; + rparen = s.rparen; + curlyLevel = s.curlyLevel; + + switchTo(s.lexstate); + + return true; + } + + /** + * pushes the current state onto the 'state stack', + * and maintains the parens counts + * public because we need it in PD & VM handling + * + * @return boolean : success. It can fail if the state machine + * gets messed up (do don't mess it up :) + */ + public boolean stateStackPush() + { + trace(" (" + stateStack.size() + ") pushing cur state : " + lexStateNames[curLexState] ); + + ParserState s = new ParserState(); + s.lparen = lparen; + s.rparen = rparen; + s.curlyLevel = curlyLevel; + s.lexstate = curLexState; + + stateStack.add(s); // stack.push + + lparen = 0; + curlyLevel = 0; + + return true; + } + + /** + * Clears all state variables, resets to + * start values, clears stateStack. 
Call + * before parsing. + */ + public void clearStateVars() + { + stateStack.clear(); + + lparen = 0; + rparen = 0; + curlyLevel = 0; + inComment = false; + inSet = false; + + return; + } + + public void setInSet(boolean value) + { + inSet = value; + } + + public boolean isInSet() + { + return inSet; + } + + /** + * Holds the state of the parsing process. + */ + private static class ParserState + { + int lparen; + int rparen; + int curlyLevel; + int lexstate; + } + + /** + * handles the dropdown logic when encountering a RPAREN + */ + private void RPARENHandler() + { + /* + * Ultimately, we want to drop down to the state below + * the one that has an open (if we hit bottom (DEFAULT), + * that's fine. It's just text schmoo. + */ + + boolean closed = false; + + if (inComment) + closed = true; + + while( !closed ) + { + /* + * look at current state. If we haven't seen a lparen + * in this state then we drop a state, because this + * lparen clearly closes our state + */ + + if( lparen > 0) + { + /* + * if rparen + 1 == lparen, then this state is closed. 
+ * Otherwise, increment and keep parsing + */ + + if( lparen == rparen + 1) + { + stateStackPop(); + } + else + { + rparen++; + } + + closed = true; + } + else + { + /* + * now, drop a state + */ + + if(!stateStackPop()) + break; + } + } + } +} + +/* ------------------------------------------------------------------------ + * + * Tokens + * + * ------------------------------------------------------------------------- */ + +/* The VelocityCharStream will send a zero-width whitespace + just before EOF to let us accept a terminal $ or # +*/ +<PRE_DIRECTIVE,PRE_REFERENCE,PRE_OLD_REFERENCE> +TOKEN : +{ + <LONE_SYMBOL: "\u001C" > + { + stateStackPop(); + } +} + +/* In all other states, keep the zero-width whitespace for now */ +<REFERENCE,REFMODIFIER,OLD_REFMODIFIER,REFMOD3,REFINDEX,DIRECTIVE,REFMOD2,DEFAULT,REFMOD,IN_TEXTBLOCK,IN_MULTILINE_COMMENT,IN_FORMAL_COMMENT,IN_SINGLE_LINE_COMMENT> +TOKEN : +{ + <ZERO_WIDTH_WHITESPACE: "\u001C"> +} + +<REFERENCE, REFMODIFIER, OLD_REFMODIFIER, REFMOD3> +TOKEN: +{ + <INDEX_LBRACKET: "["> + { + stateStackPush(); + switchTo(REFINDEX); + } + | + /* we need to give precedence to the logical 'or' here, it's a hack to avoid multiplying parsing modes */ + <LOGICAL_OR_2: "||"> + { + stateStackPop(); + } + | + <PIPE: "|"> + { + if (curlyLevel == 1) + { + switchTo(ALT_VAL); + } + else + { + stateStackPop(); + } + } +} + +<REFINDEX> +TOKEN: +{ + <INDEX_RBRACKET: "]"> + { + stateStackPop(); + } +} + + +<DIRECTIVE,REFMOD2,ALT_VAL> +TOKEN: +{ + <LBRACKET: "["> +| <RBRACKET: "]"> +| <COMMA:","> +} + +<DIRECTIVE,REFMOD2,ALT_VAL> +TOKEN: +{ + <DOUBLEDOT : ".." 
> +} + +<DIRECTIVE, REFMOD2,ALT_VAL> +TOKEN: +{ + <COLON : ":" > +} + +<DIRECTIVE, REFMOD2, ALT_VAL> +TOKEN : +{ + <LEFT_CURLEY : "{" > + { + ++curlyLevel; + } + | + <RIGHT_CURLEY : "}" > + { + --curlyLevel; + if (curLexState == ALT_VAL && curlyLevel == 0) + { + stateStackPop(); + } + } +} + +<DIRECTIVE,REFMODIFIER,OLD_REFMODIFIER> +TOKEN: +{ + <LPAREN: "("> + { + if (!inComment) + lparen++; + + /* + * If in REFERENCE and we have seen the dot, then move + * to REFMOD2 -> Modifier() + */ + + if (curLexState == REFMODIFIER || curLexState == OLD_REFMODIFIER ) + switchTo( REFMOD2 ); + } +} + +/* + * we never will see a ')' in anything but DIRECTIVE and REFMOD2. + * Each have their own + */ +<DIRECTIVE> +TOKEN: +{ + <RPAREN: ")"> + { + RPARENHandler(); + } +} + + +<REFMOD2> +TOKEN: +{ + /* + * in REFMOD2, we don't want to bind the whitespace and \n like we + * do when closing a directive. + */ + <REFMOD2_RPAREN: ")"> + { + /* + * need to simply switch back to REFERENCE, not drop down the stack + * because we can (infinitely) chain, ala + * $foo.bar().blargh().woogie().doogie() + */ + + switchTo( REFMOD3 ); + } +} + +/*---------------------------------------------- + * + * escape "\\" handling for the built-in directives + * + *--------------------------------------------- */ +TOKEN: +{ + /* + * We have to do this, because we want these to be a Text node, and + * whatever follows to be peer to this text in the tree. + * + * We need to touch the ASTs for these, because we want an even # of \'s + * to render properly in front of the block + * + * This is really simplistic. I actually would prefer to find them in + * grammatical context, but I am neither smart nor rested, a receipe + * for disaster, another long night with Mr. Parser, or both. + */ + + <ESCAPE_DIRECTIVE : (<DOUBLE_ESCAPE>)* "\\${parser.char.hash}" (<WORD> | <BRACKETED_WORD>) > +} + + +/* + * We added the lexical states REFERENCE, REFMODIFIER, REFMOD2 to + * address JIRA issue VELOCITY-631. 
With SET_DIRECTIVE only in the + * DEFAULT lexical state the following VTL fails "$a#set($b = 1)" + * because the Reference token uses LOOKAHEAD(2) combined with the + * fact that we explicity set the lex state to REFERENCE with the $ + * token, which means we would never evaluate this token during the + * look ahead. This general issue is disscussed here: + * + * http://www.engr.mun.ca/~theo/JavaCC-FAQ/javacc-faq-ie.htm#tth_sEc3.12 + * + */ +<DEFAULT, PRE_REFERENCE, PRE_OLD_REFERENCE, REFERENCE, REFMODIFIER, OLD_REFMODIFIER, REFMOD2, REFMOD3> +TOKEN: +{ + <SET_DIRECTIVE: ("${parser.char.hash}set" | "${parser.char.hash}{set}") (" "|"\t")* "("> + { + if (! inComment) + { + trace(" #set : going to DIRECTIVE" ); + + stateStackPush(); + setInSet(true); + switchTo(DIRECTIVE); + } + + /* + * need the LPAREN action + */ + + if (!inComment) + { + lparen++; + + /* + * If in REFERENCE and we have seen the dot, then move + * to REFMOD2 -> Modifier() + */ + + if (curLexState == REFMODIFIER || curLexState == OLD_REFMODIFIER ) + switchTo( REFMOD2 ); + } + } +} + +<*> +MORE : +{ + /* + * Note : DOLLARBANG is a duplicate of DOLLAR. They must be identical. + */ + + <DOLLAR: ("\\")* "${parser.char.dollar}"> + { + if (! inComment) + { + /* + * if we find ourselves in REFERENCE or PRE_REFERENCE, we need to pop down + * to end the previous ref + */ + + if (curLexState == REFERENCE || curLexState == PRE_REFERENCE || curLexState == PRE_OLD_REFERENCE) + { + stateStackPop(); + } + + int preReferenceState = parser.hyphenAllowedInIdentifiers ? PRE_OLD_REFERENCE : PRE_REFERENCE; + + trace( " $ : going to " + lexStateNames[preReferenceState]); + + /* do not push PRE states */ + if (curLexState != PRE_REFERENCE && curLexState != PRE_DIRECTIVE && curLexState != PRE_OLD_REFERENCE) + { + stateStackPush(); + } + switchTo(preReferenceState); + } + } + +| <DOLLARBANG: ("\\")* "${parser.char.dollar}" ("\\")* "!"> + { + if (! 
inComment) + { + /* + * if we find ourselves in REFERENCE or PRE_REFERENCE, we need to pop down + * to end the previous ref + */ + + if (curLexState == REFERENCE || curLexState == PRE_REFERENCE || curLexState == PRE_OLD_REFERENCE) + { + stateStackPop(); + } + + int preReferenceState = parser.hyphenAllowedInIdentifiers ? PRE_OLD_REFERENCE : PRE_REFERENCE; + + trace( " $ : going to " + lexStateNames[preReferenceState]); + + /* do not push PRE states */ + if (curLexState != PRE_REFERENCE && curLexState != PRE_DIRECTIVE && curLexState != PRE_OLD_REFERENCE) + { + stateStackPush(); + } + switchTo(preReferenceState); + } + } + +| "${parser.char.hash}[[" + { + if (!inComment) + { + inComment = true; + /* do not push PRE states */ + if (curLexState != PRE_REFERENCE && curLexState != PRE_DIRECTIVE && curLexState != PRE_OLD_REFERENCE) + { + stateStackPush(); + } + switchTo( IN_TEXTBLOCK ); + } + } + +| <"${parser.char.hash}${parser.char.asterisk}${parser.char.asterisk}" ~["${parser.char.hash}","\u001C"]> + { + if (!inComment) + { + input_stream.backup(1); + inComment = true; + /* do not push PRE states */ + if (curLexState != PRE_REFERENCE && curLexState != PRE_DIRECTIVE && curLexState != PRE_OLD_REFERENCE) + { + stateStackPush(); + } + switchTo( IN_FORMAL_COMMENT); + } + } + +| "${parser.char.hash}${parser.char.asterisk}" + { + if (!inComment) + { + inComment=true; + /* do not push PRE states */ + if (curLexState != PRE_REFERENCE && curLexState != PRE_DIRECTIVE && curLexState != PRE_OLD_REFERENCE) + { + stateStackPush(); + } + switchTo( IN_MULTI_LINE_COMMENT ); + } + } + +| <HASH : "${parser.char.hash}" > + { + if (! inComment) + { + /* + * We can have the situation where #if($foo)$foo#end. + * We need to transition out of REFERENCE before going to DIRECTIVE. + * I don't really like this, but I can't think of a legal way + * you are going into DIRECTIVE while in REFERENCE. 
-gmj + */ + + if (curLexState == REFERENCE || curLexState == PRE_REFERENCE || curLexState == PRE_OLD_REFERENCE || curLexState == REFMODIFIER || curLexState == OLD_REFMODIFIER ) + { + stateStackPop(); + } + + trace(" # : going to PRE_DIRECTIVE" ); + + /* do not push PRE states */ + if (curLexState != PRE_REFERENCE && curLexState != PRE_DIRECTIVE && curLexState != PRE_OLD_REFERENCE) + { + stateStackPush(); + } + switchTo(PRE_DIRECTIVE); + } + } +} + +// treat the single line comment case separately +// to avoid ##<EOF> errors +<DEFAULT,PRE_DIRECTIVE,DIRECTIVE,REFERENCE,PRE_REFERENCE,PRE_OLD_REFERENCE,REFMOD2,REFMOD3,REFMODIFIER,OLD_REFMODIFIER> +TOKEN : +{ + <SINGLE_LINE_COMMENT_START: "${parser.char.hash}${parser.char.hash}"> + { + if (!inComment) + { + if (curLexState == REFERENCE || curLexState == PRE_REFERENCE || curLexState == PRE_OLD_REFERENCE) + { + stateStackPop(); + } + + inComment = true; + stateStackPush(); + switchTo(IN_SINGLE_LINE_COMMENT); + } + } +} + +/* ----------------------------------------------------------------------- + * + * *_COMMENT Lexical tokens + * + *-----------------------------------------------------------------------*/ +<IN_SINGLE_LINE_COMMENT> +TOKEN : +{ + <SINGLE_LINE_COMMENT: "\n" | "\r" | "\r\n"> + { + inComment = false; + stateStackPop(); + if (curLexState == REFERENCE || curLexState == REFMOD3) + { + // end of reference: pop again + stateStackPop(); + } + } + +} + +<IN_FORMAL_COMMENT> +TOKEN : +{ + <FORMAL_COMMENT: "${parser.char.asterisk}${parser.char.hash}" > + { + inComment = false; + stateStackPop(); + if (curLexState == REFERENCE || curLexState == REFMOD3) + { + // end of reference: pop again + stateStackPop(); + } + } +} + +<IN_MULTI_LINE_COMMENT> +TOKEN : +{ + <MULTI_LINE_COMMENT: "${parser.char.asterisk}${parser.char.hash}" > + { + inComment = false; + stateStackPop(); + if (curLexState == REFERENCE || curLexState == REFMOD3) + { + // end of reference: pop again + stateStackPop(); + } + } +} + +<IN_TEXTBLOCK> +TOKEN : 
+{ + <TEXTBLOCK: "]]${parser.char.hash}" > + { + inComment = false; + stateStackPop(); + } +} + +<IN_SINGLE_LINE_COMMENT,IN_FORMAL_COMMENT,IN_MULTI_LINE_COMMENT> +SKIP : +{ + < ~[] > +} + +<IN_TEXTBLOCK> +MORE : +{ + < ~["\u001C"] > +} + +/* ----------------------------------------------------------------------- + * + * DIRECTIVE Lexical State (some of it, anyway) + * + * ---------------------------------------------------------------------- */ + +<DEFAULT,REFINDEX,REFMOD2,DIRECTIVE,ALT_VAL> +TOKEN: +{ + <WHITESPACE : ([" ","\t"])+> +| <NEWLINE : ("\n" | "\r" | "\r\n") > + { + trace(" NEWLINE :"); + + /* if (isInSet()) */ + setInSet(false); + } +} + +/* needed for stuff like #foo() followed by ( '$' | '#' )* followed by ( <WHITESPACE> | <ENDLINE> ) + so that directive postfix doesn't eat the '$'s and '#'s +*/ +<PRE_DIRECTIVE, PRE_REFERENCE, PRE_OLD_REFERENCE> +TOKEN: +{ + <SUFFIX: ([" ","\t"])* ("\n" | "\r" | "\r\n")> + { + stateStackPop(); + } +} + +<DIRECTIVE,REFMOD2,REFINDEX,ALT_VAL> +TOKEN : +{ +// <STRING_LITERAL: ( "\"" ( ~["\"","\n","\r"] )* "\"" ) | ( "'" ( ~["'","\n","\r"] )* "'" ) > + < STRING_LITERAL: + ("\"" + ( (~["\"","\u001C"]) + | ("\\" + ( ["n","t","b","r","f"] + | ["0"-"7"] ( ["0"-"7"] )? + | ["0"-"3"] ["0"-"7"] ["0"-"7"] + | "u" ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] ["0"-"9", "a"-"f", "A"-"F"] + ) + ) + | ("\"\"") + | ( "\\" (" ")* "\n") + )* + "\"" + ) + | + ("\'" + ( (~["\'","\u001C"]) + | ("''") + | ( "\\" (" ")* "\n") + )* + "\'" + ) + > + + { + /* + * - if we are in DIRECTIVE and haven't seen ( yet, then also drop out. + * don't forget to account for the beloved yet wierd #set + * - finally, if we are in REFMOD2 (remember : $foo.bar( ) then " is ok! 
+ */ + + if( curLexState == DIRECTIVE && !isInSet() && lparen == 0) + stateStackPop(); + } +} + +<REFERENCE,DIRECTIVE,REFMODIFIER,OLD_REFMODIFIER,REFMOD2,REFINDEX,ALT_VAL> +TOKEN: +{ + <TRUE: "true"> +| <FALSE: "false"> +} + +<DIRECTIVE,REFMOD2,REFINDEX,ALT_VAL> +TOKEN : +{ + <MINUS: "-"> +| <PLUS: "+"> +| <MULTIPLY: "*"> +| <DIVIDE: "/"> +| <MODULUS: "%"> +| <LOGICAL_AND: "&&" | "and" > +| <LOGICAL_OR: "||" | "or" > +| <LOGICAL_LT: "<" | "lt" > +| <LOGICAL_LE: "<=" | "le" > +| <LOGICAL_GT: ">" | "gt" > +| <LOGICAL_GE: ">=" | "ge" > +| <LOGICAL_EQUALS: "==" | "eq" > +| <LOGICAL_NOT_EQUALS: "!=" | "ne" > +| <LOGICAL_NOT: "!" | "not" > +| <EQUALS: "=" > +} + +<PRE_DIRECTIVE> +TOKEN : +{ + <END: ( "end" | "{end}" )> + { + stateStackPop(); + } + +| <IF_DIRECTIVE: "if" | "{if}"> + { + switchTo(DIRECTIVE); + } + +| <ELSEIF: "elseif" | "{elseif}"> + { + switchTo(DIRECTIVE); + } + +| <ELSE: "else" | "{else}"> + { + stateStackPop(); + } +} + +<PRE_DIRECTIVE,DIRECTIVE,REFMOD2,REFINDEX,ALT_VAL> +TOKEN: +{ + <#DIGIT: [ "0"-"9" ] > + + /* + * treat FLOATING_POINT_LITERAL and INTEGER_LITERAL differently as a range can only handle integers. + */ + + /** + * Note -- we also define an integer as ending with a double period, + * in order to avoid 1..3 being defined as floating point (1.) then a period, then a integer + */ +| <INTEGER_LITERAL: ("-")? (<DIGIT>)+ ("..")? > + { + + /* + * Remove the double period if it is there + */ + if (matchedToken.image.endsWith("..")) { + input_stream.backup(2); + matchedToken.image = matchedToken.image.substring(0,matchedToken.image.length()-2); + } + + /* + * check to see if we are in set + * ex. #set($foo = $foo + 3) + * because we want to handle the \n after + */ + + if ( lparen == 0 && !isInSet() && curLexState != REFMOD2 && curLexState != REFINDEX && curLexState != ALT_VAL) + { + stateStackPop(); + } + } + +| <FLOATING_POINT_LITERAL: + ("-")? (<DIGIT>)+ "." (<DIGIT>)* (<EXPONENT>)? + | ("-")? "." (<DIGIT>)+ (<EXPONENT>)? + | ("-")? 
(<DIGIT>)+ <EXPONENT> + > + { + /* + * check to see if we are in set + * ex. #set $foo = $foo + 3 + * because we want to handle the \n after + */ + + if ( lparen == 0 && !isInSet() && curLexState != REFMOD2 && curLexState != ALT_VAL) + { + stateStackPop(); + } +} +| + <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > + +} + +/** + * TODO, the "@" symbol for block macros to be correct really should prefix WORD + * and BRACKETED_WORD, e.g., <WORD ["@"] ( <LETTER... etc... + * However, having the conditional character at the beginning screws up + * Macro parse. As it is now you can have #@1234 defined as a macro + * Which is not correct. + */ + +<PRE_DIRECTIVE,DIRECTIVE> +TOKEN: +{ + <#LETTER: [ "a"-"z", "A"-"Z" ] > +| <#DIRECTIVE_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_" ] > +| <WORD: ( <LETTER> | ["_"] | ["${parser.char.at}"]) (<DIRECTIVE_CHAR>)* > +| <BRACKETED_WORD: "{" ( <LETTER> | ["_"] | ["${parser.char.at}"]) (<DIRECTIVE_CHAR>)* "}" > +} + +/* ----------------------------------------------------------------------- + * + * REFERENCE Lexical States + * + * This is more than a single state, because of the structure of + * the VTL references. We use three states because the set of tokens + * for each state can be different. + * + * $foo.bar( "arg" ) + * ^ ^ ^ ^ ^ + * | | | | | + * |_________________ > PRE_REFERENCE : state initiated by the '$' character. + * | | | | (or PRE_OLD_REFERENCE if '-' is allowed in identifiers) + * |________________> REFERENCE : state initiated by the identifier. Continues + * | | | until end of the reference, or the . character. + * |_____________ > REFMODIFIER : state switched to when the <DOT> is encountered. + * | | (or OLD_REFMODIFIER if '-' is allowed in identifiers) + * | | note that this is a switch, not a push. See notes at bottom. + * |_________ > REFMOD2 : state switch to when the LPAREN is encountered. + * | again, this is a switch, not a push. + * |_ > REFMOD3 : state only checking for a possible '.' or '[' continuation. 
+ * + * During the REFERENCE, REFMODIFIER or REFMOD3 lex states we will switch to: + * - REFINDEX if a bracket '[' is encountered: $foo[1], $foo.bar[1], $foo.bar( "arg" )[1] + * - ALT_VAL if a pipe '|' is encountered (only for formal references): ${foo|'foo'} + * ---------------------------------------------------------------------------- */ + +/* Identifier made of letters, digits and '_'. When matched in PRE_REFERENCE the lexer switches to REFERENCE; in REFMODIFIER and REFMOD2 the state is left as is. */ <PRE_REFERENCE,REFMODIFIER,REFMOD2> +TOKEN : +{ + <#ALPHA_CHAR: ["a"-"z", "A"-"Z", "_"] > +| <#IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_" ] > +| <IDENTIFIER: ( <ALPHA_CHAR> ) (<IDENTIFIER_CHAR>)* > + { + if (curLexState == PRE_REFERENCE) + { + switchTo(REFERENCE); + } + } +} + +/* Same as IDENTIFIER, except that '-' is also accepted after the first character. */ <PRE_OLD_REFERENCE,OLD_REFMODIFIER> +TOKEN : +{ + <#OLD_ALPHA_CHAR: ["a"-"z", "A"-"Z", "_"] > +| <#OLD_IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_", "-" ] > +| <OLD_IDENTIFIER: ( <OLD_ALPHA_CHAR> ) (<OLD_IDENTIFIER_CHAR>)* > + { + if (curLexState == PRE_OLD_REFERENCE) + { + switchTo(REFERENCE); + } + } +} + + +/* A '.' is only matched as DOT when an <ALPHA_CHAR> follows it; the action hands that character back to the input and switches to OLD_REFMODIFIER or REFMODIFIER. */ <REFERENCE,REFMODIFIER,OLD_REFMODIFIER,REFMOD2,REFMOD3> +TOKEN: +{ + <DOT: "." <ALPHA_CHAR>> + { + /* + * push the alpha char back into the stream so the following identifier + * is complete + */ + + input_stream.backup(1); + + /* + * and munge the <DOT> so we just get a . when we have normal text that + * looks like a ref.ident + */ + + matchedToken.image = "."; + + int refModifierState = parser.hyphenAllowedInIdentifiers ? 
OLD_REFMODIFIER : REFMODIFIER; + + trace("DOT : switching to " + lexStateNames[refModifierState]); + switchTo(refModifierState); + + } +} + +<PRE_REFERENCE,PRE_OLD_REFERENCE,REFERENCE,REFMODIFIER,OLD_REFMODIFIER,REFMOD3> +TOKEN : +{ + <LCURLY: "{"> + { + ++curlyLevel; + } +| <RCURLY: "}"> + { + /* maybe it wasn't for our state */ + while (curlyLevel == 0 && curLexState != DEFAULT) + { + stateStackPop(); + } + /* At this point, here are all the possible states: + * - DEFAULT, which means the '}' is schmoo + * - DIRECTIVE or REFMOD2, which means the '}' is a closing map curly + * - one of the other REFERENCE states or ALT_VAL, which means the '}' ends the reference + * If we're in the last case, pop up state. + */ + if (curLexState != DEFAULT && curLexState != DIRECTIVE && curLexState != REFMOD2) + { + stateStackPop(); + } + } +} + +/* Matches any single character (~[]), pushes it back unread and pops the lexer state stack. NOTE(review): REFMOD is listed here but is not one of the states described in the notes at the bottom of this file - confirm it is still needed. */ <PRE_REFERENCE,PRE_OLD_REFERENCE,REFERENCE,REFMODIFIER,OLD_REFMODIFIER,REFMOD,REFMOD3> +SPECIAL_TOKEN : +{ + <REFERENCE_TERMINATOR: ~[] > + { + /* + * push every terminator character back into the stream + */ + + input_stream.backup(1); + + trace("REF_TERM :"); + + stateStackPop(); + } +} + +/* Same push-back-and-pop handling, for the PRE_DIRECTIVE state. 
*/ <PRE_DIRECTIVE> +SPECIAL_TOKEN : +{ + <DIRECTIVE_TERMINATOR: ~[] > + { + trace("DIRECTIVE_TERM :"); + + input_stream.backup(1); + stateStackPop(); + } +} + +/* TEXT must end with a newline, and contain at least one non-whitespace character in the first line, + so that the <WHITESPACE> <NEWLINE> sequence is not read as a TEXT (needed for space gobbling) +*/ +TOKEN : +{ + <DOUBLE_ESCAPE : "\\\\"> +| <ESCAPE: "\\" > +| <TEXT: (~["${parser.char.dollar}", "${parser.char.hash}", "\\", "\r", "\n","\u001C"])* (~["${parser.char.dollar}", "${parser.char.hash}", "\\", "\r", "\n", " ", "\t","\u001C"])+ (~["${parser.char.dollar}", "${parser.char.hash}", "\\", "\r", "\n","\u001C"])* <NEWLINE> ((~["${parser.char.dollar}", "${parser.char.hash}", "\\", "\r", "\n","\u001C"])* <NEWLINE>)* > +} + +/* INLINE_TEXT: one or more characters other than the dollar, hash, backslash, CR, LF and \u001C characters, i.e. text that stays on one line. */ TOKEN : +{ + <INLINE_TEXT: (~["${parser.char.dollar}", 
"${parser.char.hash}", "\\", "\r", "\n","\u001C"])+ > +} + +/** + * This method is what starts the whole parsing + * process. After the parsing is complete and + * the template has been turned into an AST, + * this method returns the root of AST which + * can subsequently be traversed by a visitor + * which implements the ParserVisitor interface + * which is generated automatically by JavaCC + */ +SimpleNode process() : +{ + boolean afterNewline = true; +} +{ + ( LOOKAHEAD({ getToken(1).kind != EOF }) afterNewline = Statement(afterNewline) )* <EOF> + { return jjtThis; } +} + +/** + * These are the types of statements that + * are acceptable in Velocity templates. + * + * @param afterNewline value returned by the previously parsed statement (process() starts with true); + * a leading <WHITESPACE> token is only handed to IfStatement, SetDirective or Directive when it is true + * @return the afterNewline value to pass to the next statement + */ +boolean Statement(boolean afterNewline) #void : +{ +} +{ + LOOKAHEAD( { getToken(1).kind == IF_DIRECTIVE || afterNewline && getToken(1).kind == WHITESPACE && getToken(2).kind == IF_DIRECTIVE } ) afterNewline = IfStatement(afterNewline) { return afterNewline; } +| LOOKAHEAD(2) Reference() { return false; } +| LOOKAHEAD(2) afterNewline = Comment() { return afterNewline; } +| Textblock() { return false; } +| LOOKAHEAD( { getToken(1).kind == SET_DIRECTIVE || afterNewline && getToken(1).kind == WHITESPACE && getToken(2).kind == SET_DIRECTIVE } ) afterNewline = SetDirective(afterNewline) { return afterNewline; } +| EscapedDirective() { return false; } +| Escape() { return false; } +| LOOKAHEAD( { getToken(1).kind == WORD || getToken(1).kind == BRACKETED_WORD || afterNewline && getToken(1).kind == WHITESPACE && ( getToken(2).kind == WORD || getToken(2).kind == BRACKETED_WORD ) } ) afterNewline = Directive(afterNewline) { return afterNewline; } +| afterNewline = Text() { return afterNewline; } +| (<NEWLINE>) #Text { return true; } +| (((<INLINE_TEXT>) { afterNewline = false; } ) ((<TEXT>) { afterNewline = true; })? 
) #Text { return afterNewline; } +| (<WHITESPACE>) #Text { return false; } +| (<SUFFIX>) #Text { return true; } +| LOOKAHEAD(2) EndingZeroWidthWhitespace() { return afterNewline; } +| (<LOGICAL_OR_2>) #Text { return afterNewline; } // needed here since it can be triggered in <REFERENCE> mode out of any boolean evaluation +| (<ZERO_WIDTH_WHITESPACE>) #Text { afterNewline = !afterNewline; return false; } /* NOTE(review): the negated afterNewline is never read, false is returned unconditionally - confirm intent */ +} + +/* A <ZERO_WIDTH_WHITESPACE> directly followed by <EOF>; creates no node (#void). */ void EndingZeroWidthWhitespace() #void : {} +{ + <ZERO_WIDTH_WHITESPACE> <EOF> { } +} + +/** + * used to separate the notion of a valid directive that has been + * escaped, versus something that looks like a directive and + * is just schmoo. This is important to do as a separate production + * that creates a node, because we want this, in either case, to stop + * the further parsing of the Directive() tree. + */ +void EscapedDirective() : {} +{ + { + Token t = null; + } + + t = <ESCAPE_DIRECTIVE> + { + /* + * churn and burn.. + */ + t.image = escapedDirective( t.image ); + } +} + +/** + * Used to catch and process escape sequences in grammatical constructs + * as escapes outside of VTL are just characters. Right now we have both + * this and the EscapedDirective() construction because in the EscapedDirective() + * case, we want to suck in the #<directive> and here we don't. 
We just want + * the escapes to render correctly + */ +void Escape() : {} +{ + { + Token t = null; + int count = 0; + boolean control = false; + } + + /* count the consecutive <DOUBLE_ESCAPE> tokens; t ends up as the last one */ ( LOOKAHEAD(2) t = <DOUBLE_ESCAPE> + { + count++; + } + )+ + { + /* + * first, check to see if we have a control directive + */ + switch(t.next.kind ) { + case IF_DIRECTIVE : + case ELSE : + case ELSEIF : + case END : + control = true; + break; + } + + /* + * if that failed, lets lookahead to see if we matched a PD or a VM + * (this check runs in every case, also when the switch above already set control; + * nTag is the image of the following token without its first character - + * presumably the directive prefix character, confirm) + */ + String nTag = t.next.image.substring(1); + if (strictEscape + || isDirective(nTag) + || macroNames.containsKey(nTag) + || rsvc.isVelocimacro(nTag, currentTemplate)) + { + control = true; + } + + /* node value: per matched <DOUBLE_ESCAPE>, one backslash when control is set, two backslashes otherwise */ jjtThis.val = ""; + + for( int i = 0; i < count; i++) + jjtThis.val += ( control ? "\\" : "\\\\"); + } + +} + +/* Returns true for a single-line comment, false for a multi-line or formal comment; Statement() uses the value as its afterNewline result. */ boolean Comment() : {} +{ + <SINGLE_LINE_COMMENT_START> ( <SINGLE_LINE_COMMENT> ) ? { return true; } +| <MULTI_LINE_COMMENT> { return false; } +| <FORMAL_COMMENT> { return false; } +} + +/* The productions below each wrap exactly one token in a node of their own. */ void Textblock() : {} +{ + <TEXTBLOCK> +} + +void FloatingPointLiteral() : {} +{ + <FLOATING_POINT_LITERAL> +} + +void IntegerLiteral() : {} +{ + <INTEGER_LITERAL> +} + +void StringLiteral() : {} +{ + <STRING_LITERAL> +} + +/** + * This method corresponds to variable + * references in Velocity templates. 
+ * The following are examples of variable + * references that may be found in a + * template: + * + * $foo + * $bar + * + */ +/* Matches one <IDENTIFIER> or <OLD_IDENTIFIER> token. */ void Identifier() : {} +{ + <IDENTIFIER> | <OLD_IDENTIFIER> +} + +void Word() : {} +{ + <WORD> +} + +/** + * Supports the arguments for the Pluggable Directives + * + * @return the ParserTreeConstants node id matching the kind of argument that was parsed + */ +int DirectiveArg() #void : {} +{ + Reference() + { + return ParserTreeConstants.JJTREFERENCE; + } +| Word() + { + return ParserTreeConstants.JJTWORD; + } +| StringLiteral() + { + return ParserTreeConstants.JJTSTRINGLITERAL; + } + +| IntegerLiteral() + { + return ParserTreeConstants.JJTINTEGERLITERAL; + } + /* + * Need to put this before the floating point expansion + */ +| LOOKAHEAD( <LBRACKET> (<WHITESPACE> | <NEWLINE>)* ( Reference() | IntegerLiteral()) (<WHITESPACE> | <NEWLINE>)* <DOUBLEDOT> ) IntegerRange() + { + return ParserTreeConstants.JJTINTEGERRANGE; + } +| FloatingPointLiteral() + { + return ParserTreeConstants.JJTFLOATINGPOINTLITERAL; + } +| Map() + { + return ParserTreeConstants.JJTMAP; + } +| ObjectArray() + { + return ParserTreeConstants.JJTOBJECTARRAY; + } +| True() + { + return ParserTreeConstants.JJTTRUE; + } +| False() + { + return ParserTreeConstants.JJTFALSE; + } +} + +/* Left-hand side of an "arg = value" pair; only tried by Directive() when parsing a macro directive (isMacro && isAssignment()). 
*/ void DirectiveAssign() : {} +{ + Reference() +} + + +/** + * Supports the Pluggable Directives + * #foo( arg+ ) + * @param afterNewline the afterNewline value supplied by Statement() + * @return true if ends with a newline + */ +boolean Directive(boolean afterNewline) : +{ + Token id = null, t = null, u = null, end = null, _else = null; + int argType; + int argPos = 0; + Directive d; + int directiveType; + boolean isVM = false; + boolean isMacro = false; + ArrayList argtypes = new ArrayList(4); + String blockPrefix = ""; + ASTBlock block = null, elseBlock = null; + boolean hasParentheses = false; + boolean newlineAtStart = afterNewline; +} +{ + [ + (t = <WHITESPACE>) + { + // only possible if not after new line + jjtThis.setPrefix(t.image); + t = null; + } + ] + /* + * note that if we were escaped, that is now handled by + * EscapedDirective() + */ + ((id = 
<WORD>) | (id = <BRACKETED_WORD>)) + { + /* directive name = token image after the last hash character (the whole image when there is none); for the bracketed form p + 2 also skips the opening curly and length() - 1 drops the closing one */ String directiveName; + int p = id.image.lastIndexOf(hash); + if (id.kind == StandardParserConstants.BRACKETED_WORD) + { + directiveName = id.image.substring(p + 2, id.image.length() - 1); + } + else + { + directiveName = id.image.substring(p + 1); + } + + d = getDirective(directiveName); + + /* + * Velocimacro support : if the directive is macro directive + * then set the flag so after the block parsing, we add the VM + * right then. (So available if used w/in the current template ) + */ + + if (directiveName.equals("macro")) + { + isMacro = true; + } + + /* + * set the directive name from here. No reason for the thing to know + * about parser tokens + */ + + jjtThis.setDirectiveName(directiveName); + + if ( d == null) + { + if( directiveName.charAt(0) == at ) + { + // block macro call of type: #@foobar($arg1 $arg2) astBody #end + directiveType = Directive.BLOCK; + } + else + { + /* + * if null, then not a real directive, but maybe a Velocimacro + */ + isVM = rsvc.isVelocimacro(directiveName, currentTemplate); + + directiveType = Directive.LINE; + } + } + else + { + directiveType = d.getType(); + } + + /* + * now, switch us out of PRE_DIRECTIVE + */ + + token_source.switchTo(DIRECTIVE); + argPos = 0; + } + + + /** + * Look for the pattern [WHITESPACE] <LPAREN> + */ + ( + LOOKAHEAD( { isLeftParenthesis() } ) + /* + * if this is indeed a token, match the #foo ( arg, arg... 
) pattern + */ + ( + (<WHITESPACE> | <NEWLINE>)* <LPAREN> + ( + LOOKAHEAD({ !isRightParenthesis() }) (<WHITESPACE> | <NEWLINE>)* [<COMMA> (<WHITESPACE> | <NEWLINE>)*] + ( + [ + LOOKAHEAD( { isMacro && isAssignment() }) + DirectiveAssign() (<WHITESPACE> | <NEWLINE>)* <EQUALS> ( <WHITESPACE> | <NEWLINE> )* + { + argtypes.add(ParserTreeConstants.JJTDIRECTIVEASSIGN); + } + ] + LOOKAHEAD( { !isRightParenthesis() } ) + ( + argType = DirectiveArg() + { + argtypes.add(argType); + if (d == null && argType == ParserTreeConstants.JJTWORD) + { + if (isVM) + { + throw new MacroParseException("Invalid argument " + + (argPos+1) + " in macro call " + id.image, currentTemplate.getName(), id); + } + } + argPos++; + } + ) + | + { + if (!isMacro) + { + // We only allow line comments in macro definitions for now + throw new MacroParseException("A Line comment is not allowed in " + id.image + + " arguments", currentTemplate.getName(), id); + } + } + <SINGLE_LINE_COMMENT_START> [<SINGLE_LINE_COMMENT>] + ) + )* (<WHITESPACE> | <NEWLINE>)* <RPAREN> + { hasParentheses = true; } + ) + | + { + /* no parenthesised argument list follows: pop the lexer state stack */ token_source.stateStackPop(); + } + ) + { afterNewline = false; } + [ + // Conditions where whitespace and newline postfix is eaten by space gobbling at this point: + // - block directive + // - new line before directive without backward compatibility mode + // - backward compatibility mode *with parentheses* + // - #include() or #parse() + LOOKAHEAD(2, { directiveType != Directive.LINE || newlineAtStart && rsvc.getSpaceGobbling() != SpaceGobbling.BC || rsvc.getSpaceGobbling() == SpaceGobbling.BC && hasParentheses || d != null && (d instanceof Include || d instanceof Parse) }) + ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + afterNewline = true; + if (directiveType == Directive.LINE) + { + jjtThis.setPostfix(t == null ? 
u.image : t.image + u.image); + } + else + { + blockPrefix = (t == null ? u.image : t.image + u.image); + } + t = u = null; + } + ] + { + if (d != null) + { + d.checkArgs(argtypes, id, currentTemplate.getName()); + } + if (directiveType == Directive.LINE) + { + return afterNewline; + } + } + /* + * and the following block if the PD needs it + */ + ( + ( + ( + LOOKAHEAD( { getToken(1).kind != END && getToken(1).kind != ELSE && ( !afterNewline || getToken(1).kind != WHITESPACE || getToken(2).kind != END && getToken(2).kind != ELSE ) }) afterNewline = Statement(afterNewline) + )* + { + block = jjtThis; + block.setPrefix(blockPrefix); + blockPrefix = ""; + } + ) + #Block + ) + [ + LOOKAHEAD( 1, { afterNewline }) + (t = <WHITESPACE>) + { + block.setPostfix(t.image); + t = null; + } + ] + /* + * then an optional #else for the #foreach directive + */ + ( + [ + LOOKAHEAD( { d != null && (d instanceof Foreach) && getToken(1).kind == ELSE } ) + ( + (_else = <ELSE>) + ( + [ + LOOKAHEAD(2) ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPrefix(t == null ? u.image : t.image + u.image); + t = u = null; + afterNewline = true; + } + ] + ( + LOOKAHEAD( { getToken(1).kind != END && (!afterNewline || getToken(1).kind != WHITESPACE || getToken(2).kind != END) }) + afterNewline = Statement(afterNewline) + )* + { + elseBlock = jjtThis; + } + ) + #Block + { + int pos = _else.image.lastIndexOf(hash); + if (pos > 0) + { + block.setMorePostfix(_else.image.substring(0, pos)); + } + block = elseBlock; + } + ) + ] + ) + [ + LOOKAHEAD( 1, { afterNewline }) + (t = <WHITESPACE>) + { + block.setPostfix(t.image); + t = null; + afterNewline = false; + } + ] + ( + (end = <END>) + { afterNewline = false; } + [ + LOOKAHEAD(2, { newlineAtStart || rsvc.getSpaceGobbling() == SpaceGobbling.BC }) + ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPostfix(t == null ? 
u.image : t.image + u.image); + t = u = null; + afterNewline = true; + } + ] + { + int pos = end.image.lastIndexOf(hash); + if (pos > 0) + { + block.setMorePostfix(end.image.substring(0, pos)); + } + } + ) + { + /* + * VM : if we are processing a #macro directive, we need to + * process the block. In truth, I can just register the name + * and do the work later when init-ing. That would work + * as long as things were always defined before use. This way + * we don't have to worry about forward references and such... + */ + if (isMacro) + { + // Add the macro name so that we can perform escape processing + // on defined macros + String macroName = jjtThis.jjtGetChild(0).getFirstToken().image; + macroNames.put(macroName, macroName); + } + /* NOTE(review): checkArgs was already called with the same arguments before the block was parsed - confirm this second call is intentional */ if (d != null) + { + d.checkArgs(argtypes, id, currentTemplate.getName()); + } + /* + * VM : end + */ + return afterNewline; + } +} + +/** + * for creating a map in a #set + * + * #set($foo = {$foo : $bar, $blargh : $thingy}) + */ +void Map() : {} +{ + <LEFT_CURLEY> + ( + LOOKAHEAD(( <WHITESPACE> | <NEWLINE> )* Parameter() <COLON>) ( Parameter() <COLON> Parameter() (<COMMA> Parameter() <COLON> Parameter() )* ) + | + ( <WHITESPACE> | <NEWLINE> )* + ) + + /** note: need both tokens as they are generated in different states **/ + ( <RIGHT_CURLEY> | <RCURLY> ) +} + +/* Array literal: <LBRACKET>, zero or more comma-separated Parameter()s, <RBRACKET>. 
*/ void ObjectArray() : {} +{ + <LBRACKET> [ Parameter() ( <COMMA> Parameter() )* ] <RBRACKET> +} + + +/** + * supports the [n..m] vector generator for use in + * the #foreach() to generate measured ranges w/o + * needing explicit support from the app/servlet + */ +void IntegerRange() : {} +{ + <LBRACKET> (<WHITESPACE> | <NEWLINE>)* + ( Reference() | IntegerLiteral()) + (<WHITESPACE>|<NEWLINE>)* <DOUBLEDOT> (<WHITESPACE>|<NEWLINE>)* + (Reference() | IntegerLiteral()) + (<WHITESPACE>|<NEWLINE>)* <RBRACKET> +} + + +/** + * A Simplified parameter more suitable for an index position: $foo[$index] + */ +void IndexParameter() #void: {} +{ + (<WHITESPACE>|<NEWLINE>)* + ( + Expression() 
+ ) + (<WHITESPACE>|<NEWLINE>)* +} + + +/** + * A single value used as a map key, map value or array element: + * a string, integer or floating point literal, an integer range, + * a map, an array, true/false or a reference, with optional + * whitespace and newlines on both sides. Creates no node of its own (#void). + */ +void Parameter() #void: {} +{ + (<WHITESPACE>|<NEWLINE>)* + ( + StringLiteral() + | IntegerLiteral() + | LOOKAHEAD( <LBRACKET> ( <WHITESPACE> | <NEWLINE> )* ( Reference() | IntegerLiteral()) ( <WHITESPACE> | <NEWLINE> )* <DOUBLEDOT> ) IntegerRange() + | Map() + | ObjectArray() + | True() + | False() + | Reference() + | FloatingPointLiteral() + ) + (<WHITESPACE>|<NEWLINE>)* +} + +/** + * A method call inside a reference: an identifier, <LPAREN>, + * zero or more comma-separated Expression() arguments and the + * closing <REFMOD2_RPAREN>. + */ +void Method() : {} +{ + Identifier() <LPAREN> [ Expression() ( <COMMA> Expression() )* ] <REFMOD2_RPAREN> +} + + +void Index() : {} +{ + <INDEX_LBRACKET> IndexParameter() <INDEX_RBRACKET> +} + +void Reference() : {} +{ + /* + * A reference is either $<FOO> or ${<FOO>} or ${<FOO>'|'<ALTERNATE_VALUE>} + */ + + ( + ( <IDENTIFIER> | <OLD_IDENTIFIER> ) (Index())* + (LOOKAHEAD(2) <DOT> (LOOKAHEAD(3) Method() | Identifier() ) (Index())* )* + ) + | + ( + <LCURLY> + ( <IDENTIFIER> | <OLD_IDENTIFIER> ) (Index())* + (LOOKAHEAD(2) <DOT> (LOOKAHEAD(3) Method() | Identifier() ) (Index())* )* + [ <PIPE> Expression() ] + ( <RCURLY> | <RIGHT_CURLEY> ) + ) +} + +void True() : {} +{ + <TRUE> +} + +void False() : {} +{ + <FALSE> +} + + +/** + * This is somewhat of a kludge, the problem is that the parser picks + * up on '$[' , or '$![' as being a Reference, and does not dismiss it even though + * there is no <Identifier> between $ and [, This has something to do + * with the LOOKAHEAD in Reference, but I never found a way to resolve + * it in a more fashionable way.. 
+ */ +<DEFAULT,PRE_REFERENCE,PRE_OLD_REFERENCE> +TOKEN : +{ + <EMPTY_INDEX : ("$[" | "$![" | "$\\![" | "$.")> +} + + +/** + * This method is responsible for allowing + * all non-grammar text to pass through + * unscathed. 
+ * @return true if last read token was a newline + */ +boolean Text() : +{ + Token t = null; +} +{ + <TEXT> { return true; } + | <DOT> { return false; } + | <RPAREN> { return false; } + | <LPAREN> { return false; } + | <INTEGER_LITERAL> { return false; } + | <FLOATING_POINT_LITERAL> { return false; } + | <STRING_LITERAL> { return false; } + | <ESCAPE> { return false; } + | <LCURLY> { return false; } + | <RCURLY> { return false; } + | <EMPTY_INDEX> { return false; } + | <PIPE> { return false; } + | t=<LONE_SYMBOL> + { + /* Drop the ending zero-width whitespace */ + t.image = t.image.substring(0, t.image.length() - 1); return false; + } +} + +/* ----------------------------------------------------------------------- + * + * Defined Directive Syntax + * + * ----------------------------------------------------------------------*/ + +/* Parses <IF_DIRECTIVE> ( Expression ) block, any number of ElseIfStatement()s, an optional ElseStatement() and the closing <END>. Returns true only when a [<WHITESPACE>] <NEWLINE> following <END> was consumed as this node's postfix. */ boolean IfStatement(boolean afterNewline) : +{ + Token t = null, u = null, end = null; + ASTBlock lastBlock = null; + boolean newlineAtStart = afterNewline; +} +{ + [ ( t = <WHITESPACE> ) + { + // only possible if not after new line + jjtThis.setPrefix(t.image); + t = null; + } + ] + <IF_DIRECTIVE> ( <WHITESPACE> | <NEWLINE> )* <LPAREN> Expression() <RPAREN> + ( + [ + LOOKAHEAD(2) ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPrefix(t == null ? 
u.image : t.image + u.image); + t = u = null; + afterNewline = true; + } + ] + ( LOOKAHEAD( + { + (getToken(1).kind != ELSEIF && getToken(1).kind != ELSE && getToken(1).kind != END) && + (!afterNewline || getToken(1).kind != WHITESPACE || (getToken(2).kind != ELSEIF && getToken(2).kind != ELSE && getToken(2).kind != END)) + }) + afterNewline = Statement(afterNewline) )* + { + lastBlock = jjtThis; + } + ) #Block + [ LOOKAHEAD( { getToken(1).kind == ELSEIF || (afterNewline && getToken(1).kind == WHITESPACE && getToken(2).kind == ELSEIF) }) + ( LOOKAHEAD( { getToken(1).kind == ELSEIF || (afterNewline && getToken(1).kind == WHITESPACE && getToken(2).kind == ELSEIF) }) ( lastBlock = ElseIfStatement(lastBlock, afterNewline) { afterNewline = lastBlock.endsWithNewline; } ))+ ] + [ LOOKAHEAD( { getToken(1).kind == ELSE || (afterNewline && getToken(1).kind == WHITESPACE && getToken(2).kind == ELSE) } ) lastBlock = ElseStatement(lastBlock, afterNewline) { afterNewline = lastBlock.endsWithNewline; } ] + [ LOOKAHEAD( 1, { afterNewline } ) ( t = <WHITESPACE> ) + { + lastBlock.setPostfix(t.image); + t = null; + } + ] + (end = <END>) + { afterNewline = false; } + [ + LOOKAHEAD(2, { newlineAtStart || rsvc.getSpaceGobbling() == SpaceGobbling.BC } ) + ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPostfix(t == null ? u.image : t.image + u.image); + afterNewline = true; + } + ] + { + int pos = end.image.lastIndexOf(hash); + if (pos > 0) + { + lastBlock.setMorePostfix(end.image.substring(0, pos)); + } + return afterNewline; + } +} + +/* Parses <ELSE> and the statements up to (not including) <END> into a Block node. Whitespace in front of <ELSE> becomes the postfix of previousBlock. Returns the new block, whose endsWithNewline field holds the afterNewline value reached at its end. 
*/ ASTBlock ElseStatement(ASTBlock previousBlock, boolean afterNewline) : +{ + Token t = null, u = null, _else = null; + ASTBlock block = null; +} +{ + [ ( t = <WHITESPACE> ) + { + previousBlock.setPostfix(t.image); + t = null; + } + ] + (_else = <ELSE>) + ( + [ + LOOKAHEAD(2) ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPrefix(t == null ? 
u.image : t.image + u.image); + t = u = null; + afterNewline = true; + } + ] + ( LOOKAHEAD( { getToken(1).kind != END && (!afterNewline || getToken(1).kind != WHITESPACE || getToken(2).kind != END) }) afterNewline = Statement(afterNewline) )* + { + block = jjtThis; + block.endsWithNewline = afterNewline; + } + ) + #Block + { + int pos = _else.image.lastIndexOf(hash); + if (pos > 0) + { + previousBlock.setMorePostfix(_else.image.substring(0, pos)); + } + return block; + } +} + +/* Parses <ELSEIF> ( Expression ) and the statements up to the next <ELSEIF>, <ELSE> or <END> into a Block node. Whitespace in front of <ELSEIF> becomes the postfix of previousBlock. Returns the new block, whose endsWithNewline field holds the afterNewline value reached at its end. */ ASTBlock ElseIfStatement(ASTBlock previousBlock, boolean afterNewline) : +{ + Token t = null, u = null, elseif = null; + ASTBlock block = null; +} +{ + [ ( t = <WHITESPACE> ) + { + previousBlock.setPostfix(t.image); + t = null; + } + ] + (elseif = <ELSEIF>) ( <WHITESPACE> | <NEWLINE> )* + <LPAREN> Expression() <RPAREN> + ( + [ + LOOKAHEAD(2) ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPrefix(t == null ? u.image : t.image + u.image); + t = u = null; + afterNewline = true; + } + ] + ( LOOKAHEAD( { (getToken(1).kind != ELSEIF && getToken(1).kind != ELSE && getToken(1).kind != END) && (!afterNewline || getToken(1).kind != WHITESPACE || (getToken(2).kind != ELSEIF && getToken(2).kind != ELSE && getToken(2).kind != END)) }) afterNewline = Statement(afterNewline) )* + { + block = jjtThis; + block.endsWithNewline = afterNewline; + } + ) + #Block + { + int pos = elseif.image.lastIndexOf(hash); + if (pos > 0) + { + previousBlock.setMorePostfix(elseif.image.substring(0, pos)); + } + return block; + } +} + +/** + * Currently support both types of set : + * #set( expr ) + * #set expr + * + * NOTE(review): the production below always requires a closing <RPAREN>, + * so the paren-less form does not appear to be accepted here - confirm. 
+ * + * @return true if a [<WHITESPACE>] <NEWLINE> after the closing parenthesis was consumed as postfix + */ +boolean SetDirective(boolean afterNewline) : +{ + Token t = null, u = null; + boolean endsWithNewline = false; +} +{ + [ ( t = <WHITESPACE> ) + { + // only possible after new line + jjtThis.setPrefix(t.image); + t = null; + } + ] + <SET_DIRECTIVE>(( <WHITESPACE> | <NEWLINE> )* Reference() ( <WHITESPACE> | <NEWLINE> )* <EQUALS> Expression() <RPAREN> + { + /* + * ensure that inSet is false. 
Leads to some amusing bugs... + */ + + token_source.setInSet(false); + } + [ + LOOKAHEAD(2, { afterNewline || rsvc.getSpaceGobbling() == SpaceGobbling.BC } ) + ( [ ( t = <WHITESPACE> ) ] ( u = <NEWLINE> ) ) + { + jjtThis.setPostfix(t == null ? u.image : t.image + u.image); + endsWithNewline = true; + } + ] ) + { + return endsWithNewline; + } +} + +/* ----------------------------------------------------------------------- + * + * Expression Syntax + * + * ----------------------------------------------------------------------*/ + +/* The productions below form a precedence ladder, loosest-binding first: or, and, equality, relational, additive, multiplicative, unary, primary. Each binary operator closes a node over the two operands parsed around it (#XxxNode(2)); the ladder productions themselves are #void. */ void Expression() : {} +{ +// LOOKAHEAD( PrimaryExpression() <EQUALS> ) Assignment() +//| +ConditionalOrExpression() +} + +void Assignment() #Assignment(2) : {} +{ + PrimaryExpression() <EQUALS> Expression() +} + +void ConditionalOrExpression() #void : {} +{ + ConditionalAndExpression() + ( ( <LOGICAL_OR> | <LOGICAL_OR_2> ) ConditionalAndExpression() #OrNode(2) )* +} + + +void ConditionalAndExpression() #void : {} +{ + EqualityExpression() + ( <LOGICAL_AND> EqualityExpression() #AndNode(2) )* +} + +void EqualityExpression() #void : {} +{ + RelationalExpression() + ( + <LOGICAL_EQUALS> RelationalExpression() #EQNode(2) + | <LOGICAL_NOT_EQUALS> RelationalExpression() #NENode(2) + )* +} + +void RelationalExpression() #void : {} +{ + AdditiveExpression() + ( + <LOGICAL_LT> AdditiveExpression() #LTNode(2) + | <LOGICAL_GT> AdditiveExpression() #GTNode(2) + | <LOGICAL_LE> AdditiveExpression() #LENode(2) + | <LOGICAL_GE> AdditiveExpression() #GENode(2) + )* +} + +void AdditiveExpression() #void : {} +{ + MultiplicativeExpression() + ( + <PLUS> MultiplicativeExpression() #AddNode(2) + | <MINUS> MultiplicativeExpression() #SubtractNode(2) + )* +} + +void MultiplicativeExpression() #void : {} +{ + UnaryExpression() + ( + <MULTIPLY> UnaryExpression() #MulNode(2) + | <DIVIDE> UnaryExpression() #DivNode(2) + | <MODULUS> UnaryExpression() #ModNode(2) + )* +} + +void UnaryExpression() #void : {} +{ + ( <WHITESPACE> | <NEWLINE> )* + ( + <LOGICAL_NOT> 
UnaryExpression() #NotNode(1) + | <MINUS> PrimaryExpression() #NegateNode(1) + | PrimaryExpression() + ) +} + +/* Operand of an expression: a literal, reference, integer range, map, array, true/false or a parenthesised Expression(), with optional whitespace and newlines on both sides. */ void PrimaryExpression() #void : {} +{ + ( <WHITESPACE> | <NEWLINE> )* + ( + StringLiteral() + | Reference() + | IntegerLiteral() + | LOOKAHEAD( <LBRACKET> ( <WHITESPACE> | <NEWLINE> )* ( Reference() | IntegerLiteral()) ( <WHITESPACE> | <NEWLINE> )* <DOUBLEDOT> ) IntegerRange() + | FloatingPointLiteral() + | Map() + | ObjectArray() + | True() + | False() + | <LPAREN> Expression() <RPAREN> + ) + ( <WHITESPACE> | <NEWLINE> )* +} + +/* ====================================================================== + + Notes + ----- + + template == the input stream for this parser, contains 'VTL' + mixed in with 'schmoo' + + VTL == Velocity Template Language : the references, directives, etc + + schmoo == the non-VTL component of a template + + reference == VTL entity that represents data within the context. ex. $foo + + directive == VTL entity that denotes 'action' (#set, #foreach, #if ) + + defined directive (DD) == VTL directive entity that is expressed + explicitly w/in this grammar + + pluggable directive (PD) == VTL directive entity that is defined outside of the + grammar. PD's allow VTL to be easily expandable w/o parser modification. + + The problem with parsing VTL is that an input stream consists generally of + little bits of VTL mixed in with 'other stuff, referred to as 'schmoo'. + Unlike other languages, like C or Java, where the parser can punt whenever + it encounters input that doesn't conform to the grammar, the VTL parser can't do + that. It must simply output the schmoo and keep going. 
+ + There are a few things that we do here : + - define a set of parser states (DEFAULT, DIRECTIVE, REFERENCE, etc) + - define for each parser state a set of tokens for each state + - define the VTL grammar, expressed (mostly) in the productions such as Text(), + SetStatement(), etc. 
+ + It is clear that this expression of the VTL grammar (the contents + of this .jjt file) is maturing and evolving as we learn more about + how to parse VTL ( and as I learn about parsing...), so in the event + this documentation is in disagreement w/ the source, the source + takes precedence. :) + + Parser States + ------------- + DEFAULT : This is the base or starting state, and strangely enough, the + default state. + + PRE_DIRECTIVE : State immediately following '#' before we figure out which + defined or pluggable directive (or neither) we are working with. + + DIRECTIVE : This state is triggered by a match of a DD or a PD. + + PRE_REFERENCE : Triggered by '$'. Analogous to PRE_DIRECTIVE. When '-' is + allowed in identifiers, this state is called PRE_OLD_REFERENCE. + + REFERENCE : Triggered by the <IDENTIFIER> + + REFMODIFIER : Triggered by .<alpha> when in REFERENCE, REFMODIFIER or REFMOD3. When '-' + is allowed in identifiers, this state is called OLD_REFMODIFIER. + + REFMOD2 : Triggered by '(' when in REFMODIFIER + + REFMOD3 : Triggered by the corresponding ')' + + REFINDEX : Array index. Triggered by '[' in REFERENCE, REFMODIFIER, REFMOD3. + + ALT_VAL : Alternate value. Triggered by '|' in REFERENCE, REFMODIFIER, REFMOD3. + + (cont) + + Escape Sequences + ---------------- + The escape processing in VTL is very simple. The '\' character acts + only as an escape when : + + 1) One or more touch a VTL element. + + A VTL element is either : + + 1) It precedes a reference that is in the context. + + 2) It precedes a defined directive (#set, #if, #end, etc) or a valid + pluggable directive, such as #foreach + + In all other cases the '\' is just another piece of text. The purpose of this + is to allow the non-VTL parts of a template (the 'schmoo') to not have to be + altered for processing by Velocity. 
+ + So if in the context $foo and $bar were defined and $woogie was not + + \$foo \$bar \$woogie + + would output + + $foo $bar \$woogie + + Further, you can stack them and they take effect left to right, just like conventional + escape characters in other languages. + + \$foo = $foo + \\$foo = \<foo> + \\\$foo = \$foo + + + What You Expect + --------------- + The recent versions of the parser are trying to support precise output to + support general template use. The directives do not render trailing + whitespace and newlines if followed by a newline. They will render + preceding whitespace. The only exception is #set, which also eats + preceding whitespace. + + So, with a template : + + ------ + #set($foo="foo") + #if($foo) + \$foo = $foo + #end + ------ + + it will render precisely : + + ------ + $foo = foo + ------ + +*/ |