diff options
Diffstat (limited to 'syntax/grammar.txt')
-rw-r--r-- | syntax/grammar.txt | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/syntax/grammar.txt b/syntax/grammar.txt new file mode 100644 index 0000000..7f5dfc8 --- /dev/null +++ b/syntax/grammar.txt @@ -0,0 +1,129 @@ + +Grammar of Starlark +================== + +File = {Statement | newline} eof . + +Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt . + +DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . + +Parameters = Parameter {',' Parameter}. + +Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier . + +IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . + +ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . + +WhileStmt = 'while' Test ':' Suite . + +Suite = [newline indent {Statement} outdent] | SimpleStmt . + +SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . +# NOTE: '\n' optional at EOF + +SmallStmt = ReturnStmt + | BreakStmt | ContinueStmt | PassStmt + | AssignStmt + | ExprStmt + | LoadStmt + . + +ReturnStmt = 'return' [Expression] . +BreakStmt = 'break' . +ContinueStmt = 'continue' . +PassStmt = 'pass' . +AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression . +ExprStmt = Expression . + +LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' . + +Test = LambdaExpr + | IfExpr + | PrimaryExpr + | UnaryExpr + | BinaryExpr + . + +LambdaExpr = 'lambda' [Parameters] ':' Test . + +IfExpr = Test 'if' Test 'else' Test . + +PrimaryExpr = Operand + | PrimaryExpr DotSuffix + | PrimaryExpr CallSuffix + | PrimaryExpr SliceSuffix + . + +Operand = identifier + | int | float | string + | ListExpr | ListComp + | DictExpr | DictComp + | '(' [Expression [',']] ')' + | ('-' | '+') PrimaryExpr + . + +DotSuffix = '.' identifier . +CallSuffix = '(' [Arguments [',']] ')' . +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . + +Arguments = Argument {',' Argument} . +Argument = Test | identifier '=' Test | '*' Test | '**' Test . 
+ +ListExpr = '[' [Expression [',']] ']' . +ListComp = '[' Test {CompClause} ']'. + +DictExpr = '{' [Entries [',']] '}' . +DictComp = '{' Entry {CompClause} '}' . +Entries = Entry {',' Entry} . +Entry = Test ':' Test . + +CompClause = 'for' LoopVariables 'in' Test | 'if' Test . + +UnaryExpr = 'not' Test . + +BinaryExpr = Test {Binop Test} . + +Binop = 'or' + | 'and' + | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' + | '|' + | '^' + | '&' + | '-' | '+' + | '*' | '%' | '/' | '//' + . + +Expression = Test {',' Test} . +# NOTE: trailing comma permitted only when within [...] or (...). + +LoopVariables = PrimaryExpr {',' PrimaryExpr} . + + +# Notation (similar to Go spec): +- lowercase and 'quoted' items are lexical tokens. +- Capitalized names denote grammar productions. +- (...) implies grouping. +- x | y means either x or y. +- [x] means x is optional. +- {x} means x is repeated zero or more times. +- The end of each declaration is marked with a period. + +# Tokens: +- spaces: newline, eof, indent, outdent. +- identifier. +- literals: string, int, float. +- plus all quoted tokens such as '+=', 'return'. + +# Notes: +- Ambiguity is resolved using operator precedence. +- The grammar does not enforce the legal order of params and args, + nor that the first compclause must be a 'for'. + +TODO: +- explain how the lexer generates indent, outdent, and newline tokens. +- why is unary NOT separated from unary - and +? +- the grammar is (mostly) in LL(1) style so, for example, + dot expressions are formed from suffixes, not complete expressions, + which makes the spec harder to read. Reorganize into non-LL(1) form? |