aboutsummaryrefslogtreecommitdiff
path: root/syntax/grammar.txt
diff options
context:
space:
mode:
Diffstat (limited to 'syntax/grammar.txt')
-rw-r--r--syntax/grammar.txt129
1 files changed, 129 insertions, 0 deletions
diff --git a/syntax/grammar.txt b/syntax/grammar.txt
new file mode 100644
index 0000000..7f5dfc8
--- /dev/null
+++ b/syntax/grammar.txt
@@ -0,0 +1,129 @@
+
+Grammar of Starlark
+==================
+
+File = {Statement | newline} eof .
+
+Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .
+
+DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
+
+Parameters = Parameter {',' Parameter}.
+
+Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .
+
+IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
+
+ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
+
+WhileStmt = 'while' Test ':' Suite .
+
+Suite = [newline indent {Statement} outdent] | SimpleStmt .
+
+SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
+# NOTE: '\n' optional at EOF
+
+SmallStmt = ReturnStmt
+ | BreakStmt | ContinueStmt | PassStmt
+ | AssignStmt
+ | ExprStmt
+ | LoadStmt
+ .
+
+ReturnStmt = 'return' [Expression] .
+BreakStmt = 'break' .
+ContinueStmt = 'continue' .
+PassStmt = 'pass' .
+AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
+ExprStmt = Expression .
+
+LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
+
+Test = LambdaExpr
+ | IfExpr
+ | PrimaryExpr
+ | UnaryExpr
+ | BinaryExpr
+ .
+
+LambdaExpr = 'lambda' [Parameters] ':' Test .
+
+IfExpr = Test 'if' Test 'else' Test .
+
+PrimaryExpr = Operand
+ | PrimaryExpr DotSuffix
+ | PrimaryExpr CallSuffix
+ | PrimaryExpr SliceSuffix
+ .
+
+Operand = identifier
+ | int | float | string
+ | ListExpr | ListComp
+ | DictExpr | DictComp
+ | '(' [Expression [',']] ')'
+ | ('-' | '+') PrimaryExpr
+ .
+
+DotSuffix = '.' identifier .
+CallSuffix = '(' [Arguments [',']] ')' .
+SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
+
+Arguments = Argument {',' Argument} .
+Argument = Test | identifier '=' Test | '*' Test | '**' Test .
+
+ListExpr = '[' [Expression [',']] ']' .
+ListComp = '[' Test {CompClause} ']'.
+
+DictExpr = '{' [Entries [',']] '}' .
+DictComp = '{' Entry {CompClause} '}' .
+Entries = Entry {',' Entry} .
+Entry = Test ':' Test .
+
+CompClause = 'for' LoopVariables 'in' Test | 'if' Test .
+
+UnaryExpr = 'not' Test .
+
+BinaryExpr = Test {Binop Test} .
+
+Binop = 'or'
+ | 'and'
+ | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
+ | '|'
+ | '^'
+ | '&'
+ | '-' | '+'
+ | '*' | '%' | '/' | '//'
+ .
+
+Expression = Test {',' Test} .
+# NOTE: trailing comma permitted only when within [...] or (...).
+
+LoopVariables = PrimaryExpr {',' PrimaryExpr} .
+
+
+# Notation (similar to Go spec):
+- lowercase and 'quoted' items are lexical tokens.
+- Capitalized names denote grammar productions.
+- (...) implies grouping
+- x | y means either x or y.
+- [x] means x is optional
+- {x} means x is repeated zero or more times
+- The end of each declaration is marked with a period.
+
+# Tokens
+- spaces: newline, eof, indent, outdent.
+- identifier.
+- literals: string, int, float.
+- plus all quoted tokens such as '+=', 'return'.
+
+# Notes:
+- Ambiguity is resolved using operator precedence.
+- The grammar does not enforce the legal order of params and args,
+ nor that the first compclause must be a 'for'.
+
+TODO:
+- explain how the lexer generates indent, outdent, and newline tokens.
+- why is unary NOT separated from unary - and +?
+- the grammar is (mostly) in LL(1) style so, for example,
+ dot expressions are formed suffixes, not complete expressions,
+ which makes the spec harder to read. Reorganize into non-LL(1) form?