aboutsummaryrefslogtreecommitdiff
path: root/syntax/grammar.txt
blob: 7f5dfc811b028e38170ec3a79941007e489573e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

Grammar of Starlark
==================

File = {Statement | newline} eof .

Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .

DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .

Parameters = Parameter {',' Parameter}.

Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .

IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .

ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .

WhileStmt = 'while' Test ':' Suite .

Suite = [newline indent {Statement} outdent] | SimpleStmt .

SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
# NOTE: '\n' optional at EOF

SmallStmt = ReturnStmt
          | BreakStmt | ContinueStmt | PassStmt
          | AssignStmt
          | ExprStmt
          | LoadStmt
          .

ReturnStmt   = 'return' [Expression] .
BreakStmt    = 'break' .
ContinueStmt = 'continue' .
PassStmt     = 'pass' .
AssignStmt   = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
ExprStmt     = Expression .

LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .

Test = LambdaExpr
     | IfExpr
     | PrimaryExpr
     | UnaryExpr
     | BinaryExpr
     .

LambdaExpr = 'lambda' [Parameters] ':' Test .

IfExpr = Test 'if' Test 'else' Test .

PrimaryExpr = Operand
            | PrimaryExpr DotSuffix
            | PrimaryExpr CallSuffix
            | PrimaryExpr SliceSuffix
            .

Operand = identifier
        | int | float | string
        | ListExpr | ListComp
        | DictExpr | DictComp
        | '(' [Expression [',']] ')'
        | ('-' | '+') PrimaryExpr
        .

DotSuffix   = '.' identifier .
CallSuffix  = '(' [Arguments [',']] ')' .
SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .

Arguments = Argument {',' Argument} .
Argument  = Test | identifier '=' Test | '*' Test | '**' Test .

ListExpr = '[' [Expression [',']] ']' .
ListComp = '[' Test {CompClause} ']'.

DictExpr = '{' [Entries [',']] '}' .
DictComp = '{' Entry {CompClause} '}' .
Entries  = Entry {',' Entry} .
Entry    = Test ':' Test .

CompClause = 'for' LoopVariables 'in' Test | 'if' Test .

UnaryExpr = 'not' Test .

BinaryExpr = Test {Binop Test} .

Binop = 'or'
      | 'and'
      | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
      | '|'
      | '^'
      | '&'
      | '-' | '+'
      | '*' | '%' | '/' | '//'
      .

Expression = Test {',' Test} .
# NOTE: trailing comma permitted only when within [...] or (...).

LoopVariables = PrimaryExpr {',' PrimaryExpr} .


# Notation (similar to Go spec):
- lowercase and 'quoted' items are lexical tokens.
- Capitalized names denote grammar productions.
- (...) implies grouping
- x | y means either x or y.
- [x] means x is optional
- {x} means x is repeated zero or more times
- The end of each declaration is marked with a period.

# Tokens
- spaces: newline, eof, indent, outdent.
- identifier.
- literals: string, int, float.
- plus all quoted tokens such as '+=', 'return'.

# Notes:
- Ambiguity is resolved using operator precedence.
- The grammar does not enforce the legal order of params and args,
  nor that the first compclause must be a 'for'.

TODO:
- explain how the lexer generates indent, outdent, and newline tokens.
- why is unary NOT separated from unary - and +?
- the grammar is (mostly) in LL(1) style so, for example,
  dot expressions are formed suffixes, not complete expressions,
  which makes the spec harder to read.  Reorganize into non-LL(1) form?