aboutsummaryrefslogtreecommitdiff
path: root/syntax/parse.go
diff options
context:
space:
mode:
Diffstat (limited to 'syntax/parse.go')
-rw-r--r--syntax/parse.go1028
1 files changed, 1028 insertions, 0 deletions
diff --git a/syntax/parse.go b/syntax/parse.go
new file mode 100644
index 0000000..f4c8fff
--- /dev/null
+++ b/syntax/parse.go
@@ -0,0 +1,1028 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// This file defines a recursive-descent parser for Starlark.
+// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
+//
+// TODO(adonovan): use syntax.Error more systematically throughout the
+// package. Verify that error positions are correct using the
+// chunkedfile mechanism.
+
+import "log"
+
+// Enable this flag to print the token stream and log.Fatal on the first error.
+const debug = false
+
+// A Mode value is a set of flags (or 0) that controls optional parser functionality.
+type Mode uint
+
+const (
+ RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
+)
+
+// Parse parses the input data and returns the corresponding parse tree.
+//
+// If src != nil, ParseFile parses the source from src and the filename
+// is only used when recording position information.
+// The type of the argument for the src parameter must be string,
+// []byte, io.Reader, or FilePortion.
+// If src == nil, ParseFile parses the file specified by filename.
+func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
+ in, err := newScanner(filename, src, mode&RetainComments != 0)
+ if err != nil {
+ return nil, err
+ }
+ p := parser{in: in}
+ defer p.in.recover(&err)
+
+ p.nextToken() // read first lookahead token
+ f = p.parseFile()
+ if f != nil {
+ f.Path = filename
+ }
+ p.assignComments(f)
+ return f, nil
+}
+
+// ParseCompoundStmt parses a single compound statement:
+// a blank line, a def, for, while, or if statement, or a
+// semicolon-separated list of simple statements followed
+// by a newline. These are the units on which the REPL operates.
+// ParseCompoundStmt does not consume any following input.
+// The parser calls the readline function each
+// time it needs a new line of input.
+func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
+ in, err := newScanner(filename, readline, false)
+ if err != nil {
+ return nil, err
+ }
+
+ p := parser{in: in}
+ defer p.in.recover(&err)
+
+ p.nextToken() // read first lookahead token
+
+ var stmts []Stmt
+ switch p.tok {
+ case DEF, IF, FOR, WHILE:
+ stmts = p.parseStmt(stmts)
+ case NEWLINE:
+ // blank line
+ default:
+ stmts = p.parseSimpleStmt(stmts, false)
+ // Require but don't consume newline, to avoid blocking again.
+ if p.tok != NEWLINE {
+ p.in.errorf(p.in.pos, "invalid syntax")
+ }
+ }
+
+ return &File{Path: filename, Stmts: stmts}, nil
+}
+
+// ParseExpr parses a Starlark expression.
+// A comma-separated list of expressions is parsed as a tuple.
+// See Parse for explanation of parameters.
+func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
+ in, err := newScanner(filename, src, mode&RetainComments != 0)
+ if err != nil {
+ return nil, err
+ }
+ p := parser{in: in}
+ defer p.in.recover(&err)
+
+ p.nextToken() // read first lookahead token
+
+ // Use parseExpr, not parseTest, to permit an unparenthesized tuple.
+ expr = p.parseExpr(false)
+
+ // A following newline (e.g. "f()\n") appears outside any brackets,
+ // on a non-blank line, and thus results in a NEWLINE token.
+ if p.tok == NEWLINE {
+ p.nextToken()
+ }
+
+ if p.tok != EOF {
+ p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
+ }
+ p.assignComments(expr)
+ return expr, nil
+}
+
+type parser struct {
+ in *scanner
+ tok Token
+ tokval tokenValue
+}
+
+// nextToken advances the scanner and returns the position of the
+// previous token.
+func (p *parser) nextToken() Position {
+ oldpos := p.tokval.pos
+ p.tok = p.in.nextToken(&p.tokval)
+ // enable to see the token stream
+ if debug {
+ log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
+ }
+ return oldpos
+}
+
+// file_input = (NEWLINE | stmt)* EOF
+func (p *parser) parseFile() *File {
+ var stmts []Stmt
+ for p.tok != EOF {
+ if p.tok == NEWLINE {
+ p.nextToken()
+ continue
+ }
+ stmts = p.parseStmt(stmts)
+ }
+ return &File{Stmts: stmts}
+}
+
+func (p *parser) parseStmt(stmts []Stmt) []Stmt {
+ if p.tok == DEF {
+ return append(stmts, p.parseDefStmt())
+ } else if p.tok == IF {
+ return append(stmts, p.parseIfStmt())
+ } else if p.tok == FOR {
+ return append(stmts, p.parseForStmt())
+ } else if p.tok == WHILE {
+ return append(stmts, p.parseWhileStmt())
+ }
+ return p.parseSimpleStmt(stmts, true)
+}
+
+func (p *parser) parseDefStmt() Stmt {
+ defpos := p.nextToken() // consume DEF
+ id := p.parseIdent()
+ p.consume(LPAREN)
+ params := p.parseParams()
+ p.consume(RPAREN)
+ p.consume(COLON)
+ body := p.parseSuite()
+ return &DefStmt{
+ Def: defpos,
+ Name: id,
+ Params: params,
+ Body: body,
+ }
+}
+
+func (p *parser) parseIfStmt() Stmt {
+ ifpos := p.nextToken() // consume IF
+ cond := p.parseTest()
+ p.consume(COLON)
+ body := p.parseSuite()
+ ifStmt := &IfStmt{
+ If: ifpos,
+ Cond: cond,
+ True: body,
+ }
+ tail := ifStmt
+ for p.tok == ELIF {
+ elifpos := p.nextToken() // consume ELIF
+ cond := p.parseTest()
+ p.consume(COLON)
+ body := p.parseSuite()
+ elif := &IfStmt{
+ If: elifpos,
+ Cond: cond,
+ True: body,
+ }
+ tail.ElsePos = elifpos
+ tail.False = []Stmt{elif}
+ tail = elif
+ }
+ if p.tok == ELSE {
+ tail.ElsePos = p.nextToken() // consume ELSE
+ p.consume(COLON)
+ tail.False = p.parseSuite()
+ }
+ return ifStmt
+}
+
+func (p *parser) parseForStmt() Stmt {
+ forpos := p.nextToken() // consume FOR
+ vars := p.parseForLoopVariables()
+ p.consume(IN)
+ x := p.parseExpr(false)
+ p.consume(COLON)
+ body := p.parseSuite()
+ return &ForStmt{
+ For: forpos,
+ Vars: vars,
+ X: x,
+ Body: body,
+ }
+}
+
+func (p *parser) parseWhileStmt() Stmt {
+ whilepos := p.nextToken() // consume WHILE
+ cond := p.parseTest()
+ p.consume(COLON)
+ body := p.parseSuite()
+ return &WhileStmt{
+ While: whilepos,
+ Cond: cond,
+ Body: body,
+ }
+}
+
+// Equivalent to 'exprlist' production in Python grammar.
+//
+// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
+func (p *parser) parseForLoopVariables() Expr {
+ // Avoid parseExpr because it would consume the IN token
+ // following x in "for x in y: ...".
+ v := p.parsePrimaryWithSuffix()
+ if p.tok != COMMA {
+ return v
+ }
+
+ list := []Expr{v}
+ for p.tok == COMMA {
+ p.nextToken()
+ if terminatesExprList(p.tok) {
+ break
+ }
+ list = append(list, p.parsePrimaryWithSuffix())
+ }
+ return &TupleExpr{List: list}
+}
+
+// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
+// In REPL mode, it does not consume the NEWLINE.
+func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
+ for {
+ stmts = append(stmts, p.parseSmallStmt())
+ if p.tok != SEMI {
+ break
+ }
+ p.nextToken() // consume SEMI
+ if p.tok == NEWLINE || p.tok == EOF {
+ break
+ }
+ }
+ // EOF without NEWLINE occurs in `if x: pass`, for example.
+ if p.tok != EOF && consumeNL {
+ p.consume(NEWLINE)
+ }
+
+ return stmts
+}
+
+// small_stmt = RETURN expr?
+// | PASS | BREAK | CONTINUE
+// | LOAD ...
+// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
+// | expr
+func (p *parser) parseSmallStmt() Stmt {
+ switch p.tok {
+ case RETURN:
+ pos := p.nextToken() // consume RETURN
+ var result Expr
+ if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
+ result = p.parseExpr(false)
+ }
+ return &ReturnStmt{Return: pos, Result: result}
+
+ case BREAK, CONTINUE, PASS:
+ tok := p.tok
+ pos := p.nextToken() // consume it
+ return &BranchStmt{Token: tok, TokenPos: pos}
+
+ case LOAD:
+ return p.parseLoadStmt()
+ }
+
+ // Assignment
+ x := p.parseExpr(false)
+ switch p.tok {
+ case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
+ op := p.tok
+ pos := p.nextToken() // consume op
+ rhs := p.parseExpr(false)
+ return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
+ }
+
+ // Expression statement (e.g. function call, doc string).
+ return &ExprStmt{X: x}
+}
+
+// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
+func (p *parser) parseLoadStmt() *LoadStmt {
+ loadPos := p.nextToken() // consume LOAD
+ lparen := p.consume(LPAREN)
+
+ if p.tok != STRING {
+ p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
+ }
+ module := p.parsePrimary().(*Literal)
+
+ var from, to []*Ident
+ for p.tok != RPAREN && p.tok != EOF {
+ p.consume(COMMA)
+ if p.tok == RPAREN {
+ break // allow trailing comma
+ }
+ switch p.tok {
+ case STRING:
+ // load("module", "id")
+ // To name is same as original.
+ lit := p.parsePrimary().(*Literal)
+ id := &Ident{
+ NamePos: lit.TokenPos.add(`"`),
+ Name: lit.Value.(string),
+ }
+ to = append(to, id)
+ from = append(from, id)
+
+ case IDENT:
+ // load("module", to="from")
+ id := p.parseIdent()
+ to = append(to, id)
+ if p.tok != EQ {
+ p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
+ }
+ p.consume(EQ)
+ if p.tok != STRING {
+ p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
+ }
+ lit := p.parsePrimary().(*Literal)
+ from = append(from, &Ident{
+ NamePos: lit.TokenPos.add(`"`),
+ Name: lit.Value.(string),
+ })
+
+ case RPAREN:
+ p.in.errorf(p.in.pos, "trailing comma in load statement")
+
+ default:
+ p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
+ }
+ }
+ rparen := p.consume(RPAREN)
+
+ if len(to) == 0 {
+ p.in.errorf(lparen, "load statement must import at least 1 symbol")
+ }
+ return &LoadStmt{
+ Load: loadPos,
+ Module: module,
+ To: to,
+ From: from,
+ Rparen: rparen,
+ }
+}
+
+// suite is typically what follows a COLON (e.g. after DEF or FOR).
+// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
+func (p *parser) parseSuite() []Stmt {
+ if p.tok == NEWLINE {
+ p.nextToken() // consume NEWLINE
+ p.consume(INDENT)
+ var stmts []Stmt
+ for p.tok != OUTDENT && p.tok != EOF {
+ stmts = p.parseStmt(stmts)
+ }
+ p.consume(OUTDENT)
+ return stmts
+ }
+
+ return p.parseSimpleStmt(nil, true)
+}
+
+func (p *parser) parseIdent() *Ident {
+ if p.tok != IDENT {
+ p.in.error(p.in.pos, "not an identifier")
+ }
+ id := &Ident{
+ NamePos: p.tokval.pos,
+ Name: p.tokval.raw,
+ }
+ p.nextToken()
+ return id
+}
+
+func (p *parser) consume(t Token) Position {
+ if p.tok != t {
+ p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
+ }
+ return p.nextToken()
+}
+
+// params = (param COMMA)* param COMMA?
+// |
+//
+// param = IDENT
+// | IDENT EQ test
+// | STAR
+// | STAR IDENT
+// | STARSTAR IDENT
+//
+// parseParams parses a parameter list. The resulting expressions are of the form:
+//
+// *Ident x
+// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y
+// *Unary{Op: STAR} *
+// *Unary{Op: STAR, X: *Ident} *args
+// *Unary{Op: STARSTAR, X: *Ident} **kwargs
+func (p *parser) parseParams() []Expr {
+ var params []Expr
+ for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
+ if len(params) > 0 {
+ p.consume(COMMA)
+ }
+ if p.tok == RPAREN {
+ break
+ }
+
+ // * or *args or **kwargs
+ if p.tok == STAR || p.tok == STARSTAR {
+ op := p.tok
+ pos := p.nextToken()
+ var x Expr
+ if op == STARSTAR || p.tok == IDENT {
+ x = p.parseIdent()
+ }
+ params = append(params, &UnaryExpr{
+ OpPos: pos,
+ Op: op,
+ X: x,
+ })
+ continue
+ }
+
+ // IDENT
+ // IDENT = test
+ id := p.parseIdent()
+ if p.tok == EQ { // default value
+ eq := p.nextToken()
+ dflt := p.parseTest()
+ params = append(params, &BinaryExpr{
+ X: id,
+ OpPos: eq,
+ Op: EQ,
+ Y: dflt,
+ })
+ continue
+ }
+
+ params = append(params, id)
+ }
+ return params
+}
+
+// parseExpr parses an expression, possible consisting of a
+// comma-separated list of 'test' expressions.
+//
+// In many cases we must use parseTest to avoid ambiguity such as
+// f(x, y) vs. f((x, y)).
+func (p *parser) parseExpr(inParens bool) Expr {
+ x := p.parseTest()
+ if p.tok != COMMA {
+ return x
+ }
+
+ // tuple
+ exprs := p.parseExprs([]Expr{x}, inParens)
+ return &TupleExpr{List: exprs}
+}
+
+// parseExprs parses a comma-separated list of expressions, starting with the comma.
+// It is used to parse tuples and list elements.
+// expr_list = (',' expr)* ','?
+func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
+ for p.tok == COMMA {
+ pos := p.nextToken()
+ if terminatesExprList(p.tok) {
+ if !allowTrailingComma {
+ p.in.error(pos, "unparenthesized tuple with trailing comma")
+ }
+ break
+ }
+ exprs = append(exprs, p.parseTest())
+ }
+ return exprs
+}
+
+// parseTest parses a 'test', a single-component expression.
+func (p *parser) parseTest() Expr {
+ if p.tok == LAMBDA {
+ return p.parseLambda(true)
+ }
+
+ x := p.parseTestPrec(0)
+
+ // conditional expression (t IF cond ELSE f)
+ if p.tok == IF {
+ ifpos := p.nextToken()
+ cond := p.parseTestPrec(0)
+ if p.tok != ELSE {
+ p.in.error(ifpos, "conditional expression without else clause")
+ }
+ elsepos := p.nextToken()
+ else_ := p.parseTest()
+ return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
+ }
+
+ return x
+}
+
+// parseTestNoCond parses a a single-component expression without
+// consuming a trailing 'if expr else expr'.
+func (p *parser) parseTestNoCond() Expr {
+ if p.tok == LAMBDA {
+ return p.parseLambda(false)
+ }
+ return p.parseTestPrec(0)
+}
+
+// parseLambda parses a lambda expression.
+// The allowCond flag allows the body to be an 'a if b else c' conditional.
+func (p *parser) parseLambda(allowCond bool) Expr {
+ lambda := p.nextToken()
+ var params []Expr
+ if p.tok != COLON {
+ params = p.parseParams()
+ }
+ p.consume(COLON)
+
+ var body Expr
+ if allowCond {
+ body = p.parseTest()
+ } else {
+ body = p.parseTestNoCond()
+ }
+
+ return &LambdaExpr{
+ Lambda: lambda,
+ Params: params,
+ Body: body,
+ }
+}
+
+func (p *parser) parseTestPrec(prec int) Expr {
+ if prec >= len(preclevels) {
+ return p.parsePrimaryWithSuffix()
+ }
+
+ // expr = NOT expr
+ if p.tok == NOT && prec == int(precedence[NOT]) {
+ pos := p.nextToken()
+ x := p.parseTestPrec(prec)
+ return &UnaryExpr{
+ OpPos: pos,
+ Op: NOT,
+ X: x,
+ }
+ }
+
+ return p.parseBinopExpr(prec)
+}
+
+// expr = test (OP test)*
+// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
+func (p *parser) parseBinopExpr(prec int) Expr {
+ x := p.parseTestPrec(prec + 1)
+ for first := true; ; first = false {
+ if p.tok == NOT {
+ p.nextToken() // consume NOT
+ // In this context, NOT must be followed by IN.
+ // Replace NOT IN by a single NOT_IN token.
+ if p.tok != IN {
+ p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
+ }
+ p.tok = NOT_IN
+ }
+
+ // Binary operator of specified precedence?
+ opprec := int(precedence[p.tok])
+ if opprec < prec {
+ return x
+ }
+
+ // Comparisons are non-associative.
+ if !first && opprec == int(precedence[EQL]) {
+ p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
+ x.(*BinaryExpr).Op, p.tok)
+ }
+
+ op := p.tok
+ pos := p.nextToken()
+ y := p.parseTestPrec(opprec + 1)
+ x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
+ }
+}
+
+// precedence maps each operator to its precedence (0-7), or -1 for other tokens.
+var precedence [maxToken]int8
+
+// preclevels groups operators of equal precedence.
+// Comparisons are nonassociative; other binary operators associate to the left.
+// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
+// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
+var preclevels = [...][]Token{
+ {OR}, // or
+ {AND}, // and
+ {NOT}, // not (unary)
+ {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
+ {PIPE}, // |
+ {CIRCUMFLEX}, // ^
+ {AMP}, // &
+ {LTLT, GTGT}, // << >>
+ {MINUS, PLUS}, // -
+ {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / //
+}
+
+func init() {
+ // populate precedence table
+ for i := range precedence {
+ precedence[i] = -1
+ }
+ for level, tokens := range preclevels {
+ for _, tok := range tokens {
+ precedence[tok] = int8(level)
+ }
+ }
+}
+
+// primary_with_suffix = primary
+// | primary '.' IDENT
+// | primary slice_suffix
+// | primary call_suffix
+func (p *parser) parsePrimaryWithSuffix() Expr {
+ x := p.parsePrimary()
+ for {
+ switch p.tok {
+ case DOT:
+ dot := p.nextToken()
+ id := p.parseIdent()
+ x = &DotExpr{Dot: dot, X: x, Name: id}
+ case LBRACK:
+ x = p.parseSliceSuffix(x)
+ case LPAREN:
+ x = p.parseCallSuffix(x)
+ default:
+ return x
+ }
+ }
+}
+
+// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
+func (p *parser) parseSliceSuffix(x Expr) Expr {
+ lbrack := p.nextToken()
+ var lo, hi, step Expr
+ if p.tok != COLON {
+ y := p.parseExpr(false)
+
+ // index x[y]
+ if p.tok == RBRACK {
+ rbrack := p.nextToken()
+ return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
+ }
+
+ lo = y
+ }
+
+ // slice or substring x[lo:hi:step]
+ if p.tok == COLON {
+ p.nextToken()
+ if p.tok != COLON && p.tok != RBRACK {
+ hi = p.parseTest()
+ }
+ }
+ if p.tok == COLON {
+ p.nextToken()
+ if p.tok != RBRACK {
+ step = p.parseTest()
+ }
+ }
+ rbrack := p.consume(RBRACK)
+ return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
+}
+
+// call_suffix = '(' arg_list? ')'
+func (p *parser) parseCallSuffix(fn Expr) Expr {
+ lparen := p.consume(LPAREN)
+ var rparen Position
+ var args []Expr
+ if p.tok == RPAREN {
+ rparen = p.nextToken()
+ } else {
+ args = p.parseArgs()
+ rparen = p.consume(RPAREN)
+ }
+ return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
+}
+
+// parseArgs parses a list of actual parameter values (arguments).
+// It mirrors the structure of parseParams.
+// arg_list = ((arg COMMA)* arg COMMA?)?
+func (p *parser) parseArgs() []Expr {
+ var args []Expr
+ for p.tok != RPAREN && p.tok != EOF {
+ if len(args) > 0 {
+ p.consume(COMMA)
+ }
+ if p.tok == RPAREN {
+ break
+ }
+
+ // *args or **kwargs
+ if p.tok == STAR || p.tok == STARSTAR {
+ op := p.tok
+ pos := p.nextToken()
+ x := p.parseTest()
+ args = append(args, &UnaryExpr{
+ OpPos: pos,
+ Op: op,
+ X: x,
+ })
+ continue
+ }
+
+ // We use a different strategy from Bazel here to stay within LL(1).
+ // Instead of looking ahead two tokens (IDENT, EQ) we parse
+ // 'test = test' then check that the first was an IDENT.
+ x := p.parseTest()
+
+ if p.tok == EQ {
+ // name = value
+ if _, ok := x.(*Ident); !ok {
+ p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
+ }
+ eq := p.nextToken()
+ y := p.parseTest()
+ x = &BinaryExpr{
+ X: x,
+ OpPos: eq,
+ Op: EQ,
+ Y: y,
+ }
+ }
+
+ args = append(args, x)
+ }
+ return args
+}
+
+// primary = IDENT
+// | INT | FLOAT | STRING | BYTES
+// | '[' ... // list literal or comprehension
+// | '{' ... // dict literal or comprehension
+// | '(' ... // tuple or parenthesized expression
+// | ('-'|'+'|'~') primary_with_suffix
+func (p *parser) parsePrimary() Expr {
+ switch p.tok {
+ case IDENT:
+ return p.parseIdent()
+
+ case INT, FLOAT, STRING, BYTES:
+ var val interface{}
+ tok := p.tok
+ switch tok {
+ case INT:
+ if p.tokval.bigInt != nil {
+ val = p.tokval.bigInt
+ } else {
+ val = p.tokval.int
+ }
+ case FLOAT:
+ val = p.tokval.float
+ case STRING, BYTES:
+ val = p.tokval.string
+ }
+ raw := p.tokval.raw
+ pos := p.nextToken()
+ return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
+
+ case LBRACK:
+ return p.parseList()
+
+ case LBRACE:
+ return p.parseDict()
+
+ case LPAREN:
+ lparen := p.nextToken()
+ if p.tok == RPAREN {
+ // empty tuple
+ rparen := p.nextToken()
+ return &TupleExpr{Lparen: lparen, Rparen: rparen}
+ }
+ e := p.parseExpr(true) // allow trailing comma
+ rparen := p.consume(RPAREN)
+ return &ParenExpr{
+ Lparen: lparen,
+ X: e,
+ Rparen: rparen,
+ }
+
+ case MINUS, PLUS, TILDE: // unary
+ tok := p.tok
+ pos := p.nextToken()
+ x := p.parsePrimaryWithSuffix()
+ return &UnaryExpr{
+ OpPos: pos,
+ Op: tok,
+ X: x,
+ }
+ }
+ p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
+ panic("unreachable")
+}
+
+// list = '[' ']'
+// | '[' expr ']'
+// | '[' expr expr_list ']'
+// | '[' expr (FOR loop_variables IN expr)+ ']'
+func (p *parser) parseList() Expr {
+ lbrack := p.nextToken()
+ if p.tok == RBRACK {
+ // empty List
+ rbrack := p.nextToken()
+ return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
+ }
+
+ x := p.parseTest()
+
+ if p.tok == FOR {
+ // list comprehension
+ return p.parseComprehensionSuffix(lbrack, x, RBRACK)
+ }
+
+ exprs := []Expr{x}
+ if p.tok == COMMA {
+ // multi-item list literal
+ exprs = p.parseExprs(exprs, true) // allow trailing comma
+ }
+
+ rbrack := p.consume(RBRACK)
+ return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
+}
+
+// dict = '{' '}'
+// | '{' dict_entry_list '}'
+// | '{' dict_entry FOR loop_variables IN expr '}'
+func (p *parser) parseDict() Expr {
+ lbrace := p.nextToken()
+ if p.tok == RBRACE {
+ // empty dict
+ rbrace := p.nextToken()
+ return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
+ }
+
+ x := p.parseDictEntry()
+
+ if p.tok == FOR {
+ // dict comprehension
+ return p.parseComprehensionSuffix(lbrace, x, RBRACE)
+ }
+
+ entries := []Expr{x}
+ for p.tok == COMMA {
+ p.nextToken()
+ if p.tok == RBRACE {
+ break
+ }
+ entries = append(entries, p.parseDictEntry())
+ }
+
+ rbrace := p.consume(RBRACE)
+ return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
+}
+
+// dict_entry = test ':' test
+func (p *parser) parseDictEntry() *DictEntry {
+ k := p.parseTest()
+ colon := p.consume(COLON)
+ v := p.parseTest()
+ return &DictEntry{Key: k, Colon: colon, Value: v}
+}
+
+// comp_suffix = FOR loopvars IN expr comp_suffix
+// | IF expr comp_suffix
+// | ']' or ')' (end)
+//
+// There can be multiple FOR/IF clauses; the first is always a FOR.
+func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
+ var clauses []Node
+ for p.tok != endBrace {
+ if p.tok == FOR {
+ pos := p.nextToken()
+ vars := p.parseForLoopVariables()
+ in := p.consume(IN)
+ // Following Python 3, the operand of IN cannot be:
+ // - a conditional expression ('x if y else z'),
+ // due to conflicts in Python grammar
+ // ('if' is used by the comprehension);
+ // - a lambda expression
+ // - an unparenthesized tuple.
+ x := p.parseTestPrec(0)
+ clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
+ } else if p.tok == IF {
+ pos := p.nextToken()
+ cond := p.parseTestNoCond()
+ clauses = append(clauses, &IfClause{If: pos, Cond: cond})
+ } else {
+ p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
+ }
+ }
+ rbrace := p.nextToken()
+
+ return &Comprehension{
+ Curly: endBrace == RBRACE,
+ Lbrack: lbrace,
+ Body: body,
+ Clauses: clauses,
+ Rbrack: rbrace,
+ }
+}
+
+func terminatesExprList(tok Token) bool {
+ switch tok {
+ case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
+ return true
+ }
+ return false
+}
+
+// Comment assignment.
+// We build two lists of all subnodes, preorder and postorder.
+// The preorder list is ordered by start location, with outer nodes first.
+// The postorder list is ordered by end location, with outer nodes last.
+// We use the preorder list to assign each whole-line comment to the syntax
+// immediately following it, and we use the postorder list to assign each
+// end-of-line comment to the syntax immediately preceding it.
+
+// flattenAST returns the list of AST nodes, both in prefix order and in postfix
+// order.
+func flattenAST(root Node) (pre, post []Node) {
+ stack := []Node{}
+ Walk(root, func(n Node) bool {
+ if n != nil {
+ pre = append(pre, n)
+ stack = append(stack, n)
+ } else {
+ post = append(post, stack[len(stack)-1])
+ stack = stack[:len(stack)-1]
+ }
+ return true
+ })
+ return pre, post
+}
+
+// assignComments attaches comments to nearby syntax.
+func (p *parser) assignComments(n Node) {
+ // Leave early if there are no comments
+ if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
+ return
+ }
+
+ pre, post := flattenAST(n)
+
+ // Assign line comments to syntax immediately following.
+ line := p.in.lineComments
+ for _, x := range pre {
+ start, _ := x.Span()
+
+ switch x.(type) {
+ case *File:
+ continue
+ }
+
+ for len(line) > 0 && !start.isBefore(line[0].Start) {
+ x.AllocComments()
+ x.Comments().Before = append(x.Comments().Before, line[0])
+ line = line[1:]
+ }
+ }
+
+ // Remaining line comments go at end of file.
+ if len(line) > 0 {
+ n.AllocComments()
+ n.Comments().After = append(n.Comments().After, line...)
+ }
+
+ // Assign suffix comments to syntax immediately before.
+ suffix := p.in.suffixComments
+ for i := len(post) - 1; i >= 0; i-- {
+ x := post[i]
+
+ // Do not assign suffix comments to file
+ switch x.(type) {
+ case *File:
+ continue
+ }
+
+ _, end := x.Span()
+ if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
+ x.AllocComments()
+ x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
+ suffix = suffix[:len(suffix)-1]
+ }
+ }
+}