diff options
Diffstat (limited to 'internal/compile/compile.go')
-rw-r--r-- | internal/compile/compile.go | 1916 |
1 files changed, 1916 insertions, 0 deletions
diff --git a/internal/compile/compile.go b/internal/compile/compile.go new file mode 100644 index 0000000..c314e6e --- /dev/null +++ b/internal/compile/compile.go @@ -0,0 +1,1916 @@ +// Package compile defines the Starlark bytecode compiler. +// It is an internal package of the Starlark interpreter and is not directly accessible to clients. +// +// The compiler generates byte code with optional uint32 operands for a +// virtual machine with the following components: +// - a program counter, which is an index into the byte code array. +// - an operand stack, whose maximum size is computed for each function by the compiler. +// - an stack of active iterators. +// - an array of local variables. +// The number of local variables and their indices are computed by the resolver. +// Locals (possibly including parameters) that are shared with nested functions +// are 'cells': their locals array slot will contain a value of type 'cell', +// an indirect value in a box that is explicitly read/updated by instructions. +// - an array of free variables, for nested functions. +// Free variables are a subset of the ancestors' cell variables. +// As with locals and cells, these are computed by the resolver. +// - an array of global variables, shared among all functions in the same module. +// All elements are initially nil. +// - two maps of predeclared and universal identifiers. +// +// Each function has a line number table that maps each program counter +// offset to a source position, including the column number. +// +// Operands, logically uint32s, are encoded using little-endian 7-bit +// varints, the top bit indicating that more bytes follow. +// +package compile // import "go.starlark.net/internal/compile" + +import ( + "bytes" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "go.starlark.net/resolve" + "go.starlark.net/syntax" +) + +// Disassemble causes the assembly code for each function +// to be printed to stderr as it is generated. +var Disassemble = false + +const debug = false // make code generation verbose, for debugging the compiler + +// Increment this to force recompilation of saved bytecode files. +const Version = 12 + +type Opcode uint8 + +// "x DUP x x" is a "stack picture" that describes the state of the +// stack before and after execution of the instruction. +// +// OP<index> indicates an immediate operand that is an index into the +// specified table: locals, names, freevars, constants. +const ( + NOP Opcode = iota // - NOP - + + // stack operations + DUP // x DUP x x + DUP2 // x y DUP2 x y x y + POP // x POP - + EXCH // x y EXCH y x + + // binary comparisons + // (order must match Token) + LT + GT + GE + LE + EQL + NEQ + + // binary arithmetic + // (order must match Token) + PLUS + MINUS + STAR + SLASH + SLASHSLASH + PERCENT + AMP + PIPE + CIRCUMFLEX + LTLT + GTGT + + IN + + // unary operators + UPLUS // x UPLUS x + UMINUS // x UMINUS -x + TILDE // x TILDE ~x + + NONE // - NONE None + TRUE // - TRUE True + FALSE // - FALSE False + MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args] + + ITERPUSH // iterable ITERPUSH - [pushes the iterator stack] + ITERPOP // - ITERPOP - [pops the iterator stack] + NOT // value NOT bool + RETURN // value RETURN - + SETINDEX // a i new SETINDEX - + INDEX // a i INDEX elem + SETDICT // dict key value SETDICT - + SETDICTUNIQ // dict key value SETDICTUNIQ - + APPEND // list elem APPEND - + SLICE // x lo hi step SLICE slice + INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y) + MAKEDICT // - MAKEDICT dict + + // --- opcodes with an argument must go below this line --- + + // control flow + JMP // - JMP<addr> - + CJMP // cond CJMP<addr> - + ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator] + // or: - ITERJMP<addr> - (and jump) + + CONSTANT // - CONSTANT<constant> value + MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple + MAKELIST // x1 ... xn MAKELIST<n> list + MAKEFUNC // defaults+freevars MAKEFUNC<func> fn + LOAD // from1 ... fromN module LOAD<n> v1 ... vN + SETLOCAL // value SETLOCAL<local> - + SETGLOBAL // value SETGLOBAL<global> - + LOCAL // - LOCAL<local> value + FREE // - FREE<freevar> cell + FREECELL // - FREECELL<freevar> value (content of FREE cell) + LOCALCELL // - LOCALCELL<local> value (content of LOCAL cell) + SETLOCALCELL // value SETLOCALCELL<local> - (set content of LOCAL cell) + GLOBAL // - GLOBAL<global> value + PREDECLARED // - PREDECLARED<name> value + UNIVERSAL // - UNIVERSAL<name> value + ATTR // x ATTR<name> y y = x.name + SETFIELD // x y SETFIELD<name> - x.name = y + UNPACK // iterable UNPACK<n> vn ... v1 + + // n>>8 is #positional args and n&0xff is #named args (pairs). + CALL // fn positional named CALL<n> result + CALL_VAR // fn positional named *args CALL_VAR<n> result + CALL_KW // fn positional named **kwargs CALL_KW<n> result + CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result + + OpcodeArgMin = JMP + OpcodeMax = CALL_VAR_KW +) + +// TODO(adonovan): add dynamic checks for missing opcodes in the tables below. + +var opcodeNames = [...]string{ + AMP: "amp", + APPEND: "append", + ATTR: "attr", + CALL: "call", + CALL_KW: "call_kw ", + CALL_VAR: "call_var", + CALL_VAR_KW: "call_var_kw", + CIRCUMFLEX: "circumflex", + CJMP: "cjmp", + CONSTANT: "constant", + DUP2: "dup2", + DUP: "dup", + EQL: "eql", + EXCH: "exch", + FALSE: "false", + FREE: "free", + FREECELL: "freecell", + GE: "ge", + GLOBAL: "global", + GT: "gt", + GTGT: "gtgt", + IN: "in", + INDEX: "index", + INPLACE_ADD: "inplace_add", + ITERJMP: "iterjmp", + ITERPOP: "iterpop", + ITERPUSH: "iterpush", + JMP: "jmp", + LE: "le", + LOAD: "load", + LOCAL: "local", + LOCALCELL: "localcell", + LT: "lt", + LTLT: "ltlt", + MAKEDICT: "makedict", + MAKEFUNC: "makefunc", + MAKELIST: "makelist", + MAKETUPLE: "maketuple", + MANDATORY: "mandatory", + MINUS: "minus", + NEQ: "neq", + NONE: "none", + NOP: "nop", + NOT: "not", + PERCENT: "percent", + PIPE: "pipe", + PLUS: "plus", + POP: "pop", + PREDECLARED: "predeclared", + RETURN: "return", + SETDICT: "setdict", + SETDICTUNIQ: "setdictuniq", + SETFIELD: "setfield", + SETGLOBAL: "setglobal", + SETINDEX: "setindex", + SETLOCAL: "setlocal", + SETLOCALCELL: "setlocalcell", + SLASH: "slash", + SLASHSLASH: "slashslash", + SLICE: "slice", + STAR: "star", + TILDE: "tilde", + TRUE: "true", + UMINUS: "uminus", + UNIVERSAL: "universal", + UNPACK: "unpack", + UPLUS: "uplus", +} + +const variableStackEffect = 0x7f + +// stackEffect records the effect on the size of the operand stack of +// each kind of instruction. For some instructions this requires computation. +var stackEffect = [...]int8{ + AMP: -1, + APPEND: -2, + ATTR: 0, + CALL: variableStackEffect, + CALL_KW: variableStackEffect, + CALL_VAR: variableStackEffect, + CALL_VAR_KW: variableStackEffect, + CIRCUMFLEX: -1, + CJMP: -1, + CONSTANT: +1, + DUP2: +2, + DUP: +1, + EQL: -1, + FALSE: +1, + FREE: +1, + FREECELL: +1, + GE: -1, + GLOBAL: +1, + GT: -1, + GTGT: -1, + IN: -1, + INDEX: -1, + INPLACE_ADD: -1, + ITERJMP: variableStackEffect, + ITERPOP: 0, + ITERPUSH: -1, + JMP: 0, + LE: -1, + LOAD: -1, + LOCAL: +1, + LOCALCELL: +1, + LT: -1, + LTLT: -1, + MAKEDICT: +1, + MAKEFUNC: 0, + MAKELIST: variableStackEffect, + MAKETUPLE: variableStackEffect, + MANDATORY: +1, + MINUS: -1, + NEQ: -1, + NONE: +1, + NOP: 0, + NOT: 0, + PERCENT: -1, + PIPE: -1, + PLUS: -1, + POP: -1, + PREDECLARED: +1, + RETURN: -1, + SETLOCALCELL: -1, + SETDICT: -3, + SETDICTUNIQ: -3, + SETFIELD: -2, + SETGLOBAL: -1, + SETINDEX: -3, + SETLOCAL: -1, + SLASH: -1, + SLASHSLASH: -1, + SLICE: -3, + STAR: -1, + TRUE: +1, + UMINUS: 0, + UNIVERSAL: +1, + UNPACK: variableStackEffect, + UPLUS: 0, +} + +func (op Opcode) String() string { + if op < OpcodeMax { + if name := opcodeNames[op]; name != "" { + return name + } + } + return fmt.Sprintf("illegal op (%d)", op) +} + +// A Program is a Starlark file in executable form. +// +// Programs are serialized by the Program.Encode method, +// which must be updated whenever this declaration is changed. +type Program struct { + Loads []Binding // name (really, string) and position of each load stmt + Names []string // names of attributes and predeclared variables + Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes + Functions []*Funcode + Globals []Binding // for error messages and tracing + Toplevel *Funcode // module initialization function +} + +// The type of a bytes literal value, to distinguish from text string. +type Bytes string + +// A Funcode is the code of a compiled Starlark function. +// +// Funcodes are serialized by the encoder.function method, +// which must be updated whenever this declaration is changed. +type Funcode struct { + Prog *Program + Pos syntax.Position // position of def or lambda token + Name string // name of this function + Doc string // docstring of this function + Code []byte // the byte code + pclinetab []uint16 // mapping from pc to linenum + Locals []Binding // locals, parameters first + Cells []int // indices of Locals that require cells + Freevars []Binding // for tracing + MaxStack int + NumParams int + NumKwonlyParams int + HasVarargs, HasKwargs bool + + // -- transient state -- + + lntOnce sync.Once + lnt []pclinecol // decoded line number table +} + +type pclinecol struct { + pc uint32 + line, col int32 +} + +// A Binding is the name and position of a binding identifier. +type Binding struct { + Name string + Pos syntax.Position +} + +// A pcomp holds the compiler state for a Program. +type pcomp struct { + prog *Program // what we're building + + names map[string]uint32 + constants map[interface{}]uint32 + functions map[*Funcode]uint32 +} + +// An fcomp holds the compiler state for a Funcode. +type fcomp struct { + fn *Funcode // what we're building + + pcomp *pcomp + pos syntax.Position // current position of generated code + loops []loop + block *block +} + +type loop struct { + break_, continue_ *block +} + +type block struct { + insns []insn + + // If the last insn is a RETURN, jmp and cjmp are nil. + // If the last insn is a CJMP or ITERJMP, + // cjmp and jmp are the "true" and "false" successors. + // Otherwise, jmp is the sole successor. + jmp, cjmp *block + + initialstack int // for stack depth computation + + // Used during encoding + index int // -1 => not encoded yet + addr uint32 +} + +type insn struct { + op Opcode + arg uint32 + line, col int32 +} + +// Position returns the source position for program counter pc. +func (fn *Funcode) Position(pc uint32) syntax.Position { + fn.lntOnce.Do(fn.decodeLNT) + + // Binary search to find last LNT entry not greater than pc. + // To avoid dynamic dispatch, this is a specialization of + // sort.Search using this predicate: + // !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc) + n := len(fn.lnt) + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) + if !(h >= n-1 || fn.lnt[h+1].pc > pc) { + i = h + 1 + } else { + j = h + } + } + + var line, col int32 + if i < n { + line = fn.lnt[i].line + col = fn.lnt[i].col + } + + pos := fn.Pos // copy the (annoyingly inaccessible) filename + pos.Col = col + pos.Line = line + return pos +} + +// decodeLNT decodes the line number table and populates fn.lnt. +// It is called at most once. +func (fn *Funcode) decodeLNT() { + // Conceptually the table contains rows of the form + // (pc uint32, line int32, col int32), sorted by pc. + // We use a delta encoding, since the differences + // between successive pc, line, and column values + // are typically small and positive (though line and + // especially column differences may be negative). + // The delta encoding starts from + // {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}. + // + // Each entry is packed into one or more 16-bit values: + // Δpc uint4 + // Δline int5 + // Δcol int6 + // incomplete uint1 + // The top 4 bits are the unsigned delta pc. + // The next 5 bits are the signed line number delta. + // The next 6 bits are the signed column number delta. + // The bottom bit indicates that more rows follow because + // one of the deltas was maxed out. + // These field widths were chosen from a sample of real programs, + // and allow >97% of rows to be encoded in a single uint16. + + fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation + entry := pclinecol{ + pc: 0, + line: fn.Pos.Line, + col: fn.Pos.Col, + } + for _, x := range fn.pclinetab { + entry.pc += uint32(x) >> 12 + entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline + entry.col += int32((int16(x) << 9) >> (16 - 6)) // sign extend Δcol + if (x & 1) == 0 { + fn.lnt = append(fn.lnt, entry) + } + } +} + +// bindings converts resolve.Bindings to compiled form. +func bindings(bindings []*resolve.Binding) []Binding { + res := make([]Binding, len(bindings)) + for i, bind := range bindings { + res[i].Name = bind.First.Name + res[i].Pos = bind.First.NamePos + } + return res +} + +// Expr compiles an expression to a program whose toplevel function evaluates it. +func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program { + pos := syntax.Start(expr) + stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} + return File(stmts, pos, name, locals, nil) +} + +// File compiles the statements of a file into a program. +func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { + pcomp := &pcomp{ + prog: &Program{ + Globals: bindings(globals), + }, + names: make(map[string]uint32), + constants: make(map[interface{}]uint32), + functions: make(map[*Funcode]uint32), + } + pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) + + return pcomp.prog +} + +func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { + fcomp := &fcomp{ + pcomp: pcomp, + pos: pos, + fn: &Funcode{ + Prog: pcomp.prog, + Pos: pos, + Name: name, + Doc: docStringFromBody(stmts), + Locals: bindings(locals), + Freevars: bindings(freevars), + }, + } + + // Record indices of locals that require cells. + for i, local := range locals { + if local.Scope == resolve.Cell { + fcomp.fn.Cells = append(fcomp.fn.Cells, i) + } + } + + if debug { + fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) + } + + // Convert AST to a CFG of instructions. + entry := fcomp.newBlock() + fcomp.block = entry + fcomp.stmts(stmts) + if fcomp.block != nil { + fcomp.emit(NONE) + fcomp.emit(RETURN) + } + + var oops bool // something bad happened + + setinitialstack := func(b *block, depth int) { + if b.initialstack == -1 { + b.initialstack = depth + } else if b.initialstack != depth { + fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", + b.index, b.initialstack, depth) + oops = true + } + } + + // Linearize the CFG: + // compute order, address, and initial + // stack depth of each reachable block. + var pc uint32 + var blocks []*block + var maxstack int + var visit func(b *block) + visit = func(b *block) { + if b.index >= 0 { + return // already visited + } + b.index = len(blocks) + b.addr = pc + blocks = append(blocks, b) + + stack := b.initialstack + if debug { + fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) + } + var cjmpAddr *uint32 + var isiterjmp int + for i, insn := range b.insns { + pc++ + + // Compute size of argument. + if insn.op >= OpcodeArgMin { + switch insn.op { + case ITERJMP: + isiterjmp = 1 + fallthrough + case CJMP: + cjmpAddr = &b.insns[i].arg + pc += 4 + default: + pc += uint32(argLen(insn.arg)) + } + } + + // Compute effect on stack. + se := insn.stackeffect() + if debug { + fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) + } + stack += se + if stack < 0 { + fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) + oops = true + } + if stack+isiterjmp > maxstack { + maxstack = stack + isiterjmp + } + } + + if debug { + fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", + b.addr, b.index) + if b.jmp != nil { + fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) + } + if b.cjmp != nil { + fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) + } + } + + // Place the jmp block next. + if b.jmp != nil { + // jump threading (empty cycles are impossible) + for b.jmp.insns == nil { + b.jmp = b.jmp.jmp + } + + setinitialstack(b.jmp, stack+isiterjmp) + if b.jmp.index < 0 { + // Successor is not yet visited: + // place it next and fall through. + visit(b.jmp) + } else { + // Successor already visited; + // explicit backward jump required. + pc += 5 + } + } + + // Then the cjmp block. + if b.cjmp != nil { + // jump threading (empty cycles are impossible) + for b.cjmp.insns == nil { + b.cjmp = b.cjmp.jmp + } + + setinitialstack(b.cjmp, stack) + visit(b.cjmp) + + // Patch the CJMP/ITERJMP, if present. + if cjmpAddr != nil { + *cjmpAddr = b.cjmp.addr + } + } + } + setinitialstack(entry, 0) + visit(entry) + + fn := fcomp.fn + fn.MaxStack = maxstack + + // Emit bytecode (and position table). + if Disassemble { + fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) + } + fcomp.generate(blocks, pc) + + if debug { + fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) + } + + // Don't panic until we've completed printing of the function. + if oops { + panic("internal error") + } + + if debug { + fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) + } + + return fn +} + +func docStringFromBody(body []syntax.Stmt) string { + if len(body) == 0 { + return "" + } + expr, ok := body[0].(*syntax.ExprStmt) + if !ok { + return "" + } + lit, ok := expr.X.(*syntax.Literal) + if !ok { + return "" + } + if lit.Token != syntax.STRING { + return "" + } + return lit.Value.(string) +} + +func (insn *insn) stackeffect() int { + se := int(stackEffect[insn.op]) + if se == variableStackEffect { + arg := int(insn.arg) + switch insn.op { + case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: + se = -int(2*(insn.arg&0xff) + insn.arg>>8) + if insn.op != CALL { + se-- + } + if insn.op == CALL_VAR_KW { + se-- + } + case ITERJMP: + // Stack effect differs by successor: + // +1 for jmp/false/ok + // 0 for cjmp/true/exhausted + // Handled specially in caller. + se = 0 + case MAKELIST, MAKETUPLE: + se = 1 - arg + case UNPACK: + se = arg - 1 + default: + panic(insn.op) + } + } + return se +} + +// generate emits the linear instruction stream from the CFG, +// and builds the PC-to-line number table. +func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { + code := make([]byte, 0, codelen) + var pclinetab []uint16 + prev := pclinecol{ + pc: 0, + line: fcomp.fn.Pos.Line, + col: fcomp.fn.Pos.Col, + } + + for _, b := range blocks { + if Disassemble { + fmt.Fprintf(os.Stderr, "%d:\n", b.index) + } + pc := b.addr + for _, insn := range b.insns { + if insn.line != 0 { + // Instruction has a source position. Delta-encode it. + // See Funcode.Position for the encoding. + for { + var incomplete uint16 + + // Δpc, uint4 + deltapc := pc - prev.pc + if deltapc > 0x0f { + deltapc = 0x0f + incomplete = 1 + } + prev.pc += deltapc + + // Δline, int5 + deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) + if !ok { + incomplete = 1 + } + prev.line += deltaline + + // Δcol, int6 + deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) + if !ok { + incomplete = 1 + } + prev.col += deltacol + + entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete + pclinetab = append(pclinetab, entry) + if incomplete == 0 { + break + } + } + + if Disassemble { + fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", + filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) + } + } + if Disassemble { + PrintOp(fcomp.fn, pc, insn.op, insn.arg) + } + code = append(code, byte(insn.op)) + pc++ + if insn.op >= OpcodeArgMin { + if insn.op == CJMP || insn.op == ITERJMP { + code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes + } else { + code = addUint32(code, insn.arg, 0) + } + pc = uint32(len(code)) + } + } + + if b.jmp != nil && b.jmp.index != b.index+1 { + addr := b.jmp.addr + if Disassemble { + fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", + pc, addr, b.jmp.index) + } + code = append(code, byte(JMP)) + code = addUint32(code, addr, 4) + } + } + if len(code) != int(codelen) { + panic("internal error: wrong code length") + } + + fcomp.fn.pclinetab = pclinetab + fcomp.fn.Code = code +} + +// clip returns the value nearest x in the range [min...max], +// and whether it equals x. +func clip(x, min, max int32) (int32, bool) { + if x > max { + return max, false + } else if x < min { + return min, false + } else { + return x, true + } +} + +// addUint32 encodes x as 7-bit little-endian varint. +// TODO(adonovan): opt: steal top two bits of opcode +// to encode the number of complete bytes that follow. +func addUint32(code []byte, x uint32, min int) []byte { + end := len(code) + min + for x >= 0x80 { + code = append(code, byte(x)|0x80) + x >>= 7 + } + code = append(code, byte(x)) + // Pad the operand with NOPs to exactly min bytes. + for len(code) < end { + code = append(code, byte(NOP)) + } + return code +} + +func argLen(x uint32) int { + n := 0 + for x >= 0x80 { + n++ + x >>= 7 + } + return n + 1 +} + +// PrintOp prints an instruction. +// It is provided for debugging. +func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { + if op < OpcodeArgMin { + fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) + return + } + + var comment string + switch op { + case CONSTANT: + switch x := fn.Prog.Constants[arg].(type) { + case string: + comment = strconv.Quote(x) + case Bytes: + comment = "b" + strconv.Quote(string(x)) + default: + comment = fmt.Sprint(x) + } + case MAKEFUNC: + comment = fn.Prog.Functions[arg].Name + case SETLOCAL, LOCAL: + comment = fn.Locals[arg].Name + case SETGLOBAL, GLOBAL: + comment = fn.Prog.Globals[arg].Name + case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: + comment = fn.Prog.Names[arg] + case FREE: + comment = fn.Freevars[arg].Name + case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: + comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) + default: + // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: + // arg is just a number + } + var buf bytes.Buffer + fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) + if comment != "" { + fmt.Fprint(&buf, "\t; ", comment) + } + fmt.Fprintln(&buf) + os.Stderr.Write(buf.Bytes()) +} + +// newBlock returns a new block. +func (fcomp) newBlock() *block { + return &block{index: -1, initialstack: -1} +} + +// emit emits an instruction to the current block. +func (fcomp *fcomp) emit(op Opcode) { + if op >= OpcodeArgMin { + panic("missing arg: " + op.String()) + } + insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} + fcomp.block.insns = append(fcomp.block.insns, insn) + fcomp.pos.Line = 0 + fcomp.pos.Col = 0 +} + +// emit1 emits an instruction with an immediate operand. +func (fcomp *fcomp) emit1(op Opcode, arg uint32) { + if op < OpcodeArgMin { + panic("unwanted arg: " + op.String()) + } + insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} + fcomp.block.insns = append(fcomp.block.insns, insn) + fcomp.pos.Line = 0 + fcomp.pos.Col = 0 +} + +// jump emits a jump to the specified block. +// On return, the current block is unset. +func (fcomp *fcomp) jump(b *block) { + if b == fcomp.block { + panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs + } + fcomp.block.jmp = b + fcomp.block = nil +} + +// condjump emits a conditional jump (CJMP or ITERJMP) +// to the specified true/false blocks. +// (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) +// On return, the current block is unset. +func (fcomp *fcomp) condjump(op Opcode, t, f *block) { + if !(op == CJMP || op == ITERJMP) { + panic("not a conditional jump: " + op.String()) + } + fcomp.emit1(op, 0) // fill in address later + fcomp.block.cjmp = t + fcomp.jump(f) +} + +// nameIndex returns the index of the specified name +// within the name pool, adding it if necessary. +func (pcomp *pcomp) nameIndex(name string) uint32 { + index, ok := pcomp.names[name] + if !ok { + index = uint32(len(pcomp.prog.Names)) + pcomp.names[name] = index + pcomp.prog.Names = append(pcomp.prog.Names, name) + } + return index +} + +// constantIndex returns the index of the specified constant +// within the constant pool, adding it if necessary. +func (pcomp *pcomp) constantIndex(v interface{}) uint32 { + index, ok := pcomp.constants[v] + if !ok { + index = uint32(len(pcomp.prog.Constants)) + pcomp.constants[v] = index + pcomp.prog.Constants = append(pcomp.prog.Constants, v) + } + return index +} + +// functionIndex returns the index of the specified function +// AST the nestedfun pool, adding it if necessary. +func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { + index, ok := pcomp.functions[fn] + if !ok { + index = uint32(len(pcomp.prog.Functions)) + pcomp.functions[fn] = index + pcomp.prog.Functions = append(pcomp.prog.Functions, fn) + } + return index +} + +// string emits code to push the specified string. +func (fcomp *fcomp) string(s string) { + fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) +} + +// setPos sets the current source position. +// It should be called prior to any operation that can fail dynamically. +// All positions are assumed to belong to the same file. +func (fcomp *fcomp) setPos(pos syntax.Position) { + fcomp.pos = pos +} + +// set emits code to store the top-of-stack value +// to the specified local, cell, or global variable. +func (fcomp *fcomp) set(id *syntax.Ident) { + bind := id.Binding.(*resolve.Binding) + switch bind.Scope { + case resolve.Local: + fcomp.emit1(SETLOCAL, uint32(bind.Index)) + case resolve.Cell: + fcomp.emit1(SETLOCALCELL, uint32(bind.Index)) + case resolve.Global: + fcomp.emit1(SETGLOBAL, uint32(bind.Index)) + default: + log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) + } +} + +// lookup emits code to push the value of the specified variable. +func (fcomp *fcomp) lookup(id *syntax.Ident) { + bind := id.Binding.(*resolve.Binding) + if bind.Scope != resolve.Universal { // (universal lookup can't fail) + fcomp.setPos(id.NamePos) + } + switch bind.Scope { + case resolve.Local: + fcomp.emit1(LOCAL, uint32(bind.Index)) + case resolve.Free: + fcomp.emit1(FREECELL, uint32(bind.Index)) + case resolve.Cell: + fcomp.emit1(LOCALCELL, uint32(bind.Index)) + case resolve.Global: + fcomp.emit1(GLOBAL, uint32(bind.Index)) + case resolve.Predeclared: + fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) + case resolve.Universal: + fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) + default: + log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope) + } +} + +func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { + for _, stmt := range stmts { + fcomp.stmt(stmt) + } +} + +func (fcomp *fcomp) stmt(stmt syntax.Stmt) { + switch stmt := stmt.(type) { + case *syntax.ExprStmt: + if _, ok := stmt.X.(*syntax.Literal); ok { + // Opt: don't compile doc comments only to pop them. + return + } + fcomp.expr(stmt.X) + fcomp.emit(POP) + + case *syntax.BranchStmt: + // Resolver invariant: break/continue appear only within loops. + switch stmt.Token { + case syntax.PASS: + // no-op + case syntax.BREAK: + b := fcomp.loops[len(fcomp.loops)-1].break_ + fcomp.jump(b) + fcomp.block = fcomp.newBlock() // dead code + case syntax.CONTINUE: + b := fcomp.loops[len(fcomp.loops)-1].continue_ + fcomp.jump(b) + fcomp.block = fcomp.newBlock() // dead code + } + + case *syntax.IfStmt: + // Keep consistent with CondExpr. + t := fcomp.newBlock() + f := fcomp.newBlock() + done := fcomp.newBlock() + + fcomp.ifelse(stmt.Cond, t, f) + + fcomp.block = t + fcomp.stmts(stmt.True) + fcomp.jump(done) + + fcomp.block = f + fcomp.stmts(stmt.False) + fcomp.jump(done) + + fcomp.block = done + + case *syntax.AssignStmt: + switch stmt.Op { + case syntax.EQ: + // simple assignment: x = y + fcomp.expr(stmt.RHS) + fcomp.assign(stmt.OpPos, stmt.LHS) + + case syntax.PLUS_EQ, + syntax.MINUS_EQ, + syntax.STAR_EQ, + syntax.SLASH_EQ, + syntax.SLASHSLASH_EQ, + syntax.PERCENT_EQ, + syntax.AMP_EQ, + syntax.PIPE_EQ, + syntax.CIRCUMFLEX_EQ, + syntax.LTLT_EQ, + syntax.GTGT_EQ: + // augmented assignment: x += y + + var set func() + + // Evaluate "address" of x exactly once to avoid duplicate side-effects. + switch lhs := unparen(stmt.LHS).(type) { + case *syntax.Ident: + // x = ... + fcomp.lookup(lhs) + set = func() { + fcomp.set(lhs) + } + + case *syntax.IndexExpr: + // x[y] = ... + fcomp.expr(lhs.X) + fcomp.expr(lhs.Y) + fcomp.emit(DUP2) + fcomp.setPos(lhs.Lbrack) + fcomp.emit(INDEX) + set = func() { + fcomp.setPos(lhs.Lbrack) + fcomp.emit(SETINDEX) + } + + case *syntax.DotExpr: + // x.f = ... + fcomp.expr(lhs.X) + fcomp.emit(DUP) + name := fcomp.pcomp.nameIndex(lhs.Name.Name) + fcomp.setPos(lhs.Dot) + fcomp.emit1(ATTR, name) + set = func() { + fcomp.setPos(lhs.Dot) + fcomp.emit1(SETFIELD, name) + } + + default: + panic(lhs) + } + + fcomp.expr(stmt.RHS) + + if stmt.Op == syntax.PLUS_EQ { + // Allow the runtime to optimize list += iterable. + fcomp.setPos(stmt.OpPos) + fcomp.emit(INPLACE_ADD) + } else { + fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) + } + set() + } + + case *syntax.DefStmt: + fcomp.function(stmt.Function.(*resolve.Function)) + fcomp.set(stmt.Name) + + case *syntax.ForStmt: + // Keep consistent with ForClause. + head := fcomp.newBlock() + body := fcomp.newBlock() + tail := fcomp.newBlock() + + fcomp.expr(stmt.X) + fcomp.setPos(stmt.For) + fcomp.emit(ITERPUSH) + fcomp.jump(head) + + fcomp.block = head + fcomp.condjump(ITERJMP, tail, body) + + fcomp.block = body + fcomp.assign(stmt.For, stmt.Vars) + fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) + fcomp.stmts(stmt.Body) + fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] + fcomp.jump(head) + + fcomp.block = tail + fcomp.emit(ITERPOP) + + case *syntax.WhileStmt: + head := fcomp.newBlock() + body := fcomp.newBlock() + done := fcomp.newBlock() + + fcomp.jump(head) + fcomp.block = head + fcomp.ifelse(stmt.Cond, body, done) + + fcomp.block = body + fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head}) + fcomp.stmts(stmt.Body) + fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] + fcomp.jump(head) + + fcomp.block = done + + case *syntax.ReturnStmt: + if stmt.Result != nil { + fcomp.expr(stmt.Result) + } else { + fcomp.emit(NONE) + } + fcomp.emit(RETURN) + fcomp.block = fcomp.newBlock() // dead code + + case *syntax.LoadStmt: + for i := range stmt.From { + fcomp.string(stmt.From[i].Name) + } + module := stmt.Module.Value.(string) + fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{ + Name: module, + Pos: stmt.Module.TokenPos, + }) + fcomp.string(module) + fcomp.setPos(stmt.Load) + fcomp.emit1(LOAD, uint32(len(stmt.From))) + for i := range stmt.To { + fcomp.set(stmt.To[len(stmt.To)-1-i]) + } + + default: + start, _ := stmt.Span() + log.Panicf("%s: exec: unexpected statement %T", start, stmt) + } +} + +// assign implements lhs = rhs for arbitrary expressions lhs. +// RHS is on top of stack, consumed. +func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { + switch lhs := lhs.(type) { + case *syntax.ParenExpr: + // (lhs) = rhs + fcomp.assign(pos, lhs.X) + + case *syntax.Ident: + // x = rhs + fcomp.set(lhs) + + case *syntax.TupleExpr: + // x, y = rhs + fcomp.assignSequence(pos, lhs.List) + + case *syntax.ListExpr: + // [x, y] = rhs + fcomp.assignSequence(pos, lhs.List) + + case *syntax.IndexExpr: + // x[y] = rhs + fcomp.expr(lhs.X) + fcomp.emit(EXCH) + fcomp.expr(lhs.Y) + fcomp.emit(EXCH) + fcomp.setPos(lhs.Lbrack) + fcomp.emit(SETINDEX) + + case *syntax.DotExpr: + // x.f = rhs + fcomp.expr(lhs.X) + fcomp.emit(EXCH) + fcomp.setPos(lhs.Dot) + fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) + + default: + panic(lhs) + } +} + +func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { + fcomp.setPos(pos) + fcomp.emit1(UNPACK, uint32(len(lhs))) + for i := range lhs { + fcomp.assign(pos, lhs[i]) + } +} + +func (fcomp *fcomp) expr(e syntax.Expr) { + switch e := e.(type) { + case *syntax.ParenExpr: + fcomp.expr(e.X) + + case *syntax.Ident: + fcomp.lookup(e) + + case *syntax.Literal: + // e.Value is int64, float64, *bigInt, string + v := e.Value + if e.Token == syntax.BYTES { + v = Bytes(v.(string)) + } + fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v)) + + case *syntax.ListExpr: + for _, x := range e.List { + fcomp.expr(x) + } + fcomp.emit1(MAKELIST, uint32(len(e.List))) + + case *syntax.CondExpr: + // Keep consistent with IfStmt. + t := fcomp.newBlock() + f := fcomp.newBlock() + done := fcomp.newBlock() + + fcomp.ifelse(e.Cond, t, f) + + fcomp.block = t + fcomp.expr(e.True) + fcomp.jump(done) + + fcomp.block = f + fcomp.expr(e.False) + fcomp.jump(done) + + fcomp.block = done + + case *syntax.IndexExpr: + fcomp.expr(e.X) + fcomp.expr(e.Y) + fcomp.setPos(e.Lbrack) + fcomp.emit(INDEX) + + case *syntax.SliceExpr: + fcomp.setPos(e.Lbrack) + fcomp.expr(e.X) + if e.Lo != nil { + fcomp.expr(e.Lo) + } else { + fcomp.emit(NONE) + } + if e.Hi != nil { + fcomp.expr(e.Hi) + } else { + fcomp.emit(NONE) + } + if e.Step != nil { + fcomp.expr(e.Step) + } else { + fcomp.emit(NONE) + } + fcomp.emit(SLICE) + + case *syntax.Comprehension: + if e.Curly { + fcomp.emit(MAKEDICT) + } else { + fcomp.emit1(MAKELIST, 0) + } + fcomp.comprehension(e, 0) + + case *syntax.TupleExpr: + fcomp.tuple(e.List) + + case *syntax.DictExpr: + fcomp.emit(MAKEDICT) + for _, entry := range e.List { + entry := entry.(*syntax.DictEntry) + fcomp.emit(DUP) + fcomp.expr(entry.Key) + fcomp.expr(entry.Value) + fcomp.setPos(entry.Colon) + fcomp.emit(SETDICTUNIQ) + } + + case *syntax.UnaryExpr: + fcomp.expr(e.X) + fcomp.setPos(e.OpPos) + switch e.Op { + case syntax.MINUS: + fcomp.emit(UMINUS) + case syntax.PLUS: + fcomp.emit(UPLUS) + case syntax.NOT: + fcomp.emit(NOT) + case syntax.TILDE: + fcomp.emit(TILDE) + default: + log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op) + } + + case *syntax.BinaryExpr: + switch e.Op { + // short-circuit operators + // TODO(adonovan): use ifelse to simplify conditions. + case syntax.OR: + // x or y => if x then x else y + done := fcomp.newBlock() + y := fcomp.newBlock() + + fcomp.expr(e.X) + fcomp.emit(DUP) + fcomp.condjump(CJMP, done, y) + + fcomp.block = y + fcomp.emit(POP) // discard X + fcomp.expr(e.Y) + fcomp.jump(done) + + fcomp.block = done + + case syntax.AND: + // x and y => if x then y else x + done := fcomp.newBlock() + y := fcomp.newBlock() + + fcomp.expr(e.X) + fcomp.emit(DUP) + fcomp.condjump(CJMP, y, done) + + fcomp.block = y + fcomp.emit(POP) // discard X + fcomp.expr(e.Y) + fcomp.jump(done) + + fcomp.block = done + + case syntax.PLUS: + fcomp.plus(e) + + default: + // all other strict binary operator (includes comparisons) + fcomp.expr(e.X) + fcomp.expr(e.Y) + fcomp.binop(e.OpPos, e.Op) + } + + case *syntax.DotExpr: + fcomp.expr(e.X) + fcomp.setPos(e.Dot) + fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) + + case *syntax.CallExpr: + fcomp.call(e) + + case *syntax.LambdaExpr: + fcomp.function(e.Function.(*resolve.Function)) + + default: + start, _ := e.Span() + log.Panicf("%s: unexpected expr %T", start, e) + } +} + +type summand struct { + x syntax.Expr + plusPos syntax.Position +} + +// plus emits optimized code for ((a+b)+...)+z that avoids naive +// quadratic behavior for strings, tuples, and lists, +// and folds together adjacent literals of the same type. +func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { + // Gather all the right operands of the left tree of plusses. + // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. + args := make([]summand, 0, 2) // common case: 2 operands + for plus := e; ; { + args = append(args, summand{unparen(plus.Y), plus.OpPos}) + left := unparen(plus.X) + x, ok := left.(*syntax.BinaryExpr) + if !ok || x.Op != syntax.PLUS { + args = append(args, summand{x: left}) + break + } + plus = x + } + // Reverse args to syntactic order. + for i, n := 0, len(args)/2; i < n; i++ { + j := len(args) - 1 - i + args[i], args[j] = args[j], args[i] + } + + // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). + out := args[:0] // compact in situ + for i := 0; i < len(args); { + j := i + 1 + if code := addable(args[i].x); code != 0 { + for j < len(args) && addable(args[j].x) == code { + j++ + } + if j > i+1 { + args[i].x = add(code, args[i:j]) + } + } + out = append(out, args[i]) + i = j + } + args = out + + // Emit code for an n-ary sum (n > 0). + fcomp.expr(args[0].x) + for _, summand := range args[1:] { + fcomp.expr(summand.x) + fcomp.setPos(summand.plusPos) + fcomp.emit(PLUS) + } + + // If len(args) > 2, use of an accumulator instead of a chain of + // PLUS operations may be more efficient. + // However, no gain was measured on a workload analogous to Bazel loading; + // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. + // + // We cannot use a single n-ary SUM operation + // a b c SUM<3> + // because we need to report a distinct error for each + // individual '+' operation, so three additional operations are + // needed: + // + // ACCSTART => create buffer and append to it + // ACCUM => append to buffer + // ACCEND => get contents of buffer + // + // For string, list, and tuple values, the interpreter can + // optimize these operations by using a mutable buffer. + // For all other types, ACCSTART and ACCEND would behave like + // the identity function and ACCUM behaves like PLUS. + // ACCUM must correctly support user-defined operations + // such as list+foo. + // + // fcomp.emit(ACCSTART) + // for _, summand := range args[1:] { + // fcomp.expr(summand.x) + // fcomp.setPos(summand.plusPos) + // fcomp.emit(ACCUM) + // } + // fcomp.emit(ACCEND) +} + +// addable reports whether e is a statically addable +// expression: a [s]tring, [b]ytes, [l]ist, or [t]uple. +func addable(e syntax.Expr) rune { + switch e := e.(type) { + case *syntax.Literal: + // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. + switch e.Token { + case syntax.STRING: + return 's' + case syntax.BYTES: + return 'b' + } + case *syntax.ListExpr: + return 'l' + case *syntax.TupleExpr: + return 't' + } + return 0 +} + +// add returns an expression denoting the sum of args, +// which are all addable values of the type indicated by code. +// The resulting syntax is degenerate, lacking position, etc. +func add(code rune, args []summand) syntax.Expr { + switch code { + case 's', 'b': + var buf strings.Builder + for _, arg := range args { + buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) + } + tok := syntax.STRING + if code == 'b' { + tok = syntax.BYTES + } + return &syntax.Literal{Token: tok, Value: buf.String()} + case 'l': + var elems []syntax.Expr + for _, arg := range args { + elems = append(elems, arg.x.(*syntax.ListExpr).List...) + } + return &syntax.ListExpr{List: elems} + case 't': + var elems []syntax.Expr + for _, arg := range args { + elems = append(elems, arg.x.(*syntax.TupleExpr).List...) + } + return &syntax.TupleExpr{List: elems} + } + panic(code) +} + +func unparen(e syntax.Expr) syntax.Expr { + if p, ok := e.(*syntax.ParenExpr); ok { + return unparen(p.X) + } + return e +} + +func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { + // TODO(adonovan): simplify by assuming syntax and compiler constants align. + fcomp.setPos(pos) + switch op { + // arithmetic + case syntax.PLUS: + fcomp.emit(PLUS) + case syntax.MINUS: + fcomp.emit(MINUS) + case syntax.STAR: + fcomp.emit(STAR) + case syntax.SLASH: + fcomp.emit(SLASH) + case syntax.SLASHSLASH: + fcomp.emit(SLASHSLASH) + case syntax.PERCENT: + fcomp.emit(PERCENT) + case syntax.AMP: + fcomp.emit(AMP) + case syntax.PIPE: + fcomp.emit(PIPE) + case syntax.CIRCUMFLEX: + fcomp.emit(CIRCUMFLEX) + case syntax.LTLT: + fcomp.emit(LTLT) + case syntax.GTGT: + fcomp.emit(GTGT) + case syntax.IN: + fcomp.emit(IN) + case syntax.NOT_IN: + fcomp.emit(IN) + fcomp.emit(NOT) + + // comparisons + case syntax.EQL, + syntax.NEQ, + syntax.GT, + syntax.LT, + syntax.LE, + syntax.GE: + fcomp.emit(Opcode(op-syntax.EQL) + EQL) + + default: + log.Panicf("%s: unexpected binary op: %s", pos, op) + } +} + +func (fcomp *fcomp) call(call *syntax.CallExpr) { + // TODO(adonovan): opt: Use optimized path for calling methods + // of built-ins: x.f(...) to avoid materializing a closure. + // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { + // fcomp.expr(dot.X) + // fcomp.args(call) + // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) + // return + // } + + // usual case + fcomp.expr(call.Fn) + op, arg := fcomp.args(call) + fcomp.setPos(call.Lparen) + fcomp.emit1(op, arg) +} + +// args emits code to push a tuple of positional arguments +// and a tuple of named arguments containing alternating keys and values. +// Either or both tuples may be empty (TODO(adonovan): optimize). +func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { + var callmode int + // Compute the number of each kind of parameter. + var p, n int // number of positional, named arguments + var varargs, kwargs syntax.Expr + for _, arg := range call.Args { + if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { + + // named argument (name, value) + fcomp.string(binary.X.(*syntax.Ident).Name) + fcomp.expr(binary.Y) + n++ + continue + } + if unary, ok := arg.(*syntax.UnaryExpr); ok { + if unary.Op == syntax.STAR { + callmode |= 1 + varargs = unary.X + continue + } else if unary.Op == syntax.STARSTAR { + callmode |= 2 + kwargs = unary.X + continue + } + } + + // positional argument + fcomp.expr(arg) + p++ + } + + // Python2 and Python3 both permit named arguments + // to appear both before and after a *args argument: + // f(1, 2, x=3, *[4], y=5, **dict(z=6)) + // + // They also differ in their evaluation order: + // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) + // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) + // Starlark-in-Java historically used a third order: + // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right) + // + // After discussion in github.com/bazelbuild/starlark#13, the + // spec now requires Starlark to statically reject named + // arguments after *args (e.g. y=5), and to use Python2-style + // evaluation order. This is both easy to implement and + // consistent with lexical order: + // + // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6 + + // *args + if varargs != nil { + fcomp.expr(varargs) + } + + // **kwargs + if kwargs != nil { + fcomp.expr(kwargs) + } + + // TODO(adonovan): avoid this with a more flexible encoding. + if p >= 256 || n >= 256 { + // resolve already checked this; should be unreachable + panic("too many arguments in call") + } + + return CALL + Opcode(callmode), uint32(p<<8 | n) +} + +func (fcomp *fcomp) tuple(elems []syntax.Expr) { + for _, elem := range elems { + fcomp.expr(elem) + } + fcomp.emit1(MAKETUPLE, uint32(len(elems))) +} + +func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { + if clauseIndex == len(comp.Clauses) { + fcomp.emit(DUP) // accumulator + if comp.Curly { + // dict: {k:v for ...} + // Parser ensures that body is of form k:v. + // Python-style set comprehensions {body for vars in x} + // are not supported. + entry := comp.Body.(*syntax.DictEntry) + fcomp.expr(entry.Key) + fcomp.expr(entry.Value) + fcomp.setPos(entry.Colon) + fcomp.emit(SETDICT) + } else { + // list: [body for vars in x] + fcomp.expr(comp.Body) + fcomp.emit(APPEND) + } + return + } + + clause := comp.Clauses[clauseIndex] + switch clause := clause.(type) { + case *syntax.IfClause: + t := fcomp.newBlock() + done := fcomp.newBlock() + fcomp.ifelse(clause.Cond, t, done) + + fcomp.block = t + fcomp.comprehension(comp, clauseIndex+1) + fcomp.jump(done) + + fcomp.block = done + return + + case *syntax.ForClause: + // Keep consistent with ForStmt. + head := fcomp.newBlock() + body := fcomp.newBlock() + tail := fcomp.newBlock() + + fcomp.expr(clause.X) + fcomp.setPos(clause.For) + fcomp.emit(ITERPUSH) + fcomp.jump(head) + + fcomp.block = head + fcomp.condjump(ITERJMP, tail, body) + + fcomp.block = body + fcomp.assign(clause.For, clause.Vars) + fcomp.comprehension(comp, clauseIndex+1) + fcomp.jump(head) + + fcomp.block = tail + fcomp.emit(ITERPOP) + return + } + + start, _ := clause.Span() + log.Panicf("%s: unexpected comprehension clause %T", start, clause) +} + +func (fcomp *fcomp) function(f *resolve.Function) { + // Evaluation of the defaults may fail, so record the position. + fcomp.setPos(f.Pos) + + // To reduce allocation, we emit a combined tuple + // for the defaults and the freevars. + // The function knows where to split it at run time. + + // Generate tuple of parameter defaults. For: + // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs) + // the tuple is: + // (dp2, dp3, MANDATORY, dk2, MANDATORY). + ndefaults := 0 + seenStar := false + for _, param := range f.Params { + switch param := param.(type) { + case *syntax.BinaryExpr: + fcomp.expr(param.Y) + ndefaults++ + case *syntax.UnaryExpr: + seenStar = true // * or *args (also **kwargs) + case *syntax.Ident: + if seenStar { + fcomp.emit(MANDATORY) + ndefaults++ + } + } + } + + // Capture the cells of the function's + // free variables from the lexical environment. + for _, freevar := range f.FreeVars { + // Don't call fcomp.lookup because we want + // the cell itself, not its content. + switch freevar.Scope { + case resolve.Free: + fcomp.emit1(FREE, uint32(freevar.Index)) + case resolve.Cell: + fcomp.emit1(LOCAL, uint32(freevar.Index)) + } + } + + fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars))) + + funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars) + + if debug { + // TODO(adonovan): do compilations sequentially not as a tree, + // to make the log easier to read. + // Simplify by identifying Toplevel and functionIndex 0. + fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) + } + + // def f(a, *, b=1) has only 2 parameters. + numParams := len(f.Params) + if f.NumKwonlyParams > 0 && !f.HasVarargs { + numParams-- + } + + funcode.NumParams = numParams + funcode.NumKwonlyParams = f.NumKwonlyParams + funcode.HasVarargs = f.HasVarargs + funcode.HasKwargs = f.HasKwargs + fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) +} + +// ifelse emits a Boolean control flow decision. +// On return, the current block is unset. +func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { + switch cond := cond.(type) { + case *syntax.UnaryExpr: + if cond.Op == syntax.NOT { + // if not x then goto t else goto f + // => + // if x then goto f else goto t + fcomp.ifelse(cond.X, f, t) + return + } + + case *syntax.BinaryExpr: + switch cond.Op { + case syntax.AND: + // if x and y then goto t else goto f + // => + // if x then ifelse(y, t, f) else goto f + fcomp.expr(cond.X) + y := fcomp.newBlock() + fcomp.condjump(CJMP, y, f) + + fcomp.block = y + fcomp.ifelse(cond.Y, t, f) + return + + case syntax.OR: + // if x or y then goto t else goto f + // => + // if x then goto t else ifelse(y, t, f) + fcomp.expr(cond.X) + y := fcomp.newBlock() + fcomp.condjump(CJMP, t, y) + + fcomp.block = y + fcomp.ifelse(cond.Y, t, f) + return + case syntax.NOT_IN: + // if x not in y then goto t else goto f + // => + // if x in y then goto f else goto t + copy := *cond + copy.Op = syntax.IN + fcomp.expr(©) + fcomp.condjump(CJMP, f, t) + return + } + } + + // general case + fcomp.expr(cond) + fcomp.condjump(CJMP, t, f) +} |