diff options
-rw-r--r-- | eval.go | 1113 | ||||
-rw-r--r-- | eval_test.go | 51 | ||||
-rw-r--r-- | internal/compile/compile.go | 1599 | ||||
-rw-r--r-- | internal/compile/compile_test.go | 74 | ||||
-rw-r--r-- | internal/compile/serial.go | 164 | ||||
-rw-r--r-- | interp.go | 612 | ||||
-rw-r--r-- | repl/repl.go | 23 | ||||
-rw-r--r-- | syntax/scan.go | 8 | ||||
-rw-r--r-- | syntax/syntax.go | 2 | ||||
-rw-r--r-- | testdata/assign.sky | 2 | ||||
-rw-r--r-- | testdata/dict.sky | 36 | ||||
-rw-r--r-- | testdata/function.sky | 11 | ||||
-rw-r--r-- | value.go | 35 |
13 files changed, 2690 insertions, 1040 deletions
@@ -7,14 +7,14 @@ package skylark import ( "bytes" "fmt" - "log" + "io" "math" - "math/big" "sort" "strings" "unicode" "unicode/utf8" + "github.com/google/skylark/internal/compile" "github.com/google/skylark/resolve" "github.com/google/skylark/syntax" ) @@ -105,14 +105,10 @@ func (d StringDict) Has(key string) bool { _, ok := d[key]; return ok } // A Frame holds the execution state of a single Skylark function call // or module toplevel. type Frame struct { - thread *Thread // thread-associated state - parent *Frame // caller's frame (or nil) - posn syntax.Position // source position of PC (set during call and error) - fn *Function // current function (nil at toplevel) - predeclared StringDict // names predeclared for this module - globals []Value // global variables of enclosing module - locals []Value // local variables, starting with parameters - result Value // operand of current function's return statement + parent *Frame // caller's frame (or nil) + posn syntax.Position // source position of PC, set during error + callpc uint32 // PC of position of active call, set during call + fn *Function // current function (or toplevel) } func (fr *Frame) errorf(posn syntax.Position, format string, args ...interface{}) *EvalError { @@ -122,7 +118,12 @@ func (fr *Frame) errorf(posn syntax.Position, format string, args ...interface{} } // Position returns the source position of the current point of execution in this frame. -func (fr *Frame) Position() syntax.Position { return fr.posn } +func (fr *Frame) Position() syntax.Position { + if fr.posn.IsValid() { + return fr.posn // leaf frame only (the error) + } + return fr.fn.funcode.Position(fr.callpc) // position of active call +} // Function returns the frame's function, or nil for the top-level of a module. func (fr *Frame) Function() *Function { return fr.fn } @@ -130,50 +131,6 @@ func (fr *Frame) Function() *Function { return fr.fn } // Parent returns the frame of the enclosing function call, if any. func (fr *Frame) Parent() *Frame { return fr.parent } -// set updates the environment binding for name to value. -func (fr *Frame) set(id *syntax.Ident, v Value) { - switch resolve.Scope(id.Scope) { - case resolve.Local: - fr.locals[id.Index] = v - case resolve.Global: - fr.globals[id.Index] = v - default: - log.Fatalf("%s: set(%s): neither global nor local (%d)", id.NamePos, id.Name, id.Scope) - } -} - -// lookup returns the value of name in the environment. -func (fr *Frame) lookup(id *syntax.Ident) (Value, error) { - switch resolve.Scope(id.Scope) { - case resolve.Local: - if v := fr.locals[id.Index]; v != nil { - return v, nil - } - case resolve.Free: - return fr.fn.freevars[id.Index], nil - case resolve.Global: - if v := fr.globals[id.Index]; v != nil { - return v, nil - } - case resolve.Predeclared: - if v := fr.predeclared[id.Name]; v != nil { - return v, nil - } - if id.Name == "PACKAGE_NAME" { - // Gross spec, gross hack. - // Users should just call package_name() function. - if v, ok := fr.predeclared["package_name"].(*Builtin); ok { - return v.fn(fr.thread, v, nil, nil) - } - } - return nil, fr.errorf(id.NamePos, "internal error: predeclared variable %s is uninitialized", id.Name) - case resolve.Universal: - return Universe[id.Name], nil - } - return nil, fr.errorf(id.NamePos, "%s variable %s referenced before assignment", - resolve.Scope(id.Scope), id.Name) -} - // An EvalError is a Skylark evaluation error and its associated call stack. type EvalError struct { Msg string @@ -198,16 +155,7 @@ func (fr *Frame) WriteBacktrace(out *bytes.Buffer) { print = func(fr *Frame) { if fr != nil { print(fr.parent) - - name := "<toplevel>" - if fr.fn != nil { - name = fr.fn.Name() - } - fmt.Fprintf(out, " %s:%d:%d: in %s\n", - fr.posn.Filename(), - fr.posn.Line, - fr.posn.Col, - name) + fmt.Fprintf(out, " %s: in %s\n", fr.Position(), fr.fn.Name()) } } print(fr) @@ -222,109 +170,113 @@ func (e *EvalError) Stack() []*Frame { return stack } +// A Program is a compiled Skylark program. +// +// Programs are immutable, and contain no Values. +// A Program may be created by parsing a source file (see SourceProgram) +// or by loading a previously saved compiled program (see CompiledProgram). +type Program struct { + compiled *compile.Program +} + +// CompilerVersion is the version number of the protocol for compiled +// files. Applications must not run programs compiled by one version +// with an interpreter at another version, and should thus incorporate +// the compiler version into the cache key when reusing compiled code. +const CompilerVersion = compile.Version + +// NumLoads returns the number of load statements in the compiled program. +func (prog *Program) NumLoads() int { return len(prog.compiled.Loads) } + +// Load(i) returns the name and position of the i'th module directly +// loaded by this one, where 0 <= i < NumLoads(). +// The name is unresolved---exactly as it appears in the source. +func (prog *Program) Load(i int) (string, syntax.Position) { + id := prog.compiled.Loads[i] + return id.Name, id.Pos +} + +// WriteTo writes the compiled module to the specified output stream. +func (prog *Program) Write(out io.Writer) error { return prog.compiled.Write(out) } + // ExecFile parses, resolves, and executes a Skylark file in the // specified global environment, which may be modified during execution. // -// The filename and src parameters are as for syntax.Parse. +// Thread is the state associated with the Skylark thread. +// +// The filename and src parameters are as for syntax.Parse: +// filename is the name of the file to execute, +// and the name that appears in error messages; +// src is an optional source of bytes to use +// instead of filename. +// +// predeclared defines the predeclared names specific to this module. +// Execution does not modify this dictionary, though it may mutate +// its values. // // If ExecFile fails during evaluation, it returns an *EvalError // containing a backtrace. func ExecFile(thread *Thread, filename string, src interface{}, predeclared StringDict) (StringDict, error) { - return Exec(ExecOptions{ - Thread: thread, - Filename: filename, - Source: src, - Predeclared: predeclared, - }) -} - -// ExecOptions specifies the arguments to Exec. -type ExecOptions struct { - // Thread is the state associated with the Skylark thread. - Thread *Thread - - // Filename is the name of the file to execute, - // and the name that appears in error messages. - Filename string - - // Source is an optional source of bytes to use - // instead of Filename. See syntax.Parse for details. - Source interface{} - - // Predeclared defines the predeclared names specific to this module. - // Execution does not modify this dictionary. - Predeclared StringDict + // Parse, resolve, and compile a Skylark source file. + _, mod, err := SourceProgram(filename, src, predeclared.Has) + if err != nil { + return nil, err + } - // BeforeExec is an optional function that is called after the - // syntax tree has been resolved but before execution. If it - // returns an error, execution is not attempted. - BeforeExec func(*Thread, syntax.Node) error + g, err := mod.Init(thread, predeclared) + g.Freeze() + return g, err } -// Exec is a variant of ExecFile that gives the client greater control -// over optional features. -func Exec(opts ExecOptions) (StringDict, error) { - if debug { - fmt.Printf("ExecFile %s\n", opts.Filename) - defer fmt.Printf("ExecFile %s done\n", opts.Filename) - } - f, err := syntax.Parse(opts.Filename, opts.Source, 0) +// SourceProgram produces a new program by parsing, resolving, +// and compiling a Skylark source file. +// On success, it returns the parsed file and the compiled program. +// The filename and src parameters are as for syntax.Parse. +// +// The isPredeclared predicate reports whether a name is +// a pre-declared identifier of the current module. +// Its typical value is predeclared.Has, +// where predeclared is a StringDict of pre-declared values. +func SourceProgram(filename string, src interface{}, isPredeclared func(string) bool) (*syntax.File, *Program, error) { + f, err := syntax.Parse(filename, src, 0) if err != nil { - return nil, err + return nil, nil, err } - predeclared := opts.Predeclared - if err := resolve.File(f, predeclared.Has, Universe.Has); err != nil { - return nil, err + if err := resolve.File(f, isPredeclared, Universe.Has); err != nil { + return f, nil, err } - thread := opts.Thread - - if opts.BeforeExec != nil { - if err := opts.BeforeExec(thread, f); err != nil { - return nil, err - } - } + compiled := compile.File(f.Stmts, f.Locals, f.Globals) - globals := make([]Value, len(f.Globals)) - fr := thread.Push(predeclared, globals, len(f.Locals)) - err = fr.ExecStmts(f.Stmts) - thread.Pop() + return f, &Program{compiled}, nil +} - // Convert the global environment to a map, and freeze it. - // We return a (partial) map even in case of error. - globalsMap := make(StringDict, len(f.Globals)) - for i, id := range f.Globals { - if v := globals[i]; v != nil { - v.Freeze() - globalsMap[id.Name] = v - } +// CompiledProgram produces a new program from the representation +// of a compiled program previously saved by Program.Write. +func CompiledProgram(in io.Reader) (*Program, error) { + prog, err := compile.ReadProgram(in) + if err != nil { + return nil, err } - - return globalsMap, err + return &Program{prog}, nil } -// Push pushes a new Frame on the specified thread's stack, and returns it. -// It must be followed by a call to Pop when the frame is no longer needed. -// -// Most clients do not need this low-level function; use ExecFile or Eval instead. -func (thread *Thread) Push(predeclared StringDict, globals []Value, nlocals int) *Frame { - fr := &Frame{ - thread: thread, - parent: thread.frame, +// Init creates a set of global variables for the program, +// executes the toplevel code of the specified program, +// and returns a new, unfrozen dictionary of the globals. +func (prog *Program) Init(thread *Thread, predeclared StringDict) (StringDict, error) { + toplevel := &Function{ + funcode: prog.compiled.Toplevel, predeclared: predeclared, - globals: globals, - locals: make([]Value, nlocals), + globals: make([]Value, len(prog.compiled.Globals)), } - thread.frame = fr - return fr -} -// Pop removes the topmost frame from the thread's stack. -// -// Most clients do not need this low-level function; use ExecFile or Eval instead. -func (thread *Thread) Pop() { - thread.frame = thread.frame.parent + _, err := toplevel.Call(thread, nil, nil) + + // Convert the global environment to a map and freeze it. + // We return a (partial) map even in case of error. + return toplevel.Globals(), err } // Eval parses, resolves, and evaluates an expression within the @@ -348,229 +300,15 @@ func Eval(thread *Thread, filename string, src interface{}, env StringDict) (Val return nil, err } - // There are no globals. - fr := thread.Push(env, nil, len(locals)) - v, err := eval(fr, expr) - thread.Pop() - return v, err -} - -// Sentinel values used for control flow. Internal use only. -var ( - errContinue = fmt.Errorf("continue") - errBreak = fmt.Errorf("break") - errReturn = fmt.Errorf("return") -) - -// ExecStmts executes the statements in the context of the specified -// frame, which must provide sufficient local slots. -// -// Most clients do not need this function; use Exec or Eval instead. -func (fr *Frame) ExecStmts(stmts []syntax.Stmt) error { - for _, stmt := range stmts { - if err := exec(fr, stmt); err != nil { - return err - } + f := &Function{ + funcode: compile.Expr(expr, locals), + predeclared: env, + globals: nil, } - return nil + return f.Call(thread, nil, nil) } -func exec(fr *Frame, stmt syntax.Stmt) error { - switch stmt := stmt.(type) { - case *syntax.ExprStmt: - _, err := eval(fr, stmt.X) - return err - - case *syntax.BranchStmt: - switch stmt.Token { - case syntax.PASS: - return nil // no-op - case syntax.BREAK: - return errBreak - case syntax.CONTINUE: - return errContinue - } - - case *syntax.IfStmt: - cond, err := eval(fr, stmt.Cond) - if err != nil { - return err - } - if cond.Truth() { - return fr.ExecStmts(stmt.True) - } else { - return fr.ExecStmts(stmt.False) - } - - case *syntax.AssignStmt: - switch stmt.Op { - case syntax.EQ: - // simple assignment: x = y - y, err := eval(fr, stmt.RHS) - if err != nil { - return err - } - return assign(fr, stmt.OpPos, stmt.LHS, y) - - case syntax.PLUS_EQ, - syntax.MINUS_EQ, - syntax.STAR_EQ, - syntax.SLASH_EQ, - syntax.SLASHSLASH_EQ, - syntax.PERCENT_EQ: - // augmented assignment: x += y - - var old Value // old value loaded from "address" x - var set func(fr *Frame, new Value) error - - // Evaluate "address" of x exactly once to avoid duplicate side-effects. - switch lhs := stmt.LHS.(type) { - case *syntax.Ident: - // x += ... - x, err := fr.lookup(lhs) - if err != nil { - return err - } - old = x - set = func(fr *Frame, new Value) error { - fr.set(lhs, new) - return nil - } - - case *syntax.IndexExpr: - // x[y] += ... - x, err := eval(fr, lhs.X) - if err != nil { - return err - } - y, err := eval(fr, lhs.Y) - if err != nil { - return err - } - old, err = getIndex(fr, lhs.Lbrack, x, y) - if err != nil { - return err - } - set = func(fr *Frame, new Value) error { - return setIndex(fr, lhs.Lbrack, x, y, new) - } - - case *syntax.DotExpr: - // x.f += ... - x, err := eval(fr, lhs.X) - if err != nil { - return err - } - old, err = getAttr(fr, x, lhs) - if err != nil { - return err - } - set = func(fr *Frame, new Value) error { - return setField(fr, x, lhs, new) - } - } - - y, err := eval(fr, stmt.RHS) - if err != nil { - return err - } - - // Special case, following Python: - // If x is a list, x += y is sugar for x.extend(y). - if xlist, ok := old.(*List); ok && stmt.Op == syntax.PLUS_EQ { - // It's possible that y is not Iterable but - // nonetheless defines x+y, in which case we - // should fall back to the general case. - if yiter, ok := y.(Iterable); ok { - if err := xlist.checkMutable("apply += to", true); err != nil { - return fr.errorf(stmt.OpPos, "%v", err) - } - listExtend(xlist, yiter) - return nil - } - } - - new, err := Binary(stmt.Op-syntax.PLUS_EQ+syntax.PLUS, old, y) - if err != nil { - return fr.errorf(stmt.OpPos, "%v", err) - } - return set(fr, new) - - default: - log.Fatalf("%s: unexpected assignment operator: %s", stmt.OpPos, stmt.Op) - } - - case *syntax.DefStmt: - f, err := evalFunction(fr, stmt.Def, stmt.Name.Name, &stmt.Function) - if err != nil { - return err - } - fr.set(stmt.Name, f) - return nil - - case *syntax.ForStmt: - x, err := eval(fr, stmt.X) - if err != nil { - return err - } - iter := Iterate(x) - if iter == nil { - return fr.errorf(stmt.For, "%s value is not iterable", x.Type()) - } - defer iter.Done() - var elem Value - for iter.Next(&elem) { - if err := assign(fr, stmt.For, stmt.Vars, elem); err != nil { - return err - } - if err := fr.ExecStmts(stmt.Body); err != nil { - if err == errBreak { - break - } else if err == errContinue { - continue - } else { - return err - } - } - } - return nil - - case *syntax.ReturnStmt: - if stmt.Result != nil { - x, err := eval(fr, stmt.Result) - if err != nil { - return err - } - fr.result = x - } else { - fr.result = None - } - return errReturn - - case *syntax.LoadStmt: - module := stmt.ModuleName() - if fr.thread.Load == nil { - return fr.errorf(stmt.Load, "load not implemented by this application") - } - fr.posn = stmt.Load - dict, err := fr.thread.Load(fr.thread, module) - if err != nil { - return fr.errorf(stmt.Load, "cannot load %s: %v", module, err) - } - for i, from := range stmt.From { - v, ok := dict[from.Name] - if !ok { - return fr.errorf(stmt.From[i].NamePos, "load: name %s not found in module %s", from.Name, module) - } - fr.set(stmt.To[i], v) - } - return nil - } - - start, _ := stmt.Span() - log.Fatalf("%s: exec: unexpected statement %T", start, stmt) - panic("unreachable") -} +// The following functions are primitive operations of the byte code interpreter. // list += iterable func listExtend(x *List, y Iterable) { @@ -588,38 +326,36 @@ func listExtend(x *List, y Iterable) { } // getAttr implements x.dot. -func getAttr(fr *Frame, x Value, dot *syntax.DotExpr) (Value, error) { - name := dot.Name.Name - +func getAttr(fr *Frame, x Value, name string) (Value, error) { // field or method? if x, ok := x.(HasAttrs); ok { if v, err := x.Attr(name); v != nil || err != nil { - return v, wrapError(fr, dot.Dot, err) + return v, err } } - return nil, fr.errorf(dot.Dot, "%s has no .%s field or method", x.Type(), name) + return nil, fmt.Errorf("%s has no .%s field or method", x.Type(), name) } // setField implements x.name = y. -func setField(fr *Frame, x Value, dot *syntax.DotExpr, y Value) error { +func setField(fr *Frame, x Value, name string, y Value) error { if x, ok := x.(HasSetField); ok { - err := x.SetField(dot.Name.Name, y) - return wrapError(fr, dot.Dot, err) + err := x.SetField(name, y) + return err } - return fr.errorf(dot.Dot, "can't assign to .%s field of %s", dot.Name.Name, x.Type()) + return fmt.Errorf("can't assign to .%s field of %s", name, x.Type()) } // getIndex implements x[y]. -func getIndex(fr *Frame, lbrack syntax.Position, x, y Value) (Value, error) { +func getIndex(fr *Frame, x, y Value) (Value, error) { switch x := x.(type) { case Mapping: // dict z, found, err := x.Get(y) if err != nil { - return nil, fr.errorf(lbrack, "%v", err) + return nil, err } if !found { - return nil, fr.errorf(lbrack, "key %v not in %s", y, x.Type()) + return nil, fmt.Errorf("key %v not in %s", y, x.Type()) } return z, nil @@ -627,299 +363,47 @@ func getIndex(fr *Frame, lbrack syntax.Position, x, y Value) (Value, error) { n := x.Len() i, err := AsInt32(y) if err != nil { - return nil, fr.errorf(lbrack, "%s index: %s", x.Type(), err) + return nil, fmt.Errorf("%s index: %s", x.Type(), err) } if i < 0 { i += n } if i < 0 || i >= n { - return nil, fr.errorf(lbrack, "%s index %d out of range [0:%d]", + return nil, fmt.Errorf("%s index %d out of range [0:%d]", x.Type(), i, n) } return x.Index(i), nil } - return nil, fr.errorf(lbrack, "unhandled index operation %s[%s]", x.Type(), y.Type()) + return nil, fmt.Errorf("unhandled index operation %s[%s]", x.Type(), y.Type()) } // setIndex implements x[y] = z. -func setIndex(fr *Frame, lbrack syntax.Position, x, y, z Value) error { +func setIndex(fr *Frame, x, y, z Value) error { switch x := x.(type) { case *Dict: if err := x.Set(y, z); err != nil { - return fr.errorf(lbrack, "%v", err) + return err } case HasSetIndex: i, err := AsInt32(y) if err != nil { - return wrapError(fr, lbrack, err) + return err } if i < 0 { i += x.Len() } if i < 0 || i >= x.Len() { - return fr.errorf(lbrack, "%s index %d out of range [0:%d]", x.Type(), i, x.Len()) + return fmt.Errorf("%s index %d out of range [0:%d]", x.Type(), i, x.Len()) } - return wrapError(fr, lbrack, x.SetIndex(i, z)) + return x.SetIndex(i, z) default: - return fr.errorf(lbrack, "%s value does not support item assignment", x.Type()) + return fmt.Errorf("%s value does not support item assignment", x.Type()) } return nil } -// assign implements lhs = rhs for arbitrary expressions lhs. -func assign(fr *Frame, pos syntax.Position, lhs syntax.Expr, rhs Value) error { - switch lhs := lhs.(type) { - case *syntax.Ident: - // x = rhs - fr.set(lhs, rhs) - - case *syntax.TupleExpr: - // (x, y) = rhs - return assignSequence(fr, pos, lhs.List, rhs) - - case *syntax.ListExpr: - // [x, y] = rhs - return assignSequence(fr, pos, lhs.List, rhs) - - case *syntax.IndexExpr: - // x[y] = rhs - x, err := eval(fr, lhs.X) - if err != nil { - return err - } - y, err := eval(fr, lhs.Y) - if err != nil { - return err - } - return setIndex(fr, lhs.Lbrack, x, y, rhs) - - case *syntax.DotExpr: - // x.f = rhs - x, err := eval(fr, lhs.X) - if err != nil { - return err - } - return setField(fr, x, lhs, rhs) - - case *syntax.ParenExpr: - return assign(fr, pos, lhs.X, rhs) - - default: - return fr.errorf(pos, "ill-formed assignment: %T", lhs) - } - return nil -} - -func assignSequence(fr *Frame, pos syntax.Position, lhs []syntax.Expr, rhs Value) error { - nlhs := len(lhs) - n := Len(rhs) - if n < 0 { - return fr.errorf(pos, "got %s in sequence assignment", rhs.Type()) - } else if n > nlhs { - return fr.errorf(pos, "too many values to unpack (got %d, want %d)", n, nlhs) - } else if n < nlhs { - return fr.errorf(pos, "too few values to unpack (got %d, want %d)", n, nlhs) - } - - // If the rhs is not indexable, extract its elements into a - // temporary tuple before doing the assignment. - ix, ok := rhs.(Indexable) - if !ok { - tuple := make(Tuple, n) - iter := Iterate(rhs) - if iter == nil { - return fr.errorf(pos, "non-iterable sequence: %s", rhs.Type()) - } - for i := 0; i < n; i++ { - iter.Next(&tuple[i]) - } - iter.Done() - ix = tuple - } - - for i := 0; i < n; i++ { - if err := assign(fr, pos, lhs[i], ix.Index(i)); err != nil { - return err - } - } - return nil -} - -func eval(fr *Frame, e syntax.Expr) (Value, error) { - switch e := e.(type) { - case *syntax.Ident: - return fr.lookup(e) - - case *syntax.Literal: - switch e.Token { - case syntax.INT: - switch e.Value.(type) { - case int64: - return MakeInt64(e.Value.(int64)), nil - case *big.Int: - return Int{e.Value.(*big.Int)}, nil - } - case syntax.FLOAT: - return Float(e.Value.(float64)), nil - case syntax.STRING: - return String(e.Value.(string)), nil - } - - case *syntax.ListExpr: - vals := make([]Value, len(e.List)) - for i, x := range e.List { - v, err := eval(fr, x) - if err != nil { - return nil, err - } - vals[i] = v - } - return NewList(vals), nil - - case *syntax.CondExpr: - cond, err := eval(fr, e.Cond) - if err != nil { - return nil, err - } - if cond.Truth() { - return eval(fr, e.True) - } else { - return eval(fr, e.False) - } - - case *syntax.IndexExpr: - x, err := eval(fr, e.X) - if err != nil { - return nil, err - } - y, err := eval(fr, e.Y) - if err != nil { - return nil, err - } - return getIndex(fr, e.Lbrack, x, y) - - case *syntax.SliceExpr: - return evalSliceExpr(fr, e) - - case *syntax.Comprehension: - var result Value - if e.Curly { - result = new(Dict) - } else { - result = new(List) - } - return result, evalComprehension(fr, e, result, 0) - - case *syntax.TupleExpr: - n := len(e.List) - tuple := make(Tuple, n) - for i, x := range e.List { - v, err := eval(fr, x) - if err != nil { - return nil, err - } - tuple[i] = v - } - return tuple, nil - - case *syntax.DictExpr: - dict := new(Dict) - for i, entry := range e.List { - entry := entry.(*syntax.DictEntry) - k, err := eval(fr, entry.Key) - if err != nil { - return nil, err - } - v, err := eval(fr, entry.Value) - if err != nil { - return nil, err - } - if err := dict.Set(k, v); err != nil { - return nil, fr.errorf(e.Lbrace, "%v", err) - } - if dict.Len() != i+1 { - return nil, fr.errorf(e.Lbrace, "duplicate key: %v", k) - } - } - return dict, nil - - case *syntax.UnaryExpr: - x, err := eval(fr, e.X) - if err != nil { - return nil, err - } - y, err := Unary(e.Op, x) - if err != nil { - return nil, fr.errorf(e.OpPos, "%s", err) - } - return y, nil - - case *syntax.BinaryExpr: - x, err := eval(fr, e.X) - if err != nil { - return nil, err - } - - // short-circuit operators - switch e.Op { - case syntax.OR: - if x.Truth() { - return x, nil - } - return eval(fr, e.Y) - case syntax.AND: - if !x.Truth() { - return x, nil - } - return eval(fr, e.Y) - } - - y, err := eval(fr, e.Y) - if err != nil { - return nil, err - } - - // comparisons - switch e.Op { - case syntax.EQL, syntax.NEQ, syntax.GT, syntax.LT, syntax.LE, syntax.GE: - ok, err := Compare(e.Op, x, y) - if err != nil { - return nil, fr.errorf(e.OpPos, "%s", err) - } - return Bool(ok), nil - } - - // binary operators - z, err := Binary(e.Op, x, y) - if err != nil { - return nil, fr.errorf(e.OpPos, "%s", err) - } - return z, nil - - case *syntax.DotExpr: - x, err := eval(fr, e.X) - if err != nil { - return nil, err - } - return getAttr(fr, x, e) - - case *syntax.CallExpr: - return evalCall(fr, e) - - case *syntax.LambdaExpr: - return evalFunction(fr, e.Lambda, "lambda", &e.Function) - - case *syntax.ParenExpr: - return eval(fr, e.X) - } - - start, _ := e.Span() - log.Fatalf("%s: unexpected expr %T", start, e) - panic("unreachable") -} - // Unary applies a unary operator (+, -, not) to its operand. func Unary(op syntax.Token, x Value) (Value, error) { switch op { @@ -1283,139 +767,6 @@ func repeat(elems []Value, n int) (res []Value) { return res } -func evalCall(fr *Frame, call *syntax.CallExpr) (Value, error) { - var fn Value - - // Use optimized path for calling methods of built-ins: x.f(...) - if dot, ok := call.Fn.(*syntax.DotExpr); ok { - recv, err := eval(fr, dot.X) - if err != nil { - return nil, err - } - - name := dot.Name.Name - if method := builtinMethodOf(recv, name); method != nil { - args, kwargs, err := evalArgs(fr, call) - if err != nil { - return nil, err - } - - // Make the call. - res, err := method(name, recv, args, kwargs) - return res, wrapError(fr, call.Lparen, err) - } - - // Fall back to usual path. - fn, err = getAttr(fr, recv, dot) - if err != nil { - return nil, err - } - } else { - var err error - fn, err = eval(fr, call.Fn) - if err != nil { - return nil, err - } - } - - args, kwargs, err := evalArgs(fr, call) - if err != nil { - return nil, err - } - - // Make the call. - fr.posn = call.Lparen - res, err := Call(fr.thread, fn, args, kwargs) - return res, wrapError(fr, call.Lparen, err) -} - -// wrapError wraps the error in a skylark.EvalError only if needed. -func wrapError(fr *Frame, posn syntax.Position, err error) error { - switch err := err.(type) { - case nil, *EvalError: - return err - } - return fr.errorf(posn, "%s", err.Error()) -} - -func evalArgs(fr *Frame, call *syntax.CallExpr) (args Tuple, kwargs []Tuple, err error) { - // evaluate arguments. - var kwargsAlloc Tuple // allocate a single backing array - for i, arg := range call.Args { - // keyword argument, k=v - if binop, ok := arg.(*syntax.BinaryExpr); ok && binop.Op == syntax.EQ { - k := binop.X.(*syntax.Ident).Name - v, err := eval(fr, binop.Y) - if err != nil { - return nil, nil, err - } - if kwargs == nil { - nkwargs := len(call.Args) - i // more than enough - kwargsAlloc = make(Tuple, 2*nkwargs) - kwargs = make([]Tuple, 0, nkwargs) - } - pair := kwargsAlloc[:2:2] - kwargsAlloc = kwargsAlloc[2:] - pair[0], pair[1] = String(k), v - kwargs = append(kwargs, pair) - continue - } - - // *args and **kwargs arguments - if unop, ok := arg.(*syntax.UnaryExpr); ok { - if unop.Op == syntax.STAR { - // *args - x, err := eval(fr, unop.X) - if err != nil { - return nil, nil, err - } - iter := Iterate(x) - if iter == nil { - return nil, nil, fr.errorf(unop.OpPos, "argument after * must be iterable, not %s", x.Type()) - } - defer iter.Done() - var elem Value - for iter.Next(&elem) { - args = append(args, elem) - } - continue - } - - if unop.Op == syntax.STARSTAR { - // **kwargs - x, err := eval(fr, unop.X) - if err != nil { - return nil, nil, err - } - xdict, ok := x.(*Dict) - if !ok { - return nil, nil, fr.errorf(unop.OpPos, "argument after ** must be a mapping, not %s", x.Type()) - } - items := xdict.Items() - for _, item := range items { - if _, ok := item[0].(String); !ok { - return nil, nil, fr.errorf(unop.OpPos, "keywords must be strings, not %s", item[0].Type()) - } - } - if kwargs == nil { - kwargs = items - } else { - kwargs = append(kwargs, items...) - } - continue - } - } - - // ordinary argument - v, err := eval(fr, arg) - if err != nil { - return nil, nil, err - } - args = append(args, v) - } - return args, kwargs, err -} - // Call calls the function fn with the specified positional and keyword arguments. func Call(thread *Thread, fn Value, args Tuple, kwargs []Tuple) (Value, error) { c, ok := fn.(Callable) @@ -1430,41 +781,6 @@ func Call(thread *Thread, fn Value, args Tuple, kwargs []Tuple) (Value, error) { return res, err } -func evalSliceExpr(fr *Frame, e *syntax.SliceExpr) (Value, error) { - // Unlike Python, Skylark does not allow a slice on the LHS of - // an assignment statement. - - x, err := eval(fr, e.X) - if err != nil { - return nil, err - } - - var lo, hi, step Value = None, None, None - if e.Lo != nil { - lo, err = eval(fr, e.Lo) - if err != nil { - return nil, err - } - } - if e.Hi != nil { - hi, err = eval(fr, e.Hi) - if err != nil { - return nil, err - } - } - if e.Step != nil { - step, err = eval(fr, e.Step) - if err != nil { - return nil, err - } - } - res, err := slice(x, lo, hi, step) - if err != nil { - return nil, fr.errorf(e.Lbrack, "%s", err) - } - return res, nil -} - func slice(x, lo, hi, step_ Value) (Value, error) { n := Len(x) if n < 0 { @@ -1615,150 +931,9 @@ func asIndex(v Value, len int, result *int) error { return nil } -func evalComprehension(fr *Frame, comp *syntax.Comprehension, result Value, clauseIndex int) error { - if clauseIndex == len(comp.Clauses) { - if comp.Curly { - // dict: {k:v for ...} - // Parser ensures that body is of form k:v. - // Python-style set comprehensions {body for vars in x} - // are not supported. - entry := comp.Body.(*syntax.DictEntry) - k, err := eval(fr, entry.Key) - if err != nil { - return err - } - v, err := eval(fr, entry.Value) - if err != nil { - return err - } - if err := result.(*Dict).Set(k, v); err != nil { - return fr.errorf(entry.Colon, "%v", err) - } - } else { - // list: [body for vars in x] - x, err := eval(fr, comp.Body) - if err != nil { - return err - } - list := result.(*List) - list.elems = append(list.elems, x) - } - return nil - } - - clause := comp.Clauses[clauseIndex] - switch clause := clause.(type) { - case *syntax.IfClause: - cond, err := eval(fr, clause.Cond) - if err != nil { - return err - } - if cond.Truth() { - return evalComprehension(fr, comp, result, clauseIndex+1) - } - return nil - - case *syntax.ForClause: - x, err := eval(fr, clause.X) - if err != nil { - return err - } - iter := Iterate(x) - if iter == nil { - return fr.errorf(clause.For, "%s value is not iterable", x.Type()) - } - defer iter.Done() - var elem Value - for iter.Next(&elem) { - if err := assign(fr, clause.For, clause.Vars, elem); err != nil { - return err - } - - if err := evalComprehension(fr, comp, result, clauseIndex+1); err != nil { - return err - } - } - return nil - } - - start, _ := clause.Span() - log.Fatalf("%s: unexpected comprehension clause %T", start, clause) - panic("unreachable") -} - -func evalFunction(fr *Frame, pos syntax.Position, name string, function *syntax.Function) (Value, error) { - // Example: f(x, y=dflt, *args, **kwargs) - - // Evaluate parameter defaults. - var defaults Tuple // parameter default values - for _, param := range function.Params { - if binary, ok := param.(*syntax.BinaryExpr); ok { - // e.g. y=dflt - dflt, err := eval(fr, binary.Y) - if err != nil { - return nil, err - } - defaults = append(defaults, dflt) - } - } - - // Capture the values of the function's - // free variables from the lexical environment. - freevars := make([]Value, len(function.FreeVars)) - for i, freevar := range function.FreeVars { - v, err := fr.lookup(freevar) - if err != nil { - return nil, fr.errorf(pos, "%s", err) - } - freevars[i] = v - } - - return &Function{ - name: name, - position: pos, - syntax: function, - predeclared: fr.predeclared, - globals: fr.globals, - defaults: defaults, - freevars: freevars, - }, nil -} - -func (fn *Function) Call(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) { - if debug { - fmt.Printf("call of %s %v %v\n", fn.Name(), args, kwargs) - } - - // detect recursion - for fr := thread.frame; fr != nil; fr = fr.parent { - // We look for the same syntactic function, - // not function value, otherwise the user could - // defeat the check by writing the Y combinator. - if fr.fn != nil && fr.fn.syntax == fn.syntax { - return nil, fmt.Errorf("function %s called recursively", fn.Name()) - } - } - - fr := thread.Push(fn.predeclared, fn.globals, len(fn.syntax.Locals)) - fr.fn = fn - err := fn.setArgs(fr, args, kwargs) - if err == nil { - err = fr.ExecStmts(fn.syntax.Body) - } - thread.Pop() - - if err != nil { - if err == errReturn { - return fr.result, nil - } - return nil, err - } - return None, nil -} - // setArgs sets the values of the formal parameters of function fn in -// frame fr based on the actual parameter values in args and kwargs. -func (fn *Function) setArgs(fr *Frame, args Tuple, kwargs []Tuple) error { +// based on the actual parameter values in args and kwargs. +func setArgs(locals []Value, fn *Function, args Tuple, kwargs []Tuple) error { cond := func(x bool, y, z interface{}) interface{} { if x { return y @@ -1767,27 +942,27 @@ func (fn *Function) setArgs(fr *Frame, args Tuple, kwargs []Tuple) error { } // nparams is the number of ordinary parameters (sans * or **). - nparams := len(fn.syntax.Params) - if fn.syntax.HasVarargs { + nparams := fn.NumParams() + if fn.HasVarargs() { nparams-- } - if fn.syntax.HasKwargs { + if fn.HasKwargs() { nparams-- } // This is the algorithm from PyEval_EvalCodeEx. var kwdict *Dict n := len(args) - if nparams > 0 || fn.syntax.HasVarargs || fn.syntax.HasKwargs { - if fn.syntax.HasKwargs { + if nparams > 0 || fn.HasVarargs() || fn.HasKwargs() { + if fn.HasKwargs() { kwdict = new(Dict) - fr.locals[len(fn.syntax.Params)-1] = kwdict + locals[fn.NumParams()-1] = kwdict } // too many args? if len(args) > nparams { - if !fn.syntax.HasVarargs { - return fr.errorf(fn.position, "function %s takes %s %d argument%s (%d given)", + if !fn.HasVarargs() { + return fmt.Errorf("function %s takes %s %d argument%s (%d given)", fn.Name(), cond(len(fn.defaults) > 0, "at most", "exactly"), nparams, @@ -1803,32 +978,32 @@ func (fn *Function) setArgs(fr *Frame, args Tuple, kwargs []Tuple) error { // ordinary parameters for i := 0; i < n; i++ { - fr.locals[i] = args[i] + locals[i] = args[i] defined.set(i) } // variadic arguments - if fn.syntax.HasVarargs { + if fn.HasVarargs() { tuple := make(Tuple, len(args)-n) for i := n; i < len(args); i++ { tuple[i-n] = args[i] } - fr.locals[nparams] = tuple + locals[nparams] = tuple } // keyword arguments - paramIdents := fn.syntax.Locals[:nparams] + paramIdents := fn.funcode.Locals[:nparams] for _, pair := range kwargs { k, v := pair[0].(String), pair[1] if i := findParam(paramIdents, string(k)); i >= 0 { if defined.set(i) { - return fr.errorf(fn.position, "function %s got multiple values for keyword argument %s", fn.Name(), k) + return fmt.Errorf("function %s got multiple values for keyword argument %s", fn.Name(), k) } - fr.locals[i] = v + locals[i] = v continue } if kwdict == nil { - return fr.errorf(fn.position, "function %s got an unexpected keyword argument %s", fn.Name(), k) + return fmt.Errorf("function %s got an unexpected keyword argument %s", fn.Name(), k) } kwdict.Set(k, v) } @@ -1841,9 +1016,9 @@ func (fn *Function) setArgs(fr *Frame, args Tuple, kwargs []Tuple) error { i := len(args) for ; i < m; i++ { if !defined.get(i) { - return fr.errorf(fn.position, "function %s takes %s %d argument%s (%d given)", + return fmt.Errorf("function %s takes %s %d argument%s (%d given)", fn.Name(), - cond(fn.syntax.HasVarargs || len(fn.defaults) > 0, "at least", "exactly"), + cond(fn.HasVarargs() || len(fn.defaults) > 0, "at least", "exactly"), m, cond(m == 1, "", "s"), defined.len()) @@ -1853,17 +1028,17 @@ func (fn *Function) setArgs(fr *Frame, args Tuple, kwargs []Tuple) error { // set default values for ; i < nparams; i++ { if !defined.get(i) { - fr.locals[i] = fn.defaults[i-m] + locals[i] = fn.defaults[i-m] } } } } else if nactual := len(args) + len(kwargs); nactual > 0 { - return fr.errorf(fn.position, "function %s takes no arguments (%d given)", fn.Name(), nactual) + return fmt.Errorf("function %s takes no arguments (%d given)", fn.Name(), nactual) } return nil } -func findParam(params []*syntax.Ident, name string) int { +func findParam(params []compile.Ident, name string) int { for i, param := range params { if param.Name == name { return i diff --git a/eval_test.go b/eval_test.go index 359739b..500c77d 100644 --- a/eval_test.go +++ b/eval_test.go @@ -321,18 +321,15 @@ f() buf := new(bytes.Buffer) print := func(thread *skylark.Thread, msg string) { caller := thread.Caller() - name := "<module>" - if caller.Function() != nil { - name = caller.Function().Name() - } - fmt.Fprintf(buf, "%s: %s: %s\n", caller.Position(), name, msg) + fmt.Fprintf(buf, "%s: %s: %s\n", + caller.Position(), caller.Function().Name(), msg) } thread := &skylark.Thread{Print: print} if _, err := skylark.ExecFile(thread, "foo.go", src, nil); err != nil { t.Fatal(err) } - want := "foo.go:2:6: <module>: hello\n" + - "foo.go:3:15: f: world\n" + want := "foo.go:2: <toplevel>: hello\n" + + "foo.go:3: f: world\n" if got := buf.String(); got != want { t.Errorf("output was %s, want %s", got, want) } @@ -427,12 +424,13 @@ i() switch err := err.(type) { case *skylark.EvalError: got := err.Backtrace() + // Compiled code currently has no column information. const want = `Traceback (most recent call last): - crash.go:6:2: in <toplevel> - crash.go:5:18: in i - crash.go:4:20: in h - crash.go:3:12: in g - crash.go:2:19: in f + crash.go:6: in <toplevel> + crash.go:5: in i + crash.go:4: in h + crash.go:3: in g + crash.go:2: in f Error: floored division by zero` if got != want { t.Errorf("error was %s, want %s", got, want) @@ -447,23 +445,12 @@ Error: floored division by zero` // TestRepeatedExec parses and resolves a file syntax tree once then // executes it repeatedly with different values of its predeclared variables. func TestRepeatedExec(t *testing.T) { - f, err := syntax.Parse("repeat.sky", "y = 2 * x", 0) + predeclared := skylark.StringDict{"x": skylark.None} + _, prog, err := skylark.SourceProgram("repeat.sky", "y = 2 * x", predeclared.Has) if err != nil { - t.Fatal(f) // parse error - } - - isPredeclared := func(name string) bool { return name == "x" } // x, but not y - - if err := resolve.File(f, isPredeclared, skylark.Universe.Has); err != nil { - t.Fatal(err) // resolve error - } - - const yIndex = 0 - if yName := f.Globals[yIndex].Name; yName != "y" { - t.Fatalf("global[%d] = %s, want y", yIndex, yName) + t.Fatal(err) } - thread := new(skylark.Thread) for _, test := range []struct { x, want skylark.Value }{ @@ -471,17 +458,15 @@ func TestRepeatedExec(t *testing.T) { {x: skylark.String("mur"), want: skylark.String("murmur")}, {x: skylark.Tuple{skylark.None}, want: skylark.Tuple{skylark.None, skylark.None}}, } { - predeclared := skylark.StringDict{"x": test.x} - globals := make([]skylark.Value, len(f.Globals)) - fr := thread.Push(predeclared, globals, len(f.Locals)) - if err := fr.ExecStmts(f.Stmts); err != nil { + predeclared["x"] = test.x // update the values in dictionary + thread := new(skylark.Thread) + if globals, err := prog.Init(thread, predeclared); err != nil { t.Errorf("x=%v: %v", test.x, err) // exec error - } else if eq, err := skylark.Equal(globals[yIndex], test.want); err != nil { + } else if eq, err := skylark.Equal(globals["y"], test.want); err != nil { t.Errorf("x=%v: %v", test.x, err) // comparison error } else if !eq { - t.Errorf("x=%v: got y=%v, want %v", test.x, globals[yIndex], test.want) + t.Errorf("x=%v: got y=%v, want %v", test.x, globals["y"], test.want) } - thread.Pop() } } diff --git a/internal/compile/compile.go b/internal/compile/compile.go new file mode 100644 index 0000000..fd00e62 --- /dev/null +++ b/internal/compile/compile.go @@ -0,0 +1,1599 @@ +// The compile package defines the Skylark bytecode compiler. +// It is an internal package of the Skylark interpreter and is not directly accessible to clients. +// +// The compiler generates byte code with optional uint32 operands for a +// virtual machine with the following components: +// - a program counter, which is an index into the byte code array. +// - an operand stack, whose maximum size is computed for each function by the compiler. +// - an stack of active iterators. +// - an array of local variables. +// The number of local variables and their indices are computed by the resolver. +// - an array of free variables, for nested functions. +// As with locals, these are computed by the resolver. +// - an array of global variables, shared among all functions in the same module. +// All elements are initially nil. +// - two maps of predeclared and universal identifiers. +// +// A line number table maps each program counter value to a source position; +// these source positions do not currently record column information. +// +// Operands, logically uint32s, are encoded using little-endian 7-bit +// varints, the top bit indicating that more bytes follow. +// +package compile + +// TODO(adonovan): +// Optimizations: +// - add dynamic checks for missing opcodes in the various tables. +// +// - optimize the + operator, which is often applied to many strings or lists +// in sequence, leading to quadratic behavior: +// 1) simplify a left tree of plusses ((a+b)+c) into a sequence [a, +b, +c]. +// We need to keep each + that could fail, to report the correct position. +// 2) in the sequence, combine adjacent list/tuple/string literals: +// [a...]+[b...] -> [a... b...] +// (a...)+(b...) -> (a... b...) +// "a..."+"b..." -> "a... b..." +// 3) in the sequence, implement a + op of a literal and following +// non-literals by a buffer: +// e.g. "a..." + x + y +// => s=stringbuf(); s.append("a..."); s.append(x); s.append(y); s.string() +// e.g. [a...] + x + y +// => z=list(); z.append(each of a...); z.extend(x); z.extend(y); z +// The apparent position of each append op is that of the '+' token. + +import ( + "bytes" + "fmt" + "log" + "math/big" + "os" + "path/filepath" + + "github.com/google/skylark/resolve" + "github.com/google/skylark/syntax" +) + +const debug = false // TODO(adonovan): use a bitmap of options; and regexp to match files + +// Increment this to force recompilation of saved bytecode files. +const Version = 1 + +type Opcode uint8 + +// "x DUP x x" is a "stack picture" that describes the state of the +// stack before and after execution of the instruction. +// +// OP<index> indicates an immediate operand that is an index into the +// specified table: locals, names, freevars, constants. +const ( + NOP Opcode = iota // - NOP - + + // stack operations + DUP // x DUP x x + DUP2 // x y DUP2 x y x y + POP // x POP - + EXCH // x y EXCH y x + + // binary comparisons + // (order must match Token) + LT + GT + GE + LE + EQL + NEQ + + // binary arithmetic + // (order must match Token) + PLUS + MINUS + STAR + SLASH + SLASHSLASH + PERCENT + AMP + PIPE + + IN + + // unary operators + UPLUS // x UPLUS x + UMINUS // x UMINUS -x + + NONE // - NONE None + TRUE // - TRUE True + FALSE // - FALSE False + + ITERPUSH // iterable ITERPUSH - [pushes the iterator stack] + ITERPOP // - ITERPOP - [pops the iterator stack] + NOT // value NOT bool + RETURN // value RETURN - + SETINDEX // a i new SETINDEX - + INDEX // a i INDEX elem + SETDICT // dict key value SETDICT - + SETDICTUNIQ // dict key value SETDICTUNIQ - + APPEND // list elem APPEND - + SLICE // x lo hi step SLICE slice + INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y) + MAKEDICT // - MAKEDICT dict + + // --- opcodes with an argument must go below this line --- + + // control flow + JMP // - JMP<addr> - + CJMP // cond CJMP<addr> - + ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator] + // or: - ITERJMP<addr> - (and jump) + + INT // - INT<constant> value + FLOAT // - FLOAT<constant> value + BIGINT // - BIGINT<constant> value + STRING // - STRING<constant> value + MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple + MAKELIST // x1 ... xn MAKELIST<n> list + MAKEFUNC // args kwargs MAKEFUNC<func> fn + LOAD // from1 ... fromN module LOAD<n> v1 ... vN + SETLOCAL // value SETLOCAL<local> - + SETGLOBAL // value SETGLOBAL<global> - + LOCAL // - LOCAL<local> value + FREE // - FREE<freevar> value + GLOBAL // - GLOBAL<global> value + PREDECLARED // - PREDECLARED<name> value + UNIVERSAL // - UNIVERSAL<name> value + ATTR // x ATTR<name> y y = x.name + SETFIELD // x y SETFIELD<name> - x.name = y + UNPACK // iterable UNPACK<n> vn ... v1 + + // n>>8 is #positional args and n&0xff is #named args (pairs). + CALL // fn positional named CALL<n> result + CALL_VAR // fn positional named *args CALL_VAR<n> result + CALL_KW // fn positional named **kwargs CALL_KW<n> result + CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result + + OpcodeArgMin = JMP + OpcodeMax = CALL_VAR_KW +) + +var opcodeNames = [...]string{ + AMP: "amp", + APPEND: "append", + ATTR: "attr", + BIGINT: "bigint", + CALL: "call", + CALL_KW: "call_kw ", + CALL_VAR: "call_var", + CALL_VAR_KW: "call_var_kw", + CJMP: "cjmp", + DUP: "dup", + DUP2: "dup2", + EQL: "eql", + FALSE: "false", + FLOAT: "float", + FREE: "free", + GE: "ge", + GLOBAL: "global", + GT: "gt", + IN: "in", + INDEX: "index", + INPLACE_ADD: "inplace_add", + INT: "int", + ITERJMP: "iterjmp", + ITERPUSH: "iterpush", + ITERPOP: "iterpop", + JMP: "jmp", + LE: "le", + LOAD: "load", + LOCAL: "local", + LT: "lt", + MAKEDICT: "makedict", + MAKEFUNC: "makefunc", + MAKELIST: "makelist", + MAKETUPLE: "maketuple", + MINUS: "minus", + NEQ: "neq", + NONE: "none", + NOP: "nop", + NOT: "not", + PERCENT: "percent", + PIPE: "pipe", + PLUS: "plus", + POP: "pop", + PREDECLARED: "predeclared", + RETURN: "return", + SETDICT: "setdict", + SETDICTUNIQ: "setdictuniq", + SETFIELD: "setfield", + SETGLOBAL: "setglobal", + SETINDEX: "setindex", + SETLOCAL: "setlocal", + SLASH: "slash", + SLASHSLASH: "slashslash", + SLICE: "slice", + STAR: "star", + STRING: "string", + TRUE: "true", + UMINUS: "uminus", + UNIVERSAL: "universal", + UNPACK: "unpack", + UPLUS: "uplus", +} + +const variableStackEffect = 0x7f + +// stackEffect records the effect on the size of the operand stack of +// each kind of instruction. For some instructions this requires computation. +var stackEffect = [...]int8{ + AMP: -1, + APPEND: -2, + ATTR: 0, + BIGINT: +1, + CALL: variableStackEffect, + CALL_KW: variableStackEffect, + CALL_VAR: variableStackEffect, + CALL_VAR_KW: variableStackEffect, + CJMP: -1, + DUP: +1, + DUP2: +2, + EQL: -1, + FALSE: +1, + FLOAT: +1, + FREE: +1, + GE: -1, + GLOBAL: +1, + GT: -1, + IN: -1, + INDEX: -1, + INPLACE_ADD: -1, + INT: +1, + ITERJMP: variableStackEffect, + ITERPUSH: -1, + ITERPOP: 0, + JMP: 0, + LE: -1, + LOAD: -1, + LOCAL: +1, + LT: -1, + MAKEDICT: +1, + MAKEFUNC: -1, + MAKELIST: variableStackEffect, + MAKETUPLE: variableStackEffect, + MINUS: -1, + NEQ: -1, + NONE: +1, + NOP: 0, + NOT: 0, + PERCENT: -1, + PREDECLARED: +1, + PIPE: -1, + PLUS: -1, + POP: -1, + RETURN: -1, + SETDICT: -3, + SETDICTUNIQ: -3, + SETFIELD: -2, + SETGLOBAL: -1, + SETINDEX: -3, + SETLOCAL: -1, + SLASH: -1, + SLASHSLASH: -1, + SLICE: -3, + STAR: -1, + STRING: +1, + TRUE: +1, + UNPACK: variableStackEffect, + UNIVERSAL: +1, +} + +func (op Opcode) String() string { + if op < OpcodeMax { + return opcodeNames[op] + } + return fmt.Sprintf("illegal op (%d)", op) +} + +// A Program is a Skylark file in executable form. +// +// Programs are serialized by the gobProgram function, +// which must be updated whenever this declaration is changed. +type Program struct { + Loads []Ident // name (really, string) and position of each load stmt + Names []string // names of attributes and predeclared variables + Constants []interface{} // = string | int64 | float64 + Functions []*Funcode + Globals []Ident // for error messages and tracing + Toplevel *Funcode // module initialization function +} + +// A Funcode is the code of a compiled Skylark function. +// +// Funcodes are serialized by the gobFunc function, +// which must be updated whenever this declaration is changed. +type Funcode struct { + Prog *Program + Pos syntax.Position // position of def or lambda token + Name string // name of this function + Code []byte // the byte code + pclinetab []uint16 // mapping from pc to linenum + Locals []Ident // for error messages and tracing + Freevars []Ident // for tracing + MaxStack int + NumParams int + HasVarargs, HasKwargs bool +} + +// An Ident is the name and position of an identifier. +type Ident struct { + Name string + Pos syntax.Position +} + +// A pcomp holds the compiler state for a Program. +type pcomp struct { + prog *Program // what we're building + + names map[string]uint32 + constants map[interface{}]uint32 + functions map[*Funcode]uint32 +} + +// An fcomp holds the compiler state for a Funcode. +type fcomp struct { + fn *Funcode // what we're building + + pcomp *pcomp + pos syntax.Position // current position of generated code + loops []loop + block *block +} + +type loop struct { + break_, continue_ *block +} + +type block struct { + insns []insn + + // If the last insn is a RETURN, jmp and cjmp are nil. + // If the last insn is a CJMP or ITERJMP, + // cjmp and jmp are the "true" and "false" successors. + // Otherwise, jmp is the sole successor. + jmp, cjmp *block + + initialstack int // for stack depth computation + + // Used during encoding + index int // -1 => not encoded yet + addr uint32 +} + +type insn struct { + op Opcode + arg uint32 + line int32 +} + +func (fn *Funcode) Position(pc uint32) syntax.Position { + // Conceptually the table contains rows of the form (pc uint32, + // line int32). Since the pc always increases, usually by a + // small amount, and the line number typically also does too + // although it may decrease, again typically by a small amount, + // we use delta encoding, starting from {pc: 0, line: 0}. + // + // Each entry is encoded in 16 bits. + // The top 8 bits are the unsigned delta pc; the next 7 bits are + // the signed line number delta; and the bottom bit indicates + // that more rows follow because one of the deltas was maxed out. + // + // TODO(adonovan): opt: improve the encoding; include the column. + + pos := fn.Pos // copy the (annoyingly inaccessible) filename + pos.Line = 0 + pos.Col = 0 + + // Position returns the record for the + // largest PC value not greater than 'pc'. + var prevpc uint32 + complete := true + for _, x := range fn.pclinetab { + nextpc := prevpc + uint32(x>>8) + if complete && nextpc > pc { + return pos + } + prevpc = nextpc + pos.Line += int32(int8(x) >> 1) // sign extend Δline from 7 to 32 bits + complete = (x & 1) == 0 + } + return pos +} + +// idents convert syntactic identifiers to compiled form. +func idents(ids []*syntax.Ident) []Ident { + res := make([]Ident, len(ids)) + for i, id := range ids { + res[i].Name = id.Name + res[i].Pos = id.NamePos + } + return res +} + +// Expr compiles an expression to a program consisting of a single toplevel function. +func Expr(expr syntax.Expr, locals []*syntax.Ident) *Funcode { + stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} + return File(stmts, locals, nil).Toplevel +} + +// File compiles the statements of a file into a program. +func File(stmts []syntax.Stmt, locals, globals []*syntax.Ident) *Program { + pcomp := &pcomp{ + prog: &Program{ + Globals: idents(globals), + }, + names: make(map[string]uint32), + constants: make(map[interface{}]uint32), + functions: make(map[*Funcode]uint32), + } + + var pos syntax.Position + if len(stmts) > 0 { + pos = syntax.Start(stmts[0]) + } + + pcomp.prog.Toplevel = pcomp.function("<toplevel>", pos, stmts, locals, nil) + + return pcomp.prog +} + +func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*syntax.Ident) *Funcode { + fcomp := &fcomp{ + pcomp: pcomp, + pos: pos, + fn: &Funcode{ + Prog: pcomp.prog, + Pos: pos, + Name: name, + Locals: idents(locals), + Freevars: idents(freevars), + }, + } + + if debug { + fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) + } + + // Convert AST to a CFG of instructions. + entry := fcomp.newBlock() + fcomp.block = entry + fcomp.stmts(stmts) + if fcomp.block != nil { + fcomp.emit(NONE) + fcomp.emit(RETURN) + } + + var oops bool // something bad happened + + setinitialstack := func(b *block, depth int) { + if b.initialstack == -1 { + b.initialstack = depth + } else if b.initialstack != depth { + fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", + b.index, b.initialstack, depth) + oops = true + } + } + + // Linearize the CFG: + // compute order, address, and initial + // stack depth of each reachable block. + var pc uint32 + var blocks []*block + var maxstack int + var visit func(b *block) + visit = func(b *block) { + if b.index >= 0 { + return // already visited + } + b.index = len(blocks) + b.addr = pc + blocks = append(blocks, b) + + stack := b.initialstack + if debug { + fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) + } + var cjmpAddr *uint32 + var isiterjmp int + for i, insn := range b.insns { + pc++ + + // Compute size of argument. + if insn.op >= OpcodeArgMin { + switch insn.op { + case ITERJMP: + isiterjmp = 1 + fallthrough + case CJMP: + cjmpAddr = &b.insns[i].arg + pc += 4 + default: + pc += uint32(argLen(insn.arg)) + } + } + + // Compute effect on stack. + se := insn.stackeffect() + if debug { + fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) + } + stack += se + if stack < 0 { + fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) + oops = true + } + if stack+isiterjmp > maxstack { + maxstack = stack + isiterjmp + } + } + + if debug { + fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", + b.addr, b.index) + if b.jmp != nil { + fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) + } + if b.cjmp != nil { + fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) + } + } + + // Place the jmp block next. + if b.jmp != nil { + // jump threading (empty cycles are impossible) + for b.jmp.insns == nil { + b.jmp = b.jmp.jmp + } + + setinitialstack(b.jmp, stack+isiterjmp) + if b.jmp.index < 0 { + // Successor is not yet visited: + // place it next and fall through. + visit(b.jmp) + } else { + // Successor already visited; + // explicit backward jump required. + pc += 5 + } + } + + // Then the cjmp block. + if b.cjmp != nil { + // jump threading (empty cycles are impossible) + for b.cjmp.insns == nil { + b.cjmp = b.cjmp.jmp + } + + setinitialstack(b.cjmp, stack) + visit(b.cjmp) + + // Patch the CJMP/ITERJMP, if present. + if cjmpAddr != nil { + *cjmpAddr = b.cjmp.addr + } + } + } + setinitialstack(entry, 0) + visit(entry) + + fn := fcomp.fn + fn.MaxStack = maxstack + + // Emit bytecode (and position table). + if debug { + fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) + } + fcomp.generate(blocks, pc) + + if debug { + fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) + } + + // Don't panic until we've completed printing of the function. + if oops { + panic("internal error") + } + + if debug { + fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) + } + + return fn +} + +func (insn *insn) stackeffect() int { + se := int(stackEffect[insn.op]) + if se == variableStackEffect { + arg := int(insn.arg) + switch insn.op { + case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: + se = -int(2*(insn.arg&0xff) + insn.arg>>8) + if insn.op != CALL { + se-- + } + if insn.op == CALL_VAR_KW { + se-- + } + case ITERJMP: + // Stack effect differs by successor: + // +1 for jmp/false/ok + // 0 for cjmp/true/exhausted + // Handled specially in caller. + se = 0 + case MAKELIST, MAKETUPLE: + se = 1 - arg + case UNPACK: + se = arg - 1 + default: + panic(insn.op) + } + } + return se +} + +// generate emits the linear instruction stream from the CFG, +// and builds the PC-to-line number table. +func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { + code := make([]byte, 0, codelen) + var pclinetab []uint16 + var prev struct { + pc uint32 + line int32 + } + + for _, b := range blocks { + if debug { + fmt.Fprintf(os.Stderr, "%d:\n", b.index) + } + pc := b.addr + for _, insn := range b.insns { + if insn.line != 0 { + // Instruction has a source position. Delta-encode it. + // See Funcode.Position for the encoding. + for { + var incomplete uint16 + + deltapc := pc - prev.pc + if deltapc > 0xff { + deltapc = 0xff + incomplete = 1 + } + prev.pc += deltapc + + deltaline := insn.line - prev.line + if deltaline > 0x3f { + deltaline = 0x3f + incomplete = 1 + } else if deltaline < -0x40 { + deltaline = -0x40 + incomplete = 1 + } + prev.line += deltaline + + entry := uint16(deltapc<<8) | uint16(uint8(deltaline<<1)) | incomplete + pclinetab = append(pclinetab, entry) + if incomplete == 0 { + break + } + } + + if debug { + fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s %d\n", + filepath.Base(fcomp.fn.Pos.Filename()), insn.line) + } + } + if debug { + PrintOp(fcomp.fn, pc, insn.op, insn.arg) + } + code = append(code, byte(insn.op)) + pc++ + if insn.op >= OpcodeArgMin { + if insn.op == CJMP || insn.op == ITERJMP { + code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes + } else { + code = addUint32(code, insn.arg, 0) + } + pc = uint32(len(code)) + } + } + + if b.jmp != nil && b.jmp.index != b.index+1 { + addr := b.jmp.addr + if debug { + fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", + pc, addr, b.jmp.index) + } + code = append(code, byte(JMP)) + code = addUint32(code, addr, 4) + } + } + if len(code) != int(codelen) { + panic("internal error: wrong code length") + } + + fcomp.fn.pclinetab = pclinetab + fcomp.fn.Code = code +} + +// addUint32 encodes x as 7-bit little-endian varint. +// TODO(adonovan): opt: steal top two bits of opcode +// to encode the number of complete bytes that follow. +func addUint32(code []byte, x uint32, min int) []byte { + end := len(code) + min + for x >= 0x80 { + code = append(code, byte(x)|0x80) + x >>= 7 + } + code = append(code, byte(x)) + // Pad the operand with NOPs to exactly min bytes. + for len(code) < end { + code = append(code, byte(NOP)) + } + return code +} + +func argLen(x uint32) int { + n := 0 + for x >= 0x80 { + n++ + x >>= 7 + } + return n + 1 +} + +// PrintOp prints an instruction. +// It is provided for debugging. +func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { + if op < OpcodeArgMin { + fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) + return + } + + var comment string + switch op { + case INT, FLOAT, BIGINT: + comment = fmt.Sprint(fn.Prog.Constants[arg]) + case STRING: + comment = fmt.Sprintf("%q", fn.Prog.Constants[arg]) + case MAKEFUNC: + comment = fn.Prog.Functions[arg].Name + case SETLOCAL, LOCAL: + comment = fn.Locals[arg].Name + case SETGLOBAL, GLOBAL: + comment = fn.Prog.Globals[arg].Name + case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: + comment = fn.Prog.Names[arg] + case FREE: + comment = fn.Freevars[arg].Name + case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: + comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) + default: + // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: + // arg is just a number + } + var buf bytes.Buffer + fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) + if comment != "" { + fmt.Fprint(&buf, "\t; ", comment) + } + fmt.Fprintln(&buf) + os.Stderr.Write(buf.Bytes()) +} + +// newBlock returns a new block. +func (fcomp) newBlock() *block { + return &block{index: -1, initialstack: -1} +} + +// emit emits an instruction to the current block. +func (fcomp *fcomp) emit(op Opcode) { + if op >= OpcodeArgMin { + panic("missing arg: " + op.String()) + } + insn := insn{op: op, line: fcomp.pos.Line} + fcomp.block.insns = append(fcomp.block.insns, insn) + fcomp.pos.Line = 0 +} + +// emit1 emits an instruction with an immediate operand. +func (fcomp *fcomp) emit1(op Opcode, arg uint32) { + if op < OpcodeArgMin { + panic("unwanted arg: " + op.String()) + } + insn := insn{op: op, arg: arg, line: fcomp.pos.Line} + fcomp.block.insns = append(fcomp.block.insns, insn) + fcomp.pos.Line = 0 +} + +// jump emits a jump to the specified block. +// On return, the current block is unset. +func (fcomp *fcomp) jump(b *block) { + if b == fcomp.block { + panic("self-jump") // unreachable: Skylark has no arbitrary looping constructs + } + fcomp.block.jmp = b + fcomp.block = nil +} + +// condjump emits a conditional jump (CJMP or ITERJMP) +// to the specified true/false blocks. +// (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) +// On return, the current block is unset. +func (fcomp *fcomp) condjump(op Opcode, t, f *block) { + if !(op == CJMP || op == ITERJMP) { + panic("not a conditional jump: " + op.String()) + } + fcomp.emit1(op, 0) // fill in address later + fcomp.block.cjmp = t + fcomp.jump(f) +} + +// nameIndex returns the index of the specified name +// within the name pool, adding it if necessary. +func (pcomp *pcomp) nameIndex(name string) uint32 { + index, ok := pcomp.names[name] + if !ok { + index = uint32(len(pcomp.prog.Names)) + pcomp.names[name] = index + pcomp.prog.Names = append(pcomp.prog.Names, name) + } + return index +} + +// constantIndex returns the index of the specified constant +// within the constant pool, adding it if necessary. +func (pcomp *pcomp) constantIndex(v interface{}) uint32 { + index, ok := pcomp.constants[v] + if !ok { + index = uint32(len(pcomp.prog.Constants)) + pcomp.constants[v] = index + pcomp.prog.Constants = append(pcomp.prog.Constants, v) + } + return index +} + +// functionIndex returns the index of the specified function +// AST the nestedfun pool, adding it if necessary. +func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { + index, ok := pcomp.functions[fn] + if !ok { + index = uint32(len(pcomp.prog.Functions)) + pcomp.functions[fn] = index + pcomp.prog.Functions = append(pcomp.prog.Functions, fn) + } + return index +} + +// string emits code to push the specified string. +func (fcomp *fcomp) string(s string) { + fcomp.emit1(STRING, fcomp.pcomp.constantIndex(s)) +} + +// int64 emits code to push the specified integer. +func (fcomp *fcomp) int64(i int64) { + fcomp.emit1(INT, fcomp.pcomp.constantIndex(i)) +} + +// setPos sets the current source position. +// It should be called prior to any operation that can fail dynamically. +// All positions are assumed to belong to the same file. +func (fcomp *fcomp) setPos(pos syntax.Position) { + fcomp.pos = pos +} + +// set emits code to store the top-of-stack value +// to the specified local or global variable. +func (fcomp *fcomp) set(id *syntax.Ident) { + switch resolve.Scope(id.Scope) { + case resolve.Local: + fcomp.emit1(SETLOCAL, uint32(id.Index)) + case resolve.Global: + fcomp.emit1(SETGLOBAL, uint32(id.Index)) + default: + log.Fatalf("%s: set(%s): neither global nor local (%d)", id.NamePos, id.Name, id.Scope) + } +} + +// lookup emits code to push the value of the specified variable. +func (fcomp *fcomp) lookup(id *syntax.Ident) { + switch resolve.Scope(id.Scope) { + case resolve.Local: + fcomp.setPos(id.NamePos) + fcomp.emit1(LOCAL, uint32(id.Index)) + case resolve.Free: + fcomp.emit1(FREE, uint32(id.Index)) + case resolve.Global: + fcomp.setPos(id.NamePos) + fcomp.emit1(GLOBAL, uint32(id.Index)) + case resolve.Predeclared: + fcomp.setPos(id.NamePos) + fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) + case resolve.Universal: + fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) + default: + log.Fatalf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, id.Scope) + } +} + +func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { + for _, stmt := range stmts { + fcomp.stmt(stmt) + } +} + +func (fcomp *fcomp) stmt(stmt syntax.Stmt) { + switch stmt := stmt.(type) { + case *syntax.ExprStmt: + if _, ok := stmt.X.(*syntax.Literal); ok { + // Opt: don't compile doc comments only to pop them. + return + } + fcomp.expr(stmt.X) + fcomp.emit(POP) + + case *syntax.BranchStmt: + // Resolver invariant: break/continue appear only within loops. + switch stmt.Token { + case syntax.PASS: + // no-op + case syntax.BREAK: + b := fcomp.loops[len(fcomp.loops)-1].break_ + fcomp.jump(b) + fcomp.block = fcomp.newBlock() // dead code + case syntax.CONTINUE: + b := fcomp.loops[len(fcomp.loops)-1].continue_ + fcomp.jump(b) + fcomp.block = fcomp.newBlock() // dead code + } + + case *syntax.IfStmt: + // Keep consistent with CondExpr. + t := fcomp.newBlock() + f := fcomp.newBlock() + done := fcomp.newBlock() + + fcomp.ifelse(stmt.Cond, t, f) + + fcomp.block = t + fcomp.stmts(stmt.True) + fcomp.jump(done) + + fcomp.block = f + fcomp.stmts(stmt.False) + fcomp.jump(done) + + fcomp.block = done + + case *syntax.AssignStmt: + switch stmt.Op { + case syntax.EQ: + // simple assignment: x = y + fcomp.expr(stmt.RHS) + fcomp.assign(stmt.OpPos, stmt.LHS) + + case syntax.PLUS_EQ, + syntax.MINUS_EQ, + syntax.STAR_EQ, + syntax.SLASH_EQ, + syntax.SLASHSLASH_EQ, + syntax.PERCENT_EQ: + // augmented assignment: x += y + + var set func() + + // Evaluate "address" of x exactly once to avoid duplicate side-effects. + switch lhs := stmt.LHS.(type) { + case *syntax.Ident: + // x = ... + fcomp.lookup(lhs) + set = func() { + fcomp.set(lhs) + } + + case *syntax.IndexExpr: + // x[y] = ... + fcomp.expr(lhs.X) + fcomp.expr(lhs.Y) + fcomp.emit(DUP2) + fcomp.setPos(lhs.Lbrack) + fcomp.emit(INDEX) + set = func() { + fcomp.setPos(lhs.Lbrack) + fcomp.emit(SETINDEX) + } + + case *syntax.DotExpr: + // x.f = ... + fcomp.expr(lhs.X) + fcomp.emit(DUP) + name := fcomp.pcomp.nameIndex(lhs.Name.Name) + fcomp.setPos(lhs.Dot) + fcomp.emit1(ATTR, name) + set = func() { + fcomp.setPos(lhs.Dot) + fcomp.emit1(SETFIELD, name) + } + + default: + panic(lhs) + } + + fcomp.expr(stmt.RHS) + + if stmt.Op == syntax.PLUS_EQ { + // Allow the runtime to optimize list += iterable. + fcomp.setPos(stmt.OpPos) + fcomp.emit(INPLACE_ADD) + } else { + fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) + } + set() + } + + case *syntax.DefStmt: + fcomp.function(stmt.Def, stmt.Name.Name, &stmt.Function) + fcomp.set(stmt.Name) + + case *syntax.ForStmt: + // Keep consistent with ForClause. + head := fcomp.newBlock() + body := fcomp.newBlock() + tail := fcomp.newBlock() + + fcomp.expr(stmt.X) + fcomp.setPos(stmt.For) + fcomp.emit(ITERPUSH) + fcomp.jump(head) + + fcomp.block = head + fcomp.condjump(ITERJMP, tail, body) + + fcomp.block = body + fcomp.assign(stmt.For, stmt.Vars) + fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) + fcomp.stmts(stmt.Body) + fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] + fcomp.jump(head) + + fcomp.block = tail + fcomp.emit(ITERPOP) + + case *syntax.ReturnStmt: + if stmt.Result != nil { + fcomp.expr(stmt.Result) + } else { + fcomp.emit(NONE) + } + fcomp.emit(RETURN) + fcomp.block = fcomp.newBlock() // dead code + + case *syntax.LoadStmt: + for i := range stmt.From { + fcomp.string(stmt.From[i].Name) + } + module := stmt.Module.Value.(string) + fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Ident{ + Name: module, + Pos: stmt.Module.TokenPos, + }) + fcomp.string(module) + fcomp.setPos(stmt.Load) + fcomp.emit1(LOAD, uint32(len(stmt.From))) + for i := range stmt.To { + fcomp.emit1(SETGLOBAL, uint32(stmt.To[len(stmt.To)-1-i].Index)) + } + + default: + start, _ := stmt.Span() + log.Fatalf("%s: exec: unexpected statement %T", start, stmt) + } +} + +// assign implements lhs = rhs for arbitrary expressions lhs. +// RHS is on top of stack, consumed. +func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { + switch lhs := lhs.(type) { + case *syntax.ParenExpr: + // (lhs) = rhs + fcomp.assign(pos, lhs.X) + + case *syntax.Ident: + // x = rhs + fcomp.set(lhs) + + case *syntax.TupleExpr: + // x, y = rhs + fcomp.assignSequence(pos, lhs.List) + + case *syntax.ListExpr: + // [x, y] = rhs + fcomp.assignSequence(pos, lhs.List) + + case *syntax.IndexExpr: + // x[y] = rhs + fcomp.expr(lhs.X) + fcomp.emit(EXCH) + fcomp.expr(lhs.Y) + fcomp.emit(EXCH) + fcomp.setPos(lhs.Lbrack) + fcomp.emit(SETINDEX) + + case *syntax.DotExpr: + // x.f = rhs + fcomp.expr(lhs.X) + fcomp.emit(EXCH) + fcomp.setPos(lhs.Dot) + fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) + + default: + panic(lhs) + } +} + +func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { + fcomp.setPos(pos) + fcomp.emit1(UNPACK, uint32(len(lhs))) + for i := range lhs { + fcomp.assign(pos, lhs[i]) + } +} + +func (fcomp *fcomp) expr(e syntax.Expr) { + switch e := e.(type) { + case *syntax.ParenExpr: + fcomp.expr(e.X) + + case *syntax.Ident: + fcomp.lookup(e) + + case *syntax.Literal: + switch e.Token { + case syntax.INT: + switch e.Value.(type) { + case int64: + fcomp.int64(e.Value.(int64)) + case *big.Int: + fcomp.emit1(BIGINT, fcomp.pcomp.constantIndex(e.Value.(*big.Int).String())) + } + case syntax.FLOAT: + fcomp.emit1(FLOAT, fcomp.pcomp.constantIndex(e.Value.(float64))) + case syntax.STRING: + fcomp.string(e.Value.(string)) + } + + case *syntax.ListExpr: + for _, x := range e.List { + fcomp.expr(x) + } + fcomp.emit1(MAKELIST, uint32(len(e.List))) + + case *syntax.CondExpr: + // Keep consistent with IfStmt. + t := fcomp.newBlock() + f := fcomp.newBlock() + done := fcomp.newBlock() + + fcomp.ifelse(e.Cond, t, f) + + fcomp.block = t + fcomp.expr(e.True) + fcomp.jump(done) + + fcomp.block = f + fcomp.expr(e.False) + fcomp.jump(done) + + fcomp.block = done + + case *syntax.IndexExpr: + fcomp.expr(e.X) + fcomp.expr(e.Y) + fcomp.setPos(e.Lbrack) + fcomp.emit(INDEX) + + case *syntax.SliceExpr: + fcomp.setPos(e.Lbrack) + fcomp.expr(e.X) + if e.Lo != nil { + fcomp.expr(e.Lo) + } else { + fcomp.emit(NONE) + } + if e.Hi != nil { + fcomp.expr(e.Hi) + } else { + fcomp.emit(NONE) + } + if e.Step != nil { + fcomp.expr(e.Step) + } else { + fcomp.emit(NONE) + } + fcomp.emit(SLICE) + + case *syntax.Comprehension: + if e.Curly { + fcomp.emit(MAKEDICT) + } else { + fcomp.emit1(MAKELIST, 0) + } + fcomp.comprehension(e, 0) + + case *syntax.TupleExpr: + fcomp.tuple(e.List) + + case *syntax.DictExpr: + fcomp.emit(MAKEDICT) + for _, entry := range e.List { + entry := entry.(*syntax.DictEntry) + fcomp.emit(DUP) + fcomp.expr(entry.Key) + fcomp.expr(entry.Value) + fcomp.setPos(entry.Colon) + fcomp.emit(SETDICTUNIQ) + } + + case *syntax.UnaryExpr: + fcomp.expr(e.X) + fcomp.setPos(e.OpPos) + switch e.Op { + case syntax.MINUS: + fcomp.emit(UMINUS) + case syntax.PLUS: + fcomp.emit(UPLUS) + case syntax.NOT: + fcomp.emit(NOT) + default: + log.Fatalf("%s: unexpected unary op: %s", e.OpPos, e.Op) + } + + case *syntax.BinaryExpr: + // short-circuit operators + // TODO(adonovan): use ifelse to simplify conditions. + switch e.Op { + case syntax.OR: + // x or y => if x then x else y + done := fcomp.newBlock() + y := fcomp.newBlock() + + fcomp.expr(e.X) + fcomp.emit(DUP) + fcomp.condjump(CJMP, done, y) + + fcomp.block = y + fcomp.emit(POP) // discard X + fcomp.expr(e.Y) + fcomp.jump(done) + + fcomp.block = done + return + + case syntax.AND: + // x and y => if x then y else x + done := fcomp.newBlock() + y := fcomp.newBlock() + + fcomp.expr(e.X) + fcomp.emit(DUP) + fcomp.condjump(CJMP, y, done) + + fcomp.block = y + fcomp.emit(POP) // discard X + fcomp.expr(e.Y) + fcomp.jump(done) + + fcomp.block = done + return + } + + // strict binary operator (includes comparisons) + fcomp.expr(e.X) + fcomp.expr(e.Y) + fcomp.binop(e.OpPos, e.Op) + + case *syntax.DotExpr: + fcomp.expr(e.X) + fcomp.setPos(e.Dot) + fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) + + case *syntax.CallExpr: + fcomp.call(e) + + case *syntax.LambdaExpr: + fcomp.function(e.Lambda, "lambda", &e.Function) + + default: + start, _ := e.Span() + log.Fatalf("%s: unexpected expr %T", start, e) + } +} + +func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { + // TODO(adonovan): opt: simplify constant expressions, especially string+string. + // See comment at top. + + // TODO(adonovan): simplify by assuming syntax and compiler constants align. + fcomp.setPos(pos) + switch op { + // arithmetic + case syntax.PLUS: + fcomp.emit(PLUS) + case syntax.MINUS: + fcomp.emit(MINUS) + case syntax.STAR: + fcomp.emit(STAR) + case syntax.SLASH: + fcomp.emit(SLASH) + case syntax.SLASHSLASH: + fcomp.emit(SLASHSLASH) + case syntax.PERCENT: + fcomp.emit(PERCENT) + case syntax.AMP: + fcomp.emit(AMP) + case syntax.PIPE: + fcomp.emit(PIPE) + case syntax.IN: + fcomp.emit(IN) + case syntax.NOT_IN: + fcomp.emit(IN) + fcomp.emit(NOT) + + // comparisons + case syntax.EQL, + syntax.NEQ, + syntax.GT, + syntax.LT, + syntax.LE, + syntax.GE: + fcomp.emit(Opcode(op-syntax.EQL) + EQL) + + default: + log.Fatalf("%s: unexpected binary op: %s", pos, op) + } +} + +func (fcomp *fcomp) call(call *syntax.CallExpr) { + // TODO(adonovan): opt: Use optimized path for calling methods + // of built-ins: x.f(...). + // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { + // fcomp.expr(dot.X) + // fcomp.args(call) + // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) + // return + // } + + // usual case + fcomp.expr(call.Fn) + op, arg := fcomp.args(call) + fcomp.setPos(call.Lparen) + fcomp.emit1(op, arg) +} + +// args emits code to push a tuple of positional arguments +// and a tuple of named arguments containing alternating keys and values. +// Either or both tuples may be empty (TODO(adonovan): optimize). +func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { + var callmode int + // Compute the number of each kind of parameter. + // TODO(adonovan): do this in resolver. + var p, n int // number of positional, named arguments + var varargs, kwargs syntax.Expr + for _, arg := range call.Args { + if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { + n++ + continue + } + if unary, ok := arg.(*syntax.UnaryExpr); ok { + if unary.Op == syntax.STAR { + callmode |= 1 + varargs = unary.X + continue + } else if unary.Op == syntax.STARSTAR { + callmode |= 2 + kwargs = unary.X + continue + } + } + p++ + } + + // positional arguments + for _, elem := range call.Args[:p] { + fcomp.expr(elem) + } + + // named argument pairs (name, value, ..., name, value) + named := call.Args[p : p+n] + for _, arg := range named { + binary := arg.(*syntax.BinaryExpr) + fcomp.string(binary.X.(*syntax.Ident).Name) + fcomp.expr(binary.Y) + } + + // *args + if varargs != nil { + fcomp.expr(varargs) + } + + // **kwargs + if kwargs != nil { + fcomp.expr(kwargs) + } + + // TODO(adonovan): don't do this. + if p >= 256 || n >= 256 { + log.Fatalf("%s: compiler error: too many arguments in call", call.Lparen) + } + + return CALL + Opcode(callmode), uint32(p<<8 | n) +} + +func (fcomp *fcomp) tuple(elems []syntax.Expr) { + for _, elem := range elems { + fcomp.expr(elem) + } + fcomp.emit1(MAKETUPLE, uint32(len(elems))) +} + +func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { + if clauseIndex == len(comp.Clauses) { + fcomp.emit(DUP) // accumulator + if comp.Curly { + // dict: {k:v for ...} + // Parser ensures that body is of form k:v. + // Python-style set comprehensions {body for vars in x} + // are not supported. + entry := comp.Body.(*syntax.DictEntry) + fcomp.expr(entry.Key) + fcomp.expr(entry.Value) + fcomp.setPos(entry.Colon) + fcomp.emit(SETDICT) + } else { + // list: [body for vars in x] + fcomp.expr(comp.Body) + fcomp.emit(APPEND) + } + return + } + + clause := comp.Clauses[clauseIndex] + switch clause := clause.(type) { + case *syntax.IfClause: + t := fcomp.newBlock() + done := fcomp.newBlock() + fcomp.ifelse(clause.Cond, t, done) + + fcomp.block = t + fcomp.comprehension(comp, clauseIndex+1) + fcomp.jump(done) + + fcomp.block = done + return + + case *syntax.ForClause: + // Keep consistent with ForStmt. + head := fcomp.newBlock() + body := fcomp.newBlock() + tail := fcomp.newBlock() + + fcomp.expr(clause.X) + fcomp.setPos(clause.For) + fcomp.emit(ITERPUSH) + fcomp.jump(head) + + fcomp.block = head + fcomp.condjump(ITERJMP, tail, body) + + fcomp.block = body + fcomp.assign(clause.For, clause.Vars) + fcomp.comprehension(comp, clauseIndex+1) + fcomp.jump(head) + + fcomp.block = tail + fcomp.emit(ITERPOP) + return + } + + start, _ := clause.Span() + log.Fatalf("%s: unexpected comprehension clause %T", start, clause) +} + +func (fcomp *fcomp) function(pos syntax.Position, name string, f *syntax.Function) { + // Evalution of the elements of both MAKETUPLEs may fail, + // so record the position. + fcomp.setPos(pos) + + // Generate tuple of parameter defaults. + n := 0 + for _, param := range f.Params { + if binary, ok := param.(*syntax.BinaryExpr); ok { + fcomp.expr(binary.Y) + n++ + } + } + fcomp.emit1(MAKETUPLE, uint32(n)) + + // Capture the values of the function's + // free variables from the lexical environment. + for _, freevar := range f.FreeVars { + fcomp.lookup(freevar) + } + fcomp.emit1(MAKETUPLE, uint32(len(f.FreeVars))) + + funcode := fcomp.pcomp.function(name, pos, f.Body, f.Locals, f.FreeVars) + + if debug { + // TODO(adonovan): do compilations sequentially not as a tree, + // to make the log easier to read. + // Simplify by identifying Toplevel and functionIndex 0. + fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) + } + + funcode.NumParams = len(f.Params) + funcode.HasVarargs = f.HasVarargs + funcode.HasKwargs = f.HasKwargs + fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) +} + +// ifelse emits a Boolean control flow decision. +// On return, the current block is unset. +func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { + switch cond := cond.(type) { + case *syntax.UnaryExpr: + if cond.Op == syntax.NOT { + // if not x then goto t else goto f + // => + // if x then goto f else goto t + fcomp.ifelse(cond.X, f, t) + return + } + + case *syntax.BinaryExpr: + switch cond.Op { + case syntax.AND: + // if x and y then goto t else goto f + // => + // if x then ifelse(y, t, f) else goto f + fcomp.expr(cond.X) + y := fcomp.newBlock() + fcomp.condjump(CJMP, y, f) + + fcomp.block = y + fcomp.ifelse(cond.Y, t, f) + return + + case syntax.OR: + // if x or y then goto t else goto f + // => + // if x then goto t else ifelse(y, t, f) + fcomp.expr(cond.X) + y := fcomp.newBlock() + fcomp.condjump(CJMP, t, y) + + fcomp.block = y + fcomp.ifelse(cond.Y, t, f) + return + case syntax.NOT_IN: + // if x not in y then goto t else goto f + // => + // if x in y then goto f else goto t + copy := *cond + copy.Op = syntax.IN + fcomp.expr(©) + fcomp.condjump(CJMP, f, t) + return + } + } + + // general case + fcomp.expr(cond) + fcomp.condjump(CJMP, t, f) +} diff --git a/internal/compile/compile_test.go b/internal/compile/compile_test.go new file mode 100644 index 0000000..a545817 --- /dev/null +++ b/internal/compile/compile_test.go @@ -0,0 +1,74 @@ +package compile_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/google/skylark" +) + +// TestSerialization verifies that a serialized program can be loaded, +// deserialized, and executed. +func TestSerialization(t *testing.T) { + predeclared := skylark.StringDict{ + "x": skylark.String("mur"), + "n": skylark.MakeInt(2), + } + const src = ` +def mul(a, b): + return a * b + +y = mul(x, n) +` + _, oldProg, err := skylark.SourceProgram("mul.sky", src, predeclared.Has) + if err != nil { + t.Fatal(err) + } + + buf := new(bytes.Buffer) + if err := oldProg.Write(buf); err != nil { + t.Fatalf("oldProg.WriteTo: %v", err) + } + + newProg, err := skylark.CompiledProgram(buf) + if err != nil { + t.Fatalf("CompiledProgram: %v", err) + } + + thread := new(skylark.Thread) + globals, err := newProg.Init(thread, predeclared) + if err != nil { + t.Fatalf("newProg.Init: %v", err) + } + if got, want := globals["y"], skylark.String("murmur"); got != want { + t.Errorf("Value of global was %s, want %s", got, want) + t.Logf("globals: %v", globals) + } + + // Verify stack frame. + predeclared["n"] = skylark.None + _, err = newProg.Init(thread, predeclared) + evalErr, ok := err.(*skylark.EvalError) + if !ok { + t.Fatalf("newProg.Init call returned err %v, want *EvalError", err) + } + const want = `Traceback (most recent call last): + mul.sky:5: in <toplevel> + mul.sky:3: in mul +Error: unknown binary op: string * NoneType` + if got := evalErr.Backtrace(); got != want { + t.Fatalf("got <<%s>>, want <<%s>>", got, want) + } +} + +func TestGarbage(t *testing.T) { + const garbage = "This is not a compiled Skylark program." + _, err := skylark.CompiledProgram(strings.NewReader(garbage)) + if err == nil { + t.Fatalf("CompiledProgram did not report an error when decoding garbage") + } + if !strings.Contains(err.Error(), "not a compiled module") { + t.Fatalf("CompiledProgram reported the wrong error when decoding garbage: %v", err) + } +} diff --git a/internal/compile/serial.go b/internal/compile/serial.go new file mode 100644 index 0000000..706456f --- /dev/null +++ b/internal/compile/serial.go @@ -0,0 +1,164 @@ +package compile + +// This files defines functions to read and write a compile.Program to a file. +// Currently we use gob encoding because it is convenient. +// +// It is the client's responsibility to manage version skew between the +// compiler used to produce a file and the interpreter that consumes it. +// The version number is provided as a constant. Incompatible protocol +// changes should also increment the version number. + +import ( + "encoding/gob" + "fmt" + "io" + + "github.com/google/skylark/syntax" +) + +const magic = "!sky" + +type gobProgram struct { + Version int + Filename string + Loads []gobIdent + Names []string + Constants []interface{} + Functions []gobFunction + Globals []gobIdent + Toplevel gobFunction +} + +type gobFunction struct { + Id gobIdent // hack: name and pos + Code []byte + Pclinetab []uint16 + Locals []gobIdent + Freevars []gobIdent + MaxStack int + NumParams int + HasVarargs, HasKwargs bool +} + +type gobIdent struct { + Name string + Line, Col int32 // the filename is gobProgram.Filename +} + +// Write writes a compiled Skylark program to out. +func (prog *Program) Write(out io.Writer) error { + out.Write([]byte(magic)) + + gobIdents := func(idents []Ident) []gobIdent { + res := make([]gobIdent, len(idents)) + for i, id := range idents { + res[i].Name = id.Name + res[i].Line = id.Pos.Line + res[i].Col = id.Pos.Col + } + return res + } + + gobFunc := func(fn *Funcode) gobFunction { + return gobFunction{ + Id: gobIdent{ + Name: fn.Name, + Line: fn.Pos.Line, + Col: fn.Pos.Col, + }, + Code: fn.Code, + Pclinetab: fn.pclinetab, + Locals: gobIdents(fn.Locals), + Freevars: gobIdents(fn.Freevars), + MaxStack: fn.MaxStack, + NumParams: fn.NumParams, + HasVarargs: fn.HasVarargs, + HasKwargs: fn.HasKwargs, + } + } + + gp := &gobProgram{ + Version: Version, + Filename: prog.Toplevel.Pos.Filename(), + Loads: gobIdents(prog.Loads), + Names: prog.Names, + Constants: prog.Constants, + Functions: make([]gobFunction, len(prog.Functions)), + Globals: gobIdents(prog.Globals), + Toplevel: gobFunc(prog.Toplevel), + } + for i, f := range prog.Functions { + gp.Functions[i] = gobFunc(f) + } + + return gob.NewEncoder(out).Encode(gp) +} + +// ReadProgram reads a compiled Skylark program from in. +func ReadProgram(in io.Reader) (*Program, error) { + magicBuf := []byte(magic) + n, err := in.Read(magicBuf) + if err != nil { + return nil, err + } + if n != len(magic) { + return nil, fmt.Errorf("not a compiled module: no magic number") + } + if string(magicBuf) != magic { + return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", + magicBuf, magic) + } + + dec := gob.NewDecoder(in) + var gp gobProgram + if err := dec.Decode(&gp); err != nil { + return nil, fmt.Errorf("decoding program: %v", err) + } + + if gp.Version != Version { + return nil, fmt.Errorf("version mismatch: read %d, want %d", + gp.Version, Version) + } + + file := gp.Filename // copy, to avoid keeping gp live + + ungobIdents := func(idents []gobIdent) []Ident { + res := make([]Ident, len(idents)) + for i, id := range idents { + res[i].Name = id.Name + res[i].Pos = syntax.MakePosition(&file, id.Line, id.Col) + } + return res + } + + prog := &Program{ + Loads: ungobIdents(gp.Loads), + Names: gp.Names, + Constants: gp.Constants, + Globals: ungobIdents(gp.Globals), + Functions: make([]*Funcode, len(gp.Functions)), + } + + ungobFunc := func(gf *gobFunction) *Funcode { + pos := syntax.MakePosition(&file, gf.Id.Line, gf.Id.Col) + return &Funcode{ + Prog: prog, + Pos: pos, + Name: gf.Id.Name, + Code: gf.Code, + pclinetab: gf.Pclinetab, + Locals: ungobIdents(gf.Locals), + Freevars: ungobIdents(gf.Freevars), + MaxStack: gf.MaxStack, + NumParams: gf.NumParams, + HasVarargs: gf.HasVarargs, + HasKwargs: gf.HasKwargs, + } + } + + for i := range gp.Functions { + prog.Functions[i] = ungobFunc(&gp.Functions[i]) + } + prog.Toplevel = ungobFunc(&gp.Toplevel) + return prog, nil +} diff --git a/interp.go b/interp.go new file mode 100644 index 0000000..6854ebe --- /dev/null +++ b/interp.go @@ -0,0 +1,612 @@ +package skylark + +// This file defines the bytecode interpreter. + +import ( + "fmt" + "math/big" + "os" + "unsafe" + + "github.com/google/skylark/internal/compile" + "github.com/google/skylark/syntax" +) + +const vmdebug = false // TODO(adonovan): use a bitfield of specific kinds of error. + +// TODO(adonovan): +// - optimize position table. +// - opt: reduce allocations by preallocating a large stack, saving it +// in the thread, and slicing it. +// - opt: record MaxIterStack during compilation and preallocate the stack. + +func (fn *Function) Call(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) { + if debug { + fmt.Printf("call of %s %v %v\n", fn.Name(), args, kwargs) + } + + // detect recursion + for fr := thread.frame; fr != nil; fr = fr.parent { + // We look for the same function code, + // not function value, otherwise the user could + // defeat the check by writing the Y combinator. + if fr.fn != nil && fr.fn.funcode == fn.funcode { + return nil, fmt.Errorf("function %s called recursively", fn.Name()) + } + } + + thread.frame = &Frame{parent: thread.frame, fn: fn} + result, err := call(thread, args, kwargs) + thread.frame = thread.frame.parent + return result, err +} + +func call(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) { + fr := thread.frame + f := fr.fn.funcode + nlocals := len(f.Locals) + stack := make([]Value, nlocals+f.MaxStack) + locals := stack[:nlocals:nlocals] // local variables, starting with parameters + stack = stack[nlocals:] + + err := setArgs(locals, fr.fn, args, kwargs) + if err != nil { + return nil, fr.errorf(fr.fn.Position(), "%v", err) + } + + if vmdebug { + fmt.Printf("Entering %s @ %s\n", f.Name, f.Position(0)) + fmt.Printf("%d stack, %d locals\n", len(stack), len(locals)) + defer fmt.Println("Leaving ", f.Name) + } + + // TODO(adonovan): add static check that beneath this point + // - there is exactly one return statement + // - there is no redefinition of 'err'. + + var iterstack []Iterator // stack of active iterators + + sp := 0 + var pc, savedpc uint32 + var result Value + code := f.Code +loop: + for { + savedpc = pc + + op := compile.Opcode(code[pc]) + pc++ + var arg uint32 + if op >= compile.OpcodeArgMin { + // TODO(adonovan): opt: profile this. + // Perhaps compiling big endian would be less work to decode? + for s := uint(0); ; s += 7 { + b := code[pc] + pc++ + arg |= uint32(b&0x7f) << s + if b < 0x80 { + break + } + } + } + if vmdebug { + fmt.Fprintln(os.Stderr, stack[:sp]) // very verbose! + compile.PrintOp(f, savedpc, op, arg) + } + + switch op { + case compile.NOP: + // nop + + case compile.DUP: + stack[sp] = stack[sp-1] + sp++ + + case compile.DUP2: + stack[sp] = stack[sp-2] + stack[sp+1] = stack[sp-1] + sp += 2 + + case compile.POP: + sp-- + + case compile.EXCH: + stack[sp-2], stack[sp-1] = stack[sp-1], stack[sp-2] + + case compile.EQL, compile.NEQ, compile.GT, compile.LT, compile.LE, compile.GE: + op := syntax.Token(op-compile.EQL) + syntax.EQL + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + ok, err2 := Compare(op, x, y) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = Bool(ok) + sp++ + + case compile.PLUS, + compile.MINUS, + compile.STAR, + compile.SLASH, + compile.SLASHSLASH, + compile.PERCENT, + compile.PIPE, + compile.AMP, + compile.IN: + binop := syntax.Token(op-compile.PLUS) + syntax.PLUS + if op == compile.IN { + binop = syntax.IN // IN token is out of order + } + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + z, err2 := Binary(binop, x, y) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = z + sp++ + + case compile.UPLUS, compile.UMINUS: + unop := syntax.Token(op-compile.UPLUS) + syntax.PLUS + x := stack[sp-1] + y, err2 := Unary(unop, x) + if err2 != nil { + err = err2 + break loop + } + stack[sp-1] = y + + case compile.INPLACE_ADD: + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + + // It's possible that y is not Iterable but + // nonetheless defines x+y, in which case we + // should fall back to the general case. + var z Value + if xlist, ok := x.(*List); ok { + if yiter, ok := y.(Iterable); ok { + if err = xlist.checkMutable("apply += to", true); err != nil { + break loop + } + listExtend(xlist, yiter) + z = xlist + } + } + if z == nil { + z, err = Binary(syntax.PLUS, x, y) + if err != nil { + break loop + } + } + + stack[sp] = z + sp++ + + case compile.NONE: + stack[sp] = None + sp++ + + case compile.TRUE: + stack[sp] = True + sp++ + + case compile.FALSE: + stack[sp] = False + sp++ + + case compile.JMP: + pc = arg + + case compile.CALL, compile.CALL_VAR, compile.CALL_KW, compile.CALL_VAR_KW: + var kwargs Value + if op == compile.CALL_KW || op == compile.CALL_VAR_KW { + kwargs = stack[sp-1] + sp-- + } + + var args Value + if op == compile.CALL_VAR || op == compile.CALL_VAR_KW { + args = stack[sp-1] + sp-- + } + + // named args (pairs) + var kvpairs []Tuple + if nkvpairs := int(arg & 0xff); nkvpairs > 0 { + kvpairs = make([]Tuple, 0, nkvpairs) + kvpairsAlloc := make(Tuple, 2*nkvpairs) // allocate a single backing array + sp -= 2 * nkvpairs + for i := 0; i < nkvpairs; i++ { + pair := kvpairsAlloc[:2:2] + kvpairsAlloc = kvpairsAlloc[2:] + pair[0] = stack[sp+2*i] // name + pair[1] = stack[sp+2*i+1] // value + kvpairs = append(kvpairs, pair) + } + } + if kwargs != nil { + // Add key/value items from **kwargs dictionary. + dict, ok := kwargs.(*Dict) + if !ok { + err = fmt.Errorf("argument after ** must be a mapping, not %s", kwargs.Type()) + break loop + } + items := dict.Items() + for _, item := range items { + if _, ok := item[0].(String); !ok { + err = fmt.Errorf("keywords must be strings, not %s", item[0].Type()) + break loop + } + } + if len(kvpairs) == 0 { + kvpairs = items + } else { + kvpairs = append(kvpairs, items...) + } + } + + // positional args + var positional Tuple + if npos := int(arg >> 8); npos > 0 { + positional = make(Tuple, npos) + sp -= npos + copy(positional, stack[sp:]) + } + if args != nil { + // Add elements from *args sequence. + iter := Iterate(args) + if iter == nil { + err = fmt.Errorf("argument after * must be iterable, not %s", args.Type()) + break loop + } + var elem Value + for iter.Next(&elem) { + positional = append(positional, elem) + } + iter.Done() + } + + function := stack[sp-1] + + if vmdebug { + fmt.Printf("VM call %s args=%s kwargs=%s @%s\n", + function, positional, kvpairs, f.Position(fr.callpc)) + } + + fr.callpc = savedpc + z, err2 := Call(thread, function, positional, kvpairs) + if err2 != nil { + err = err2 + break loop + } + if vmdebug { + fmt.Printf("Resuming %s @ %s\n", f.Name, f.Position(0)) + } + stack[sp-1] = z + + case compile.ITERPUSH: + x := stack[sp-1] + sp-- + iter := Iterate(x) + if iter == nil { + err = fmt.Errorf("%s value is not iterable", x.Type()) + break loop + } + iterstack = append(iterstack, iter) + + case compile.ITERJMP: + iter := iterstack[len(iterstack)-1] + if iter.Next(&stack[sp]) { + sp++ + } else { + pc = arg + } + + case compile.ITERPOP: + n := len(iterstack) - 1 + iterstack[n].Done() + iterstack = iterstack[:n] + + case compile.NOT: + stack[sp-1] = !stack[sp-1].Truth() + + case compile.RETURN: + result = stack[sp-1] + break loop + + case compile.SETINDEX: + z := stack[sp-1] + y := stack[sp-2] + x := stack[sp-3] + sp -= 3 + err = setIndex(fr, x, y, z) + if err != nil { + break loop + } + + case compile.INDEX: + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + z, err2 := getIndex(fr, x, y) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = z + sp++ + + case compile.ATTR: + x := stack[sp-1] + name := f.Prog.Names[arg] + y, err2 := getAttr(fr, x, name) + if err2 != nil { + err = err2 + break loop + } + stack[sp-1] = y + + case compile.SETFIELD: + y := stack[sp-1] + x := stack[sp-2] + sp -= 2 + name := f.Prog.Names[arg] + if err2 := setField(fr, x, name, y); err2 != nil { + err = err2 + break loop + } + + case compile.MAKEDICT: + stack[sp] = new(Dict) + sp++ + + case compile.SETDICT, compile.SETDICTUNIQ: + dict := stack[sp-3].(*Dict) + k := stack[sp-2] + v := stack[sp-1] + sp -= 3 + oldlen := dict.Len() + if err2 := dict.Set(k, v); err2 != nil { + err = err2 + break loop + } + if op == compile.SETDICTUNIQ && dict.Len() == oldlen { + err = fmt.Errorf("duplicate key: %v", k) + break loop + } + + case compile.APPEND: + elem := stack[sp-1] + list := stack[sp-2].(*List) + sp -= 2 + list.elems = append(list.elems, elem) + + case compile.SLICE: + x := stack[sp-4] + lo := stack[sp-3] + hi := stack[sp-2] + step := stack[sp-1] + sp -= 4 + res, err2 := slice(x, lo, hi, step) + if err2 != nil { + err = err2 + break loop + } + stack[sp] = res + sp++ + + case compile.UNPACK: + n := int(arg) + iterable := stack[sp-1] + sp-- + iter := Iterate(iterable) + if iter == nil { + err = fmt.Errorf("got %s in sequence assignment", iterable.Type()) + break loop + } + i := 0 + sp += n + for i < n && iter.Next(&stack[sp-1-i]) { + i++ + } + var dummy Value + if iter.Next(&dummy) { + // NB: Len may return -1 here in obscure cases. + err = fmt.Errorf("too many values to unpack (got %d, want %d)", Len(iterable), n) + break loop + } + iter.Done() + if i < n { + err = fmt.Errorf("too few values to unpack (got %d, want %d)", i, n) + break loop + } + + case compile.CJMP: + if stack[sp-1].Truth() { + pc = arg + } + sp-- + + case compile.INT: + stack[sp] = MakeInt64(f.Prog.Constants[arg].(int64)) + sp++ + + case compile.BIGINT: + s := f.Prog.Constants[arg].(string) + bigint := new(big.Int) + if _, ok := bigint.SetString(s, 10); !ok { + err = fmt.Errorf("internal error: failed to parse compiled large integer constant") + break loop + } + stack[sp] = Int{bigint} + sp++ + + case compile.FLOAT: + stack[sp] = Float(f.Prog.Constants[arg].(float64)) + sp++ + + case compile.STRING: + // TODO(adonovan): opt: avoid allocation here + // by prematerializing String values. + // stack[sp] = String(f.Prog.Constants[arg].(string)) + stack[sp] = string2String(f.Prog.Constants[arg]) + sp++ + + case compile.MAKETUPLE: + n := int(arg) + tuple := make(Tuple, n) + sp -= n + copy(tuple, stack[sp:]) + stack[sp] = tuple + sp++ + + case compile.MAKELIST: + n := int(arg) + elems := make([]Value, n) + sp -= n + copy(elems, stack[sp:]) + stack[sp] = NewList(elems) + sp++ + + case compile.MAKEFUNC: + funcode := f.Prog.Functions[arg] + freevars := stack[sp-1].(Tuple) + defaults := stack[sp-2].(Tuple) + sp -= 2 + stack[sp] = &Function{ + funcode: funcode, + predeclared: fr.fn.predeclared, + globals: fr.fn.globals, + defaults: defaults, + freevars: freevars, + } + sp++ + + case compile.LOAD: + n := int(arg) + module := string(stack[sp-1].(String)) + sp-- + + if thread.Load == nil { + err = fmt.Errorf("load not implemented by this application") + break loop + } + + dict, err2 := thread.Load(thread, module) + if err2 != nil { + err = fmt.Errorf("cannot load %s: %v", module, err2) + break loop + } + + for i := 0; i < n; i++ { + from := string(stack[sp-1-i].(String)) + v, ok := dict[from] + if !ok { + err = fmt.Errorf("load: name %s not found in module %s", from, module) + break loop + } + stack[sp-1-i] = v + } + + case compile.SETLOCAL: + locals[arg] = stack[sp-1] + sp-- + + case compile.SETGLOBAL: + fr.fn.globals[arg] = stack[sp-1] + sp-- + + case compile.LOCAL: + x := locals[arg] + if x == nil { + err = fmt.Errorf("local variable %s referenced before assignment", f.Locals[arg].Name) + break loop + } + stack[sp] = x + sp++ + + case compile.FREE: + stack[sp] = fr.fn.freevars[arg] + sp++ + + case compile.GLOBAL: + x := fr.fn.globals[arg] + if x == nil { + err = fmt.Errorf("global variable %s referenced before assignment", f.Prog.Globals[arg].Name) + break loop + } + stack[sp] = x + sp++ + + case compile.PREDECLARED: + name := f.Prog.Names[arg] + x := fr.fn.predeclared[name] + if x == nil { + if name == "PACKAGE_NAME" { + // Gross spec, gross hack. + // Users should just call package_name() function. + if v, ok := fr.fn.predeclared["package_name"].(*Builtin); ok { + x, _ = v.fn(thread, v, nil, nil) + } + } + if x == nil { + err = fmt.Errorf("internal error: predeclared variable %s is uninitialized", name) + break loop + } + } + stack[sp] = x + sp++ + + case compile.UNIVERSAL: + stack[sp] = Universe[f.Prog.Names[arg]] + sp++ + + default: + err = fmt.Errorf("unimplemented: %s", op) + break loop + } + } + + // ITERPOP the rest of the iterator stack. + for _, iter := range iterstack { + iter.Done() + } + + if err != nil { + if _, ok := err.(*EvalError); !ok { + err = fr.errorf(f.Position(savedpc), "%s", err.Error()) + } + } + return result, err +} + +// string2String converts an empty interface containing a string into a +// Value containing a String, without allocating a new string header; +// see github.com/golang/go/issues/24582. +// TODO(adonovan): do this safely by preconverting constants a priori. +// This requires that Program provide a field for use by the interpreter. +func string2String(x interface{}) (y Value) { + // Equivalent to: + // return String(x.(string)) + // but avoids allocation. + + _ = x.(string) + type iface struct { + typ, data unsafe.Pointer + } + xi := (*iface)((unsafe.Pointer)(&x)) + yi := (*iface)((unsafe.Pointer)(&y)) + si := (*iface)((unsafe.Pointer)(&dummystr)) + + yi.typ = si.typ + yi.data = xi.data + return y +} + +var dummystr Value = String("") diff --git a/repl/repl.go b/repl/repl.go index caf8d8d..f6f5a90 100644 --- a/repl/repl.go +++ b/repl/repl.go @@ -32,7 +32,6 @@ import ( "github.com/chzyer/readline" "github.com/google/skylark" - "github.com/google/skylark/resolve" "github.com/google/skylark/syntax" ) @@ -164,30 +163,20 @@ func rep(rl *readline.Instance, thread *skylark.Thread, globals skylark.StringDi // execFileNoFreeze is skylark.ExecFile without globals.Freeze(). func execFileNoFreeze(thread *skylark.Thread, src interface{}, globals skylark.StringDict) error { - // parse - f, err := syntax.Parse("<stdin>", src, 0) + _, prog, err := skylark.SourceProgram("<stdin>", src, globals.Has) if err != nil { return err } - // resolve - if err := resolve.File(f, globals.Has, skylark.Universe.Has); err != nil { - return err - } + res, err := prog.Init(thread, globals) - // execute - // The global names from the previous call become the predeclared names of this call. - globalsArray := make([]skylark.Value, len(f.Globals)) - fr := thread.Push(globals, globalsArray, len(f.Locals)) - err = fr.ExecStmts(f.Stmts) - thread.Pop() + // The global names from the previous call become + // the predeclared names of this call. // Copy globals back to the caller's map. // If execution failed, some globals may be undefined. - for i, id := range f.Globals { - if v := globalsArray[i]; v != nil { - globals[id.Name] = v - } + for k, v := range res { + globals[k] = v } return err diff --git a/syntax/scan.go b/syntax/scan.go index 76f2b4b..57ca5ed 100644 --- a/syntax/scan.go +++ b/syntax/scan.go @@ -181,6 +181,9 @@ func (p Position) Filename() string { return "<unknown>" } +// MakePosition returns position with the specified components. +func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} } + // add returns the position at the end of s, assuming it starts at p. func (p Position) add(s string) Position { if n := strings.Count(s, "\n"); n > 0 { @@ -193,7 +196,10 @@ func (p Position) add(s string) Position { } func (p Position) String() string { - return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col) + if p.Col > 0 { + return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col) + } + return fmt.Sprintf("%s:%d", p.Filename(), p.Line) } func (p Position) isBefore(q Position) bool { diff --git a/syntax/syntax.go b/syntax/syntax.go index 3480c66..99548b1 100644 --- a/syntax/syntax.go +++ b/syntax/syntax.go @@ -298,7 +298,7 @@ type CallExpr struct { commentsRef Fn Expr Lparen Position - Args []Expr + Args []Expr // arg = expr | ident=expr | *expr | **expr Rparen Position } diff --git a/testdata/assign.sky b/testdata/assign.sky index b209d26..ca6b3c5 100644 --- a/testdata/assign.sky +++ b/testdata/assign.sky @@ -258,7 +258,7 @@ assert.fails(lambda: setX(hf), "cannot set field on a frozen hasfields") assert.fails(lambda: setY(hf), "cannot set field on a frozen hasfields") --- -# destucturing assigmnent in a for loop. +# destucturing assignment in a for loop. load("assert.sky", "assert") def f(): diff --git a/testdata/dict.sky b/testdata/dict.sky index e948e72..e6ec762 100644 --- a/testdata/dict.sky +++ b/testdata/dict.sky @@ -165,10 +165,8 @@ assert.fails(iterator3, "insert.*during iteration") def f(): x = {1:2, 2:4} a, x[0] = x - # There are two possible outcomes, depending on iteration order: - if not (a == 1 and x == {0: 2, 1: 2, 2: 4} or - a == 2 and x == {0: 1, 1: 2, 2: 4}): - assert.fail("unexpected results: a=%s x=%s" % (a, x)) + assert.eq(a, 1) + assert.eq(x, {1: 2, 2: 4, 0: 2}) f() # Regression test for a bug in hashtable.delete @@ -206,3 +204,33 @@ def test_delete(): assert.eq(str(d), '{}') test_delete() + +--- +# Verify position of an "unhashable key" error in a dict literal. + +_ = { + "one": 1, + ["two"]: 2, ### "unhashable type: list" + "three": 3, +} + +--- +# Verify position of a "duplicate key" error in a dict literal. + +_ = { + "one": 1, + "one": 1, ### `duplicate key: "one"` + "three": 3, +} + +--- +# Verify position of an "unhashable key" error in a dict comprehension. + +_ = { + k: v ### "unhashable type: list" + for k, v in [ + ("one", 1), + (["two"], 2), + ("three", 3), + ] +} diff --git a/testdata/function.sky b/testdata/function.sky index a86c703..5bc0d2b 100644 --- a/testdata/function.sky +++ b/testdata/function.sky @@ -173,3 +173,14 @@ assert.fails(lambda: f( 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, mm = 100), 'multiple values for keyword argument "mm"') + + +--- +# Regression test for a bug in CALL_VAR_KW. + +load("assert.sky", "assert") + +def f(a, b, x, y): + return a+b+x+y + +assert.eq(f(*("a", "b"), **dict(y="y", x="x")) + ".", 'abxy.') @@ -73,6 +73,7 @@ import ( "strings" "unicode/utf8" + "github.com/google/skylark/internal/compile" "github.com/google/skylark/syntax" ) @@ -465,34 +466,40 @@ func (*stringIterator) Done() {} // A Function is a function defined by a Skylark def statement. type Function struct { - name string // "lambda" for anonymous functions - position syntax.Position // position of def or lambda token - syntax *syntax.Function + funcode *compile.Funcode predeclared StringDict // names predeclared in the current module globals []Value // globals of the current module defaults Tuple freevars Tuple } -func (fn *Function) Name() string { return fn.name } -func (fn *Function) Hash() (uint32, error) { return hashString(fn.name), nil } +func (fn *Function) Name() string { return fn.funcode.Name } // "lambda" for anonymous functions +func (fn *Function) Hash() (uint32, error) { return hashString(fn.funcode.Name), nil } func (fn *Function) Freeze() { fn.defaults.Freeze(); fn.freevars.Freeze() } func (fn *Function) String() string { return toString(fn) } func (fn *Function) Type() string { return "function" } func (fn *Function) Truth() Bool { return true } -// syntax accessors -// -// We do not expose the syntax tree; future versions of Function may dispense with it. +// Globals returns a new, unfrozen StringDict containing all global +// variables so far defined in the function's module. +func (fn *Function) Globals() StringDict { + m := make(StringDict, len(fn.funcode.Prog.Globals)) + for i, id := range fn.funcode.Prog.Globals { + if v := fn.globals[i]; v != nil { + m[id.Name] = v + } + } + return m +} -func (fn *Function) Position() syntax.Position { return fn.position } -func (fn *Function) NumParams() int { return len(fn.syntax.Params) } +func (fn *Function) Position() syntax.Position { return fn.funcode.Pos } +func (fn *Function) NumParams() int { return fn.funcode.NumParams } func (fn *Function) Param(i int) (string, syntax.Position) { - id := fn.syntax.Locals[i] - return id.Name, id.NamePos + id := fn.funcode.Locals[i] + return id.Name, id.Pos } -func (fn *Function) HasVarargs() bool { return fn.syntax.HasVarargs } -func (fn *Function) HasKwargs() bool { return fn.syntax.HasKwargs } +func (fn *Function) HasVarargs() bool { return fn.funcode.HasVarargs } +func (fn *Function) HasKwargs() bool { return fn.funcode.HasKwargs } // A Builtin is a function implemented in Go. type Builtin struct { |