aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--internal/compile/codegen_test.go117
-rw-r--r--internal/compile/compile.go178
-rw-r--r--syntax/parse.go22
-rw-r--r--syntax/parse_test.go2
-rw-r--r--testdata/misc.sky20
5 files changed, 289 insertions, 50 deletions
diff --git a/internal/compile/codegen_test.go b/internal/compile/codegen_test.go
new file mode 100644
index 0000000..a37ac29
--- /dev/null
+++ b/internal/compile/codegen_test.go
@@ -0,0 +1,117 @@
+package compile
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "github.com/google/skylark/resolve"
+ "github.com/google/skylark/syntax"
+)
+
+// TestPlusFolding ensures that the compiler generates optimized code for
+// n-ary addition of strings, lists, and tuples.
+func TestPlusFolding(t *testing.T) {
+ isPredeclared := func(name string) bool { return name == "x" }
+ isUniversal := func(name string) bool { return false }
+ for i, test := range []struct {
+ src string // source expression
+ want string // disassembled code
+ }{
+ {
+ // string folding
+ // const + const + const + const => const
+ `"a" + "b" + "c" + "d"`,
+ `string "abcd"; return`,
+ },
+ {
+ // string folding with variable:
+ // const + const + var + const + const => sum(const, var, const)
+ `"a" + "b" + x + "c" + "d"`,
+ `string "ab"; predeclared x; plus; string "cd"; plus; return`,
+ },
+ {
+ // list folding
+ `[1] + [2] + [3]`,
+ `int 1; int 2; int 3; makelist<3>; return`,
+ },
+ {
+ // list folding with variable
+ `[1] + [2] + x + [3]`,
+ `int 1; int 2; makelist<2>; ` +
+ `predeclared x; plus; ` +
+ `int 3; makelist<1>; plus; ` +
+ `return`,
+ },
+ {
+ // tuple folding
+ `() + (1,) + (2, 3)`,
+ `int 1; int 2; int 3; maketuple<3>; return`,
+ },
+ {
+ // tuple folding with variable
+ `() + (1,) + x + (2, 3)`,
+ `int 1; maketuple<1>; predeclared x; plus; ` +
+ `int 2; int 3; maketuple<2>; plus; ` +
+ `return`,
+ },
+ } {
+ expr, err := syntax.ParseExpr("in.sky", test.src, 0)
+ if err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ locals, err := resolve.Expr(expr, isPredeclared, isUniversal)
+ if err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ got := disassemble(Expr(expr, locals))
+ if test.want != got {
+ t.Errorf("expression <<%s>> generated <<%s>>, want <<%s>>",
+ test.src, got, test.want)
+ }
+ }
+}
+
+// disassemble is a trivial disassembler tailored to the accumulator test.
+func disassemble(f *Funcode) string {
+ out := new(bytes.Buffer)
+ code := f.Code
+ for pc := 0; pc < len(code); {
+ op := Opcode(code[pc])
+ pc++
+ // TODO(adonovan): factor in common with interpreter.
+ var arg uint32
+ if op >= OpcodeArgMin {
+ for s := uint(0); ; s += 7 {
+ b := code[pc]
+ pc++
+ arg |= uint32(b&0x7f) << s
+ if b < 0x80 {
+ break
+ }
+ }
+ }
+
+ if out.Len() > 0 {
+ out.WriteString("; ")
+ }
+ fmt.Fprintf(out, "%s", op)
+ if op >= OpcodeArgMin {
+ switch op {
+ case INT, FLOAT, BIGINT:
+ fmt.Fprintf(out, " %v", f.Prog.Constants[arg])
+ case STRING:
+ fmt.Fprintf(out, " %q", f.Prog.Constants[arg])
+ case LOCAL:
+ fmt.Fprintf(out, " %s", f.Locals[arg].Name)
+ case PREDECLARED:
+ fmt.Fprintf(out, " %s", f.Prog.Names[arg])
+ default:
+ fmt.Fprintf(out, "<%d>", arg)
+ }
+ }
+ }
+ return out.String()
+}
diff --git a/internal/compile/compile.go b/internal/compile/compile.go
index fd00e62..01d33a1 100644
--- a/internal/compile/compile.go
+++ b/internal/compile/compile.go
@@ -22,26 +22,6 @@
//
package compile
-// TODO(adonovan):
-// Optimizations:
-// - add dynamic checks for missing opcodes in the various tables.
-//
-// - optimize the + operator, which is often applied to many strings or lists
-// in sequence, leading to quadratic behavior:
-// 1) simplify a left tree of plusses ((a+b)+c) into a sequence [a, +b, +c].
-// We need to keep each + that could fail, to report the correct position.
-// 2) in the sequence, combine adjacent list/tuple/string literals:
-// [a...]+[b...] -> [a... b...]
-// (a...)+(b...) -> (a... b...)
-// "a..."+"b..." -> "a... b..."
-// 3) in the sequence, implement a + op of a literal and following
-// non-literals by a buffer:
-// e.g. "a..." + x + y
-// => s=stringbuf(); s.append("a..."); s.append(x); s.append(y); s.string()
-// e.g. [a...] + x + y
-// => z=list(); z.append(each of a...); z.extend(x); z.extend(y); z
-// The apparent position of each append op is that of the '+' token.
-
import (
"bytes"
"fmt"
@@ -155,6 +135,8 @@ const (
OpcodeMax = CALL_VAR_KW
)
+// TODO(adonovan): add dynamic checks for missing opcodes in the tables below.
+
var opcodeNames = [...]string{
AMP: "amp",
APPEND: "append",
@@ -179,8 +161,8 @@ var opcodeNames = [...]string{
INPLACE_ADD: "inplace_add",
INT: "int",
ITERJMP: "iterjmp",
- ITERPUSH: "iterpush",
ITERPOP: "iterpop",
+ ITERPUSH: "iterpush",
JMP: "jmp",
LE: "le",
LOAD: "load",
@@ -1256,9 +1238,9 @@ func (fcomp *fcomp) expr(e syntax.Expr) {
}
case *syntax.BinaryExpr:
+ switch e.Op {
// short-circuit operators
// TODO(adonovan): use ifelse to simplify conditions.
- switch e.Op {
case syntax.OR:
// x or y => if x then x else y
done := fcomp.newBlock()
@@ -1274,7 +1256,6 @@ func (fcomp *fcomp) expr(e syntax.Expr) {
fcomp.jump(done)
fcomp.block = done
- return
case syntax.AND:
// x and y => if x then y else x
@@ -1291,13 +1272,16 @@ func (fcomp *fcomp) expr(e syntax.Expr) {
fcomp.jump(done)
fcomp.block = done
- return
- }
- // strict binary operator (includes comparisons)
- fcomp.expr(e.X)
- fcomp.expr(e.Y)
- fcomp.binop(e.OpPos, e.Op)
+ case syntax.PLUS:
+ fcomp.plus(e)
+
+ default:
+ // all other strict binary operator (includes comparisons)
+ fcomp.expr(e.X)
+ fcomp.expr(e.Y)
+ fcomp.binop(e.OpPos, e.Op)
+ }
case *syntax.DotExpr:
fcomp.expr(e.X)
@@ -1316,6 +1300,142 @@ func (fcomp *fcomp) expr(e syntax.Expr) {
}
}
+type summand struct {
+ x syntax.Expr
+ plusPos syntax.Position
+}
+
+// plus emits optimized code for ((a+b)+...)+z that avoid naive
+// quadratic behavior for strings, tuples, and lists,
+// and folds together adjacent literals of the same type.
+func (fcomp *fcomp) plus(e *syntax.BinaryExpr) {
+ // Gather all the right operands of the left tree of plusses.
+ // A tree (((a+b)+c)+d) becomes summmands=[a +b +c +d].
+ args := make([]summand, 0, 2) // common case: 2 operands
+ for plus := e; ; {
+ args = append(args, summand{unparen(plus.Y), plus.OpPos})
+ left := unparen(plus.X)
+ x, ok := left.(*syntax.BinaryExpr)
+ if !ok || x.Op != syntax.PLUS {
+ args = append(args, summand{x: left})
+ break
+ }
+ plus = x
+ }
+ // Reverse args to syntactic order.
+ for i, n := 0, len(args)/2; i < n; i++ {
+ j := len(args) - 1 - i
+ args[i], args[j] = args[j], args[i]
+ }
+
+ // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+().
+ out := args[:0] // compact in situ
+ for i := 0; i < len(args); {
+ j := i + 1
+ if code := addable(args[i].x); code != 0 {
+ for j < len(args) && addable(args[j].x) == code {
+ j++
+ }
+ if j > i+1 {
+ args[i].x = add(code, args[i:j])
+ }
+ }
+ out = append(out, args[i])
+ i = j
+ }
+ args = out
+
+ // Emit code for an n-ary sum (n >= 0).
+ fcomp.expr(args[0].x)
+ for _, summand := range args[1:] {
+ fcomp.expr(summand.x)
+ fcomp.setPos(summand.plusPos)
+ fcomp.emit(PLUS)
+ }
+
+ // If len(args) > 2, use of an accumulator instead of a chain of
+ // PLUS operations may be more efficient.
+ // However, no gain was measured on a workload analogous to Bazel loading;
+ // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload.
+ //
+ // We cannot use a single n-ary SUM operation
+ // a b c SUM<3>
+ // because we need to report a distinct error for each
+ // individual '+' operation, so three additional operations are
+ // needed:
+ //
+ // ACCSTART => create buffer and append to it
+ // ACCUM => append to buffer
+ // ACCEND => get contents of buffer
+ //
+ // For string, list, and tuple values, the interpreter can
+ // optimize these operations by using a mutable buffer.
+ // For all other types, ACCSTART and ACCEND would behave like
+ // the identity function and ACCUM behaves like PLUS.
+ // ACCUM must correctly support user-defined operations
+ // such as list+foo.
+ //
+ // fcomp.emit(ACCSTART)
+ // for _, summand := range args[1:] {
+ // fcomp.expr(summand.x)
+ // fcomp.setPos(summand.plusPos)
+ // fcomp.emit(ACCUM)
+ // }
+ // fcomp.emit(ACCEND)
+}
+
+// addable reports whether e is a statically addable
+// expression: a [s]tring, [l]ist, or [t]uple.
+func addable(e syntax.Expr) rune {
+ switch e := e.(type) {
+ case *syntax.Literal:
+ // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding.
+ switch e.Token {
+ case syntax.STRING:
+ return 's'
+ }
+ case *syntax.ListExpr:
+ return 'l'
+ case *syntax.TupleExpr:
+ return 't'
+ }
+ return 0
+}
+
+// add returns an expression denoting the sum of args,
+// which are all addable values of the type indicated by code.
+// The resulting syntax may be degenerate.
+func add(code rune, args []summand) syntax.Expr {
+ switch code {
+ case 's':
+ var buf bytes.Buffer
+ for _, arg := range args {
+ buf.WriteString(arg.x.(*syntax.Literal).Value.(string))
+ }
+ return &syntax.Literal{Token: syntax.STRING, Value: buf.String()}
+ case 'l':
+ var elems []syntax.Expr
+ for _, arg := range args {
+ elems = append(elems, arg.x.(*syntax.ListExpr).List...)
+ }
+ return &syntax.ListExpr{List: elems}
+ case 't':
+ var elems []syntax.Expr
+ for _, arg := range args {
+ elems = append(elems, arg.x.(*syntax.TupleExpr).List...)
+ }
+ return &syntax.TupleExpr{List: elems}
+ }
+ panic(code)
+}
+
+func unparen(e syntax.Expr) syntax.Expr {
+ if p, ok := e.(*syntax.ParenExpr); ok {
+ return unparen(p.X)
+ }
+ return e
+}
+
func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) {
// TODO(adonovan): opt: simplify constant expressions, especially string+string.
// See comment at top.
diff --git a/syntax/parse.go b/syntax/parse.go
index e7d3106..ed8aeaf 100644
--- a/syntax/parse.go
+++ b/syntax/parse.go
@@ -11,7 +11,7 @@ package syntax
// package. Verify that error positions are correct using the
// chunkedfile mechanism.
-import "log"
+import log "log"
// Enable this flag to print the token stream and log.Fatal on the first error.
const debug = false
@@ -579,7 +579,7 @@ func (p *parser) parseBinopExpr(prec int) Expr {
op := p.tok
pos := p.nextToken()
y := p.parseTestPrec(opprec + 1)
- x = makeBinaryExpr(op, pos, x, y)
+ x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
}
}
@@ -613,24 +613,6 @@ func init() {
}
}
-func makeBinaryExpr(op Token, pos Position, x, y Expr) Expr {
- // Concatenate literal strings during parsing.
- if op == PLUS {
- if x, ok := x.(*Literal); ok && x.Token == STRING {
- if y, ok := y.(*Literal); ok && y.Token == STRING {
- // The Span of this synthetic node will be wrong.
- return &Literal{
- Token: STRING,
- TokenPos: x.TokenPos,
- Raw: x.Raw + " + " + y.Raw, // ugh
- Value: x.Value.(string) + y.Value.(string),
- }
- }
- }
- }
- return &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
-}
-
// primary_with_suffix = primary
// | primary '.' IDENT
// | primary slice_suffix
diff --git a/syntax/parse_test.go b/syntax/parse_test.go
index d511d3f..bee9388 100644
--- a/syntax/parse_test.go
+++ b/syntax/parse_test.go
@@ -85,7 +85,7 @@ func TestExprParseTrees(t *testing.T) {
{`-1 + +2`,
`(BinaryExpr X=(UnaryExpr Op=- X=1) Op=+ Y=(UnaryExpr Op=+ X=2))`},
{`"foo" + "bar"`,
- `"foobar"`}, // concatenated
+ `(BinaryExpr X="foo" Op=+ Y="bar")`},
{`-1 * 2`, // prec(unary -) > prec(binary *)
`(BinaryExpr X=(UnaryExpr Op=- X=1) Op=* Y=2)`},
{`-x[i]`, // prec(unary -) < prec(x[i])
diff --git a/testdata/misc.sky b/testdata/misc.sky
index 5e36b49..e0b46f7 100644
--- a/testdata/misc.sky
+++ b/testdata/misc.sky
@@ -111,3 +111,23 @@ load('assert.sky', 'assert', 'matches') ### "matches not found in module"
---
# Load does not expose universals accessible in other modules.
load('assert.sky', 'len') ### "len not found in module"
+
+
+---
+# Test plus folding optimization.
+load('assert.sky', 'assert')
+
+s = "s"
+l = [4]
+t = (4,)
+
+assert.eq("a" + "b" + "c", "abc")
+assert.eq("a" + "b" + s + "c", "absc")
+assert.eq(() + (1,) + (2, 3), (1, 2, 3))
+assert.eq(() + (1,) + t + (2, 3), (1, 4, 2, 3))
+assert.eq([] + [1] + [2, 3], [1, 2, 3])
+assert.eq([] + [1] + l + [2, 3], [1, 4, 2, 3])
+
+assert.fails(lambda: "a" + "b" + 1 + "c", "unknown binary op: string \\+ int")
+assert.fails(lambda: () + () + 1 + (), "unknown binary op: tuple \\+ int")
+assert.fails(lambda: [] + [] + 1 + [], "unknown binary op: list \\+ int")