diff options
author | Alan Donovan <adonovan@google.com> | 2014-06-16 15:46:07 -0400 |
---|---|---|
committer | Alan Donovan <adonovan@google.com> | 2014-06-16 15:46:07 -0400 |
commit | 9b38eafe607ba43ac44da301e6e04a1b7c2e8d90 (patch) | |
tree | 261efe554ab56cdbc5651cebc38f26b37daa4c48 | |
parent | 47c0a8f0c30aa0ffdc2109b0a1b585123e2c0534 (diff) | |
download | golang-x-tools-9b38eafe607ba43ac44da301e6e04a1b7c2e8d90.tar.gz |
go/pointer: implement pointer equivalence via hash-value numbering, a pre-solver optimization.
This reduces solver time by about 40%.
See hvn.go for detailed description.
Also in this CL:
- Update package docs.
- Added various global opt/debug options for maintainer convenience.
- Added logging of phase timing.
- Added stdlib_test, disabled by default, that runs the analysis
on all tests in $GOROOT.
- include types when dumping solution
LGTM=crawshaw
R=crawshaw, dannyb
CC=golang-codereviews
https://golang.org/cl/96650048
-rw-r--r-- | go/pointer/TODO | 17 | ||||
-rw-r--r-- | go/pointer/analysis.go | 168 | ||||
-rw-r--r-- | go/pointer/api.go | 8 | ||||
-rw-r--r-- | go/pointer/callgraph.go | 11 | ||||
-rw-r--r-- | go/pointer/constraint.go | 64 | ||||
-rw-r--r-- | go/pointer/doc.go | 286 | ||||
-rw-r--r-- | go/pointer/gen.go | 72 | ||||
-rw-r--r-- | go/pointer/hvn.go | 968 | ||||
-rw-r--r-- | go/pointer/intrinsics.go | 8 | ||||
-rw-r--r-- | go/pointer/opt.go | 22 | ||||
-rw-r--r-- | go/pointer/pointer_test.go | 3 | ||||
-rw-r--r-- | go/pointer/print.go | 2 | ||||
-rw-r--r-- | go/pointer/reflect.go | 168 | ||||
-rw-r--r-- | go/pointer/solve.go | 81 | ||||
-rw-r--r-- | go/pointer/stdlib_test.go | 133 | ||||
-rw-r--r-- | go/pointer/testdata/finalizer.go | 8 | ||||
-rw-r--r-- | go/pointer/util.go | 38 |
17 files changed, 1770 insertions, 287 deletions
diff --git a/go/pointer/TODO b/go/pointer/TODO index 5b7b16846..9be3b8caf 100644 --- a/go/pointer/TODO +++ b/go/pointer/TODO @@ -17,11 +17,26 @@ CONSTRAINT GENERATION: 3) soundly track physical field offsets. (Summarise dannyb's email here.) A downside is that we can't keep the identity field of struct allocations that identifies the object. +- add to pts(a.panic) a label representing all runtime panics, e.g. + runtime.{TypeAssertionError,errorString,errorCString}. OPTIMISATIONS -- pre-solver: PE via HVN/HRU and LE. +- pre-solver: + pointer equivalence: extend HVN to HRU + location equivalence - solver: HCD, LCD. +- experiment with map+slice worklist in lieu of bitset. + It may have faster insert. MISC: - Test on all platforms. Currently we assume these go/build tags: linux, amd64, !cgo. + +MAINTAINABILITY +- Think about ways to make debugging this code easier. PTA logs + routinely exceed a million lines and require training to read. + +BUGS: +- There's a crash bug in stdlib_test + reflection, rVCallConstraint. + + diff --git a/go/pointer/analysis.go b/go/pointer/analysis.go index 884962b59..cb423f4ea 100644 --- a/go/pointer/analysis.go +++ b/go/pointer/analysis.go @@ -12,7 +12,9 @@ import ( "io" "os" "reflect" + "runtime" "runtime/debug" + "sort" "code.google.com/p/go.tools/go/callgraph" "code.google.com/p/go.tools/go/ssa" @@ -20,6 +22,18 @@ import ( "code.google.com/p/go.tools/go/types/typeutil" ) +const ( + // optimization options; enable all when committing + optRenumber = true // enable renumbering optimization (makes logs hard to read) + optHVN = true // enable pointer equivalence via Hash-Value Numbering + + // debugging options; disable all when committing + debugHVN = false // enable assertions in HVN + debugHVNVerbose = false // enable extra HVN logging + debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below) + debugTimers = true // show running time of each phase +) + // object.flags bitmask values. 
const ( otTagged = 1 << iota // type-tagged object @@ -72,7 +86,7 @@ type nodeid uint32 type node struct { // If non-nil, this node is the start of an object // (addressable memory location). - // The following obj.size words implicitly belong to the object; + // The following obj.size nodes implicitly belong to the object; // they locate their object by scanning back. obj *object @@ -85,21 +99,10 @@ type node struct { // an object of aggregate type (struct, tuple, array) this is. subelement *fieldInfo // e.g. ".a.b[*].c" - // Points-to sets. - pts nodeset // points-to set of this node - prevPts nodeset // pts(n) in previous iteration (for difference propagation) - - // Graph edges - copyTo nodeset // simple copy constraint edges - - // Complex constraints attached to this node (x). - // - *loadConstraint y=*x - // - *offsetAddrConstraint y=&x.f or y=&x[0] - // - *storeConstraint *x=z - // - *typeFilterConstraint y=x.(I) - // - *untagConstraint y=x.(C) - // - *invokeConstraint y=x.f(params...) - complex []constraint + // Solver state for the canonical node of this pointer- + // equivalence class. Each node is created with its own state + // but they become shared after HVN. + solve *solverState } // An analysis instance holds the state of a single pointer analysis problem. 
@@ -119,6 +122,8 @@ type analysis struct { globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton localval map[ssa.Value]nodeid // node for each local ssa.Value localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton + atFuncs map[*ssa.Function]bool // address-taken functions (for presolver) + mapValues []nodeid // values of makemap objects (indirect in HVN) work nodeset // solver's worklist result *Result // results of the analysis track track // pointerlike types whose aliasing we track @@ -136,9 +141,10 @@ type analysis struct { runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer } -// enclosingObj returns the object (addressible memory object) that encloses node id. -// Panic ensues if that node does not belong to any object. -func (a *analysis) enclosingObj(id nodeid) *object { +// enclosingObj returns the first node of the addressable memory +// object that encloses node id. Panic ensues if that node does not +// belong to any object. +func (a *analysis) enclosingObj(id nodeid) nodeid { // Find previous node with obj != nil. for i := id; i >= 0; i-- { n := a.nodes[i] @@ -146,7 +152,7 @@ func (a *analysis) enclosingObj(id nodeid) *object { if i+nodeid(obj.size) <= id { break // out of bounds } - return obj + return i } } panic("node has no enclosing object") @@ -156,7 +162,7 @@ func (a *analysis) enclosingObj(id nodeid) *object { // Panic ensues if that node is not addressable. 
func (a *analysis) labelFor(id nodeid) *Label { return &Label{ - obj: a.enclosingObj(id), + obj: a.nodes[a.enclosingObj(id)].obj, subelement: a.nodes[id].subelement, } } @@ -222,6 +228,7 @@ func Analyze(config *Config) (result *Result, err error) { globalobj: make(map[ssa.Value]nodeid), flattenMemo: make(map[types.Type][]*fieldInfo), trackTypes: make(map[types.Type]bool), + atFuncs: make(map[*ssa.Function]bool), hasher: typeutil.MakeHasher(), intrinsics: make(map[*ssa.Function]intrinsic), result: &Result{ @@ -236,7 +243,7 @@ func Analyze(config *Config) (result *Result, err error) { } if a.log != nil { - fmt.Fprintln(a.log, "======== NEW ANALYSIS ========") + fmt.Fprintln(a.log, "==== Starting analysis") } // Pointer analysis requires a complete program for soundness. @@ -275,24 +282,55 @@ func Analyze(config *Config) (result *Result, err error) { a.computeTrackBits() a.generate() + a.showCounts() - if a.log != nil { - // Show size of constraint system. - counts := make(map[reflect.Type]int) - for _, c := range a.constraints { - counts[reflect.TypeOf(c)]++ - } - fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) - for t, n := range counts { - fmt.Fprintf(a.log, "\t%s:\t%d\n", t, n) + if optRenumber { + a.renumber() + } + + N := len(a.nodes) // excludes solver-created nodes + + if optHVN { + if debugHVNCrossCheck { + // Cross-check: run the solver once without + // optimization, once with, and compare the + // solutions. + savedConstraints := a.constraints + + a.solve() + a.dumpSolution("A.pts", N) + + // Restore. + a.constraints = savedConstraints + for _, n := range a.nodes { + n.solve = new(solverState) + } + a.nodes = a.nodes[:N] + + // rtypes is effectively part of the solver state. + a.rtypes = typeutil.Map{} + a.rtypes.SetHasher(a.hasher) } - fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) + + a.hvn() } - a.optimize() + if debugHVNCrossCheck { + runtime.GC() + runtime.GC() + } a.solve() + // Compare solutions. 
+ if optHVN && debugHVNCrossCheck { + a.dumpSolution("B.pts", N) + + if !diff("A.pts", "B.pts") { + return nil, fmt.Errorf("internal error: optimization changed solution") + } + } + // Create callgraph.Nodes in deterministic order. if cg := a.result.CallGraph; cg != nil { for _, caller := range a.cgnodes { @@ -304,7 +342,7 @@ func Analyze(config *Config) (result *Result, err error) { var space [100]int for _, caller := range a.cgnodes { for _, site := range caller.sites { - for _, callee := range a.nodes[site.targets].pts.AppendTo(space[:0]) { + for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) { a.callEdge(caller, site, nodeid(callee)) } } @@ -342,3 +380,65 @@ func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) { a.warnf(fn.Pos(), " (declared here)") } } + +// dumpSolution writes the PTS solution to the specified file. +// +// It only dumps the nodes that existed before solving. The order in +// which solver-created nodes are created depends on pre-solver +// optimization, so we can't include them in the cross-check. +// +func (a *analysis) dumpSolution(filename string, N int) { + f, err := os.Create(filename) + if err != nil { + panic(err) + } + for id, n := range a.nodes[:N] { + if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil { + panic(err) + } + var sep string + for _, l := range n.solve.pts.AppendTo(a.deltaSpace) { + if l >= N { + break + } + fmt.Fprintf(f, "%s%d", sep, l) + sep = " " + } + fmt.Fprintf(f, "} : %s\n", n.typ) + } + if err := f.Close(); err != nil { + panic(err) + } +} + +// showCounts logs the size of the constraint system. A typical +// optimized distribution is 65% copy, 13% load, 11% addr, 5% +// offsetAddr, 4% store, 2% others. 
+// +func (a *analysis) showCounts() { + if a.log != nil { + counts := make(map[reflect.Type]int) + for _, c := range a.constraints { + counts[reflect.TypeOf(c)]++ + } + fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) + var lines []string + for t, n := range counts { + line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t) + lines = append(lines, line) + } + sort.Sort(sort.Reverse(sort.StringSlice(lines))) + for _, line := range lines { + fmt.Fprintf(a.log, "\t%s\n", line) + } + + fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) + + // Show number of pointer equivalence classes. + m := make(map[*solverState]bool) + for _, n := range a.nodes { + m[n.solve] = true + } + fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m)) + } +} diff --git a/go/pointer/api.go b/go/pointer/api.go index 190e459df..890017a04 100644 --- a/go/pointer/api.go +++ b/go/pointer/api.go @@ -124,12 +124,12 @@ type Result struct { // A Pointer is an equivalence class of pointer-like values. // -// A pointer doesn't have a unique type because pointers of distinct +// A Pointer doesn't have a unique type because pointers of distinct // types may alias the same object. // type Pointer struct { a *analysis - n nodeid // non-zero + n nodeid } // A PointsToSet is a set of labels (locations or allocations). 
@@ -197,7 +197,7 @@ func (s PointsToSet) DynamicTypes() *typeutil.Map { pts = PointsToSet{s.a, new(nodeset)} tmap.Set(tDyn, pts) } - pts.pts.addAll(&s.a.nodes[v].pts) + pts.pts.addAll(&s.a.nodes[v].solve.pts) } } return &tmap @@ -224,7 +224,7 @@ func (p Pointer) PointsTo() PointsToSet { if p.n == 0 { return PointsToSet{} } - return PointsToSet{p.a, &p.a.nodes[p.n].pts} + return PointsToSet{p.a, &p.a.nodes[p.n].solve.pts} } // MayAlias reports whether the receiver pointer may alias diff --git a/go/pointer/callgraph.go b/go/pointer/callgraph.go index 6ef415d22..67062919f 100644 --- a/go/pointer/callgraph.go +++ b/go/pointer/callgraph.go @@ -20,6 +20,17 @@ type cgnode struct { callersite *callsite // where called from, if known; nil for shared contours } +// contour returns a description of this node's contour. +func (n *cgnode) contour() string { + if n.callersite == nil { + return "shared contour" + } + if n.callersite.instr != nil { + return fmt.Sprintf("as called from %s", n.callersite.instr.Parent()) + } + return fmt.Sprintf("as called from intrinsic (targets=n%d)", n.callersite.targets) +} + func (n *cgnode) String() string { return fmt.Sprintf("cg%d:%s", n.obj, n.fn) } diff --git a/go/pointer/constraint.go b/go/pointer/constraint.go index caf235dd4..0f363b904 100644 --- a/go/pointer/constraint.go +++ b/go/pointer/constraint.go @@ -10,22 +10,18 @@ import ( type constraint interface { // For a complex constraint, returns the nodeid of the pointer - // to which it is attached. + // to which it is attached. For addr and copy, returns dst. ptr() nodeid - // indirect returns (by appending to the argument) the constraint's - // "indirect" nodes as defined in (Hardekopf 2007b): - // nodes whose points-to relations are not completely - // represented in the initial constraint graph. - // - // TODO(adonovan): I think we need >1 results in some obscure - // cases. If not, just return a nodeid, like ptr(). 
- // - indirect(nodes []nodeid) []nodeid - // renumber replaces each nodeid n in the constraint by mapping[n]. renumber(mapping []nodeid) + // presolve is a hook for constraint-specific behaviour during + // pre-solver optimization. Typical implementations mark as + // indirect the set of nodes to which the solver will add copy + // edges or PTS labels. + presolve(h *hvn) + // solve is called for complex constraints when the pts for // the node to which they are attached has changed. solve(a *analysis, delta *nodeset) @@ -41,10 +37,7 @@ type addrConstraint struct { src nodeid } -func (c *addrConstraint) ptr() nodeid { panic("addrConstraint: not a complex constraint") } -func (c *addrConstraint) indirect(nodes []nodeid) []nodeid { - panic("addrConstraint: not a complex constraint") -} +func (c *addrConstraint) ptr() nodeid { return c.dst } func (c *addrConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -53,14 +46,11 @@ func (c *addrConstraint) renumber(mapping []nodeid) { // dst = src // A simple constraint represented directly as a copyTo graph edge. 
type copyConstraint struct { - dst nodeid - src nodeid // (ptr) + dst nodeid // (ptr) + src nodeid } -func (c *copyConstraint) ptr() nodeid { panic("copyConstraint: not a complex constraint") } -func (c *copyConstraint) indirect(nodes []nodeid) []nodeid { - panic("copyConstraint: not a complex constraint") -} +func (c *copyConstraint) ptr() nodeid { return c.dst } func (c *copyConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -70,12 +60,11 @@ func (c *copyConstraint) renumber(mapping []nodeid) { // A complex constraint attached to src (the pointer) type loadConstraint struct { offset uint32 - dst nodeid // (indirect) + dst nodeid src nodeid // (ptr) } -func (c *loadConstraint) ptr() nodeid { return c.src } -func (c *loadConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) } +func (c *loadConstraint) ptr() nodeid { return c.src } func (c *loadConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -89,8 +78,7 @@ type storeConstraint struct { src nodeid } -func (c *storeConstraint) ptr() nodeid { return c.dst } -func (c *storeConstraint) indirect(nodes []nodeid) []nodeid { return nodes } +func (c *storeConstraint) ptr() nodeid { return c.dst } func (c *storeConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -100,12 +88,11 @@ func (c *storeConstraint) renumber(mapping []nodeid) { // A complex constraint attached to dst (the pointer) type offsetAddrConstraint struct { offset uint32 - dst nodeid // (indirect) + dst nodeid src nodeid // (ptr) } -func (c *offsetAddrConstraint) ptr() nodeid { return c.src } -func (c *offsetAddrConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) } +func (c *offsetAddrConstraint) ptr() nodeid { return c.src } func (c *offsetAddrConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -116,12 +103,11 @@ func (c *offsetAddrConstraint) renumber(mapping 
[]nodeid) { // No representation change: pts(dst) and pts(src) contains tagged objects. type typeFilterConstraint struct { typ types.Type // an interface type - dst nodeid // (indirect) - src nodeid // (ptr) + dst nodeid + src nodeid // (ptr) } -func (c *typeFilterConstraint) ptr() nodeid { return c.src } -func (c *typeFilterConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) } +func (c *typeFilterConstraint) ptr() nodeid { return c.src } func (c *typeFilterConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -139,13 +125,12 @@ func (c *typeFilterConstraint) renumber(mapping []nodeid) { // pts(dst) contains their payloads. type untagConstraint struct { typ types.Type // a concrete type - dst nodeid // (indirect) - src nodeid // (ptr) + dst nodeid + src nodeid // (ptr) exact bool } -func (c *untagConstraint) ptr() nodeid { return c.src } -func (c *untagConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) } +func (c *untagConstraint) ptr() nodeid { return c.src } func (c *untagConstraint) renumber(mapping []nodeid) { c.dst = mapping[c.dst] c.src = mapping[c.src] @@ -156,11 +141,10 @@ func (c *untagConstraint) renumber(mapping []nodeid) { type invokeConstraint struct { method *types.Func // the abstract method iface nodeid // (ptr) the interface - params nodeid // (indirect) the first param in the params/results block + params nodeid // the start of the identity/params/results block } -func (c *invokeConstraint) ptr() nodeid { return c.iface } -func (c *invokeConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.params) } +func (c *invokeConstraint) ptr() nodeid { return c.iface } func (c *invokeConstraint) renumber(mapping []nodeid) { c.iface = mapping[c.iface] c.params = mapping[c.params] diff --git a/go/pointer/doc.go b/go/pointer/doc.go index 13441e11e..00bf2a4e1 100644 --- a/go/pointer/doc.go +++ b/go/pointer/doc.go @@ -7,21 +7,20 @@ Package pointer implements 
Andersen's analysis, an inclusion-based pointer analysis algorithm first described in (Andersen, 1994). -The implementation is similar to that described in (Pearce et al, -PASTE'04). Unlike many algorithms which interleave constraint -generation and solving, constructing the callgraph as they go, this -implementation for the most part observes a phase ordering (generation -before solving), with only simple (copy) constraints being generated -during solving. (The exception is reflection, which creates various -constraints during solving as new types flow to reflect.Value -operations.) This improves the traction of presolver optimisations, -but imposes certain restrictions, e.g. potential context sensitivity -is limited since all variants must be created a priori. - -We intend to add various presolving optimisations such as Pointer and -Location Equivalence from (Hardekopf & Lin, SAS '07) and solver -optimisations such as Hybrid- and Lazy- Cycle Detection from -(Hardekopf & Lin, PLDI'07), +A pointer analysis relates every pointer expression in a whole program +to the set of memory locations to which it might point. This +information can be used to construct a call graph of the program that +precisely represents the destinations of dynamic function and method +calls. It can also be used to determine, for example, which pairs of +channel operations operate on the same channel. + +The package allows the client to request a set of expressions of +interest for which the points-to information will be returned once the +analysis is complete. In addition, the client may request that a +callgraph is constructed. The example program in example_test.go +demonstrates both of these features. Clients should not request more +information than they need since it may increase the cost of the +analysis significantly. CLASSIFICATION @@ -41,7 +40,7 @@ fields, such as x and y in struct { x, y *int }. 
It is mostly CONTEXT-INSENSITIVE: most functions are analyzed once, so values can flow in at one call to the function and return out at another. Only some smaller functions are analyzed with consideration -to their calling context. +of their calling context. It has a CONTEXT-SENSITIVE HEAP: objects are named by both allocation site and context, so the objects returned by two distinct calls to f: @@ -54,11 +53,87 @@ complete Go program and summaries for native code. See the (Hind, PASTE'01) survey paper for an explanation of these terms. +SOUNDNESS + +The analysis is fully sound when invoked on pure Go programs that do not +use reflection or unsafe.Pointer conversions. In other words, if there +is any possible execution of the program in which pointer P may point to +object O, the analysis will report that fact. + + +REFLECTION + +By default, the "reflect" library is ignored by the analysis, as if all +its functions were no-ops, but if the client enables the Reflection flag, +the analysis will make a reasonable attempt to model the effects of +calls into this library. However, this comes at a significant +performance cost, and not all features of that library are yet +implemented. In addition, some simplifying approximations must be made +to ensure that the analysis terminates; for example, reflection can be +used to construct an infinite set of types and values of those types, +but the analysis arbitrarily bounds the depth of such types. + +Most but not all reflection operations are supported. +In particular, addressable reflect.Values are not yet implemented, so +operations such as (reflect.Value).Set have no analytic effect. 
+ + +UNSAFE POINTER CONVERSIONS + +The pointer analysis makes no attempt to understand aliasing between the +operand x and result y of an unsafe.Pointer conversion: + y = (*T)(unsafe.Pointer(x)) +It is as if the conversion allocated an entirely new object: + y = new(T) + + +NATIVE CODE + +The analysis cannot model the aliasing effects of functions written in +languages other than Go, such as runtime intrinsics in C or assembly, or +code accessed via cgo. The result is as if such functions are no-ops. +However, various important intrinsics are understood by the analysis, +along with built-ins such as append. + +The analysis currently provides no way for users to specify the aliasing +effects of native code. + +------------------------------------------------------------------------ + +IMPLEMENTATION + +The remaining documentation is intended for package maintainers and +pointer analysis specialists. Maintainers should have a solid +understanding of the referenced papers (especially those by H&L and PKH) +before making making significant changes. + +The implementation is similar to that described in (Pearce et al, +PASTE'04). Unlike many algorithms which interleave constraint +generation and solving, constructing the callgraph as they go, this +implementation for the most part observes a phase ordering (generation +before solving), with only simple (copy) constraints being generated +during solving. (The exception is reflection, which creates various +constraints during solving as new types flow to reflect.Value +operations.) This improves the traction of presolver optimisations, +but imposes certain restrictions, e.g. potential context sensitivity +is limited since all variants must be created a priori. + + TERMINOLOGY +A type is said to be "pointer-like" if it is a reference to an object. +Pointer-like types include pointers and also interfaces, maps, channels, +functions and slices. 
+ We occasionally use C's x->f notation to distinguish the case where x is a struct pointer from x.f where is a struct value. +Pointer analysis literature (and our comments) often uses the notation +dst=*src+offset to mean something different than what it means in Go. +It means: for each node index p in pts(src), the node index p+offset is +in pts(dst). Similarly *dst+offset=src is used for store constraints +and dst=src+offset for offset-address constraints. + NODES @@ -68,18 +143,19 @@ pointers) and members of points-to sets (things that can be pointed at, i.e. "labels"). Nodes are naturally numbered. The numbering enables compact -representations of sets of nodes such as bitvectors or BDDs; and the -ordering enables a very cheap way to group related nodes together. -(For example, passing n parameters consists of generating n parallel -constraints from caller+i to callee+i for 0<=i<n.) - -The zero nodeid means "not a pointer". Currently it's only used for -struct{} or (). We generate all flow constraints, even for non-pointer -types, with the expectations that (a) presolver optimisations will -quickly collapse all the non-pointer ones and (b) we may get more -precise results by treating uintptr as a possible pointer. - -Each node represents a scalar (word-sized) part of a value or object. +representations of sets of nodes such as bitvectors (or BDDs); and the +ordering enables a very cheap way to group related nodes together. For +example, passing n parameters consists of generating n parallel +constraints from caller+i to callee+i for 0<=i<n. + +The zero nodeid means "not a pointer". For simplicity, we generate flow +constraints even for non-pointer types such as int. The pointer +equivalence (PE) presolver optimization detects which variables cannot +point to anything; this includes not only all variables of non-pointer +types (such as int) but also variables of pointer-like types if they are +always nil, or are parameters to a function that is never called. 
+ +Each node represents a scalar part of a value or object. Aggregate types (structs, tuples, arrays) are recursively flattened out into a sequential list of scalar component types, and all the elements of an array are represented by a single node. (The @@ -103,26 +179,24 @@ Objects include: - variable allocations in the stack frame or heap; - maps, channels and slices created by calls to make(); - allocations to construct an interface; - - allocations caused by literals and conversions, - e.g. []byte("foo"), []byte(str). + - allocations caused by conversions, e.g. []byte(str). - arrays allocated by calls to append(); -Many objects have no Go types. For example, the func, map and chan -type kinds in Go are all varieties of pointers, but the respective -objects are actual functions, maps, and channels. Given the way we -model interfaces, they too are pointers to tagged objects with no -Go type. And an *ssa.Global denotes the address of a global variable, -but the object for a Global is the actual data. So, types of objects -are usually "off by one indirection". +Many objects have no Go types. For example, the func, map and chan type +kinds in Go are all varieties of pointers, but their respective objects +are actual functions (executable code), maps (hash tables), and channels +(synchronized queues). Given the way we model interfaces, they too are +pointers to "tagged" objects with no Go type. And an *ssa.Global denotes +the address of a global variable, but the object for a Global is the +actual data. So, the types of an ssa.Value that creates an object is +"off by one indirection": a pointer to the object. -The individual nodes of an object are sometimes referred to as -"labels". +The individual nodes of an object are sometimes referred to as "labels". -Objects containing no nodes (i.e. just empty structs; tuples may be -values but never objects in Go) are padded with an invalid-type node -to have at least one node so that there is something to point to. 
-(TODO(adonovan): I think this is unnecessary now that we have identity -nodes; check.) +For uniformity, all objects have a non-zero number of fields, even those +of the empty type struct{}. (All arrays are treated as if of length 1, +so there are no empty arrays. The empty tuple is never address-taken, +so is never an object.) TAGGED OBJECTS @@ -133,7 +207,7 @@ An tagged object has the following layout: v ... -The T node's typ field is the dynamic type of the "payload", the value +The T node's typ field is the dynamic type of the "payload": the value v which follows, flattened out. The T node's obj has the otTagged flag. @@ -143,7 +217,7 @@ as a pointer that exclusively points to tagged objects. Tagged objects may be indirect (obj.flags ⊇ {otIndirect}) meaning that the value v is not of type T but *T; this is used only for -reflect.Values that represent lvalues. +reflect.Values that represent lvalues. (These are not implemented yet.) ANALYSIS ABSTRACTION OF EACH TYPE @@ -153,7 +227,7 @@ single node: basic types, pointers, channels, maps, slices, 'func' pointers, interfaces. Pointers - Nothing to say here. + Nothing to say here, oddly. Basic types (bool, string, numbers, unsafe.Pointer) Currently all fields in the flattening of a type, including @@ -172,9 +246,8 @@ Basic types (bool, string, numbers, unsafe.Pointer) zero nodeid, and fields of these types within aggregate other types are omitted. - unsafe.Pointer conversions are not yet modelled as pointer - conversions. Consequently uintptr is always a number and uintptr - nodes do not point to any object. + unsafe.Pointers are not modelled as pointers, so a conversion of an + unsafe.Pointer to *T is (unsoundly) treated equivalent to new(T). Channels An expression of type 'chan T' is a kind of pointer that points @@ -227,12 +300,14 @@ Functions The first node is the function's identity node. 
Associated with every callsite is a special "targets" variable, - whose pts(·) contains the identity node of each function to which - the call may dispatch. Identity words are not otherwise used. + whose pts() contains the identity node of each function to which + the call may dispatch. Identity words are not otherwise used during + the analysis, but we construct the call graph from the pts() + solution for such nodes. - The following block of nodes represent the flattened-out types of - the parameters and results of the function object, and are - collectively known as its "P/R block". + The following block of contiguous nodes represents the flattened-out + types of the parameters ("P-block") and results ("R-block") of the + function object. The treatment of free variables of closures (*ssa.FreeVar) is like that of global variables; it is not context-sensitive. @@ -255,18 +330,20 @@ Interfaces all of the concrete type's methods; we can't tell a priori which ones may be called. - TypeAssert y = x.(T) is implemented by a dynamic filter triggered by - each tagged object E added to pts(x). If T is an interface that E.T - implements, E is added to pts(y). If T is a concrete type then edge - E.v -> pts(y) is added. + TypeAssert y = x.(T) is implemented by a dynamic constraint + triggered by each tagged object O added to pts(x): a typeFilter + constraint if T is an interface type, or an untag constraint if T is + a concrete type. A typeFilter tests whether O.typ implements T; if + so, O is added to pts(y). An untagFilter tests whether O.typ is + assignable to T,and if so, a copy edge O.v -> y is added. ChangeInterface is a simple copy because the representation of tagged objects is independent of the interface type (in contrast to the "method tables" approach used by the gc runtime). - y := Invoke x.m(...) is implemented by allocating a contiguous P/R - block for the callsite and adding a dynamic rule triggered by each - tagged object E added to pts(x). 
The rule adds param/results copy + y := Invoke x.m(...) is implemented by allocating contiguous P/R + blocks for the callsite and adding a dynamic rule triggered by each + tagged object added to pts(x). The rule adds param/results copy edges to/from each discovered concrete method. (Q. Why do we model an interface as a pointer to a pair of type and @@ -279,8 +356,7 @@ Interfaces reflect.Value A reflect.Value is modelled very similar to an interface{}, i.e. as - a pointer exclusively to tagged objects, but with two - generalizations. + a pointer exclusively to tagged objects, but with two generalizations. 1) a reflect.Value that represents an lvalue points to an indirect (obj.flags ⊇ {otIndirect}) tagged object, which has a similar @@ -304,6 +380,8 @@ reflect.Value corresponds to the user-visible dynamic type, and the existence of a pointer is an implementation detail. + (NB: indirect tagged objects are not yet implemented) + 2) The dynamic type tag of a tagged object pointed to by a reflect.Value may be an interface type; it need not be concrete. @@ -350,15 +428,15 @@ Structs struct is a pointer to its identity node. That node allows us to distinguish a pointer to a struct from a pointer to its first field. - Field offsets are currently the logical field offsets (plus one for - the identity node), so the sizes of the fields can be ignored by the - analysis. + Field offsets are logical field offsets (plus one for the identity + node), so the sizes of the fields can be ignored by the analysis. - Sound treatment of unsafe.Pointer conversions (not yet implemented) - would require us to model memory layout using physical field offsets - to ascertain which object field(s) might be aliased by a given - FieldAddr of a different base pointer type. It would also require - us to dispense with the identity node. + (The identity node is non-traditional but enables the distiction + described above, which is valuable for code comprehension tools. 
+ Typical pointer analyses for C, whose purpose is compiler + optimization, must soundly model unsafe.Pointer (void*) conversions, + and this requires fidelity to the actual memory layout using physical + field offsets.) *ssa.Field y = x.f creates a simple edge to y from x's node at f's offset. @@ -403,10 +481,16 @@ FUNCTION CALLS A static call consists three steps: - finding the function object of the callee; - creating copy edges from the actual parameter value nodes to the - params block in the function object (this includes the receiver - if the callee is a method); - - creating copy edges from the results block in the function - object to the value nodes for the result of the call. + P-block in the function object (this includes the receiver if + the callee is a method); + - creating copy edges from the R-block in the function object to + the value nodes for the result of the call. + + A static function call is little more than two struct value copies + between the P/R blocks of caller and callee: + + callee.P = caller.P + caller.R = callee.R Context sensitivity @@ -422,13 +506,12 @@ FUNCTION CALLS Dynamic calls work in a similar manner except that the creation of copy edges occurs dynamically, in a similar fashion to a pair of - struct copies: + struct copies in which the callee is indirect: - *fn->params = callargs - callresult = *fn->results + callee->P = caller.P + caller.R = callee->R - (Recall that the function object's params and results blocks are - contiguous.) + (Recall that the function object's P- and R-blocks are contiguous.) Interface method invocation @@ -436,7 +519,8 @@ FUNCTION CALLS callsite and attach a dynamic closure rule to the interface. For each new tagged object that flows to the interface, we look up the concrete method, find its function object, and connect its P/R - block to the callsite's P/R block. + blocks to the callsite's P/R blocks, adding copy edges to the graph + during solving. 
Recording call targets @@ -451,14 +535,38 @@ FUNCTION CALLS internally this just iterates all "targets" variables' pts(·)s. +PRESOLVER + +We implement Hash-Value Numbering (HVN), a pre-solver constraint +optimization described in Hardekopf & Lin, SAS'07. This is documented +in more detail in hvn.go. We intend to add its cousins HR and HU in +future. + + SOLVER -The solver is currently a very naive Andersen-style implementation, -although it does use difference propagation (Pearce et al, SQC'04). -There is much to do. +The solver is currently a naive Andersen-style implementation; it does +not perform online cycle detection, though we plan to add solver +optimisations such as Hybrid- and Lazy- Cycle Detection from (Hardekopf +& Lin, PLDI'07). + +It uses difference propagation (Pearce et al, SQC'04) to avoid +redundant re-triggering of closure rules for values already seen. + +Points-to sets are represented using sparse bit vectors (similar to +those used in LLVM and gcc), which are more space- and time-efficient +than sets based on Go's built-in map type or dense bit vectors. +Nodes are permuted prior to solving so that object nodes (which may +appear in points-to sets) are lower numbered than non-object (var) +nodes. This improves the density of the set over which the PTSs +range, and thus the efficiency of the representation. -FURTHER READING. +Partly thanks to avoiding map iteration, the execution of the solver is +100% deterministic, a great help during debugging. + + +FURTHER READING Andersen, L. O. 1994. Program analysis and specialization for the C programming language. Ph.D. dissertation. DIKU, University of @@ -492,5 +600,11 @@ international conference on Static Analysis (SAS'07), Hanne Riis Nielson and Gilberto Filé (Eds.). Springer-Verlag, Berlin, Heidelberg, 265-280. +Atanas Rountev and Satish Chandra. 2000. Off-line variable substitution +for scaling points-to analysis. 
In Proceedings of the ACM SIGPLAN 2000 +conference on Programming language design and implementation (PLDI '00). +ACM, New York, NY, USA, 47-56. DOI=10.1145/349299.349310 +http://doi.acm.org/10.1145/349299.349310 + */ package pointer diff --git a/go/pointer/gen.go b/go/pointer/gen.go index bcb7448b9..65988fb37 100644 --- a/go/pointer/gen.go +++ b/go/pointer/gen.go @@ -59,7 +59,7 @@ func (a *analysis) addNodes(typ types.Type, comment string) nodeid { // func (a *analysis) addOneNode(typ types.Type, comment string, subelement *fieldInfo) nodeid { id := a.nextNode() - a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement}) + a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement, solve: new(solverState)}) if a.log != nil { fmt.Fprintf(a.log, "\tcreate n%d %s for %s%s\n", id, typ, comment, subelement.path()) @@ -178,6 +178,9 @@ func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, data interface{}) nod // makeRtype returns the canonical tagged object of type *rtype whose // payload points to the sole rtype object for T. +// +// TODO(adonovan): move to reflect.go; it's part of the solver really. +// func (a *analysis) makeRtype(T types.Type) nodeid { if v := a.rtypes.At(T); v != nil { return v.(nodeid) @@ -261,7 +264,7 @@ func (a *analysis) taggedValue(obj nodeid) (tDyn types.Type, v nodeid, indirect return n.typ, obj + 1, flags&otIndirect != 0 } -// funcParams returns the first node of the params block of the +// funcParams returns the first node of the params (P) block of the // function whose object node (obj.flags&otFunction) is id. // func (a *analysis) funcParams(id nodeid) nodeid { @@ -272,7 +275,7 @@ func (a *analysis) funcParams(id nodeid) nodeid { return id + 1 } -// funcResults returns the first node of the results block of the +// funcResults returns the first node of the results (R) block of the // function whose object node (obj.flags&otFunction) is id. 
// func (a *analysis) funcResults(id nodeid) nodeid { @@ -662,9 +665,9 @@ func (a *analysis) genDynamicCall(caller *cgnode, site *callsite, call *ssa.Call // pts(targets) will be the set of possible call targets. site.targets = a.valueNode(call.Value) - // We add dynamic closure rules that store the arguments into, - // and load the results from, the P/R block of each function - // discovered in pts(targets). + // We add dynamic closure rules that store the arguments into + // the P-block and load the results from the R-block of each + // function discovered in pts(targets). sig := call.Signature() var offset uint32 = 1 // P/R block starts at offset 1 @@ -705,9 +708,9 @@ func (a *analysis) genInvoke(caller *cgnode, site *callsite, call *ssa.CallCommo a.copy(result, r, a.sizeof(sig.Results())) } - // We add a dynamic invoke constraint that will add - // edges from the caller's P/R block to the callee's - // P/R block for each discovered call target. + // We add a dynamic invoke constraint that will connect the + // caller's and the callee's P/R blocks for each discovered + // call target. a.addConstraint(&invokeConstraint{call.Method, a.valueNode(call.Value), block}) } @@ -749,7 +752,7 @@ func (a *analysis) genInvokeReflectType(caller *cgnode, site *callsite, call *ss a.copy(params, rtype, 1) params++ - // Copy actual parameters into formal params block. + // Copy actual parameters into formal P-block. // Must loop, since the actuals aren't contiguous. for i, arg := range call.Args { sz := a.sizeof(sig.Params().At(i).Type()) @@ -757,7 +760,7 @@ func (a *analysis) genInvokeReflectType(caller *cgnode, site *callsite, call *ss params += nodeid(sz) } - // Copy formal results block to actual result. + // Copy formal R-block to actual R-block. 
if result != 0 { a.copy(result, a.funcResults(obj), a.sizeof(sig.Results())) } @@ -790,6 +793,7 @@ func (a *analysis) genCall(caller *cgnode, instr ssa.CallInstruction) { caller.sites = append(caller.sites, site) if a.log != nil { + // TODO(adonovan): debug: improve log message. fmt.Fprintf(a.log, "\t%s to targets %s from %s\n", site, site.targets, caller) } } @@ -863,7 +867,15 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid { obj = a.nextNode() tmap := v.Type().Underlying().(*types.Map) a.addNodes(tmap.Key(), "makemap.key") - a.addNodes(tmap.Elem(), "makemap.value") + elem := a.addNodes(tmap.Elem(), "makemap.value") + + // To update the value field, MapUpdate + // generates store-with-offset constraints which + // the presolver can't model, so we must mark + // those nodes indirect. + for id, end := elem, elem+nodeid(a.sizeof(tmap.Elem())); id < end; id++ { + a.mapValues = append(a.mapValues, id) + } a.endObject(obj, cgn, v) case *ssa.MakeInterface: @@ -1140,8 +1152,7 @@ func (a *analysis) genFunc(cgn *cgnode) { impl := a.findIntrinsic(fn) if a.log != nil { - fmt.Fprintln(a.log) - fmt.Fprintln(a.log) + fmt.Fprintf(a.log, "\n\n==== Generating constraints for %s, %s\n", cgn, cgn.contour()) // Hack: don't display body if intrinsic. if impl != nil { @@ -1187,6 +1198,7 @@ func (a *analysis) genFunc(cgn *cgnode) { // Create value nodes for all value instructions // since SSA may contain forward references. + var space [10]*ssa.Value for _, b := range fn.Blocks { for _, instr := range b.Instrs { switch instr := instr.(type) { @@ -1202,6 +1214,19 @@ func (a *analysis) genFunc(cgn *cgnode) { id := a.addNodes(instr.Type(), comment) a.setValueNode(instr, id, cgn) } + + // Record all address-taken functions (for presolver). + rands := instr.Operands(space[:0]) + if call, ok := instr.(ssa.CallInstruction); ok && !call.Common().IsInvoke() { + // Skip CallCommon.Value in "call" mode. + // TODO(adonovan): fix: relies on unspecified ordering. Specify it. 
+ rands = rands[1:] + } + for _, rand := range rands { + if atf, ok := (*rand).(*ssa.Function); ok { + a.atFuncs[atf] = true + } + } } } @@ -1218,14 +1243,29 @@ func (a *analysis) genFunc(cgn *cgnode) { // genMethodsOf generates nodes and constraints for all methods of type T. func (a *analysis) genMethodsOf(T types.Type) { + itf := isInterface(T) + + // TODO(adonovan): can we skip this entirely if itf is true? + // I think so, but the answer may depend on reflection. mset := a.prog.MethodSets.MethodSet(T) for i, n := 0, mset.Len(); i < n; i++ { - a.valueNode(a.prog.Method(mset.At(i))) + m := a.prog.Method(mset.At(i)) + a.valueNode(m) + + if !itf { + // Methods of concrete types are address-taken functions. + a.atFuncs[m] = true + } } } // generate generates offline constraints for the entire program. func (a *analysis) generate() { + start("Constraint generation") + if a.log != nil { + fmt.Fprintln(a.log, "==== Generating constraints") + } + // Create a dummy node since we use the nodeid 0 for // non-pointerlike variables. a.addNodes(tInvalid, "(zero)") @@ -1273,4 +1313,6 @@ func (a *analysis) generate() { a.globalval = nil a.localval = nil a.localobj = nil + + stop("Constraint generation") } diff --git a/go/pointer/hvn.go b/go/pointer/hvn.go new file mode 100644 index 000000000..1dd9049c2 --- /dev/null +++ b/go/pointer/hvn.go @@ -0,0 +1,968 @@ +package pointer + +// This file implements Hash-Value Numbering (HVN), a pre-solver +// constraint optimization described in Hardekopf & Lin, SAS'07 (see +// doc.go) that analyses the graph topology to determine which sets of +// variables are "pointer equivalent" (PE), i.e. must have identical +// points-to sets in the solution. +// +// A separate ("offline") graph is constructed. Its nodes are those of +// the main-graph, plus an additional node *X for each pointer node X. +// With this graph we can reason about the unknown points-to set of +// dereferenced pointers. 
(We do not generalize this to represent +// unknown fields x->f, perhaps because such fields would be numerous, +// though it might be worth an experiment.) +// +// Nodes whose points-to relations are not entirely captured by the +// graph are marked as "indirect": the *X nodes, the parameters of +// address-taken functions (which includes all functions in method +// sets), or nodes updated by the solver rules for reflection, etc. +// +// All addr (y=&x) nodes are initially assigned a pointer-equivalence +// (PE) label equal to x's nodeid in the main graph. (These are the +// only PE labels that are less than len(a.nodes).) +// +// All offsetAddr (y=&x.f) constraints are initially assigned a PE +// label; such labels are memoized, keyed by (x, f), so that equivalent +// nodes y are assigned the same label. +// +// Then we process each strongly connected component (SCC) of the graph +// in topological order, assigning it a PE label based on the set P of +// PE labels that flow to it from its immediate dependencies. +// +// If any node in P is "indirect", the entire SCC is assigned a fresh PE +// label. Otherwise: +// +// |P|=0 if P is empty, all nodes in the SCC are non-pointers (e.g. +// uninitialized variables, or formal params of dead functions) +// and the SCC is assigned the PE label of zero. +// +// |P|=1 if P is a singleton, the SCC is assigned the same label as the +// sole element of P. +// +// |P|>1 if P contains multiple labels, a unique label representing P is +// invented and recorded in a hash table, so that other +// equivalent SCCs may also be assigned this label, akin to +// conventional hash-value numbering in a compiler. +// +// Finally, a renumbering is computed such that each node is replaced by +// the lowest-numbered node with the same PE label. All constraints are +// renumbered, and any resulting duplicates are eliminated. 
+
// The only nodes that are not renumbered are the objects x in addr +// (y=&x) constraints, since the ids of these nodes (and fields derived +// from them via offsetAddr rules) are the elements of all points-to +// sets, so they must remain as they are if we want the same solution. +// +// The solverStates (node.solve) for nodes in the same equivalence class +// are linked together so that all nodes in the class have the same +// solution. This avoids the need to renumber nodeids buried in +// Queries, cgnodes, etc (like (*analysis).renumber() does) since only +// the solution is needed. +// +// The result of HVN is that the number of distinct nodes and +// constraints is reduced, but the solution is identical (almost---see +// CROSS-CHECK below). In particular, both linear and cyclic chains of +// copies are each replaced by a single node. +// +// Nodes and constraints created "online" (e.g. while solving reflection +// constraints) are not subject to this optimization. +// +// PERFORMANCE +// +// In two benchmarks (oracle and godoc), HVN eliminates about two thirds +// of nodes, the majority accounted for by non-pointers: nodes of +// non-pointer type, pointers that remain nil, formal parameters of dead +// functions, nodes of untracked types, etc. It also reduces the number +// of constraints, also by about two thirds, and the solving time by +// 30--42%, although we must pay about 15% for the running time of HVN +// itself. The benefit is greater for larger applications. +// +// There are many possible optimizations to improve the performance: +// * Use fewer than 1:1 onodes to main graph nodes: many of the onodes +// we create are not needed. +// * HU (HVN with Union---see paper): coalesce "union" peLabels when +// their expanded-out sets are equal. +// * HR (HVN with deReference---see paper): this will require that we +// apply HVN until fixed point, which may need more bookkeeping of the +// correspondence of main nodes to onodes. 
+// * Location Equivalence (see paper): have points-to sets contain not +// locations but location-equivalence class labels, each representing +// a set of locations. +// * HVN with field-sensitive ref: model each of the fields of a +// pointer-to-struct. +// +// CROSS-CHECK +// +// To verify the soundness of the optimization, when the +// debugHVNCrossCheck option is enabled, we run the solver twice, once +// before and once after running HVN, dumping the solution to disk, and +// then we compare the results. If they are not identical, the analysis +// panics. +// +// The solution dumped to disk includes only the N*N submatrix of the +// complete solution where N is the number of nodes after generation. +// In other words, we ignore pointer variables and objects created by +// the solver itself, since their numbering depends on the solver order, +// which is affected by the optimization. In any case, that's the only +// part the client cares about. +// +// The cross-check is too strict and may fail spuriously. Although the +// H&L paper describing HVN states that the solutions obtained should be +// identical, this is not the case in practice because HVN can collapse +// cycles involving *p even when pts(p)={}. Consider this example +// distilled from testdata/hello.go: +// +// var x T +// func f(p **T) { +// t0 = *p +// ... +// t1 = φ(t0, &x) +// *p = t1 +// } +// +// If f is dead code, we get: +// unoptimized: pts(p)={} pts(t0)={} pts(t1)={&x} +// optimized: pts(p)={} pts(t0)=pts(t1)=pts(*p)={&x} +// +// It's hard to argue that this is a bug: the result is sound and the +// loss of precision is inconsequential---f is dead code, after all. +// But unfortunately it limits the usefulness of the cross-check since +// failures must be carefully analyzed. 
Ben Hardekopf suggests (in +// personal correspondence) some approaches to mitigating it: +// +// If there is a node with an HVN points-to set that is a superset +// of the NORM points-to set, then either it's a bug or it's a +// result of this issue. If it's a result of this issue, then in +// the offline constraint graph there should be a REF node inside +// some cycle that reaches this node, and in the NORM solution the +// pointer being dereferenced by that REF node should be the empty +// set. If that isn't true then this is a bug. If it is true, then +// you can further check that in the NORM solution the "extra" +// points-to info in the HVN solution does in fact come from that +// purported cycle (if it doesn't, then this is still a bug). If +// you're doing the further check then you'll need to do it for +// each "extra" points-to element in the HVN points-to set. +// +// There are probably ways to optimize these checks by taking +// advantage of graph properties. For example, extraneous points-to +// info will flow through the graph and end up in many +// nodes. Rather than checking every node with extra info, you +// could probably work out the "origin point" of the extra info and +// just check there. Note that the check in the first bullet is +// looking for soundness bugs, while the check in the second bullet +// is looking for precision bugs; depending on your needs, you may +// care more about one than the other. +// +// which we should evaluate. The cross-check is nonetheless invaluable +// for all but one of the programs in the pointer_test suite. + +import ( + "fmt" + "io" + "log" + "reflect" + + "code.google.com/p/go.tools/container/intsets" + "code.google.com/p/go.tools/go/types" +) + +// A peLabel is a pointer-equivalence label: two nodes with the same +// peLabel have identical points-to solutions. 
+// +// The numbers are allocated consecutively like so: +// 0 not a pointer +// 1..N-1 addrConstraints (equals the constraint's .src field, hence sparse) +// ... offsetAddr constraints +// ... SCCs (with indirect nodes or multiple inputs) +// +// Each PE label denotes a set of pointers containing a single addr, a +// single offsetAddr, or some set of other PE labels. +// +type peLabel int + +type hvn struct { + a *analysis + N int // len(a.nodes) immediately after constraint generation + log io.Writer // (optional) log of HVN lemmas + onodes []*onode // nodes of the offline graph + label peLabel // the next available PE label + hvnLabel map[string]peLabel // hash-value numbering (PE label) for each set of onodeids + stack []onodeid // DFS stack + index int32 // next onode.index, from Tarjan's SCC algorithm + + // For each distinct offsetAddrConstraint (src, offset) pair, + // offsetAddrLabels records a unique PE label >= N. + offsetAddrLabels map[offsetAddr]peLabel +} + +// The index of an node in the offline graph. +// (Currently the first N align with the main nodes, +// but this may change with HRU.) +type onodeid uint32 + +// An onode is a node in the offline constraint graph. +// (Where ambiguous, members of analysis.nodes are referred to as +// "main graph" nodes.) +// +// Edges in the offline constraint graph (edges and implicit) point to +// the source, i.e. against the flow of values: they are dependencies. +// Implicit edges are used for SCC computation, but not for gathering +// incoming labels. 
+// +type onode struct { + rep onodeid // index of representative of SCC in offline constraint graph + + edges intsets.Sparse // constraint edges X-->Y (this onode is X) + implicit intsets.Sparse // implicit edges *X-->*Y (this onode is X) + peLabels intsets.Sparse // set of peLabels are pointer-equivalent to this one + indirect bool // node has points-to relations not represented in graph + + // Tarjan's SCC algorithm + index, lowlink int32 // Tarjan numbering + scc int32 // -ve => on stack; 0 => unvisited; +ve => node is root of a found SCC +} + +type offsetAddr struct { + ptr nodeid + offset uint32 +} + +// nextLabel issues the next unused pointer-equivalence label. +func (h *hvn) nextLabel() peLabel { + h.label++ + return h.label +} + +// ref(X) returns the index of the onode for *X. +func (h *hvn) ref(id onodeid) onodeid { + return id + onodeid(len(h.a.nodes)) +} + +// hvn computes pointer-equivalence labels (peLabels) using the Hash-based +// Value Numbering (HVN) algorithm described in Hardekopf & Lin, SAS'07. +// +func (a *analysis) hvn() { + start("HVN") + + if a.log != nil { + fmt.Fprintf(a.log, "\n\n==== Pointer equivalence optimization\n\n") + } + + h := hvn{ + a: a, + N: len(a.nodes), + log: a.log, + hvnLabel: make(map[string]peLabel), + offsetAddrLabels: make(map[offsetAddr]peLabel), + } + + if h.log != nil { + fmt.Fprintf(h.log, "\nCreating offline graph nodes...\n") + } + + // Create offline nodes. The first N nodes correspond to main + // graph nodes; the next N are their corresponding ref() nodes. + h.onodes = make([]*onode, 2*h.N) + for id := range a.nodes { + id := onodeid(id) + h.onodes[id] = &onode{} + h.onodes[h.ref(id)] = &onode{indirect: true} + } + + // Each node initially represents just itself. + for id, o := range h.onodes { + o.rep = onodeid(id) + } + + h.markIndirectNodes() + + // Reserve the first N PE labels for addrConstraints. + h.label = peLabel(h.N) + + // Add offline constraint edges. 
+ if h.log != nil { + fmt.Fprintf(h.log, "\nAdding offline graph edges...\n") + } + for _, c := range a.constraints { + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "; %s\n", c) + } + c.presolve(&h) + } + + // Find and collapse SCCs. + if h.log != nil { + fmt.Fprintf(h.log, "\nFinding SCCs...\n") + } + h.index = 1 + for id, o := range h.onodes { + if id > 0 && o.index == 0 { + // Start depth-first search at each unvisited node. + h.visit(onodeid(id)) + } + } + + // Dump the solution + // (NB: somewhat redundant with logging from simplify().) + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\nPointer equivalences:\n") + for id, o := range h.onodes { + if id == 0 { + continue + } + if id == int(h.N) { + fmt.Fprintf(h.log, "---\n") + } + fmt.Fprintf(h.log, "o%d\t", id) + if o.rep != onodeid(id) { + fmt.Fprintf(h.log, "rep=o%d", o.rep) + } else { + fmt.Fprintf(h.log, "p%d", o.peLabels.Min()) + if o.indirect { + fmt.Fprint(h.log, " indirect") + } + } + fmt.Fprintln(h.log) + } + } + + // Simplify the main constraint graph + h.simplify() + + a.showCounts() + + stop("HVN") +} + +// ---- constraint-specific rules ---- + +// dst := &src +func (c *addrConstraint) presolve(h *hvn) { + // Each object (src) is an initial PE label. + label := peLabel(c.src) // label < N + if debugHVNVerbose && h.log != nil { + // duplicate log messages are possible + fmt.Fprintf(h.log, "\tcreate p%d: {&n%d}\n", label, c.src) + } + odst := onodeid(c.dst) + osrc := onodeid(c.src) + + // Assign dst this label. + h.onodes[odst].peLabels.Insert(int(label)) + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d has p%d\n", odst, label) + } + + h.addImplicitEdge(h.ref(odst), osrc) // *dst ~~> src. 
+} + +// dst = src +func (c *copyConstraint) presolve(h *hvn) { + odst := onodeid(c.dst) + osrc := onodeid(c.src) + h.addEdge(odst, osrc) // dst --> src + h.addImplicitEdge(h.ref(odst), h.ref(osrc)) // *dst ~~> *src +} + +// dst = *src + offset +func (c *loadConstraint) presolve(h *hvn) { + odst := onodeid(c.dst) + osrc := onodeid(c.src) + if c.offset == 0 { + h.addEdge(odst, h.ref(osrc)) // dst --> *src + } else { + // We don't interpret load-with-offset, e.g. results + // of map value lookup, R-block of dynamic call, slice + // copy/append, reflection. + h.markIndirect(odst, "load with offset") + } +} + +// *dst + offset = src +func (c *storeConstraint) presolve(h *hvn) { + odst := onodeid(c.dst) + osrc := onodeid(c.src) + if c.offset == 0 { + h.onodes[h.ref(odst)].edges.Insert(int(osrc)) // *dst --> src + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d --> o%d\n", h.ref(odst), osrc) + } + } else { + // We don't interpret store-with-offset. + // See discussion of soundness at markIndirectNodes. + } +} + +// dst = &src.offset +func (c *offsetAddrConstraint) presolve(h *hvn) { + // Give each distinct (addr, offset) pair a fresh PE label. + // The cache performs CSE, effectively. + key := offsetAddr{c.src, c.offset} + label, ok := h.offsetAddrLabels[key] + if !ok { + label = h.nextLabel() + h.offsetAddrLabels[key] = label + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\tcreate p%d: {&n%d.#%d}\n", + label, c.src, c.offset) + } + } + + // Assign dst this label. 
+ h.onodes[c.dst].peLabels.Insert(int(label)) + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d has p%d\n", c.dst, label) + } +} + +// dst = src.(typ) where typ is an interface +func (c *typeFilterConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.dst), "typeFilter result") +} + +// dst = src.(typ) where typ is concrete +func (c *untagConstraint) presolve(h *hvn) { + odst := onodeid(c.dst) + for end := odst + onodeid(h.a.sizeof(c.typ)); odst < end; odst++ { + h.markIndirect(odst, "untag result") + } +} + +// dst = src.method(c.params...) +func (c *invokeConstraint) presolve(h *hvn) { + // All methods are address-taken functions, so + // their formal P-blocks were already marked indirect. + + // Mark the caller's targets node as indirect. + sig := c.method.Type().(*types.Signature) + id := c.params + h.markIndirect(onodeid(c.params), "invoke targets node") + id++ + + id += nodeid(h.a.sizeof(sig.Params())) + + // Mark the caller's R-block as indirect. + end := id + nodeid(h.a.sizeof(sig.Results())) + for id < end { + h.markIndirect(onodeid(id), "invoke R-block") + id++ + } +} + +// markIndirectNodes marks as indirect nodes whose points-to relations +// are not entirely captured by the offline graph, including: +// +// (a) All address-taken nodes (including the following nodes within +// the same object). This is described in the paper. +// +// The most subtle cause of indirect nodes is the generation of +// store-with-offset constraints since the offline graph doesn't +// represent them. A global audit of constraint generation reveals the +// following uses of store-with-offset: +// +// (b) genDynamicCall, for P-blocks of dynamically called functions, +// to which dynamic copy edges will be added to them during +// solving: from storeConstraint for standalone functions, +// and from invokeConstraint for methods. +// All such P-blocks must be marked indirect. +// (c) MapUpdate, to update the value part of a map object. 
+// All MakeMap objects' value parts must be marked indirect. +// (d) copyElems, to update the destination array. +// All array elements must be marked indirect. +// +// Not all indirect marking happens here. ref() nodes are marked +// indirect at construction, and each constraint's presolve() method may +// mark additional nodes. +// +func (h *hvn) markIndirectNodes() { + // (a) all address-taken nodes, plus all nodes following them + // within the same object, since these may be indirectly + // stored or address-taken. + for _, c := range h.a.constraints { + if c, ok := c.(*addrConstraint); ok { + start := h.a.enclosingObj(c.src) + end := start + nodeid(h.a.nodes[start].obj.size) + for id := c.src; id < end; id++ { + h.markIndirect(onodeid(id), "A-T object") + } + } + } + + // (b) P-blocks of all address-taken functions. + for id := 0; id < h.N; id++ { + obj := h.a.nodes[id].obj + + // TODO(adonovan): opt: if obj.cgn.fn is a method and + // obj.cgn is not its shared contour, this is an + // "inlined" static method call. We needn't consider it + // address-taken since no invokeConstraint will affect it. + + if obj != nil && obj.flags&otFunction != 0 && h.a.atFuncs[obj.cgn.fn] { + // address-taken function + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "n%d is address-taken: %s\n", id, obj.cgn.fn) + } + h.markIndirect(onodeid(id), "A-T func identity") + id++ + sig := obj.cgn.fn.Signature + psize := h.a.sizeof(sig.Params()) + if sig.Recv() != nil { + psize += h.a.sizeof(sig.Recv().Type()) + } + for end := id + int(psize); id < end; id++ { + h.markIndirect(onodeid(id), "A-T func P-block") + } + id-- + continue + } + } + + // (c) all map objects' value fields. + for _, id := range h.a.mapValues { + h.markIndirect(onodeid(id), "makemap.value") + } + + // (d) all array element objects. + // TODO(adonovan): opt: can we do better? + for id := 0; id < h.N; id++ { + // Identity node for an object of array type? 
+ if tArray, ok := h.a.nodes[id].typ.(*types.Array); ok { + // Mark the array element nodes indirect. + // (Skip past the identity field.) + for _ = range h.a.flatten(tArray.Elem()) { + id++ + h.markIndirect(onodeid(id), "array elem") + } + } + } +} + +func (h *hvn) markIndirect(oid onodeid, comment string) { + h.onodes[oid].indirect = true + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d is indirect: %s\n", oid, comment) + } +} + +// Adds an edge dst-->src. +// Note the unusual convention: edges are dependency (contraflow) edges. +func (h *hvn) addEdge(odst, osrc onodeid) { + h.onodes[odst].edges.Insert(int(osrc)) + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d --> o%d\n", odst, osrc) + } +} + +func (h *hvn) addImplicitEdge(odst, osrc onodeid) { + h.onodes[odst].implicit.Insert(int(osrc)) + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d ~~> o%d\n", odst, osrc) + } +} + +// visit implements the depth-first search of Tarjan's SCC algorithm. +// Precondition: x is canonical. +func (h *hvn) visit(x onodeid) { + h.checkCanonical(x) + xo := h.onodes[x] + xo.index = h.index + xo.lowlink = h.index + h.index++ + + h.stack = append(h.stack, x) // push + assert(xo.scc == 0, "node revisited") + xo.scc = -1 + + var deps []int + deps = xo.edges.AppendTo(deps) + deps = xo.implicit.AppendTo(deps) + + for _, y := range deps { + // Loop invariant: x is canonical. + + y := h.find(onodeid(y)) + + if x == y { + continue // nodes already coalesced + } + + xo := h.onodes[x] + yo := h.onodes[y] + + switch { + case yo.scc > 0: + // y is already a collapsed SCC + + case yo.scc < 0: + // y is on the stack, and thus in the current SCC. + if yo.index < xo.lowlink { + xo.lowlink = yo.index + } + + default: + // y is unvisited; visit it now. + h.visit(y) + // Note: x and y are now non-canonical. 
+ + x = h.find(onodeid(x)) + + if yo.lowlink < xo.lowlink { + xo.lowlink = yo.lowlink + } + } + } + h.checkCanonical(x) + + // Is x the root of an SCC? + if xo.lowlink == xo.index { + // Coalesce all nodes in the SCC. + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "scc o%d\n", x) + } + for { + // Pop y from stack. + i := len(h.stack) - 1 + y := h.stack[i] + h.stack = h.stack[:i] + + h.checkCanonical(x) + xo := h.onodes[x] + h.checkCanonical(y) + yo := h.onodes[y] + + if xo == yo { + // SCC is complete. + xo.scc = 1 + h.labelSCC(x) + break + } + h.coalesce(x, y) + } + } +} + +// Precondition: x is canonical. +func (h *hvn) labelSCC(x onodeid) { + h.checkCanonical(x) + xo := h.onodes[x] + xpe := &xo.peLabels + + // All indirect nodes get new labels. + if xo.indirect { + label := h.nextLabel() + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\tcreate p%d: indirect SCC\n", label) + fmt.Fprintf(h.log, "\to%d has p%d\n", x, label) + } + + // Remove pre-labeling, in case a direct pre-labeled node was + // merged with an indirect one. + xpe.Clear() + xpe.Insert(int(label)) + + return + } + + // Invariant: all peLabels sets are non-empty. + // Those that are logically empty contain zero as their sole element. + // No other sets contains zero. + + // Find all labels coming in to the coalesced SCC node. + for _, y := range xo.edges.AppendTo(nil) { + y := h.find(onodeid(y)) + if y == x { + continue // already coalesced + } + ype := &h.onodes[y].peLabels + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\tedge from o%d = %s\n", y, ype) + } + + if ype.IsEmpty() { + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\tnode has no PE label\n") + } + } + assert(!ype.IsEmpty(), "incoming node has no PE label") + + if ype.Has(0) { + // {0} represents a non-pointer. 
+ assert(ype.Len() == 1, "PE set contains {0, ...}") + } else { + xpe.UnionWith(ype) + } + } + + switch xpe.Len() { + case 0: + // SCC has no incoming non-zero PE labels: it is a non-pointer. + xpe.Insert(0) + + case 1: + // already a singleton + + default: + // SCC has multiple incoming non-zero PE labels. + // Find the canonical label representing this set. + // We use String() as a fingerprint consistent with Equals(). + key := xpe.String() + label, ok := h.hvnLabel[key] + if !ok { + label = h.nextLabel() + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\tcreate p%d: union %s\n", label, xpe.String()) + } + h.hvnLabel[key] = label + } + xpe.Clear() + xpe.Insert(int(label)) + } + + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\to%d has p%d\n", x, xpe.Min()) + } +} + +// coalesce combines two nodes in the offline constraint graph. +// Precondition: x and y are canonical. +func (h *hvn) coalesce(x, y onodeid) { + xo := h.onodes[x] + yo := h.onodes[y] + + // x becomes y's canonical representative. + yo.rep = x + + if debugHVNVerbose && h.log != nil { + fmt.Fprintf(h.log, "\tcoalesce o%d into o%d\n", y, x) + } + + // x accumulates y's edges. + xo.edges.UnionWith(&yo.edges) + yo.edges.Clear() + + // x accumulates y's implicit edges. + xo.implicit.UnionWith(&yo.implicit) + yo.implicit.Clear() + + // x accumulates y's pointer-equivalence labels. + xo.peLabels.UnionWith(&yo.peLabels) + yo.peLabels.Clear() + + // x accumulates y's indirect flag. + if yo.indirect { + xo.indirect = true + } +} + +// simplify computes a degenerate renumbering of nodeids from the PE +// labels assigned by the hvn, and uses it to simplify the main +// constraint graph, eliminating non-pointer nodes and duplicate +// constraints. +// +func (h *hvn) simplify() { + // canon maps each peLabel to its canonical main node. 
+ canon := make([]nodeid, h.label) + for i := range canon { + canon[i] = nodeid(h.N) // indicates "unset" + } + + // mapping maps each main node index to the index of the canonical node. + mapping := make([]nodeid, len(h.a.nodes)) + + for id := range h.a.nodes { + id := nodeid(id) + if id == 0 { + canon[0] = 0 + mapping[0] = 0 + continue + } + oid := h.find(onodeid(id)) + peLabels := &h.onodes[oid].peLabels + assert(peLabels.Len() == 1, "PE class is not a singleton") + label := peLabel(peLabels.Min()) + + canonId := canon[label] + if canonId == nodeid(h.N) { + // id becomes the representative of the PE label. + canonId = id + canon[label] = canonId + + if h.a.log != nil { + fmt.Fprintf(h.a.log, "\tpts(n%d) is canonical : \t(%s)\n", + id, h.a.nodes[id].typ) + } + + } else { + // Link the solver states for the two nodes. + assert(h.a.nodes[canonId].solve != nil, "missing solver state") + h.a.nodes[id].solve = h.a.nodes[canonId].solve + + if h.a.log != nil { + // TODO(adonovan): debug: reorganize the log so it prints + // one line: + // pe y = x1, ..., xn + // for each canonical y. Requires allocation. + fmt.Fprintf(h.a.log, "\tpts(n%d) = pts(n%d) : %s\n", + id, canonId, h.a.nodes[id].typ) + } + } + + mapping[id] = canonId + } + + // Renumber the constraints, eliminate duplicates, and eliminate + // any containing non-pointers (n0). + addrs := make(map[addrConstraint]bool) + copys := make(map[copyConstraint]bool) + loads := make(map[loadConstraint]bool) + stores := make(map[storeConstraint]bool) + offsetAddrs := make(map[offsetAddrConstraint]bool) + untags := make(map[untagConstraint]bool) + typeFilters := make(map[typeFilterConstraint]bool) + invokes := make(map[invokeConstraint]bool) + + nbefore := len(h.a.constraints) + cc := h.a.constraints[:0] // in-situ compaction + for _, c := range h.a.constraints { + // Renumber. 
+ switch c := c.(type) { + case *addrConstraint: + // Don't renumber c.src since it is the label of + // an addressable object and will appear in PT sets. + c.dst = mapping[c.dst] + default: + c.renumber(mapping) + } + + if c.ptr() == 0 { + continue // skip: constraint attached to non-pointer + } + + var dup bool + switch c := c.(type) { + case *addrConstraint: + _, dup = addrs[*c] + addrs[*c] = true + + case *copyConstraint: + if c.src == c.dst { + continue // skip degenerate copies + } + if c.src == 0 { + continue // skip copy from non-pointer + } + _, dup = copys[*c] + copys[*c] = true + + case *loadConstraint: + if c.src == 0 { + continue // skip load from non-pointer + } + _, dup = loads[*c] + loads[*c] = true + + case *storeConstraint: + if c.src == 0 { + continue // skip store from non-pointer + } + _, dup = stores[*c] + stores[*c] = true + + case *offsetAddrConstraint: + if c.src == 0 { + continue // skip offset from non-pointer + } + _, dup = offsetAddrs[*c] + offsetAddrs[*c] = true + + case *untagConstraint: + if c.src == 0 { + continue // skip untag of non-pointer + } + _, dup = untags[*c] + untags[*c] = true + + case *typeFilterConstraint: + if c.src == 0 { + continue // skip filter of non-pointer + } + _, dup = typeFilters[*c] + typeFilters[*c] = true + + case *invokeConstraint: + if c.params == 0 { + panic("non-pointer invoke.params") + } + if c.iface == 0 { + continue // skip invoke on non-pointer + } + _, dup = invokes[*c] + invokes[*c] = true + + default: + // We don't bother de-duping advanced constraints + // (e.g. reflection) since they are uncommon. + + // Eliminate constraints containing non-pointer nodeids. + // + // We use reflection to find the fields to avoid + // adding yet another method to constraint. + // + // TODO(adonovan): experiment with a constraint + // method that returns a slice of pointers to + // nodeids fields to enable uniform iteration; + // the renumber() method could be removed and + // implemented using the new one. 
+ // + // TODO(adonovan): opt: this is unsound since + // some constraints still have an effect if one + // of the operands is zero: rVCall, rVMapIndex, + // rvSetMapIndex. Handle them specially. + rtNodeid := reflect.TypeOf(nodeid(0)) + x := reflect.ValueOf(c).Elem() + for i, nf := 0, x.NumField(); i < nf; i++ { + f := x.Field(i) + if f.Type() == rtNodeid { + if f.Uint() == 0 { + dup = true // skip it + break + } + } + } + } + if dup { + continue // skip duplicates + } + + cc = append(cc, c) + } + h.a.constraints = cc + + log.Printf("#constraints: was %d, now %d\n", nbefore, len(h.a.constraints)) +} + +// find returns the canonical onodeid for x. +// (The onodes form a disjoint set forest.) +func (h *hvn) find(x onodeid) onodeid { + // TODO(adonovan): opt: this is a CPU hotspot. Try "union by rank". + xo := h.onodes[x] + rep := xo.rep + if rep != x { + rep = h.find(rep) // simple path compression + xo.rep = rep + } + return rep +} + +func (h *hvn) checkCanonical(x onodeid) { + if debugHVN { + assert(x == h.find(x), "not canonical") + } +} + +func assert(p bool, msg string) { + if debugHVN && !p { + panic("assertion failed: " + msg) + } +} diff --git a/go/pointer/intrinsics.go b/go/pointer/intrinsics.go index b6e0b053f..61e2f7eee 100644 --- a/go/pointer/intrinsics.go +++ b/go/pointer/intrinsics.go @@ -255,13 +255,15 @@ func ext۰NotYetImplemented(a *analysis, cgn *cgnode) { // runtime.SetFinalizer(x, f) type runtimeSetFinalizerConstraint struct { - targets nodeid + targets nodeid // (indirect) f nodeid // (ptr) x nodeid } -func (c *runtimeSetFinalizerConstraint) ptr() nodeid { return c.f } -func (c *runtimeSetFinalizerConstraint) indirect(nodes []nodeid) []nodeid { return nodes } +func (c *runtimeSetFinalizerConstraint) ptr() nodeid { return c.f } +func (c *runtimeSetFinalizerConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.targets), "SetFinalizer.targets") +} func (c *runtimeSetFinalizerConstraint) renumber(mapping []nodeid) { c.targets = 
mapping[c.targets] c.f = mapping[c.f] diff --git a/go/pointer/opt.go b/go/pointer/opt.go index e89953428..2620cc0d8 100644 --- a/go/pointer/opt.go +++ b/go/pointer/opt.go @@ -4,17 +4,12 @@ package pointer -// This file defines the constraint optimiser ("pre-solver"). - -import ( - "fmt" -) - -func (a *analysis) optimize() { - a.renumber() +// This file implements renumbering, a pre-solver optimization to +// improve the efficiency of the solver's points-to set representation. +// +// TODO(adonovan): rename file "renumber.go" - // TODO(adonovan): opt: PE (HVN, HRU), LE, etc. -} +import "fmt" // renumber permutes a.nodes so that all nodes within an addressable // object appear before all non-addressable nodes, maintaining the @@ -30,7 +25,14 @@ func (a *analysis) optimize() { // NB: nodes added during solving (e.g. for reflection, SetFinalizer) // will be appended to the end. // +// Renumbering makes the PTA log inscrutable. To aid debugging, later +// phases (e.g. HVN) must not rely on it having occurred. +// func (a *analysis) renumber() { + if a.log != nil { + fmt.Fprintf(a.log, "\n\n==== Renumbering\n\n") + } + N := nodeid(len(a.nodes)) newNodes := make([]*node, N, N) renumbering := make([]nodeid, N, N) // maps old to new diff --git a/go/pointer/pointer_test.go b/go/pointer/pointer_test.go index 777281ef3..112c1e2d0 100644 --- a/go/pointer/pointer_test.go +++ b/go/pointer/pointer_test.go @@ -44,7 +44,7 @@ var inputs = []string{ "testdata/fmtexcerpt.go", "testdata/func.go", "testdata/funcreflect.go", - "testdata/hello.go", + "testdata/hello.go", // NB: causes spurious failure of HVN cross-check "testdata/interfaces.go", "testdata/mapreflect.go", "testdata/maps.go", @@ -287,6 +287,7 @@ func doOneInput(input, filename string) bool { } var log bytes.Buffer + fmt.Fprintf(&log, "Input: %s\n", filename) // Run the analysis. 
config := &pointer.Config{ diff --git a/go/pointer/print.go b/go/pointer/print.go index a0b11e8a8..4f2f4c7ae 100644 --- a/go/pointer/print.go +++ b/go/pointer/print.go @@ -35,7 +35,7 @@ func (c *untagConstraint) String() string { } func (c *invokeConstraint) String() string { - return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params+1) + return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params) } func (n nodeid) String() string { diff --git a/go/pointer/reflect.go b/go/pointer/reflect.go index fe51718f6..6e5c54cab 100644 --- a/go/pointer/reflect.go +++ b/go/pointer/reflect.go @@ -20,6 +20,9 @@ package pointer // yet implemented) correspond to reflect.Values with // reflect.flagAddr.] // A picture would help too. +// +// TODO(adonovan): try factoring up the common parts of the majority of +// these constraints that are single input, single output. import ( "fmt" @@ -159,8 +162,10 @@ type rVBytesConstraint struct { result nodeid // (indirect) } -func (c *rVBytesConstraint) ptr() nodeid { return c.v } -func (c *rVBytesConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVBytesConstraint) ptr() nodeid { return c.v } +func (c *rVBytesConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVBytes.result") +} func (c *rVBytesConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -205,7 +210,7 @@ func ext۰reflect۰Value۰Bytes(a *analysis, cgn *cgnode) { // result = v.Call(in) type rVCallConstraint struct { cgn *cgnode - targets nodeid + targets nodeid // (indirect) v nodeid // (ptr) arg nodeid // = in[*] result nodeid // (indirect) @@ -213,13 +218,9 @@ type rVCallConstraint struct { } func (c *rVCallConstraint) ptr() nodeid { return c.v } -func (c *rVCallConstraint) indirect(nodes []nodeid) []nodeid { - nodes = append(nodes, c.result) - // TODO(adonovan): we may be able to handle 'targets' out-of-band - // so that all implementations 
indirect() return a single value. - // We can then dispense with the slice. - nodes = append(nodes, c.targets) - return nodes +func (c *rVCallConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.targets), "rVCall.targets") + h.markIndirect(onodeid(c.result), "rVCall.result") } func (c *rVCallConstraint) renumber(mapping []nodeid) { c.targets = mapping[c.targets] @@ -363,8 +364,10 @@ type rVElemConstraint struct { result nodeid // (indirect) } -func (c *rVElemConstraint) ptr() nodeid { return c.v } -func (c *rVElemConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVElemConstraint) ptr() nodeid { return c.v } +func (c *rVElemConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVElem.result") +} func (c *rVElemConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -426,8 +429,10 @@ type rVIndexConstraint struct { result nodeid // (indirect) } -func (c *rVIndexConstraint) ptr() nodeid { return c.v } -func (c *rVIndexConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVIndexConstraint) ptr() nodeid { return c.v } +func (c *rVIndexConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVIndex.result") +} func (c *rVIndexConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -488,8 +493,10 @@ type rVInterfaceConstraint struct { result nodeid // (indirect) } -func (c *rVInterfaceConstraint) ptr() nodeid { return c.v } -func (c *rVInterfaceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVInterfaceConstraint) ptr() nodeid { return c.v } +func (c *rVInterfaceConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVInterface.result") +} func (c *rVInterfaceConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -541,8 +548,10 @@ type rVMapIndexConstraint struct { result nodeid // (indirect) } -func 
(c *rVMapIndexConstraint) ptr() nodeid { return c.v } -func (c *rVMapIndexConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVMapIndexConstraint) ptr() nodeid { return c.v } +func (c *rVMapIndexConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVMapIndex.result") +} func (c *rVMapIndexConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -595,8 +604,10 @@ type rVMapKeysConstraint struct { result nodeid // (indirect) } -func (c *rVMapKeysConstraint) ptr() nodeid { return c.v } -func (c *rVMapKeysConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVMapKeysConstraint) ptr() nodeid { return c.v } +func (c *rVMapKeysConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVMapKeys.result") +} func (c *rVMapKeysConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -650,7 +661,7 @@ func ext۰reflect۰Value۰MapKeys(a *analysis, cgn *cgnode) { func ext۰reflect۰Value۰Method(a *analysis, cgn *cgnode) {} // TODO(adonovan) func ext۰reflect۰Value۰MethodByName(a *analysis, cgn *cgnode) {} // TODO(adonovan) -// ---------- func (Value).Recv(Value) ---------- +// ---------- func (Value).Recv(Value) Value ---------- // result, _ = v.Recv() type rVRecvConstraint struct { @@ -659,8 +670,10 @@ type rVRecvConstraint struct { result nodeid // (indirect) } -func (c *rVRecvConstraint) ptr() nodeid { return c.v } -func (c *rVRecvConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVRecvConstraint) ptr() nodeid { return c.v } +func (c *rVRecvConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVRecv.result") +} func (c *rVRecvConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -714,8 +727,8 @@ type rVSendConstraint struct { x nodeid } -func (c *rVSendConstraint) ptr() nodeid { return c.v } -func (c *rVSendConstraint) 
indirect(nodes []nodeid) []nodeid { return nodes } +func (c *rVSendConstraint) ptr() nodeid { return c.v } +func (c *rVSendConstraint) presolve(*hvn) {} func (c *rVSendConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.x = mapping[c.x] @@ -767,8 +780,8 @@ type rVSetBytesConstraint struct { x nodeid } -func (c *rVSetBytesConstraint) ptr() nodeid { return c.v } -func (c *rVSetBytesConstraint) indirect(nodes []nodeid) []nodeid { return nodes } +func (c *rVSetBytesConstraint) ptr() nodeid { return c.v } +func (c *rVSetBytesConstraint) presolve(*hvn) {} func (c *rVSetBytesConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.x = mapping[c.x] @@ -816,8 +829,8 @@ type rVSetMapIndexConstraint struct { val nodeid } -func (c *rVSetMapIndexConstraint) ptr() nodeid { return c.v } -func (c *rVSetMapIndexConstraint) indirect(nodes []nodeid) []nodeid { return nodes } +func (c *rVSetMapIndexConstraint) ptr() nodeid { return c.v } +func (c *rVSetMapIndexConstraint) presolve(*hvn) {} func (c *rVSetMapIndexConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.key = mapping[c.key] @@ -868,7 +881,7 @@ func ext۰reflect۰Value۰SetMapIndex(a *analysis, cgn *cgnode) { func ext۰reflect۰Value۰SetPointer(a *analysis, cgn *cgnode) {} // TODO(adonovan) -// ---------- func (Value).Slice(v Value, i, j int) ---------- +// ---------- func (Value).Slice(v Value, i, j int) Value ---------- // result = v.Slice(_, _) type rVSliceConstraint struct { @@ -877,8 +890,10 @@ type rVSliceConstraint struct { result nodeid // (indirect) } -func (c *rVSliceConstraint) ptr() nodeid { return c.v } -func (c *rVSliceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rVSliceConstraint) ptr() nodeid { return c.v } +func (c *rVSliceConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rVSlice.result") +} func (c *rVSliceConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -956,8 +971,10 @@ type 
reflectChanOfConstraint struct { dirs []types.ChanDir } -func (c *reflectChanOfConstraint) ptr() nodeid { return c.t } -func (c *reflectChanOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectChanOfConstraint) ptr() nodeid { return c.t } +func (c *reflectChanOfConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectChanOf.result") +} func (c *reflectChanOfConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1028,8 +1045,10 @@ type reflectIndirectConstraint struct { result nodeid // (indirect) } -func (c *reflectIndirectConstraint) ptr() nodeid { return c.v } -func (c *reflectIndirectConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectIndirectConstraint) ptr() nodeid { return c.v } +func (c *reflectIndirectConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectIndirect.result") +} func (c *reflectIndirectConstraint) renumber(mapping []nodeid) { c.v = mapping[c.v] c.result = mapping[c.result] @@ -1080,8 +1099,10 @@ type reflectMakeChanConstraint struct { result nodeid // (indirect) } -func (c *reflectMakeChanConstraint) ptr() nodeid { return c.typ } -func (c *reflectMakeChanConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectMakeChanConstraint) ptr() nodeid { return c.typ } +func (c *reflectMakeChanConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectMakeChan.result") +} func (c *reflectMakeChanConstraint) renumber(mapping []nodeid) { c.typ = mapping[c.typ] c.result = mapping[c.result] @@ -1138,8 +1159,10 @@ type reflectMakeMapConstraint struct { result nodeid // (indirect) } -func (c *reflectMakeMapConstraint) ptr() nodeid { return c.typ } -func (c *reflectMakeMapConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectMakeMapConstraint) ptr() nodeid { return c.typ } +func (c 
*reflectMakeMapConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectMakeMap.result") +} func (c *reflectMakeMapConstraint) renumber(mapping []nodeid) { c.typ = mapping[c.typ] c.result = mapping[c.result] @@ -1195,8 +1218,10 @@ type reflectMakeSliceConstraint struct { result nodeid // (indirect) } -func (c *reflectMakeSliceConstraint) ptr() nodeid { return c.typ } -func (c *reflectMakeSliceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectMakeSliceConstraint) ptr() nodeid { return c.typ } +func (c *reflectMakeSliceConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectMakeSlice.result") +} func (c *reflectMakeSliceConstraint) renumber(mapping []nodeid) { c.typ = mapping[c.typ] c.result = mapping[c.result] @@ -1252,8 +1277,10 @@ type reflectNewConstraint struct { result nodeid // (indirect) } -func (c *reflectNewConstraint) ptr() nodeid { return c.typ } -func (c *reflectNewConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectNewConstraint) ptr() nodeid { return c.typ } +func (c *reflectNewConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectNew.result") +} func (c *reflectNewConstraint) renumber(mapping []nodeid) { c.typ = mapping[c.typ] c.result = mapping[c.result] @@ -1314,8 +1341,10 @@ type reflectPtrToConstraint struct { result nodeid // (indirect) } -func (c *reflectPtrToConstraint) ptr() nodeid { return c.t } -func (c *reflectPtrToConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectPtrToConstraint) ptr() nodeid { return c.t } +func (c *reflectPtrToConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectPtrTo.result") +} func (c *reflectPtrToConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1363,8 +1392,10 @@ type reflectSliceOfConstraint struct { result nodeid // (indirect) } -func (c 
*reflectSliceOfConstraint) ptr() nodeid { return c.t } -func (c *reflectSliceOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectSliceOfConstraint) ptr() nodeid { return c.t } +func (c *reflectSliceOfConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectSliceOf.result") +} func (c *reflectSliceOfConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1410,8 +1441,10 @@ type reflectTypeOfConstraint struct { result nodeid // (indirect) } -func (c *reflectTypeOfConstraint) ptr() nodeid { return c.i } -func (c *reflectTypeOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectTypeOfConstraint) ptr() nodeid { return c.i } +func (c *reflectTypeOfConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectTypeOf.result") +} func (c *reflectTypeOfConstraint) renumber(mapping []nodeid) { c.i = mapping[c.i] c.result = mapping[c.result] @@ -1461,8 +1494,10 @@ type reflectZeroConstraint struct { result nodeid // (indirect) } -func (c *reflectZeroConstraint) ptr() nodeid { return c.typ } -func (c *reflectZeroConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *reflectZeroConstraint) ptr() nodeid { return c.typ } +func (c *reflectZeroConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "reflectZero.result") +} func (c *reflectZeroConstraint) renumber(mapping []nodeid) { c.typ = mapping[c.typ] c.result = mapping[c.result] @@ -1521,8 +1556,10 @@ type rtypeElemConstraint struct { result nodeid // (indirect) } -func (c *rtypeElemConstraint) ptr() nodeid { return c.t } -func (c *rtypeElemConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rtypeElemConstraint) ptr() nodeid { return c.t } +func (c *rtypeElemConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rtypeElem.result") +} func (c *rtypeElemConstraint) 
renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1572,8 +1609,10 @@ type rtypeFieldByNameConstraint struct { result nodeid // (indirect) } -func (c *rtypeFieldByNameConstraint) ptr() nodeid { return c.t } -func (c *rtypeFieldByNameConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rtypeFieldByNameConstraint) ptr() nodeid { return c.t } +func (c *rtypeFieldByNameConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result+3), "rtypeFieldByName.result.Type") +} func (c *rtypeFieldByNameConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1661,8 +1700,10 @@ type rtypeInOutConstraint struct { i int // -ve if not a constant } -func (c *rtypeInOutConstraint) ptr() nodeid { return c.t } -func (c *rtypeInOutConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rtypeInOutConstraint) ptr() nodeid { return c.t } +func (c *rtypeInOutConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rtypeInOut.result") +} func (c *rtypeInOutConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1736,8 +1777,10 @@ type rtypeKeyConstraint struct { result nodeid // (indirect) } -func (c *rtypeKeyConstraint) ptr() nodeid { return c.t } -func (c *rtypeKeyConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } +func (c *rtypeKeyConstraint) ptr() nodeid { return c.t } +func (c *rtypeKeyConstraint) presolve(h *hvn) { + h.markIndirect(onodeid(c.result), "rtypeKey.result") +} func (c *rtypeKeyConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] c.result = mapping[c.result] @@ -1784,8 +1827,9 @@ type rtypeMethodByNameConstraint struct { } func (c *rtypeMethodByNameConstraint) ptr() nodeid { return c.t } -func (c *rtypeMethodByNameConstraint) indirect(nodes []nodeid) []nodeid { - return append(nodes, c.result) +func (c *rtypeMethodByNameConstraint) presolve(h *hvn) { + 
h.markIndirect(onodeid(c.result+3), "rtypeMethodByName.result.Type") + h.markIndirect(onodeid(c.result+4), "rtypeMethodByName.result.Func") } func (c *rtypeMethodByNameConstraint) renumber(mapping []nodeid) { c.t = mapping[c.t] diff --git a/go/pointer/solve.go b/go/pointer/solve.go index e087e051e..ce5d07c6c 100644 --- a/go/pointer/solve.go +++ b/go/pointer/solve.go @@ -13,15 +13,22 @@ import ( "code.google.com/p/go.tools/go/types" ) +type solverState struct { + complex []constraint // complex constraints attached to this node + copyTo nodeset // simple copy constraint edges + pts nodeset // points-to set of this node + prevPTS nodeset // pts(n) in previous iteration (for difference propagation) +} + func (a *analysis) solve() { - var delta nodeset + start("Solving") + if a.log != nil { + fmt.Fprintf(a.log, "\n\n==== Solving constraints\n\n") + } // Solver main loop. - for round := 1; ; round++ { - if a.log != nil { - fmt.Fprintf(a.log, "Solving, round %d\n", round) - } - + var delta nodeset + for { // Add new constraints to the graph: // static constraints from SSA on round 1, // dynamic constraints from reflection thereafter. @@ -39,22 +46,33 @@ func (a *analysis) solve() { n := a.nodes[id] // Difference propagation. - delta.Difference(&n.pts.Sparse, &n.prevPts.Sparse) + delta.Difference(&n.solve.pts.Sparse, &n.solve.prevPTS.Sparse) if delta.IsEmpty() { continue } - n.prevPts.Copy(&n.pts.Sparse) + if a.log != nil { + fmt.Fprintf(a.log, "\t\tpts(n%d : %s) = %s + %s\n", + id, n.typ, &delta, &n.solve.prevPTS) + } + n.solve.prevPTS.Copy(&n.solve.pts.Sparse) // Apply all resolution rules attached to n. 
a.solveConstraints(n, &delta) if a.log != nil { - fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, &n.pts) + fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, &n.solve.pts) } } - if !a.nodes[0].pts.IsEmpty() { - panic(fmt.Sprintf("pts(0) is nonempty: %s", &a.nodes[0].pts)) + if !a.nodes[0].solve.pts.IsEmpty() { + panic(fmt.Sprintf("pts(0) is nonempty: %s", &a.nodes[0].solve.pts)) + } + + // Release working state (but keep final PTS). + for _, n := range a.nodes { + n.solve.complex = nil + n.solve.copyTo.Clear() + n.solve.prevPTS.Clear() } if a.log != nil { @@ -62,11 +80,12 @@ func (a *analysis) solve() { // Dump solution. for i, n := range a.nodes { - if !n.pts.IsEmpty() { - fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, &n.pts, n.typ) + if !n.solve.pts.IsEmpty() { + fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, &n.solve.pts, n.typ) } } } + stop("Solving") } // processNewConstraints takes the new constraints from a.constraints @@ -84,13 +103,13 @@ func (a *analysis) processNewConstraints() { for _, c := range constraints { if c, ok := c.(*addrConstraint); ok { dst := a.nodes[c.dst] - dst.pts.add(c.src) + dst.solve.pts.add(c.src) // Populate the worklist with nodes that point to // something initially (due to addrConstraints) and // have other constraints attached. // (A no-op in round 1.) 
- if !dst.copyTo.IsEmpty() || len(dst.complex) > 0 { + if !dst.solve.copyTo.IsEmpty() || len(dst.solve.complex) > 0 { a.addWork(c.dst) } } @@ -107,16 +126,16 @@ func (a *analysis) processNewConstraints() { case *copyConstraint: // simple (copy) constraint id = c.src - a.nodes[id].copyTo.add(c.dst) + a.nodes[id].solve.copyTo.add(c.dst) default: // complex constraint id = c.ptr() - ptr := a.nodes[id] - ptr.complex = append(ptr.complex, c) + solve := a.nodes[id].solve + solve.complex = append(solve.complex, c) } - if n := a.nodes[id]; !n.pts.IsEmpty() { - if !n.prevPts.IsEmpty() { + if n := a.nodes[id]; !n.solve.pts.IsEmpty() { + if !n.solve.prevPTS.IsEmpty() { stale.add(id) } a.addWork(id) @@ -125,8 +144,8 @@ func (a *analysis) processNewConstraints() { // Apply new constraints to pre-existing PTS labels. var space [50]int for _, id := range stale.AppendTo(space[:0]) { - n := a.nodes[id] - a.solveConstraints(n, &n.prevPts) + n := a.nodes[nodeid(id)] + a.solveConstraints(n, &n.solve.prevPTS) } } @@ -140,7 +159,7 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) { } // Process complex constraints dependent on n. - for _, c := range n.complex { + for _, c := range n.solve.complex { if a.log != nil { fmt.Fprintf(a.log, "\t\tconstraint %s\n", c) } @@ -149,10 +168,10 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) { // Process copy constraints. var copySeen nodeset - for _, x := range n.copyTo.AppendTo(a.deltaSpace) { + for _, x := range n.solve.copyTo.AppendTo(a.deltaSpace) { mid := nodeid(x) if copySeen.add(mid) { - if a.nodes[mid].pts.addAll(delta) { + if a.nodes[mid].solve.pts.addAll(delta) { a.addWork(mid) } } @@ -161,7 +180,11 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) { // addLabel adds label to the points-to set of ptr and reports whether the set grew. 
func (a *analysis) addLabel(ptr, label nodeid) bool { - return a.nodes[ptr].pts.add(label) + b := a.nodes[ptr].solve.pts.add(label) + if b && a.log != nil { + fmt.Fprintf(a.log, "\t\tpts(n%d) += n%d\n", ptr, label) + } + return b } func (a *analysis) addWork(id nodeid) { @@ -180,7 +203,7 @@ func (a *analysis) addWork(id nodeid) { // func (a *analysis) onlineCopy(dst, src nodeid) bool { if dst != src { - if nsrc := a.nodes[src]; nsrc.copyTo.add(dst) { + if nsrc := a.nodes[src]; nsrc.solve.copyTo.add(dst) { if a.log != nil { fmt.Fprintf(a.log, "\t\t\tdynamic copy n%d <- n%d\n", dst, src) } @@ -188,7 +211,7 @@ func (a *analysis) onlineCopy(dst, src nodeid) bool { // are followed by addWork, possibly batched // via a 'changed' flag; see if there's a // noticeable penalty to calling addWork here. - return a.nodes[dst].pts.addAll(&nsrc.pts) + return a.nodes[dst].solve.pts.addAll(&nsrc.solve.pts) } } return false @@ -239,7 +262,7 @@ func (c *offsetAddrConstraint) solve(a *analysis, delta *nodeset) { dst := a.nodes[c.dst] for _, x := range delta.AppendTo(a.deltaSpace) { k := nodeid(x) - if dst.pts.add(k + nodeid(c.offset)) { + if dst.solve.pts.add(k + nodeid(c.offset)) { a.addWork(c.dst) } } diff --git a/go/pointer/stdlib_test.go b/go/pointer/stdlib_test.go new file mode 100644 index 000000000..c837bf8d5 --- /dev/null +++ b/go/pointer/stdlib_test.go @@ -0,0 +1,133 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pointer + +// This file runs the pointer analysis on all packages and tests beneath +// $GOROOT. It provides a "smoke test" that the analysis doesn't crash +// on a large input, and a benchmark for performance measurement. 
+// +// Because it is relatively slow, the --stdlib flag must be enabled for +// this test to run: +// % go test -v code.google.com/p/go.tools/go/pointer --stdlib + +import ( + "flag" + "go/token" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "code.google.com/p/go.tools/go/loader" + "code.google.com/p/go.tools/go/ssa" + "code.google.com/p/go.tools/go/ssa/ssautil" +) + +var runStdlibTest = flag.Bool("stdlib", false, "Run the (slow) stdlib test") + +// TODO(adonovan): move this to go/buildutil package since we have four copies: +// go/{loader,pointer,ssa}/stdlib_test.go and godoc/analysis/analysis.go. +func allPackages() []string { + var pkgs []string + root := filepath.Join(runtime.GOROOT(), "src/pkg") + string(os.PathSeparator) + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + // Prune the search if we encounter any of these names: + switch filepath.Base(path) { + case "testdata", ".hg": + return filepath.SkipDir + } + if info.IsDir() { + pkg := filepath.ToSlash(strings.TrimPrefix(path, root)) + switch pkg { + case "builtin", "pkg": + return filepath.SkipDir // skip these subtrees + case "": + return nil // ignore root of tree + } + pkgs = append(pkgs, pkg) + } + + return nil + }) + return pkgs +} + +func TestStdlib(t *testing.T) { + if !*runStdlibTest { + t.Skip("skipping (slow) stdlib test (use --stdlib)") + } + + // Load, parse and type-check the program. + var conf loader.Config + conf.SourceImports = true + if _, err := conf.FromArgs(allPackages(), true); err != nil { + t.Errorf("FromArgs failed: %v", err) + return + } + + iprog, err := conf.Load() + if err != nil { + t.Fatalf("Load failed: %v", err) + } + + // Create SSA packages. + prog := ssa.Create(iprog, 0) + prog.BuildAll() + + numPkgs := len(prog.AllPackages()) + if want := 140; numPkgs < want { + t.Errorf("Loaded only %d packages, want at least %d", numPkgs, want) + } + + // Determine the set of packages/tests to analyze. 
+ var testPkgs []*ssa.Package + for _, info := range iprog.InitialPackages() { + testPkgs = append(testPkgs, prog.Package(info.Pkg)) + } + testmain := prog.CreateTestMainPackage(testPkgs...) + if testmain == nil { + t.Fatal("analysis scope has tests") + } + + // Run the analysis. + config := &Config{ + Reflection: false, // TODO(adonovan): fix remaining bug in rVCallConstraint, then enable. + BuildCallGraph: true, + Mains: []*ssa.Package{testmain}, + } + // TODO(adonovan): add some query values (affects track bits). + + t0 := time.Now() + + result, err := Analyze(config) + if err != nil { + t.Fatal(err) // internal error in pointer analysis + } + _ = result // TODO(adonovan): measure something + + t1 := time.Now() + + // Dump some statistics. + allFuncs := ssautil.AllFunctions(prog) + var numInstrs int + for fn := range allFuncs { + for _, b := range fn.Blocks { + numInstrs += len(b.Instrs) + } + } + + // determine line count + var lineCount int + prog.Fset.Iterate(func(f *token.File) bool { + lineCount += f.LineCount() + return true + }) + + t.Log("#Source lines: ", lineCount) + t.Log("#Instructions: ", numInstrs) + t.Log("Pointer analysis: ", t1.Sub(t0)) +} diff --git a/go/pointer/testdata/finalizer.go b/go/pointer/testdata/finalizer.go index d80f85acb..97f25c904 100644 --- a/go/pointer/testdata/finalizer.go +++ b/go/pointer/testdata/finalizer.go @@ -68,6 +68,13 @@ func runtimeSetFinalizerIndirect() { setFinalizer(x, final4) } +// Exercise the elimination of SetFinalizer +// constraints with non-pointer operands. 
+func runtimeSetFinalizerNonpointer() { + runtime.SetFinalizer(nil, (*T).finalize) // x is a non-pointer + runtime.SetFinalizer((*T).finalize, nil) // f is a non-pointer +} + // @calls main.runtimeSetFinalizerIndirect -> runtime.SetFinalizer // @calls runtime.SetFinalizer -> main.final4 @@ -76,6 +83,7 @@ func main() { runtimeSetFinalizer2() runtimeSetFinalizer3() runtimeSetFinalizerIndirect() + runtimeSetFinalizerNonpointer() } var unknown bool // defeat dead-code elimination diff --git a/go/pointer/util.go b/go/pointer/util.go index b4d806467..e3ac4fce6 100644 --- a/go/pointer/util.go +++ b/go/pointer/util.go @@ -7,6 +7,11 @@ package pointer import ( "bytes" "fmt" + "log" + "os" + "os/exec" + "runtime" + "time" "code.google.com/p/go.tools/container/intsets" "code.google.com/p/go.tools/go/types" @@ -220,7 +225,7 @@ func (a *analysis) shouldTrack(T types.Type) bool { } a.trackTypes[T] = track if !track && a.log != nil { - fmt.Fprintf(a.log, "Type not tracked: %s\n", T) + fmt.Fprintf(a.log, "\ttype not tracked: %s\n", T) } } return track @@ -280,3 +285,34 @@ func (ns *nodeset) add(n nodeid) bool { func (x *nodeset) addAll(y *nodeset) bool { return x.UnionWith(&y.Sparse) } + +// Profiling & debugging ------------------------------------------------------- + +var timers = make(map[string]time.Time) + +func start(name string) { + if debugTimers { + timers[name] = time.Now() + log.Printf("%s...\n", name) + } +} + +func stop(name string) { + if debugTimers { + log.Printf("%s took %s\n", name, time.Since(timers[name])) + } +} + +// diff runs the command "diff a b" and reports its success. +func diff(a, b string) bool { + var cmd *exec.Cmd + switch runtime.GOOS { + case "plan9": + cmd = exec.Command("/bin/diff", "-c", a, b) + default: + cmd = exec.Command("/usr/bin/diff", "-u", a, b) + } + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + return cmd.Run() == nil +} |