aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Donovan <adonovan@google.com>2014-06-16 15:46:07 -0400
committerAlan Donovan <adonovan@google.com>2014-06-16 15:46:07 -0400
commit9b38eafe607ba43ac44da301e6e04a1b7c2e8d90 (patch)
tree261efe554ab56cdbc5651cebc38f26b37daa4c48
parent47c0a8f0c30aa0ffdc2109b0a1b585123e2c0534 (diff)
downloadgolang-x-tools-9b38eafe607ba43ac44da301e6e04a1b7c2e8d90.tar.gz
go/pointer: implement pointer equivalence via hash-value numbering, a pre-solver optimization.
This reduces solver time by about 40%. See hvn.go for detailed description. Also in this CL: - Update package docs. - Added various global opt/debug options for maintainer convenience. - Added logging of phase timing. - Added stdlib_test, disabled by default, that runs the analysis on all tests in $GOROOT. - include types when dumping solution LGTM=crawshaw R=crawshaw, dannyb CC=golang-codereviews https://golang.org/cl/96650048
-rw-r--r--go/pointer/TODO17
-rw-r--r--go/pointer/analysis.go168
-rw-r--r--go/pointer/api.go8
-rw-r--r--go/pointer/callgraph.go11
-rw-r--r--go/pointer/constraint.go64
-rw-r--r--go/pointer/doc.go286
-rw-r--r--go/pointer/gen.go72
-rw-r--r--go/pointer/hvn.go968
-rw-r--r--go/pointer/intrinsics.go8
-rw-r--r--go/pointer/opt.go22
-rw-r--r--go/pointer/pointer_test.go3
-rw-r--r--go/pointer/print.go2
-rw-r--r--go/pointer/reflect.go168
-rw-r--r--go/pointer/solve.go81
-rw-r--r--go/pointer/stdlib_test.go133
-rw-r--r--go/pointer/testdata/finalizer.go8
-rw-r--r--go/pointer/util.go38
17 files changed, 1770 insertions, 287 deletions
diff --git a/go/pointer/TODO b/go/pointer/TODO
index 5b7b16846..9be3b8caf 100644
--- a/go/pointer/TODO
+++ b/go/pointer/TODO
@@ -17,11 +17,26 @@ CONSTRAINT GENERATION:
3) soundly track physical field offsets. (Summarise dannyb's email here.)
A downside is that we can't keep the identity field of struct
allocations that identifies the object.
+- add to pts(a.panic) a label representing all runtime panics, e.g.
+ runtime.{TypeAssertionError,errorString,errorCString}.
OPTIMISATIONS
-- pre-solver: PE via HVN/HRU and LE.
+- pre-solver:
+ pointer equivalence: extend HVN to HRU
+ location equivalence
- solver: HCD, LCD.
+- experiment with map+slice worklist in lieu of bitset.
+ It may have faster insert.
MISC:
- Test on all platforms.
Currently we assume these go/build tags: linux, amd64, !cgo.
+
+MAINTAINABILITY
+- Think about ways to make debugging this code easier. PTA logs
+ routinely exceed a million lines and require training to read.
+
+BUGS:
+- There's a crash bug in stdlib_test + reflection, rVCallConstraint.
+
+
diff --git a/go/pointer/analysis.go b/go/pointer/analysis.go
index 884962b59..cb423f4ea 100644
--- a/go/pointer/analysis.go
+++ b/go/pointer/analysis.go
@@ -12,7 +12,9 @@ import (
"io"
"os"
"reflect"
+ "runtime"
"runtime/debug"
+ "sort"
"code.google.com/p/go.tools/go/callgraph"
"code.google.com/p/go.tools/go/ssa"
@@ -20,6 +22,18 @@ import (
"code.google.com/p/go.tools/go/types/typeutil"
)
+const (
+ // optimization options; enable all when committing
+ optRenumber = true // enable renumbering optimization (makes logs hard to read)
+ optHVN = true // enable pointer equivalence via Hash-Value Numbering
+
+ // debugging options; disable all when committing
+ debugHVN = false // enable assertions in HVN
+ debugHVNVerbose = false // enable extra HVN logging
+ debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
+ debugTimers = true // show running time of each phase
+)
+
// object.flags bitmask values.
const (
otTagged = 1 << iota // type-tagged object
@@ -72,7 +86,7 @@ type nodeid uint32
type node struct {
// If non-nil, this node is the start of an object
// (addressable memory location).
- // The following obj.size words implicitly belong to the object;
+ // The following obj.size nodes implicitly belong to the object;
// they locate their object by scanning back.
obj *object
@@ -85,21 +99,10 @@ type node struct {
// an object of aggregate type (struct, tuple, array) this is.
subelement *fieldInfo // e.g. ".a.b[*].c"
- // Points-to sets.
- pts nodeset // points-to set of this node
- prevPts nodeset // pts(n) in previous iteration (for difference propagation)
-
- // Graph edges
- copyTo nodeset // simple copy constraint edges
-
- // Complex constraints attached to this node (x).
- // - *loadConstraint y=*x
- // - *offsetAddrConstraint y=&x.f or y=&x[0]
- // - *storeConstraint *x=z
- // - *typeFilterConstraint y=x.(I)
- // - *untagConstraint y=x.(C)
- // - *invokeConstraint y=x.f(params...)
- complex []constraint
+ // Solver state for the canonical node of this pointer-
+ // equivalence class. Each node is created with its own state
+ // but they become shared after HVN.
+ solve *solverState
}
// An analysis instance holds the state of a single pointer analysis problem.
@@ -119,6 +122,8 @@ type analysis struct {
globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
localval map[ssa.Value]nodeid // node for each local ssa.Value
localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
+ atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
+ mapValues []nodeid // values of makemap objects (indirect in HVN)
work nodeset // solver's worklist
result *Result // results of the analysis
track track // pointerlike types whose aliasing we track
@@ -136,9 +141,10 @@ type analysis struct {
runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
}
-// enclosingObj returns the object (addressible memory object) that encloses node id.
-// Panic ensues if that node does not belong to any object.
-func (a *analysis) enclosingObj(id nodeid) *object {
+// enclosingObj returns the first node of the addressable memory
+// object that encloses node id. Panic ensues if that node does not
+// belong to any object.
+func (a *analysis) enclosingObj(id nodeid) nodeid {
// Find previous node with obj != nil.
for i := id; i >= 0; i-- {
n := a.nodes[i]
@@ -146,7 +152,7 @@ func (a *analysis) enclosingObj(id nodeid) *object {
if i+nodeid(obj.size) <= id {
break // out of bounds
}
- return obj
+ return i
}
}
panic("node has no enclosing object")
@@ -156,7 +162,7 @@ func (a *analysis) enclosingObj(id nodeid) *object {
// Panic ensues if that node is not addressable.
func (a *analysis) labelFor(id nodeid) *Label {
return &Label{
- obj: a.enclosingObj(id),
+ obj: a.nodes[a.enclosingObj(id)].obj,
subelement: a.nodes[id].subelement,
}
}
@@ -222,6 +228,7 @@ func Analyze(config *Config) (result *Result, err error) {
globalobj: make(map[ssa.Value]nodeid),
flattenMemo: make(map[types.Type][]*fieldInfo),
trackTypes: make(map[types.Type]bool),
+ atFuncs: make(map[*ssa.Function]bool),
hasher: typeutil.MakeHasher(),
intrinsics: make(map[*ssa.Function]intrinsic),
result: &Result{
@@ -236,7 +243,7 @@ func Analyze(config *Config) (result *Result, err error) {
}
if a.log != nil {
- fmt.Fprintln(a.log, "======== NEW ANALYSIS ========")
+ fmt.Fprintln(a.log, "==== Starting analysis")
}
// Pointer analysis requires a complete program for soundness.
@@ -275,24 +282,55 @@ func Analyze(config *Config) (result *Result, err error) {
a.computeTrackBits()
a.generate()
+ a.showCounts()
- if a.log != nil {
- // Show size of constraint system.
- counts := make(map[reflect.Type]int)
- for _, c := range a.constraints {
- counts[reflect.TypeOf(c)]++
- }
- fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
- for t, n := range counts {
- fmt.Fprintf(a.log, "\t%s:\t%d\n", t, n)
+ if optRenumber {
+ a.renumber()
+ }
+
+ N := len(a.nodes) // excludes solver-created nodes
+
+ if optHVN {
+ if debugHVNCrossCheck {
+ // Cross-check: run the solver once without
+ // optimization, once with, and compare the
+ // solutions.
+ savedConstraints := a.constraints
+
+ a.solve()
+ a.dumpSolution("A.pts", N)
+
+ // Restore.
+ a.constraints = savedConstraints
+ for _, n := range a.nodes {
+ n.solve = new(solverState)
+ }
+ a.nodes = a.nodes[:N]
+
+ // rtypes is effectively part of the solver state.
+ a.rtypes = typeutil.Map{}
+ a.rtypes.SetHasher(a.hasher)
}
- fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
+
+ a.hvn()
}
- a.optimize()
+ if debugHVNCrossCheck {
+ runtime.GC()
+ runtime.GC()
+ }
a.solve()
+ // Compare solutions.
+ if optHVN && debugHVNCrossCheck {
+ a.dumpSolution("B.pts", N)
+
+ if !diff("A.pts", "B.pts") {
+ return nil, fmt.Errorf("internal error: optimization changed solution")
+ }
+ }
+
// Create callgraph.Nodes in deterministic order.
if cg := a.result.CallGraph; cg != nil {
for _, caller := range a.cgnodes {
@@ -304,7 +342,7 @@ func Analyze(config *Config) (result *Result, err error) {
var space [100]int
for _, caller := range a.cgnodes {
for _, site := range caller.sites {
- for _, callee := range a.nodes[site.targets].pts.AppendTo(space[:0]) {
+ for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
a.callEdge(caller, site, nodeid(callee))
}
}
@@ -342,3 +380,65 @@ func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
a.warnf(fn.Pos(), " (declared here)")
}
}
+
+// dumpSolution writes the PTS solution to the specified file.
+//
+// It only dumps the nodes that existed before solving. The order in
+// which solver-created nodes are created depends on pre-solver
+// optimization, so we can't include them in the cross-check.
+//
+func (a *analysis) dumpSolution(filename string, N int) {
+ f, err := os.Create(filename)
+ if err != nil {
+ panic(err)
+ }
+ for id, n := range a.nodes[:N] {
+ if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
+ panic(err)
+ }
+ var sep string
+ for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
+ if l >= N {
+ break
+ }
+ fmt.Fprintf(f, "%s%d", sep, l)
+ sep = " "
+ }
+ fmt.Fprintf(f, "} : %s\n", n.typ)
+ }
+ if err := f.Close(); err != nil {
+ panic(err)
+ }
+}
+
+// showCounts logs the size of the constraint system. A typical
+// optimized distribution is 65% copy, 13% load, 11% addr, 5%
+// offsetAddr, 4% store, 2% others.
+//
+func (a *analysis) showCounts() {
+ if a.log != nil {
+ counts := make(map[reflect.Type]int)
+ for _, c := range a.constraints {
+ counts[reflect.TypeOf(c)]++
+ }
+ fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
+ var lines []string
+ for t, n := range counts {
+ line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
+ lines = append(lines, line)
+ }
+ sort.Sort(sort.Reverse(sort.StringSlice(lines)))
+ for _, line := range lines {
+ fmt.Fprintf(a.log, "\t%s\n", line)
+ }
+
+ fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
+
+ // Show number of pointer equivalence classes.
+ m := make(map[*solverState]bool)
+ for _, n := range a.nodes {
+ m[n.solve] = true
+ }
+ fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
+ }
+}
diff --git a/go/pointer/api.go b/go/pointer/api.go
index 190e459df..890017a04 100644
--- a/go/pointer/api.go
+++ b/go/pointer/api.go
@@ -124,12 +124,12 @@ type Result struct {
// A Pointer is an equivalence class of pointer-like values.
//
-// A pointer doesn't have a unique type because pointers of distinct
+// A Pointer doesn't have a unique type because pointers of distinct
// types may alias the same object.
//
type Pointer struct {
a *analysis
- n nodeid // non-zero
+ n nodeid
}
// A PointsToSet is a set of labels (locations or allocations).
@@ -197,7 +197,7 @@ func (s PointsToSet) DynamicTypes() *typeutil.Map {
pts = PointsToSet{s.a, new(nodeset)}
tmap.Set(tDyn, pts)
}
- pts.pts.addAll(&s.a.nodes[v].pts)
+ pts.pts.addAll(&s.a.nodes[v].solve.pts)
}
}
return &tmap
@@ -224,7 +224,7 @@ func (p Pointer) PointsTo() PointsToSet {
if p.n == 0 {
return PointsToSet{}
}
- return PointsToSet{p.a, &p.a.nodes[p.n].pts}
+ return PointsToSet{p.a, &p.a.nodes[p.n].solve.pts}
}
// MayAlias reports whether the receiver pointer may alias
diff --git a/go/pointer/callgraph.go b/go/pointer/callgraph.go
index 6ef415d22..67062919f 100644
--- a/go/pointer/callgraph.go
+++ b/go/pointer/callgraph.go
@@ -20,6 +20,17 @@ type cgnode struct {
callersite *callsite // where called from, if known; nil for shared contours
}
+// contour returns a description of this node's contour.
+func (n *cgnode) contour() string {
+ if n.callersite == nil {
+ return "shared contour"
+ }
+ if n.callersite.instr != nil {
+ return fmt.Sprintf("as called from %s", n.callersite.instr.Parent())
+ }
+ return fmt.Sprintf("as called from intrinsic (targets=n%d)", n.callersite.targets)
+}
+
func (n *cgnode) String() string {
return fmt.Sprintf("cg%d:%s", n.obj, n.fn)
}
diff --git a/go/pointer/constraint.go b/go/pointer/constraint.go
index caf235dd4..0f363b904 100644
--- a/go/pointer/constraint.go
+++ b/go/pointer/constraint.go
@@ -10,22 +10,18 @@ import (
type constraint interface {
// For a complex constraint, returns the nodeid of the pointer
- // to which it is attached.
+ // to which it is attached. For addr and copy, returns dst.
ptr() nodeid
- // indirect returns (by appending to the argument) the constraint's
- // "indirect" nodes as defined in (Hardekopf 2007b):
- // nodes whose points-to relations are not completely
- // represented in the initial constraint graph.
- //
- // TODO(adonovan): I think we need >1 results in some obscure
- // cases. If not, just return a nodeid, like ptr().
- //
- indirect(nodes []nodeid) []nodeid
-
// renumber replaces each nodeid n in the constraint by mapping[n].
renumber(mapping []nodeid)
+ // presolve is a hook for constraint-specific behaviour during
+ // pre-solver optimization. Typical implementations mark as
+ // indirect the set of nodes to which the solver will add copy
+ // edges or PTS labels.
+ presolve(h *hvn)
+
// solve is called for complex constraints when the pts for
// the node to which they are attached has changed.
solve(a *analysis, delta *nodeset)
@@ -41,10 +37,7 @@ type addrConstraint struct {
src nodeid
}
-func (c *addrConstraint) ptr() nodeid { panic("addrConstraint: not a complex constraint") }
-func (c *addrConstraint) indirect(nodes []nodeid) []nodeid {
- panic("addrConstraint: not a complex constraint")
-}
+func (c *addrConstraint) ptr() nodeid { return c.dst }
func (c *addrConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -53,14 +46,11 @@ func (c *addrConstraint) renumber(mapping []nodeid) {
// dst = src
// A simple constraint represented directly as a copyTo graph edge.
type copyConstraint struct {
- dst nodeid
- src nodeid // (ptr)
+ dst nodeid // (ptr)
+ src nodeid
}
-func (c *copyConstraint) ptr() nodeid { panic("copyConstraint: not a complex constraint") }
-func (c *copyConstraint) indirect(nodes []nodeid) []nodeid {
- panic("copyConstraint: not a complex constraint")
-}
+func (c *copyConstraint) ptr() nodeid { return c.dst }
func (c *copyConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -70,12 +60,11 @@ func (c *copyConstraint) renumber(mapping []nodeid) {
// A complex constraint attached to src (the pointer)
type loadConstraint struct {
offset uint32
- dst nodeid // (indirect)
+ dst nodeid
src nodeid // (ptr)
}
-func (c *loadConstraint) ptr() nodeid { return c.src }
-func (c *loadConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
+func (c *loadConstraint) ptr() nodeid { return c.src }
func (c *loadConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -89,8 +78,7 @@ type storeConstraint struct {
src nodeid
}
-func (c *storeConstraint) ptr() nodeid { return c.dst }
-func (c *storeConstraint) indirect(nodes []nodeid) []nodeid { return nodes }
+func (c *storeConstraint) ptr() nodeid { return c.dst }
func (c *storeConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -100,12 +88,11 @@ func (c *storeConstraint) renumber(mapping []nodeid) {
// A complex constraint attached to dst (the pointer)
type offsetAddrConstraint struct {
offset uint32
- dst nodeid // (indirect)
+ dst nodeid
src nodeid // (ptr)
}
-func (c *offsetAddrConstraint) ptr() nodeid { return c.src }
-func (c *offsetAddrConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
+func (c *offsetAddrConstraint) ptr() nodeid { return c.src }
func (c *offsetAddrConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -116,12 +103,11 @@ func (c *offsetAddrConstraint) renumber(mapping []nodeid) {
// No representation change: pts(dst) and pts(src) contains tagged objects.
type typeFilterConstraint struct {
typ types.Type // an interface type
- dst nodeid // (indirect)
- src nodeid // (ptr)
+ dst nodeid
+ src nodeid // (ptr)
}
-func (c *typeFilterConstraint) ptr() nodeid { return c.src }
-func (c *typeFilterConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
+func (c *typeFilterConstraint) ptr() nodeid { return c.src }
func (c *typeFilterConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -139,13 +125,12 @@ func (c *typeFilterConstraint) renumber(mapping []nodeid) {
// pts(dst) contains their payloads.
type untagConstraint struct {
typ types.Type // a concrete type
- dst nodeid // (indirect)
- src nodeid // (ptr)
+ dst nodeid
+ src nodeid // (ptr)
exact bool
}
-func (c *untagConstraint) ptr() nodeid { return c.src }
-func (c *untagConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
+func (c *untagConstraint) ptr() nodeid { return c.src }
func (c *untagConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst]
c.src = mapping[c.src]
@@ -156,11 +141,10 @@ func (c *untagConstraint) renumber(mapping []nodeid) {
type invokeConstraint struct {
method *types.Func // the abstract method
iface nodeid // (ptr) the interface
- params nodeid // (indirect) the first param in the params/results block
+ params nodeid // the start of the identity/params/results block
}
-func (c *invokeConstraint) ptr() nodeid { return c.iface }
-func (c *invokeConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.params) }
+func (c *invokeConstraint) ptr() nodeid { return c.iface }
func (c *invokeConstraint) renumber(mapping []nodeid) {
c.iface = mapping[c.iface]
c.params = mapping[c.params]
diff --git a/go/pointer/doc.go b/go/pointer/doc.go
index 13441e11e..00bf2a4e1 100644
--- a/go/pointer/doc.go
+++ b/go/pointer/doc.go
@@ -7,21 +7,20 @@
Package pointer implements Andersen's analysis, an inclusion-based
pointer analysis algorithm first described in (Andersen, 1994).
-The implementation is similar to that described in (Pearce et al,
-PASTE'04). Unlike many algorithms which interleave constraint
-generation and solving, constructing the callgraph as they go, this
-implementation for the most part observes a phase ordering (generation
-before solving), with only simple (copy) constraints being generated
-during solving. (The exception is reflection, which creates various
-constraints during solving as new types flow to reflect.Value
-operations.) This improves the traction of presolver optimisations,
-but imposes certain restrictions, e.g. potential context sensitivity
-is limited since all variants must be created a priori.
-
-We intend to add various presolving optimisations such as Pointer and
-Location Equivalence from (Hardekopf & Lin, SAS '07) and solver
-optimisations such as Hybrid- and Lazy- Cycle Detection from
-(Hardekopf & Lin, PLDI'07),
+A pointer analysis relates every pointer expression in a whole program
+to the set of memory locations to which it might point. This
+information can be used to construct a call graph of the program that
+precisely represents the destinations of dynamic function and method
+calls. It can also be used to determine, for example, which pairs of
+channel operations operate on the same channel.
+
+The package allows the client to request a set of expressions of
+interest for which the points-to information will be returned once the
+analysis is complete. In addition, the client may request that a
+callgraph is constructed. The example program in example_test.go
+demonstrates both of these features. Clients should not request more
+information than they need since it may increase the cost of the
+analysis significantly.
CLASSIFICATION
@@ -41,7 +40,7 @@ fields, such as x and y in struct { x, y *int }.
It is mostly CONTEXT-INSENSITIVE: most functions are analyzed once,
so values can flow in at one call to the function and return out at
another. Only some smaller functions are analyzed with consideration
-to their calling context.
+of their calling context.
It has a CONTEXT-SENSITIVE HEAP: objects are named by both allocation
site and context, so the objects returned by two distinct calls to f:
@@ -54,11 +53,87 @@ complete Go program and summaries for native code.
See the (Hind, PASTE'01) survey paper for an explanation of these terms.
+SOUNDNESS
+
+The analysis is fully sound when invoked on pure Go programs that do not
+use reflection or unsafe.Pointer conversions. In other words, if there
+is any possible execution of the program in which pointer P may point to
+object O, the analysis will report that fact.
+
+
+REFLECTION
+
+By default, the "reflect" library is ignored by the analysis, as if all
+its functions were no-ops, but if the client enables the Reflection flag,
+the analysis will make a reasonable attempt to model the effects of
+calls into this library. However, this comes at a significant
+performance cost, and not all features of that library are yet
+implemented. In addition, some simplifying approximations must be made
+to ensure that the analysis terminates; for example, reflection can be
+used to construct an infinite set of types and values of those types,
+but the analysis arbitrarily bounds the depth of such types.
+
+Most but not all reflection operations are supported.
+In particular, addressable reflect.Values are not yet implemented, so
+operations such as (reflect.Value).Set have no analytic effect.
+
+
+UNSAFE POINTER CONVERSIONS
+
+The pointer analysis makes no attempt to understand aliasing between the
+operand x and result y of an unsafe.Pointer conversion:
+ y = (*T)(unsafe.Pointer(x))
+It is as if the conversion allocated an entirely new object:
+ y = new(T)
+
+
+NATIVE CODE
+
+The analysis cannot model the aliasing effects of functions written in
+languages other than Go, such as runtime intrinsics in C or assembly, or
+code accessed via cgo. The result is as if such functions are no-ops.
+However, various important intrinsics are understood by the analysis,
+along with built-ins such as append.
+
+The analysis currently provides no way for users to specify the aliasing
+effects of native code.
+
+------------------------------------------------------------------------
+
+IMPLEMENTATION
+
+The remaining documentation is intended for package maintainers and
+pointer analysis specialists. Maintainers should have a solid
+understanding of the referenced papers (especially those by H&L and PKH)
+before making significant changes.
+
+The implementation is similar to that described in (Pearce et al,
+PASTE'04). Unlike many algorithms which interleave constraint
+generation and solving, constructing the callgraph as they go, this
+implementation for the most part observes a phase ordering (generation
+before solving), with only simple (copy) constraints being generated
+during solving. (The exception is reflection, which creates various
+constraints during solving as new types flow to reflect.Value
+operations.) This improves the traction of presolver optimisations,
+but imposes certain restrictions, e.g. potential context sensitivity
+is limited since all variants must be created a priori.
+
+
TERMINOLOGY
+A type is said to be "pointer-like" if it is a reference to an object.
+Pointer-like types include pointers and also interfaces, maps, channels,
+functions and slices.
+
We occasionally use C's x->f notation to distinguish the case where x
is a struct pointer from x.f where x is a struct value.
+Pointer analysis literature (and our comments) often uses the notation
+dst=*src+offset to mean something different than what it means in Go.
+It means: for each node index p in pts(src), the node index p+offset is
+in pts(dst). Similarly *dst+offset=src is used for store constraints
+and dst=src+offset for offset-address constraints.
+
NODES
@@ -68,18 +143,19 @@ pointers) and members of points-to sets (things that can be pointed
at, i.e. "labels").
Nodes are naturally numbered. The numbering enables compact
-representations of sets of nodes such as bitvectors or BDDs; and the
-ordering enables a very cheap way to group related nodes together.
-(For example, passing n parameters consists of generating n parallel
-constraints from caller+i to callee+i for 0<=i<n.)
-
-The zero nodeid means "not a pointer". Currently it's only used for
-struct{} or (). We generate all flow constraints, even for non-pointer
-types, with the expectations that (a) presolver optimisations will
-quickly collapse all the non-pointer ones and (b) we may get more
-precise results by treating uintptr as a possible pointer.
-
-Each node represents a scalar (word-sized) part of a value or object.
+representations of sets of nodes such as bitvectors (or BDDs); and the
+ordering enables a very cheap way to group related nodes together. For
+example, passing n parameters consists of generating n parallel
+constraints from caller+i to callee+i for 0<=i<n.
+
+The zero nodeid means "not a pointer". For simplicity, we generate flow
+constraints even for non-pointer types such as int. The pointer
+equivalence (PE) presolver optimization detects which variables cannot
+point to anything; this includes not only all variables of non-pointer
+types (such as int) but also variables of pointer-like types if they are
+always nil, or are parameters to a function that is never called.
+
+Each node represents a scalar part of a value or object.
Aggregate types (structs, tuples, arrays) are recursively flattened
out into a sequential list of scalar component types, and all the
elements of an array are represented by a single node. (The
@@ -103,26 +179,24 @@ Objects include:
- variable allocations in the stack frame or heap;
- maps, channels and slices created by calls to make();
- allocations to construct an interface;
- - allocations caused by literals and conversions,
- e.g. []byte("foo"), []byte(str).
+ - allocations caused by conversions, e.g. []byte(str).
- arrays allocated by calls to append();
-Many objects have no Go types. For example, the func, map and chan
-type kinds in Go are all varieties of pointers, but the respective
-objects are actual functions, maps, and channels. Given the way we
-model interfaces, they too are pointers to tagged objects with no
-Go type. And an *ssa.Global denotes the address of a global variable,
-but the object for a Global is the actual data. So, types of objects
-are usually "off by one indirection".
+Many objects have no Go types. For example, the func, map and chan type
+kinds in Go are all varieties of pointers, but their respective objects
+are actual functions (executable code), maps (hash tables), and channels
+(synchronized queues). Given the way we model interfaces, they too are
+pointers to "tagged" objects with no Go type. And an *ssa.Global denotes
+the address of a global variable, but the object for a Global is the
+actual data. So, the types of an ssa.Value that creates an object is
+"off by one indirection": a pointer to the object.
-The individual nodes of an object are sometimes referred to as
-"labels".
+The individual nodes of an object are sometimes referred to as "labels".
-Objects containing no nodes (i.e. just empty structs; tuples may be
-values but never objects in Go) are padded with an invalid-type node
-to have at least one node so that there is something to point to.
-(TODO(adonovan): I think this is unnecessary now that we have identity
-nodes; check.)
+For uniformity, all objects have a non-zero number of fields, even those
+of the empty type struct{}. (All arrays are treated as if of length 1,
+so there are no empty arrays. The empty tuple is never address-taken,
+so is never an object.)
TAGGED OBJECTS
A tagged object has the following layout:
v
...
-The T node's typ field is the dynamic type of the "payload", the value
+The T node's typ field is the dynamic type of the "payload": the value
v which follows, flattened out. The T node's obj has the otTagged
flag.
@@ -143,7 +217,7 @@ as a pointer that exclusively points to tagged objects.
Tagged objects may be indirect (obj.flags ⊇ {otIndirect}) meaning that
the value v is not of type T but *T; this is used only for
-reflect.Values that represent lvalues.
+reflect.Values that represent lvalues. (These are not implemented yet.)
ANALYSIS ABSTRACTION OF EACH TYPE
@@ -153,7 +227,7 @@ single node: basic types, pointers, channels, maps, slices, 'func'
pointers, interfaces.
Pointers
- Nothing to say here.
+ Nothing to say here, oddly.
Basic types (bool, string, numbers, unsafe.Pointer)
Currently all fields in the flattening of a type, including
@@ -172,9 +246,8 @@ Basic types (bool, string, numbers, unsafe.Pointer)
zero nodeid, and fields of these types within aggregate other types
are omitted.
- unsafe.Pointer conversions are not yet modelled as pointer
- conversions. Consequently uintptr is always a number and uintptr
- nodes do not point to any object.
+ unsafe.Pointers are not modelled as pointers, so a conversion of an
+ unsafe.Pointer to *T is (unsoundly) treated equivalent to new(T).
Channels
An expression of type 'chan T' is a kind of pointer that points
@@ -227,12 +300,14 @@ Functions
The first node is the function's identity node.
Associated with every callsite is a special "targets" variable,
- whose pts(·) contains the identity node of each function to which
- the call may dispatch. Identity words are not otherwise used.
+ whose pts() contains the identity node of each function to which
+ the call may dispatch. Identity words are not otherwise used during
+ the analysis, but we construct the call graph from the pts()
+ solution for such nodes.
- The following block of nodes represent the flattened-out types of
- the parameters and results of the function object, and are
- collectively known as its "P/R block".
+ The following block of contiguous nodes represents the flattened-out
+ types of the parameters ("P-block") and results ("R-block") of the
+ function object.
The treatment of free variables of closures (*ssa.FreeVar) is like
that of global variables; it is not context-sensitive.
@@ -255,18 +330,20 @@ Interfaces
all of the concrete type's methods; we can't tell a priori which
ones may be called.
- TypeAssert y = x.(T) is implemented by a dynamic filter triggered by
- each tagged object E added to pts(x). If T is an interface that E.T
- implements, E is added to pts(y). If T is a concrete type then edge
- E.v -> pts(y) is added.
+ TypeAssert y = x.(T) is implemented by a dynamic constraint
+ triggered by each tagged object O added to pts(x): a typeFilter
+ constraint if T is an interface type, or an untag constraint if T is
+ a concrete type. A typeFilter tests whether O.typ implements T; if
+ so, O is added to pts(y). An untagFilter tests whether O.typ is
+ assignable to T, and if so, a copy edge O.v -> y is added.
ChangeInterface is a simple copy because the representation of
tagged objects is independent of the interface type (in contrast
to the "method tables" approach used by the gc runtime).
- y := Invoke x.m(...) is implemented by allocating a contiguous P/R
- block for the callsite and adding a dynamic rule triggered by each
- tagged object E added to pts(x). The rule adds param/results copy
+ y := Invoke x.m(...) is implemented by allocating contiguous P/R
+ blocks for the callsite and adding a dynamic rule triggered by each
+ tagged object added to pts(x). The rule adds param/results copy
edges to/from each discovered concrete method.
(Q. Why do we model an interface as a pointer to a pair of type and
@@ -279,8 +356,7 @@ Interfaces
reflect.Value
A reflect.Value is modelled very similar to an interface{}, i.e. as
- a pointer exclusively to tagged objects, but with two
- generalizations.
+ a pointer exclusively to tagged objects, but with two generalizations.
1) a reflect.Value that represents an lvalue points to an indirect
(obj.flags ⊇ {otIndirect}) tagged object, which has a similar
@@ -304,6 +380,8 @@ reflect.Value
corresponds to the user-visible dynamic type, and the existence
of a pointer is an implementation detail.
+ (NB: indirect tagged objects are not yet implemented)
+
2) The dynamic type tag of a tagged object pointed to by a
reflect.Value may be an interface type; it need not be concrete.
@@ -350,15 +428,15 @@ Structs
struct is a pointer to its identity node. That node allows us to
distinguish a pointer to a struct from a pointer to its first field.
- Field offsets are currently the logical field offsets (plus one for
- the identity node), so the sizes of the fields can be ignored by the
- analysis.
+ Field offsets are logical field offsets (plus one for the identity
+ node), so the sizes of the fields can be ignored by the analysis.
- Sound treatment of unsafe.Pointer conversions (not yet implemented)
- would require us to model memory layout using physical field offsets
- to ascertain which object field(s) might be aliased by a given
- FieldAddr of a different base pointer type. It would also require
- us to dispense with the identity node.
+ (The identity node is non-traditional but enables the distinction
+ described above, which is valuable for code comprehension tools.
+ Typical pointer analyses for C, whose purpose is compiler
+ optimization, must soundly model unsafe.Pointer (void*) conversions,
+ and this requires fidelity to the actual memory layout using physical
+ field offsets.)
*ssa.Field y = x.f creates a simple edge to y from x's node at f's offset.
@@ -403,10 +481,16 @@ FUNCTION CALLS
A static call consists three steps:
- finding the function object of the callee;
- creating copy edges from the actual parameter value nodes to the
- params block in the function object (this includes the receiver
- if the callee is a method);
- - creating copy edges from the results block in the function
- object to the value nodes for the result of the call.
+ P-block in the function object (this includes the receiver if
+ the callee is a method);
+ - creating copy edges from the R-block in the function object to
+ the value nodes for the result of the call.
+
+ A static function call is little more than two struct value copies
+ between the P/R blocks of caller and callee:
+
+ callee.P = caller.P
+ caller.R = callee.R
Context sensitivity
@@ -422,13 +506,12 @@ FUNCTION CALLS
Dynamic calls work in a similar manner except that the creation of
copy edges occurs dynamically, in a similar fashion to a pair of
- struct copies:
+ struct copies in which the callee is indirect:
- *fn->params = callargs
- callresult = *fn->results
+ callee->P = caller.P
+ caller.R = callee->R
- (Recall that the function object's params and results blocks are
- contiguous.)
+ (Recall that the function object's P- and R-blocks are contiguous.)
Interface method invocation
@@ -436,7 +519,8 @@ FUNCTION CALLS
callsite and attach a dynamic closure rule to the interface. For
each new tagged object that flows to the interface, we look up
the concrete method, find its function object, and connect its P/R
- block to the callsite's P/R block.
+ blocks to the callsite's P/R blocks, adding copy edges to the graph
+ during solving.
Recording call targets
@@ -451,14 +535,38 @@ FUNCTION CALLS
internally this just iterates all "targets" variables' pts(·)s.
+PRESOLVER
+
+We implement Hash-Value Numbering (HVN), a pre-solver constraint
+optimization described in Hardekopf & Lin, SAS'07. This is documented
+in more detail in hvn.go. We intend to add its cousins HR and HU in
+future.
+
+
SOLVER
-The solver is currently a very naive Andersen-style implementation,
-although it does use difference propagation (Pearce et al, SQC'04).
-There is much to do.
+The solver is currently a naive Andersen-style implementation; it does
+not perform online cycle detection, though we plan to add solver
+optimisations such as Hybrid- and Lazy- Cycle Detection from (Hardekopf
+& Lin, PLDI'07).
+
+It uses difference propagation (Pearce et al, SQC'04) to avoid
+redundant re-triggering of closure rules for values already seen.
+
+Points-to sets are represented using sparse bit vectors (similar to
+those used in LLVM and gcc), which are more space- and time-efficient
+than sets based on Go's built-in map type or dense bit vectors.
+Nodes are permuted prior to solving so that object nodes (which may
+appear in points-to sets) are lower numbered than non-object (var)
+nodes. This improves the density of the set over which the PTSs
+range, and thus the efficiency of the representation.
-FURTHER READING.
+Partly thanks to avoiding map iteration, the execution of the solver is
+100% deterministic, a great help during debugging.
+
+
+FURTHER READING
Andersen, L. O. 1994. Program analysis and specialization for the C
programming language. Ph.D. dissertation. DIKU, University of
@@ -492,5 +600,11 @@ international conference on Static Analysis (SAS'07), Hanne Riis
Nielson and Gilberto Filé (Eds.). Springer-Verlag, Berlin, Heidelberg,
265-280.
+Atanas Rountev and Satish Chandra. 2000. Off-line variable substitution
+for scaling points-to analysis. In Proceedings of the ACM SIGPLAN 2000
+conference on Programming language design and implementation (PLDI '00).
+ACM, New York, NY, USA, 47-56. DOI=10.1145/349299.349310
+http://doi.acm.org/10.1145/349299.349310
+
*/
package pointer
diff --git a/go/pointer/gen.go b/go/pointer/gen.go
index bcb7448b9..65988fb37 100644
--- a/go/pointer/gen.go
+++ b/go/pointer/gen.go
@@ -59,7 +59,7 @@ func (a *analysis) addNodes(typ types.Type, comment string) nodeid {
//
func (a *analysis) addOneNode(typ types.Type, comment string, subelement *fieldInfo) nodeid {
id := a.nextNode()
- a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement})
+ a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement, solve: new(solverState)})
if a.log != nil {
fmt.Fprintf(a.log, "\tcreate n%d %s for %s%s\n",
id, typ, comment, subelement.path())
@@ -178,6 +178,9 @@ func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, data interface{}) nod
// makeRtype returns the canonical tagged object of type *rtype whose
// payload points to the sole rtype object for T.
+//
+// TODO(adonovan): move to reflect.go; it's part of the solver really.
+//
func (a *analysis) makeRtype(T types.Type) nodeid {
if v := a.rtypes.At(T); v != nil {
return v.(nodeid)
@@ -261,7 +264,7 @@ func (a *analysis) taggedValue(obj nodeid) (tDyn types.Type, v nodeid, indirect
return n.typ, obj + 1, flags&otIndirect != 0
}
-// funcParams returns the first node of the params block of the
+// funcParams returns the first node of the params (P) block of the
// function whose object node (obj.flags&otFunction) is id.
//
func (a *analysis) funcParams(id nodeid) nodeid {
@@ -272,7 +275,7 @@ func (a *analysis) funcParams(id nodeid) nodeid {
return id + 1
}
-// funcResults returns the first node of the results block of the
+// funcResults returns the first node of the results (R) block of the
// function whose object node (obj.flags&otFunction) is id.
//
func (a *analysis) funcResults(id nodeid) nodeid {
@@ -662,9 +665,9 @@ func (a *analysis) genDynamicCall(caller *cgnode, site *callsite, call *ssa.Call
// pts(targets) will be the set of possible call targets.
site.targets = a.valueNode(call.Value)
- // We add dynamic closure rules that store the arguments into,
- // and load the results from, the P/R block of each function
- // discovered in pts(targets).
+ // We add dynamic closure rules that store the arguments into
+ // the P-block and load the results from the R-block of each
+ // function discovered in pts(targets).
sig := call.Signature()
var offset uint32 = 1 // P/R block starts at offset 1
@@ -705,9 +708,9 @@ func (a *analysis) genInvoke(caller *cgnode, site *callsite, call *ssa.CallCommo
a.copy(result, r, a.sizeof(sig.Results()))
}
- // We add a dynamic invoke constraint that will add
- // edges from the caller's P/R block to the callee's
- // P/R block for each discovered call target.
+ // We add a dynamic invoke constraint that will connect the
+ // caller's and the callee's P/R blocks for each discovered
+ // call target.
a.addConstraint(&invokeConstraint{call.Method, a.valueNode(call.Value), block})
}
@@ -749,7 +752,7 @@ func (a *analysis) genInvokeReflectType(caller *cgnode, site *callsite, call *ss
a.copy(params, rtype, 1)
params++
- // Copy actual parameters into formal params block.
+ // Copy actual parameters into formal P-block.
// Must loop, since the actuals aren't contiguous.
for i, arg := range call.Args {
sz := a.sizeof(sig.Params().At(i).Type())
@@ -757,7 +760,7 @@ func (a *analysis) genInvokeReflectType(caller *cgnode, site *callsite, call *ss
params += nodeid(sz)
}
- // Copy formal results block to actual result.
+ // Copy formal R-block to actual R-block.
if result != 0 {
a.copy(result, a.funcResults(obj), a.sizeof(sig.Results()))
}
@@ -790,6 +793,7 @@ func (a *analysis) genCall(caller *cgnode, instr ssa.CallInstruction) {
caller.sites = append(caller.sites, site)
if a.log != nil {
+ // TODO(adonovan): debug: improve log message.
fmt.Fprintf(a.log, "\t%s to targets %s from %s\n", site, site.targets, caller)
}
}
@@ -863,7 +867,15 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid {
obj = a.nextNode()
tmap := v.Type().Underlying().(*types.Map)
a.addNodes(tmap.Key(), "makemap.key")
- a.addNodes(tmap.Elem(), "makemap.value")
+ elem := a.addNodes(tmap.Elem(), "makemap.value")
+
+ // To update the value field, MapUpdate
+ // generates store-with-offset constraints which
+ // the presolver can't model, so we must mark
+ // those nodes indirect.
+ for id, end := elem, elem+nodeid(a.sizeof(tmap.Elem())); id < end; id++ {
+ a.mapValues = append(a.mapValues, id)
+ }
a.endObject(obj, cgn, v)
case *ssa.MakeInterface:
@@ -1140,8 +1152,7 @@ func (a *analysis) genFunc(cgn *cgnode) {
impl := a.findIntrinsic(fn)
if a.log != nil {
- fmt.Fprintln(a.log)
- fmt.Fprintln(a.log)
+ fmt.Fprintf(a.log, "\n\n==== Generating constraints for %s, %s\n", cgn, cgn.contour())
// Hack: don't display body if intrinsic.
if impl != nil {
@@ -1187,6 +1198,7 @@ func (a *analysis) genFunc(cgn *cgnode) {
// Create value nodes for all value instructions
// since SSA may contain forward references.
+ var space [10]*ssa.Value
for _, b := range fn.Blocks {
for _, instr := range b.Instrs {
switch instr := instr.(type) {
@@ -1202,6 +1214,19 @@ func (a *analysis) genFunc(cgn *cgnode) {
id := a.addNodes(instr.Type(), comment)
a.setValueNode(instr, id, cgn)
}
+
+ // Record all address-taken functions (for presolver).
+ rands := instr.Operands(space[:0])
+ if call, ok := instr.(ssa.CallInstruction); ok && !call.Common().IsInvoke() {
+ // Skip CallCommon.Value in "call" mode.
+ // TODO(adonovan): fix: relies on unspecified ordering. Specify it.
+ rands = rands[1:]
+ }
+ for _, rand := range rands {
+ if atf, ok := (*rand).(*ssa.Function); ok {
+ a.atFuncs[atf] = true
+ }
+ }
}
}
@@ -1218,14 +1243,29 @@ func (a *analysis) genFunc(cgn *cgnode) {
// genMethodsOf generates nodes and constraints for all methods of type T.
func (a *analysis) genMethodsOf(T types.Type) {
+ itf := isInterface(T)
+
+ // TODO(adonovan): can we skip this entirely if itf is true?
+ // I think so, but the answer may depend on reflection.
mset := a.prog.MethodSets.MethodSet(T)
for i, n := 0, mset.Len(); i < n; i++ {
- a.valueNode(a.prog.Method(mset.At(i)))
+ m := a.prog.Method(mset.At(i))
+ a.valueNode(m)
+
+ if !itf {
+ // Methods of concrete types are address-taken functions.
+ a.atFuncs[m] = true
+ }
}
}
// generate generates offline constraints for the entire program.
func (a *analysis) generate() {
+ start("Constraint generation")
+ if a.log != nil {
+ fmt.Fprintln(a.log, "==== Generating constraints")
+ }
+
// Create a dummy node since we use the nodeid 0 for
// non-pointerlike variables.
a.addNodes(tInvalid, "(zero)")
@@ -1273,4 +1313,6 @@ func (a *analysis) generate() {
a.globalval = nil
a.localval = nil
a.localobj = nil
+
+ stop("Constraint generation")
}
diff --git a/go/pointer/hvn.go b/go/pointer/hvn.go
new file mode 100644
index 000000000..1dd9049c2
--- /dev/null
+++ b/go/pointer/hvn.go
@@ -0,0 +1,968 @@
+package pointer
+
+// This file implements Hash-Value Numbering (HVN), a pre-solver
+// constraint optimization described in Hardekopf & Lin, SAS'07 (see
+// doc.go) that analyses the graph topology to determine which sets of
+// variables are "pointer equivalent" (PE), i.e. must have identical
+// points-to sets in the solution.
+//
+// A separate ("offline") graph is constructed. Its nodes are those of
+// the main-graph, plus an additional node *X for each pointer node X.
+// With this graph we can reason about the unknown points-to set of
+// dereferenced pointers. (We do not generalize this to represent
+// unknown fields x->f, perhaps because such fields would be numerous,
+// though it might be worth an experiment.)
+//
+// Nodes whose points-to relations are not entirely captured by the
+// graph are marked as "indirect": the *X nodes, the parameters of
+// address-taken functions (which includes all functions in method
+// sets), or nodes updated by the solver rules for reflection, etc.
+//
+// All addr (y=&x) nodes are initially assigned a pointer-equivalence
+// (PE) label equal to x's nodeid in the main graph. (These are the
+// only PE labels that are less than len(a.nodes).)
+//
+// All offsetAddr (y=&x.f) constraints are initially assigned a PE
+// label; such labels are memoized, keyed by (x, f), so that equivalent
+// nodes y are assigned the same label.
+//
+// Then we process each strongly connected component (SCC) of the graph
+// in topological order, assigning it a PE label based on the set P of
+// PE labels that flow to it from its immediate dependencies.
+//
+// If any node in P is "indirect", the entire SCC is assigned a fresh PE
+// label. Otherwise:
+//
+// |P|=0 if P is empty, all nodes in the SCC are non-pointers (e.g.
+// uninitialized variables, or formal params of dead functions)
+// and the SCC is assigned the PE label of zero.
+//
+// |P|=1 if P is a singleton, the SCC is assigned the same label as the
+// sole element of P.
+//
+// |P|>1 if P contains multiple labels, a unique label representing P is
+// invented and recorded in an hash table, so that other
+// equivalent SCCs may also be assigned this label, akin to
+// conventional hash-value numbering in a compiler.
+//
+// Finally, a renumbering is computed such that each node is replaced by
+// the lowest-numbered node with the same PE label. All constraints are
+// renumbered, and any resulting duplicates are eliminated.
+//
+// The only nodes that are not renumbered are the objects x in addr
+// (y=&x) constraints, since the ids of these nodes (and fields derived
+// from them via offsetAddr rules) are the elements of all points-to
+// sets, so they must remain as they are if we want the same solution.
+//
+// The solverStates (node.solve) for nodes in the same equivalence class
+// are linked together so that all nodes in the class have the same
+// solution. This avoids the need to renumber nodeids buried in
+// Queries, cgnodes, etc (like (*analysis).renumber() does) since only
+// the solution is needed.
+//
+// The result of HVN is that the number of distinct nodes and
+// constraints is reduced, but the solution is identical (almost---see
+// CROSS-CHECK below). In particular, both linear and cyclic chains of
+// copies are each replaced by a single node.
+//
+// Nodes and constraints created "online" (e.g. while solving reflection
+// constraints) are not subject to this optimization.
+//
+// PERFORMANCE
+//
+// In two benchmarks (oracle and godoc), HVN eliminates about two thirds
+// of nodes, the majority accounted for by non-pointers: nodes of
+// non-pointer type, pointers that remain nil, formal parameters of dead
+// functions, nodes of untracked types, etc. It also reduces the number
+// of constraints, also by about two thirds, and the solving time by
+// 30--42%, although we must pay about 15% for the running time of HVN
+// itself. The benefit is greater for larger applications.
+//
+// There are many possible optimizations to improve the performance:
+// * Use fewer than 1:1 onodes to main graph nodes: many of the onodes
+// we create are not needed.
+// * HU (HVN with Union---see paper): coalesce "union" peLabels when
+// their expanded-out sets are equal.
+// * HR (HVN with deReference---see paper): this will require that we
+// apply HVN until fixed point, which may need more bookkeeping of the
+// correspondence of main nodes to onodes.
+// * Location Equivalence (see paper): have points-to sets contain not
+// locations but location-equivalence class labels, each representing
+// a set of locations.
+// * HVN with field-sensitive ref: model each of the fields of a
+// pointer-to-struct.
+//
+// CROSS-CHECK
+//
+// To verify the soundness of the optimization, when the
+// debugHVNCrossCheck option is enabled, we run the solver twice, once
+// before and once after running HVN, dumping the solution to disk, and
+// then we compare the results. If they are not identical, the analysis
+// panics.
+//
+// The solution dumped to disk includes only the N*N submatrix of the
+// complete solution where N is the number of nodes after generation.
+// In other words, we ignore pointer variables and objects created by
+// the solver itself, since their numbering depends on the solver order,
+// which is affected by the optimization. In any case, that's the only
+// part the client cares about.
+//
+// The cross-check is too strict and may fail spuriously. Although the
+// H&L paper describing HVN states that the solutions obtained should be
+// identical, this is not the case in practice because HVN can collapse
+// cycles involving *p even when pts(p)={}. Consider this example
+// distilled from testdata/hello.go:
+//
+// var x T
+// func f(p **T) {
+// t0 = *p
+// ...
+// t1 = φ(t0, &x)
+// *p = t1
+// }
+//
+// If f is dead code, we get:
+// unoptimized: pts(p)={} pts(t0)={} pts(t1)={&x}
+// optimized: pts(p)={} pts(t0)=pts(t1)=pts(*p)={&x}
+//
+// It's hard to argue that this is a bug: the result is sound and the
+// loss of precision is inconsequential---f is dead code, after all.
+// But unfortunately it limits the usefulness of the cross-check since
+// failures must be carefully analyzed. Ben Hardekopf suggests (in
+// personal correspondence) some approaches to mitigating it:
+//
+// If there is a node with an HVN points-to set that is a superset
+// of the NORM points-to set, then either it's a bug or it's a
+// result of this issue. If it's a result of this issue, then in
+// the offline constraint graph there should be a REF node inside
+// some cycle that reaches this node, and in the NORM solution the
+// pointer being dereferenced by that REF node should be the empty
+// set. If that isn't true then this is a bug. If it is true, then
+// you can further check that in the NORM solution the "extra"
+// points-to info in the HVN solution does in fact come from that
+// purported cycle (if it doesn't, then this is still a bug). If
+// you're doing the further check then you'll need to do it for
+// each "extra" points-to element in the HVN points-to set.
+//
+// There are probably ways to optimize these checks by taking
+// advantage of graph properties. For example, extraneous points-to
+// info will flow through the graph and end up in many
+// nodes. Rather than checking every node with extra info, you
+// could probably work out the "origin point" of the extra info and
+// just check there. Note that the check in the first bullet is
+// looking for soundness bugs, while the check in the second bullet
+// is looking for precision bugs; depending on your needs, you may
+// care more about one than the other.
+//
+// which we should evaluate. The cross-check is nonetheless invaluable
+// for all but one of the programs in the pointer_test suite.
+
+import (
+ "fmt"
+ "io"
+ "log"
+ "reflect"
+
+ "code.google.com/p/go.tools/container/intsets"
+ "code.google.com/p/go.tools/go/types"
+)
+
+// A peLabel is a pointer-equivalence label: two nodes with the same
+// peLabel have identical points-to solutions.
+//
+// The numbers are allocated consecutively like so:
+// 0 not a pointer
+// 1..N-1 addrConstraints (equals the constraint's .src field, hence sparse)
+// ... offsetAddr constraints
+// ... SCCs (with indirect nodes or multiple inputs)
+//
+// Each PE label denotes a set of pointers containing a single addr, a
+// single offsetAddr, or some set of other PE labels.
+//
+type peLabel int
+
+type hvn struct {
+ a *analysis
+ N int // len(a.nodes) immediately after constraint generation
+ log io.Writer // (optional) log of HVN lemmas
+ onodes []*onode // nodes of the offline graph
+ label peLabel // the next available PE label
+ hvnLabel map[string]peLabel // hash-value numbering (PE label) for each set of onodeids
+ stack []onodeid // DFS stack
+ index int32 // next onode.index, from Tarjan's SCC algorithm
+
+ // For each distinct offsetAddrConstraint (src, offset) pair,
+ // offsetAddrLabels records a unique PE label >= N.
+ offsetAddrLabels map[offsetAddr]peLabel
+}
+
+// The index of a node in the offline graph.
+// (Currently the first N align with the main nodes,
+// but this may change with HRU.)
+type onodeid uint32
+
+// An onode is a node in the offline constraint graph.
+// (Where ambiguous, members of analysis.nodes are referred to as
+// "main graph" nodes.)
+//
+// Edges in the offline constraint graph (edges and implicit) point to
+// the source, i.e. against the flow of values: they are dependencies.
+// Implicit edges are used for SCC computation, but not for gathering
+// incoming labels.
+//
+type onode struct {
+ rep onodeid // index of representative of SCC in offline constraint graph
+
+ edges intsets.Sparse // constraint edges X-->Y (this onode is X)
+ implicit intsets.Sparse // implicit edges *X-->*Y (this onode is X)
+ peLabels intsets.Sparse // set of peLabels that are pointer-equivalent to this one
+ indirect bool // node has points-to relations not represented in graph
+
+ // Tarjan's SCC algorithm
+ index, lowlink int32 // Tarjan numbering
+ scc int32 // -ve => on stack; 0 => unvisited; +ve => node is root of a found SCC
+}
+
+type offsetAddr struct {
+ ptr nodeid
+ offset uint32
+}
+
+// nextLabel issues the next unused pointer-equivalence label.
+func (h *hvn) nextLabel() peLabel {
+ h.label++
+ return h.label
+}
+
+// ref(X) returns the index of the onode for *X.
+func (h *hvn) ref(id onodeid) onodeid {
+ return id + onodeid(len(h.a.nodes))
+}
+
+// hvn computes pointer-equivalence labels (peLabels) using the Hash-based
+// Value Numbering (HVN) algorithm described in Hardekopf & Lin, SAS'07.
+//
+func (a *analysis) hvn() {
+ start("HVN")
+
+ if a.log != nil {
+ fmt.Fprintf(a.log, "\n\n==== Pointer equivalence optimization\n\n")
+ }
+
+ h := hvn{
+ a: a,
+ N: len(a.nodes),
+ log: a.log,
+ hvnLabel: make(map[string]peLabel),
+ offsetAddrLabels: make(map[offsetAddr]peLabel),
+ }
+
+ if h.log != nil {
+ fmt.Fprintf(h.log, "\nCreating offline graph nodes...\n")
+ }
+
+ // Create offline nodes. The first N nodes correspond to main
+ // graph nodes; the next N are their corresponding ref() nodes.
+ h.onodes = make([]*onode, 2*h.N)
+ for id := range a.nodes {
+ id := onodeid(id)
+ h.onodes[id] = &onode{}
+ h.onodes[h.ref(id)] = &onode{indirect: true}
+ }
+
+ // Each node initially represents just itself.
+ for id, o := range h.onodes {
+ o.rep = onodeid(id)
+ }
+
+ h.markIndirectNodes()
+
+ // Reserve the first N PE labels for addrConstraints.
+ h.label = peLabel(h.N)
+
+ // Add offline constraint edges.
+ if h.log != nil {
+ fmt.Fprintf(h.log, "\nAdding offline graph edges...\n")
+ }
+ for _, c := range a.constraints {
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "; %s\n", c)
+ }
+ c.presolve(&h)
+ }
+
+ // Find and collapse SCCs.
+ if h.log != nil {
+ fmt.Fprintf(h.log, "\nFinding SCCs...\n")
+ }
+ h.index = 1
+ for id, o := range h.onodes {
+ if id > 0 && o.index == 0 {
+ // Start depth-first search at each unvisited node.
+ h.visit(onodeid(id))
+ }
+ }
+
+ // Dump the solution
+ // (NB: somewhat redundant with logging from simplify().)
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\nPointer equivalences:\n")
+ for id, o := range h.onodes {
+ if id == 0 {
+ continue
+ }
+ if id == int(h.N) {
+ fmt.Fprintf(h.log, "---\n")
+ }
+ fmt.Fprintf(h.log, "o%d\t", id)
+ if o.rep != onodeid(id) {
+ fmt.Fprintf(h.log, "rep=o%d", o.rep)
+ } else {
+ fmt.Fprintf(h.log, "p%d", o.peLabels.Min())
+ if o.indirect {
+ fmt.Fprint(h.log, " indirect")
+ }
+ }
+ fmt.Fprintln(h.log)
+ }
+ }
+
+ // Simplify the main constraint graph
+ h.simplify()
+
+ a.showCounts()
+
+ stop("HVN")
+}
+
+// ---- constraint-specific rules ----
+
+// dst := &src
+func (c *addrConstraint) presolve(h *hvn) {
+ // Each object (src) is an initial PE label.
+ label := peLabel(c.src) // label < N
+ if debugHVNVerbose && h.log != nil {
+ // duplicate log messages are possible
+ fmt.Fprintf(h.log, "\tcreate p%d: {&n%d}\n", label, c.src)
+ }
+ odst := onodeid(c.dst)
+ osrc := onodeid(c.src)
+
+ // Assign dst this label.
+ h.onodes[odst].peLabels.Insert(int(label))
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d has p%d\n", odst, label)
+ }
+
+ h.addImplicitEdge(h.ref(odst), osrc) // *dst ~~> src.
+}
+
+// dst = src
+func (c *copyConstraint) presolve(h *hvn) {
+ odst := onodeid(c.dst)
+ osrc := onodeid(c.src)
+ h.addEdge(odst, osrc) // dst --> src
+ h.addImplicitEdge(h.ref(odst), h.ref(osrc)) // *dst ~~> *src
+}
+
+// dst = *src + offset
+func (c *loadConstraint) presolve(h *hvn) {
+ odst := onodeid(c.dst)
+ osrc := onodeid(c.src)
+ if c.offset == 0 {
+ h.addEdge(odst, h.ref(osrc)) // dst --> *src
+ } else {
+ // We don't interpret load-with-offset, e.g. results
+ // of map value lookup, R-block of dynamic call, slice
+ // copy/append, reflection.
+ h.markIndirect(odst, "load with offset")
+ }
+}
+
+// *dst + offset = src
+func (c *storeConstraint) presolve(h *hvn) {
+ odst := onodeid(c.dst)
+ osrc := onodeid(c.src)
+ if c.offset == 0 {
+ h.onodes[h.ref(odst)].edges.Insert(int(osrc)) // *dst --> src
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d --> o%d\n", h.ref(odst), osrc)
+ }
+ } else {
+ // We don't interpret store-with-offset.
+ // See discussion of soundness at markIndirectNodes.
+ }
+}
+
+// dst = &src.offset
+func (c *offsetAddrConstraint) presolve(h *hvn) {
+ // Give each distinct (addr, offset) pair a fresh PE label.
+ // The cache performs CSE, effectively.
+ key := offsetAddr{c.src, c.offset}
+ label, ok := h.offsetAddrLabels[key]
+ if !ok {
+ label = h.nextLabel()
+ h.offsetAddrLabels[key] = label
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\tcreate p%d: {&n%d.#%d}\n",
+ label, c.src, c.offset)
+ }
+ }
+
+ // Assign dst this label.
+ h.onodes[c.dst].peLabels.Insert(int(label))
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d has p%d\n", c.dst, label)
+ }
+}
+
+// dst = src.(typ) where typ is an interface
+func (c *typeFilterConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.dst), "typeFilter result")
+}
+
+// dst = src.(typ) where typ is concrete
+func (c *untagConstraint) presolve(h *hvn) {
+ odst := onodeid(c.dst)
+ for end := odst + onodeid(h.a.sizeof(c.typ)); odst < end; odst++ {
+ h.markIndirect(odst, "untag result")
+ }
+}
+
+// dst = src.method(c.params...)
+func (c *invokeConstraint) presolve(h *hvn) {
+ // All methods are address-taken functions, so
+ // their formal P-blocks were already marked indirect.
+
+ // Mark the caller's targets node as indirect.
+ sig := c.method.Type().(*types.Signature)
+ id := c.params
+ h.markIndirect(onodeid(c.params), "invoke targets node")
+ id++
+
+ id += nodeid(h.a.sizeof(sig.Params()))
+
+ // Mark the caller's R-block as indirect.
+ end := id + nodeid(h.a.sizeof(sig.Results()))
+ for id < end {
+ h.markIndirect(onodeid(id), "invoke R-block")
+ id++
+ }
+}
+
+// markIndirectNodes marks as indirect nodes whose points-to relations
+// are not entirely captured by the offline graph, including:
+//
+// (a) All address-taken nodes (including the following nodes within
+// the same object). This is described in the paper.
+//
+// The most subtle cause of indirect nodes is the generation of
+// store-with-offset constraints since the offline graph doesn't
+// represent them. A global audit of constraint generation reveals the
+// following uses of store-with-offset:
+//
+// (b) genDynamicCall, for P-blocks of dynamically called functions,
+// to which dynamic copy edges will be added to them during
+// solving: from storeConstraint for standalone functions,
+// and from invokeConstraint for methods.
+// All such P-blocks must be marked indirect.
+// (c) MapUpdate, to update the value part of a map object.
+// All MakeMap objects' value parts must be marked indirect.
+// (d) copyElems, to update the destination array.
+// All array elements must be marked indirect.
+//
+// Not all indirect marking happens here. ref() nodes are marked
+// indirect at construction, and each constraint's presolve() method may
+// mark additional nodes.
+//
+func (h *hvn) markIndirectNodes() {
+ // (a) all address-taken nodes, plus all nodes following them
+ // within the same object, since these may be indirectly
+ // stored or address-taken.
+ for _, c := range h.a.constraints {
+ if c, ok := c.(*addrConstraint); ok {
+ start := h.a.enclosingObj(c.src)
+ end := start + nodeid(h.a.nodes[start].obj.size)
+ for id := c.src; id < end; id++ {
+ h.markIndirect(onodeid(id), "A-T object")
+ }
+ }
+ }
+
+ // (b) P-blocks of all address-taken functions.
+ for id := 0; id < h.N; id++ {
+ obj := h.a.nodes[id].obj
+
+ // TODO(adonovan): opt: if obj.cgn.fn is a method and
+ // obj.cgn is not its shared contour, this is an
+ // "inlined" static method call. We needn't consider it
+ // address-taken since no invokeConstraint will affect it.
+
+ if obj != nil && obj.flags&otFunction != 0 && h.a.atFuncs[obj.cgn.fn] {
+ // address-taken function
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "n%d is address-taken: %s\n", id, obj.cgn.fn)
+ }
+ h.markIndirect(onodeid(id), "A-T func identity")
+ id++
+ sig := obj.cgn.fn.Signature
+ psize := h.a.sizeof(sig.Params())
+ if sig.Recv() != nil {
+ psize += h.a.sizeof(sig.Recv().Type())
+ }
+ for end := id + int(psize); id < end; id++ {
+ h.markIndirect(onodeid(id), "A-T func P-block")
+ }
+ id--
+ continue
+ }
+ }
+
+ // (c) all map objects' value fields.
+ for _, id := range h.a.mapValues {
+ h.markIndirect(onodeid(id), "makemap.value")
+ }
+
+ // (d) all array element objects.
+ // TODO(adonovan): opt: can we do better?
+ for id := 0; id < h.N; id++ {
+ // Identity node for an object of array type?
+ if tArray, ok := h.a.nodes[id].typ.(*types.Array); ok {
+ // Mark the array element nodes indirect.
+ // (Skip past the identity field.)
+ for _ = range h.a.flatten(tArray.Elem()) {
+ id++
+ h.markIndirect(onodeid(id), "array elem")
+ }
+ }
+ }
+}
+
+func (h *hvn) markIndirect(oid onodeid, comment string) {
+ h.onodes[oid].indirect = true
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d is indirect: %s\n", oid, comment)
+ }
+}
+
+// Adds an edge dst-->src.
+// Note the unusual convention: edges are dependency (contraflow) edges.
+func (h *hvn) addEdge(odst, osrc onodeid) {
+ h.onodes[odst].edges.Insert(int(osrc))
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d --> o%d\n", odst, osrc)
+ }
+}
+
+func (h *hvn) addImplicitEdge(odst, osrc onodeid) {
+ h.onodes[odst].implicit.Insert(int(osrc))
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d ~~> o%d\n", odst, osrc)
+ }
+}
+
+// visit implements the depth-first search of Tarjan's SCC algorithm.
+// Precondition: x is canonical.
+func (h *hvn) visit(x onodeid) {
+ h.checkCanonical(x)
+ xo := h.onodes[x]
+ xo.index = h.index
+ xo.lowlink = h.index
+ h.index++
+
+ h.stack = append(h.stack, x) // push
+ assert(xo.scc == 0, "node revisited")
+ xo.scc = -1
+
+ var deps []int
+ deps = xo.edges.AppendTo(deps)
+ deps = xo.implicit.AppendTo(deps)
+
+ for _, y := range deps {
+ // Loop invariant: x is canonical.
+
+ y := h.find(onodeid(y))
+
+ if x == y {
+ continue // nodes already coalesced
+ }
+
+ xo := h.onodes[x]
+ yo := h.onodes[y]
+
+ switch {
+ case yo.scc > 0:
+ // y is already a collapsed SCC
+
+ case yo.scc < 0:
+ // y is on the stack, and thus in the current SCC.
+ if yo.index < xo.lowlink {
+ xo.lowlink = yo.index
+ }
+
+ default:
+ // y is unvisited; visit it now.
+ h.visit(y)
+ // Note: x and y are now non-canonical.
+
+ x = h.find(onodeid(x))
+
+ if yo.lowlink < xo.lowlink {
+ xo.lowlink = yo.lowlink
+ }
+ }
+ }
+ h.checkCanonical(x)
+
+ // Is x the root of an SCC?
+ if xo.lowlink == xo.index {
+ // Coalesce all nodes in the SCC.
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "scc o%d\n", x)
+ }
+ for {
+ // Pop y from stack.
+ i := len(h.stack) - 1
+ y := h.stack[i]
+ h.stack = h.stack[:i]
+
+ h.checkCanonical(x)
+ xo := h.onodes[x]
+ h.checkCanonical(y)
+ yo := h.onodes[y]
+
+ if xo == yo {
+ // SCC is complete.
+ xo.scc = 1
+ h.labelSCC(x)
+ break
+ }
+ h.coalesce(x, y)
+ }
+ }
+}
+
+// Precondition: x is canonical.
+func (h *hvn) labelSCC(x onodeid) {
+ h.checkCanonical(x)
+ xo := h.onodes[x]
+ xpe := &xo.peLabels
+
+ // All indirect nodes get new labels.
+ if xo.indirect {
+ label := h.nextLabel()
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\tcreate p%d: indirect SCC\n", label)
+ fmt.Fprintf(h.log, "\to%d has p%d\n", x, label)
+ }
+
+ // Remove pre-labeling, in case a direct pre-labeled node was
+ // merged with an indirect one.
+ xpe.Clear()
+ xpe.Insert(int(label))
+
+ return
+ }
+
+ // Invariant: all peLabels sets are non-empty.
+ // Those that are logically empty contain zero as their sole element.
+	// No other set contains zero.
+
+ // Find all labels coming in to the coalesced SCC node.
+ for _, y := range xo.edges.AppendTo(nil) {
+ y := h.find(onodeid(y))
+ if y == x {
+ continue // already coalesced
+ }
+ ype := &h.onodes[y].peLabels
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\tedge from o%d = %s\n", y, ype)
+ }
+
+ if ype.IsEmpty() {
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\tnode has no PE label\n")
+ }
+ }
+ assert(!ype.IsEmpty(), "incoming node has no PE label")
+
+ if ype.Has(0) {
+ // {0} represents a non-pointer.
+ assert(ype.Len() == 1, "PE set contains {0, ...}")
+ } else {
+ xpe.UnionWith(ype)
+ }
+ }
+
+ switch xpe.Len() {
+ case 0:
+ // SCC has no incoming non-zero PE labels: it is a non-pointer.
+ xpe.Insert(0)
+
+ case 1:
+ // already a singleton
+
+ default:
+ // SCC has multiple incoming non-zero PE labels.
+ // Find the canonical label representing this set.
+ // We use String() as a fingerprint consistent with Equals().
+ key := xpe.String()
+ label, ok := h.hvnLabel[key]
+ if !ok {
+ label = h.nextLabel()
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\tcreate p%d: union %s\n", label, xpe.String())
+ }
+ h.hvnLabel[key] = label
+ }
+ xpe.Clear()
+ xpe.Insert(int(label))
+ }
+
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\to%d has p%d\n", x, xpe.Min())
+ }
+}
+
+// coalesce combines two nodes in the offline constraint graph.
+// Precondition: x and y are canonical.
+func (h *hvn) coalesce(x, y onodeid) {
+ xo := h.onodes[x]
+ yo := h.onodes[y]
+
+ // x becomes y's canonical representative.
+ yo.rep = x
+
+ if debugHVNVerbose && h.log != nil {
+ fmt.Fprintf(h.log, "\tcoalesce o%d into o%d\n", y, x)
+ }
+
+ // x accumulates y's edges.
+ xo.edges.UnionWith(&yo.edges)
+ yo.edges.Clear()
+
+ // x accumulates y's implicit edges.
+ xo.implicit.UnionWith(&yo.implicit)
+ yo.implicit.Clear()
+
+ // x accumulates y's pointer-equivalence labels.
+ xo.peLabels.UnionWith(&yo.peLabels)
+ yo.peLabels.Clear()
+
+ // x accumulates y's indirect flag.
+ if yo.indirect {
+ xo.indirect = true
+ }
+}
+
+// simplify computes a degenerate renumbering of nodeids from the PE
+// labels assigned by the hvn, and uses it to simplify the main
+// constraint graph, eliminating non-pointer nodes and duplicate
+// constraints.
+//
+func (h *hvn) simplify() {
+ // canon maps each peLabel to its canonical main node.
+ canon := make([]nodeid, h.label)
+ for i := range canon {
+ canon[i] = nodeid(h.N) // indicates "unset"
+ }
+
+ // mapping maps each main node index to the index of the canonical node.
+ mapping := make([]nodeid, len(h.a.nodes))
+
+ for id := range h.a.nodes {
+ id := nodeid(id)
+ if id == 0 {
+ canon[0] = 0
+ mapping[0] = 0
+ continue
+ }
+ oid := h.find(onodeid(id))
+ peLabels := &h.onodes[oid].peLabels
+ assert(peLabels.Len() == 1, "PE class is not a singleton")
+ label := peLabel(peLabels.Min())
+
+ canonId := canon[label]
+ if canonId == nodeid(h.N) {
+ // id becomes the representative of the PE label.
+ canonId = id
+ canon[label] = canonId
+
+ if h.a.log != nil {
+ fmt.Fprintf(h.a.log, "\tpts(n%d) is canonical : \t(%s)\n",
+ id, h.a.nodes[id].typ)
+ }
+
+ } else {
+ // Link the solver states for the two nodes.
+ assert(h.a.nodes[canonId].solve != nil, "missing solver state")
+ h.a.nodes[id].solve = h.a.nodes[canonId].solve
+
+ if h.a.log != nil {
+ // TODO(adonovan): debug: reorganize the log so it prints
+ // one line:
+ // pe y = x1, ..., xn
+ // for each canonical y. Requires allocation.
+ fmt.Fprintf(h.a.log, "\tpts(n%d) = pts(n%d) : %s\n",
+ id, canonId, h.a.nodes[id].typ)
+ }
+ }
+
+ mapping[id] = canonId
+ }
+
+ // Renumber the constraints, eliminate duplicates, and eliminate
+ // any containing non-pointers (n0).
+ addrs := make(map[addrConstraint]bool)
+ copys := make(map[copyConstraint]bool)
+ loads := make(map[loadConstraint]bool)
+ stores := make(map[storeConstraint]bool)
+ offsetAddrs := make(map[offsetAddrConstraint]bool)
+ untags := make(map[untagConstraint]bool)
+ typeFilters := make(map[typeFilterConstraint]bool)
+ invokes := make(map[invokeConstraint]bool)
+
+ nbefore := len(h.a.constraints)
+ cc := h.a.constraints[:0] // in-situ compaction
+ for _, c := range h.a.constraints {
+ // Renumber.
+ switch c := c.(type) {
+ case *addrConstraint:
+ // Don't renumber c.src since it is the label of
+ // an addressable object and will appear in PT sets.
+ c.dst = mapping[c.dst]
+ default:
+ c.renumber(mapping)
+ }
+
+ if c.ptr() == 0 {
+ continue // skip: constraint attached to non-pointer
+ }
+
+ var dup bool
+ switch c := c.(type) {
+ case *addrConstraint:
+ _, dup = addrs[*c]
+ addrs[*c] = true
+
+ case *copyConstraint:
+ if c.src == c.dst {
+ continue // skip degenerate copies
+ }
+ if c.src == 0 {
+ continue // skip copy from non-pointer
+ }
+ _, dup = copys[*c]
+ copys[*c] = true
+
+ case *loadConstraint:
+ if c.src == 0 {
+ continue // skip load from non-pointer
+ }
+ _, dup = loads[*c]
+ loads[*c] = true
+
+ case *storeConstraint:
+ if c.src == 0 {
+ continue // skip store from non-pointer
+ }
+ _, dup = stores[*c]
+ stores[*c] = true
+
+ case *offsetAddrConstraint:
+ if c.src == 0 {
+ continue // skip offset from non-pointer
+ }
+ _, dup = offsetAddrs[*c]
+ offsetAddrs[*c] = true
+
+ case *untagConstraint:
+ if c.src == 0 {
+ continue // skip untag of non-pointer
+ }
+ _, dup = untags[*c]
+ untags[*c] = true
+
+ case *typeFilterConstraint:
+ if c.src == 0 {
+ continue // skip filter of non-pointer
+ }
+ _, dup = typeFilters[*c]
+ typeFilters[*c] = true
+
+ case *invokeConstraint:
+ if c.params == 0 {
+ panic("non-pointer invoke.params")
+ }
+ if c.iface == 0 {
+ continue // skip invoke on non-pointer
+ }
+ _, dup = invokes[*c]
+ invokes[*c] = true
+
+ default:
+ // We don't bother de-duping advanced constraints
+ // (e.g. reflection) since they are uncommon.
+
+ // Eliminate constraints containing non-pointer nodeids.
+ //
+ // We use reflection to find the fields to avoid
+ // adding yet another method to constraint.
+ //
+ // TODO(adonovan): experiment with a constraint
+ // method that returns a slice of pointers to
+ // nodeids fields to enable uniform iteration;
+ // the renumber() method could be removed and
+ // implemented using the new one.
+ //
+ // TODO(adonovan): opt: this is unsound since
+ // some constraints still have an effect if one
+ // of the operands is zero: rVCall, rVMapIndex,
+ // rvSetMapIndex. Handle them specially.
+ rtNodeid := reflect.TypeOf(nodeid(0))
+ x := reflect.ValueOf(c).Elem()
+ for i, nf := 0, x.NumField(); i < nf; i++ {
+ f := x.Field(i)
+ if f.Type() == rtNodeid {
+ if f.Uint() == 0 {
+ dup = true // skip it
+ break
+ }
+ }
+ }
+ }
+ if dup {
+ continue // skip duplicates
+ }
+
+ cc = append(cc, c)
+ }
+ h.a.constraints = cc
+
+ log.Printf("#constraints: was %d, now %d\n", nbefore, len(h.a.constraints))
+}
+
+// find returns the canonical onodeid for x.
+// (The onodes form a disjoint set forest.)
+func (h *hvn) find(x onodeid) onodeid {
+ // TODO(adonovan): opt: this is a CPU hotspot. Try "union by rank".
+ xo := h.onodes[x]
+ rep := xo.rep
+ if rep != x {
+ rep = h.find(rep) // simple path compression
+ xo.rep = rep
+ }
+ return rep
+}
+
+func (h *hvn) checkCanonical(x onodeid) {
+ if debugHVN {
+ assert(x == h.find(x), "not canonical")
+ }
+}
+
+func assert(p bool, msg string) {
+ if debugHVN && !p {
+ panic("assertion failed: " + msg)
+ }
+}
diff --git a/go/pointer/intrinsics.go b/go/pointer/intrinsics.go
index b6e0b053f..61e2f7eee 100644
--- a/go/pointer/intrinsics.go
+++ b/go/pointer/intrinsics.go
@@ -255,13 +255,15 @@ func ext۰NotYetImplemented(a *analysis, cgn *cgnode) {
// runtime.SetFinalizer(x, f)
type runtimeSetFinalizerConstraint struct {
- targets nodeid
+ targets nodeid // (indirect)
f nodeid // (ptr)
x nodeid
}
-func (c *runtimeSetFinalizerConstraint) ptr() nodeid { return c.f }
-func (c *runtimeSetFinalizerConstraint) indirect(nodes []nodeid) []nodeid { return nodes }
+func (c *runtimeSetFinalizerConstraint) ptr() nodeid { return c.f }
+func (c *runtimeSetFinalizerConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.targets), "SetFinalizer.targets")
+}
func (c *runtimeSetFinalizerConstraint) renumber(mapping []nodeid) {
c.targets = mapping[c.targets]
c.f = mapping[c.f]
diff --git a/go/pointer/opt.go b/go/pointer/opt.go
index e89953428..2620cc0d8 100644
--- a/go/pointer/opt.go
+++ b/go/pointer/opt.go
@@ -4,17 +4,12 @@
package pointer
-// This file defines the constraint optimiser ("pre-solver").
-
-import (
- "fmt"
-)
-
-func (a *analysis) optimize() {
- a.renumber()
+// This file implements renumbering, a pre-solver optimization to
+// improve the efficiency of the solver's points-to set representation.
+//
+// TODO(adonovan): rename file "renumber.go"
- // TODO(adonovan): opt: PE (HVN, HRU), LE, etc.
-}
+import "fmt"
// renumber permutes a.nodes so that all nodes within an addressable
// object appear before all non-addressable nodes, maintaining the
@@ -30,7 +25,14 @@ func (a *analysis) optimize() {
// NB: nodes added during solving (e.g. for reflection, SetFinalizer)
// will be appended to the end.
//
+// Renumbering makes the PTA log inscrutable. To aid debugging, later
+// phases (e.g. HVN) must not rely on it having occurred.
+//
func (a *analysis) renumber() {
+ if a.log != nil {
+ fmt.Fprintf(a.log, "\n\n==== Renumbering\n\n")
+ }
+
N := nodeid(len(a.nodes))
newNodes := make([]*node, N, N)
renumbering := make([]nodeid, N, N) // maps old to new
diff --git a/go/pointer/pointer_test.go b/go/pointer/pointer_test.go
index 777281ef3..112c1e2d0 100644
--- a/go/pointer/pointer_test.go
+++ b/go/pointer/pointer_test.go
@@ -44,7 +44,7 @@ var inputs = []string{
"testdata/fmtexcerpt.go",
"testdata/func.go",
"testdata/funcreflect.go",
- "testdata/hello.go",
+ "testdata/hello.go", // NB: causes spurious failure of HVN cross-check
"testdata/interfaces.go",
"testdata/mapreflect.go",
"testdata/maps.go",
@@ -287,6 +287,7 @@ func doOneInput(input, filename string) bool {
}
var log bytes.Buffer
+ fmt.Fprintf(&log, "Input: %s\n", filename)
// Run the analysis.
config := &pointer.Config{
diff --git a/go/pointer/print.go b/go/pointer/print.go
index a0b11e8a8..4f2f4c7ae 100644
--- a/go/pointer/print.go
+++ b/go/pointer/print.go
@@ -35,7 +35,7 @@ func (c *untagConstraint) String() string {
}
func (c *invokeConstraint) String() string {
- return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params+1)
+ return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params)
}
func (n nodeid) String() string {
diff --git a/go/pointer/reflect.go b/go/pointer/reflect.go
index fe51718f6..6e5c54cab 100644
--- a/go/pointer/reflect.go
+++ b/go/pointer/reflect.go
@@ -20,6 +20,9 @@ package pointer
// yet implemented) correspond to reflect.Values with
// reflect.flagAddr.]
// A picture would help too.
+//
+// TODO(adonovan): try factoring up the common parts of the majority of
+// these constraints that are single input, single output.
import (
"fmt"
@@ -159,8 +162,10 @@ type rVBytesConstraint struct {
result nodeid // (indirect)
}
-func (c *rVBytesConstraint) ptr() nodeid { return c.v }
-func (c *rVBytesConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVBytesConstraint) ptr() nodeid { return c.v }
+func (c *rVBytesConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVBytes.result")
+}
func (c *rVBytesConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -205,7 +210,7 @@ func ext۰reflect۰Value۰Bytes(a *analysis, cgn *cgnode) {
// result = v.Call(in)
type rVCallConstraint struct {
cgn *cgnode
- targets nodeid
+ targets nodeid // (indirect)
v nodeid // (ptr)
arg nodeid // = in[*]
result nodeid // (indirect)
@@ -213,13 +218,9 @@ type rVCallConstraint struct {
}
func (c *rVCallConstraint) ptr() nodeid { return c.v }
-func (c *rVCallConstraint) indirect(nodes []nodeid) []nodeid {
- nodes = append(nodes, c.result)
- // TODO(adonovan): we may be able to handle 'targets' out-of-band
- // so that all implementations indirect() return a single value.
- // We can then dispense with the slice.
- nodes = append(nodes, c.targets)
- return nodes
+func (c *rVCallConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.targets), "rVCall.targets")
+ h.markIndirect(onodeid(c.result), "rVCall.result")
}
func (c *rVCallConstraint) renumber(mapping []nodeid) {
c.targets = mapping[c.targets]
@@ -363,8 +364,10 @@ type rVElemConstraint struct {
result nodeid // (indirect)
}
-func (c *rVElemConstraint) ptr() nodeid { return c.v }
-func (c *rVElemConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVElemConstraint) ptr() nodeid { return c.v }
+func (c *rVElemConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVElem.result")
+}
func (c *rVElemConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -426,8 +429,10 @@ type rVIndexConstraint struct {
result nodeid // (indirect)
}
-func (c *rVIndexConstraint) ptr() nodeid { return c.v }
-func (c *rVIndexConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVIndexConstraint) ptr() nodeid { return c.v }
+func (c *rVIndexConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVIndex.result")
+}
func (c *rVIndexConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -488,8 +493,10 @@ type rVInterfaceConstraint struct {
result nodeid // (indirect)
}
-func (c *rVInterfaceConstraint) ptr() nodeid { return c.v }
-func (c *rVInterfaceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVInterfaceConstraint) ptr() nodeid { return c.v }
+func (c *rVInterfaceConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVInterface.result")
+}
func (c *rVInterfaceConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -541,8 +548,10 @@ type rVMapIndexConstraint struct {
result nodeid // (indirect)
}
-func (c *rVMapIndexConstraint) ptr() nodeid { return c.v }
-func (c *rVMapIndexConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVMapIndexConstraint) ptr() nodeid { return c.v }
+func (c *rVMapIndexConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVMapIndex.result")
+}
func (c *rVMapIndexConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -595,8 +604,10 @@ type rVMapKeysConstraint struct {
result nodeid // (indirect)
}
-func (c *rVMapKeysConstraint) ptr() nodeid { return c.v }
-func (c *rVMapKeysConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVMapKeysConstraint) ptr() nodeid { return c.v }
+func (c *rVMapKeysConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVMapKeys.result")
+}
func (c *rVMapKeysConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -650,7 +661,7 @@ func ext۰reflect۰Value۰MapKeys(a *analysis, cgn *cgnode) {
func ext۰reflect۰Value۰Method(a *analysis, cgn *cgnode) {} // TODO(adonovan)
func ext۰reflect۰Value۰MethodByName(a *analysis, cgn *cgnode) {} // TODO(adonovan)
-// ---------- func (Value).Recv(Value) ----------
+// ---------- func (Value).Recv(Value) Value ----------
// result, _ = v.Recv()
type rVRecvConstraint struct {
@@ -659,8 +670,10 @@ type rVRecvConstraint struct {
result nodeid // (indirect)
}
-func (c *rVRecvConstraint) ptr() nodeid { return c.v }
-func (c *rVRecvConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVRecvConstraint) ptr() nodeid { return c.v }
+func (c *rVRecvConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVRecv.result")
+}
func (c *rVRecvConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -714,8 +727,8 @@ type rVSendConstraint struct {
x nodeid
}
-func (c *rVSendConstraint) ptr() nodeid { return c.v }
-func (c *rVSendConstraint) indirect(nodes []nodeid) []nodeid { return nodes }
+func (c *rVSendConstraint) ptr() nodeid { return c.v }
+func (c *rVSendConstraint) presolve(*hvn) {}
func (c *rVSendConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.x = mapping[c.x]
@@ -767,8 +780,8 @@ type rVSetBytesConstraint struct {
x nodeid
}
-func (c *rVSetBytesConstraint) ptr() nodeid { return c.v }
-func (c *rVSetBytesConstraint) indirect(nodes []nodeid) []nodeid { return nodes }
+func (c *rVSetBytesConstraint) ptr() nodeid { return c.v }
+func (c *rVSetBytesConstraint) presolve(*hvn) {}
func (c *rVSetBytesConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.x = mapping[c.x]
@@ -816,8 +829,8 @@ type rVSetMapIndexConstraint struct {
val nodeid
}
-func (c *rVSetMapIndexConstraint) ptr() nodeid { return c.v }
-func (c *rVSetMapIndexConstraint) indirect(nodes []nodeid) []nodeid { return nodes }
+func (c *rVSetMapIndexConstraint) ptr() nodeid { return c.v }
+func (c *rVSetMapIndexConstraint) presolve(*hvn) {}
func (c *rVSetMapIndexConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.key = mapping[c.key]
@@ -868,7 +881,7 @@ func ext۰reflect۰Value۰SetMapIndex(a *analysis, cgn *cgnode) {
func ext۰reflect۰Value۰SetPointer(a *analysis, cgn *cgnode) {} // TODO(adonovan)
-// ---------- func (Value).Slice(v Value, i, j int) ----------
+// ---------- func (Value).Slice(v Value, i, j int) Value ----------
// result = v.Slice(_, _)
type rVSliceConstraint struct {
@@ -877,8 +890,10 @@ type rVSliceConstraint struct {
result nodeid // (indirect)
}
-func (c *rVSliceConstraint) ptr() nodeid { return c.v }
-func (c *rVSliceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rVSliceConstraint) ptr() nodeid { return c.v }
+func (c *rVSliceConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rVSlice.result")
+}
func (c *rVSliceConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -956,8 +971,10 @@ type reflectChanOfConstraint struct {
dirs []types.ChanDir
}
-func (c *reflectChanOfConstraint) ptr() nodeid { return c.t }
-func (c *reflectChanOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectChanOfConstraint) ptr() nodeid { return c.t }
+func (c *reflectChanOfConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectChanOf.result")
+}
func (c *reflectChanOfConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1028,8 +1045,10 @@ type reflectIndirectConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectIndirectConstraint) ptr() nodeid { return c.v }
-func (c *reflectIndirectConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectIndirectConstraint) ptr() nodeid { return c.v }
+func (c *reflectIndirectConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectIndirect.result")
+}
func (c *reflectIndirectConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v]
c.result = mapping[c.result]
@@ -1080,8 +1099,10 @@ type reflectMakeChanConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectMakeChanConstraint) ptr() nodeid { return c.typ }
-func (c *reflectMakeChanConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectMakeChanConstraint) ptr() nodeid { return c.typ }
+func (c *reflectMakeChanConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectMakeChan.result")
+}
func (c *reflectMakeChanConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ]
c.result = mapping[c.result]
@@ -1138,8 +1159,10 @@ type reflectMakeMapConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectMakeMapConstraint) ptr() nodeid { return c.typ }
-func (c *reflectMakeMapConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectMakeMapConstraint) ptr() nodeid { return c.typ }
+func (c *reflectMakeMapConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectMakeMap.result")
+}
func (c *reflectMakeMapConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ]
c.result = mapping[c.result]
@@ -1195,8 +1218,10 @@ type reflectMakeSliceConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectMakeSliceConstraint) ptr() nodeid { return c.typ }
-func (c *reflectMakeSliceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectMakeSliceConstraint) ptr() nodeid { return c.typ }
+func (c *reflectMakeSliceConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectMakeSlice.result")
+}
func (c *reflectMakeSliceConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ]
c.result = mapping[c.result]
@@ -1252,8 +1277,10 @@ type reflectNewConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectNewConstraint) ptr() nodeid { return c.typ }
-func (c *reflectNewConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectNewConstraint) ptr() nodeid { return c.typ }
+func (c *reflectNewConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectNew.result")
+}
func (c *reflectNewConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ]
c.result = mapping[c.result]
@@ -1314,8 +1341,10 @@ type reflectPtrToConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectPtrToConstraint) ptr() nodeid { return c.t }
-func (c *reflectPtrToConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectPtrToConstraint) ptr() nodeid { return c.t }
+func (c *reflectPtrToConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectPtrTo.result")
+}
func (c *reflectPtrToConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1363,8 +1392,10 @@ type reflectSliceOfConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectSliceOfConstraint) ptr() nodeid { return c.t }
-func (c *reflectSliceOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectSliceOfConstraint) ptr() nodeid { return c.t }
+func (c *reflectSliceOfConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectSliceOf.result")
+}
func (c *reflectSliceOfConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1410,8 +1441,10 @@ type reflectTypeOfConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectTypeOfConstraint) ptr() nodeid { return c.i }
-func (c *reflectTypeOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectTypeOfConstraint) ptr() nodeid { return c.i }
+func (c *reflectTypeOfConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectTypeOf.result")
+}
func (c *reflectTypeOfConstraint) renumber(mapping []nodeid) {
c.i = mapping[c.i]
c.result = mapping[c.result]
@@ -1461,8 +1494,10 @@ type reflectZeroConstraint struct {
result nodeid // (indirect)
}
-func (c *reflectZeroConstraint) ptr() nodeid { return c.typ }
-func (c *reflectZeroConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *reflectZeroConstraint) ptr() nodeid { return c.typ }
+func (c *reflectZeroConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "reflectZero.result")
+}
func (c *reflectZeroConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ]
c.result = mapping[c.result]
@@ -1521,8 +1556,10 @@ type rtypeElemConstraint struct {
result nodeid // (indirect)
}
-func (c *rtypeElemConstraint) ptr() nodeid { return c.t }
-func (c *rtypeElemConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rtypeElemConstraint) ptr() nodeid { return c.t }
+func (c *rtypeElemConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rtypeElem.result")
+}
func (c *rtypeElemConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1572,8 +1609,10 @@ type rtypeFieldByNameConstraint struct {
result nodeid // (indirect)
}
-func (c *rtypeFieldByNameConstraint) ptr() nodeid { return c.t }
-func (c *rtypeFieldByNameConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rtypeFieldByNameConstraint) ptr() nodeid { return c.t }
+func (c *rtypeFieldByNameConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result+3), "rtypeFieldByName.result.Type")
+}
func (c *rtypeFieldByNameConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1661,8 +1700,10 @@ type rtypeInOutConstraint struct {
i int // -ve if not a constant
}
-func (c *rtypeInOutConstraint) ptr() nodeid { return c.t }
-func (c *rtypeInOutConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rtypeInOutConstraint) ptr() nodeid { return c.t }
+func (c *rtypeInOutConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rtypeInOut.result")
+}
func (c *rtypeInOutConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1736,8 +1777,10 @@ type rtypeKeyConstraint struct {
result nodeid // (indirect)
}
-func (c *rtypeKeyConstraint) ptr() nodeid { return c.t }
-func (c *rtypeKeyConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) }
+func (c *rtypeKeyConstraint) ptr() nodeid { return c.t }
+func (c *rtypeKeyConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result), "rtypeKey.result")
+}
func (c *rtypeKeyConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
c.result = mapping[c.result]
@@ -1784,8 +1827,9 @@ type rtypeMethodByNameConstraint struct {
}
func (c *rtypeMethodByNameConstraint) ptr() nodeid { return c.t }
-func (c *rtypeMethodByNameConstraint) indirect(nodes []nodeid) []nodeid {
- return append(nodes, c.result)
+func (c *rtypeMethodByNameConstraint) presolve(h *hvn) {
+ h.markIndirect(onodeid(c.result+3), "rtypeMethodByName.result.Type")
+ h.markIndirect(onodeid(c.result+4), "rtypeMethodByName.result.Func")
}
func (c *rtypeMethodByNameConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t]
diff --git a/go/pointer/solve.go b/go/pointer/solve.go
index e087e051e..ce5d07c6c 100644
--- a/go/pointer/solve.go
+++ b/go/pointer/solve.go
@@ -13,15 +13,22 @@ import (
"code.google.com/p/go.tools/go/types"
)
+type solverState struct {
+ complex []constraint // complex constraints attached to this node
+ copyTo nodeset // simple copy constraint edges
+ pts nodeset // points-to set of this node
+ prevPTS nodeset // pts(n) in previous iteration (for difference propagation)
+}
+
func (a *analysis) solve() {
- var delta nodeset
+ start("Solving")
+ if a.log != nil {
+ fmt.Fprintf(a.log, "\n\n==== Solving constraints\n\n")
+ }
// Solver main loop.
- for round := 1; ; round++ {
- if a.log != nil {
- fmt.Fprintf(a.log, "Solving, round %d\n", round)
- }
-
+ var delta nodeset
+ for {
// Add new constraints to the graph:
// static constraints from SSA on round 1,
// dynamic constraints from reflection thereafter.
@@ -39,22 +46,33 @@ func (a *analysis) solve() {
n := a.nodes[id]
// Difference propagation.
- delta.Difference(&n.pts.Sparse, &n.prevPts.Sparse)
+ delta.Difference(&n.solve.pts.Sparse, &n.solve.prevPTS.Sparse)
if delta.IsEmpty() {
continue
}
- n.prevPts.Copy(&n.pts.Sparse)
+ if a.log != nil {
+ fmt.Fprintf(a.log, "\t\tpts(n%d : %s) = %s + %s\n",
+ id, n.typ, &delta, &n.solve.prevPTS)
+ }
+ n.solve.prevPTS.Copy(&n.solve.pts.Sparse)
// Apply all resolution rules attached to n.
a.solveConstraints(n, &delta)
if a.log != nil {
- fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, &n.pts)
+ fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, &n.solve.pts)
}
}
- if !a.nodes[0].pts.IsEmpty() {
- panic(fmt.Sprintf("pts(0) is nonempty: %s", &a.nodes[0].pts))
+ if !a.nodes[0].solve.pts.IsEmpty() {
+ panic(fmt.Sprintf("pts(0) is nonempty: %s", &a.nodes[0].solve.pts))
+ }
+
+ // Release working state (but keep final PTS).
+ for _, n := range a.nodes {
+ n.solve.complex = nil
+ n.solve.copyTo.Clear()
+ n.solve.prevPTS.Clear()
}
if a.log != nil {
@@ -62,11 +80,12 @@ func (a *analysis) solve() {
// Dump solution.
for i, n := range a.nodes {
- if !n.pts.IsEmpty() {
- fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, &n.pts, n.typ)
+ if !n.solve.pts.IsEmpty() {
+ fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, &n.solve.pts, n.typ)
}
}
}
+ stop("Solving")
}
// processNewConstraints takes the new constraints from a.constraints
@@ -84,13 +103,13 @@ func (a *analysis) processNewConstraints() {
for _, c := range constraints {
if c, ok := c.(*addrConstraint); ok {
dst := a.nodes[c.dst]
- dst.pts.add(c.src)
+ dst.solve.pts.add(c.src)
// Populate the worklist with nodes that point to
// something initially (due to addrConstraints) and
// have other constraints attached.
// (A no-op in round 1.)
- if !dst.copyTo.IsEmpty() || len(dst.complex) > 0 {
+ if !dst.solve.copyTo.IsEmpty() || len(dst.solve.complex) > 0 {
a.addWork(c.dst)
}
}
@@ -107,16 +126,16 @@ func (a *analysis) processNewConstraints() {
case *copyConstraint:
// simple (copy) constraint
id = c.src
- a.nodes[id].copyTo.add(c.dst)
+ a.nodes[id].solve.copyTo.add(c.dst)
default:
// complex constraint
id = c.ptr()
- ptr := a.nodes[id]
- ptr.complex = append(ptr.complex, c)
+ solve := a.nodes[id].solve
+ solve.complex = append(solve.complex, c)
}
- if n := a.nodes[id]; !n.pts.IsEmpty() {
- if !n.prevPts.IsEmpty() {
+ if n := a.nodes[id]; !n.solve.pts.IsEmpty() {
+ if !n.solve.prevPTS.IsEmpty() {
stale.add(id)
}
a.addWork(id)
@@ -125,8 +144,8 @@ func (a *analysis) processNewConstraints() {
// Apply new constraints to pre-existing PTS labels.
var space [50]int
for _, id := range stale.AppendTo(space[:0]) {
- n := a.nodes[id]
- a.solveConstraints(n, &n.prevPts)
+ n := a.nodes[nodeid(id)]
+ a.solveConstraints(n, &n.solve.prevPTS)
}
}
@@ -140,7 +159,7 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) {
}
// Process complex constraints dependent on n.
- for _, c := range n.complex {
+ for _, c := range n.solve.complex {
if a.log != nil {
fmt.Fprintf(a.log, "\t\tconstraint %s\n", c)
}
@@ -149,10 +168,10 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) {
// Process copy constraints.
var copySeen nodeset
- for _, x := range n.copyTo.AppendTo(a.deltaSpace) {
+ for _, x := range n.solve.copyTo.AppendTo(a.deltaSpace) {
mid := nodeid(x)
if copySeen.add(mid) {
- if a.nodes[mid].pts.addAll(delta) {
+ if a.nodes[mid].solve.pts.addAll(delta) {
a.addWork(mid)
}
}
@@ -161,7 +180,11 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) {
// addLabel adds label to the points-to set of ptr and reports whether the set grew.
func (a *analysis) addLabel(ptr, label nodeid) bool {
- return a.nodes[ptr].pts.add(label)
+ b := a.nodes[ptr].solve.pts.add(label)
+ if b && a.log != nil {
+ fmt.Fprintf(a.log, "\t\tpts(n%d) += n%d\n", ptr, label)
+ }
+ return b
}
func (a *analysis) addWork(id nodeid) {
@@ -180,7 +203,7 @@ func (a *analysis) addWork(id nodeid) {
//
func (a *analysis) onlineCopy(dst, src nodeid) bool {
if dst != src {
- if nsrc := a.nodes[src]; nsrc.copyTo.add(dst) {
+ if nsrc := a.nodes[src]; nsrc.solve.copyTo.add(dst) {
if a.log != nil {
fmt.Fprintf(a.log, "\t\t\tdynamic copy n%d <- n%d\n", dst, src)
}
@@ -188,7 +211,7 @@ func (a *analysis) onlineCopy(dst, src nodeid) bool {
// are followed by addWork, possibly batched
// via a 'changed' flag; see if there's a
// noticeable penalty to calling addWork here.
- return a.nodes[dst].pts.addAll(&nsrc.pts)
+ return a.nodes[dst].solve.pts.addAll(&nsrc.solve.pts)
}
}
return false
@@ -239,7 +262,7 @@ func (c *offsetAddrConstraint) solve(a *analysis, delta *nodeset) {
dst := a.nodes[c.dst]
for _, x := range delta.AppendTo(a.deltaSpace) {
k := nodeid(x)
- if dst.pts.add(k + nodeid(c.offset)) {
+ if dst.solve.pts.add(k + nodeid(c.offset)) {
a.addWork(c.dst)
}
}
diff --git a/go/pointer/stdlib_test.go b/go/pointer/stdlib_test.go
new file mode 100644
index 000000000..c837bf8d5
--- /dev/null
+++ b/go/pointer/stdlib_test.go
@@ -0,0 +1,133 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pointer
+
+// This file runs the pointer analysis on all packages and tests beneath
+// $GOROOT. It provides a "smoke test" that the analysis doesn't crash
+// on a large input, and a benchmark for performance measurement.
+//
+// Because it is relatively slow, the --stdlib flag must be enabled for
+// this test to run:
+// % go test -v code.google.com/p/go.tools/go/pointer --stdlib
+
+import (
+ "flag"
+ "go/token"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "testing"
+ "time"
+
+ "code.google.com/p/go.tools/go/loader"
+ "code.google.com/p/go.tools/go/ssa"
+ "code.google.com/p/go.tools/go/ssa/ssautil"
+)
+
+var runStdlibTest = flag.Bool("stdlib", false, "Run the (slow) stdlib test")
+
+// TODO(adonovan): move this to go/buildutil package since we have four copies:
+// go/{loader,pointer,ssa}/stdlib_test.go and godoc/analysis/analysis.go.
+func allPackages() []string {
+ var pkgs []string
+ root := filepath.Join(runtime.GOROOT(), "src/pkg") + string(os.PathSeparator)
+ filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+ // Prune the search if we encounter any of these names:
+ switch filepath.Base(path) {
+ case "testdata", ".hg":
+ return filepath.SkipDir
+ }
+ if info.IsDir() {
+ pkg := filepath.ToSlash(strings.TrimPrefix(path, root))
+ switch pkg {
+ case "builtin", "pkg":
+ return filepath.SkipDir // skip these subtrees
+ case "":
+ return nil // ignore root of tree
+ }
+ pkgs = append(pkgs, pkg)
+ }
+
+ return nil
+ })
+ return pkgs
+}
+
+func TestStdlib(t *testing.T) {
+ if !*runStdlibTest {
+ t.Skip("skipping (slow) stdlib test (use --stdlib)")
+ }
+
+ // Load, parse and type-check the program.
+ var conf loader.Config
+ conf.SourceImports = true
+ if _, err := conf.FromArgs(allPackages(), true); err != nil {
+ t.Errorf("FromArgs failed: %v", err)
+ return
+ }
+
+ iprog, err := conf.Load()
+ if err != nil {
+ t.Fatalf("Load failed: %v", err)
+ }
+
+ // Create SSA packages.
+ prog := ssa.Create(iprog, 0)
+ prog.BuildAll()
+
+ numPkgs := len(prog.AllPackages())
+ if want := 140; numPkgs < want {
+ t.Errorf("Loaded only %d packages, want at least %d", numPkgs, want)
+ }
+
+ // Determine the set of packages/tests to analyze.
+ var testPkgs []*ssa.Package
+ for _, info := range iprog.InitialPackages() {
+ testPkgs = append(testPkgs, prog.Package(info.Pkg))
+ }
+ testmain := prog.CreateTestMainPackage(testPkgs...)
+ if testmain == nil {
+ t.Fatal("analysis scope has tests")
+ }
+
+ // Run the analysis.
+ config := &Config{
+ Reflection: false, // TODO(adonovan): fix remaining bug in rVCallConstraint, then enable.
+ BuildCallGraph: true,
+ Mains: []*ssa.Package{testmain},
+ }
+ // TODO(adonovan): add some query values (affects track bits).
+
+ t0 := time.Now()
+
+ result, err := Analyze(config)
+ if err != nil {
+ t.Fatal(err) // internal error in pointer analysis
+ }
+ _ = result // TODO(adonovan): measure something
+
+ t1 := time.Now()
+
+ // Dump some statistics.
+ allFuncs := ssautil.AllFunctions(prog)
+ var numInstrs int
+ for fn := range allFuncs {
+ for _, b := range fn.Blocks {
+ numInstrs += len(b.Instrs)
+ }
+ }
+
+ // determine line count
+ var lineCount int
+ prog.Fset.Iterate(func(f *token.File) bool {
+ lineCount += f.LineCount()
+ return true
+ })
+
+ t.Log("#Source lines: ", lineCount)
+ t.Log("#Instructions: ", numInstrs)
+ t.Log("Pointer analysis: ", t1.Sub(t0))
+}
diff --git a/go/pointer/testdata/finalizer.go b/go/pointer/testdata/finalizer.go
index d80f85acb..97f25c904 100644
--- a/go/pointer/testdata/finalizer.go
+++ b/go/pointer/testdata/finalizer.go
@@ -68,6 +68,13 @@ func runtimeSetFinalizerIndirect() {
setFinalizer(x, final4)
}
+// Exercise the elimination of SetFinalizer
+// constraints with non-pointer operands.
+func runtimeSetFinalizerNonpointer() {
+ runtime.SetFinalizer(nil, (*T).finalize) // x is a non-pointer
+ runtime.SetFinalizer((*T).finalize, nil) // f is a non-pointer
+}
+
// @calls main.runtimeSetFinalizerIndirect -> runtime.SetFinalizer
// @calls runtime.SetFinalizer -> main.final4
@@ -76,6 +83,7 @@ func main() {
runtimeSetFinalizer2()
runtimeSetFinalizer3()
runtimeSetFinalizerIndirect()
+ runtimeSetFinalizerNonpointer()
}
var unknown bool // defeat dead-code elimination
diff --git a/go/pointer/util.go b/go/pointer/util.go
index b4d806467..e3ac4fce6 100644
--- a/go/pointer/util.go
+++ b/go/pointer/util.go
@@ -7,6 +7,11 @@ package pointer
import (
"bytes"
"fmt"
+ "log"
+ "os"
+ "os/exec"
+ "runtime"
+ "time"
"code.google.com/p/go.tools/container/intsets"
"code.google.com/p/go.tools/go/types"
@@ -220,7 +225,7 @@ func (a *analysis) shouldTrack(T types.Type) bool {
}
a.trackTypes[T] = track
if !track && a.log != nil {
- fmt.Fprintf(a.log, "Type not tracked: %s\n", T)
+ fmt.Fprintf(a.log, "\ttype not tracked: %s\n", T)
}
}
return track
@@ -280,3 +285,34 @@ func (ns *nodeset) add(n nodeid) bool {
func (x *nodeset) addAll(y *nodeset) bool {
return x.UnionWith(&y.Sparse)
}
+
+// Profiling & debugging -------------------------------------------------------
+
+var timers = make(map[string]time.Time)
+
+func start(name string) {
+ if debugTimers {
+ timers[name] = time.Now()
+ log.Printf("%s...\n", name)
+ }
+}
+
+func stop(name string) {
+ if debugTimers {
+ log.Printf("%s took %s\n", name, time.Since(timers[name]))
+ }
+}
+
+// diff runs the command "diff a b" and reports its success.
+func diff(a, b string) bool {
+ var cmd *exec.Cmd
+ switch runtime.GOOS {
+ case "plan9":
+ cmd = exec.Command("/bin/diff", "-c", a, b)
+ default:
+ cmd = exec.Command("/usr/bin/diff", "-u", a, b)
+ }
+ cmd.Stdout = os.Stderr
+ cmd.Stderr = os.Stderr
+ return cmd.Run() == nil
+}