aboutsummaryrefslogtreecommitdiff
path: root/cmp
diff options
context:
space:
mode:
authorJoe Tsai <joetsai@digital-static.net>2017-07-20 15:19:44 -0700
committerGitHub <noreply@github.com>2017-07-20 15:19:44 -0700
commitf9054c6a605e1bb9318d701929105b0fd8a7cac8 (patch)
tree824f18b6cd3e3ae6a127757fef23a6ec0a5e4797 /cmp
parent2b1da0b74500c33a0cd25f26b79594b46c816b6d (diff)
downloadgo-cmp-f9054c6a605e1bb9318d701929105b0fd8a7cac8.tar.gz
Add diffing abilities to reporting (#9)
This adds an internal package with the following API: func Difference(nx, ny int, f EqualFunc) (eq bool, es EditScript) type EditScript []EditType type EditType int8 const Identity EditType = iota ... type EqualFunc func(ix int, iy int) Result type Result struct{ ... } The cmp.SliceIndex type has a new method SplitKeys to indicate that elements are being compared in x and y are in different indexes. The debug visualizer can be run: go test -tags=debug -v github.com/google/go-cmp/cmp/internal/diff Reviewed-By: kevlar@google.com
Diffstat (limited to 'cmp')
-rw-r--r--cmp/compare.go84
-rw-r--r--cmp/compare_test.go79
-rw-r--r--cmp/internal/diff/debug_disable.go17
-rw-r--r--cmp/internal/diff/debug_enable.go122
-rw-r--r--cmp/internal/diff/diff.go373
-rw-r--r--cmp/internal/diff/diff_test.go467
-rw-r--r--cmp/path.go50
-rw-r--r--cmp/reporter.go27
8 files changed, 1118 insertions, 101 deletions
diff --git a/cmp/compare.go b/cmp/compare.go
index 694a933..2294579 100644
--- a/cmp/compare.go
+++ b/cmp/compare.go
@@ -29,6 +29,8 @@ package cmp
import (
"fmt"
"reflect"
+
+ "github.com/google/go-cmp/cmp/internal/diff"
)
// BUG: Maps with keys containing NaN values cannot be properly compared due to
@@ -37,6 +39,8 @@ import (
//
// See https://golang.org/issue/11104 for more details.
+var nothing = reflect.Value{}
+
// Equal reports whether x and y are equal by recursively applying the
// following rules in the given order to x and y and all of their sub-values:
//
@@ -79,7 +83,7 @@ import (
func Equal(x, y interface{}, opts ...Option) bool {
s := newState(opts)
s.compareAny(reflect.ValueOf(x), reflect.ValueOf(y))
- return s.eq
+ return s.result.Equal()
}
// Diff returns a human-readable report of the differences between two values.
@@ -101,8 +105,8 @@ func Diff(x, y interface{}, opts ...Option) string {
}
type state struct {
- eq bool // Current result of comparison
- curPath Path // The current path in the value tree
+ result diff.Result // The current result of comparison
+ curPath Path // The current path in the value tree
// dsCheck tracks the state needed to periodically perform checks that
// user provided func(T, T) bool functions are symmetric and deterministic.
@@ -123,7 +127,7 @@ type state struct {
}
func newState(opts []Option) *state {
- s := &state{eq: true}
+ s := new(state)
for _, opt := range opts {
s.processOption(opt)
}
@@ -184,6 +188,7 @@ func (s *state) compareAny(vx, vy reflect.Value) {
t := vx.Type()
if len(s.curPath) == 0 {
s.curPath.push(&pathStep{typ: t})
+ defer s.curPath.pop()
}
// Rule 1: Check whether an option applies on this node in the value tree.
@@ -378,29 +383,52 @@ func (s *state) callFunc(f, x, y reflect.Value) bool {
}
func (s *state) compareArray(vx, vy reflect.Value, t reflect.Type) {
- step := &sliceIndex{pathStep{t.Elem()}, 0}
+ step := &sliceIndex{pathStep{t.Elem()}, 0, 0}
s.curPath.push(step)
- defer s.curPath.pop()
- // Regardless of the lengths, we always try to compare the elements.
- // If one slice is longer, we will report the elements of the longer
- // slice as different (relative to an invalid reflect.Value).
- nmin := vx.Len()
- if nmin > vy.Len() {
- nmin = vy.Len()
- }
- for i := 0; i < nmin; i++ {
- step.key = i
- s.compareAny(vx.Index(i), vy.Index(i))
- }
- for i := nmin; i < vx.Len(); i++ {
- step.key = i
- s.report(false, vx.Index(i), reflect.Value{})
+ // Compute an edit-script for slices vx and vy.
+ oldResult, oldReporter := s.result, s.reporter
+ s.reporter = nil // Remove reporter to avoid spurious printouts
+ eq, es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
+ step.xkey, step.ykey = ix, iy
+ s.result = diff.Result{} // Reset result
+ s.compareAny(vx.Index(ix), vy.Index(iy))
+ return s.result
+ })
+ s.result, s.reporter = oldResult, oldReporter
+
+ // Equal or no edit-script, so report entire slices as is.
+ if eq || es == nil {
+ s.curPath.pop() // Pop first since we are reporting the whole slice
+ s.report(eq, vx, vy)
+ return
}
- for i := nmin; i < vy.Len(); i++ {
- step.key = i
- s.report(false, reflect.Value{}, vy.Index(i))
+
+ // Replay the edit-script.
+ var ix, iy int
+ for _, e := range es {
+ switch e {
+ case diff.UniqueX:
+ step.xkey, step.ykey = ix, -1
+ s.report(false, vx.Index(ix), nothing)
+ ix++
+ case diff.UniqueY:
+ step.xkey, step.ykey = -1, iy
+ s.report(false, nothing, vy.Index(iy))
+ iy++
+ default:
+ step.xkey, step.ykey = ix, iy
+ if e == diff.Identity {
+ s.report(true, vx.Index(ix), vy.Index(iy))
+ } else {
+ s.compareAny(vx.Index(ix), vy.Index(iy))
+ }
+ ix++
+ iy++
+ }
}
+ s.curPath.pop()
+ return
}
func (s *state) compareMap(vx, vy reflect.Value, t reflect.Type) {
@@ -422,9 +450,9 @@ func (s *state) compareMap(vx, vy reflect.Value, t reflect.Type) {
case vvx.IsValid() && vvy.IsValid():
s.compareAny(vvx, vvy)
case vvx.IsValid() && !vvy.IsValid():
- s.report(false, vvx, reflect.Value{})
+ s.report(false, vvx, nothing)
case !vvx.IsValid() && vvy.IsValid():
- s.report(false, reflect.Value{}, vvy)
+ s.report(false, nothing, vvy)
default:
// It is possible for both vvx and vvy to be invalid if the
// key contained a NaN value in it. There is no way in
@@ -470,7 +498,11 @@ func (s *state) compareStruct(vx, vy reflect.Value, t reflect.Type) {
// report records the result of a single comparison.
// It also calls Report if any reporter is registered.
func (s *state) report(eq bool, vx, vy reflect.Value) {
- s.eq = s.eq && eq
+ if eq {
+ s.result.NSame++
+ } else {
+ s.result.NDiff++
+ }
if s.reporter != nil {
s.reporter.Report(vx, vy, eq, s.curPath)
}
diff --git a/cmp/compare_test.go b/cmp/compare_test.go
index 113e50e..11c30c2 100644
--- a/cmp/compare_test.go
+++ b/cmp/compare_test.go
@@ -6,6 +6,7 @@ package cmp_test
import (
"bytes"
+ "crypto/md5"
"fmt"
"io"
"math/rand"
@@ -101,8 +102,8 @@ func TestDiff(t *testing.T) {
if gotPanic != "" {
t.Fatalf("unexpected panic message: %s", gotPanic)
}
- if strings.TrimSpace(gotDiff) != strings.TrimSpace(tt.wantDiff) {
- t.Fatalf("difference message:\ngot:\n%s\nwant:\n%s", gotDiff, tt.wantDiff)
+ if got, want := strings.TrimSpace(gotDiff), strings.TrimSpace(tt.wantDiff); got != want {
+ t.Fatalf("difference message:\ngot:\n%s\n\nwant:\n%s", got, want)
}
} else {
if !strings.Contains(gotPanic, tt.wantPanic) {
@@ -117,10 +118,9 @@ func comparerTests() []test {
const label = "Comparer"
return []test{{
- label: label,
- x: 1,
- y: 1,
- wantDiff: "",
+ label: label,
+ x: 1,
+ y: 1,
}, {
label: label,
x: 1,
@@ -164,10 +164,9 @@ func comparerTests() []test {
opts: []cmp.Option{struct{ cmp.Option }{}},
wantPanic: "unknown option",
}, {
- label: label,
- x: struct{ A, B, C int }{1, 2, 3},
- y: struct{ A, B, C int }{1, 2, 3},
- wantDiff: "",
+ label: label,
+ x: struct{ A, B, C int }{1, 2, 3},
+ y: struct{ A, B, C int }{1, 2, 3},
}, {
label: label,
x: struct{ A, B, C int }{1, 2, 3},
@@ -244,17 +243,19 @@ func comparerTests() []test {
y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
wantPanic: "cannot handle unexported field",
}, {
- label: label,
- x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
- y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
- opts: []cmp.Option{cmp.Comparer(equalRegexp)},
- wantDiff: "",
+ label: label,
+ x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
+ y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
+ opts: []cmp.Option{cmp.Comparer(equalRegexp)},
}, {
- label: label,
- x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
- y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*d*")},
- opts: []cmp.Option{cmp.Comparer(equalRegexp)},
- wantDiff: "{[]*regexp.Regexp}[1]:\n\t-: \"a*b*c*\"\n\t+: \"a*b*d*\"\n",
+ label: label,
+ x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")},
+ y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*d*")},
+ opts: []cmp.Option{cmp.Comparer(equalRegexp)},
+ wantDiff: `
+{[]*regexp.Regexp}[1]:
+ -: "a*b*c*"
+ +: "a*b*d*"`,
}, {
label: label,
x: func() ***int {
@@ -307,6 +308,14 @@ root:
+: "hello2"`,
}, {
label: label,
+ x: md5.Sum([]byte{'a'}),
+ y: md5.Sum([]byte{'b'}),
+ wantDiff: `
+{[16]uint8}:
+ -: [16]uint8{0x0c, 0xc1, 0x75, 0xb9, 0xc0, 0xf1, 0xb6, 0xa8, 0x31, 0xc3, 0x99, 0xe2, 0x69, 0x77, 0x26, 0x61}
+ +: [16]uint8{0x92, 0xeb, 0x5f, 0xfe, 0xe6, 0xae, 0x2f, 0xec, 0x3a, 0xd7, 0x1c, 0x77, 0x75, 0x31, 0x57, 0x8f}`,
+ }, {
+ label: label,
x: make([]int, 1000),
y: make([]int, 1000),
opts: []cmp.Option{
@@ -1392,8 +1401,7 @@ func project1Tests() []test {
y: ts.Eagle{Slaps: []ts.Slap{{
Args: &pb.MetaData{Stringer: pb.Stringer{"metadata"}},
}}},
- opts: []cmp.Option{cmp.Comparer(pb.Equal)},
- wantDiff: "",
+ opts: []cmp.Option{cmp.Comparer(pb.Equal)},
}, {
label: label,
x: ts.Eagle{Slaps: []ts.Slap{{}, {}, {}, {}, {
@@ -1435,10 +1443,10 @@ func project1Tests() []test {
{teststructs.Eagle}.Slaps[0].Immutable.MildSlap:
-: false
+: true
-{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[1]:
+{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[1->?]:
-: "bar"
+: <non-existent>
-{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[2]:
+{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[2->?]:
-: "baz"
+: <non-existent>`,
}}
@@ -1524,14 +1532,11 @@ func project2Tests() []test {
}(),
opts: []cmp.Option{cmp.Comparer(pb.Equal), equalDish},
wantDiff: `
-{teststructs.GermBatch}.DirtyGerms[18][0]:
+{teststructs.GermBatch}.DirtyGerms[18][0->?]:
-: "germ2"
- +: "germ3"
-{teststructs.GermBatch}.DirtyGerms[18][1]:
- -: "germ3"
- +: "germ4"
-{teststructs.GermBatch}.DirtyGerms[18][2]:
- -: "germ4"
+ +: <non-existent>
+{teststructs.GermBatch}.DirtyGerms[18][?->2]:
+ -: <non-existent>
+: "germ2"`,
}, {
label: label,
@@ -1562,7 +1567,7 @@ func project2Tests() []test {
{teststructs.GermBatch}.DirtyGerms[17]:
-: <non-existent>
+: []*testprotos.Germ{"germ1"}
-{teststructs.GermBatch}.DirtyGerms[18][2]:
+{teststructs.GermBatch}.DirtyGerms[18][2->?]:
-: "germ4"
+: <non-existent>
{teststructs.GermBatch}.DishMap[1]:
@@ -1736,14 +1741,8 @@ func project4Tests() []test {
}(),
opts: []cmp.Option{allowVisibility, transformProtos, cmp.Comparer(pb.Equal)},
wantDiff: `
-{teststructs.Cartel}.Headquarter.subDivisions[0]:
+{teststructs.Cartel}.Headquarter.subDivisions[0->?]:
-: "alpha"
- +: "bravo"
-{teststructs.Cartel}.Headquarter.subDivisions[1]:
- -: "bravo"
- +: "charlie"
-{teststructs.Cartel}.Headquarter.subDivisions[2]:
- -: "charlie"
+: <non-existent>
{teststructs.Cartel}.Headquarter.publicMessage[2]:
-: 0x03
@@ -1754,7 +1753,7 @@ func project4Tests() []test {
{teststructs.Cartel}.poisons[0].poisonType:
-: 1
+: 5
-{teststructs.Cartel}.poisons[1]:
+{teststructs.Cartel}.poisons[1->?]:
-: &teststructs.Poison{poisonType: 2, manufactuer: "acme2"}
+: <non-existent>`,
}}
diff --git a/cmp/internal/diff/debug_disable.go b/cmp/internal/diff/debug_disable.go
new file mode 100644
index 0000000..42afa49
--- /dev/null
+++ b/cmp/internal/diff/debug_disable.go
@@ -0,0 +1,17 @@
+// Copyright 2017, The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE.md file.
+
+// +build !debug
+
+package diff
+
+var debug debugger
+
+type debugger struct{}
+
+func (debugger) Begin(_, _ int, f EqualFunc, _, _ *EditScript) EqualFunc {
+ return f
+}
+func (debugger) Update() {}
+func (debugger) Finish() {}
diff --git a/cmp/internal/diff/debug_enable.go b/cmp/internal/diff/debug_enable.go
new file mode 100644
index 0000000..ba46c62
--- /dev/null
+++ b/cmp/internal/diff/debug_enable.go
@@ -0,0 +1,122 @@
+// Copyright 2017, The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE.md file.
+
+// +build debug
+
+package diff
+
+import (
+ "fmt"
+ "strings"
+ "sync"
+ "time"
+)
+
+// The algorithm can be seen running in real-time by enabling debugging:
+// go test -tags=debug -v
+//
+// Example output:
+// === RUN TestDifference/#34
+// ┌───────────────────────────────┐
+// │ \ · · · · · · · · · · · · · · │
+// │ · # · · · · · · · · · · · · · │
+// │ · \ · · · · · · · · · · · · · │
+// │ · · \ · · · · · · · · · · · · │
+// │ · · · X # · · · · · · · · · · │
+// │ · · · # \ · · · · · · · · · · │
+// │ · · · · · # # · · · · · · · · │
+// │ · · · · · # \ · · · · · · · · │
+// │ · · · · · · · \ · · · · · · · │
+// │ · · · · · · · · \ · · · · · · │
+// │ · · · · · · · · · \ · · · · · │
+// │ · · · · · · · · · · \ · · # · │
+// │ · · · · · · · · · · · \ # # · │
+// │ · · · · · · · · · · · # # # · │
+// │ · · · · · · · · · · # # # # · │
+// │ · · · · · · · · · # # # # # · │
+// │ · · · · · · · · · · · · · · \ │
+// └───────────────────────────────┘
+// [.Y..M.XY......YXYXY.|]
+//
+// The grid represents the edit-graph where the horizontal axis represents
+// list X and the vertical axis represents list Y. The start of the two lists
+// is the top-left, while the ends are the bottom-right. The '·' represents
+// an unexplored node in the graph. The '\' indicates that the two symbols
+// from list X and Y are equal. The 'X' indicates that two symbols are similar
+// (but not exactly equal) to each other. The '#' indicates that the two symbols
+// are different (and not similar). The algorithm traverses this graph trying to
+// make the paths starting in the top-left and the bottom-right connect.
+//
+// The series of '.', 'X', 'Y', and 'M' characters at the bottom represents
+// the currently established path from the forward and reverse searches,
+// seperated by a '|' character.
+
+const (
+ updateDelay = 100 * time.Millisecond
+ finishDelay = 500 * time.Millisecond
+ ansiTerminal = true // ANSI escape codes used to move terminal cursor
+)
+
+var debug debugger
+
+type debugger struct {
+ sync.Mutex
+ p1, p2 EditScript
+ fwdPath, revPath *EditScript
+ grid []byte
+ lines int
+}
+
+func (dbg *debugger) Begin(nx, ny int, f EqualFunc, p1, p2 *EditScript) EqualFunc {
+ dbg.Lock()
+ dbg.fwdPath, dbg.revPath = p1, p2
+ top := "┌─" + strings.Repeat("──", nx) + "┐\n"
+ row := "│ " + strings.Repeat("· ", nx) + "│\n"
+ btm := "└─" + strings.Repeat("──", nx) + "┘\n"
+ dbg.grid = []byte(top + strings.Repeat(row, ny) + btm)
+ dbg.lines = strings.Count(dbg.String(), "\n")
+ fmt.Print(dbg)
+
+ // Wrap the EqualFunc so that we can intercept each result.
+ return func(ix, iy int) (r Result) {
+ cell := dbg.grid[len(top)+iy*len(row):][len("│ ")+len("· ")*ix:][:len("·")]
+ for i := range cell {
+ cell[i] = 0 // Zero out the multiple bytes of UTF-8 middle-dot
+ }
+ switch r = f(ix, iy); {
+ case r.Equal():
+ cell[0] = '\\'
+ case r.Similar():
+ cell[0] = 'X'
+ default:
+ cell[0] = '#'
+ }
+ return
+ }
+}
+
+func (dbg *debugger) Update() {
+ dbg.print(updateDelay)
+}
+
+func (dbg *debugger) Finish() {
+ dbg.print(finishDelay)
+ dbg.Unlock()
+}
+
+func (dbg *debugger) String() string {
+ dbg.p1, dbg.p2 = *dbg.fwdPath, dbg.p2[:0]
+ for i := len(*dbg.revPath) - 1; i >= 0; i-- {
+ dbg.p2 = append(dbg.p2, (*dbg.revPath)[i])
+ }
+ return fmt.Sprintf("%s[%v|%v]\n\n", dbg.grid, dbg.p1, dbg.p2)
+}
+
+func (dbg *debugger) print(d time.Duration) {
+ if ansiTerminal {
+ fmt.Printf("\x1b[%dA", dbg.lines) // Reset terminal cursor
+ }
+ fmt.Print(dbg)
+ time.Sleep(d)
+}
diff --git a/cmp/internal/diff/diff.go b/cmp/internal/diff/diff.go
new file mode 100644
index 0000000..baa41fd
--- /dev/null
+++ b/cmp/internal/diff/diff.go
@@ -0,0 +1,373 @@
+// Copyright 2017, The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE.md file.
+
+// Package diff implements an algorithm for producing edit-scripts.
+// The edit-script is a sequence of operations needed to transform one list
+// of symbols into another (or vice-versa). The edits allowed are insertions,
+// deletions, and modifications. The summation of all edits is called the
+// Levenshtein distance as this problem is well-known in computer science.
+//
+// This package prioritizes performance over accuracy. That is, the run time
+// is more important than obtaining a minimal Levenshtein distance.
+package diff
+
+// EditType represents a single operation within an edit-script.
+type EditType uint8
+
+const (
+ // Identity indicates that a symbol pair is identical in both list X and Y.
+ Identity EditType = iota
+ // UniqueX indicates that a symbol only exists in X and not Y.
+ UniqueX
+ // UniqueY indicates that a symbol only exists in Y and not X.
+ UniqueY
+ // Modified indicates that a symbol pair is a modification of each other.
+ Modified
+)
+
+// EditScript represents the series of differences between two lists.
+type EditScript []EditType
+
+// String returns a human-readable string representing the edit-script where
+// Identity, UniqueX, UniqueY, and Modified are represented by the
+// '.', 'X', 'Y', and 'M' characters, respectively.
+func (es EditScript) String() string {
+ b := make([]byte, len(es))
+ for i, e := range es {
+ switch e {
+ case Identity:
+ b[i] = '.'
+ case UniqueX:
+ b[i] = 'X'
+ case UniqueY:
+ b[i] = 'Y'
+ case Modified:
+ b[i] = 'M'
+ default:
+ panic("invalid edit-type")
+ }
+ }
+ return string(b)
+}
+
+// stats returns a histogram of the number of each type of edit operation.
+func (es EditScript) stats() (s struct{ NI, NX, NY, NM int }) {
+ for _, e := range es {
+ switch e {
+ case Identity:
+ s.NI++
+ case UniqueX:
+ s.NX++
+ case UniqueY:
+ s.NY++
+ case Modified:
+ s.NM++
+ default:
+ panic("invalid edit-type")
+ }
+ }
+ return
+}
+
+// Dist is the Levenshtein distance and is guaranteed to be 0 if and only if
+// lists X and Y are equal.
+func (es EditScript) Dist() int { return len(es) - es.stats().NI }
+
+// LenX is the length of the X list.
+func (es EditScript) LenX() int { return len(es) - es.stats().NY }
+
+// LenY is the length of the Y list.
+func (es EditScript) LenY() int { return len(es) - es.stats().NX }
+
+// EqualFunc reports whether the symbols at indexes ix and iy are equal.
+// When called by Difference, the index is guaranteed to be within nx and ny.
+type EqualFunc func(ix int, iy int) Result
+
+// Result is the result of comparison.
+// NSame is the number of sub-elements that are equal.
+// NDiff is the number of sub-elements that are not equal.
+type Result struct{ NSame, NDiff int }
+
+// Equal indicates whether the symbols are equal. Two symbols are equal
+// if and only if NDiff == 0. If Equal, then they are also Similar.
+func (r Result) Equal() bool { return r.NDiff == 0 }
+
+// Similar indicates whether two symbols are similar and may be represented
+// by using the Modified type. As a special case, we consider binary comparisons
+// (i.e., those that return Result{1, 0} or Result{0, 1}) to be similar.
+//
+// The exact ratio of NSame to NDiff to determine similarity may change.
+func (r Result) Similar() bool {
+ // Use NSame+1 to offset NSame so that binary comparisons are similar.
+ return r.NSame+1 >= r.NDiff
+}
+
+// Difference reports whether two lists of lengths nx and ny are equal
+// given the definition of equality provided as f.
+//
+// This function may return a edit-script, which is a sequence of operations
+// needed to convert one list into the other. If non-nil, the following
+// invariants for the edit-script are maintained:
+// • eq == (es.Dist()==0)
+// • nx == es.LenX()
+// • ny == es.LenY()
+//
+// This algorithm is not guaranteed to be an optimal solution (i.e., one that
+// produces an edit-script with a minimal Levenshtein distance). This algorithm
+// favors performance over optimality. The exact output is not guaranteed to
+// be stable and may change over time.
+func Difference(nx, ny int, f EqualFunc) (eq bool, es EditScript) {
+ es = searchGraph(nx, ny, f)
+ st := es.stats()
+ eq = len(es) == st.NI
+ if !eq && st.NI < (nx+ny)/4 {
+ return eq, nil // Edit-script more distracting than helpful
+ }
+ return eq, es
+}
+
+func searchGraph(nx, ny int, f EqualFunc) EditScript {
+ // This algorithm is based on traversing what is known as an "edit-graph".
+ // See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"
+ // by Eugene W. Myers. Since D can be as large as N itself, this is
+ // effectively O(N^2). Unlike the algorithm from that paper, we are not
+ // interested in the optimal path, but at least some "decent" path.
+ //
+ // For example, let X and Y be lists of symbols:
+ // X = [A B C A B B A]
+ // Y = [C B A B A C]
+ //
+ // The edit-graph can be drawn as the following:
+ // A B C A B B A
+ // ┌─────────────┐
+ // C │_|_|\|_|_|_|_│ 0
+ // B │_|\|_|_|\|\|_│ 1
+ // A │\|_|_|\|_|_|\│ 2
+ // B │_|\|_|_|\|\|_│ 3
+ // A │\|_|_|\|_|_|\│ 4
+ // C │ | |\| | | | │ 5
+ // └─────────────┘ 6
+ // 0 1 2 3 4 5 6 7
+ //
+ // List X is written along the horizontal axis, while list Y is written
+ // along the vertical axis. At any point on this grid, if the symbol in
+ // list X matches the corresponding symbol in list Y, then a '\' is drawn.
+ // The goal of any minimal edit-script algorithm is to find a path from the
+ // top-left corner to the bottom-right corner, while traveling through the
+ // fewest horizontal or vertical edges.
+ // A horizontal edge is equivalent to inserting a symbol from list X.
+ // A vertical edge is equivalent to inserting a symbol from list Y.
+ // A diagonal edge is equivalent to a matching symbol between both X and Y.
+
+ // Invariants:
+ // • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
+ // • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
+ //
+ // In general:
+ // • fwdFrontier.X < revFrontier.X
+ // • fwdFrontier.Y < revFrontier.Y
+ // Unless, it is time for the algorithm to terminate.
+ fwdPath := path{+1, point{0, 0}, make(EditScript, 0, (nx+ny)/2)}
+ revPath := path{-1, point{nx, ny}, make(EditScript, 0)}
+ fwdFrontier := fwdPath.point // Forward search frontier
+ revFrontier := revPath.point // Reverse search frontier
+
+ // Search budget bounds the cost of searching for better paths.
+ // The longest sequence of non-matching symbols that can be tolerated is
+ // approximately the square-root of the search budget.
+ searchBudget := 4 * (nx + ny) // O(n)
+
+ // The algorithm below is a greedy, meet-in-the-middle algorithm for
+ // computing sub-optimal edit-scripts between two lists.
+ //
+ // The algorithm is approximately as follows:
+ // • Searching for differences switches back-and-forth between
+ // a search that starts at the beginning (the top-left corner), and
+ // a search that starts at the end (the bottom-right corner). The goal of
+ // the search is connect with the search from the opposite corner.
+ // • As we search, we build a path in a greedy manner, where the first
+ // match seen is added to the path (this is sub-optimal, but provides a
+ // decent result in practice). When matches are found, we try the next pair
+ // of symbols in the lists and follow all matches as far as possible.
+ // • When searching for matches, we search along a diagonal going through
+ // through the "frontier" point. If no matches are found, we advance the
+ // frontier towards the opposite corner.
+ // • This algorithm terminates when either the X coordinates or the
+ // Y coordinates of the forward and reverse frontier points ever intersect.
+ //
+ // This algorithm is correct even if searching only in the forward direction
+ // or in the reverse direction. We do both because it is commonly observed
+ // that two lists commonly differ because elements were added to the front
+ // or end of the other list.
+ //
+ // Running the tests with the "debug" build tag prints a visualization of
+ // the algorithm running in real-time. This is educational for understanding
+ // how the algorithm works. See debug_enable.go.
+ f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
+ for {
+ // Forward search from the beginning.
+ if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
+ break
+ }
+ for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
+ // Search in a diagonal pattern for a match.
+ z := zigzag(i)
+ p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
+ switch {
+ case p.X >= revPath.X || p.Y < fwdPath.Y:
+ stop1 = true // Hit top-right corner
+ case p.Y >= revPath.Y || p.X < fwdPath.X:
+ stop2 = true // Hit bottom-left corner
+ case f(p.X, p.Y).Equal():
+ // Match found, so connect the path to this point.
+ fwdPath.connect(p, f)
+ fwdPath.append(Identity)
+ // Follow sequence of matches as far as possible.
+ for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
+ if !f(fwdPath.X, fwdPath.Y).Equal() {
+ break
+ }
+ fwdPath.append(Identity)
+ }
+ fwdFrontier = fwdPath.point
+ stop1, stop2 = true, true
+ default:
+ searchBudget-- // Match not found
+ }
+ debug.Update()
+ }
+ // Advance the frontier towards reverse point.
+ if revPath.X-fwdFrontier.X >= revPath.Y-fwdFrontier.Y {
+ fwdFrontier.X++
+ } else {
+ fwdFrontier.Y++
+ }
+
+ // Reverse search from the end.
+ if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
+ break
+ }
+ for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
+ // Search in a diagonal pattern for a match.
+ z := zigzag(i)
+ p := point{revFrontier.X - z, revFrontier.Y + z}
+ switch {
+ case fwdPath.X >= p.X || revPath.Y < p.Y:
+ stop1 = true // Hit bottom-left corner
+ case fwdPath.Y >= p.Y || revPath.X < p.X:
+ stop2 = true // Hit top-right corner
+ case f(p.X-1, p.Y-1).Equal():
+ // Match found, so connect the path to this point.
+ revPath.connect(p, f)
+ revPath.append(Identity)
+ // Follow sequence of matches as far as possible.
+ for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
+ if !f(revPath.X-1, revPath.Y-1).Equal() {
+ break
+ }
+ revPath.append(Identity)
+ }
+ revFrontier = revPath.point
+ stop1, stop2 = true, true
+ default:
+ searchBudget-- // Match not found
+ }
+ debug.Update()
+ }
+ // Advance the frontier towards forward point.
+ if revFrontier.X-fwdPath.X >= revFrontier.Y-fwdPath.Y {
+ revFrontier.X--
+ } else {
+ revFrontier.Y--
+ }
+ }
+
+ // Join the forward and reverse paths and then append the reverse path.
+ fwdPath.connect(revPath.point, f)
+ for i := len(revPath.es) - 1; i >= 0; i-- {
+ t := revPath.es[i]
+ revPath.es = revPath.es[:i]
+ fwdPath.append(t)
+ }
+ debug.Finish()
+ return fwdPath.es
+}
+
+type path struct {
+ dir int // +1 if forward, -1 if reverse
+ point // Leading point of the EditScript path
+ es EditScript
+}
+
+// connect appends any necessary Identity, Modified, UniqueX, or UniqueY types
+// to the edit-script to connect p.point to dst.
+func (p *path) connect(dst point, f EqualFunc) {
+ if p.dir > 0 {
+ // Connect in forward direction.
+ for dst.X > p.X && dst.Y > p.Y {
+ switch r := f(p.X, p.Y); {
+ case r.Equal():
+ p.append(Identity)
+ case r.Similar():
+ p.append(Modified)
+ case dst.X-p.X >= dst.Y-p.Y:
+ p.append(UniqueX)
+ default:
+ p.append(UniqueY)
+ }
+ }
+ for dst.X > p.X {
+ p.append(UniqueX)
+ }
+ for dst.Y > p.Y {
+ p.append(UniqueY)
+ }
+ } else {
+ // Connect in reverse direction.
+ for p.X > dst.X && p.Y > dst.Y {
+ switch r := f(p.X-1, p.Y-1); {
+ case r.Equal():
+ p.append(Identity)
+ case r.Similar():
+ p.append(Modified)
+ case p.Y-dst.Y >= p.X-dst.X:
+ p.append(UniqueY)
+ default:
+ p.append(UniqueX)
+ }
+ }
+ for p.X > dst.X {
+ p.append(UniqueX)
+ }
+ for p.Y > dst.Y {
+ p.append(UniqueY)
+ }
+ }
+}
+
+func (p *path) append(t EditType) {
+ p.es = append(p.es, t)
+ switch t {
+ case Identity, Modified:
+ p.add(p.dir, p.dir)
+ case UniqueX:
+ p.add(p.dir, 0)
+ case UniqueY:
+ p.add(0, p.dir)
+ }
+ debug.Update()
+}
+
+type point struct{ X, Y int }
+
+func (p *point) add(dx, dy int) { p.X += dx; p.Y += dy }
+
+// zigzag maps a consecutive sequence of integers to a zig-zag sequence.
+// [0 1 2 3 4 5 ...] => [0 -1 +1 -2 +2 ...]
+func zigzag(x int) int {
+ if x&1 != 0 {
+ x = ^x
+ }
+ return x >> 1
+}
diff --git a/cmp/internal/diff/diff_test.go b/cmp/internal/diff/diff_test.go
new file mode 100644
index 0000000..5996ea2
--- /dev/null
+++ b/cmp/internal/diff/diff_test.go
@@ -0,0 +1,467 @@
+// Copyright 2017, The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE.md file.
+
+package diff
+
+import (
+ "fmt"
+ "math/rand"
+ "strings"
+ "testing"
+ "unicode"
+)
+
+func TestDifference(t *testing.T) {
+ tests := []struct {
+ // Before passing x and y to Difference, we strip all spaces so that
+ // they can be used by the test author to indicate a missing symbol
+ // in one of the lists.
+ x, y string
+ want string
+ }{{
+ x: "",
+ y: "",
+ want: "",
+ }, {
+ x: "#",
+ y: "#",
+ want: ".",
+ }, {
+ x: "##",
+ y: "# ",
+ want: ".X",
+ }, {
+ x: "a#",
+ y: "A ",
+ want: "MX",
+ }, {
+ x: "#a",
+ y: " A",
+ want: "XM",
+ }, {
+ x: "# ",
+ y: "##",
+ want: ".Y",
+ }, {
+ x: " #",
+ y: "@#",
+ want: "Y.",
+ }, {
+ x: "@#",
+ y: " #",
+ want: "X.",
+ }, {
+ x: "##########0123456789",
+ y: " 0123456789",
+ want: "XXXXXXXXXX..........",
+ }, {
+ x: " 0123456789",
+ y: "##########0123456789",
+ want: "YYYYYYYYYY..........",
+ }, {
+ x: "#####0123456789#####",
+ y: " 0123456789 ",
+ want: "XXXXX..........XXXXX",
+ }, {
+ x: " 0123456789 ",
+ y: "#####0123456789#####",
+ want: "YYYYY..........YYYYY",
+ }, {
+ x: "01234##########56789",
+ y: "01234 56789",
+ want: ".....XXXXXXXXXX.....",
+ }, {
+ x: "01234 56789",
+ y: "01234##########56789",
+ want: ".....YYYYYYYYYY.....",
+ }, {
+ x: "0123456789##########",
+ y: "0123456789 ",
+ want: "..........XXXXXXXXXX",
+ }, {
+ x: "0123456789 ",
+ y: "0123456789##########",
+ want: "..........YYYYYYYYYY",
+ }, {
+ x: "abcdefghij0123456789",
+ y: "ABCDEFGHIJ0123456789",
+ want: "MMMMMMMMMM..........",
+ }, {
+ x: "ABCDEFGHIJ0123456789",
+ y: "abcdefghij0123456789",
+ want: "MMMMMMMMMM..........",
+ }, {
+ x: "01234abcdefghij56789",
+ y: "01234ABCDEFGHIJ56789",
+ want: ".....MMMMMMMMMM.....",
+ }, {
+ x: "01234ABCDEFGHIJ56789",
+ y: "01234abcdefghij56789",
+ want: ".....MMMMMMMMMM.....",
+ }, {
+ x: "0123456789abcdefghij",
+ y: "0123456789ABCDEFGHIJ",
+ want: "..........MMMMMMMMMM",
+ }, {
+ x: "0123456789ABCDEFGHIJ",
+ y: "0123456789abcdefghij",
+ want: "..........MMMMMMMMMM",
+ }, {
+ x: "ABCDEFGHIJ0123456789 ",
+ y: " 0123456789abcdefghij",
+ want: "XXXXXXXXXX..........YYYYYYYYYY",
+ }, {
+ x: " 0123456789abcdefghij",
+ y: "ABCDEFGHIJ0123456789 ",
+ want: "YYYYYYYYYY..........XXXXXXXXXX",
+ }, {
+ x: "ABCDE0123456789 FGHIJ",
+ y: " 0123456789abcdefghij",
+ want: "XXXXX..........YYYYYMMMMM",
+ }, {
+ x: " 0123456789abcdefghij",
+ y: "ABCDE0123456789 FGHIJ",
+ want: "YYYYY..........XXXXXMMMMM",
+ }, {
+ x: "ABCDE01234F G H I J 56789 ",
+ y: " 01234 a b c d e56789fghij",
+ want: "XXXXX.....XYXYXYXYXY.....YYYYY",
+ }, {
+ x: " 01234a b c d e 56789fghij",
+ y: "ABCDE01234 F G H I J56789 ",
+ want: "YYYYY.....XYXYXYXYXY.....XXXXX",
+ }, {
+ x: "FGHIJ01234ABCDE56789 ",
+ y: " 01234abcde56789fghij",
+ want: "XXXXX.....MMMMM.....YYYYY",
+ }, {
+ x: " 01234abcde56789fghij",
+ y: "FGHIJ01234ABCDE56789 ",
+ want: "YYYYY.....MMMMM.....XXXXX",
+ }, {
+ x: "ABCAB BA ",
+ y: " C BABAC",
+ want: "XX.X.Y..Y",
+ }, {
+ x: "# #### ###",
+ y: "#y####yy###",
+ want: ".Y....YY...",
+ }, {
+ x: "# #### # ##x#x",
+ y: "#y####y y## # ",
+ want: ".Y....YXY..X.X",
+ }, {
+ x: "###z#z###### x #",
+ y: "#y##Z#Z###### yy#",
+ want: ".Y..M.M......XYY.",
+ }, {
+ x: "0 12z3x 456789 x x 0",
+ y: "0y12Z3 y456789y y y0",
+ want: ".Y..M.XY......YXYXY.",
+ }, {
+ x: "0 2 4 6 8 ..................abXXcdEXF.ghXi",
+ y: " 1 3 5 7 9..................AB CDE F.GH I",
+ want: "XYXYXYXYXY..................MMXXMM.X..MMXM",
+ }, {
+ x: "I HG.F EDC BA..................9 7 5 3 1 ",
+ y: "iXhg.FXEdcXXba.................. 8 6 4 2 0",
+ want: "MYMM..Y.MMYYMM..................XYXYXYXYXY",
+ }, {
+ x: "x1234",
+ y: " 1234",
+ want: "X....",
+ }, {
+ x: "x123x4",
+ y: " 123 4",
+ want: "X...X.",
+ }, {
+ x: "x1234x56",
+ y: " 1234 ",
+ want: "X....XXX",
+ }, {
+ x: "x1234xxx56",
+ y: " 1234 56",
+ want: "X....XXX..",
+ }, {
+ x: ".1234...ab",
+ y: " 1234 AB",
+ want: "X....XXXMM",
+ }, {
+ x: "x1234xxab.",
+ y: " 1234 AB ",
+ want: "X....XXMMX",
+ }, {
+ x: " 0123456789",
+ y: "9012345678 ",
+ want: "Y.........X",
+ }, {
+ x: " 0123456789",
+ y: "8901234567 ",
+ want: "YY........XX",
+ }, {
+ x: " 0123456789",
+ y: "7890123456 ",
+ want: "YYY.......XXX",
+ }, {
+ x: " 0123456789",
+ y: "6789012345 ",
+ want: "YYYY......XXXX",
+ }, {
+ x: "0123456789 ",
+ y: " 5678901234",
+ want: "XXXXX.....YYYYY",
+ }, {
+ x: "0123456789 ",
+ y: " 4567890123",
+ want: "XXXX......YYYY",
+ }, {
+ x: "0123456789 ",
+ y: " 3456789012",
+ want: "XXX.......YYY",
+ }, {
+ x: "0123456789 ",
+ y: " 2345678901",
+ want: "XX........YY",
+ }, {
+ x: "0123456789 ",
+ y: " 1234567890",
+ want: "X.........Y",
+ }, {
+ x: "0123456789",
+ y: "9876543210",
+ }, {
+ x: "0123456789",
+ y: "6725819034",
+ }, {
+ x: "FBQMOIGTLN72X90E4SP651HKRJUDA83CVZW",
+ y: "5WHXO10R9IVKZLCTAJ8P3NSEQM472G6UBDF",
+ }}
+
+ for _, tt := range tests {
+ tRun(t, "", func(t *testing.T) {
+ x := strings.Replace(tt.x, " ", "", -1)
+ y := strings.Replace(tt.y, " ", "", -1)
+ es := testStrings(t, x, y)
+ if got := es.String(); got != tt.want {
+ t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want)
+ }
+ })
+ }
+}
+
+func TestDifferenceFuzz(t *testing.T) {
+ tests := []struct{ px, py, pm float32 }{
+ {px: 0.0, py: 0.0, pm: 0.1},
+ {px: 0.0, py: 0.1, pm: 0.0},
+ {px: 0.1, py: 0.0, pm: 0.0},
+ {px: 0.0, py: 0.1, pm: 0.1},
+ {px: 0.1, py: 0.0, pm: 0.1},
+ {px: 0.2, py: 0.2, pm: 0.2},
+ {px: 0.3, py: 0.1, pm: 0.2},
+ {px: 0.1, py: 0.3, pm: 0.2},
+ {px: 0.2, py: 0.2, pm: 0.2},
+ {px: 0.3, py: 0.3, pm: 0.3},
+ {px: 0.1, py: 0.1, pm: 0.5},
+ {px: 0.4, py: 0.1, pm: 0.5},
+ {px: 0.3, py: 0.2, pm: 0.5},
+ {px: 0.2, py: 0.3, pm: 0.5},
+ {px: 0.1, py: 0.4, pm: 0.5},
+ }
+
+ for i, tt := range tests {
+ tRun(t, fmt.Sprintf("P%d", i), func(t *testing.T) {
+ // Sweep from 1B to 1KiB.
+ for n := 1; n <= 1024; n <<= 1 {
+ tRun(t, fmt.Sprintf("N%d", n), func(t *testing.T) {
+ for j := 0; j < 10; j++ {
+ x, y := generateStrings(n, tt.px, tt.py, tt.pm, int64(j))
+ testStrings(t, x, y)
+ }
+ })
+ }
+ })
+ }
+}
+
+func benchmarkDifference(b *testing.B, n int) {
+ // TODO: Use testing.B.Run when we drop Go1.6 support.
+ x, y := generateStrings(n, 0.05, 0.05, 0.10, 0)
+ b.ReportAllocs()
+ b.SetBytes(int64(len(x) + len(y)))
+ for i := 0; i < b.N; i++ {
+ Difference(len(x), len(y), func(ix, iy int) Result {
+ return compareByte(x[ix], y[iy])
+ })
+ }
+}
+func BenchmarkDifference1K(b *testing.B) { benchmarkDifference(b, 1<<10) }
+func BenchmarkDifference4K(b *testing.B) { benchmarkDifference(b, 1<<12) }
+func BenchmarkDifference16K(b *testing.B) { benchmarkDifference(b, 1<<14) }
+func BenchmarkDifference64K(b *testing.B) { benchmarkDifference(b, 1<<16) }
+func BenchmarkDifference256K(b *testing.B) { benchmarkDifference(b, 1<<18) }
+func BenchmarkDifference1M(b *testing.B) { benchmarkDifference(b, 1<<20) }
+
+func generateStrings(n int, px, py, pm float32, seed int64) (string, string) {
+ if px+py+pm > 1.0 {
+ panic("invalid probabilities")
+ }
+ py += px
+ pm += py
+
+ b := make([]byte, n)
+ r := rand.New(rand.NewSource(seed))
+ r.Read(b)
+
+ var x, y []byte
+ for len(b) > 0 {
+ switch p := r.Float32(); {
+ case p < px: // UniqueX
+ x = append(x, b[0])
+ case p < py: // UniqueY
+ y = append(y, b[0])
+ case p < pm: // Modified
+ x = append(x, 'A'+(b[0]%26))
+ y = append(y, 'a'+(b[0]%26))
+ default: // Identity
+ x = append(x, b[0])
+ y = append(y, b[0])
+ }
+ b = b[1:]
+ }
+ return string(x), string(y)
+}
+
+func testStrings(t *testing.T, x, y string) EditScript {
+ wantEq := x == y
+ eq, es := Difference(len(x), len(y), func(ix, iy int) Result {
+ return compareByte(x[ix], y[iy])
+ })
+ if eq != wantEq {
+ t.Errorf("equality mismatch: got %v, want %v", eq, wantEq)
+ }
+ if es != nil {
+ if es.LenX() != len(x) {
+ t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x))
+ }
+ if es.LenY() != len(y) {
+ t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y))
+ }
+ if got := (es.Dist() == 0); got != wantEq {
+ t.Errorf("violation of equality invariant: got %v, want %v", got, wantEq)
+ }
+ if !validateScript(x, y, es) {
+ t.Errorf("invalid edit script: %v", es)
+ }
+ }
+ return es
+}
+
+func validateScript(x, y string, es EditScript) bool {
+ var bx, by []byte
+ for _, e := range es {
+ switch e {
+ case Identity:
+ if !compareByte(x[len(bx)], y[len(by)]).Equal() {
+ return false
+ }
+ bx = append(bx, x[len(bx)])
+ by = append(by, y[len(by)])
+ case UniqueX:
+ bx = append(bx, x[len(bx)])
+ case UniqueY:
+ by = append(by, y[len(by)])
+ case Modified:
+ if !compareByte(x[len(bx)], y[len(by)]).Similar() {
+ return false
+ }
+ bx = append(bx, x[len(bx)])
+ by = append(by, y[len(by)])
+ }
+ }
+ return string(bx) == x && string(by) == y
+}
+
+// compareByte returns a Result where the result is Equal if x == y,
+// similar if x and y differ only in casing, and different otherwise.
+func compareByte(x, y byte) (r Result) {
+ switch {
+ case x == y:
+ return equalResult // Identity
+ case unicode.ToUpper(rune(x)) == unicode.ToUpper(rune(y)):
+ return similarResult // Modified
+ default:
+ return differentResult // UniqueX or UniqueY
+ }
+}
+
+var (
+ equalResult = Result{NDiff: 0}
+ similarResult = Result{NDiff: 1}
+ differentResult = Result{NDiff: 2}
+)
+
+func TestResult(t *testing.T) {
+ tests := []struct {
+ result Result
+ wantEqual bool
+ wantSimilar bool
+ }{
+ // equalResult is equal since NDiff == 0, by definition of Equal method.
+ {equalResult, true, true},
+ // similarResult is similar since it is a binary result where only one
+ // element was compared (i.e., Either NSame==1 or NDiff==1).
+ {similarResult, false, true},
+ // differentResult is different since there are enough differences that
+ // it isn't even considered similar.
+ {differentResult, false, false},
+
+ // Zero value is always equal.
+ {Result{NSame: 0, NDiff: 0}, true, true},
+
+ // Binary comparisons (where NSame+NDiff == 1) are always similar.
+ {Result{NSame: 1, NDiff: 0}, true, true},
+ {Result{NSame: 0, NDiff: 1}, false, true},
+
+ // More complex ratios. The exact ratio for similarity may change,
+ // and may require updates to these test cases.
+ {Result{NSame: 1, NDiff: 1}, false, true},
+ {Result{NSame: 1, NDiff: 2}, false, true},
+ {Result{NSame: 1, NDiff: 3}, false, false},
+ {Result{NSame: 2, NDiff: 1}, false, true},
+ {Result{NSame: 2, NDiff: 2}, false, true},
+ {Result{NSame: 2, NDiff: 3}, false, true},
+ {Result{NSame: 3, NDiff: 1}, false, true},
+ {Result{NSame: 3, NDiff: 2}, false, true},
+ {Result{NSame: 3, NDiff: 3}, false, true},
+ {Result{NSame: 1000, NDiff: 0}, true, true},
+ {Result{NSame: 1000, NDiff: 1}, false, true},
+ {Result{NSame: 1000, NDiff: 2}, false, true},
+ {Result{NSame: 0, NDiff: 1000}, false, false},
+ {Result{NSame: 1, NDiff: 1000}, false, false},
+ {Result{NSame: 2, NDiff: 1000}, false, false},
+ }
+
+ for _, tt := range tests {
+ if got := tt.result.Equal(); got != tt.wantEqual {
+ t.Errorf("%#v.Equal() = %v, want %v", tt.result, got, tt.wantEqual)
+ }
+ if got := tt.result.Similar(); got != tt.wantSimilar {
+ t.Errorf("%#v.Similar() = %v, want %v", tt.result, got, tt.wantSimilar)
+ }
+ }
+}
+
+// TODO: Delete this hack when we drop Go1.6 support.
+func tRun(t *testing.T, name string, f func(t *testing.T)) {
+ type runner interface {
+ Run(string, func(t *testing.T)) bool
+ }
+ var ti interface{} = t
+ if r, ok := ti.(runner); ok {
+ r.Run(name, f)
+ } else {
+ t.Logf("Test: %s", name)
+ f(t)
+ }
+}
diff --git a/cmp/path.go b/cmp/path.go
index 5a88dbd..0c2eb33 100644
--- a/cmp/path.go
+++ b/cmp/path.go
@@ -36,7 +36,19 @@ type (
// SliceIndex is an index operation on a slice or array at some index Key.
SliceIndex interface {
PathStep
- Key() int
+ Key() int // May return -1 if in a split state
+
+ // SplitKeys returns the indexes for indexing into slices in the
+ // x and y values, respectively. These indexes may differ due to the
+ // insertion or removal of an element in one of the slices, causing
+ // all of the indexes to be shifted. If an index is -1, then that
+ // indicates that the element does not exist in the associated slice.
+ //
+ // Key is guaranteed to return -1 if and only if the indexes returned
+ // by SplitKeys are not the same. SplitKeys will never return -1 for
+ // both indexes.
+ SplitKeys() (x int, y int)
+
isSliceIndex()
}
// MapIndex is an index operation on a map at some index Key.
@@ -163,7 +175,7 @@ type (
sliceIndex struct {
pathStep
- key int
+ xkey, ykey int
}
mapIndex struct {
pathStep
@@ -205,19 +217,39 @@ func (ps pathStep) String() string {
return fmt.Sprintf("{%s}", s)
}
-func (si sliceIndex) String() string { return fmt.Sprintf("[%d]", si.key) }
+func (si sliceIndex) String() string {
+ switch {
+ case si.xkey == si.ykey:
+ return fmt.Sprintf("[%d]", si.xkey)
+ case si.ykey == -1:
+ // [5->?] means "I don't know where X[5] went"
+ return fmt.Sprintf("[%d->?]", si.xkey)
+ case si.xkey == -1:
+ // [?->3] means "I don't know where Y[3] came from"
+ return fmt.Sprintf("[?->%d]", si.ykey)
+ default:
+ // [5->3] means "X[5] moved to Y[3]"
+ return fmt.Sprintf("[%d->%d]", si.xkey, si.ykey)
+ }
+}
func (mi mapIndex) String() string { return fmt.Sprintf("[%#v]", mi.key) }
func (ta typeAssertion) String() string { return fmt.Sprintf(".(%v)", ta.typ) }
func (sf structField) String() string { return fmt.Sprintf(".%s", sf.name) }
func (in indirect) String() string { return "*" }
func (tf transform) String() string { return fmt.Sprintf("%s()", tf.trans.name) }
-func (si sliceIndex) Key() int { return si.key }
-func (mi mapIndex) Key() reflect.Value { return mi.key }
-func (sf structField) Name() string { return sf.name }
-func (sf structField) Index() int { return sf.idx }
-func (tf transform) Name() string { return tf.trans.name }
-func (tf transform) Func() reflect.Value { return tf.trans.fnc }
+func (si sliceIndex) Key() int {
+ if si.xkey != si.ykey {
+ return -1
+ }
+ return si.xkey
+}
+func (si sliceIndex) SplitKeys() (x, y int) { return si.xkey, si.ykey }
+func (mi mapIndex) Key() reflect.Value { return mi.key }
+func (sf structField) Name() string { return sf.name }
+func (sf structField) Index() int { return sf.idx }
+func (tf transform) Name() string { return tf.trans.name }
+func (tf transform) Func() reflect.Value { return tf.trans.fnc }
func (pathStep) isPathStep() {}
func (sliceIndex) isSliceIndex() {}
diff --git a/cmp/reporter.go b/cmp/reporter.go
index 1b812f5..3f3454b 100644
--- a/cmp/reporter.go
+++ b/cmp/reporter.go
@@ -26,31 +26,6 @@ type defaultReporter struct {
var _ reporter = (*defaultReporter)(nil)
func (r *defaultReporter) Report(x, y reflect.Value, eq bool, p Path) {
- // TODO: Is there a way to nicely print added/modified/removed elements
- // from a slice? This will most certainly require support from the
- // equality logic, but what would be the right API for this?
- //
- // The current API is equivalent to a Hamming distance for measuring the
- // difference between two sequences of symbols. That is, the only operation
- // we can represent is substitution. The new API would need to handle a
- // Levenshtein distance, such that insertions, deletions, and substitutions
- // are permitted. Furthermore, this will require an algorithm for computing
- // the edit distance. Unfortunately, the time complexity for a minimal
- // edit distance algorithm is not much better than O(n^2).
- // There are approximations for the algorithm that can run much faster.
- // See literature on computing Levenshtein distance.
- //
- // Passing in a pair of x and y is actually good for representing insertion
- // and deletion by the fact that x or y may be an invalid value. However,
- // we may need to pass in two paths px and py, to indicate the paths
- // relative to x and y. Alternative, since we only perform the Levenshtein
- // distance on slices, maybe we alter the SliceIndex type to record
- // two different indexes.
-
- // TODO: Perhaps we should coalesce differences on primitive kinds
- // together if the number of differences exceeds some ratio.
- // For example, comparing two SHA256s leads to many byte differences.
-
if eq {
// TODO: Maybe print some equal results for context?
return // Ignore equal results
@@ -62,7 +37,7 @@ func (r *defaultReporter) Report(x, y reflect.Value, eq bool, p Path) {
sx := prettyPrint(x, true)
sy := prettyPrint(y, true)
if sx == sy {
- // Use of Stringer is not helpful, so rely on more exact formatting.
+ // Stringer is not helpful, so rely on more exact formatting.
sx = prettyPrint(x, false)
sy = prettyPrint(y, false)
}