diff options
author | Joe Tsai <joetsai@digital-static.net> | 2017-07-20 15:19:44 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-20 15:19:44 -0700 |
commit | f9054c6a605e1bb9318d701929105b0fd8a7cac8 (patch) | |
tree | 824f18b6cd3e3ae6a127757fef23a6ec0a5e4797 /cmp | |
parent | 2b1da0b74500c33a0cd25f26b79594b46c816b6d (diff) | |
download | go-cmp-f9054c6a605e1bb9318d701929105b0fd8a7cac8.tar.gz |
Add diffing abilities to reporting (#9)
This adds an internal package with the following API:
func Difference(nx, ny int, f EqualFunc) (eq bool, es EditScript)
type EditScript []EditType
type EditType int8
const Identity EditType = iota ...
type EqualFunc func(ix int, iy int) Result
type Result struct{ ... }
The cmp.SliceIndex type has a new method SplitKeys to indicate that
elements are being compared in x and y are in different indexes.
The debug visualizer can be run:
go test -tags=debug -v github.com/google/go-cmp/cmp/internal/diff
Reviewed-By: kevlar@google.com
Diffstat (limited to 'cmp')
-rw-r--r-- | cmp/compare.go | 84 | ||||
-rw-r--r-- | cmp/compare_test.go | 79 | ||||
-rw-r--r-- | cmp/internal/diff/debug_disable.go | 17 | ||||
-rw-r--r-- | cmp/internal/diff/debug_enable.go | 122 | ||||
-rw-r--r-- | cmp/internal/diff/diff.go | 373 | ||||
-rw-r--r-- | cmp/internal/diff/diff_test.go | 467 | ||||
-rw-r--r-- | cmp/path.go | 50 | ||||
-rw-r--r-- | cmp/reporter.go | 27 |
8 files changed, 1118 insertions, 101 deletions
diff --git a/cmp/compare.go b/cmp/compare.go index 694a933..2294579 100644 --- a/cmp/compare.go +++ b/cmp/compare.go @@ -29,6 +29,8 @@ package cmp import ( "fmt" "reflect" + + "github.com/google/go-cmp/cmp/internal/diff" ) // BUG: Maps with keys containing NaN values cannot be properly compared due to @@ -37,6 +39,8 @@ import ( // // See https://golang.org/issue/11104 for more details. +var nothing = reflect.Value{} + // Equal reports whether x and y are equal by recursively applying the // following rules in the given order to x and y and all of their sub-values: // @@ -79,7 +83,7 @@ import ( func Equal(x, y interface{}, opts ...Option) bool { s := newState(opts) s.compareAny(reflect.ValueOf(x), reflect.ValueOf(y)) - return s.eq + return s.result.Equal() } // Diff returns a human-readable report of the differences between two values. @@ -101,8 +105,8 @@ func Diff(x, y interface{}, opts ...Option) string { } type state struct { - eq bool // Current result of comparison - curPath Path // The current path in the value tree + result diff.Result // The current result of comparison + curPath Path // The current path in the value tree // dsCheck tracks the state needed to periodically perform checks that // user provided func(T, T) bool functions are symmetric and deterministic. @@ -123,7 +127,7 @@ type state struct { } func newState(opts []Option) *state { - s := &state{eq: true} + s := new(state) for _, opt := range opts { s.processOption(opt) } @@ -184,6 +188,7 @@ func (s *state) compareAny(vx, vy reflect.Value) { t := vx.Type() if len(s.curPath) == 0 { s.curPath.push(&pathStep{typ: t}) + defer s.curPath.pop() } // Rule 1: Check whether an option applies on this node in the value tree. @@ -378,29 +383,52 @@ func (s *state) callFunc(f, x, y reflect.Value) bool { } func (s *state) compareArray(vx, vy reflect.Value, t reflect.Type) { - step := &sliceIndex{pathStep{t.Elem()}, 0} + step := &sliceIndex{pathStep{t.Elem()}, 0, 0} s.curPath.push(step) - defer s.curPath.pop() - // Regardless of the lengths, we always try to compare the elements. - // If one slice is longer, we will report the elements of the longer - // slice as different (relative to an invalid reflect.Value). - nmin := vx.Len() - if nmin > vy.Len() { - nmin = vy.Len() - } - for i := 0; i < nmin; i++ { - step.key = i - s.compareAny(vx.Index(i), vy.Index(i)) - } - for i := nmin; i < vx.Len(); i++ { - step.key = i - s.report(false, vx.Index(i), reflect.Value{}) + // Compute an edit-script for slices vx and vy. + oldResult, oldReporter := s.result, s.reporter + s.reporter = nil // Remove reporter to avoid spurious printouts + eq, es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result { + step.xkey, step.ykey = ix, iy + s.result = diff.Result{} // Reset result + s.compareAny(vx.Index(ix), vy.Index(iy)) + return s.result + }) + s.result, s.reporter = oldResult, oldReporter + + // Equal or no edit-script, so report entire slices as is. + if eq || es == nil { + s.curPath.pop() // Pop first since we are reporting the whole slice + s.report(eq, vx, vy) + return } - for i := nmin; i < vy.Len(); i++ { - step.key = i - s.report(false, reflect.Value{}, vy.Index(i)) + + // Replay the edit-script. + var ix, iy int + for _, e := range es { + switch e { + case diff.UniqueX: + step.xkey, step.ykey = ix, -1 + s.report(false, vx.Index(ix), nothing) + ix++ + case diff.UniqueY: + step.xkey, step.ykey = -1, iy + s.report(false, nothing, vy.Index(iy)) + iy++ + default: + step.xkey, step.ykey = ix, iy + if e == diff.Identity { + s.report(true, vx.Index(ix), vy.Index(iy)) + } else { + s.compareAny(vx.Index(ix), vy.Index(iy)) + } + ix++ + iy++ + } } + s.curPath.pop() + return } func (s *state) compareMap(vx, vy reflect.Value, t reflect.Type) { @@ -422,9 +450,9 @@ func (s *state) compareMap(vx, vy reflect.Value, t reflect.Type) { case vvx.IsValid() && vvy.IsValid(): s.compareAny(vvx, vvy) case vvx.IsValid() && !vvy.IsValid(): - s.report(false, vvx, reflect.Value{}) + s.report(false, vvx, nothing) case !vvx.IsValid() && vvy.IsValid(): - s.report(false, reflect.Value{}, vvy) + s.report(false, nothing, vvy) default: // It is possible for both vvx and vvy to be invalid if the // key contained a NaN value in it. There is no way in @@ -470,7 +498,11 @@ func (s *state) compareStruct(vx, vy reflect.Value, t reflect.Type) { // report records the result of a single comparison. // It also calls Report if any reporter is registered. func (s *state) report(eq bool, vx, vy reflect.Value) { - s.eq = s.eq && eq + if eq { + s.result.NSame++ + } else { + s.result.NDiff++ + } if s.reporter != nil { s.reporter.Report(vx, vy, eq, s.curPath) } diff --git a/cmp/compare_test.go b/cmp/compare_test.go index 113e50e..11c30c2 100644 --- a/cmp/compare_test.go +++ b/cmp/compare_test.go @@ -6,6 +6,7 @@ package cmp_test import ( "bytes" + "crypto/md5" "fmt" "io" "math/rand" @@ -101,8 +102,8 @@ func TestDiff(t *testing.T) { if gotPanic != "" { t.Fatalf("unexpected panic message: %s", gotPanic) } - if strings.TrimSpace(gotDiff) != strings.TrimSpace(tt.wantDiff) { - t.Fatalf("difference message:\ngot:\n%s\nwant:\n%s", gotDiff, tt.wantDiff) + if got, want := strings.TrimSpace(gotDiff), strings.TrimSpace(tt.wantDiff); got != want { + t.Fatalf("difference message:\ngot:\n%s\n\nwant:\n%s", got, want) } } else { if !strings.Contains(gotPanic, tt.wantPanic) { @@ -117,10 +118,9 @@ func comparerTests() []test { const label = "Comparer" return []test{{ - label: label, - x: 1, - y: 1, - wantDiff: "", + label: label, + x: 1, + y: 1, }, { label: label, x: 1, @@ -164,10 +164,9 @@ func comparerTests() []test { opts: []cmp.Option{struct{ cmp.Option }{}}, wantPanic: "unknown option", }, { - label: label, - x: struct{ A, B, C int }{1, 2, 3}, - y: struct{ A, B, C int }{1, 2, 3}, - wantDiff: "", + label: label, + x: struct{ A, B, C int }{1, 2, 3}, + y: struct{ A, B, C int }{1, 2, 3}, }, { label: label, x: struct{ A, B, C int }{1, 2, 3}, @@ -244,17 +243,19 @@ func comparerTests() []test { y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, wantPanic: "cannot handle unexported field", }, { - label: label, - x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, - y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, - opts: []cmp.Option{cmp.Comparer(equalRegexp)}, - wantDiff: "", + label: label, + x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, + y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, + opts: []cmp.Option{cmp.Comparer(equalRegexp)}, }, { - label: label, - x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, - y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*d*")}, - opts: []cmp.Option{cmp.Comparer(equalRegexp)}, - wantDiff: "{[]*regexp.Regexp}[1]:\n\t-: \"a*b*c*\"\n\t+: \"a*b*d*\"\n", + label: label, + x: []*regexp.Regexp{nil, regexp.MustCompile("a*b*c*")}, + y: []*regexp.Regexp{nil, regexp.MustCompile("a*b*d*")}, + opts: []cmp.Option{cmp.Comparer(equalRegexp)}, + wantDiff: ` +{[]*regexp.Regexp}[1]: + -: "a*b*c*" + +: "a*b*d*"`, }, { label: label, x: func() ***int { @@ -307,6 +308,14 @@ root: +: "hello2"`, }, { label: label, + x: md5.Sum([]byte{'a'}), + y: md5.Sum([]byte{'b'}), + wantDiff: ` +{[16]uint8}: + -: [16]uint8{0x0c, 0xc1, 0x75, 0xb9, 0xc0, 0xf1, 0xb6, 0xa8, 0x31, 0xc3, 0x99, 0xe2, 0x69, 0x77, 0x26, 0x61} + +: [16]uint8{0x92, 0xeb, 0x5f, 0xfe, 0xe6, 0xae, 0x2f, 0xec, 0x3a, 0xd7, 0x1c, 0x77, 0x75, 0x31, 0x57, 0x8f}`, + }, { + label: label, x: make([]int, 1000), y: make([]int, 1000), opts: []cmp.Option{ @@ -1392,8 +1401,7 @@ func project1Tests() []test { y: ts.Eagle{Slaps: []ts.Slap{{ Args: &pb.MetaData{Stringer: pb.Stringer{"metadata"}}, }}}, - opts: []cmp.Option{cmp.Comparer(pb.Equal)}, - wantDiff: "", + opts: []cmp.Option{cmp.Comparer(pb.Equal)}, }, { label: label, x: ts.Eagle{Slaps: []ts.Slap{{}, {}, {}, {}, { @@ -1435,10 +1443,10 @@ func project1Tests() []test { {teststructs.Eagle}.Slaps[0].Immutable.MildSlap: -: false +: true -{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[1]: +{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[1->?]: -: "bar" +: <non-existent> -{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[2]: +{teststructs.Eagle}.Slaps[0].Immutable.LoveRadius.Summer.Summary.Devices[2->?]: -: "baz" +: <non-existent>`, }} @@ -1524,14 +1532,11 @@ func project2Tests() []test { }(), opts: []cmp.Option{cmp.Comparer(pb.Equal), equalDish}, wantDiff: ` -{teststructs.GermBatch}.DirtyGerms[18][0]: +{teststructs.GermBatch}.DirtyGerms[18][0->?]: -: "germ2" - +: "germ3" -{teststructs.GermBatch}.DirtyGerms[18][1]: - -: "germ3" - +: "germ4" -{teststructs.GermBatch}.DirtyGerms[18][2]: - -: "germ4" + +: <non-existent> +{teststructs.GermBatch}.DirtyGerms[18][?->2]: + -: <non-existent> +: "germ2"`, }, { label: label, @@ -1562,7 +1567,7 @@ func project2Tests() []test { {teststructs.GermBatch}.DirtyGerms[17]: -: <non-existent> +: []*testprotos.Germ{"germ1"} -{teststructs.GermBatch}.DirtyGerms[18][2]: +{teststructs.GermBatch}.DirtyGerms[18][2->?]: -: "germ4" +: <non-existent> {teststructs.GermBatch}.DishMap[1]: @@ -1736,14 +1741,8 @@ func project4Tests() []test { }(), opts: []cmp.Option{allowVisibility, transformProtos, cmp.Comparer(pb.Equal)}, wantDiff: ` -{teststructs.Cartel}.Headquarter.subDivisions[0]: +{teststructs.Cartel}.Headquarter.subDivisions[0->?]: -: "alpha" - +: "bravo" -{teststructs.Cartel}.Headquarter.subDivisions[1]: - -: "bravo" - +: "charlie" -{teststructs.Cartel}.Headquarter.subDivisions[2]: - -: "charlie" +: <non-existent> {teststructs.Cartel}.Headquarter.publicMessage[2]: -: 0x03 @@ -1754,7 +1753,7 @@ func project4Tests() []test { {teststructs.Cartel}.poisons[0].poisonType: -: 1 +: 5 -{teststructs.Cartel}.poisons[1]: +{teststructs.Cartel}.poisons[1->?]: -: &teststructs.Poison{poisonType: 2, manufactuer: "acme2"} +: <non-existent>`, }} diff --git a/cmp/internal/diff/debug_disable.go b/cmp/internal/diff/debug_disable.go new file mode 100644 index 0000000..42afa49 --- /dev/null +++ b/cmp/internal/diff/debug_disable.go @@ -0,0 +1,17 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.md file. + +// +build !debug + +package diff + +var debug debugger + +type debugger struct{} + +func (debugger) Begin(_, _ int, f EqualFunc, _, _ *EditScript) EqualFunc { + return f +} +func (debugger) Update() {} +func (debugger) Finish() {} diff --git a/cmp/internal/diff/debug_enable.go b/cmp/internal/diff/debug_enable.go new file mode 100644 index 0000000..ba46c62 --- /dev/null +++ b/cmp/internal/diff/debug_enable.go @@ -0,0 +1,122 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.md file. + +// +build debug + +package diff + +import ( + "fmt" + "strings" + "sync" + "time" +) + +// The algorithm can be seen running in real-time by enabling debugging: +// go test -tags=debug -v +// +// Example output: +// === RUN TestDifference/#34 +// ┌───────────────────────────────┐ +// │ \ · · · · · · · · · · · · · · │ +// │ · # · · · · · · · · · · · · · │ +// │ · \ · · · · · · · · · · · · · │ +// │ · · \ · · · · · · · · · · · · │ +// │ · · · X # · · · · · · · · · · │ +// │ · · · # \ · · · · · · · · · · │ +// │ · · · · · # # · · · · · · · · │ +// │ · · · · · # \ · · · · · · · · │ +// │ · · · · · · · \ · · · · · · · │ +// │ · · · · · · · · \ · · · · · · │ +// │ · · · · · · · · · \ · · · · · │ +// │ · · · · · · · · · · \ · · # · │ +// │ · · · · · · · · · · · \ # # · │ +// │ · · · · · · · · · · · # # # · │ +// │ · · · · · · · · · · # # # # · │ +// │ · · · · · · · · · # # # # # · │ +// │ · · · · · · · · · · · · · · \ │ +// └───────────────────────────────┘ +// [.Y..M.XY......YXYXY.|] +// +// The grid represents the edit-graph where the horizontal axis represents +// list X and the vertical axis represents list Y. The start of the two lists +// is the top-left, while the ends are the bottom-right. The '·' represents +// an unexplored node in the graph. The '\' indicates that the two symbols +// from list X and Y are equal. The 'X' indicates that two symbols are similar +// (but not exactly equal) to each other. The '#' indicates that the two symbols +// are different (and not similar). The algorithm traverses this graph trying to +// make the paths starting in the top-left and the bottom-right connect. +// +// The series of '.', 'X', 'Y', and 'M' characters at the bottom represents +// the currently established path from the forward and reverse searches, +// seperated by a '|' character. + +const ( + updateDelay = 100 * time.Millisecond + finishDelay = 500 * time.Millisecond + ansiTerminal = true // ANSI escape codes used to move terminal cursor +) + +var debug debugger + +type debugger struct { + sync.Mutex + p1, p2 EditScript + fwdPath, revPath *EditScript + grid []byte + lines int +} + +func (dbg *debugger) Begin(nx, ny int, f EqualFunc, p1, p2 *EditScript) EqualFunc { + dbg.Lock() + dbg.fwdPath, dbg.revPath = p1, p2 + top := "┌─" + strings.Repeat("──", nx) + "┐\n" + row := "│ " + strings.Repeat("· ", nx) + "│\n" + btm := "└─" + strings.Repeat("──", nx) + "┘\n" + dbg.grid = []byte(top + strings.Repeat(row, ny) + btm) + dbg.lines = strings.Count(dbg.String(), "\n") + fmt.Print(dbg) + + // Wrap the EqualFunc so that we can intercept each result. + return func(ix, iy int) (r Result) { + cell := dbg.grid[len(top)+iy*len(row):][len("│ ")+len("· ")*ix:][:len("·")] + for i := range cell { + cell[i] = 0 // Zero out the multiple bytes of UTF-8 middle-dot + } + switch r = f(ix, iy); { + case r.Equal(): + cell[0] = '\\' + case r.Similar(): + cell[0] = 'X' + default: + cell[0] = '#' + } + return + } +} + +func (dbg *debugger) Update() { + dbg.print(updateDelay) +} + +func (dbg *debugger) Finish() { + dbg.print(finishDelay) + dbg.Unlock() +} + +func (dbg *debugger) String() string { + dbg.p1, dbg.p2 = *dbg.fwdPath, dbg.p2[:0] + for i := len(*dbg.revPath) - 1; i >= 0; i-- { + dbg.p2 = append(dbg.p2, (*dbg.revPath)[i]) + } + return fmt.Sprintf("%s[%v|%v]\n\n", dbg.grid, dbg.p1, dbg.p2) +} + +func (dbg *debugger) print(d time.Duration) { + if ansiTerminal { + fmt.Printf("\x1b[%dA", dbg.lines) // Reset terminal cursor + } + fmt.Print(dbg) + time.Sleep(d) +} diff --git a/cmp/internal/diff/diff.go b/cmp/internal/diff/diff.go new file mode 100644 index 0000000..baa41fd --- /dev/null +++ b/cmp/internal/diff/diff.go @@ -0,0 +1,373 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.md file. + +// Package diff implements an algorithm for producing edit-scripts. +// The edit-script is a sequence of operations needed to transform one list +// of symbols into another (or vice-versa). The edits allowed are insertions, +// deletions, and modifications. The summation of all edits is called the +// Levenshtein distance as this problem is well-known in computer science. +// +// This package prioritizes performance over accuracy. That is, the run time +// is more important than obtaining a minimal Levenshtein distance. +package diff + +// EditType represents a single operation within an edit-script. +type EditType uint8 + +const ( + // Identity indicates that a symbol pair is identical in both list X and Y. + Identity EditType = iota + // UniqueX indicates that a symbol only exists in X and not Y. + UniqueX + // UniqueY indicates that a symbol only exists in Y and not X. + UniqueY + // Modified indicates that a symbol pair is a modification of each other. + Modified +) + +// EditScript represents the series of differences between two lists. +type EditScript []EditType + +// String returns a human-readable string representing the edit-script where +// Identity, UniqueX, UniqueY, and Modified are represented by the +// '.', 'X', 'Y', and 'M' characters, respectively. +func (es EditScript) String() string { + b := make([]byte, len(es)) + for i, e := range es { + switch e { + case Identity: + b[i] = '.' + case UniqueX: + b[i] = 'X' + case UniqueY: + b[i] = 'Y' + case Modified: + b[i] = 'M' + default: + panic("invalid edit-type") + } + } + return string(b) +} + +// stats returns a histogram of the number of each type of edit operation. +func (es EditScript) stats() (s struct{ NI, NX, NY, NM int }) { + for _, e := range es { + switch e { + case Identity: + s.NI++ + case UniqueX: + s.NX++ + case UniqueY: + s.NY++ + case Modified: + s.NM++ + default: + panic("invalid edit-type") + } + } + return +} + +// Dist is the Levenshtein distance and is guaranteed to be 0 if and only if +// lists X and Y are equal. +func (es EditScript) Dist() int { return len(es) - es.stats().NI } + +// LenX is the length of the X list. +func (es EditScript) LenX() int { return len(es) - es.stats().NY } + +// LenY is the length of the Y list. +func (es EditScript) LenY() int { return len(es) - es.stats().NX } + +// EqualFunc reports whether the symbols at indexes ix and iy are equal. +// When called by Difference, the index is guaranteed to be within nx and ny. +type EqualFunc func(ix int, iy int) Result + +// Result is the result of comparison. +// NSame is the number of sub-elements that are equal. +// NDiff is the number of sub-elements that are not equal. +type Result struct{ NSame, NDiff int } + +// Equal indicates whether the symbols are equal. Two symbols are equal +// if and only if NDiff == 0. If Equal, then they are also Similar. +func (r Result) Equal() bool { return r.NDiff == 0 } + +// Similar indicates whether two symbols are similar and may be represented +// by using the Modified type. As a special case, we consider binary comparisons +// (i.e., those that return Result{1, 0} or Result{0, 1}) to be similar. +// +// The exact ratio of NSame to NDiff to determine similarity may change. +func (r Result) Similar() bool { + // Use NSame+1 to offset NSame so that binary comparisons are similar. + return r.NSame+1 >= r.NDiff +} + +// Difference reports whether two lists of lengths nx and ny are equal +// given the definition of equality provided as f. +// +// This function may return a edit-script, which is a sequence of operations +// needed to convert one list into the other. If non-nil, the following +// invariants for the edit-script are maintained: +// • eq == (es.Dist()==0) +// • nx == es.LenX() +// • ny == es.LenY() +// +// This algorithm is not guaranteed to be an optimal solution (i.e., one that +// produces an edit-script with a minimal Levenshtein distance). This algorithm +// favors performance over optimality. The exact output is not guaranteed to +// be stable and may change over time. +func Difference(nx, ny int, f EqualFunc) (eq bool, es EditScript) { + es = searchGraph(nx, ny, f) + st := es.stats() + eq = len(es) == st.NI + if !eq && st.NI < (nx+ny)/4 { + return eq, nil // Edit-script more distracting than helpful + } + return eq, es +} + +func searchGraph(nx, ny int, f EqualFunc) EditScript { + // This algorithm is based on traversing what is known as an "edit-graph". + // See Figure 1 from "An O(ND) Difference Algorithm and Its Variations" + // by Eugene W. Myers. Since D can be as large as N itself, this is + // effectively O(N^2). Unlike the algorithm from that paper, we are not + // interested in the optimal path, but at least some "decent" path. + // + // For example, let X and Y be lists of symbols: + // X = [A B C A B B A] + // Y = [C B A B A C] + // + // The edit-graph can be drawn as the following: + // A B C A B B A + // ┌─────────────┐ + // C │_|_|\|_|_|_|_│ 0 + // B │_|\|_|_|\|\|_│ 1 + // A │\|_|_|\|_|_|\│ 2 + // B │_|\|_|_|\|\|_│ 3 + // A │\|_|_|\|_|_|\│ 4 + // C │ | |\| | | | │ 5 + // └─────────────┘ 6 + // 0 1 2 3 4 5 6 7 + // + // List X is written along the horizontal axis, while list Y is written + // along the vertical axis. At any point on this grid, if the symbol in + // list X matches the corresponding symbol in list Y, then a '\' is drawn. + // The goal of any minimal edit-script algorithm is to find a path from the + // top-left corner to the bottom-right corner, while traveling through the + // fewest horizontal or vertical edges. + // A horizontal edge is equivalent to inserting a symbol from list X. + // A vertical edge is equivalent to inserting a symbol from list Y. + // A diagonal edge is equivalent to a matching symbol between both X and Y. + + // Invariants: + // • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx + // • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny + // + // In general: + // • fwdFrontier.X < revFrontier.X + // • fwdFrontier.Y < revFrontier.Y + // Unless, it is time for the algorithm to terminate. + fwdPath := path{+1, point{0, 0}, make(EditScript, 0, (nx+ny)/2)} + revPath := path{-1, point{nx, ny}, make(EditScript, 0)} + fwdFrontier := fwdPath.point // Forward search frontier + revFrontier := revPath.point // Reverse search frontier + + // Search budget bounds the cost of searching for better paths. + // The longest sequence of non-matching symbols that can be tolerated is + // approximately the square-root of the search budget. + searchBudget := 4 * (nx + ny) // O(n) + + // The algorithm below is a greedy, meet-in-the-middle algorithm for + // computing sub-optimal edit-scripts between two lists. + // + // The algorithm is approximately as follows: + // • Searching for differences switches back-and-forth between + // a search that starts at the beginning (the top-left corner), and + // a search that starts at the end (the bottom-right corner). The goal of + // the search is connect with the search from the opposite corner. + // • As we search, we build a path in a greedy manner, where the first + // match seen is added to the path (this is sub-optimal, but provides a + // decent result in practice). When matches are found, we try the next pair + // of symbols in the lists and follow all matches as far as possible. + // • When searching for matches, we search along a diagonal going through + // through the "frontier" point. If no matches are found, we advance the + // frontier towards the opposite corner. + // • This algorithm terminates when either the X coordinates or the + // Y coordinates of the forward and reverse frontier points ever intersect. + // + // This algorithm is correct even if searching only in the forward direction + // or in the reverse direction. We do both because it is commonly observed + // that two lists commonly differ because elements were added to the front + // or end of the other list. + // + // Running the tests with the "debug" build tag prints a visualization of + // the algorithm running in real-time. This is educational for understanding + // how the algorithm works. See debug_enable.go. + f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es) + for { + // Forward search from the beginning. + if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 { + break + } + for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ { + // Search in a diagonal pattern for a match. + z := zigzag(i) + p := point{fwdFrontier.X + z, fwdFrontier.Y - z} + switch { + case p.X >= revPath.X || p.Y < fwdPath.Y: + stop1 = true // Hit top-right corner + case p.Y >= revPath.Y || p.X < fwdPath.X: + stop2 = true // Hit bottom-left corner + case f(p.X, p.Y).Equal(): + // Match found, so connect the path to this point. + fwdPath.connect(p, f) + fwdPath.append(Identity) + // Follow sequence of matches as far as possible. + for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y { + if !f(fwdPath.X, fwdPath.Y).Equal() { + break + } + fwdPath.append(Identity) + } + fwdFrontier = fwdPath.point + stop1, stop2 = true, true + default: + searchBudget-- // Match not found + } + debug.Update() + } + // Advance the frontier towards reverse point. + if revPath.X-fwdFrontier.X >= revPath.Y-fwdFrontier.Y { + fwdFrontier.X++ + } else { + fwdFrontier.Y++ + } + + // Reverse search from the end. + if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 { + break + } + for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ { + // Search in a diagonal pattern for a match. + z := zigzag(i) + p := point{revFrontier.X - z, revFrontier.Y + z} + switch { + case fwdPath.X >= p.X || revPath.Y < p.Y: + stop1 = true // Hit bottom-left corner + case fwdPath.Y >= p.Y || revPath.X < p.X: + stop2 = true // Hit top-right corner + case f(p.X-1, p.Y-1).Equal(): + // Match found, so connect the path to this point. + revPath.connect(p, f) + revPath.append(Identity) + // Follow sequence of matches as far as possible. + for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y { + if !f(revPath.X-1, revPath.Y-1).Equal() { + break + } + revPath.append(Identity) + } + revFrontier = revPath.point + stop1, stop2 = true, true + default: + searchBudget-- // Match not found + } + debug.Update() + } + // Advance the frontier towards forward point. + if revFrontier.X-fwdPath.X >= revFrontier.Y-fwdPath.Y { + revFrontier.X-- + } else { + revFrontier.Y-- + } + } + + // Join the forward and reverse paths and then append the reverse path. + fwdPath.connect(revPath.point, f) + for i := len(revPath.es) - 1; i >= 0; i-- { + t := revPath.es[i] + revPath.es = revPath.es[:i] + fwdPath.append(t) + } + debug.Finish() + return fwdPath.es +} + +type path struct { + dir int // +1 if forward, -1 if reverse + point // Leading point of the EditScript path + es EditScript +} + +// connect appends any necessary Identity, Modified, UniqueX, or UniqueY types +// to the edit-script to connect p.point to dst. +func (p *path) connect(dst point, f EqualFunc) { + if p.dir > 0 { + // Connect in forward direction. + for dst.X > p.X && dst.Y > p.Y { + switch r := f(p.X, p.Y); { + case r.Equal(): + p.append(Identity) + case r.Similar(): + p.append(Modified) + case dst.X-p.X >= dst.Y-p.Y: + p.append(UniqueX) + default: + p.append(UniqueY) + } + } + for dst.X > p.X { + p.append(UniqueX) + } + for dst.Y > p.Y { + p.append(UniqueY) + } + } else { + // Connect in reverse direction. + for p.X > dst.X && p.Y > dst.Y { + switch r := f(p.X-1, p.Y-1); { + case r.Equal(): + p.append(Identity) + case r.Similar(): + p.append(Modified) + case p.Y-dst.Y >= p.X-dst.X: + p.append(UniqueY) + default: + p.append(UniqueX) + } + } + for p.X > dst.X { + p.append(UniqueX) + } + for p.Y > dst.Y { + p.append(UniqueY) + } + } +} + +func (p *path) append(t EditType) { + p.es = append(p.es, t) + switch t { + case Identity, Modified: + p.add(p.dir, p.dir) + case UniqueX: + p.add(p.dir, 0) + case UniqueY: + p.add(0, p.dir) + } + debug.Update() +} + +type point struct{ X, Y int } + +func (p *point) add(dx, dy int) { p.X += dx; p.Y += dy } + +// zigzag maps a consecutive sequence of integers to a zig-zag sequence. +// [0 1 2 3 4 5 ...] => [0 -1 +1 -2 +2 ...] +func zigzag(x int) int { + if x&1 != 0 { + x = ^x + } + return x >> 1 +} diff --git a/cmp/internal/diff/diff_test.go b/cmp/internal/diff/diff_test.go new file mode 100644 index 0000000..5996ea2 --- /dev/null +++ b/cmp/internal/diff/diff_test.go @@ -0,0 +1,467 @@ +// Copyright 2017, The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.md file. + +package diff + +import ( + "fmt" + "math/rand" + "strings" + "testing" + "unicode" +) + +func TestDifference(t *testing.T) { + tests := []struct { + // Before passing x and y to Difference, we strip all spaces so that + // they can be used by the test author to indicate a missing symbol + // in one of the lists. + x, y string + want string + }{{ + x: "", + y: "", + want: "", + }, { + x: "#", + y: "#", + want: ".", + }, { + x: "##", + y: "# ", + want: ".X", + }, { + x: "a#", + y: "A ", + want: "MX", + }, { + x: "#a", + y: " A", + want: "XM", + }, { + x: "# ", + y: "##", + want: ".Y", + }, { + x: " #", + y: "@#", + want: "Y.", + }, { + x: "@#", + y: " #", + want: "X.", + }, { + x: "##########0123456789", + y: " 0123456789", + want: "XXXXXXXXXX..........", + }, { + x: " 0123456789", + y: "##########0123456789", + want: "YYYYYYYYYY..........", + }, { + x: "#####0123456789#####", + y: " 0123456789 ", + want: "XXXXX..........XXXXX", + }, { + x: " 0123456789 ", + y: "#####0123456789#####", + want: "YYYYY..........YYYYY", + }, { + x: "01234##########56789", + y: "01234 56789", + want: ".....XXXXXXXXXX.....", + }, { + x: "01234 56789", + y: "01234##########56789", + want: ".....YYYYYYYYYY.....", + }, { + x: "0123456789##########", + y: "0123456789 ", + want: "..........XXXXXXXXXX", + }, { + x: "0123456789 ", + y: "0123456789##########", + want: "..........YYYYYYYYYY", + }, { + x: "abcdefghij0123456789", + y: "ABCDEFGHIJ0123456789", + want: "MMMMMMMMMM..........", + }, { + x: "ABCDEFGHIJ0123456789", + y: "abcdefghij0123456789", + want: "MMMMMMMMMM..........", + }, { + x: "01234abcdefghij56789", + y: "01234ABCDEFGHIJ56789", + want: ".....MMMMMMMMMM.....", + }, { + x: "01234ABCDEFGHIJ56789", + y: "01234abcdefghij56789", + want: ".....MMMMMMMMMM.....", + }, { + x: "0123456789abcdefghij", + y: "0123456789ABCDEFGHIJ", + want: "..........MMMMMMMMMM", + }, { + x: "0123456789ABCDEFGHIJ", + y: "0123456789abcdefghij", + want: "..........MMMMMMMMMM", + }, { + x: "ABCDEFGHIJ0123456789 ", + y: " 0123456789abcdefghij", + want: "XXXXXXXXXX..........YYYYYYYYYY", + }, { + x: " 0123456789abcdefghij", + y: "ABCDEFGHIJ0123456789 ", + want: "YYYYYYYYYY..........XXXXXXXXXX", + }, { + x: "ABCDE0123456789 FGHIJ", + y: " 0123456789abcdefghij", + want: "XXXXX..........YYYYYMMMMM", + }, { + x: " 0123456789abcdefghij", + y: "ABCDE0123456789 FGHIJ", + want: "YYYYY..........XXXXXMMMMM", + }, { + x: "ABCDE01234F G H I J 56789 ", + y: " 01234 a b c d e56789fghij", + want: "XXXXX.....XYXYXYXYXY.....YYYYY", + }, { + x: " 01234a b c d e 56789fghij", + y: "ABCDE01234 F G H I J56789 ", + want: "YYYYY.....XYXYXYXYXY.....XXXXX", + }, { + x: "FGHIJ01234ABCDE56789 ", + y: " 01234abcde56789fghij", + want: "XXXXX.....MMMMM.....YYYYY", + }, { + x: " 01234abcde56789fghij", + y: "FGHIJ01234ABCDE56789 ", + want: "YYYYY.....MMMMM.....XXXXX", + }, { + x: "ABCAB BA ", + y: " C BABAC", + want: "XX.X.Y..Y", + }, { + x: "# #### ###", + y: "#y####yy###", + want: ".Y....YY...", + }, { + x: "# #### # ##x#x", + y: "#y####y y## # ", + want: ".Y....YXY..X.X", + }, { + x: "###z#z###### x #", + y: "#y##Z#Z###### yy#", + want: ".Y..M.M......XYY.", + }, { + x: "0 12z3x 456789 x x 0", + y: "0y12Z3 y456789y y y0", + want: ".Y..M.XY......YXYXY.", + }, { + x: "0 2 4 6 8 ..................abXXcdEXF.ghXi", + y: " 1 3 5 7 9..................AB CDE F.GH I", + want: "XYXYXYXYXY..................MMXXMM.X..MMXM", + }, { + x: "I HG.F EDC BA..................9 7 5 3 1 ", + y: "iXhg.FXEdcXXba.................. 8 6 4 2 0", + want: "MYMM..Y.MMYYMM..................XYXYXYXYXY", + }, { + x: "x1234", + y: " 1234", + want: "X....", + }, { + x: "x123x4", + y: " 123 4", + want: "X...X.", + }, { + x: "x1234x56", + y: " 1234 ", + want: "X....XXX", + }, { + x: "x1234xxx56", + y: " 1234 56", + want: "X....XXX..", + }, { + x: ".1234...ab", + y: " 1234 AB", + want: "X....XXXMM", + }, { + x: "x1234xxab.", + y: " 1234 AB ", + want: "X....XXMMX", + }, { + x: " 0123456789", + y: "9012345678 ", + want: "Y.........X", + }, { + x: " 0123456789", + y: "8901234567 ", + want: "YY........XX", + }, { + x: " 0123456789", + y: "7890123456 ", + want: "YYY.......XXX", + }, { + x: " 0123456789", + y: "6789012345 ", + want: "YYYY......XXXX", + }, { + x: "0123456789 ", + y: " 5678901234", + want: "XXXXX.....YYYYY", + }, { + x: "0123456789 ", + y: " 4567890123", + want: "XXXX......YYYY", + }, { + x: "0123456789 ", + y: " 3456789012", + want: "XXX.......YYY", + }, { + x: "0123456789 ", + y: " 2345678901", + want: "XX........YY", + }, { + x: "0123456789 ", + y: " 1234567890", + want: "X.........Y", + }, { + x: "0123456789", + y: "9876543210", + }, { + x: "0123456789", + y: "6725819034", + }, { + x: "FBQMOIGTLN72X90E4SP651HKRJUDA83CVZW", + y: "5WHXO10R9IVKZLCTAJ8P3NSEQM472G6UBDF", + }} + + for _, tt := range tests { + tRun(t, "", func(t *testing.T) { + x := strings.Replace(tt.x, " ", "", -1) + y := strings.Replace(tt.y, " ", "", -1) + es := testStrings(t, x, y) + if got := es.String(); got != tt.want { + t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want) + } + }) + } +} + +func TestDifferenceFuzz(t *testing.T) { + tests := []struct{ px, py, pm float32 }{ + {px: 0.0, py: 0.0, pm: 0.1}, + {px: 0.0, py: 0.1, pm: 0.0}, + {px: 0.1, py: 0.0, pm: 0.0}, + {px: 0.0, py: 0.1, pm: 0.1}, + {px: 0.1, py: 0.0, pm: 0.1}, + {px: 0.2, py: 0.2, pm: 0.2}, + {px: 0.3, py: 0.1, pm: 0.2}, + {px: 0.1, py: 0.3, pm: 0.2}, + {px: 0.2, py: 0.2, pm: 0.2}, + {px: 0.3, py: 0.3, pm: 0.3}, + {px: 0.1, py: 0.1, pm: 0.5}, + {px: 0.4, py: 0.1, pm: 0.5}, + {px: 0.3, py: 0.2, pm: 0.5}, + {px: 0.2, py: 0.3, pm: 0.5}, + {px: 0.1, py: 0.4, pm: 0.5}, + } + + for i, tt := range tests { + tRun(t, fmt.Sprintf("P%d", i), func(t *testing.T) { + // Sweep from 1B to 1KiB. + for n := 1; n <= 1024; n <<= 1 { + tRun(t, fmt.Sprintf("N%d", n), func(t *testing.T) { + for j := 0; j < 10; j++ { + x, y := generateStrings(n, tt.px, tt.py, tt.pm, int64(j)) + testStrings(t, x, y) + } + }) + } + }) + } +} + +func benchmarkDifference(b *testing.B, n int) { + // TODO: Use testing.B.Run when we drop Go1.6 support. + x, y := generateStrings(n, 0.05, 0.05, 0.10, 0) + b.ReportAllocs() + b.SetBytes(int64(len(x) + len(y))) + for i := 0; i < b.N; i++ { + Difference(len(x), len(y), func(ix, iy int) Result { + return compareByte(x[ix], y[iy]) + }) + } +} +func BenchmarkDifference1K(b *testing.B) { benchmarkDifference(b, 1<<10) } +func BenchmarkDifference4K(b *testing.B) { benchmarkDifference(b, 1<<12) } +func BenchmarkDifference16K(b *testing.B) { benchmarkDifference(b, 1<<14) } +func BenchmarkDifference64K(b *testing.B) { benchmarkDifference(b, 1<<16) } +func BenchmarkDifference256K(b *testing.B) { benchmarkDifference(b, 1<<18) } +func BenchmarkDifference1M(b *testing.B) { benchmarkDifference(b, 1<<20) } + +func generateStrings(n int, px, py, pm float32, seed int64) (string, string) { + if px+py+pm > 1.0 { + panic("invalid probabilities") + } + py += px + pm += py + + b := make([]byte, n) + r := rand.New(rand.NewSource(seed)) + r.Read(b) + + var x, y []byte + for len(b) > 0 { + switch p := r.Float32(); { + case p < px: // UniqueX + x = append(x, b[0]) + case p < py: // UniqueY + y = append(y, b[0]) + case p < pm: // Modified + x = append(x, 'A'+(b[0]%26)) + y = append(y, 'a'+(b[0]%26)) + default: // Identity + x = append(x, b[0]) + y = append(y, b[0]) + } + b = b[1:] + } + return string(x), string(y) +} + +func testStrings(t *testing.T, x, y string) EditScript { + wantEq := x == y + eq, es := Difference(len(x), len(y), func(ix, iy int) Result { + return compareByte(x[ix], y[iy]) + }) + if eq != wantEq { + t.Errorf("equality mismatch: got %v, want %v", eq, wantEq) + } + if es != nil { + if es.LenX() != len(x) { + t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x)) + } + if es.LenY() != len(y) { + t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y)) + } + if got := (es.Dist() == 0); got != wantEq { + t.Errorf("violation of equality invariant: got %v, want %v", got, wantEq) + } + if !validateScript(x, y, es) { + t.Errorf("invalid edit script: %v", es) + } + } + return es +} + +func validateScript(x, y string, es EditScript) bool { + var bx, by []byte + for _, e := range es { + switch e { + case Identity: + if !compareByte(x[len(bx)], y[len(by)]).Equal() { + return false + } + bx = append(bx, x[len(bx)]) + by = append(by, y[len(by)]) + case UniqueX: + bx = append(bx, x[len(bx)]) + case UniqueY: + by = append(by, y[len(by)]) + case Modified: + if !compareByte(x[len(bx)], y[len(by)]).Similar() { + return false + } + bx = append(bx, x[len(bx)]) + by = append(by, y[len(by)]) + } + } + return string(bx) == x && string(by) == y +} + +// compareByte returns a Result where the result is Equal if x == y, +// similar if x and y differ only in casing, and different otherwise. +func compareByte(x, y byte) (r Result) { + switch { + case x == y: + return equalResult // Identity + case unicode.ToUpper(rune(x)) == unicode.ToUpper(rune(y)): + return similarResult // Modified + default: + return differentResult // UniqueX or UniqueY + } +} + +var ( + equalResult = Result{NDiff: 0} + similarResult = Result{NDiff: 1} + differentResult = Result{NDiff: 2} +) + +func TestResult(t *testing.T) { + tests := []struct { + result Result + wantEqual bool + wantSimilar bool + }{ + // equalResult is equal since NDiff == 0, by definition of Equal method. + {equalResult, true, true}, + // similarResult is similar since it is a binary result where only one + // element was compared (i.e., Either NSame==1 or NDiff==1). + {similarResult, false, true}, + // differentResult is different since there are enough differences that + // it isn't even considered similar. + {differentResult, false, false}, + + // Zero value is always equal. + {Result{NSame: 0, NDiff: 0}, true, true}, + + // Binary comparisons (where NSame+NDiff == 1) are always similar. + {Result{NSame: 1, NDiff: 0}, true, true}, + {Result{NSame: 0, NDiff: 1}, false, true}, + + // More complex ratios. The exact ratio for similarity may change, + // and may require updates to these test cases. + {Result{NSame: 1, NDiff: 1}, false, true}, + {Result{NSame: 1, NDiff: 2}, false, true}, + {Result{NSame: 1, NDiff: 3}, false, false}, + {Result{NSame: 2, NDiff: 1}, false, true}, + {Result{NSame: 2, NDiff: 2}, false, true}, + {Result{NSame: 2, NDiff: 3}, false, true}, + {Result{NSame: 3, NDiff: 1}, false, true}, + {Result{NSame: 3, NDiff: 2}, false, true}, + {Result{NSame: 3, NDiff: 3}, false, true}, + {Result{NSame: 1000, NDiff: 0}, true, true}, + {Result{NSame: 1000, NDiff: 1}, false, true}, + {Result{NSame: 1000, NDiff: 2}, false, true}, + {Result{NSame: 0, NDiff: 1000}, false, false}, + {Result{NSame: 1, NDiff: 1000}, false, false}, + {Result{NSame: 2, NDiff: 1000}, false, false}, + } + + for _, tt := range tests { + if got := tt.result.Equal(); got != tt.wantEqual { + t.Errorf("%#v.Equal() = %v, want %v", tt.result, got, tt.wantEqual) + } + if got := tt.result.Similar(); got != tt.wantSimilar { + t.Errorf("%#v.Similar() = %v, want %v", tt.result, got, tt.wantSimilar) + } + } +} + +// TODO: Delete this hack when we drop Go1.6 support. +func tRun(t *testing.T, name string, f func(t *testing.T)) { + type runner interface { + Run(string, func(t *testing.T)) bool + } + var ti interface{} = t + if r, ok := ti.(runner); ok { + r.Run(name, f) + } else { + t.Logf("Test: %s", name) + f(t) + } +} diff --git a/cmp/path.go b/cmp/path.go index 5a88dbd..0c2eb33 100644 --- a/cmp/path.go +++ b/cmp/path.go @@ -36,7 +36,19 @@ type ( // SliceIndex is an index operation on a slice or array at some index Key. SliceIndex interface { PathStep - Key() int + Key() int // May return -1 if in a split state + + // SplitKeys returns the indexes for indexing into slices in the + // x and y values, respectively. These indexes may differ due to the + // insertion or removal of an element in one of the slices, causing + // all of the indexes to be shifted. If an index is -1, then that + // indicates that the element does not exist in the associated slice. + // + // Key is guaranteed to return -1 if and only if the indexes returned + // by SplitKeys are not the same. SplitKeys will never return -1 for + // both indexes. + SplitKeys() (x int, y int) + isSliceIndex() } // MapIndex is an index operation on a map at some index Key. @@ -163,7 +175,7 @@ type ( sliceIndex struct { pathStep - key int + xkey, ykey int } mapIndex struct { pathStep @@ -205,19 +217,39 @@ func (ps pathStep) String() string { return fmt.Sprintf("{%s}", s) } -func (si sliceIndex) String() string { return fmt.Sprintf("[%d]", si.key) } +func (si sliceIndex) String() string { + switch { + case si.xkey == si.ykey: + return fmt.Sprintf("[%d]", si.xkey) + case si.ykey == -1: + // [5->?] means "I don't know where X[5] went" + return fmt.Sprintf("[%d->?]", si.xkey) + case si.xkey == -1: + // [?->3] means "I don't know where Y[3] came from" + return fmt.Sprintf("[?->%d]", si.ykey) + default: + // [5->3] means "X[5] moved to Y[3]" + return fmt.Sprintf("[%d->%d]", si.xkey, si.ykey) + } +} func (mi mapIndex) String() string { return fmt.Sprintf("[%#v]", mi.key) } func (ta typeAssertion) String() string { return fmt.Sprintf(".(%v)", ta.typ) } func (sf structField) String() string { return fmt.Sprintf(".%s", sf.name) } func (in indirect) String() string { return "*" } func (tf transform) String() string { return fmt.Sprintf("%s()", tf.trans.name) } -func (si sliceIndex) Key() int { return si.key } -func (mi mapIndex) Key() reflect.Value { return mi.key } -func (sf structField) Name() string { return sf.name } -func (sf structField) Index() int { return sf.idx } -func (tf transform) Name() string { return tf.trans.name } -func (tf transform) Func() reflect.Value { return tf.trans.fnc } +func (si sliceIndex) Key() int { + if si.xkey != si.ykey { + return -1 + } + return si.xkey +} +func (si sliceIndex) SplitKeys() (x, y int) { return si.xkey, si.ykey } +func (mi mapIndex) Key() reflect.Value { return mi.key } +func (sf structField) Name() string { return sf.name } +func (sf structField) Index() int { return sf.idx } +func (tf transform) Name() string { return tf.trans.name } +func (tf transform) Func() reflect.Value { return tf.trans.fnc } func (pathStep) isPathStep() {} func (sliceIndex) isSliceIndex() {} diff --git a/cmp/reporter.go b/cmp/reporter.go index 1b812f5..3f3454b 100644 --- a/cmp/reporter.go +++ b/cmp/reporter.go @@ -26,31 +26,6 @@ type defaultReporter struct { var _ reporter = (*defaultReporter)(nil) func (r *defaultReporter) Report(x, y reflect.Value, eq bool, p Path) { - // TODO: Is there a way to nicely print added/modified/removed elements - // from a slice? This will most certainly require support from the - // equality logic, but what would be the right API for this? - // - // The current API is equivalent to a Hamming distance for measuring the - // difference between two sequences of symbols. That is, the only operation - // we can represent is substitution. The new API would need to handle a - // Levenshtein distance, such that insertions, deletions, and substitutions - // are permitted. Furthermore, this will require an algorithm for computing - // the edit distance. Unfortunately, the time complexity for a minimal - // edit distance algorithm is not much better than O(n^2). - // There are approximations for the algorithm that can run much faster. - // See literature on computing Levenshtein distance. - // - // Passing in a pair of x and y is actually good for representing insertion - // and deletion by the fact that x or y may be an invalid value. However, - // we may need to pass in two paths px and py, to indicate the paths - // relative to x and y. Alternative, since we only perform the Levenshtein - // distance on slices, maybe we alter the SliceIndex type to record - // two different indexes. - - // TODO: Perhaps we should coalesce differences on primitive kinds - // together if the number of differences exceeds some ratio. - // For example, comparing two SHA256s leads to many byte differences. - if eq { // TODO: Maybe print some equal results for context? return // Ignore equal results @@ -62,7 +37,7 @@ func (r *defaultReporter) Report(x, y reflect.Value, eq bool, p Path) { sx := prettyPrint(x, true) sy := prettyPrint(y, true) if sx == sy { - // Use of Stringer is not helpful, so rely on more exact formatting. + // Stringer is not helpful, so rely on more exact formatting. sx = prettyPrint(x, false) sy = prettyPrint(y, false) } |