aboutsummaryrefslogtreecommitdiff
path: root/cmp/internal
diff options
context:
space:
mode:
authorJoe Tsai <joetsai@digital-static.net>2020-11-24 09:53:22 -0800
committerGitHub <noreply@github.com>2020-11-24 09:53:22 -0800
commit449e17c6c9daf9b0c84a35fef7d79321b9535763 (patch)
tree11323fda8d7dab06915dc2c8c0206dafa28a4cf6 /cmp/internal
parentade6b74536ea3af0d70b4ebd51c08c5d31313078 (diff)
downloadgo-cmp-449e17c6c9daf9b0c84a35fef7d79321b9535763.tar.gz
Fix non-determinism in diffing algorithm (#247)
A previous attempt to add non-determinism to the diffing algorithm unfortunately broke the algorithm for half the cases. This change modifies the algorithm to truly switch between starting with a forward search versus a reverse search. The main for-loop of Difference would switch repeatedly between performing a forward search, then a reverse search, and vice-versa. Since we can't jump into the middle of a for-loop to start with the reverse search first, we use a series of labels and goto statements to accomplish the same effect. Fixes #238
Diffstat (limited to 'cmp/internal')
-rw-r--r--cmp/internal/diff/diff.go48
-rw-r--r--cmp/internal/diff/diff_test.go27
2 files changed, 40 insertions, 35 deletions
diff --git a/cmp/internal/diff/diff.go b/cmp/internal/diff/diff.go
index 441953e..bc196b1 100644
--- a/cmp/internal/diff/diff.go
+++ b/cmp/internal/diff/diff.go
@@ -119,7 +119,7 @@ func (r Result) Similar() bool {
return r.NumSame+1 >= r.NumDiff
}
-var randInt = rand.New(rand.NewSource(time.Now().Unix())).Intn(2)
+var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0
// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
@@ -168,17 +168,6 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.
- // To ensure flexibility in changing the algorithm in the future,
- // introduce some degree of deliberate instability.
- // This is achieved by fiddling the zigzag iterator to start searching
- // the graph starting from the bottom-right versus than the top-left.
- // The result may differ depending on the starting search location,
- // but still produces a valid edit script.
- zigzagInit := randInt // either 0 or 1
- if flags.Deterministic {
- zigzagInit = 0
- }
-
// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
@@ -197,6 +186,11 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// approximately the square-root of the search budget.
searchBudget := 4 * (nx + ny) // O(n)
+ // Running the tests with the "cmp_debug" build tag prints a visualization
+ // of the algorithm running in real-time. This is educational for
+ // understanding how the algorithm works. See debug_enable.go.
+ f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
+
// The algorithm below is a greedy, meet-in-the-middle algorithm for
// computing sub-optimal edit-scripts between two lists.
//
@@ -214,22 +208,28 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// frontier towards the opposite corner.
// • This algorithm terminates when either the X coordinates or the
// Y coordinates of the forward and reverse frontier points ever intersect.
- //
+
// This algorithm is correct even if searching only in the forward direction
// or in the reverse direction. We do both because it is commonly observed
// that two lists commonly differ because elements were added to the front
// or end of the other list.
//
- // Running the tests with the "cmp_debug" build tag prints a visualization
- // of the algorithm running in real-time. This is educational for
- // understanding how the algorithm works. See debug_enable.go.
- f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
- for {
+ // Non-deterministically start with either the forward or reverse direction
+ // to introduce some deliberate instability so that we have the flexibility
+ // to change this algorithm in the future.
+ if flags.Deterministic || randBool {
+ goto forwardSearch
+ } else {
+ goto reverseSearch
+ }
+
+forwardSearch:
+ {
// Forward search from the beginning.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
- break
+ goto finishSearch
}
- for stop1, stop2, i := false, false, zigzagInit; !(stop1 && stop2) && searchBudget > 0; i++ {
+ for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
@@ -262,10 +262,14 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
} else {
fwdFrontier.Y++
}
+ goto reverseSearch
+ }
+reverseSearch:
+ {
// Reverse search from the end.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
- break
+ goto finishSearch
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
@@ -300,8 +304,10 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
} else {
revFrontier.Y--
}
+ goto forwardSearch
}
+finishSearch:
// Join the forward and reverse paths and then append the reverse path.
fwdPath.connect(revPath.point, f)
for i := len(revPath.es) - 1; i >= 0; i-- {
diff --git a/cmp/internal/diff/diff_test.go b/cmp/internal/diff/diff_test.go
index d97fef8..eacf072 100644
--- a/cmp/internal/diff/diff_test.go
+++ b/cmp/internal/diff/diff_test.go
@@ -10,21 +10,15 @@ import (
"strings"
"testing"
"unicode"
-
- "github.com/google/go-cmp/cmp/internal/flags"
)
-func init() {
- flags.Deterministic = true
-}
-
func TestDifference(t *testing.T) {
tests := []struct {
// Before passing x and y to Difference, we strip all spaces so that
// they can be used by the test author to indicate a missing symbol
// in one of the lists.
x, y string
- want string
+ want string // '|' separated list of possible outputs
}{{
x: "",
y: "",
@@ -36,7 +30,7 @@ func TestDifference(t *testing.T) {
}, {
x: "##",
y: "# ",
- want: ".X",
+ want: ".X|X.",
}, {
x: "a#",
y: "A ",
@@ -48,7 +42,7 @@ func TestDifference(t *testing.T) {
}, {
x: "# ",
y: "##",
- want: ".Y",
+ want: ".Y|Y.",
}, {
x: " #",
y: "@#",
@@ -148,7 +142,7 @@ func TestDifference(t *testing.T) {
}, {
x: "ABCAB BA ",
y: " C BABAC",
- want: "XX.X.Y..Y",
+ want: "XX.X.Y..Y|XX.Y.X..Y",
}, {
x: "# #### ###",
y: "#y####yy###",
@@ -164,7 +158,7 @@ func TestDifference(t *testing.T) {
}, {
x: "0 12z3x 456789 x x 0",
y: "0y12Z3 y456789y y y0",
- want: ".Y..M.XY......YXYXY.",
+ want: ".Y..M.XY......YXYXY.|.Y..M.XY......XYXYY.",
}, {
x: "0 2 4 6 8 ..................abXXcdEXF.ghXi",
y: " 1 3 5 7 9..................AB CDE F.GH I",
@@ -216,7 +210,7 @@ func TestDifference(t *testing.T) {
}, {
x: "0123456789 ",
y: " 5678901234",
- want: "XXXXX.....YYYYY",
+ want: "XXXXX.....YYYYY|YYYYY.....XXXXX",
}, {
x: "0123456789 ",
y: " 4567890123",
@@ -252,9 +246,14 @@ func TestDifference(t *testing.T) {
x := strings.Replace(tt.x, " ", "", -1)
y := strings.Replace(tt.y, " ", "", -1)
es := testStrings(t, x, y)
- if got := es.String(); got != tt.want {
- t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want)
+ var want string
+ got := es.String()
+ for _, want = range strings.Split(tt.want, "|") {
+ if got == want {
+ return
+ }
}
+ t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, want)
})
}
}