aboutsummaryrefslogtreecommitdiff
path: root/internal/diff
diff options
context:
space:
mode:
Diffstat (limited to 'internal/diff')
-rw-r--r--internal/diff/diff.go169
-rw-r--r--internal/diff/diff_test.go199
-rw-r--r--internal/diff/difftest/difftest.go289
-rw-r--r--internal/diff/difftest/difftest_test.go83
-rw-r--r--internal/diff/export_test.go9
-rw-r--r--internal/diff/lcs/common.go179
-rw-r--r--internal/diff/lcs/common_test.go140
-rw-r--r--internal/diff/lcs/doc.go156
-rw-r--r--internal/diff/lcs/git.sh33
-rw-r--r--internal/diff/lcs/labels.go55
-rw-r--r--internal/diff/lcs/old.go480
-rw-r--r--internal/diff/lcs/old_test.go251
-rw-r--r--internal/diff/lcs/sequence.go113
-rw-r--r--internal/diff/myers/diff.go215
-rw-r--r--internal/diff/myers/diff_test.go16
-rw-r--r--internal/diff/ndiff.go109
-rw-r--r--internal/diff/unified.go248
17 files changed, 2744 insertions, 0 deletions
diff --git a/internal/diff/diff.go b/internal/diff/diff.go
new file mode 100644
index 000000000..2bc63c2a8
--- /dev/null
+++ b/internal/diff/diff.go
@@ -0,0 +1,169 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package diff computes differences between text files or strings.
+package diff
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+)
+
+// An Edit describes the replacement of a portion of a text file.
+type Edit struct {
+ Start, End int // byte offsets of the region to replace
+ New string // the replacement
+}
+
+func (e Edit) String() string {
+ return fmt.Sprintf("{Start:%d,End:%d,New:%s}", e.Start, e.End, e.New)
+}
+
+// Apply applies a sequence of edits to the src buffer and returns the
+// result. Edits are applied in order of start offset; edits with the
+// same start offset are applied in the order they were provided.
+//
+// Apply returns an error if any edit is out of bounds,
+// or if any pair of edits is overlapping.
+func Apply(src string, edits []Edit) (string, error) {
+ edits, size, err := validate(src, edits)
+ if err != nil {
+ return "", err
+ }
+
+ // Apply edits.
+ out := make([]byte, 0, size)
+ lastEnd := 0
+ for _, edit := range edits {
+ if lastEnd < edit.Start {
+ out = append(out, src[lastEnd:edit.Start]...)
+ }
+ out = append(out, edit.New...)
+ lastEnd = edit.End
+ }
+ out = append(out, src[lastEnd:]...)
+
+ if len(out) != size {
+ panic("wrong size")
+ }
+
+ return string(out), nil
+}
+
+// ApplyBytes is like Apply, but it accepts a byte slice.
+// The result is always a new array.
+func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
+ res, err := Apply(string(src), edits)
+ return []byte(res), err
+}
+
+// validate checks that edits are consistent with src,
+// and returns the size of the patched output.
+// It may return a different slice.
+func validate(src string, edits []Edit) ([]Edit, int, error) {
+ if !sort.IsSorted(editsSort(edits)) {
+ edits = append([]Edit(nil), edits...)
+ SortEdits(edits)
+ }
+
+ // Check validity of edits and compute final size.
+ size := len(src)
+ lastEnd := 0
+ for _, edit := range edits {
+ if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
+ return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
+ }
+ if edit.Start < lastEnd {
+ return nil, 0, fmt.Errorf("diff has overlapping edits")
+ }
+ size += len(edit.New) + edit.Start - edit.End
+ lastEnd = edit.End
+ }
+
+ return edits, size, nil
+}
+
+// SortEdits orders a slice of Edits by (start, end) offset.
+// This ordering puts insertions (end = start) before deletions
+// (end > start) at the same point, but uses a stable sort to preserve
+// the order of multiple insertions at the same point.
+// (Apply detects multiple deletions at the same point as an error.)
+func SortEdits(edits []Edit) {
+ sort.Stable(editsSort(edits))
+}
+
+type editsSort []Edit
+
+func (a editsSort) Len() int { return len(a) }
+func (a editsSort) Less(i, j int) bool {
+ if cmp := a[i].Start - a[j].Start; cmp != 0 {
+ return cmp < 0
+ }
+ return a[i].End < a[j].End
+}
+func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+// lineEdits expands and merges a sequence of edits so that each
+// resulting edit replaces one or more complete lines.
+// See ApplyEdits for preconditions.
+func lineEdits(src string, edits []Edit) ([]Edit, error) {
+ edits, _, err := validate(src, edits)
+ if err != nil {
+ return nil, err
+ }
+
+ // Do all edits begin and end at the start of a line?
+ // TODO(adonovan): opt: is this fast path necessary?
+ // (Also, it complicates the result ownership.)
+ for _, edit := range edits {
+ if edit.Start >= len(src) || // insertion at EOF
+ edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start
+ edit.End > 0 && src[edit.End-1] != '\n' { // not at line start
+ goto expand
+ }
+ }
+ return edits, nil // aligned
+
+expand:
+ expanded := make([]Edit, 0, len(edits)) // a guess
+ prev := edits[0]
+ // TODO(adonovan): opt: start from the first misaligned edit.
+ // TODO(adonovan): opt: avoid quadratic cost of string += string.
+ for _, edit := range edits[1:] {
+ between := src[prev.End:edit.Start]
+ if !strings.Contains(between, "\n") {
+ // overlapping lines: combine with previous edit.
+ prev.New += between + edit.New
+ prev.End = edit.End
+ } else {
+ // non-overlapping lines: flush previous edit.
+ expanded = append(expanded, expandEdit(prev, src))
+ prev = edit
+ }
+ }
+ return append(expanded, expandEdit(prev, src)), nil // flush final edit
+}
+
+// expandEdit returns edit expanded to complete whole lines.
+func expandEdit(edit Edit, src string) Edit {
+ // Expand start left to start of line.
+ // (delta is the zero-based column number of start.)
+ start := edit.Start
+ if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
+ edit.Start -= delta
+ edit.New = src[start-delta:start] + edit.New
+ }
+
+ // Expand end right to end of line.
+ end := edit.End
+ if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
+ edit.End = len(src) // extend to EOF
+ } else {
+ edit.End = end + nl + 1 // extend beyond \n
+ }
+ edit.New += src[end:edit.End]
+
+ return edit
+}
diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go
new file mode 100644
index 000000000..b6881c1f2
--- /dev/null
+++ b/internal/diff/diff_test.go
@@ -0,0 +1,199 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff_test
+
+import (
+ "bytes"
+ "math/rand"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "golang.org/x/tools/internal/diff"
+ "golang.org/x/tools/internal/diff/difftest"
+ "golang.org/x/tools/internal/testenv"
+)
+
+func TestApply(t *testing.T) {
+ for _, tc := range difftest.TestCases {
+ t.Run(tc.Name, func(t *testing.T) {
+ got, err := diff.Apply(tc.In, tc.Edits)
+ if err != nil {
+ t.Fatalf("Apply(Edits) failed: %v", err)
+ }
+ if got != tc.Out {
+ t.Errorf("Apply(Edits): got %q, want %q", got, tc.Out)
+ }
+ if tc.LineEdits != nil {
+ got, err := diff.Apply(tc.In, tc.LineEdits)
+ if err != nil {
+ t.Fatalf("Apply(LineEdits) failed: %v", err)
+ }
+ if got != tc.Out {
+ t.Errorf("Apply(LineEdits): got %q, want %q", got, tc.Out)
+ }
+ }
+ })
+ }
+}
+
+func TestNEdits(t *testing.T) {
+ for _, tc := range difftest.TestCases {
+ edits := diff.Strings(tc.In, tc.Out)
+ got, err := diff.Apply(tc.In, edits)
+ if err != nil {
+ t.Fatalf("Apply failed: %v", err)
+ }
+ if got != tc.Out {
+ t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out)
+ }
+ if len(edits) < len(tc.Edits) { // should find subline edits
+ t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc)
+ }
+ }
+}
+
+func TestNRandom(t *testing.T) {
+ rand.Seed(1)
+ for i := 0; i < 1000; i++ {
+ a := randstr("abω", 16)
+ b := randstr("abωc", 16)
+ edits := diff.Strings(a, b)
+ got, err := diff.Apply(a, edits)
+ if err != nil {
+ t.Fatalf("Apply failed: %v", err)
+ }
+ if got != b {
+ t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a)
+ }
+ }
+}
+
+// $ go test -fuzz=FuzzRoundTrip ./internal/diff
+func FuzzRoundTrip(f *testing.F) {
+ f.Fuzz(func(t *testing.T, a, b string) {
+ if !utf8.ValidString(a) || !utf8.ValidString(b) {
+ return // inputs must be text
+ }
+ edits := diff.Strings(a, b)
+ got, err := diff.Apply(a, edits)
+ if err != nil {
+ t.Fatalf("Apply failed: %v", err)
+ }
+ if got != b {
+ t.Fatalf("applying diff(%q, %q) gives %q; edits=%v", a, b, got, edits)
+ }
+ })
+}
+
+func TestLineEdits(t *testing.T) {
+ for _, tc := range difftest.TestCases {
+ t.Run(tc.Name, func(t *testing.T) {
+ // if line edits not specified, it is the same as edits
+ edits := tc.LineEdits
+ if edits == nil {
+ edits = tc.Edits
+ }
+ got, err := diff.LineEdits(tc.In, tc.Edits)
+ if err != nil {
+ t.Fatalf("LineEdits: %v", err)
+ }
+ if !reflect.DeepEqual(got, edits) {
+ t.Errorf("LineEdits got\n%q, want\n%q\n%#v", got, edits, tc)
+ }
+ })
+ }
+}
+
+func TestToUnified(t *testing.T) {
+ testenv.NeedsTool(t, "patch")
+ for _, tc := range difftest.TestCases {
+ t.Run(tc.Name, func(t *testing.T) {
+ unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if unified == "" {
+ return
+ }
+ orig := filepath.Join(t.TempDir(), "original")
+ err = os.WriteFile(orig, []byte(tc.In), 0644)
+ if err != nil {
+ t.Fatal(err)
+ }
+ temp := filepath.Join(t.TempDir(), "patched")
+ err = os.WriteFile(temp, []byte(tc.In), 0644)
+ if err != nil {
+ t.Fatal(err)
+ }
+ cmd := exec.Command("patch", "-p0", "-u", "-s", "-o", temp, orig)
+ cmd.Stdin = strings.NewReader(unified)
+ cmd.Stdout = new(bytes.Buffer)
+ cmd.Stderr = new(bytes.Buffer)
+ if err = cmd.Run(); err != nil {
+ t.Fatalf("%v: %q (%q) (%q)", err, cmd.String(),
+ cmd.Stderr, cmd.Stdout)
+ }
+ got, err := os.ReadFile(temp)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if string(got) != tc.Out {
+ t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q",
+ got, tc.Out, unified)
+ }
+
+ })
+ }
+}
+
+func TestRegressionOld001(t *testing.T) {
+ a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+
+ b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+ diffs := diff.Strings(a, b)
+ got, err := diff.Apply(a, diffs)
+ if err != nil {
+ t.Fatalf("Apply failed: %v", err)
+ }
+ if got != b {
+ i := 0
+ for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ {
+ }
+ t.Errorf("oops %vd\n%q\n%q", diffs, got, b)
+ t.Errorf("\n%q\n%q", got[i:], b[i:])
+ }
+}
+
+func TestRegressionOld002(t *testing.T) {
+ a := "n\"\n)\n"
+ b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
+ diffs := diff.Strings(a, b)
+ got, err := diff.Apply(a, diffs)
+ if err != nil {
+ t.Fatalf("Apply failed: %v", err)
+ }
+ if got != b {
+ i := 0
+ for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ {
+ }
+ t.Errorf("oops %vd\n%q\n%q", diffs, got, b)
+ t.Errorf("\n%q\n%q", got[i:], b[i:])
+ }
+}
+
+// return a random string of length n made of characters from s
+func randstr(s string, n int) string {
+ src := []rune(s)
+ x := make([]rune, n)
+ for i := 0; i < n; i++ {
+ x[i] = src[rand.Intn(len(src))]
+ }
+ return string(x)
+}
diff --git a/internal/diff/difftest/difftest.go b/internal/diff/difftest/difftest.go
new file mode 100644
index 000000000..4a251111b
--- /dev/null
+++ b/internal/diff/difftest/difftest.go
@@ -0,0 +1,289 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package difftest supplies a set of tests that will operate on any
+// implementation of a diff algorithm as exposed by
+// "golang.org/x/tools/internal/diff"
+package difftest
+
+// There are two kinds of tests, semantic tests, and 'golden data' tests.
+// The semantic tests check that the computed diffs transform the input to
+// the output, and that 'patch' accepts the computed unified diffs.
+// The other tests just check that Edits and LineEdits haven't changed
+// unexpectedly. These fields may need to be changed when the diff algorithm
+// changes.
+
+import (
+ "testing"
+
+ "golang.org/x/tools/internal/diff"
+)
+
+const (
+ FileA = "from"
+ FileB = "to"
+ UnifiedPrefix = "--- " + FileA + "\n+++ " + FileB + "\n"
+)
+
+var TestCases = []struct {
+ Name, In, Out, Unified string
+ Edits, LineEdits []diff.Edit
+ NoDiff bool
+}{{
+ Name: "empty",
+ In: "",
+ Out: "",
+}, {
+ Name: "no_diff",
+ In: "gargantuan\n",
+ Out: "gargantuan\n",
+}, {
+ Name: "replace_all",
+ In: "fruit\n",
+ Out: "cheese\n",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-fruit
++cheese
+`[1:],
+ Edits: []diff.Edit{{Start: 0, End: 5, New: "cheese"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 6, New: "cheese\n"}},
+}, {
+ Name: "insert_rune",
+ In: "gord\n",
+ Out: "gourd\n",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-gord
++gourd
+`[1:],
+ Edits: []diff.Edit{{Start: 2, End: 2, New: "u"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 5, New: "gourd\n"}},
+}, {
+ Name: "delete_rune",
+ In: "groat\n",
+ Out: "goat\n",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-groat
++goat
+`[1:],
+ Edits: []diff.Edit{{Start: 1, End: 2, New: ""}},
+ LineEdits: []diff.Edit{{Start: 0, End: 6, New: "goat\n"}},
+}, {
+ Name: "replace_rune",
+ In: "loud\n",
+ Out: "lord\n",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-loud
++lord
+`[1:],
+ Edits: []diff.Edit{{Start: 2, End: 3, New: "r"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 5, New: "lord\n"}},
+}, {
+ Name: "replace_partials",
+ In: "blanket\n",
+ Out: "bunker\n",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-blanket
++bunker
+`[1:],
+ Edits: []diff.Edit{
+ {Start: 1, End: 3, New: "u"},
+ {Start: 6, End: 7, New: "r"},
+ },
+ LineEdits: []diff.Edit{{Start: 0, End: 8, New: "bunker\n"}},
+}, {
+ Name: "insert_line",
+ In: "1: one\n3: three\n",
+ Out: "1: one\n2: two\n3: three\n",
+ Unified: UnifiedPrefix + `
+@@ -1,2 +1,3 @@
+ 1: one
++2: two
+ 3: three
+`[1:],
+ Edits: []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}},
+}, {
+ Name: "replace_no_newline",
+ In: "A",
+ Out: "B",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-A
+\ No newline at end of file
++B
+\ No newline at end of file
+`[1:],
+ Edits: []diff.Edit{{Start: 0, End: 1, New: "B"}},
+}, {
+ Name: "append_empty",
+ In: "", // GNU diff -u special case: -0,0
+ Out: "AB\nC",
+ Unified: UnifiedPrefix + `
+@@ -0,0 +1,2 @@
++AB
++C
+\ No newline at end of file
+`[1:],
+ Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+},
+ // TODO(adonovan): fix this test: GNU diff -u prints "+1,2", Unified prints "+1,3".
+ // {
+ // Name: "add_start",
+ // In: "A",
+ // Out: "B\nCA",
+ // Unified: UnifiedPrefix + `
+ // @@ -1 +1,2 @@
+ // -A
+ // \ No newline at end of file
+ // +B
+ // +CA
+ // \ No newline at end of file
+ // `[1:],
+ // Edits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
+ // LineEdits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
+ // },
+ {
+ Name: "add_end",
+ In: "A",
+ Out: "AB",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-A
+\ No newline at end of file
++AB
+\ No newline at end of file
+`[1:],
+ Edits: []diff.Edit{{Start: 1, End: 1, New: "B"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 1, New: "AB"}},
+ }, {
+ Name: "add_empty",
+ In: "",
+ Out: "AB\nC",
+ Unified: UnifiedPrefix + `
+@@ -0,0 +1,2 @@
++AB
++C
+\ No newline at end of file
+`[1:],
+ Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
+ }, {
+ Name: "add_newline",
+ In: "A",
+ Out: "A\n",
+ Unified: UnifiedPrefix + `
+@@ -1 +1 @@
+-A
+\ No newline at end of file
++A
+`[1:],
+ Edits: []diff.Edit{{Start: 1, End: 1, New: "\n"}},
+ LineEdits: []diff.Edit{{Start: 0, End: 1, New: "A\n"}},
+ }, {
+ Name: "delete_front",
+ In: "A\nB\nC\nA\nB\nB\nA\n",
+ Out: "C\nB\nA\nB\nA\nC\n",
+ Unified: UnifiedPrefix + `
+@@ -1,7 +1,6 @@
+-A
+-B
+ C
++B
+ A
+ B
+-B
+ A
++C
+`[1:],
+ NoDiff: true, // unified diff is different but valid
+ Edits: []diff.Edit{
+ {Start: 0, End: 4, New: ""},
+ {Start: 6, End: 6, New: "B\n"},
+ {Start: 10, End: 12, New: ""},
+ {Start: 14, End: 14, New: "C\n"},
+ },
+ LineEdits: []diff.Edit{
+ {Start: 0, End: 6, New: "C\n"},
+ {Start: 6, End: 8, New: "B\nA\n"},
+ {Start: 10, End: 14, New: "A\n"},
+ {Start: 14, End: 14, New: "C\n"},
+ },
+ }, {
+ Name: "replace_last_line",
+ In: "A\nB\n",
+ Out: "A\nC\n\n",
+ Unified: UnifiedPrefix + `
+@@ -1,2 +1,3 @@
+ A
+-B
++C
++
+`[1:],
+ Edits: []diff.Edit{{Start: 2, End: 3, New: "C\n"}},
+ LineEdits: []diff.Edit{{Start: 2, End: 4, New: "C\n\n"}},
+ },
+ {
+ Name: "multiple_replace",
+ In: "A\nB\nC\nD\nE\nF\nG\n",
+ Out: "A\nH\nI\nJ\nE\nF\nK\n",
+ Unified: UnifiedPrefix + `
+@@ -1,7 +1,7 @@
+ A
+-B
+-C
+-D
++H
++I
++J
+ E
+ F
+-G
++K
+`[1:],
+ Edits: []diff.Edit{
+ {Start: 2, End: 8, New: "H\nI\nJ\n"},
+ {Start: 12, End: 14, New: "K\n"},
+ },
+ NoDiff: true, // diff algorithm produces different delete/insert pattern
+ },
+ {
+ Name: "extra_newline",
+ In: "\nA\n",
+ Out: "A\n",
+ Edits: []diff.Edit{{Start: 0, End: 1, New: ""}},
+ Unified: UnifiedPrefix + `@@ -1,2 +1 @@
+-
+ A
+`,
+ },
+}
+
+func DiffTest(t *testing.T, compute func(before, after string) []diff.Edit) {
+ for _, test := range TestCases {
+ t.Run(test.Name, func(t *testing.T) {
+ edits := compute(test.In, test.Out)
+ got, err := diff.Apply(test.In, edits)
+ if err != nil {
+ t.Fatalf("Apply failed: %v", err)
+ }
+ unified, err := diff.ToUnified(FileA, FileB, test.In, edits)
+ if err != nil {
+ t.Fatalf("ToUnified: %v", err)
+ }
+ if got != test.Out {
+ t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v",
+ got, unified, test.Out)
+ }
+ if !test.NoDiff && unified != test.Unified {
+ t.Errorf("Unified: got diff:\n%q\nexpected:\n%q diffs:%v",
+ unified, test.Unified, edits)
+ }
+ })
+ }
+}
diff --git a/internal/diff/difftest/difftest_test.go b/internal/diff/difftest/difftest_test.go
new file mode 100644
index 000000000..a990e5224
--- /dev/null
+++ b/internal/diff/difftest/difftest_test.go
@@ -0,0 +1,83 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package difftest supplies a set of tests that will operate on any
+// implementation of a diff algorithm as exposed by
+// "golang.org/x/tools/internal/diff"
+package difftest_test
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "strings"
+ "testing"
+
+ "golang.org/x/tools/internal/diff/difftest"
+ "golang.org/x/tools/internal/testenv"
+)
+
+func TestVerifyUnified(t *testing.T) {
+ testenv.NeedsTool(t, "diff")
+ for _, test := range difftest.TestCases {
+ t.Run(test.Name, func(t *testing.T) {
+ if test.NoDiff {
+ t.Skip("diff tool produces expected different results")
+ }
+ diff, err := getDiffOutput(test.In, test.Out)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(diff) > 0 {
+ diff = difftest.UnifiedPrefix + diff
+ }
+ if diff != test.Unified {
+ t.Errorf("unified:\n%s\ndiff -u:\n%s", test.Unified, diff)
+ }
+ })
+ }
+}
+
+func getDiffOutput(a, b string) (string, error) {
+ fileA, err := ioutil.TempFile("", "myers.in")
+ if err != nil {
+ return "", err
+ }
+ defer os.Remove(fileA.Name())
+ if _, err := fileA.Write([]byte(a)); err != nil {
+ return "", err
+ }
+ if err := fileA.Close(); err != nil {
+ return "", err
+ }
+ fileB, err := ioutil.TempFile("", "myers.in")
+ if err != nil {
+ return "", err
+ }
+ defer os.Remove(fileB.Name())
+ if _, err := fileB.Write([]byte(b)); err != nil {
+ return "", err
+ }
+ if err := fileB.Close(); err != nil {
+ return "", err
+ }
+ cmd := exec.Command("diff", "-u", fileA.Name(), fileB.Name())
+ cmd.Env = append(cmd.Env, "LANG=en_US.UTF-8")
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ if _, ok := err.(*exec.ExitError); !ok {
+ return "", fmt.Errorf("failed to run diff -u %v %v: %v\n%v", fileA.Name(), fileB.Name(), err, string(out))
+ }
+ }
+ diff := string(out)
+ if len(diff) <= 0 {
+ return diff, nil
+ }
+ bits := strings.SplitN(diff, "\n", 3)
+ if len(bits) != 3 {
+ return "", fmt.Errorf("diff output did not have file prefix:\n%s", diff)
+ }
+ return bits[2], nil
+}
diff --git a/internal/diff/export_test.go b/internal/diff/export_test.go
new file mode 100644
index 000000000..eedf0dd77
--- /dev/null
+++ b/internal/diff/export_test.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+// This file exports some private declarations to tests.
+
+var LineEdits = lineEdits
diff --git a/internal/diff/lcs/common.go b/internal/diff/lcs/common.go
new file mode 100644
index 000000000..c3e82dd26
--- /dev/null
+++ b/internal/diff/lcs/common.go
@@ -0,0 +1,179 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+ "log"
+ "sort"
+)
+
+// lcs is a longest common sequence
+type lcs []diag
+
+// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i<Len.
+// All computed diagonals are parts of a longest common subsequence.
+type diag struct {
+ X, Y int
+ Len int
+}
+
+// sort sorts in place, by lowest X, and if tied, inversely by Len
+func (l lcs) sort() lcs {
+ sort.Slice(l, func(i, j int) bool {
+ if l[i].X != l[j].X {
+ return l[i].X < l[j].X
+ }
+ return l[i].Len > l[j].Len
+ })
+ return l
+}
+
+// validate that the elements of the lcs do not overlap
+// (can only happen when the two-sided algorithm ends early)
+// expects the lcs to be sorted
+func (l lcs) valid() bool {
+ for i := 1; i < len(l); i++ {
+ if l[i-1].X+l[i-1].Len > l[i].X {
+ return false
+ }
+ if l[i-1].Y+l[i-1].Len > l[i].Y {
+ return false
+ }
+ }
+ return true
+}
+
+// repair overlapping lcs
+// only called if two-sided stops early
+func (l lcs) fix() lcs {
+ // from the set of diagonals in l, find a maximal non-conflicting set
+ // this problem may be NP-complete, but we use a greedy heuristic,
+ // which is quadratic, but with a better data structure, could be D log D.
+ // independent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs
+ // which has to have monotone x and y
+ if len(l) == 0 {
+ return nil
+ }
+ sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len })
+ tmp := make(lcs, 0, len(l))
+ tmp = append(tmp, l[0])
+ for i := 1; i < len(l); i++ {
+ var dir direction
+ nxt := l[i]
+ for _, in := range tmp {
+ if dir, nxt = overlap(in, nxt); dir == empty || dir == bad {
+ break
+ }
+ }
+ if nxt.Len > 0 && dir != bad {
+ tmp = append(tmp, nxt)
+ }
+ }
+ tmp.sort()
+ if false && !tmp.valid() { // debug checking
+ log.Fatalf("here %d", len(tmp))
+ }
+ return tmp
+}
+
+type direction int
+
+const (
+ empty direction = iota // diag is empty (so not in lcs)
+ leftdown // proposed acceptably to the left and below
+ rightup // proposed diag is acceptably to the right and above
+ bad // proposed diag is inconsistent with the lcs so far
+)
+
+// overlap trims the proposed diag prop so it doesn't overlap with
+// the existing diag that has already been added to the lcs.
+func overlap(exist, prop diag) (direction, diag) {
+ if prop.X <= exist.X && exist.X < prop.X+prop.Len {
+ // remove the end of prop where it overlaps with the X end of exist
+ delta := prop.X + prop.Len - exist.X
+ prop.Len -= delta
+ if prop.Len <= 0 {
+ return empty, prop
+ }
+ }
+ if exist.X <= prop.X && prop.X < exist.X+exist.Len {
+ // remove the beginning of prop where it overlaps with exist
+ delta := exist.X + exist.Len - prop.X
+ prop.Len -= delta
+ if prop.Len <= 0 {
+ return empty, prop
+ }
+ prop.X += delta
+ prop.Y += delta
+ }
+ if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len {
+ // remove the end of prop that overlaps (in Y) with exist
+ delta := prop.Y + prop.Len - exist.Y
+ prop.Len -= delta
+ if prop.Len <= 0 {
+ return empty, prop
+ }
+ }
+ if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len {
+ // remove the beginning of prop that overlaps with exist
+ delta := exist.Y + exist.Len - prop.Y
+ prop.Len -= delta
+ if prop.Len <= 0 {
+ return empty, prop
+ }
+ prop.X += delta // no test reaches this code
+ prop.Y += delta
+ }
+ if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y {
+ return leftdown, prop
+ }
+ if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y {
+ return rightup, prop
+ }
+ // prop can't be in an lcs that contains exist
+ return bad, prop
+}
+
+// manipulating Diag and lcs
+
+// prepend a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs
+// or to its first Diag. prepend is only called to extend diagonals
+// in the backward direction.
+func (lcs lcs) prepend(x, y int) lcs {
+ if len(lcs) > 0 {
+ d := &lcs[0]
+ if int(d.X) == x+1 && int(d.Y) == y+1 {
+ // extend the diagonal down and to the left
+ d.X, d.Y = int(x), int(y)
+ d.Len++
+ return lcs
+ }
+ }
+
+ r := diag{X: int(x), Y: int(y), Len: 1}
+ lcs = append([]diag{r}, lcs...)
+ return lcs
+}
+
+// append appends a diagonal, or extends the existing one.
+// by adding the edge (x,y)-(x+1,y+1). append is only called
+// to extend diagonals in the forward direction.
+func (lcs lcs) append(x, y int) lcs {
+ if len(lcs) > 0 {
+ last := &lcs[len(lcs)-1]
+ // Expand last element if adjoining.
+ if last.X+last.Len == x && last.Y+last.Len == y {
+ last.Len++
+ return lcs
+ }
+ }
+
+ return append(lcs, diag{X: x, Y: y, Len: 1})
+}
+
+// enforce constraint on d, k
+func ok(d, k int) bool {
+ return d >= 0 && -d <= k && k <= d
+}
diff --git a/internal/diff/lcs/common_test.go b/internal/diff/lcs/common_test.go
new file mode 100644
index 000000000..f19245e40
--- /dev/null
+++ b/internal/diff/lcs/common_test.go
@@ -0,0 +1,140 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+ "log"
+ "math/rand"
+ "strings"
+ "testing"
+)
+
+type Btest struct {
+ a, b string
+ lcs []string
+}
+
+var Btests = []Btest{
+ {"aaabab", "abaab", []string{"abab", "aaab"}},
+ {"aabbba", "baaba", []string{"aaba"}},
+ {"cabbx", "cbabx", []string{"cabx", "cbbx"}},
+ {"c", "cb", []string{"c"}},
+ {"aaba", "bbb", []string{"b"}},
+ {"bbaabb", "b", []string{"b"}},
+ {"baaabb", "bbaba", []string{"bbb", "baa", "bab"}},
+ {"baaabb", "abbab", []string{"abb", "bab", "aab"}},
+ {"baaba", "aaabba", []string{"aaba"}},
+ {"ca", "cba", []string{"ca"}},
+ {"ccbcbc", "abba", []string{"bb"}},
+ {"ccbcbc", "aabba", []string{"bb"}},
+ {"ccb", "cba", []string{"cb"}},
+ {"caef", "axe", []string{"ae"}},
+ {"bbaabb", "baabb", []string{"baabb"}},
+ // Example from Myers:
+ {"abcabba", "cbabac", []string{"caba", "baba", "cbba"}},
+ {"3456aaa", "aaa", []string{"aaa"}},
+ {"aaa", "aaa123", []string{"aaa"}},
+ {"aabaa", "aacaa", []string{"aaaa"}},
+ {"1a", "a", []string{"a"}},
+ {"abab", "bb", []string{"bb"}},
+ {"123", "ab", []string{""}},
+ {"a", "b", []string{""}},
+ {"abc", "123", []string{""}},
+ {"aa", "aa", []string{"aa"}},
+ {"abcde", "12345", []string{""}},
+ {"aaa3456", "aaa", []string{"aaa"}},
+ {"abcde", "12345a", []string{"a"}},
+ {"ab", "123", []string{""}},
+ {"1a2", "a", []string{"a"}},
+ // for two-sided
+ {"babaab", "cccaba", []string{"aba"}},
+ {"aabbab", "cbcabc", []string{"bab"}},
+ {"abaabb", "bcacab", []string{"baab"}},
+ {"abaabb", "abaaaa", []string{"abaa"}},
+ {"bababb", "baaabb", []string{"baabb"}},
+ {"abbbaa", "cabacc", []string{"aba"}},
+ {"aabbaa", "aacaba", []string{"aaaa", "aaba"}},
+}
+
+func init() {
+ log.SetFlags(log.Lshortfile)
+}
+
+func check(t *testing.T, str string, lcs lcs, want []string) {
+ t.Helper()
+ if !lcs.valid() {
+ t.Errorf("bad lcs %v", lcs)
+ }
+ var got strings.Builder
+ for _, dd := range lcs {
+ got.WriteString(str[dd.X : dd.X+dd.Len])
+ }
+ ans := got.String()
+ for _, w := range want {
+ if ans == w {
+ return
+ }
+ }
+ t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans)
+}
+
+func checkDiffs(t *testing.T, before string, diffs []Diff, after string) {
+ t.Helper()
+ var ans strings.Builder
+ sofar := 0 // index of position in before
+ for _, d := range diffs {
+ if sofar < d.Start {
+ ans.WriteString(before[sofar:d.Start])
+ }
+ ans.WriteString(after[d.ReplStart:d.ReplEnd])
+ sofar = d.End
+ }
+ ans.WriteString(before[sofar:])
+ if ans.String() != after {
+ t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after)
+ }
+}
+
+func lcslen(l lcs) int {
+ ans := 0
+ for _, d := range l {
+ ans += int(d.Len)
+ }
+ return ans
+}
+
+// return a random string of length n made of characters from s
+func randstr(s string, n int) string {
+ src := []rune(s)
+ x := make([]rune, n)
+ for i := 0; i < n; i++ {
+ x[i] = src[rand.Intn(len(src))]
+ }
+ return string(x)
+}
+
+func TestLcsFix(t *testing.T) {
+ tests := []struct{ before, after lcs }{
+ {lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}},
+ {lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}},
+ {lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}},
+ {lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}},
+ {lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}},
+ {lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}},
+ {lcs{}, lcs{}},
+ {lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}},
+ }
+ for n, x := range tests {
+ got := x.before.fix()
+ if len(got) != len(x.after) {
+ t.Errorf("got %v, expected %v, for %v", got, x.after, x.before)
+ }
+ olen := lcslen(x.after)
+ glen := lcslen(got)
+ if olen != glen {
+ t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before)
+ }
+ }
+}
diff --git a/internal/diff/lcs/doc.go b/internal/diff/lcs/doc.go
new file mode 100644
index 000000000..dc779f38a
--- /dev/null
+++ b/internal/diff/lcs/doc.go
@@ -0,0 +1,156 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lcs contains code to find longest-common-subsequences
+// (and diffs).
+package lcs
+
+/*
+Compute longest-common-subsequences of two slices A, B using
+algorithms from Myers' paper. A longest-common-subsequence
+(LCS from now on) of A and B is a maximal set of lexically increasing
+pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but
+they all have the same length. An LCS determines a sequence of edits
+that changes A into B.
+
+The key concept is the edit graph of A and B.
+If A has length N and B has length M, then the edit graph has
+vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a
+horizontal edge from v[i][j] to v[i+1][j] whenever both are in
+the graph, and a vertical edge from v[i][j] to v[i][j+1] similarly.
+When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1].
+
+A path in the graph between (0,0) and (N,M) determines a sequence
+of edits converting A into B: each horizontal edge corresponds to removing
+an element of A, and each vertical edge corresponds to inserting an
+element of B.
+
+A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph
+is of length D if it has D non-diagonal edges. The algorithms generate
+forward paths (in which at least one of x,y increases at each edge),
+or backward paths (in which at least one of x,y decreases at each edge),
+or a combination. (Note that the orientation is the traditional mathematical one,
+with the origin in the lower-left corner.)
+
+Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.)
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ b | | | ___/‾‾‾ | ___/‾‾‾ | | |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ c | | | | | | |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a a b b a a
+
+
+The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at
+the end of a maximal path of length D. (Because x-y=k it suffices to remember
+only the x coordinate of the vertex.)
+
+The forward algorithm: Find the longest diagonal starting at (0,0) and
+label its end with D=0,k=0. From that vertex take a vertical step and
+then follow the longest diagonal (up and to the right), and label that vertex
+with D=1,k=-1. From the D=0,k=0 point take a horizontal step and then follow
+the longest diagonal (up and to the right) and label that vertex
+D=1,k=1. In the same way, having labelled all the D vertices,
+from a vertex labelled D,k find two vertices
+tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same
+diagonal, in which case take the one with the larger x.
+
+Eventually the path gets to (N,M), and the diagonals on it are the LCS.
+
+Here is the edit graph with the ends of D-paths labelled. (So, for instance,
+0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first
+step is to go up the longest diagonal from (0,0).)
+A:"aabbaa", B:"aacaba"
+ ⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6)
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙
+ b | | | ___/‾‾‾ | ___/‾‾‾ | | |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙
+ c | | | | | | |
+ ⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2)
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
+ ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
+ a a b b a a
+
+The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical
+to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected,
+there are 4 non-diagonal steps, and the diagonals form an LCS.
+
+There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon):
+A:"aabbaa", B:"aacaba"
+ ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙
+ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
+ ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙
+ b | | | ____/‾‾‾ | ____/‾‾‾ | | |
+ ⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙
+ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
+ (:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙
+ c | | | | | | |
+ ⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙
+ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
+ (:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙
+ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
+ (:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙
+ a a b b a a
+
+Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the
+front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short.
+We want to control how big D can be, by stopping when it gets too large. The forward algorithm then
+privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable
+asymmetry.
+
+Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in
+the edit graph look like.
+A:"aabbaa", B:"aacaba"
+ ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
+ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
+ ⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙
+ b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | |
+ ⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙
+ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
+ ⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙
+ c | | | | | | |
+ ⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙
+ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
+ ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
+ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
+ ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
+ a a b b a a
+
+The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion
+is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same
+diagonal. (Here the edgegraph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward
+2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path.
+Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the
+computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed
+from the edgegraph. The implementation looks for a number of special cases to try to avoid computing an extra forward path.
+
+If the two-sided algorithm has to stop early (because D has become too large) it will have found a forward LCS and a
+backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two
+computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS
+is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two
+to form a best-effort LCS. In the worst case the forward partial LCS may have to
+be recomputed.
+*/
+
+/* Eugene Myers paper is titled
+"An O(ND) Difference Algorithm and Its Variations"
+and can be found at
+http://www.xmailserver.org/diff2.pdf
+
+(There is a generic implementation of the algorithm in the repository with git hash
+b9ad7e4ade3a686d608e44475390ad428e60e7fc)
+*/
diff --git a/internal/diff/lcs/git.sh b/internal/diff/lcs/git.sh
new file mode 100644
index 000000000..6856f8439
--- /dev/null
+++ b/internal/diff/lcs/git.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Copyright 2022 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+#
+# Creates a zip file containing all numbered versions
+# of the commit history of a large source file, for use
+# as input data for the tests of the diff algorithm.
+#
+# Run script from root of the x/tools repo.
+
+set -eu
+
+# WARNING: This script will install the latest version of $file
+# The largest real source file in the x/tools repo.
+# file=internal/lsp/source/completion/completion.go
+# file=internal/lsp/source/diagnostics.go
+file=internal/lsp/protocol/tsprotocol.go
+
+# Check out every commit that touched $file and copy each historical
+# version into a temp directory under a zero-padded sequence number.
+tmp=$(mktemp -d)
+git log $file |
+ awk '/^commit / {print $2}' |
+ nl -ba -nrz |
+ while read n hash; do
+ git checkout --quiet $hash $file
+ cp -f $file $tmp/$n
+ done
+# Bundle the numbered versions, then restore the working tree.
+(cd $tmp && zip -q - *) > testdata.zip
+rm -fr $tmp
+git restore --staged $file
+git restore $file
+echo "Created testdata.zip"
diff --git a/internal/diff/lcs/labels.go b/internal/diff/lcs/labels.go
new file mode 100644
index 000000000..0689f1ed7
--- /dev/null
+++ b/internal/diff/lcs/labels.go
@@ -0,0 +1,55 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+ "fmt"
+)
+
+// A label records the x coordinates of path endpoints, indexed by (D, k).
+// For each D, vec[D] has length D+1,
+// and the label for (D, k) is stored in vec[D][(D+k)/2].
+type label struct {
+ vec [][]int
+}
+
+// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE
+const debug = false
+
+// debugging. check that the (d,k) pair is valid
+// (that is, -d<=k<=d and d+k even); panics otherwise.
+func checkDK(D, k int) {
+ if k >= -D && k <= D && (D+k)%2 == 0 {
+ return
+ }
+ panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k))
+}
+
+// set stores x as the label for (D, k), growing the storage on demand.
+func (t *label) set(D, k, x int) {
+ if debug {
+ checkDK(D, k)
+ }
+ for len(t.vec) <= D {
+ t.vec = append(t.vec, nil)
+ }
+ if t.vec[D] == nil {
+ t.vec[D] = make([]int, D+1)
+ }
+ t.vec[D][(D+k)/2] = x // known that D+k is even
+}
+
+// get returns the label previously stored for (d, k).
+func (t *label) get(d, k int) int {
+ if debug {
+ checkDK(d, k)
+ }
+ return int(t.vec[d][(d+k)/2])
+}
+
+// newtriang returns a label sized for paths of length up to limit.
+func newtriang(limit int) label {
+ if limit < 100 {
+ // Preallocate if limit is not large.
+ return label{vec: make([][]int, limit)}
+ }
+ return label{}
+}
diff --git a/internal/diff/lcs/old.go b/internal/diff/lcs/old.go
new file mode 100644
index 000000000..7af11fc89
--- /dev/null
+++ b/internal/diff/lcs/old.go
@@ -0,0 +1,480 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+// TODO(adonovan): remove unclear references to "old" in this package.
+
+import (
+ "fmt"
+)
+
+// A Diff is a replacement of a portion of A by a portion of B.
+type Diff struct {
+ Start, End int // offsets of portion to delete in A
+ ReplStart, ReplEnd int // offset of replacement text in B
+}
+
+// DiffStrings returns the differences between two strings.
+// It does not respect rune boundaries.
+func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) }
+
+// DiffBytes returns the differences between two byte sequences.
+// It does not respect rune boundaries.
+func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
+
+// DiffRunes returns the differences between two rune sequences.
+func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
+
+// diff runs the two-sided algorithm with a fixed search-depth limit
+// and returns only the resulting edits.
+func diff(seqs sequences) []Diff {
+ // A limit on how deeply the LCS algorithm should search. The value is just a guess.
+ const maxDiffs = 30
+ diff, _ := compute(seqs, twosided, maxDiffs/2)
+ return diff
+}
+
+// compute computes the list of differences between two sequences,
+// along with the LCS. It is exercised directly by tests.
+// The algorithm is one of {forward, backward, twosided}.
+// A limit <= 0 means the search depth is effectively unbounded.
+func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
+ if limit <= 0 {
+ limit = 1 << 25 // effectively infinity
+ }
+ alen, blen := seqs.lengths()
+ g := &editGraph{
+ seqs: seqs,
+ vf: newtriang(limit),
+ vb: newtriang(limit),
+ limit: limit,
+ ux: alen,
+ uy: blen,
+ delta: alen - blen,
+ }
+ lcs := algo(g)
+ diffs := lcs.toDiffs(alen, blen)
+ return diffs, lcs
+}
+
+// editGraph carries the information for computing the lcs of two sequences.
+type editGraph struct {
+ seqs sequences
+ vf, vb label // forward and backward labels
+
+ limit int // maximal value of D
+ // the bounding rectangle of the current edit graph
+ lx, ly, ux, uy int
+ delta int // common subexpression: (ux-lx)-(uy-ly)
+}
+
+// toDiffs converts an LCS to a list of edits: each gap between
+// successive common diagonals (and any trailing gap) becomes a Diff.
+func (lcs lcs) toDiffs(alen, blen int) []Diff {
+ var diffs []Diff
+ var pa, pb int // offsets in a, b
+ for _, l := range lcs {
+ if pa < l.X || pb < l.Y {
+ // the region before this diagonal is a replacement
+ diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
+ }
+ pa = l.X + l.Len
+ pb = l.Y + l.Len
+ }
+ if pa < alen || pb < blen {
+ // material after the last diagonal
+ diffs = append(diffs, Diff{pa, alen, pb, blen})
+ }
+ return diffs
+}
+
+// --- FORWARD ---
+
+// fdone decides if the forward path has reached the upper right
+// corner of the rectangle. If so, it also returns the computed lcs.
+func (e *editGraph) fdone(D, k int) (bool, lcs) {
+ // x, y, k are relative to the rectangle
+ x := e.vf.get(D, k)
+ y := x - k
+ if x == e.ux && y == e.uy {
+ return true, e.forwardlcs(D, k)
+ }
+ return false, nil
+}
+
+// run the forward algorithm, until success or up to the limit on D.
+func forward(e *editGraph) lcs {
+ e.setForward(0, 0, e.lx)
+ if ok, ans := e.fdone(0, 0); ok {
+ return ans
+ }
+ // from D to D+1
+ for D := 0; D < e.limit; D++ {
+ // the extreme diagonals k=-(D+1) and k=D+1 each have only
+ // one possible predecessor
+ e.setForward(D+1, -(D + 1), e.getForward(D, -D))
+ if ok, ans := e.fdone(D+1, -(D + 1)); ok {
+ return ans
+ }
+ e.setForward(D+1, D+1, e.getForward(D, D)+1)
+ if ok, ans := e.fdone(D+1, D+1); ok {
+ return ans
+ }
+ // interior diagonals: take the farther of a horizontal or
+ // vertical step from a neighboring diagonal
+ for k := -D + 1; k <= D-1; k += 2 {
+ // these are tricky and easy to get backwards
+ lookv := e.lookForward(k, e.getForward(D, k-1)+1)
+ lookh := e.lookForward(k, e.getForward(D, k+1))
+ if lookv > lookh {
+ e.setForward(D+1, k, lookv)
+ } else {
+ e.setForward(D+1, k, lookh)
+ }
+ if ok, ans := e.fdone(D+1, k); ok {
+ return ans
+ }
+ }
+ }
+ // D is too large
+ // find the D path with maximal x+y inside the rectangle and
+ // use that to compute the found part of the lcs
+ kmax := -e.limit - 1
+ diagmax := -1
+ for k := -e.limit; k <= e.limit; k += 2 {
+ x := e.getForward(e.limit, k)
+ y := x - k
+ if x+y > diagmax && x <= e.ux && y <= e.uy {
+ diagmax, kmax = x+y, k
+ }
+ }
+ return e.forwardlcs(e.limit, kmax)
+}
+
+// recover the lcs by backtracking from the farthest point reached,
+// following labels back toward (0,0) and collecting the diagonal edges.
+func (e *editGraph) forwardlcs(D, k int) lcs {
+ var ans lcs
+ for x := e.getForward(D, k); x != 0 || x-k != 0; {
+ if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
+ // if (x-1,y) is labelled D-1, x--,D--,k--,continue
+ D, k, x = D-1, k-1, x-1
+ continue
+ } else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
+ // if (x,y-1) is labelled D-1, x, D--,k++, continue
+ D, k = D-1, k+1
+ continue
+ }
+ // if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
+ y := x - k
+ ans = ans.prepend(x+e.lx-1, y+e.ly-1)
+ x--
+ }
+ return ans
+}
+
+// lookForward starts at the relative position (relx, relx-k), follows
+// the diagonal (matching elements) as far as possible, and returns the
+// absolute x coordinate reached.
+func (e *editGraph) lookForward(k, relx int) int {
+ rely := relx - k
+ x, y := relx+e.lx, rely+e.ly
+ if x < e.ux && y < e.uy {
+ x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
+ }
+ return x
+}
+
+// setForward extends the diagonal from relx and stores the resulting
+// rectangle-relative x as the forward label for (d, k).
+func (e *editGraph) setForward(d, k, relx int) {
+ x := e.lookForward(k, relx)
+ e.vf.set(d, k, x-e.lx)
+}
+
+// getForward returns the stored forward label for (d, k).
+func (e *editGraph) getForward(d, k int) int {
+ x := e.vf.get(d, k)
+ return x
+}
+
+// --- BACKWARD ---
+
+// bdone decides if the backward path has reached the lower left corner.
+// If so, it also returns the computed lcs.
+func (e *editGraph) bdone(D, k int) (bool, lcs) {
+ // x, y, k are relative to the rectangle
+ x := e.vb.get(D, k)
+ y := x - (k + e.delta)
+ if x == 0 && y == 0 {
+ return true, e.backwardlcs(D, k)
+ }
+ return false, nil
+}
+
+// run the backward algorithm, until success or up to the limit on D.
+func backward(e *editGraph) lcs {
+ e.setBackward(0, 0, e.ux)
+ if ok, ans := e.bdone(0, 0); ok {
+ return ans
+ }
+ // from D to D+1
+ for D := 0; D < e.limit; D++ {
+ // extreme diagonals have only one possible predecessor
+ e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
+ if ok, ans := e.bdone(D+1, -(D + 1)); ok {
+ return ans
+ }
+ e.setBackward(D+1, D+1, e.getBackward(D, D))
+ if ok, ans := e.bdone(D+1, D+1); ok {
+ return ans
+ }
+ // interior diagonals: take the nearer of a horizontal or
+ // vertical step (backward paths minimize x)
+ for k := -D + 1; k <= D-1; k += 2 {
+ // these are tricky and easy to get wrong
+ lookv := e.lookBackward(k, e.getBackward(D, k-1))
+ lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
+ if lookv < lookh {
+ e.setBackward(D+1, k, lookv)
+ } else {
+ e.setBackward(D+1, k, lookh)
+ }
+ if ok, ans := e.bdone(D+1, k); ok {
+ return ans
+ }
+ }
+ }
+
+ // D is too large
+ // find the D path with minimal x+y inside the rectangle and
+ // use that to compute the part of the lcs found
+ kmax := -e.limit - 1
+ diagmin := 1 << 25
+ for k := -e.limit; k <= e.limit; k += 2 {
+ x := e.getBackward(e.limit, k)
+ y := x - (k + e.delta)
+ if x+y < diagmin && x >= 0 && y >= 0 {
+ diagmin, kmax = x+y, k
+ }
+ }
+ if kmax < -e.limit {
+ panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
+ }
+ return e.backwardlcs(e.limit, kmax)
+}
+
+// recover the lcs by backtracking, following labels toward the
+// upper-right corner (ux,uy) and collecting the diagonal edges.
+func (e *editGraph) backwardlcs(D, k int) lcs {
+ var ans lcs
+ for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
+ if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
+ // D--, k--, x unchanged
+ D, k = D-1, k-1
+ continue
+ } else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
+ // D--, k++, x++
+ D, k, x = D-1, k+1, x+1
+ continue
+ }
+ // otherwise (x,y)--(x+1,y+1) is a diagonal; append it
+ y := x - (k + e.delta)
+ ans = ans.append(x+e.lx, y+e.ly)
+ x++
+ }
+ return ans
+}
+
+// lookBackward starts at the relative position implied by (k, relx),
+// follows the diagonal (matching elements) down as far as possible,
+// and returns the absolute x coordinate reached.
+func (e *editGraph) lookBackward(k, relx int) int {
+ rely := relx - (k + e.delta) // forward k = k + e.delta
+ x, y := relx+e.lx, rely+e.ly
+ if x > 0 && y > 0 {
+ x -= e.seqs.commonSuffixLen(0, x, 0, y)
+ }
+ return x
+}
+
+// setBackward extends the diagonal from relx and stores the resulting
+// rectangle-relative x as the backward label for (d, k).
+func (e *editGraph) setBackward(d, k, relx int) {
+ x := e.lookBackward(k, relx)
+ e.vb.set(d, k, x-e.lx)
+}
+
+// getBackward returns the stored backward label for (d, k).
+func (e *editGraph) getBackward(d, k int) int {
+ x := e.vb.get(d, k)
+ return x
+}
+
+// -- TWOSIDED ---
+
+// twosided alternates forward and backward passes until Myers' Lemma
+// shows the two frontiers have met (twoDone), or until e.limit is
+// reached, in which case partial forward and backward LCSs are combined.
+func twosided(e *editGraph) lcs {
+ // The termination condition could be improved, as either the forward
+ // or backward pass could succeed before Myers' Lemma applies.
+ // Aside from questions of efficiency (is the extra testing cost-effective)
+ // this is more likely to matter when e.limit is reached.
+ e.setForward(0, 0, e.lx)
+ e.setBackward(0, 0, e.ux)
+
+ // from D to D+1
+ for D := 0; D < e.limit; D++ {
+ // just finished a backwards pass, so check
+ if got, ok := e.twoDone(D, D); ok {
+ return e.twolcs(D, D, got)
+ }
+ // do a forwards pass (D to D+1)
+ e.setForward(D+1, -(D + 1), e.getForward(D, -D))
+ e.setForward(D+1, D+1, e.getForward(D, D)+1)
+ for k := -D + 1; k <= D-1; k += 2 {
+ // these are tricky and easy to get backwards
+ lookv := e.lookForward(k, e.getForward(D, k-1)+1)
+ lookh := e.lookForward(k, e.getForward(D, k+1))
+ if lookv > lookh {
+ e.setForward(D+1, k, lookv)
+ } else {
+ e.setForward(D+1, k, lookh)
+ }
+ }
+ // just did a forward pass, so check
+ if got, ok := e.twoDone(D+1, D); ok {
+ return e.twolcs(D+1, D, got)
+ }
+ // do a backward pass, D to D+1
+ e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
+ e.setBackward(D+1, D+1, e.getBackward(D, D))
+ for k := -D + 1; k <= D-1; k += 2 {
+ // these are tricky and easy to get wrong
+ lookv := e.lookBackward(k, e.getBackward(D, k-1))
+ lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
+ if lookv < lookh {
+ e.setBackward(D+1, k, lookv)
+ } else {
+ e.setBackward(D+1, k, lookh)
+ }
+ }
+ }
+
+ // D too large. combine a forward and backward partial lcs
+ // first, a forward one: the D-path with maximal x+y inside the rectangle
+ kmax := -e.limit - 1
+ diagmax := -1
+ for k := -e.limit; k <= e.limit; k += 2 {
+ x := e.getForward(e.limit, k)
+ y := x - k
+ if x+y > diagmax && x <= e.ux && y <= e.uy {
+ diagmax, kmax = x+y, k
+ }
+ }
+ if kmax < -e.limit {
+ panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
+ }
+ lcs := e.forwardlcs(e.limit, kmax)
+ // now a backward one
+ // find the D path with minimal x+y inside the rectangle and
+ // use that to compute the lcs
+ diagmin := 1 << 25 // infinity
+ for k := -e.limit; k <= e.limit; k += 2 {
+ x := e.getBackward(e.limit, k)
+ y := x - (k + e.delta)
+ if x+y < diagmin && x >= 0 && y >= 0 {
+ diagmin, kmax = x+y, k
+ }
+ }
+ if kmax < -e.limit {
+ panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
+ }
+ lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
+ // These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
+ ans := lcs.fix()
+ return ans
+}
+
+// Does Myers' Lemma apply? twoDone reports whether a forward path of
+// length df and a backward path of length db have met; if so it returns
+// the forward diagonal k on which they meet.
+func (e *editGraph) twoDone(df, db int) (int, bool) {
+ if (df+db+e.delta)%2 != 0 {
+ return 0, false // diagonals cannot overlap
+ }
+ // range of forward diagonals that both labelings cover
+ kmin := -db + e.delta
+ if -df > kmin {
+ kmin = -df
+ }
+ kmax := db + e.delta
+ if df < kmax {
+ kmax = df
+ }
+ for k := kmin; k <= kmax; k += 2 {
+ x := e.vf.get(df, k)
+ u := e.vb.get(db, k-e.delta)
+ if u <= x {
+ // is it worth looking at all the other k?
+ // prefer a diagonal where one of twolcs's cheap special
+ // cases will apply
+ for l := k; l <= kmax; l += 2 {
+ x := e.vf.get(df, l)
+ y := x - l
+ u := e.vb.get(db, l-e.delta)
+ v := u - l
+ if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
+ return l, true
+ }
+ }
+ return k, true
+ }
+ }
+ return 0, false
+}
+
+// twolcs stitches together the forward df-path and backward db-path
+// that twoDone found meeting on forward diagonal kf, producing the lcs.
+func (e *editGraph) twolcs(df, db, kf int) lcs {
+ // db==df || db+1==df
+ x := e.vf.get(df, kf)
+ y := x - kf
+ kb := kf - e.delta
+ u := e.vb.get(db, kb)
+ v := u - kf
+
+ // Myers proved there is a df-path from (0,0) to (u,v)
+ // and a db-path from (x,y) to (N,M).
+ // In the first case the overall path is the forward path
+ // to (u,v) followed by the backward path to (N,M).
+ // In the second case the path is the forward path from (0,0)
+ // to (x,y) followed by the backward path from (x,y) to (N,M).
+
+ // Look for some special cases to avoid computing either of these paths.
+ if x == u {
+ // "babaab" "cccaba"
+ // already patched together
+ lcs := e.forwardlcs(df, kf)
+ lcs = append(lcs, e.backwardlcs(db, kb)...)
+ return lcs.sort()
+ }
+
+ // is (u-1,v) or (u,v-1) labelled df-1?
+ // if so, that forward df-1-path plus a horizontal or vertical edge
+ // is the df-path to (u,v), then plus the db-path to (N,M)
+ if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
+ // "aabbab" "cbcabc"
+ lcs := e.forwardlcs(df-1, u-1-v)
+ lcs = append(lcs, e.backwardlcs(db, kb)...)
+ return lcs.sort()
+ }
+ if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
+ // "abaabb" "bcacab"
+ lcs := e.forwardlcs(df-1, u-(v-1))
+ lcs = append(lcs, e.backwardlcs(db, kb)...)
+ return lcs.sort()
+ }
+
+ // The path can't possibly contribute to the lcs because it
+ // is all horizontal or vertical edges
+ if u == 0 || v == 0 || x == e.ux || y == e.uy {
+ // "abaabb" "abaaaa"
+ if u == 0 || v == 0 {
+ return e.backwardlcs(db, kb)
+ }
+ return e.forwardlcs(df, kf)
+ }
+
+ // is (x+1,y) or (x,y+1) labelled db-1?
+ if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
+ // "bababb" "baaabb"
+ lcs := e.backwardlcs(db-1, kb+1)
+ lcs = append(lcs, e.forwardlcs(df, kf)...)
+ return lcs.sort()
+ }
+ if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
+ // "abbbaa" "cabacc"
+ lcs := e.backwardlcs(db-1, kb-1)
+ lcs = append(lcs, e.forwardlcs(df, kf)...)
+ return lcs.sort()
+ }
+
+ // need to compute another path: rerun the forward algorithm
+ // restricted to the sub-rectangle ending at (u,v)
+ // "aabbaa" "aacaba"
+ lcs := e.backwardlcs(db, kb)
+ oldx, oldy := e.ux, e.uy
+ e.ux = u
+ e.uy = v
+ lcs = append(lcs, forward(e)...)
+ e.ux, e.uy = oldx, oldy
+ return lcs.sort()
+}
diff --git a/internal/diff/lcs/old_test.go b/internal/diff/lcs/old_test.go
new file mode 100644
index 000000000..0c894316f
--- /dev/null
+++ b/internal/diff/lcs/old_test.go
@@ -0,0 +1,251 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+ "fmt"
+ "io/ioutil"
+ "log"
+ "math/rand"
+ "strings"
+ "testing"
+)
+
+// TestAlgosOld runs all three algorithms over the shared Btests table,
+// in both argument orders, checking the lcs and the resulting diffs.
+func TestAlgosOld(t *testing.T) {
+ for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} {
+ t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) {
+ for _, tx := range Btests {
+ lim := len(tx.a) + len(tx.b)
+
+ diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim)
+ check(t, tx.a, lcs, tx.lcs)
+ checkDiffs(t, tx.a, diffs, tx.b)
+
+ diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim)
+ check(t, tx.b, lcs, tx.lcs)
+ checkDiffs(t, tx.b, diffs, tx.a)
+ }
+ })
+ }
+}
+
+// TestIntOld pads the Btests inputs with filler on either end (using
+// characters not present in the tests) and checks that the lcs of the
+// interesting interior is still found.
+func TestIntOld(t *testing.T) {
+ // need to avoid any characters in btests
+ lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB"
+ for _, tx := range Btests {
+ if len(tx.a) < 2 || len(tx.b) < 2 {
+ continue
+ }
+ left := tx.a + lfill
+ right := tx.b + rfill
+ lim := len(tx.a) + len(tx.b)
+ diffs, lcs := compute(stringSeqs{left, right}, twosided, lim)
+ check(t, left, lcs, tx.lcs)
+ checkDiffs(t, left, diffs, right)
+ diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
+ check(t, right, lcs, tx.lcs)
+ checkDiffs(t, right, diffs, left)
+
+ left = lfill + tx.a
+ right = rfill + tx.b
+ diffs, lcs = compute(stringSeqs{left, right}, twosided, lim)
+ check(t, left, lcs, tx.lcs)
+ checkDiffs(t, left, diffs, right)
+ diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
+ check(t, right, lcs, tx.lcs)
+ checkDiffs(t, right, diffs, left)
+ }
+}
+
+func TestSpecialOld(t *testing.T) { // exercises lcs.fix
+ a := "golang.org/x/tools/intern"
+ b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern"
+ diffs, lcs := compute(stringSeqs{a, b}, twosided, 4)
+ if !lcs.valid() {
+ t.Errorf("%d,%v", len(diffs), lcs)
+ }
+}
+
+// TestRegressionOld001 replays an import-block edit that once produced
+// an invalid lcs, over every possible depth limit.
+func TestRegressionOld001(t *testing.T) {
+ a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+
+ b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
+ for i := 1; i < len(b); i++ {
+ diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls
+ if !lcs.valid() {
+ t.Errorf("%d,%v", len(diffs), lcs)
+ }
+ checkDiffs(t, a, diffs, b)
+ }
+}
+
+// TestRegressionOld002 replays a smaller problematic edit at every
+// depth limit.
+func TestRegressionOld002(t *testing.T) {
+ a := "n\"\n)\n"
+ b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
+ for i := 1; i <= len(b); i++ {
+ diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
+ if !lcs.valid() {
+ t.Errorf("%d,%v", len(diffs), lcs)
+ }
+ checkDiffs(t, a, diffs, b)
+ }
+}
+
+// TestRegressionOld003 replays a deletion-heavy edit at every depth limit.
+func TestRegressionOld003(t *testing.T) {
+ a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1"
+ b := "golang.org/x/hello v1"
+ for i := 1; i <= len(a); i++ {
+ diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
+ if !lcs.valid() {
+ t.Errorf("%d,%v", len(diffs), lcs)
+ }
+ checkDiffs(t, a, diffs, b)
+ }
+}
+
+// TestRandOld checks, on random inputs, that the three algorithms
+// agree on the lcs length and all produce valid lcs values.
+func TestRandOld(t *testing.T) {
+ rand.Seed(1)
+ for i := 0; i < 1000; i++ {
+ // TODO(adonovan): use ASCII and bytesSeqs here? The use of
+ // non-ASCII isn't relevant to the property exercised by the test.
+ a := []rune(randstr("abω", 16))
+ b := []rune(randstr("abωc", 16))
+ seq := runesSeqs{a, b}
+
+ const lim = 24 // large enough to get true lcs
+ _, forw := compute(seq, forward, lim)
+ _, back := compute(seq, backward, lim)
+ _, two := compute(seq, twosided, lim)
+ if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) {
+ t.Logf("\n%v\n%v\n%v", forw, back, two)
+ t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two))
+ }
+ if !two.valid() || !forw.valid() || !back.valid() {
+ t.Errorf("check failure")
+ }
+ }
+}
+
+// TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes})
+// to ensure at least minimal parity of the three representations.
+func TestDiffAPI(t *testing.T) {
+ for _, test := range []struct {
+ a, b string
+ wantStrings, wantBytes, wantRunes string
+ }{
+ {"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII
+ {"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII
+ } {
+
+ gotStrings := fmt.Sprint(DiffStrings(test.a, test.b))
+ if gotStrings != test.wantStrings {
+ t.Errorf("DiffStrings(%q, %q) = %v, want %v",
+ test.a, test.b, gotStrings, test.wantStrings)
+ }
+ gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b)))
+ if gotBytes != test.wantBytes {
+ t.Errorf("DiffBytes(%q, %q) = %v, want %v",
+ test.a, test.b, gotBytes, test.wantBytes)
+ }
+ gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b)))
+ if gotRunes != test.wantRunes {
+ t.Errorf("DiffRunes(%q, %q) = %v, want %v",
+ test.a, test.b, gotRunes, test.wantRunes)
+ }
+ }
+}
+
+// BenchmarkTwoOld measures the twosided algorithm on generated inputs.
+func BenchmarkTwoOld(b *testing.B) {
+ tests := genBench("abc", 96)
+ for i := 0; i < b.N; i++ {
+ for _, tt := range tests {
+ _, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100)
+ if !two.valid() {
+ b.Error("check failed")
+ }
+ }
+ }
+}
+
+// BenchmarkForwOld measures the forward algorithm on the same inputs.
+func BenchmarkForwOld(b *testing.B) {
+ tests := genBench("abc", 96)
+ for i := 0; i < b.N; i++ {
+ for _, tt := range tests {
+ _, two := compute(stringSeqs{tt.before, tt.after}, forward, 100)
+ if !two.valid() {
+ b.Error("check failed")
+ }
+ }
+ }
+}
+
+// genBench returns 24 before/after pairs of length n over the
+// alphabet set, differing in roughly 5% of positions (at least one).
+func genBench(set string, n int) []struct{ before, after string } {
+ // before and after for benchmarks. 24 strings of length n with
+ // before and after differing at least once, and about 5%
+ rand.Seed(3)
+ var ans []struct{ before, after string }
+ for i := 0; i < 24; i++ {
+ // maybe b should have an approximately known number of diffs
+ a := randstr(set, n)
+ cnt := 0
+ bb := make([]rune, 0, n)
+ for _, r := range a {
+ if rand.Float64() < .05 {
+ cnt++
+ r = 'N'
+ }
+ bb = append(bb, r)
+ }
+ if cnt == 0 {
+ // avoid == shortcut
+ bb[n/2] = 'N'
+ }
+ ans = append(ans, struct{ before, after string }{a, string(bb)})
+ }
+ return ans
+}
+
+// This benchmark represents a common case for a diff command:
+// large file with a single relatively small diff in the middle.
+// (It's not clear whether this is representative of gopls workloads
+// or whether it is important to gopls diff performance.)
+//
+// TODO(adonovan) opt: it could be much faster. For example,
+// comparing a file against itself is about 10x faster than with the
+// small deletion in the middle. Strangely, comparing a file against
+// itself minus the last byte is faster still; I don't know why.
+// There is much low-hanging fruit here for further improvement.
+func BenchmarkLargeFileSmallDiff(b *testing.B) {
+ data, err := ioutil.ReadFile("old.go") // large file
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ n := len(data)
+
+ src := string(data)
+ dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle
+ b.Run("string", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ compute(stringSeqs{src, dst}, twosided, len(src)+len(dst))
+ }
+ })
+
+ srcBytes := []byte(src)
+ dstBytes := []byte(dst)
+ b.Run("bytes", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes))
+ }
+ })
+
+ srcRunes := []rune(src)
+ dstRunes := []rune(dst)
+ b.Run("runes", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes))
+ }
+ })
+}
diff --git a/internal/diff/lcs/sequence.go b/internal/diff/lcs/sequence.go
new file mode 100644
index 000000000..2d72d2630
--- /dev/null
+++ b/internal/diff/lcs/sequence.go
@@ -0,0 +1,113 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+// This file defines the abstract sequence over which the LCS algorithm operates.
+
+// sequences abstracts a pair of sequences, A and B.
+// Implementations below exist for string, []byte, and []rune elements.
+type sequences interface {
+ lengths() (int, int) // len(A), len(B)
+ commonPrefixLen(ai, aj, bi, bj int) int // len(commonPrefix(A[ai:aj], B[bi:bj]))
+ commonSuffixLen(ai, aj, bi, bj int) int // len(commonSuffix(A[ai:aj], B[bi:bj]))
+}
+
+// stringSeqs implements sequences for a pair of strings.
+type stringSeqs struct{ a, b string }
+
+func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+ return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj])
+}
+func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+ return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj])
+}
+
+// The explicit capacity in s[i:j:j] leads to more efficient code.
+
+// bytesSeqs implements sequences for a pair of byte slices.
+type bytesSeqs struct{ a, b []byte }
+
+func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+ return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+ return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+
+// runesSeqs implements sequences for a pair of rune slices.
+type runesSeqs struct{ a, b []rune }
+
+func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+ return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+ return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+
+// TODO(adonovan): optimize these functions using ideas from:
+// - https://go.dev/cl/408116 common.go
+// - https://go.dev/cl/421435 xor_generic.go
+
+// TODO(adonovan): factor using generics when available,
+// but measure performance impact.
+
+// commonPrefixLen* returns the length of the common prefix of a and b.
+// (The callers in the *Seqs methods pass already-sliced subsequences.)
+func commonPrefixLenBytes(a, b []byte) int {
+ n := min(len(a), len(b))
+ i := 0
+ for i < n && a[i] == b[i] {
+ i++
+ }
+ return i
+}
+func commonPrefixLenRunes(a, b []rune) int {
+ n := min(len(a), len(b))
+ i := 0
+ for i < n && a[i] == b[i] {
+ i++
+ }
+ return i
+}
+func commonPrefixLenString(a, b string) int {
+ n := min(len(a), len(b))
+ i := 0
+ for i < n && a[i] == b[i] {
+ i++
+ }
+ return i
+}
+
+// commonSuffixLen* returns the length of the common suffix of a and b,
+// comparing elements backwards from each slice's end.
+func commonSuffixLenBytes(a, b []byte) int {
+ n := min(len(a), len(b))
+ i := 0
+ for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
+ i++
+ }
+ return i
+}
+func commonSuffixLenRunes(a, b []rune) int {
+ n := min(len(a), len(b))
+ i := 0
+ for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
+ i++
+ }
+ return i
+}
+func commonSuffixLenString(a, b string) int {
+ n := min(len(a), len(b))
+ i := 0
+ for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
+ i++
+ }
+ return i
+}
+
+// min returns the smaller of x and y.
+// NOTE(review): replaceable by the built-in min once the module
+// requires Go 1.21 — verify the go.mod language version first.
+func min(x, y int) int {
+ if x < y {
+ return x
+ } else {
+ return y
+ }
+}
diff --git a/internal/diff/myers/diff.go b/internal/diff/myers/diff.go
new file mode 100644
index 000000000..7c2d4356b
--- /dev/null
+++ b/internal/diff/myers/diff.go
@@ -0,0 +1,215 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package myers implements the Myers diff algorithm.
+package myers
+
+import (
+ "strings"
+
+ "golang.org/x/tools/internal/diff"
+)
+
+// Sources:
+// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
+// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
+
+// ComputeEdits returns the line-granularity edits that transform before
+// into after, computed with the Myers diff algorithm. Offsets in the
+// resulting diff.Edit values are byte offsets into before.
+func ComputeEdits(before, after string) []diff.Edit {
+ beforeLines := splitLines(before)
+ ops := operations(beforeLines, splitLines(after))
+
+ // Build a table mapping line number to offset.
+ lineOffsets := make([]int, 0, len(beforeLines)+1)
+ total := 0
+ for i := range beforeLines {
+ lineOffsets = append(lineOffsets, total)
+ total += len(beforeLines[i])
+ }
+ lineOffsets = append(lineOffsets, total) // EOF
+
+ edits := make([]diff.Edit, 0, len(ops))
+ for _, op := range ops {
+ start, end := lineOffsets[op.I1], lineOffsets[op.I2]
+ switch op.Kind {
+ case diff.Delete:
+ // Delete: before[I1:I2] is deleted.
+ edits = append(edits, diff.Edit{Start: start, End: end})
+ case diff.Insert:
+ // Insert: after[J1:J2] is inserted at before[I1:I1].
+ // (operations records I2 == I1 for insertions, so start == end.)
+ if content := strings.Join(op.Content, ""); content != "" {
+ edits = append(edits, diff.Edit{Start: start, End: end, New: content})
+ }
+ }
+ }
+ return edits
+}
+
+// operation is a single consolidated delete or insert step of the edit
+// script produced by operations.
+type operation struct {
+ Kind diff.OpKind
+ Content []string // content from b
+ I1, I2 int // indices of the line in a
+ J1 int // indices of the line in b, J2 implied by len(Content)
+}
+
+// operations returns the list of operations to convert a into b, consolidating
+// operations for multiple lines and not including equal lines.
+func operations(a, b []string) []*operation {
+ if len(a) == 0 && len(b) == 0 {
+ return nil
+ }
+
+ trace, offset := shortestEditSequence(a, b)
+ snakes := backtrack(trace, len(a), len(b), offset)
+
+ M, N := len(a), len(b)
+
+ var i int
+ // Worst case: every line of a deleted plus every line of b inserted.
+ solution := make([]*operation, len(a)+len(b))
+
+ // add records op (if any) as ending at line i2 of a / j2 of b,
+ // capturing the inserted content from b for insertions.
+ add := func(op *operation, i2, j2 int) {
+ if op == nil {
+ return
+ }
+ op.I2 = i2
+ if op.Kind == diff.Insert {
+ op.Content = b[op.J1:j2]
+ }
+ solution[i] = op
+ i++
+ }
+ // Walk the snakes: horizontal moves are deletions from a, vertical
+ // moves are insertions from b, and diagonals are equal lines.
+ x, y := 0, 0
+ for _, snake := range snakes {
+ if len(snake) < 2 {
+ continue
+ }
+ var op *operation
+ // delete (horizontal)
+ for snake[0]-snake[1] > x-y {
+ if op == nil {
+ op = &operation{
+ Kind: diff.Delete,
+ I1: x,
+ J1: y,
+ }
+ }
+ x++
+ if x == M {
+ break
+ }
+ }
+ add(op, x, y)
+ op = nil
+ // insert (vertical)
+ for snake[0]-snake[1] < x-y {
+ if op == nil {
+ op = &operation{
+ Kind: diff.Insert,
+ I1: x,
+ J1: y,
+ }
+ }
+ y++
+ }
+ add(op, x, y)
+ op = nil
+ // equal (diagonal)
+ for x < snake[0] {
+ x++
+ y++
+ }
+ if x >= M && y >= N {
+ break
+ }
+ }
+ return solution[:i]
+}
+
+// backtrack uses the trace for the edit sequence computation and returns the
+// "snakes" that make up the solution. A "snake" is a single deletion or
+// insertion followed by zero or more diagonals.
+func backtrack(trace [][]int, x, y, offset int) [][]int {
+ snakes := make([][]int, len(trace))
+ d := len(trace) - 1
+ for ; x > 0 && y > 0 && d > 0; d-- {
+ V := trace[d]
+ if len(V) == 0 {
+ continue
+ }
+ // snakes[d] records the (x, y) endpoint reached at edit distance d.
+ snakes[d] = []int{x, y}
+
+ k := x - y
+
+ // Choose the predecessor diagonal, mirroring the down/right
+ // decision made in shortestEditSequence.
+ var kPrev int
+ if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
+ kPrev = k + 1
+ } else {
+ kPrev = k - 1
+ }
+
+ x = V[kPrev+offset]
+ y = x - kPrev
+ }
+ if x < 0 || y < 0 {
+ return snakes
+ }
+ snakes[d] = []int{x, y}
+ return snakes
+}
+
+// shortestEditSequence returns the shortest edit sequence that converts a into b.
+// It returns the trace of furthest-reaching V arrays (one snapshot per
+// edit distance d) and the offset used to index diagonal k into V.
+func shortestEditSequence(a, b []string) ([][]int, int) {
+ M, N := len(a), len(b)
+ V := make([]int, 2*(N+M)+1)
+ offset := N + M
+ trace := make([][]int, N+M+1)
+
+ // Iterate through the maximum possible length of the SES (N+M).
+ for d := 0; d <= N+M; d++ {
+ copyV := make([]int, len(V))
+ // k lines are represented by the equation y = x - k. We move in
+ // increments of 2 because end points for even d are on even k lines.
+ for k := -d; k <= d; k += 2 {
+ // At each point, we either go down or to the right. We go down if
+ // k == -d, and we go to the right if k == d. We also prioritize
+ // the maximum x value, because we prefer deletions to insertions.
+ var x int
+ if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
+ x = V[k+1+offset] // down
+ } else {
+ x = V[k-1+offset] + 1 // right
+ }
+
+ y := x - k
+
+ // Diagonal moves while we have equal contents.
+ for x < M && y < N && a[x] == b[y] {
+ x++
+ y++
+ }
+
+ V[k+offset] = x
+
+ // Return if we've exceeded the maximum values.
+ if x == M && y == N {
+ // Makes sure to save the state of the array before returning.
+ copy(copyV, V)
+ trace[d] = copyV
+ return trace, offset
+ }
+ }
+
+ // Save the state of the array.
+ copy(copyV, V)
+ trace[d] = copyV
+ }
+ return nil, 0
+}
+
+// splitLines returns the lines of text, each retaining its trailing
+// "\n" (if any); the empty fragment after a final newline is dropped.
+func splitLines(text string) []string {
+ lines := strings.SplitAfter(text, "\n")
+ if lines[len(lines)-1] == "" {
+ lines = lines[:len(lines)-1]
+ }
+ return lines
+}
diff --git a/internal/diff/myers/diff_test.go b/internal/diff/myers/diff_test.go
new file mode 100644
index 000000000..f24445558
--- /dev/null
+++ b/internal/diff/myers/diff_test.go
@@ -0,0 +1,16 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package myers_test
+
+import (
+ "testing"
+
+ "golang.org/x/tools/internal/diff/difftest"
+ "golang.org/x/tools/internal/diff/myers"
+)
+
+// TestDiff runs the shared difftest suite against myers.ComputeEdits.
+func TestDiff(t *testing.T) {
+ difftest.DiffTest(t, myers.ComputeEdits)
+}
diff --git a/internal/diff/ndiff.go b/internal/diff/ndiff.go
new file mode 100644
index 000000000..050b08ded
--- /dev/null
+++ b/internal/diff/ndiff.go
@@ -0,0 +1,109 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+import (
+ "bytes"
+ "unicode/utf8"
+
+ "golang.org/x/tools/internal/diff/lcs"
+)
+
+// Strings computes the differences between two strings.
+// The resulting edits respect rune boundaries.
+func Strings(before, after string) []Edit {
+ if before == after {
+ return nil // common case
+ }
+
+ if stringIsASCII(before) && stringIsASCII(after) {
+ // Fast path: in ASCII text, byte offsets and rune offsets coincide.
+ // TODO(adonovan): opt: specialize diffASCII for strings.
+ return diffASCII([]byte(before), []byte(after))
+ }
+ return diffRunes([]rune(before), []rune(after))
+}
+
+// Bytes computes the differences between two byte slices.
+// The resulting edits respect rune boundaries.
+func Bytes(before, after []byte) []Edit {
+ if bytes.Equal(before, after) {
+ return nil // common case
+ }
+
+ if bytesIsASCII(before) && bytesIsASCII(after) {
+ // Fast path: in ASCII text, byte offsets and rune offsets coincide.
+ return diffASCII(before, after)
+ }
+ return diffRunes(runes(before), runes(after))
+}
+
+// diffASCII diffs two byte slices that are known to contain only ASCII,
+// so the byte offsets reported by the lcs package can be used directly
+// as rune-boundary-respecting Edit offsets.
+func diffASCII(before, after []byte) []Edit {
+ diffs := lcs.DiffBytes(before, after)
+
+ // Convert from LCS diffs.
+ res := make([]Edit, len(diffs))
+ for i, d := range diffs {
+ res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])}
+ }
+ return res
+}
+
+// diffRunes diffs two rune slices and converts the rune-indexed LCS
+// diffs into Edits whose offsets are UTF-8 byte offsets.
+func diffRunes(before, after []rune) []Edit {
+ diffs := lcs.DiffRunes(before, after)
+
+ // The diffs returned by the lcs package use indexes
+ // into whatever slice was passed in.
+ // Convert rune offsets to byte offsets.
+ res := make([]Edit, len(diffs))
+ lastEnd := 0 // rune offset of the end of the previous diff
+ utf8Len := 0 // byte offset corresponding to lastEnd
+ for i, d := range diffs {
+ utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits
+ start := utf8Len
+ utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit
+ res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])}
+ lastEnd = d.End
+ }
+ return res
+}
+
+// runes is like []rune(string(bytes)) without the duplicate allocation.
+func runes(bytes []byte) []rune {
+ n := utf8.RuneCount(bytes)
+ runes := make([]rune, n)
+ for i := 0; i < n; i++ {
+ // Invalid UTF-8 decodes as utf8.RuneError (per utf8.DecodeRune).
+ r, sz := utf8.DecodeRune(bytes)
+ bytes = bytes[sz:]
+ runes[i] = r
+ }
+ return runes
+}
+
+// runesLen returns the length in bytes of the UTF-8 encoding of runes.
+// (The named result shadows the len builtin, which is not used here.)
+func runesLen(runes []rune) (len int) {
+ for _, r := range runes {
+ len += utf8.RuneLen(r)
+ }
+ return len
+}
+
+// stringIsASCII reports whether s contains only ASCII.
+// TODO(adonovan): combine when x/tools allows generics.
+func stringIsASCII(s string) bool {
+ for i := 0; i < len(s); i++ {
+ if s[i] >= utf8.RuneSelf {
+ return false
+ }
+ }
+ return true
+}
+
+// bytesIsASCII reports whether s contains only ASCII bytes.
+func bytesIsASCII(s []byte) bool {
+ for i := 0; i < len(s); i++ {
+ if s[i] >= utf8.RuneSelf {
+ return false
+ }
+ }
+ return true
+}
diff --git a/internal/diff/unified.go b/internal/diff/unified.go
new file mode 100644
index 000000000..fa376f178
--- /dev/null
+++ b/internal/diff/unified.go
@@ -0,0 +1,248 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+import (
+ "fmt"
+ "log"
+ "strings"
+)
+
+// Unified returns a unified diff of the old and new strings.
+// The old and new labels are the names of the old and new files.
+// If the strings are equal, it returns the empty string.
+// It is a convenience wrapper around Strings and ToUnified.
+func Unified(oldLabel, newLabel, old, new string) string {
+ edits := Strings(old, new)
+ unified, err := ToUnified(oldLabel, newLabel, old, edits)
+ if err != nil {
+ // Can't happen: edits are consistent.
+ log.Fatalf("internal error in diff.Unified: %v", err)
+ }
+ return unified
+}
+
+// ToUnified applies the edits to content and returns a unified diff.
+// The old and new labels are the names of the content and result files.
+// It returns an error if the edits are inconsistent; see Apply.
+func ToUnified(oldLabel, newLabel, content string, edits []Edit) (string, error) {
+ u, err := toUnified(oldLabel, newLabel, content, edits)
+ if err != nil {
+ return "", err
+ }
+ return u.String(), nil
+}
+
+// unified represents a set of edits as a unified diff.
+type unified struct {
+ // From is the name of the original file.
+ From string
+ // To is the name of the modified file.
+ To string
+ // Hunks is the set of edit hunks needed to transform the file content.
+ Hunks []*hunk
+}
+
+// Hunk represents a contiguous set of line edits to apply.
+type hunk struct {
+ // The line in the original source where the hunk starts.
+ FromLine int
+ // The line in the original source where the hunk finishes.
+ ToLine int
+ // The set of line based edits to apply.
+ Lines []line
+}
+
+// Line represents a single line operation to apply as part of a Hunk.
+type line struct {
+ // Kind is the type of line this represents, deletion, insertion or copy.
+ Kind OpKind
+ // Content is the content of this line.
+ // For deletion it is the line being removed, for all others it is the line
+ // to put in the output.
+ Content string
+}
+
+// OpKind is used to denote the type of operation a line represents.
+// The zero value is Delete.
+// TODO(adonovan): hide this once the myers package no longer references it.
+type OpKind int
+
+const (
+ // Delete is the operation kind for a line that is present in the input
+ // but not in the output.
+ Delete OpKind = iota
+ // Insert is the operation kind for a line that is new in the output.
+ Insert
+ // Equal is the operation kind for a line that is the same in the input and
+ // output, often used to provide context around edited lines.
+ Equal
+)
+
+// String returns a human readable representation of an OpKind. It is not
+// intended for machine processing.
+// It panics on a value outside the declared constants.
+func (k OpKind) String() string {
+ switch k {
+ case Delete:
+ return "delete"
+ case Insert:
+ return "insert"
+ case Equal:
+ return "equal"
+ default:
+ panic("unknown operation kind")
+ }
+}
+
+const (
+ // edge is the number of lines of equal context emitted at each
+ // boundary of a hunk; edits separated by at most gap (= 2*edge)
+ // equal lines are merged into a single hunk (see toUnified).
+ edge = 3
+ gap = edge * 2
+)
+
+// toUnified takes a file contents and a sequence of edits, and calculates
+// a unified diff that represents those edits.
+// It assumes edits are sorted and non-overlapping (the lineEdits
+// expansion returns an error otherwise).
+func toUnified(fromName, toName string, content string, edits []Edit) (unified, error) {
+ u := unified{
+ From: fromName,
+ To: toName,
+ }
+ if len(edits) == 0 {
+ return u, nil
+ }
+ var err error
+ edits, err = lineEdits(content, edits) // expand to whole lines
+ if err != nil {
+ return u, err
+ }
+ lines := splitLines(content)
+ var h *hunk
+ last := 0 // 0-based line in content just past the previous edit
+ toLine := 0 // 0-based line in the output corresponding to last
+ for _, edit := range edits {
+ // Compute the zero-based line numbers of the edit start and end.
+ // TODO(adonovan): opt: compute incrementally, avoid O(n^2).
+ start := strings.Count(content[:edit.Start], "\n")
+ end := strings.Count(content[:edit.End], "\n")
+ if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
+ end++ // EOF counts as an implicit newline
+ }
+
+ switch {
+ case h != nil && start == last:
+ //direct extension
+ case h != nil && start <= last+gap:
+ //within range of previous lines, add the joiners
+ addEqualLines(h, lines, last, start)
+ default:
+ //need to start a new hunk
+ if h != nil {
+ // add the edge to the previous hunk
+ addEqualLines(h, lines, last, last+edge)
+ u.Hunks = append(u.Hunks, h)
+ }
+ toLine += start - last
+ h = &hunk{
+ FromLine: start + 1,
+ ToLine: toLine + 1,
+ }
+ // add the edge to the new hunk
+ // (delta may be < edge near the start of the file).
+ delta := addEqualLines(h, lines, start-edge, start)
+ h.FromLine -= delta
+ h.ToLine -= delta
+ }
+ last = start
+ for i := start; i < end; i++ {
+ h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]})
+ last++
+ }
+ if edit.New != "" {
+ for _, content := range splitLines(edit.New) {
+ h.Lines = append(h.Lines, line{Kind: Insert, Content: content})
+ toLine++
+ }
+ }
+ }
+ if h != nil {
+ // add the edge to the final hunk
+ addEqualLines(h, lines, last, last+edge)
+ u.Hunks = append(u.Hunks, h)
+ }
+ return u, nil
+}
+
+// splitLines returns the lines of text, each retaining its trailing
+// "\n" (if any); the empty fragment after a final newline is dropped.
+func splitLines(text string) []string {
+ lines := strings.SplitAfter(text, "\n")
+ if lines[len(lines)-1] == "" {
+ lines = lines[:len(lines)-1]
+ }
+ return lines
+}
+
+// addEqualLines appends lines[start:end] to h as Equal (context) lines,
+// clipping the range to the bounds of lines, and returns the number of
+// lines actually appended.
+func addEqualLines(h *hunk, lines []string, start, end int) int {
+ delta := 0
+ for i := start; i < end; i++ {
+ if i < 0 {
+ continue
+ }
+ if i >= len(lines) {
+ return delta
+ }
+ h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]})
+ delta++
+ }
+ return delta
+}
+
+// String converts a unified diff to the standard textual form for that diff.
+// The output of this function can be passed to tools like patch.
+func (u unified) String() string {
+ if len(u.Hunks) == 0 {
+ return ""
+ }
+ b := new(strings.Builder)
+ fmt.Fprintf(b, "--- %s\n", u.From)
+ fmt.Fprintf(b, "+++ %s\n", u.To)
+ for _, hunk := range u.Hunks {
+ // Count the hunk's length in the old (from) and new (to) files:
+ // deletions count only toward the old file, insertions only
+ // toward the new, and equal lines toward both.
+ fromCount, toCount := 0, 0
+ for _, l := range hunk.Lines {
+ switch l.Kind {
+ case Delete:
+ fromCount++
+ case Insert:
+ toCount++
+ default:
+ fromCount++
+ toCount++
+ }
+ }
+ // Emit the "@@ -from,len +to,len @@" hunk header, omitting a
+ // count of 1 as diff -u does.
+ fmt.Fprint(b, "@@")
+ if fromCount > 1 {
+ fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount)
+ } else if hunk.FromLine == 1 && fromCount == 0 {
+ // Match odd GNU diff -u behavior adding to empty file.
+ fmt.Fprintf(b, " -0,0")
+ } else {
+ fmt.Fprintf(b, " -%d", hunk.FromLine)
+ }
+ if toCount > 1 {
+ fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount)
+ } else {
+ fmt.Fprintf(b, " +%d", hunk.ToLine)
+ }
+ fmt.Fprint(b, " @@\n")
+ for _, l := range hunk.Lines {
+ switch l.Kind {
+ case Delete:
+ fmt.Fprintf(b, "-%s", l.Content)
+ case Insert:
+ fmt.Fprintf(b, "+%s", l.Content)
+ default:
+ fmt.Fprintf(b, " %s", l.Content)
+ }
+ if !strings.HasSuffix(l.Content, "\n") {
+ // A line can lack a trailing newline only at end of file;
+ // emit diff's "\ No newline at end of file" marker.
+ fmt.Fprintf(b, "\n\\ No newline at end of file\n")
+ }
+ }
+ }
+ return b.String()
+}