about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joe Tsai <joetsai@digital-static.net> 2022-09-02 15:41:56 -0700
committer: GitHub <noreply@github.com> 2022-09-02 15:41:56 -0700
commit: a97318bf6562f2ed2632c5f985db51b1bc5bdcd0 (patch)
tree: dac641335569a526f52aa826617c4b578ae9b4e7
parent: 377d28384c85781079e04aab3937170479da8cd6 (diff)
download: go-cmp-a97318bf6562f2ed2632c5f985db51b1bc5bdcd0.tar.gz
Adjust heuristic for line-based versus byte-based diffing (#299)
If the string has many characters that require escape sequences to print, then we need to take that into consideration and avoid byte-by-byte diffing.

Co-authored-by: Damien Neil <neild@users.noreply.github.com>
-rw-r--r--  cmp/compare_test.go   17
-rw-r--r--  cmp/report_slices.go   5
-rw-r--r--  cmp/testdata/diffs    12
3 files changed, 33 insertions, 1 deletion
diff --git a/cmp/compare_test.go b/cmp/compare_test.go
index dc86f01..88b7d45 100644
--- a/cmp/compare_test.go
+++ b/cmp/compare_test.go
@@ -1403,6 +1403,23 @@ using the AllowUnexported option.`, "\n"),
[]byte("\xffoo"), []byte("foo"), []byte("barbaz"), []byte("added"), []byte("here"), []byte("hrmph\xff"),
},
reason: "should print text byte slices as strings except those with binary",
+ }, {
+ label: label + "/ManyEscapeCharacters",
+ x: `[
+ {"Base32": "NA======"},
+ {"Base32": "NBSQ===="},
+ {"Base32": "NBSWY==="},
+ {"Base32": "NBSWY3A="},
+ {"Base32": "NBSWY3DP"}
+]`,
+ y: `[
+ {"Base32": "NB======"},
+ {"Base32": "NBSQ===="},
+ {"Base32": "NBSWY==="},
+ {"Base32": "NBSWY3A="},
+ {"Base32": "NBSWY3DP"}
+]`,
+ reason: "should use line-based diffing since byte-based diffing is unreadable due to heavy amounts of escaping",
}}
}
diff --git a/cmp/report_slices.go b/cmp/report_slices.go
index b38ed68..23e444f 100644
--- a/cmp/report_slices.go
+++ b/cmp/report_slices.go
@@ -147,7 +147,10 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
})
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
- isPureLinedText = efficiencyLines < 4*efficiencyBytes
+ quotedLength := len(strconv.Quote(sx + sy))
+ unquotedLength := len(sx) + len(sy)
+ escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength)
+ isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1
}
}
diff --git a/cmp/testdata/diffs b/cmp/testdata/diffs
index 8bff76f..be77b95 100644
--- a/cmp/testdata/diffs
+++ b/cmp/testdata/diffs
@@ -1182,6 +1182,18 @@
+ {0x68, 0x72, 0x6d, 0x70, 0x68, 0xff},
}
>>> TestDiff/Reporter/SliceOfBytesBinary
+<<< TestDiff/Reporter/ManyEscapeCharacters
+ (
+ """
+ [
+- {"Base32": "NA======"},
++ {"Base32": "NB======"},
+ {"Base32": "NBSQ===="},
+ {"Base32": "NBSWY==="},
+ ... // 3 identical lines
+ """
+ )
+>>> TestDiff/Reporter/ManyEscapeCharacters
<<< TestDiff/EmbeddedStruct/ParentStructA/Inequal
teststructs.ParentStructA{
privateStruct: teststructs.privateStruct{