diff options
author | Joe Tsai <joetsai@digital-static.net> | 2022-09-02 15:41:56 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-02 15:41:56 -0700 |
commit | a97318bf6562f2ed2632c5f985db51b1bc5bdcd0 (patch) | |
tree | dac641335569a526f52aa826617c4b578ae9b4e7 | |
parent | 377d28384c85781079e04aab3937170479da8cd6 (diff) | |
download | go-cmp-a97318bf6562f2ed2632c5f985db51b1bc5bdcd0.tar.gz |
Adjust heuristic for line-based versus byte-based diffing (#299)
If the string has many characters that require escape sequences to print,
then we need to take that into consideration and avoid byte-by-byte diffing.
Co-authored-by: Damien Neil <neild@users.noreply.github.com>
-rw-r--r-- | cmp/compare_test.go | 17 | ||||
-rw-r--r-- | cmp/report_slices.go | 5 | ||||
-rw-r--r-- | cmp/testdata/diffs | 12 |
3 files changed, 33 insertions, 1 deletion
diff --git a/cmp/compare_test.go b/cmp/compare_test.go index dc86f01..88b7d45 100644 --- a/cmp/compare_test.go +++ b/cmp/compare_test.go @@ -1403,6 +1403,23 @@ using the AllowUnexported option.`, "\n"), []byte("\xffoo"), []byte("foo"), []byte("barbaz"), []byte("added"), []byte("here"), []byte("hrmph\xff"), }, reason: "should print text byte slices as strings except those with binary", + }, { + label: label + "/ManyEscapeCharacters", + x: `[ + {"Base32": "NA======"}, + {"Base32": "NBSQ===="}, + {"Base32": "NBSWY==="}, + {"Base32": "NBSWY3A="}, + {"Base32": "NBSWY3DP"} +]`, + y: `[ + {"Base32": "NB======"}, + {"Base32": "NBSQ===="}, + {"Base32": "NBSWY==="}, + {"Base32": "NBSWY3A="}, + {"Base32": "NBSWY3DP"} +]`, + reason: "should use line-based diffing since byte-based diffing is unreadable due to heavy amounts of escaping", }} } diff --git a/cmp/report_slices.go b/cmp/report_slices.go index b38ed68..23e444f 100644 --- a/cmp/report_slices.go +++ b/cmp/report_slices.go @@ -147,7 +147,10 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { }) efficiencyLines := float64(esLines.Dist()) / float64(len(esLines)) efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes)) - isPureLinedText = efficiencyLines < 4*efficiencyBytes + quotedLength := len(strconv.Quote(sx + sy)) + unquotedLength := len(sx) + len(sy) + escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength) + isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1 } } diff --git a/cmp/testdata/diffs b/cmp/testdata/diffs index 8bff76f..be77b95 100644 --- a/cmp/testdata/diffs +++ b/cmp/testdata/diffs @@ -1182,6 +1182,18 @@ + {0x68, 0x72, 0x6d, 0x70, 0x68, 0xff}, } >>> TestDiff/Reporter/SliceOfBytesBinary +<<< TestDiff/Reporter/ManyEscapeCharacters + ( + """ + [ +- {"Base32": "NA======"}, ++ {"Base32": "NB======"}, + {"Base32": "NBSQ===="}, + {"Base32": "NBSWY==="}, + ... 
// 3 identical lines + """ + ) +>>> TestDiff/Reporter/ManyEscapeCharacters <<< TestDiff/EmbeddedStruct/ParentStructA/Inequal teststructs.ParentStructA{ privateStruct: teststructs.privateStruct{ |