diff options
Diffstat (limited to 'vendor/github.com/google/go-cmp/cmp/report_slices.go')
-rw-r--r-- | vendor/github.com/google/go-cmp/cmp/report_slices.go | 356 |
1 files changed, 318 insertions, 38 deletions
diff --git a/vendor/github.com/google/go-cmp/cmp/report_slices.go b/vendor/github.com/google/go-cmp/cmp/report_slices.go index eafcf2e..2ad3bc8 100644 --- a/vendor/github.com/google/go-cmp/cmp/report_slices.go +++ b/vendor/github.com/google/go-cmp/cmp/report_slices.go @@ -1,13 +1,15 @@ // Copyright 2019, The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE.md file. +// license that can be found in the LICENSE file. package cmp import ( "bytes" "fmt" + "math" "reflect" + "strconv" "strings" "unicode" "unicode/utf8" @@ -23,14 +25,35 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { return false // Must be formatting in diff mode case v.NumDiff == 0: return false // No differences detected - case v.NumIgnored+v.NumCompared+v.NumTransformed > 0: - // TODO: Handle the case where someone uses bytes.Equal on a large slice. - return false // Some custom option was used to determined equality case !v.ValueX.IsValid() || !v.ValueY.IsValid(): return false // Both values must be valid + case v.NumIgnored > 0: + return false // Some ignore option was used + case v.NumTransformed > 0: + return false // Some transform option was used + case v.NumCompared > 1: + return false // More than one comparison was used + case v.NumCompared == 1 && v.Type.Name() != "": + // The need for cmp to check applicability of options on every element + // in a slice is a significant performance detriment for large []byte. + // The workaround is to specify Comparer(bytes.Equal), + // which enables cmp to compare []byte more efficiently. + // If they differ, we still want to provide batched diffing. + // The logic disallows named types since they tend to have their own + // String method, with nicer formatting than what this provides. + return false + } + + // Check whether this is an interface with the same concrete types. + t := v.Type + vx, vy := v.ValueX, v.ValueY + if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() { + vx, vy = vx.Elem(), vy.Elem() + t = vx.Type() } - switch t := v.Type; t.Kind() { + // Check whether we provide specialized diffing for this type. + switch t.Kind() { case reflect.String: case reflect.Array, reflect.Slice: // Only slices of primitive types have specialized handling. @@ -42,6 +65,11 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { return false } + // Both slice values have to be non-empty. + if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) { + return false + } + // If a sufficient number of elements already differ, // use specialized formatting even if length requirement is not met. if v.NumDiff > v.NumSame { @@ -53,7 +81,7 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { // Use specialized string diffing for longer slices or strings. const minLength = 64 - return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength + return vx.Len() >= minLength && vy.Len() >= minLength } // FormatDiffSlice prints a diff for the slices (or strings) represented by v. @@ -62,17 +90,23 @@ func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { assert(opts.DiffMode == diffUnknown) t, vx, vy := v.Type, v.ValueX, v.ValueY + if t.Kind() == reflect.Interface { + vx, vy = vx.Elem(), vy.Elem() + t = vx.Type() + opts = opts.WithTypeMode(emitType) + } // Auto-detect the type of the data. - var isLinedText, isText, isBinary bool var sx, sy string + var ssx, ssy []string + var isString, isMostlyText, isPureLinedText, isBinary bool switch { case t.Kind() == reflect.String: sx, sy = vx.String(), vy.String() - isText = true // Initial estimate, verify later + isString = true case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)): sx, sy = string(vx.Bytes()), string(vy.Bytes()) - isBinary = true // Initial estimate, verify later + isString = true case t.Kind() == reflect.Array: // Arrays need to be addressable for slice operations to work. vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem() @@ -80,13 +114,12 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { vy2.Set(vy) vx, vy = vx2, vy2 } - if isText || isBinary { - var numLines, lastLineIdx, maxLineLen int - isBinary = false + if isString { + var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int for i, r := range sx + sy { - if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError { - isBinary = true - break + numTotalRunes++ + if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError { + numValidRunes++ } if r == '\n' { if maxLineLen < i-lastLineIdx { @@ -96,8 +129,26 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { numLines++ } } - isText = !isBinary - isLinedText = isText && numLines >= 4 && maxLineLen <= 256 + isPureText := numValidRunes == numTotalRunes + isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes)) + isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024 + isBinary = !isMostlyText + + // Avoid diffing by lines if it produces a significantly more complex + // edit script than diffing by bytes. + if isPureLinedText { + ssx = strings.Split(sx, "\n") + ssy = strings.Split(sy, "\n") + esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result { + return diff.BoolResult(ssx[ix] == ssy[iy]) + }) + esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result { + return diff.BoolResult(sx[ix] == sy[iy]) + }) + efficiencyLines := float64(esLines.Dist()) / float64(len(esLines)) + efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes)) + isPureLinedText = efficiencyLines < 4*efficiencyBytes + } } // Format the string into printable records. @@ -106,9 +157,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { switch { // If the text appears to be multi-lined text, // then perform differencing across individual lines. - case isLinedText: - ssx := strings.Split(sx, "\n") - ssy := strings.Split(sy, "\n") + case isPureLinedText: list = opts.formatDiffSlice( reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", func(v reflect.Value, d diffMode) textRecord { @@ -117,10 +166,87 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { }, ) delim = "\n" + + // If possible, use a custom triple-quote (""") syntax for printing + // differences in a string literal. This format is more readable, + // but has edge-cases where differences are visually indistinguishable. + // This format is avoided under the following conditions: + // • A line starts with `"""` + // • A line starts with "..." + // • A line contains non-printable characters + // • Adjacent different lines differ only by whitespace + // + // For example: + // """ + // ... // 3 identical lines + // foo + // bar + // - baz + // + BAZ + // """ + isTripleQuoted := true + prevRemoveLines := map[string]bool{} + prevInsertLines := map[string]bool{} + var list2 textList + list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) + for _, r := range list { + if !r.Value.Equal(textEllipsis) { + line, _ := strconv.Unquote(string(r.Value.(textLine))) + line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support + normLine := strings.Map(func(r rune) rune { + if unicode.IsSpace(r) { + return -1 // drop whitespace to avoid visually indistinguishable output + } + return r + }, line) + isPrintable := func(r rune) bool { + return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable + } + isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == "" + switch r.Diff { + case diffRemoved: + isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine] + prevRemoveLines[normLine] = true + case diffInserted: + isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine] + prevInsertLines[normLine] = true + } + if !isTripleQuoted { + break + } + r.Value = textLine(line) + r.ElideComma = true + } + if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group + prevRemoveLines = map[string]bool{} + prevInsertLines = map[string]bool{} + } + list2 = append(list2, r) + } + if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 { + list2 = list2[:len(list2)-1] // elide single empty line at the end + } + list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) + if isTripleQuoted { + var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"} + switch t.Kind() { + case reflect.String: + if t != reflect.TypeOf(string("")) { + out = opts.FormatType(t, out) + } + case reflect.Slice: + // Always emit type for slices since the triple-quote syntax + // looks like a string (not a slice). + opts = opts.WithTypeMode(emitType) + out = opts.FormatType(t, out) + } + return out + } + // If the text appears to be single-lined text, // then perform differencing in approximately fixed-sized chunks. // The output is printed as quoted strings. - case isText: + case isMostlyText: list = opts.formatDiffSlice( reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte", func(v reflect.Value, d diffMode) textRecord { @@ -128,7 +254,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { return textRecord{Diff: d, Value: textLine(s)} }, ) - delim = "" + // If the text appears to be binary data, // then perform differencing in approximately fixed-sized chunks. // The output is inspired by hexdump. @@ -145,6 +271,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { return textRecord{Diff: d, Value: textLine(s), Comment: comment} }, ) + // For all other slices of primitive types, // then perform differencing in approximately fixed-sized chunks. // The size of each chunk depends on the width of the element kind. @@ -172,7 +299,9 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { switch t.Elem().Kind() { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: ss = append(ss, fmt.Sprint(v.Index(i).Int())) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: + ss = append(ss, fmt.Sprint(v.Index(i).Uint())) + case reflect.Uint8, reflect.Uintptr: ss = append(ss, formatHex(v.Index(i).Uint())) case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: ss = append(ss, fmt.Sprint(v.Index(i).Interface())) @@ -185,8 +314,8 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { } // Wrap the output with appropriate type information. - var out textNode = textWrap{"{", list, "}"} - if !isText { + var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"} + if !isMostlyText { // The "{...}" byte-sequence literal is not valid Go syntax for strings. // Emit the type for extra clarity (e.g. "string{...}"). if t.Kind() == reflect.String { @@ -196,12 +325,12 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { } switch t.Kind() { case reflect.String: - out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)} + out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} if t != reflect.TypeOf(string("")) { out = opts.FormatType(t, out) } case reflect.Slice: - out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)} + out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} if t != reflect.TypeOf([]byte(nil)) { out = opts.FormatType(t, out) } @@ -225,8 +354,11 @@ func (opts formatOptions) formatDiffSlice( vx, vy reflect.Value, chunkSize int, name string, makeRec func(reflect.Value, diffMode) textRecord, ) (list textList) { - es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result { - return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface()) + eq := func(ix, iy int) bool { + return vx.Index(ix).Interface() == vy.Index(iy).Interface() + } + es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result { + return diff.BoolResult(eq(ix, iy)) }) appendChunks := func(v reflect.Value, d diffMode) int { @@ -242,9 +374,23 @@ func (opts formatOptions) formatDiffSlice( return n0 - v.Len() } + var numDiffs int + maxLen := -1 + if opts.LimitVerbosity { + maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc... + opts.VerbosityLevel-- + } + groups := coalesceAdjacentEdits(name, es) groups = coalesceInterveningIdentical(groups, chunkSize/4) + groups = cleanupSurroundingIdentical(groups, eq) + maxGroup := diffStats{Name: name} for i, ds := range groups { + if maxLen >= 0 && numDiffs >= maxLen { + maxGroup = maxGroup.Append(ds) + continue + } + // Print equal. if ds.NumDiff() == 0 { // Compute the number of leading and trailing equal bytes to print. @@ -273,36 +419,53 @@ func (opts formatOptions) formatDiffSlice( } // Print unequal. + len0 := len(list) nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved) vx = vx.Slice(nx, vx.Len()) ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted) vy = vy.Slice(ny, vy.Len()) + numDiffs += len(list) - len0 + } + if maxGroup.IsZero() { + assert(vx.Len() == 0 && vy.Len() == 0) + } else { + list.AppendEllipsis(maxGroup) } - assert(vx.Len() == 0 && vy.Len() == 0) return list } // coalesceAdjacentEdits coalesces the list of edits into groups of adjacent // equal or unequal counts. +// +// Example: +// +// Input: "..XXY...Y" +// Output: [ +// {NumIdentical: 2}, +// {NumRemoved: 2, NumInserted 1}, +// {NumIdentical: 3}, +// {NumInserted: 1}, +// ] +// func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { - var prevCase int // Arbitrary index into which case last occurred - lastStats := func(i int) *diffStats { - if prevCase != i { + var prevMode byte + lastStats := func(mode byte) *diffStats { + if prevMode != mode { groups = append(groups, diffStats{Name: name}) - prevCase = i + prevMode = mode } return &groups[len(groups)-1] } for _, e := range es { switch e { case diff.Identity: - lastStats(1).NumIdentical++ + lastStats('=').NumIdentical++ case diff.UniqueX: - lastStats(2).NumRemoved++ + lastStats('!').NumRemoved++ case diff.UniqueY: - lastStats(2).NumInserted++ + lastStats('!').NumInserted++ case diff.Modified: - lastStats(2).NumModified++ + lastStats('!').NumModified++ } } return groups @@ -312,6 +475,35 @@ func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) // equal groups into adjacent unequal groups that currently result in a // dual inserted/removed printout. This acts as a high-pass filter to smooth // out high-frequency changes within the windowSize. +// +// Example: +// +// WindowSize: 16, +// Input: [ +// {NumIdentical: 61}, // group 0 +// {NumRemoved: 3, NumInserted: 1}, // group 1 +// {NumIdentical: 6}, // ├── coalesce +// {NumInserted: 2}, // ├── coalesce +// {NumIdentical: 1}, // ├── coalesce +// {NumRemoved: 9}, // └── coalesce +// {NumIdentical: 64}, // group 2 +// {NumRemoved: 3, NumInserted: 1}, // group 3 +// {NumIdentical: 6}, // ├── coalesce +// {NumInserted: 2}, // ├── coalesce +// {NumIdentical: 1}, // ├── coalesce +// {NumRemoved: 7}, // ├── coalesce +// {NumIdentical: 1}, // ├── coalesce +// {NumRemoved: 2}, // └── coalesce +// {NumIdentical: 63}, // group 4 +// ] +// Output: [ +// {NumIdentical: 61}, +// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, +// {NumIdentical: 64}, +// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3}, +// {NumIdentical: 63}, +// ] +// func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { groups, groupsOrig := groups[:0], groups for i, ds := range groupsOrig { @@ -331,3 +523,91 @@ func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStat } return groups } + +// cleanupSurroundingIdentical scans through all unequal groups, and +// moves any leading sequence of equal elements to the preceding equal group and +// moves and trailing sequence of equal elements to the succeeding equal group. +// +// This is necessary since coalesceInterveningIdentical may coalesce edit groups +// together such that leading/trailing spans of equal elements becomes possible. +// Note that this can occur even with an optimal diffing algorithm. +// +// Example: +// +// Input: [ +// {NumIdentical: 61}, +// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements +// {NumIdentical: 67}, +// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements +// {NumIdentical: 54}, +// ] +// Output: [ +// {NumIdentical: 64}, // incremented by 3 +// {NumRemoved: 9}, +// {NumIdentical: 67}, +// {NumRemoved: 9}, +// {NumIdentical: 64}, // incremented by 10 +// ] +// +func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats { + var ix, iy int // indexes into sequence x and y + for i, ds := range groups { + // Handle equal group. + if ds.NumDiff() == 0 { + ix += ds.NumIdentical + iy += ds.NumIdentical + continue + } + + // Handle unequal group. + nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified + ny := ds.NumIdentical + ds.NumInserted + ds.NumModified + var numLeadingIdentical, numTrailingIdentical int + for i := 0; i < nx && i < ny && eq(ix+i, iy+i); i++ { + numLeadingIdentical++ + } + for i := 0; i < nx && i < ny && eq(ix+nx-1-i, iy+ny-1-i); i++ { + numTrailingIdentical++ + } + if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 { + if numLeadingIdentical > 0 { + // Remove leading identical span from this group and + // insert it into the preceding group. + if i-1 >= 0 { + groups[i-1].NumIdentical += numLeadingIdentical + } else { + // No preceding group exists, so prepend a new group, + // but do so after we finish iterating over all groups. + defer func() { + groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...) + }() + } + // Increment indexes since the preceding group would have handled this. + ix += numLeadingIdentical + iy += numLeadingIdentical + } + if numTrailingIdentical > 0 { + // Remove trailing identical span from this group and + // insert it into the succeeding group. + if i+1 < len(groups) { + groups[i+1].NumIdentical += numTrailingIdentical + } else { + // No succeeding group exists, so append a new group, + // but do so after we finish iterating over all groups. + defer func() { + groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical}) + }() + } + // Do not increment indexes since the succeeding group will handle this. + } + + // Update this group since some identical elements were removed. + nx -= numIdentical + ny -= numIdentical + groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny} + } + ix += nx + iy += ny + } + return groups +} |