aboutsummaryrefslogtreecommitdiff
path: root/src/sort
diff options
context:
space:
mode:
authorSokolov Yura <funny.falcon@gmail.com>2015-10-12 16:06:38 +0300
committerRuss Cox <rsc@golang.org>2015-12-04 15:17:47 +0000
commit6f6b2f04b5c342edf70944e60c9c9a30eef5a9eb (patch)
tree444b349b948d8c74f637d19d63f1c79d0ad45f4a /src/sort
parent8854bdbd76d66a39b35980cee6643b4d4bd48fd4 (diff)
downloadgo-6f6b2f04b5c342edf70944e60c9c9a30eef5a9eb.tar.gz
go-6f6b2f04b5c342edf70944e60c9c9a30eef5a9eb.zip
sort: improve average quicksort performance
- change way of protection from O(N^2) on duplicate values. Previous algorithm does additional comparisons and swaps on every split pass. Changed algorithm does one ordinal quicksort split pass, and if distribution is skewed, then additional pass to separate pivot's duplicates. Changed algorithm could be slower on very ununique slice, but it is still protected from O(N^2). - increase small slice size and do simple shell sort pass to amortize worst case on small slices. Small slice has higher probability to have skewed distribution, so lets sort it with simpler algorithm. benchmark old ns/op new ns/op delta BenchmarkSortString1K 458374 388641 -15.21% BenchmarkSortInt1K 217851 181796 -16.55% BenchmarkSortInt64K 20539264 16730340 -18.54% BenchmarkSort1e2 98668 95554 -3.16% BenchmarkSort1e4 20278500 18316829 -9.67% BenchmarkSort1e6 3215724392 2795999911 -13.05% number of operations: Size: Total: Swap: Less: % % % Sort 100 Avg -5.98% -18.43% -1.90% Sort 100 Max -14.43% -16.02% -4.51% Sort 300 Avg -7.50% -12.76% -5.96% Sort 300 Max -11.29% -9.60% -4.30% Sort 1000 Avg -12.13% -11.65% -12.25% Sort 1000 Max -13.81% -11.77% -11.89% Sort 3000 Avg -14.61% -9.30% -15.86% Sort 3000 Max -15.81% -8.66% -15.19% Sort 10000 Avg -16.10% -8.47% -17.80% Sort 10000 Max -17.13% -7.63% -16.97% Sort 30000 Avg -17.46% -7.56% -19.57% Sort 30000 Max -18.24% -7.62% -17.68% Sort 100000 Avg -18.83% -6.64% -21.33% Sort 100000 Max -19.72% -6.70% -20.96% Sort 300000 Avg -19.61% -6.16% -22.30% Sort 300000 Max -20.69% -6.15% -21.81% Sort 1000000 Avg -20.42% -5.58% -23.31% Sort 1000000 Max -21.54% -5.56% -23.61% Change-Id: I23868e8b52b5841b358cd5403967c9a97871e4d5 Reviewed-on: https://go-review.googlesource.com/15688 Reviewed-by: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/sort')
-rw-r--r--src/sort/example_multi_test.go4
-rw-r--r--src/sort/sort.go112
2 files changed, 73 insertions, 43 deletions
diff --git a/src/sort/example_multi_test.go b/src/sort/example_multi_test.go
index ac316540fd..40d12152ce 100644
--- a/src/sort/example_multi_test.go
+++ b/src/sort/example_multi_test.go
@@ -122,10 +122,10 @@ func Example_sortMultiKeys() {
fmt.Println("By language,<lines,user:", changes)
// Output:
- // By user: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken Go 200} {ken C 150} {r Go 100} {r C 150} {rsc Go 200}]
+ // By user: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
// By user,<lines: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
// By user,>lines: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken Go 200} {ken C 150} {r C 150} {r Go 100} {rsc Go 200}]
- // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
+ // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {r Go 100} {gri Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
// By language,<lines,user: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
}
diff --git a/src/sort/sort.go b/src/sort/sort.go
index c7c30426ae..ac8f4a661f 100644
--- a/src/sort/sort.go
+++ b/src/sort/sort.go
@@ -72,7 +72,7 @@ func heapSort(data Interface, a, b int) {
}
}
-// Quicksort, following Bentley and McIlroy,
+// Quicksort, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
@@ -111,59 +111,82 @@ func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
- // data[lo <= i < a] = pivot
- // data[a <= i < b] < pivot
- // data[b <= i < c] is unexamined
- // data[c <= i < d] > pivot
- // data[d <= i < hi] = pivot
- //
- // Once b meets c, can swap the "= pivot" sections
- // into the middle of the slice.
+ // data[lo < i < a] < pivot
+ // data[a <= i < b] <= pivot
+ // data[b <= i < c] unexamined
+ // data[c <= i < hi-1] > pivot
+ // data[hi-1] >= pivot
pivot := lo
- a, b, c, d := lo+1, lo+1, hi, hi
+ a, c := lo+1, hi-1
+
+ for ; a != c && data.Less(a, pivot); a++ {
+ }
+ b := a
for {
- for b < c {
- if data.Less(b, pivot) { // data[b] < pivot
- b++
- } else if !data.Less(pivot, b) { // data[b] = pivot
- data.Swap(a, b)
- a++
- b++
- } else {
- break
- }
+ for ; b != c && !data.Less(pivot, b); b++ { // data[b] <= pivot
}
- for b < c {
- if data.Less(pivot, c-1) { // data[c-1] > pivot
- c--
- } else if !data.Less(c-1, pivot) { // data[c-1] = pivot
- data.Swap(c-1, d-1)
- c--
- d--
- } else {
- break
- }
+ for ; b != c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
}
- if b >= c {
+ if b == c {
break
}
- // data[b] > pivot; data[c-1] < pivot
+ // data[b] > pivot; data[c-1] <= pivot
data.Swap(b, c-1)
b++
c--
}
-
- n := min(b-a, a-lo)
- swapRange(data, lo, b-n, n)
-
- n = min(hi-d, d-c)
- swapRange(data, c, hi-n, n)
-
- return lo + b - a, hi - (d - c)
+ // If hi-c<3 then there are duplicates (by property of median of nine).
+ // Let be a bit more conservative, and set border to 5.
+ protect := hi-c < 5
+ if !protect && hi-c < (hi-lo)/4 {
+ // Lets test some points for equality to pivot
+ dups := 0
+ if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
+ data.Swap(c, hi-1)
+ c++
+ dups++
+ }
+ if !data.Less(b-1, pivot) { // data[b-1] = pivot
+ b--
+ dups++
+ }
+ // m-lo = (hi-lo)/2 > 6
+ // b-lo > (hi-lo)*3/4-1 > 8
+ // ==> m < b ==> data[m] <= pivot
+ if !data.Less(m, pivot) { // data[m] = pivot
+ data.Swap(m, b-1)
+ b--
+ dups++
+ }
+ // if at least 2 points are equal to pivot, assume skewed distribution
+ protect = dups > 1
+ }
+ if protect {
+ // Protect against a lot of duplicates
+ // Add invariant:
+ // data[a <= i < b] unexamined
+ // data[b <= i < c] = pivot
+ for {
+ for ; a != b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
+ }
+ for ; a != b && data.Less(a, pivot); a++ { // data[a] < pivot
+ }
+ if a == b {
+ break
+ }
+ // data[a] == pivot; data[b-1] < pivot
+ data.Swap(a, b-1)
+ a++
+ b--
+ }
+ }
+ // Swap pivot into middle
+ data.Swap(pivot, b-1)
+ return b - 1, c
}
func quickSort(data Interface, a, b, maxDepth int) {
- for b-a > 7 {
+ for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
@@ -181,6 +204,13 @@ func quickSort(data Interface, a, b, maxDepth int) {
}
}
if b-a > 1 {
+ // Do ShellSort pass with gap 6
+ // It could be written in this simplified form cause b-a <= 12
+ for i := a + 6; i < b; i++ {
+ if data.Less(i, i-6) {
+ data.Swap(i, i-6)
+ }
+ }
insertionSort(data, a, b)
}
}