diff options
author | Ziad Hatahet <hatahet@gmail.com> | 2011-09-07 13:54:33 -0400 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2011-09-07 13:54:33 -0400 |
commit | 21e49cbb2dc0d9557b78c8c3c7d46ffc1c6705ca (patch) | |
tree | 6ab3da73d9ae2a4c5d8e291bfb3b517b8ebe383b | |
parent | 7b2f214b6c3867699a37d405cf986d4f599d3f73 (diff) | |
download | go-21e49cbb2dc0d9557b78c8c3c7d46ffc1c6705ca.tar.gz go-21e49cbb2dc0d9557b78c8c3c7d46ffc1c6705ca.zip |
sort: use heapsort to bail out quicksort
See http://research.swtch.com/2008/01/killing-quicksort.html for more
info.
Fixes #467.
R=r, rsc
CC=golang-dev
https://golang.org/cl/4591051
-rw-r--r-- | src/pkg/sort/export_test.go | 9 | ||||
-rw-r--r-- | src/pkg/sort/sort.go | 61 | ||||
-rw-r--r-- | src/pkg/sort/sort_test.go | 68 |
3 files changed, 127 insertions, 11 deletions
diff --git a/src/pkg/sort/export_test.go b/src/pkg/sort/export_test.go new file mode 100644 index 0000000000..b6e30ceb57 --- /dev/null +++ b/src/pkg/sort/export_test.go @@ -0,0 +1,9 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +func Heapsort(data Interface) { + heapSort(data, 0, data.Len()) +} diff --git a/src/pkg/sort/sort.go b/src/pkg/sort/sort.go index 0a4a4375f0..83ee170cba 100644 --- a/src/pkg/sort/sort.go +++ b/src/pkg/sort/sort.go @@ -37,10 +37,47 @@ func insertionSort(data Interface, a, b int) { } } +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data Interface, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data.Less(first+child, first+child+1) { + child++ + } + if !data.Less(first+root, first+child) { + return + } + data.Swap(first+root, first+child) + root = child + } +} + +func heapSort(data Interface, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data.Swap(first, first+i) + siftDown(data, lo, i, first) + } +} + // Quicksort, following Bentley and McIlroy, // ``Engineering a Sort Function,'' SP&E November 1993. -// Move the median of the three values data[a], data[b], data[c] into data[a]. +// medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a]. func medianOfThree(data Interface, a, b, c int) { m0 := b m1 := a @@ -123,16 +160,21 @@ func doPivot(data Interface, lo, hi int) (midlo, midhi int) { return lo + b - a, hi - (d - c) } -func quickSort(data Interface, a, b int) { +func quickSort(data Interface, a, b, maxDepth int) { for b-a > 7 { + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- mlo, mhi := doPivot(data, a, b) // Avoiding recursion on the larger subproblem guarantees // a stack depth of at most lg(b-a). if mlo-a < b-mhi { - quickSort(data, a, mlo) + quickSort(data, a, mlo, maxDepth) a = mhi // i.e., quickSort(data, mhi, b) } else { - quickSort(data, mhi, b) + quickSort(data, mhi, b, maxDepth) b = mlo // i.e., quickSort(data, a, mlo) } } @@ -141,7 +183,16 @@ func quickSort(data Interface, a, b int) { } } -func Sort(data Interface) { quickSort(data, 0, data.Len()) } +func Sort(data Interface) { + // Switch to heapsort if depth of 2*ceil(lg(n)) is reached. + n := data.Len() + maxDepth := 0 + for 1<<uint(maxDepth) < n { + maxDepth++ + } + maxDepth *= 2 + quickSort(data, 0, data.Len(), maxDepth) +} func IsSorted(data Interface) bool { n := data.Len() diff --git a/src/pkg/sort/sort_test.go b/src/pkg/sort/sort_test.go index 5007a92a56..a5640151cb 100644 --- a/src/pkg/sort/sort_test.go +++ b/src/pkg/sort/sort_test.go @@ -169,6 +169,13 @@ func (d *testingData) Swap(i, j int) { d.data[i], d.data[j] = d.data[j], d.data[i] } +func min(a, b int) int { + if a < b { + return a + } + return b +} + func lg(n int) int { i := 0 for 1<<uint(i) < n { @@ -177,7 +184,7 @@ func lg(n int) int { return i } -func TestBentleyMcIlroy(t *testing.T) { +func testBentleyMcIlroy(t *testing.T, sort func(Interface)) { sizes := []int{100, 1023, 1024, 1025} if testing.Short() { sizes = []int{100, 127, 128, 129} @@ -253,7 +260,7 @@ func TestBentleyMcIlroy(t *testing.T) { desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode]) d := &testingData{desc, t, mdata[0:n], n * lg(n) * 12 / 10, 0} - Sort(d) + sort(d) // If we were testing C qsort, we'd have to make a copy // of the slice and sort it ourselves and then compare @@ -274,9 +281,58 @@ func TestBentleyMcIlroy(t *testing.T) { } } -func min(a, b int) int { - if a < b { - return a +func TestSortBM(t *testing.T) { + testBentleyMcIlroy(t, Sort) +} + +func TestHeapsortBM(t *testing.T) { + testBentleyMcIlroy(t, Heapsort) +} + +// This is based on the "antiquicksort" implementation by M. Douglas McIlroy. +// See http://www.cs.dartmouth.edu/~doug/mdmspe.pdf for more info. +type adversaryTestingData struct { + data []int + keys map[int]int + candidate int +} + +func (d *adversaryTestingData) Len() int { return len(d.data) } + +func (d *adversaryTestingData) Less(i, j int) bool { + if _, present := d.keys[i]; !present { + if _, present := d.keys[j]; !present { + if i == d.candidate { + d.keys[i] = len(d.keys) + } else { + d.keys[j] = len(d.keys) + } + } } - return b + + if _, present := d.keys[i]; !present { + d.candidate = i + return false + } + if _, present := d.keys[j]; !present { + d.candidate = j + return true + } + + return d.keys[i] >= d.keys[j] +} + +func (d *adversaryTestingData) Swap(i, j int) { + d.data[i], d.data[j] = d.data[j], d.data[i] +} + +func TestAdversary(t *testing.T) { + const size = 100 + data := make([]int, size) + for i := 0; i < size; i++ { + data[i] = i + } + + d := &adversaryTestingData{data, make(map[int]int), 0} + Sort(d) // This should degenerate to heapsort. } |