From 8cdb17b48a005a97889f07593c4a619add46ea76 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 21 Dec 2022 01:07:55 +0900 Subject: [PATCH] put back the old QuickSort, PartialQuickSort, and MergeSort algorithms... (#47788) ...as they were in 1.8 and rename the new PartialQuickSort to QuickerSort Also improve the documentation and API for constructing QuickerSort and test the API Co-authored-by: Lilith Hafner --- base/sort.jl | 241 ++++++++++++++++++++++++++++++++++++------------ test/sorting.jl | 43 ++++++--- 2 files changed, 212 insertions(+), 72 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 2dd81829312d0..6d9f65c61b390 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - _sort!(v, _PartialQuickSort(k), o, (;)) + _sort!(v, QuickerSort(k), o, (;)) maybeview(v, k) end @@ -931,49 +931,40 @@ end """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm + QuickerSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm + QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm -Indicate that a sorting function should use the partial quick sort algorithm. +Use the `QuickerSort` algorithm with the `next` algorithm as a base case. -Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using -[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks +`QuickerSort` is like `QuickSort`, but utilizes scratch space to operate faster and allow +for the possibility of maintaining stability. + +If `lo` and `hi` are provided, finds and sorts the elements in the range `lo:hi`, reordering +but not necessarily sorting other elements in the process. If `lo` or `hi` is `missing`, it +is treated as the first or last index of the input, respectively. + +`lo` and `hi` may be specified together as an `AbstractUnitRange`. Characteristics: * *stable*: preserves the ordering of elements which compare equal (e.g. "a" and "A" in a sort of letters which ignores case). * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref). + * *linear runtime* if `length(lo:hi)` is constant + * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) """ -struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm +struct QuickerSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm lo::L hi::H next::T end -PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM) -PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM) -_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) -_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) - -""" - QuickSort - -Indicate that a sorting function should use the quick sort algorithm. +QuickerSort(next::Algorithm=SMALL_ALGORITHM) = QuickerSort(missing, missing, next) +QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) = QuickerSort(lo, hi, SMALL_ALGORITHM) +QuickerSort(lo::Union{Integer, Missing}, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(lo, lo, next) +QuickerSort(r::OrdinalRange, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(first(r), last(r), next) -Quick sort picks a pivot element, partitions the array based on the pivot, -and then sorts the elements before and after the pivot recursively. - -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for almost all large collections. - * *quadratic worst case runtime* in pathological cases - (vanishingly rare for non-malicious input) -""" -const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM) - -# select a pivot for QuickSort +# select a pivot for QuickerSort # # This method is redefined to rand(lo:hi) in Random.jl # We can't use rand here because it is not available in Core.Compiler and @@ -1013,7 +1004,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer pivot, lo-offset end -function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; +function _sort!(v::AbstractVector, a::QuickerSort, o::Ordering, kw; t=nothing, offset=nothing, swap=false, rev=false) @getkw lo hi scratch @@ -1029,7 +1020,7 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; @inbounds v[j] = pivot swap = !swap - # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped + # For QuickerSort(), a.lo === a.hi === missing, so the first two branches get skipped if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part swap && copyto!(v, lo, t, lo+offset, j-lo) rev && reverse!(v, lo, j-1) @@ -1225,7 +1216,7 @@ the initial optimizations because they can change the input vector's type and or make them `UIntMappable`. If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch -to [`QuickSort`](@ref). +to [`QuickerSort`](@ref). Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then perform a presorted check ([`CheckSorted`](@ref)). @@ -1257,7 +1248,7 @@ Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that r stage. Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and -otherwise we dispatch to [`QuickSort`](@ref). +otherwise we dispatch to [`QuickerSort`](@ref). """ const DEFAULT_STABLE = InitialOptimizations( IsUIntMappable( @@ -1267,9 +1258,9 @@ const DEFAULT_STABLE = InitialOptimizations( ConsiderCountingSort( ConsiderRadixSort( Small{80}( - QuickSort)))))), + QuickerSort())))))), StableCheckSorted( - QuickSort))) + QuickerSort()))) """ DEFAULT_UNSTABLE @@ -1483,7 +1474,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;)) + _sort!(ix, QuickerSort(k), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1863,18 +1854,53 @@ end ### Unused constructs for backward compatibility ### -struct MergeSortAlg{T <: Algorithm} <: Algorithm - next::T +## Old algorithms ## + +struct QuickSortAlg <: Algorithm end +struct MergeSortAlg <: Algorithm end + +""" + PartialQuickSort{T <: Union{Integer,OrdinalRange}} + +Indicate that a sorting function should use the partial quick sort +algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest +to largest, finding them and sorting them using [`QuickSort`](@ref). + +Characteristics: + * *not stable*: does not preserve the ordering of elements which + compare equal (e.g. "a" and "A" in a sort of letters which + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +""" +struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm + k::T end """ - MergeSort + QuickSort -Indicate that a sorting function should use the merge sort algorithm. +Indicate that a sorting function should use the quick sort +algorithm, which is *not* stable. -Merge sort divides the collection into subcollections and -repeatedly merges them, sorting each subcollection at each step, -until the entire collection has been recombined in sorted form. +Characteristics: + * *not stable*: does not preserve the ordering of elements which + compare equal (e.g. "a" and "A" in a sort of letters which + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for large collections. +""" +const QuickSort = QuickSortAlg() + +""" + MergeSort + +Indicate that a sorting function should use the merge sort +algorithm. Merge sort divides the collection into +subcollections and repeatedly merges them, sorting each +subcollection at each step, until the entire +collection has been recombined in sorted form. Characteristics: * *stable*: preserves the ordering of elements which compare @@ -1883,21 +1909,94 @@ Characteristics: * *not in-place* in memory. * *divide-and-conquer* sort strategy. """ -const MergeSort = MergeSortAlg(SMALL_ALGORITHM) +const MergeSort = MergeSortAlg() -function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing) - @getkw lo hi scratch +# selectpivot! +# +# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and +# choose the middle value as a pivot +# +# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be +# greater than the pivot + +@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + @inbounds begin + mi = midpoint(lo, hi) + + # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place + if lt(o, v[lo], v[mi]) + v[mi], v[lo] = v[lo], v[mi] + end + + if lt(o, v[hi], v[lo]) + if lt(o, v[hi], v[mi]) + v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] + else + v[hi], v[lo] = v[lo], v[hi] + end + end + + # return the pivot + return v[lo] + end +end + +# partition! +# +# select a pivot, and partition v according to the pivot + +function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + pivot = selectpivot!(v, lo, hi, o) + # pivot == v[lo], v[hi] > pivot + i, j = lo, hi + @inbounds while true + i += 1; j -= 1 + while lt(o, v[i], pivot); i += 1; end; + while lt(o, pivot, v[j]); j -= 1; end; + i >= j && break + v[i], v[j] = v[j], v[i] + end + v[j], v[lo] = pivot, v[j] + + # v[j] == pivot + # v[k] >= pivot for k > j + # v[i] <= pivot for i < j + return j +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + if j-lo < hi-j + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + j+1 < hi && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + return v +end + +sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, t0::Vector{T}) where T = + invoke(sort!, Tuple{typeof.((v, lo, hi, a, o))..., AbstractVector{T}}, v, lo, hi, a, o, t0) # For disambiguation +function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, + t0::Union{AbstractVector{T}, Nothing}=nothing) where T @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) m = midpoint(lo, hi) - if t === nothing - scratch, t = make_scratch(scratch, eltype(v), m-lo+1) - end + t = t0 === nothing ? similar(v, m-lo+1) : t0 + length(t) < m-lo+1 && resize!(t, m-lo+1) + Base.require_one_based_indexing(t) - _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) - _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset) + sort!(v, lo, m, a, o, t) + sort!(v, m+1, hi, a, o, t) i, j = 1, lo while j <= m @@ -1924,9 +2023,37 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, end end - scratch + return v +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, + o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + + if j <= first(a.k) + lo = j+1 + elseif j >= last(a.k) + hi = j-1 + else + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + if j-lo < hi-j + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + hi > (j+1) && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + end + return v end +## Old extensibility mechanisms ## + # Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) @@ -1952,8 +2079,4 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) end end -# Keep old internal types so that people can keep dispatching with -# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... -const QuickSortAlg = typeof(QuickSort) - end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index 614946a8cc4f6..eb5020547c789 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -79,9 +79,8 @@ end end @testset "stability" begin - for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, - PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM), - PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)] + for Alg in [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.DEFAULT_STABLE, + Base.Sort.QuickerSort(missing, 1729), Base.Sort.QuickerSort(1729, missing)] @test issorted(sort(1:2000, alg=Alg, by=x->0)) @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) end @@ -334,7 +333,7 @@ end @test c == v # stable algorithms - for alg in [MergeSort, QuickSort, PartialQuickSort(1:n), Base.DEFAULT_STABLE] + for alg in [MergeSort, Base.Sort.QuickerSort(), Base.Sort.QuickerSort(1:n), Base.DEFAULT_STABLE] p = sortperm(v, alg=alg, rev=rev) p2 = sortperm(float(v), alg=alg, rev=rev) @test p == p2 @@ -382,7 +381,7 @@ end end v = randn_with_nans(n,0.1) - for alg in [InsertionSort, MergeSort, QuickSort, PartialQuickSort(n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], + for alg in [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.Sort.QuickerSort(1, n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], rev in [false,true] alg === InsertionSort && n >= 3000 && continue # test float sorting with NaNs @@ -589,7 +588,7 @@ end @testset "fallback" begin @test adaptive_sort_test(rand(1:typemax(Int32), len), by=x->x^2)# fallback - @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=QuickSort) + @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=Base.Sort.QuickerSort()) end @test adaptive_sort_test(rand(Int, 20)) # InsertionSort @@ -691,15 +690,16 @@ end @testset "invalid lt (#11429)" begin # lt must be a total linear order (e.g. < not <=) so this usage is # not allowed. Consequently, none of the behavior tested in this - # testset is gaurunteed to work in future minor versions of Julia. + # testset is guaranteed to work in future minor versions of Julia. + + safe_algs = [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] n = 1000 v = rand(1:5, n); s = sort(v); # Nevertheless, it still works... - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort(v, alg=alg, lt = <=) == s end @test partialsort(v, 172, lt = <=) == s[172] @@ -709,16 +709,14 @@ end # where i < j if and only if lt(o, v[j], v[i]). This invariant holds even for # this invalid lt order. perm = reverse(sortperm(v, rev=true)) - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] @test partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415] # lt can be very poorly behaved and sort will still permute its input in some way. - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s end @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 @@ -901,6 +899,25 @@ end @test issorted(sort(rand(Int8, 600))) end +@testset "QuickerSort API" begin + bsqs = Base.Sort.QuickerSort + @test bsqs(1, 2, MergeSort) === bsqs(1, 2, MergeSort) + @test bsqs(missing, 2, MergeSort) === bsqs(missing, 2, MergeSort) + @test bsqs(1, missing, MergeSort) === bsqs(1, missing, MergeSort) + @test bsqs(missing, missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(1, MergeSort) === bsqs(1, 1, MergeSort) + @test bsqs(missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(MergeSort) === bsqs(missing, missing, MergeSort) + + @test bsqs(1, 2) === bsqs(1, 2, InsertionSort) + @test bsqs(missing, 2) === bsqs(missing, 2, InsertionSort) + @test bsqs(1, missing) === bsqs(1, missing, InsertionSort) + @test bsqs(missing, missing) === bsqs(missing, missing, InsertionSort) + @test bsqs(1) === bsqs(1, 1, InsertionSort) + @test bsqs(missing) === bsqs(missing, missing, InsertionSort) + @test bsqs() === bsqs(missing, missing, InsertionSort) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128,