Skip to content

Commit

Permalink
put back the old QuickSort, PartialQuickSort, and MergeSort algorithm…
Browse files Browse the repository at this point in the history
…s... (JuliaLang#47788)

...as they were in 1.8 and rename the new PartialQuickSort to QuickerSort
Also improve the documentation and API for constructing QuickerSort and test
the API

Co-authored-by: Lilith Hafner <[email protected]>
  • Loading branch information
LilithHafner and Lilith Hafner authored Dec 20, 2022
1 parent a3ba757 commit 8cdb17b
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 72 deletions.
241 changes: 182 additions & 59 deletions base/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ issorted(itr;
issorted(itr, ord(lt,by,rev,order))

function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering)
_sort!(v, _PartialQuickSort(k), o, (;))
_sort!(v, QuickerSort(k), o, (;))
maybeview(v, k)
end

Expand Down Expand Up @@ -931,49 +931,40 @@ end


"""
PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm
QuickerSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm
QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm
Indicate that a sorting function should use the partial quick sort algorithm.
Use the `QuickerSort` algorithm with the `next` algorithm as a base case.
Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using
[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks
`QuickerSort` is like `QuickSort`, but utilizes scratch space to operate faster and allow
for the possibility of maintaining stability.
If `lo` and `hi` are provided, finds and sorts the elements that belong at positions `lo:hi`
of the sorted result, reordering but not necessarily sorting the other elements in the
process. If `lo` or `hi` is `missing`, it is treated as the first or last index of the input,
respectively.
`lo` and `hi` may be specified together as an `AbstractUnitRange`.
Characteristics:
* *stable*: preserves the ordering of elements which compare equal
(e.g. "a" and "A" in a sort of letters which ignores case).
* *not in-place* in memory.
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
* *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref).
* *linear runtime* if `length(lo:hi)` is constant
* *quadratic worst case runtime* in pathological cases
(vanishingly rare for non-malicious input)
"""
struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm
struct QuickerSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm
lo::L
hi::H
next::T
end
PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM)
PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM)
_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k))
_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k))

"""
QuickSort
Indicate that a sorting function should use the quick sort algorithm.
# Convenience constructors. Each one forwards to the three-field constructor
# `QuickerSort(lo, hi, next)`, filling in whatever the caller left out.

# No bounds given: `lo` and `hi` are both `missing`.
function QuickerSort(next::Algorithm=SMALL_ALGORITHM)
    return QuickerSort(missing, missing, next)
end

# Both bounds given: use the default base-case algorithm.
function QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing})
    return QuickerSort(lo, hi, SMALL_ALGORITHM)
end

# A single bound is used for both `lo` and `hi`.
function QuickerSort(lo::Union{Integer, Missing}, next::Algorithm=SMALL_ALGORITHM)
    return QuickerSort(lo, lo, next)
end

# A range supplies its first and last elements as `lo` and `hi`.
function QuickerSort(r::OrdinalRange, next::Algorithm=SMALL_ALGORITHM)
    return QuickerSort(first(r), last(r), next)
end

Quick sort picks a pivot element, partitions the array based on the pivot,
and then sorts the elements before and after the pivot recursively.
Characteristics:
* *stable*: preserves the ordering of elements which compare equal
(e.g. "a" and "A" in a sort of letters which ignores case).
* *not in-place* in memory.
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
* *good performance* for almost all large collections.
* *quadratic worst case runtime* in pathological cases
(vanishingly rare for non-malicious input)
"""
const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM)

# select a pivot for QuickSort
# select a pivot for QuickerSort
#
# This method is redefined to rand(lo:hi) in Random.jl
# We can't use rand here because it is not available in Core.Compiler and
Expand Down Expand Up @@ -1013,7 +1004,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer
pivot, lo-offset
end

function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw;
function _sort!(v::AbstractVector, a::QuickerSort, o::Ordering, kw;
t=nothing, offset=nothing, swap=false, rev=false)
@getkw lo hi scratch

Expand All @@ -1029,7 +1020,7 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw;
@inbounds v[j] = pivot
swap = !swap

# For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped
# For QuickerSort(), a.lo === a.hi === missing, so the first two branches get skipped
if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part
swap && copyto!(v, lo, t, lo+offset, j-lo)
rev && reverse!(v, lo, j-1)
Expand Down Expand Up @@ -1225,7 +1216,7 @@ the initial optimizations because they can change the input vector's type and or
make them `UIntMappable`.
If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch
to [`QuickSort`](@ref).
to [`QuickerSort`](@ref).
Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then
perform a presorted check ([`CheckSorted`](@ref)).
Expand Down Expand Up @@ -1257,7 +1248,7 @@ Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that r
stage.
Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and
otherwise we dispatch to [`QuickSort`](@ref).
otherwise we dispatch to [`QuickerSort`](@ref).
"""
const DEFAULT_STABLE = InitialOptimizations(
IsUIntMappable(
Expand All @@ -1267,9 +1258,9 @@ const DEFAULT_STABLE = InitialOptimizations(
ConsiderCountingSort(
ConsiderRadixSort(
Small{80}(
QuickSort)))))),
QuickerSort())))))),
StableCheckSorted(
QuickSort)))
QuickerSort())))
"""
DEFAULT_UNSTABLE
Expand Down Expand Up @@ -1483,7 +1474,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector,
end

# do partial quicksort
_sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;))
_sort!(ix, QuickerSort(k), Perm(ord(lt, by, rev, order), v), (;))

maybeview(ix, k)
end
Expand Down Expand Up @@ -1863,18 +1854,53 @@ end

### Unused constructs for backward compatibility ###

struct MergeSortAlg{T <: Algorithm} <: Algorithm
next::T
## Old algorithms ##

# Field-less marker type used to dispatch to the old quick sort implementation.
struct QuickSortAlg <: Algorithm
end

# Field-less marker type used to dispatch to the old merge sort implementation.
struct MergeSortAlg <: Algorithm
end

"""
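    PartialQuickSort{T <: Union{Integer,OrdinalRange}}

Indicate that a sorting function should use the partial quick sort algorithm.
`PartialQuickSort(k)` partitions like [`QuickSort`](@ref) but only keeps working on the
parts of the collection that can affect the positions covered by `k`. For an integer `k`
or an increasing range `k`, those positions end up holding, in sorted order, the elements
a full sort would place there; elements elsewhere may be left unsorted.
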
Characteristics:
* *not stable*: does not preserve the ordering of elements which
compare equal (e.g. "a" and "A" in a sort of letters which
ignores case).
* *in-place* in memory.
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
"""
struct PartialQuickSort{T<:Union{Integer, OrdinalRange}} <: Algorithm
    # Position (integer) or positions (range) that the partial sort is asked to get right.
    k::T
end

"""
MergeSort
QuickSort
Indicate that a sorting function should use the merge sort algorithm.
Indicate that a sorting function should use the quick sort
algorithm, which is *not* stable.
Merge sort divides the collection into subcollections and
repeatedly merges them, sorting each subcollection at each step,
until the entire collection has been recombined in sorted form.
Characteristics:
* *not stable*: does not preserve the ordering of elements which
compare equal (e.g. "a" and "A" in a sort of letters which
ignores case).
* *in-place* in memory.
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
* *good performance* for large collections.
"""
const QuickSort = QuickSortAlg()

"""
MergeSort
Indicate that a sorting function should use the merge sort
algorithm. Merge sort divides the collection into
subcollections and repeatedly merges them, sorting each
subcollection at each step, until the entire
collection has been recombined in sorted form.
Characteristics:
* *stable*: preserves the ordering of elements which compare
Expand All @@ -1883,21 +1909,94 @@ Characteristics:
* *not in-place* in memory.
* *divide-and-conquer* sort strategy.
"""
const MergeSort = MergeSortAlg(SMALL_ALGORITHM)
const MergeSort = MergeSortAlg()

function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing)
@getkw lo hi scratch
# selectpivot!
#
# Median-of-three pivot selection: look at v[lo], v[mi] and v[hi] (mi being the
# midpoint of lo and hi), rearrange those three entries among themselves, and use
# their median as the pivot.
#
# Upon return, the pivot is stored in v[lo], v[mi] is not greater than the pivot,
# and v[hi] is not less than the pivot.

@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
    mid = midpoint(lo, hi)
    @inbounds begin
        # Step 1: put the larger of v[lo] and v[mid] into v[lo].
        if lt(o, v[lo], v[mid])
            v[lo], v[mid] = v[mid], v[lo]
        end

        # Step 2: bring v[hi] into the picture. Nothing to do when v[hi] is already
        # the largest of the three.
        if lt(o, v[hi], v[lo])
            if lt(o, v[hi], v[mid])
                # v[hi] is the smallest of the three: rotate so that the old v[mid]
                # (the median) lands in v[lo]
                v[hi], v[lo], v[mid] = v[lo], v[mid], v[hi]
            else
                # v[hi] is the median: exchange it with v[lo]
                v[lo], v[hi] = v[hi], v[lo]
            end
        end

        # the median of the three now sits in v[lo]
        return v[lo]
    end
end

# partition!
#
# Choose a pivot with selectpivot! and rearrange v[lo:hi] around it, returning the
# index at which the pivot ends up.

function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
    pivot = selectpivot!(v, lo, hi, o)
    # selectpivot! leaves the pivot in v[lo] and a value that is not less than the
    # pivot in v[hi]; both act as sentinels that stop the scans below.
    left, right = lo, hi
    @inbounds while true
        left += 1
        right -= 1
        # skip over entries that are already on the correct side of the pivot
        while lt(o, v[left], pivot)
            left += 1
        end
        while lt(o, pivot, v[right])
            right -= 1
        end
        if left >= right
            break
        end
        v[left], v[right] = v[right], v[left]
    end
    # move the pivot out of v[lo] into its final slot
    v[right], v[lo] = pivot, v[right]

    # now v[right] == pivot, nothing in v[lo:right-1] is greater than the pivot and
    # nothing in v[right+1:hi] is less than it
    return right
end

# Old in-place quick sort of v[lo:hi]. Ranges at or below SMALL_THRESHOLD are handed to
# SMALL_ALGORITHM; larger ones are partitioned around a pivot.
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering)
    @inbounds while lo < hi
        if hi - lo <= SMALL_THRESHOLD
            return sort!(v, lo, hi, SMALL_ALGORITHM, o)
        end
        p = partition!(v, lo, hi, o)
        # Recurse only into the smaller side and keep looping over the larger one;
        # this bounds the stack depth by O(log(n)) even in the worst case
        # (rather than O(n)).
        left_is_smaller = p - lo < hi - p
        if left_is_smaller
            if lo < p - 1
                sort!(v, lo, p - 1, a, o)
            end
            lo = p + 1
        else
            if p + 1 < hi
                sort!(v, p + 1, hi, a, o)
            end
            hi = p - 1
        end
    end
    return v
end

sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, t0::Vector{T}) where T =
invoke(sort!, Tuple{typeof.((v, lo, hi, a, o))..., AbstractVector{T}}, v, lo, hi, a, o, t0) # For disambiguation
function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering,
t0::Union{AbstractVector{T}, Nothing}=nothing) where T
@inbounds if lo < hi
hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw)
hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)

m = midpoint(lo, hi)

if t === nothing
scratch, t = make_scratch(scratch, eltype(v), m-lo+1)
end
t = t0 === nothing ? similar(v, m-lo+1) : t0
length(t) < m-lo+1 && resize!(t, m-lo+1)
Base.require_one_based_indexing(t)

_sort!(v, a, o, (;kw..., hi=m, scratch); t, offset)
_sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset)
sort!(v, lo, m, a, o, t)
sort!(v, m+1, hi, a, o, t)

i, j = 1, lo
while j <= m
Expand All @@ -1924,9 +2023,37 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing,
end
end

scratch
return v
end

# Inclusive bounds of the positions that `PartialQuickSort(k)` has to leave sorted.
# A lower bound of `nothing` means "from the start of the vector": an integer `k` asks
# for every position up to and including `k` (the smallest `k` elements), whereas a range
# asks for positions `first(k):last(k)`.
_partialquicksort_bounds(k::Integer) = (nothing, k)
_partialquicksort_bounds(k::OrdinalRange) = (first(k), last(k))

# Old in-place partial quick sort of v[lo:hi].
#
# Works like the old QuickSort, except that after each partition step a side of the
# pivot is dropped when it cannot contain any of the requested positions. Ranges at or
# below SMALL_THRESHOLD are handed to SMALL_ALGORITHM. Returns `v`.
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort,
               o::Ordering)
    # Taking `first(a.k)` directly would yield `k` itself for an integer `k` and leave
    # the positions before `k` unsorted, so the bounds are computed by dispatch instead.
    k_lo, k_hi = _partialquicksort_bounds(a.k)
    @inbounds while lo < hi
        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
        j = partition!(v, lo, hi, o)

        if k_lo !== nothing && j <= k_lo
            # every requested position lies at or to the right of the pivot
            lo = j+1
        elseif j >= k_hi
            # every requested position lies at or to the left of the pivot
            hi = j-1
        else
            # recurse on the smaller chunk
            # this is necessary to preserve O(log(n))
            # stack space in the worst case (rather than O(n))
            if j-lo < hi-j
                lo < (j-1) && sort!(v, lo, j-1, a, o)
                lo = j+1
            else
                hi > (j+1) && sort!(v, j+1, hi, a, o)
                hi = j-1
            end
        end
    end
    return v
end

## Old extensibility mechanisms ##

# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way
# 3-argument form: sort all of `v` by forwarding its first and last index to the
# 5-argument method.
function sort!(v::AbstractVector, a::Algorithm, o::Ordering)
    lo, hi = firstindex(v), lastindex(v)
    return sort!(v, lo, hi, a, o)
end
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering)
Expand All @@ -1952,8 +2079,4 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw)
end
end

# Keep old internal types so that people can keep dispatching with
# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ...
const QuickSortAlg = typeof(QuickSort)

end # module Sort
Loading

0 comments on commit 8cdb17b

Please sign in to comment.