/src/serenity/AK/QuickSelect.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2023, the SerenityOS developers. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/Math.h> |
10 | | #include <AK/Random.h> |
11 | | #include <AK/StdLibExtras.h> |
12 | | |
13 | | namespace AK { |
14 | | |
15 | | static constexpr int MEDIAN_OF_MEDIAN_CUTOFF = 4500; |
16 | | |
17 | | // FIXME: Stole and adapted these two functions from `Userland/Demos/Tubes/Tubes.cpp` we really need something like this in `AK/Random.h` |
18 | | static inline double random_double() |
19 | 0 | { |
20 | 0 | return get_random<u32>() / static_cast<double>(NumericLimits<u32>::max()); |
21 | 0 | } |
22 | | |
23 | | static inline size_t random_int(size_t min, size_t max) |
24 | 0 | { |
25 | 0 | return min + round_to<size_t>(random_double() * (max - min)); |
26 | 0 | } |
27 | | |
28 | | // Implementations of common pivot functions |
29 | | namespace PivotFunctions { |
30 | | |
31 | | // Just use the first element of the range as the pivot |
32 | | // Mainly used to debug the quick select algorithm |
33 | | // Good with random data since it has nearly no overhead |
34 | | // Attention: Turns the algorithm quadratic if used with already (partially) sorted data |
35 | | template<typename Collection, typename LessThan> |
36 | | size_t first_element([[maybe_unused]] Collection& collection, size_t left, [[maybe_unused]] size_t right, [[maybe_unused]] LessThan less_than) |
37 | | { |
38 | | return left; |
39 | | } |
40 | | |
41 | | // Just use the middle element of the range as the pivot |
42 | | // This is what is used in AK::single_pivot_quick_sort in quicksort.h |
43 | | // Works fairly well with random Data |
44 | | // Works incredibly well with sorted data since the pivot is always a perfect split |
45 | | template<typename Collection, typename LessThan> |
46 | | size_t middle_element([[maybe_unused]] Collection& collection, size_t left, size_t right, [[maybe_unused]] LessThan less_than) |
47 | | { |
48 | | return (left + right) / 2; |
49 | | } |
50 | | |
51 | | // Pick a random Pivot |
52 | | // This is the "Traditional" implementation of both quicksort and quick select |
53 | | // Performs fairly well both with random and sorted data |
54 | | template<typename Collection, typename LessThan> |
55 | | size_t random_element([[maybe_unused]] Collection& collection, size_t left, size_t right, [[maybe_unused]] LessThan less_than) |
56 | 0 | { |
57 | 0 | return random_int(left, right); |
58 | 0 | } |
59 | | |
60 | | // Implementation detail of median_of_medians |
61 | | // Whilst this looks quadratic in runtime, it always gets called with 5 or fewer elements so can be considered constant runtime |
62 | | template<typename Collection, typename LessThan> |
63 | | size_t partition5(Collection& collection, size_t left, size_t right, LessThan less_than) |
64 | 0 | { |
65 | 0 | VERIFY((right - left) <= 5); |
66 | 0 | for (size_t i = left + 1; i <= right; i++) { |
67 | 0 | for (size_t j = i; j > left && less_than(collection.at(j), collection.at(j - 1)); j--) { |
68 | 0 | swap(collection.at(j), collection.at(j - 1)); |
69 | 0 | } |
70 | 0 | } |
71 | 0 | return (left + right) / 2; |
72 | 0 | } |
73 | | |
74 | | // https://en.wikipedia.org/wiki/Median_of_medians |
75 | | // Use the median of medians algorithm to pick a really good pivot |
76 | | // This makes quick select run in linear time but comes with a lot of overhead that only pays off with very large inputs |
77 | | template<typename Collection, typename LessThan> |
78 | | size_t median_of_medians(Collection& collection, size_t left, size_t right, LessThan less_than) |
79 | 0 | { |
80 | 0 | if ((right - left) < 5) |
81 | 0 | return partition5(collection, left, right, less_than); |
82 | | |
83 | 0 | for (size_t i = left; i <= right; i += 5) { |
84 | 0 | size_t sub_right = i + 4; |
85 | 0 | if (sub_right > right) |
86 | 0 | sub_right = right; |
87 | |
|
88 | 0 | size_t median5 = partition5(collection, i, sub_right, less_than); |
89 | 0 | swap(collection.at(median5), collection.at(left + (i - left) / 5)); |
90 | 0 | } |
91 | 0 | size_t mid = (right - left) / 10 + left + 1; |
92 | | |
93 | | // We're using mutual recursion here, using quickselect_inplace to find the pivot for quickselect_inplace. |
94 | | // Whilst this achieves True linear Runtime, it is a lot of overhead, so use only this variant with very large inputs |
95 | 0 | return quickselect_inplace( |
96 | 0 | collection, left, left + ((right - left) / 5), mid, [](auto collection, size_t left, size_t right, auto less_than) { return AK::PivotFunctions::median_of_medians(collection, left, right, less_than); }, less_than); |
97 | 0 | } |
98 | | |
99 | | } |
100 | | |
101 | | // This is the Lomuto Partition scheme which is simpler but less efficient than Hoare's partitioning scheme that is traditionally used with quicksort |
102 | | // https://en.wikipedia.org/wiki/Quicksort#Lomuto_partition_scheme |
103 | | template<typename Collection, typename PivotFn, typename LessThan> |
104 | | static size_t partition(Collection& collection, size_t left, size_t right, PivotFn pivot_fn, LessThan less_than) |
105 | 0 | { |
106 | 0 | auto pivot_index = pivot_fn(collection, left, right, less_than); |
107 | 0 | auto pivot_value = collection.at(pivot_index); |
108 | 0 | swap(collection.at(pivot_index), collection.at(right)); |
109 | 0 | auto store_index = left; |
110 | |
|
111 | 0 | for (size_t i = left; i < right; i++) { |
112 | 0 | if (less_than(collection.at(i), pivot_value)) { |
113 | 0 | swap(collection.at(store_index), collection.at(i)); |
114 | 0 | store_index++; |
115 | 0 | } |
116 | 0 | } |
117 | |
|
118 | 0 | swap(collection.at(right), collection.at(store_index)); |
119 | 0 | return store_index; |
120 | 0 | } Unexecuted instantiation: Builtin.cpp:unsigned long AK::partition<AK::Vector<float, 0ul>, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#1}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#1}>(AK::Vector<float, 0ul>&, unsigned long, unsigned long, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#1}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#1}) Unexecuted instantiation: Builtin.cpp:_ZN2AKL9partitionINS_6VectorIfLm0EEEZNS_14PivotFunctions17median_of_mediansIS2_ZNS_19quickselect_inplaceIS2_EEmRT_mEUlS7_RT0_E_EEmS7_mmS8_EUlS6_mmS8_E_SA_EEmS7_mmS8_T1_ Unexecuted instantiation: Builtin.cpp:unsigned long AK::partition<AK::Vector<float, 0ul>, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#2}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#2}>(AK::Vector<float, 0ul>&, unsigned long, unsigned long, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#2}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#2}) |
121 | | |
122 | | template<typename Collection, typename PivotFn, typename LessThan> |
123 | | size_t quickselect_inplace(Collection& collection, size_t left, size_t right, size_t k, PivotFn pivot_fn, LessThan less_than) |
124 | 0 | { |
125 | | // Bail if left is somehow bigger than right and return default constructed result |
126 | | // FIXME: This can also occur when the collection is empty maybe propagate this error somehow? |
127 | | // returning 0 would be a really bad thing since this returns and index and that might lead to memory errors |
128 | | // returning in ErrorOr<size_t> here might be a good option but this is a very specific error that in nearly all circumstances should be considered a bug on the callers site |
129 | 0 | VERIFY(left <= right); |
130 | | |
131 | | // If there's only one element, return that element |
132 | 0 | if (left == right) |
133 | 0 | return left; |
134 | | |
135 | 0 | auto pivot_index = partition(collection, left, right, pivot_fn, less_than); |
136 | | |
137 | | // we found the thing we were searching for |
138 | 0 | if (k == pivot_index) |
139 | 0 | return k; |
140 | | |
141 | | // Recurse on the left side |
142 | 0 | if (k < pivot_index) |
143 | 0 | return quickselect_inplace(collection, left, pivot_index - 1, k, pivot_fn, less_than); |
144 | | |
145 | | // recurse on the right side |
146 | 0 | return quickselect_inplace(collection, pivot_index + 1, right, k, pivot_fn, less_than); |
147 | 0 | } Unexecuted instantiation: unsigned long AK::quickselect_inplace<AK::Vector<float, 0ul>, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#1}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#1}>(AK::Vector<float, 0ul>&, unsigned long, unsigned long, unsigned long, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#1}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#1}) Unexecuted instantiation: _ZN2AK19quickselect_inplaceINS_6VectorIfLm0EEEZNS_14PivotFunctions17median_of_mediansIS2_ZNS_19quickselect_inplaceIS2_EEmRT_mEUlS7_RT0_E_EEmS7_mmS8_EUlS6_mmS8_E_SA_EEmS7_mmmS8_T1_ Unexecuted instantiation: unsigned long AK::quickselect_inplace<AK::Vector<float, 0ul>, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#2}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#2}>(AK::Vector<float, 0ul>&, unsigned long, unsigned long, unsigned long, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1, unsigned long, unsigned long, auto:2)#2}, AK::quickselect_inplace<AK::Vector<float, 0ul> >(AK::Vector<float, 0ul>&, unsigned long)::{lambda(auto:1&, auto:2&)#2}) |
148 | | |
149 | | // |
150 | | template<typename Collection, typename PivotFn, typename LessThan> |
151 | | size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn, LessThan less_than) |
152 | | { |
153 | | return quickselect_inplace(collection, 0, collection.size() - 1, k, pivot_fn, less_than); |
154 | | } |
155 | | |
156 | | template<typename Collection, typename PivotFn> |
157 | | size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn) |
158 | | { |
159 | | return quickselect_inplace(collection, 0, collection.size() - 1, k, pivot_fn, [](auto& a, auto& b) { return a < b; }); |
160 | | } |
161 | | |
162 | | // All of these quick select implementation versions return the `index` of the resulting element, after the algorithm has run, not the element itself! |
163 | | // As Part of the Algorithm, they all modify the collection in place, partially sorting it in the process. |
164 | | template<typename Collection> |
165 | | size_t quickselect_inplace(Collection& collection, size_t k) |
166 | 0 | { |
167 | 0 | if (collection.size() >= MEDIAN_OF_MEDIAN_CUTOFF) |
168 | 0 | return quickselect_inplace( |
169 | 0 | collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::median_of_medians(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); |
170 | | |
171 | 0 | else |
172 | 0 | return quickselect_inplace( |
173 | 0 | collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::random_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); |
174 | 0 | } |
175 | | |
176 | | } |