/src/icu/source/common/uarrsort.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * |
6 | | * Copyright (C) 2003-2013, International Business Machines |
7 | | * Corporation and others. All Rights Reserved. |
8 | | * |
9 | | ******************************************************************************* |
10 | | * file name: uarrsort.c |
11 | | * encoding: UTF-8 |
12 | | * tab size: 8 (not used) |
13 | | * indentation:4 |
14 | | * |
15 | | * created on: 2003aug04 |
16 | | * created by: Markus W. Scherer |
17 | | * |
18 | | * Internal function for sorting arrays. |
19 | | */ |
20 | | |
21 | | #include "unicode/utypes.h" |
22 | | #include "cmemory.h" |
23 | | #include "uarrsort.h" |
24 | | |
25 | | enum { |
26 | | /** |
27 | | * "from Knuth" |
28 | | * |
29 | | * A binary search over 8 items performs 4 comparisons: |
30 | | * log2(8)=3 to subdivide, +1 to check for equality. |
31 | | * A linear search over 8 items on average also performs 4 comparisons. |
32 | | */ |
33 | | MIN_QSORT=9, |
34 | | STACK_ITEM_SIZE=200 |
35 | | }; |
36 | | |
37 | | /* UComparator convenience implementations ---------------------------------- */ |
38 | | |
39 | | U_CAPI int32_t U_EXPORT2 |
40 | 0 | uprv_uint16Comparator(const void *context, const void *left, const void *right) { |
41 | 0 | (void)context; |
42 | 0 | return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; |
43 | 0 | } |
44 | | |
45 | | U_CAPI int32_t U_EXPORT2 |
46 | 0 | uprv_int32Comparator(const void *context, const void *left, const void *right) { |
47 | 0 | (void)context; |
48 | 0 | return *(const int32_t *)left - *(const int32_t *)right; |
49 | 0 | } |
50 | | |
51 | | U_CAPI int32_t U_EXPORT2 |
52 | 0 | uprv_uint32Comparator(const void *context, const void *left, const void *right) { |
53 | 0 | (void)context; |
54 | 0 | uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; |
55 | | |
56 | | /* compare directly because (l-r) would overflow the int32_t result */ |
57 | 0 | if(l<r) { |
58 | 0 | return -1; |
59 | 0 | } else if(l==r) { |
60 | 0 | return 0; |
61 | 0 | } else /* l>r */ { |
62 | 0 | return 1; |
63 | 0 | } |
64 | 0 | } |
65 | | |
66 | | /* Insertion sort using binary search --------------------------------------- */ |
67 | | |
68 | | U_CAPI int32_t U_EXPORT2 |
69 | | uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, |
70 | 0 | UComparator *cmp, const void *context) { |
71 | 0 | int32_t start=0; |
72 | 0 | UBool found=FALSE; |
73 | | |
74 | | /* Binary search until we get down to a tiny sub-array. */ |
75 | 0 | while((limit-start)>=MIN_QSORT) { |
76 | 0 | int32_t i=(start+limit)/2; |
77 | 0 | int32_t diff=cmp(context, item, array+i*itemSize); |
78 | 0 | if(diff==0) { |
79 | | /* |
80 | | * Found the item. We look for the *last* occurrence of such |
81 | | * an item, for stable sorting. |
82 | | * If we knew that there will be only few equal items, |
83 | | * we could break now and enter the linear search. |
84 | | * However, if there are many equal items, then it should be |
85 | | * faster to continue with the binary search. |
86 | | * It seems likely that we either have all unique items |
87 | | * (where found will never become TRUE in the insertion sort) |
88 | | * or potentially many duplicates. |
89 | | */ |
90 | 0 | found=TRUE; |
91 | 0 | start=i+1; |
92 | 0 | } else if(diff<0) { |
93 | 0 | limit=i; |
94 | 0 | } else { |
95 | 0 | start=i; |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | | /* Linear search over the remaining tiny sub-array. */ |
100 | 0 | while(start<limit) { |
101 | 0 | int32_t diff=cmp(context, item, array+start*itemSize); |
102 | 0 | if(diff==0) { |
103 | 0 | found=TRUE; |
104 | 0 | } else if(diff<0) { |
105 | 0 | break; |
106 | 0 | } |
107 | 0 | ++start; |
108 | 0 | } |
109 | 0 | return found ? (start-1) : ~start; |
110 | 0 | } |
111 | | |
112 | | static void |
113 | | doInsertionSort(char *array, int32_t length, int32_t itemSize, |
114 | 0 | UComparator *cmp, const void *context, void *pv) { |
115 | 0 | int32_t j; |
116 | |
|
117 | 0 | for(j=1; j<length; ++j) { |
118 | 0 | char *item=array+j*itemSize; |
119 | 0 | int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); |
120 | 0 | if(insertionPoint<0) { |
121 | 0 | insertionPoint=~insertionPoint; |
122 | 0 | } else { |
123 | 0 | ++insertionPoint; /* one past the last equal item */ |
124 | 0 | } |
125 | 0 | if(insertionPoint<j) { |
126 | 0 | char *dest=array+insertionPoint*itemSize; |
127 | 0 | uprv_memcpy(pv, item, itemSize); /* v=array[j] */ |
128 | 0 | uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize); |
129 | 0 | uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */ |
130 | 0 | } |
131 | 0 | } |
132 | 0 | } |
133 | | |
134 | | static void |
135 | | insertionSort(char *array, int32_t length, int32_t itemSize, |
136 | 0 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
137 | 0 | UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1]; |
138 | 0 | void *pv; |
139 | | |
140 | | /* allocate an intermediate item variable (v) */ |
141 | 0 | if(itemSize<=STACK_ITEM_SIZE) { |
142 | 0 | pv=v; |
143 | 0 | } else { |
144 | 0 | pv=uprv_malloc(itemSize); |
145 | 0 | if(pv==NULL) { |
146 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
147 | 0 | return; |
148 | 0 | } |
149 | 0 | } |
150 | | |
151 | 0 | doInsertionSort(array, length, itemSize, cmp, context, pv); |
152 | |
|
153 | 0 | if(pv!=v) { |
154 | 0 | uprv_free(pv); |
155 | 0 | } |
156 | 0 | } |
157 | | |
158 | | /* QuickSort ---------------------------------------------------------------- */ |
159 | | |
160 | | /* |
161 | | * This implementation is semi-recursive: |
162 | | * It recurses for the smaller sub-array to shorten the recursion depth, |
163 | | * and loops for the larger sub-array. |
164 | | * |
165 | | * Loosely after QuickSort algorithms in |
166 | | * Niklaus Wirth |
167 | | * Algorithmen und Datenstrukturen mit Modula-2 |
168 | | * B.G. Teubner Stuttgart |
169 | | * 4. Auflage 1986 |
170 | | * ISBN 3-519-02260-5 |
171 | | */ |
172 | | static void |
173 | | subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, |
174 | | UComparator *cmp, const void *context, |
175 | 0 | void *px, void *pw) { |
176 | 0 | int32_t left, right; |
177 | | |
178 | | /* start and left are inclusive, limit and right are exclusive */ |
179 | 0 | do { |
180 | 0 | if((start+MIN_QSORT)>=limit) { |
181 | 0 | doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); |
182 | 0 | break; |
183 | 0 | } |
184 | | |
185 | 0 | left=start; |
186 | 0 | right=limit; |
187 | | |
188 | | /* x=array[middle] */ |
189 | 0 | uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); |
190 | |
|
191 | 0 | do { |
192 | 0 | while(/* array[left]<x */ |
193 | 0 | cmp(context, array+left*itemSize, px)<0 |
194 | 0 | ) { |
195 | 0 | ++left; |
196 | 0 | } |
197 | 0 | while(/* x<array[right-1] */ |
198 | 0 | cmp(context, px, array+(right-1)*itemSize)<0 |
199 | 0 | ) { |
200 | 0 | --right; |
201 | 0 | } |
202 | | |
203 | | /* swap array[left] and array[right-1] via w; ++left; --right */ |
204 | 0 | if(left<right) { |
205 | 0 | --right; |
206 | |
|
207 | 0 | if(left<right) { |
208 | 0 | uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize); |
209 | 0 | uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize); |
210 | 0 | uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize); |
211 | 0 | } |
212 | |
|
213 | 0 | ++left; |
214 | 0 | } |
215 | 0 | } while(left<right); |
216 | | |
217 | | /* sort sub-arrays */ |
218 | 0 | if((right-start)<(limit-left)) { |
219 | | /* sort [start..right[ */ |
220 | 0 | if(start<(right-1)) { |
221 | 0 | subQuickSort(array, start, right, itemSize, cmp, context, px, pw); |
222 | 0 | } |
223 | | |
224 | | /* sort [left..limit[ */ |
225 | 0 | start=left; |
226 | 0 | } else { |
227 | | /* sort [left..limit[ */ |
228 | 0 | if(left<(limit-1)) { |
229 | 0 | subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); |
230 | 0 | } |
231 | | |
232 | | /* sort [start..right[ */ |
233 | 0 | limit=right; |
234 | 0 | } |
235 | 0 | } while(start<(limit-1)); |
236 | 0 | } |
237 | | |
238 | | static void |
239 | | quickSort(char *array, int32_t length, int32_t itemSize, |
240 | 0 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
241 | 0 | UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1]; |
242 | 0 | void *p; |
243 | | |
244 | | /* allocate two intermediate item variables (x and w) */ |
245 | 0 | if(itemSize<=STACK_ITEM_SIZE) { |
246 | 0 | p=xw; |
247 | 0 | } else { |
248 | 0 | p=uprv_malloc(2*itemSize); |
249 | 0 | if(p==NULL) { |
250 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
251 | 0 | return; |
252 | 0 | } |
253 | 0 | } |
254 | | |
255 | 0 | subQuickSort(array, 0, length, itemSize, |
256 | 0 | cmp, context, p, (char *)p+itemSize); |
257 | |
|
258 | 0 | if(p!=xw) { |
259 | 0 | uprv_free(p); |
260 | 0 | } |
261 | 0 | } |
262 | | |
263 | | /* uprv_sortArray() API ----------------------------------------------------- */ |
264 | | |
265 | | /* |
266 | | * Check arguments, select an appropriate implementation, |
267 | | * cast the array to char * so that array+i*itemSize works. |
268 | | */ |
269 | | U_CAPI void U_EXPORT2 |
270 | | uprv_sortArray(void *array, int32_t length, int32_t itemSize, |
271 | | UComparator *cmp, const void *context, |
272 | 0 | UBool sortStable, UErrorCode *pErrorCode) { |
273 | 0 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
274 | 0 | return; |
275 | 0 | } |
276 | 0 | if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { |
277 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
278 | 0 | return; |
279 | 0 | } |
280 | | |
281 | 0 | if(length<=1) { |
282 | 0 | return; |
283 | 0 | } else if(length<MIN_QSORT || sortStable) { |
284 | 0 | insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
285 | 0 | } else { |
286 | 0 | quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
287 | 0 | } |
288 | 0 | } |