Coverage Report

Created: 2022-11-15 06:34

/src/libxml2/timsort.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Taken from https://github.com/swenson/sort
3
 * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15
4
 * Removed all code unrelated to Timsort and made minor adjustments for
5
 * cross-platform compatibility.
6
 */
7
8
/*
9
 * The MIT License (MIT)
10
 *
11
 * Copyright (c) 2010-2017 Christopher Swenson.
12
 * Copyright (c) 2012 Vojtech Fried.
13
 * Copyright (c) 2012 Google Inc. All Rights Reserved.
14
 *
15
 * Permission is hereby granted, free of charge, to any person obtaining a
16
 * copy of this software and associated documentation files (the "Software"),
17
 * to deal in the Software without restriction, including without limitation
18
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19
 * and/or sell copies of the Software, and to permit persons to whom the
20
 * Software is furnished to do so, subject to the following conditions:
21
 *
22
 * The above copyright notice and this permission notice shall be included in
23
 * all copies or substantial portions of the Software.
24
 *
25
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
31
 * DEALINGS IN THE SOFTWARE.
32
 */
33
34
#include <stdlib.h>
35
#include <stdio.h>
36
#include <string.h>
37
#ifdef HAVE_STDINT_H
38
#include <stdint.h>
39
#elif defined(_WIN32)
40
typedef unsigned __int64 uint64_t;
41
#endif
42
43
#ifndef SORT_NAME
44
#error "Must declare SORT_NAME"
45
#endif
46
47
#ifndef SORT_TYPE
48
#error "Must declare SORT_TYPE"
49
#endif
50
51
#ifndef SORT_CMP
52
#define SORT_CMP(x, y)  ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
53
#endif
54
55
#ifndef TIM_SORT_STACK_SIZE
56
#define TIM_SORT_STACK_SIZE 128
57
#endif
58
59
1
/*
 * Exchange two lvalues of type SORT_TYPE.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays valid in an unbraced if/else.  Both arguments are evaluated
 * twice, so they must be free of side effects.
 */
#define SORT_SWAP(x,y) do { SORT_TYPE sort_swap_tmp_ = (x); (x) = (y); (y) = sort_swap_tmp_; } while (0)
60
61
62
/* Common, type-agnostic functions and constants that we don't want to declare twice. */
63
#ifndef SORT_COMMON_H
64
#define SORT_COMMON_H
65
66
#ifndef MAX
67
7
#define MAX(x,y) (((x) > (y) ? (x) : (y)))
68
#endif
69
70
#ifndef MIN
71
445
#define MIN(x,y) (((x) < (y) ? (x) : (y)))
72
#endif
73
74
static int compute_minrun(const uint64_t);
75
76
#ifndef CLZ
77
#if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))
78
7
#define CLZ __builtin_clzll
79
#else
80
81
static int clzll(uint64_t);
82
83
/*
 * Portable count-leading-zeros for a 64-bit value (binary search over the
 * upper bits, adapted from Hacker's Delight).
 *
 * Returns the number of zero bits above the most significant set bit of x,
 * or 64 when x is 0.
 *
 * The probes are expressed with shifts rather than wide hexadecimal
 * constants, so no integer literal has to be wider than `int`.
 */
static int clzll(uint64_t x) {
  int zeros = 0;
  int width;

  if (x == 0) {
    return 64;
  }

  /* Test the top 32, 16, 8, 4, 2 and 1 bits in turn; whenever they are all
   * zero, count them and shift the remaining bits up to the top. */
  for (width = 32; width > 0; width >>= 1) {
    if ((x >> (64 - width)) == 0) {
      zeros += width;
      x <<= width;
    }
  }

  return zeros;
}
124
125
#define CLZ clzll
126
#endif
127
#endif
128
129
7
/*
 * Compute the minimum run length used by timsort for an array of `size`
 * elements: keep the six most significant bits of `size` and add one if any
 * of the discarded lower bits is set.
 *
 * `size` is expected to be non-zero (CLZ may be a compiler builtin that is
 * not defined for 0); the caller in this file only passes size >= 64.
 */
static __inline int compute_minrun(const uint64_t size) {
  const int bits = 64 - CLZ(size);                 /* position of the top set bit */
  const int drop = (bits > 6) ? (bits - 6) : 0;    /* low bits to discard */
  const uint64_t remainder = size & ((1ULL << drop) - 1);
  const int base = size >> drop;

  return (remainder != 0) ? (base + 1) : base;
}
141
142
#endif /* SORT_COMMON_H */
143
144
10.5k
#define SORT_CONCAT(x, y) x ## _ ## y
145
10.5k
#define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
146
10.5k
#define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
147
148
6.59k
#define BINARY_INSERTION_FIND          SORT_MAKE_STR(binary_insertion_find)
149
910
#define BINARY_INSERTION_SORT_START    SORT_MAKE_STR(binary_insertion_sort_start)
150
814
#define BINARY_INSERTION_SORT          SORT_MAKE_STR(binary_insertion_sort)
151
1
#define REVERSE_ELEMENTS               SORT_MAKE_STR(reverse_elements)
152
452
#define COUNT_RUN                      SORT_MAKE_STR(count_run)
153
669
#define CHECK_INVARIANT                SORT_MAKE_STR(check_invariant)
154
#define TIM_SORT                       SORT_MAKE_STR(tim_sort)
155
445
#define TIM_SORT_RESIZE                SORT_MAKE_STR(tim_sort_resize)
156
445
#define TIM_SORT_MERGE                 SORT_MAKE_STR(tim_sort_merge)
157
237
#define TIM_SORT_COLLAPSE              SORT_MAKE_STR(tim_sort_collapse)
158
159
#ifndef MAX
160
#define MAX(x,y) (((x) > (y) ? (x) : (y)))
161
#endif
162
#ifndef MIN
163
#define MIN(x,y) (((x) < (y) ? (x) : (y)))
164
#endif
165
166
/*
 * One entry of the run stack: a run occupying `length` consecutive elements
 * of the array being sorted, beginning at index `start`.
 *
 * The type name is derived from SORT_NAME, like the function names above, so
 * it does not clash with the same typedef from another instantiation of this
 * header.  It is #undef'ed at the end of the file.
 */
#define TIM_SORT_RUN_T                 SORT_MAKE_STR(tim_sort_run_t)

typedef struct {
  size_t start;
  size_t length;
} TIM_SORT_RUN_T;
170
171
172
void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
173
void TIM_SORT(SORT_TYPE *dst, const size_t size);
174
175
176
/* Function used to do a binary search for binary insertion sort */
177
static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
178
6.59k
    const size_t size) {
179
6.59k
  size_t l, c, r;
180
6.59k
  SORT_TYPE cx;
181
6.59k
  l = 0;
182
6.59k
  r = size - 1;
183
6.59k
  c = r >> 1;
184
185
  /* check for out of bounds at the beginning. */
186
6.59k
  if (SORT_CMP(x, dst[0]) < 0) {
187
477
    return 0;
188
6.11k
  } else if (SORT_CMP(x, dst[r]) > 0) {
189
0
    return r;
190
0
  }
191
192
6.11k
  cx = dst[c];
193
194
24.5k
  while (1) {
195
24.5k
    const int val = SORT_CMP(x, cx);
196
197
24.5k
    if (val < 0) {
198
9.69k
      if (c - l <= 1) {
199
3.72k
        return c;
200
3.72k
      }
201
202
5.96k
      r = c;
203
14.8k
    } else { /* allow = for stability. The binary search favors the right. */
204
14.8k
      if (r - c <= 1) {
205
2.39k
        return c + 1;
206
2.39k
      }
207
208
12.4k
      l = c;
209
12.4k
    }
210
211
18.4k
    c = l + ((r - l) >> 1);
212
18.4k
    cx = dst[c];
213
18.4k
  }
214
6.11k
}
215
216
/* Binary insertion sort, but knowing that the first "start" entries are sorted.  Used in timsort. */
217
910
static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) {
218
910
  size_t i;
219
220
15.4k
  for (i = start; i < size; i++) {
221
14.4k
    size_t j;
222
14.4k
    SORT_TYPE x;
223
14.4k
    size_t location;
224
225
    /* If this entry is already correct, just move along */
226
14.4k
    if (SORT_CMP(dst[i - 1], dst[i]) <= 0) {
227
7.89k
      continue;
228
7.89k
    }
229
230
    /* Else we need to find the right place, shift everything over, and squeeze in */
231
6.59k
    x = dst[i];
232
6.59k
    location = BINARY_INSERTION_FIND(dst, x, i);
233
234
46.9k
    for (j = i - 1; j >= location; j--) {
235
40.8k
      dst[j + 1] = dst[j];
236
237
40.8k
      if (j == 0) { /* check edge case because j is unsigned */
238
477
        break;
239
477
      }
240
40.8k
    }
241
242
6.59k
    dst[location] = x;
243
6.59k
  }
244
910
}
245
246
/* Binary insertion sort */
247
814
void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) {
248
  /* don't bother sorting an array of size <= 1 */
249
814
  if (size <= 1) {
250
0
    return;
251
0
  }
252
253
814
  BINARY_INSERTION_SORT_START(dst, 1, size);
254
814
}
255
256
/* timsort implementation, based on timsort.txt */
257
258
1
/*
 * Reverse dst[start .. end] in place (both indices inclusive) by swapping
 * elements from the two ends towards the middle.  Does nothing when
 * start >= end.
 */
static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) {
  while (start < end) {
    SORT_SWAP(dst[start], dst[end]);
    start++;
    end--;
  }
}
269
270
452
/*
 * Identify the run beginning at dst[start] in an array of `size` elements and
 * return its length.
 *
 * - One element left: returns 1.
 * - Two elements left: they are put in order (swapped if needed) and 2 is
 *   returned.
 * - Otherwise the first pair decides the direction.  A non-decreasing run is
 *   scanned as long as neighbours compare <= ; a strictly decreasing run is
 *   scanned as long as neighbours compare >, and is then reversed in place.
 *   The scan stops when it reaches index size - 1 without examining that
 *   last element.
 */
static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) {
  const size_t last = size - 1;
  size_t pos;

  if (size - start == 1) {
    return 1;
  }

  if (start >= size - 2) {
    if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
      SORT_SWAP(dst[size - 2], dst[size - 1]);
    }

    return 2;
  }

  pos = start + 2;

  if (SORT_CMP(dst[start], dst[start + 1]) <= 0) {
    /* increasing run */
    while ((pos != last) && (SORT_CMP(dst[pos - 1], dst[pos]) <= 0)) {
      pos++;
    }
  } else {
    /* decreasing run */
    while ((pos != last) && (SORT_CMP(dst[pos - 1], dst[pos]) > 0)) {
      pos++;
    }

    /* reverse in-place so the run becomes increasing */
    REVERSE_ELEMENTS(dst, start, pos - 1);
  }

  return pos - start;
}
321
322
669
/*
 * Check the run-length invariants on the top of the run stack.
 *
 * With X, Y, Z the lengths of the third-from-top, second-from-top and top
 * runs, the stack is considered valid when
 *   - it holds fewer than two runs, or
 *   - it holds exactly two runs and Y > Z, or
 *   - X > Y + Z and Y > Z.
 *
 * Returns 1 when the invariants hold and 0 when they do not.
 */
static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) {
  size_t top, second, third;

  if (stack_curr < 2) {
    return 1;
  }

  top = stack[stack_curr - 1].length;
  second = stack[stack_curr - 2].length;

  if (stack_curr == 2) {
    return (second > top) ? 1 : 0;
  }

  third = stack[stack_curr - 3].length;

  return ((third > second + top) && (second > top)) ? 1 : 0;
}
350
351
typedef struct {
352
  size_t alloc;
353
  SORT_TYPE *storage;
354
} TEMP_STORAGE_T;
355
356
445
/*
 * Make sure the scratch buffer in `store` has room for at least `new_size`
 * elements.  The buffer only ever grows; a request that already fits is a
 * no-op.
 *
 * On allocation failure a message is written to stderr and the process is
 * terminated with exit(1).
 */
static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) {
  SORT_TYPE *grown;

  if (new_size <= store->alloc) {
    return;
  }

  grown = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));

  if (grown == NULL) {
    fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
            (unsigned long)(sizeof(SORT_TYPE) * new_size));
    exit(1);
  }

  store->alloc = new_size;
  store->storage = grown;
}
370
371
static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
372
445
                           TEMP_STORAGE_T *store) {
373
445
  const size_t A = stack[stack_curr - 2].length;
374
445
  const size_t B = stack[stack_curr - 1].length;
375
445
  const size_t curr = stack[stack_curr - 2].start;
376
445
  SORT_TYPE *storage;
377
445
  size_t i, j, k;
378
445
  TIM_SORT_RESIZE(store, MIN(A, B));
379
445
  storage = store->storage;
380
381
  /* left merge */
382
445
  if (A < B) {
383
275
    memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
384
275
    i = 0;
385
275
    j = curr + A;
386
387
69.9k
    for (k = curr; k < curr + A + B; k++) {
388
69.9k
      if ((i < A) && (j < curr + A + B)) {
389
66.0k
        if (SORT_CMP(storage[i], dst[j]) <= 0) {
390
49.6k
          dst[k] = storage[i++];
391
49.6k
        } else {
392
16.3k
          dst[k] = dst[j++];
393
16.3k
        }
394
66.0k
      } else if (i < A) {
395
3.64k
        dst[k] = storage[i++];
396
3.64k
      } else {
397
271
        break;
398
271
      }
399
69.9k
    }
400
275
  } else {
401
    /* right merge */
402
170
    memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
403
170
    i = B;
404
170
    j = curr + A;
405
170
    k = curr + A + B;
406
407
85.2k
    while (k > curr) {
408
85.2k
      k--;
409
85.2k
      if ((i > 0) && (j > curr)) {
410
85.0k
        if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) {
411
30.1k
          dst[k] = dst[--j];
412
54.9k
        } else {
413
54.9k
          dst[k] = storage[--i];
414
54.9k
        }
415
85.0k
      } else if (i > 0) {
416
2
        dst[k] = storage[--i];
417
168
      } else {
418
168
        break;
419
168
      }
420
85.2k
    }
421
170
  }
422
445
}
423
424
static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
425
237
                             TEMP_STORAGE_T *store, const size_t size) {
426
654
  while (1) {
427
654
    size_t A, B, C, D;
428
654
    int ABC, BCD, CD;
429
430
    /* if the stack only has one thing on it, we are done with the collapse */
431
654
    if (stack_curr <= 1) {
432
0
      break;
433
0
    }
434
435
    /* if this is the last merge, just do it */
436
654
    if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
437
0
      TIM_SORT_MERGE(dst, stack, stack_curr, store);
438
0
      stack[0].length += stack[1].length;
439
0
      stack_curr--;
440
0
      break;
441
0
    }
442
    /* check if the invariant is off for a stack of 2 elements */
443
654
    else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
444
1
      TIM_SORT_MERGE(dst, stack, stack_curr, store);
445
1
      stack[0].length += stack[1].length;
446
1
      stack_curr--;
447
1
      break;
448
653
    } else if (stack_curr == 2) {
449
20
      break;
450
20
    }
451
452
633
    B = stack[stack_curr - 3].length;
453
633
    C = stack[stack_curr - 2].length;
454
633
    D = stack[stack_curr - 1].length;
455
456
633
    if (stack_curr >= 4) {
457
527
      A = stack[stack_curr - 4].length;
458
527
      ABC = (A <= B + C);
459
527
    } else {
460
106
      ABC = 0;
461
106
    }
462
463
633
    BCD = (B <= C + D) || ABC;
464
633
    CD = (C <= D);
465
466
    /* Both invariants are good */
467
633
    if (!BCD && !CD) {
468
216
      break;
469
216
    }
470
471
    /* left merge */
472
417
    if (BCD && !CD) {
473
99
      TIM_SORT_MERGE(dst, stack, stack_curr - 1, store);
474
99
      stack[stack_curr - 3].length += stack[stack_curr - 2].length;
475
99
      stack[stack_curr - 2] = stack[stack_curr - 1];
476
99
      stack_curr--;
477
318
    } else {
478
      /* right merge */
479
318
      TIM_SORT_MERGE(dst, stack, stack_curr, store);
480
318
      stack[stack_curr - 2].length += stack[stack_curr - 1].length;
481
318
      stack_curr--;
482
318
    }
483
417
  }
484
485
237
  return stack_curr;
486
237
}
487
488
static __inline int PUSH_NEXT(SORT_TYPE *dst,
489
                              const size_t size,
490
                              TEMP_STORAGE_T *store,
491
                              const size_t minrun,
492
                              TIM_SORT_RUN_T *run_stack,
493
                              size_t *stack_curr,
494
452
                              size_t *curr) {
495
452
  size_t len = COUNT_RUN(dst, *curr, size);
496
452
  size_t run = minrun;
497
498
452
  if (run > size - *curr) {
499
5
    run = size - *curr;
500
5
  }
501
502
452
  if (run > len) {
503
96
    BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
504
96
    len = run;
505
96
  }
506
507
452
  run_stack[*stack_curr].start = *curr;
508
452
  run_stack[*stack_curr].length = len;
509
452
  (*stack_curr)++;
510
452
  *curr += len;
511
512
452
  if (*curr == size) {
513
    /* finish up */
514
34
    while (*stack_curr > 1) {
515
27
      TIM_SORT_MERGE(dst, run_stack, *stack_curr, store);
516
27
      run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
517
27
      (*stack_curr)--;
518
27
    }
519
520
7
    if (store->storage != NULL) {
521
7
      free(store->storage);
522
7
      store->storage = NULL;
523
7
    }
524
525
7
    return 0;
526
7
  }
527
528
445
  return 1;
529
452
}
530
531
821
/*
 * Sort dst[0 .. size-1] in place with timsort.
 *
 * Arrays with fewer than two elements are returned unchanged, and arrays with
 * fewer than 64 elements are handed to binary insertion sort.  Otherwise runs
 * are pushed onto a fixed-size stack (TIM_SORT_STACK_SIZE entries) and merged
 * whenever the stack invariants are violated, until the input is consumed.
 */
void TIM_SORT(SORT_TYPE *dst, const size_t size) {
  TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
  TEMP_STORAGE_T scratch;
  size_t stack_curr = 0;
  size_t curr = 0;
  size_t minrun;
  int primed;

  /* don't bother sorting an array of size <= 1 */
  if (size <= 1) {
    return;
  }

  if (size < 64) {
    BINARY_INSERTION_SORT(dst, size);
    return;
  }

  /* compute the minimum run length */
  minrun = compute_minrun(size);

  /* temporary storage for merges, allocated lazily by the merge step */
  scratch.alloc = 0;
  scratch.storage = NULL;

  /* start with up to three runs on the stack before checking invariants */
  for (primed = 0; primed < 3; primed++) {
    if (!PUSH_NEXT(dst, size, &scratch, minrun, run_stack, &stack_curr, &curr)) {
      return;
    }
  }

  for (;;) {
    if (CHECK_INVARIANT(run_stack, (int)stack_curr)) {
      /* PUSH_NEXT returns 0 once the whole array has been merged */
      if (!PUSH_NEXT(dst, size, &scratch, minrun, run_stack, &stack_curr, &curr)) {
        return;
      }
    } else {
      stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, (int)stack_curr, &scratch, size);
    }
  }
}
578
579
#undef SORT_CONCAT
580
#undef SORT_MAKE_STR1
581
#undef SORT_MAKE_STR
582
#undef SORT_NAME
583
#undef SORT_TYPE
584
#undef SORT_CMP
585
#undef TEMP_STORAGE_T
586
#undef TIM_SORT_RUN_T
587
#undef PUSH_NEXT
588
#undef SORT_SWAP
589
#undef SORT_CONCAT
590
#undef SORT_MAKE_STR1
591
#undef SORT_MAKE_STR
592
#undef BINARY_INSERTION_FIND
593
#undef BINARY_INSERTION_SORT_START
594
#undef BINARY_INSERTION_SORT
595
#undef REVERSE_ELEMENTS
596
#undef COUNT_RUN
597
#undef TIM_SORT
598
#undef TIM_SORT_RESIZE
599
#undef TIM_SORT_COLLAPSE
600
#undef TIM_SORT_RUN_T
601
#undef TEMP_STORAGE_T