Coverage Report

Created: 2026-01-10 06:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2/timsort.h
Line
Count
Source
1
/*
2
 * Taken from https://github.com/swenson/sort
3
 * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15
4
 * Removed all code unrelated to Timsort and made minor adjustments for
5
 * cross-platform compatibility.
6
 */
7
8
/*
9
 * The MIT License (MIT)
10
 *
11
 * Copyright (c) 2010-2017 Christopher Swenson.
12
 * Copyright (c) 2012 Vojtech Fried.
13
 * Copyright (c) 2012 Google Inc. All Rights Reserved.
14
 *
15
 * Permission is hereby granted, free of charge, to any person obtaining a
16
 * copy of this software and associated documentation files (the "Software"),
17
 * to deal in the Software without restriction, including without limitation
18
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19
 * and/or sell copies of the Software, and to permit persons to whom the
20
 * Software is furnished to do so, subject to the following conditions:
21
 *
22
 * The above copyright notice and this permission notice shall be included in
23
 * all copies or substantial portions of the Software.
24
 *
25
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
31
 * DEALINGS IN THE SOFTWARE.
32
 */
33
34
#include <stdlib.h>
35
#include <stdio.h>
36
#include <string.h>
37
#ifdef HAVE_STDINT_H
38
#include <stdint.h>
39
#elif defined(_WIN32)
40
typedef unsigned __int64 uint64_t;
41
#endif
42
43
#ifndef SORT_NAME
44
#error "Must declare SORT_NAME"
45
#endif
46
47
#ifndef SORT_TYPE
48
#error "Must declare SORT_TYPE"
49
#endif
50
51
#ifndef SORT_CMP
52
#define SORT_CMP(x, y)  ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
53
#endif
54
55
#ifndef TIM_SORT_STACK_SIZE
56
#define TIM_SORT_STACK_SIZE 128
57
#endif
58
59
397k
/*
 * Exchange two SORT_TYPE lvalues.  Both arguments are evaluated more than
 * once, so they must be free of side effects.  The do/while(0) wrapper makes
 * the expansion a single statement, so `SORT_SWAP(a, b);` is safe in an
 * unbraced if/else; the temporary avoids the implementation-reserved `__`
 * prefix.
 */
#define SORT_SWAP(x,y) do { SORT_TYPE sort_swap_tmp_ = (x); (x) = (y); (y) = sort_swap_tmp_; } while (0)
60
61
62
/* Common, type-agnostic functions and constants that we don't want to declare twice. */
63
#ifndef SORT_COMMON_H
64
#define SORT_COMMON_H
65
66
#ifndef MAX
67
31.4k
#define MAX(x,y) (((x) > (y) ? (x) : (y)))
68
#endif
69
70
#ifndef MIN
71
64.3k
#define MIN(x,y) (((x) < (y) ? (x) : (y)))
72
#endif
73
74
static int compute_minrun(const uint64_t);
75
76
#ifndef CLZ
77
#if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))
78
31.4k
#define CLZ __builtin_clzll
79
#else
80
81
static int clzll(uint64_t);
82
83
/* adapted from Hacker's Delight */
84
/*
 * Portable count-leading-zeros for a 64-bit value (binary search over the
 * bit width, after Hacker's Delight).  Returns 64 when x is zero.
 */
static int clzll(uint64_t x) {
  int zeros = 0;
  int width;

  if (x == 0) {
    return 64;
  }

  /* Probe the top `width` bits; if they are all clear, count them and
   * shift the remaining bits up so the next, narrower probe sees them. */
  for (width = 32; width > 0; width >>= 1) {
    if ((x >> (64 - width)) == 0) {
      zeros += width;
      x <<= width;
    }
  }

  return zeros;
}
124
125
#define CLZ clzll
126
#endif
127
#endif
128
129
31.4k
/*
 * Compute the minimum run length used by timsort for an array of `size`
 * elements: the six most significant bits of `size`, plus one if any of the
 * remaining lower bits are set.  For size < 64 the result is `size` itself.
 */
static __inline int compute_minrun(const uint64_t size) {
  int top_bit;
  int shift;
  int minrun;
  uint64_t mask;

  /* CLZ may expand to __builtin_clzll, whose result is undefined for 0.
   * The portable fallback yields 0 for this input, so return that. */
  if (size == 0) {
    return 0;
  }

  top_bit = 64 - CLZ(size);           /* index of highest set bit, 1-based */
  shift = MAX(top_bit, 6) - 6;        /* bits to drop to keep the top six */
  minrun = (int)(size >> shift);      /* at most 63 after the shift */
  mask = (1ULL << shift) - 1;         /* the bits that were dropped */

  if (mask & size) {
    return minrun + 1;
  }

  return minrun;
}
141
142
#endif /* SORT_COMMON_H */
143
144
4.25M
/*
 * Name mangling: every internal symbol is prefixed with SORT_NAME so that
 * this header can be instantiated for several element types.  SORT_NAME is
 * expanded lazily, at the point where a mangled name is used.
 */
#define SORT_CONCAT(x, y) x ## _ ## y
#define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
#define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)

#define BINARY_INSERTION_FIND          SORT_MAKE_STR(binary_insertion_find)
#define BINARY_INSERTION_SORT_START    SORT_MAKE_STR(binary_insertion_sort_start)
#define BINARY_INSERTION_SORT          SORT_MAKE_STR(binary_insertion_sort)
#define REVERSE_ELEMENTS               SORT_MAKE_STR(reverse_elements)
#define COUNT_RUN                      SORT_MAKE_STR(count_run)
#define CHECK_INVARIANT                SORT_MAKE_STR(check_invariant)
#define TIM_SORT                       SORT_MAKE_STR(tim_sort)
#define TIM_SORT_RESIZE                SORT_MAKE_STR(tim_sort_resize)
#define TIM_SORT_MERGE                 SORT_MAKE_STR(tim_sort_merge)
#define TIM_SORT_COLLAPSE              SORT_MAKE_STR(tim_sort_collapse)
/* These three are #undef'd at the end of the header and must be mangled
 * too, otherwise a second instantiation in the same translation unit
 * redefines the typedefs and the static helper. */
#define TIM_SORT_RUN_T                 SORT_MAKE_STR(tim_sort_run_t)
#define TEMP_STORAGE_T                 SORT_MAKE_STR(temp_storage_t)
#define PUSH_NEXT                      SORT_MAKE_STR(push_next)
158
159
#ifndef MAX
160
#define MAX(x,y) (((x) > (y) ? (x) : (y)))
161
#endif
162
#ifndef MIN
163
#define MIN(x,y) (((x) < (y) ? (x) : (y)))
164
#endif
165
166
/* One entry of the timsort run stack: a contiguous sorted slice of the
 * array being sorted. */
typedef struct {
  size_t start;   /* index of the first element of the run */
  size_t length;  /* number of elements in the run */
} TIM_SORT_RUN_T;
170
171
172
XML_HIDDEN
173
void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
174
XML_HIDDEN
175
void TIM_SORT(SORT_TYPE *dst, const size_t size);
176
177
178
/* Function used to do a binary search for binary insertion sort */
179
static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
180
1.99M
    const size_t size) {
181
1.99M
  size_t l, c, r;
182
1.99M
  SORT_TYPE cx;
183
1.99M
  l = 0;
184
1.99M
  r = size - 1;
185
1.99M
  c = r >> 1;
186
187
  /* check for out of bounds at the beginning. */
188
1.99M
  if (SORT_CMP(x, dst[0]) < 0) {
189
277k
    return 0;
190
1.71M
  } else if (SORT_CMP(x, dst[r]) > 0) {
191
0
    return r;
192
0
  }
193
194
1.71M
  cx = dst[c];
195
196
5.23M
  while (1) {
197
5.23M
    const int val = SORT_CMP(x, cx);
198
199
5.23M
    if (val < 0) {
200
791k
      if (c - l <= 1) {
201
367k
        return c;
202
367k
      }
203
204
423k
      r = c;
205
4.44M
    } else { /* allow = for stability. The binary search favors the right. */
206
4.44M
      if (r - c <= 1) {
207
1.34M
        return c + 1;
208
1.34M
      }
209
210
3.09M
      l = c;
211
3.09M
    }
212
213
3.51M
    c = l + ((r - l) >> 1);
214
3.51M
    cx = dst[c];
215
3.51M
  }
216
1.71M
}
217
218
/* Binary insertion sort, but knowing that the first "start" entries are sorted.  Used in timsort. */
219
1.00M
static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) {
220
1.00M
  size_t i;
221
222
15.6M
  for (i = start; i < size; i++) {
223
14.6M
    size_t j;
224
14.6M
    SORT_TYPE x;
225
14.6M
    size_t location;
226
227
    /* If this entry is already correct, just move along */
228
14.6M
    if (SORT_CMP(dst[i - 1], dst[i]) <= 0) {
229
12.6M
      continue;
230
12.6M
    }
231
232
    /* Else we need to find the right place, shift everything over, and squeeze in */
233
1.99M
    x = dst[i];
234
1.99M
    location = BINARY_INSERTION_FIND(dst, x, i);
235
236
6.82M
    for (j = i - 1; j >= location; j--) {
237
5.10M
      dst[j + 1] = dst[j];
238
239
5.10M
      if (j == 0) { /* check edge case because j is unsigned */
240
277k
        break;
241
277k
      }
242
5.10M
    }
243
244
1.99M
    dst[location] = x;
245
1.99M
  }
246
1.00M
}
247
248
/* Binary insertion sort */
249
966k
void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) {
250
  /* don't bother sorting an array of size <= 1 */
251
966k
  if (size <= 1) {
252
0
    return;
253
0
  }
254
255
966k
  BINARY_INSERTION_SORT_START(dst, 1, size);
256
966k
}
257
258
/* timsort implementation, based on timsort.txt */
259
260
4.62k
/* Reverse dst[start .. end] in place; both bounds are inclusive.  Does
 * nothing when start >= end. */
static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) {
  for (; start < end; start++, end--) {
    SORT_SWAP(dst[start], dst[end]);
  }
}
271
272
95.7k
/*
 * Find the natural run beginning at dst[start] and return its length.
 * A strictly decreasing run is reversed in place so that every reported run
 * is non-decreasing.  The scan stops at index size - 1, so the final element
 * is never absorbed into a run longer than two; it is picked up by a later
 * call as a run of length one.
 */
static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) {
  const size_t last = size - 1;
  size_t curr;

  /* Exactly one element left. */
  if (size - start == 1) {
    return 1;
  }

  /* Exactly two elements left: order them directly. */
  if (start >= size - 2) {
    if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
      SORT_SWAP(dst[size - 2], dst[size - 1]);
    }

    return 2;
  }

  if (SORT_CMP(dst[start], dst[start + 1]) <= 0) {
    /* increasing run: advance while neighbours stay in order */
    for (curr = start + 2; curr != last; curr++) {
      if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) {
        break;
      }
    }
  } else {
    /* decreasing run: advance while neighbours keep strictly falling */
    for (curr = start + 2; curr != last; curr++) {
      if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) {
        break;
      }
    }

    /* reverse in-place */
    REVERSE_ELEMENTS(dst, start, curr - 1);
  }

  return curr - start;
}
323
324
46.4k
/*
 * Test the run-length invariant on the top of the run stack.
 * Returns 1 when it holds and 0 when a collapse is required:
 *   - fewer than two runs: always holds;
 *   - two runs: the lower run must be strictly longer than the top one;
 *   - three or more: with A, B, C the top three lengths (C on top),
 *     both A > B + C and B > C must hold.
 */
static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) {
  size_t top, below, deepest;

  if (stack_curr < 2) {
    return 1;
  }

  top = stack[stack_curr - 1].length;
  below = stack[stack_curr - 2].length;

  if (stack_curr == 2) {
    return (below > top) ? 1 : 0;
  }

  deepest = stack[stack_curr - 3].length;

  return ((deepest > below + top) && (below > top)) ? 1 : 0;
}
352
353
typedef struct {
354
  size_t alloc;
355
  SORT_TYPE *storage;
356
} TEMP_STORAGE_T;
357
358
64.3k
/*
 * Make sure the scratch buffer can hold at least new_size elements.
 * The buffer only ever grows.  If the reallocation fails, a message is
 * written to stderr and the process exits with status 1.
 */
static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) {
  SORT_TYPE *grown;

  /* Already large enough: nothing to do. */
  if (new_size <= store->alloc) {
    return;
  }

  grown = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));

  if (grown == NULL) {
    fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
            (unsigned long)(sizeof(SORT_TYPE) * new_size));
    exit(1);
  }

  store->alloc = new_size;
  store->storage = grown;
}
372
373
static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
374
64.3k
                           TEMP_STORAGE_T *store) {
375
64.3k
  const size_t A = stack[stack_curr - 2].length;
376
64.3k
  const size_t B = stack[stack_curr - 1].length;
377
64.3k
  const size_t curr = stack[stack_curr - 2].start;
378
64.3k
  SORT_TYPE *storage;
379
64.3k
  size_t i, j, k;
380
64.3k
  TIM_SORT_RESIZE(store, MIN(A, B));
381
64.3k
  storage = store->storage;
382
383
  /* left merge */
384
64.3k
  if (A < B) {
385
11.1k
    memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
386
11.1k
    i = 0;
387
11.1k
    j = curr + A;
388
389
2.60M
    for (k = curr; k < curr + A + B; k++) {
390
2.59M
      if ((i < A) && (j < curr + A + B)) {
391
2.58M
        if (SORT_CMP(storage[i], dst[j]) <= 0) {
392
2.43M
          dst[k] = storage[i++];
393
2.43M
        } else {
394
145k
          dst[k] = dst[j++];
395
145k
        }
396
2.58M
      } else if (i < A) {
397
6.46k
        dst[k] = storage[i++];
398
10.1k
      } else {
399
10.1k
        break;
400
10.1k
      }
401
2.59M
    }
402
53.1k
  } else {
403
    /* right merge */
404
53.1k
    memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
405
53.1k
    i = B;
406
53.1k
    j = curr + A;
407
53.1k
    k = curr + A + B;
408
409
4.91M
    while (k > curr) {
410
4.91M
      k--;
411
4.91M
      if ((i > 0) && (j > curr)) {
412
4.86M
        if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) {
413
999k
          dst[k] = dst[--j];
414
3.86M
        } else {
415
3.86M
          dst[k] = storage[--i];
416
3.86M
        }
417
4.86M
      } else if (i > 0) {
418
781
        dst[k] = storage[--i];
419
52.4k
      } else {
420
52.4k
        break;
421
52.4k
      }
422
4.91M
    }
423
53.1k
  }
424
64.3k
}
425
426
static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
427
17.0k
                             TEMP_STORAGE_T *store, const size_t size) {
428
45.1k
  while (1) {
429
45.1k
    size_t A, B, C, D;
430
45.1k
    int ABC, BCD, CD;
431
432
    /* if the stack only has one thing on it, we are done with the collapse */
433
45.1k
    if (stack_curr <= 1) {
434
0
      break;
435
0
    }
436
437
    /* if this is the last merge, just do it */
438
45.1k
    if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
439
0
      TIM_SORT_MERGE(dst, stack, stack_curr, store);
440
0
      stack[0].length += stack[1].length;
441
0
      stack_curr--;
442
0
      break;
443
0
    }
444
    /* check if the invariant is off for a stack of 2 elements */
445
45.1k
    else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
446
2.23k
      TIM_SORT_MERGE(dst, stack, stack_curr, store);
447
2.23k
      stack[0].length += stack[1].length;
448
2.23k
      stack_curr--;
449
2.23k
      break;
450
42.8k
    } else if (stack_curr == 2) {
451
3.04k
      break;
452
3.04k
    }
453
454
39.8k
    B = stack[stack_curr - 3].length;
455
39.8k
    C = stack[stack_curr - 2].length;
456
39.8k
    D = stack[stack_curr - 1].length;
457
458
39.8k
    if (stack_curr >= 4) {
459
32.6k
      A = stack[stack_curr - 4].length;
460
32.6k
      ABC = (A <= B + C);
461
32.6k
    } else {
462
7.22k
      ABC = 0;
463
7.22k
    }
464
465
39.8k
    BCD = (B <= C + D) || ABC;
466
39.8k
    CD = (C <= D);
467
468
    /* Both invariants are good */
469
39.8k
    if (!BCD && !CD) {
470
11.7k
      break;
471
11.7k
    }
472
473
    /* left merge */
474
28.0k
    if (BCD && !CD) {
475
5.18k
      TIM_SORT_MERGE(dst, stack, stack_curr - 1, store);
476
5.18k
      stack[stack_curr - 3].length += stack[stack_curr - 2].length;
477
5.18k
      stack[stack_curr - 2] = stack[stack_curr - 1];
478
5.18k
      stack_curr--;
479
22.9k
    } else {
480
      /* right merge */
481
22.9k
      TIM_SORT_MERGE(dst, stack, stack_curr, store);
482
22.9k
      stack[stack_curr - 2].length += stack[stack_curr - 1].length;
483
22.9k
      stack_curr--;
484
22.9k
    }
485
28.0k
  }
486
487
17.0k
  return stack_curr;
488
17.0k
}
489
490
static __inline int PUSH_NEXT(SORT_TYPE *dst,
491
                              const size_t size,
492
                              TEMP_STORAGE_T *store,
493
                              const size_t minrun,
494
                              TIM_SORT_RUN_T *run_stack,
495
                              size_t *stack_curr,
496
95.7k
                              size_t *curr) {
497
95.7k
  size_t len = COUNT_RUN(dst, *curr, size);
498
95.7k
  size_t run = minrun;
499
500
95.7k
  if (run > size - *curr) {
501
30.0k
    run = size - *curr;
502
30.0k
  }
503
504
95.7k
  if (run > len) {
505
34.7k
    BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
506
34.7k
    len = run;
507
34.7k
  }
508
509
95.7k
  run_stack[*stack_curr].start = *curr;
510
95.7k
  run_stack[*stack_curr].length = len;
511
95.7k
  (*stack_curr)++;
512
95.7k
  *curr += len;
513
514
95.7k
  if (*curr == size) {
515
    /* finish up */
516
65.4k
    while (*stack_curr > 1) {
517
33.9k
      TIM_SORT_MERGE(dst, run_stack, *stack_curr, store);
518
33.9k
      run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
519
33.9k
      (*stack_curr)--;
520
33.9k
    }
521
522
31.4k
    if (store->storage != NULL) {
523
31.4k
      free(store->storage);
524
31.4k
      store->storage = NULL;
525
31.4k
    }
526
527
31.4k
    return 0;
528
31.4k
  }
529
530
64.3k
  return 1;
531
95.7k
}
532
533
1.03M
/*
 * Sort dst[0 .. size-1] with timsort.  Arrays shorter than 64 elements are
 * handed to binary insertion sort.  The scratch buffer used for merging is
 * released by PUSH_NEXT when it consumes the last run.
 */
void TIM_SORT(SORT_TYPE *dst, const size_t size) {
  TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
  TEMP_STORAGE_T scratch;
  size_t minrun;
  size_t stack_curr = 0;
  size_t curr = 0;
  int primed;

  /* don't bother sorting an array of size 1 */
  if (size <= 1) {
    return;
  }

  if (size < 64) {
    BINARY_INSERTION_SORT(dst, size);
    return;
  }

  /* compute the minimum run length */
  minrun = compute_minrun(size);

  /* temporary storage for merges, allocated on demand */
  scratch.alloc = 0;
  scratch.storage = NULL;

  /* Seed the stack with up to three runs before checking invariants. */
  for (primed = 0; primed < 3; primed++) {
    if (!PUSH_NEXT(dst, size, &scratch, minrun, run_stack, &stack_curr, &curr)) {
      return;
    }
  }

  for (;;) {
    if (CHECK_INVARIANT(run_stack, stack_curr)) {
      /* Stack is balanced: take the next run, or finish. */
      if (!PUSH_NEXT(dst, size, &scratch, minrun, run_stack, &stack_curr, &curr)) {
        return;
      }
    } else {
      stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, stack_curr, &scratch, size);
    }
  }
}
580
581
/*
 * Drop every per-instantiation macro so the header can be included again
 * with a different SORT_NAME / SORT_TYPE / SORT_CMP.  Each name appears
 * exactly once; undefining a name that is not a macro is harmless.
 */
#undef SORT_NAME
#undef SORT_TYPE
#undef SORT_CMP
#undef SORT_SWAP
#undef SORT_CONCAT
#undef SORT_MAKE_STR1
#undef SORT_MAKE_STR
#undef BINARY_INSERTION_FIND
#undef BINARY_INSERTION_SORT_START
#undef BINARY_INSERTION_SORT
#undef REVERSE_ELEMENTS
#undef COUNT_RUN
#undef CHECK_INVARIANT
#undef TIM_SORT
#undef TIM_SORT_RESIZE
#undef TIM_SORT_MERGE
#undef TIM_SORT_COLLAPSE
#undef TIM_SORT_RUN_T
#undef TEMP_STORAGE_T
#undef PUSH_NEXT