Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/lib/integerset.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * integerset.c
4
 *    Data structure to hold a large set of 64-bit integers efficiently
5
 *
6
 * IntegerSet provides an in-memory data structure to hold a set of
7
 * arbitrary 64-bit integers.  Internally, the values are stored in a
8
 * B-tree, with a special packed representation at the leaf level using
9
 * the Simple-8b algorithm, which can pack clusters of nearby values
10
 * very tightly.
11
 *
12
 * Memory consumption depends on the number of values stored, but also
13
 * on how far the values are from each other.  In the best case, with
14
 * long runs of consecutive integers, memory consumption can be as low as
15
 * 0.1 bytes per integer.  In the worst case, if integers are more than
16
 * 2^32 apart, it uses about 8 bytes per integer.  In typical use, the
17
 * consumption per integer is somewhere between those extremes, depending
18
 * on the range of integers stored, and how "clustered" they are.
19
 *
20
 *
21
 * Interface
22
 * ---------
23
 *
24
 *  intset_create     - Create a new, empty set
25
 *  intset_add_member   - Add an integer to the set
26
 *  intset_is_member    - Test if an integer is in the set
27
 *  intset_begin_iterate  - Begin iterating through all integers in set
28
 *  intset_iterate_next   - Return next set member, if any
29
 *
30
 * intset_create() creates the set in the current memory context.  Subsequent
31
 * operations that add to the data structure will continue to allocate from
32
 * that same context, even if it's not current anymore.
33
 *
34
 * Note that there is no function to free an integer set.  If you need to do
35
 * that, create a dedicated memory context to hold it, and destroy the memory
36
 * context instead.
37
 *
38
 *
39
 * Limitations
40
 * -----------
41
 *
42
 * - Values must be added in order.  (Random insertions would require
43
 *   splitting nodes, which hasn't been implemented.)
44
 *
45
 * - Values cannot be added while iteration is in progress.
46
 *
47
 * - No support for removing values.
48
 *
49
 * None of these limitations are fundamental to the data structure, so they
50
 * could be lifted if needed, by writing some new code.  But the current
51
 * users of this facility don't need them.
52
 *
53
 *
54
 * References
55
 * ----------
56
 *
57
 * Simple-8b encoding is based on:
58
 *
59
 * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60
 *   Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61
 *   (https://doi.org/10.1002/spe.948)
62
 *
63
 *
64
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
65
 * Portions Copyright (c) 1994, Regents of the University of California
66
 *
67
 * IDENTIFICATION
68
 *    src/backend/lib/integerset.c
69
 *
70
 *-------------------------------------------------------------------------
71
 */
72
#include "postgres.h"
73
74
#include "lib/integerset.h"
75
#include "utils/memutils.h"
76
77
78
/*
79
 * Maximum number of integers that can be encoded in a single Simple-8b
80
 * codeword. (Defined here before anything else, so that we can size arrays
81
 * using this.)
82
 */
83
0
#define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
84
85
/*
86
 * Parameters for shape of the in-memory B-tree.
87
 *
88
 * These set the size of each internal and leaf node.  They don't necessarily
89
 * need to be the same, because the tree is just an in-memory structure.
90
 * With the default 64, each node is about 1 kb.
91
 *
92
 * If you change these, you must recalculate MAX_TREE_LEVELS, too!
93
 */
94
0
#define MAX_INTERNAL_ITEMS  64
95
0
#define MAX_LEAF_ITEMS  64
96
97
/*
98
 * Maximum height of the tree.
99
 *
100
 * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree.  The
101
 * theoretical maximum number of items that we can store in a set is 2^64,
102
 * so MAX_TREE_LEVELS should be set so that:
103
 *
104
 *   MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
105
 *
106
 * In practice, we'll need far fewer levels, because you will run out of
107
 * memory long before reaching that number, but let's be conservative.
108
 */
109
0
#define MAX_TREE_LEVELS   11
110
111
/*
112
 * Node structures, for the in-memory B-tree.
113
 *
114
 * An internal node holds a number of downlink pointers to leaf nodes, or
115
 * to internal nodes on a lower level.  For each downlink, the key value
116
 * corresponding to the lower level node is stored in a sorted array.  The
117
 * stored key values are low keys.  In other words, if the downlink has value
118
 * X, then all items stored on that child are >= X.
119
 *
120
 * Each leaf node holds a number of "items", with a varying number of
121
 * integers packed into each item.  Each item consists of two 64-bit words:
122
 * The first word holds the first integer stored in the item, in plain format.
123
 * The second word contains between 0 and 240 more integers, packed using
124
 * Simple-8b encoding.  By storing the first integer in plain, unpacked,
125
 * format, we can use binary search to quickly find an item that holds (or
126
 * would hold) a particular integer.  And by storing the rest in packed form,
127
 * we still get pretty good memory density, if there are clusters of integers
128
 * with similar values.
129
 *
130
 * Each leaf node also has a pointer to the next leaf node, so that the leaf
131
 * nodes can be easily walked from beginning to end when iterating.
132
 */
133
typedef struct intset_node intset_node;
134
typedef struct intset_leaf_node intset_leaf_node;
135
typedef struct intset_internal_node intset_internal_node;
136
137
/* Common structure of both leaf and internal nodes. */
138
struct intset_node
139
{
140
  uint16    level;      /* tree level of this node */
141
  uint16    num_items;    /* number of items in this node */
142
};
143
144
/* Internal node */
145
struct intset_internal_node
146
{
147
  /* common header, must match intset_node */
148
  uint16    level;      /* >= 1 on internal nodes */
149
  uint16    num_items;
150
151
  /*
152
   * 'values' is an array of key values, and 'downlinks' are pointers to
153
   * lower-level nodes, corresponding to the key values.
154
   */
155
  uint64    values[MAX_INTERNAL_ITEMS];
156
  intset_node *downlinks[MAX_INTERNAL_ITEMS];
157
};
158
159
/* Leaf node */
160
typedef struct
161
{
162
  uint64    first;      /* first integer in this item */
163
  uint64    codeword;   /* simple8b encoded differences from 'first' */
164
} leaf_item;
165
166
0
#define MAX_VALUES_PER_LEAF_ITEM  (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
167
168
struct intset_leaf_node
169
{
170
  /* common header, must match intset_node */
171
  uint16    level;      /* 0 on leafs */
172
  uint16    num_items;
173
174
  intset_leaf_node *next;   /* right sibling, if any */
175
176
  leaf_item items[MAX_LEAF_ITEMS];
177
};
178
179
/*
180
 * We buffer insertions in a simple array, before packing and inserting them
181
 * into the B-tree.  MAX_BUFFERED_VALUES sets the size of the buffer.  The
182
 * encoder assumes that it is large enough that we can always fill a leaf
183
 * item with buffered new items.  In other words, MAX_BUFFERED_VALUES must be
184
 * larger than MAX_VALUES_PER_LEAF_ITEM.  For efficiency, make it much larger.
185
 */
186
0
#define MAX_BUFFERED_VALUES     (MAX_VALUES_PER_LEAF_ITEM * 2)
187
188
/*
189
 * IntegerSet is the top-level object representing the set.
190
 *
191
 * The integers are stored in an in-memory B-tree structure, plus an array
192
 * for newly-added integers.  IntegerSet also tracks information about memory
193
 * usage, as well as the current position when iterating the set with
194
 * intset_begin_iterate / intset_iterate_next.
195
 */
196
struct IntegerSet
197
{
198
  /*
199
   * 'context' is the memory context holding this integer set and all its
200
   * tree nodes.
201
   *
202
   * 'mem_used' tracks the amount of memory used.  We don't do anything with
203
   * it in integerset.c itself, but the callers can ask for it with
204
   * intset_memory_usage().
205
   */
206
  MemoryContext context;
207
  uint64    mem_used;
208
209
  uint64    num_entries;  /* total # of values in the set */
210
  uint64    highest_value;  /* highest value stored in this set */
211
212
  /*
213
   * B-tree to hold the packed values.
214
   *
215
   * 'rightmost_nodes' hold pointers to the rightmost node on each level.
216
   * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
217
   * parent, and so forth, all the way up to the root. These are needed when
218
   * adding new values. (Currently, we require that new values are added at
219
   * the end.)
220
   */
221
  int     num_levels;   /* height of the tree */
222
  intset_node *root;      /* root node */
223
  intset_node *rightmost_nodes[MAX_TREE_LEVELS];
224
  intset_leaf_node *leftmost_leaf;  /* leftmost leaf node */
225
226
  /*
227
   * Holding area for new items that haven't been inserted to the tree yet.
228
   */
229
  uint64    buffered_values[MAX_BUFFERED_VALUES];
230
  int     num_buffered_values;
231
232
  /*
233
   * Iterator support.
234
   *
235
   * 'iter_values' is an array of integers ready to be returned to the
236
   * caller; 'iter_num_values' is the length of that array, and
237
   * 'iter_valueno' is the next index.  'iter_node' and 'iter_itemno' point
238
   * to the leaf node, and item within the leaf node, to get the next batch
239
   * of values from.
240
   *
241
   * Normally, 'iter_values' points to 'iter_values_buf', which holds items
242
   * decoded from a leaf item.  But after we have scanned the whole B-tree,
243
   * we iterate through all the unbuffered values, too, by pointing
244
   * iter_values to 'buffered_values'.
245
   */
246
  bool    iter_active;  /* is iteration in progress? */
247
248
  const uint64 *iter_values;
249
  int     iter_num_values;  /* number of elements in 'iter_values' */
250
  int     iter_valueno; /* next index into 'iter_values' */
251
252
  intset_leaf_node *iter_node;  /* current leaf node */
253
  int     iter_itemno;  /* next item in 'iter_node' to decode */
254
255
  uint64    iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
256
};
257
258
/*
259
 * Prototypes for internal functions.
260
 */
261
static void intset_update_upper(IntegerSet *intset, int level,
262
                intset_node *child, uint64 child_key);
263
static void intset_flush_buffered_values(IntegerSet *intset);
264
265
static int  intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
266
                  bool nextkey);
267
static int  intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
268
                bool nextkey);
269
270
static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
271
static int  simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
272
static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
273
274
275
/*
276
 * Create a new, initially empty, integer set.
277
 *
278
 * The integer set is created in the current memory context.
279
 * We will do all subsequent allocations in the same context, too, regardless
280
 * of which memory context is current when new integers are added to the set.
281
 */
282
IntegerSet *
283
intset_create(void)
284
0
{
285
0
  IntegerSet *intset;
286
287
0
  intset = (IntegerSet *) palloc(sizeof(IntegerSet));
288
0
  intset->context = CurrentMemoryContext;
289
0
  intset->mem_used = GetMemoryChunkSpace(intset);
290
291
0
  intset->num_entries = 0;
292
0
  intset->highest_value = 0;
293
294
0
  intset->num_levels = 0;
295
0
  intset->root = NULL;
296
0
  memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
297
0
  intset->leftmost_leaf = NULL;
298
299
0
  intset->num_buffered_values = 0;
300
301
0
  intset->iter_active = false;
302
0
  intset->iter_node = NULL;
303
0
  intset->iter_itemno = 0;
304
0
  intset->iter_valueno = 0;
305
0
  intset->iter_num_values = 0;
306
0
  intset->iter_values = NULL;
307
308
0
  return intset;
309
0
}
310
311
/*
312
 * Allocate a new node.
313
 */
314
static intset_internal_node *
315
intset_new_internal_node(IntegerSet *intset)
316
0
{
317
0
  intset_internal_node *n;
318
319
0
  n = (intset_internal_node *) MemoryContextAlloc(intset->context,
320
0
                          sizeof(intset_internal_node));
321
0
  intset->mem_used += GetMemoryChunkSpace(n);
322
323
0
  n->level = 0;       /* caller must set */
324
0
  n->num_items = 0;
325
326
0
  return n;
327
0
}
328
329
static intset_leaf_node *
330
intset_new_leaf_node(IntegerSet *intset)
331
0
{
332
0
  intset_leaf_node *n;
333
334
0
  n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
335
0
                        sizeof(intset_leaf_node));
336
0
  intset->mem_used += GetMemoryChunkSpace(n);
337
338
0
  n->level = 0;
339
0
  n->num_items = 0;
340
0
  n->next = NULL;
341
342
0
  return n;
343
0
}
344
345
/*
346
 * Return the number of entries in the integer set.
347
 */
348
uint64
349
intset_num_entries(IntegerSet *intset)
350
0
{
351
0
  return intset->num_entries;
352
0
}
353
354
/*
355
 * Return the amount of memory used by the integer set.
356
 */
357
uint64
358
intset_memory_usage(IntegerSet *intset)
359
0
{
360
0
  return intset->mem_used;
361
0
}
362
363
/*
364
 * Add a value to the set.
365
 *
366
 * Values must be added in order.
367
 */
368
void
369
intset_add_member(IntegerSet *intset, uint64 x)
370
0
{
371
0
  if (intset->iter_active)
372
0
    elog(ERROR, "cannot add new values to integer set while iteration is in progress");
373
374
0
  if (x <= intset->highest_value && intset->num_entries > 0)
375
0
    elog(ERROR, "cannot add value to integer set out of order");
376
377
0
  if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
378
0
  {
379
    /* Time to flush our buffer */
380
0
    intset_flush_buffered_values(intset);
381
0
    Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
382
0
  }
383
384
  /* Add it to the buffer of newly-added values */
385
0
  intset->buffered_values[intset->num_buffered_values] = x;
386
0
  intset->num_buffered_values++;
387
0
  intset->num_entries++;
388
0
  intset->highest_value = x;
389
0
}
390
391
/*
392
 * Take a batch of buffered values, and pack them into the B-tree.
393
 */
394
static void
395
intset_flush_buffered_values(IntegerSet *intset)
396
0
{
397
0
  uint64     *values = intset->buffered_values;
398
0
  uint64    num_values = intset->num_buffered_values;
399
0
  int     num_packed = 0;
400
0
  intset_leaf_node *leaf;
401
402
0
  leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
403
404
  /*
405
   * If the tree is completely empty, create the first leaf page, which is
406
   * also the root.
407
   */
408
0
  if (leaf == NULL)
409
0
  {
410
    /*
411
     * This is the very first item in the set.
412
     *
413
     * Allocate root node. It's also a leaf.
414
     */
415
0
    leaf = intset_new_leaf_node(intset);
416
417
0
    intset->root = (intset_node *) leaf;
418
0
    intset->leftmost_leaf = leaf;
419
0
    intset->rightmost_nodes[0] = (intset_node *) leaf;
420
0
    intset->num_levels = 1;
421
0
  }
422
423
  /*
424
   * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
425
   * stop.  In most cases, we cannot encode that many values in a single
426
   * value, but this way, the encoder doesn't have to worry about running
427
   * out of input.
428
   */
429
0
  while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
430
0
  {
431
0
    leaf_item item;
432
0
    int     num_encoded;
433
434
    /*
435
     * Construct the next leaf item, packing as many buffered values as
436
     * possible.
437
     */
438
0
    item.first = values[num_packed];
439
0
    item.codeword = simple8b_encode(&values[num_packed + 1],
440
0
                    &num_encoded,
441
0
                    item.first);
442
443
    /*
444
     * Add the item to the node, allocating a new node if the old one is
445
     * full.
446
     */
447
0
    if (leaf->num_items >= MAX_LEAF_ITEMS)
448
0
    {
449
      /* Allocate new leaf and link it to the tree */
450
0
      intset_leaf_node *old_leaf = leaf;
451
452
0
      leaf = intset_new_leaf_node(intset);
453
0
      old_leaf->next = leaf;
454
0
      intset->rightmost_nodes[0] = (intset_node *) leaf;
455
0
      intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
456
0
    }
457
0
    leaf->items[leaf->num_items++] = item;
458
459
0
    num_packed += 1 + num_encoded;
460
0
  }
461
462
  /*
463
   * Move any remaining buffered values to the beginning of the array.
464
   */
465
0
  if (num_packed < intset->num_buffered_values)
466
0
  {
467
0
    memmove(&intset->buffered_values[0],
468
0
        &intset->buffered_values[num_packed],
469
0
        (intset->num_buffered_values - num_packed) * sizeof(uint64));
470
0
  }
471
0
  intset->num_buffered_values -= num_packed;
472
0
}
473
474
/*
475
 * Insert a downlink into parent node, after creating a new node.
476
 *
477
 * Recurses if the parent node is full, too.
478
 */
479
static void
480
intset_update_upper(IntegerSet *intset, int level, intset_node *child,
481
          uint64 child_key)
482
0
{
483
0
  intset_internal_node *parent;
484
485
0
  Assert(level > 0);
486
487
  /*
488
   * Create a new root node, if necessary.
489
   */
490
0
  if (level >= intset->num_levels)
491
0
  {
492
0
    intset_node *oldroot = intset->root;
493
0
    uint64    downlink_key;
494
495
    /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
496
0
    if (intset->num_levels == MAX_TREE_LEVELS)
497
0
      elog(ERROR, "could not expand integer set, maximum number of levels reached");
498
0
    intset->num_levels++;
499
500
    /*
501
     * Get the first value on the old root page, to be used as the
502
     * downlink.
503
     */
504
0
    if (intset->root->level == 0)
505
0
      downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
506
0
    else
507
0
      downlink_key = ((intset_internal_node *) oldroot)->values[0];
508
509
0
    parent = intset_new_internal_node(intset);
510
0
    parent->level = level;
511
0
    parent->values[0] = downlink_key;
512
0
    parent->downlinks[0] = oldroot;
513
0
    parent->num_items = 1;
514
515
0
    intset->root = (intset_node *) parent;
516
0
    intset->rightmost_nodes[level] = (intset_node *) parent;
517
0
  }
518
519
  /*
520
   * Place the downlink on the parent page.
521
   */
522
0
  parent = (intset_internal_node *) intset->rightmost_nodes[level];
523
524
0
  if (parent->num_items < MAX_INTERNAL_ITEMS)
525
0
  {
526
0
    parent->values[parent->num_items] = child_key;
527
0
    parent->downlinks[parent->num_items] = child;
528
0
    parent->num_items++;
529
0
  }
530
0
  else
531
0
  {
532
    /*
533
     * Doesn't fit.  Allocate new parent, with the downlink as the first
534
     * item on it, and recursively insert the downlink to the new parent
535
     * to the grandparent.
536
     */
537
0
    parent = intset_new_internal_node(intset);
538
0
    parent->level = level;
539
0
    parent->values[0] = child_key;
540
0
    parent->downlinks[0] = child;
541
0
    parent->num_items = 1;
542
543
0
    intset->rightmost_nodes[level] = (intset_node *) parent;
544
545
0
    intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
546
0
  }
547
0
}
548
549
/*
550
 * Does the set contain the given value?
551
 */
552
bool
553
intset_is_member(IntegerSet *intset, uint64 x)
554
0
{
555
0
  intset_node *node;
556
0
  intset_leaf_node *leaf;
557
0
  int     level;
558
0
  int     itemno;
559
0
  leaf_item  *item;
560
561
  /*
562
   * The value might be in the buffer of newly-added values.
563
   */
564
0
  if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
565
0
  {
566
0
    itemno = intset_binsrch_uint64(x,
567
0
                     intset->buffered_values,
568
0
                     intset->num_buffered_values,
569
0
                     false);
570
0
    if (itemno >= intset->num_buffered_values)
571
0
      return false;
572
0
    else
573
0
      return (intset->buffered_values[itemno] == x);
574
0
  }
575
576
  /*
577
   * Start from the root, and walk down the B-tree to find the right leaf
578
   * node.
579
   */
580
0
  if (!intset->root)
581
0
    return false;
582
0
  node = intset->root;
583
0
  for (level = intset->num_levels - 1; level > 0; level--)
584
0
  {
585
0
    intset_internal_node *n = (intset_internal_node *) node;
586
587
0
    Assert(node->level == level);
588
589
0
    itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
590
0
    if (itemno == 0)
591
0
      return false;
592
0
    node = n->downlinks[itemno - 1];
593
0
  }
594
0
  Assert(node->level == 0);
595
0
  leaf = (intset_leaf_node *) node;
596
597
  /*
598
   * Binary search to find the right item on the leaf page
599
   */
600
0
  itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
601
0
  if (itemno == 0)
602
0
    return false;
603
0
  item = &leaf->items[itemno - 1];
604
605
  /* Is this a match to the first value on the item? */
606
0
  if (item->first == x)
607
0
    return true;
608
0
  Assert(x > item->first);
609
610
  /* Is it in the packed codeword? */
611
0
  if (simple8b_contains(item->codeword, x, item->first))
612
0
    return true;
613
614
0
  return false;
615
0
}
616
617
/*
618
 * Begin in-order scan through all the values.
619
 *
620
 * While the iteration is in-progress, you cannot add new values to the set.
621
 */
622
void
623
intset_begin_iterate(IntegerSet *intset)
624
0
{
625
  /* Note that we allow an iteration to be abandoned midway */
626
0
  intset->iter_active = true;
627
0
  intset->iter_node = intset->leftmost_leaf;
628
0
  intset->iter_itemno = 0;
629
0
  intset->iter_valueno = 0;
630
0
  intset->iter_num_values = 0;
631
0
  intset->iter_values = intset->iter_values_buf;
632
0
}
633
634
/*
635
 * Returns the next integer, when iterating.
636
 *
637
 * intset_begin_iterate() must be called first.  intset_iterate_next() returns
638
 * the next value in the set.  Returns true, if there was another value, and
639
 * stores the value in *next.  Otherwise, returns false.
640
 */
641
bool
642
intset_iterate_next(IntegerSet *intset, uint64 *next)
643
0
{
644
0
  Assert(intset->iter_active);
645
0
  for (;;)
646
0
  {
647
    /* Return next iter_values[] entry if any */
648
0
    if (intset->iter_valueno < intset->iter_num_values)
649
0
    {
650
0
      *next = intset->iter_values[intset->iter_valueno++];
651
0
      return true;
652
0
    }
653
654
    /* Decode next item in current leaf node, if any */
655
0
    if (intset->iter_node &&
656
0
      intset->iter_itemno < intset->iter_node->num_items)
657
0
    {
658
0
      leaf_item  *item;
659
0
      int     num_decoded;
660
661
0
      item = &intset->iter_node->items[intset->iter_itemno++];
662
663
0
      intset->iter_values_buf[0] = item->first;
664
0
      num_decoded = simple8b_decode(item->codeword,
665
0
                      &intset->iter_values_buf[1],
666
0
                      item->first);
667
0
      intset->iter_num_values = num_decoded + 1;
668
0
      intset->iter_valueno = 0;
669
0
      continue;
670
0
    }
671
672
    /* No more items on this leaf, step to next node */
673
0
    if (intset->iter_node)
674
0
    {
675
0
      intset->iter_node = intset->iter_node->next;
676
0
      intset->iter_itemno = 0;
677
0
      continue;
678
0
    }
679
680
    /*
681
     * We have reached the end of the B-tree.  But we might still have
682
     * some integers in the buffer of newly-added values.
683
     */
684
0
    if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
685
0
    {
686
0
      intset->iter_values = intset->buffered_values;
687
0
      intset->iter_num_values = intset->num_buffered_values;
688
0
      intset->iter_valueno = 0;
689
0
      continue;
690
0
    }
691
692
0
    break;
693
0
  }
694
695
  /* No more results. */
696
0
  intset->iter_active = false;
697
0
  *next = 0;          /* prevent uninitialized-variable warnings */
698
0
  return false;
699
0
}
700
701
/*
702
 * intset_binsrch_uint64() -- search a sorted array of uint64s
703
 *
704
 * Returns the first position with key equal or less than the given key.
705
 * The returned position would be the "insert" location for the given key,
706
 * that is, the position where the new key should be inserted to.
707
 *
708
 * 'nextkey' affects the behavior on equal keys.  If true, and there is an
709
 * equal key in the array, this returns the position immediately after the
710
 * equal key.  If false, this returns the position of the equal key itself.
711
 */
712
static int
713
intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
714
0
{
715
0
  int     low,
716
0
        high,
717
0
        mid;
718
719
0
  low = 0;
720
0
  high = arr_elems;
721
0
  while (high > low)
722
0
  {
723
0
    mid = low + (high - low) / 2;
724
725
0
    if (nextkey)
726
0
    {
727
0
      if (item >= arr[mid])
728
0
        low = mid + 1;
729
0
      else
730
0
        high = mid;
731
0
    }
732
0
    else
733
0
    {
734
0
      if (item > arr[mid])
735
0
        low = mid + 1;
736
0
      else
737
0
        high = mid;
738
0
    }
739
0
  }
740
741
0
  return low;
742
0
}
743
744
/* same, but for an array of leaf items */
745
static int
746
intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
747
0
{
748
0
  int     low,
749
0
        high,
750
0
        mid;
751
752
0
  low = 0;
753
0
  high = arr_elems;
754
0
  while (high > low)
755
0
  {
756
0
    mid = low + (high - low) / 2;
757
758
0
    if (nextkey)
759
0
    {
760
0
      if (item >= arr[mid].first)
761
0
        low = mid + 1;
762
0
      else
763
0
        high = mid;
764
0
    }
765
0
    else
766
0
    {
767
0
      if (item > arr[mid].first)
768
0
        low = mid + 1;
769
0
      else
770
0
        high = mid;
771
0
    }
772
0
  }
773
774
0
  return low;
775
0
}
776
777
/*
778
 * Simple-8b encoding.
779
 *
780
 * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
781
 * called "codewords".  The number of integers packed into a single codeword
782
 * depends on the integers being packed; small integers are encoded using
783
 * fewer bits than large integers.  A single codeword can store a single
784
 * 60-bit integer, or two 30-bit integers, for example.
785
 *
786
 * Since we're storing a unique, sorted, set of integers, we actually encode
787
 * the *differences* between consecutive integers.  That way, clusters of
788
 * integers that are close to each other are packed efficiently, regardless
789
 * of their absolute values.
790
 *
791
 * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
792
 * how many integers are encoded in the codeword, and the encoded integers are
793
 * packed into the remaining 60 bits.  The selector allows for 16 different
794
 * ways of using the remaining 60 bits, called "modes".  The number of integers
795
 * packed into a single codeword in each mode is listed in the simple8b_modes
796
 * table below.  For example, consider the following codeword:
797
 *
798
 *      20-bit integer       20-bit integer       20-bit integer
799
 * 1101 00000000000000010010 01111010000100100000 00000000000000010100
800
 * ^
801
 * selector
802
 *
803
 * The selector 1101 is 13 in decimal.  From the modes table below, we see
804
 * that it means that the codeword encodes three 20-bit integers.  In decimal,
805
 * those integers are 18, 500000 and 20.  Because we encode deltas rather than
806
 * absolute values, the actual values that they represent are 18, 500018 and
807
 * 500038.
808
 *
809
 * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
810
 * (which means 240 or 120 consecutive integers, since we're encoding the
811
 * deltas between integers), without using the rest of the codeword bits
812
 * for anything.
813
 *
814
 * Simple-8b cannot encode integers larger than 60 bits.  Values larger than
815
 * that are always stored in the 'first' field of a leaf item, never in the
816
 * packed codeword.  If there is a sequence of integers that are more than
817
 * 2^60 apart, the codeword will go unused on those items.  To represent that,
818
 * we use a magic EMPTY_CODEWORD codeword value.
819
 */
820
static const struct simple8b_mode
821
{
822
  uint8   bits_per_int;
823
  uint8   num_ints;
824
}     simple8b_modes[17] =
825
826
{
827
  {0, 240},         /* mode  0: 240 zeroes */
828
  {0, 120},         /* mode  1: 120 zeroes */
829
  {1, 60},          /* mode  2: sixty 1-bit integers */
830
  {2, 30},          /* mode  3: thirty 2-bit integers */
831
  {3, 20},          /* mode  4: twenty 3-bit integers */
832
  {4, 15},          /* mode  5: fifteen 4-bit integers */
833
  {5, 12},          /* mode  6: twelve 5-bit integers */
834
  {6, 10},          /* mode  7: ten 6-bit integers */
835
  {7, 8},           /* mode  8: eight 7-bit integers (four bits
836
                 * are wasted) */
837
  {8, 7},           /* mode  9: seven 8-bit integers (four bits
838
                 * are wasted) */
839
  {10, 6},          /* mode 10: six 10-bit integers */
840
  {12, 5},          /* mode 11: five 12-bit integers */
841
  {15, 4},          /* mode 12: four 15-bit integers */
842
  {20, 3},          /* mode 13: three 20-bit integers */
843
  {30, 2},          /* mode 14: two 30-bit integers */
844
  {60, 1},          /* mode 15: one 60-bit integer */
845
846
  {0, 0}            /* sentinel value */
847
};
848
849
/*
850
 * EMPTY_CODEWORD is a special value, used to indicate "no values".
851
 * It is used if the next value is too large to be encoded with Simple-8b.
852
 *
853
 * This value looks like a mode-0 codeword, but we can distinguish it
854
 * because a regular mode-0 codeword would have zeroes in the unused bits.
855
 */
856
0
#define EMPTY_CODEWORD    UINT64CONST(0x0FFFFFFFFFFFFFFF)
857
858
/*
859
 * Encode a number of integers into a Simple-8b codeword.
860
 *
861
 * (What we actually encode are deltas between successive integers.
862
 * "base" is the value before ints[0].)
863
 *
864
 * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
865
 * elements, ensuring that we can produce a full codeword.
866
 *
867
 * Returns the encoded codeword, and sets *num_encoded to the number of
868
 * input integers that were encoded.  That can be zero, if the first delta
869
 * is too large to be encoded.
870
 */
871
static uint64
872
simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
873
0
{
874
0
  int     selector;
875
0
  int     nints;
876
0
  int     bits;
877
0
  uint64    diff;
878
0
  uint64    last_val;
879
0
  uint64    codeword;
880
0
  int     i;
881
882
0
  Assert(ints[0] > base);
883
884
  /*
885
   * Select the "mode" to use for this codeword.
886
   *
887
   * In each iteration, check if the next value can be represented in the
888
   * current mode we're considering.  If it's too large, then step up the
889
   * mode to a wider one, and repeat.  If it fits, move on to the next
890
   * integer.  Repeat until the codeword is full, given the current mode.
891
   *
892
   * Note that we don't have any way to represent unused slots in the
893
   * codeword, so we require each codeword to be "full".  It is always
894
   * possible to produce a full codeword unless the very first delta is too
895
   * large to be encoded.  For example, if the first delta is small but the
896
   * second is too large to be encoded, we'll end up using the last "mode",
897
   * which has nints == 1.
898
   */
899
0
  selector = 0;
900
0
  nints = simple8b_modes[0].num_ints;
901
0
  bits = simple8b_modes[0].bits_per_int;
902
0
  diff = ints[0] - base - 1;
903
0
  last_val = ints[0];
904
0
  i = 0;            /* number of deltas we have accepted */
905
0
  for (;;)
906
0
  {
907
0
    if (diff >= (UINT64CONST(1) << bits))
908
0
    {
909
      /* too large, step up to next mode */
910
0
      selector++;
911
0
      nints = simple8b_modes[selector].num_ints;
912
0
      bits = simple8b_modes[selector].bits_per_int;
913
      /* we might already have accepted enough deltas for this mode */
914
0
      if (i >= nints)
915
0
        break;
916
0
    }
917
0
    else
918
0
    {
919
      /* accept this delta; then done if codeword is full */
920
0
      i++;
921
0
      if (i >= nints)
922
0
        break;
923
      /* examine next delta */
924
0
      Assert(ints[i] > last_val);
925
0
      diff = ints[i] - last_val - 1;
926
0
      last_val = ints[i];
927
0
    }
928
0
  }
929
930
0
  if (nints == 0)
931
0
  {
932
    /*
933
     * The first delta is too large to be encoded with Simple-8b.
934
     *
935
     * If there is at least one not-too-large integer in the input, we
936
     * will encode it using mode 15 (or a more compact mode).  Hence, we
937
     * can only get here if the *first* delta is >= 2^60.
938
     */
939
0
    Assert(i == 0);
940
0
    *num_encoded = 0;
941
0
    return EMPTY_CODEWORD;
942
0
  }
943
944
  /*
945
   * Encode the integers using the selected mode.  Note that we shift them
946
   * into the codeword in reverse order, so that they will come out in the
947
   * correct order in the decoder.
948
   */
949
0
  codeword = 0;
950
0
  if (bits > 0)
951
0
  {
952
0
    for (i = nints - 1; i > 0; i--)
953
0
    {
954
0
      diff = ints[i] - ints[i - 1] - 1;
955
0
      codeword |= diff;
956
0
      codeword <<= bits;
957
0
    }
958
0
    diff = ints[0] - base - 1;
959
0
    codeword |= diff;
960
0
  }
961
962
  /* add selector to the codeword, and return */
963
0
  codeword |= (uint64) selector << 60;
964
965
0
  *num_encoded = nints;
966
0
  return codeword;
967
0
}
968
969
/*
970
 * Decode a codeword into an array of integers.
971
 * Returns the number of integers decoded.
972
 */
973
static int
974
simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
975
0
{
976
0
  int     selector = (codeword >> 60);
977
0
  int     nints = simple8b_modes[selector].num_ints;
978
0
  int     bits = simple8b_modes[selector].bits_per_int;
979
0
  uint64    mask = (UINT64CONST(1) << bits) - 1;
980
0
  uint64    curr_value;
981
982
0
  if (codeword == EMPTY_CODEWORD)
983
0
    return 0;
984
985
0
  curr_value = base;
986
0
  for (int i = 0; i < nints; i++)
987
0
  {
988
0
    uint64    diff = codeword & mask;
989
990
0
    curr_value += 1 + diff;
991
0
    decoded[i] = curr_value;
992
0
    codeword >>= bits;
993
0
  }
994
0
  return nints;
995
0
}
996
997
/*
998
 * This is very similar to simple8b_decode(), but instead of decoding all
999
 * the values to an array, it just checks if the given "key" is part of
1000
 * the codeword.
1001
 */
1002
static bool
1003
simple8b_contains(uint64 codeword, uint64 key, uint64 base)
1004
0
{
1005
0
  int     selector = (codeword >> 60);
1006
0
  int     nints = simple8b_modes[selector].num_ints;
1007
0
  int     bits = simple8b_modes[selector].bits_per_int;
1008
1009
0
  if (codeword == EMPTY_CODEWORD)
1010
0
    return false;
1011
1012
0
  if (bits == 0)
1013
0
  {
1014
    /* Special handling for 0-bit cases. */
1015
0
    return (key - base) <= nints;
1016
0
  }
1017
0
  else
1018
0
  {
1019
0
    uint64    mask = (UINT64CONST(1) << bits) - 1;
1020
0
    uint64    curr_value;
1021
1022
0
    curr_value = base;
1023
0
    for (int i = 0; i < nints; i++)
1024
0
    {
1025
0
      uint64    diff = codeword & mask;
1026
1027
0
      curr_value += 1 + diff;
1028
1029
0
      if (curr_value >= key)
1030
0
      {
1031
0
        if (curr_value == key)
1032
0
          return true;
1033
0
        else
1034
0
          return false;
1035
0
      }
1036
1037
0
      codeword >>= bits;
1038
0
    }
1039
0
  }
1040
0
  return false;
1041
0
}