/src/postgres/src/backend/lib/integerset.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * integerset.c |
4 | | * Data structure to hold a large set of 64-bit integers efficiently |
5 | | * |
6 | | * IntegerSet provides an in-memory data structure to hold a set of |
7 | | * arbitrary 64-bit integers. Internally, the values are stored in a |
8 | | * B-tree, with a special packed representation at the leaf level using |
9 | | * the Simple-8b algorithm, which can pack clusters of nearby values |
10 | | * very tightly. |
11 | | * |
12 | | * Memory consumption depends on the number of values stored, but also |
13 | | * on how far the values are from each other. In the best case, with |
14 | | * long runs of consecutive integers, memory consumption can be as low as |
15 | | * 0.1 bytes per integer. In the worst case, if integers are more than |
16 | | * 2^32 apart, it uses about 8 bytes per integer. In typical use, the |
17 | | * consumption per integer is somewhere between those extremes, depending |
18 | | * on the range of integers stored, and how "clustered" they are. |
19 | | * |
20 | | * |
21 | | * Interface |
22 | | * --------- |
23 | | * |
24 | | * intset_create - Create a new, empty set |
25 | | * intset_add_member - Add an integer to the set |
26 | | * intset_is_member - Test if an integer is in the set |
27 | | * intset_begin_iterate - Begin iterating through all integers in set |
28 | | * intset_iterate_next - Return next set member, if any |
29 | | * |
30 | | * intset_create() creates the set in the current memory context. Subsequent |
31 | | * operations that add to the data structure will continue to allocate from |
32 | | * that same context, even if it's not current anymore. |
33 | | * |
34 | | * Note that there is no function to free an integer set. If you need to do |
35 | | * that, create a dedicated memory context to hold it, and destroy the memory |
36 | | * context instead. |
37 | | * |
38 | | * |
39 | | * Limitations |
40 | | * ----------- |
41 | | * |
42 | | * - Values must be added in order. (Random insertions would require |
43 | | * splitting nodes, which hasn't been implemented.) |
44 | | * |
45 | | * - Values cannot be added while iteration is in progress. |
46 | | * |
47 | | * - No support for removing values. |
48 | | * |
49 | | * None of these limitations are fundamental to the data structure, so they |
50 | | * could be lifted if needed, by writing some new code. But the current |
51 | | * users of this facility don't need them. |
52 | | * |
53 | | * |
54 | | * References |
55 | | * ---------- |
56 | | * |
57 | | * Simple-8b encoding is based on: |
58 | | * |
59 | | * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words, |
60 | | * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010 |
61 | | * (https://doi.org/10.1002/spe.948) |
62 | | * |
63 | | * |
64 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
65 | | * Portions Copyright (c) 1994, Regents of the University of California |
66 | | * |
67 | | * IDENTIFICATION |
68 | | * src/backend/lib/integerset.c |
69 | | * |
70 | | *------------------------------------------------------------------------- |
71 | | */ |
72 | | #include "postgres.h" |
73 | | |
74 | | #include "lib/integerset.h" |
75 | | #include "utils/memutils.h" |
76 | | |
77 | | |
78 | | /* |
79 | | * Maximum number of integers that can be encoded in a single Simple-8b |
80 | | * codeword. (Defined here before anything else, so that we can size arrays |
81 | | * using this.) |
82 | | */ |
83 | 0 | #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240 |
84 | | |
85 | | /* |
86 | | * Parameters for shape of the in-memory B-tree. |
87 | | * |
88 | | * These set the size of each internal and leaf node. They don't necessarily |
89 | | * need to be the same, because the tree is just an in-memory structure. |
90 | | * With the default 64, each node is about 1 kb. |
91 | | * |
92 | | * If you change these, you must recalculate MAX_TREE_LEVELS, too! |
93 | | */ |
94 | 0 | #define MAX_INTERNAL_ITEMS 64 |
95 | 0 | #define MAX_LEAF_ITEMS 64 |
96 | | |
97 | | /* |
98 | | * Maximum height of the tree. |
99 | | * |
100 | | * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The |
101 | | * theoretical maximum number of items that we can store in a set is 2^64, |
102 | | * so MAX_TREE_LEVELS should be set so that: |
103 | | * |
104 | | * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64. |
105 | | * |
106 | | * In practice, we'll need far fewer levels, because you will run out of |
107 | | * memory long before reaching that number, but let's be conservative. |
108 | | */ |
109 | 0 | #define MAX_TREE_LEVELS 11 |
110 | | |
111 | | /* |
112 | | * Node structures, for the in-memory B-tree. |
113 | | * |
114 | | * An internal node holds a number of downlink pointers to leaf nodes, or |
115 | | * to internal nodes on a lower level. For each downlink, the key value |
116 | | * corresponding to the lower level node is stored in a sorted array. The |
117 | | * stored key values are low keys. In other words, if the downlink has value |
118 | | * X, then all items stored on that child are >= X. |
119 | | * |
120 | | * Each leaf node holds a number of "items", with a varying number of |
121 | | * integers packed into each item. Each item consists of two 64-bit words: |
122 | | * The first word holds the first integer stored in the item, in plain format. |
123 | | * The second word contains between 0 and 240 more integers, packed using |
124 | | * Simple-8b encoding. By storing the first integer in plain, unpacked, |
125 | | * format, we can use binary search to quickly find an item that holds (or |
126 | | * would hold) a particular integer. And by storing the rest in packed form, |
127 | | * we still get pretty good memory density, if there are clusters of integers |
128 | | * with similar values. |
129 | | * |
130 | | * Each leaf node also has a pointer to the next leaf node, so that the leaf |
131 | | * nodes can be easily walked from beginning to end when iterating. |
132 | | */ |
133 | | typedef struct intset_node intset_node; |
134 | | typedef struct intset_leaf_node intset_leaf_node; |
135 | | typedef struct intset_internal_node intset_internal_node; |
136 | | |
137 | | /* Common structure of both leaf and internal nodes. */ |
138 | | struct intset_node |
139 | | { |
140 | | uint16 level; /* tree level of this node */ |
141 | | uint16 num_items; /* number of items in this node */ |
142 | | }; |
143 | | |
144 | | /* Internal node */ |
145 | | struct intset_internal_node |
146 | | { |
147 | | /* common header, must match intset_node */ |
148 | | uint16 level; /* >= 1 on internal nodes */ |
149 | | uint16 num_items; |
150 | | |
151 | | /* |
152 | | * 'values' is an array of key values, and 'downlinks' are pointers to |
153 | | * lower-level nodes, corresponding to the key values. |
154 | | */ |
155 | | uint64 values[MAX_INTERNAL_ITEMS]; |
156 | | intset_node *downlinks[MAX_INTERNAL_ITEMS]; |
157 | | }; |
158 | | |
159 | | /* Leaf node */ |
160 | | typedef struct |
161 | | { |
162 | | uint64 first; /* first integer in this item */ |
163 | | uint64 codeword; /* simple8b encoded differences from 'first' */ |
164 | | } leaf_item; |
165 | | |
166 | 0 | #define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD) |
167 | | |
168 | | struct intset_leaf_node |
169 | | { |
170 | | /* common header, must match intset_node */ |
171 | | uint16 level; /* 0 on leafs */ |
172 | | uint16 num_items; |
173 | | |
174 | | intset_leaf_node *next; /* right sibling, if any */ |
175 | | |
176 | | leaf_item items[MAX_LEAF_ITEMS]; |
177 | | }; |
178 | | |
179 | | /* |
180 | | * We buffer insertions in a simple array, before packing and inserting them |
181 | | * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The |
182 | | * encoder assumes that it is large enough that we can always fill a leaf |
183 | | * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be |
184 | | * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger. |
185 | | */ |
186 | 0 | #define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2) |
187 | | |
188 | | /* |
189 | | * IntegerSet is the top-level object representing the set. |
190 | | * |
191 | | * The integers are stored in an in-memory B-tree structure, plus an array |
192 | | * for newly-added integers. IntegerSet also tracks information about memory |
193 | | * usage, as well as the current position when iterating the set with |
194 | | * intset_begin_iterate / intset_iterate_next. |
195 | | */ |
196 | | struct IntegerSet |
197 | | { |
198 | | /* |
199 | | * 'context' is the memory context holding this integer set and all its |
200 | | * tree nodes. |
201 | | * |
202 | | * 'mem_used' tracks the amount of memory used. We don't do anything with |
203 | | * it in integerset.c itself, but the callers can ask for it with |
204 | | * intset_memory_usage(). |
205 | | */ |
206 | | MemoryContext context; |
207 | | uint64 mem_used; |
208 | | |
209 | | uint64 num_entries; /* total # of values in the set */ |
210 | | uint64 highest_value; /* highest value stored in this set */ |
211 | | |
212 | | /* |
213 | | * B-tree to hold the packed values. |
214 | | * |
215 | | * 'rightmost_nodes' hold pointers to the rightmost node on each level. |
216 | | * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its |
217 | | * parent, and so forth, all the way up to the root. These are needed when |
218 | | * adding new values. (Currently, we require that new values are added at |
219 | | * the end.) |
220 | | */ |
221 | | int num_levels; /* height of the tree */ |
222 | | intset_node *root; /* root node */ |
223 | | intset_node *rightmost_nodes[MAX_TREE_LEVELS]; |
224 | | intset_leaf_node *leftmost_leaf; /* leftmost leaf node */ |
225 | | |
226 | | /* |
227 | | * Holding area for new items that haven't been inserted to the tree yet. |
228 | | */ |
229 | | uint64 buffered_values[MAX_BUFFERED_VALUES]; |
230 | | int num_buffered_values; |
231 | | |
232 | | /* |
233 | | * Iterator support. |
234 | | * |
235 | | * 'iter_values' is an array of integers ready to be returned to the |
236 | | * caller; 'iter_num_values' is the length of that array, and |
237 | | * 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point |
238 | | * to the leaf node, and item within the leaf node, to get the next batch |
239 | | * of values from. |
240 | | * |
241 | | * Normally, 'iter_values' points to 'iter_values_buf', which holds items |
242 | | * decoded from a leaf item. But after we have scanned the whole B-tree, |
243 | | * we iterate through all the unbuffered values, too, by pointing |
244 | | * iter_values to 'buffered_values'. |
245 | | */ |
246 | | bool iter_active; /* is iteration in progress? */ |
247 | | |
248 | | const uint64 *iter_values; |
249 | | int iter_num_values; /* number of elements in 'iter_values' */ |
250 | | int iter_valueno; /* next index into 'iter_values' */ |
251 | | |
252 | | intset_leaf_node *iter_node; /* current leaf node */ |
253 | | int iter_itemno; /* next item in 'iter_node' to decode */ |
254 | | |
255 | | uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM]; |
256 | | }; |
257 | | |
258 | | /* |
259 | | * Prototypes for internal functions. |
260 | | */ |
261 | | static void intset_update_upper(IntegerSet *intset, int level, |
262 | | intset_node *child, uint64 child_key); |
263 | | static void intset_flush_buffered_values(IntegerSet *intset); |
264 | | |
265 | | static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, |
266 | | bool nextkey); |
267 | | static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, |
268 | | bool nextkey); |
269 | | |
270 | | static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base); |
271 | | static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base); |
272 | | static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base); |
273 | | |
274 | | |
275 | | /* |
276 | | * Create a new, initially empty, integer set. |
277 | | * |
278 | | * The integer set is created in the current memory context. |
279 | | * We will do all subsequent allocations in the same context, too, regardless |
280 | | * of which memory context is current when new integers are added to the set. |
281 | | */ |
282 | | IntegerSet * |
283 | | intset_create(void) |
284 | 0 | { |
285 | 0 | IntegerSet *intset; |
286 | |
|
287 | 0 | intset = (IntegerSet *) palloc(sizeof(IntegerSet)); |
288 | 0 | intset->context = CurrentMemoryContext; |
289 | 0 | intset->mem_used = GetMemoryChunkSpace(intset); |
290 | |
|
291 | 0 | intset->num_entries = 0; |
292 | 0 | intset->highest_value = 0; |
293 | |
|
294 | 0 | intset->num_levels = 0; |
295 | 0 | intset->root = NULL; |
296 | 0 | memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes)); |
297 | 0 | intset->leftmost_leaf = NULL; |
298 | |
|
299 | 0 | intset->num_buffered_values = 0; |
300 | |
|
301 | 0 | intset->iter_active = false; |
302 | 0 | intset->iter_node = NULL; |
303 | 0 | intset->iter_itemno = 0; |
304 | 0 | intset->iter_valueno = 0; |
305 | 0 | intset->iter_num_values = 0; |
306 | 0 | intset->iter_values = NULL; |
307 | |
|
308 | 0 | return intset; |
309 | 0 | } |
310 | | |
311 | | /* |
312 | | * Allocate a new node. |
313 | | */ |
314 | | static intset_internal_node * |
315 | | intset_new_internal_node(IntegerSet *intset) |
316 | 0 | { |
317 | 0 | intset_internal_node *n; |
318 | |
|
319 | 0 | n = (intset_internal_node *) MemoryContextAlloc(intset->context, |
320 | 0 | sizeof(intset_internal_node)); |
321 | 0 | intset->mem_used += GetMemoryChunkSpace(n); |
322 | |
|
323 | 0 | n->level = 0; /* caller must set */ |
324 | 0 | n->num_items = 0; |
325 | |
|
326 | 0 | return n; |
327 | 0 | } |
328 | | |
329 | | static intset_leaf_node * |
330 | | intset_new_leaf_node(IntegerSet *intset) |
331 | 0 | { |
332 | 0 | intset_leaf_node *n; |
333 | |
|
334 | 0 | n = (intset_leaf_node *) MemoryContextAlloc(intset->context, |
335 | 0 | sizeof(intset_leaf_node)); |
336 | 0 | intset->mem_used += GetMemoryChunkSpace(n); |
337 | |
|
338 | 0 | n->level = 0; |
339 | 0 | n->num_items = 0; |
340 | 0 | n->next = NULL; |
341 | |
|
342 | 0 | return n; |
343 | 0 | } |
344 | | |
345 | | /* |
346 | | * Return the number of entries in the integer set. |
347 | | */ |
348 | | uint64 |
349 | | intset_num_entries(IntegerSet *intset) |
350 | 0 | { |
351 | 0 | return intset->num_entries; |
352 | 0 | } |
353 | | |
354 | | /* |
355 | | * Return the amount of memory used by the integer set. |
356 | | */ |
357 | | uint64 |
358 | | intset_memory_usage(IntegerSet *intset) |
359 | 0 | { |
360 | 0 | return intset->mem_used; |
361 | 0 | } |
362 | | |
363 | | /* |
364 | | * Add a value to the set. |
365 | | * |
366 | | * Values must be added in order. |
367 | | */ |
368 | | void |
369 | | intset_add_member(IntegerSet *intset, uint64 x) |
370 | 0 | { |
371 | 0 | if (intset->iter_active) |
372 | 0 | elog(ERROR, "cannot add new values to integer set while iteration is in progress"); |
373 | | |
374 | 0 | if (x <= intset->highest_value && intset->num_entries > 0) |
375 | 0 | elog(ERROR, "cannot add value to integer set out of order"); |
376 | | |
377 | 0 | if (intset->num_buffered_values >= MAX_BUFFERED_VALUES) |
378 | 0 | { |
379 | | /* Time to flush our buffer */ |
380 | 0 | intset_flush_buffered_values(intset); |
381 | 0 | Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES); |
382 | 0 | } |
383 | | |
384 | | /* Add it to the buffer of newly-added values */ |
385 | 0 | intset->buffered_values[intset->num_buffered_values] = x; |
386 | 0 | intset->num_buffered_values++; |
387 | 0 | intset->num_entries++; |
388 | 0 | intset->highest_value = x; |
389 | 0 | } |
390 | | |
391 | | /* |
392 | | * Take a batch of buffered values, and pack them into the B-tree. |
393 | | */ |
394 | | static void |
395 | | intset_flush_buffered_values(IntegerSet *intset) |
396 | 0 | { |
397 | 0 | uint64 *values = intset->buffered_values; |
398 | 0 | uint64 num_values = intset->num_buffered_values; |
399 | 0 | int num_packed = 0; |
400 | 0 | intset_leaf_node *leaf; |
401 | |
|
402 | 0 | leaf = (intset_leaf_node *) intset->rightmost_nodes[0]; |
403 | | |
404 | | /* |
405 | | * If the tree is completely empty, create the first leaf page, which is |
406 | | * also the root. |
407 | | */ |
408 | 0 | if (leaf == NULL) |
409 | 0 | { |
410 | | /* |
411 | | * This is the very first item in the set. |
412 | | * |
413 | | * Allocate root node. It's also a leaf. |
414 | | */ |
415 | 0 | leaf = intset_new_leaf_node(intset); |
416 | |
|
417 | 0 | intset->root = (intset_node *) leaf; |
418 | 0 | intset->leftmost_leaf = leaf; |
419 | 0 | intset->rightmost_nodes[0] = (intset_node *) leaf; |
420 | 0 | intset->num_levels = 1; |
421 | 0 | } |
422 | | |
423 | | /* |
424 | | * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer, |
425 | | * stop. In most cases, we cannot encode that many values in a single |
426 | | * value, but this way, the encoder doesn't have to worry about running |
427 | | * out of input. |
428 | | */ |
429 | 0 | while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM) |
430 | 0 | { |
431 | 0 | leaf_item item; |
432 | 0 | int num_encoded; |
433 | | |
434 | | /* |
435 | | * Construct the next leaf item, packing as many buffered values as |
436 | | * possible. |
437 | | */ |
438 | 0 | item.first = values[num_packed]; |
439 | 0 | item.codeword = simple8b_encode(&values[num_packed + 1], |
440 | 0 | &num_encoded, |
441 | 0 | item.first); |
442 | | |
443 | | /* |
444 | | * Add the item to the node, allocating a new node if the old one is |
445 | | * full. |
446 | | */ |
447 | 0 | if (leaf->num_items >= MAX_LEAF_ITEMS) |
448 | 0 | { |
449 | | /* Allocate new leaf and link it to the tree */ |
450 | 0 | intset_leaf_node *old_leaf = leaf; |
451 | |
|
452 | 0 | leaf = intset_new_leaf_node(intset); |
453 | 0 | old_leaf->next = leaf; |
454 | 0 | intset->rightmost_nodes[0] = (intset_node *) leaf; |
455 | 0 | intset_update_upper(intset, 1, (intset_node *) leaf, item.first); |
456 | 0 | } |
457 | 0 | leaf->items[leaf->num_items++] = item; |
458 | |
|
459 | 0 | num_packed += 1 + num_encoded; |
460 | 0 | } |
461 | | |
462 | | /* |
463 | | * Move any remaining buffered values to the beginning of the array. |
464 | | */ |
465 | 0 | if (num_packed < intset->num_buffered_values) |
466 | 0 | { |
467 | 0 | memmove(&intset->buffered_values[0], |
468 | 0 | &intset->buffered_values[num_packed], |
469 | 0 | (intset->num_buffered_values - num_packed) * sizeof(uint64)); |
470 | 0 | } |
471 | 0 | intset->num_buffered_values -= num_packed; |
472 | 0 | } |
473 | | |
474 | | /* |
475 | | * Insert a downlink into parent node, after creating a new node. |
476 | | * |
477 | | * Recurses if the parent node is full, too. |
478 | | */ |
479 | | static void |
480 | | intset_update_upper(IntegerSet *intset, int level, intset_node *child, |
481 | | uint64 child_key) |
482 | 0 | { |
483 | 0 | intset_internal_node *parent; |
484 | |
|
485 | 0 | Assert(level > 0); |
486 | | |
487 | | /* |
488 | | * Create a new root node, if necessary. |
489 | | */ |
490 | 0 | if (level >= intset->num_levels) |
491 | 0 | { |
492 | 0 | intset_node *oldroot = intset->root; |
493 | 0 | uint64 downlink_key; |
494 | | |
495 | | /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */ |
496 | 0 | if (intset->num_levels == MAX_TREE_LEVELS) |
497 | 0 | elog(ERROR, "could not expand integer set, maximum number of levels reached"); |
498 | 0 | intset->num_levels++; |
499 | | |
500 | | /* |
501 | | * Get the first value on the old root page, to be used as the |
502 | | * downlink. |
503 | | */ |
504 | 0 | if (intset->root->level == 0) |
505 | 0 | downlink_key = ((intset_leaf_node *) oldroot)->items[0].first; |
506 | 0 | else |
507 | 0 | downlink_key = ((intset_internal_node *) oldroot)->values[0]; |
508 | |
|
509 | 0 | parent = intset_new_internal_node(intset); |
510 | 0 | parent->level = level; |
511 | 0 | parent->values[0] = downlink_key; |
512 | 0 | parent->downlinks[0] = oldroot; |
513 | 0 | parent->num_items = 1; |
514 | |
|
515 | 0 | intset->root = (intset_node *) parent; |
516 | 0 | intset->rightmost_nodes[level] = (intset_node *) parent; |
517 | 0 | } |
518 | | |
519 | | /* |
520 | | * Place the downlink on the parent page. |
521 | | */ |
522 | 0 | parent = (intset_internal_node *) intset->rightmost_nodes[level]; |
523 | |
|
524 | 0 | if (parent->num_items < MAX_INTERNAL_ITEMS) |
525 | 0 | { |
526 | 0 | parent->values[parent->num_items] = child_key; |
527 | 0 | parent->downlinks[parent->num_items] = child; |
528 | 0 | parent->num_items++; |
529 | 0 | } |
530 | 0 | else |
531 | 0 | { |
532 | | /* |
533 | | * Doesn't fit. Allocate new parent, with the downlink as the first |
534 | | * item on it, and recursively insert the downlink to the new parent |
535 | | * to the grandparent. |
536 | | */ |
537 | 0 | parent = intset_new_internal_node(intset); |
538 | 0 | parent->level = level; |
539 | 0 | parent->values[0] = child_key; |
540 | 0 | parent->downlinks[0] = child; |
541 | 0 | parent->num_items = 1; |
542 | |
|
543 | 0 | intset->rightmost_nodes[level] = (intset_node *) parent; |
544 | |
|
545 | 0 | intset_update_upper(intset, level + 1, (intset_node *) parent, child_key); |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | | /* |
550 | | * Does the set contain the given value? |
551 | | */ |
552 | | bool |
553 | | intset_is_member(IntegerSet *intset, uint64 x) |
554 | 0 | { |
555 | 0 | intset_node *node; |
556 | 0 | intset_leaf_node *leaf; |
557 | 0 | int level; |
558 | 0 | int itemno; |
559 | 0 | leaf_item *item; |
560 | | |
561 | | /* |
562 | | * The value might be in the buffer of newly-added values. |
563 | | */ |
564 | 0 | if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0]) |
565 | 0 | { |
566 | 0 | itemno = intset_binsrch_uint64(x, |
567 | 0 | intset->buffered_values, |
568 | 0 | intset->num_buffered_values, |
569 | 0 | false); |
570 | 0 | if (itemno >= intset->num_buffered_values) |
571 | 0 | return false; |
572 | 0 | else |
573 | 0 | return (intset->buffered_values[itemno] == x); |
574 | 0 | } |
575 | | |
576 | | /* |
577 | | * Start from the root, and walk down the B-tree to find the right leaf |
578 | | * node. |
579 | | */ |
580 | 0 | if (!intset->root) |
581 | 0 | return false; |
582 | 0 | node = intset->root; |
583 | 0 | for (level = intset->num_levels - 1; level > 0; level--) |
584 | 0 | { |
585 | 0 | intset_internal_node *n = (intset_internal_node *) node; |
586 | |
|
587 | 0 | Assert(node->level == level); |
588 | |
|
589 | 0 | itemno = intset_binsrch_uint64(x, n->values, n->num_items, true); |
590 | 0 | if (itemno == 0) |
591 | 0 | return false; |
592 | 0 | node = n->downlinks[itemno - 1]; |
593 | 0 | } |
594 | 0 | Assert(node->level == 0); |
595 | 0 | leaf = (intset_leaf_node *) node; |
596 | | |
597 | | /* |
598 | | * Binary search to find the right item on the leaf page |
599 | | */ |
600 | 0 | itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true); |
601 | 0 | if (itemno == 0) |
602 | 0 | return false; |
603 | 0 | item = &leaf->items[itemno - 1]; |
604 | | |
605 | | /* Is this a match to the first value on the item? */ |
606 | 0 | if (item->first == x) |
607 | 0 | return true; |
608 | 0 | Assert(x > item->first); |
609 | | |
610 | | /* Is it in the packed codeword? */ |
611 | 0 | if (simple8b_contains(item->codeword, x, item->first)) |
612 | 0 | return true; |
613 | | |
614 | 0 | return false; |
615 | 0 | } |
616 | | |
617 | | /* |
618 | | * Begin in-order scan through all the values. |
619 | | * |
620 | | * While the iteration is in-progress, you cannot add new values to the set. |
621 | | */ |
622 | | void |
623 | | intset_begin_iterate(IntegerSet *intset) |
624 | 0 | { |
625 | | /* Note that we allow an iteration to be abandoned midway */ |
626 | 0 | intset->iter_active = true; |
627 | 0 | intset->iter_node = intset->leftmost_leaf; |
628 | 0 | intset->iter_itemno = 0; |
629 | 0 | intset->iter_valueno = 0; |
630 | 0 | intset->iter_num_values = 0; |
631 | 0 | intset->iter_values = intset->iter_values_buf; |
632 | 0 | } |
633 | | |
634 | | /* |
635 | | * Returns the next integer, when iterating. |
636 | | * |
637 | | * intset_begin_iterate() must be called first. intset_iterate_next() returns |
638 | | * the next value in the set. Returns true, if there was another value, and |
639 | | * stores the value in *next. Otherwise, returns false. |
640 | | */ |
641 | | bool |
642 | | intset_iterate_next(IntegerSet *intset, uint64 *next) |
643 | 0 | { |
644 | 0 | Assert(intset->iter_active); |
645 | 0 | for (;;) |
646 | 0 | { |
647 | | /* Return next iter_values[] entry if any */ |
648 | 0 | if (intset->iter_valueno < intset->iter_num_values) |
649 | 0 | { |
650 | 0 | *next = intset->iter_values[intset->iter_valueno++]; |
651 | 0 | return true; |
652 | 0 | } |
653 | | |
654 | | /* Decode next item in current leaf node, if any */ |
655 | 0 | if (intset->iter_node && |
656 | 0 | intset->iter_itemno < intset->iter_node->num_items) |
657 | 0 | { |
658 | 0 | leaf_item *item; |
659 | 0 | int num_decoded; |
660 | |
|
661 | 0 | item = &intset->iter_node->items[intset->iter_itemno++]; |
662 | |
|
663 | 0 | intset->iter_values_buf[0] = item->first; |
664 | 0 | num_decoded = simple8b_decode(item->codeword, |
665 | 0 | &intset->iter_values_buf[1], |
666 | 0 | item->first); |
667 | 0 | intset->iter_num_values = num_decoded + 1; |
668 | 0 | intset->iter_valueno = 0; |
669 | 0 | continue; |
670 | 0 | } |
671 | | |
672 | | /* No more items on this leaf, step to next node */ |
673 | 0 | if (intset->iter_node) |
674 | 0 | { |
675 | 0 | intset->iter_node = intset->iter_node->next; |
676 | 0 | intset->iter_itemno = 0; |
677 | 0 | continue; |
678 | 0 | } |
679 | | |
680 | | /* |
681 | | * We have reached the end of the B-tree. But we might still have |
682 | | * some integers in the buffer of newly-added values. |
683 | | */ |
684 | 0 | if (intset->iter_values == (const uint64 *) intset->iter_values_buf) |
685 | 0 | { |
686 | 0 | intset->iter_values = intset->buffered_values; |
687 | 0 | intset->iter_num_values = intset->num_buffered_values; |
688 | 0 | intset->iter_valueno = 0; |
689 | 0 | continue; |
690 | 0 | } |
691 | | |
692 | 0 | break; |
693 | 0 | } |
694 | | |
695 | | /* No more results. */ |
696 | 0 | intset->iter_active = false; |
697 | 0 | *next = 0; /* prevent uninitialized-variable warnings */ |
698 | 0 | return false; |
699 | 0 | } |
700 | | |
701 | | /* |
702 | | * intset_binsrch_uint64() -- search a sorted array of uint64s |
703 | | * |
704 | | * Returns the first position with key equal or less than the given key. |
705 | | * The returned position would be the "insert" location for the given key, |
706 | | * that is, the position where the new key should be inserted to. |
707 | | * |
708 | | * 'nextkey' affects the behavior on equal keys. If true, and there is an |
709 | | * equal key in the array, this returns the position immediately after the |
710 | | * equal key. If false, this returns the position of the equal key itself. |
711 | | */ |
712 | | static int |
713 | | intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey) |
714 | 0 | { |
715 | 0 | int low, |
716 | 0 | high, |
717 | 0 | mid; |
718 | |
|
719 | 0 | low = 0; |
720 | 0 | high = arr_elems; |
721 | 0 | while (high > low) |
722 | 0 | { |
723 | 0 | mid = low + (high - low) / 2; |
724 | |
|
725 | 0 | if (nextkey) |
726 | 0 | { |
727 | 0 | if (item >= arr[mid]) |
728 | 0 | low = mid + 1; |
729 | 0 | else |
730 | 0 | high = mid; |
731 | 0 | } |
732 | 0 | else |
733 | 0 | { |
734 | 0 | if (item > arr[mid]) |
735 | 0 | low = mid + 1; |
736 | 0 | else |
737 | 0 | high = mid; |
738 | 0 | } |
739 | 0 | } |
740 | |
|
741 | 0 | return low; |
742 | 0 | } |
743 | | |
744 | | /* same, but for an array of leaf items */ |
745 | | static int |
746 | | intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey) |
747 | 0 | { |
748 | 0 | int low, |
749 | 0 | high, |
750 | 0 | mid; |
751 | |
|
752 | 0 | low = 0; |
753 | 0 | high = arr_elems; |
754 | 0 | while (high > low) |
755 | 0 | { |
756 | 0 | mid = low + (high - low) / 2; |
757 | |
|
758 | 0 | if (nextkey) |
759 | 0 | { |
760 | 0 | if (item >= arr[mid].first) |
761 | 0 | low = mid + 1; |
762 | 0 | else |
763 | 0 | high = mid; |
764 | 0 | } |
765 | 0 | else |
766 | 0 | { |
767 | 0 | if (item > arr[mid].first) |
768 | 0 | low = mid + 1; |
769 | 0 | else |
770 | 0 | high = mid; |
771 | 0 | } |
772 | 0 | } |
773 | |
|
774 | 0 | return low; |
775 | 0 | } |
776 | | |
777 | | /* |
778 | | * Simple-8b encoding. |
779 | | * |
780 | | * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words, |
781 | | * called "codewords". The number of integers packed into a single codeword |
782 | | * depends on the integers being packed; small integers are encoded using |
783 | | * fewer bits than large integers. A single codeword can store a single |
784 | | * 60-bit integer, or two 30-bit integers, for example. |
785 | | * |
786 | | * Since we're storing a unique, sorted, set of integers, we actually encode |
787 | | * the *differences* between consecutive integers. That way, clusters of |
788 | | * integers that are close to each other are packed efficiently, regardless |
789 | | * of their absolute values. |
790 | | * |
791 | | * In Simple-8b, each codeword consists of a 4-bit selector, which indicates |
792 | | * how many integers are encoded in the codeword, and the encoded integers are |
793 | | * packed into the remaining 60 bits. The selector allows for 16 different |
794 | | * ways of using the remaining 60 bits, called "modes". The number of integers |
795 | | * packed into a single codeword in each mode is listed in the simple8b_modes |
796 | | * table below. For example, consider the following codeword: |
797 | | * |
798 | | * 20-bit integer 20-bit integer 20-bit integer |
799 | | * 1101 00000000000000010010 01111010000100100000 00000000000000010100 |
800 | | * ^ |
801 | | * selector |
802 | | * |
803 | | * The selector 1101 is 13 in decimal. From the modes table below, we see |
804 | | * that it means that the codeword encodes three 20-bit integers. In decimal, |
805 | | * those integers are 18, 500000 and 20. Because we encode deltas rather than |
806 | | * absolute values, the actual values that they represent are 18, 500018 and |
807 | | * 500038. |
808 | | * |
809 | | * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes |
810 | | * (which means 240 or 120 consecutive integers, since we're encoding the |
811 | | * deltas between integers), without using the rest of the codeword bits |
812 | | * for anything. |
813 | | * |
814 | | * Simple-8b cannot encode integers larger than 60 bits. Values larger than |
815 | | * that are always stored in the 'first' field of a leaf item, never in the |
816 | | * packed codeword. If there is a sequence of integers that are more than |
817 | | * 2^60 apart, the codeword will go unused on those items. To represent that, |
818 | | * we use a magic EMPTY_CODEWORD codeword value. |
819 | | */ |
820 | | static const struct simple8b_mode |
821 | | { |
822 | | uint8 bits_per_int; |
823 | | uint8 num_ints; |
824 | | } simple8b_modes[17] = |
825 | | |
826 | | { |
827 | | {0, 240}, /* mode 0: 240 zeroes */ |
828 | | {0, 120}, /* mode 1: 120 zeroes */ |
829 | | {1, 60}, /* mode 2: sixty 1-bit integers */ |
830 | | {2, 30}, /* mode 3: thirty 2-bit integers */ |
831 | | {3, 20}, /* mode 4: twenty 3-bit integers */ |
832 | | {4, 15}, /* mode 5: fifteen 4-bit integers */ |
833 | | {5, 12}, /* mode 6: twelve 5-bit integers */ |
834 | | {6, 10}, /* mode 7: ten 6-bit integers */ |
835 | | {7, 8}, /* mode 8: eight 7-bit integers (four bits |
836 | | * are wasted) */ |
837 | | {8, 7}, /* mode 9: seven 8-bit integers (four bits |
838 | | * are wasted) */ |
839 | | {10, 6}, /* mode 10: six 10-bit integers */ |
840 | | {12, 5}, /* mode 11: five 12-bit integers */ |
841 | | {15, 4}, /* mode 12: four 15-bit integers */ |
842 | | {20, 3}, /* mode 13: three 20-bit integers */ |
843 | | {30, 2}, /* mode 14: two 30-bit integers */ |
844 | | {60, 1}, /* mode 15: one 60-bit integer */ |
845 | | |
846 | | {0, 0} /* sentinel value */ |
847 | | }; |
848 | | |
849 | | /* |
850 | | * EMPTY_CODEWORD is a special value, used to indicate "no values". |
851 | | * It is used if the next value is too large to be encoded with Simple-8b. |
852 | | * |
853 | | * This value looks like a mode-0 codeword, but we can distinguish it |
854 | | * because a regular mode-0 codeword would have zeroes in the unused bits. |
855 | | */ |
856 | 0 | #define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF) |
857 | | |
858 | | /* |
859 | | * Encode a number of integers into a Simple-8b codeword. |
860 | | * |
861 | | * (What we actually encode are deltas between successive integers. |
862 | | * "base" is the value before ints[0].) |
863 | | * |
864 | | * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD |
865 | | * elements, ensuring that we can produce a full codeword. |
866 | | * |
867 | | * Returns the encoded codeword, and sets *num_encoded to the number of |
868 | | * input integers that were encoded. That can be zero, if the first delta |
869 | | * is too large to be encoded. |
870 | | */ |
871 | | static uint64 |
872 | | simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base) |
873 | 0 | { |
874 | 0 | int selector; |
875 | 0 | int nints; |
876 | 0 | int bits; |
877 | 0 | uint64 diff; |
878 | 0 | uint64 last_val; |
879 | 0 | uint64 codeword; |
880 | 0 | int i; |
881 | |
|
882 | 0 | Assert(ints[0] > base); |
883 | | |
884 | | /* |
885 | | * Select the "mode" to use for this codeword. |
886 | | * |
887 | | * In each iteration, check if the next value can be represented in the |
888 | | * current mode we're considering. If it's too large, then step up the |
889 | | * mode to a wider one, and repeat. If it fits, move on to the next |
890 | | * integer. Repeat until the codeword is full, given the current mode. |
891 | | * |
892 | | * Note that we don't have any way to represent unused slots in the |
893 | | * codeword, so we require each codeword to be "full". It is always |
894 | | * possible to produce a full codeword unless the very first delta is too |
895 | | * large to be encoded. For example, if the first delta is small but the |
896 | | * second is too large to be encoded, we'll end up using the last "mode", |
897 | | * which has nints == 1. |
898 | | */ |
899 | 0 | selector = 0; |
900 | 0 | nints = simple8b_modes[0].num_ints; |
901 | 0 | bits = simple8b_modes[0].bits_per_int; |
902 | 0 | diff = ints[0] - base - 1; |
903 | 0 | last_val = ints[0]; |
904 | 0 | i = 0; /* number of deltas we have accepted */ |
905 | 0 | for (;;) |
906 | 0 | { |
907 | 0 | if (diff >= (UINT64CONST(1) << bits)) |
908 | 0 | { |
909 | | /* too large, step up to next mode */ |
910 | 0 | selector++; |
911 | 0 | nints = simple8b_modes[selector].num_ints; |
912 | 0 | bits = simple8b_modes[selector].bits_per_int; |
913 | | /* we might already have accepted enough deltas for this mode */ |
914 | 0 | if (i >= nints) |
915 | 0 | break; |
916 | 0 | } |
917 | 0 | else |
918 | 0 | { |
919 | | /* accept this delta; then done if codeword is full */ |
920 | 0 | i++; |
921 | 0 | if (i >= nints) |
922 | 0 | break; |
923 | | /* examine next delta */ |
924 | 0 | Assert(ints[i] > last_val); |
925 | 0 | diff = ints[i] - last_val - 1; |
926 | 0 | last_val = ints[i]; |
927 | 0 | } |
928 | 0 | } |
929 | |
|
930 | 0 | if (nints == 0) |
931 | 0 | { |
932 | | /* |
933 | | * The first delta is too large to be encoded with Simple-8b. |
934 | | * |
935 | | * If there is at least one not-too-large integer in the input, we |
936 | | * will encode it using mode 15 (or a more compact mode). Hence, we |
937 | | * can only get here if the *first* delta is >= 2^60. |
938 | | */ |
939 | 0 | Assert(i == 0); |
940 | 0 | *num_encoded = 0; |
941 | 0 | return EMPTY_CODEWORD; |
942 | 0 | } |
943 | | |
944 | | /* |
945 | | * Encode the integers using the selected mode. Note that we shift them |
946 | | * into the codeword in reverse order, so that they will come out in the |
947 | | * correct order in the decoder. |
948 | | */ |
949 | 0 | codeword = 0; |
950 | 0 | if (bits > 0) |
951 | 0 | { |
952 | 0 | for (i = nints - 1; i > 0; i--) |
953 | 0 | { |
954 | 0 | diff = ints[i] - ints[i - 1] - 1; |
955 | 0 | codeword |= diff; |
956 | 0 | codeword <<= bits; |
957 | 0 | } |
958 | 0 | diff = ints[0] - base - 1; |
959 | 0 | codeword |= diff; |
960 | 0 | } |
961 | | |
962 | | /* add selector to the codeword, and return */ |
963 | 0 | codeword |= (uint64) selector << 60; |
964 | |
|
965 | 0 | *num_encoded = nints; |
966 | 0 | return codeword; |
967 | 0 | } |
968 | | |
969 | | /* |
970 | | * Decode a codeword into an array of integers. |
971 | | * Returns the number of integers decoded. |
972 | | */ |
973 | | static int |
974 | | simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base) |
975 | 0 | { |
976 | 0 | int selector = (codeword >> 60); |
977 | 0 | int nints = simple8b_modes[selector].num_ints; |
978 | 0 | int bits = simple8b_modes[selector].bits_per_int; |
979 | 0 | uint64 mask = (UINT64CONST(1) << bits) - 1; |
980 | 0 | uint64 curr_value; |
981 | |
|
982 | 0 | if (codeword == EMPTY_CODEWORD) |
983 | 0 | return 0; |
984 | | |
985 | 0 | curr_value = base; |
986 | 0 | for (int i = 0; i < nints; i++) |
987 | 0 | { |
988 | 0 | uint64 diff = codeword & mask; |
989 | |
|
990 | 0 | curr_value += 1 + diff; |
991 | 0 | decoded[i] = curr_value; |
992 | 0 | codeword >>= bits; |
993 | 0 | } |
994 | 0 | return nints; |
995 | 0 | } |
996 | | |
997 | | /* |
998 | | * This is very similar to simple8b_decode(), but instead of decoding all |
999 | | * the values to an array, it just checks if the given "key" is part of |
1000 | | * the codeword. |
1001 | | */ |
1002 | | static bool |
1003 | | simple8b_contains(uint64 codeword, uint64 key, uint64 base) |
1004 | 0 | { |
1005 | 0 | int selector = (codeword >> 60); |
1006 | 0 | int nints = simple8b_modes[selector].num_ints; |
1007 | 0 | int bits = simple8b_modes[selector].bits_per_int; |
1008 | |
|
1009 | 0 | if (codeword == EMPTY_CODEWORD) |
1010 | 0 | return false; |
1011 | | |
1012 | 0 | if (bits == 0) |
1013 | 0 | { |
1014 | | /* Special handling for 0-bit cases. */ |
1015 | 0 | return (key - base) <= nints; |
1016 | 0 | } |
1017 | 0 | else |
1018 | 0 | { |
1019 | 0 | uint64 mask = (UINT64CONST(1) << bits) - 1; |
1020 | 0 | uint64 curr_value; |
1021 | |
|
1022 | 0 | curr_value = base; |
1023 | 0 | for (int i = 0; i < nints; i++) |
1024 | 0 | { |
1025 | 0 | uint64 diff = codeword & mask; |
1026 | |
|
1027 | 0 | curr_value += 1 + diff; |
1028 | |
|
1029 | 0 | if (curr_value >= key) |
1030 | 0 | { |
1031 | 0 | if (curr_value == key) |
1032 | 0 | return true; |
1033 | 0 | else |
1034 | 0 | return false; |
1035 | 0 | } |
1036 | | |
1037 | 0 | codeword >>= bits; |
1038 | 0 | } |
1039 | 0 | } |
1040 | 0 | return false; |
1041 | 0 | } |