Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/access/brin/brin_bloom.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * brin_bloom.c
3
 *    Implementation of Bloom opclass for BRIN
4
 *
5
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6
 * Portions Copyright (c) 1994, Regents of the University of California
7
 *
8
 *
9
 * A BRIN opclass summarizing page range into a bloom filter.
10
 *
11
 * Bloom filters allow efficient testing whether a given page range contains
12
 * a particular value. Therefore, if we summarize each page range into a small
13
 * bloom filter, we can easily (and cheaply) test whether it contains values
14
 * we get later.
15
 *
16
 * The index only supports equality operators, similarly to hash indexes.
17
 * Bloom indexes are however much smaller, and support only bitmap scans.
18
 *
19
 * Note: Don't confuse this with bloom indexes, implemented in a contrib
20
 * module. That extension implements an entirely new AM, building a bloom
21
 * filter on multiple columns in a single row. This opclass works with an
22
 * existing AM (BRIN) and builds bloom filter on a column.
23
 *
24
 *
25
 * values vs. hashes
26
 * -----------------
27
 *
28
 * The original column values are not used directly, but are first hashed
29
 * using the regular type-specific hash function, producing a uint32 hash.
30
 * And this hash value is then added to the summary - i.e. it's hashed
31
 * again and added to the bloom filter.
32
 *
33
 * This allows the code to treat all data types (byval/byref/...) the same
34
 * way, with only minimal space requirements, because we're working with
35
 * hashes and not the original values. Everything is uint32.
36
 *
37
 * Of course, this assumes the built-in hash function is reasonably good,
38
 * without too many collisions etc. But that does seem to be the case, at
39
 * least based on past experience. After all, the same hash functions are
40
 * used for hash indexes, hash partitioning and so on.
41
 *
42
 *
43
 * hashing scheme
44
 * --------------
45
 *
46
 * Bloom filters require a number of independent hash functions. There are
47
 * different schemes how to construct them - for example we might use
48
 * hash_uint32_extended with random seeds, but that seems fairly expensive.
49
 * We use a scheme requiring only two functions described in this paper:
50
 *
51
 * Less Hashing, Same Performance:Building a Better Bloom Filter
52
 * Adam Kirsch, Michael Mitzenmacher, Harvard School of Engineering and
53
 * Applied Sciences, Cambridge, Massachusetts [DOI 10.1002/rsa.20208]
54
 *
55
 * The two hash functions h1 and h2 are calculated using hard-coded seeds,
56
 * and then combined using (h1 + i * h2) to generate the hash functions.
57
 *
58
 *
59
 * sizing the bloom filter
60
 * -----------------------
61
 *
62
 * Size of a bloom filter depends on the number of distinct values we will
63
 * store in it, and the desired false positive rate. The higher the number
64
 * of distinct values and/or the lower the false positive rate, the larger
65
 * the bloom filter. On the other hand, we want to keep the index as small
66
 * as possible - that's one of the basic advantages of BRIN indexes.
67
 *
68
 * Although the number of distinct elements (in a page range) depends on
69
 * the data, we can consider it fixed. This simplifies the trade-off to
70
 * just false positive rate vs. size.
71
 *
72
 * At the page range level, false positive rate is a probability the bloom
73
 * filter matches a random value. For the whole index (with sufficiently
74
 * many page ranges) it represents the fraction of the index ranges (and
75
 * thus fraction of the table to be scanned) matching the random value.
76
 *
77
 * Furthermore, the size of the bloom filter is subject to implementation
78
 * limits - it has to fit onto a single index page (8kB by default). As
79
 * the bitmap is inherently random (when "full" about half the bits is set
80
 * to 1, randomly), compression can't help very much.
81
 *
82
 * To reduce the size of a filter (to fit to a page), we have to either
83
 * accept higher false positive rate (undesirable), or reduce the number
84
 * of distinct items to be stored in the filter. We can't alter the input
85
 * data, of course, but we may make the BRIN page ranges smaller - instead
86
 * of the default 128 pages (1MB) we may build index with 16-page ranges,
87
 * or something like that. This should reduce the number of distinct values
88
 * in the page range, making the filter smaller (with fixed false positive
89
 * rate). Even for random data sets this should help, as the number of rows
90
 * per heap page is limited (to ~290 with very narrow tables, likely ~20
91
 * in practice).
92
 *
93
 * Of course, good sizing decisions depend on having the necessary data,
94
 * i.e. number of distinct values in a page range (of a given size) and
95
 * table size (to estimate cost change due to change in false positive
96
 * rate due to having larger index vs. scanning larger indexes). We may
97
 * not have that data - for example when building an index on empty table
98
 * it's not really possible. And for some data we only have estimates for
99
 * the whole table and we can only estimate per-range values (ndistinct).
100
 *
101
 * Another challenge is that while the bloom filter is per-column, it's
102
 * the whole index tuple that has to fit into a page. And for multi-column
103
 * indexes that may include pieces we have no control over (not necessarily
104
 * bloom filters, the other columns may use other BRIN opclasses). So it's
105
 * not entirely clear how to distribute the space between those columns.
106
 *
107
 * The current logic, implemented in brin_bloom_get_ndistinct, attempts to
108
 * make some basic sizing decisions, based on the size of BRIN ranges, and
109
 * the maximum number of rows per range.
110
 *
111
 *
112
 * IDENTIFICATION
113
 *    src/backend/access/brin/brin_bloom.c
114
 */
115
#include "postgres.h"
116
117
#include <math.h>
118
119
#include "access/brin.h"
120
#include "access/brin_internal.h"
121
#include "access/brin_page.h"
122
#include "access/brin_tuple.h"
123
#include "access/genam.h"
124
#include "access/htup_details.h"
125
#include "access/reloptions.h"
126
#include "catalog/pg_am.h"
127
#include "catalog/pg_type.h"
128
#include "common/hashfn.h"
129
#include "utils/fmgrprotos.h"
130
#include "utils/rel.h"
131
132
0
#define BloomEqualStrategyNumber  1
133
134
/*
135
 * Additional SQL level support functions. We only have one, which is
136
 * used to calculate hash of the input value.
137
 *
138
 * Procedure numbers must not use values reserved for BRIN itself; see
139
 * brin_internal.h.
140
 */
141
#define   BLOOM_MAX_PROCNUMS    1 /* maximum support procs we need */
142
0
#define   PROCNUM_HASH      11  /* required */
143
144
/*
145
 * Subtract this from procnum to obtain index in BloomOpaque arrays
146
 * (Must be equal to minimum of private procnums).
147
 */
148
0
#define   PROCNUM_BASE      11
149
150
/*
151
 * Storage type for BRIN's reloptions.
152
 */
153
typedef struct BloomOptions
154
{
155
  int32   vl_len_;    /* varlena header (do not touch directly!) */
156
  double    nDistinctPerRange;  /* number of distinct values per range */
157
  double    falsePositiveRate;  /* false positive for bloom filter */
158
} BloomOptions;
159
160
/*
161
 * The current min value (16) is somewhat arbitrary, but it's based
162
 * on the fact that the filter header is ~20B alone, which is about
163
 * the same as the filter bitmap for 16 distinct items with 1% false
164
 * positive rate. So by allowing lower values we'd not gain much. In
165
 * any case, the min should not be larger than MaxHeapTuplesPerPage
166
 * (~290), which is the theoretical maximum for single-page ranges.
167
 */
168
#define   BLOOM_MIN_NDISTINCT_PER_RANGE   16
169
170
/*
171
 * Used to determine number of distinct items, based on the number of rows
172
 * in a page range. The 10% is somewhat similar to what estimate_num_groups
173
 * does, so we use the same factor here.
174
 */
175
0
#define   BLOOM_DEFAULT_NDISTINCT_PER_RANGE -0.1  /* 10% of values */
176
177
/*
178
 * Allowed range and default value for the false positive range. The exact
179
 * values are somewhat arbitrary, but were chosen considering the various
180
 * parameters (size of filter vs. page size, etc.).
181
 *
182
 * The lower the false-positive rate, the more accurate the filter is, but
183
 * it also gets larger - at some point this eliminates the main advantage
184
 * of BRIN indexes, which is the tiny size. At 0.01% the index is about
185
 * 10% of the table (assuming 290 distinct values per 8kB page).
186
 *
187
 * On the other hand, as the false-positive rate increases, larger part of
188
 * the table has to be scanned due to mismatches - at 25% we're probably
189
 * close to sequential scan being cheaper.
190
 */
191
0
#define   BLOOM_MIN_FALSE_POSITIVE_RATE 0.0001  /* 0.01% fp rate */
192
0
#define   BLOOM_MAX_FALSE_POSITIVE_RATE 0.25  /* 25% fp rate */
193
0
#define   BLOOM_DEFAULT_FALSE_POSITIVE_RATE 0.01  /* 1% fp rate */
194
195
#define BloomGetNDistinctPerRange(opts) \
196
0
  ((opts) && (((BloomOptions *) (opts))->nDistinctPerRange != 0) ? \
197
0
   (((BloomOptions *) (opts))->nDistinctPerRange) : \
198
0
   BLOOM_DEFAULT_NDISTINCT_PER_RANGE)
199
200
#define BloomGetFalsePositiveRate(opts) \
201
0
  ((opts) && (((BloomOptions *) (opts))->falsePositiveRate != 0.0) ? \
202
0
   (((BloomOptions *) (opts))->falsePositiveRate) : \
203
0
   BLOOM_DEFAULT_FALSE_POSITIVE_RATE)
204
205
/*
206
 * And estimate of the largest bloom we can fit onto a page. This is not
207
 * a perfect guarantee, for a couple of reasons. For example, the row may
208
 * be larger because the index has multiple columns.
209
 */
210
#define BloomMaxFilterSize \
211
0
  MAXALIGN_DOWN(BLCKSZ - \
212
          (MAXALIGN(SizeOfPageHeaderData + \
213
              sizeof(ItemIdData)) + \
214
           MAXALIGN(sizeof(BrinSpecialSpace)) + \
215
           SizeOfBrinTuple))
216
217
/*
218
 * Seeds used to calculate two hash functions h1 and h2, which are then used
219
 * to generate k hashes using the (h1 + i * h2) scheme.
220
 */
221
0
#define BLOOM_SEED_1  0x71d924af
222
0
#define BLOOM_SEED_2  0xba48b314
223
224
/*
225
 * Bloom Filter
226
 *
227
 * Represents a bloom filter, built on hashes of the indexed values. That is,
228
 * we compute a uint32 hash of the value, and then store this hash into the
229
 * bloom filter (and compute additional hashes on it).
230
 *
231
 * XXX We could implement "sparse" bloom filters, keeping only the bytes that
232
 * are not entirely 0. But while indexes don't support TOAST, the varlena can
233
 * still be compressed. So this seems unnecessary, because the compression
234
 * should do the same job.
235
 *
236
 * XXX We can also watch the number of bits set in the bloom filter, and then
237
 * stop using it (and not store the bitmap, to save space) when the false
238
 * positive rate gets too high. But even if the false positive rate exceeds the
239
 * desired value, it still can eliminate some page ranges.
240
 */
241
typedef struct BloomFilter
242
{
243
  /* varlena header (do not touch directly!) */
244
  int32   vl_len_;
245
246
  /* space for various flags (unused for now) */
247
  uint16    flags;
248
249
  /* fields for the HASHED phase */
250
  uint8   nhashes;    /* number of hash functions */
251
  uint32    nbits;      /* number of bits in the bitmap (size) */
252
  uint32    nbits_set;    /* number of bits set to 1 */
253
254
  /* data of the bloom filter */
255
  char    data[FLEXIBLE_ARRAY_MEMBER];
256
} BloomFilter;
257
258
/*
259
 * bloom_filter_size
260
 *    Calculate Bloom filter parameters (nbits, nbytes, nhashes).
261
 *
262
 * Given expected number of distinct values and desired false positive rate,
263
 * calculates the optimal parameters of the Bloom filter.
264
 *
265
 * The resulting parameters are returned through nbytesp (number of bytes),
266
 * nbitsp (number of bits) and nhashesp (number of hash functions). If a
267
 * pointer is NULL, the parameter is not returned.
268
 */
269
static void
270
bloom_filter_size(int ndistinct, double false_positive_rate,
271
          int *nbytesp, int *nbitsp, int *nhashesp)
272
0
{
273
0
  double    k;
274
0
  int     nbits,
275
0
        nbytes;
276
277
  /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
278
0
  nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
279
280
  /* round m to whole bytes */
281
0
  nbytes = ((nbits + 7) / 8);
282
0
  nbits = nbytes * 8;
283
284
  /*
285
   * round(log(2.0) * m / ndistinct), but assume round() may not be
286
   * available on Windows
287
   */
288
0
  k = log(2.0) * nbits / ndistinct;
289
0
  k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
290
291
0
  if (nbytesp)
292
0
    *nbytesp = nbytes;
293
294
0
  if (nbitsp)
295
0
    *nbitsp = nbits;
296
297
0
  if (nhashesp)
298
0
    *nhashesp = (int) k;
299
0
}
300
301
/*
302
 * bloom_init
303
 *    Initialize the Bloom Filter, allocate all the memory.
304
 *
305
 * The filter is initialized with optimal size for ndistinct expected values
306
 * and the requested false positive rate. The filter is stored as varlena.
307
 */
308
static BloomFilter *
309
bloom_init(int ndistinct, double false_positive_rate)
310
0
{
311
0
  Size    len;
312
0
  BloomFilter *filter;
313
314
0
  int     nbits;      /* size of filter / number of bits */
315
0
  int     nbytes;     /* size of filter / number of bytes */
316
0
  int     nhashes;    /* number of hash functions */
317
318
0
  Assert(ndistinct > 0);
319
0
  Assert(false_positive_rate > 0 && false_positive_rate < 1);
320
321
  /* calculate bloom filter size / parameters */
322
0
  bloom_filter_size(ndistinct, false_positive_rate,
323
0
            &nbytes, &nbits, &nhashes);
324
325
  /*
326
   * Reject filters that are obviously too large to store on a page.
327
   *
328
   * Initially the bloom filter is just zeroes and so very compressible, but
329
   * as we add values it gets more and more random, and so less and less
330
   * compressible. So initially everything fits on the page, but we might
331
   * get surprising failures later - we want to prevent that, so we reject
332
   * bloom filter that are obviously too large.
333
   *
334
   * XXX It's not uncommon to oversize the bloom filter a bit, to defend
335
   * against unexpected data anomalies (parts of table with more distinct
336
   * values per range etc.). But we still need to make sure even the
337
   * oversized filter fits on page, if such need arises.
338
   *
339
   * XXX This check is not perfect, because the index may have multiple
340
   * filters that are small individually, but too large when combined.
341
   */
342
0
  if (nbytes > BloomMaxFilterSize)
343
0
    elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
344
0
       BloomMaxFilterSize);
345
346
  /*
347
   * We allocate the whole filter. Most of it is going to be 0 bits, so the
348
   * varlena is easy to compress.
349
   */
350
0
  len = offsetof(BloomFilter, data) + nbytes;
351
352
0
  filter = (BloomFilter *) palloc0(len);
353
354
0
  filter->flags = 0;
355
0
  filter->nhashes = nhashes;
356
0
  filter->nbits = nbits;
357
358
0
  SET_VARSIZE(filter, len);
359
360
0
  return filter;
361
0
}
362
363
364
/*
365
 * bloom_add_value
366
 *    Add value to the bloom filter.
367
 */
368
static BloomFilter *
369
bloom_add_value(BloomFilter *filter, uint32 value, bool *updated)
370
0
{
371
0
  int     i;
372
0
  uint64    h1,
373
0
        h2;
374
375
  /* compute the hashes, used for the bloom filter */
376
0
  h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
377
0
  h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
378
379
  /* compute the requested number of hashes */
380
0
  for (i = 0; i < filter->nhashes; i++)
381
0
  {
382
    /* h1 + h2 + f(i) */
383
0
    uint32    h = (h1 + i * h2) % filter->nbits;
384
0
    uint32    byte = (h / 8);
385
0
    uint32    bit = (h % 8);
386
387
    /* if the bit is not set, set it and remember we did that */
388
0
    if (!(filter->data[byte] & (0x01 << bit)))
389
0
    {
390
0
      filter->data[byte] |= (0x01 << bit);
391
0
      filter->nbits_set++;
392
0
      if (updated)
393
0
        *updated = true;
394
0
    }
395
0
  }
396
397
0
  return filter;
398
0
}
399
400
401
/*
402
 * bloom_contains_value
403
 *    Check if the bloom filter contains a particular value.
404
 */
405
static bool
406
bloom_contains_value(BloomFilter *filter, uint32 value)
407
0
{
408
0
  int     i;
409
0
  uint64    h1,
410
0
        h2;
411
412
  /* calculate the two hashes */
413
0
  h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
414
0
  h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
415
416
  /* compute the requested number of hashes */
417
0
  for (i = 0; i < filter->nhashes; i++)
418
0
  {
419
    /* h1 + h2 + f(i) */
420
0
    uint32    h = (h1 + i * h2) % filter->nbits;
421
0
    uint32    byte = (h / 8);
422
0
    uint32    bit = (h % 8);
423
424
    /* if the bit is not set, the value is not there */
425
0
    if (!(filter->data[byte] & (0x01 << bit)))
426
0
      return false;
427
0
  }
428
429
  /* all hashes found in bloom filter */
430
0
  return true;
431
0
}
432
433
typedef struct BloomOpaque
434
{
435
  /*
436
   * XXX At this point we only need a single proc (to compute the hash), but
437
   * let's keep the array just like inclusion and minmax opclasses, for
438
   * consistency. We may need additional procs in the future.
439
   */
440
  FmgrInfo  extra_procinfos[BLOOM_MAX_PROCNUMS];
441
} BloomOpaque;
442
443
static FmgrInfo *bloom_get_procinfo(BrinDesc *bdesc, uint16 attno,
444
                  uint16 procnum);
445
446
447
Datum
448
brin_bloom_opcinfo(PG_FUNCTION_ARGS)
449
0
{
450
0
  BrinOpcInfo *result;
451
452
  /*
453
   * opaque->strategy_procinfos is initialized lazily; here it is set to
454
   * all-uninitialized by palloc0 which sets fn_oid to InvalidOid.
455
   *
456
   * bloom indexes only store the filter as a single BYTEA column
457
   */
458
459
0
  result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
460
0
           sizeof(BloomOpaque));
461
0
  result->oi_nstored = 1;
462
0
  result->oi_regular_nulls = true;
463
0
  result->oi_opaque = (BloomOpaque *)
464
0
    MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
465
0
  result->oi_typcache[0] = lookup_type_cache(PG_BRIN_BLOOM_SUMMARYOID, 0);
466
467
0
  PG_RETURN_POINTER(result);
468
0
}
469
470
/*
471
 * brin_bloom_get_ndistinct
472
 *    Determine the ndistinct value used to size bloom filter.
473
 *
474
 * Adjust the ndistinct value based on the pagesPerRange value. First,
475
 * if it's negative, it's assumed to be relative to maximum number of
476
 * tuples in the range (assuming each page gets MaxHeapTuplesPerPage
477
 * tuples, which is likely a significant over-estimate). We also clamp
478
 * the value, not to over-size the bloom filter unnecessarily.
479
 *
480
 * XXX We can only do this when the pagesPerRange value was supplied.
481
 * If it wasn't, it has to be a read-only access to the index, in which
482
 * case we don't really care. But perhaps we should fall-back to the
483
 * default pagesPerRange value?
484
 *
485
 * XXX We might also fetch info about ndistinct estimate for the column,
486
 * and compute the expected number of distinct values in a range. But
487
 * that may be tricky due to data being sorted in various ways, so it
488
 * seems better to rely on the upper estimate.
489
 *
490
 * XXX We might also calculate a better estimate of rows per BRIN range,
491
 * instead of using MaxHeapTuplesPerPage (which probably produces values
492
 * much higher than reality).
493
 */
494
static int
495
brin_bloom_get_ndistinct(BrinDesc *bdesc, BloomOptions *opts)
496
0
{
497
0
  double    ndistinct;
498
0
  double    maxtuples;
499
0
  BlockNumber pagesPerRange;
500
501
0
  pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
502
0
  ndistinct = BloomGetNDistinctPerRange(opts);
503
504
0
  Assert(BlockNumberIsValid(pagesPerRange));
505
506
0
  maxtuples = MaxHeapTuplesPerPage * pagesPerRange;
507
508
  /*
509
   * Similarly to n_distinct, negative values are relative - in this case to
510
   * maximum number of tuples in the page range (maxtuples).
511
   */
512
0
  if (ndistinct < 0)
513
0
    ndistinct = (-ndistinct) * maxtuples;
514
515
  /*
516
   * Positive values are to be used directly, but we still apply a couple of
517
   * safeties to avoid using unreasonably small bloom filters.
518
   */
519
0
  ndistinct = Max(ndistinct, BLOOM_MIN_NDISTINCT_PER_RANGE);
520
521
  /*
522
   * And don't use more than the maximum possible number of tuples, in the
523
   * range, which would be entirely wasteful.
524
   */
525
0
  ndistinct = Min(ndistinct, maxtuples);
526
527
0
  return (int) ndistinct;
528
0
}
529
530
/*
531
 * Examine the given index tuple (which contains partial status of a certain
532
 * page range) by comparing it to the given value that comes from another heap
533
 * tuple.  If the new value is outside the bloom filter specified by the
534
 * existing tuple values, update the index tuple and return true.  Otherwise,
535
 * return false and do not modify in this case.
536
 */
537
Datum
538
brin_bloom_add_value(PG_FUNCTION_ARGS)
539
0
{
540
0
  BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
541
0
  BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
542
0
  Datum   newval = PG_GETARG_DATUM(2);
543
0
  bool    isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3);
544
0
  BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS();
545
0
  Oid     colloid = PG_GET_COLLATION();
546
0
  FmgrInfo   *hashFn;
547
0
  uint32    hashValue;
548
0
  bool    updated = false;
549
0
  AttrNumber  attno;
550
0
  BloomFilter *filter;
551
552
0
  Assert(!isnull);
553
554
0
  attno = column->bv_attno;
555
556
  /*
557
   * If this is the first non-null value, we need to initialize the bloom
558
   * filter. Otherwise just extract the existing bloom filter from
559
   * BrinValues.
560
   */
561
0
  if (column->bv_allnulls)
562
0
  {
563
0
    filter = bloom_init(brin_bloom_get_ndistinct(bdesc, opts),
564
0
              BloomGetFalsePositiveRate(opts));
565
0
    column->bv_values[0] = PointerGetDatum(filter);
566
0
    column->bv_allnulls = false;
567
0
    updated = true;
568
0
  }
569
0
  else
570
0
    filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]);
571
572
  /*
573
   * Compute the hash of the new value, using the supplied hash function,
574
   * and then add the hash value to the bloom filter.
575
   */
576
0
  hashFn = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
577
578
0
  hashValue = DatumGetUInt32(FunctionCall1Coll(hashFn, colloid, newval));
579
580
0
  filter = bloom_add_value(filter, hashValue, &updated);
581
582
0
  column->bv_values[0] = PointerGetDatum(filter);
583
584
0
  PG_RETURN_BOOL(updated);
585
0
}
586
587
/*
588
 * Given an index tuple corresponding to a certain page range and a scan key,
589
 * return whether the scan key is consistent with the index tuple's bloom
590
 * filter.  Return true if so, false otherwise.
591
 */
592
Datum
593
brin_bloom_consistent(PG_FUNCTION_ARGS)
594
0
{
595
0
  BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
596
0
  BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
597
0
  ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
598
0
  int     nkeys = PG_GETARG_INT32(3);
599
0
  Oid     colloid = PG_GET_COLLATION();
600
0
  AttrNumber  attno;
601
0
  Datum   value;
602
0
  bool    matches;
603
0
  FmgrInfo   *finfo;
604
0
  uint32    hashValue;
605
0
  BloomFilter *filter;
606
0
  int     keyno;
607
608
0
  filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]);
609
610
0
  Assert(filter);
611
612
  /*
613
   * Assume all scan keys match. We'll be searching for a scan key
614
   * eliminating the page range (we can stop on the first such key).
615
   */
616
0
  matches = true;
617
618
0
  for (keyno = 0; keyno < nkeys; keyno++)
619
0
  {
620
0
    ScanKey   key = keys[keyno];
621
622
    /* NULL keys are handled and filtered-out in bringetbitmap */
623
0
    Assert(!(key->sk_flags & SK_ISNULL));
624
625
0
    attno = key->sk_attno;
626
0
    value = key->sk_argument;
627
628
0
    switch (key->sk_strategy)
629
0
    {
630
0
      case BloomEqualStrategyNumber:
631
632
        /*
633
         * We want to return the current page range if the bloom
634
         * filter seems to contain the value.
635
         */
636
0
        finfo = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
637
638
0
        hashValue = DatumGetUInt32(FunctionCall1Coll(finfo, colloid, value));
639
0
        matches &= bloom_contains_value(filter, hashValue);
640
641
0
        break;
642
0
      default:
643
        /* shouldn't happen */
644
0
        elog(ERROR, "invalid strategy number %d", key->sk_strategy);
645
0
        matches = false;
646
0
        break;
647
0
    }
648
649
0
    if (!matches)
650
0
      break;
651
0
  }
652
653
0
  PG_RETURN_BOOL(matches);
654
0
}
655
656
/*
657
 * Given two BrinValues, update the first of them as a union of the summary
658
 * values contained in both.  The second one is untouched.
659
 *
660
 * XXX We assume the bloom filters have the same parameters for now. In the
661
 * future we should have 'can union' function, to decide if we can combine
662
 * two particular bloom filters.
663
 */
664
Datum
665
brin_bloom_union(PG_FUNCTION_ARGS)
666
0
{
667
0
  int     i;
668
0
  int     nbytes;
669
0
  BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
670
0
  BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
671
0
  BloomFilter *filter_a;
672
0
  BloomFilter *filter_b;
673
674
0
  Assert(col_a->bv_attno == col_b->bv_attno);
675
0
  Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
676
677
0
  filter_a = (BloomFilter *) PG_DETOAST_DATUM(col_a->bv_values[0]);
678
0
  filter_b = (BloomFilter *) PG_DETOAST_DATUM(col_b->bv_values[0]);
679
680
  /* make sure the filters use the same parameters */
681
0
  Assert(filter_a && filter_b);
682
0
  Assert(filter_a->nbits == filter_b->nbits);
683
0
  Assert(filter_a->nhashes == filter_b->nhashes);
684
0
  Assert((filter_a->nbits > 0) && (filter_a->nbits % 8 == 0));
685
686
0
  nbytes = (filter_a->nbits) / 8;
687
688
  /* simply OR the bitmaps */
689
0
  for (i = 0; i < nbytes; i++)
690
0
    filter_a->data[i] |= filter_b->data[i];
691
692
  /* update the number of bits set in the filter */
693
0
  filter_a->nbits_set = pg_popcount((const char *) filter_a->data, nbytes);
694
695
  /* if we decompressed filter_a, update the summary */
696
0
  if (PointerGetDatum(filter_a) != col_a->bv_values[0])
697
0
  {
698
0
    pfree(DatumGetPointer(col_a->bv_values[0]));
699
0
    col_a->bv_values[0] = PointerGetDatum(filter_a);
700
0
  }
701
702
  /* also free filter_b, if it was decompressed */
703
0
  if (PointerGetDatum(filter_b) != col_b->bv_values[0])
704
0
    pfree(filter_b);
705
706
0
  PG_RETURN_VOID();
707
0
}
708
709
/*
710
 * Cache and return inclusion opclass support procedure
711
 *
712
 * Return the procedure corresponding to the given function support number
713
 * or null if it does not exist.
714
 */
715
static FmgrInfo *
716
bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
717
0
{
718
0
  BloomOpaque *opaque;
719
0
  uint16    basenum = procnum - PROCNUM_BASE;
720
721
  /*
722
   * We cache these in the opaque struct, to avoid repetitive syscache
723
   * lookups.
724
   */
725
0
  opaque = (BloomOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
726
727
0
  if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
728
0
  {
729
0
    if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
730
0
                        procnum)))
731
0
      fmgr_info_copy(&opaque->extra_procinfos[basenum],
732
0
               index_getprocinfo(bdesc->bd_index, attno, procnum),
733
0
               bdesc->bd_context);
734
0
    else
735
0
      ereport(ERROR,
736
0
          errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
737
0
          errmsg_internal("invalid opclass definition"),
738
0
          errdetail_internal("The operator class is missing support function %d for column %d.",
739
0
                     procnum, attno));
740
0
  }
741
742
0
  return &opaque->extra_procinfos[basenum];
743
0
}
744
745
Datum
746
brin_bloom_options(PG_FUNCTION_ARGS)
747
0
{
748
0
  local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
749
750
0
  init_local_reloptions(relopts, sizeof(BloomOptions));
751
752
0
  add_local_real_reloption(relopts, "n_distinct_per_range",
753
0
               "number of distinct items expected in a BRIN page range",
754
0
               BLOOM_DEFAULT_NDISTINCT_PER_RANGE,
755
0
               -1.0, INT_MAX, offsetof(BloomOptions, nDistinctPerRange));
756
757
0
  add_local_real_reloption(relopts, "false_positive_rate",
758
0
               "desired false-positive rate for the bloom filters",
759
0
               BLOOM_DEFAULT_FALSE_POSITIVE_RATE,
760
0
               BLOOM_MIN_FALSE_POSITIVE_RATE,
761
0
               BLOOM_MAX_FALSE_POSITIVE_RATE,
762
0
               offsetof(BloomOptions, falsePositiveRate));
763
764
0
  PG_RETURN_VOID();
765
0
}
766
767
/*
768
 * brin_bloom_summary_in
769
 *    - input routine for type brin_bloom_summary.
770
 *
771
 * brin_bloom_summary is only used internally to represent summaries
772
 * in BRIN bloom indexes, so it has no operations of its own, and we
773
 * disallow input too.
774
 */
775
Datum
776
brin_bloom_summary_in(PG_FUNCTION_ARGS)
777
0
{
778
  /*
779
   * brin_bloom_summary stores the data in binary form and parsing text
780
   * input is not needed, so disallow this.
781
   */
782
0
  ereport(ERROR,
783
0
      (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
784
0
       errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary")));
785
786
0
  PG_RETURN_VOID();     /* keep compiler quiet */
787
0
}
788
789
790
/*
791
 * brin_bloom_summary_out
792
 *    - output routine for type brin_bloom_summary.
793
 *
794
 * BRIN bloom summaries are serialized into a bytea value, but we want
795
 * to output something nicer humans can understand.
796
 */
797
Datum
798
brin_bloom_summary_out(PG_FUNCTION_ARGS)
799
0
{
800
0
  BloomFilter *filter;
801
0
  StringInfoData str;
802
803
  /* detoast the data to get value with a full 4B header */
804
0
  filter = (BloomFilter *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
805
806
0
  initStringInfo(&str);
807
0
  appendStringInfoChar(&str, '{');
808
809
0
  appendStringInfo(&str, "mode: hashed  nhashes: %u  nbits: %u  nbits_set: %u",
810
0
           filter->nhashes, filter->nbits, filter->nbits_set);
811
812
0
  appendStringInfoChar(&str, '}');
813
814
0
  PG_RETURN_CSTRING(str.data);
815
0
}
816
817
/*
818
 * brin_bloom_summary_recv
819
 *    - binary input routine for type brin_bloom_summary.
820
 */
821
Datum
822
brin_bloom_summary_recv(PG_FUNCTION_ARGS)
823
0
{
824
0
  ereport(ERROR,
825
0
      (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
826
0
       errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary")));
827
828
0
  PG_RETURN_VOID();     /* keep compiler quiet */
829
0
}
830
831
/*
832
 * brin_bloom_summary_send
833
 *    - binary output routine for type brin_bloom_summary.
834
 *
835
 * BRIN bloom summaries are serialized in a bytea value (although the
836
 * type is named differently), so let's just send that.
837
 */
838
Datum
839
brin_bloom_summary_send(PG_FUNCTION_ARGS)
840
0
{
841
0
  return byteasend(fcinfo);
842
0
}