Coverage Report

Created: 2025-08-12 06:43

/src/postgres/src/backend/access/gist/gistsplit.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * gistsplit.c
4
 *    Multi-column page splitting algorithm
5
 *
6
 * This file is concerned with making good page-split decisions in multi-column
7
 * GiST indexes.  The opclass-specific picksplit functions can only be expected
8
 * to produce answers based on a single column.  We first run the picksplit
9
 * function for column 1; then, if there are more columns, we check if any of
10
 * the tuples are "don't cares" so far as the column 1 split is concerned
11
 * (that is, they could go to either side for no additional penalty).  If so,
12
 * we try to redistribute those tuples on the basis of the next column.
13
 * Repeat till we're out of columns.
14
 *
15
 * gistSplitByKey() is the entry point to this file.
16
 *
17
 *
18
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
19
 * Portions Copyright (c) 1994, Regents of the University of California
20
 *
21
 * IDENTIFICATION
22
 *    src/backend/access/gist/gistsplit.c
23
 *
24
 *-------------------------------------------------------------------------
25
 */
26
#include "postgres.h"
27
28
#include "access/gist_private.h"
29
#include "utils/rel.h"
30
31
typedef struct
32
{
33
  OffsetNumber *entries;
34
  int     len;
35
  Datum    *attr;
36
  bool     *isnull;
37
  bool     *dontcare;
38
} GistSplitUnion;
39
40
41
/*
42
 * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
43
 * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
44
 * gistunionsubkey.
45
 */
46
static void
47
gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
48
           GistSplitUnion *gsvp)
49
0
{
50
0
  IndexTuple *cleanedItVec;
51
0
  int     i,
52
0
        cleanedLen = 0;
53
54
0
  cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
55
56
0
  for (i = 0; i < gsvp->len; i++)
57
0
  {
58
0
    if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
59
0
      continue;
60
61
0
    cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
62
0
  }
63
64
0
  gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
65
0
             gsvp->attr, gsvp->isnull);
66
67
0
  pfree(cleanedItVec);
68
0
}
69
70
/*
71
 * Recompute unions of left- and right-side subkeys after a page split,
72
 * ignoring any tuples that are marked in spl->spl_dontcare[].
73
 *
74
 * Note: we always recompute union keys for all index columns.  In some cases
75
 * this might represent duplicate work for the leftmost column(s), but it's
76
 * not safe to assume that "zero penalty to move a tuple" means "the union
77
 * key doesn't change at all".  Penalty functions aren't 100% accurate.
78
 */
79
static void
80
gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
81
0
{
82
0
  GistSplitUnion gsvp;
83
84
0
  gsvp.dontcare = spl->spl_dontcare;
85
86
0
  gsvp.entries = spl->splitVector.spl_left;
87
0
  gsvp.len = spl->splitVector.spl_nleft;
88
0
  gsvp.attr = spl->spl_lattr;
89
0
  gsvp.isnull = spl->spl_lisnull;
90
91
0
  gistunionsubkeyvec(giststate, itvec, &gsvp);
92
93
0
  gsvp.entries = spl->splitVector.spl_right;
94
0
  gsvp.len = spl->splitVector.spl_nright;
95
0
  gsvp.attr = spl->spl_rattr;
96
0
  gsvp.isnull = spl->spl_risnull;
97
98
0
  gistunionsubkeyvec(giststate, itvec, &gsvp);
99
0
}
100
101
/*
102
 * Find tuples that are "don't cares", that is could be moved to the other
103
 * side of the split with zero penalty, so far as the attno column is
104
 * concerned.
105
 *
106
 * Don't-care tuples are marked by setting the corresponding entry in
107
 * spl->spl_dontcare[] to "true".  Caller must have initialized that array
108
 * to zeroes.
109
 *
110
 * Returns number of don't-cares found.
111
 */
112
static int
113
findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
114
        GistSplitVector *spl, int attno)
115
0
{
116
0
  int     i;
117
0
  GISTENTRY entry;
118
0
  int     NumDontCare = 0;
119
120
  /*
121
   * First, search the left-side tuples to see if any have zero penalty to
122
   * be added to the right-side union key.
123
   *
124
   * attno column is known all-not-null (see gistSplitByKey), so we need not
125
   * check for nulls
126
   */
127
0
  gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
128
0
          (OffsetNumber) 0, false);
129
0
  for (i = 0; i < spl->splitVector.spl_nleft; i++)
130
0
  {
131
0
    int     j = spl->splitVector.spl_left[i];
132
0
    float   penalty = gistpenalty(giststate, attno, &entry, false,
133
0
                      &valvec[j], false);
134
135
0
    if (penalty == 0.0)
136
0
    {
137
0
      spl->spl_dontcare[j] = true;
138
0
      NumDontCare++;
139
0
    }
140
0
  }
141
142
  /* And conversely for the right-side tuples */
143
0
  gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
144
0
          (OffsetNumber) 0, false);
145
0
  for (i = 0; i < spl->splitVector.spl_nright; i++)
146
0
  {
147
0
    int     j = spl->splitVector.spl_right[i];
148
0
    float   penalty = gistpenalty(giststate, attno, &entry, false,
149
0
                      &valvec[j], false);
150
151
0
    if (penalty == 0.0)
152
0
    {
153
0
      spl->spl_dontcare[j] = true;
154
0
      NumDontCare++;
155
0
    }
156
0
  }
157
158
0
  return NumDontCare;
159
0
}
160
161
/*
162
 * Remove tuples that are marked don't-cares from the tuple index array a[]
163
 * of length *len.  This is applied separately to the spl_left and spl_right
164
 * arrays.
165
 */
166
static void
167
removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
168
0
{
169
0
  int     origlen,
170
0
        newlen,
171
0
        i;
172
0
  OffsetNumber *curwpos;
173
174
0
  origlen = newlen = *len;
175
0
  curwpos = a;
176
0
  for (i = 0; i < origlen; i++)
177
0
  {
178
0
    OffsetNumber ai = a[i];
179
180
0
    if (dontcare[ai] == false)
181
0
    {
182
      /* re-emit item into a[] */
183
0
      *curwpos = ai;
184
0
      curwpos++;
185
0
    }
186
0
    else
187
0
      newlen--;
188
0
  }
189
190
0
  *len = newlen;
191
0
}
192
193
/*
194
 * Place a single don't-care tuple into either the left or right side of the
195
 * split, according to which has least penalty for merging the tuple into
196
 * the previously-computed union keys.  We need consider only columns starting
197
 * at attno.
198
 */
199
static void
200
placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
201
     IndexTuple itup, OffsetNumber off, int attno)
202
0
{
203
0
  GISTENTRY identry[INDEX_MAX_KEYS];
204
0
  bool    isnull[INDEX_MAX_KEYS];
205
0
  bool    toLeft = true;
206
207
0
  gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
208
0
            identry, isnull);
209
210
0
  for (; attno < giststate->nonLeafTupdesc->natts; attno++)
211
0
  {
212
0
    float   lpenalty,
213
0
          rpenalty;
214
0
    GISTENTRY entry;
215
216
0
    gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false);
217
0
    lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
218
0
                 identry + attno, isnull[attno]);
219
0
    gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false);
220
0
    rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
221
0
                 identry + attno, isnull[attno]);
222
223
0
    if (lpenalty != rpenalty)
224
0
    {
225
0
      if (lpenalty > rpenalty)
226
0
        toLeft = false;
227
0
      break;
228
0
    }
229
0
  }
230
231
0
  if (toLeft)
232
0
    v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
233
0
  else
234
0
    v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
235
0
}
236
237
0
#define SWAPVAR( s, d, t ) \
238
0
do { \
239
0
  (t) = (s); \
240
0
  (s) = (d); \
241
0
  (d) = (t); \
242
0
} while(0)
243
244
/*
245
 * Clean up when we did a secondary split but the user-defined PickSplit
246
 * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
247
 * true).
248
 *
249
 * We consider whether to swap the left and right outputs of the secondary
250
 * split; this can be worthwhile if the penalty for merging those tuples into
251
 * the previously chosen sets is less that way.
252
 *
253
 * In any case we must update the union datums for the current column by
254
 * adding in the previous union keys (oldL/oldR), since the user-defined
255
 * PickSplit method didn't do so.
256
 */
257
static void
258
supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
259
            GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
260
0
{
261
0
  bool    leaveOnLeft = true,
262
0
        tmpBool;
263
0
  GISTENTRY entryL,
264
0
        entryR,
265
0
        entrySL,
266
0
        entrySR;
267
268
0
  gistentryinit(entryL, oldL, r, NULL, 0, false);
269
0
  gistentryinit(entryR, oldR, r, NULL, 0, false);
270
0
  gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
271
0
  gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
272
273
0
  if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
274
0
  {
275
0
    float   penalty1,
276
0
          penalty2;
277
278
0
    penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
279
0
      gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
280
0
    penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
281
0
      gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
282
283
0
    if (penalty1 > penalty2)
284
0
      leaveOnLeft = false;
285
0
  }
286
0
  else
287
0
  {
288
0
    GISTENTRY  *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
289
0
    float   penalty1,
290
0
          penalty2;
291
292
    /*
293
     * There is only one previously defined union, so we just choose swap
294
     * or not by lowest penalty for that side.  We can only get here if a
295
     * secondary split happened to have all NULLs in its column in the
296
     * tuples that the outer recursion level had assigned to one side.
297
     * (Note that the null checks in gistSplitByKey don't prevent the
298
     * case, because they'll only be checking tuples that were considered
299
     * don't-cares at the outer recursion level, not the tuples that went
300
     * into determining the passed-down left and right union keys.)
301
     */
302
0
    penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
303
0
    penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
304
305
0
    if (penalty1 < penalty2)
306
0
      leaveOnLeft = sv->spl_ldatum_exists;
307
0
    else
308
0
      leaveOnLeft = sv->spl_rdatum_exists;
309
0
  }
310
311
0
  if (leaveOnLeft == false)
312
0
  {
313
    /*
314
     * swap left and right
315
     */
316
0
    OffsetNumber *off,
317
0
          noff;
318
0
    Datum   datum;
319
320
0
    SWAPVAR(sv->spl_left, sv->spl_right, off);
321
0
    SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
322
0
    SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
323
0
    gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
324
0
    gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
325
0
  }
326
327
0
  if (sv->spl_ldatum_exists)
328
0
    gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
329
0
             &sv->spl_ldatum, &tmpBool);
330
331
0
  if (sv->spl_rdatum_exists)
332
0
    gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
333
0
             &sv->spl_rdatum, &tmpBool);
334
335
0
  sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
336
0
}
337
338
/*
339
 * Trivial picksplit implementation. Function called only
340
 * if user-defined picksplit puts all keys on the same side of the split.
341
 * That is a bug of user-defined picksplit but we don't want to fail.
342
 */
343
static void
344
genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
345
0
{
346
0
  OffsetNumber i,
347
0
        maxoff;
348
0
  int     nbytes;
349
0
  GistEntryVector *evec;
350
351
0
  maxoff = entryvec->n - 1;
352
353
0
  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
354
355
0
  v->spl_left = (OffsetNumber *) palloc(nbytes);
356
0
  v->spl_right = (OffsetNumber *) palloc(nbytes);
357
0
  v->spl_nleft = v->spl_nright = 0;
358
359
0
  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
360
0
  {
361
0
    if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
362
0
    {
363
0
      v->spl_left[v->spl_nleft] = i;
364
0
      v->spl_nleft++;
365
0
    }
366
0
    else
367
0
    {
368
0
      v->spl_right[v->spl_nright] = i;
369
0
      v->spl_nright++;
370
0
    }
371
0
  }
372
373
  /*
374
   * Form union datums for each side
375
   */
376
0
  evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
377
378
0
  evec->n = v->spl_nleft;
379
0
  memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
380
0
       sizeof(GISTENTRY) * evec->n);
381
0
  v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
382
0
                    giststate->supportCollation[attno],
383
0
                    PointerGetDatum(evec),
384
0
                    PointerGetDatum(&nbytes));
385
386
0
  evec->n = v->spl_nright;
387
0
  memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
388
0
       sizeof(GISTENTRY) * evec->n);
389
0
  v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
390
0
                    giststate->supportCollation[attno],
391
0
                    PointerGetDatum(evec),
392
0
                    PointerGetDatum(&nbytes));
393
0
}
394
395
/*
396
 * Calls user picksplit method for attno column to split tuples into
397
 * two vectors.
398
 *
399
 * Returns false if split is complete (there are no more index columns, or
400
 * there is no need to consider them because split is optimal already).
401
 *
402
 * Returns true and v->spl_dontcare = NULL if the picksplit result is
403
 * degenerate (all tuples seem to be don't-cares), so we should just
404
 * disregard this column and split on the next column(s) instead.
405
 *
406
 * Returns true and v->spl_dontcare != NULL if there are don't-care tuples
407
 * that could be relocated based on the next column(s).  The don't-care
408
 * tuples have been removed from the split and must be reinserted by caller.
409
 * There is at least one non-don't-care tuple on each side of the split,
410
 * and union keys for all columns are updated to include just those tuples.
411
 *
412
 * A true result implies there is at least one more index column.
413
 */
414
static bool
415
gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
416
          IndexTuple *itup, int len, GISTSTATE *giststate)
417
0
{
418
0
  GIST_SPLITVEC *sv = &v->splitVector;
419
420
  /*
421
   * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
422
   * case we are doing a secondary split (see comments in gist.h).
423
   */
424
0
  sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
425
0
  sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
426
0
  sv->spl_ldatum = v->spl_lattr[attno];
427
0
  sv->spl_rdatum = v->spl_rattr[attno];
428
429
  /*
430
   * Let the opclass-specific PickSplit method do its thing.  Note that at
431
   * this point we know there are no null keys in the entryvec.
432
   */
433
0
  FunctionCall2Coll(&giststate->picksplitFn[attno],
434
0
            giststate->supportCollation[attno],
435
0
            PointerGetDatum(entryvec),
436
0
            PointerGetDatum(sv));
437
438
0
  if (sv->spl_nleft == 0 || sv->spl_nright == 0)
439
0
  {
440
    /*
441
     * User-defined picksplit failed to create an actual split, ie it put
442
     * everything on the same side.  Complain but cope.
443
     */
444
0
    ereport(DEBUG1,
445
0
        (errcode(ERRCODE_INTERNAL_ERROR),
446
0
         errmsg("picksplit method for column %d of index \"%s\" failed",
447
0
            attno + 1, RelationGetRelationName(r)),
448
0
         errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
449
450
    /*
451
     * Reinit GIST_SPLITVEC. Although these fields are not used by
452
     * genericPickSplit(), set them up for further processing
453
     */
454
0
    sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
455
0
    sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
456
0
    sv->spl_ldatum = v->spl_lattr[attno];
457
0
    sv->spl_rdatum = v->spl_rattr[attno];
458
459
    /* Do a generic split */
460
0
    genericPickSplit(giststate, entryvec, sv, attno);
461
0
  }
462
0
  else
463
0
  {
464
    /* hack for compatibility with old picksplit API */
465
0
    if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
466
0
      sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
467
0
    if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
468
0
      sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
469
0
  }
470
471
  /* Clean up if PickSplit didn't take care of a secondary split */
472
0
  if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
473
0
    supportSecondarySplit(r, giststate, attno, sv,
474
0
                v->spl_lattr[attno], v->spl_rattr[attno]);
475
476
  /* emit union datums computed by PickSplit back to v arrays */
477
0
  v->spl_lattr[attno] = sv->spl_ldatum;
478
0
  v->spl_rattr[attno] = sv->spl_rdatum;
479
0
  v->spl_lisnull[attno] = false;
480
0
  v->spl_risnull[attno] = false;
481
482
  /*
483
   * If index columns remain, then consider whether we can improve the split
484
   * by using them.
485
   */
486
0
  v->spl_dontcare = NULL;
487
488
0
  if (attno + 1 < giststate->nonLeafTupdesc->natts)
489
0
  {
490
0
    int     NumDontCare;
491
492
    /*
493
     * Make a quick check to see if left and right union keys are equal;
494
     * if so, the split is certainly degenerate, so tell caller to
495
     * re-split with the next column.
496
     */
497
0
    if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
498
0
      return true;
499
500
    /*
501
     * Locate don't-care tuples, if any.  If there are none, the split is
502
     * optimal, so just fall out and return false.
503
     */
504
0
    v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
505
506
0
    NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
507
508
0
    if (NumDontCare > 0)
509
0
    {
510
      /*
511
       * Remove don't-cares from spl_left[] and spl_right[].
512
       */
513
0
      removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
514
0
      removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
515
516
      /*
517
       * If all tuples on either side were don't-cares, the split is
518
       * degenerate, and we're best off to ignore it and split on the
519
       * next column.  (We used to try to press on with a secondary
520
       * split by forcing a random tuple on each side to be treated as
521
       * non-don't-care, but it seems unlikely that that technique
522
       * really gives a better result.  Note that we don't want to try a
523
       * secondary split with empty left or right primary split sides,
524
       * because then there is no union key on that side for the
525
       * PickSplit function to try to expand, so it can have no good
526
       * figure of merit for what it's doing.  Also note that this check
527
       * ensures we can't produce a bogus one-side-only split in the
528
       * NumDontCare == 1 special case below.)
529
       */
530
0
      if (sv->spl_nleft == 0 || sv->spl_nright == 0)
531
0
      {
532
0
        v->spl_dontcare = NULL;
533
0
        return true;
534
0
      }
535
536
      /*
537
       * Recompute union keys, considering only non-don't-care tuples.
538
       * NOTE: this will set union keys for remaining index columns,
539
       * which will cause later calls of gistUserPicksplit to pass those
540
       * values down to user-defined PickSplit methods with
541
       * spl_ldatum_exists/spl_rdatum_exists set true.
542
       */
543
0
      gistunionsubkey(giststate, itup, v);
544
545
0
      if (NumDontCare == 1)
546
0
      {
547
        /*
548
         * If there's only one don't-care tuple then we can't do a
549
         * PickSplit on it, so just choose whether to send it left or
550
         * right by comparing penalties.  We needed the
551
         * gistunionsubkey step anyway so that we have appropriate
552
         * union keys for figuring the penalties.
553
         */
554
0
        OffsetNumber toMove;
555
556
        /* find it ... */
557
0
        for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
558
0
        {
559
0
          if (v->spl_dontcare[toMove])
560
0
            break;
561
0
        }
562
0
        Assert(toMove < entryvec->n);
563
564
        /* ... and assign it to cheaper side */
565
0
        placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
566
567
        /*
568
         * At this point the union keys are wrong, but we don't care
569
         * because we're done splitting.  The outermost recursion
570
         * level of gistSplitByKey will fix things before returning.
571
         */
572
0
      }
573
0
      else
574
0
        return true;
575
0
    }
576
0
  }
577
578
0
  return false;
579
0
}
580
581
/*
582
 * simply split page in half
583
 */
584
static void
585
gistSplitHalf(GIST_SPLITVEC *v, int len)
586
0
{
587
0
  int     i;
588
589
0
  v->spl_nright = v->spl_nleft = 0;
590
0
  v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
591
0
  v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
592
0
  for (i = 1; i <= len; i++)
593
0
    if (i < len / 2)
594
0
      v->spl_right[v->spl_nright++] = i;
595
0
    else
596
0
      v->spl_left[v->spl_nleft++] = i;
597
598
  /* we need not compute union keys, caller took care of it */
599
0
}
600
601
/*
602
 * gistSplitByKey: main entry point for page-splitting algorithm
603
 *
604
 * r: index relation
605
 * page: page being split
606
 * itup: array of IndexTuples to be processed
607
 * len: number of IndexTuples to be processed (must be at least 2)
608
 * giststate: additional info about index
609
 * v: working state and output area
610
 * attno: column we are working on (zero-based index)
611
 *
612
 * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
613
 * to all-true.  On return, spl_left/spl_nleft contain indexes of tuples
614
 * to go left, spl_right/spl_nright contain indexes of tuples to go right,
615
 * spl_lattr/spl_lisnull contain left-side union key values, and
616
 * spl_rattr/spl_risnull contain right-side union key values.  Other fields
617
 * in this struct are workspace for this file.
618
 *
619
 * Outside caller must pass zero for attno.  The function may internally
620
 * recurse to the next column by passing attno+1.
621
 */
622
void
623
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
624
         GISTSTATE *giststate, GistSplitVector *v, int attno)
625
0
{
626
0
  GistEntryVector *entryvec;
627
0
  OffsetNumber *offNullTuples;
628
0
  int     nOffNullTuples = 0;
629
0
  int     i;
630
631
  /* generate the item array, and identify tuples with null keys */
632
  /* note that entryvec->vector[0] goes unused in this code */
633
0
  entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
634
0
  entryvec->n = len + 1;
635
0
  offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
636
637
0
  for (i = 1; i <= len; i++)
638
0
  {
639
0
    Datum   datum;
640
0
    bool    IsNull;
641
642
0
    datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc,
643
0
                &IsNull);
644
0
    gistdentryinit(giststate, attno, &(entryvec->vector[i]),
645
0
             datum, r, page, i,
646
0
             false, IsNull);
647
0
    if (IsNull)
648
0
      offNullTuples[nOffNullTuples++] = i;
649
0
  }
650
651
0
  if (nOffNullTuples == len)
652
0
  {
653
    /*
654
     * Corner case: All keys in attno column are null, so just transfer
655
     * our attention to the next column.  If there's no next column, just
656
     * split page in half.
657
     */
658
0
    v->spl_risnull[attno] = v->spl_lisnull[attno] = true;
659
660
0
    if (attno + 1 < giststate->nonLeafTupdesc->natts)
661
0
      gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
662
0
    else
663
0
      gistSplitHalf(&v->splitVector, len);
664
0
  }
665
0
  else if (nOffNullTuples > 0)
666
0
  {
667
0
    int     j = 0;
668
669
    /*
670
     * We don't want to mix NULL and not-NULL keys on one page, so split
671
     * nulls to right page and not-nulls to left.
672
     */
673
0
    v->splitVector.spl_right = offNullTuples;
674
0
    v->splitVector.spl_nright = nOffNullTuples;
675
0
    v->spl_risnull[attno] = true;
676
677
0
    v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
678
0
    v->splitVector.spl_nleft = 0;
679
0
    for (i = 1; i <= len; i++)
680
0
      if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
681
0
        j++;
682
0
      else
683
0
        v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
684
685
    /* Compute union keys, unless outer recursion level will handle it */
686
0
    if (attno == 0 && giststate->nonLeafTupdesc->natts == 1)
687
0
    {
688
0
      v->spl_dontcare = NULL;
689
0
      gistunionsubkey(giststate, itup, v);
690
0
    }
691
0
  }
692
0
  else
693
0
  {
694
    /*
695
     * All keys are not-null, so apply user-defined PickSplit method
696
     */
697
0
    if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
698
0
    {
699
      /*
700
       * Splitting on attno column is not optimal, so consider
701
       * redistributing don't-care tuples according to the next column
702
       */
703
0
      Assert(attno + 1 < giststate->nonLeafTupdesc->natts);
704
705
0
      if (v->spl_dontcare == NULL)
706
0
      {
707
        /*
708
         * This split was actually degenerate, so ignore it altogether
709
         * and just split according to the next column.
710
         */
711
0
        gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
712
0
      }
713
0
      else
714
0
      {
715
        /*
716
         * Form an array of just the don't-care tuples to pass to a
717
         * recursive invocation of this function for the next column.
718
         */
719
0
        IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
720
0
        OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
721
0
        int     newlen = 0;
722
0
        GIST_SPLITVEC backupSplit;
723
724
0
        for (i = 0; i < len; i++)
725
0
        {
726
0
          if (v->spl_dontcare[i + 1])
727
0
          {
728
0
            newitup[newlen] = itup[i];
729
0
            map[newlen] = i + 1;
730
0
            newlen++;
731
0
          }
732
0
        }
733
734
0
        Assert(newlen > 0);
735
736
        /*
737
         * Make a backup copy of v->splitVector, since the recursive
738
         * call will overwrite that with its own result.
739
         */
740
0
        backupSplit = v->splitVector;
741
0
        backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
742
0
        memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
743
0
        backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
744
0
        memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
745
746
        /* Recursively decide how to split the don't-care tuples */
747
0
        gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
748
749
        /* Merge result of subsplit with non-don't-care tuples */
750
0
        for (i = 0; i < v->splitVector.spl_nleft; i++)
751
0
          backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
752
0
        for (i = 0; i < v->splitVector.spl_nright; i++)
753
0
          backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
754
755
0
        v->splitVector = backupSplit;
756
0
      }
757
0
    }
758
0
  }
759
760
  /*
761
   * If we're handling a multicolumn index, at the end of the recursion
762
   * recompute the left and right union datums for all index columns.  This
763
   * makes sure we hand back correct union datums in all corner cases,
764
   * including when we haven't processed all columns to start with, or when
765
   * a secondary split moved "don't care" tuples from one side to the other
766
   * (we really shouldn't assume that that didn't change the union datums).
767
   *
768
   * Note: when we're in an internal recursion (attno > 0), we do not worry
769
   * about whether the union datums we return with are sensible, since
770
   * calling levels won't care.  Also, in a single-column index, we expect
771
   * that PickSplit (or the special cases above) produced correct union
772
   * datums.
773
   */
774
0
  if (attno == 0 && giststate->nonLeafTupdesc->natts > 1)
775
0
  {
776
0
    v->spl_dontcare = NULL;
777
0
    gistunionsubkey(giststate, itup, v);
778
0
  }
779
0
}