Coverage Report

Created: 2025-06-15 06:31

/src/postgres/src/backend/access/gist/gist.c

All instrumented lines in this section report an execution count of 0; nothing in the listing below was executed.
/*-------------------------------------------------------------------------
 *
 * gist.c
 *    interface routines for the postgres GiST index access method.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/access/gist/gist.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gist_private.h"
#include "access/gistscan.h"
#include "access/xloginsert.h"
#include "catalog/pg_collation.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "storage/predicate.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/* non-export function prototypes */
static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
              GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
               GISTSTATE *giststate,
               IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
               Buffer leftchild, Buffer rightchild,
               bool unlockbuf, bool unlockleftchild);
static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
              GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
static void gistprunepage(Relation rel, Page page, Buffer buffer,
              Relation heapRel);


#define ROTATEDIST(d) do { \
  SplitPageLayout *tmp = (SplitPageLayout *) palloc0(sizeof(SplitPageLayout)); \
  tmp->block.blkno = InvalidBlockNumber; \
  tmp->buffer = InvalidBuffer; \
  tmp->next = (d); \
  (d)=tmp; \
} while(0)
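
/*
 * ROTATEDIST prepends a freshly zeroed SplitPageLayout node to the list
 * 'd'; gistSplit() below uses it to build, right to left, the chain of
 * page halves produced by a split.
 */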


/*
 * GiST handler function: return IndexAmRoutine with access method parameters
 * and callbacks.
 */
Datum
gisthandler(PG_FUNCTION_ARGS)
{
  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

  amroutine->amstrategies = 0;
  amroutine->amsupport = GISTNProcs;
  amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
  amroutine->amcanorder = false;
  amroutine->amcanorderbyop = true;
  amroutine->amcanhash = false;
  amroutine->amconsistentequality = false;
  amroutine->amconsistentordering = false;
  amroutine->amcanbackward = false;
  amroutine->amcanunique = false;
  amroutine->amcanmulticol = true;
  amroutine->amoptionalkey = true;
  amroutine->amsearcharray = false;
  amroutine->amsearchnulls = true;
  amroutine->amstorage = true;
  amroutine->amclusterable = true;
  amroutine->ampredlocks = true;
  amroutine->amcanparallel = false;
  amroutine->amcanbuildparallel = false;
  amroutine->amcaninclude = true;
  amroutine->amusemaintenanceworkmem = false;
  amroutine->amsummarizing = false;
  amroutine->amparallelvacuumoptions =
    VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
  amroutine->amkeytype = InvalidOid;

  amroutine->ambuild = gistbuild;
  amroutine->ambuildempty = gistbuildempty;
  amroutine->aminsert = gistinsert;
  amroutine->aminsertcleanup = NULL;
  amroutine->ambulkdelete = gistbulkdelete;
  amroutine->amvacuumcleanup = gistvacuumcleanup;
  amroutine->amcanreturn = gistcanreturn;
  amroutine->amcostestimate = gistcostestimate;
  amroutine->amgettreeheight = NULL;
  amroutine->amoptions = gistoptions;
  amroutine->amproperty = gistproperty;
  amroutine->ambuildphasename = NULL;
  amroutine->amvalidate = gistvalidate;
  amroutine->amadjustmembers = gistadjustmembers;
  amroutine->ambeginscan = gistbeginscan;
  amroutine->amrescan = gistrescan;
  amroutine->amgettuple = gistgettuple;
  amroutine->amgetbitmap = gistgetbitmap;
  amroutine->amendscan = gistendscan;
  amroutine->ammarkpos = NULL;
  amroutine->amrestrpos = NULL;
  amroutine->amestimateparallelscan = NULL;
  amroutine->aminitparallelscan = NULL;
  amroutine->amparallelrescan = NULL;
  amroutine->amtranslatestrategy = NULL;
  amroutine->amtranslatecmptype = gisttranslatecmptype;

  PG_RETURN_POINTER(amroutine);
}
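
/*
 * Illustrative note (an addition, not upstream source): gisthandler is
 * reached through its pg_am catalog entry whenever a GiST index is created
 * or opened, e.g., for a hypothetical table:
 *
 *    CREATE INDEX ON points USING gist (pt);
 *
 * The IndexAmRoutine filled in above supplies every callback the core uses
 * to build, modify, and scan such an index.
 */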

/*
 * Create and return a temporary memory context for use by GiST. We
 * _always_ invoke user-provided methods in a temporary memory
 * context, so that memory leaks in those functions cannot cause
 * problems. Also, we use some additional temporary contexts in the
 * GiST code itself, to avoid the need to do some awkward manual
 * memory management.
 */
MemoryContext
createTempGistContext(void)
{
  return AllocSetContextCreate(CurrentMemoryContext,
                 "GiST temporary context",
                 ALLOCSET_DEFAULT_SIZES);
}
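
/*
 * The canonical usage pattern appears in gistinsert() below: switch into
 * this temporary context, invoke the user-provided support functions,
 * switch back, and reset the context once the tuple has been processed.
 */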

/*
 *  gistbuildempty() -- build an empty gist index in the initialization fork
 */
void
gistbuildempty(Relation index)
{
  Buffer    buffer;

  /* Initialize the root page */
  buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
                 EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);

  /* Initialize and xlog buffer */
  START_CRIT_SECTION();
  GISTInitBuffer(buffer, F_LEAF);
  MarkBufferDirty(buffer);
  log_newpage_buffer(buffer, true);
  END_CRIT_SECTION();

  /* Unlock and release the buffer */
  UnlockReleaseBuffer(buffer);
}
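
/*
 * Note: writing the init fork (and WAL-logging it via log_newpage_buffer)
 * is what makes unlogged GiST indexes crash-safe: at the end of recovery
 * the init fork is copied over the main fork, leaving an empty but
 * structurally valid index.
 */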

/*
 *  gistinsert -- wrapper for GiST tuple insertion.
 *
 *    This is the public interface routine for tuple insertion in GiSTs.
 *    It doesn't do any work; just locks the relation and passes the buck.
 */
bool
gistinsert(Relation r, Datum *values, bool *isnull,
       ItemPointer ht_ctid, Relation heapRel,
       IndexUniqueCheck checkUnique,
       bool indexUnchanged,
       IndexInfo *indexInfo)
{
  GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
  IndexTuple  itup;
  MemoryContext oldCxt;

  /* Initialize GISTSTATE cache if first call in this statement */
  if (giststate == NULL)
  {
    oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
    giststate = initGISTstate(r);
    giststate->tempCxt = createTempGistContext();
    indexInfo->ii_AmCache = giststate;
    MemoryContextSwitchTo(oldCxt);
  }

  oldCxt = MemoryContextSwitchTo(giststate->tempCxt);

  itup = gistFormTuple(giststate, r, values, isnull, true);
  itup->t_tid = *ht_ctid;

  gistdoinsert(r, itup, 0, giststate, heapRel, false);

  /* cleanup */
  MemoryContextSwitchTo(oldCxt);
  MemoryContextReset(giststate->tempCxt);

  return false;
}
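
/*
 * GiST does not support unique indexes, so the result above (meaningful,
 * per the index AM contract, only for deferred uniqueness checks) is
 * unconditionally false.
 */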


/*
 * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
 * at that offset is atomically removed along with inserting the new tuples.
 * This is used to replace a tuple with a new one.
 *
 * If 'leftchildbuf' is valid, we're inserting the downlink for the page
 * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
 * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
 *
 * If 'markfollowright' is true and the page is split, the left child is
 * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
 * index build, however, there is no concurrent access and the page splitting
 * is done in a slightly simpler fashion, and false is passed.
 *
 * If there is not enough room on the page, it is split. All the split
 * pages are kept pinned and locked and returned in *splitinfo, the caller
 * is responsible for inserting the downlinks for them. However, if
 * 'buffer' is the root page and it needs to be split, gistplacetopage()
 * performs the split as one atomic operation, and *splitinfo is set to NIL.
 * In that case, we continue to hold the root page locked, and the child
 * pages are released; note that new tuple(s) are *not* on the root page
 * but in one of the new child pages.
 *
 * If 'newblkno' is not NULL, returns the block number of page the first
 * new/updated tuple was inserted to. Usually it's the given page, but could
 * be its right sibling if the page was split.
 *
 * Returns 'true' if the page was split, 'false' otherwise.
 */
bool
gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
        Buffer buffer,
        IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
        BlockNumber *newblkno,
        Buffer leftchildbuf,
        List **splitinfo,
        bool markfollowright,
        Relation heapRel,
        bool is_build)
{
  BlockNumber blkno = BufferGetBlockNumber(buffer);
  Page    page = BufferGetPage(buffer);
  bool    is_leaf = (GistPageIsLeaf(page)) ? true : false;
  XLogRecPtr  recptr;
  bool    is_split;

  /*
   * Refuse to modify a page that's incompletely split. This should not
   * happen because we finish any incomplete splits while we walk down the
   * tree. However, it's remotely possible that another concurrent inserter
   * splits a parent page, and errors out before completing the split. We
   * will just throw an error in that case, and leave any split we had in
   * progress unfinished too. The next insert that comes along will clean up
   * the mess.
   */
  if (GistFollowRight(page))
    elog(ERROR, "concurrent GiST page split was incomplete");

  /* should never try to insert to a deleted page */
  Assert(!GistPageIsDeleted(page));

  *splitinfo = NIL;

  /*
   * if isupdate, remove old key: This node's key has been modified, either
   * because a child split occurred or because we needed to adjust our key
   * for an insert in a child node. Therefore, remove the old version of
   * this node's key.
   *
   * for WAL replay, in the non-split case we handle this by setting up a
   * one-element todelete array; in the split case, it's handled implicitly
   * because the tuple vector passed to gistSplit won't include this tuple.
   */
  is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);

  /*
   * If leaf page is full, try at first to delete dead tuples. And then
   * check again.
   */
  if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
  {
    gistprunepage(rel, page, buffer, heapRel);
    is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
  }

  if (is_split)
  {
    /* no space for insertion */
    IndexTuple *itvec;
    int     tlen;
    SplitPageLayout *dist = NULL,
           *ptr;
    BlockNumber oldrlink = InvalidBlockNumber;
    GistNSN   oldnsn = 0;
    SplitPageLayout rootpg;
    bool    is_rootsplit;
    int     npage;

    is_rootsplit = (blkno == GIST_ROOT_BLKNO);

    /*
     * Form index tuples vector to split. If we're replacing an old tuple,
     * remove the old version from the vector.
     */
    itvec = gistextractpage(page, &tlen);
    if (OffsetNumberIsValid(oldoffnum))
    {
      /* on inner page we should remove old tuple */
      int     pos = oldoffnum - FirstOffsetNumber;

      tlen--;
      if (pos != tlen)
        memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
    }
    itvec = gistjoinvector(itvec, &tlen, itup, ntup);
    dist = gistSplit(rel, page, itvec, tlen, giststate);

    /*
     * Check that split didn't produce too many pages.
     */
    npage = 0;
    for (ptr = dist; ptr; ptr = ptr->next)
      npage++;
    /* in a root split, we'll add one more page to the list below */
    if (is_rootsplit)
      npage++;
    if (npage > GIST_MAX_SPLIT_PAGES)
      elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
         npage, GIST_MAX_SPLIT_PAGES);

    /*
     * Set up pages to work with. Allocate new buffers for all but the
     * leftmost page. The original page becomes the new leftmost page, and
     * is just replaced with the new contents.
     *
     * For a root-split, allocate new buffers for all child pages, the
     * original page is overwritten with new root page containing
     * downlinks to the new child pages.
     */
    ptr = dist;
    if (!is_rootsplit)
    {
      /* save old rightlink and NSN */
      oldrlink = GistPageGetOpaque(page)->rightlink;
      oldnsn = GistPageGetNSN(page);

      dist->buffer = buffer;
      dist->block.blkno = BufferGetBlockNumber(buffer);
      dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));

      /* clean all flags except F_LEAF */
      GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;

      ptr = ptr->next;
    }
    for (; ptr; ptr = ptr->next)
    {
      /* Allocate new page */
      ptr->buffer = gistNewBuffer(rel, heapRel);
      GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
      ptr->page = BufferGetPage(ptr->buffer);
      ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
      PredicateLockPageSplit(rel,
                   BufferGetBlockNumber(buffer),
                   BufferGetBlockNumber(ptr->buffer));
    }

    /*
     * Now that we know which blocks the new pages go to, set up downlink
     * tuples to point to them.
     */
    for (ptr = dist; ptr; ptr = ptr->next)
    {
      ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
      GistTupleSetValid(ptr->itup);
    }

    /*
     * If this is a root split, we construct the new root page with the
     * downlinks here directly, instead of requiring the caller to insert
     * them. Add the new root page to the list along with the child pages.
     */
    if (is_rootsplit)
    {
      IndexTuple *downlinks;
      int     ndownlinks = 0;
      int     i;

      rootpg.buffer = buffer;
      rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
      GistPageGetOpaque(rootpg.page)->flags = 0;

      /* Prepare a vector of all the downlinks */
      for (ptr = dist; ptr; ptr = ptr->next)
        ndownlinks++;
      downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
      for (i = 0, ptr = dist; ptr; ptr = ptr->next)
        downlinks[i++] = ptr->itup;

      rootpg.block.blkno = GIST_ROOT_BLKNO;
      rootpg.block.num = ndownlinks;
      rootpg.list = gistfillitupvec(downlinks, ndownlinks,
                      &(rootpg.lenlist));
      rootpg.itup = NULL;

      rootpg.next = dist;
      dist = &rootpg;
    }
    else
    {
      /* Prepare split-info to be returned to caller */
      for (ptr = dist; ptr; ptr = ptr->next)
      {
        GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));

        si->buf = ptr->buffer;
        si->downlink = ptr->itup;
        *splitinfo = lappend(*splitinfo, si);
      }
    }

    /*
     * Fill all pages. All the pages are new, ie. freshly allocated empty
     * pages, or a temporary copy of the old page.
     */
    for (ptr = dist; ptr; ptr = ptr->next)
    {
      char     *data = (char *) (ptr->list);

      for (int i = 0; i < ptr->block.num; i++)
      {
        IndexTuple  thistup = (IndexTuple) data;

        if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
          elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));

        /*
         * If this is the first inserted/updated tuple, let the caller
         * know which page it landed on.
         */
        if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
          *newblkno = ptr->block.blkno;

        data += IndexTupleSize(thistup);
      }

      /* Set up rightlinks */
      if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
        GistPageGetOpaque(ptr->page)->rightlink =
          ptr->next->block.blkno;
      else
        GistPageGetOpaque(ptr->page)->rightlink = oldrlink;

      /*
       * Mark all but the right-most page with the follow-right
       * flag. It will be cleared as soon as the downlink is inserted
       * into the parent, but this ensures that if we error out before
       * that, the index is still consistent. (in buffering build mode,
       * any error will abort the index build anyway, so this is not
       * needed.)
       */
      if (ptr->next && !is_rootsplit && markfollowright)
        GistMarkFollowRight(ptr->page);
      else
        GistClearFollowRight(ptr->page);

      /*
       * Copy the NSN of the original page to all pages. The
       * F_FOLLOW_RIGHT flags ensure that scans will follow the
       * rightlinks until the downlinks are inserted.
       */
      GistPageSetNSN(ptr->page, oldnsn);
    }

    /*
     * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
     * insertion for that. NB: The number of pages and data segments
     * specified here must match the calculations in gistXLogSplit()!
     */
    if (!is_build && RelationNeedsWAL(rel))
      XLogEnsureRecordSpace(npage, 1 + npage * 2);

    START_CRIT_SECTION();

    /*
     * Must mark buffers dirty before XLogInsert, even though we'll still
     * be changing their opaque fields below.
     */
    for (ptr = dist; ptr; ptr = ptr->next)
      MarkBufferDirty(ptr->buffer);
    if (BufferIsValid(leftchildbuf))
      MarkBufferDirty(leftchildbuf);

    /*
     * The first page in the chain was a temporary working copy meant to
     * replace the old page. Copy it over the old page.
     */
    PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
    dist->page = BufferGetPage(dist->buffer);

    /*
     * Write the WAL record.
     *
     * If we're building a new index, however, we don't WAL-log changes
     * yet. The LSN-NSN interlock between parent and child requires that
     * LSNs never move backwards, so set the LSNs to a value that's
     * smaller than any real or fake unlogged LSN that might be generated
     * later. (There can't be any concurrent scans during index build, so
     * we don't need to be able to detect concurrent splits yet.)
     */
    if (is_build)
      recptr = GistBuildLSN;
    else
    {
      if (RelationNeedsWAL(rel))
        recptr = gistXLogSplit(is_leaf,
                     dist, oldrlink, oldnsn, leftchildbuf,
                     markfollowright);
      else
        recptr = gistGetFakeLSN(rel);
    }

    for (ptr = dist; ptr; ptr = ptr->next)
      PageSetLSN(ptr->page, recptr);

    /*
     * Return the new child buffers to the caller.
     *
     * If this was a root split, we've already inserted the downlink
     * pointers, in the form of a new root page. Therefore we can release
     * all the new buffers, and keep just the root page locked.
     */
    if (is_rootsplit)
    {
      for (ptr = dist->next; ptr; ptr = ptr->next)
        UnlockReleaseBuffer(ptr->buffer);
    }
  }
  else
  {
    /*
     * Enough space.  We always get here if ntup==0.
     */
    START_CRIT_SECTION();

    /*
     * Delete old tuple if any, then insert new tuple(s) if any.  If
     * possible, use the fast path of PageIndexTupleOverwrite.
     */
    if (OffsetNumberIsValid(oldoffnum))
    {
      if (ntup == 1)
      {
        /* One-for-one replacement, so use PageIndexTupleOverwrite */
        if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
                       IndexTupleSize(*itup)))
          elog(ERROR, "failed to add item to index page in \"%s\"",
             RelationGetRelationName(rel));
      }
      else
      {
        /* Delete old, then append new tuple(s) to page */
        PageIndexTupleDelete(page, oldoffnum);
        gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
      }
    }
    else
    {
      /* Just append new tuples at the end of the page */
      gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
    }

    MarkBufferDirty(buffer);

    if (BufferIsValid(leftchildbuf))
      MarkBufferDirty(leftchildbuf);

    if (is_build)
      recptr = GistBuildLSN;
    else
    {
      if (RelationNeedsWAL(rel))
      {
        OffsetNumber ndeloffs = 0,
              deloffs[1];

        if (OffsetNumberIsValid(oldoffnum))
        {
          deloffs[0] = oldoffnum;
          ndeloffs = 1;
        }

        recptr = gistXLogUpdate(buffer,
                    deloffs, ndeloffs, itup, ntup,
                    leftchildbuf);
      }
      else
        recptr = gistGetFakeLSN(rel);
    }
    PageSetLSN(page, recptr);

    if (newblkno)
      *newblkno = blkno;
  }

  /*
   * If we inserted the downlink for a child page, set NSN and clear
   * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
   * follow the rightlink if and only if they looked at the parent page
   * before we inserted the downlink.
   *
   * Note that we do this *after* writing the WAL record. That means that
   * the possible full page image in the WAL record does not include these
   * changes, and they must be replayed even if the page is restored from
   * the full page image. There's a chicken-and-egg problem: if we updated
   * the child pages first, we wouldn't know the recptr of the WAL record
   * we're about to write.
   */
  if (BufferIsValid(leftchildbuf))
  {
    Page    leftpg = BufferGetPage(leftchildbuf);

    GistPageSetNSN(leftpg, recptr);
    GistClearFollowRight(leftpg);

    PageSetLSN(leftpg, recptr);
  }

  END_CRIT_SECTION();

  return is_split;
}
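
/*
 * Caller sketch (hypothetical, for orientation only; the real call sites
 * are gistinserttuples() below and the buffered build code):
 *
 *    List *splitinfo;
 *
 *    if (gistplacetopage(rel, freespace, giststate, buffer,
 *                        &itup, 1, InvalidOffsetNumber,
 *                        NULL, InvalidBuffer, &splitinfo,
 *                        true, heapRel, false))
 *        ... page was split: insert the downlinks in *splitinfo ...
 */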

/*
 * Workhorse routine for doing insertion into a GiST index. Note that
 * this routine assumes it is invoked in a short-lived memory context,
 * so it does not bother releasing palloc'd allocations.
 */
void
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
       GISTSTATE *giststate, Relation heapRel, bool is_build)
{
  ItemId    iid;
  IndexTuple  idxtuple;
  GISTInsertStack firststack;
  GISTInsertStack *stack;
  GISTInsertState state;
  bool    xlocked = false;

  memset(&state, 0, sizeof(GISTInsertState));
  state.freespace = freespace;
  state.r = r;
  state.heapRel = heapRel;
  state.is_build = is_build;

  /* Start from the root */
  firststack.blkno = GIST_ROOT_BLKNO;
  firststack.lsn = 0;
  firststack.retry_from_parent = false;
  firststack.parent = NULL;
  firststack.downlinkoffnum = InvalidOffsetNumber;
  state.stack = stack = &firststack;

  /*
   * Walk down along the path of smallest penalty, updating the parent
   * pointers with the key we're inserting as we go. If we crash in the
   * middle, the tree is consistent, although the possible parent updates
   * were a waste.
   */
  for (;;)
  {
    /*
     * If we split an internal page while descending the tree, we have to
     * retry at the parent. (Normally, the LSN-NSN interlock below would
     * also catch this and cause us to retry. But LSNs are not updated
     * during index build.)
     */
    while (stack->retry_from_parent)
    {
      if (xlocked)
        LockBuffer(stack->buffer, GIST_UNLOCK);
      xlocked = false;
      ReleaseBuffer(stack->buffer);
      state.stack = stack = stack->parent;
    }

    if (XLogRecPtrIsInvalid(stack->lsn))
      stack->buffer = ReadBuffer(state.r, stack->blkno);

    /*
     * Be optimistic and grab shared lock first. Swap it for an exclusive
     * lock later if we need to update the page.
     */
    if (!xlocked)
    {
      LockBuffer(stack->buffer, GIST_SHARE);
      gistcheckpage(state.r, stack->buffer);
    }

    stack->page = (Page) BufferGetPage(stack->buffer);
    stack->lsn = xlocked ?
      PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
    Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));

    /*
     * If this page was split but the downlink was never inserted to the
     * parent because the inserting backend crashed before doing that, fix
     * that now.
     */
    if (GistFollowRight(stack->page))
    {
      if (!xlocked)
      {
        LockBuffer(stack->buffer, GIST_UNLOCK);
        LockBuffer(stack->buffer, GIST_EXCLUSIVE);
        xlocked = true;
        /* someone might've completed the split when we unlocked */
        if (!GistFollowRight(stack->page))
          continue;
      }
      gistfixsplit(&state, giststate);

      UnlockReleaseBuffer(stack->buffer);
      xlocked = false;
      state.stack = stack = stack->parent;
      continue;
    }

    if ((stack->blkno != GIST_ROOT_BLKNO &&
       stack->parent->lsn < GistPageGetNSN(stack->page)) ||
      GistPageIsDeleted(stack->page))
    {
      /*
       * Concurrent split or page deletion detected. There's no
       * guarantee that the downlink for this page is consistent with
       * the tuple we're inserting anymore, so go back to parent and
       * rechoose the best child.
       */
      UnlockReleaseBuffer(stack->buffer);
      xlocked = false;
      state.stack = stack = stack->parent;
      continue;
    }

    if (!GistPageIsLeaf(stack->page))
    {
      /*
       * This is an internal page so continue to walk down the tree.
       * Find the child node that has the minimum insertion penalty.
       */
      BlockNumber childblkno;
      IndexTuple  newtup;
      GISTInsertStack *item;
      OffsetNumber downlinkoffnum;

      downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
      iid = PageGetItemId(stack->page, downlinkoffnum);
      idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
      childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

      /*
       * Check that it's not a leftover invalid tuple from pre-9.1
       */
      if (GistTupleIsInvalid(idxtuple))
        ereport(ERROR,
            (errmsg("index \"%s\" contains an inner tuple marked as invalid",
                RelationGetRelationName(r)),
             errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
             errhint("Please REINDEX it.")));

      /*
       * Check that the key representing the target child node is
       * consistent with the key we're inserting. Update it if it's not.
       */
      newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
      if (newtup)
      {
        /*
         * Swap shared lock for an exclusive one. Beware, the page may
         * change while we unlock/lock the page...
         */
        if (!xlocked)
        {
          LockBuffer(stack->buffer, GIST_UNLOCK);
          LockBuffer(stack->buffer, GIST_EXCLUSIVE);
          xlocked = true;
          stack->page = (Page) BufferGetPage(stack->buffer);

          if (PageGetLSN(stack->page) != stack->lsn)
          {
            /* the page was changed while we unlocked it, retry */
            continue;
          }
        }

        /*
         * Update the tuple.
         *
         * We still hold the lock after gistinserttuple(), but it
         * might have to split the page to make the updated tuple fit.
         * In that case the updated tuple might migrate to the other
         * half of the split, so we have to go back to the parent and
         * descend back to the half that's a better fit for the new
         * tuple.
         */
        if (gistinserttuple(&state, stack, giststate, newtup,
                  downlinkoffnum))
        {
          /*
           * If this was a root split, the root page continues to be
           * the parent and the updated tuple went to one of the
           * child pages, so we just need to retry from the root
           * page.
           */
          if (stack->blkno != GIST_ROOT_BLKNO)
          {
            UnlockReleaseBuffer(stack->buffer);
            xlocked = false;
            state.stack = stack = stack->parent;
          }
          continue;
        }
      }
      LockBuffer(stack->buffer, GIST_UNLOCK);
      xlocked = false;

      /* descend to the chosen child */
      item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
      item->blkno = childblkno;
      item->parent = stack;
      item->downlinkoffnum = downlinkoffnum;
      state.stack = stack = item;
    }
    else
    {
      /*
       * Leaf page. Insert the new key. We've already updated all the
       * parents on the way down, but we might have to split the page if
       * it doesn't fit. gistinserttuple() will take care of that.
       */

      /*
       * Swap shared lock for an exclusive one. Be careful, the page may
       * change while we unlock/lock the page...
       */
      if (!xlocked)
      {
        LockBuffer(stack->buffer, GIST_UNLOCK);
        LockBuffer(stack->buffer, GIST_EXCLUSIVE);
        xlocked = true;
        stack->page = (Page) BufferGetPage(stack->buffer);
        stack->lsn = PageGetLSN(stack->page);

        if (stack->blkno == GIST_ROOT_BLKNO)
        {
          /*
           * the only page that can become inner instead of leaf is
           * the root page, so for root we should recheck it
           */
          if (!GistPageIsLeaf(stack->page))
          {
            /*
             * very rare situation: while the page was unlocked,
             * the one-page index grew and the root is no longer
             * a leaf
             */
            LockBuffer(stack->buffer, GIST_UNLOCK);
            xlocked = false;
            continue;
          }

          /*
           * we don't need to check root split, because checking
           * leaf/inner is enough to recognize split for root
           */
        }
        else if ((GistFollowRight(stack->page) ||
              stack->parent->lsn < GistPageGetNSN(stack->page)) ||
             GistPageIsDeleted(stack->page))
        {
          /*
           * The page was split or deleted while we momentarily
           * unlocked the page. Go back to parent.
           */
          UnlockReleaseBuffer(stack->buffer);
          xlocked = false;
          state.stack = stack = stack->parent;
          continue;
        }
      }

      /* now state.stack->(page, buffer and blkno) points to leaf page */

      gistinserttuple(&state, stack, giststate, itup,
              InvalidOffsetNumber);
      LockBuffer(stack->buffer, GIST_UNLOCK);

      /* Release any pins we might still hold before exiting */
      for (; stack; stack = stack->parent)
        ReleaseBuffer(stack->buffer);
      break;
    }
  }
}
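
/*
 * Note: gistinsert() above calls this with freespace = 0, while the index
 * build code passes a fillfactor-derived value, so that pages written
 * during CREATE INDEX keep some slack for later insertions.
 */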

/*
 * Traverse the tree to find path from root page to specified "child" block.
 *
 * returns a new insertion stack, starting from the parent of "child", up
 * to the root. *downlinkoffnum is set to the offset of the downlink in the
 * direct parent of child.
 *
 * To prevent deadlocks, this should lock only one page at a time.
 */
static GISTInsertStack *
gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
{
  Page    page;
  Buffer    buffer;
  OffsetNumber i,
        maxoff;
  ItemId    iid;
  IndexTuple  idxtuple;
  List     *fifo;
  GISTInsertStack *top,
         *ptr;
  BlockNumber blkno;

  top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
  top->blkno = GIST_ROOT_BLKNO;
  top->downlinkoffnum = InvalidOffsetNumber;

  fifo = list_make1(top);
  while (fifo != NIL)
  {
    /* Get next page to visit */
    top = linitial(fifo);
    fifo = list_delete_first(fifo);

    buffer = ReadBuffer(r, top->blkno);
    LockBuffer(buffer, GIST_SHARE);
    gistcheckpage(r, buffer);
    page = (Page) BufferGetPage(buffer);

    if (GistPageIsLeaf(page))
    {
      /*
       * Because we scan the index top-down, all the rest of the pages
       * in the queue must be leaf pages as well.
       */
      UnlockReleaseBuffer(buffer);
      break;
    }

    /* currently, internal pages are never deleted */
    Assert(!GistPageIsDeleted(page));

    top->lsn = BufferGetLSNAtomic(buffer);

    /*
     * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
     * downlink. This should not normally happen.
     */
    if (GistFollowRight(page))
      elog(ERROR, "concurrent GiST page split was incomplete");

    if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
      GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
    {
      /*
       * Page was split while we looked elsewhere. We didn't see the
       * downlink to the right page when we scanned the parent, so add
       * it to the queue now.
       *
       * Put the right page ahead of the queue, so that we visit it
       * next. That's important, because if this is the lowest internal
       * level, just above leaves, we might already have queued up some
       * leaf pages, and we assume that there can't be any non-leaf
       * pages behind leaf pages.
       */
      ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
      ptr->blkno = GistPageGetOpaque(page)->rightlink;
      ptr->downlinkoffnum = InvalidOffsetNumber;
      ptr->parent = top->parent;

      fifo = lcons(ptr, fifo);
    }

    maxoff = PageGetMaxOffsetNumber(page);

    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    {
      iid = PageGetItemId(page, i);
      idxtuple = (IndexTuple) PageGetItem(page, iid);
      blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
      if (blkno == child)
      {
        /* Found it! */
        UnlockReleaseBuffer(buffer);
        *downlinkoffnum = i;
        return top;
      }
      else
      {
        /* Append this child to the list of pages to visit later */
        ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
        ptr->blkno = blkno;
        ptr->downlinkoffnum = i;
        ptr->parent = top;

        fifo = lappend(fifo, ptr);
      }
    }

    UnlockReleaseBuffer(buffer);
  }

  elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
     RelationGetRelationName(r), child);
  return NULL;       /* keep compiler quiet */
}

/*
 * Updates the stack so that child->parent is the correct parent of the
 * child. child->parent must be exclusively locked on entry, and will
 * remain so at exit, but it might not be the same page anymore.
 */
static void
gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
{
  GISTInsertStack *parent = child->parent;
  ItemId    iid;
  IndexTuple  idxtuple;
  OffsetNumber maxoff;
  GISTInsertStack *ptr;

  gistcheckpage(r, parent->buffer);
  parent->page = (Page) BufferGetPage(parent->buffer);
  maxoff = PageGetMaxOffsetNumber(parent->page);

  /* Check if the downlink is still where it was before */
  if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
  {
    iid = PageGetItemId(parent->page, child->downlinkoffnum);
    idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
    if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
      return;       /* still there */
  }

  /*
   * The page has changed since we looked. During normal operation, every
   * update of a page changes its LSN, so the LSN we memorized should have
   * changed too.
   *
   * During index build, however, we don't WAL-log the changes until we have
   * built the index, so the LSN doesn't change. There is no concurrent
   * activity during index build, but we might have changed the parent
   * ourselves.
   *
   * We will also get here if child->downlinkoffnum is invalid. That happens
   * if 'parent' had been updated by an earlier call to this function on its
   * grandchild, which had to move right.
   */
  Assert(parent->lsn != PageGetLSN(parent->page) || is_build ||
       child->downlinkoffnum == InvalidOffsetNumber);

  /*
   * Scan the page to re-find the downlink. If the page was split, it might
   * have moved to a different page, so follow the right links until we find
   * it.
   */
  while (true)
  {
    OffsetNumber i;

    maxoff = PageGetMaxOffsetNumber(parent->page);
    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    {
      iid = PageGetItemId(parent->page, i);
      idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
      if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
      {
        /* found it */
        child->downlinkoffnum = i;
        return;
      }
    }

    parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
    parent->downlinkoffnum = InvalidOffsetNumber;
    UnlockReleaseBuffer(parent->buffer);
    if (parent->blkno == InvalidBlockNumber)
    {
      /*
       * End of chain and we still didn't find the parent. This is a
       * very rare situation, possible only when the root was split.
       */
      break;
    }
    parent->buffer = ReadBuffer(r, parent->blkno);
    LockBuffer(parent->buffer, GIST_EXCLUSIVE);
    gistcheckpage(r, parent->buffer);
    parent->page = (Page) BufferGetPage(parent->buffer);
  }

  /*
   * Awful: we have to search the whole tree to re-find the parent, but
   * before that we should release all the old parent buffers.
   */

  ptr = child->parent->parent;  /* child->parent already released above */
  while (ptr)
  {
    ReleaseBuffer(ptr->buffer);
    ptr = ptr->parent;
  }

  /* ok, find new path */
  ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);

  /* read all buffers as expected by caller */
  /* note we don't lock them or gistcheckpage them here! */
  while (ptr)
  {
    ptr->buffer = ReadBuffer(r, ptr->blkno);
    ptr->page = (Page) BufferGetPage(ptr->buffer);
    ptr = ptr->parent;
  }

  /* install new chain of parents to stack */
  child->parent = parent;

  /* make recursive call to normal processing */
  LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
  gistFindCorrectParent(r, child, is_build);
}

/*
 * Form a downlink pointer for the page in 'buf'.
 */
static IndexTuple
gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
         GISTInsertStack *stack, bool is_build)
{
  Page    page = BufferGetPage(buf);
  OffsetNumber maxoff;
  OffsetNumber offset;
  IndexTuple  downlink = NULL;

  maxoff = PageGetMaxOffsetNumber(page);
  for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
  {
    IndexTuple  ituple = (IndexTuple)
      PageGetItem(page, PageGetItemId(page, offset));

    if (downlink == NULL)
      downlink = CopyIndexTuple(ituple);
    else
    {
      IndexTuple  newdownlink;

      newdownlink = gistgetadjusted(rel, downlink, ituple,
                      giststate);
      if (newdownlink)
        downlink = newdownlink;
    }
  }

  /*
   * If the page is completely empty, we can't form a meaningful downlink
   * for it. But we have to insert a downlink for the page. Any key will do,
   * as long as it's consistent with the downlink of the parent page, so
   * that we can legally insert it to the parent. A minimal one that matches
   * as few scans as possible would be best, to keep scans from doing
   * useless work, but we don't know how to construct that. So we just use
   * the downlink of the original page that was split; that's as far from
   * optimal as it can get but will do.
   */
  if (!downlink)
  {
    ItemId    iid;

    LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
    gistFindCorrectParent(rel, stack, is_build);
    iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
    downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
    downlink = CopyIndexTuple(downlink);
    LockBuffer(stack->parent->buffer, GIST_UNLOCK);
  }

  ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
  GistTupleSetValid(downlink);

  return downlink;
}


/*
 * Complete the incomplete split of state->stack->page.
 */
static void
gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
{
  GISTInsertStack *stack = state->stack;
  Buffer    buf;
  Page    page;
  List     *splitinfo = NIL;

  ereport(LOG,
      (errmsg("fixing incomplete split in index \"%s\", block %u",
          RelationGetRelationName(state->r), stack->blkno)));

  Assert(GistFollowRight(stack->page));
  Assert(OffsetNumberIsValid(stack->downlinkoffnum));

  buf = stack->buffer;

  /*
   * Read the chain of split pages, following the rightlinks. Construct a
   * downlink tuple for each page.
   */
  for (;;)
  {
    GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
    IndexTuple  downlink;

    page = BufferGetPage(buf);

    /* Form the new downlink tuples to insert to parent */
    downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);

    si->buf = buf;
    si->downlink = downlink;

    splitinfo = lappend(splitinfo, si);

    if (GistFollowRight(page))
    {
      /* lock next page */
      buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
      LockBuffer(buf, GIST_EXCLUSIVE);
    }
    else
      break;
  }

  /* Insert the downlinks */
  gistfinishsplit(state, stack, giststate, splitinfo, false);
}

/*
 * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
 * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
 * 'stack' represents the path from the root to the page being updated.
 *
 * The caller must hold an exclusive lock on stack->buffer.  The lock is still
 * held on return, but the page might not contain the inserted tuple if the
 * page was split. The function returns true if the page was split, false
 * otherwise.
 */
static bool
gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
        GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
{
  return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
              InvalidBuffer, InvalidBuffer, false, false);
}

/* ----------------
 * An extended workhorse version of gistinserttuple(). This version allows
 * inserting multiple tuples, or replacing a single tuple with multiple tuples.
 * This is used to recursively update the downlinks in the parent when a page
 * is split.
 *
 * If leftchild and rightchild are valid, we're inserting/replacing the
 * downlink for rightchild, and leftchild is its left sibling. We clear the
 * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
 * insertion of the downlink.
 *
 * To avoid holding locks for longer than necessary, when recursing up the
 * tree to update the parents, the locking is a bit peculiar here. On entry,
 * the caller must hold an exclusive lock on stack->buffer, as well as
 * leftchild and rightchild if given. On return:
 *
 *  - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
 *    always kept pinned, however.
 *  - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
 *    is kept pinned.
 *  - Lock and pin on 'rightchild' are always released.
 *
 * Returns 'true' if the page had to be split. Note that if the page was
 * split, the inserted/updated tuples might've been inserted to a right
 * sibling of stack->buffer instead of stack->buffer itself.
 */
static bool
gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
         GISTSTATE *giststate,
         IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
         Buffer leftchild, Buffer rightchild,
         bool unlockbuf, bool unlockleftchild)
{
  List     *splitinfo;
  bool    is_split;

  /*
   * Check for any rw conflicts (in serializable isolation level) just
   * before we intend to modify the page
   */
  CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));

  /* Insert the tuple(s) to the page, splitting the page if necessary */
  is_split = gistplacetopage(state->r, state->freespace, giststate,
                 stack->buffer,
                 tuples, ntup,
                 oldoffnum, NULL,
                 leftchild,
                 &splitinfo,
                 true,
                 state->heapRel,
                 state->is_build);

  /*
   * Before recursing up in case the page was split, release locks on the
   * child pages. We don't need to keep them locked when updating the
   * parent.
   */
  if (BufferIsValid(rightchild))
    UnlockReleaseBuffer(rightchild);
  if (BufferIsValid(leftchild) && unlockleftchild)
    LockBuffer(leftchild, GIST_UNLOCK);

  /*
   * If we had to split, insert/update the downlinks in the parent. If the
   * caller requested us to release the lock on stack->buffer, tell
   * gistfinishsplit() to do that as soon as it's safe to do so. If we
   * didn't have to split, release it ourselves.
   */
  if (splitinfo)
    gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
  else if (unlockbuf)
    LockBuffer(stack->buffer, GIST_UNLOCK);

  return is_split;
}

/*
 * Finish an incomplete split by inserting/updating the downlinks in parent
 * page. 'splitinfo' contains all the child pages involved in the split,
 * from left-to-right.
 *
 * On entry, the caller must hold a lock on stack->buffer and all the child
 * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
 * released on return. The child pages are always unlocked and unpinned.
 */
static void
gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
        GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
{
  GISTPageSplitInfo *right;
  GISTPageSplitInfo *left;
  IndexTuple  tuples[2];

  /* A split always contains at least two halves */
  Assert(list_length(splitinfo) >= 2);

  /*
   * We need to insert downlinks for each new page, and update the downlink
   * for the original (leftmost) page in the split. Begin at the rightmost
   * page, inserting one downlink at a time until there are only two pages
   * left. Finally insert the downlink for the last new page and update the
   * downlink for the original page as one operation.
   */
  LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);

  /*
   * Insert downlinks for the siblings from right to left, until there are
   * only two siblings left.
   */
  for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
  {
    right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
    left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);

    gistFindCorrectParent(state->r, stack, state->is_build);
    if (gistinserttuples(state, stack->parent, giststate,
               &right->downlink, 1,
               InvalidOffsetNumber,
               left->buf, right->buf, false, false))
    {
      /*
       * If the parent page was split, the existing downlink might have
       * moved.
       */
      stack->downlinkoffnum = InvalidOffsetNumber;
    }
    /* gistinserttuples() released the lock on right->buf. */
  }

  right = (GISTPageSplitInfo *) lsecond(splitinfo);
  left = (GISTPageSplitInfo *) linitial(splitinfo);

  /*
   * Finally insert downlink for the remaining right page and update the
   * downlink for the original page to not contain the tuples that were
   * moved to the new pages.
   */
  tuples[0] = left->downlink;
  tuples[1] = right->downlink;
  gistFindCorrectParent(state->r, stack, state->is_build);
  (void) gistinserttuples(state, stack->parent, giststate,
              tuples, 2,
              stack->downlinkoffnum,
              left->buf, right->buf,
              true, /* Unlock parent */
              unlockbuf /* Unlock stack->buffer if caller
                     * wants that */
    );

  /*
   * The downlink might have moved when we updated it; even if the page
   * wasn't split, gistinserttuples() implements updating the old tuple by
   * removing and re-inserting it!
   */
  stack->downlinkoffnum = InvalidOffsetNumber;

  Assert(left->buf == stack->buffer);

  /*
   * If we split the page because we had to adjust the downlink on an
   * internal page, while descending the tree for inserting a new tuple,
   * then this might no longer be the correct page for the new tuple. The
   * downlink to this page might not cover the new tuple anymore, it might
   * need to go to the newly-created right sibling instead. Tell the caller
   * to walk back up the stack, to re-check at the parent which page to
   * insert to.
   *
   * Normally, the LSN-NSN interlock during the tree descent would also
   * detect that a concurrent split happened (by ourselves), and cause us to
   * retry at the parent. But that mechanism doesn't work during index
   * build, because we don't do WAL-logging, and don't update LSNs, during
   * index build.
   */
  stack->retry_from_parent = true;
}
1443
1444
/*
1445
 * gistSplit -- split a page in the tree and fill struct
1446
 * used for XLOG and real writes buffers. Function is recursive, ie
1447
 * it will split page until keys will fit in every page.
1448
 */
1449
SplitPageLayout *
1450
gistSplit(Relation r,
1451
      Page page,
1452
      IndexTuple *itup,   /* contains compressed entry */
1453
      int len,
1454
      GISTSTATE *giststate)
1455
0
{
1456
0
  IndexTuple *lvectup,
1457
0
         *rvectup;
1458
0
  GistSplitVector v;
1459
0
  int     i;
1460
0
  SplitPageLayout *res = NULL;
1461
1462
  /* this should never recurse very deeply, but better safe than sorry */
1463
0
  check_stack_depth();
1464
1465
  /* there's no point in splitting an empty page */
1466
0
  Assert(len > 0);
1467
1468
  /*
1469
   * If a single tuple doesn't fit on a page, no amount of splitting will
1470
   * help.
1471
   */
1472
0
  if (len == 1)
1473
0
    ereport(ERROR,
1474
0
        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1475
0
         errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1476
0
            IndexTupleSize(itup[0]), GiSTPageSize,
1477
0
            RelationGetRelationName(r))));
1478
1479
0
  memset(v.spl_lisnull, true,
1480
0
       sizeof(bool) * giststate->nonLeafTupdesc->natts);
1481
0
  memset(v.spl_risnull, true,
1482
0
       sizeof(bool) * giststate->nonLeafTupdesc->natts);
1483
0
  gistSplitByKey(r, page, itup, len, giststate, &v, 0);
1484
1485
  /* form left and right vector */
1486
0
  lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1487
0
  rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1488
1489
0
  for (i = 0; i < v.splitVector.spl_nleft; i++)
1490
0
    lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
1491
1492
0
  for (i = 0; i < v.splitVector.spl_nright; i++)
1493
0
    rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1494
1495
  /* finalize splitting (may need another split) */
1496
0
  if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1497
0
  {
1498
0
    res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
1499
0
  }
1500
0
  else
1501
0
  {
1502
0
    ROTATEDIST(res);
1503
0
    res->block.num = v.splitVector.spl_nright;
1504
0
    res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
1505
0
    res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1506
0
  }
1507
1508
0
  if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1509
0
  {
1510
0
    SplitPageLayout *resptr,
1511
0
           *subres;
1512
1513
0
    resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1514
1515
    /* install at the list's tail */
1516
0
    while (resptr->next)
1517
0
      resptr = resptr->next;
1518
1519
0
    resptr->next = res;
1520
0
    res = subres;
1521
0
  }
1522
0
  else
1523
0
  {
1524
0
    ROTATEDIST(res);
1525
0
    res->block.num = v.splitVector.spl_nleft;
1526
0
    res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
1527
0
    res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1528
0
  }
1529
1530
0
  return res;
1531
0
}
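
The recursion is easier to see with the GiST specifics stripped away. Below is a minimal, compilable sketch of the same shape: keep splitting until every output page fits, and prepend each finished page to the result list the way ROTATEDIST does. The Page type, the fixed capacity, and the naive halving rule are illustrative stand-ins for SplitPageLayout and the opclass picksplit method:

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_CAPACITY 8         /* illustrative page size, in tuples */

    typedef struct Page
    {
        int         ntuples;
        int         tuples[PAGE_CAPACITY];
        struct Page *next;
    } Page;

    /* Prepend a fresh page to the result list, like ROTATEDIST. */
    static Page *
    rotate(Page *list)
    {
        Page       *p = calloc(1, sizeof(Page));

        p->next = list;
        return p;
    }

    /* Split tuples[0..len) into pages, recursing until each half fits. */
    static Page *
    split(const int *tuples, int len, Page *tail)
    {
        if (len <= PAGE_CAPACITY)
        {
            Page       *p = rotate(tail);

            for (int i = 0; i < len; i++)
                p->tuples[p->ntuples++] = tuples[i];
            return p;
        }
        /* naive halving stands in for picksplit's quality-driven choice */
        tail = split(tuples + len / 2, len - len / 2, tail);
        return split(tuples, len / 2, tail);
    }

    int
    main(void)
    {
        int         tuples[20];

        for (int i = 0; i < 20; i++)
            tuples[i] = i;
        for (Page *p = split(tuples, 20, NULL); p; p = p->next)
            printf("page with %d tuples\n", p->ntuples);
        return 0;
    }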
1532
1533
/*
1534
 * Create a GISTSTATE and fill it with information about the index
1535
 */
1536
GISTSTATE *
1537
initGISTstate(Relation index)
1538
0
{
1539
0
  GISTSTATE  *giststate;
1540
0
  MemoryContext scanCxt;
1541
0
  MemoryContext oldCxt;
1542
0
  int     i;
1543
1544
  /* safety check to protect fixed-size arrays in GISTSTATE */
1545
0
  if (index->rd_att->natts > INDEX_MAX_KEYS)
1546
0
    elog(ERROR, "numberOfAttributes %d > %d",
1547
0
       index->rd_att->natts, INDEX_MAX_KEYS);
1548
1549
  /* Create the memory context that will hold the GISTSTATE */
1550
0
  scanCxt = AllocSetContextCreate(CurrentMemoryContext,
1551
0
                  "GiST scan context",
1552
0
                  ALLOCSET_DEFAULT_SIZES);
1553
0
  oldCxt = MemoryContextSwitchTo(scanCxt);
1554
1555
  /* Create and fill in the GISTSTATE */
1556
0
  giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
1557
1558
0
  giststate->scanCxt = scanCxt;
1559
0
  giststate->tempCxt = scanCxt; /* caller must change this if needed */
1560
0
  giststate->leafTupdesc = index->rd_att;
1561
1562
  /*
1563
   * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1564
   * the INCLUDE attributes.
1565
   *
1566
   * It is used to form tuples during tuple adjustment and page split.
1567
   * B-tree creates a shortened tuple descriptor for every truncated tuple,
1568
   * because it does this less often: it does not have to form truncated
1569
   * tuples during page split.  Also, B-tree does not adjust tuples on
1570
   * internal pages the way GiST does.
1571
   */
1572
0
  giststate->nonLeafTupdesc = CreateTupleDescTruncatedCopy(index->rd_att,
1573
0
                               IndexRelationGetNumberOfKeyAttributes(index));
1574
1575
0
  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
1576
0
  {
1577
0
    fmgr_info_copy(&(giststate->consistentFn[i]),
1578
0
             index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
1579
0
             scanCxt);
1580
0
    fmgr_info_copy(&(giststate->unionFn[i]),
1581
0
             index_getprocinfo(index, i + 1, GIST_UNION_PROC),
1582
0
             scanCxt);
1583
1584
    /* opclasses are not required to provide a Compress method */
1585
0
    if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
1586
0
      fmgr_info_copy(&(giststate->compressFn[i]),
1587
0
               index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
1588
0
               scanCxt);
1589
0
    else
1590
0
      giststate->compressFn[i].fn_oid = InvalidOid;
1591
1592
    /* opclasses are not required to provide a Decompress method */
1593
0
    if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
1594
0
      fmgr_info_copy(&(giststate->decompressFn[i]),
1595
0
               index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
1596
0
               scanCxt);
1597
0
    else
1598
0
      giststate->decompressFn[i].fn_oid = InvalidOid;
1599
1600
0
    fmgr_info_copy(&(giststate->penaltyFn[i]),
1601
0
             index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
1602
0
             scanCxt);
1603
0
    fmgr_info_copy(&(giststate->picksplitFn[i]),
1604
0
             index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
1605
0
             scanCxt);
1606
0
    fmgr_info_copy(&(giststate->equalFn[i]),
1607
0
             index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
1608
0
             scanCxt);
1609
1610
    /* opclasses are not required to provide a Distance method */
1611
0
    if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
1612
0
      fmgr_info_copy(&(giststate->distanceFn[i]),
1613
0
               index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
1614
0
               scanCxt);
1615
0
    else
1616
0
      giststate->distanceFn[i].fn_oid = InvalidOid;
1617
1618
    /* opclasses are not required to provide a Fetch method */
1619
0
    if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
1620
0
      fmgr_info_copy(&(giststate->fetchFn[i]),
1621
0
               index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
1622
0
               scanCxt);
1623
0
    else
1624
0
      giststate->fetchFn[i].fn_oid = InvalidOid;
1625
1626
    /*
1627
     * If the index column has a specified collation, we should honor that
1628
     * while doing comparisons.  However, we may have a collatable storage
1629
     * type for a noncollatable indexed data type.  If there's no index
1630
     * collation then specify the default collation, in case the support
1631
     * functions need one.  This is harmless if the support
1632
     * functions don't care about collation, so we just do it
1633
     * unconditionally.  (We could alternatively call get_typcollation,
1634
     * but that seems like expensive overkill --- there aren't going to be
1635
     * any cases where a GiST storage type has a nondefault collation.)
1636
     */
1637
0
    if (OidIsValid(index->rd_indcollation[i]))
1638
0
      giststate->supportCollation[i] = index->rd_indcollation[i];
1639
0
    else
1640
0
      giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1641
0
  }
1642
1643
  /* No opclass information for INCLUDE attributes */
1644
0
  for (; i < index->rd_att->natts; i++)
1645
0
  {
1646
0
    giststate->consistentFn[i].fn_oid = InvalidOid;
1647
0
    giststate->unionFn[i].fn_oid = InvalidOid;
1648
0
    giststate->compressFn[i].fn_oid = InvalidOid;
1649
0
    giststate->decompressFn[i].fn_oid = InvalidOid;
1650
0
    giststate->penaltyFn[i].fn_oid = InvalidOid;
1651
0
    giststate->picksplitFn[i].fn_oid = InvalidOid;
1652
0
    giststate->equalFn[i].fn_oid = InvalidOid;
1653
0
    giststate->distanceFn[i].fn_oid = InvalidOid;
1654
0
    giststate->fetchFn[i].fn_oid = InvalidOid;
1655
0
    giststate->supportCollation[i] = InvalidOid;
1656
0
  }
1657
1658
0
  MemoryContextSwitchTo(oldCxt);
1659
1660
0
  return giststate;
1661
0
}
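
The FmgrInfos and collations cached above are what the rest of the access method later calls into. As one example, the consistent-function invocation during a scan has roughly this shape (simplified from the key test in gistget.c; `de`, `key`, and `attno` stand for locals of the real caller):

    Datum       result;
    bool        recheck = true;     /* opclass may report an exact match */

    result = FunctionCall5Coll(&giststate->consistentFn[attno],
                               giststate->supportCollation[attno],
                               PointerGetDatum(&de),    /* decompressed key */
                               key->sk_argument,
                               Int16GetDatum(key->sk_strategy),
                               ObjectIdGetDatum(key->sk_subtype),
                               PointerGetDatum(&recheck));
    /* a false result means the subtree cannot contain matches */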
1662
1663
void
1664
freeGISTstate(GISTSTATE *giststate)
1665
0
{
1666
  /* It's sufficient to delete the scanCxt */
1667
0
  MemoryContextDelete(giststate->scanCxt);
1668
0
}
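
Taken together, the intended lifecycle is: build the state, swap in a short-lived temp context, reset it between tuples, and tear everything down by deleting the contexts. A sketch of that sequence, modeled on what callers such as gistbuild() do (not a verbatim excerpt; assumes the usual gist_private.h environment):

    GISTSTATE  *giststate = initGISTstate(index);

    /*
     * tempCxt defaults to scanCxt, which lives as long as the whole state;
     * per-tuple allocations want something cheap to reset instead.
     */
    giststate->tempCxt = createTempGistContext();

    /* ... per-tuple work runs in tempCxt, then ... */
    MemoryContextReset(giststate->tempCxt);

    /* teardown: tempCxt is not a child of scanCxt, so delete it first */
    MemoryContextDelete(giststate->tempCxt);
    freeGISTstate(giststate);   /* deletes scanCxt, and the state with it */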
1669
1670
/*
1671
 * gistprunepage() -- try to remove LP_DEAD items from the given page.
1672
 * The function assumes that the buffer is exclusively locked.
1673
 */
1674
static void
1675
gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1676
0
{
1677
0
  OffsetNumber deletable[MaxIndexTuplesPerPage];
1678
0
  int     ndeletable = 0;
1679
0
  OffsetNumber offnum,
1680
0
        maxoff;
1681
1682
0
  Assert(GistPageIsLeaf(page));
1683
1684
  /*
1685
   * Scan over all items to see which ones need to be deleted according to
1686
   * LP_DEAD flags.
1687
   */
1688
0
  maxoff = PageGetMaxOffsetNumber(page);
1689
0
  for (offnum = FirstOffsetNumber;
1690
0
     offnum <= maxoff;
1691
0
     offnum = OffsetNumberNext(offnum))
1692
0
  {
1693
0
    ItemId    itemId = PageGetItemId(page, offnum);
1694
1695
0
    if (ItemIdIsDead(itemId))
1696
0
      deletable[ndeletable++] = offnum;
1697
0
  }
1698
1699
0
  if (ndeletable > 0)
1700
0
  {
1701
0
    TransactionId snapshotConflictHorizon = InvalidTransactionId;
1702
1703
0
    if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
1704
0
      snapshotConflictHorizon =
1705
0
        index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1706
0
                           deletable, ndeletable);
1707
1708
0
    START_CRIT_SECTION();
1709
1710
0
    PageIndexMultiDelete(page, deletable, ndeletable);
1711
1712
    /*
1713
     * Mark the page as not containing any LP_DEAD items.  This is not
1714
     * necessarily true (there might be some that have recently been marked,
1715
     * but weren't included in our target-item list), but it will almost
1716
     * always be true and it doesn't seem worth an additional page scan to
1717
     * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1718
     */
1719
0
    GistClearPageHasGarbage(page);
1720
1721
0
    MarkBufferDirty(buffer);
1722
1723
    /* XLOG stuff */
1724
0
    if (RelationNeedsWAL(rel))
1725
0
    {
1726
0
      XLogRecPtr  recptr;
1727
1728
0
      recptr = gistXLogDelete(buffer,
1729
0
                  deletable, ndeletable,
1730
0
                  snapshotConflictHorizon,
1731
0
                  heapRel);
1732
1733
0
      PageSetLSN(page, recptr);
1734
0
    }
1735
0
    else
1736
0
      PageSetLSN(page, gistGetFakeLSN(rel));
1737
1738
0
    END_CRIT_SECTION();
1739
0
  }
1740
1741
  /*
1742
   * Note: if we didn't find any LP_DEAD items, then the page's
1743
   * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
1744
   * separate write to clear it, however.  We will clear it when we split
1745
   * the page.
1746
   */
1747
0
}
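
The deletion path above follows PostgreSQL's standard recipe for a WAL-logged page update: do everything that can fail (here, computing the conflict horizon) before entering the critical section; inside it, modify the page, mark the buffer dirty, emit the WAL record, and stamp the page with the record's LSN. Condensed to its skeleton, using only the calls already seen in this function:

    START_CRIT_SECTION();

    PageIndexMultiDelete(page, deletable, ndeletable);  /* 1. change page  */
    GistClearPageHasGarbage(page);
    MarkBufferDirty(buffer);                            /* 2. dirty buffer */

    if (RelationNeedsWAL(rel))                          /* 3. WAL, then LSN */
        PageSetLSN(page, gistXLogDelete(buffer, deletable, ndeletable,
                                        snapshotConflictHorizon, heapRel));
    else
        PageSetLSN(page, gistGetFakeLSN(rel));  /* keeps NSN checks valid */

    END_CRIT_SECTION();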