Coverage Report

Created: 2025-08-12 06:43

/src/postgres/src/backend/access/gin/gindatapage.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * gindatapage.c
4
 *    routines for handling GIN posting tree pages.
5
 *
6
 *
7
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 * IDENTIFICATION
11
 *      src/backend/access/gin/gindatapage.c
12
 *-------------------------------------------------------------------------
13
 */
14
15
#include "postgres.h"
16
17
#include "access/gin_private.h"
18
#include "access/ginxlog.h"
19
#include "access/xloginsert.h"
20
#include "lib/ilist.h"
21
#include "miscadmin.h"
22
#include "storage/predicate.h"
23
#include "utils/rel.h"
24
25
/*
26
 * Min, Max and Target size of posting lists stored on leaf pages, in bytes.
27
 *
28
 * The code can deal with any size, but random access is more efficient when
29
 * a number of smaller lists are stored, rather than one big list. If a
30
 * posting list would become larger than Max size as a result of insertions,
31
 * it is split into two. If a posting list would be smaller than minimum
32
 * size, it is merged with the next posting list.
33
 */
34
0
#define GinPostingListSegmentMaxSize 384
35
0
#define GinPostingListSegmentTargetSize 256
36
0
#define GinPostingListSegmentMinSize 128
37
38
/*
39
 * At least this many items fit in a GinPostingListSegmentMaxSize-bytes
40
 * long segment. This is used when estimating how much space is required
41
 * for N items, at minimum.
42
 */
43
#define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
44
45
/*
46
 * A working struct for manipulating a posting tree leaf page.
47
 */
48
typedef struct
49
{
50
  dlist_head  segments;   /* a list of leafSegmentInfos */
51
52
  /*
53
   * The following fields represent how the segments are split across pages,
54
   * if a page split is required. Filled in by leafRepackItems.
55
   */
56
  dlist_node *lastleft;   /* last segment on left page */
57
  int     lsize;      /* total size on left page */
58
  int     rsize;      /* total size on right page */
59
60
  bool    oldformat;    /* page is in pre-9.4 format on disk */
61
62
  /*
63
   * If we need WAL data representing the reconstructed leaf page, it's
64
   * stored here by computeLeafRecompressWALData.
65
   */
66
  void     *walinfo;    /* buffer start */
67
  int     walinfolen;   /* and length */
68
} disassembledLeaf;
69
70
typedef struct
71
{
72
  dlist_node  node;     /* linked list pointers */
73
74
  /*-------------
75
   * 'action' indicates the status of this in-memory segment, compared to
76
   * what's on disk. It is one of the GIN_SEGMENT_* action codes:
77
   *
78
   * UNMODIFIED no changes
79
   * DELETE   the segment is to be removed. 'seg' and 'items' are
80
   *        ignored
81
   * INSERT   this is a completely new segment
82
   * REPLACE    this replaces an existing segment with new content
83
   * ADDITEMS   like REPLACE, but no items have been removed, and we track
84
   *        in detail what items have been added to this segment, in
85
   *        'modifieditems'
86
   *-------------
87
   */
88
  char    action;
89
90
  ItemPointerData *modifieditems;
91
  uint16    nmodifieditems;
92
93
  /*
94
   * The following fields represent the items in this segment. If 'items' is
95
   * not NULL, it contains a palloc'd array of the items in this segment. If
96
   * 'seg' is not NULL, it contains the items in an already-compressed
97
   * format. It can point to an on-disk page (!modified), or a palloc'd
98
   * segment in memory. If both are set, they must represent the same items.
99
   */
100
  GinPostingList *seg;
101
  ItemPointer items;
102
  int     nitems;     /* # of items in 'items', if items != NULL */
103
} leafSegmentInfo;
104
105
static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
106
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
107
                  GinBtreeStack *stack,
108
                  void *insertdata, BlockNumber updateblkno,
109
                  Page *newlpage, Page *newrpage);
110
111
static disassembledLeaf *disassembleLeaf(Page page);
112
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
113
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
114
               int nNewItems);
115
116
static void computeLeafRecompressWALData(disassembledLeaf *leaf);
117
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
118
static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
119
                   ItemPointerData lbound, ItemPointerData rbound,
120
                   Page lpage, Page rpage);
121
122
/*
123
 * Read TIDs from leaf data page to single uncompressed array. The TIDs are
124
 * returned in ascending order.
125
 *
126
 * advancePast is a hint, indicating that the caller is only interested in
127
 * TIDs > advancePast. To return all items, use ItemPointerSetMin.
128
 *
129
 * Note: This function can still return items smaller than advancePast that
130
 * are in the same posting list as the items of interest, so the caller must
131
 * still check all the returned items. But passing it allows this function to
132
 * skip whole posting lists.
133
 */
134
ItemPointer
135
GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
136
0
{
137
0
  ItemPointer result;
138
139
0
  if (GinPageIsCompressed(page))
140
0
  {
141
0
    GinPostingList *seg = GinDataLeafPageGetPostingList(page);
142
0
    Size    len = GinDataLeafPageGetPostingListSize(page);
143
0
    Pointer   endptr = ((Pointer) seg) + len;
144
0
    GinPostingList *next;
145
146
    /* Skip to the segment containing advancePast+1 */
147
0
    if (ItemPointerIsValid(&advancePast))
148
0
    {
149
0
      next = GinNextPostingListSegment(seg);
150
0
      while ((Pointer) next < endptr &&
151
0
           ginCompareItemPointers(&next->first, &advancePast) <= 0)
152
0
      {
153
0
        seg = next;
154
0
        next = GinNextPostingListSegment(seg);
155
0
      }
156
0
      len = endptr - (Pointer) seg;
157
0
    }
158
159
0
    if (len > 0)
160
0
      result = ginPostingListDecodeAllSegments(seg, len, nitems);
161
0
    else
162
0
    {
163
0
      result = NULL;
164
0
      *nitems = 0;
165
0
    }
166
0
  }
167
0
  else
168
0
  {
169
0
    ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
170
171
0
    result = palloc((*nitems) * sizeof(ItemPointerData));
172
0
    memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
173
0
  }
174
175
0
  return result;
176
0
}
177
178
/*
179
 * Places all TIDs from leaf data page to bitmap.
180
 */
181
int
182
GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
183
0
{
184
0
  ItemPointer uncompressed;
185
0
  int     nitems;
186
187
0
  if (GinPageIsCompressed(page))
188
0
  {
189
0
    GinPostingList *segment = GinDataLeafPageGetPostingList(page);
190
0
    Size    len = GinDataLeafPageGetPostingListSize(page);
191
192
0
    nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
193
0
  }
194
0
  else
195
0
  {
196
0
    uncompressed = dataLeafPageGetUncompressed(page, &nitems);
197
198
0
    if (nitems > 0)
199
0
      tbm_add_tuples(tbm, uncompressed, nitems, false);
200
0
  }
201
202
0
  return nitems;
203
0
}
204
205
/*
206
 * Get pointer to the uncompressed array of items on a pre-9.4 format
207
 * uncompressed leaf page. The number of items in the array is returned in
208
 * *nitems.
209
 */
210
static ItemPointer
211
dataLeafPageGetUncompressed(Page page, int *nitems)
212
0
{
213
0
  ItemPointer items;
214
215
0
  Assert(!GinPageIsCompressed(page));
216
217
  /*
218
   * In the old pre-9.4 page format, the whole page content is used for
219
   * uncompressed items, and the number of items is stored in 'maxoff'
220
   */
221
0
  items = (ItemPointer) GinDataPageGetData(page);
222
0
  *nitems = GinPageGetOpaque(page)->maxoff;
223
224
0
  return items;
225
0
}
226
227
/*
228
 * Check if we should follow the right link to find the item we're searching
229
 * for.
230
 *
231
 * Compares inserting item pointer with the right bound of the current page.
232
 */
233
static bool
234
dataIsMoveRight(GinBtree btree, Page page)
235
0
{
236
0
  ItemPointer iptr = GinDataPageGetRightBound(page);
237
238
0
  if (GinPageRightMost(page))
239
0
    return false;
240
241
0
  if (GinPageIsDeleted(page))
242
0
    return true;
243
244
0
  return (ginCompareItemPointers(&btree->itemptr, iptr) > 0);
245
0
}
246
247
/*
248
 * Find correct PostingItem in non-leaf page. It is assumed that this is
249
 * the correct page, and the searched value SHOULD be on the page.
250
 */
251
static BlockNumber
252
dataLocateItem(GinBtree btree, GinBtreeStack *stack)
253
0
{
254
0
  OffsetNumber low,
255
0
        high,
256
0
        maxoff;
257
0
  PostingItem *pitem = NULL;
258
0
  int     result;
259
0
  Page    page = BufferGetPage(stack->buffer);
260
261
0
  Assert(!GinPageIsLeaf(page));
262
0
  Assert(GinPageIsData(page));
263
264
0
  if (btree->fullScan)
265
0
  {
266
0
    stack->off = FirstOffsetNumber;
267
0
    stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
268
0
    return btree->getLeftMostChild(btree, page);
269
0
  }
270
271
0
  low = FirstOffsetNumber;
272
0
  maxoff = high = GinPageGetOpaque(page)->maxoff;
273
0
  Assert(high >= low);
274
275
0
  high++;
276
277
0
  while (high > low)
278
0
  {
279
0
    OffsetNumber mid = low + ((high - low) / 2);
280
281
0
    pitem = GinDataPageGetPostingItem(page, mid);
282
283
0
    if (mid == maxoff)
284
0
    {
285
      /*
286
       * Right infinity, page already correctly chosen with a help of
287
       * dataIsMoveRight
288
       */
289
0
      result = -1;
290
0
    }
291
0
    else
292
0
    {
293
0
      pitem = GinDataPageGetPostingItem(page, mid);
294
0
      result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
295
0
    }
296
297
0
    if (result == 0)
298
0
    {
299
0
      stack->off = mid;
300
0
      return PostingItemGetBlockNumber(pitem);
301
0
    }
302
0
    else if (result > 0)
303
0
      low = mid + 1;
304
0
    else
305
0
      high = mid;
306
0
  }
307
308
0
  Assert(high >= FirstOffsetNumber && high <= maxoff);
309
310
0
  stack->off = high;
311
0
  pitem = GinDataPageGetPostingItem(page, high);
312
0
  return PostingItemGetBlockNumber(pitem);
313
0
}
314
315
/*
316
 * Find link to blkno on non-leaf page, returns offset of PostingItem
317
 */
318
static OffsetNumber
319
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
320
0
{
321
0
  OffsetNumber i,
322
0
        maxoff = GinPageGetOpaque(page)->maxoff;
323
0
  PostingItem *pitem;
324
325
0
  Assert(!GinPageIsLeaf(page));
326
0
  Assert(GinPageIsData(page));
327
328
  /* if page isn't changed, we return storedOff */
329
0
  if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
330
0
  {
331
0
    pitem = GinDataPageGetPostingItem(page, storedOff);
332
0
    if (PostingItemGetBlockNumber(pitem) == blkno)
333
0
      return storedOff;
334
335
    /*
336
     * we hope, that needed pointer goes to right. It's true if there
337
     * wasn't a deletion
338
     */
339
0
    for (i = storedOff + 1; i <= maxoff; i++)
340
0
    {
341
0
      pitem = GinDataPageGetPostingItem(page, i);
342
0
      if (PostingItemGetBlockNumber(pitem) == blkno)
343
0
        return i;
344
0
    }
345
346
0
    maxoff = storedOff - 1;
347
0
  }
348
349
  /* last chance */
350
0
  for (i = FirstOffsetNumber; i <= maxoff; i++)
351
0
  {
352
0
    pitem = GinDataPageGetPostingItem(page, i);
353
0
    if (PostingItemGetBlockNumber(pitem) == blkno)
354
0
      return i;
355
0
  }
356
357
0
  return InvalidOffsetNumber;
358
0
}
359
360
/*
361
 * Return blkno of leftmost child
362
 */
363
static BlockNumber
364
dataGetLeftMostPage(GinBtree btree, Page page)
365
0
{
366
0
  PostingItem *pitem;
367
368
0
  Assert(!GinPageIsLeaf(page));
369
0
  Assert(GinPageIsData(page));
370
0
  Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
371
372
0
  pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
373
0
  return PostingItemGetBlockNumber(pitem);
374
0
}
375
376
/*
377
 * Add PostingItem to a non-leaf page.
378
 */
379
void
380
GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
381
0
{
382
0
  OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
383
0
  char     *ptr;
384
385
0
  Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
386
0
  Assert(!GinPageIsLeaf(page));
387
388
0
  if (offset == InvalidOffsetNumber)
389
0
  {
390
0
    ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
391
0
  }
392
0
  else
393
0
  {
394
0
    ptr = (char *) GinDataPageGetPostingItem(page, offset);
395
0
    if (offset != maxoff + 1)
396
0
      memmove(ptr + sizeof(PostingItem),
397
0
          ptr,
398
0
          (maxoff - offset + 1) * sizeof(PostingItem));
399
0
  }
400
0
  memcpy(ptr, data, sizeof(PostingItem));
401
402
0
  maxoff++;
403
0
  GinPageGetOpaque(page)->maxoff = maxoff;
404
405
  /*
406
   * Also set pd_lower to the end of the posting items, to follow the
407
   * "standard" page layout, so that we can squeeze out the unused space
408
   * from full-page images.
409
   */
410
0
  GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
411
0
}
412
413
/*
414
 * Delete posting item from non-leaf page
415
 */
416
void
417
GinPageDeletePostingItem(Page page, OffsetNumber offset)
418
0
{
419
0
  OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
420
421
0
  Assert(!GinPageIsLeaf(page));
422
0
  Assert(offset >= FirstOffsetNumber && offset <= maxoff);
423
424
0
  if (offset != maxoff)
425
0
    memmove(GinDataPageGetPostingItem(page, offset),
426
0
        GinDataPageGetPostingItem(page, offset + 1),
427
0
        sizeof(PostingItem) * (maxoff - offset));
428
429
0
  maxoff--;
430
0
  GinPageGetOpaque(page)->maxoff = maxoff;
431
432
0
  GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
433
0
}
434
435
/*
436
 * Prepare to insert data on a leaf data page.
437
 *
438
 * If it will fit, return GPTP_INSERT after doing whatever setup is needed
439
 * before we enter the insertion critical section.  *ptp_workspace can be
440
 * set to pass information along to the execPlaceToPage function.
441
 *
442
 * If it won't fit, perform a page split and return two temporary page
443
 * images into *newlpage and *newrpage, with result GPTP_SPLIT.
444
 *
445
 * In neither case should the given page buffer be modified here.
446
 */
447
static GinPlaceToPageRC
448
dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
449
             void *insertdata,
450
             void **ptp_workspace,
451
             Page *newlpage, Page *newrpage)
452
0
{
453
0
  GinBtreeDataLeafInsertData *items = insertdata;
454
0
  ItemPointer newItems = &items->items[items->curitem];
455
0
  int     maxitems = items->nitem - items->curitem;
456
0
  Page    page = BufferGetPage(buf);
457
0
  int     i;
458
0
  ItemPointerData rbound;
459
0
  ItemPointerData lbound;
460
0
  bool    needsplit;
461
0
  bool    append;
462
0
  int     segsize;
463
0
  Size    freespace;
464
0
  disassembledLeaf *leaf;
465
0
  leafSegmentInfo *lastleftinfo;
466
0
  ItemPointerData maxOldItem;
467
0
  ItemPointerData remaining;
468
469
0
  rbound = *GinDataPageGetRightBound(page);
470
471
  /*
472
   * Count how many of the new items belong to this page.
473
   */
474
0
  if (!GinPageRightMost(page))
475
0
  {
476
0
    for (i = 0; i < maxitems; i++)
477
0
    {
478
0
      if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
479
0
      {
480
        /*
481
         * This needs to go to some other location in the tree. (The
482
         * caller should've chosen the insert location so that at
483
         * least the first item goes here.)
484
         */
485
0
        Assert(i > 0);
486
0
        break;
487
0
      }
488
0
    }
489
0
    maxitems = i;
490
0
  }
491
492
  /* Disassemble the data on the page */
493
0
  leaf = disassembleLeaf(page);
494
495
  /*
496
   * Are we appending to the end of the page? IOW, are all the new items
497
   * larger than any of the existing items.
498
   */
499
0
  if (!dlist_is_empty(&leaf->segments))
500
0
  {
501
0
    lastleftinfo = dlist_container(leafSegmentInfo, node,
502
0
                     dlist_tail_node(&leaf->segments));
503
0
    if (!lastleftinfo->items)
504
0
      lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
505
0
                             &lastleftinfo->nitems);
506
0
    maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
507
0
    if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
508
0
      append = true;
509
0
    else
510
0
      append = false;
511
0
  }
512
0
  else
513
0
  {
514
0
    ItemPointerSetMin(&maxOldItem);
515
0
    append = true;
516
0
  }
517
518
  /*
519
   * If we're appending to the end of the page, we will append as many items
520
   * as we can fit (after splitting), and stop when the pages becomes full.
521
   * Otherwise we have to limit the number of new items to insert, because
522
   * once we start packing we can't just stop when we run out of space,
523
   * because we must make sure that all the old items still fit.
524
   */
525
0
  if (GinPageIsCompressed(page))
526
0
    freespace = GinDataLeafPageGetFreeSpace(page);
527
0
  else
528
0
    freespace = 0;
529
0
  if (append)
530
0
  {
531
    /*
532
     * Even when appending, trying to append more items than will fit is
533
     * not completely free, because we will merge the new items and old
534
     * items into an array below. In the best case, every new item fits in
535
     * a single byte, and we can use all the free space on the old page as
536
     * well as the new page. For simplicity, ignore segment overhead etc.
537
     */
538
0
    maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
539
0
  }
540
0
  else
541
0
  {
542
    /*
543
     * Calculate a conservative estimate of how many new items we can fit
544
     * on the two pages after splitting.
545
     *
546
     * We can use any remaining free space on the old page to store full
547
     * segments, as well as the new page. Each full-sized segment can hold
548
     * at least MinTuplesPerSegment items
549
     */
550
0
    int     nnewsegments;
551
552
0
    nnewsegments = freespace / GinPostingListSegmentMaxSize;
553
0
    nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
554
0
    maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
555
0
  }
556
557
  /* Add the new items to the segment list */
558
0
  if (!addItemsToLeaf(leaf, newItems, maxitems))
559
0
  {
560
    /* all items were duplicates, we have nothing to do */
561
0
    items->curitem += maxitems;
562
563
0
    return GPTP_NO_WORK;
564
0
  }
565
566
  /*
567
   * Pack the items back to compressed segments, ready for writing to disk.
568
   */
569
0
  needsplit = leafRepackItems(leaf, &remaining);
570
571
  /*
572
   * Did all the new items fit?
573
   *
574
   * If we're appending, it's OK if they didn't. But as a sanity check,
575
   * verify that all the old items fit.
576
   */
577
0
  if (ItemPointerIsValid(&remaining))
578
0
  {
579
0
    if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
580
0
      elog(ERROR, "could not split GIN page; all old items didn't fit");
581
582
    /* Count how many of the new items did fit. */
583
0
    for (i = 0; i < maxitems; i++)
584
0
    {
585
0
      if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
586
0
        break;
587
0
    }
588
0
    if (i == 0)
589
0
      elog(ERROR, "could not split GIN page; no new items fit");
590
0
    maxitems = i;
591
0
  }
592
593
0
  if (!needsplit)
594
0
  {
595
    /*
596
     * Great, all the items fit on a single page.  If needed, prepare data
597
     * for a WAL record describing the changes we'll make.
598
     */
599
0
    if (RelationNeedsWAL(btree->index) && !btree->isBuild)
600
0
      computeLeafRecompressWALData(leaf);
601
602
    /*
603
     * We're ready to enter the critical section, but
604
     * dataExecPlaceToPageLeaf will need access to the "leaf" data.
605
     */
606
0
    *ptp_workspace = leaf;
607
608
0
    if (append)
609
0
      elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
610
0
         maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
611
0
         items->nitem - items->curitem - maxitems);
612
0
    else
613
0
      elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
614
0
         maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
615
0
         items->nitem - items->curitem - maxitems);
616
0
  }
617
0
  else
618
0
  {
619
    /*
620
     * Have to split.
621
     *
622
     * leafRepackItems already divided the segments between the left and
623
     * the right page. It filled the left page as full as possible, and
624
     * put the rest to the right page. When building a new index, that's
625
     * good, because the table is scanned from beginning to end and there
626
     * won't be any more insertions to the left page during the build.
627
     * This packs the index as tight as possible. But otherwise, split
628
     * 50/50, by moving segments from the left page to the right page
629
     * until they're balanced.
630
     *
631
     * As a further heuristic, when appending items to the end of the
632
     * page, try to make the left page 75% full, on the assumption that
633
     * subsequent insertions will probably also go to the end. This packs
634
     * the index somewhat tighter when appending to a table, which is very
635
     * common.
636
     */
637
0
    if (!btree->isBuild)
638
0
    {
639
0
      while (dlist_has_prev(&leaf->segments, leaf->lastleft))
640
0
      {
641
0
        lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
642
643
        /* ignore deleted segments */
644
0
        if (lastleftinfo->action != GIN_SEGMENT_DELETE)
645
0
        {
646
0
          segsize = SizeOfGinPostingList(lastleftinfo->seg);
647
648
          /*
649
           * Note that we check that the right page doesn't become
650
           * more full than the left page even when appending. It's
651
           * possible that we added enough items to make both pages
652
           * more than 75% full.
653
           */
654
0
          if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
655
0
            break;
656
0
          if (append)
657
0
          {
658
0
            if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
659
0
              break;
660
0
          }
661
662
0
          leaf->lsize -= segsize;
663
0
          leaf->rsize += segsize;
664
0
        }
665
0
        leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
666
0
      }
667
0
    }
668
0
    Assert(leaf->lsize <= GinDataPageMaxDataSize);
669
0
    Assert(leaf->rsize <= GinDataPageMaxDataSize);
670
671
    /*
672
     * Fetch the max item in the left page's last segment; it becomes the
673
     * right bound of the page.
674
     */
675
0
    lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
676
0
    if (!lastleftinfo->items)
677
0
      lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
678
0
                             &lastleftinfo->nitems);
679
0
    lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
680
681
    /*
682
     * Now allocate a couple of temporary page images, and fill them.
683
     */
684
0
    *newlpage = palloc(BLCKSZ);
685
0
    *newrpage = palloc(BLCKSZ);
686
687
0
    dataPlaceToPageLeafSplit(leaf, lbound, rbound,
688
0
                 *newlpage, *newrpage);
689
690
0
    Assert(GinPageRightMost(page) ||
691
0
         ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
692
0
                    GinDataPageGetRightBound(*newrpage)) < 0);
693
694
0
    if (append)
695
0
      elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
696
0
         maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
697
0
         items->nitem - items->curitem - maxitems);
698
0
    else
699
0
      elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
700
0
         maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
701
0
         items->nitem - items->curitem - maxitems);
702
0
  }
703
704
0
  items->curitem += maxitems;
705
706
0
  return needsplit ? GPTP_SPLIT : GPTP_INSERT;
707
0
}
708
709
/*
710
 * Perform data insertion after beginPlaceToPage has decided it will fit.
711
 *
712
 * This is invoked within a critical section, and XLOG record creation (if
713
 * needed) is already started.  The target buffer is registered in slot 0.
714
 */
715
static void
716
dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
717
            void *insertdata, void *ptp_workspace)
718
0
{
719
0
  disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
720
721
  /* Apply changes to page */
722
0
  dataPlaceToPageLeafRecompress(buf, leaf);
723
724
0
  MarkBufferDirty(buf);
725
726
  /* If needed, register WAL data built by computeLeafRecompressWALData */
727
0
  if (RelationNeedsWAL(btree->index) && !btree->isBuild)
728
0
  {
729
0
    XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
730
0
    XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
731
0
  }
732
0
}
733
734
/*
735
 * Vacuum a posting tree leaf page.
736
 */
737
void
738
ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
739
0
{
740
0
  Page    page = BufferGetPage(buffer);
741
0
  disassembledLeaf *leaf;
742
0
  bool    removedsomething = false;
743
0
  dlist_iter  iter;
744
745
0
  leaf = disassembleLeaf(page);
746
747
  /* Vacuum each segment. */
748
0
  dlist_foreach(iter, &leaf->segments)
749
0
  {
750
0
    leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
751
0
    int     oldsegsize;
752
0
    ItemPointer cleaned;
753
0
    int     ncleaned;
754
755
0
    if (!seginfo->items)
756
0
      seginfo->items = ginPostingListDecode(seginfo->seg,
757
0
                          &seginfo->nitems);
758
0
    if (seginfo->seg)
759
0
      oldsegsize = SizeOfGinPostingList(seginfo->seg);
760
0
    else
761
0
      oldsegsize = GinDataPageMaxDataSize;
762
763
0
    cleaned = ginVacuumItemPointers(gvs,
764
0
                    seginfo->items,
765
0
                    seginfo->nitems,
766
0
                    &ncleaned);
767
0
    pfree(seginfo->items);
768
0
    seginfo->items = NULL;
769
0
    seginfo->nitems = 0;
770
0
    if (cleaned)
771
0
    {
772
0
      if (ncleaned > 0)
773
0
      {
774
0
        int     npacked;
775
776
0
        seginfo->seg = ginCompressPostingList(cleaned,
777
0
                            ncleaned,
778
0
                            oldsegsize,
779
0
                            &npacked);
780
        /* Removing an item never increases the size of the segment */
781
0
        if (npacked != ncleaned)
782
0
          elog(ERROR, "could not fit vacuumed posting list");
783
0
        seginfo->action = GIN_SEGMENT_REPLACE;
784
0
      }
785
0
      else
786
0
      {
787
0
        seginfo->seg = NULL;
788
0
        seginfo->items = NULL;
789
0
        seginfo->action = GIN_SEGMENT_DELETE;
790
0
      }
791
0
      seginfo->nitems = ncleaned;
792
793
0
      removedsomething = true;
794
0
    }
795
0
  }
796
797
  /*
798
   * If we removed any items, reconstruct the page from the pieces.
799
   *
800
   * We don't try to re-encode the segments here, even though some of them
801
   * might be really small now that we've removed some items from them. It
802
   * seems like a waste of effort, as there isn't really any benefit from
803
   * larger segments per se; larger segments only help to pack more items in
804
   * the same space. We might as well delay doing that until the next
805
   * insertion, which will need to re-encode at least part of the page
806
   * anyway.
807
   *
808
   * Also note if the page was in uncompressed, pre-9.4 format before, it is
809
   * now represented as one huge segment that contains all the items. It
810
   * might make sense to split that, to speed up random access, but we don't
811
   * bother. You'll have to REINDEX anyway if you want the full gain of the
812
   * new tighter index format.
813
   */
814
0
  if (removedsomething)
815
0
  {
816
0
    bool    modified;
817
818
    /*
819
     * Make sure we have a palloc'd copy of all segments, after the first
820
     * segment that is modified. (dataPlaceToPageLeafRecompress requires
821
     * this).
822
     */
823
0
    modified = false;
824
0
    dlist_foreach(iter, &leaf->segments)
825
0
    {
826
0
      leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
827
0
                             iter.cur);
828
829
0
      if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
830
0
        modified = true;
831
0
      if (modified && seginfo->action != GIN_SEGMENT_DELETE)
832
0
      {
833
0
        int     segsize = SizeOfGinPostingList(seginfo->seg);
834
0
        GinPostingList *tmp = (GinPostingList *) palloc(segsize);
835
836
0
        memcpy(tmp, seginfo->seg, segsize);
837
0
        seginfo->seg = tmp;
838
0
      }
839
0
    }
840
841
0
    if (RelationNeedsWAL(indexrel))
842
0
      computeLeafRecompressWALData(leaf);
843
844
    /* Apply changes to page */
845
0
    START_CRIT_SECTION();
846
847
0
    dataPlaceToPageLeafRecompress(buffer, leaf);
848
849
0
    MarkBufferDirty(buffer);
850
851
0
    if (RelationNeedsWAL(indexrel))
852
0
    {
853
0
      XLogRecPtr  recptr;
854
855
0
      XLogBeginInsert();
856
0
      XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
857
0
      XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
858
0
      recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
859
0
      PageSetLSN(page, recptr);
860
0
    }
861
862
0
    END_CRIT_SECTION();
863
0
  }
864
0
}
865
866
/*
867
 * Construct a ginxlogRecompressDataLeaf record representing the changes
868
 * in *leaf.  (Because this requires a palloc, we have to do it before
869
 * we enter the critical section that actually updates the page.)
870
 */
871
static void
872
computeLeafRecompressWALData(disassembledLeaf *leaf)
873
0
{
874
0
  int     nmodified = 0;
875
0
  char     *walbufbegin;
876
0
  char     *walbufend;
877
0
  dlist_iter  iter;
878
0
  int     segno;
879
0
  ginxlogRecompressDataLeaf *recompress_xlog;
880
881
  /* Count the modified segments */
882
0
  dlist_foreach(iter, &leaf->segments)
883
0
  {
884
0
    leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
885
0
                           iter.cur);
886
887
0
    if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
888
0
      nmodified++;
889
0
  }
890
891
0
  walbufbegin =
892
0
    palloc(sizeof(ginxlogRecompressDataLeaf) +
893
0
         BLCKSZ +      /* max size needed to hold the segment data */
894
0
         nmodified * 2  /* (segno + action) per action */
895
0
    );
896
0
  walbufend = walbufbegin;
897
898
0
  recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
899
0
  walbufend += sizeof(ginxlogRecompressDataLeaf);
900
901
0
  recompress_xlog->nactions = nmodified;
902
903
0
  segno = 0;
904
0
  dlist_foreach(iter, &leaf->segments)
905
0
  {
906
0
    leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
907
0
                           iter.cur);
908
0
    int     segsize = 0;
909
0
    int     datalen;
910
0
    uint8   action = seginfo->action;
911
912
0
    if (action == GIN_SEGMENT_UNMODIFIED)
913
0
    {
914
0
      segno++;
915
0
      continue;
916
0
    }
917
918
0
    if (action != GIN_SEGMENT_DELETE)
919
0
      segsize = SizeOfGinPostingList(seginfo->seg);
920
921
    /*
922
     * If storing the uncompressed list of added item pointers would take
923
     * more space than storing the compressed segment as is, do that
924
     * instead.
925
     */
926
0
    if (action == GIN_SEGMENT_ADDITEMS &&
927
0
      seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
928
0
    {
929
0
      action = GIN_SEGMENT_REPLACE;
930
0
    }
931
932
0
    *((uint8 *) (walbufend++)) = segno;
933
0
    *(walbufend++) = action;
934
935
0
    switch (action)
936
0
    {
937
0
      case GIN_SEGMENT_DELETE:
938
0
        datalen = 0;
939
0
        break;
940
941
0
      case GIN_SEGMENT_ADDITEMS:
942
0
        datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
943
0
        memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
944
0
        memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
945
0
        datalen += sizeof(uint16);
946
0
        break;
947
948
0
      case GIN_SEGMENT_INSERT:
949
0
      case GIN_SEGMENT_REPLACE:
950
0
        datalen = SHORTALIGN(segsize);
951
0
        memcpy(walbufend, seginfo->seg, segsize);
952
0
        break;
953
954
0
      default:
955
0
        elog(ERROR, "unexpected GIN leaf action %d", action);
956
0
    }
957
0
    walbufend += datalen;
958
959
0
    if (action != GIN_SEGMENT_INSERT)
960
0
      segno++;
961
0
  }
962
963
  /* Pass back the constructed info via *leaf */
964
0
  leaf->walinfo = walbufbegin;
965
0
  leaf->walinfolen = walbufend - walbufbegin;
966
0
}
967
968
/*
969
 * Assemble a disassembled posting tree leaf page back to a buffer.
970
 *
971
 * This just updates the target buffer; WAL stuff is caller's responsibility.
972
 *
973
 * NOTE: The segment pointers must not point directly to the same buffer,
974
 * except for segments that have not been modified and whose preceding
975
 * segments have not been modified either.
976
 */
977
static void
978
dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
979
0
{
980
0
  Page    page = BufferGetPage(buf);
981
0
  char     *ptr;
982
0
  int     newsize;
983
0
  bool    modified = false;
984
0
  dlist_iter  iter;
985
0
  int     segsize;
986
987
  /*
988
   * If the page was in pre-9.4 format before, convert the header, and force
989
   * all segments to be copied to the page whether they were modified or
990
   * not.
991
   */
992
0
  if (!GinPageIsCompressed(page))
993
0
  {
994
0
    Assert(leaf->oldformat);
995
0
    GinPageSetCompressed(page);
996
0
    GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
997
0
    modified = true;
998
0
  }
999
1000
0
  ptr = (char *) GinDataLeafPageGetPostingList(page);
1001
0
  newsize = 0;
1002
0
  dlist_foreach(iter, &leaf->segments)
1003
0
  {
1004
0
    leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
1005
1006
0
    if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
1007
0
      modified = true;
1008
1009
0
    if (seginfo->action != GIN_SEGMENT_DELETE)
1010
0
    {
1011
0
      segsize = SizeOfGinPostingList(seginfo->seg);
1012
1013
0
      if (modified)
1014
0
        memcpy(ptr, seginfo->seg, segsize);
1015
1016
0
      ptr += segsize;
1017
0
      newsize += segsize;
1018
0
    }
1019
0
  }
1020
1021
0
  Assert(newsize <= GinDataPageMaxDataSize);
1022
0
  GinDataPageSetDataSize(page, newsize);
1023
0
}
1024
1025
/*
1026
 * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
1027
 * segments to two pages instead of one.
1028
 *
1029
 * This is different from the non-split cases in that this does not modify
1030
 * the original page directly, but writes to temporary in-memory copies of
1031
 * the new left and right pages.
1032
 */
1033
static void
1034
dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
1035
             ItemPointerData lbound, ItemPointerData rbound,
1036
             Page lpage, Page rpage)
1037
0
{
1038
0
  char     *ptr;
1039
0
  int     segsize;
1040
0
  int     lsize;
1041
0
  int     rsize;
1042
0
  dlist_node *node;
1043
0
  dlist_node *firstright;
1044
0
  leafSegmentInfo *seginfo;
1045
1046
  /* Initialize temporary pages to hold the new left and right pages */
1047
0
  GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1048
0
  GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1049
1050
  /*
1051
   * Copy the segments that go to the left page.
1052
   *
1053
   * XXX: We should skip copying the unmodified part of the left page, like
1054
   * we do when recompressing.
1055
   */
1056
0
  lsize = 0;
1057
0
  ptr = (char *) GinDataLeafPageGetPostingList(lpage);
1058
0
  firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
1059
0
  for (node = dlist_head_node(&leaf->segments);
1060
0
     node != firstright;
1061
0
     node = dlist_next_node(&leaf->segments, node))
1062
0
  {
1063
0
    seginfo = dlist_container(leafSegmentInfo, node, node);
1064
1065
0
    if (seginfo->action != GIN_SEGMENT_DELETE)
1066
0
    {
1067
0
      segsize = SizeOfGinPostingList(seginfo->seg);
1068
0
      memcpy(ptr, seginfo->seg, segsize);
1069
0
      ptr += segsize;
1070
0
      lsize += segsize;
1071
0
    }
1072
0
  }
1073
0
  Assert(lsize == leaf->lsize);
1074
0
  GinDataPageSetDataSize(lpage, lsize);
1075
0
  *GinDataPageGetRightBound(lpage) = lbound;
1076
1077
  /* Copy the segments that go to the right page */
1078
0
  ptr = (char *) GinDataLeafPageGetPostingList(rpage);
1079
0
  rsize = 0;
1080
0
  for (node = firstright;
1081
0
     ;
1082
0
     node = dlist_next_node(&leaf->segments, node))
1083
0
  {
1084
0
    seginfo = dlist_container(leafSegmentInfo, node, node);
1085
1086
0
    if (seginfo->action != GIN_SEGMENT_DELETE)
1087
0
    {
1088
0
      segsize = SizeOfGinPostingList(seginfo->seg);
1089
0
      memcpy(ptr, seginfo->seg, segsize);
1090
0
      ptr += segsize;
1091
0
      rsize += segsize;
1092
0
    }
1093
1094
0
    if (!dlist_has_next(&leaf->segments, node))
1095
0
      break;
1096
0
  }
1097
0
  Assert(rsize == leaf->rsize);
1098
0
  GinDataPageSetDataSize(rpage, rsize);
1099
0
  *GinDataPageGetRightBound(rpage) = rbound;
1100
0
}
1101
1102
/*
1103
 * Prepare to insert data on an internal data page.
1104
 *
1105
 * If it will fit, return GPTP_INSERT after doing whatever setup is needed
1106
 * before we enter the insertion critical section.  *ptp_workspace can be
1107
 * set to pass information along to the execPlaceToPage function.
1108
 *
1109
 * If it won't fit, perform a page split and return two temporary page
1110
 * images into *newlpage and *newrpage, with result GPTP_SPLIT.
1111
 *
1112
 * In neither case should the given page buffer be modified here.
1113
 *
1114
 * Note: on insertion to an internal node, in addition to inserting the given
1115
 * item, the downlink of the existing item at stack->off will be updated to
1116
 * point to updateblkno.
1117
 */
1118
static GinPlaceToPageRC
1119
dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1120
               void *insertdata, BlockNumber updateblkno,
1121
               void **ptp_workspace,
1122
               Page *newlpage, Page *newrpage)
1123
0
{
1124
0
  Page    page = BufferGetPage(buf);
1125
1126
  /* If it doesn't fit, deal with split case */
1127
0
  if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
1128
0
  {
1129
0
    dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
1130
0
                newlpage, newrpage);
1131
0
    return GPTP_SPLIT;
1132
0
  }
1133
1134
  /* Else, we're ready to proceed with insertion */
1135
0
  return GPTP_INSERT;
1136
0
}
1137
1138
/*
1139
 * Perform data insertion after beginPlaceToPage has decided it will fit.
1140
 *
1141
 * This is invoked within a critical section, and XLOG record creation (if
1142
 * needed) is already started.  The target buffer is registered in slot 0.
1143
 */
1144
static void
1145
dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1146
              void *insertdata, BlockNumber updateblkno,
1147
              void *ptp_workspace)
1148
0
{
1149
0
  Page    page = BufferGetPage(buf);
1150
0
  OffsetNumber off = stack->off;
1151
0
  PostingItem *pitem;
1152
1153
  /* Update existing downlink to point to next page (on internal page) */
1154
0
  pitem = GinDataPageGetPostingItem(page, off);
1155
0
  PostingItemSetBlockNumber(pitem, updateblkno);
1156
1157
  /* Add new item */
1158
0
  pitem = (PostingItem *) insertdata;
1159
0
  GinDataPageAddPostingItem(page, pitem, off);
1160
1161
0
  MarkBufferDirty(buf);
1162
1163
0
  if (RelationNeedsWAL(btree->index) && !btree->isBuild)
1164
0
  {
1165
    /*
1166
     * This must be static, because it has to survive until XLogInsert,
1167
     * and we can't palloc here.  Ugly, but the XLogInsert infrastructure
1168
     * isn't reentrant anyway.
1169
     */
1170
0
    static ginxlogInsertDataInternal data;
1171
1172
0
    data.offset = off;
1173
0
    data.newitem = *pitem;
1174
1175
0
    XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
1176
0
    XLogRegisterBufData(0, &data,
1177
0
              sizeof(ginxlogInsertDataInternal));
1178
0
  }
1179
0
}
1180
1181
/*
1182
 * Prepare to insert data on a posting-tree data page.
1183
 *
1184
 * If it will fit, return GPTP_INSERT after doing whatever setup is needed
1185
 * before we enter the insertion critical section.  *ptp_workspace can be
1186
 * set to pass information along to the execPlaceToPage function.
1187
 *
1188
 * If it won't fit, perform a page split and return two temporary page
1189
 * images into *newlpage and *newrpage, with result GPTP_SPLIT.
1190
 *
1191
 * In neither case should the given page buffer be modified here.
1192
 *
1193
 * Note: on insertion to an internal node, in addition to inserting the given
1194
 * item, the downlink of the existing item at stack->off will be updated to
1195
 * point to updateblkno.
1196
 *
1197
 * Calls relevant function for internal or leaf page because they are handled
1198
 * very differently.
1199
 */
1200
static GinPlaceToPageRC
1201
dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1202
           void *insertdata, BlockNumber updateblkno,
1203
           void **ptp_workspace,
1204
           Page *newlpage, Page *newrpage)
1205
0
{
1206
0
  Page    page = BufferGetPage(buf);
1207
1208
0
  Assert(GinPageIsData(page));
1209
1210
0
  if (GinPageIsLeaf(page))
1211
0
    return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
1212
0
                    ptp_workspace,
1213
0
                    newlpage, newrpage);
1214
0
  else
1215
0
    return dataBeginPlaceToPageInternal(btree, buf, stack,
1216
0
                      insertdata, updateblkno,
1217
0
                      ptp_workspace,
1218
0
                      newlpage, newrpage);
1219
0
}
1220
1221
/*
1222
 * Perform data insertion after beginPlaceToPage has decided it will fit.
1223
 *
1224
 * This is invoked within a critical section, and XLOG record creation (if
1225
 * needed) is already started.  The target buffer is registered in slot 0.
1226
 *
1227
 * Calls relevant function for internal or leaf page because they are handled
1228
 * very differently.
1229
 */
1230
static void
1231
dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1232
          void *insertdata, BlockNumber updateblkno,
1233
          void *ptp_workspace)
1234
0
{
1235
0
  Page    page = BufferGetPage(buf);
1236
1237
0
  if (GinPageIsLeaf(page))
1238
0
    dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
1239
0
                ptp_workspace);
1240
0
  else
1241
0
    dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
1242
0
                  updateblkno, ptp_workspace);
1243
0
}
1244
1245
/*
1246
 * Split internal page and insert new data.
1247
 *
1248
 * Returns new temp pages to *newlpage and *newrpage.
1249
 * The original buffer is left untouched.
1250
 */
1251
static void
1252
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
1253
            GinBtreeStack *stack,
1254
            void *insertdata, BlockNumber updateblkno,
1255
            Page *newlpage, Page *newrpage)
1256
0
{
1257
0
  Page    oldpage = BufferGetPage(origbuf);
1258
0
  OffsetNumber off = stack->off;
1259
0
  int     nitems = GinPageGetOpaque(oldpage)->maxoff;
1260
0
  int     nleftitems;
1261
0
  int     nrightitems;
1262
0
  Size    pageSize = PageGetPageSize(oldpage);
1263
0
  ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
1264
0
  ItemPointer bound;
1265
0
  Page    lpage;
1266
0
  Page    rpage;
1267
0
  OffsetNumber separator;
1268
0
  PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
1269
1270
0
  lpage = PageGetTempPage(oldpage);
1271
0
  rpage = PageGetTempPage(oldpage);
1272
0
  GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
1273
0
  GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
1274
1275
  /*
1276
   * First construct a new list of PostingItems, which includes all the old
1277
   * items, and the new item.
1278
   */
1279
0
  memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
1280
0
       (off - 1) * sizeof(PostingItem));
1281
1282
0
  allitems[off - 1] = *((PostingItem *) insertdata);
1283
0
  memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
1284
0
       (nitems - (off - 1)) * sizeof(PostingItem));
1285
0
  nitems++;
1286
1287
  /* Update existing downlink to point to next page */
1288
0
  PostingItemSetBlockNumber(&allitems[off], updateblkno);
1289
1290
  /*
1291
   * When creating a new index, fit as many tuples as possible on the left
1292
   * page, on the assumption that the table is scanned from beginning to
1293
   * end. This packs the index as tight as possible.
1294
   */
1295
0
  if (btree->isBuild && GinPageRightMost(oldpage))
1296
0
    separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
1297
0
  else
1298
0
    separator = nitems / 2;
1299
0
  nleftitems = separator;
1300
0
  nrightitems = nitems - separator;
1301
1302
0
  memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
1303
0
       allitems,
1304
0
       nleftitems * sizeof(PostingItem));
1305
0
  GinPageGetOpaque(lpage)->maxoff = nleftitems;
1306
0
  memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
1307
0
       &allitems[separator],
1308
0
       nrightitems * sizeof(PostingItem));
1309
0
  GinPageGetOpaque(rpage)->maxoff = nrightitems;
1310
1311
  /*
1312
   * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
1313
   */
1314
0
  GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
1315
0
  GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
1316
1317
  /* set up right bound for left page */
1318
0
  bound = GinDataPageGetRightBound(lpage);
1319
0
  *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
1320
1321
  /* set up right bound for right page */
1322
0
  *GinDataPageGetRightBound(rpage) = oldbound;
1323
1324
  /* return temp pages to caller */
1325
0
  *newlpage = lpage;
1326
0
  *newrpage = rpage;
1327
0
}
1328
1329
/*
1330
 * Construct insertion payload for inserting the downlink for given buffer.
1331
 */
1332
static void *
1333
dataPrepareDownlink(GinBtree btree, Buffer lbuf)
1334
0
{
1335
0
  PostingItem *pitem = palloc(sizeof(PostingItem));
1336
0
  Page    lpage = BufferGetPage(lbuf);
1337
1338
0
  PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
1339
0
  pitem->key = *GinDataPageGetRightBound(lpage);
1340
1341
0
  return pitem;
1342
0
}
1343
1344
/*
1345
 * Fills new root by right bound values from child.
1346
 * Also called from ginxlog, should not use btree
1347
 */
1348
void
1349
ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
1350
0
{
1351
0
  PostingItem li,
1352
0
        ri;
1353
1354
0
  li.key = *GinDataPageGetRightBound(lpage);
1355
0
  PostingItemSetBlockNumber(&li, lblkno);
1356
0
  GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
1357
1358
0
  ri.key = *GinDataPageGetRightBound(rpage);
1359
0
  PostingItemSetBlockNumber(&ri, rblkno);
1360
0
  GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
1361
0
}
1362
1363
1364
/*** Functions to work with disassembled leaf pages ***/
1365
1366
/*
1367
 * Disassemble page into a disassembledLeaf struct.
1368
 */
1369
static disassembledLeaf *
1370
disassembleLeaf(Page page)
1371
0
{
1372
0
  disassembledLeaf *leaf;
1373
0
  GinPostingList *seg;
1374
0
  Pointer   segbegin;
1375
0
  Pointer   segend;
1376
1377
0
  leaf = palloc0(sizeof(disassembledLeaf));
1378
0
  dlist_init(&leaf->segments);
1379
1380
0
  if (GinPageIsCompressed(page))
1381
0
  {
1382
    /*
1383
     * Create a leafSegmentInfo entry for each segment.
1384
     */
1385
0
    seg = GinDataLeafPageGetPostingList(page);
1386
0
    segbegin = (Pointer) seg;
1387
0
    segend = segbegin + GinDataLeafPageGetPostingListSize(page);
1388
0
    while ((Pointer) seg < segend)
1389
0
    {
1390
0
      leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo));
1391
1392
0
      seginfo->action = GIN_SEGMENT_UNMODIFIED;
1393
0
      seginfo->seg = seg;
1394
0
      seginfo->items = NULL;
1395
0
      seginfo->nitems = 0;
1396
0
      dlist_push_tail(&leaf->segments, &seginfo->node);
1397
1398
0
      seg = GinNextPostingListSegment(seg);
1399
0
    }
1400
0
    leaf->oldformat = false;
1401
0
  }
1402
0
  else
1403
0
  {
1404
    /*
1405
     * A pre-9.4 format uncompressed page is represented by a single
1406
     * segment, with an array of items.  The corner case is uncompressed
1407
     * page containing no items, which is represented as no segments.
1408
     */
1409
0
    ItemPointer uncompressed;
1410
0
    int     nuncompressed;
1411
0
    leafSegmentInfo *seginfo;
1412
1413
0
    uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
1414
1415
0
    if (nuncompressed > 0)
1416
0
    {
1417
0
      seginfo = palloc(sizeof(leafSegmentInfo));
1418
1419
0
      seginfo->action = GIN_SEGMENT_REPLACE;
1420
0
      seginfo->seg = NULL;
1421
0
      seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
1422
0
      memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
1423
0
      seginfo->nitems = nuncompressed;
1424
1425
0
      dlist_push_tail(&leaf->segments, &seginfo->node);
1426
0
    }
1427
1428
0
    leaf->oldformat = true;
1429
0
  }
1430
1431
0
  return leaf;
1432
0
}
1433
1434
/*
1435
 * Distribute newItems to the segments.
1436
 *
1437
 * Any segments that acquire new items are decoded, and the new items are
1438
 * merged with the old items.
1439
 *
1440
 * Returns true if any new items were added. False means they were all
1441
 * duplicates of existing items on the page.
1442
 */
1443
static bool
1444
addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
1445
0
{
1446
0
  dlist_iter  iter;
1447
0
  ItemPointer nextnew = newItems;
1448
0
  int     newleft = nNewItems;
1449
0
  bool    modified = false;
1450
0
  leafSegmentInfo *newseg;
1451
1452
  /*
1453
   * If the page is completely empty, just construct one new segment to hold
1454
   * all the new items.
1455
   */
1456
0
  if (dlist_is_empty(&leaf->segments))
1457
0
  {
1458
0
    newseg = palloc(sizeof(leafSegmentInfo));
1459
0
    newseg->seg = NULL;
1460
0
    newseg->items = newItems;
1461
0
    newseg->nitems = nNewItems;
1462
0
    newseg->action = GIN_SEGMENT_INSERT;
1463
0
    dlist_push_tail(&leaf->segments, &newseg->node);
1464
0
    return true;
1465
0
  }
1466
1467
0
  dlist_foreach(iter, &leaf->segments)
1468
0
  {
1469
0
    leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
1470
0
    int     nthis;
1471
0
    ItemPointer tmpitems;
1472
0
    int     ntmpitems;
1473
1474
    /*
1475
     * How many of the new items fall into this segment?
1476
     */
1477
0
    if (!dlist_has_next(&leaf->segments, iter.cur))
1478
0
      nthis = newleft;
1479
0
    else
1480
0
    {
1481
0
      leafSegmentInfo *next;
1482
0
      ItemPointerData next_first;
1483
1484
0
      next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
1485
0
                             dlist_next_node(&leaf->segments, iter.cur));
1486
0
      if (next->items)
1487
0
        next_first = next->items[0];
1488
0
      else
1489
0
      {
1490
0
        Assert(next->seg != NULL);
1491
0
        next_first = next->seg->first;
1492
0
      }
1493
1494
0
      nthis = 0;
1495
0
      while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
1496
0
        nthis++;
1497
0
    }
1498
0
    if (nthis == 0)
1499
0
      continue;
1500
1501
    /* Merge the new items with the existing items. */
1502
0
    if (!cur->items)
1503
0
      cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
1504
1505
    /*
1506
     * Fast path for the important special case that we're appending to
1507
     * the end of the page: don't let the last segment on the page grow
1508
     * larger than the target, create a new segment before that happens.
1509
     */
1510
0
    if (!dlist_has_next(&leaf->segments, iter.cur) &&
1511
0
      ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
1512
0
      cur->seg != NULL &&
1513
0
      SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
1514
0
    {
1515
0
      newseg = palloc(sizeof(leafSegmentInfo));
1516
0
      newseg->seg = NULL;
1517
0
      newseg->items = nextnew;
1518
0
      newseg->nitems = nthis;
1519
0
      newseg->action = GIN_SEGMENT_INSERT;
1520
0
      dlist_push_tail(&leaf->segments, &newseg->node);
1521
0
      modified = true;
1522
0
      break;
1523
0
    }
1524
1525
0
    tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
1526
0
                    nextnew, nthis,
1527
0
                    &ntmpitems);
1528
0
    if (ntmpitems != cur->nitems)
1529
0
    {
1530
      /*
1531
       * If there are no duplicates, track the added items so that we
1532
       * can emit a compact ADDITEMS WAL record later on. (it doesn't
1533
       * seem worth re-checking which items were duplicates, if there
1534
       * were any)
1535
       */
1536
0
      if (ntmpitems == nthis + cur->nitems &&
1537
0
        cur->action == GIN_SEGMENT_UNMODIFIED)
1538
0
      {
1539
0
        cur->action = GIN_SEGMENT_ADDITEMS;
1540
0
        cur->modifieditems = nextnew;
1541
0
        cur->nmodifieditems = nthis;
1542
0
      }
1543
0
      else
1544
0
        cur->action = GIN_SEGMENT_REPLACE;
1545
1546
0
      cur->items = tmpitems;
1547
0
      cur->nitems = ntmpitems;
1548
0
      cur->seg = NULL;
1549
0
      modified = true;
1550
0
    }
1551
1552
0
    nextnew += nthis;
1553
0
    newleft -= nthis;
1554
0
    if (newleft == 0)
1555
0
      break;
1556
0
  }
1557
1558
0
  return modified;
1559
0
}
1560
1561
/*
1562
 * Recompresses all segments that have been modified.
1563
 *
1564
 * If not all the items fit on two pages (ie. after split), we store as
1565
 * many items as fit, and set *remaining to the first item that didn't fit.
1566
 * If all items fit, *remaining is set to invalid.
1567
 *
1568
 * Returns true if the page has to be split.
1569
 */
1570
static bool
1571
leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
1572
0
{
1573
0
  int     pgused = 0;
1574
0
  bool    needsplit = false;
1575
0
  dlist_iter  iter;
1576
0
  int     segsize;
1577
0
  leafSegmentInfo *nextseg;
1578
0
  int     npacked;
1579
0
  bool    modified;
1580
0
  dlist_node *cur_node;
1581
0
  dlist_node *next_node;
1582
1583
0
  ItemPointerSetInvalid(remaining);
1584
1585
  /*
1586
   * cannot use dlist_foreach_modify here because we insert adjacent items
1587
   * while iterating.
1588
   */
1589
0
  for (cur_node = dlist_head_node(&leaf->segments);
1590
0
     cur_node != NULL;
1591
0
     cur_node = next_node)
1592
0
  {
1593
0
    leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
1594
0
                           cur_node);
1595
1596
0
    if (dlist_has_next(&leaf->segments, cur_node))
1597
0
      next_node = dlist_next_node(&leaf->segments, cur_node);
1598
0
    else
1599
0
      next_node = NULL;
1600
1601
    /* Compress the posting list, if necessary */
1602
0
    if (seginfo->action != GIN_SEGMENT_DELETE)
1603
0
    {
1604
0
      if (seginfo->seg == NULL)
1605
0
      {
1606
0
        if (seginfo->nitems > GinPostingListSegmentMaxSize)
1607
0
          npacked = 0; /* no chance that it would fit. */
1608
0
        else
1609
0
        {
1610
0
          seginfo->seg = ginCompressPostingList(seginfo->items,
1611
0
                              seginfo->nitems,
1612
0
                              GinPostingListSegmentMaxSize,
1613
0
                              &npacked);
1614
0
        }
1615
0
        if (npacked != seginfo->nitems)
1616
0
        {
1617
          /*
1618
           * Too large. Compress again to the target size, and
1619
           * create a new segment to represent the remaining items.
1620
           * The new segment is inserted after this one, so it will
1621
           * be processed in the next iteration of this loop.
1622
           */
1623
0
          if (seginfo->seg)
1624
0
            pfree(seginfo->seg);
1625
0
          seginfo->seg = ginCompressPostingList(seginfo->items,
1626
0
                              seginfo->nitems,
1627
0
                              GinPostingListSegmentTargetSize,
1628
0
                              &npacked);
1629
0
          if (seginfo->action != GIN_SEGMENT_INSERT)
1630
0
            seginfo->action = GIN_SEGMENT_REPLACE;
1631
1632
0
          nextseg = palloc(sizeof(leafSegmentInfo));
1633
0
          nextseg->action = GIN_SEGMENT_INSERT;
1634
0
          nextseg->seg = NULL;
1635
0
          nextseg->items = &seginfo->items[npacked];
1636
0
          nextseg->nitems = seginfo->nitems - npacked;
1637
0
          next_node = &nextseg->node;
1638
0
          dlist_insert_after(cur_node, next_node);
1639
0
        }
1640
0
      }
1641
1642
      /*
1643
       * If the segment is very small, merge it with the next segment.
1644
       */
1645
0
      if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
1646
0
      {
1647
0
        int     nmerged;
1648
1649
0
        nextseg = dlist_container(leafSegmentInfo, node, next_node);
1650
1651
0
        if (seginfo->items == NULL)
1652
0
          seginfo->items = ginPostingListDecode(seginfo->seg,
1653
0
                              &seginfo->nitems);
1654
0
        if (nextseg->items == NULL)
1655
0
          nextseg->items = ginPostingListDecode(nextseg->seg,
1656
0
                              &nextseg->nitems);
1657
0
        nextseg->items =
1658
0
          ginMergeItemPointers(seginfo->items, seginfo->nitems,
1659
0
                     nextseg->items, nextseg->nitems,
1660
0
                     &nmerged);
1661
0
        Assert(nmerged == seginfo->nitems + nextseg->nitems);
1662
0
        nextseg->nitems = nmerged;
1663
0
        nextseg->seg = NULL;
1664
1665
0
        nextseg->action = GIN_SEGMENT_REPLACE;
1666
0
        nextseg->modifieditems = NULL;
1667
0
        nextseg->nmodifieditems = 0;
1668
1669
0
        if (seginfo->action == GIN_SEGMENT_INSERT)
1670
0
        {
1671
0
          dlist_delete(cur_node);
1672
0
          continue;
1673
0
        }
1674
0
        else
1675
0
        {
1676
0
          seginfo->action = GIN_SEGMENT_DELETE;
1677
0
          seginfo->seg = NULL;
1678
0
        }
1679
0
      }
1680
1681
0
      seginfo->items = NULL;
1682
0
      seginfo->nitems = 0;
1683
0
    }
1684
1685
0
    if (seginfo->action == GIN_SEGMENT_DELETE)
1686
0
      continue;
1687
1688
    /*
1689
     * OK, we now have a compressed version of this segment ready for
1690
     * copying to the page. Did we exceed the size that fits on one page?
1691
     */
1692
0
    segsize = SizeOfGinPostingList(seginfo->seg);
1693
0
    if (pgused + segsize > GinDataPageMaxDataSize)
1694
0
    {
1695
0
      if (!needsplit)
1696
0
      {
1697
        /* switch to right page */
1698
0
        Assert(pgused > 0);
1699
0
        leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
1700
0
        needsplit = true;
1701
0
        leaf->lsize = pgused;
1702
0
        pgused = 0;
1703
0
      }
1704
0
      else
1705
0
      {
1706
        /*
1707
         * Filled both pages. The last segment we constructed did not
1708
         * fit.
1709
         */
1710
0
        *remaining = seginfo->seg->first;
1711
1712
        /*
1713
         * remove all segments that did not fit from the list.
1714
         */
1715
0
        while (dlist_has_next(&leaf->segments, cur_node))
1716
0
          dlist_delete(dlist_next_node(&leaf->segments, cur_node));
1717
0
        dlist_delete(cur_node);
1718
0
        break;
1719
0
      }
1720
0
    }
1721
1722
0
    pgused += segsize;
1723
0
  }
1724
1725
0
  if (!needsplit)
1726
0
  {
1727
0
    leaf->lsize = pgused;
1728
0
    leaf->rsize = 0;
1729
0
  }
1730
0
  else
1731
0
    leaf->rsize = pgused;
1732
1733
0
  Assert(leaf->lsize <= GinDataPageMaxDataSize);
1734
0
  Assert(leaf->rsize <= GinDataPageMaxDataSize);
1735
1736
  /*
1737
   * Make a palloc'd copy of every segment after the first modified one,
1738
   * because as we start copying items to the original page, we might
1739
   * overwrite an existing segment.
1740
   */
1741
0
  modified = false;
1742
0
  dlist_foreach(iter, &leaf->segments)
1743
0
  {
1744
0
    leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
1745
0
                           iter.cur);
1746
1747
0
    if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
1748
0
    {
1749
0
      modified = true;
1750
0
    }
1751
0
    else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
1752
0
    {
1753
0
      GinPostingList *tmp;
1754
1755
0
      segsize = SizeOfGinPostingList(seginfo->seg);
1756
0
      tmp = palloc(segsize);
1757
0
      memcpy(tmp, seginfo->seg, segsize);
1758
0
      seginfo->seg = tmp;
1759
0
    }
1760
0
  }
1761
1762
0
  return needsplit;
1763
0
}
1764
1765
1766
/*** Functions that are exported to the rest of the GIN code ***/
1767
1768
/*
1769
 * Creates new posting tree containing the given TIDs. Returns the page
1770
 * number of the root of the new posting tree.
1771
 *
1772
 * items[] must be in sorted order with no duplicates.
1773
 */
1774
BlockNumber
1775
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
1776
          GinStatsData *buildStats, Buffer entrybuffer)
1777
0
{
1778
0
  BlockNumber blkno;
1779
0
  Buffer    buffer;
1780
0
  Page    tmppage;
1781
0
  Page    page;
1782
0
  Pointer   ptr;
1783
0
  int     nrootitems;
1784
0
  int     rootsize;
1785
0
  bool    is_build = (buildStats != NULL);
1786
1787
  /* Construct the new root page in memory first. */
1788
0
  tmppage = (Page) palloc(BLCKSZ);
1789
0
  GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1790
0
  GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
1791
1792
  /*
1793
   * Write as many of the items to the root page as fit. In segments of max
1794
   * GinPostingListSegmentMaxSize bytes each.
1795
   */
1796
0
  nrootitems = 0;
1797
0
  rootsize = 0;
1798
0
  ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage);
1799
0
  while (nrootitems < nitems)
1800
0
  {
1801
0
    GinPostingList *segment;
1802
0
    int     npacked;
1803
0
    int     segsize;
1804
1805
0
    segment = ginCompressPostingList(&items[nrootitems],
1806
0
                     nitems - nrootitems,
1807
0
                     GinPostingListSegmentMaxSize,
1808
0
                     &npacked);
1809
0
    segsize = SizeOfGinPostingList(segment);
1810
0
    if (rootsize + segsize > GinDataPageMaxDataSize)
1811
0
      break;
1812
1813
0
    memcpy(ptr, segment, segsize);
1814
0
    ptr += segsize;
1815
0
    rootsize += segsize;
1816
0
    nrootitems += npacked;
1817
0
    pfree(segment);
1818
0
  }
1819
0
  GinDataPageSetDataSize(tmppage, rootsize);
1820
1821
  /*
1822
   * All set. Get a new physical page, and copy the in-memory page to it.
1823
   */
1824
0
  buffer = GinNewBuffer(index);
1825
0
  page = BufferGetPage(buffer);
1826
0
  blkno = BufferGetBlockNumber(buffer);
1827
1828
  /*
1829
   * Copy any predicate locks from the entry tree leaf (containing posting
1830
   * list) to the posting tree.
1831
   */
1832
0
  PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
1833
1834
0
  START_CRIT_SECTION();
1835
1836
0
  PageRestoreTempPage(tmppage, page);
1837
0
  MarkBufferDirty(buffer);
1838
1839
0
  if (RelationNeedsWAL(index) && !is_build)
1840
0
  {
1841
0
    XLogRecPtr  recptr;
1842
0
    ginxlogCreatePostingTree data;
1843
1844
0
    data.size = rootsize;
1845
1846
0
    XLogBeginInsert();
1847
0
    XLogRegisterData(&data, sizeof(ginxlogCreatePostingTree));
1848
1849
0
    XLogRegisterData(GinDataLeafPageGetPostingList(page),
1850
0
             rootsize);
1851
0
    XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
1852
1853
0
    recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
1854
0
    PageSetLSN(page, recptr);
1855
0
  }
1856
1857
0
  UnlockReleaseBuffer(buffer);
1858
1859
0
  END_CRIT_SECTION();
1860
1861
  /* During index build, count the newly-added data page */
1862
0
  if (buildStats)
1863
0
    buildStats->nDataPages++;
1864
1865
0
  elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
1866
1867
  /*
1868
   * Add any remaining TIDs to the newly-created posting tree.
1869
   */
1870
0
  if (nitems > nrootitems)
1871
0
  {
1872
0
    ginInsertItemPointers(index, blkno,
1873
0
                items + nrootitems,
1874
0
                nitems - nrootitems,
1875
0
                buildStats);
1876
0
  }
1877
1878
0
  return blkno;
1879
0
}
1880
1881
static void
1882
ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
1883
0
{
1884
0
  memset(btree, 0, sizeof(GinBtreeData));
1885
1886
0
  btree->index = index;
1887
0
  btree->rootBlkno = rootBlkno;
1888
1889
0
  btree->findChildPage = dataLocateItem;
1890
0
  btree->getLeftMostChild = dataGetLeftMostPage;
1891
0
  btree->isMoveRight = dataIsMoveRight;
1892
0
  btree->findItem = NULL;
1893
0
  btree->findChildPtr = dataFindChildPtr;
1894
0
  btree->beginPlaceToPage = dataBeginPlaceToPage;
1895
0
  btree->execPlaceToPage = dataExecPlaceToPage;
1896
0
  btree->fillRoot = ginDataFillRoot;
1897
0
  btree->prepareDownlink = dataPrepareDownlink;
1898
1899
0
  btree->isData = true;
1900
0
  btree->fullScan = false;
1901
0
  btree->isBuild = false;
1902
0
}
1903
1904
/*
1905
 * Inserts array of item pointers, may execute several tree scan (very rare)
1906
 */
1907
void
1908
ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
1909
            ItemPointerData *items, uint32 nitem,
1910
            GinStatsData *buildStats)
1911
0
{
1912
0
  GinBtreeData btree;
1913
0
  GinBtreeDataLeafInsertData insertdata;
1914
0
  GinBtreeStack *stack;
1915
1916
0
  ginPrepareDataScan(&btree, index, rootBlkno);
1917
0
  btree.isBuild = (buildStats != NULL);
1918
0
  insertdata.items = items;
1919
0
  insertdata.nitem = nitem;
1920
0
  insertdata.curitem = 0;
1921
1922
0
  while (insertdata.curitem < insertdata.nitem)
1923
0
  {
1924
    /* search for the leaf page where the first item should go to */
1925
0
    btree.itemptr = insertdata.items[insertdata.curitem];
1926
0
    stack = ginFindLeafPage(&btree, false, true);
1927
1928
0
    ginInsertValue(&btree, stack, &insertdata, buildStats);
1929
0
  }
1930
0
}
1931
1932
/*
1933
 * Starts a new scan on a posting tree.
1934
 */
1935
GinBtreeStack *
1936
ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
1937
0
{
1938
0
  GinBtreeStack *stack;
1939
1940
0
  ginPrepareDataScan(btree, index, rootBlkno);
1941
1942
0
  btree->fullScan = true;
1943
1944
0
  stack = ginFindLeafPage(btree, true, false);
1945
1946
0
  return stack;
1947
0
}