Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/access/gist/gistbuildbuffers.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * gistbuildbuffers.c
4
 *    node buffer management functions for GiST buffering build algorithm.
5
 *
6
 *
7
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 * IDENTIFICATION
11
 *    src/backend/access/gist/gistbuildbuffers.c
12
 *
13
 *-------------------------------------------------------------------------
14
 */
15
#include "postgres.h"
16
17
#include "access/gist_private.h"
18
#include "storage/buffile.h"
19
#include "storage/bufmgr.h"
20
#include "utils/rel.h"
21
22
static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
23
static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
24
                GISTNodeBuffer *nodeBuffer);
25
static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
26
                 GISTNodeBuffer *nodeBuffer);
27
static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
28
                 GISTNodeBuffer *nodeBuffer);
29
static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
30
                IndexTuple itup);
31
static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
32
                IndexTuple *itup);
33
static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
34
static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
35
36
static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
37
static void WriteTempFileBlock(BufFile *file, long blknum, const void *ptr);
38
39
40
/*
41
 * Initialize GiST build buffers.
42
 */
43
GISTBuildBuffers *
44
gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
45
0
{
46
0
  GISTBuildBuffers *gfbb;
47
0
  HASHCTL   hashCtl;
48
49
0
  gfbb = palloc(sizeof(GISTBuildBuffers));
50
0
  gfbb->pagesPerBuffer = pagesPerBuffer;
51
0
  gfbb->levelStep = levelStep;
52
53
  /*
54
   * Create a temporary file to hold buffer pages that are swapped out of
55
   * memory.
56
   */
57
0
  gfbb->pfile = BufFileCreateTemp(false);
58
0
  gfbb->nFileBlocks = 0;
59
60
  /* Initialize free page management. */
61
0
  gfbb->nFreeBlocks = 0;
62
0
  gfbb->freeBlocksLen = 32;
63
0
  gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
64
65
  /*
66
   * Current memory context will be used for all in-memory data structures
67
   * of buffers which are persistent during buffering build.
68
   */
69
0
  gfbb->context = CurrentMemoryContext;
70
71
  /*
72
   * nodeBuffersTab hash is association between index blocks and it's
73
   * buffers.
74
   */
75
0
  hashCtl.keysize = sizeof(BlockNumber);
76
0
  hashCtl.entrysize = sizeof(GISTNodeBuffer);
77
0
  hashCtl.hcxt = CurrentMemoryContext;
78
0
  gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
79
0
                     1024,
80
0
                     &hashCtl,
81
0
                     HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
82
83
0
  gfbb->bufferEmptyingQueue = NIL;
84
85
  /*
86
   * Per-level node buffers lists for final buffers emptying process. Node
87
   * buffers are inserted here when they are created.
88
   */
89
0
  gfbb->buffersOnLevelsLen = 1;
90
0
  gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
91
0
                       gfbb->buffersOnLevelsLen);
92
0
  gfbb->buffersOnLevels[0] = NIL;
93
94
  /*
95
   * Block numbers of node buffers which last pages are currently loaded
96
   * into main memory.
97
   */
98
0
  gfbb->loadedBuffersLen = 32;
99
0
  gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
100
0
                           sizeof(GISTNodeBuffer *));
101
0
  gfbb->loadedBuffersCount = 0;
102
103
0
  gfbb->rootlevel = maxLevel;
104
105
0
  return gfbb;
106
0
}
107
108
/*
109
 * Returns a node buffer for given block. The buffer is created if it
110
 * doesn't exist yet.
111
 */
112
GISTNodeBuffer *
113
gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
114
          BlockNumber nodeBlocknum, int level)
115
0
{
116
0
  GISTNodeBuffer *nodeBuffer;
117
0
  bool    found;
118
119
  /* Find node buffer in hash table */
120
0
  nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
121
0
                        &nodeBlocknum,
122
0
                        HASH_ENTER,
123
0
                        &found);
124
0
  if (!found)
125
0
  {
126
    /*
127
     * Node buffer wasn't found. Initialize the new buffer as empty.
128
     */
129
0
    MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
130
131
    /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
132
0
    nodeBuffer->blocksCount = 0;
133
0
    nodeBuffer->pageBlocknum = InvalidBlockNumber;
134
0
    nodeBuffer->pageBuffer = NULL;
135
0
    nodeBuffer->queuedForEmptying = false;
136
0
    nodeBuffer->isTemp = false;
137
0
    nodeBuffer->level = level;
138
139
    /*
140
     * Add this buffer to the list of buffers on this level. Enlarge
141
     * buffersOnLevels array if needed.
142
     */
143
0
    if (level >= gfbb->buffersOnLevelsLen)
144
0
    {
145
0
      int     i;
146
147
0
      gfbb->buffersOnLevels =
148
0
        (List **) repalloc(gfbb->buffersOnLevels,
149
0
                   (level + 1) * sizeof(List *));
150
151
      /* initialize the enlarged portion */
152
0
      for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
153
0
        gfbb->buffersOnLevels[i] = NIL;
154
0
      gfbb->buffersOnLevelsLen = level + 1;
155
0
    }
156
157
    /*
158
     * Prepend the new buffer to the list of buffers on this level. It's
159
     * not arbitrary that the new buffer is put to the beginning of the
160
     * list: in the final emptying phase we loop through all buffers at
161
     * each level, and flush them. If a page is split during the emptying,
162
     * it's more efficient to flush the new split pages first, before
163
     * moving on to pre-existing pages on the level. The buffers just
164
     * created during the page split are likely still in cache, so
165
     * flushing them immediately is more efficient than putting them to
166
     * the end of the queue.
167
     */
168
0
    gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
169
0
                       gfbb->buffersOnLevels[level]);
170
171
0
    MemoryContextSwitchTo(oldcxt);
172
0
  }
173
174
0
  return nodeBuffer;
175
0
}
176
177
/*
178
 * Allocate memory for a buffer page.
179
 */
180
static GISTNodeBufferPage *
181
gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
182
0
{
183
0
  GISTNodeBufferPage *pageBuffer;
184
185
0
  pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
186
0
                                 BLCKSZ);
187
0
  pageBuffer->prev = InvalidBlockNumber;
188
189
  /* Set page free space */
190
0
  PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
191
0
  return pageBuffer;
192
0
}
193
194
/*
195
 * Add specified buffer into loadedBuffers array.
196
 */
197
static void
198
gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
199
0
{
200
  /* Never add a temporary buffer to the array */
201
0
  if (nodeBuffer->isTemp)
202
0
    return;
203
204
  /* Enlarge the array if needed */
205
0
  if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
206
0
  {
207
0
    gfbb->loadedBuffersLen *= 2;
208
0
    gfbb->loadedBuffers = (GISTNodeBuffer **)
209
0
      repalloc(gfbb->loadedBuffers,
210
0
           gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
211
0
  }
212
213
0
  gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
214
0
  gfbb->loadedBuffersCount++;
215
0
}
216
217
/*
218
 * Load last page of node buffer into main memory.
219
 */
220
static void
221
gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
222
0
{
223
  /* Check if we really should load something */
224
0
  if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
225
0
  {
226
    /* Allocate memory for page */
227
0
    nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
228
229
    /* Read block from temporary file */
230
0
    ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
231
0
              nodeBuffer->pageBuffer);
232
233
    /* Mark file block as free */
234
0
    gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
235
236
    /* Mark node buffer as loaded */
237
0
    gistAddLoadedBuffer(gfbb, nodeBuffer);
238
0
    nodeBuffer->pageBlocknum = InvalidBlockNumber;
239
0
  }
240
0
}
241
242
/*
243
 * Write last page of node buffer to the disk.
244
 */
245
static void
246
gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
247
0
{
248
  /* Check if we have something to write */
249
0
  if (nodeBuffer->pageBuffer)
250
0
  {
251
0
    BlockNumber blkno;
252
253
    /* Get free file block */
254
0
    blkno = gistBuffersGetFreeBlock(gfbb);
255
256
    /* Write block to the temporary file */
257
0
    WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
258
259
    /* Free memory of that page */
260
0
    pfree(nodeBuffer->pageBuffer);
261
0
    nodeBuffer->pageBuffer = NULL;
262
263
    /* Save block number */
264
0
    nodeBuffer->pageBlocknum = blkno;
265
0
  }
266
0
}
267
268
/*
269
 * Write last pages of all node buffers to the disk.
270
 */
271
void
272
gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
273
0
{
274
0
  int     i;
275
276
  /* Unload all the buffers that have a page loaded in memory. */
277
0
  for (i = 0; i < gfbb->loadedBuffersCount; i++)
278
0
    gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
279
280
  /* Now there are no node buffers with loaded last page */
281
0
  gfbb->loadedBuffersCount = 0;
282
0
}
283
284
/*
285
 * Add index tuple to buffer page.
286
 */
287
static void
288
gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
289
0
{
290
0
  Size    itupsz = IndexTupleSize(itup);
291
0
  char     *ptr;
292
293
  /* There should be enough of space. */
294
0
  Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
295
296
  /* Reduce free space value of page to reserve a spot for the tuple. */
297
0
  PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
298
299
  /* Get pointer to the spot we reserved (ie. end of free space). */
300
0
  ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
301
0
    + PAGE_FREE_SPACE(pageBuffer);
302
303
  /* Copy the index tuple there. */
304
0
  memcpy(ptr, itup, itupsz);
305
0
}
306
307
/*
308
 * Get last item from buffer page and remove it from page.
309
 */
310
static void
311
gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
312
0
{
313
0
  IndexTuple  ptr;
314
0
  Size    itupsz;
315
316
0
  Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
317
318
  /* Get pointer to last index tuple */
319
0
  ptr = (IndexTuple) ((char *) pageBuffer
320
0
            + BUFFER_PAGE_DATA_OFFSET
321
0
            + PAGE_FREE_SPACE(pageBuffer));
322
0
  itupsz = IndexTupleSize(ptr);
323
324
  /* Make a copy of the tuple */
325
0
  *itup = (IndexTuple) palloc(itupsz);
326
0
  memcpy(*itup, ptr, itupsz);
327
328
  /* Mark the space used by the tuple as free */
329
0
  PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
330
0
}
331
332
/*
333
 * Push an index tuple to node buffer.
334
 */
335
void
336
gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
337
             IndexTuple itup)
338
0
{
339
  /*
340
   * Most part of memory operations will be in buffering build persistent
341
   * context. So, let's switch to it.
342
   */
343
0
  MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
344
345
  /*
346
   * If the buffer is currently empty, create the first page.
347
   */
348
0
  if (nodeBuffer->blocksCount == 0)
349
0
  {
350
0
    nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
351
0
    nodeBuffer->blocksCount = 1;
352
0
    gistAddLoadedBuffer(gfbb, nodeBuffer);
353
0
  }
354
355
  /* Load last page of node buffer if it wasn't in memory already */
356
0
  if (!nodeBuffer->pageBuffer)
357
0
    gistLoadNodeBuffer(gfbb, nodeBuffer);
358
359
  /*
360
   * Check if there is enough space on the last page for the tuple.
361
   */
362
0
  if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
363
0
  {
364
    /*
365
     * Nope. Swap previous block to disk and allocate a new one.
366
     */
367
0
    BlockNumber blkno;
368
369
    /* Write filled page to the disk */
370
0
    blkno = gistBuffersGetFreeBlock(gfbb);
371
0
    WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
372
373
    /*
374
     * Reset the in-memory page as empty, and link the previous block to
375
     * the new page by storing its block number in the prev-link.
376
     */
377
0
    PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
378
0
      BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
379
0
    nodeBuffer->pageBuffer->prev = blkno;
380
381
    /* We've just added one more page */
382
0
    nodeBuffer->blocksCount++;
383
0
  }
384
385
0
  gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
386
387
  /*
388
   * If the buffer just overflowed, add it to the emptying queue.
389
   */
390
0
  if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
391
0
  {
392
0
    gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
393
0
                      gfbb->bufferEmptyingQueue);
394
0
    nodeBuffer->queuedForEmptying = true;
395
0
  }
396
397
  /* Restore memory context */
398
0
  MemoryContextSwitchTo(oldcxt);
399
0
}
400
401
/*
402
 * Removes one index tuple from node buffer. Returns true if success and false
403
 * if node buffer is empty.
404
 */
405
bool
406
gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
407
              IndexTuple *itup)
408
0
{
409
  /*
410
   * If node buffer is empty then return false.
411
   */
412
0
  if (nodeBuffer->blocksCount <= 0)
413
0
    return false;
414
415
  /* Load last page of node buffer if needed */
416
0
  if (!nodeBuffer->pageBuffer)
417
0
    gistLoadNodeBuffer(gfbb, nodeBuffer);
418
419
  /*
420
   * Get index tuple from last non-empty page.
421
   */
422
0
  gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
423
424
  /*
425
   * If we just removed the last tuple from the page, fetch previous page on
426
   * this node buffer (if any).
427
   */
428
0
  if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
429
0
  {
430
0
    BlockNumber prevblkno;
431
432
    /*
433
     * blocksCount includes the page in pageBuffer, so decrease it now.
434
     */
435
0
    nodeBuffer->blocksCount--;
436
437
    /*
438
     * If there's more pages, fetch previous one.
439
     */
440
0
    prevblkno = nodeBuffer->pageBuffer->prev;
441
0
    if (prevblkno != InvalidBlockNumber)
442
0
    {
443
      /* There is a previous page. Fetch it. */
444
0
      Assert(nodeBuffer->blocksCount > 0);
445
0
      ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
446
447
      /*
448
       * Now that we've read the block in memory, we can release its
449
       * on-disk block for reuse.
450
       */
451
0
      gistBuffersReleaseBlock(gfbb, prevblkno);
452
0
    }
453
0
    else
454
0
    {
455
      /* No more pages. Free memory. */
456
0
      Assert(nodeBuffer->blocksCount == 0);
457
0
      pfree(nodeBuffer->pageBuffer);
458
0
      nodeBuffer->pageBuffer = NULL;
459
0
    }
460
0
  }
461
0
  return true;
462
0
}
463
464
/*
465
 * Select a currently unused block for writing to.
466
 */
467
static long
468
gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
469
0
{
470
  /*
471
   * If there are multiple free blocks, we select the one appearing last in
472
   * freeBlocks[].  If there are none, assign the next block at the end of
473
   * the file (causing the file to be extended).
474
   */
475
0
  if (gfbb->nFreeBlocks > 0)
476
0
    return gfbb->freeBlocks[--gfbb->nFreeBlocks];
477
0
  else
478
0
    return gfbb->nFileBlocks++;
479
0
}
480
481
/*
482
 * Return a block# to the freelist.
483
 */
484
static void
485
gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
486
0
{
487
0
  int     ndx;
488
489
  /* Enlarge freeBlocks array if full. */
490
0
  if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
491
0
  {
492
0
    gfbb->freeBlocksLen *= 2;
493
0
    gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
494
0
                       gfbb->freeBlocksLen *
495
0
                       sizeof(long));
496
0
  }
497
498
  /* Add blocknum to array */
499
0
  ndx = gfbb->nFreeBlocks++;
500
0
  gfbb->freeBlocks[ndx] = blocknum;
501
0
}
502
503
/*
504
 * Free buffering build data structure.
505
 */
506
void
507
gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
508
0
{
509
  /* Close buffers file. */
510
0
  BufFileClose(gfbb->pfile);
511
512
  /* All other things will be freed on memory context release */
513
0
}
514
515
/*
516
 * Data structure representing information about node buffer for index tuples
517
 * relocation from split node buffer.
518
 */
519
typedef struct
520
{
521
  GISTENTRY entry[INDEX_MAX_KEYS];
522
  bool    isnull[INDEX_MAX_KEYS];
523
  GISTPageSplitInfo *splitinfo;
524
  GISTNodeBuffer *nodeBuffer;
525
} RelocationBufferInfo;
526
527
/*
528
 * At page split, distribute tuples from the buffer of the split page to
529
 * new buffers for the created page halves. This also adjusts the downlinks
530
 * in 'splitinfo' to include the tuples in the buffers.
531
 */
532
void
533
gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
534
                Relation r, int level,
535
                Buffer buffer, List *splitinfo)
536
0
{
537
0
  RelocationBufferInfo *relocationBuffersInfos;
538
0
  bool    found;
539
0
  GISTNodeBuffer *nodeBuffer;
540
0
  BlockNumber blocknum;
541
0
  IndexTuple  itup;
542
0
  int     splitPagesCount = 0;
543
0
  GISTENTRY entry[INDEX_MAX_KEYS];
544
0
  bool    isnull[INDEX_MAX_KEYS];
545
0
  GISTNodeBuffer oldBuf;
546
0
  ListCell   *lc;
547
548
  /* If the split page doesn't have buffers, we have nothing to do. */
549
0
  if (!LEVEL_HAS_BUFFERS(level, gfbb))
550
0
    return;
551
552
  /*
553
   * Get the node buffer of the split page.
554
   */
555
0
  blocknum = BufferGetBlockNumber(buffer);
556
0
  nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
557
0
               HASH_FIND, &found);
558
0
  if (!found)
559
0
  {
560
    /* The page has no buffer, so we have nothing to do. */
561
0
    return;
562
0
  }
563
564
  /*
565
   * Make a copy of the old buffer, as we're going reuse it as the buffer
566
   * for the new left page, which is on the same block as the old page.
567
   * That's not true for the root page, but that's fine because we never
568
   * have a buffer on the root page anyway. The original algorithm as
569
   * described by Arge et al did, but it's of no use, as you might as well
570
   * read the tuples straight from the heap instead of the root buffer.
571
   */
572
0
  Assert(blocknum != GIST_ROOT_BLKNO);
573
0
  memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
574
0
  oldBuf.isTemp = true;
575
576
  /* Reset the old buffer, used for the new left page from now on */
577
0
  nodeBuffer->blocksCount = 0;
578
0
  nodeBuffer->pageBuffer = NULL;
579
0
  nodeBuffer->pageBlocknum = InvalidBlockNumber;
580
581
  /*
582
   * Allocate memory for information about relocation buffers.
583
   */
584
0
  splitPagesCount = list_length(splitinfo);
585
0
  relocationBuffersInfos =
586
0
    (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
587
0
                    splitPagesCount);
588
589
  /*
590
   * Fill relocation buffers information for node buffers of pages produced
591
   * by split.
592
   */
593
0
  foreach(lc, splitinfo)
594
0
  {
595
0
    GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
596
0
    GISTNodeBuffer *newNodeBuffer;
597
0
    int     i = foreach_current_index(lc);
598
599
    /* Decompress parent index tuple of node buffer page. */
600
0
    gistDeCompressAtt(giststate, r,
601
0
              si->downlink, NULL, (OffsetNumber) 0,
602
0
              relocationBuffersInfos[i].entry,
603
0
              relocationBuffersInfos[i].isnull);
604
605
    /*
606
     * Create a node buffer for the page. The leftmost half is on the same
607
     * block as the old page before split, so for the leftmost half this
608
     * will return the original buffer. The tuples on the original buffer
609
     * were relinked to the temporary buffer, so the original one is now
610
     * empty.
611
     */
612
0
    newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
613
614
0
    relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
615
0
    relocationBuffersInfos[i].splitinfo = si;
616
0
  }
617
618
  /*
619
   * Loop through all index tuples in the buffer of the page being split,
620
   * moving them to buffers for the new pages.  We try to move each tuple to
621
   * the page that will result in the lowest penalty for the leading column
622
   * or, in the case of a tie, the lowest penalty for the earliest column
623
   * that is not tied.
624
   *
625
   * The page searching logic is very similar to gistchoose().
626
   */
627
0
  while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
628
0
  {
629
0
    float   best_penalty[INDEX_MAX_KEYS];
630
0
    int     i,
631
0
          which;
632
0
    IndexTuple  newtup;
633
0
    RelocationBufferInfo *targetBufferInfo;
634
635
0
    gistDeCompressAtt(giststate, r,
636
0
              itup, NULL, (OffsetNumber) 0, entry, isnull);
637
638
    /* default to using first page (shouldn't matter) */
639
0
    which = 0;
640
641
    /*
642
     * best_penalty[j] is the best penalty we have seen so far for column
643
     * j, or -1 when we haven't yet examined column j.  Array entries to
644
     * the right of the first -1 are undefined.
645
     */
646
0
    best_penalty[0] = -1;
647
648
    /*
649
     * Loop over possible target pages, looking for one to move this tuple
650
     * to.
651
     */
652
0
    for (i = 0; i < splitPagesCount; i++)
653
0
    {
654
0
      RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
655
0
      bool    zero_penalty;
656
0
      int     j;
657
658
0
      zero_penalty = true;
659
660
      /* Loop over index attributes. */
661
0
      for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
662
0
      {
663
0
        float   usize;
664
665
        /* Compute penalty for this column. */
666
0
        usize = gistpenalty(giststate, j,
667
0
                  &splitPageInfo->entry[j],
668
0
                  splitPageInfo->isnull[j],
669
0
                  &entry[j], isnull[j]);
670
0
        if (usize > 0)
671
0
          zero_penalty = false;
672
673
0
        if (best_penalty[j] < 0 || usize < best_penalty[j])
674
0
        {
675
          /*
676
           * New best penalty for column.  Tentatively select this
677
           * page as the target, and record the best penalty.  Then
678
           * reset the next column's penalty to "unknown" (and
679
           * indirectly, the same for all the ones to its right).
680
           * This will force us to adopt this page's penalty values
681
           * as the best for all the remaining columns during
682
           * subsequent loop iterations.
683
           */
684
0
          which = i;
685
0
          best_penalty[j] = usize;
686
687
0
          if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
688
0
            best_penalty[j + 1] = -1;
689
0
        }
690
0
        else if (best_penalty[j] == usize)
691
0
        {
692
          /*
693
           * The current page is exactly as good for this column as
694
           * the best page seen so far.  The next iteration of this
695
           * loop will compare the next column.
696
           */
697
0
        }
698
0
        else
699
0
        {
700
          /*
701
           * The current page is worse for this column than the best
702
           * page seen so far.  Skip the remaining columns and move
703
           * on to the next page, if any.
704
           */
705
0
          zero_penalty = false; /* so outer loop won't exit */
706
0
          break;
707
0
        }
708
0
      }
709
710
      /*
711
       * If we find a page with zero penalty for all columns, there's no
712
       * need to examine remaining pages; just break out of the loop and
713
       * return it.
714
       */
715
0
      if (zero_penalty)
716
0
        break;
717
0
    }
718
719
    /* OK, "which" is the page index to push the tuple to */
720
0
    targetBufferInfo = &relocationBuffersInfos[which];
721
722
    /* Push item to selected node buffer */
723
0
    gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
724
725
    /* Adjust the downlink for this page, if needed. */
726
0
    newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
727
0
                 itup, giststate);
728
0
    if (newtup)
729
0
    {
730
0
      gistDeCompressAtt(giststate, r,
731
0
                newtup, NULL, (OffsetNumber) 0,
732
0
                targetBufferInfo->entry,
733
0
                targetBufferInfo->isnull);
734
735
0
      targetBufferInfo->splitinfo->downlink = newtup;
736
0
    }
737
0
  }
738
739
0
  pfree(relocationBuffersInfos);
740
0
}
741
742
743
/*
744
 * Wrappers around BufFile operations. The main difference is that these
745
 * wrappers report errors with ereport(), so that the callers don't need
746
 * to check the return code.
747
 */
748
749
static void
750
ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
751
0
{
752
0
  if (BufFileSeekBlock(file, blknum) != 0)
753
0
    elog(ERROR, "could not seek to block %ld in temporary file", blknum);
754
0
  BufFileReadExact(file, ptr, BLCKSZ);
755
0
}
756
757
static void
758
WriteTempFileBlock(BufFile *file, long blknum, const void *ptr)
759
0
{
760
0
  if (BufFileSeekBlock(file, blknum) != 0)
761
0
    elog(ERROR, "could not seek to block %ld in temporary file", blknum);
762
0
  BufFileWrite(file, ptr, BLCKSZ);
763
0
}