/src/postgres/src/backend/access/gist/gistbuild.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * gistbuild.c |
4 | | * implementation of the build algorithm for GiST indexes. |
5 | | * |
6 | | * There are two different strategies: |
7 | | * |
8 | | * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted |
9 | | * order, and create downlinks and internal pages as we go. This builds |
10 | | * the index from the bottom up, similar to how B-tree index build |
11 | | * works. |
12 | | * |
13 | | * 2. Start with an empty index, and insert all tuples one by one. |
14 | | * |
15 | | * The sorted method is used if the operator classes for all columns have |
16 | | * a 'sortsupport' defined. Otherwise, we resort to the second strategy. |
17 | | * |
18 | | * The second strategy can optionally use buffers at different levels of |
19 | | * the tree to reduce I/O, see "Buffering build algorithm" in the README |
20 | | * for a more detailed explanation. It initially calls insert over and |
21 | | * over, but switches to the buffered algorithm after a certain number of |
22 | | * tuples (unless buffering mode is disabled). |
23 | | * |
24 | | * |
25 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
26 | | * Portions Copyright (c) 1994, Regents of the University of California |
27 | | * |
28 | | * IDENTIFICATION |
29 | | * src/backend/access/gist/gistbuild.c |
30 | | * |
31 | | *------------------------------------------------------------------------- |
32 | | */ |
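/*
 * Editor's sketch (not part of gistbuild.c; all names here are hypothetical
 * stand-ins for the reloption and catalog lookups the real code performs):
 * the strategy choice described above, reduced to plain booleans.  Forcing
 * buffering with the reloption rules out the sorted build; otherwise the
 * sorted build wins whenever every key column has a sortsupport function.
 */
#include <stdio.h>

typedef enum
{
	CHOICE_SORTED,				/* bottom-up build from sorted tuples */
	CHOICE_BUFFERING_STATS,		/* forced buffering: gather stats first */
	CHOICE_INSERTS				/* tuple-at-a-time, may switch to buffering */
} BuildChoice;

static BuildChoice
choose_strategy(int buffering_forced_on, int all_keys_have_sortsupport)
{
	if (!buffering_forced_on && all_keys_have_sortsupport)
		return CHOICE_SORTED;
	if (buffering_forced_on)
		return CHOICE_BUFFERING_STATS;
	return CHOICE_INSERTS;
}

int
main(void)
{
	printf("%d\n", choose_strategy(0, 1));	/* 0: sorted build */
	printf("%d\n", choose_strategy(1, 1));	/* 1: buffering, despite sortsupport */
	printf("%d\n", choose_strategy(0, 0));	/* 2: plain inserts ("auto" mode) */
	return 0;
}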
33 | | #include "postgres.h" |
34 | | |
35 | | #include <math.h> |
36 | | |
37 | | #include "access/genam.h" |
38 | | #include "access/gist_private.h" |
39 | | #include "access/tableam.h" |
40 | | #include "access/xloginsert.h" |
41 | | #include "miscadmin.h" |
42 | | #include "nodes/execnodes.h" |
43 | | #include "optimizer/optimizer.h" |
44 | | #include "storage/bufmgr.h" |
45 | | #include "storage/bulk_write.h" |
46 | | |
47 | | #include "utils/memutils.h" |
48 | | #include "utils/rel.h" |
49 | | #include "utils/tuplesort.h" |
50 | | |
51 | | /* Interval, in index tuples, between checks whether to switch to buffering build mode */ |
52 | 0 | #define BUFFERING_MODE_SWITCH_CHECK_STEP 256 |
53 | | |
54 | | /* |
55 | | * Number of tuples to process in the slow way before switching to buffering |
56 | | * mode, when buffering is explicitly turned on. Also, the number of tuples |
57 | | * to process between readjusting the buffer size parameter, while in |
58 | | * buffering mode. |
59 | | */ |
60 | 0 | #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096 |
61 | | |
62 | | /* |
63 | | * Strategy used to build the index. It can change between the |
64 | | * GIST_BUFFERING_* modes on the fly, but if the Sorted method is used, |
65 | | * that needs to be decided up-front and cannot be changed afterwards. |
66 | | */ |
67 | | typedef enum |
68 | | { |
69 | | GIST_SORTED_BUILD, /* bottom-up build by sorting */ |
70 | | GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to |
71 | | * switch */ |
72 | | GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to |
73 | | * buffering build mode if the index grows too |
74 | | * big */ |
75 | | GIST_BUFFERING_STATS, /* gathering statistics of index tuple size |
76 | | * before switching to the buffering build |
77 | | * mode */ |
78 | | GIST_BUFFERING_ACTIVE, /* in buffering build mode */ |
79 | | } GistBuildMode; |
80 | | |
81 | | /* Working state for gistbuild and its callback */ |
82 | | typedef struct |
83 | | { |
84 | | Relation indexrel; |
85 | | Relation heaprel; |
86 | | GISTSTATE *giststate; |
87 | | |
88 | | Size freespace; /* amount of free space to leave on pages */ |
89 | | |
90 | | GistBuildMode buildMode; |
91 | | |
92 | | int64 indtuples; /* number of tuples indexed */ |
93 | | |
94 | | /* |
95 | | * Extra data structures used during a buffering build. 'gfbb' contains |
96 | | * information related to managing the build buffers. 'parentMap' is a |
97 | | * lookup table of the parent of each internal page. |
98 | | */ |
99 | | int64 indtuplesSize; /* total size of all indexed tuples */ |
100 | | GISTBuildBuffers *gfbb; |
101 | | HTAB *parentMap; |
102 | | |
103 | | /* |
104 | | * Extra data structures used during a sorting build. |
105 | | */ |
106 | | Tuplesortstate *sortstate; /* state data for tuplesort.c */ |
107 | | |
108 | | BlockNumber pages_allocated; |
109 | | |
110 | | BulkWriteState *bulkstate; |
111 | | } GISTBuildState; |
112 | | |
113 | 0 | #define GIST_SORTED_BUILD_PAGE_NUM 4 |
114 | | |
115 | | /* |
116 | | * In sorted build, we use a stack of these structs, one for each level, |
117 | | * to hold an in-memory buffer of the last pages at that level. |
118 | | * |
119 | | * A sorted GiST build requires a good linearization from the sort opclass, |
120 | | * which is not always possible for multidimensional data. To tackle such |
121 | | * anomalies, we buffer index tuples and apply a picksplit that can be multidimension-aware. |
122 | | */ |
123 | | typedef struct GistSortedBuildLevelState |
124 | | { |
125 | | int current_page; |
126 | | BlockNumber last_blkno; |
127 | | struct GistSortedBuildLevelState *parent; /* Upper level, if any */ |
128 | | Page pages[GIST_SORTED_BUILD_PAGE_NUM]; |
129 | | } GistSortedBuildLevelState; |
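/*
 * Editor's sketch (standalone, hypothetical sizes; not PostgreSQL code): the
 * level stack in miniature.  Each level buffers up to PAGES_PER_LEVEL pages;
 * once they are all full the level is "flushed" and a single summary item,
 * standing in for the union downlink, is pushed to the parent level, which
 * is created on demand.  Real page handling, picksplit and tuple data are
 * all elided.
 */
#include <stdio.h>
#include <stdlib.h>

#define PAGES_PER_LEVEL 4		/* mirrors GIST_SORTED_BUILD_PAGE_NUM */
#define ITEMS_PER_PAGE	3		/* tiny, to force frequent flushes */

typedef struct ToyLevel
{
	int			items_on_page;
	int			current_page;
	int			pages_written;
	struct ToyLevel *parent;
} ToyLevel;

static void toy_level_add(ToyLevel *lv);

static void
toy_level_flush(ToyLevel *lv)
{
	lv->pages_written += lv->current_page + 1;
	lv->current_page = 0;
	lv->items_on_page = 0;

	if (lv->parent == NULL)
		lv->parent = calloc(1, sizeof(ToyLevel));
	toy_level_add(lv->parent);	/* one downlink per flush, simplified */
}

static void
toy_level_add(ToyLevel *lv)
{
	if (lv->items_on_page == ITEMS_PER_PAGE)
	{
		if (lv->current_page + 1 == PAGES_PER_LEVEL)
			toy_level_flush(lv);	/* all pages full: flush this level */
		else
			lv->current_page++;		/* move to the next in-memory page */
		lv->items_on_page = 0;
	}
	lv->items_on_page++;
}

int
main(void)
{
	ToyLevel	leaf = {0};

	for (int i = 0; i < 1000; i++)
		toy_level_add(&leaf);
	for (ToyLevel *lv = &leaf; lv != NULL; lv = lv->parent)
		printf("pages written so far: %d\n", lv->pages_written);
	return 0;
}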
130 | | |
131 | | /* prototypes for private functions */ |
132 | | |
133 | | static void gistSortedBuildCallback(Relation index, ItemPointer tid, |
134 | | Datum *values, bool *isnull, |
135 | | bool tupleIsAlive, void *state); |
136 | | static void gist_indexsortbuild(GISTBuildState *state); |
137 | | static void gist_indexsortbuild_levelstate_add(GISTBuildState *state, |
138 | | GistSortedBuildLevelState *levelstate, |
139 | | IndexTuple itup); |
140 | | static void gist_indexsortbuild_levelstate_flush(GISTBuildState *state, |
141 | | GistSortedBuildLevelState *levelstate); |
142 | | |
143 | | static void gistInitBuffering(GISTBuildState *buildstate); |
144 | | static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep); |
145 | | static void gistBuildCallback(Relation index, |
146 | | ItemPointer tid, |
147 | | Datum *values, |
148 | | bool *isnull, |
149 | | bool tupleIsAlive, |
150 | | void *state); |
151 | | static void gistBufferingBuildInsert(GISTBuildState *buildstate, |
152 | | IndexTuple itup); |
153 | | static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, |
154 | | BlockNumber startblkno, int startlevel); |
155 | | static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate, |
156 | | Buffer buffer, int level, |
157 | | IndexTuple *itup, int ntup, OffsetNumber oldoffnum, |
158 | | BlockNumber parentblk, OffsetNumber downlinkoffnum); |
159 | | static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate, |
160 | | BlockNumber childblkno, int level, |
161 | | BlockNumber *parentblkno, |
162 | | OffsetNumber *downlinkoffnum); |
163 | | static void gistProcessEmptyingQueue(GISTBuildState *buildstate); |
164 | | static void gistEmptyAllBuffers(GISTBuildState *buildstate); |
165 | | static int gistGetMaxLevel(Relation index); |
166 | | |
167 | | static void gistInitParentMap(GISTBuildState *buildstate); |
168 | | static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, |
169 | | BlockNumber parent); |
170 | | static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, |
171 | | Buffer parentbuf); |
172 | | static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child); |
173 | | |
174 | | |
175 | | /* |
176 | | * Main entry point to GiST index build. |
177 | | */ |
178 | | IndexBuildResult * |
179 | | gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) |
180 | 0 | { |
181 | 0 | IndexBuildResult *result; |
182 | 0 | double reltuples; |
183 | 0 | GISTBuildState buildstate; |
184 | 0 | MemoryContext oldcxt = CurrentMemoryContext; |
185 | 0 | int fillfactor; |
186 | 0 | Oid SortSupportFnOids[INDEX_MAX_KEYS]; |
187 | 0 | GiSTOptions *options = (GiSTOptions *) index->rd_options; |
188 | | |
189 | | /* |
190 | | * We expect to be called exactly once for any index relation. If that's |
191 | | * not the case, big trouble's what we have. |
192 | | */ |
193 | 0 | if (RelationGetNumberOfBlocks(index) != 0) |
194 | 0 | elog(ERROR, "index \"%s\" already contains data", |
195 | 0 | RelationGetRelationName(index)); |
196 | | |
197 | 0 | buildstate.indexrel = index; |
198 | 0 | buildstate.heaprel = heap; |
199 | 0 | buildstate.sortstate = NULL; |
200 | 0 | buildstate.giststate = initGISTstate(index); |
201 | | |
202 | | /* |
203 | | * Create a temporary memory context that is reset once for each tuple |
204 | | * processed. (Note: we don't bother to make this a child of the |
205 | | * giststate's scanCxt, so we have to delete it separately at the end.) |
206 | | */ |
207 | 0 | buildstate.giststate->tempCxt = createTempGistContext(); |
208 | | |
209 | | /* |
210 | | * Choose build strategy. First check whether the user specified to use |
211 | | * buffering mode. (The use-case for that in the field is somewhat |
212 | | * questionable perhaps, but it's important for testing purposes.) |
213 | | */ |
214 | 0 | if (options) |
215 | 0 | { |
216 | 0 | if (options->buffering_mode == GIST_OPTION_BUFFERING_ON) |
217 | 0 | buildstate.buildMode = GIST_BUFFERING_STATS; |
218 | 0 | else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF) |
219 | 0 | buildstate.buildMode = GIST_BUFFERING_DISABLED; |
220 | 0 | else /* must be "auto" */ |
221 | 0 | buildstate.buildMode = GIST_BUFFERING_AUTO; |
222 | 0 | } |
223 | 0 | else |
224 | 0 | { |
225 | 0 | buildstate.buildMode = GIST_BUFFERING_AUTO; |
226 | 0 | } |
227 | | |
228 | | /* |
229 | | * Unless buffering mode was forced, see if we can use sorting instead. |
230 | | */ |
231 | 0 | if (buildstate.buildMode != GIST_BUFFERING_STATS) |
232 | 0 | { |
233 | 0 | bool hasallsortsupports = true; |
234 | 0 | int keyscount = IndexRelationGetNumberOfKeyAttributes(index); |
235 | |
236 | 0 | for (int i = 0; i < keyscount; i++) |
237 | 0 | { |
238 | 0 | SortSupportFnOids[i] = index_getprocid(index, i + 1, |
239 | 0 | GIST_SORTSUPPORT_PROC); |
240 | 0 | if (!OidIsValid(SortSupportFnOids[i])) |
241 | 0 | { |
242 | 0 | hasallsortsupports = false; |
243 | 0 | break; |
244 | 0 | } |
245 | 0 | } |
246 | 0 | if (hasallsortsupports) |
247 | 0 | buildstate.buildMode = GIST_SORTED_BUILD; |
248 | 0 | } |
249 | | |
250 | | /* |
251 | | * Calculate target amount of free space to leave on pages. |
252 | | */ |
253 | 0 | fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR; |
254 | 0 | buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100; |
255 | | |
256 | | /* |
257 | | * Build the index using the chosen strategy. |
258 | | */ |
259 | 0 | buildstate.indtuples = 0; |
260 | 0 | buildstate.indtuplesSize = 0; |
261 | |
262 | 0 | if (buildstate.buildMode == GIST_SORTED_BUILD) |
263 | 0 | { |
264 | | /* |
265 | | * Sort all data, build the index from bottom up. |
266 | | */ |
267 | 0 | buildstate.sortstate = tuplesort_begin_index_gist(heap, |
268 | 0 | index, |
269 | 0 | maintenance_work_mem, |
270 | 0 | NULL, |
271 | 0 | TUPLESORT_NONE); |
272 | | |
273 | | /* Scan the table, adding all tuples to the tuplesort */ |
274 | 0 | reltuples = table_index_build_scan(heap, index, indexInfo, true, true, |
275 | 0 | gistSortedBuildCallback, |
276 | 0 | &buildstate, NULL); |
277 | | |
278 | | /* |
279 | | * Perform the sort and build index pages. |
280 | | */ |
281 | 0 | tuplesort_performsort(buildstate.sortstate); |
282 | |
283 | 0 | gist_indexsortbuild(&buildstate); |
284 | |
285 | 0 | tuplesort_end(buildstate.sortstate); |
286 | 0 | } |
287 | 0 | else |
288 | 0 | { |
289 | | /* |
290 | | * Initialize an empty index and insert all tuples, possibly using |
291 | | * buffers on intermediate levels. |
292 | | */ |
293 | 0 | Buffer buffer; |
294 | 0 | Page page; |
295 | | |
296 | | /* initialize the root page */ |
297 | 0 | buffer = gistNewBuffer(index, heap); |
298 | 0 | Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); |
299 | 0 | page = BufferGetPage(buffer); |
300 | |
301 | 0 | START_CRIT_SECTION(); |
302 | |
303 | 0 | GISTInitBuffer(buffer, F_LEAF); |
304 | |
305 | 0 | MarkBufferDirty(buffer); |
306 | 0 | PageSetLSN(page, GistBuildLSN); |
307 | |
308 | 0 | UnlockReleaseBuffer(buffer); |
309 | |
310 | 0 | END_CRIT_SECTION(); |
311 | | |
312 | | /* Scan the table, inserting all the tuples to the index. */ |
313 | 0 | reltuples = table_index_build_scan(heap, index, indexInfo, true, true, |
314 | 0 | gistBuildCallback, |
315 | 0 | &buildstate, NULL); |
316 | | |
317 | | /* |
318 | | * If buffering was used, flush out all the tuples that are still in |
319 | | * the buffers. |
320 | | */ |
321 | 0 | if (buildstate.buildMode == GIST_BUFFERING_ACTIVE) |
322 | 0 | { |
323 | 0 | elog(DEBUG1, "all tuples processed, emptying buffers"); |
324 | 0 | gistEmptyAllBuffers(&buildstate); |
325 | 0 | gistFreeBuildBuffers(buildstate.gfbb); |
326 | 0 | } |
327 | | |
328 | | /* |
329 | | * We didn't write WAL records as we built the index, so if |
330 | | * WAL-logging is required, write all pages to the WAL now. |
331 | | */ |
332 | 0 | if (RelationNeedsWAL(index)) |
333 | 0 | { |
334 | 0 | log_newpage_range(index, MAIN_FORKNUM, |
335 | 0 | 0, RelationGetNumberOfBlocks(index), |
336 | 0 | true); |
337 | 0 | } |
338 | 0 | } |
339 | | |
340 | | /* okay, all heap tuples are indexed */ |
341 | 0 | MemoryContextSwitchTo(oldcxt); |
342 | 0 | MemoryContextDelete(buildstate.giststate->tempCxt); |
343 | |
344 | 0 | freeGISTstate(buildstate.giststate); |
345 | | |
346 | | /* |
347 | | * Return statistics |
348 | | */ |
349 | 0 | result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); |
350 | |
351 | 0 | result->heap_tuples = reltuples; |
352 | 0 | result->index_tuples = (double) buildstate.indtuples; |
353 | |
354 | 0 | return result; |
355 | 0 | } |
356 | | |
357 | | /*------------------------------------------------------------------------- |
358 | | * Routines for sorted build |
359 | | *------------------------------------------------------------------------- |
360 | | */ |
361 | | |
362 | | /* |
363 | | * Per-tuple callback for table_index_build_scan. |
364 | | */ |
365 | | static void |
366 | | gistSortedBuildCallback(Relation index, |
367 | | ItemPointer tid, |
368 | | Datum *values, |
369 | | bool *isnull, |
370 | | bool tupleIsAlive, |
371 | | void *state) |
372 | 0 | { |
373 | 0 | GISTBuildState *buildstate = (GISTBuildState *) state; |
374 | 0 | MemoryContext oldCtx; |
375 | 0 | Datum compressed_values[INDEX_MAX_KEYS]; |
376 | |
377 | 0 | oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
378 | | |
379 | | /* Form an index tuple and point it at the heap tuple */ |
380 | 0 | gistCompressValues(buildstate->giststate, index, |
381 | 0 | values, isnull, |
382 | 0 | true, compressed_values); |
383 | |
384 | 0 | tuplesort_putindextuplevalues(buildstate->sortstate, |
385 | 0 | buildstate->indexrel, |
386 | 0 | tid, |
387 | 0 | compressed_values, isnull); |
388 | |
389 | 0 | MemoryContextSwitchTo(oldCtx); |
390 | 0 | MemoryContextReset(buildstate->giststate->tempCxt); |
391 | | |
392 | | /* Update tuple count. */ |
393 | 0 | buildstate->indtuples += 1; |
394 | 0 | } |
395 | | |
396 | | /* |
397 | | * Build GiST index from bottom up from pre-sorted tuples. |
398 | | */ |
399 | | static void |
400 | | gist_indexsortbuild(GISTBuildState *state) |
401 | 0 | { |
402 | 0 | IndexTuple itup; |
403 | 0 | GistSortedBuildLevelState *levelstate; |
404 | 0 | BulkWriteBuffer rootbuf; |
405 | | |
406 | | /* Reserve block 0 for the root page */ |
407 | 0 | state->pages_allocated = 1; |
408 | |
409 | 0 | state->bulkstate = smgr_bulk_start_rel(state->indexrel, MAIN_FORKNUM); |
410 | | |
411 | | /* Allocate a temporary buffer for the first leaf page batch. */ |
412 | 0 | levelstate = palloc0(sizeof(GistSortedBuildLevelState)); |
413 | 0 | levelstate->pages[0] = palloc(BLCKSZ); |
414 | 0 | levelstate->parent = NULL; |
415 | 0 | gistinitpage(levelstate->pages[0], F_LEAF); |
416 | | |
417 | | /* |
418 | | * Fill index pages with tuples in the sorted order. |
419 | | */ |
420 | 0 | while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL) |
421 | 0 | { |
422 | 0 | gist_indexsortbuild_levelstate_add(state, levelstate, itup); |
423 | 0 | MemoryContextReset(state->giststate->tempCxt); |
424 | 0 | } |
425 | | |
426 | | /* |
427 | | * Write out the partially full non-root pages. |
428 | | * |
429 | | * Keep in mind that a flush can build a new root: if the number of pages |
430 | | * is greater than one, a new root is required. |
431 | | */ |
432 | 0 | while (levelstate->parent != NULL || levelstate->current_page != 0) |
433 | 0 | { |
434 | 0 | GistSortedBuildLevelState *parent; |
435 | |
436 | 0 | gist_indexsortbuild_levelstate_flush(state, levelstate); |
437 | 0 | parent = levelstate->parent; |
438 | 0 | for (int i = 0; i < GIST_SORTED_BUILD_PAGE_NUM; i++) |
439 | 0 | if (levelstate->pages[i]) |
440 | 0 | pfree(levelstate->pages[i]); |
441 | 0 | pfree(levelstate); |
442 | 0 | levelstate = parent; |
443 | 0 | } |
444 | | |
445 | | /* Write out the root */ |
446 | 0 | PageSetLSN(levelstate->pages[0], GistBuildLSN); |
447 | 0 | rootbuf = smgr_bulk_get_buf(state->bulkstate); |
448 | 0 | memcpy(rootbuf, levelstate->pages[0], BLCKSZ); |
449 | 0 | smgr_bulk_write(state->bulkstate, GIST_ROOT_BLKNO, rootbuf, true); |
450 | |
451 | 0 | pfree(levelstate); |
452 | |
453 | 0 | smgr_bulk_finish(state->bulkstate); |
454 | 0 | } |
455 | | |
456 | | /* |
457 | | * Add a tuple to a page. If all the pages are full, write them out and |
458 | | * initialize a new page first. |
459 | | */ |
460 | | static void |
461 | | gist_indexsortbuild_levelstate_add(GISTBuildState *state, |
462 | | GistSortedBuildLevelState *levelstate, |
463 | | IndexTuple itup) |
464 | 0 | { |
465 | 0 | Size sizeNeeded; |
466 | | |
467 | | /* Check if tuple can be added to the current page */ |
468 | 0 | sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData); /* fillfactor ignored */ |
469 | 0 | if (PageGetFreeSpace(levelstate->pages[levelstate->current_page]) < sizeNeeded) |
470 | 0 | { |
471 | 0 | Page newPage; |
472 | 0 | Page old_page = levelstate->pages[levelstate->current_page]; |
473 | 0 | uint16 old_page_flags = GistPageGetOpaque(old_page)->flags; |
474 | |
475 | 0 | if (levelstate->current_page + 1 == GIST_SORTED_BUILD_PAGE_NUM) |
476 | 0 | { |
477 | 0 | gist_indexsortbuild_levelstate_flush(state, levelstate); |
478 | 0 | } |
479 | 0 | else |
480 | 0 | levelstate->current_page++; |
481 | |
482 | 0 | if (levelstate->pages[levelstate->current_page] == NULL) |
483 | 0 | levelstate->pages[levelstate->current_page] = palloc0(BLCKSZ); |
484 | |
485 | 0 | newPage = levelstate->pages[levelstate->current_page]; |
486 | 0 | gistinitpage(newPage, old_page_flags); |
487 | 0 | } |
488 | |
489 | 0 | gistfillbuffer(levelstate->pages[levelstate->current_page], &itup, 1, InvalidOffsetNumber); |
490 | 0 | } |
491 | | |
492 | | static void |
493 | | gist_indexsortbuild_levelstate_flush(GISTBuildState *state, |
494 | | GistSortedBuildLevelState *levelstate) |
495 | 0 | { |
496 | 0 | GistSortedBuildLevelState *parent; |
497 | 0 | BlockNumber blkno; |
498 | 0 | MemoryContext oldCtx; |
499 | 0 | IndexTuple union_tuple; |
500 | 0 | SplitPageLayout *dist; |
501 | 0 | IndexTuple *itvec; |
502 | 0 | int vect_len; |
503 | 0 | bool isleaf = GistPageIsLeaf(levelstate->pages[0]); |
504 | |
505 | 0 | CHECK_FOR_INTERRUPTS(); |
506 | |
507 | 0 | oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt); |
508 | | |
509 | | /* Get index tuples from first page */ |
510 | 0 | itvec = gistextractpage(levelstate->pages[0], &vect_len); |
511 | 0 | if (levelstate->current_page > 0) |
512 | 0 | { |
513 | | /* Append tuples from each page */ |
514 | 0 | for (int i = 1; i < levelstate->current_page + 1; i++) |
515 | 0 | { |
516 | 0 | int len_local; |
517 | 0 | IndexTuple *itvec_local = gistextractpage(levelstate->pages[i], &len_local); |
518 | |
519 | 0 | itvec = gistjoinvector(itvec, &vect_len, itvec_local, len_local); |
520 | 0 | pfree(itvec_local); |
521 | 0 | } |
522 | | |
523 | | /* Apply picksplit to list of all collected tuples */ |
524 | 0 | dist = gistSplit(state->indexrel, levelstate->pages[0], itvec, vect_len, state->giststate); |
525 | 0 | } |
526 | 0 | else |
527 | 0 | { |
528 | | /* Create split layout from single page */ |
529 | 0 | dist = (SplitPageLayout *) palloc0(sizeof(SplitPageLayout)); |
530 | 0 | union_tuple = gistunion(state->indexrel, itvec, vect_len, |
531 | 0 | state->giststate); |
532 | 0 | dist->itup = union_tuple; |
533 | 0 | dist->list = gistfillitupvec(itvec, vect_len, &(dist->lenlist)); |
534 | 0 | dist->block.num = vect_len; |
535 | 0 | } |
536 | |
537 | 0 | MemoryContextSwitchTo(oldCtx); |
538 | | |
539 | | /* Reset page counter */ |
540 | 0 | levelstate->current_page = 0; |
541 | | |
542 | | /* Create pages for all partitions in split result */ |
543 | 0 | for (; dist != NULL; dist = dist->next) |
544 | 0 | { |
545 | 0 | char *data; |
546 | 0 | BulkWriteBuffer buf; |
547 | 0 | Page target; |
548 | | |
549 | | /* check once per page */ |
550 | 0 | CHECK_FOR_INTERRUPTS(); |
551 | | |
552 | | /* Create page and copy data */ |
553 | 0 | data = (char *) (dist->list); |
554 | 0 | buf = smgr_bulk_get_buf(state->bulkstate); |
555 | 0 | target = (Page) buf; |
556 | 0 | gistinitpage(target, isleaf ? F_LEAF : 0); |
557 | 0 | for (int i = 0; i < dist->block.num; i++) |
558 | 0 | { |
559 | 0 | IndexTuple thistup = (IndexTuple) data; |
560 | |
561 | 0 | if (PageAddItem(target, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber) |
562 | 0 | elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->indexrel)); |
563 | | |
564 | 0 | data += IndexTupleSize(thistup); |
565 | 0 | } |
566 | 0 | union_tuple = dist->itup; |
567 | | |
568 | | /* |
569 | | * Set the right link to point to the previous page. This is just for |
570 | | * debugging purposes: GiST only follows the right link if a page is |
571 | | * split concurrently to a scan, and that cannot happen during index |
572 | | * build. |
573 | | * |
574 | | * It's a bit counterintuitive that we set the right link on the new |
575 | | * page to point to the previous page, not the other way around. But |
576 | | * GiST pages are not ordered like B-tree pages are, so as long as the |
577 | | * right-links form a chain through all the pages at the same level, |
578 | | * the order doesn't matter. |
579 | | */ |
580 | 0 | if (levelstate->last_blkno) |
581 | 0 | GistPageGetOpaque(target)->rightlink = levelstate->last_blkno; |
582 | | |
583 | | /* |
584 | | * The page is now complete. Assign a block number to it, and pass it |
585 | | * to the bulk writer. |
586 | | */ |
587 | 0 | blkno = state->pages_allocated++; |
588 | 0 | PageSetLSN(target, GistBuildLSN); |
589 | 0 | smgr_bulk_write(state->bulkstate, blkno, buf, true); |
590 | 0 | ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno); |
591 | 0 | levelstate->last_blkno = blkno; |
592 | | |
593 | | /* |
594 | | * Insert the downlink to the parent page. If this was the root, |
595 | | * create a new page as the parent, which becomes the new root. |
596 | | */ |
597 | 0 | parent = levelstate->parent; |
598 | 0 | if (parent == NULL) |
599 | 0 | { |
600 | 0 | parent = palloc0(sizeof(GistSortedBuildLevelState)); |
601 | 0 | parent->pages[0] = palloc(BLCKSZ); |
602 | 0 | parent->parent = NULL; |
603 | 0 | gistinitpage(parent->pages[0], 0); |
604 | |
605 | 0 | levelstate->parent = parent; |
606 | 0 | } |
607 | 0 | gist_indexsortbuild_levelstate_add(state, parent, union_tuple); |
608 | 0 | } |
609 | 0 | } |
610 | | |
611 | | |
612 | | /*------------------------------------------------------------------------- |
613 | | * Routines for non-sorted build |
614 | | *------------------------------------------------------------------------- |
615 | | */ |
616 | | |
617 | | /* |
618 | | * Attempt to switch to buffering mode. |
619 | | * |
620 | | * If there is not enough memory for buffering build, sets bufferingMode |
621 | | * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch |
622 | | * anymore. Otherwise initializes the build buffers, and sets bufferingMode to |
623 | | * GIST_BUFFERING_ACTIVE. |
624 | | */ |
625 | | static void |
626 | | gistInitBuffering(GISTBuildState *buildstate) |
627 | 0 | { |
628 | 0 | Relation index = buildstate->indexrel; |
629 | 0 | int pagesPerBuffer; |
630 | 0 | Size pageFreeSpace; |
631 | 0 | Size itupAvgSize, |
632 | 0 | itupMinSize; |
633 | 0 | double avgIndexTuplesPerPage, |
634 | 0 | maxIndexTuplesPerPage; |
635 | 0 | int i; |
636 | 0 | int levelStep; |
637 | | |
638 | | /* Calculate the space on an index page that is available for index tuples */ |
639 | 0 | pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData) |
640 | 0 | - sizeof(ItemIdData) |
641 | 0 | - buildstate->freespace; |
642 | | |
643 | | /* |
644 | | * Calculate average size of already inserted index tuples using gathered |
645 | | * statistics. |
646 | | */ |
647 | 0 | itupAvgSize = (double) buildstate->indtuplesSize / |
648 | 0 | (double) buildstate->indtuples; |
649 | | |
650 | | /* |
651 | | * Calculate minimal possible size of index tuple by index metadata. |
652 | | * Minimal possible size of varlena is VARHDRSZ. |
653 | | * |
654 | | * XXX: that's not actually true, as a short varlen can be just 2 bytes. |
655 | | * And we should take padding into account here. |
656 | | */ |
657 | 0 | itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData)); |
658 | 0 | for (i = 0; i < index->rd_att->natts; i++) |
659 | 0 | { |
660 | 0 | CompactAttribute *attr = TupleDescCompactAttr(index->rd_att, i); |
661 | |
662 | 0 | if (attr->attlen < 0) |
663 | 0 | itupMinSize += VARHDRSZ; |
664 | 0 | else |
665 | 0 | itupMinSize += attr->attlen; |
666 | 0 | } |
667 | | |
668 | | /* Calculate the average and maximal number of index tuples that fit on a page */ |
669 | 0 | avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize; |
670 | 0 | maxIndexTuplesPerPage = pageFreeSpace / itupMinSize; |
671 | | |
672 | | /* |
673 | | * We need to calculate two parameters for the buffering algorithm: |
674 | | * levelStep and pagesPerBuffer. |
675 | | * |
676 | | * levelStep determines the size of subtree that we operate on, while |
677 | | * emptying a buffer. A higher value is better, as you need fewer buffer |
678 | | * emptying steps to build the index. However, if you set it too high, the |
679 | | * subtree doesn't fit in cache anymore, and you quickly lose the benefit |
680 | | * of the buffers. |
681 | | * |
682 | | * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is |
683 | | * the number of tuples on page (ie. fanout), and M is the amount of |
684 | | * internal memory available. Curiously, they don't explain *why* that |
685 | | * setting is optimal. We calculate it by taking the highest levelStep so |
686 | | * that a subtree still fits in cache. For a small B, our way of |
687 | | * calculating levelStep is very close to Arge et al's formula. For a |
688 | | * large B, our formula gives a value that is 2x higher. |
689 | | * |
690 | | * The average size (in pages) of a subtree of depth n can be calculated |
691 | | * as a geometric series: |
692 | | * |
693 | | * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B) |
694 | | * |
695 | | * where B is the average number of index tuples on page. The subtree is |
696 | | * cached in the shared buffer cache and the OS cache, so we choose |
697 | | * levelStep so that the subtree size is comfortably smaller than |
698 | | * effective_cache_size, with a safety factor of 4. |
699 | | * |
700 | | * The estimate on the average number of index tuples on page is based on |
701 | | * average tuple sizes observed before switching to buffered build, so the |
702 | | * real subtree size can be somewhat larger. Also, it would be selfish to |
703 | | * gobble the whole cache for our index build. The safety factor of 4 |
704 | | * should account for those effects. |
705 | | * |
706 | | * The other limiting factor for setting levelStep is that while |
707 | | * processing a subtree, we need to hold one page for each buffer at the |
708 | | * next lower buffered level. The max. number of buffers needed for that |
709 | | * is maxIndexTuplesPerPage^levelStep. This is very conservative, but |
710 | | * hopefully maintenance_work_mem is set high enough that you're |
711 | | * constrained by effective_cache_size rather than maintenance_work_mem. |
712 | | * |
713 | | * XXX: the buffer hash table consumes a fair amount of memory too per |
714 | | * buffer, but that is not currently taken into account. That scales with |
715 | | * the total number of buffers used, ie. the index size, and with levelStep. |
716 | | * Note that a higher levelStep *reduces* the amount of memory needed for |
717 | | * the hash table. |
718 | | */ |
719 | 0 | levelStep = 1; |
720 | 0 | for (;;) |
721 | 0 | { |
722 | 0 | double subtreesize; |
723 | 0 | double maxlowestlevelpages; |
724 | | |
725 | | /* size of an average subtree at this levelStep (in pages). */ |
726 | 0 | subtreesize = |
727 | 0 | (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) / |
728 | 0 | (1 - avgIndexTuplesPerPage); |
729 | | |
730 | | /* max number of pages at the lowest level of a subtree */ |
731 | 0 | maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep); |
732 | | |
733 | | /* subtree must fit in cache (with safety factor of 4) */ |
734 | 0 | if (subtreesize > effective_cache_size / 4) |
735 | 0 | break; |
736 | | |
737 | | /* each node in the lowest level of a subtree has one page in memory */ |
738 | 0 | if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ) |
739 | 0 | break; |
740 | | |
741 | | /* Good, we can handle this levelStep. See if we can go one higher. */ |
742 | 0 | levelStep++; |
743 | 0 | } |
744 | | |
745 | | /* |
746 | | * The loop above stopped at the first unacceptable value of levelStep, so |
747 | | * decrease levelStep to the last acceptable value. |
748 | | */ |
749 | 0 | levelStep--; |
750 | | |
751 | | /* |
752 | | * If there's not enough cache or maintenance_work_mem, fall back to plain |
753 | | * inserts. |
754 | | */ |
755 | 0 | if (levelStep <= 0) |
756 | 0 | { |
757 | 0 | elog(DEBUG1, "failed to switch to buffered GiST build"); |
758 | 0 | buildstate->buildMode = GIST_BUFFERING_DISABLED; |
759 | 0 | return; |
760 | 0 | } |
761 | | |
762 | | /* |
763 | | * The second parameter to set is pagesPerBuffer, which determines the |
764 | | * size of each buffer. We adjust pagesPerBuffer also during the build, |
765 | | * which is why this calculation is in a separate function. |
766 | | */ |
767 | 0 | pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep); |
768 | | |
769 | | /* Initialize GISTBuildBuffers with these parameters */ |
770 | 0 | buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep, |
771 | 0 | gistGetMaxLevel(index)); |
772 | |
773 | 0 | gistInitParentMap(buildstate); |
774 | |
775 | 0 | buildstate->buildMode = GIST_BUFFERING_ACTIVE; |
776 | |
777 | 0 | elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d", |
778 | 0 | levelStep, pagesPerBuffer); |
779 | 0 | } |
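/*
 * Editor's sketch (standalone, hypothetical numbers; not PostgreSQL code):
 * the levelStep search above, extracted so it can be run by itself.  With
 * 8 kB pages, ~100 tuples per page on average and 300 at most, a 4 GB
 * effective cache and 64 MB of maintenance_work_mem, it picks levelStep = 1:
 * levelStep = 2 would already need up to 300^2 = 90000 lowest-level pages,
 * more than the 8192 pages that 64 MB can hold.  Build with "cc sketch.c -lm".
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	const double blcksz = 8192.0;
	const double avg_tuples_per_page = 100.0;	/* "B" in the comment above */
	const double max_tuples_per_page = 300.0;
	const double cache_pages = 4.0 * 1024 * 1024 * 1024 / blcksz;
	const double work_mem_pages = 64.0 * 1024 * 1024 / blcksz;
	int			levelStep = 1;

	for (;;)
	{
		/* pages in an average subtree of depth n: (1 - B^(n+1)) / (1 - B) */
		double		subtreesize =
			(1 - pow(avg_tuples_per_page, (double) (levelStep + 1))) /
			(1 - avg_tuples_per_page);
		double		maxlowestlevelpages =
			pow(max_tuples_per_page, (double) levelStep);

		if (subtreesize > cache_pages / 4)		/* must fit in 1/4 of cache */
			break;
		if (maxlowestlevelpages > work_mem_pages)	/* one page per buffer */
			break;
		levelStep++;
	}
	levelStep--;		/* back off to the last acceptable value */

	printf("chosen levelStep = %d\n", levelStep);	/* prints 1 */
	return 0;
}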
780 | | |
781 | | /* |
782 | | * Calculate pagesPerBuffer parameter for the buffering algorithm. |
783 | | * |
784 | | * Buffer size is chosen so that assuming that tuples are distributed |
785 | | * randomly, emptying half a buffer fills on average one page in every buffer |
786 | | * at the next lower level. |
787 | | */ |
788 | | static int |
789 | | calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep) |
790 | 0 | { |
791 | 0 | double pagesPerBuffer; |
792 | 0 | double avgIndexTuplesPerPage; |
793 | 0 | double itupAvgSize; |
794 | 0 | Size pageFreeSpace; |
795 | | |
796 | | /* Calculate the space on an index page that is available for index tuples */ |
797 | 0 | pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData) |
798 | 0 | - sizeof(ItemIdData) |
799 | 0 | - buildstate->freespace; |
800 | | |
801 | | /* |
802 | | * Calculate average size of already inserted index tuples using gathered |
803 | | * statistics. |
804 | | */ |
805 | 0 | itupAvgSize = (double) buildstate->indtuplesSize / |
806 | 0 | (double) buildstate->indtuples; |
807 | |
808 | 0 | avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize; |
809 | | |
810 | | /* |
811 | | * Recalculate required size of buffers. |
812 | | */ |
813 | 0 | pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep); |
814 | |
815 | 0 | return (int) rint(pagesPerBuffer); |
816 | 0 | } |
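/*
 * Editor's sketch (hypothetical numbers; not PostgreSQL code): with ~100
 * tuples per page and levelStep = 1 there are about 100^1 = 100 child
 * buffers below a buffer, so the buffer is sized at 2 * 100 = 200 pages:
 * emptying half of it moves 100 pages' worth of tuples, on average one page
 * into each child buffer.  Build with "cc sketch.c -lm".
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double		avgIndexTuplesPerPage = 100.0;
	int			levelStep = 1;
	double		pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage,
										 (double) levelStep);

	printf("pagesPerBuffer = %d\n", (int) rint(pagesPerBuffer));	/* 200 */
	return 0;
}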
817 | | |
818 | | /* |
819 | | * Per-tuple callback for table_index_build_scan. |
820 | | */ |
821 | | static void |
822 | | gistBuildCallback(Relation index, |
823 | | ItemPointer tid, |
824 | | Datum *values, |
825 | | bool *isnull, |
826 | | bool tupleIsAlive, |
827 | | void *state) |
828 | 0 | { |
829 | 0 | GISTBuildState *buildstate = (GISTBuildState *) state; |
830 | 0 | IndexTuple itup; |
831 | 0 | MemoryContext oldCtx; |
832 | |
833 | 0 | oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
834 | | |
835 | | /* form an index tuple and point it at the heap tuple */ |
836 | 0 | itup = gistFormTuple(buildstate->giststate, index, |
837 | 0 | values, isnull, |
838 | 0 | true); |
839 | 0 | itup->t_tid = *tid; |
840 | | |
841 | | /* Update tuple count and total size. */ |
842 | 0 | buildstate->indtuples += 1; |
843 | 0 | buildstate->indtuplesSize += IndexTupleSize(itup); |
844 | | |
845 | | /* |
846 | | * XXX In buffering builds, the tempCxt is also reset down inside |
847 | | * gistProcessEmptyingQueue(). This is not great because it risks |
848 | | * confusion and possible use of dangling pointers (for example, itup |
849 | | * might be already freed when control returns here). It's generally |
850 | | * better that a memory context be "owned" by only one function. However, |
851 | | * currently this isn't causing issues so it doesn't seem worth the amount |
852 | | * of refactoring that would be needed to avoid it. |
853 | | */ |
854 | 0 | if (buildstate->buildMode == GIST_BUFFERING_ACTIVE) |
855 | 0 | { |
856 | | /* We have buffers, so use them. */ |
857 | 0 | gistBufferingBuildInsert(buildstate, itup); |
858 | 0 | } |
859 | 0 | else |
860 | 0 | { |
861 | | /* |
862 | | * There are no buffers (yet). Since we already have the index relation |
863 | | * locked, we call gistdoinsert directly. |
864 | | */ |
865 | 0 | gistdoinsert(index, itup, buildstate->freespace, |
866 | 0 | buildstate->giststate, buildstate->heaprel, true); |
867 | 0 | } |
868 | |
869 | 0 | MemoryContextSwitchTo(oldCtx); |
870 | 0 | MemoryContextReset(buildstate->giststate->tempCxt); |
871 | |
872 | 0 | if (buildstate->buildMode == GIST_BUFFERING_ACTIVE && |
873 | 0 | buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0) |
874 | 0 | { |
875 | | /* Adjust the target buffer size now */ |
876 | 0 | buildstate->gfbb->pagesPerBuffer = |
877 | 0 | calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep); |
878 | 0 | } |
879 | | |
880 | | /* |
881 | | * In 'auto' mode, check if the index has grown too large to fit in cache, |
882 | | * and switch to buffering mode if it has. |
883 | | * |
884 | | * To avoid excessive calls to smgrnblocks(), only check this every |
885 | | * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples. |
886 | | * |
887 | | * In 'stats' state, switch as soon as we have seen enough tuples to have |
888 | | * some idea of the average tuple size. |
889 | | */ |
890 | 0 | if ((buildstate->buildMode == GIST_BUFFERING_AUTO && |
891 | 0 | buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 && |
892 | 0 | effective_cache_size < smgrnblocks(RelationGetSmgr(index), |
893 | 0 | MAIN_FORKNUM)) || |
894 | 0 | (buildstate->buildMode == GIST_BUFFERING_STATS && |
895 | 0 | buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET)) |
896 | 0 | { |
897 | | /* |
898 | | * Index doesn't fit in effective cache anymore. Try to switch to |
899 | | * buffering build mode. |
900 | | */ |
901 | 0 | gistInitBuffering(buildstate); |
902 | 0 | } |
903 | 0 | } |
904 | | |
905 | | /* |
906 | | * Insert function for buffering index build. |
907 | | */ |
908 | | static void |
909 | | gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup) |
910 | 0 | { |
911 | | /* Insert the tuple to buffers. */ |
912 | 0 | gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel); |
913 | | |
914 | | /* If we filled up (half of a) buffer, process buffer emptying. */ |
915 | 0 | gistProcessEmptyingQueue(buildstate); |
916 | 0 | } |
917 | | |
918 | | /* |
919 | | * Process an index tuple. Runs the tuple down the tree until we reach a leaf |
920 | | * page or node buffer, and inserts the tuple there. Returns true if we have |
921 | | * to stop the buffer emptying process (because one of the child buffers |
922 | | * can't take index tuples anymore). |
923 | | */ |
924 | | static bool |
925 | | gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, |
926 | | BlockNumber startblkno, int startlevel) |
927 | 0 | { |
928 | 0 | GISTSTATE *giststate = buildstate->giststate; |
929 | 0 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
930 | 0 | Relation indexrel = buildstate->indexrel; |
931 | 0 | BlockNumber childblkno; |
932 | 0 | Buffer buffer; |
933 | 0 | bool result = false; |
934 | 0 | BlockNumber blkno; |
935 | 0 | int level; |
936 | 0 | OffsetNumber downlinkoffnum = InvalidOffsetNumber; |
937 | 0 | BlockNumber parentblkno = InvalidBlockNumber; |
938 | |
939 | 0 | CHECK_FOR_INTERRUPTS(); |
940 | | |
941 | | /* |
942 | | * Loop until we reach a leaf page (level == 0) or a level with buffers |
943 | | * (not including the level we start at, because we would otherwise make |
944 | | * no progress). |
945 | | */ |
946 | 0 | blkno = startblkno; |
947 | 0 | level = startlevel; |
948 | 0 | for (;;) |
949 | 0 | { |
950 | 0 | ItemId iid; |
951 | 0 | IndexTuple idxtuple, |
952 | 0 | newtup; |
953 | 0 | Page page; |
954 | 0 | OffsetNumber childoffnum; |
955 | | |
956 | | /* Have we reached a level with buffers? */ |
957 | 0 | if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel) |
958 | 0 | break; |
959 | | |
960 | | /* Have we reached a leaf page? */ |
961 | 0 | if (level == 0) |
962 | 0 | break; |
963 | | |
964 | | /* |
965 | | * Nope. Descend down to the next level then. Choose a child to |
966 | | * descend down to. |
967 | | */ |
968 | | |
969 | 0 | buffer = ReadBuffer(indexrel, blkno); |
970 | 0 | LockBuffer(buffer, GIST_EXCLUSIVE); |
971 | |
972 | 0 | page = (Page) BufferGetPage(buffer); |
973 | 0 | childoffnum = gistchoose(indexrel, page, itup, giststate); |
974 | 0 | iid = PageGetItemId(page, childoffnum); |
975 | 0 | idxtuple = (IndexTuple) PageGetItem(page, iid); |
976 | 0 | childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); |
977 | |
978 | 0 | if (level > 1) |
979 | 0 | gistMemorizeParent(buildstate, childblkno, blkno); |
980 | | |
981 | | /* |
982 | | * Check that the key representing the target child node is consistent |
983 | | * with the key we're inserting. Update it if it's not. |
984 | | */ |
985 | 0 | newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate); |
986 | 0 | if (newtup) |
987 | 0 | { |
988 | 0 | blkno = gistbufferinginserttuples(buildstate, |
989 | 0 | buffer, |
990 | 0 | level, |
991 | 0 | &newtup, |
992 | 0 | 1, |
993 | 0 | childoffnum, |
994 | 0 | InvalidBlockNumber, |
995 | 0 | InvalidOffsetNumber); |
996 | | /* gistbufferinginserttuples() released the buffer */ |
997 | 0 | } |
998 | 0 | else |
999 | 0 | UnlockReleaseBuffer(buffer); |
1000 | | |
1001 | | /* Descend to the child */ |
1002 | 0 | parentblkno = blkno; |
1003 | 0 | blkno = childblkno; |
1004 | 0 | downlinkoffnum = childoffnum; |
1005 | 0 | Assert(level > 0); |
1006 | 0 | level--; |
1007 | 0 | } |
1008 | |
1009 | 0 | if (LEVEL_HAS_BUFFERS(level, gfbb)) |
1010 | 0 | { |
1011 | | /* |
1012 | | * We've reached level with buffers. Place the index tuple to the |
1013 | | * buffer, and add the buffer to the emptying queue if it overflows. |
1014 | | */ |
1015 | 0 | GISTNodeBuffer *childNodeBuffer; |
1016 | | |
1017 | | /* Find the buffer or create a new one */ |
1018 | 0 | childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level); |
1019 | | |
1020 | | /* Add index tuple to it */ |
1021 | 0 | gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup); |
1022 | |
1023 | 0 | if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb)) |
1024 | 0 | result = true; |
1025 | 0 | } |
1026 | 0 | else |
1027 | 0 | { |
1028 | | /* |
1029 | | * We've reached a leaf page. Place the tuple here. |
1030 | | */ |
1031 | 0 | Assert(level == 0); |
1032 | 0 | buffer = ReadBuffer(indexrel, blkno); |
1033 | 0 | LockBuffer(buffer, GIST_EXCLUSIVE); |
1034 | 0 | gistbufferinginserttuples(buildstate, buffer, level, |
1035 | 0 | &itup, 1, InvalidOffsetNumber, |
1036 | 0 | parentblkno, downlinkoffnum); |
1037 | | /* gistbufferinginserttuples() released the buffer */ |
1038 | 0 | } |
1039 | |
1040 | 0 | return result; |
1041 | 0 | } |
1042 | | |
1043 | | /* |
1044 | | * Insert tuples into a given page. |
1045 | | * |
1046 | | * This is analogous with gistinserttuples() in the regular insertion code. |
1047 | | * |
1048 | | * Returns the block number of the page where the (first) new or updated tuple |
1049 | | * was inserted. Usually that's the original page, but might be a sibling page |
1050 | | * if the original page was split. |
1051 | | * |
1052 | | * Caller should hold a lock on 'buffer' on entry. This function will unlock |
1053 | | * and unpin it. |
1054 | | */ |
1055 | | static BlockNumber |
1056 | | gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, |
1057 | | IndexTuple *itup, int ntup, OffsetNumber oldoffnum, |
1058 | | BlockNumber parentblk, OffsetNumber downlinkoffnum) |
1059 | 0 | { |
1060 | 0 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
1061 | 0 | List *splitinfo; |
1062 | 0 | bool is_split; |
1063 | 0 | BlockNumber placed_to_blk = InvalidBlockNumber; |
1064 | |
1065 | 0 | is_split = gistplacetopage(buildstate->indexrel, |
1066 | 0 | buildstate->freespace, |
1067 | 0 | buildstate->giststate, |
1068 | 0 | buffer, |
1069 | 0 | itup, ntup, oldoffnum, &placed_to_blk, |
1070 | 0 | InvalidBuffer, |
1071 | 0 | &splitinfo, |
1072 | 0 | false, |
1073 | 0 | buildstate->heaprel, true); |
1074 | | |
1075 | | /* |
1076 | | * If this is a root split, update the root path item kept in memory. This |
1077 | | * ensures that all path stacks are always complete, including all parent |
1078 | | * nodes up to the root. That simplifies the algorithm for re-finding the |
1079 | | * correct parent. |
1080 | | */ |
1081 | 0 | if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO) |
1082 | 0 | { |
1083 | 0 | Page page = BufferGetPage(buffer); |
1084 | 0 | OffsetNumber off; |
1085 | 0 | OffsetNumber maxoff; |
1086 | |
1087 | 0 | Assert(level == gfbb->rootlevel); |
1088 | 0 | gfbb->rootlevel++; |
1089 | |
1090 | 0 | elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel); |
1091 | | |
1092 | | /* |
1093 | | * All the downlinks on the old root page are now on one of the child |
1094 | | * pages. Visit all the new child pages to memorize the parents of the |
1095 | | * grandchildren. |
1096 | | */ |
1097 | 0 | if (gfbb->rootlevel > 1) |
1098 | 0 | { |
1099 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
1100 | 0 | for (off = FirstOffsetNumber; off <= maxoff; off++) |
1101 | 0 | { |
1102 | 0 | ItemId iid = PageGetItemId(page, off); |
1103 | 0 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
1104 | 0 | BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); |
1105 | 0 | Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno); |
1106 | |
1107 | 0 | LockBuffer(childbuf, GIST_SHARE); |
1108 | 0 | gistMemorizeAllDownlinks(buildstate, childbuf); |
1109 | 0 | UnlockReleaseBuffer(childbuf); |
1110 | | |
1111 | | /* |
1112 | | * Also remember that the parent of the new child page is the |
1113 | | * root block. |
1114 | | */ |
1115 | 0 | gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO); |
1116 | 0 | } |
1117 | 0 | } |
1118 | 0 | } |
1119 | | |
1120 | 0 | if (splitinfo) |
1121 | 0 | { |
1122 | | /* |
1123 | | * Insert the downlinks to the parent. This is analogous with |
1124 | | * gistfinishsplit() in the regular insertion code, but the locking is |
1125 | | * simpler, and we have to maintain the buffers on internal nodes and |
1126 | | * the parent map. |
1127 | | */ |
1128 | 0 | IndexTuple *downlinks; |
1129 | 0 | int ndownlinks, |
1130 | 0 | i; |
1131 | 0 | Buffer parentBuffer; |
1132 | 0 | ListCell *lc; |
1133 | | |
1134 | | /* Parent may have changed since we memorized this path. */ |
1135 | 0 | parentBuffer = |
1136 | 0 | gistBufferingFindCorrectParent(buildstate, |
1137 | 0 | BufferGetBlockNumber(buffer), |
1138 | 0 | level, |
1139 | 0 | &parentblk, |
1140 | 0 | &downlinkoffnum); |
1141 | | |
1142 | | /* |
1143 | | * If there's a buffer associated with this page, that needs to be |
1144 | | * split too. gistRelocateBuildBuffersOnSplit() will also adjust the |
1145 | | * downlinks in 'splitinfo', to make sure they're consistent not only |
1146 | | * with the tuples already on the pages, but also the tuples in the |
1147 | | * buffers that will eventually be inserted to them. |
1148 | | */ |
1149 | 0 | gistRelocateBuildBuffersOnSplit(gfbb, |
1150 | 0 | buildstate->giststate, |
1151 | 0 | buildstate->indexrel, |
1152 | 0 | level, |
1153 | 0 | buffer, splitinfo); |
1154 | | |
1155 | | /* Create an array of all the downlink tuples */ |
1156 | 0 | ndownlinks = list_length(splitinfo); |
1157 | 0 | downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks); |
1158 | 0 | i = 0; |
1159 | 0 | foreach(lc, splitinfo) |
1160 | 0 | { |
1161 | 0 | GISTPageSplitInfo *splitinfo = lfirst(lc); |
1162 | | |
1163 | | /* |
1164 | | * Remember the parent of each new child page in our parent map. |
1165 | | * This assumes that the downlinks fit on the parent page. If the |
1166 | | * parent page is split, too, when we recurse up to insert the |
1167 | | * downlinks, the recursive gistbufferinginserttuples() call will |
1168 | | * update the map again. |
1169 | | */ |
1170 | 0 | if (level > 0) |
1171 | 0 | gistMemorizeParent(buildstate, |
1172 | 0 | BufferGetBlockNumber(splitinfo->buf), |
1173 | 0 | BufferGetBlockNumber(parentBuffer)); |
1174 | | |
1175 | | /* |
1176 | | * Also update the parent map for all the downlinks that got moved |
1177 | | * to a different page. (actually this also loops through the |
1178 | | * downlinks that stayed on the original page, but it does no |
1179 | | * harm). |
1180 | | */ |
1181 | 0 | if (level > 1) |
1182 | 0 | gistMemorizeAllDownlinks(buildstate, splitinfo->buf); |
1183 | | |
1184 | | /* |
1185 | | * Since there's no concurrent access, we can release the lower |
1186 | | * level buffers immediately. This includes the original page. |
1187 | | */ |
1188 | 0 | UnlockReleaseBuffer(splitinfo->buf); |
1189 | 0 | downlinks[i++] = splitinfo->downlink; |
1190 | 0 | } |
1191 | | |
1192 | | /* Insert them into parent. */ |
1193 | 0 | gistbufferinginserttuples(buildstate, parentBuffer, level + 1, |
1194 | 0 | downlinks, ndownlinks, downlinkoffnum, |
1195 | 0 | InvalidBlockNumber, InvalidOffsetNumber); |
1196 | |
1197 | 0 | list_free_deep(splitinfo); /* we don't need this anymore */ |
1198 | 0 | } |
1199 | 0 | else |
1200 | 0 | UnlockReleaseBuffer(buffer); |
1201 | |
1202 | 0 | return placed_to_blk; |
1203 | 0 | } |
1204 | | |
1205 | | /* |
1206 | | * Find the downlink pointing to a child page. |
1207 | | * |
1208 | | * 'childblkno' indicates the child page to find the parent for. 'level' is |
1209 | | * the level of the child. On entry, *parentblkno and *downlinkoffnum can |
1210 | | * point to a location where the downlink used to be - we will check that |
1211 | | * location first, and save some cycles if it hasn't moved. The function |
1212 | | * returns a buffer containing the downlink, exclusively-locked, and |
1213 | | * *parentblkno and *downlinkoffnum are set to the real location of the |
1214 | | * downlink. |
1215 | | * |
1216 | | * If the child page is a leaf (level == 0), the caller must supply a correct |
1217 | | * parentblkno. Otherwise we use the parent map hash table to find the parent |
1218 | | * block. |
1219 | | * |
1220 | | * This function serves the same purpose as gistFindCorrectParent() during |
1221 | | * normal index inserts, but this is simpler because we don't need to deal |
1222 | | * with concurrent inserts. |
1223 | | */ |
1224 | | static Buffer |
1225 | | gistBufferingFindCorrectParent(GISTBuildState *buildstate, |
1226 | | BlockNumber childblkno, int level, |
1227 | | BlockNumber *parentblkno, |
1228 | | OffsetNumber *downlinkoffnum) |
1229 | 0 | { |
1230 | 0 | BlockNumber parent; |
1231 | 0 | Buffer buffer; |
1232 | 0 | Page page; |
1233 | 0 | OffsetNumber maxoff; |
1234 | 0 | OffsetNumber off; |
1235 | |
1236 | 0 | if (level > 0) |
1237 | 0 | parent = gistGetParent(buildstate, childblkno); |
1238 | 0 | else |
1239 | 0 | { |
1240 | | /* |
1241 | | * For a leaf page, the caller must supply a correct parent block |
1242 | | * number. |
1243 | | */ |
1244 | 0 | if (*parentblkno == InvalidBlockNumber) |
1245 | 0 | elog(ERROR, "no parent buffer provided of child %u", childblkno); |
1246 | 0 | parent = *parentblkno; |
1247 | 0 | } |
1248 | | |
1249 | 0 | buffer = ReadBuffer(buildstate->indexrel, parent); |
1250 | 0 | page = BufferGetPage(buffer); |
1251 | 0 | LockBuffer(buffer, GIST_EXCLUSIVE); |
1252 | 0 | gistcheckpage(buildstate->indexrel, buffer); |
1253 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
1254 | | |
1255 | | /* Check if it was not moved */ |
1256 | 0 | if (parent == *parentblkno && *parentblkno != InvalidBlockNumber && |
1257 | 0 | *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff) |
1258 | 0 | { |
1259 | 0 | ItemId iid = PageGetItemId(page, *downlinkoffnum); |
1260 | 0 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
1261 | |
1262 | 0 | if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno) |
1263 | 0 | { |
1264 | | /* Still there */ |
1265 | 0 | return buffer; |
1266 | 0 | } |
1267 | 0 | } |
1268 | | |
1269 | | /* |
1270 | | * Downlink was not at the offset where it used to be. Scan the page to |
1271 | | * find it. During normal gist insertions, it might've moved to another |
1272 | | * page, to the right, but during a buffering build, we keep track of the |
1273 | | * parent of each page in the lookup table so we should always know what |
1274 | | * page it's on. |
1275 | | */ |
1276 | 0 | for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off)) |
1277 | 0 | { |
1278 | 0 | ItemId iid = PageGetItemId(page, off); |
1279 | 0 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
1280 | |
1281 | 0 | if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno) |
1282 | 0 | { |
1283 | | /* yes!!, found it */ |
1284 | 0 | *downlinkoffnum = off; |
1285 | 0 | return buffer; |
1286 | 0 | } |
1287 | 0 | } |
1288 | | |
1289 | 0 | elog(ERROR, "failed to re-find parent for block %u", childblkno); |
1290 | 0 | return InvalidBuffer; /* keep compiler quiet */ |
1291 | 0 | } |
1292 | | |
1293 | | /* |
1294 | | * Process the buffer emptying queue. Emptying one buffer can cause emptying |
1295 | | * of other buffers. This function iterates until this cascading emptying |
1296 | | * process has finished, i.e. until the buffer emptying queue is empty. |
1297 | | */ |
1298 | | static void |
1299 | | gistProcessEmptyingQueue(GISTBuildState *buildstate) |
1300 | 0 | { |
1301 | 0 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
1302 | | |
1303 | | /* Iterate while we have elements in the buffer emptying queue. */ |
1304 | 0 | while (gfbb->bufferEmptyingQueue != NIL) |
1305 | 0 | { |
1306 | 0 | GISTNodeBuffer *emptyingNodeBuffer; |
1307 | | |
1308 | | /* Get node buffer from emptying stack. */ |
1309 | 0 | emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue); |
1310 | 0 | gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue); |
1311 | 0 | emptyingNodeBuffer->queuedForEmptying = false; |
1312 | | |
1313 | | /* |
1314 | | * We are going to load the last pages of the buffers that we will empty |
1315 | | * into, so let's unload any previously loaded buffers first. |
1316 | | */ |
1317 | 0 | gistUnloadNodeBuffers(gfbb); |
1318 | | |
1319 | | /* |
1320 | | * Pop tuples from the buffer and run them down to the buffers at |
1321 | | * lower level, or leaf pages. We continue until one of the lower |
1322 | | * level buffers fills up, or this buffer runs empty. |
1323 | | * |
1324 | | * In Arge et al's paper, the buffer emptying is stopped after |
1325 | | * processing 1/2 node buffer worth of tuples, to avoid overfilling |
1326 | | * any of the lower level buffers. However, it's more efficient to |
1327 | | * keep going until one of the lower level buffers actually fills up, |
1328 | | * so that's what we do. This doesn't need to be exact, if a buffer |
1329 | | * overfills by a few tuples, there's no harm done. |
1330 | | */ |
1331 | 0 | while (true) |
1332 | 0 | { |
1333 | 0 | IndexTuple itup; |
1334 | | |
1335 | | /* Get next index tuple from the buffer */ |
1336 | 0 | if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup)) |
1337 | 0 | break; |
1338 | | |
1339 | | /* |
1340 | | * Run it down to the underlying node buffer or leaf page. |
1341 | | * |
1342 | | * Note: it's possible that the buffer we're emptying splits as a |
1343 | | * result of this call. If that happens, our emptyingNodeBuffer |
1344 | | * points to the left half of the split. After split, it's very |
1345 | | * likely that the new left buffer is no longer over the half-full |
1346 | | * threshold, but we might as well keep flushing tuples from it |
1347 | | * until we fill a lower-level buffer. |
1348 | | */ |
1349 | 0 | if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level)) |
1350 | 0 | { |
1351 | | /* |
1352 | | * A lower level buffer filled up. Stop emptying this buffer, |
1353 | | * to avoid overflowing the lower level buffer. |
1354 | | */ |
1355 | 0 | break; |
1356 | 0 | } |
1357 | | |
1358 | | /* Free all the memory allocated during index tuple processing */ |
1359 | 0 | MemoryContextReset(buildstate->giststate->tempCxt); |
1360 | 0 | } |
1361 | 0 | } |
1362 | 0 | } |
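/*
 * Editor's sketch (standalone, made-up capacities; not PostgreSQL code): the
 * cascading drain in miniature.  Emptying a buffer pushes tuples one at a
 * time into the buffer below it; if the lower buffer overflows, it is queued
 * and the current buffer stops emptying, re-queueing itself if it still
 * holds tuples (a simplification of what the real code does).  The loop runs
 * until the queue is empty and everything has cascaded to the "leaf" level.
 */
#include <stdio.h>

#define NBUF 3

typedef struct
{
	int			tuples;
	int			capacity;
	int			child;		/* index of the buffer below, -1 at the bottom */
	int			queued;
} ToyBuffer;

int
main(void)
{
	ToyBuffer	buf[NBUF] = {
		{8, 4, 1, 1},		/* top level: overfull, starts on the queue */
		{0, 4, 2, 0},		/* middle level */
		{0, 0, -1, 0},		/* "leaf level": never queued */
	};
	int			queue[NBUF];
	int			qlen = 0;

	queue[qlen++] = 0;

	while (qlen > 0)
	{
		int			cur = queue[--qlen];	/* LIFO, like lcons/linitial */
		ToyBuffer  *b = &buf[cur];

		b->queued = 0;
		while (b->tuples > 0)
		{
			ToyBuffer  *c = &buf[b->child];

			b->tuples--;
			c->tuples++;
			if (c->child >= 0 && c->tuples > c->capacity && !c->queued)
			{
				queue[qlen++] = b->child;	/* child overflowed: queue it */
				c->queued = 1;
				if (b->tuples > 0 && !b->queued)
				{
					queue[qlen++] = cur;	/* finish this buffer later */
					b->queued = 1;
				}
				break;			/* stop emptying to avoid overfilling */
			}
		}
	}
	printf("tuples at the leaf level: %d\n", buf[2].tuples);	/* 8 */
	return 0;
}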
1363 | | |
1364 | | /* |
1365 | | * Empty all node buffers, from top to bottom. This is done at the end of |
1366 | | * index build to flush all remaining tuples to the index. |
1367 | | * |
1368 | | * Note: This destroys the buffersOnLevels lists, so the buffers should not |
1369 | | * be inserted to after this call. |
1370 | | * be inserted into after this call. |
1371 | | static void |
1372 | | gistEmptyAllBuffers(GISTBuildState *buildstate) |
1373 | 0 | { |
1374 | 0 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
1375 | 0 | MemoryContext oldCtx; |
1376 | 0 | int i; |
1377 | |
1378 | 0 | oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
1379 | | |
1380 | | /* |
1381 | | * Iterate through the levels from top to bottom. |
1382 | | */ |
1383 | 0 | for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--) |
1384 | 0 | { |
1385 | | /* |
1386 | | * Empty all buffers on this level. Note that new buffers can pop up |
1387 | | * in the list during the processing, as a result of page splits, so a |
1388 | | * simple walk through the list won't work. We remove buffers from the |
1389 | | * list when we see them empty; a buffer can't become non-empty once |
1390 | | * it's been fully emptied. |
1391 | | */ |
1392 | 0 | while (gfbb->buffersOnLevels[i] != NIL) |
1393 | 0 | { |
1394 | 0 | GISTNodeBuffer *nodeBuffer; |
1395 | |
1396 | 0 | nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]); |
1397 | |
1398 | 0 | if (nodeBuffer->blocksCount != 0) |
1399 | 0 | { |
1400 | | /* |
1401 | | * Add this buffer to the emptying queue, and proceed to empty |
1402 | | * the queue. |
1403 | | */ |
1404 | 0 | if (!nodeBuffer->queuedForEmptying) |
1405 | 0 | { |
1406 | 0 | MemoryContextSwitchTo(gfbb->context); |
1407 | 0 | nodeBuffer->queuedForEmptying = true; |
1408 | 0 | gfbb->bufferEmptyingQueue = |
1409 | 0 | lcons(nodeBuffer, gfbb->bufferEmptyingQueue); |
1410 | 0 | MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
1411 | 0 | } |
1412 | 0 | gistProcessEmptyingQueue(buildstate); |
1413 | 0 | } |
1414 | 0 | else |
1415 | 0 | gfbb->buffersOnLevels[i] = |
1416 | 0 | list_delete_first(gfbb->buffersOnLevels[i]); |
1417 | 0 | } |
1418 | 0 | elog(DEBUG2, "emptied all buffers at level %d", i); |
1419 | 0 | } |
1420 | 0 | MemoryContextSwitchTo(oldCtx); |
1421 | 0 | } |
1422 | | |
1423 | | /* |
1424 | | * Get the depth of the GiST index. |
1425 | | */ |
1426 | | static int |
1427 | | gistGetMaxLevel(Relation index) |
1428 | 0 | { |
1429 | 0 | int maxLevel; |
1430 | 0 | BlockNumber blkno; |
1431 | | |
1432 | | /* |
1433 | | * Traverse down the tree, starting from the root, until we hit the leaf |
1434 | | * level. |
1435 | | */ |
1436 | 0 | maxLevel = 0; |
1437 | 0 | blkno = GIST_ROOT_BLKNO; |
1438 | 0 | while (true) |
1439 | 0 | { |
1440 | 0 | Buffer buffer; |
1441 | 0 | Page page; |
1442 | 0 | IndexTuple itup; |
1443 | |
1444 | 0 | buffer = ReadBuffer(index, blkno); |
1445 | | |
1446 | | /* |
1447 | | * There's no concurrent access during index build, so locking is just |
1448 | | * pro forma. |
1449 | | */ |
1450 | 0 | LockBuffer(buffer, GIST_SHARE); |
1451 | 0 | page = (Page) BufferGetPage(buffer); |
1452 | |
1453 | 0 | if (GistPageIsLeaf(page)) |
1454 | 0 | { |
1455 | | /* We hit the bottom, so we're done. */ |
1456 | 0 | UnlockReleaseBuffer(buffer); |
1457 | 0 | break; |
1458 | 0 | } |
1459 | | |
1460 | | /* |
1461 | | * Pick the first downlink on the page, and follow it. It doesn't |
1462 | | * matter which downlink we choose, the tree has the same depth |
1463 | | * everywhere, so we just pick the first one. |
1464 | | */ |
1465 | 0 | itup = (IndexTuple) PageGetItem(page, |
1466 | 0 | PageGetItemId(page, FirstOffsetNumber)); |
1467 | 0 | blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); |
1468 | 0 | UnlockReleaseBuffer(buffer); |
1469 | | |
1470 | | /* |
1471 | | * We're going further down the tree, which means that there is one more |
1472 | | * level in the tree. |
1473 | | */ |
1474 | 0 | maxLevel++; |
1475 | 0 | } |
1476 | 0 | return maxLevel; |
1477 | 0 | } |
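/*
 * Editor's sketch (standalone; the ToyNode type is hypothetical, not a
 * PostgreSQL structure): the depth computation above on a toy in-memory
 * tree.  GiST is perfectly balanced, so following any single downlink from
 * the root counts the number of levels above the leaves.
 */
#include <stdio.h>

typedef struct ToyNode
{
	int			is_leaf;
	struct ToyNode *first_child;	/* stands in for the first downlink */
} ToyNode;

static int
toy_get_max_level(const ToyNode *node)
{
	int			maxLevel = 0;

	while (!node->is_leaf)
	{
		node = node->first_child;	/* any downlink works: equal depth everywhere */
		maxLevel++;
	}
	return maxLevel;
}

int
main(void)
{
	ToyNode		leaf = {1, NULL};
	ToyNode		mid = {0, &leaf};
	ToyNode		root = {0, &mid};

	printf("max level = %d\n", toy_get_max_level(&root));	/* 2 */
	return 0;
}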
1478 | | |
1479 | | |
1480 | | /* |
1481 | | * Routines for managing the parent map. |
1482 | | * |
1483 | | * Whenever a page is split, we need to insert the downlinks into the parent. |
1484 | | * We need to somehow find the parent page to do that. In normal insertions, |
1485 | | * we keep a stack of nodes visited when we descend the tree. However, in |
1486 | | * buffering build, we can start descending the tree from any internal node, |
1487 | | * when we empty a buffer by cascading tuples to its children. So we don't |
1488 | | * have a full stack up to the root available at that time. |
1489 | | * |
1490 | | * So instead, we maintain a hash table to track the parent of every internal |
1491 | | * page. We don't need to track the parents of leaf nodes, however. Whenever |
1492 | | * we insert to a leaf, we've just descended down from its parent, so we know |
1493 | | * its immediate parent already. This helps a lot to limit the memory used |
1494 | | * by this hash table. |
1495 | | * |
1496 | | * Whenever an internal node is split, the parent map needs to be updated: |
1497 | | * the parent of the new child page needs to be recorded, and the entries |
1498 | | * for all pages whose downlinks are moved to a new page by the split need |
1499 | | * to be updated as well. |
1500 | | * |
1501 | | * We also update the parent map whenever we descend the tree. That might seem |
1502 | | * unnecessary, because we maintain the map whenever a downlink is moved or |
1503 | | * created, but it is needed because we switch to buffering mode after |
1504 | | * creating a tree with regular index inserts. Any pages created before |
1505 | | * switching to buffering mode will not be present in the parent map initially, |
1506 | | * but will be added there the first time we visit them. |
1507 | | */ |
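/*
 * Editor's sketch (standalone and hypothetical; the real build uses a
 * dynahash HTAB): the parent map reduced to its essentials.  A tiny
 * linear-probing table keyed by child block number is enough to show the
 * protocol: memorize the parent while descending, overwrite the entry when
 * a split moves the downlink.
 */
#include <stdio.h>
#include <stdint.h>

#define MAP_SIZE 64				/* toy capacity, power of two */

typedef struct
{
	uint32_t	child;
	uint32_t	parent;
	int			used;
} ToyMapEntry;

static ToyMapEntry parent_map[MAP_SIZE];

static void
toy_memorize_parent(uint32_t child, uint32_t parent)
{
	uint32_t	i = child % MAP_SIZE;

	while (parent_map[i].used && parent_map[i].child != child)
		i = (i + 1) % MAP_SIZE;	/* linear probing */
	parent_map[i].child = child;
	parent_map[i].parent = parent;	/* insert a new entry, or overwrite */
	parent_map[i].used = 1;
}

static uint32_t
toy_get_parent(uint32_t child)
{
	uint32_t	i = child % MAP_SIZE;

	while (parent_map[i].used)
	{
		if (parent_map[i].child == child)
			return parent_map[i].parent;
		i = (i + 1) % MAP_SIZE;
	}
	return UINT32_MAX;			/* the real code elog(ERROR)s instead */
}

int
main(void)
{
	toy_memorize_parent(7, 3);	/* remembered while descending the tree */
	toy_memorize_parent(7, 9);	/* downlink moved by a split: overwrite */
	printf("parent of 7 = %u\n", toy_get_parent(7));	/* prints 9 */
	return 0;
}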
1508 | | |
1509 | | typedef struct |
1510 | | { |
1511 | | BlockNumber childblkno; /* hash key */ |
1512 | | BlockNumber parentblkno; |
1513 | | } ParentMapEntry; |
1514 | | |
1515 | | static void |
1516 | | gistInitParentMap(GISTBuildState *buildstate) |
1517 | 0 | { |
1518 | 0 | HASHCTL hashCtl; |
1519 | |
1520 | 0 | hashCtl.keysize = sizeof(BlockNumber); |
1521 | 0 | hashCtl.entrysize = sizeof(ParentMapEntry); |
1522 | 0 | hashCtl.hcxt = CurrentMemoryContext; |
1523 | 0 | buildstate->parentMap = hash_create("gistbuild parent map", |
1524 | 0 | 1024, |
1525 | 0 | &hashCtl, |
1526 | 0 | HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); |
1527 | 0 | } |
1528 | | |
1529 | | static void |
1530 | | gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent) |
1531 | 0 | { |
1532 | 0 | ParentMapEntry *entry; |
1533 | 0 | bool found; |
1534 | |
1535 | 0 | entry = (ParentMapEntry *) hash_search(buildstate->parentMap, |
1536 | 0 | &child, |
1537 | 0 | HASH_ENTER, |
1538 | 0 | &found); |
1539 | 0 | entry->parentblkno = parent; |
1540 | 0 | } |
1541 | | |
1542 | | /* |
1543 | | * Scan all downlinks on a page, and memorize their parent. |
1544 | | */ |
1545 | | static void |
1546 | | gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf) |
1547 | 0 | { |
1548 | 0 | OffsetNumber maxoff; |
1549 | 0 | OffsetNumber off; |
1550 | 0 | BlockNumber parentblkno = BufferGetBlockNumber(parentbuf); |
1551 | 0 | Page page = BufferGetPage(parentbuf); |
1552 | |
1553 | 0 | Assert(!GistPageIsLeaf(page)); |
1554 | |
1555 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
1556 | 0 | for (off = FirstOffsetNumber; off <= maxoff; off++) |
1557 | 0 | { |
1558 | 0 | ItemId iid = PageGetItemId(page, off); |
1559 | 0 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
1560 | 0 | BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); |
1561 | |
1562 | 0 | gistMemorizeParent(buildstate, childblkno, parentblkno); |
1563 | 0 | } |
1564 | 0 | } |
1565 | | |
1566 | | static BlockNumber |
1567 | | gistGetParent(GISTBuildState *buildstate, BlockNumber child) |
1568 | 0 | { |
1569 | 0 | ParentMapEntry *entry; |
1570 | 0 | bool found; |
1571 | | |
1572 | | /* Find the parent map entry in the hash table */ |
1573 | 0 | entry = (ParentMapEntry *) hash_search(buildstate->parentMap, |
1574 | 0 | &child, |
1575 | 0 | HASH_FIND, |
1576 | 0 | &found); |
1577 | 0 | if (!found) |
1578 | 0 | elog(ERROR, "could not find parent of block %u in lookup table", child); |
1579 | | |
1580 | 0 | return entry->parentblkno; |
1581 | 0 | } |