/src/postgres/src/backend/access/gin/ginvacuum.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * ginvacuum.c |
4 | | * delete & vacuum routines for the postgres GIN |
5 | | * |
6 | | * |
7 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
8 | | * Portions Copyright (c) 1994, Regents of the University of California |
9 | | * |
10 | | * IDENTIFICATION |
11 | | * src/backend/access/gin/ginvacuum.c |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | |
15 | | #include "postgres.h" |
16 | | |
17 | | #include "access/gin_private.h" |
18 | | #include "access/ginxlog.h" |
19 | | #include "access/xloginsert.h" |
20 | | #include "commands/vacuum.h" |
21 | | #include "miscadmin.h" |
22 | | #include "storage/indexfsm.h" |
23 | | #include "storage/lmgr.h" |
24 | | #include "storage/predicate.h" |
25 | | #include "utils/memutils.h" |
26 | | |
/*
 * Working state for a single GIN vacuum (bulk-delete) pass.
 */
struct GinVacuumState
{
	Relation	index;			/* the GIN index being vacuumed */
	IndexBulkDeleteResult *result;	/* stats accumulator (tuples_removed,
									 * num_index_tuples, pages_deleted, ...) */
	IndexBulkDeleteCallback callback;	/* reports whether a heap TID is dead */
	void	   *callback_state; /* opaque state passed to 'callback' */
	GinState	ginstate;		/* cached metadata for 'index' */
	BufferAccessStrategy strategy;	/* buffer access strategy for reads */
	MemoryContext tmpCxt;		/* short-lived context; reset after each
								 * posting-tree leaf is vacuumed */
};
37 | | |
38 | | /* |
 * Vacuums an uncompressed posting list. The size of the list must be
 * specified in number of items (nitems).
41 | | * |
42 | | * If none of the items need to be removed, returns NULL. Otherwise returns |
43 | | * a new palloc'd array with the remaining items. The number of remaining |
44 | | * items is returned in *nremaining. |
45 | | */ |
46 | | ItemPointer |
47 | | ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items, |
48 | | int nitem, int *nremaining) |
49 | 0 | { |
50 | 0 | int i, |
51 | 0 | remaining = 0; |
52 | 0 | ItemPointer tmpitems = NULL; |
53 | | |
54 | | /* |
55 | | * Iterate over TIDs array |
56 | | */ |
57 | 0 | for (i = 0; i < nitem; i++) |
58 | 0 | { |
59 | 0 | if (gvs->callback(items + i, gvs->callback_state)) |
60 | 0 | { |
61 | 0 | gvs->result->tuples_removed += 1; |
62 | 0 | if (!tmpitems) |
63 | 0 | { |
64 | | /* |
65 | | * First TID to be deleted: allocate memory to hold the |
66 | | * remaining items. |
67 | | */ |
68 | 0 | tmpitems = palloc(sizeof(ItemPointerData) * nitem); |
69 | 0 | memcpy(tmpitems, items, sizeof(ItemPointerData) * i); |
70 | 0 | } |
71 | 0 | } |
72 | 0 | else |
73 | 0 | { |
74 | 0 | gvs->result->num_index_tuples += 1; |
75 | 0 | if (tmpitems) |
76 | 0 | tmpitems[remaining] = items[i]; |
77 | 0 | remaining++; |
78 | 0 | } |
79 | 0 | } |
80 | |
|
81 | 0 | *nremaining = remaining; |
82 | 0 | return tmpitems; |
83 | 0 | } |
84 | | |
85 | | /* |
86 | | * Create a WAL record for vacuuming entry tree leaf page. |
87 | | */ |
static void
xlogVacuumPage(Relation index, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	XLogRecPtr	recptr;

	/* This is only used for entry tree leaf pages. */
	Assert(!GinPageIsData(page));
	Assert(GinPageIsLeaf(page));

	/* Unlogged/temp relations need no WAL. */
	if (!RelationNeedsWAL(index))
		return;

	/*
	 * Always create a full image, we don't track the changes on the page at
	 * any more fine-grained level. This could obviously be improved...
	 */
	XLogBeginInsert();
	XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);

	recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
	PageSetLSN(page, recptr);
}
111 | | |
112 | | |
/*
 * One level of the path from the posting-tree root down to the current page
 * during the empty-page-deletion scan (ginScanToDelete).  Levels are linked
 * both ways so a child can consult its parent's block number, and so the
 * stack can be reused/freed after the scan.
 */
typedef struct DataPageDeleteStack
{
	struct DataPageDeleteStack *child;
	struct DataPageDeleteStack *parent;

	BlockNumber blkno;			/* current block number */
	Buffer		leftBuffer;		/* pinned and locked rightest non-deleted page
								 * on left */
	bool		isRoot;
} DataPageDeleteStack;
123 | | |
124 | | |
125 | | /* |
126 | | * Delete a posting tree page. |
127 | | */ |
/*
 * Unlink one empty posting-tree page: splice it out of the sibling chain,
 * remove its downlink from the parent, and mark it deleted (the page itself
 * is recycled later, once GinPageIsRecyclable() says it is safe).
 *
 * Note: isParentRoot is not referenced in the body below; presumably kept
 * for the symmetry of the call site — confirm before removing.
 */
static void
ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
			  BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot)
{
	Buffer		dBuffer;
	Buffer		lBuffer;
	Buffer		pBuffer;
	Page		page,
				parentPage;
	BlockNumber rightlink;

	/*
	 * This function MUST be called only if someone of parent pages hold
	 * exclusive cleanup lock. This guarantees that no insertions currently
	 * happen in this subtree. Caller also acquires Exclusive locks on
	 * deletable, parent and left pages.  (Hence we only take pins here, no
	 * LockBuffer calls; the locks are the caller's to release.)
	 */
	lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
								 RBM_NORMAL, gvs->strategy);
	dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno,
								 RBM_NORMAL, gvs->strategy);
	pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno,
								 RBM_NORMAL, gvs->strategy);

	page = BufferGetPage(dBuffer);
	rightlink = GinPageGetOpaque(page)->rightlink;

	/*
	 * Any insert which would have gone on the leaf block will now go to its
	 * right sibling.
	 */
	PredicateLockPageCombine(gvs->index, deleteBlkno, rightlink);

	START_CRIT_SECTION();

	/* Unlink the page by changing left sibling's rightlink */
	page = BufferGetPage(lBuffer);
	GinPageGetOpaque(page)->rightlink = rightlink;

	/* Delete downlink from parent */
	parentPage = BufferGetPage(pBuffer);
#ifdef USE_ASSERT_CHECKING
	do
	{
		/* Sanity check: the downlink at myoff really points at deleteBlkno */
		PostingItem *tod = GinDataPageGetPostingItem(parentPage, myoff);

		Assert(PostingItemGetBlockNumber(tod) == deleteBlkno);
	} while (0);
#endif
	GinPageDeletePostingItem(parentPage, myoff);

	page = BufferGetPage(dBuffer);

	/*
	 * we shouldn't change rightlink field to save workability of running
	 * search scan
	 */

	/*
	 * Mark page as deleted, and remember last xid which could know its
	 * address.
	 */
	GinPageSetDeleted(page);
	GinPageSetDeleteXid(page, ReadNextTransactionId());

	MarkBufferDirty(pBuffer);
	MarkBufferDirty(lBuffer);
	MarkBufferDirty(dBuffer);

	if (RelationNeedsWAL(gvs->index))
	{
		XLogRecPtr	recptr;
		ginxlogDeletePage data;

		/*
		 * We can't pass REGBUF_STANDARD for the deleted page, because we
		 * didn't set pd_lower on pre-9.4 versions. The page might've been
		 * binary-upgraded from an older version, and hence not have pd_lower
		 * set correctly. Ditto for the left page, but removing the item from
		 * the parent updated its pd_lower, so we know that's OK at this
		 * point.
		 */
		XLogBeginInsert();
		XLogRegisterBuffer(0, dBuffer, 0);
		XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
		XLogRegisterBuffer(2, lBuffer, 0);

		data.parentOffset = myoff;
		data.rightLink = GinPageGetOpaque(page)->rightlink;
		data.deleteXid = GinPageGetDeleteXid(page);

		XLogRegisterData(&data, sizeof(ginxlogDeletePage));

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
		PageSetLSN(page, recptr);
		PageSetLSN(parentPage, recptr);
		PageSetLSN(BufferGetPage(lBuffer), recptr);
	}

	/* Drop only our pins; the content locks belong to the caller. */
	ReleaseBuffer(pBuffer);
	ReleaseBuffer(lBuffer);
	ReleaseBuffer(dBuffer);

	END_CRIT_SECTION();

	gvs->result->pages_newly_deleted++;
	gvs->result->pages_deleted++;
}
236 | | |
237 | | |
238 | | /* |
239 | | * Scans posting tree and deletes empty pages. Caller must lock root page for |
240 | | * cleanup. During scan path from root to current page is kept exclusively |
241 | | * locked. Also keep left page exclusively locked, because ginDeletePage() |
242 | | * needs it. If we try to relock left page later, it could deadlock with |
243 | | * ginStepRight(). |
244 | | */ |
static bool
ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
				DataPageDeleteStack *parent, OffsetNumber myoff)
{
	DataPageDeleteStack *me;
	Buffer		buffer;
	Page		page;
	bool		meDelete = false;
	bool		isempty;

	/*
	 * Find (or create) the stack entry for this tree level.  The root reuses
	 * the caller-provided entry; deeper levels lazily allocate one child
	 * entry per level, which is reused across siblings.
	 */
	if (isRoot)
	{
		me = parent;
	}
	else
	{
		if (!parent->child)
		{
			me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack));
			me->parent = parent;
			parent->child = me;
			me->leftBuffer = InvalidBuffer;
		}
		else
			me = parent->child;
	}

	buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, gvs->strategy);

	/* The root is already locked for cleanup by our caller. */
	if (!isRoot)
		LockBuffer(buffer, GIN_EXCLUSIVE);

	page = BufferGetPage(buffer);

	Assert(GinPageIsData(page));

	if (!GinPageIsLeaf(page))
	{
		OffsetNumber i;

		me->blkno = blkno;
		for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
		{
			PostingItem *pitem = GinDataPageGetPostingItem(page, i);

			/*
			 * If the child got deleted, its downlink was removed and later
			 * items shifted left, so revisit the same offset.
			 */
			if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), false, me, i))
				i--;
		}

		/*
		 * At the end of a level (rightmost internal page), release the
		 * left-sibling buffer still held for the child level below us.
		 */
		if (GinPageRightMost(page) && BufferIsValid(me->child->leftBuffer))
		{
			UnlockReleaseBuffer(me->child->leftBuffer);
			me->child->leftBuffer = InvalidBuffer;
		}
	}

	if (GinPageIsLeaf(page))
		isempty = GinDataLeafPageIsEmpty(page);
	else
		isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber;

	if (isempty)
	{
		/* we never delete the left- or rightmost branch */
		if (BufferIsValid(me->leftBuffer) && !GinPageRightMost(page))
		{
			Assert(!isRoot);
			ginDeletePage(gvs, blkno, BufferGetBlockNumber(me->leftBuffer),
						  me->parent->blkno, myoff, me->parent->isRoot);
			meDelete = true;
		}
	}

	if (!meDelete)
	{
		/* This page survives; it becomes the new "left sibling" we keep. */
		if (BufferIsValid(me->leftBuffer))
			UnlockReleaseBuffer(me->leftBuffer);
		me->leftBuffer = buffer;
	}
	else
	{
		if (!isRoot)
			LockBuffer(buffer, GIN_UNLOCK);

		ReleaseBuffer(buffer);
	}

	/* Drop the extra root pin; its cleanup lock is released by the caller. */
	if (isRoot)
		ReleaseBuffer(buffer);

	return meDelete;
}
338 | | |
339 | | |
340 | | /* |
341 | | * Scan through posting tree leafs, delete empty tuples. Returns true if there |
342 | | * is at least one empty page. |
343 | | */ |
static bool
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno)
{
	Buffer		buffer;
	Page		page;
	bool		hasVoidPage = false;
	MemoryContext oldCxt;

	/* Find leftmost leaf page of posting tree and lock it in exclusive mode */
	while (true)
	{
		PostingItem *pitem;

		buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, gvs->strategy);
		LockBuffer(buffer, GIN_SHARE);
		page = BufferGetPage(buffer);

		Assert(GinPageIsData(page));

		if (GinPageIsLeaf(page))
		{
			/* Trade the share lock for the exclusive lock we need to vacuum */
			LockBuffer(buffer, GIN_UNLOCK);
			LockBuffer(buffer, GIN_EXCLUSIVE);
			break;
		}

		Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);

		/* Descend through the leftmost downlink on each internal page */
		pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
		blkno = PostingItemGetBlockNumber(pitem);
		Assert(blkno != InvalidBlockNumber);

		UnlockReleaseBuffer(buffer);
	}

	/* Iterate all posting tree leaves using rightlinks and vacuum them */
	while (true)
	{
		/* Run the per-leaf vacuum in the short-lived context, then reset it */
		oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
		ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
		MemoryContextSwitchTo(oldCxt);
		MemoryContextReset(gvs->tmpCxt);

		if (GinDataLeafPageIsEmpty(page))
			hasVoidPage = true;

		blkno = GinPageGetOpaque(page)->rightlink;

		UnlockReleaseBuffer(buffer);

		/* InvalidBlockNumber rightlink marks the rightmost leaf */
		if (blkno == InvalidBlockNumber)
			break;

		buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, gvs->strategy);
		LockBuffer(buffer, GIN_EXCLUSIVE);
		page = BufferGetPage(buffer);
	}

	return hasVoidPage;
}
406 | | |
/*
 * Vacuum one posting tree: first clean the leaf pages; if that left any
 * empty page behind, re-scan the tree under a root cleanup lock and unlink
 * the empty pages.
 */
static void
ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
{
	if (ginVacuumPostingTreeLeaves(gvs, rootBlkno))
	{
		/*
		 * There is at least one empty page. So we have to rescan the tree
		 * deleting empty pages.
		 */
		Buffer		buffer;
		DataPageDeleteStack root,
				   *ptr,
				   *tmp;

		buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, rootBlkno,
									RBM_NORMAL, gvs->strategy);

		/*
		 * Lock posting tree root for cleanup to ensure there are no
		 * concurrent inserts.
		 */
		LockBufferForCleanup(buffer);

		memset(&root, 0, sizeof(DataPageDeleteStack));
		root.leftBuffer = InvalidBuffer;
		root.isRoot = true;

		ginScanToDelete(gvs, rootBlkno, true, &root, InvalidOffsetNumber);

		/* Free the per-level stack entries allocated during the scan */
		ptr = root.child;

		while (ptr)
		{
			tmp = ptr->child;
			pfree(ptr);
			ptr = tmp;
		}

		UnlockReleaseBuffer(buffer);
	}
}
448 | | |
449 | | /* |
450 | | * returns modified page or NULL if page isn't modified. |
451 | | * Function works with original page until first change is occurred, |
452 | | * then page is copied into temporary one. |
453 | | */ |
/*
 * returns modified page or NULL if page isn't modified.
 * Function works with original page until first change is occurred,
 * then page is copied into temporary one.
 *
 * Posting-tree roots found on the page are collected into roots[] (count in
 * *nroot) for the caller to vacuum later; the caller's array must be large
 * enough for every tuple on the page.
 */
static Page
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot)
{
	Page		origpage = BufferGetPage(buffer),
				tmppage;
	OffsetNumber i,
				maxoff = PageGetMaxOffsetNumber(origpage);

	tmppage = origpage;

	*nroot = 0;

	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));

		if (GinIsPostingTree(itup))
		{
			/*
			 * store posting tree's roots for further processing, we can't
			 * vacuum it just now due to risk of deadlocks with scans/inserts
			 */
			roots[*nroot] = GinGetDownlink(itup);
			(*nroot)++;
		}
		else if (GinGetNPosting(itup) > 0)
		{
			int			nitems;
			ItemPointer items_orig;
			bool		free_items_orig;
			ItemPointer items;

			/* Get list of item pointers from the tuple. */
			if (GinItupIsCompressed(itup))
			{
				/* Compressed list: decode into a palloc'd array we must free */
				items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems);
				free_items_orig = true;
			}
			else
			{
				/* Uncompressed list: use the tuple's array in place */
				items_orig = (ItemPointer) GinGetPosting(itup);
				nitems = GinGetNPosting(itup);
				free_items_orig = false;
			}

			/* Remove any items from the list that need to be vacuumed. */
			items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems);

			if (free_items_orig)
				pfree(items_orig);

			/* If any item pointers were removed, recreate the tuple. */
			if (items)
			{
				OffsetNumber attnum;
				Datum		key;
				GinNullCategory category;
				GinPostingList *plist;
				int			plistsize;

				if (nitems > 0)
				{
					plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL);
					plistsize = SizeOfGinPostingList(plist);
				}
				else
				{
					/* All items were removed; keep the key with an empty list */
					plist = NULL;
					plistsize = 0;
				}

				/*
				 * if we already created a temporary page, make changes in
				 * place
				 */
				if (tmppage == origpage)
				{
					/*
					 * On first difference, create a temporary copy of the
					 * page and copy the tuple's posting list to it.
					 */
					tmppage = PageGetTempPageCopy(origpage);

					/* set itup pointer to new page */
					itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
				}

				/* Rebuild the tuple with the same key but the new posting list */
				attnum = gintuple_get_attrnum(&gvs->ginstate, itup);
				key = gintuple_get_key(&gvs->ginstate, itup, &category);
				itup = GinFormTuple(&gvs->ginstate, attnum, key, category,
									(char *) plist, plistsize,
									nitems, true);
				if (plist)
					pfree(plist);
				PageIndexTupleDelete(tmppage, i);

				if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i)
					elog(ERROR, "failed to add item to index page in \"%s\"",
						 RelationGetRelationName(gvs->index));

				pfree(itup);
				pfree(items);
			}
		}
	}

	/* NULL signals "no changes"; otherwise return the modified temp copy */
	return (tmppage == origpage) ? NULL : tmppage;
}
562 | | |
/*
 * Bulk-delete entry point for GIN: walk every entry-tree leaf page, remove
 * dead TIDs from posting lists, and vacuum each posting tree encountered.
 */
IndexBulkDeleteResult *
ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			  IndexBulkDeleteCallback callback, void *callback_state)
{
	Relation	index = info->index;
	BlockNumber blkno = GIN_ROOT_BLKNO;
	GinVacuumState gvs;
	Buffer		buffer;
	/* Sized for the maximum number of tuples a single page can hold */
	BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
	uint32		nRoot;

	gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
									   "Gin vacuum temporary context",
									   ALLOCSET_DEFAULT_SIZES);
	gvs.index = index;
	gvs.callback = callback;
	gvs.callback_state = callback_state;
	gvs.strategy = info->strategy;
	initGinState(&gvs.ginstate, index);

	/* first time through? */
	if (stats == NULL)
	{
		/* Yes, so initialize stats to zeroes */
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

		/*
		 * and cleanup any pending inserts
		 */
		ginInsertCleanup(&gvs.ginstate, !AmAutoVacuumWorkerProcess(),
						 false, true, stats);
	}

	/* we'll re-count the tuples each time */
	stats->num_index_tuples = 0;
	gvs.result = stats;

	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, info->strategy);

	/* find leaf page */
	for (;;)
	{
		Page		page = BufferGetPage(buffer);
		IndexTuple	itup;

		LockBuffer(buffer, GIN_SHARE);

		Assert(!GinPageIsData(page));

		if (GinPageIsLeaf(page))
		{
			/* Upgrade share lock to the exclusive lock needed for vacuuming */
			LockBuffer(buffer, GIN_UNLOCK);
			LockBuffer(buffer, GIN_EXCLUSIVE);

			/*
			 * The root could have been split while we held no lock; re-test
			 * and retry if it is no longer a leaf.
			 */
			if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
			{
				LockBuffer(buffer, GIN_UNLOCK);
				continue;		/* check it one more */
			}
			break;
		}

		Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);

		/* Descend through the leftmost downlink */
		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
		blkno = GinGetDownlink(itup);
		Assert(blkno != InvalidBlockNumber);

		UnlockReleaseBuffer(buffer);
		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, info->strategy);
	}

	/* right now we found leftmost page in entry's BTree */

	for (;;)
	{
		Page		page = BufferGetPage(buffer);
		Page		resPage;
		uint32		i;

		Assert(!GinPageIsData(page));

		resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);

		/* Remember the rightlink before we give up the buffer */
		blkno = GinPageGetOpaque(page)->rightlink;

		if (resPage)
		{
			/* Page was modified: install the temp copy and WAL-log it */
			START_CRIT_SECTION();
			PageRestoreTempPage(resPage, page);
			MarkBufferDirty(buffer);
			xlogVacuumPage(gvs.index, buffer);
			UnlockReleaseBuffer(buffer);
			END_CRIT_SECTION();
		}
		else
		{
			UnlockReleaseBuffer(buffer);
		}

		vacuum_delay_point(false);

		/* Vacuum the posting trees found on this entry page */
		for (i = 0; i < nRoot; i++)
		{
			ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
			vacuum_delay_point(false);
		}

		if (blkno == InvalidBlockNumber)	/* rightmost page */
			break;

		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, info->strategy);
		LockBuffer(buffer, GIN_EXCLUSIVE);
	}

	MemoryContextDelete(gvs.tmpCxt);

	return gvs.result;
}
685 | | |
/*
 * Post-vacuum cleanup for GIN: scan the whole index to collect page/entry
 * statistics, record recyclable pages in the FSM, and update the metapage.
 */
IndexBulkDeleteResult *
ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
	Relation	index = info->index;
	bool		needLock;
	BlockNumber npages,
				blkno;
	BlockNumber totFreePages;
	GinState	ginstate;
	GinStatsData idxStat;

	/*
	 * In an autovacuum analyze, we want to clean up pending insertions.
	 * Otherwise, an ANALYZE-only call is a no-op.
	 */
	if (info->analyze_only)
	{
		if (AmAutoVacuumWorkerProcess())
		{
			initGinState(&ginstate, index);
			ginInsertCleanup(&ginstate, false, true, true, stats);
		}
		return stats;
	}

	/*
	 * Set up all-zero stats and cleanup pending inserts if ginbulkdelete
	 * wasn't called
	 */
	if (stats == NULL)
	{
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
		initGinState(&ginstate, index);
		ginInsertCleanup(&ginstate, !AmAutoVacuumWorkerProcess(),
						 false, true, stats);
	}

	memset(&idxStat, 0, sizeof(idxStat));

	/*
	 * XXX we always report the heap tuple count as the number of index
	 * entries. This is bogus if the index is partial, but it's real hard to
	 * tell how many distinct heap entries are referenced by a GIN index.
	 */
	stats->num_index_tuples = Max(info->num_heap_tuples, 0);
	stats->estimated_count = info->estimated_count;

	/*
	 * Need lock unless it's local to this backend.
	 */
	needLock = !RELATION_IS_LOCAL(index);

	/* Snapshot the relation size under the extension lock */
	if (needLock)
		LockRelationForExtension(index, ExclusiveLock);
	npages = RelationGetNumberOfBlocks(index);
	if (needLock)
		UnlockRelationForExtension(index, ExclusiveLock);

	totFreePages = 0;

	for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
	{
		Buffer		buffer;
		Page		page;

		vacuum_delay_point(false);

		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, info->strategy);
		LockBuffer(buffer, GIN_SHARE);
		page = (Page) BufferGetPage(buffer);

		if (GinPageIsRecyclable(page))
		{
			/* Deleted (or never-used) page: hand it to the FSM */
			Assert(blkno != GIN_ROOT_BLKNO);
			RecordFreeIndexPage(index, blkno);
			totFreePages++;
		}
		else if (GinPageIsData(page))
		{
			idxStat.nDataPages++;
		}
		else if (!GinPageIsList(page))
		{
			idxStat.nEntryPages++;

			if (GinPageIsLeaf(page))
				idxStat.nEntries += PageGetMaxOffsetNumber(page);
		}

		UnlockReleaseBuffer(buffer);
	}

	/* Update the metapage with accurate page and entry counts */
	idxStat.nTotalPages = npages;
	ginUpdateStats(info->index, &idxStat, false);

	/* Finally, vacuum the FSM */
	IndexFreeSpaceMapVacuum(info->index);

	stats->pages_free = totFreePages;

	/* Re-read the size: the relation may have grown meanwhile */
	if (needLock)
		LockRelationForExtension(index, ExclusiveLock);
	stats->num_pages = RelationGetNumberOfBlocks(index);
	if (needLock)
		UnlockRelationForExtension(index, ExclusiveLock);

	return stats;
}
796 | | |
797 | | /* |
798 | | * Return whether Page can safely be recycled. |
799 | | */ |
800 | | bool |
801 | | GinPageIsRecyclable(Page page) |
802 | 0 | { |
803 | 0 | TransactionId delete_xid; |
804 | |
|
805 | 0 | if (PageIsNew(page)) |
806 | 0 | return true; |
807 | | |
808 | 0 | if (!GinPageIsDeleted(page)) |
809 | 0 | return false; |
810 | | |
811 | 0 | delete_xid = GinPageGetDeleteXid(page); |
812 | |
|
813 | 0 | if (!TransactionIdIsValid(delete_xid)) |
814 | 0 | return true; |
815 | | |
816 | | /* |
817 | | * If no backend still could view delete_xid as in running, all scans |
818 | | * concurrent with ginDeletePage() must have finished. |
819 | | */ |
820 | 0 | return GlobalVisCheckRemovableXid(NULL, delete_xid); |
821 | 0 | } |