/src/postgres/src/backend/access/brin/brin_revmap.c
/*
 * brin_revmap.c
 *		Range map for BRIN indexes
 *
 * The range map (revmap) is a translation structure for BRIN indexes: for each
 * page range there is one summary tuple, and its location is tracked by the
 * revmap.  Whenever a new tuple that violates the previously recorded summary
 * values is inserted into the table, a new summary tuple is inserted into the
 * index and the revmap is updated to point to it.
 *
 * The revmap is stored in the first pages of the index, immediately following
 * the metapage.  When the revmap needs to be expanded, all tuples on the
 * regular BRIN page at that block (if any) are moved out of the way.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_revmap.c
 */
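
/*
 * Illustrative layout sketch, assuming a revmap that has grown to two pages
 * (the block assignments follow from the paragraph above):
 *
 *    block 0:  metapage (BrinMetaPageData; lastRevmapPage = 2)
 *    block 1:  revmap page covering ranges 0 .. REVMAP_PAGE_MAXITEMS-1
 *    block 2:  revmap page covering the next REVMAP_PAGE_MAXITEMS ranges
 *    block 3+: regular pages holding the BrinTuples the revmap points to
 */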
#include "postgres.h"

#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/brin_xlog.h"
#include "access/rmgr.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"


/*
 * In revmap pages, each item stores an ItemPointerData.  These defines let one
 * find the logical revmap page number and index number of the revmap item for
 * the given heap block number.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)
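
/*
 * Worked example (a sketch, assuming the default pagesPerRange of 128 and
 * REVMAP_PAGE_MAXITEMS of 1360, the typical value for 8kB pages): heap block
 * 1000000 falls in range 1000000 / 128 = 7812, so HEAPBLK_TO_REVMAP_BLK
 * yields 7812 / 1360 = 5 (logical revmap page 5, i.e. physical block 6 once
 * the metapage is skipped) and HEAPBLK_TO_REVMAP_INDEX yields
 * 7812 % 1360 = 1012.
 */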


struct BrinRevmap
{
	Relation	rm_irel;
	BlockNumber rm_pagesPerRange;
	BlockNumber rm_lastRevmapPage;	/* cached from the metapage */
	Buffer		rm_metaBuf;
	Buffer		rm_currBuf;
};

/* typedef appears in brin_revmap.h */


static BlockNumber revmap_get_blkno(BrinRevmap *revmap,
									BlockNumber heapBlk);
static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk);
static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap,
											   BlockNumber heapBlk);
static void revmap_physical_extend(BrinRevmap *revmap);

/*
 * Initialize an access object for a range map.  This must be freed by
 * brinRevmapTerminate when caller is done with it.
 */
BrinRevmap *
brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
{
	BrinRevmap *revmap;
	Buffer		meta;
	BrinMetaPageData *metadata;
	Page		page;

	meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
	LockBuffer(meta, BUFFER_LOCK_SHARE);
	page = BufferGetPage(meta);
	metadata = (BrinMetaPageData *) PageGetContents(page);

	revmap = palloc(sizeof(BrinRevmap));
	revmap->rm_irel = idxrel;
	revmap->rm_pagesPerRange = metadata->pagesPerRange;
	revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
	revmap->rm_metaBuf = meta;
	revmap->rm_currBuf = InvalidBuffer;

	*pagesPerRange = metadata->pagesPerRange;

	LockBuffer(meta, BUFFER_LOCK_UNLOCK);

	return revmap;
}

/*
 * Release resources associated with a revmap access object.
 */
void
brinRevmapTerminate(BrinRevmap *revmap)
{
	ReleaseBuffer(revmap->rm_metaBuf);
	if (revmap->rm_currBuf != InvalidBuffer)
		ReleaseBuffer(revmap->rm_currBuf);
	pfree(revmap);
}
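
/*
 * Minimal usage sketch (hypothetical, compiled out): the lifecycle every
 * caller of this module follows, e.g. the scan and summarization code in
 * brin.c.  Terminating also drops any revmap buffer pin cached in rm_currBuf.
 */
#ifdef NOT_USED
static void
revmap_usage_sketch(Relation idxrel)
{
	BrinRevmap *revmap;
	BlockNumber pagesPerRange;

	/* pins the metapage and reads pagesPerRange from it */
	revmap = brinRevmapInitialize(idxrel, &pagesPerRange);

	/* ... look up or update ranges via the functions below ... */

	/* releases the metapage pin, any cached revmap buffer, and the struct */
	brinRevmapTerminate(revmap);
}
#endif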

/*
 * Extend the revmap to cover the given heap block number.
 */
void
brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber mapBlk PG_USED_FOR_ASSERTS_ONLY;

	mapBlk = revmap_extend_and_get_blkno(revmap, heapBlk);

	/* Ensure the buffer we got is in the expected range */
	Assert(mapBlk != InvalidBlockNumber &&
		   mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);
}

/*
 * Prepare to insert an entry into the revmap; the revmap buffer in which the
 * entry is to reside is locked and returned.  Most callers should call
 * brinRevmapExtend beforehand, as this routine does not extend the revmap if
 * it's not long enough.
 *
 * The returned buffer is also recorded in the revmap struct; terminating the
 * revmap access releases it, so the caller needn't do so explicitly.
 */
Buffer
brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
{
	Buffer		rmBuf;

	rmBuf = revmap_get_buffer(revmap, heapBlk);
	LockBuffer(rmBuf, BUFFER_LOCK_EXCLUSIVE);

	return rmBuf;
}

/*
 * In the given revmap buffer (locked appropriately by caller), which is used
 * in a BRIN index of pagesPerRange pages per range, set the element
 * corresponding to heap block number heapBlk to the given TID.
 *
 * Once the operation is complete, the caller must update the LSN on the
 * passed buffer.
 *
 * This is used both in regular operation and during WAL replay.
 */
void
brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
						BlockNumber heapBlk, ItemPointerData tid)
{
	RevmapContents *contents;
	ItemPointerData *iptr;
	Page		page;

	/* The correct page should already be pinned and locked */
	page = BufferGetPage(buf);
	contents = (RevmapContents *) PageGetContents(page);
	iptr = (ItemPointerData *) contents->rm_tids;
	iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);

	if (ItemPointerIsValid(&tid))
		ItemPointerSet(iptr,
					   ItemPointerGetBlockNumber(&tid),
					   ItemPointerGetOffsetNumber(&tid));
	else
		ItemPointerSetInvalid(iptr);
}

/*
 * Fetch the BrinTuple for a given heap block.
 *
 * The buffer containing the tuple is locked, and returned in *buf.  The
 * returned tuple points to the shared buffer and must not be freed; if caller
 * wants to use it after releasing the buffer lock, it must create its own
 * palloc'ed copy.  As an optimization, the caller can pass a pinned buffer
 * *buf on entry, which will avoid a pin-unpin cycle when the next tuple is on
 * the same page as a previous one.
 *
 * If no tuple is found for the given heap range, returns NULL.  In that case,
 * *buf might still be updated (and the pin must be released by caller), but
 * it's not locked.
 *
 * The output tuple offset within the buffer is returned in *off, and its size
 * is returned in *size.
 */
BrinTuple *
brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
						 Buffer *buf, OffsetNumber *off, Size *size, int mode)
{
	Relation	idxRel = revmap->rm_irel;
	BlockNumber mapBlk;
	RevmapContents *contents;
	ItemPointerData *iptr;
	BlockNumber blk;
	Page		page;
	ItemId		lp;
	BrinTuple  *tup;
	ItemPointerData previptr;

	/* normalize the heap block number to be the first page in the range */
	heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;

	/*
	 * Compute the revmap page number we need.  If Invalid is returned (i.e.,
	 * the revmap page hasn't been created yet), the requested page range is
	 * not summarized.
	 */
	mapBlk = revmap_get_blkno(revmap, heapBlk);
	if (mapBlk == InvalidBlockNumber)
	{
		*off = InvalidOffsetNumber;
		return NULL;
	}

	ItemPointerSetInvalid(&previptr);
	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		if (revmap->rm_currBuf == InvalidBuffer ||
			BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
		{
			if (revmap->rm_currBuf != InvalidBuffer)
				ReleaseBuffer(revmap->rm_currBuf);

			Assert(mapBlk != InvalidBlockNumber);
			revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
		}

		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE);

		contents = (RevmapContents *)
			PageGetContents(BufferGetPage(revmap->rm_currBuf));
		iptr = contents->rm_tids;
		iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

		if (!ItemPointerIsValid(iptr))
		{
			LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);
			return NULL;
		}

		/*
		 * Check the TID we got in a previous iteration, if any, and save the
		 * current TID we got from the revmap; if we loop, we can sanity-check
		 * that the next one we get is different.  Otherwise we might be stuck
		 * looping forever if the revmap is somehow badly broken.
		 */
		if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
			ereport(ERROR,
					(errcode(ERRCODE_INDEX_CORRUPTED),
					 errmsg_internal("corrupted BRIN index: inconsistent range map")));
		previptr = *iptr;

		blk = ItemPointerGetBlockNumber(iptr);
		*off = ItemPointerGetOffsetNumber(iptr);

		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);

		/* Ok, got a pointer to where the BrinTuple should be.  Fetch it. */
		if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
		{
			if (BufferIsValid(*buf))
				ReleaseBuffer(*buf);
			*buf = ReadBuffer(idxRel, blk);
		}
		LockBuffer(*buf, mode);
		page = BufferGetPage(*buf);

		/* If we land on a revmap page instead of a regular one, start over */
		if (BRIN_IS_REGULAR_PAGE(page))
		{
			/*
			 * If the offset number is greater than what's in the page, it's
			 * possible that the range was desummarized concurrently.  Just
			 * return NULL to handle that case.
			 */
			if (*off > PageGetMaxOffsetNumber(page))
			{
				LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
				return NULL;
			}

			lp = PageGetItemId(page, *off);
			if (ItemIdIsUsed(lp))
			{
				tup = (BrinTuple *) PageGetItem(page, lp);

				if (tup->bt_blkno == heapBlk)
				{
					if (size)
						*size = ItemIdGetLength(lp);
					/* found it! */
					return tup;
				}
			}
		}

		/*
		 * No luck.  Assume that the revmap was updated concurrently.
		 */
		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
	}
	/* not reached, but keep compiler quiet */
	return NULL;
}
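
/*
 * Caller sketch (hypothetical, compiled out): fetch one range's summary tuple
 * and copy it before dropping the buffer lock, per the contract documented
 * above.  brin_copy_tuple comes from brin_tuple.h, already included here.
 */
#ifdef NOT_USED
static BrinTuple *
fetch_range_summary_sketch(BrinRevmap *revmap, BlockNumber heapBlk)
{
	Buffer		buf = InvalidBuffer;
	OffsetNumber off;
	Size		size;
	BrinTuple  *tup;
	BrinTuple  *copy = NULL;

	tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, &size,
								   BUFFER_LOCK_SHARE);
	if (tup != NULL)
	{
		/* the tuple points into the shared buffer; copy before unlocking */
		copy = brin_copy_tuple(tup, size, NULL, NULL);
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	}
	/* on a NULL result *buf may still hold a pin; release it either way */
	if (BufferIsValid(buf))
		ReleaseBuffer(buf);
	return copy;
}
#endif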

/*
 * Delete an index tuple, marking a page range as unsummarized.
 *
 * Index must be locked in ShareUpdateExclusiveLock mode.
 *
 * Return false if caller should retry.
 */
bool
brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
{
	BrinRevmap *revmap;
	BlockNumber pagesPerRange;
	RevmapContents *contents;
	ItemPointerData *iptr;
	ItemPointerData invalidIptr;
	BlockNumber revmapBlk;
	Buffer		revmapBuf;
	Buffer		regBuf;
	Page		revmapPg;
	Page		regPg;
	OffsetNumber revmapOffset;
	OffsetNumber regOffset;
	ItemId		lp;

	revmap = brinRevmapInitialize(idxrel, &pagesPerRange);

	revmapBlk = revmap_get_blkno(revmap, heapBlk);
	if (!BlockNumberIsValid(revmapBlk))
	{
		/* revmap page doesn't exist: range not summarized, we're done */
		brinRevmapTerminate(revmap);
		return true;
	}

	/* Lock the revmap page, obtain the index tuple pointer from it */
	revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
	revmapPg = BufferGetPage(revmapBuf);
	revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

	contents = (RevmapContents *) PageGetContents(revmapPg);
	iptr = contents->rm_tids;
	iptr += revmapOffset;

	if (!ItemPointerIsValid(iptr))
	{
		/* no index tuple: range not summarized, we're done */
		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
		brinRevmapTerminate(revmap);
		return true;
	}

	regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
	LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
	regPg = BufferGetPage(regBuf);

	/* if this is no longer a regular page, tell caller to start over */
	if (!BRIN_IS_REGULAR_PAGE(regPg))
	{
		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
		LockBuffer(regBuf, BUFFER_LOCK_UNLOCK);
		brinRevmapTerminate(revmap);
		return false;
	}

	regOffset = ItemPointerGetOffsetNumber(iptr);
	if (regOffset > PageGetMaxOffsetNumber(regPg))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));

	lp = PageGetItemId(regPg, regOffset);
	if (!ItemIdIsUsed(lp))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));

	/*
	 * Placeholder tuples only appear during unfinished summarization, and we
	 * hold ShareUpdateExclusiveLock, so this function cannot run concurrently
	 * with that.  So any placeholder tuples that exist are leftovers from a
	 * crashed or aborted summarization; remove them silently.
	 */

	START_CRIT_SECTION();

	ItemPointerSetInvalid(&invalidIptr);
	brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
							invalidIptr);
	PageIndexTupleDeleteNoCompact(regPg, regOffset);
	/* XXX record free space in FSM? */

	MarkBufferDirty(regBuf);
	MarkBufferDirty(revmapBuf);

	if (RelationNeedsWAL(idxrel))
	{
		xl_brin_desummarize xlrec;
		XLogRecPtr	recptr;

		xlrec.pagesPerRange = revmap->rm_pagesPerRange;
		xlrec.heapBlk = heapBlk;
		xlrec.regOffset = regOffset;

		XLogBeginInsert();
		XLogRegisterData(&xlrec, SizeOfBrinDesummarize);
		XLogRegisterBuffer(0, revmapBuf, 0);
		XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
		PageSetLSN(revmapPg, recptr);
		PageSetLSN(regPg, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(regBuf);
	LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
	brinRevmapTerminate(revmap);

	return true;
}
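
/*
 * Caller sketch (hypothetical, compiled out): a false return means "lost a
 * race, try again", so callers simply loop, as the SQL-callable
 * brin_desummarize_range() in brin.c does.
 */
#ifdef NOT_USED
static void
desummarize_retry_sketch(Relation idxrel, BlockNumber heapBlk)
{
	bool		done;

	do
	{
		done = brinRevmapDesummarizeRange(idxrel, heapBlk);
	} while (!done);
}
#endif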

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, return
 * InvalidBlockNumber.
 */
static BlockNumber
revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber targetblk;

	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Normal case: the revmap page is already allocated */
	if (targetblk <= revmap->rm_lastRevmapPage)
		return targetblk;

	return InvalidBlockNumber;
}

/*
 * Obtain and return a buffer containing the revmap page for the given heap
 * page.  The revmap must have been previously extended to cover that page.
 * The returned buffer is also recorded in the revmap struct; terminating the
 * revmap access releases it, so the caller needn't do so explicitly.
 */
static Buffer
revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber mapBlk;

	/* Translate the heap block number to physical index location. */
	mapBlk = revmap_get_blkno(revmap, heapBlk);

	if (mapBlk == InvalidBlockNumber)
		elog(ERROR, "revmap does not cover heap block %u", heapBlk);

	/* Ensure the buffer we got is in the expected range */
	Assert(mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);

	/*
	 * Obtain the buffer from which we need to read.  If we already have the
	 * correct buffer in our access struct, use that; otherwise, release it
	 * (if valid) and read the one we need.
	 */
	if (revmap->rm_currBuf == InvalidBuffer ||
		mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
	{
		if (revmap->rm_currBuf != InvalidBuffer)
			ReleaseBuffer(revmap->rm_currBuf);

		revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
	}

	return revmap->rm_currBuf;
}

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, extend
 * the revmap until it is.
 */
static BlockNumber
revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber targetblk;

	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Extend the revmap, if necessary */
	while (targetblk > revmap->rm_lastRevmapPage)
	{
		CHECK_FOR_INTERRUPTS();
		revmap_physical_extend(revmap);
	}

	return targetblk;
}

/*
 * Try to extend the revmap by one page.  This might not happen for a number of
 * reasons; caller is expected to retry until the expected outcome is obtained.
 */
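/*
 * Four outcomes are possible: the cached lastRevmapPage is stale, in which
 * case we refresh it and return; somebody extended the relation concurrently,
 * in which case we give up and return; a regular page occupies the target
 * block, in which case we evacuate it and return; otherwise we reinitialize
 * the target block as a revmap page and update the metapage, inside one
 * critical section covered by a single WAL record.
 */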
static void
revmap_physical_extend(BrinRevmap *revmap)
{
	Buffer		buf;
	Page		page;
	Page		metapage;
	BrinMetaPageData *metadata;
	BlockNumber mapBlk;
	BlockNumber nblocks;
	Relation	irel = revmap->rm_irel;

	/*
	 * Lock the metapage.  This locks out concurrent extensions of the revmap,
	 * but note that we still need to grab the relation extension lock because
	 * another backend can extend the index with regular BRIN pages.
	 */
	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_EXCLUSIVE);
	metapage = BufferGetPage(revmap->rm_metaBuf);
	metadata = (BrinMetaPageData *) PageGetContents(metapage);

	/*
	 * Check that our cached lastRevmapPage value was up-to-date; if it
	 * wasn't, update the cached copy and have caller start over.
	 */
	if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
	{
		revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		return;
	}
	mapBlk = metadata->lastRevmapPage + 1;

	nblocks = RelationGetNumberOfBlocks(irel);
	if (mapBlk < nblocks)
	{
		buf = ReadBuffer(irel, mapBlk);
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		page = BufferGetPage(buf);
	}
	else
	{
		buf = ExtendBufferedRel(BMR_REL(irel), MAIN_FORKNUM, NULL,
								EB_LOCK_FIRST);
		if (BufferGetBlockNumber(buf) != mapBlk)
		{
			/*
			 * Very rare corner case: somebody extended the relation
			 * concurrently after we read its length.  If this happens, give
			 * up and have caller start over.  We will have to evacuate that
			 * page from under whoever is using it.
			 */
			LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
			UnlockReleaseBuffer(buf);
			return;
		}
		page = BufferGetPage(buf);
	}

	/* Check that it's a regular block (or an empty page) */
	if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
						BrinPageType(page),
						RelationGetRelationName(irel),
						BufferGetBlockNumber(buf))));

	/* If the page is in use, evacuate it and restart */
	if (brin_start_evacuating_page(irel, buf))
	{
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);

		/* have caller start over */
		return;
	}

	/*
	 * Ok, we have now locked the metapage and the target block.  Re-initialize
	 * the target block as a revmap page, and update the metapage.
	 */
	START_CRIT_SECTION();

	/* the rm_tids array is initialized to all invalid by PageInit */
	brin_page_init(page, BRIN_PAGETYPE_REVMAP);
	MarkBufferDirty(buf);

	metadata->lastRevmapPage = mapBlk;

	/*
	 * Set pd_lower just past the end of the metadata.  This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page.  (We must do this here because pre-v11 versions of PG did not
	 * set the metapage's pd_lower correctly, so a pg_upgraded index might
	 * contain the wrong value.)
	 */
	((PageHeader) metapage)->pd_lower =
		((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;

	MarkBufferDirty(revmap->rm_metaBuf);

	if (RelationNeedsWAL(revmap->rm_irel))
	{
		xl_brin_revmap_extend xlrec;
		XLogRecPtr	recptr;

		xlrec.targetBlk = mapBlk;

		XLogBeginInsert();
		XLogRegisterData(&xlrec, SizeOfBrinRevmapExtend);
		XLogRegisterBuffer(0, revmap->rm_metaBuf, REGBUF_STANDARD);

		XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);

		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
		PageSetLSN(metapage, recptr);
		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);

	UnlockReleaseBuffer(buf);
}