/src/postgres/src/backend/access/heap/vacuumlazy.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * vacuumlazy.c |
4 | | * Concurrent ("lazy") vacuuming. |
5 | | * |
6 | | * Heap relations are vacuumed in three main phases. In phase I, vacuum scans |
7 | | * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in |
8 | | * a TID store. If that TID store fills up or vacuum finishes scanning the |
9 | | * relation, it progresses to phase II: index vacuuming. Index vacuuming |
10 | | * deletes the dead index entries referenced in the TID store. In phase III, |
11 | | * vacuum scans the blocks of the relation referred to by the TIDs in the TID |
12 | | * store and reaps the corresponding dead items, freeing that space for future |
13 | | * tuples. |
14 | | * |
15 | | * If there are no indexes or index scanning is disabled, phase II may be |
16 | | * skipped. If phase I identified very few dead index entries or if vacuum's |
17 | | * failsafe mechanism has triggered (to avoid transaction ID wraparound), |
18 | | * vacuum may skip phases II and III. |
19 | | * |
20 | | * If the TID store fills up in phase I, vacuum suspends phase I and proceeds |
21 | | * to phases II and III, cleaning up the dead tuples referenced in the current |
22 | | * TID store. This empties the TID store, allowing vacuum to resume phase I. |
23 | | * |
24 | | * In a way, the phases are more like states in a state machine, but they have |
25 | | * been referred to colloquially as phases for so long that they are referred |
26 | | * to as such here. |
27 | | * |
28 | | * Manually invoked VACUUMs may scan indexes during phase II in parallel. For |
29 | | * more information on this, see the comment at the top of vacuumparallel.c. |
30 | | * |
31 | | * In between phases, vacuum updates the freespace map (every |
32 | | * VACUUM_FSM_EVERY_PAGES). |
33 | | * |
34 | | * After completing all three phases, vacuum may truncate the relation if it |
35 | | * has emptied pages at the end. Finally, vacuum updates relation statistics |
36 | | * in pg_class and the cumulative statistics subsystem. |
37 | | * |
38 | | * Relation Scanning: |
39 | | * |
40 | | * Vacuum scans the heap relation, starting at the beginning and progressing |
41 | | * to the end, skipping pages as permitted by their visibility status, vacuum |
42 | | * options, and various other requirements. |
43 | | * |
44 | | * Vacuums are either aggressive or normal. Aggressive vacuums must scan every |
45 | | * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID |
46 | | * wraparound. Normal vacuums may scan otherwise skippable pages for one of |
47 | | * two reasons: |
48 | | * |
49 | | * When page skipping is not disabled, a normal vacuum may scan pages that are |
50 | | * marked all-visible (and even all-frozen) in the visibility map if the range |
51 | | * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the |
52 | | * benefit of kernel readahead (see comment in heap_vac_scan_next_block()). |
53 | | * |
54 | | * A normal vacuum may also scan skippable pages in an effort to freeze them |
55 | | * and decrease the backlog of all-visible but not all-frozen pages that have |
56 | | * to be processed by the next aggressive vacuum. These are referred to as |
57 | | * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not |
58 | | * count as eagerly scanned pages. |
59 | | * |
60 | | * Eagerly scanned pages that are set all-frozen in the VM are successful |
61 | | * eager freezes and those not set all-frozen in the VM are failed eager |
62 | | * freezes. |
63 | | * |
64 | | * Because we want to amortize the overhead of freezing pages over multiple |
65 | | * vacuums, normal vacuums cap the number of successful eager freezes to |
66 | | * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not |
67 | | * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages |
68 | | * may be unfrozen before the next aggressive vacuum, capping the number of |
69 | | * successful eager freezes also caps the downside of eager freezing: |
70 | | * potentially wasted work. |
71 | | * |
72 | | * Once the success cap has been hit, eager scanning is disabled for the |
73 | | * remainder of the vacuum of the relation. |
74 | | * |
75 | | * Success is capped globally because we don't want to limit our successes if |
76 | | * old data happens to be concentrated in a particular part of the table. This |
77 | | * is especially likely to happen for append-mostly workloads where the oldest |
78 | | * data is at the beginning of the unfrozen portion of the relation. |
79 | | * |
80 | | * On the assumption that different regions of the table are likely to contain |
81 | | * similarly aged data, normal vacuums use a localized eager freeze failure |
82 | | * cap. The failure count is reset for each region of the table -- comprised |
83 | | * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate |
84 | | * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures |
85 | | * before suspending eager scanning until the end of the region. |
86 | | * vacuum_max_eager_freeze_failure_rate is configurable both globally and per |
87 | | * table. |
88 | | * |
89 | | * Aggressive vacuums must examine every unfrozen tuple and thus are not |
90 | | * subject to any of the limits imposed by the eager scanning algorithm. |
91 | | * |
92 | | * Once vacuum has decided to scan a given block, it must read the block and |
93 | | * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum |
94 | | * may choose to skip pruning and freezing if it cannot acquire a cleanup lock |
95 | | * on the buffer right away. In this case, it may miss cleaning up dead tuples |
96 | | * and their associated index entries (though it is free to reap any existing |
97 | | * dead items on the page). |
98 | | * |
99 | | * After pruning and freezing, pages that are newly all-visible and all-frozen |
100 | | * are marked as such in the visibility map. |
101 | | * |
102 | | * Dead TID Storage: |
103 | | * |
104 | | * The major space usage for vacuuming is storage for the dead tuple IDs that |
105 | | * are to be removed from indexes. We want to ensure we can vacuum even the |
106 | | * very largest relations with finite memory space usage. To do that, we set |
107 | | * upper bounds on the memory that can be used for keeping track of dead TIDs |
108 | | * at once. |
109 | | * |
110 | | * We are willing to use at most maintenance_work_mem (or perhaps |
111 | | * autovacuum_work_mem) memory space to keep track of dead TIDs. If the |
112 | | * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum |
113 | | * the pages that we've pruned). This frees up the memory space dedicated to |
114 | | * storing dead TIDs. |
115 | | * |
116 | | * In practice VACUUM will often complete its initial pass over the target |
117 | | * heap relation without ever running out of space to store TIDs. This means |
118 | | * that there only needs to be one call to lazy_vacuum, after the initial pass |
119 | | * completes. |
120 | | * |
121 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
122 | | * Portions Copyright (c) 1994, Regents of the University of California |
123 | | * |
124 | | * |
125 | | * IDENTIFICATION |
126 | | * src/backend/access/heap/vacuumlazy.c |
127 | | * |
128 | | *------------------------------------------------------------------------- |
129 | | */ |
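A minimal standalone sketch of the phase cycling described in the header comment above, using made-up numbers (a toy "TID store" capacity and a pretend count of dead items per page); none of these names or constants come from this file:

#include <stdio.h>

/*
 * Toy model: phase I accumulates dead TIDs per page; whenever the store
 * fills up (or the scan ends), phases II and III run and empty it.
 */
#define SKETCH_REL_PAGES		1000	/* pretend heap size, in blocks */
#define SKETCH_DEAD_PER_PAGE	5		/* pretend LP_DEAD items found per block */
#define SKETCH_STORE_CAPACITY	1200	/* pretend TID store limit */

int
main(void)
{
	int		stored_tids = 0;
	int		index_heap_passes = 0;

	for (int blkno = 0; blkno < SKETCH_REL_PAGES; blkno++)
	{
		/* Phase I: prune/freeze this block, remember its dead TIDs */
		stored_tids += SKETCH_DEAD_PER_PAGE;

		/* Store full, or scan finished: run phases II and III, then resume */
		if (stored_tids >= SKETCH_STORE_CAPACITY || blkno == SKETCH_REL_PAGES - 1)
		{
			index_heap_passes++;	/* phase II: delete dead index entries */
			stored_tids = 0;		/* phase III: reap heap items, emptying the store */
		}
	}

	printf("index/heap vacuuming passes: %d\n", index_heap_passes);	/* prints 5 */
	return 0;
}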
130 | | #include "postgres.h" |
131 | | |
132 | | #include <math.h> |
133 | | |
134 | | #include "access/genam.h" |
135 | | #include "access/heapam.h" |
136 | | #include "access/htup_details.h" |
137 | | #include "access/multixact.h" |
138 | | #include "access/tidstore.h" |
139 | | #include "access/transam.h" |
140 | | #include "access/visibilitymap.h" |
141 | | #include "access/xloginsert.h" |
142 | | #include "catalog/storage.h" |
143 | | #include "commands/dbcommands.h" |
144 | | #include "commands/progress.h" |
145 | | #include "commands/vacuum.h" |
146 | | #include "common/int.h" |
147 | | #include "common/pg_prng.h" |
148 | | #include "executor/instrument.h" |
149 | | #include "miscadmin.h" |
150 | | #include "pgstat.h" |
151 | | #include "portability/instr_time.h" |
152 | | #include "postmaster/autovacuum.h" |
153 | | #include "storage/bufmgr.h" |
154 | | #include "storage/freespace.h" |
155 | | #include "storage/lmgr.h" |
156 | | #include "storage/read_stream.h" |
157 | | #include "utils/lsyscache.h" |
158 | | #include "utils/pg_rusage.h" |
159 | | #include "utils/timestamp.h" |
160 | | |
161 | | |
162 | | /* |
163 | | * Space/time tradeoff parameters: do these need to be user-tunable? |
164 | | * |
165 | | * To consider truncating the relation, we want there to be at least |
166 | | * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever |
167 | | * is less) potentially-freeable pages. |
168 | | */ |
169 | 0 | #define REL_TRUNCATE_MINIMUM 1000 |
170 | 0 | #define REL_TRUNCATE_FRACTION 16 |
171 | | |
172 | | /* |
173 | | * Timing parameters for truncate locking heuristics. |
174 | | * |
175 | | * These were not exposed as user tunable GUC values because it didn't seem |
176 | | * that the potential for improvement was great enough to merit the cost of |
177 | | * supporting them. |
178 | | */ |
179 | 0 | #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ |
180 | 0 | #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ |
181 | 0 | #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ |
182 | | |
183 | | /* |
184 | | * Threshold that controls whether we bypass index vacuuming and heap |
185 | | * vacuuming as an optimization |
186 | | */ |
187 | 0 | #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */ |
188 | | |
189 | | /* |
190 | | * Perform a failsafe check each time we scan another 4GB of pages. |
191 | | * (Note that this is deliberately kept to a power-of-two, usually 2^19.) |
192 | | */ |
193 | | #define FAILSAFE_EVERY_PAGES \ |
194 | 0 | ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ)) |
195 | | |
196 | | /* |
197 | | * When a table has no indexes, vacuum the FSM after every 8GB, approximately |
198 | | * (it won't be exact because we only vacuum FSM after processing a heap page |
199 | | * that has some removable tuples). When there are indexes, this is ignored, |
200 | | * and we vacuum FSM after each index/heap cleaning pass. |
201 | | */ |
202 | | #define VACUUM_FSM_EVERY_PAGES \ |
203 | 0 | ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ)) |
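With the default 8192-byte block size (BLCKSZ), the two thresholds above come out to 2^19 and 2^20 blocks respectively; the standalone checks below simply restate that arithmetic and are not part of this file:

#include <stdint.h>

/* 4GB / 8KB = 524288 blocks (2^19); 8GB / 8KB = 1048576 blocks (2^20) */
_Static_assert(((uint64_t) 4 * 1024 * 1024 * 1024) / 8192 == 524288,
			   "failsafe check roughly every 2^19 blocks at an 8KB block size");
_Static_assert(((uint64_t) 8 * 1024 * 1024 * 1024) / 8192 == 1048576,
			   "no-index FSM vacuuming roughly every 2^20 blocks at an 8KB block size");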
204 | | |
205 | | /* |
206 | | * Before we consider skipping a page that's marked as clean in |
207 | | * the visibility map, we must've seen at least this many clean pages. |
208 | | */ |
209 | 0 | #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32) |
210 | | |
211 | | /* |
212 | | * Size of the prefetch window for lazy vacuum backwards truncation scan. |
213 | | * Needs to be a power of 2. |
214 | | */ |
215 | 0 | #define PREFETCH_SIZE ((BlockNumber) 32) |
216 | | |
217 | | /* |
218 | | * Macro to check if we are in a parallel vacuum. If true, we are in |
219 | | * parallel mode and the DSM segment is initialized. |
220 | | */ |
221 | 0 | #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL) |
222 | | |
223 | | /* Phases of vacuum during which we report error context. */ |
224 | | typedef enum |
225 | | { |
226 | | VACUUM_ERRCB_PHASE_UNKNOWN, |
227 | | VACUUM_ERRCB_PHASE_SCAN_HEAP, |
228 | | VACUUM_ERRCB_PHASE_VACUUM_INDEX, |
229 | | VACUUM_ERRCB_PHASE_VACUUM_HEAP, |
230 | | VACUUM_ERRCB_PHASE_INDEX_CLEANUP, |
231 | | VACUUM_ERRCB_PHASE_TRUNCATE, |
232 | | } VacErrPhase; |
233 | | |
234 | | /* |
235 | | * An eager scan of a page that is set all-frozen in the VM is considered |
236 | | * "successful". To spread out freezing overhead across multiple normal |
237 | | * vacuums, we limit the number of successful eager page freezes. The maximum |
238 | | * number of eager page freezes is calculated as a ratio of the all-visible |
239 | | * but not all-frozen pages at the beginning of the vacuum. |
240 | | */ |
241 | 0 | #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2 |
242 | | |
243 | | /* |
244 | | * On the assumption that different regions of the table tend to have |
245 | | * similarly aged data, once vacuum fails to freeze |
246 | | * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size |
247 | | * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed |
248 | | * to another region of the table with potentially older data. |
249 | | */ |
250 | 0 | #define EAGER_SCAN_REGION_SIZE 4096 |
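As a worked example of these two caps (illustrative figures only): with a vacuum_max_eager_freeze_failure_rate of 0.03, each 4096-block region tolerates floor(0.03 * 4096) = 122 failed eager freezes before eager scanning pauses until the next region; and a table that begins the vacuum with 100,000 all-visible but not all-frozen pages gets a global cap of 0.2 * 100,000 = 20,000 successful eager freezes before eager scanning is disabled for the rest of the vacuum.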
251 | | |
252 | | /* |
253 | | * heap_vac_scan_next_block() sets these flags to communicate information |
254 | | * about the block it read to the caller. |
255 | | */ |
256 | 0 | #define VAC_BLK_WAS_EAGER_SCANNED (1 << 0) |
257 | 0 | #define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1) |
258 | | |
259 | | typedef struct LVRelState |
260 | | { |
261 | | /* Target heap relation and its indexes */ |
262 | | Relation rel; |
263 | | Relation *indrels; |
264 | | int nindexes; |
265 | | |
266 | | /* Buffer access strategy and parallel vacuum state */ |
267 | | BufferAccessStrategy bstrategy; |
268 | | ParallelVacuumState *pvs; |
269 | | |
270 | | /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */ |
271 | | bool aggressive; |
272 | | /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */ |
273 | | bool skipwithvm; |
274 | | /* Consider index vacuuming bypass optimization? */ |
275 | | bool consider_bypass_optimization; |
276 | | |
277 | | /* Doing index vacuuming, index cleanup, rel truncation? */ |
278 | | bool do_index_vacuuming; |
279 | | bool do_index_cleanup; |
280 | | bool do_rel_truncate; |
281 | | |
282 | | /* VACUUM operation's cutoffs for freezing and pruning */ |
283 | | struct VacuumCutoffs cutoffs; |
284 | | GlobalVisState *vistest; |
285 | | /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */ |
286 | | TransactionId NewRelfrozenXid; |
287 | | MultiXactId NewRelminMxid; |
288 | | bool skippedallvis; |
289 | | |
290 | | /* Error reporting state */ |
291 | | char *dbname; |
292 | | char *relnamespace; |
293 | | char *relname; |
294 | | char *indname; /* Current index name */ |
295 | | BlockNumber blkno; /* used only for heap operations */ |
296 | | OffsetNumber offnum; /* used only for heap operations */ |
297 | | VacErrPhase phase; |
298 | | bool verbose; /* VACUUM VERBOSE? */ |
299 | | |
300 | | /* |
301 | | * dead_items stores TIDs whose index tuples are deleted by index |
302 | | * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page |
303 | | * that has been processed by lazy_scan_prune. Also needed by |
304 | | * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as |
305 | | * LP_UNUSED during second heap pass. |
306 | | * |
307 | | * Both dead_items and dead_items_info are allocated in shared memory in |
308 | | * parallel vacuum cases. |
309 | | */ |
310 | | TidStore *dead_items; /* TIDs whose index tuples we'll delete */ |
311 | | VacDeadItemsInfo *dead_items_info; |
312 | | |
313 | | BlockNumber rel_pages; /* total number of pages */ |
314 | | BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */ |
315 | | |
316 | | /* |
317 | | * Count of all-visible blocks eagerly scanned (for logging only). This |
318 | | * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD. |
319 | | */ |
320 | | BlockNumber eager_scanned_pages; |
321 | | |
322 | | BlockNumber removed_pages; /* # pages removed by relation truncation */ |
323 | | BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */ |
324 | | |
325 | | /* # pages newly set all-visible in the VM */ |
326 | | BlockNumber vm_new_visible_pages; |
327 | | |
328 | | /* |
329 | | * # pages newly set all-visible and all-frozen in the VM. This is a |
330 | | * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes |
331 | | * all pages set all-visible, but vm_new_visible_frozen_pages includes |
332 | | * only those which were also set all-frozen. |
333 | | */ |
334 | | BlockNumber vm_new_visible_frozen_pages; |
335 | | |
336 | | /* # all-visible pages newly set all-frozen in the VM */ |
337 | | BlockNumber vm_new_frozen_pages; |
338 | | |
339 | | BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */ |
340 | | BlockNumber missed_dead_pages; /* # pages with missed dead tuples */ |
341 | | BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ |
342 | | |
343 | | /* Statistics output by us, for table */ |
344 | | double new_rel_tuples; /* new estimated total # of tuples */ |
345 | | double new_live_tuples; /* new estimated total # of live tuples */ |
346 | | /* Statistics output by index AMs */ |
347 | | IndexBulkDeleteResult **indstats; |
348 | | |
349 | | /* Instrumentation counters */ |
350 | | int num_index_scans; |
351 | | /* Counters that follow are only for scanned_pages */ |
352 | | int64 tuples_deleted; /* # deleted from table */ |
353 | | int64 tuples_frozen; /* # newly frozen */ |
354 | | int64 lpdead_items; /* # deleted from indexes */ |
355 | | int64 live_tuples; /* # live tuples remaining */ |
356 | | int64 recently_dead_tuples; /* # dead, but not yet removable */ |
357 | | int64 missed_dead_tuples; /* # removable, but not removed */ |
358 | | |
359 | | /* State maintained by heap_vac_scan_next_block() */ |
360 | | BlockNumber current_block; /* last block returned */ |
361 | | BlockNumber next_unskippable_block; /* next unskippable block */ |
362 | | bool next_unskippable_allvis; /* its visibility status */ |
363 | | bool next_unskippable_eager_scanned; /* if it was eagerly scanned */ |
364 | | Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */ |
365 | | |
366 | | /* State related to managing eager scanning of all-visible pages */ |
367 | | |
368 | | /* |
369 | | * A normal vacuum that has failed to freeze too many eagerly scanned |
370 | | * blocks in a region suspends eager scanning. |
371 | | * next_eager_scan_region_start is the block number of the first block |
372 | | * eligible for resumed eager scanning. |
373 | | * |
374 | | * When eager scanning is permanently disabled, either initially |
375 | | * (including for aggressive vacuum) or due to hitting the success cap, |
376 | | * this is set to InvalidBlockNumber. |
377 | | */ |
378 | | BlockNumber next_eager_scan_region_start; |
379 | | |
380 | | /* |
381 | | * The remaining number of blocks a normal vacuum will consider eager |
382 | | * scanning when it is successful. When eager scanning is enabled, this is |
383 | | * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of |
384 | | * all-visible but not all-frozen pages. For each eager freeze success, |
385 | | * this is decremented. Once it hits 0, eager scanning is permanently |
386 | | * disabled. It is initialized to 0 if eager scanning starts out disabled |
387 | | * (including for aggressive vacuum). |
388 | | */ |
389 | | BlockNumber eager_scan_remaining_successes; |
390 | | |
391 | | /* |
392 | | * The maximum number of blocks which may be eagerly scanned and not |
393 | | * frozen before eager scanning is temporarily suspended. This is |
394 | | * configurable both globally, via the |
395 | | * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table |
396 | | * storage parameter of the same name. It is calculated as |
397 | | * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks. |
398 | | * It is 0 when eager scanning is disabled. |
399 | | */ |
400 | | BlockNumber eager_scan_max_fails_per_region; |
401 | | |
402 | | /* |
403 | | * The number of eagerly scanned blocks vacuum failed to freeze (due to |
404 | | * age) in the current eager scan region. Vacuum resets it to |
405 | | * eager_scan_max_fails_per_region each time it enters a new region of the |
406 | | * relation. If eager_scan_remaining_fails hits 0, eager scanning is |
407 | | * suspended until the next region. It is also 0 if eager scanning has |
408 | | * been permanently disabled. |
409 | | */ |
410 | | BlockNumber eager_scan_remaining_fails; |
411 | | } LVRelState; |
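The interaction among the four eager-scan fields above can be restated as a small standalone sketch. The types, names, and the place where this bookkeeping would happen are hypothetical simplifications of what the field comments describe, not the code used later in this file:

#include <stdbool.h>

typedef unsigned int sketch_blkno;	/* stand-in for BlockNumber */
#define SKETCH_REGION_SIZE 4096		/* mirrors EAGER_SCAN_REGION_SIZE */
#define SKETCH_INVALID_BLKNO ((sketch_blkno) 0xFFFFFFFF)

typedef struct SketchEagerState
{
	sketch_blkno next_region_start;		/* cf. next_eager_scan_region_start */
	sketch_blkno remaining_successes;	/* cf. eager_scan_remaining_successes */
	sketch_blkno max_fails_per_region;	/* cf. eager_scan_max_fails_per_region */
	sketch_blkno remaining_fails;		/* cf. eager_scan_remaining_fails */
} SketchEagerState;

/* Called once per eagerly scanned block, after learning whether it was frozen. */
static void
sketch_note_eager_scan_result(SketchEagerState *st, sketch_blkno blkno, bool frozen)
{
	/* Entering a new region restores the per-region failure budget */
	if (st->next_region_start != SKETCH_INVALID_BLKNO &&
		blkno >= st->next_region_start)
	{
		st->remaining_fails = st->max_fails_per_region;
		st->next_region_start += SKETCH_REGION_SIZE;
	}

	if (frozen)
	{
		/* The success cap is global: exhausting it disables eager scanning */
		if (st->remaining_successes > 0 && --st->remaining_successes == 0)
			st->next_region_start = SKETCH_INVALID_BLKNO;
	}
	else if (st->remaining_fails > 0)
	{
		/* The failure cap is per region: once it reaches 0, eager scanning
		 * pauses until the next region begins. */
		st->remaining_fails--;
	}
}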
412 | | |
413 | | |
414 | | /* Struct for saving and restoring vacuum error information. */ |
415 | | typedef struct LVSavedErrInfo |
416 | | { |
417 | | BlockNumber blkno; |
418 | | OffsetNumber offnum; |
419 | | VacErrPhase phase; |
420 | | } LVSavedErrInfo; |
421 | | |
422 | | |
423 | | /* non-export function prototypes */ |
424 | | static void lazy_scan_heap(LVRelState *vacrel); |
425 | | static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, |
426 | | VacuumParams *params); |
427 | | static BlockNumber heap_vac_scan_next_block(ReadStream *stream, |
428 | | void *callback_private_data, |
429 | | void *per_buffer_data); |
430 | | static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); |
431 | | static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, |
432 | | BlockNumber blkno, Page page, |
433 | | bool sharelock, Buffer vmbuffer); |
434 | | static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, |
435 | | BlockNumber blkno, Page page, |
436 | | Buffer vmbuffer, bool all_visible_according_to_vm, |
437 | | bool *has_lpdead_items, bool *vm_page_frozen); |
438 | | static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, |
439 | | BlockNumber blkno, Page page, |
440 | | bool *has_lpdead_items); |
441 | | static void lazy_vacuum(LVRelState *vacrel); |
442 | | static bool lazy_vacuum_all_indexes(LVRelState *vacrel); |
443 | | static void lazy_vacuum_heap_rel(LVRelState *vacrel); |
444 | | static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, |
445 | | Buffer buffer, OffsetNumber *deadoffsets, |
446 | | int num_offsets, Buffer vmbuffer); |
447 | | static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); |
448 | | static void lazy_cleanup_all_indexes(LVRelState *vacrel); |
449 | | static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, |
450 | | IndexBulkDeleteResult *istat, |
451 | | double reltuples, |
452 | | LVRelState *vacrel); |
453 | | static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel, |
454 | | IndexBulkDeleteResult *istat, |
455 | | double reltuples, |
456 | | bool estimated_count, |
457 | | LVRelState *vacrel); |
458 | | static bool should_attempt_truncation(LVRelState *vacrel); |
459 | | static void lazy_truncate_heap(LVRelState *vacrel); |
460 | | static BlockNumber count_nondeletable_pages(LVRelState *vacrel, |
461 | | bool *lock_waiter_detected); |
462 | | static void dead_items_alloc(LVRelState *vacrel, int nworkers); |
463 | | static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, |
464 | | int num_offsets); |
465 | | static void dead_items_reset(LVRelState *vacrel); |
466 | | static void dead_items_cleanup(LVRelState *vacrel); |
467 | | static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, |
468 | | TransactionId *visibility_cutoff_xid, bool *all_frozen); |
469 | | static void update_relstats_all_indexes(LVRelState *vacrel); |
470 | | static void vacuum_error_callback(void *arg); |
471 | | static void update_vacuum_error_info(LVRelState *vacrel, |
472 | | LVSavedErrInfo *saved_vacrel, |
473 | | int phase, BlockNumber blkno, |
474 | | OffsetNumber offnum); |
475 | | static void restore_vacuum_error_info(LVRelState *vacrel, |
476 | | const LVSavedErrInfo *saved_vacrel); |
477 | | |
478 | | |
479 | | |
480 | | /* |
481 | | * Helper to set up the eager scanning state for vacuuming a single relation. |
482 | | * Initializes the eager scan management related members of the LVRelState. |
483 | | * |
484 | | * Caller provides whether or not an aggressive vacuum is required due to |
485 | | * vacuum options or for relfrozenxid/relminmxid advancement. |
486 | | */ |
487 | | static void |
488 | | heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) |
489 | 0 | { |
490 | 0 | uint32 randseed; |
491 | 0 | BlockNumber allvisible; |
492 | 0 | BlockNumber allfrozen; |
493 | 0 | float first_region_ratio; |
494 | 0 | bool oldest_unfrozen_before_cutoff = false; |
495 | | |
496 | | /* |
497 | | * Initialize eager scan management fields to their disabled values. |
498 | | * Aggressive vacuums, normal vacuums of small tables, and normal vacuums |
499 | | * of tables without sufficiently old tuples disable eager scanning. |
500 | | */ |
501 | 0 | vacrel->next_eager_scan_region_start = InvalidBlockNumber; |
502 | 0 | vacrel->eager_scan_max_fails_per_region = 0; |
503 | 0 | vacrel->eager_scan_remaining_fails = 0; |
504 | 0 | vacrel->eager_scan_remaining_successes = 0; |
505 | | |
506 | | /* If eager scanning is explicitly disabled, just return. */ |
507 | 0 | if (params->max_eager_freeze_failure_rate == 0) |
508 | 0 | return; |
509 | | |
510 | | /* |
511 | | * The caller will have determined whether or not an aggressive vacuum is |
512 | | * required by either the vacuum parameters or the relative age of the |
513 | | * oldest unfrozen transaction IDs. An aggressive vacuum must scan every |
514 | | * all-visible page to safely advance the relfrozenxid and/or relminmxid, |
515 | | * so scans of all-visible pages are not considered eager. |
516 | | */ |
517 | 0 | if (vacrel->aggressive) |
518 | 0 | return; |
519 | | |
520 | | /* |
521 | | * Aggressively vacuuming a small relation shouldn't take long, so it |
522 | | * isn't worth amortizing. We use two times the region size as the size |
523 | | * cutoff because the eager scan start block is a random spot somewhere in |
524 | | * the first region, making the second region the first to be eager |
525 | | * scanned normally. |
526 | | */ |
527 | 0 | if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE) |
528 | 0 | return; |
529 | | |
530 | | /* |
531 | | * We only want to enable eager scanning if we are likely to be able to |
532 | | * freeze some of the pages in the relation. |
533 | | * |
534 | | * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact |
535 | | * are technically freezable, but we won't freeze them unless the criteria |
536 | | * for opportunistic freezing are met. Only tuples with XIDs/MXIDs older |
537 | | * than the FreezeLimit/MultiXactCutoff are frozen in the common case. |
538 | | * |
539 | | * So, as a heuristic, we wait until the FreezeLimit has advanced past the |
540 | | * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to |
541 | | * enable eager scanning. |
542 | | */ |
543 | 0 | if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) && |
544 | 0 | TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid, |
545 | 0 | vacrel->cutoffs.FreezeLimit)) |
546 | 0 | oldest_unfrozen_before_cutoff = true; |
547 | |
548 | 0 | if (!oldest_unfrozen_before_cutoff && |
549 | 0 | MultiXactIdIsValid(vacrel->cutoffs.relminmxid) && |
550 | 0 | MultiXactIdPrecedes(vacrel->cutoffs.relminmxid, |
551 | 0 | vacrel->cutoffs.MultiXactCutoff)) |
552 | 0 | oldest_unfrozen_before_cutoff = true; |
553 | |
554 | 0 | if (!oldest_unfrozen_before_cutoff) |
555 | 0 | return; |
556 | | |
557 | | /* We have met the criteria to eagerly scan some pages. */ |
558 | | |
559 | | /* |
560 | | * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of |
561 | | * all-visible but not all-frozen blocks in the relation. |
562 | | */ |
563 | 0 | visibilitymap_count(vacrel->rel, &allvisible, &allfrozen); |
564 | |
565 | 0 | vacrel->eager_scan_remaining_successes = |
566 | 0 | (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE * |
567 | 0 | (allvisible - allfrozen)); |
568 | | |
569 | | /* If every all-visible page is frozen, eager scanning is disabled. */ |
570 | 0 | if (vacrel->eager_scan_remaining_successes == 0) |
571 | 0 | return; |
572 | | |
573 | | /* |
574 | | * Now calculate the bounds of the first eager scan region. Its end block |
575 | | * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE |
576 | | * blocks. This affects the bounds of all subsequent regions and avoids |
577 | | * eager scanning and failing to freeze the same blocks each vacuum of the |
578 | | * relation. |
579 | | */ |
580 | 0 | randseed = pg_prng_uint32(&pg_global_prng_state); |
581 | |
582 | 0 | vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE; |
583 | |
584 | 0 | Assert(params->max_eager_freeze_failure_rate > 0 && |
585 | 0 | params->max_eager_freeze_failure_rate <= 1); |
586 | |
587 | 0 | vacrel->eager_scan_max_fails_per_region = |
588 | 0 | params->max_eager_freeze_failure_rate * |
589 | 0 | EAGER_SCAN_REGION_SIZE; |
590 | | |
591 | | /* |
592 | | * The first region will be smaller than subsequent regions. As such, |
593 | | * adjust the eager freeze failures tolerated for this region. |
594 | | */ |
595 | 0 | first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start / |
596 | 0 | EAGER_SCAN_REGION_SIZE; |
597 | |
598 | 0 | vacrel->eager_scan_remaining_fails = |
599 | 0 | vacrel->eager_scan_max_fails_per_region * |
600 | 0 | first_region_ratio; |
601 | 0 | } |
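Plugging illustrative numbers through the arithmetic above (a table with 50,000 all-visible pages of which 10,000 are already all-frozen, a max_eager_freeze_failure_rate of 0.03, and a pretend random start offset of 1,000 blocks), the standalone snippet below reproduces the same calculations; none of its values come from this file:

#include <stdio.h>

int
main(void)
{
	unsigned	allvisible = 50000;		/* hypothetical VM counts */
	unsigned	allfrozen = 10000;
	double		failure_rate = 0.03;	/* example failure-rate setting */
	unsigned	randstart = 1000;		/* pretend randseed % EAGER_SCAN_REGION_SIZE */

	unsigned	remaining_successes = (unsigned) (0.2 * (allvisible - allfrozen));
	unsigned	max_fails_per_region = (unsigned) (failure_rate * 4096);
	double		first_region_ratio = 1.0 - (double) randstart / 4096;
	unsigned	first_region_fails = (unsigned) (max_fails_per_region * first_region_ratio);

	printf("success cap: %u\n", remaining_successes);							/* 8000 */
	printf("failures tolerated per full region: %u\n", max_fails_per_region);	/* 122 */
	printf("failures tolerated in the shorter first region: %u\n",
		   first_region_fails);													/* 92 */
	return 0;
}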
602 | | |
603 | | /* |
604 | | * heap_vacuum_rel() -- perform VACUUM for one heap relation |
605 | | * |
606 | | * This routine sets things up for and then calls lazy_scan_heap, where |
607 | | * almost all work actually takes place. Finalizes everything after call |
608 | | * returns by managing relation truncation and updating rel's pg_class |
609 | | * entry. (Also updates pg_class entries for any indexes that need it.) |
610 | | * |
611 | | * At entry, we have already established a transaction and opened |
612 | | * and locked the relation. |
613 | | */ |
614 | | void |
615 | | heap_vacuum_rel(Relation rel, VacuumParams *params, |
616 | | BufferAccessStrategy bstrategy) |
617 | 0 | { |
618 | 0 | LVRelState *vacrel; |
619 | 0 | bool verbose, |
620 | 0 | instrument, |
621 | 0 | skipwithvm, |
622 | 0 | frozenxid_updated, |
623 | 0 | minmulti_updated; |
624 | 0 | BlockNumber orig_rel_pages, |
625 | 0 | new_rel_pages, |
626 | 0 | new_rel_allvisible, |
627 | 0 | new_rel_allfrozen; |
628 | 0 | PGRUsage ru0; |
629 | 0 | TimestampTz starttime = 0; |
630 | 0 | PgStat_Counter startreadtime = 0, |
631 | 0 | startwritetime = 0; |
632 | 0 | WalUsage startwalusage = pgWalUsage; |
633 | 0 | BufferUsage startbufferusage = pgBufferUsage; |
634 | 0 | ErrorContextCallback errcallback; |
635 | 0 | char **indnames = NULL; |
636 | |
637 | 0 | verbose = (params->options & VACOPT_VERBOSE) != 0; |
638 | 0 | instrument = (verbose || (AmAutoVacuumWorkerProcess() && |
639 | 0 | params->log_min_duration >= 0)); |
640 | 0 | if (instrument) |
641 | 0 | { |
642 | 0 | pg_rusage_init(&ru0); |
643 | 0 | if (track_io_timing) |
644 | 0 | { |
645 | 0 | startreadtime = pgStatBlockReadTime; |
646 | 0 | startwritetime = pgStatBlockWriteTime; |
647 | 0 | } |
648 | 0 | } |
649 | | |
650 | | /* Used for instrumentation and stats report */ |
651 | 0 | starttime = GetCurrentTimestamp(); |
652 | |
653 | 0 | pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM, |
654 | 0 | RelationGetRelid(rel)); |
655 | | |
656 | | /* |
657 | | * Setup error traceback support for ereport() first. The idea is to set |
658 | | * up an error context callback to display additional information on any |
659 | | * error during a vacuum. During different phases of vacuum, we update |
660 | | * the state so that the error context callback always displays current |
661 | | * information. |
662 | | * |
663 | | * Copy the names of heap rel into local memory for error reporting |
664 | | * purposes, too. It isn't always safe to assume that we can get the name |
665 | | * of each rel. It's convenient for code in lazy_scan_heap to always use |
666 | | * these temp copies. |
667 | | */ |
668 | 0 | vacrel = (LVRelState *) palloc0(sizeof(LVRelState)); |
669 | 0 | vacrel->dbname = get_database_name(MyDatabaseId); |
670 | 0 | vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel)); |
671 | 0 | vacrel->relname = pstrdup(RelationGetRelationName(rel)); |
672 | 0 | vacrel->indname = NULL; |
673 | 0 | vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN; |
674 | 0 | vacrel->verbose = verbose; |
675 | 0 | errcallback.callback = vacuum_error_callback; |
676 | 0 | errcallback.arg = vacrel; |
677 | 0 | errcallback.previous = error_context_stack; |
678 | 0 | error_context_stack = &errcallback; |
679 | | |
680 | | /* Set up high level stuff about rel and its indexes */ |
681 | 0 | vacrel->rel = rel; |
682 | 0 | vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes, |
683 | 0 | &vacrel->indrels); |
684 | 0 | vacrel->bstrategy = bstrategy; |
685 | 0 | if (instrument && vacrel->nindexes > 0) |
686 | 0 | { |
687 | | /* Copy index names used by instrumentation (not error reporting) */ |
688 | 0 | indnames = palloc(sizeof(char *) * vacrel->nindexes); |
689 | 0 | for (int i = 0; i < vacrel->nindexes; i++) |
690 | 0 | indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i])); |
691 | 0 | } |
692 | | |
693 | | /* |
694 | | * The index_cleanup param either disables index vacuuming and cleanup or |
695 | | * forces it to go ahead when we would otherwise apply the index bypass |
696 | | * optimization. The default is 'auto', which leaves the final decision |
697 | | * up to lazy_vacuum(). |
698 | | * |
699 | | * The truncate param allows the user to avoid attempting relation truncation, |
700 | | * though it can't force truncation to happen. |
701 | | */ |
702 | 0 | Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED); |
703 | 0 | Assert(params->truncate != VACOPTVALUE_UNSPECIFIED && |
704 | 0 | params->truncate != VACOPTVALUE_AUTO); |
705 | | |
706 | | /* |
707 | | * While VacuumFailsafeActive is reset to false before calling this, we |
708 | | * still need to reset it here due to recursive calls. |
709 | | */ |
710 | 0 | VacuumFailsafeActive = false; |
711 | 0 | vacrel->consider_bypass_optimization = true; |
712 | 0 | vacrel->do_index_vacuuming = true; |
713 | 0 | vacrel->do_index_cleanup = true; |
714 | 0 | vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED); |
715 | 0 | if (params->index_cleanup == VACOPTVALUE_DISABLED) |
716 | 0 | { |
717 | | /* Force disable index vacuuming up-front */ |
718 | 0 | vacrel->do_index_vacuuming = false; |
719 | 0 | vacrel->do_index_cleanup = false; |
720 | 0 | } |
721 | 0 | else if (params->index_cleanup == VACOPTVALUE_ENABLED) |
722 | 0 | { |
723 | | /* Force index vacuuming. Note that failsafe can still bypass. */ |
724 | 0 | vacrel->consider_bypass_optimization = false; |
725 | 0 | } |
726 | 0 | else |
727 | 0 | { |
728 | | /* Default/auto, make all decisions dynamically */ |
729 | 0 | Assert(params->index_cleanup == VACOPTVALUE_AUTO); |
730 | 0 | } |
731 | | |
732 | | /* Initialize page counters explicitly (be tidy) */ |
733 | 0 | vacrel->scanned_pages = 0; |
734 | 0 | vacrel->eager_scanned_pages = 0; |
735 | 0 | vacrel->removed_pages = 0; |
736 | 0 | vacrel->new_frozen_tuple_pages = 0; |
737 | 0 | vacrel->lpdead_item_pages = 0; |
738 | 0 | vacrel->missed_dead_pages = 0; |
739 | 0 | vacrel->nonempty_pages = 0; |
740 | | /* dead_items_alloc allocates vacrel->dead_items later on */ |
741 | | |
742 | | /* Allocate/initialize output statistics state */ |
743 | 0 | vacrel->new_rel_tuples = 0; |
744 | 0 | vacrel->new_live_tuples = 0; |
745 | 0 | vacrel->indstats = (IndexBulkDeleteResult **) |
746 | 0 | palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); |
747 | | |
748 | | /* Initialize remaining counters (be tidy) */ |
749 | 0 | vacrel->num_index_scans = 0; |
750 | 0 | vacrel->tuples_deleted = 0; |
751 | 0 | vacrel->tuples_frozen = 0; |
752 | 0 | vacrel->lpdead_items = 0; |
753 | 0 | vacrel->live_tuples = 0; |
754 | 0 | vacrel->recently_dead_tuples = 0; |
755 | 0 | vacrel->missed_dead_tuples = 0; |
756 | |
757 | 0 | vacrel->vm_new_visible_pages = 0; |
758 | 0 | vacrel->vm_new_visible_frozen_pages = 0; |
759 | 0 | vacrel->vm_new_frozen_pages = 0; |
760 | | |
761 | | /* |
762 | | * Get cutoffs that determine which deleted tuples are considered DEAD, |
763 | | * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine |
764 | | * the extent of the blocks that we'll scan in lazy_scan_heap. It has to |
765 | | * happen in this order to ensure that the OldestXmin cutoff field works |
766 | | * as an upper bound on the XIDs stored in the pages we'll actually scan |
767 | | * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs). |
768 | | * |
769 | | * Next acquire vistest, a related cutoff that's used in pruning. We use |
770 | | * vistest in combination with OldestXmin to ensure that |
771 | | * heap_page_prune_and_freeze() always removes any deleted tuple whose |
772 | | * xmax is < OldestXmin. lazy_scan_prune must never become confused about |
773 | | * whether a tuple should be frozen or removed. (In the future we might |
774 | | * want to teach lazy_scan_prune to recompute vistest from time to time, |
775 | | * to increase the number of dead tuples it can prune away.) |
776 | | */ |
777 | 0 | vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs); |
778 | 0 | vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); |
779 | 0 | vacrel->vistest = GlobalVisTestFor(rel); |
780 | | |
781 | | /* Initialize state used to track oldest extant XID/MXID */ |
782 | 0 | vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin; |
783 | 0 | vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact; |
784 | | |
785 | | /* |
786 | | * Initialize state related to tracking all-visible page skipping. This is |
787 | | * very important to determine whether or not it is safe to advance the |
788 | | * relfrozenxid/relminmxid. |
789 | | */ |
790 | 0 | vacrel->skippedallvis = false; |
791 | 0 | skipwithvm = true; |
792 | 0 | if (params->options & VACOPT_DISABLE_PAGE_SKIPPING) |
793 | 0 | { |
794 | | /* |
795 | | * Force aggressive mode, and disable skipping blocks using the |
796 | | * visibility map (even those set all-frozen) |
797 | | */ |
798 | 0 | vacrel->aggressive = true; |
799 | 0 | skipwithvm = false; |
800 | 0 | } |
801 | |
802 | 0 | vacrel->skipwithvm = skipwithvm; |
803 | | |
804 | | /* |
805 | | * Set up eager scan tracking state. This must happen after determining |
806 | | * whether or not the vacuum must be aggressive, because only normal |
807 | | * vacuums use the eager scan algorithm. |
808 | | */ |
809 | 0 | heap_vacuum_eager_scan_setup(vacrel, params); |
810 | |
811 | 0 | if (verbose) |
812 | 0 | { |
813 | 0 | if (vacrel->aggressive) |
814 | 0 | ereport(INFO, |
815 | 0 | (errmsg("aggressively vacuuming \"%s.%s.%s\"", |
816 | 0 | vacrel->dbname, vacrel->relnamespace, |
817 | 0 | vacrel->relname))); |
818 | 0 | else |
819 | 0 | ereport(INFO, |
820 | 0 | (errmsg("vacuuming \"%s.%s.%s\"", |
821 | 0 | vacrel->dbname, vacrel->relnamespace, |
822 | 0 | vacrel->relname))); |
823 | 0 | } |
824 | | |
825 | | /* |
826 | | * Allocate dead_items memory using dead_items_alloc. This handles |
827 | | * parallel VACUUM initialization as part of allocating shared memory |
828 | | * space used for dead_items. (But do a failsafe precheck first, to |
829 | | * ensure that parallel VACUUM won't be attempted at all when relfrozenxid |
830 | | * is already dangerously old.) |
831 | | */ |
832 | 0 | lazy_check_wraparound_failsafe(vacrel); |
833 | 0 | dead_items_alloc(vacrel, params->nworkers); |
834 | | |
835 | | /* |
836 | | * Call lazy_scan_heap to perform all required heap pruning, index |
837 | | * vacuuming, and heap vacuuming (plus related processing) |
838 | | */ |
839 | 0 | lazy_scan_heap(vacrel); |
840 | | |
841 | | /* |
842 | | * Free resources managed by dead_items_alloc. This ends parallel mode in |
843 | | * passing when necessary. |
844 | | */ |
845 | 0 | dead_items_cleanup(vacrel); |
846 | 0 | Assert(!IsInParallelMode()); |
847 | | |
848 | | /* |
849 | | * Update pg_class entries for each of rel's indexes where appropriate. |
850 | | * |
851 | | * Unlike the later update to rel's pg_class entry, this is not critical. |
852 | | * Maintains relpages/reltuples statistics used by the planner only. |
853 | | */ |
854 | 0 | if (vacrel->do_index_cleanup) |
855 | 0 | update_relstats_all_indexes(vacrel); |
856 | | |
857 | | /* Done with rel's indexes */ |
858 | 0 | vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock); |
859 | | |
860 | | /* Optionally truncate rel */ |
861 | 0 | if (should_attempt_truncation(vacrel)) |
862 | 0 | lazy_truncate_heap(vacrel); |
863 | | |
864 | | /* Pop the error context stack */ |
865 | 0 | error_context_stack = errcallback.previous; |
866 | | |
867 | | /* Report that we are now doing final cleanup */ |
868 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, |
869 | 0 | PROGRESS_VACUUM_PHASE_FINAL_CLEANUP); |
870 | | |
871 | | /* |
872 | | * Prepare to update rel's pg_class entry. |
873 | | * |
874 | | * Aggressive VACUUMs must always be able to advance relfrozenxid to a |
875 | | * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff. |
876 | | * Non-aggressive VACUUMs may advance them by any amount, or not at all. |
877 | | */ |
878 | 0 | Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin || |
879 | 0 | TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit : |
880 | 0 | vacrel->cutoffs.relfrozenxid, |
881 | 0 | vacrel->NewRelfrozenXid)); |
882 | 0 | Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact || |
883 | 0 | MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff : |
884 | 0 | vacrel->cutoffs.relminmxid, |
885 | 0 | vacrel->NewRelminMxid)); |
886 | 0 | if (vacrel->skippedallvis) |
887 | 0 | { |
888 | | /* |
889 | | * Must keep original relfrozenxid in a non-aggressive VACUUM that |
890 | | * chose to skip an all-visible page range. The state that tracks new |
891 | | * values will have missed unfrozen XIDs from the pages we skipped. |
892 | | */ |
893 | 0 | Assert(!vacrel->aggressive); |
894 | 0 | vacrel->NewRelfrozenXid = InvalidTransactionId; |
895 | 0 | vacrel->NewRelminMxid = InvalidMultiXactId; |
896 | 0 | } |
897 | | |
898 | | /* |
899 | | * For safety, clamp relallvisible to be not more than what we're setting |
900 | | * pg_class.relpages to |
901 | | */ |
902 | 0 | new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */ |
903 | 0 | visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen); |
904 | 0 | if (new_rel_allvisible > new_rel_pages) |
905 | 0 | new_rel_allvisible = new_rel_pages; |
906 | | |
907 | | /* |
908 | | * An all-frozen block _must_ be all-visible. As such, clamp the count of |
909 | | * all-frozen blocks to the count of all-visible blocks. This matches the |
910 | | * clamping of relallvisible above. |
911 | | */ |
912 | 0 | if (new_rel_allfrozen > new_rel_allvisible) |
913 | 0 | new_rel_allfrozen = new_rel_allvisible; |
914 | | |
915 | | /* |
916 | | * Now actually update rel's pg_class entry. |
917 | | * |
918 | | * In principle new_live_tuples could be -1 indicating that we (still) |
919 | | * don't know the tuple count. In practice that can't happen, since we |
920 | | * scan every page that isn't skipped using the visibility map. |
921 | | */ |
922 | 0 | vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples, |
923 | 0 | new_rel_allvisible, new_rel_allfrozen, |
924 | 0 | vacrel->nindexes > 0, |
925 | 0 | vacrel->NewRelfrozenXid, vacrel->NewRelminMxid, |
926 | 0 | &frozenxid_updated, &minmulti_updated, false); |
927 | | |
928 | | /* |
929 | | * Report results to the cumulative stats system, too. |
930 | | * |
931 | | * Deliberately avoid telling the stats system about LP_DEAD items that |
932 | | * remain in the table due to VACUUM bypassing index and heap vacuuming. |
933 | | * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples". |
934 | | * It seems like a good idea to err on the side of not vacuuming again too |
935 | | * soon in cases where the failsafe prevented significant amounts of heap |
936 | | * vacuuming. |
937 | | */ |
938 | 0 | pgstat_report_vacuum(RelationGetRelid(rel), |
939 | 0 | rel->rd_rel->relisshared, |
940 | 0 | Max(vacrel->new_live_tuples, 0), |
941 | 0 | vacrel->recently_dead_tuples + |
942 | 0 | vacrel->missed_dead_tuples, |
943 | 0 | starttime); |
944 | 0 | pgstat_progress_end_command(); |
945 | |
|
946 | 0 | if (instrument) |
947 | 0 | { |
948 | 0 | TimestampTz endtime = GetCurrentTimestamp(); |
949 | |
950 | 0 | if (verbose || params->log_min_duration == 0 || |
951 | 0 | TimestampDifferenceExceeds(starttime, endtime, |
952 | 0 | params->log_min_duration)) |
953 | 0 | { |
954 | 0 | long secs_dur; |
955 | 0 | int usecs_dur; |
956 | 0 | WalUsage walusage; |
957 | 0 | BufferUsage bufferusage; |
958 | 0 | StringInfoData buf; |
959 | 0 | char *msgfmt; |
960 | 0 | int32 diff; |
961 | 0 | double read_rate = 0, |
962 | 0 | write_rate = 0; |
963 | 0 | int64 total_blks_hit; |
964 | 0 | int64 total_blks_read; |
965 | 0 | int64 total_blks_dirtied; |
966 | |
967 | 0 | TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur); |
968 | 0 | memset(&walusage, 0, sizeof(WalUsage)); |
969 | 0 | WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage); |
970 | 0 | memset(&bufferusage, 0, sizeof(BufferUsage)); |
971 | 0 | BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage); |
972 | |
973 | 0 | total_blks_hit = bufferusage.shared_blks_hit + |
974 | 0 | bufferusage.local_blks_hit; |
975 | 0 | total_blks_read = bufferusage.shared_blks_read + |
976 | 0 | bufferusage.local_blks_read; |
977 | 0 | total_blks_dirtied = bufferusage.shared_blks_dirtied + |
978 | 0 | bufferusage.local_blks_dirtied; |
979 | |
980 | 0 | initStringInfo(&buf); |
981 | 0 | if (verbose) |
982 | 0 | { |
983 | | /* |
984 | | * Aggressiveness already reported earlier, in dedicated |
985 | | * VACUUM VERBOSE ereport |
986 | | */ |
987 | 0 | Assert(!params->is_wraparound); |
988 | 0 | msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n"); |
989 | 0 | } |
990 | 0 | else if (params->is_wraparound) |
991 | 0 | { |
992 | | /* |
993 | | * While it's possible for a VACUUM to be both is_wraparound |
994 | | * and !aggressive, that's just a corner-case -- is_wraparound |
995 | | * implies aggressive. Produce distinct output for the corner |
996 | | * case all the same, just in case. |
997 | | */ |
998 | 0 | if (vacrel->aggressive) |
999 | 0 | msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n"); |
1000 | 0 | else |
1001 | 0 | msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n"); |
1002 | 0 | } |
1003 | 0 | else |
1004 | 0 | { |
1005 | 0 | if (vacrel->aggressive) |
1006 | 0 | msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n"); |
1007 | 0 | else |
1008 | 0 | msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"); |
1009 | 0 | } |
1010 | 0 | appendStringInfo(&buf, msgfmt, |
1011 | 0 | vacrel->dbname, |
1012 | 0 | vacrel->relnamespace, |
1013 | 0 | vacrel->relname, |
1014 | 0 | vacrel->num_index_scans); |
1015 | 0 | appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"), |
1016 | 0 | vacrel->removed_pages, |
1017 | 0 | new_rel_pages, |
1018 | 0 | vacrel->scanned_pages, |
1019 | 0 | orig_rel_pages == 0 ? 100.0 : |
1020 | 0 | 100.0 * vacrel->scanned_pages / |
1021 | 0 | orig_rel_pages, |
1022 | 0 | vacrel->eager_scanned_pages); |
1023 | 0 | appendStringInfo(&buf, |
1024 | 0 | _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"), |
1025 | 0 | vacrel->tuples_deleted, |
1026 | 0 | (int64) vacrel->new_rel_tuples, |
1027 | 0 | vacrel->recently_dead_tuples); |
1028 | 0 | if (vacrel->missed_dead_tuples > 0) |
1029 | 0 | appendStringInfo(&buf, |
1030 | 0 | _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"), |
1031 | 0 | vacrel->missed_dead_tuples, |
1032 | 0 | vacrel->missed_dead_pages); |
1033 | 0 | diff = (int32) (ReadNextTransactionId() - |
1034 | 0 | vacrel->cutoffs.OldestXmin); |
1035 | 0 | appendStringInfo(&buf, |
1036 | 0 | _("removable cutoff: %u, which was %d XIDs old when operation ended\n"), |
1037 | 0 | vacrel->cutoffs.OldestXmin, diff); |
1038 | 0 | if (frozenxid_updated) |
1039 | 0 | { |
1040 | 0 | diff = (int32) (vacrel->NewRelfrozenXid - |
1041 | 0 | vacrel->cutoffs.relfrozenxid); |
1042 | 0 | appendStringInfo(&buf, |
1043 | 0 | _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"), |
1044 | 0 | vacrel->NewRelfrozenXid, diff); |
1045 | 0 | } |
1046 | 0 | if (minmulti_updated) |
1047 | 0 | { |
1048 | 0 | diff = (int32) (vacrel->NewRelminMxid - |
1049 | 0 | vacrel->cutoffs.relminmxid); |
1050 | 0 | appendStringInfo(&buf, |
1051 | 0 | _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"), |
1052 | 0 | vacrel->NewRelminMxid, diff); |
1053 | 0 | } |
1054 | 0 | appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"), |
1055 | 0 | vacrel->new_frozen_tuple_pages, |
1056 | 0 | orig_rel_pages == 0 ? 100.0 : |
1057 | 0 | 100.0 * vacrel->new_frozen_tuple_pages / |
1058 | 0 | orig_rel_pages, |
1059 | 0 | vacrel->tuples_frozen); |
1060 | |
1061 | 0 | appendStringInfo(&buf, |
1062 | 0 | _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"), |
1063 | 0 | vacrel->vm_new_visible_pages, |
1064 | 0 | vacrel->vm_new_visible_frozen_pages + |
1065 | 0 | vacrel->vm_new_frozen_pages, |
1066 | 0 | vacrel->vm_new_frozen_pages); |
1067 | 0 | if (vacrel->do_index_vacuuming) |
1068 | 0 | { |
1069 | 0 | if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0) |
1070 | 0 | appendStringInfoString(&buf, _("index scan not needed: ")); |
1071 | 0 | else |
1072 | 0 | appendStringInfoString(&buf, _("index scan needed: ")); |
1073 | |
1074 | 0 | msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n"); |
1075 | 0 | } |
1076 | 0 | else |
1077 | 0 | { |
1078 | 0 | if (!VacuumFailsafeActive) |
1079 | 0 | appendStringInfoString(&buf, _("index scan bypassed: ")); |
1080 | 0 | else |
1081 | 0 | appendStringInfoString(&buf, _("index scan bypassed by failsafe: ")); |
1082 | |
1083 | 0 | msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n"); |
1084 | 0 | } |
1085 | 0 | appendStringInfo(&buf, msgfmt, |
1086 | 0 | vacrel->lpdead_item_pages, |
1087 | 0 | orig_rel_pages == 0 ? 100.0 : |
1088 | 0 | 100.0 * vacrel->lpdead_item_pages / orig_rel_pages, |
1089 | 0 | vacrel->lpdead_items); |
1090 | 0 | for (int i = 0; i < vacrel->nindexes; i++) |
1091 | 0 | { |
1092 | 0 | IndexBulkDeleteResult *istat = vacrel->indstats[i]; |
1093 | |
1094 | 0 | if (!istat) |
1095 | 0 | continue; |
1096 | | |
1097 | 0 | appendStringInfo(&buf, |
1098 | 0 | _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"), |
1099 | 0 | indnames[i], |
1100 | 0 | istat->num_pages, |
1101 | 0 | istat->pages_newly_deleted, |
1102 | 0 | istat->pages_deleted, |
1103 | 0 | istat->pages_free); |
1104 | 0 | } |
1105 | 0 | if (track_cost_delay_timing) |
1106 | 0 | { |
1107 | | /* |
1108 | | * We bypass the changecount mechanism because this value is |
1109 | | * only updated by the calling process. We also rely on the |
1110 | | * above call to pgstat_progress_end_command() to not clear |
1111 | | * the st_progress_param array. |
1112 | | */ |
1113 | 0 | appendStringInfo(&buf, _("delay time: %.3f ms\n"), |
1114 | 0 | (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0); |
1115 | 0 | } |
1116 | 0 | if (track_io_timing) |
1117 | 0 | { |
1118 | 0 | double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000; |
1119 | 0 | double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000; |
1120 | |
1121 | 0 | appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"), |
1122 | 0 | read_ms, write_ms); |
1123 | 0 | } |
1124 | 0 | if (secs_dur > 0 || usecs_dur > 0) |
1125 | 0 | { |
1126 | 0 | read_rate = (double) BLCKSZ * total_blks_read / |
1127 | 0 | (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0); |
1128 | 0 | write_rate = (double) BLCKSZ * total_blks_dirtied / |
1129 | 0 | (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0); |
1130 | 0 | } |
1131 | 0 | appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"), |
1132 | 0 | read_rate, write_rate); |
1133 | 0 | appendStringInfo(&buf, |
1134 | 0 | _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"), |
1135 | 0 | total_blks_hit, |
1136 | 0 | total_blks_read, |
1137 | 0 | total_blks_dirtied); |
1138 | 0 | appendStringInfo(&buf, |
1139 | 0 | _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRId64 " buffers full\n"), |
1140 | 0 | walusage.wal_records, |
1141 | 0 | walusage.wal_fpi, |
1142 | 0 | walusage.wal_bytes, |
1143 | 0 | walusage.wal_buffers_full); |
1144 | 0 | appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0)); |
1145 | |
1146 | 0 | ereport(verbose ? INFO : LOG, |
1147 | 0 | (errmsg_internal("%s", buf.data))); |
1148 | 0 | pfree(buf.data); |
1149 | 0 | } |
1150 | 0 | } |
1151 | | |
1152 | | /* Cleanup index statistics and index names */ |
1153 | 0 | for (int i = 0; i < vacrel->nindexes; i++) |
1154 | 0 | { |
1155 | 0 | if (vacrel->indstats[i]) |
1156 | 0 | pfree(vacrel->indstats[i]); |
1157 | |
1158 | 0 | if (instrument) |
1159 | 0 | pfree(indnames[i]); |
1160 | 0 | } |
1161 | 0 | } |
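As a sense check on the rate figures this function logs: the "avg read rate" line divides the bytes read (total_blks_read times the block size, 8192 bytes by default) by 1,048,576 to get MB, then by the elapsed seconds. For example, 50,000 blocks read over a 20-second vacuum works out to 50,000 * 8192 / 1,048,576 ≈ 390.6 MB, or about 19.5 MB/s; the write rate is computed the same way from total_blks_dirtied.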
1162 | | |
1163 | | /* |
1164 | | * lazy_scan_heap() -- workhorse function for VACUUM |
1165 | | * |
1166 | | * This routine prunes each page in the heap, and considers the need to |
1167 | | * freeze remaining tuples with storage (not including pages that can be |
1168 | | * skipped using the visibility map). Also performs related maintenance |
1169 | | * of the FSM and visibility map. These steps all take place during an |
1170 | | * initial pass over the target heap relation. |
1171 | | * |
1172 | | * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely |
1173 | | * consists of deleting index tuples that point to LP_DEAD items left in |
1174 | | * heap pages following pruning. The earlier initial pass over the heap will |
1175 | | * have collected the TIDs whose index tuples need to be removed. |
1176 | | * |
1177 | | * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which |
1178 | | * largely consists of marking LP_DEAD items (from vacrel->dead_items) |
1179 | | * as LP_UNUSED. This has to happen in a second, final pass over the |
1180 | | * heap, to preserve a basic invariant that all index AMs rely on: no |
1181 | | * extant index tuple can ever be allowed to contain a TID that points to |
1182 | | * an LP_UNUSED line pointer in the heap. We must disallow premature |
1183 | | * recycling of line pointers to avoid index scans that get confused |
1184 | | * about which TID points to which tuple immediately after recycling. |
1185 | | * (Actually, this isn't a concern when the target heap relation happens to |
1186 | | * have no indexes, which allows us to safely apply the one-pass strategy |
1187 | | * as an optimization). |
1188 | | * |
1189 | | * In practice we often have enough space to fit all TIDs, and so won't |
1190 | | * need to call lazy_vacuum more than once, after our initial pass over |
1191 | | * the heap has totally finished. Otherwise things are slightly more |
1192 | | * complicated: our "initial pass" over the heap applies only to those |
1193 | | * pages that were pruned before we needed to call lazy_vacuum, and our |
1194 | | * "final pass" over the heap only vacuums these same heap pages. |
1195 | | * However, we process indexes in full every time lazy_vacuum is called, |
1196 | | * which makes index processing very inefficient when memory is in short |
1197 | | * supply. |
1198 | | */ |
1199 | | static void |
1200 | | lazy_scan_heap(LVRelState *vacrel) |
1201 | 0 | { |
1202 | 0 | ReadStream *stream; |
1203 | 0 | BlockNumber rel_pages = vacrel->rel_pages, |
1204 | 0 | blkno = 0, |
1205 | 0 | next_fsm_block_to_vacuum = 0; |
1206 | 0 | BlockNumber orig_eager_scan_success_limit = |
1207 | 0 | vacrel->eager_scan_remaining_successes; /* for logging */ |
1208 | 0 | Buffer vmbuffer = InvalidBuffer; |
1209 | 0 | const int initprog_index[] = { |
1210 | 0 | PROGRESS_VACUUM_PHASE, |
1211 | 0 | PROGRESS_VACUUM_TOTAL_HEAP_BLKS, |
1212 | 0 | PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES |
1213 | 0 | }; |
1214 | 0 | int64 initprog_val[3]; |
1215 | | |
1216 | | /* Report that we're scanning the heap, advertising total # of blocks */ |
1217 | 0 | initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP; |
1218 | 0 | initprog_val[1] = rel_pages; |
1219 | 0 | initprog_val[2] = vacrel->dead_items_info->max_bytes; |
1220 | 0 | pgstat_progress_update_multi_param(3, initprog_index, initprog_val); |
1221 | | |
1222 | | /* Initialize for the first heap_vac_scan_next_block() call */ |
1223 | 0 | vacrel->current_block = InvalidBlockNumber; |
1224 | 0 | vacrel->next_unskippable_block = InvalidBlockNumber; |
1225 | 0 | vacrel->next_unskippable_allvis = false; |
1226 | 0 | vacrel->next_unskippable_eager_scanned = false; |
1227 | 0 | vacrel->next_unskippable_vmbuffer = InvalidBuffer; |
1228 | | |
1229 | | /* |
1230 | | * Set up the read stream for vacuum's first pass through the heap. |
1231 | | * |
1232 | | * This could be made safe for READ_STREAM_USE_BATCHING, but only with |
1233 | | * explicit work in heap_vac_scan_next_block. |
1234 | | */ |
1235 | 0 | stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE, |
1236 | 0 | vacrel->bstrategy, |
1237 | 0 | vacrel->rel, |
1238 | 0 | MAIN_FORKNUM, |
1239 | 0 | heap_vac_scan_next_block, |
1240 | 0 | vacrel, |
1241 | 0 | sizeof(uint8)); |
1242 | |
1243 | 0 | while (true) |
1244 | 0 | { |
1245 | 0 | Buffer buf; |
1246 | 0 | Page page; |
1247 | 0 | uint8 blk_info = 0; |
1248 | 0 | bool has_lpdead_items; |
1249 | 0 | void *per_buffer_data = NULL; |
1250 | 0 | bool vm_page_frozen = false; |
1251 | 0 | bool got_cleanup_lock = false; |
1252 | |
1253 | 0 | vacuum_delay_point(false); |
1254 | | |
1255 | | /* |
1256 | | * Regularly check if wraparound failsafe should trigger. |
1257 | | * |
1258 | | * There is a similar check inside lazy_vacuum_all_indexes(), but |
1259 | | * relfrozenxid might start to look dangerously old before we reach |
1260 | | * that point. This check also provides failsafe coverage for the |
1261 | | * one-pass strategy, and the two-pass strategy with the index_cleanup |
1262 | | * param set to 'off'. |
1263 | | */ |
1264 | 0 | if (vacrel->scanned_pages > 0 && |
1265 | 0 | vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) |
1266 | 0 | lazy_check_wraparound_failsafe(vacrel); |
1267 | | |
1268 | | /* |
1269 | | * Consider if we definitely have enough space to process the TIDs on this |
1270 | | * page already. If we are close to overrunning the available space for |
1271 | | * dead_items TIDs, pause and do a cycle of vacuuming before we tackle |
1272 | | * this page. However, let's force at least one page's worth of tuples to |
1273 | | * be stored, so as to ensure we do at least some work when the configured |
1274 | | * memory is so low that we run out before storing anything. |
1275 | | */ |
1276 | 0 | if (vacrel->dead_items_info->num_items > 0 && |
1277 | 0 | TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes) |
1278 | 0 | { |
1279 | | /* |
1280 | | * Before beginning index vacuuming, we release any pin we may |
1281 | | * hold on the visibility map page. This isn't necessary for |
1282 | | * correctness, but we do it anyway to avoid holding the pin |
1283 | | * across a lengthy, unrelated operation. |
1284 | | */ |
1285 | 0 | if (BufferIsValid(vmbuffer)) |
1286 | 0 | { |
1287 | 0 | ReleaseBuffer(vmbuffer); |
1288 | 0 | vmbuffer = InvalidBuffer; |
1289 | 0 | } |
1290 | | |
1291 | | /* Perform a round of index and heap vacuuming */ |
1292 | 0 | vacrel->consider_bypass_optimization = false; |
1293 | 0 | lazy_vacuum(vacrel); |
1294 | | |
1295 | | /* |
1296 | | * Vacuum the Free Space Map to make newly-freed space visible on |
1297 | | * upper-level FSM pages. Note that blkno is the previously |
1298 | | * processed block. |
1299 | | */ |
1300 | 0 | FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, |
1301 | 0 | blkno + 1); |
1302 | 0 | next_fsm_block_to_vacuum = blkno; |
1303 | | |
1304 | | /* Report that we are once again scanning the heap */ |
1305 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, |
1306 | 0 | PROGRESS_VACUUM_PHASE_SCAN_HEAP); |
1307 | 0 | } |
1308 | |
1309 | 0 | buf = read_stream_next_buffer(stream, &per_buffer_data); |
1310 | | |
1311 | | /* The relation is exhausted. */ |
1312 | 0 | if (!BufferIsValid(buf)) |
1313 | 0 | break; |
1314 | | |
1315 | 0 | blk_info = *((uint8 *) per_buffer_data); |
1316 | 0 | CheckBufferIsPinnedOnce(buf); |
1317 | 0 | page = BufferGetPage(buf); |
1318 | 0 | blkno = BufferGetBlockNumber(buf); |
1319 | |
1320 | 0 | vacrel->scanned_pages++; |
1321 | 0 | if (blk_info & VAC_BLK_WAS_EAGER_SCANNED) |
1322 | 0 | vacrel->eager_scanned_pages++; |
1323 | | |
1324 | | /* Report as block scanned, update error traceback information */ |
1325 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); |
1326 | 0 | update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, |
1327 | 0 | blkno, InvalidOffsetNumber); |
1328 | | |
1329 | | /* |
1330 | | * Pin the visibility map page in case we need to mark the page |
1331 | | * all-visible. In most cases this will be very cheap, because we'll |
1332 | | * already have the correct page pinned anyway. |
1333 | | */ |
1334 | 0 | visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); |
1335 | | |
1336 | | /* |
1337 | | * We need a buffer cleanup lock to prune HOT chains and defragment |
1338 | | * the page in lazy_scan_prune. But when it's not possible to acquire |
1339 | | * a cleanup lock right away, we may be able to settle for reduced |
1340 | | * processing using lazy_scan_noprune. |
1341 | | */ |
1342 | 0 | got_cleanup_lock = ConditionalLockBufferForCleanup(buf); |
1343 | |
1344 | 0 | if (!got_cleanup_lock) |
1345 | 0 | LockBuffer(buf, BUFFER_LOCK_SHARE); |
1346 | | |
1347 | | /* Check for new or empty pages before lazy_scan_[no]prune call */ |
1348 | 0 | if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock, |
1349 | 0 | vmbuffer)) |
1350 | 0 | { |
1351 | | /* Processed as new/empty page (lock and pin released) */ |
1352 | 0 | continue; |
1353 | 0 | } |
1354 | | |
1355 | | /* |
1356 | | * If we didn't get the cleanup lock, we can still collect LP_DEAD |
1357 | | * items in the dead_items area for later vacuuming, count live and |
1358 | | * recently dead tuples for vacuum logging, and determine if this |
1359 | | * block could later be truncated. If we encounter any xid/mxids that |
1360 | | * require advancing the relfrozenxid/relminxid, we'll have to wait |
1361 | | * for a cleanup lock and call lazy_scan_prune(). |
1362 | | */ |
1363 | 0 | if (!got_cleanup_lock && |
1364 | 0 | !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items)) |
1365 | 0 | { |
1366 | | /* |
1367 | | * lazy_scan_noprune could not do all required processing. Wait |
1368 | | * for a cleanup lock, and call lazy_scan_prune in the usual way. |
1369 | | */ |
1370 | 0 | Assert(vacrel->aggressive); |
1371 | 0 | LockBuffer(buf, BUFFER_LOCK_UNLOCK); |
1372 | 0 | LockBufferForCleanup(buf); |
1373 | 0 | got_cleanup_lock = true; |
1374 | 0 | } |
1375 | | |
1376 | | /* |
1377 | | * If we have a cleanup lock, we must now prune, freeze, and count |
1378 | | * tuples. We may have acquired the cleanup lock originally, or we may |
1379 | | * have gone back and acquired it after lazy_scan_noprune() returned |
1380 | | * false. Either way, the page hasn't been processed yet. |
1381 | | * |
1382 | | * Like lazy_scan_noprune(), lazy_scan_prune() will count |
1383 | | * recently_dead_tuples and live tuples for vacuum logging, determine |
1384 | | * if the block can later be truncated, and accumulate the details of |
1385 | | * remaining LP_DEAD line pointers on the page into dead_items. These |
1386 | | * dead items include those pruned by lazy_scan_prune() as well as |
1387 | | * line pointers previously marked LP_DEAD. |
1388 | | */ |
1389 | 0 | if (got_cleanup_lock) |
1390 | 0 | lazy_scan_prune(vacrel, buf, blkno, page, |
1391 | 0 | vmbuffer, |
1392 | 0 | blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, |
1393 | 0 | &has_lpdead_items, &vm_page_frozen); |
1394 | | |
1395 | | /* |
1396 | | * Count an eagerly scanned page as a failure or a success. |
1397 | | * |
1398 | | * Only lazy_scan_prune() freezes pages, so if we didn't get the |
1399 | | * cleanup lock, we won't have frozen the page. However, we only count |
1400 | | * pages that were too new to require freezing as eager freeze |
1401 | | * failures. |
1402 | | * |
1403 | | * We could gather more information from lazy_scan_noprune() about |
1404 | | * whether or not there were tuples with XIDs or MXIDs older than the |
1405 | | * FreezeLimit or MultiXactCutoff. However, for simplicity, we exclude |
1406 | | * pages skipped due to cleanup lock contention from the eager freeze |
1407 | | * algorithm's caps. |
1408 | | */ |
1409 | 0 | if (got_cleanup_lock && |
1410 | 0 | (blk_info & VAC_BLK_WAS_EAGER_SCANNED)) |
1411 | 0 | { |
1412 | | /* Aggressive vacuums do not eager scan. */ |
1413 | 0 | Assert(!vacrel->aggressive); |
1414 | |
1415 | 0 | if (vm_page_frozen) |
1416 | 0 | { |
1417 | 0 | if (vacrel->eager_scan_remaining_successes > 0) |
1418 | 0 | vacrel->eager_scan_remaining_successes--; |
1419 | |
1420 | 0 | if (vacrel->eager_scan_remaining_successes == 0) |
1421 | 0 | { |
1422 | | /* |
1423 | | * Report only once that we disabled eager scanning. We |
1424 | | * may eagerly read ahead blocks in excess of the success |
1425 | | * or failure caps before attempting to freeze them, so we |
1426 | | * could reach here even after disabling additional eager |
1427 | | * scanning. |
1428 | | */ |
1429 | 0 | if (vacrel->eager_scan_max_fails_per_region > 0) |
1430 | 0 | ereport(vacrel->verbose ? INFO : DEBUG2, |
1431 | 0 | (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"", |
1432 | 0 | orig_eager_scan_success_limit, |
1433 | 0 | vacrel->dbname, vacrel->relnamespace, |
1434 | 0 | vacrel->relname))); |
1435 | | |
1436 | | /* |
1437 | | * If we hit our success cap, permanently disable eager |
1438 | | * scanning by setting the other eager scan management |
1439 | | * fields to their disabled values. |
1440 | | */ |
1441 | 0 | vacrel->eager_scan_remaining_fails = 0; |
1442 | 0 | vacrel->next_eager_scan_region_start = InvalidBlockNumber; |
1443 | 0 | vacrel->eager_scan_max_fails_per_region = 0; |
1444 | 0 | } |
1445 | 0 | } |
1446 | 0 | else if (vacrel->eager_scan_remaining_fails > 0) |
1447 | 0 | vacrel->eager_scan_remaining_fails--; |
1448 | 0 | } |
1449 | | |
1450 | | /* |
1451 | | * Now drop the buffer lock and, potentially, update the FSM. |
1452 | | * |
1453 | | * Our goal is to update the freespace map the last time we touch the |
1454 | | * page. If we'll process a block in the second pass, we may free up |
1455 | | * additional space on the page, so it is better to update the FSM |
1456 | | * after the second pass. If the relation has no indexes, or if index |
1457 | | * vacuuming is disabled, there will be no second heap pass; if this |
1458 | | * particular page has no dead items, the second heap pass will not |
1459 | | * touch this page. So, in those cases, update the FSM now. |
1460 | | * |
1461 | | * Note: In corner cases, it's possible to miss updating the FSM |
1462 | | * entirely. If index vacuuming is currently enabled, we'll skip the |
1463 | | * FSM update now. But if failsafe mode is later activated, or there |
1464 | | * are so few dead tuples that index vacuuming is bypassed, there will |
1465 | | * also be no opportunity to update the FSM later, because we'll never |
1466 | | * revisit this page. Since updating the FSM is desirable but not |
1467 | | * absolutely required, that's OK. |
1468 | | */ |
1469 | 0 | if (vacrel->nindexes == 0 |
1470 | 0 | || !vacrel->do_index_vacuuming |
1471 | 0 | || !has_lpdead_items) |
1472 | 0 | { |
1473 | 0 | Size freespace = PageGetHeapFreeSpace(page); |
1474 | |
1475 | 0 | UnlockReleaseBuffer(buf); |
1476 | 0 | RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); |
1477 | | |
1478 | | /* |
1479 | | * Periodically perform FSM vacuuming to make newly-freed space |
1480 | | * visible on upper FSM pages. This is done after vacuuming if the |
1481 | | * table has indexes. There will only be newly-freed space if we |
1482 | | * held the cleanup lock and lazy_scan_prune() was called. |
1483 | | */ |
1484 | 0 | if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items && |
1485 | 0 | blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) |
1486 | 0 | { |
1487 | 0 | FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, |
1488 | 0 | blkno); |
1489 | 0 | next_fsm_block_to_vacuum = blkno; |
1490 | 0 | } |
1491 | 0 | } |
1492 | 0 | else |
1493 | 0 | UnlockReleaseBuffer(buf); |
1494 | 0 | } |
1495 | | |
1496 | 0 | vacrel->blkno = InvalidBlockNumber; |
1497 | 0 | if (BufferIsValid(vmbuffer)) |
1498 | 0 | ReleaseBuffer(vmbuffer); |
1499 | | |
1500 | | /* |
1501 | | * Report that everything is now scanned. We never skip scanning the last |
1502 | | * block in the relation, so we can pass rel_pages here. |
1503 | | */ |
1504 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, |
1505 | 0 | rel_pages); |
1506 | | |
1507 | | /* now we can compute the new value for pg_class.reltuples */ |
1508 | 0 | vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, |
1509 | 0 | vacrel->scanned_pages, |
1510 | 0 | vacrel->live_tuples); |
1511 | | |
1512 | | /* |
1513 | | * Also compute the total number of surviving heap entries. In the |
1514 | | * (unlikely) scenario that new_live_tuples is -1, take it as zero. |
1515 | | */ |
1516 | 0 | vacrel->new_rel_tuples = |
1517 | 0 | Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + |
1518 | 0 | vacrel->missed_dead_tuples; |
1519 | |
1520 | 0 | read_stream_end(stream); |
1521 | | |
1522 | | /* |
1523 | | * Do index vacuuming (call each index's ambulkdelete routine), then do |
1524 | | * related heap vacuuming |
1525 | | */ |
1526 | 0 | if (vacrel->dead_items_info->num_items > 0) |
1527 | 0 | lazy_vacuum(vacrel); |
1528 | | |
1529 | | /* |
1530 | | * Vacuum the remainder of the Free Space Map. We must do this whether or |
1531 | | * not there were indexes, and whether or not we bypassed index vacuuming. |
1532 | | * We can pass rel_pages here because we never skip scanning the last |
1533 | | * block of the relation. |
1534 | | */ |
1535 | 0 | if (rel_pages > next_fsm_block_to_vacuum) |
1536 | 0 | FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages); |
1537 | | |
1538 | | /* report all blocks vacuumed */ |
1539 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages); |
1540 | | |
1541 | | /* Do final index cleanup (call each index's amvacuumcleanup routine) */ |
1542 | 0 | if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) |
1543 | 0 | lazy_cleanup_all_indexes(vacrel); |
1544 | 0 | } |
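/*
 * Illustration only (not part of vacuumlazy.c): a minimal, self-contained
 * sketch of the round structure implemented by lazy_scan_heap() above.  The
 * first heap pass pauses for an index/heap vacuuming round whenever the
 * dead-item budget is exceeded, and one final round runs after the scan.
 * All names and numbers below are invented stand-ins.
 */
#include <stdio.h>

#define SKETCH_DEAD_ITEM_BUDGET 4   /* stand-in for dead_items max_bytes */
#define SKETCH_REL_PAGES        10

static int sketch_dead_items;       /* stand-in for the TID store contents */

static void
sketch_vacuum_indexes_and_heap(void)
{
    printf("index+heap vacuuming round removes %d dead items\n",
           sketch_dead_items);
    sketch_dead_items = 0;          /* the TID store is emptied */
}

int
main(void)
{
    for (int blkno = 0; blkno < SKETCH_REL_PAGES; blkno++)
    {
        /* budget exceeded: suspend the scan and run phases II and III */
        if (sketch_dead_items >= SKETCH_DEAD_ITEM_BUDGET)
            sketch_vacuum_indexes_and_heap();

        /* phase I: pretend pruning finds one dead item on every other page */
        if (blkno % 2 == 0)
            sketch_dead_items++;
    }

    /* final round for whatever is still in the TID store */
    if (sketch_dead_items > 0)
        sketch_vacuum_indexes_and_heap();
    return 0;
}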
1545 | | |
1546 | | /* |
1547 | | * heap_vac_scan_next_block() -- read stream callback to get the next block |
1548 | | * for vacuum to process |
1549 | | * |
1550 | | * Every time lazy_scan_heap() needs a new block to process during its first |
1551 | | * phase, it invokes read_stream_next_buffer() with a stream set up to call |
1552 | | * heap_vac_scan_next_block() to get the next block. |
1553 | | * |
1554 | | * heap_vac_scan_next_block() uses the visibility map, vacuum options, and |
1555 | | * various thresholds to skip blocks which do not need to be processed and |
1556 | | * returns the next block to process or InvalidBlockNumber if there are no |
1557 | | * remaining blocks. |
1558 | | * |
1559 | | * The visibility status of the next block to process and whether or not it |
1560 | | * was eagerly scanned are set in the per_buffer_data. |
1561 | | * |
1562 | | * callback_private_data contains a reference to the LVRelState, passed to the |
1563 | | * read stream API during stream setup. The LVRelState is an in/out parameter |
1564 | | * here (locally named `vacrel`). Vacuum options and information about the |
1565 | | * relation are read from it. vacrel->skippedallvis is set if we skip a block |
1566 | | * that's all-visible but not all-frozen (to ensure that we don't update |
1567 | | * relfrozenxid in that case). vacrel also holds information about the next |
1568 | | * unskippable block -- as bookkeeping for this function. |
1569 | | */ |
1570 | | static BlockNumber |
1571 | | heap_vac_scan_next_block(ReadStream *stream, |
1572 | | void *callback_private_data, |
1573 | | void *per_buffer_data) |
1574 | 0 | { |
1575 | 0 | BlockNumber next_block; |
1576 | 0 | LVRelState *vacrel = callback_private_data; |
1577 | 0 | uint8 blk_info = 0; |
1578 | | |
1579 | | /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ |
1580 | 0 | next_block = vacrel->current_block + 1; |
1581 | | |
1582 | | /* Have we reached the end of the relation? */ |
1583 | 0 | if (next_block >= vacrel->rel_pages) |
1584 | 0 | { |
1585 | 0 | if (BufferIsValid(vacrel->next_unskippable_vmbuffer)) |
1586 | 0 | { |
1587 | 0 | ReleaseBuffer(vacrel->next_unskippable_vmbuffer); |
1588 | 0 | vacrel->next_unskippable_vmbuffer = InvalidBuffer; |
1589 | 0 | } |
1590 | 0 | return InvalidBlockNumber; |
1591 | 0 | } |
1592 | | |
1593 | | /* |
1594 | | * We must be in one of the three following states: |
1595 | | */ |
1596 | 0 | if (next_block > vacrel->next_unskippable_block || |
1597 | 0 | vacrel->next_unskippable_block == InvalidBlockNumber) |
1598 | 0 | { |
1599 | | /* |
1600 | | * 1. We have just processed an unskippable block (or we're at the |
1601 | | * beginning of the scan). Find the next unskippable block using the |
1602 | | * visibility map. |
1603 | | */ |
1604 | 0 | bool skipsallvis; |
1605 | |
1606 | 0 | find_next_unskippable_block(vacrel, &skipsallvis); |
1607 | | |
1608 | | /* |
1609 | | * We now know the next block that we must process. It can be the |
1610 | | * next block after the one we just processed, or something further |
1611 | | * ahead. If it's further ahead, we can jump to it, but we choose to |
1612 | | * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive |
1613 | | * pages. Since we're reading sequentially, the OS should be doing |
1614 | | * readahead for us, so there's no gain in skipping a page now and |
1615 | | * then. Skipping such a range might even discourage sequential |
1616 | | * detection. |
1617 | | * |
1618 | | * This test also enables more frequent relfrozenxid advancement |
1619 | | * during non-aggressive VACUUMs. If the range has any all-visible |
1620 | | * pages then skipping makes updating relfrozenxid unsafe, which is a |
1621 | | * real downside. |
1622 | | */ |
1623 | 0 | if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD) |
1624 | 0 | { |
1625 | 0 | next_block = vacrel->next_unskippable_block; |
1626 | 0 | if (skipsallvis) |
1627 | 0 | vacrel->skippedallvis = true; |
1628 | 0 | } |
1629 | 0 | } |
1630 | | |
1631 | | /* Now we must be in one of the two remaining states: */ |
1632 | 0 | if (next_block < vacrel->next_unskippable_block) |
1633 | 0 | { |
1634 | | /* |
1635 | | * 2. We are processing a range of blocks that we could have skipped |
1636 | | * but chose not to. We know that they are all-visible in the VM, |
1637 | | * otherwise they would've been unskippable. |
1638 | | */ |
1639 | 0 | vacrel->current_block = next_block; |
1640 | 0 | blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; |
1641 | 0 | *((uint8 *) per_buffer_data) = blk_info; |
1642 | 0 | return vacrel->current_block; |
1643 | 0 | } |
1644 | 0 | else |
1645 | 0 | { |
1646 | | /* |
1647 | | * 3. We reached the next unskippable block. Process it. On next |
1648 | | * iteration, we will be back in state 1. |
1649 | | */ |
1650 | 0 | Assert(next_block == vacrel->next_unskippable_block); |
1651 | |
1652 | 0 | vacrel->current_block = next_block; |
1653 | 0 | if (vacrel->next_unskippable_allvis) |
1654 | 0 | blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; |
1655 | 0 | if (vacrel->next_unskippable_eager_scanned) |
1656 | 0 | blk_info |= VAC_BLK_WAS_EAGER_SCANNED; |
1657 | 0 | *((uint8 *) per_buffer_data) = blk_info; |
1658 | 0 | return vacrel->current_block; |
1659 | 0 | } |
1660 | 0 | } |
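/*
 * Illustration only (not part of vacuumlazy.c): the skip decision made in
 * heap_vac_scan_next_block() above, reduced to its arithmetic.  The threshold
 * is an assumed value for this sketch; the real SKIP_PAGES_THRESHOLD is
 * defined earlier in the file.  A run of skippable pages is jumped over only
 * when it is long enough that giving up OS readahead is worthwhile.
 */
#include <stdint.h>

#define SKETCH_SKIP_PAGES_THRESHOLD 32  /* assumed value */

static uint32_t
sketch_choose_next_block(uint32_t next_block, uint32_t next_unskippable_block)
{
    if (next_unskippable_block - next_block >= SKETCH_SKIP_PAGES_THRESHOLD)
        return next_unskippable_block;  /* jump over the whole skippable run */
    return next_block;                  /* short run: read the pages anyway */
}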
1661 | | |
1662 | | /* |
1663 | | * Find the next unskippable block in a vacuum scan using the visibility map. |
1664 | | * The next unskippable block and its visibility information are updated in |
1665 | | * vacrel. |
1666 | | * |
1667 | | * Note: our opinion of which blocks can be skipped can go stale immediately. |
1668 | | * It's okay if caller "misses" a page whose all-visible or all-frozen marking |
1669 | | * was concurrently cleared, though. All that matters is that caller scan all |
1670 | | * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact. |
1671 | | * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with |
1672 | | * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice |
1673 | | * to skip such a range is actually made, making everything safe.) |
1674 | | */ |
1675 | | static void |
1676 | | find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis) |
1677 | 0 | { |
1678 | 0 | BlockNumber rel_pages = vacrel->rel_pages; |
1679 | 0 | BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1; |
1680 | 0 | Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer; |
1681 | 0 | bool next_unskippable_eager_scanned = false; |
1682 | 0 | bool next_unskippable_allvis; |
1683 | |
1684 | 0 | *skipsallvis = false; |
1685 | |
1686 | 0 | for (;; next_unskippable_block++) |
1687 | 0 | { |
1688 | 0 | uint8 mapbits = visibilitymap_get_status(vacrel->rel, |
1689 | 0 | next_unskippable_block, |
1690 | 0 | &next_unskippable_vmbuffer); |
1691 | |
1692 | 0 | next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0; |
1693 | | |
1694 | | /* |
1695 | | * At the start of each eager scan region, normal vacuums with eager |
1696 | | * scanning enabled reset the failure counter, allowing vacuum to |
1697 | | * resume eager scanning if it had been suspended in the previous |
1698 | | * region. |
1699 | | */ |
1700 | 0 | if (next_unskippable_block >= vacrel->next_eager_scan_region_start) |
1701 | 0 | { |
1702 | 0 | vacrel->eager_scan_remaining_fails = |
1703 | 0 | vacrel->eager_scan_max_fails_per_region; |
1704 | 0 | vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE; |
1705 | 0 | } |
1706 | | |
1707 | | /* |
1708 | | * A block is unskippable if it is not all visible according to the |
1709 | | * visibility map. |
1710 | | */ |
1711 | 0 | if (!next_unskippable_allvis) |
1712 | 0 | { |
1713 | 0 | Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0); |
1714 | 0 | break; |
1715 | 0 | } |
1716 | | |
1717 | | /* |
1718 | | * Caller must scan the last page to determine whether it has tuples |
1719 | | * (caller must have the opportunity to set vacrel->nonempty_pages). |
1720 | | * This rule avoids having lazy_truncate_heap() take access-exclusive |
1721 | | * lock on rel to attempt a truncation that fails anyway, just because |
1722 | | * there are tuples on the last page (it is likely that there will be |
1723 | | * tuples on other nearby pages as well, but those can be skipped). |
1724 | | * |
1725 | | * Implement this by always treating the last block as unsafe to skip. |
1726 | | */ |
1727 | 0 | if (next_unskippable_block == rel_pages - 1) |
1728 | 0 | break; |
1729 | | |
1730 | | /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */ |
1731 | 0 | if (!vacrel->skipwithvm) |
1732 | 0 | break; |
1733 | | |
1734 | | /* |
1735 | | * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't |
1736 | | * already frozen by now), so this page can be skipped. |
1737 | | */ |
1738 | 0 | if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0) |
1739 | 0 | continue; |
1740 | | |
1741 | | /* |
1742 | | * Aggressive vacuums cannot skip any all-visible pages that are not |
1743 | | * also all-frozen. |
1744 | | */ |
1745 | 0 | if (vacrel->aggressive) |
1746 | 0 | break; |
1747 | | |
1748 | | /* |
1749 | | * Normal vacuums with eager scanning enabled only skip all-visible |
1750 | | * but not all-frozen pages if they have hit the failure limit for the |
1751 | | * current eager scan region. |
1752 | | */ |
1753 | 0 | if (vacrel->eager_scan_remaining_fails > 0) |
1754 | 0 | { |
1755 | 0 | next_unskippable_eager_scanned = true; |
1756 | 0 | break; |
1757 | 0 | } |
1758 | | |
1759 | | /* |
1760 | | * All-visible blocks are safe to skip in a normal vacuum. But |
1761 | | * remember that the final range contains such a block for later. |
1762 | | */ |
1763 | 0 | *skipsallvis = true; |
1764 | 0 | } |
1765 | | |
1766 | | /* write the local variables back to vacrel */ |
1767 | 0 | vacrel->next_unskippable_block = next_unskippable_block; |
1768 | 0 | vacrel->next_unskippable_allvis = next_unskippable_allvis; |
1769 | 0 | vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned; |
1770 | 0 | vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer; |
1771 | 0 | } |
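/*
 * Illustration only (not part of vacuumlazy.c): the per-region failure budget
 * consulted by find_next_unskippable_block() above.  The counter of remaining
 * eager-freeze failures is refilled at the start of each fixed-size region, so
 * eager scanning can resume even after it was suspended in the previous
 * region.  The region size and budget here are invented numbers.
 */
#include <stdint.h>

#define SKETCH_REGION_SIZE          4096    /* assumed blocks per region */
#define SKETCH_MAX_FAILS_PER_REGION 128     /* assumed per-region budget */

struct sketch_eager_state
{
    uint32_t    next_region_start;
    uint32_t    remaining_fails;
};

static void
sketch_maybe_start_new_region(struct sketch_eager_state *st, uint32_t blkno)
{
    if (blkno >= st->next_region_start)
    {
        st->remaining_fails = SKETCH_MAX_FAILS_PER_REGION;
        st->next_region_start += SKETCH_REGION_SIZE;
    }
}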
1772 | | |
1773 | | /* |
1774 | | * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling. |
1775 | | * |
1776 | | * Must call here to handle both new and empty pages before calling |
1777 | | * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal |
1778 | | * with new or empty pages. |
1779 | | * |
1780 | | * It's necessary to consider new pages as a special case, since the rules for |
1781 | | * maintaining the visibility map and FSM with empty pages are a little |
1782 | | * different (though new pages can be truncated away during rel truncation). |
1783 | | * |
1784 | | * Empty pages are not really a special case -- they're just heap pages that |
1785 | | * have no allocated tuples (including even LP_UNUSED items). You might |
1786 | | * wonder why we need to handle them here all the same. It's only necessary |
1787 | | * because of a corner-case involving a hard crash during heap relation |
1788 | | * extension. If we ever make relation-extension crash safe, then it should |
1789 | | * no longer be necessary to deal with empty pages here (or new pages, for |
1790 | | * that matter). |
1791 | | * |
1792 | | * Caller must hold at least a shared lock. We might need to escalate the |
1793 | | * lock in that case, so the type of lock the caller holds needs to be |
1794 | | * specified using the 'sharelock' argument. |
1795 | | * |
1796 | | * Returns false in common case where caller should go on to call |
1797 | | * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating |
1798 | | * that lazy_scan_heap is done processing the page, releasing lock on caller's |
1799 | | * behalf. |
1800 | | * |
1801 | | * No vm_page_frozen output parameter (like that passed to lazy_scan_prune()) |
1802 | | * is passed here because neither empty nor new pages can be eagerly frozen. |
1803 | | * New pages are never frozen. Empty pages are always set frozen in the VM at |
1804 | | * the same time that they are set all-visible, and we don't eagerly scan |
1805 | | * frozen pages. |
1806 | | */ |
1807 | | static bool |
1808 | | lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, |
1809 | | Page page, bool sharelock, Buffer vmbuffer) |
1810 | 0 | { |
1811 | 0 | Size freespace; |
1812 | |
1813 | 0 | if (PageIsNew(page)) |
1814 | 0 | { |
1815 | | /* |
1816 | | * All-zeroes pages can be left over if either a backend extends the |
1817 | | * relation by a single page, but crashes before the newly initialized |
1818 | | * page has been written out, or when bulk-extending the relation |
1819 | | * (which creates a number of empty pages at the tail end of the |
1820 | | * relation), and then enters them into the FSM. |
1821 | | * |
1822 | | * Note we do not enter the page into the visibilitymap. That has the |
1823 | | * downside that we repeatedly visit this page in subsequent vacuums, |
1824 | | * but otherwise we'll never discover the space on a promoted standby. |
1825 | | * The harm of repeated checking ought to normally not be too bad. The |
1826 | | * space usually should be used at some point, otherwise there |
1827 | | * wouldn't be any regular vacuums. |
1828 | | * |
1829 | | * Make sure these pages are in the FSM, to ensure they can be reused. |
1830 | | * Do that by testing if there's any space recorded for the page. If |
1831 | | * not, enter it. We do so after releasing the lock on the heap page; |
1832 | | * the FSM is approximate, after all. |
1833 | | */ |
1834 | 0 | UnlockReleaseBuffer(buf); |
1835 | |
1836 | 0 | if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0) |
1837 | 0 | { |
1838 | 0 | freespace = BLCKSZ - SizeOfPageHeaderData; |
1839 | |
1840 | 0 | RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); |
1841 | 0 | } |
1842 | |
1843 | 0 | return true; |
1844 | 0 | } |
1845 | | |
1846 | 0 | if (PageIsEmpty(page)) |
1847 | 0 | { |
1848 | | /* |
1849 | | * It seems likely that caller will always be able to get a cleanup |
1850 | | * lock on an empty page. But don't take any chances -- escalate to |
1851 | | * an exclusive lock (still don't need a cleanup lock, though). |
1852 | | */ |
1853 | 0 | if (sharelock) |
1854 | 0 | { |
1855 | 0 | LockBuffer(buf, BUFFER_LOCK_UNLOCK); |
1856 | 0 | LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); |
1857 | |
1858 | 0 | if (!PageIsEmpty(page)) |
1859 | 0 | { |
1860 | | /* page isn't new or empty -- keep lock and pin for now */ |
1861 | 0 | return false; |
1862 | 0 | } |
1863 | 0 | } |
1864 | 0 | else |
1865 | 0 | { |
1866 | | /* Already have a full cleanup lock (which is more than enough) */ |
1867 | 0 | } |
1868 | | |
1869 | | /* |
1870 | | * Unlike new pages, empty pages are always set all-visible and |
1871 | | * all-frozen. |
1872 | | */ |
1873 | 0 | if (!PageIsAllVisible(page)) |
1874 | 0 | { |
1875 | 0 | uint8 old_vmbits; |
1876 | |
1877 | 0 | START_CRIT_SECTION(); |
1878 | | |
1879 | | /* mark buffer dirty before writing a WAL record */ |
1880 | 0 | MarkBufferDirty(buf); |
1881 | | |
1882 | | /* |
1883 | | * It's possible that another backend has extended the heap, |
1884 | | * initialized the page, and then failed to WAL-log the page due |
1885 | | * to an ERROR. Since heap extension is not WAL-logged, recovery |
1886 | | * might try to replay our record setting the page all-visible and |
1887 | | * find that the page isn't initialized, which will cause a PANIC. |
1888 | | * To prevent that, check whether the page has been previously |
1889 | | * WAL-logged, and if not, do that now. |
1890 | | */ |
1891 | 0 | if (RelationNeedsWAL(vacrel->rel) && |
1892 | 0 | PageGetLSN(page) == InvalidXLogRecPtr) |
1893 | 0 | log_newpage_buffer(buf, true); |
1894 | |
1895 | 0 | PageSetAllVisible(page); |
1896 | 0 | old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, |
1897 | 0 | InvalidXLogRecPtr, |
1898 | 0 | vmbuffer, InvalidTransactionId, |
1899 | 0 | VISIBILITYMAP_ALL_VISIBLE | |
1900 | 0 | VISIBILITYMAP_ALL_FROZEN); |
1901 | 0 | END_CRIT_SECTION(); |
1902 | | |
1903 | | /* |
1904 | | * If the page wasn't already set all-visible and/or all-frozen in |
1905 | | * the VM, count it as newly set for logging. |
1906 | | */ |
1907 | 0 | if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) |
1908 | 0 | { |
1909 | 0 | vacrel->vm_new_visible_pages++; |
1910 | 0 | vacrel->vm_new_visible_frozen_pages++; |
1911 | 0 | } |
1912 | 0 | else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0) |
1913 | 0 | vacrel->vm_new_frozen_pages++; |
1914 | 0 | } |
1915 | |
1916 | 0 | freespace = PageGetHeapFreeSpace(page); |
1917 | 0 | UnlockReleaseBuffer(buf); |
1918 | 0 | RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); |
1919 | 0 | return true; |
1920 | 0 | } |
1921 | | |
1922 | | /* page isn't new or empty -- keep lock and pin */ |
1923 | 0 | return false; |
1924 | 0 | } |
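/*
 * Illustration only (not part of vacuumlazy.c): the free-space arithmetic used
 * for an all-zeroes page in lazy_scan_new_or_empty() above.  Assuming the
 * default 8 kB block size and a 24-byte page header, the whole page minus the
 * header is what gets recorded in the FSM.
 */
#include <stdio.h>

int
main(void)
{
    const int   blcksz = 8192;      /* default BLCKSZ, assumed here */
    const int   page_header = 24;   /* typical SizeOfPageHeaderData */

    printf("free space recorded for a new page: %d bytes\n",
           blcksz - page_header);   /* prints 8168 */
    return 0;
}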
1925 | | |
1926 | | /* qsort comparator for sorting OffsetNumbers */ |
1927 | | static int |
1928 | | cmpOffsetNumbers(const void *a, const void *b) |
1929 | 0 | { |
1930 | 0 | return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b); |
1931 | 0 | } |
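/*
 * Illustration only (not part of vacuumlazy.c): how a comparator like
 * cmpOffsetNumbers() above is used with qsort().  pg_cmp_u16 is replaced by an
 * equivalent portable comparison so the sketch stands alone.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
sketch_cmp_u16(const void *a, const void *b)
{
    uint16_t    va = *(const uint16_t *) a;
    uint16_t    vb = *(const uint16_t *) b;

    return (va > vb) - (va < vb);   /* -1, 0, or 1; no overflow */
}

int
main(void)
{
    uint16_t    offsets[] = {7, 2, 42, 2, 19};
    size_t      n = sizeof(offsets) / sizeof(offsets[0]);

    qsort(offsets, n, sizeof(offsets[0]), sketch_cmp_u16);

    for (size_t i = 0; i < n; i++)
        printf("%u ", (unsigned) offsets[i]);   /* prints: 2 2 7 19 42 */
    printf("\n");
    return 0;
}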
1932 | | |
1933 | | /* |
1934 | | * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing. |
1935 | | * |
1936 | | * Caller must hold pin and buffer cleanup lock on the buffer. |
1937 | | * |
1938 | | * vmbuffer is the buffer containing the VM block with visibility information |
1939 | | * for the heap block, blkno. all_visible_according_to_vm is the saved |
1940 | | * visibility status of the heap block looked up earlier by the caller. We |
1941 | | * won't rely entirely on this status, as it may be out of date. |
1942 | | * |
1943 | | * *has_lpdead_items is set to true or false depending on whether, upon return |
1944 | | * from this function, any LP_DEAD items are still present on the page. |
1945 | | * |
1946 | | * *vm_page_frozen is set to true if the page is newly set all-frozen in the |
1947 | | * VM. The caller currently only uses this for determining whether an eagerly |
1948 | | * scanned page was successfully set all-frozen. |
1949 | | */ |
1950 | | static void |
1951 | | lazy_scan_prune(LVRelState *vacrel, |
1952 | | Buffer buf, |
1953 | | BlockNumber blkno, |
1954 | | Page page, |
1955 | | Buffer vmbuffer, |
1956 | | bool all_visible_according_to_vm, |
1957 | | bool *has_lpdead_items, |
1958 | | bool *vm_page_frozen) |
1959 | 0 | { |
1960 | 0 | Relation rel = vacrel->rel; |
1961 | 0 | PruneFreezeResult presult; |
1962 | 0 | int prune_options = 0; |
1963 | |
1964 | 0 | Assert(BufferGetBlockNumber(buf) == blkno); |
1965 | | |
1966 | | /* |
1967 | | * Prune all HOT-update chains and potentially freeze tuples on this page. |
1968 | | * |
1969 | | * If the relation has no indexes, we can immediately mark would-be dead |
1970 | | * items LP_UNUSED. |
1971 | | * |
1972 | | * The number of tuples removed from the page is returned in |
1973 | | * presult.ndeleted. It should not be confused with presult.lpdead_items; |
1974 | | * presult.lpdead_items's final value can be thought of as the number of |
1975 | | * tuples that were deleted from indexes. |
1976 | | * |
1977 | | * We will update the VM after collecting LP_DEAD items and freezing |
1978 | | * tuples. Pruning will have determined whether or not the page is |
1979 | | * all-visible. |
1980 | | */ |
1981 | 0 | prune_options = HEAP_PAGE_PRUNE_FREEZE; |
1982 | 0 | if (vacrel->nindexes == 0) |
1983 | 0 | prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW; |
1984 | |
1985 | 0 | heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options, |
1986 | 0 | &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN, |
1987 | 0 | &vacrel->offnum, |
1988 | 0 | &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid); |
1989 | |
1990 | 0 | Assert(MultiXactIdIsValid(vacrel->NewRelminMxid)); |
1991 | 0 | Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid)); |
1992 | |
1993 | 0 | if (presult.nfrozen > 0) |
1994 | 0 | { |
1995 | | /* |
1996 | | * We don't increment the new_frozen_tuple_pages instrumentation |
1997 | | * counter when nfrozen == 0, since it only counts pages with newly |
1998 | | * frozen tuples (don't confuse that with pages newly set all-frozen |
1999 | | * in VM). |
2000 | | */ |
2001 | 0 | vacrel->new_frozen_tuple_pages++; |
2002 | 0 | } |
2003 | | |
2004 | | /* |
2005 | | * VACUUM will call heap_page_is_all_visible() during the second pass over |
2006 | | * the heap to determine all_visible and all_frozen for the page -- this |
2007 | | * is a specialized version of the logic from this function. Now that |
2008 | | * we've finished pruning and freezing, make sure that we're in total |
2009 | | * agreement with heap_page_is_all_visible() using an assertion. |
2010 | | */ |
2011 | | #ifdef USE_ASSERT_CHECKING |
2012 | | /* Note that all_frozen value does not matter when !all_visible */ |
2013 | | if (presult.all_visible) |
2014 | | { |
2015 | | TransactionId debug_cutoff; |
2016 | | bool debug_all_frozen; |
2017 | | |
2018 | | Assert(presult.lpdead_items == 0); |
2019 | | |
2020 | | if (!heap_page_is_all_visible(vacrel, buf, |
2021 | | &debug_cutoff, &debug_all_frozen)) |
2022 | | Assert(false); |
2023 | | |
2024 | | Assert(presult.all_frozen == debug_all_frozen); |
2025 | | |
2026 | | Assert(!TransactionIdIsValid(debug_cutoff) || |
2027 | | debug_cutoff == presult.vm_conflict_horizon); |
2028 | | } |
2029 | | #endif |
2030 | | |
2031 | | /* |
2032 | | * Now save details of the LP_DEAD items from the page in vacrel |
2033 | | */ |
2034 | 0 | if (presult.lpdead_items > 0) |
2035 | 0 | { |
2036 | 0 | vacrel->lpdead_item_pages++; |
2037 | | |
2038 | | /* |
2039 | | * deadoffsets are collected incrementally in |
2040 | | * heap_page_prune_and_freeze() as each dead line pointer is recorded, |
2041 | | * with an indeterminate order, but dead_items_add requires them to be |
2042 | | * sorted. |
2043 | | */ |
2044 | 0 | qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber), |
2045 | 0 | cmpOffsetNumbers); |
2046 | |
2047 | 0 | dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items); |
2048 | 0 | } |
2049 | | |
2050 | | /* Finally, add page-local counts to whole-VACUUM counts */ |
2051 | 0 | vacrel->tuples_deleted += presult.ndeleted; |
2052 | 0 | vacrel->tuples_frozen += presult.nfrozen; |
2053 | 0 | vacrel->lpdead_items += presult.lpdead_items; |
2054 | 0 | vacrel->live_tuples += presult.live_tuples; |
2055 | 0 | vacrel->recently_dead_tuples += presult.recently_dead_tuples; |
2056 | | |
2057 | | /* Can't truncate this page */ |
2058 | 0 | if (presult.hastup) |
2059 | 0 | vacrel->nonempty_pages = blkno + 1; |
2060 | | |
2061 | | /* Did we find LP_DEAD items? */ |
2062 | 0 | *has_lpdead_items = (presult.lpdead_items > 0); |
2063 | |
2064 | 0 | Assert(!presult.all_visible || !(*has_lpdead_items)); |
2065 | | |
2066 | | /* |
2067 | | * Handle setting visibility map bit based on information from the VM (as |
2068 | | * of last heap_vac_scan_next_block() call), and from all_visible and |
2069 | | * all_frozen variables |
2070 | | */ |
2071 | 0 | if (!all_visible_according_to_vm && presult.all_visible) |
2072 | 0 | { |
2073 | 0 | uint8 old_vmbits; |
2074 | 0 | uint8 flags = VISIBILITYMAP_ALL_VISIBLE; |
2075 | |
2076 | 0 | if (presult.all_frozen) |
2077 | 0 | { |
2078 | 0 | Assert(!TransactionIdIsValid(presult.vm_conflict_horizon)); |
2079 | 0 | flags |= VISIBILITYMAP_ALL_FROZEN; |
2080 | 0 | } |
2081 | | |
2082 | | /* |
2083 | | * It should never be the case that the visibility map page is set |
2084 | | * while the page-level bit is clear, but the reverse is allowed (if |
2085 | | * checksums are not enabled). Regardless, set both bits so that we |
2086 | | * get back in sync. |
2087 | | * |
2088 | | * NB: If the heap page is all-visible but the VM bit is not set, we |
2089 | | * don't need to dirty the heap page. However, if checksums are |
2090 | | * enabled, we do need to make sure that the heap page is dirtied |
2091 | | * before passing it to visibilitymap_set(), because it may be logged. |
2092 | | * Given that this situation should only happen in rare cases after a |
2093 | | * crash, it is not worth optimizing. |
2094 | | */ |
2095 | 0 | PageSetAllVisible(page); |
2096 | 0 | MarkBufferDirty(buf); |
2097 | 0 | old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, |
2098 | 0 | InvalidXLogRecPtr, |
2099 | 0 | vmbuffer, presult.vm_conflict_horizon, |
2100 | 0 | flags); |
2101 | | |
2102 | | /* |
2103 | | * If the page wasn't already set all-visible and/or all-frozen in the |
2104 | | * VM, count it as newly set for logging. |
2105 | | */ |
2106 | 0 | if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) |
2107 | 0 | { |
2108 | 0 | vacrel->vm_new_visible_pages++; |
2109 | 0 | if (presult.all_frozen) |
2110 | 0 | { |
2111 | 0 | vacrel->vm_new_visible_frozen_pages++; |
2112 | 0 | *vm_page_frozen = true; |
2113 | 0 | } |
2114 | 0 | } |
2115 | 0 | else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && |
2116 | 0 | presult.all_frozen) |
2117 | 0 | { |
2118 | 0 | vacrel->vm_new_frozen_pages++; |
2119 | 0 | *vm_page_frozen = true; |
2120 | 0 | } |
2121 | 0 | } |
2122 | | |
2123 | | /* |
2124 | | * As of PostgreSQL 9.2, the visibility map bit should never be set if the |
2125 | | * page-level bit is clear. However, it's possible that the bit got |
2126 | | * cleared after heap_vac_scan_next_block() was called, so we must recheck |
2127 | | * with buffer lock before concluding that the VM is corrupt. |
2128 | | */ |
2129 | 0 | else if (all_visible_according_to_vm && !PageIsAllVisible(page) && |
2130 | 0 | visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0) |
2131 | 0 | { |
2132 | 0 | elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", |
2133 | 0 | vacrel->relname, blkno); |
2134 | 0 | visibilitymap_clear(vacrel->rel, blkno, vmbuffer, |
2135 | 0 | VISIBILITYMAP_VALID_BITS); |
2136 | 0 | } |
2137 | | |
2138 | | /* |
2139 | | * It's possible for the value returned by |
2140 | | * GetOldestNonRemovableTransactionId() to move backwards, so it's not |
2141 | | * wrong for us to see tuples that appear to not be visible to everyone |
2142 | | * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value |
2143 | | * never moves backwards, but GetOldestNonRemovableTransactionId() is |
2144 | | * conservative and sometimes returns a value that's unnecessarily small, |
2145 | | * so if we see that contradiction it just means that the tuples that we |
2146 | | * think are not visible to everyone yet actually are, and the |
2147 | | * PD_ALL_VISIBLE flag is correct. |
2148 | | * |
2149 | | * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set, |
2150 | | * however. |
2151 | | */ |
2152 | 0 | else if (presult.lpdead_items > 0 && PageIsAllVisible(page)) |
2153 | 0 | { |
2154 | 0 | elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", |
2155 | 0 | vacrel->relname, blkno); |
2156 | 0 | PageClearAllVisible(page); |
2157 | 0 | MarkBufferDirty(buf); |
2158 | 0 | visibilitymap_clear(vacrel->rel, blkno, vmbuffer, |
2159 | 0 | VISIBILITYMAP_VALID_BITS); |
2160 | 0 | } |
2161 | | |
2162 | | /* |
2163 | | * If the all-visible page is all-frozen but not marked as such yet, mark |
2164 | | * it as all-frozen. Note that all_frozen is only valid if all_visible is |
2165 | | * true, so we must check both all_visible and all_frozen. |
2166 | | */ |
2167 | 0 | else if (all_visible_according_to_vm && presult.all_visible && |
2168 | 0 | presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer)) |
2169 | 0 | { |
2170 | 0 | uint8 old_vmbits; |
2171 | | |
2172 | | /* |
2173 | | * Avoid relying on all_visible_according_to_vm as a proxy for the |
2174 | | * page-level PD_ALL_VISIBLE bit being set, since it might have become |
2175 | | * stale -- even when all_visible is set |
2176 | | */ |
2177 | 0 | if (!PageIsAllVisible(page)) |
2178 | 0 | { |
2179 | 0 | PageSetAllVisible(page); |
2180 | 0 | MarkBufferDirty(buf); |
2181 | 0 | } |
2182 | | |
2183 | | /* |
2184 | | * Set the page all-frozen (and all-visible) in the VM. |
2185 | | * |
2186 | | * We can pass InvalidTransactionId as our cutoff_xid, since a |
2187 | | * snapshotConflictHorizon sufficient to make everything safe for REDO |
2188 | | * was logged when the page's tuples were frozen. |
2189 | | */ |
2190 | 0 | Assert(!TransactionIdIsValid(presult.vm_conflict_horizon)); |
2191 | 0 | old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, |
2192 | 0 | InvalidXLogRecPtr, |
2193 | 0 | vmbuffer, InvalidTransactionId, |
2194 | 0 | VISIBILITYMAP_ALL_VISIBLE | |
2195 | 0 | VISIBILITYMAP_ALL_FROZEN); |
2196 | | |
2197 | | /* |
2198 | | * The page was likely already set all-visible in the VM. However, |
2199 | | * there is a small chance that it was modified sometime between |
2200 | | * setting all_visible_according_to_vm and checking the visibility |
2201 | | * during pruning. Check the return value of old_vmbits anyway to |
2202 | | * ensure the visibility map counters used for logging are accurate. |
2203 | | */ |
2204 | 0 | if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) |
2205 | 0 | { |
2206 | 0 | vacrel->vm_new_visible_pages++; |
2207 | 0 | vacrel->vm_new_visible_frozen_pages++; |
2208 | 0 | *vm_page_frozen = true; |
2209 | 0 | } |
2210 | | |
2211 | | /* |
2212 | | * We already checked that the page was not set all-frozen in the VM |
2213 | | * above, so we don't need to test the value of old_vmbits. |
2214 | | */ |
2215 | 0 | else |
2216 | 0 | { |
2217 | 0 | vacrel->vm_new_frozen_pages++; |
2218 | 0 | *vm_page_frozen = true; |
2219 | 0 | } |
2220 | 0 | } |
2221 | 0 | } |
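/*
 * Illustration only (not part of vacuumlazy.c): the visibility-map counter
 * bookkeeping that lazy_scan_prune() above performs after visibilitymap_set().
 * The bit values are assumed for the sketch; only the counting rules matter: a
 * page that was not yet all-visible counts as newly visible (and, if frozen,
 * also as newly visible-and-frozen), while a page that was already visible but
 * not yet frozen counts only as newly frozen.
 */
#include <stdint.h>

#define SKETCH_VM_ALL_VISIBLE   0x01    /* assumed bit value */
#define SKETCH_VM_ALL_FROZEN    0x02    /* assumed bit value */

struct sketch_vm_counters
{
    int         new_visible_pages;
    int         new_visible_frozen_pages;
    int         new_frozen_pages;
};

static void
sketch_count_vm_change(struct sketch_vm_counters *c,
                       uint8_t old_bits, int now_all_frozen)
{
    if ((old_bits & SKETCH_VM_ALL_VISIBLE) == 0)
    {
        c->new_visible_pages++;
        if (now_all_frozen)
            c->new_visible_frozen_pages++;
    }
    else if (now_all_frozen && (old_bits & SKETCH_VM_ALL_FROZEN) == 0)
        c->new_frozen_pages++;
}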
2222 | | |
2223 | | /* |
2224 | | * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing |
2225 | | * |
2226 | | * Caller need only hold a pin and share lock on the buffer, unlike |
2227 | | * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't |
2228 | | * performed here, it's quite possible that an earlier opportunistic pruning |
2229 | | * operation left LP_DEAD items behind. We'll at least collect any such items |
2230 | | * in dead_items for removal from indexes. |
2231 | | * |
2232 | | * For aggressive VACUUM callers, we may return false to indicate that a full |
2233 | | * cleanup lock is required for processing by lazy_scan_prune. This is only |
2234 | | * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from |
2235 | | * one or more tuples on the page. We always return true for non-aggressive |
2236 | | * callers. |
2237 | | * |
2238 | | * If this function returns true, *has_lpdead_items gets set to true or false |
2239 | | * depending on whether, upon return from this function, any LP_DEAD items are |
2240 | | * present on the page. If this function returns false, *has_lpdead_items |
2241 | | * is not updated. |
2242 | | */ |
2243 | | static bool |
2244 | | lazy_scan_noprune(LVRelState *vacrel, |
2245 | | Buffer buf, |
2246 | | BlockNumber blkno, |
2247 | | Page page, |
2248 | | bool *has_lpdead_items) |
2249 | 0 | { |
2250 | 0 | OffsetNumber offnum, |
2251 | 0 | maxoff; |
2252 | 0 | int lpdead_items, |
2253 | 0 | live_tuples, |
2254 | 0 | recently_dead_tuples, |
2255 | 0 | missed_dead_tuples; |
2256 | 0 | bool hastup; |
2257 | 0 | HeapTupleHeader tupleheader; |
2258 | 0 | TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; |
2259 | 0 | MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid; |
2260 | 0 | OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; |
2261 | |
2262 | 0 | Assert(BufferGetBlockNumber(buf) == blkno); |
2263 | |
2264 | 0 | hastup = false; /* for now */ |
2265 | |
2266 | 0 | lpdead_items = 0; |
2267 | 0 | live_tuples = 0; |
2268 | 0 | recently_dead_tuples = 0; |
2269 | 0 | missed_dead_tuples = 0; |
2270 | |
2271 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
2272 | 0 | for (offnum = FirstOffsetNumber; |
2273 | 0 | offnum <= maxoff; |
2274 | 0 | offnum = OffsetNumberNext(offnum)) |
2275 | 0 | { |
2276 | 0 | ItemId itemid; |
2277 | 0 | HeapTupleData tuple; |
2278 | |
2279 | 0 | vacrel->offnum = offnum; |
2280 | 0 | itemid = PageGetItemId(page, offnum); |
2281 | |
2282 | 0 | if (!ItemIdIsUsed(itemid)) |
2283 | 0 | continue; |
2284 | | |
2285 | 0 | if (ItemIdIsRedirected(itemid)) |
2286 | 0 | { |
2287 | 0 | hastup = true; |
2288 | 0 | continue; |
2289 | 0 | } |
2290 | | |
2291 | 0 | if (ItemIdIsDead(itemid)) |
2292 | 0 | { |
2293 | | /* |
2294 | | * Deliberately don't set hastup=true here. See same point in |
2295 | | * lazy_scan_prune for an explanation. |
2296 | | */ |
2297 | 0 | deadoffsets[lpdead_items++] = offnum; |
2298 | 0 | continue; |
2299 | 0 | } |
2300 | | |
2301 | 0 | hastup = true; /* page prevents rel truncation */ |
2302 | 0 | tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); |
2303 | 0 | if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs, |
2304 | 0 | &NoFreezePageRelfrozenXid, |
2305 | 0 | &NoFreezePageRelminMxid)) |
2306 | 0 | { |
2307 | | /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */ |
2308 | 0 | if (vacrel->aggressive) |
2309 | 0 | { |
2310 | | /* |
2311 | | * Aggressive VACUUMs must always be able to advance rel's |
2312 | | * relfrozenxid to a value >= FreezeLimit (and be able to |
2313 | | * advance rel's relminmxid to a value >= MultiXactCutoff). |
2314 | | * The ongoing aggressive VACUUM won't be able to do that |
2315 | | * unless it can freeze an XID (or MXID) from this tuple now. |
2316 | | * |
2317 | | * The only safe option is to have caller perform processing |
2318 | | * of this page using lazy_scan_prune. Caller might have to |
2319 | | * wait a while for a cleanup lock, but it can't be helped. |
2320 | | */ |
2321 | 0 | vacrel->offnum = InvalidOffsetNumber; |
2322 | 0 | return false; |
2323 | 0 | } |
2324 | | |
2325 | | /* |
2326 | | * Non-aggressive VACUUMs are under no obligation to advance |
2327 | | * relfrozenxid (even by one XID). We can be much laxer here. |
2328 | | * |
2329 | | * Currently we always just accept an older final relfrozenxid |
2330 | | * and/or relminmxid value. We never make caller wait or work a |
2331 | | * little harder, even when it likely makes sense to do so. |
2332 | | */ |
2333 | 0 | } |
2334 | | |
2335 | 0 | ItemPointerSet(&(tuple.t_self), blkno, offnum); |
2336 | 0 | tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); |
2337 | 0 | tuple.t_len = ItemIdGetLength(itemid); |
2338 | 0 | tuple.t_tableOid = RelationGetRelid(vacrel->rel); |
2339 | |
2340 | 0 | switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin, |
2341 | 0 | buf)) |
2342 | 0 | { |
2343 | 0 | case HEAPTUPLE_DELETE_IN_PROGRESS: |
2344 | 0 | case HEAPTUPLE_LIVE: |
2345 | | |
2346 | | /* |
2347 | | * Count both cases as live, just like lazy_scan_prune |
2348 | | */ |
2349 | 0 | live_tuples++; |
2350 | |
2351 | 0 | break; |
2352 | 0 | case HEAPTUPLE_DEAD: |
2353 | | |
2354 | | /* |
2355 | | * There is some useful work for pruning to do, that won't be |
2356 | | * done due to failure to get a cleanup lock. |
2357 | | */ |
2358 | 0 | missed_dead_tuples++; |
2359 | 0 | break; |
2360 | 0 | case HEAPTUPLE_RECENTLY_DEAD: |
2361 | | |
2362 | | /* |
2363 | | * Count in recently_dead_tuples, just like lazy_scan_prune |
2364 | | */ |
2365 | 0 | recently_dead_tuples++; |
2366 | 0 | break; |
2367 | 0 | case HEAPTUPLE_INSERT_IN_PROGRESS: |
2368 | | |
2369 | | /* |
2370 | | * Do not count these rows as live, just like lazy_scan_prune |
2371 | | */ |
2372 | 0 | break; |
2373 | 0 | default: |
2374 | 0 | elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); |
2375 | 0 | break; |
2376 | 0 | } |
2377 | 0 | } |
2378 | | |
2379 | 0 | vacrel->offnum = InvalidOffsetNumber; |
2380 | | |
2381 | | /* |
2382 | | * By here we know for sure that caller can put off freezing and pruning |
2383 | | * this particular page until the next VACUUM. Remember its details now. |
2384 | | * (lazy_scan_prune expects a clean slate, so we have to do this last.) |
2385 | | */ |
2386 | 0 | vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid; |
2387 | 0 | vacrel->NewRelminMxid = NoFreezePageRelminMxid; |
2388 | | |
2389 | | /* Save any LP_DEAD items found on the page in dead_items */ |
2390 | 0 | if (vacrel->nindexes == 0) |
2391 | 0 | { |
2392 | | /* Using one-pass strategy (since table has no indexes) */ |
2393 | 0 | if (lpdead_items > 0) |
2394 | 0 | { |
2395 | | /* |
2396 | | * Perfunctory handling for the corner case where a single pass |
2397 | | * strategy VACUUM cannot get a cleanup lock, and it turns out |
2398 | | * that there is one or more LP_DEAD items: just count the LP_DEAD |
2399 | | * items as missed_dead_tuples instead. (This is a bit dishonest, |
2400 | | * but it beats having to maintain specialized heap vacuuming code |
2401 | | * forever, for vanishingly little benefit.) |
2402 | | */ |
2403 | 0 | hastup = true; |
2404 | 0 | missed_dead_tuples += lpdead_items; |
2405 | 0 | } |
2406 | 0 | } |
2407 | 0 | else if (lpdead_items > 0) |
2408 | 0 | { |
2409 | | /* |
2410 | | * Page has LP_DEAD items, and so any references/TIDs that remain in |
2411 | | * indexes will be deleted during index vacuuming (and then marked |
2412 | | * LP_UNUSED in the heap) |
2413 | | */ |
2414 | 0 | vacrel->lpdead_item_pages++; |
2415 | |
2416 | 0 | dead_items_add(vacrel, blkno, deadoffsets, lpdead_items); |
2417 | |
2418 | 0 | vacrel->lpdead_items += lpdead_items; |
2419 | 0 | } |
2420 | | |
2421 | | /* |
2422 | | * Finally, add relevant page-local counts to whole-VACUUM counts |
2423 | | */ |
2424 | 0 | vacrel->live_tuples += live_tuples; |
2425 | 0 | vacrel->recently_dead_tuples += recently_dead_tuples; |
2426 | 0 | vacrel->missed_dead_tuples += missed_dead_tuples; |
2427 | 0 | if (missed_dead_tuples > 0) |
2428 | 0 | vacrel->missed_dead_pages++; |
2429 | | |
2430 | | /* Can't truncate this page */ |
2431 | 0 | if (hastup) |
2432 | 0 | vacrel->nonempty_pages = blkno + 1; |
2433 | | |
2434 | | /* Did we find LP_DEAD items? */ |
2435 | 0 | *has_lpdead_items = (lpdead_items > 0); |
2436 | | |
2437 | | /* Caller won't need to call lazy_scan_prune with same page */ |
2438 | 0 | return true; |
2439 | 0 | } |
2440 | | |
2441 | | /* |
2442 | | * Main entry point for index vacuuming and heap vacuuming. |
2443 | | * |
2444 | | * Removes items collected in dead_items from table's indexes, then marks the |
2445 | | * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap |
2446 | | * for full details. |
2447 | | * |
2448 | | * Also empties dead_items, freeing up space for later TIDs. |
2449 | | * |
2450 | | * We may choose to bypass index vacuuming at this point, though only when the |
2451 | | * ongoing VACUUM operation will definitely only have one index scan/round of |
2452 | | * index vacuuming. |
2453 | | */ |
2454 | | static void |
2455 | | lazy_vacuum(LVRelState *vacrel) |
2456 | 0 | { |
2457 | 0 | bool bypass; |
2458 | | |
2459 | | /* Should not end up here with no indexes */ |
2460 | 0 | Assert(vacrel->nindexes > 0); |
2461 | 0 | Assert(vacrel->lpdead_item_pages > 0); |
2462 | |
2463 | 0 | if (!vacrel->do_index_vacuuming) |
2464 | 0 | { |
2465 | 0 | Assert(!vacrel->do_index_cleanup); |
2466 | 0 | dead_items_reset(vacrel); |
2467 | 0 | return; |
2468 | 0 | } |
2469 | | |
2470 | | /* |
2471 | | * Consider bypassing index vacuuming (and heap vacuuming) entirely. |
2472 | | * |
2473 | | * We currently only do this in cases where the number of LP_DEAD items |
2474 | | * for the entire VACUUM operation is close to zero. This avoids sharp |
2475 | | * discontinuities in the duration and overhead of successive VACUUM |
2476 | | * operations that run against the same table with a fixed workload. |
2477 | | * Ideally, successive VACUUM operations will behave as if there are |
2478 | | * exactly zero LP_DEAD items in cases where there are close to zero. |
2479 | | * |
2480 | | * This is likely to be helpful with a table that is continually affected |
2481 | | * by UPDATEs that can mostly apply the HOT optimization, but occasionally |
2482 | | * have small aberrations that lead to just a few heap pages retaining |
2483 | | * only one or two LP_DEAD items. This is pretty common; even when the |
2484 | | * DBA goes out of their way to make UPDATEs use HOT, it is practically |
2485 | | * impossible to predict whether HOT will be applied in 100% of cases. |
2486 | | * It's far easier to ensure that 99%+ of all UPDATEs against a table use |
2487 | | * HOT through careful tuning. |
2488 | | */ |
2489 | 0 | bypass = false; |
2490 | 0 | if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0) |
2491 | 0 | { |
2492 | 0 | BlockNumber threshold; |
2493 | |
2494 | 0 | Assert(vacrel->num_index_scans == 0); |
2495 | 0 | Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items); |
2496 | 0 | Assert(vacrel->do_index_vacuuming); |
2497 | 0 | Assert(vacrel->do_index_cleanup); |
2498 | | |
2499 | | /* |
2500 | | * This crossover point at which we'll start to do index vacuuming is |
2501 | | * expressed as a percentage of the total number of heap pages in the |
2502 | | * table that are known to have at least one LP_DEAD item. This is |
2503 | | * much more important than the total number of LP_DEAD items, since |
2504 | | * it's a proxy for the number of heap pages whose visibility map bits |
2505 | | * cannot be set on account of bypassing index and heap vacuuming. |
2506 | | * |
2507 | | * We apply one further precautionary test: the space currently used |
2508 | | * to store the TIDs (TIDs that now all point to LP_DEAD items) must |
2509 | | * not exceed 32MB. This limits the risk that we will bypass index |
2510 | | * vacuuming again and again until eventually there is a VACUUM whose |
2511 | | * dead_items space is not CPU cache resident. |
2512 | | * |
2513 | | * We don't take any special steps to remember the LP_DEAD items (such |
2514 | | * as counting them in our final update to the stats system) when the |
2515 | | * optimization is applied. Though the accounting used in analyze.c's |
2516 | | * acquire_sample_rows() will recognize the same LP_DEAD items as dead |
2517 | | * rows in its own stats report, that's okay. The discrepancy should |
2518 | | * be negligible. If this optimization is ever expanded to cover more |
2519 | | * cases then this may need to be reconsidered. |
2520 | | */ |
2521 | 0 | threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES; |
2522 | 0 | bypass = (vacrel->lpdead_item_pages < threshold && |
2523 | 0 | TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024); |
2524 | 0 | } |
2525 | |
|
2526 | 0 | if (bypass) |
2527 | 0 | { |
2528 | | /* |
2529 | | * There are almost zero TIDs. Behave as if there were precisely |
2530 | | * zero: bypass index vacuuming, but do index cleanup. |
2531 | | * |
2532 | | * We expect that the ongoing VACUUM operation will finish very |
2533 | | * quickly, so there is no point in considering speeding up as a |
2534 | | * failsafe against wraparound failure. (Index cleanup is expected to |
2535 | | * finish very quickly in cases where there were no ambulkdelete() |
2536 | | * calls.) |
2537 | | */ |
2538 | 0 | vacrel->do_index_vacuuming = false; |
2539 | 0 | } |
2540 | 0 | else if (lazy_vacuum_all_indexes(vacrel)) |
2541 | 0 | { |
2542 | | /* |
2543 | | * We successfully completed a round of index vacuuming. Do related |
2544 | | * heap vacuuming now. |
2545 | | */ |
2546 | 0 | lazy_vacuum_heap_rel(vacrel); |
2547 | 0 | } |
2548 | 0 | else |
2549 | 0 | { |
2550 | | /* |
2551 | | * Failsafe case. |
2552 | | * |
2553 | | * We attempted index vacuuming, but didn't finish a full round/full |
2554 | | * index scan. This happens when relfrozenxid or relminmxid is too |
2555 | | * far in the past. |
2556 | | * |
2557 | | * From this point on the VACUUM operation will do no further index |
2558 | | * vacuuming or heap vacuuming. This VACUUM operation won't end up |
2559 | | * back here again. |
2560 | | */ |
2561 | 0 | Assert(VacuumFailsafeActive); |
2562 | 0 | } |
2563 | | |
2564 | | /* |
2565 | | * Forget the LP_DEAD items that we just vacuumed (or just decided to not |
2566 | | * vacuum) |
2567 | | */ |
2568 | 0 | dead_items_reset(vacrel); |
2569 | 0 | } |
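/*
 * A worked example of the bypass crossover above -- a sketch only, assuming
 * BYPASS_THRESHOLD_PAGES is the 2% fraction defined near the top of this
 * file, and a table of 500,000 heap pages:
 *
 *   threshold = 500000 * 0.02 = 10000 pages
 *   bypass    = (lpdead_item_pages < 10000) &&
 *               (TidStoreMemoryUsage(dead_items) < 32MB)
 *
 * In other words, index and heap vacuuming are skipped only while fewer
 * than 2% of the table's pages carry LP_DEAD items and the accumulated
 * TIDs still fit in a CPU-cache-friendly amount of memory.
 */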
2570 | | |
2571 | | /* |
2572 | | * lazy_vacuum_all_indexes() -- Main entry for index vacuuming |
2573 | | * |
2574 | | * Returns true in the common case when all indexes were successfully |
2575 | | * vacuumed. Returns false in rare cases where we determined that the ongoing |
2576 | | * VACUUM operation is at risk of taking too long to finish, leading to |
2577 | | * wraparound failure. |
2578 | | */ |
2579 | | static bool |
2580 | | lazy_vacuum_all_indexes(LVRelState *vacrel) |
2581 | 0 | { |
2582 | 0 | bool allindexes = true; |
2583 | 0 | double old_live_tuples = vacrel->rel->rd_rel->reltuples; |
2584 | 0 | const int progress_start_index[] = { |
2585 | 0 | PROGRESS_VACUUM_PHASE, |
2586 | 0 | PROGRESS_VACUUM_INDEXES_TOTAL |
2587 | 0 | }; |
2588 | 0 | const int progress_end_index[] = { |
2589 | 0 | PROGRESS_VACUUM_INDEXES_TOTAL, |
2590 | 0 | PROGRESS_VACUUM_INDEXES_PROCESSED, |
2591 | 0 | PROGRESS_VACUUM_NUM_INDEX_VACUUMS |
2592 | 0 | }; |
2593 | 0 | int64 progress_start_val[2]; |
2594 | 0 | int64 progress_end_val[3]; |
2595 | |
|
2596 | 0 | Assert(vacrel->nindexes > 0); |
2597 | 0 | Assert(vacrel->do_index_vacuuming); |
2598 | 0 | Assert(vacrel->do_index_cleanup); |
2599 | | |
2600 | | /* Precheck for XID wraparound emergencies */ |
2601 | 0 | if (lazy_check_wraparound_failsafe(vacrel)) |
2602 | 0 | { |
2603 | | /* Wraparound emergency -- don't even start an index scan */ |
2604 | 0 | return false; |
2605 | 0 | } |
2606 | | |
2607 | | /* |
2608 | | * Report that we are now vacuuming indexes and the number of indexes to |
2609 | | * vacuum. |
2610 | | */ |
2611 | 0 | progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX; |
2612 | 0 | progress_start_val[1] = vacrel->nindexes; |
2613 | 0 | pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val); |
2614 | |
|
2615 | 0 | if (!ParallelVacuumIsActive(vacrel)) |
2616 | 0 | { |
2617 | 0 | for (int idx = 0; idx < vacrel->nindexes; idx++) |
2618 | 0 | { |
2619 | 0 | Relation indrel = vacrel->indrels[idx]; |
2620 | 0 | IndexBulkDeleteResult *istat = vacrel->indstats[idx]; |
2621 | |
|
2622 | 0 | vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat, |
2623 | 0 | old_live_tuples, |
2624 | 0 | vacrel); |
2625 | | |
2626 | | /* Report the number of indexes vacuumed */ |
2627 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED, |
2628 | 0 | idx + 1); |
2629 | |
|
2630 | 0 | if (lazy_check_wraparound_failsafe(vacrel)) |
2631 | 0 | { |
2632 | | /* Wraparound emergency -- end current index scan */ |
2633 | 0 | allindexes = false; |
2634 | 0 | break; |
2635 | 0 | } |
2636 | 0 | } |
2637 | 0 | } |
2638 | 0 | else |
2639 | 0 | { |
2640 | | /* Outsource everything to parallel variant */ |
2641 | 0 | parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples, |
2642 | 0 | vacrel->num_index_scans); |
2643 | | |
2644 | | /* |
2645 | | * Do a postcheck to consider applying wraparound failsafe now. Note |
2646 | | * that parallel VACUUM only gets the precheck and this postcheck. |
2647 | | */ |
2648 | 0 | if (lazy_check_wraparound_failsafe(vacrel)) |
2649 | 0 | allindexes = false; |
2650 | 0 | } |
2651 | | |
2652 | | /* |
2653 | | * We delete all LP_DEAD items from the first heap pass in all indexes on |
2654 | | * each call here (except calls where we choose to do the failsafe). This |
2655 | | * makes the next call to lazy_vacuum_heap_rel() safe (except in the event |
2656 | | * of the failsafe triggering, which prevents the next call from taking |
2657 | | * place). |
2658 | | */ |
2659 | 0 | Assert(vacrel->num_index_scans > 0 || |
2660 | 0 | vacrel->dead_items_info->num_items == vacrel->lpdead_items); |
2661 | 0 | Assert(allindexes || VacuumFailsafeActive); |
2662 | | |
2663 | | /* |
2664 | | * Increase and report the number of index scans. Also, we reset |
2665 | | * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED. |
2666 | | * |
2667 | | * We deliberately include the case where we started a round of bulk |
2668 | | * deletes that we weren't able to finish due to the failsafe triggering. |
2669 | | */ |
2670 | 0 | vacrel->num_index_scans++; |
2671 | 0 | progress_end_val[0] = 0; |
2672 | 0 | progress_end_val[1] = 0; |
2673 | 0 | progress_end_val[2] = vacrel->num_index_scans; |
2674 | 0 | pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val); |
2675 | |
|
2676 | 0 | return allindexes; |
2677 | 0 | } |
2678 | | |
2679 | | /* |
2680 | | * Read stream callback for vacuum's third phase (second pass over the heap). |
2681 | | * Gets the next block from the TID store and returns it or InvalidBlockNumber |
2682 | | * if there are no further blocks to vacuum. |
2683 | | * |
2684 | | * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING. |
2685 | | */ |
2686 | | static BlockNumber |
2687 | | vacuum_reap_lp_read_stream_next(ReadStream *stream, |
2688 | | void *callback_private_data, |
2689 | | void *per_buffer_data) |
2690 | 0 | { |
2691 | 0 | TidStoreIter *iter = callback_private_data; |
2692 | 0 | TidStoreIterResult *iter_result; |
2693 | |
|
2694 | 0 | iter_result = TidStoreIterateNext(iter); |
2695 | 0 | if (iter_result == NULL) |
2696 | 0 | return InvalidBlockNumber; |
2697 | | |
2698 | | /* |
2699 | | * Save the TidStoreIterResult for later, so we can extract the offsets. |
2700 | | * It is safe to copy the result, according to TidStoreIterateNext(). |
2701 | | */ |
2702 | 0 | memcpy(per_buffer_data, iter_result, sizeof(*iter_result)); |
2703 | |
|
2704 | 0 | return iter_result->blkno; |
2705 | 0 | } |
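/*
 * A minimal sketch of the contract between this callback and its consumer:
 * the per_buffer_data slot filled in here is the same per-buffer area,
 * sized by the final argument of read_stream_begin_relation(), that
 * read_stream_next_buffer() later hands back alongside the pinned buffer:
 *
 *   stream = read_stream_begin_relation(..., vacuum_reap_lp_read_stream_next,
 *                                       iter, sizeof(TidStoreIterResult));
 *   buf = read_stream_next_buffer(stream, (void **) &iter_result);
 *   (iter_result now points at the copy memcpy'd above for buf's block)
 *
 * The copy made above therefore stays valid until the consumer has
 * processed the corresponding block, independent of further iterator
 * advancement.
 */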
2706 | | |
2707 | | /* |
2708 | | * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy |
2709 | | * |
2710 | | * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages |
2711 | | * that never had lazy_scan_prune record LP_DEAD items are not visited at all. |
2712 | | * |
2713 | | * We may also be able to truncate the line pointer array of the heap pages we |
2714 | | * visit. If there is a contiguous group of LP_UNUSED items at the end of the |
2715 | | * array, it can be reclaimed as free space. These LP_UNUSED items usually |
2716 | | * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from |
2717 | | * each page to LP_UNUSED, and then consider if it's possible to truncate the |
2718 | | * page's line pointer array). |
2719 | | * |
2720 | | * Note: the reason for doing this as a second pass is we cannot remove the |
2721 | | * tuples until we've removed their index entries, and we want to process |
2722 | | * index entry removal in batches as large as possible. |
2723 | | */ |
2724 | | static void |
2725 | | lazy_vacuum_heap_rel(LVRelState *vacrel) |
2726 | 0 | { |
2727 | 0 | ReadStream *stream; |
2728 | 0 | BlockNumber vacuumed_pages = 0; |
2729 | 0 | Buffer vmbuffer = InvalidBuffer; |
2730 | 0 | LVSavedErrInfo saved_err_info; |
2731 | 0 | TidStoreIter *iter; |
2732 | |
|
2733 | 0 | Assert(vacrel->do_index_vacuuming); |
2734 | 0 | Assert(vacrel->do_index_cleanup); |
2735 | 0 | Assert(vacrel->num_index_scans > 0); |
2736 | | |
2737 | | /* Report that we are now vacuuming the heap */ |
2738 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, |
2739 | 0 | PROGRESS_VACUUM_PHASE_VACUUM_HEAP); |
2740 | | |
2741 | | /* Update error traceback information */ |
2742 | 0 | update_vacuum_error_info(vacrel, &saved_err_info, |
2743 | 0 | VACUUM_ERRCB_PHASE_VACUUM_HEAP, |
2744 | 0 | InvalidBlockNumber, InvalidOffsetNumber); |
2745 | |
|
2746 | 0 | iter = TidStoreBeginIterate(vacrel->dead_items); |
2747 | | |
2748 | | /* |
2749 | | * Set up the read stream for vacuum's second pass through the heap. |
2750 | | * |
2751 | | * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does |
2752 | | * not need to wait for IO and does not perform locking. Once we support |
2753 | | * parallelism it should still be fine, as presumably the holder of locks |
2754 | | * would never be blocked by IO while holding the lock. |
2755 | | */ |
2756 | 0 | stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE | |
2757 | 0 | READ_STREAM_USE_BATCHING, |
2758 | 0 | vacrel->bstrategy, |
2759 | 0 | vacrel->rel, |
2760 | 0 | MAIN_FORKNUM, |
2761 | 0 | vacuum_reap_lp_read_stream_next, |
2762 | 0 | iter, |
2763 | 0 | sizeof(TidStoreIterResult)); |
2764 | |
|
2765 | 0 | while (true) |
2766 | 0 | { |
2767 | 0 | BlockNumber blkno; |
2768 | 0 | Buffer buf; |
2769 | 0 | Page page; |
2770 | 0 | TidStoreIterResult *iter_result; |
2771 | 0 | Size freespace; |
2772 | 0 | OffsetNumber offsets[MaxOffsetNumber]; |
2773 | 0 | int num_offsets; |
2774 | |
|
2775 | 0 | vacuum_delay_point(false); |
2776 | |
|
2777 | 0 | buf = read_stream_next_buffer(stream, (void **) &iter_result); |
2778 | | |
2779 | | /* The relation is exhausted */ |
2780 | 0 | if (!BufferIsValid(buf)) |
2781 | 0 | break; |
2782 | | |
2783 | 0 | vacrel->blkno = blkno = BufferGetBlockNumber(buf); |
2784 | |
|
2785 | 0 | Assert(iter_result); |
2786 | 0 | num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets)); |
2787 | 0 | Assert(num_offsets <= lengthof(offsets)); |
2788 | | |
2789 | | /* |
2790 | | * Pin the visibility map page in case we need to mark the page |
2791 | | * all-visible. In most cases this will be very cheap, because we'll |
2792 | | * already have the correct page pinned anyway. |
2793 | | */ |
2794 | 0 | visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); |
2795 | | |
2796 | | /* We need a non-cleanup exclusive lock to mark dead_items unused */ |
2797 | 0 | LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); |
2798 | 0 | lazy_vacuum_heap_page(vacrel, blkno, buf, offsets, |
2799 | 0 | num_offsets, vmbuffer); |
2800 | | |
2801 | | /* Now that we've vacuumed the page, record its available space */ |
2802 | 0 | page = BufferGetPage(buf); |
2803 | 0 | freespace = PageGetHeapFreeSpace(page); |
2804 | |
|
2805 | 0 | UnlockReleaseBuffer(buf); |
2806 | 0 | RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); |
2807 | 0 | vacuumed_pages++; |
2808 | 0 | } |
2809 | |
|
2810 | 0 | read_stream_end(stream); |
2811 | 0 | TidStoreEndIterate(iter); |
2812 | |
|
2813 | 0 | vacrel->blkno = InvalidBlockNumber; |
2814 | 0 | if (BufferIsValid(vmbuffer)) |
2815 | 0 | ReleaseBuffer(vmbuffer); |
2816 | | |
2817 | | /* |
2818 | | * We set all LP_DEAD items from the first heap pass to LP_UNUSED during |
2819 | | * the second heap pass. No more, no less. |
2820 | | */ |
2821 | 0 | Assert(vacrel->num_index_scans > 1 || |
2822 | 0 | (vacrel->dead_items_info->num_items == vacrel->lpdead_items && |
2823 | 0 | vacuumed_pages == vacrel->lpdead_item_pages)); |
2824 | |
|
2825 | 0 | ereport(DEBUG2, |
2826 | 0 | (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages", |
2827 | 0 | vacrel->relname, vacrel->dead_items_info->num_items, |
2828 | 0 | vacuumed_pages))); |
2829 | | |
2830 | | /* Revert to the previous phase information for error traceback */ |
2831 | 0 | restore_vacuum_error_info(vacrel, &saved_err_info); |
2832 | 0 | } |
2833 | | |
2834 | | /* |
2835 | | * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the |
2836 | | * vacrel->dead_items store. |
2837 | | * |
2838 | | * Caller must have an exclusive buffer lock on the buffer (though a full |
2839 | | * cleanup lock is also acceptable). vmbuffer must be valid and already have |
2840 | | * a pin on blkno's visibility map page. |
2841 | | */ |
2842 | | static void |
2843 | | lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, |
2844 | | OffsetNumber *deadoffsets, int num_offsets, |
2845 | | Buffer vmbuffer) |
2846 | 0 | { |
2847 | 0 | Page page = BufferGetPage(buffer); |
2848 | 0 | OffsetNumber unused[MaxHeapTuplesPerPage]; |
2849 | 0 | int nunused = 0; |
2850 | 0 | TransactionId visibility_cutoff_xid; |
2851 | 0 | bool all_frozen; |
2852 | 0 | LVSavedErrInfo saved_err_info; |
2853 | |
|
2854 | 0 | Assert(vacrel->do_index_vacuuming); |
2855 | |
|
2856 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); |
2857 | | |
2858 | | /* Update error traceback information */ |
2859 | 0 | update_vacuum_error_info(vacrel, &saved_err_info, |
2860 | 0 | VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno, |
2861 | 0 | InvalidOffsetNumber); |
2862 | |
|
2863 | 0 | START_CRIT_SECTION(); |
2864 | |
|
2865 | 0 | for (int i = 0; i < num_offsets; i++) |
2866 | 0 | { |
2867 | 0 | ItemId itemid; |
2868 | 0 | OffsetNumber toff = deadoffsets[i]; |
2869 | |
|
2870 | 0 | itemid = PageGetItemId(page, toff); |
2871 | |
|
2872 | 0 | Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid)); |
2873 | 0 | ItemIdSetUnused(itemid); |
2874 | 0 | unused[nunused++] = toff; |
2875 | 0 | } |
2876 | |
|
2877 | 0 | Assert(nunused > 0); |
2878 | | |
2879 | | /* Attempt to truncate line pointer array now */ |
2880 | 0 | PageTruncateLinePointerArray(page); |
2881 | | |
2882 | | /* |
2883 | | * Mark buffer dirty before we write WAL. |
2884 | | */ |
2885 | 0 | MarkBufferDirty(buffer); |
2886 | | |
2887 | | /* XLOG stuff */ |
2888 | 0 | if (RelationNeedsWAL(vacrel->rel)) |
2889 | 0 | { |
2890 | 0 | log_heap_prune_and_freeze(vacrel->rel, buffer, |
2891 | 0 | InvalidTransactionId, |
2892 | 0 | false, /* no cleanup lock required */ |
2893 | 0 | PRUNE_VACUUM_CLEANUP, |
2894 | 0 | NULL, 0, /* frozen */ |
2895 | 0 | NULL, 0, /* redirected */ |
2896 | 0 | NULL, 0, /* dead */ |
2897 | 0 | unused, nunused); |
2898 | 0 | } |
2899 | | |
2900 | | /* |
2901 | | * End critical section, so we can safely do visibility tests (which |
2902 | | * possibly need to perform IO and allocate memory!). If we crash now the |
2903 | | * page (including the corresponding vm bit) might not be marked all |
2904 | | * visible, but that's fine. A later vacuum will fix that. |
2905 | | */ |
2906 | 0 | END_CRIT_SECTION(); |
2907 | | |
2908 | | /* |
2909 | | * Now that we have removed the LP_DEAD items from the page, once again |
2910 | | * check if the page has become all-visible. The page is already marked |
2911 | | * dirty, exclusively locked, and, if needed, a full page image has been |
2912 | | * emitted. |
2913 | | */ |
2914 | 0 | Assert(!PageIsAllVisible(page)); |
2915 | 0 | if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid, |
2916 | 0 | &all_frozen)) |
2917 | 0 | { |
2918 | 0 | uint8 old_vmbits; |
2919 | 0 | uint8 flags = VISIBILITYMAP_ALL_VISIBLE; |
2920 | |
|
2921 | 0 | if (all_frozen) |
2922 | 0 | { |
2923 | 0 | Assert(!TransactionIdIsValid(visibility_cutoff_xid)); |
2924 | 0 | flags |= VISIBILITYMAP_ALL_FROZEN; |
2925 | 0 | } |
2926 | |
|
2927 | 0 | PageSetAllVisible(page); |
2928 | 0 | old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer, |
2929 | 0 | InvalidXLogRecPtr, |
2930 | 0 | vmbuffer, visibility_cutoff_xid, |
2931 | 0 | flags); |
2932 | | |
2933 | | /* |
2934 | | * If the page wasn't already set all-visible and/or all-frozen in the |
2935 | | * VM, count it as newly set for logging. |
2936 | | */ |
2937 | 0 | if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) |
2938 | 0 | { |
2939 | 0 | vacrel->vm_new_visible_pages++; |
2940 | 0 | if (all_frozen) |
2941 | 0 | vacrel->vm_new_visible_frozen_pages++; |
2942 | 0 | } |
2943 | | |
2944 | 0 | else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && |
2945 | 0 | all_frozen) |
2946 | 0 | vacrel->vm_new_frozen_pages++; |
2947 | 0 | } |
2948 | | |
2949 | | /* Revert to the previous phase information for error traceback */ |
2950 | 0 | restore_vacuum_error_info(vacrel, &saved_err_info); |
2951 | 0 | } |
2952 | | |
2953 | | /* |
2954 | | * Trigger the failsafe to avoid wraparound failure when the target table has a |
2955 | | * relfrozenxid and/or relminmxid that is dangerously far in the past. |
2956 | | * Triggering the failsafe makes the ongoing VACUUM bypass any further index |
2957 | | * vacuuming and heap vacuuming. Truncating the heap is also bypassed. |
2958 | | * |
2959 | | * Any remaining work (work that VACUUM cannot just bypass) is typically sped |
2960 | | * up when the failsafe triggers. VACUUM stops applying any cost-based delay |
2961 | | * that it started out with. |
2962 | | * |
2963 | | * Returns true when failsafe has been triggered. |
2964 | | */ |
2965 | | static bool |
2966 | | lazy_check_wraparound_failsafe(LVRelState *vacrel) |
2967 | 0 | { |
2968 | | /* Don't warn more than once per VACUUM */ |
2969 | 0 | if (VacuumFailsafeActive) |
2970 | 0 | return true; |
2971 | | |
2972 | 0 | if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs))) |
2973 | 0 | { |
2974 | 0 | const int progress_index[] = { |
2975 | 0 | PROGRESS_VACUUM_INDEXES_TOTAL, |
2976 | 0 | PROGRESS_VACUUM_INDEXES_PROCESSED |
2977 | 0 | }; |
2978 | 0 | int64 progress_val[2] = {0, 0}; |
2979 | |
|
2980 | 0 | VacuumFailsafeActive = true; |
2981 | | |
2982 | | /* |
2983 | | * Abandon use of a buffer access strategy to allow use of all of |
2984 | | * shared buffers. We assume the caller who allocated the memory for |
2985 | | * the BufferAccessStrategy will free it. |
2986 | | */ |
2987 | 0 | vacrel->bstrategy = NULL; |
2988 | | |
2989 | | /* Disable index vacuuming, index cleanup, and heap rel truncation */ |
2990 | 0 | vacrel->do_index_vacuuming = false; |
2991 | 0 | vacrel->do_index_cleanup = false; |
2992 | 0 | vacrel->do_rel_truncate = false; |
2993 | | |
2994 | | /* Reset the progress counters */ |
2995 | 0 | pgstat_progress_update_multi_param(2, progress_index, progress_val); |
2996 | |
|
2997 | 0 | ereport(WARNING, |
2998 | 0 | (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans", |
2999 | 0 | vacrel->dbname, vacrel->relnamespace, vacrel->relname, |
3000 | 0 | vacrel->num_index_scans), |
3001 | 0 | errdetail("The table's relfrozenxid or relminmxid is too far in the past."), |
3002 | 0 | errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n" |
3003 | 0 | "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); |
3004 | | |
3005 | | /* Stop applying cost limits from this point on */ |
3006 | 0 | VacuumCostActive = false; |
3007 | 0 | VacuumCostBalance = 0; |
3008 | |
|
3009 | 0 | return true; |
3010 | 0 | } |
3011 | | |
3012 | 0 | return false; |
3013 | 0 | } |
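/*
 * A sketch of how this check is polled, assuming the FAILSAFE_EVERY_PAGES
 * constant defined near the top of this file (roughly 4GB worth of heap
 * pages at the default 8kB block size):
 *
 *   Phase I (heap scan), roughly once per FAILSAFE_EVERY_PAGES pages:
 *
 *       if (vacrel->scanned_pages > 0 &&
 *           vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
 *           lazy_check_wraparound_failsafe(vacrel);
 *
 *   Phase II: lazy_vacuum_all_indexes() calls it before the index scan and
 *   again after each index (or after the parallel bulk delete), abandoning
 *   further index vacuuming when it returns true.
 *
 * Once triggered, VacuumFailsafeActive stays set, so later calls are a
 * cheap early return and the disabled do_* flags remain off for the rest
 * of this VACUUM.
 */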
3014 | | |
3015 | | /* |
3016 | | * lazy_cleanup_all_indexes() -- cleanup all indexes of relation. |
3017 | | */ |
3018 | | static void |
3019 | | lazy_cleanup_all_indexes(LVRelState *vacrel) |
3020 | 0 | { |
3021 | 0 | double reltuples = vacrel->new_rel_tuples; |
3022 | 0 | bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages; |
3023 | 0 | const int progress_start_index[] = { |
3024 | 0 | PROGRESS_VACUUM_PHASE, |
3025 | 0 | PROGRESS_VACUUM_INDEXES_TOTAL |
3026 | 0 | }; |
3027 | 0 | const int progress_end_index[] = { |
3028 | 0 | PROGRESS_VACUUM_INDEXES_TOTAL, |
3029 | 0 | PROGRESS_VACUUM_INDEXES_PROCESSED |
3030 | 0 | }; |
3031 | 0 | int64 progress_start_val[2]; |
3032 | 0 | int64 progress_end_val[2] = {0, 0}; |
3033 | |
|
3034 | 0 | Assert(vacrel->do_index_cleanup); |
3035 | 0 | Assert(vacrel->nindexes > 0); |
3036 | | |
3037 | | /* |
3038 | | * Report that we are now cleaning up indexes and the number of indexes to |
3039 | | * clean up. |
3040 | | */ |
3041 | 0 | progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP; |
3042 | 0 | progress_start_val[1] = vacrel->nindexes; |
3043 | 0 | pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val); |
3044 | |
|
3045 | 0 | if (!ParallelVacuumIsActive(vacrel)) |
3046 | 0 | { |
3047 | 0 | for (int idx = 0; idx < vacrel->nindexes; idx++) |
3048 | 0 | { |
3049 | 0 | Relation indrel = vacrel->indrels[idx]; |
3050 | 0 | IndexBulkDeleteResult *istat = vacrel->indstats[idx]; |
3051 | |
|
3052 | 0 | vacrel->indstats[idx] = |
3053 | 0 | lazy_cleanup_one_index(indrel, istat, reltuples, |
3054 | 0 | estimated_count, vacrel); |
3055 | | |
3056 | | /* Report the number of indexes cleaned up */ |
3057 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED, |
3058 | 0 | idx + 1); |
3059 | 0 | } |
3060 | 0 | } |
3061 | 0 | else |
3062 | 0 | { |
3063 | | /* Outsource everything to parallel variant */ |
3064 | 0 | parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples, |
3065 | 0 | vacrel->num_index_scans, |
3066 | 0 | estimated_count); |
3067 | 0 | } |
3068 | | |
3069 | | /* Reset the progress counters */ |
3070 | 0 | pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val); |
3071 | 0 | } |
3072 | | |
3073 | | /* |
3074 | | * lazy_vacuum_one_index() -- vacuum index relation. |
3075 | | * |
3076 | | * Delete all the index tuples containing a TID collected in |
3077 | | * vacrel->dead_items. Also update running statistics. Exact |
3078 | | * details depend on index AM's ambulkdelete routine. |
3079 | | * |
3080 | | * reltuples is the number of heap tuples to be passed to the |
3081 | | * bulkdelete callback. It's always assumed to be estimated. |
3082 | | * See indexam.sgml for more info. |
3083 | | * |
3084 | | * Returns bulk delete stats derived from input stats |
3085 | | */ |
3086 | | static IndexBulkDeleteResult * |
3087 | | lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, |
3088 | | double reltuples, LVRelState *vacrel) |
3089 | 0 | { |
3090 | 0 | IndexVacuumInfo ivinfo; |
3091 | 0 | LVSavedErrInfo saved_err_info; |
3092 | |
|
3093 | 0 | ivinfo.index = indrel; |
3094 | 0 | ivinfo.heaprel = vacrel->rel; |
3095 | 0 | ivinfo.analyze_only = false; |
3096 | 0 | ivinfo.report_progress = false; |
3097 | 0 | ivinfo.estimated_count = true; |
3098 | 0 | ivinfo.message_level = DEBUG2; |
3099 | 0 | ivinfo.num_heap_tuples = reltuples; |
3100 | 0 | ivinfo.strategy = vacrel->bstrategy; |
3101 | | |
3102 | | /* |
3103 | | * Update error traceback information. |
3104 | | * |
3105 | | * The index name is saved during this phase and restored immediately |
3106 | | * after this phase. See vacuum_error_callback. |
3107 | | */ |
3108 | 0 | Assert(vacrel->indname == NULL); |
3109 | 0 | vacrel->indname = pstrdup(RelationGetRelationName(indrel)); |
3110 | 0 | update_vacuum_error_info(vacrel, &saved_err_info, |
3111 | 0 | VACUUM_ERRCB_PHASE_VACUUM_INDEX, |
3112 | 0 | InvalidBlockNumber, InvalidOffsetNumber); |
3113 | | |
3114 | | /* Do bulk deletion */ |
3115 | 0 | istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items, |
3116 | 0 | vacrel->dead_items_info); |
3117 | | |
3118 | | /* Revert to the previous phase information for error traceback */ |
3119 | 0 | restore_vacuum_error_info(vacrel, &saved_err_info); |
3120 | 0 | pfree(vacrel->indname); |
3121 | 0 | vacrel->indname = NULL; |
3122 | |
|
3123 | 0 | return istat; |
3124 | 0 | } |
3125 | | |
3126 | | /* |
3127 | | * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation. |
3128 | | * |
3129 | | * Calls index AM's amvacuumcleanup routine. reltuples is the number |
3130 | | * of heap tuples and estimated_count is true if reltuples is an |
3131 | | * estimated value. See indexam.sgml for more info. |
3132 | | * |
3133 | | * Returns bulk delete stats derived from input stats |
3134 | | */ |
3135 | | static IndexBulkDeleteResult * |
3136 | | lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, |
3137 | | double reltuples, bool estimated_count, |
3138 | | LVRelState *vacrel) |
3139 | 0 | { |
3140 | 0 | IndexVacuumInfo ivinfo; |
3141 | 0 | LVSavedErrInfo saved_err_info; |
3142 | |
|
3143 | 0 | ivinfo.index = indrel; |
3144 | 0 | ivinfo.heaprel = vacrel->rel; |
3145 | 0 | ivinfo.analyze_only = false; |
3146 | 0 | ivinfo.report_progress = false; |
3147 | 0 | ivinfo.estimated_count = estimated_count; |
3148 | 0 | ivinfo.message_level = DEBUG2; |
3149 | |
|
3150 | 0 | ivinfo.num_heap_tuples = reltuples; |
3151 | 0 | ivinfo.strategy = vacrel->bstrategy; |
3152 | | |
3153 | | /* |
3154 | | * Update error traceback information. |
3155 | | * |
3156 | | * The index name is saved during this phase and restored immediately |
3157 | | * after this phase. See vacuum_error_callback. |
3158 | | */ |
3159 | 0 | Assert(vacrel->indname == NULL); |
3160 | 0 | vacrel->indname = pstrdup(RelationGetRelationName(indrel)); |
3161 | 0 | update_vacuum_error_info(vacrel, &saved_err_info, |
3162 | 0 | VACUUM_ERRCB_PHASE_INDEX_CLEANUP, |
3163 | 0 | InvalidBlockNumber, InvalidOffsetNumber); |
3164 | |
|
3165 | 0 | istat = vac_cleanup_one_index(&ivinfo, istat); |
3166 | | |
3167 | | /* Revert to the previous phase information for error traceback */ |
3168 | 0 | restore_vacuum_error_info(vacrel, &saved_err_info); |
3169 | 0 | pfree(vacrel->indname); |
3170 | 0 | vacrel->indname = NULL; |
3171 | |
|
3172 | 0 | return istat; |
3173 | 0 | } |
3174 | | |
3175 | | /* |
3176 | | * should_attempt_truncation - should we attempt to truncate the heap? |
3177 | | * |
3178 | | * Don't even think about it unless we have a shot at releasing a goodly |
3179 | | * number of pages. Otherwise, the time taken isn't worth it, mainly because |
3180 | | * an AccessExclusive lock must be replayed on any hot standby, where it can |
3181 | | * be particularly disruptive. |
3182 | | * |
3183 | | * Also don't attempt it if wraparound failsafe is in effect. The entire |
3184 | | * system might be refusing to allocate new XIDs at this point. The system |
3185 | | * definitely won't return to normal unless and until VACUUM actually advances |
3186 | | * the oldest relfrozenxid -- which hasn't happened for target rel just yet. |
3187 | | * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to |
3188 | | * truncate the table under these circumstances, an XID exhaustion error might |
3189 | | * make it impossible for VACUUM to fix the underlying XID exhaustion problem. |
3190 | | * There is very little chance of truncation working out when the failsafe is |
3191 | | * in effect in any case. lazy_scan_prune makes the optimistic assumption |
3192 | | * that any LP_DEAD items it encounters will always be LP_UNUSED by the time |
3193 | | * we're called. |
3194 | | */ |
3195 | | static bool |
3196 | | should_attempt_truncation(LVRelState *vacrel) |
3197 | 0 | { |
3198 | 0 | BlockNumber possibly_freeable; |
3199 | |
|
3200 | 0 | if (!vacrel->do_rel_truncate || VacuumFailsafeActive) |
3201 | 0 | return false; |
3202 | | |
3203 | 0 | possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages; |
3204 | 0 | if (possibly_freeable > 0 && |
3205 | 0 | (possibly_freeable >= REL_TRUNCATE_MINIMUM || |
3206 | 0 | possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION)) |
3207 | 0 | return true; |
3208 | | |
3209 | 0 | return false; |
3210 | 0 | } |
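/*
 * A worked example of the test above -- a sketch assuming the values
 * REL_TRUNCATE_MINIMUM = 1000 and REL_TRUNCATE_FRACTION = 16 defined near
 * the top of this file. For a 64,000-page table whose last nonempty page
 * leaves 500 empty pages at the end:
 *
 *   possibly_freeable = 500
 *   500 >= 1000?               no
 *   500 >= 64000 / 16 = 4000?  no   => truncation is not attempted
 *
 * With 5,000 trailing empty pages instead, the fractional test
 * (5000 >= 4000) passes and lazy_truncate_heap() is attempted.
 */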
3211 | | |
3212 | | /* |
3213 | | * lazy_truncate_heap - try to truncate off any empty pages at the end |
3214 | | */ |
3215 | | static void |
3216 | | lazy_truncate_heap(LVRelState *vacrel) |
3217 | 0 | { |
3218 | 0 | BlockNumber orig_rel_pages = vacrel->rel_pages; |
3219 | 0 | BlockNumber new_rel_pages; |
3220 | 0 | bool lock_waiter_detected; |
3221 | 0 | int lock_retry; |
3222 | | |
3223 | | /* Report that we are now truncating */ |
3224 | 0 | pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, |
3225 | 0 | PROGRESS_VACUUM_PHASE_TRUNCATE); |
3226 | | |
3227 | | /* Update error traceback information one last time */ |
3228 | 0 | update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE, |
3229 | 0 | vacrel->nonempty_pages, InvalidOffsetNumber); |
3230 | | |
3231 | | /* |
3232 | | * Loop until no more truncating can be done. |
3233 | | */ |
3234 | 0 | do |
3235 | 0 | { |
3236 | | /* |
3237 | | * We need full exclusive lock on the relation in order to do |
3238 | | * truncation. If we can't get it, give up rather than waiting --- we |
3239 | | * don't want to block other backends, and we don't want to deadlock |
3240 | | * (which is quite possible considering we already hold a lower-grade |
3241 | | * lock). |
3242 | | */ |
3243 | 0 | lock_waiter_detected = false; |
3244 | 0 | lock_retry = 0; |
3245 | 0 | while (true) |
3246 | 0 | { |
3247 | 0 | if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock)) |
3248 | 0 | break; |
3249 | | |
3250 | | /* |
3251 | | * Check for interrupts while trying to (re-)acquire the exclusive |
3252 | | * lock. |
3253 | | */ |
3254 | 0 | CHECK_FOR_INTERRUPTS(); |
3255 | |
|
3256 | 0 | if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / |
3257 | 0 | VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) |
3258 | 0 | { |
3259 | | /* |
3260 | | * We failed to establish the lock in the specified number of |
3261 | | * retries. This means we give up truncating. |
3262 | | */ |
3263 | 0 | ereport(vacrel->verbose ? INFO : DEBUG2, |
3264 | 0 | (errmsg("\"%s\": stopping truncate due to conflicting lock request", |
3265 | 0 | vacrel->relname))); |
3266 | 0 | return; |
3267 | 0 | } |
3268 | | |
3269 | 0 | (void) WaitLatch(MyLatch, |
3270 | 0 | WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, |
3271 | 0 | VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL, |
3272 | 0 | WAIT_EVENT_VACUUM_TRUNCATE); |
3273 | 0 | ResetLatch(MyLatch); |
3274 | 0 | } |
3275 | | |
3276 | | /* |
3277 | | * Now that we have exclusive lock, look to see if the rel has grown |
3278 | | * whilst we were vacuuming with non-exclusive lock. If so, give up; |
3279 | | * the newly added pages presumably contain non-deletable tuples. |
3280 | | */ |
3281 | 0 | new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel); |
3282 | 0 | if (new_rel_pages != orig_rel_pages) |
3283 | 0 | { |
3284 | | /* |
3285 | | * Note: we intentionally don't update vacrel->rel_pages with the |
3286 | | * new rel size here. If we did, it would amount to assuming that |
3287 | | * the new pages are empty, which is unlikely. Leaving the numbers |
3288 | | * alone amounts to assuming that the new pages have the same |
3289 | | * tuple density as existing ones, which is less unlikely. |
3290 | | */ |
3291 | 0 | UnlockRelation(vacrel->rel, AccessExclusiveLock); |
3292 | 0 | return; |
3293 | 0 | } |
3294 | | |
3295 | | /* |
3296 | | * Scan backwards from the end to verify that the end pages actually |
3297 | | * contain no tuples. This is *necessary*, not optional, because |
3298 | | * other backends could have added tuples to these pages whilst we |
3299 | | * were vacuuming. |
3300 | | */ |
3301 | 0 | new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected); |
3302 | 0 | vacrel->blkno = new_rel_pages; |
3303 | |
|
3304 | 0 | if (new_rel_pages >= orig_rel_pages) |
3305 | 0 | { |
3306 | | /* can't do anything after all */ |
3307 | 0 | UnlockRelation(vacrel->rel, AccessExclusiveLock); |
3308 | 0 | return; |
3309 | 0 | } |
3310 | | |
3311 | | /* |
3312 | | * Okay to truncate. |
3313 | | */ |
3314 | 0 | RelationTruncate(vacrel->rel, new_rel_pages); |
3315 | | |
3316 | | /* |
3317 | | * We can release the exclusive lock as soon as we have truncated. |
3318 | | * Other backends can't safely access the relation until they have |
3319 | | * processed the smgr invalidation that smgrtruncate sent out ... but |
3320 | | * that should happen as part of standard invalidation processing once |
3321 | | * they acquire lock on the relation. |
3322 | | */ |
3323 | 0 | UnlockRelation(vacrel->rel, AccessExclusiveLock); |
3324 | | |
3325 | | /* |
3326 | | * Update statistics. Here, it *is* correct to adjust rel_pages |
3327 | | * without also touching reltuples, since the tuple count wasn't |
3328 | | * changed by the truncation. |
3329 | | */ |
3330 | 0 | vacrel->removed_pages += orig_rel_pages - new_rel_pages; |
3331 | 0 | vacrel->rel_pages = new_rel_pages; |
3332 | |
|
3333 | 0 | ereport(vacrel->verbose ? INFO : DEBUG2, |
3334 | 0 | (errmsg("table \"%s\": truncated %u to %u pages", |
3335 | 0 | vacrel->relname, |
3336 | 0 | orig_rel_pages, new_rel_pages))); |
3337 | 0 | orig_rel_pages = new_rel_pages; |
3338 | 0 | } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected); |
3339 | 0 | } |
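/*
 * The retry loop above gives up after VACUUM_TRUNCATE_LOCK_TIMEOUT /
 * VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL failed ConditionalLockRelation()
 * attempts. A sketch of the arithmetic, assuming the 5000 ms timeout and
 * 50 ms wait interval defined near the top of this file:
 *
 *   maximum attempts = 5000 / 50 = 100
 *   worst case wait  = ~5 seconds before truncation is abandoned
 *
 * Each WaitLatch() sleep is bounded by the wait interval, so a conflicting
 * lock request delays VACUUM by at most the timeout, never indefinitely,
 * and the rest of the operation proceeds normally without truncating.
 */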
3340 | | |
3341 | | /* |
3342 | | * Rescan end pages to verify that they are (still) empty of tuples. |
3343 | | * |
3344 | | * Returns number of nondeletable pages (last nonempty page + 1). |
3345 | | */ |
3346 | | static BlockNumber |
3347 | | count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected) |
3348 | 0 | { |
3349 | 0 | BlockNumber blkno; |
3350 | 0 | BlockNumber prefetchedUntil; |
3351 | 0 | instr_time starttime; |
3352 | | |
3353 | | /* Initialize the starttime if we check for conflicting lock requests */ |
3354 | 0 | INSTR_TIME_SET_CURRENT(starttime); |
3355 | | |
3356 | | /* |
3357 | | * Start checking blocks at what we believe relation end to be and move |
3358 | | * backwards. (Strange coding of loop control is needed because blkno is |
3359 | | * unsigned.) To make the scan faster, we prefetch a few blocks at a time |
3360 | | * in forward direction, so that OS-level readahead can kick in. |
3361 | | */ |
3362 | 0 | blkno = vacrel->rel_pages; |
3363 | 0 | StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0, |
3364 | 0 | "prefetch size must be power of 2"); |
3365 | 0 | prefetchedUntil = InvalidBlockNumber; |
3366 | 0 | while (blkno > vacrel->nonempty_pages) |
3367 | 0 | { |
3368 | 0 | Buffer buf; |
3369 | 0 | Page page; |
3370 | 0 | OffsetNumber offnum, |
3371 | 0 | maxoff; |
3372 | 0 | bool hastup; |
3373 | | |
3374 | | /* |
3375 | | * Check if another process requests a lock on our relation. We are |
3376 | | * holding an AccessExclusiveLock here, so they will be waiting. We |
3377 | | * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we |
3378 | | * only check if that interval has elapsed once every 32 blocks to |
3379 | | * keep the number of system calls and actual shared lock table |
3380 | | * lookups to a minimum. |
3381 | | */ |
3382 | 0 | if ((blkno % 32) == 0) |
3383 | 0 | { |
3384 | 0 | instr_time currenttime; |
3385 | 0 | instr_time elapsed; |
3386 | |
|
3387 | 0 | INSTR_TIME_SET_CURRENT(currenttime); |
3388 | 0 | elapsed = currenttime; |
3389 | 0 | INSTR_TIME_SUBTRACT(elapsed, starttime); |
3390 | 0 | if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000) |
3391 | 0 | >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL) |
3392 | 0 | { |
3393 | 0 | if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock)) |
3394 | 0 | { |
3395 | 0 | ereport(vacrel->verbose ? INFO : DEBUG2, |
3396 | 0 | (errmsg("table \"%s\": suspending truncate due to conflicting lock request", |
3397 | 0 | vacrel->relname))); |
3398 | | |
3399 | 0 | *lock_waiter_detected = true; |
3400 | 0 | return blkno; |
3401 | 0 | } |
3402 | 0 | starttime = currenttime; |
3403 | 0 | } |
3404 | 0 | } |
3405 | | |
3406 | | /* |
3407 | | * We don't insert a vacuum delay point here, because we have an |
3408 | | * exclusive lock on the table which we want to hold for as short a |
3409 | | * time as possible. We still need to check for interrupts however. |
3410 | | */ |
3411 | 0 | CHECK_FOR_INTERRUPTS(); |
3412 | |
|
3413 | 0 | blkno--; |
3414 | | |
3415 | | /* If we haven't prefetched this lot yet, do so now. */ |
3416 | 0 | if (prefetchedUntil > blkno) |
3417 | 0 | { |
3418 | 0 | BlockNumber prefetchStart; |
3419 | 0 | BlockNumber pblkno; |
3420 | |
|
3421 | 0 | prefetchStart = blkno & ~(PREFETCH_SIZE - 1); |
3422 | 0 | for (pblkno = prefetchStart; pblkno <= blkno; pblkno++) |
3423 | 0 | { |
3424 | 0 | PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno); |
3425 | 0 | CHECK_FOR_INTERRUPTS(); |
3426 | 0 | } |
3427 | 0 | prefetchedUntil = prefetchStart; |
3428 | 0 | } |
3429 | |
|
3430 | 0 | buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, |
3431 | 0 | vacrel->bstrategy); |
3432 | | |
3433 | | /* In this phase we only need shared access to the buffer */ |
3434 | 0 | LockBuffer(buf, BUFFER_LOCK_SHARE); |
3435 | |
|
3436 | 0 | page = BufferGetPage(buf); |
3437 | |
|
3438 | 0 | if (PageIsNew(page) || PageIsEmpty(page)) |
3439 | 0 | { |
3440 | 0 | UnlockReleaseBuffer(buf); |
3441 | 0 | continue; |
3442 | 0 | } |
3443 | | |
3444 | 0 | hastup = false; |
3445 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
3446 | 0 | for (offnum = FirstOffsetNumber; |
3447 | 0 | offnum <= maxoff; |
3448 | 0 | offnum = OffsetNumberNext(offnum)) |
3449 | 0 | { |
3450 | 0 | ItemId itemid; |
3451 | |
|
3452 | 0 | itemid = PageGetItemId(page, offnum); |
3453 | | |
3454 | | /* |
3455 | | * Note: any non-unused item should be taken as a reason to keep |
3456 | | * this page. Even an LP_DEAD item makes truncation unsafe, since |
3457 | | * we must not have cleaned out its index entries. |
3458 | | */ |
3459 | 0 | if (ItemIdIsUsed(itemid)) |
3460 | 0 | { |
3461 | 0 | hastup = true; |
3462 | 0 | break; /* can stop scanning */ |
3463 | 0 | } |
3464 | 0 | } /* scan along page */ |
3465 | |
|
3466 | 0 | UnlockReleaseBuffer(buf); |
3467 | | |
3468 | | /* Done scanning if we found a tuple here */ |
3469 | 0 | if (hastup) |
3470 | 0 | return blkno + 1; |
3471 | 0 | } |
3472 | | |
3473 | | /* |
3474 | | * If we fall out of the loop, all the previously-thought-to-be-empty |
3475 | | * pages still are; we need not bother to look at the last known-nonempty |
3476 | | * page. |
3477 | | */ |
3478 | 0 | return vacrel->nonempty_pages; |
3479 | 0 | } |
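/*
 * The prefetch start above relies on PREFETCH_SIZE being a power of two
 * (hence the StaticAssertStmt). A short worked example, assuming
 * PREFETCH_SIZE is 32 as defined near the top of this file:
 *
 *   blkno             = 1000  (binary 1111101000)
 *   PREFETCH_SIZE - 1 =   31  (binary 0000011111)
 *   prefetchStart     = 1000 & ~31 = 992
 *
 * Blocks 992..1000 are therefore prefetched in forward order even though
 * the verification scan itself walks backwards, which lets OS-level
 * readahead help.
 */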
3480 | | |
3481 | | /* |
3482 | | * Allocate dead_items and dead_items_info (either using palloc, or in dynamic |
3483 | | * shared memory). Sets both in vacrel for caller. |
3484 | | * |
3485 | | * Also handles parallel initialization as part of allocating dead_items in |
3486 | | * DSM when required. |
3487 | | */ |
3488 | | static void |
3489 | | dead_items_alloc(LVRelState *vacrel, int nworkers) |
3490 | 0 | { |
3491 | 0 | VacDeadItemsInfo *dead_items_info; |
3492 | 0 | int vac_work_mem = AmAutoVacuumWorkerProcess() && |
3493 | 0 | autovacuum_work_mem != -1 ? |
3494 | 0 | autovacuum_work_mem : maintenance_work_mem; |
3495 | | |
3496 | | /* |
3497 | | * Initialize state for a parallel vacuum. As of now, only one worker can |
3498 | | * be used for an index, so we invoke parallelism only if there are at |
3499 | | * least two indexes on a table. |
3500 | | */ |
3501 | 0 | if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming) |
3502 | 0 | { |
3503 | | /* |
3504 | | * Since parallel workers cannot access data in temporary tables, we |
3505 | | * can't perform parallel vacuum on them. |
3506 | | */ |
3507 | 0 | if (RelationUsesLocalBuffers(vacrel->rel)) |
3508 | 0 | { |
3509 | | /* |
3510 | | * Give warning only if the user explicitly tries to perform a |
3511 | | * parallel vacuum on the temporary table. |
3512 | | */ |
3513 | 0 | if (nworkers > 0) |
3514 | 0 | ereport(WARNING, |
3515 | 0 | (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel", |
3516 | 0 | vacrel->relname))); |
3517 | 0 | } |
3518 | 0 | else |
3519 | 0 | vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels, |
3520 | 0 | vacrel->nindexes, nworkers, |
3521 | 0 | vac_work_mem, |
3522 | 0 | vacrel->verbose ? INFO : DEBUG2, |
3523 | 0 | vacrel->bstrategy); |
3524 | | |
3525 | | /* |
3526 | | * If parallel mode started, dead_items and dead_items_info spaces are |
3527 | | * allocated in DSM. |
3528 | | */ |
3529 | 0 | if (ParallelVacuumIsActive(vacrel)) |
3530 | 0 | { |
3531 | 0 | vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs, |
3532 | 0 | &vacrel->dead_items_info); |
3533 | 0 | return; |
3534 | 0 | } |
3535 | 0 | } |
3536 | | |
3537 | | /* |
3538 | | * Serial VACUUM case. Allocate both dead_items and dead_items_info |
3539 | | * locally. |
3540 | | */ |
3541 | | |
3542 | 0 | dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo)); |
3543 | 0 | dead_items_info->max_bytes = vac_work_mem * (Size) 1024; |
3544 | 0 | dead_items_info->num_items = 0; |
3545 | 0 | vacrel->dead_items_info = dead_items_info; |
3546 | |
|
3547 | 0 | vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true); |
3548 | 0 | } |
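/*
 * A sketch of the memory budget arithmetic above. maintenance_work_mem and
 * autovacuum_work_mem are GUCs expressed in kilobytes, so with the common
 * 64MB default:
 *
 *   vac_work_mem               = 65536 (kB)
 *   dead_items_info->max_bytes = 65536 * 1024 = 64MB
 *
 * The heap scan earlier in this file compares TidStoreMemoryUsage() of the
 * dead_items store against this budget; once it is exceeded, phase I is
 * suspended so that index and heap vacuuming can drain the store before
 * scanning resumes.
 */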
3549 | | |
3550 | | /* |
3551 | | * Add the given block number and offset numbers to dead_items. |
3552 | | */ |
3553 | | static void |
3554 | | dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, |
3555 | | int num_offsets) |
3556 | 0 | { |
3557 | 0 | const int prog_index[2] = { |
3558 | 0 | PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS, |
3559 | 0 | PROGRESS_VACUUM_DEAD_TUPLE_BYTES |
3560 | 0 | }; |
3561 | 0 | int64 prog_val[2]; |
3562 | |
|
3563 | 0 | TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets); |
3564 | 0 | vacrel->dead_items_info->num_items += num_offsets; |
3565 | | |
3566 | | /* update the progress information */ |
3567 | 0 | prog_val[0] = vacrel->dead_items_info->num_items; |
3568 | 0 | prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items); |
3569 | 0 | pgstat_progress_update_multi_param(2, prog_index, prog_val); |
3570 | 0 | } |
3571 | | |
3572 | | /* |
3573 | | * Forget all collected dead items. |
3574 | | */ |
3575 | | static void |
3576 | | dead_items_reset(LVRelState *vacrel) |
3577 | 0 | { |
3578 | 0 | if (ParallelVacuumIsActive(vacrel)) |
3579 | 0 | { |
3580 | 0 | parallel_vacuum_reset_dead_items(vacrel->pvs); |
3581 | 0 | return; |
3582 | 0 | } |
3583 | | |
3584 | | /* Recreate the tidstore with the same max_bytes limitation */ |
3585 | 0 | TidStoreDestroy(vacrel->dead_items); |
3586 | 0 | vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true); |
3587 | | |
3588 | | /* Reset the counter */ |
3589 | 0 | vacrel->dead_items_info->num_items = 0; |
3590 | 0 | } |
3591 | | |
3592 | | /* |
3593 | | * Perform cleanup for resources allocated in dead_items_alloc |
3594 | | */ |
3595 | | static void |
3596 | | dead_items_cleanup(LVRelState *vacrel) |
3597 | 0 | { |
3598 | 0 | if (!ParallelVacuumIsActive(vacrel)) |
3599 | 0 | { |
3600 | | /* Don't bother with pfree here */ |
3601 | 0 | return; |
3602 | 0 | } |
3603 | | |
3604 | | /* End parallel mode */ |
3605 | 0 | parallel_vacuum_end(vacrel->pvs, vacrel->indstats); |
3606 | 0 | vacrel->pvs = NULL; |
3607 | 0 | } |
3608 | | |
3609 | | /* |
3610 | | * Check if every tuple in the given page is visible to all current and future |
3611 | | * transactions. Also return the visibility_cutoff_xid which is the highest |
3612 | | * xmin amongst the visible tuples. Set *all_frozen to true if every tuple |
3613 | | * on this page is frozen. |
3614 | | * |
3615 | | * This is a stripped down version of lazy_scan_prune(). If you change |
3616 | | * anything here, make sure that everything stays in sync. Note that an |
3617 | | * assertion calls us to verify that everybody still agrees. Be sure to avoid |
3618 | | * introducing new side-effects here. |
3619 | | */ |
3620 | | static bool |
3621 | | heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, |
3622 | | TransactionId *visibility_cutoff_xid, |
3623 | | bool *all_frozen) |
3624 | 0 | { |
3625 | 0 | Page page = BufferGetPage(buf); |
3626 | 0 | BlockNumber blockno = BufferGetBlockNumber(buf); |
3627 | 0 | OffsetNumber offnum, |
3628 | 0 | maxoff; |
3629 | 0 | bool all_visible = true; |
3630 | |
|
3631 | 0 | *visibility_cutoff_xid = InvalidTransactionId; |
3632 | 0 | *all_frozen = true; |
3633 | |
|
3634 | 0 | maxoff = PageGetMaxOffsetNumber(page); |
3635 | 0 | for (offnum = FirstOffsetNumber; |
3636 | 0 | offnum <= maxoff && all_visible; |
3637 | 0 | offnum = OffsetNumberNext(offnum)) |
3638 | 0 | { |
3639 | 0 | ItemId itemid; |
3640 | 0 | HeapTupleData tuple; |
3641 | | |
3642 | | /* |
3643 | | * Set the offset number so that we can display it along with any |
3644 | | * error that occurred while processing this tuple. |
3645 | | */ |
3646 | 0 | vacrel->offnum = offnum; |
3647 | 0 | itemid = PageGetItemId(page, offnum); |
3648 | | |
3649 | | /* Unused or redirect line pointers are of no interest */ |
3650 | 0 | if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid)) |
3651 | 0 | continue; |
3652 | | |
3653 | 0 | ItemPointerSet(&(tuple.t_self), blockno, offnum); |
3654 | | |
3655 | | /* |
3656 | | * Dead line pointers can have index pointers pointing to them. So |
3657 | | * they can't be treated as visible |
3658 | | */ |
3659 | 0 | if (ItemIdIsDead(itemid)) |
3660 | 0 | { |
3661 | 0 | all_visible = false; |
3662 | 0 | *all_frozen = false; |
3663 | 0 | break; |
3664 | 0 | } |
3665 | | |
3666 | 0 | Assert(ItemIdIsNormal(itemid)); |
3667 | |
|
3668 | 0 | tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); |
3669 | 0 | tuple.t_len = ItemIdGetLength(itemid); |
3670 | 0 | tuple.t_tableOid = RelationGetRelid(vacrel->rel); |
3671 | |
|
3672 | 0 | switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin, |
3673 | 0 | buf)) |
3674 | 0 | { |
3675 | 0 | case HEAPTUPLE_LIVE: |
3676 | 0 | { |
3677 | 0 | TransactionId xmin; |
3678 | | |
3679 | | /* Check comments in lazy_scan_prune. */ |
3680 | 0 | if (!HeapTupleHeaderXminCommitted(tuple.t_data)) |
3681 | 0 | { |
3682 | 0 | all_visible = false; |
3683 | 0 | *all_frozen = false; |
3684 | 0 | break; |
3685 | 0 | } |
3686 | | |
3687 | | /* |
3688 | | * The inserter definitely committed. But is it old enough |
3689 | | * that everyone sees it as committed? |
3690 | | */ |
3691 | 0 | xmin = HeapTupleHeaderGetXmin(tuple.t_data); |
3692 | 0 | if (!TransactionIdPrecedes(xmin, |
3693 | 0 | vacrel->cutoffs.OldestXmin)) |
3694 | 0 | { |
3695 | 0 | all_visible = false; |
3696 | 0 | *all_frozen = false; |
3697 | 0 | break; |
3698 | 0 | } |
3699 | | |
3700 | | /* Track newest xmin on page. */ |
3701 | 0 | if (TransactionIdFollows(xmin, *visibility_cutoff_xid) && |
3702 | 0 | TransactionIdIsNormal(xmin)) |
3703 | 0 | *visibility_cutoff_xid = xmin; |
3704 | | |
3705 | | /* Check whether this tuple is already frozen or not */ |
3706 | 0 | if (all_visible && *all_frozen && |
3707 | 0 | heap_tuple_needs_eventual_freeze(tuple.t_data)) |
3708 | 0 | *all_frozen = false; |
3709 | 0 | } |
3710 | 0 | break; |
3711 | | |
3712 | 0 | case HEAPTUPLE_DEAD: |
3713 | 0 | case HEAPTUPLE_RECENTLY_DEAD: |
3714 | 0 | case HEAPTUPLE_INSERT_IN_PROGRESS: |
3715 | 0 | case HEAPTUPLE_DELETE_IN_PROGRESS: |
3716 | 0 | { |
3717 | 0 | all_visible = false; |
3718 | 0 | *all_frozen = false; |
3719 | 0 | break; |
3720 | 0 | } |
3721 | 0 | default: |
3722 | 0 | elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); |
3723 | 0 | break; |
3724 | 0 | } |
3725 | 0 | } /* scan along page */ |
3726 | | |
3727 | | /* Clear the offset information once we have processed the given page. */ |
3728 | 0 | vacrel->offnum = InvalidOffsetNumber; |
3729 | |
|
3730 | 0 | return all_visible; |
3731 | 0 | } |
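/*
 * A sketch of the cross-check mentioned in the header comment above: in
 * assert-enabled builds, the pruning code earlier in this file re-derives a
 * page's visibility with this function and insists that both computations
 * agree, roughly:
 *
 *   TransactionId debug_cutoff;
 *   bool          debug_all_frozen;
 *
 *   Assert(heap_page_is_all_visible(vacrel, buf,
 *                                   &debug_cutoff, &debug_all_frozen));
 *
 * which is why this routine must remain free of side effects and stay in
 * sync with the visibility logic in lazy_scan_prune().
 */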
3732 | | |
3733 | | /* |
3734 | | * Update index statistics in pg_class if the statistics are accurate. |
3735 | | */ |
3736 | | static void |
3737 | | update_relstats_all_indexes(LVRelState *vacrel) |
3738 | 0 | { |
3739 | 0 | Relation *indrels = vacrel->indrels; |
3740 | 0 | int nindexes = vacrel->nindexes; |
3741 | 0 | IndexBulkDeleteResult **indstats = vacrel->indstats; |
3742 | |
|
3743 | 0 | Assert(vacrel->do_index_cleanup); |
3744 | |
|
3745 | 0 | for (int idx = 0; idx < nindexes; idx++) |
3746 | 0 | { |
3747 | 0 | Relation indrel = indrels[idx]; |
3748 | 0 | IndexBulkDeleteResult *istat = indstats[idx]; |
3749 | |
|
3750 | 0 | if (istat == NULL || istat->estimated_count) |
3751 | 0 | continue; |
3752 | | |
3753 | | /* Update index statistics */ |
3754 | 0 | vac_update_relstats(indrel, |
3755 | 0 | istat->num_pages, |
3756 | 0 | istat->num_index_tuples, |
3757 | 0 | 0, 0, |
3758 | 0 | false, |
3759 | 0 | InvalidTransactionId, |
3760 | 0 | InvalidMultiXactId, |
3761 | 0 | NULL, NULL, false); |
3762 | 0 | } |
3763 | 0 | } |
3764 | | |
3765 | | /* |
3766 | | * Error context callback for errors occurring during vacuum. The error |
3767 | | * context messages for index phases should match the messages set in parallel |
3768 | | * vacuum. If you change this function for those phases, change |
3769 | | * parallel_vacuum_error_callback() as well. |
3770 | | */ |
3771 | | static void |
3772 | | vacuum_error_callback(void *arg) |
3773 | 0 | { |
3774 | 0 | LVRelState *errinfo = arg; |
3775 | |
|
3776 | 0 | switch (errinfo->phase) |
3777 | 0 | { |
3778 | 0 | case VACUUM_ERRCB_PHASE_SCAN_HEAP: |
3779 | 0 | if (BlockNumberIsValid(errinfo->blkno)) |
3780 | 0 | { |
3781 | 0 | if (OffsetNumberIsValid(errinfo->offnum)) |
3782 | 0 | errcontext("while scanning block %u offset %u of relation \"%s.%s\"", |
3783 | 0 | errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname); |
3784 | 0 | else |
3785 | 0 | errcontext("while scanning block %u of relation \"%s.%s\"", |
3786 | 0 | errinfo->blkno, errinfo->relnamespace, errinfo->relname); |
3787 | 0 | } |
3788 | 0 | else |
3789 | 0 | errcontext("while scanning relation \"%s.%s\"", |
3790 | 0 | errinfo->relnamespace, errinfo->relname); |
3791 | 0 | break; |
3792 | | |
3793 | 0 | case VACUUM_ERRCB_PHASE_VACUUM_HEAP: |
3794 | 0 | if (BlockNumberIsValid(errinfo->blkno)) |
3795 | 0 | { |
3796 | 0 | if (OffsetNumberIsValid(errinfo->offnum)) |
3797 | 0 | errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"", |
3798 | 0 | errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname); |
3799 | 0 | else |
3800 | 0 | errcontext("while vacuuming block %u of relation \"%s.%s\"", |
3801 | 0 | errinfo->blkno, errinfo->relnamespace, errinfo->relname); |
3802 | 0 | } |
3803 | 0 | else |
3804 | 0 | errcontext("while vacuuming relation \"%s.%s\"", |
3805 | 0 | errinfo->relnamespace, errinfo->relname); |
3806 | 0 | break; |
3807 | | |
3808 | 0 | case VACUUM_ERRCB_PHASE_VACUUM_INDEX: |
3809 | 0 | errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"", |
3810 | 0 | errinfo->indname, errinfo->relnamespace, errinfo->relname); |
3811 | 0 | break; |
3812 | | |
3813 | 0 | case VACUUM_ERRCB_PHASE_INDEX_CLEANUP: |
3814 | 0 | errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"", |
3815 | 0 | errinfo->indname, errinfo->relnamespace, errinfo->relname); |
3816 | 0 | break; |
3817 | | |
3818 | 0 | case VACUUM_ERRCB_PHASE_TRUNCATE: |
3819 | 0 | if (BlockNumberIsValid(errinfo->blkno)) |
3820 | 0 | errcontext("while truncating relation \"%s.%s\" to %u blocks", |
3821 | 0 | errinfo->relnamespace, errinfo->relname, errinfo->blkno); |
3822 | 0 | break; |
3823 | | |
3824 | 0 | case VACUUM_ERRCB_PHASE_UNKNOWN: |
3825 | 0 | default: |
3826 | 0 | return; /* do nothing; the errinfo may not be |
3827 | | * initialized */ |
3828 | 0 | } |
3829 | 0 | } |
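/*
 * This callback only fires once it has been pushed onto
 * error_context_stack. The registration lives in the main entry point
 * earlier in this file; a minimal sketch of the usual PostgreSQL pattern:
 *
 *   ErrorContextCallback errcallback;
 *
 *   errcallback.callback = vacuum_error_callback;
 *   errcallback.arg = vacrel;
 *   errcallback.previous = error_context_stack;
 *   error_context_stack = &errcallback;
 *
 *   ... vacuum work; update_vacuum_error_info() keeps vacrel->phase, blkno
 *   and offnum current so that any ERROR gets a useful context line ...
 *
 *   error_context_stack = errcallback.previous;
 */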
3830 | | |
3831 | | /* |
3832 | | * Updates the information required for vacuum error callback. This also saves |
3833 | | * the current information which can be later restored via restore_vacuum_error_info. |
3834 | | */ |
3835 | | static void |
3836 | | update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, |
3837 | | int phase, BlockNumber blkno, OffsetNumber offnum) |
3838 | 0 | { |
3839 | 0 | if (saved_vacrel) |
3840 | 0 | { |
3841 | 0 | saved_vacrel->offnum = vacrel->offnum; |
3842 | 0 | saved_vacrel->blkno = vacrel->blkno; |
3843 | 0 | saved_vacrel->phase = vacrel->phase; |
3844 | 0 | } |
3845 | |
|
3846 | 0 | vacrel->blkno = blkno; |
3847 | 0 | vacrel->offnum = offnum; |
3848 | 0 | vacrel->phase = phase; |
3849 | 0 | } |
3850 | | |
3851 | | /* |
3852 | | * Restores the vacuum information saved via a prior call to update_vacuum_error_info. |
3853 | | */ |
3854 | | static void |
3855 | | restore_vacuum_error_info(LVRelState *vacrel, |
3856 | | const LVSavedErrInfo *saved_vacrel) |
3857 | 0 | { |
3858 | 0 | vacrel->blkno = saved_vacrel->blkno; |
3859 | 0 | vacrel->offnum = saved_vacrel->offnum; |
3860 | 0 | vacrel->phase = saved_vacrel->phase; |
3861 | 0 | } |