/src/postgres/src/backend/commands/vacuum.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * vacuum.c |
4 | | * The postgres vacuum cleaner. |
5 | | * |
6 | | * This file includes (a) control and dispatch code for VACUUM and ANALYZE |
7 | | * commands, (b) code to compute various vacuum thresholds, and (c) index |
8 | | * vacuum code. |
9 | | * |
10 | | * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in |
11 | | * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of |
12 | | * CLUSTER, handled in cluster.c. |
13 | | * |
14 | | * |
15 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
16 | | * Portions Copyright (c) 1994, Regents of the University of California |
17 | | * |
18 | | * |
19 | | * IDENTIFICATION |
20 | | * src/backend/commands/vacuum.c |
21 | | * |
22 | | *------------------------------------------------------------------------- |
23 | | */ |
24 | | #include "postgres.h" |
25 | | |
26 | | #include <math.h> |
27 | | |
28 | | #include "access/clog.h" |
29 | | #include "access/commit_ts.h" |
30 | | #include "access/genam.h" |
31 | | #include "access/heapam.h" |
32 | | #include "access/htup_details.h" |
33 | | #include "access/multixact.h" |
34 | | #include "access/tableam.h" |
35 | | #include "access/transam.h" |
36 | | #include "access/xact.h" |
37 | | #include "catalog/namespace.h" |
38 | | #include "catalog/pg_database.h" |
39 | | #include "catalog/pg_inherits.h" |
40 | | #include "commands/cluster.h" |
41 | | #include "commands/defrem.h" |
42 | | #include "commands/progress.h" |
43 | | #include "commands/vacuum.h" |
44 | | #include "miscadmin.h" |
45 | | #include "nodes/makefuncs.h" |
46 | | #include "pgstat.h" |
47 | | #include "postmaster/autovacuum.h" |
48 | | #include "postmaster/bgworker_internals.h" |
49 | | #include "postmaster/interrupt.h" |
50 | | #include "storage/bufmgr.h" |
51 | | #include "storage/lmgr.h" |
52 | | #include "storage/pmsignal.h" |
53 | | #include "storage/proc.h" |
54 | | #include "storage/procarray.h" |
55 | | #include "utils/acl.h" |
56 | | #include "utils/fmgroids.h" |
57 | | #include "utils/guc.h" |
58 | | #include "utils/guc_hooks.h" |
59 | | #include "utils/memutils.h" |
60 | | #include "utils/snapmgr.h" |
61 | | #include "utils/syscache.h" |
62 | | |
63 | | /* |
64 | | * Minimum interval for cost-based vacuum delay reports from a parallel worker. |
65 | | * This aims to avoid sending too many messages and waking up the leader too |
66 | | * frequently. |
67 | | */ |
68 | 0 | #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S) |
69 | | |
70 | | /* |
71 | | * GUC parameters |
72 | | */ |
73 | | int vacuum_freeze_min_age; |
74 | | int vacuum_freeze_table_age; |
75 | | int vacuum_multixact_freeze_min_age; |
76 | | int vacuum_multixact_freeze_table_age; |
77 | | int vacuum_failsafe_age; |
78 | | int vacuum_multixact_failsafe_age; |
79 | | double vacuum_max_eager_freeze_failure_rate; |
80 | | bool track_cost_delay_timing; |
81 | | bool vacuum_truncate; |
82 | | |
83 | | /* |
84 | | * Variables for cost-based vacuum delay. The defaults differ between |
85 | | * autovacuum and vacuum. They should be set with the appropriate GUC value in |
86 | | * vacuum code. They are initialized here to the defaults for client backends |
87 | | * executing VACUUM or ANALYZE. |
88 | | */ |
89 | | double vacuum_cost_delay = 0; |
90 | | int vacuum_cost_limit = 200; |
91 | | |
92 | | /* Variable for reporting cost-based vacuum delay from parallel workers. */ |
93 | | int64 parallel_vacuum_worker_delay_ns = 0; |
94 | | |
95 | | /* |
96 | | * VacuumFailsafeActive is a defined as a global so that we can determine |
97 | | * whether or not to re-enable cost-based vacuum delay when vacuuming a table. |
98 | | * If failsafe mode has been engaged, we will not re-enable cost-based delay |
99 | | * for the table until after vacuuming has completed, regardless of other |
100 | | * settings. |
101 | | * |
102 | | * Only VACUUM code should inspect this variable and only table access methods |
103 | | * should set it to true. In Table AM-agnostic VACUUM code, this variable is |
104 | | * inspected to determine whether or not to allow cost-based delays. Table AMs |
105 | | * are free to set it if they desire this behavior, but it is false by default |
106 | | * and reset to false in between vacuuming each relation. |
107 | | */ |
108 | | bool VacuumFailsafeActive = false; |
109 | | |
110 | | /* |
111 | | * Variables for cost-based parallel vacuum. See comments atop |
112 | | * compute_parallel_delay to understand how it works. |
113 | | */ |
114 | | pg_atomic_uint32 *VacuumSharedCostBalance = NULL; |
115 | | pg_atomic_uint32 *VacuumActiveNWorkers = NULL; |
116 | | int VacuumCostBalanceLocal = 0; |
117 | | |
118 | | /* non-export function prototypes */ |
119 | | static List *expand_vacuum_rel(VacuumRelation *vrel, |
120 | | MemoryContext vac_context, int options); |
121 | | static List *get_all_vacuum_rels(MemoryContext vac_context, int options); |
122 | | static void vac_truncate_clog(TransactionId frozenXID, |
123 | | MultiXactId minMulti, |
124 | | TransactionId lastSaneFrozenXid, |
125 | | MultiXactId lastSaneMinMulti); |
126 | | static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, |
127 | | BufferAccessStrategy bstrategy); |
128 | | static double compute_parallel_delay(void); |
129 | | static VacOptValue get_vacoptval_from_boolean(DefElem *def); |
130 | | static bool vac_tid_reaped(ItemPointer itemptr, void *state); |
131 | | |
132 | | /* |
133 | | * GUC check function to ensure GUC value specified is within the allowable |
134 | | * range. |
135 | | */ |
136 | | bool |
137 | | check_vacuum_buffer_usage_limit(int *newval, void **extra, |
138 | | GucSource source) |
139 | 0 | { |
140 | | /* Value upper and lower hard limits are inclusive */ |
141 | 0 | if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB && |
142 | 0 | *newval <= MAX_BAS_VAC_RING_SIZE_KB)) |
143 | 0 | return true; |
144 | | |
145 | | /* Value does not fall within any allowable range */ |
146 | 0 | GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.", |
147 | 0 | "vacuum_buffer_usage_limit", |
148 | 0 | MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB); |
149 | |
|
150 | 0 | return false; |
151 | 0 | } |
152 | | |
153 | | /* |
154 | | * Primary entry point for manual VACUUM and ANALYZE commands |
155 | | * |
156 | | * This is mainly a preparation wrapper for the real operations that will |
157 | | * happen in vacuum(). |
158 | | */ |
159 | | void |
160 | | ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) |
161 | 0 | { |
162 | 0 | VacuumParams params; |
163 | 0 | BufferAccessStrategy bstrategy = NULL; |
164 | 0 | bool verbose = false; |
165 | 0 | bool skip_locked = false; |
166 | 0 | bool analyze = false; |
167 | 0 | bool freeze = false; |
168 | 0 | bool full = false; |
169 | 0 | bool disable_page_skipping = false; |
170 | 0 | bool process_main = true; |
171 | 0 | bool process_toast = true; |
172 | 0 | int ring_size; |
173 | 0 | bool skip_database_stats = false; |
174 | 0 | bool only_database_stats = false; |
175 | 0 | MemoryContext vac_context; |
176 | 0 | ListCell *lc; |
177 | | |
178 | | /* index_cleanup and truncate values unspecified for now */ |
179 | 0 | params.index_cleanup = VACOPTVALUE_UNSPECIFIED; |
180 | 0 | params.truncate = VACOPTVALUE_UNSPECIFIED; |
181 | | |
182 | | /* By default parallel vacuum is enabled */ |
183 | 0 | params.nworkers = 0; |
184 | | |
185 | | /* Will be set later if we recurse to a TOAST table. */ |
186 | 0 | params.toast_parent = InvalidOid; |
187 | | |
188 | | /* |
189 | | * Set this to an invalid value so it is clear whether or not a |
190 | | * BUFFER_USAGE_LIMIT was specified when making the access strategy. |
191 | | */ |
192 | 0 | ring_size = -1; |
193 | | |
194 | | /* Parse options list */ |
195 | 0 | foreach(lc, vacstmt->options) |
196 | 0 | { |
197 | 0 | DefElem *opt = (DefElem *) lfirst(lc); |
198 | | |
199 | | /* Parse common options for VACUUM and ANALYZE */ |
200 | 0 | if (strcmp(opt->defname, "verbose") == 0) |
201 | 0 | verbose = defGetBoolean(opt); |
202 | 0 | else if (strcmp(opt->defname, "skip_locked") == 0) |
203 | 0 | skip_locked = defGetBoolean(opt); |
204 | 0 | else if (strcmp(opt->defname, "buffer_usage_limit") == 0) |
205 | 0 | { |
206 | 0 | const char *hintmsg; |
207 | 0 | int result; |
208 | 0 | char *vac_buffer_size; |
209 | |
|
210 | 0 | vac_buffer_size = defGetString(opt); |
211 | | |
212 | | /* |
213 | | * Check that the specified value is valid and the size falls |
214 | | * within the hard upper and lower limits if it is not 0. |
215 | | */ |
216 | 0 | if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) || |
217 | 0 | (result != 0 && |
218 | 0 | (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB))) |
219 | 0 | { |
220 | 0 | ereport(ERROR, |
221 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
222 | 0 | errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB", |
223 | 0 | MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB), |
224 | 0 | hintmsg ? errhint("%s", _(hintmsg)) : 0)); |
225 | 0 | } |
226 | | |
227 | 0 | ring_size = result; |
228 | 0 | } |
229 | 0 | else if (!vacstmt->is_vacuumcmd) |
230 | 0 | ereport(ERROR, |
231 | 0 | (errcode(ERRCODE_SYNTAX_ERROR), |
232 | 0 | errmsg("unrecognized ANALYZE option \"%s\"", opt->defname), |
233 | 0 | parser_errposition(pstate, opt->location))); |
234 | | |
235 | | /* Parse options available on VACUUM */ |
236 | 0 | else if (strcmp(opt->defname, "analyze") == 0) |
237 | 0 | analyze = defGetBoolean(opt); |
238 | 0 | else if (strcmp(opt->defname, "freeze") == 0) |
239 | 0 | freeze = defGetBoolean(opt); |
240 | 0 | else if (strcmp(opt->defname, "full") == 0) |
241 | 0 | full = defGetBoolean(opt); |
242 | 0 | else if (strcmp(opt->defname, "disable_page_skipping") == 0) |
243 | 0 | disable_page_skipping = defGetBoolean(opt); |
244 | 0 | else if (strcmp(opt->defname, "index_cleanup") == 0) |
245 | 0 | { |
246 | | /* Interpret no string as the default, which is 'auto' */ |
247 | 0 | if (!opt->arg) |
248 | 0 | params.index_cleanup = VACOPTVALUE_AUTO; |
249 | 0 | else |
250 | 0 | { |
251 | 0 | char *sval = defGetString(opt); |
252 | | |
253 | | /* Try matching on 'auto' string, or fall back on boolean */ |
254 | 0 | if (pg_strcasecmp(sval, "auto") == 0) |
255 | 0 | params.index_cleanup = VACOPTVALUE_AUTO; |
256 | 0 | else |
257 | 0 | params.index_cleanup = get_vacoptval_from_boolean(opt); |
258 | 0 | } |
259 | 0 | } |
260 | 0 | else if (strcmp(opt->defname, "process_main") == 0) |
261 | 0 | process_main = defGetBoolean(opt); |
262 | 0 | else if (strcmp(opt->defname, "process_toast") == 0) |
263 | 0 | process_toast = defGetBoolean(opt); |
264 | 0 | else if (strcmp(opt->defname, "truncate") == 0) |
265 | 0 | params.truncate = get_vacoptval_from_boolean(opt); |
266 | 0 | else if (strcmp(opt->defname, "parallel") == 0) |
267 | 0 | { |
268 | 0 | if (opt->arg == NULL) |
269 | 0 | { |
270 | 0 | ereport(ERROR, |
271 | 0 | (errcode(ERRCODE_SYNTAX_ERROR), |
272 | 0 | errmsg("parallel option requires a value between 0 and %d", |
273 | 0 | MAX_PARALLEL_WORKER_LIMIT), |
274 | 0 | parser_errposition(pstate, opt->location))); |
275 | 0 | } |
276 | 0 | else |
277 | 0 | { |
278 | 0 | int nworkers; |
279 | |
|
280 | 0 | nworkers = defGetInt32(opt); |
281 | 0 | if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT) |
282 | 0 | ereport(ERROR, |
283 | 0 | (errcode(ERRCODE_SYNTAX_ERROR), |
284 | 0 | errmsg("parallel workers for vacuum must be between 0 and %d", |
285 | 0 | MAX_PARALLEL_WORKER_LIMIT), |
286 | 0 | parser_errposition(pstate, opt->location))); |
287 | | |
288 | | /* |
289 | | * Disable parallel vacuum, if user has specified parallel |
290 | | * degree as zero. |
291 | | */ |
292 | 0 | if (nworkers == 0) |
293 | 0 | params.nworkers = -1; |
294 | 0 | else |
295 | 0 | params.nworkers = nworkers; |
296 | 0 | } |
297 | 0 | } |
298 | 0 | else if (strcmp(opt->defname, "skip_database_stats") == 0) |
299 | 0 | skip_database_stats = defGetBoolean(opt); |
300 | 0 | else if (strcmp(opt->defname, "only_database_stats") == 0) |
301 | 0 | only_database_stats = defGetBoolean(opt); |
302 | 0 | else |
303 | 0 | ereport(ERROR, |
304 | 0 | (errcode(ERRCODE_SYNTAX_ERROR), |
305 | 0 | errmsg("unrecognized VACUUM option \"%s\"", opt->defname), |
306 | 0 | parser_errposition(pstate, opt->location))); |
307 | 0 | } |
308 | | |
309 | | /* Set vacuum options */ |
310 | 0 | params.options = |
311 | 0 | (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) | |
312 | 0 | (verbose ? VACOPT_VERBOSE : 0) | |
313 | 0 | (skip_locked ? VACOPT_SKIP_LOCKED : 0) | |
314 | 0 | (analyze ? VACOPT_ANALYZE : 0) | |
315 | 0 | (freeze ? VACOPT_FREEZE : 0) | |
316 | 0 | (full ? VACOPT_FULL : 0) | |
317 | 0 | (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) | |
318 | 0 | (process_main ? VACOPT_PROCESS_MAIN : 0) | |
319 | 0 | (process_toast ? VACOPT_PROCESS_TOAST : 0) | |
320 | 0 | (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) | |
321 | 0 | (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0); |
322 | | |
323 | | /* sanity checks on options */ |
324 | 0 | Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE)); |
325 | 0 | Assert((params.options & VACOPT_VACUUM) || |
326 | 0 | !(params.options & (VACOPT_FULL | VACOPT_FREEZE))); |
327 | |
|
328 | 0 | if ((params.options & VACOPT_FULL) && params.nworkers > 0) |
329 | 0 | ereport(ERROR, |
330 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
331 | 0 | errmsg("VACUUM FULL cannot be performed in parallel"))); |
332 | | |
333 | | /* |
334 | | * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an |
335 | | * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so |
336 | | * we'll permit that. |
337 | | */ |
338 | 0 | if (ring_size != -1 && (params.options & VACOPT_FULL) && |
339 | 0 | !(params.options & VACOPT_ANALYZE)) |
340 | 0 | ereport(ERROR, |
341 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
342 | 0 | errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL"))); |
343 | | |
344 | | /* |
345 | | * Make sure VACOPT_ANALYZE is specified if any column lists are present. |
346 | | */ |
347 | 0 | if (!(params.options & VACOPT_ANALYZE)) |
348 | 0 | { |
349 | 0 | foreach(lc, vacstmt->rels) |
350 | 0 | { |
351 | 0 | VacuumRelation *vrel = lfirst_node(VacuumRelation, lc); |
352 | |
|
353 | 0 | if (vrel->va_cols != NIL) |
354 | 0 | ereport(ERROR, |
355 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
356 | 0 | errmsg("ANALYZE option must be specified when a column list is provided"))); |
357 | 0 | } |
358 | 0 | } |
359 | | |
360 | | |
361 | | /* |
362 | | * Sanity check DISABLE_PAGE_SKIPPING option. |
363 | | */ |
364 | 0 | if ((params.options & VACOPT_FULL) != 0 && |
365 | 0 | (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0) |
366 | 0 | ereport(ERROR, |
367 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
368 | 0 | errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL"))); |
369 | | |
370 | | /* sanity check for PROCESS_TOAST */ |
371 | 0 | if ((params.options & VACOPT_FULL) != 0 && |
372 | 0 | (params.options & VACOPT_PROCESS_TOAST) == 0) |
373 | 0 | ereport(ERROR, |
374 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
375 | 0 | errmsg("PROCESS_TOAST required with VACUUM FULL"))); |
376 | | |
377 | | /* sanity check for ONLY_DATABASE_STATS */ |
378 | 0 | if (params.options & VACOPT_ONLY_DATABASE_STATS) |
379 | 0 | { |
380 | 0 | Assert(params.options & VACOPT_VACUUM); |
381 | 0 | if (vacstmt->rels != NIL) |
382 | 0 | ereport(ERROR, |
383 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
384 | 0 | errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables"))); |
385 | | /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */ |
386 | 0 | if (params.options & ~(VACOPT_VACUUM | |
387 | 0 | VACOPT_VERBOSE | |
388 | 0 | VACOPT_PROCESS_MAIN | |
389 | 0 | VACOPT_PROCESS_TOAST | |
390 | 0 | VACOPT_ONLY_DATABASE_STATS)) |
391 | 0 | ereport(ERROR, |
392 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
393 | 0 | errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options"))); |
394 | 0 | } |
395 | | |
396 | | /* |
397 | | * All freeze ages are zero if the FREEZE option is given; otherwise pass |
398 | | * them as -1 which means to use the default values. |
399 | | */ |
400 | 0 | if (params.options & VACOPT_FREEZE) |
401 | 0 | { |
402 | 0 | params.freeze_min_age = 0; |
403 | 0 | params.freeze_table_age = 0; |
404 | 0 | params.multixact_freeze_min_age = 0; |
405 | 0 | params.multixact_freeze_table_age = 0; |
406 | 0 | } |
407 | 0 | else |
408 | 0 | { |
409 | 0 | params.freeze_min_age = -1; |
410 | 0 | params.freeze_table_age = -1; |
411 | 0 | params.multixact_freeze_min_age = -1; |
412 | 0 | params.multixact_freeze_table_age = -1; |
413 | 0 | } |
414 | | |
415 | | /* user-invoked vacuum is never "for wraparound" */ |
416 | 0 | params.is_wraparound = false; |
417 | | |
418 | | /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */ |
419 | 0 | params.log_min_duration = -1; |
420 | | |
421 | | /* |
422 | | * Later, in vacuum_rel(), we check if a reloption override was specified. |
423 | | */ |
424 | 0 | params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate; |
425 | | |
426 | | /* |
427 | | * Create special memory context for cross-transaction storage. |
428 | | * |
429 | | * Since it is a child of PortalContext, it will go away eventually even |
430 | | * if we suffer an error; there's no need for special abort cleanup logic. |
431 | | */ |
432 | 0 | vac_context = AllocSetContextCreate(PortalContext, |
433 | 0 | "Vacuum", |
434 | 0 | ALLOCSET_DEFAULT_SIZES); |
435 | | |
436 | | /* |
437 | | * Make a buffer strategy object in the cross-transaction memory context. |
438 | | * We needn't bother making this for VACUUM (FULL) or VACUUM |
439 | | * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL, |
440 | | * ANALYZE) is possible, so we'd better ensure that we make a strategy |
441 | | * when we see ANALYZE. |
442 | | */ |
443 | 0 | if ((params.options & (VACOPT_ONLY_DATABASE_STATS | |
444 | 0 | VACOPT_FULL)) == 0 || |
445 | 0 | (params.options & VACOPT_ANALYZE) != 0) |
446 | 0 | { |
447 | |
|
448 | 0 | MemoryContext old_context = MemoryContextSwitchTo(vac_context); |
449 | |
|
450 | 0 | Assert(ring_size >= -1); |
451 | | |
452 | | /* |
453 | | * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE |
454 | | * command, it overrides the value of VacuumBufferUsageLimit. Either |
455 | | * value may be 0, in which case GetAccessStrategyWithSize() will |
456 | | * return NULL, effectively allowing full use of shared buffers. |
457 | | */ |
458 | 0 | if (ring_size == -1) |
459 | 0 | ring_size = VacuumBufferUsageLimit; |
460 | |
|
461 | 0 | bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size); |
462 | |
|
463 | 0 | MemoryContextSwitchTo(old_context); |
464 | 0 | } |
465 | | |
466 | | /* Now go through the common routine */ |
467 | 0 | vacuum(vacstmt->rels, ¶ms, bstrategy, vac_context, isTopLevel); |
468 | | |
469 | | /* Finally, clean up the vacuum memory context */ |
470 | 0 | MemoryContextDelete(vac_context); |
471 | 0 | } |
472 | | |
473 | | /* |
474 | | * Internal entry point for autovacuum and the VACUUM / ANALYZE commands. |
475 | | * |
476 | | * relations, if not NIL, is a list of VacuumRelation to process; otherwise, |
477 | | * we process all relevant tables in the database. For each VacuumRelation, |
478 | | * if a valid OID is supplied, the table with that OID is what to process; |
479 | | * otherwise, the VacuumRelation's RangeVar indicates what to process. |
480 | | * |
481 | | * params contains a set of parameters that can be used to customize the |
482 | | * behavior. |
483 | | * |
484 | | * bstrategy may be passed in as NULL when the caller does not want to |
485 | | * restrict the number of shared_buffers that VACUUM / ANALYZE can use, |
486 | | * otherwise, the caller must build a BufferAccessStrategy with the number of |
487 | | * shared_buffers that VACUUM / ANALYZE should try to limit themselves to |
488 | | * using. |
489 | | * |
490 | | * isTopLevel should be passed down from ProcessUtility. |
491 | | * |
492 | | * It is the caller's responsibility that all parameters are allocated in a |
493 | | * memory context that will not disappear at transaction commit. |
494 | | */ |
495 | | void |
496 | | vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, |
497 | | MemoryContext vac_context, bool isTopLevel) |
498 | 0 | { |
499 | 0 | static bool in_vacuum = false; |
500 | |
|
501 | 0 | const char *stmttype; |
502 | 0 | volatile bool in_outer_xact, |
503 | 0 | use_own_xacts; |
504 | |
|
505 | 0 | Assert(params != NULL); |
506 | |
|
507 | 0 | stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; |
508 | | |
509 | | /* |
510 | | * We cannot run VACUUM inside a user transaction block; if we were inside |
511 | | * a transaction, then our commit- and start-transaction-command calls |
512 | | * would not have the intended effect! There are numerous other subtle |
513 | | * dependencies on this, too. |
514 | | * |
515 | | * ANALYZE (without VACUUM) can run either way. |
516 | | */ |
517 | 0 | if (params->options & VACOPT_VACUUM) |
518 | 0 | { |
519 | 0 | PreventInTransactionBlock(isTopLevel, stmttype); |
520 | 0 | in_outer_xact = false; |
521 | 0 | } |
522 | 0 | else |
523 | 0 | in_outer_xact = IsInTransactionBlock(isTopLevel); |
524 | | |
525 | | /* |
526 | | * Check for and disallow recursive calls. This could happen when VACUUM |
527 | | * FULL or ANALYZE calls a hostile index expression that itself calls |
528 | | * ANALYZE. |
529 | | */ |
530 | 0 | if (in_vacuum) |
531 | 0 | ereport(ERROR, |
532 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
533 | 0 | errmsg("%s cannot be executed from VACUUM or ANALYZE", |
534 | 0 | stmttype))); |
535 | | |
536 | | /* |
537 | | * Build list of relation(s) to process, putting any new data in |
538 | | * vac_context for safekeeping. |
539 | | */ |
540 | 0 | if (params->options & VACOPT_ONLY_DATABASE_STATS) |
541 | 0 | { |
542 | | /* We don't process any tables in this case */ |
543 | 0 | Assert(relations == NIL); |
544 | 0 | } |
545 | 0 | else if (relations != NIL) |
546 | 0 | { |
547 | 0 | List *newrels = NIL; |
548 | 0 | ListCell *lc; |
549 | |
|
550 | 0 | foreach(lc, relations) |
551 | 0 | { |
552 | 0 | VacuumRelation *vrel = lfirst_node(VacuumRelation, lc); |
553 | 0 | List *sublist; |
554 | 0 | MemoryContext old_context; |
555 | |
|
556 | 0 | sublist = expand_vacuum_rel(vrel, vac_context, params->options); |
557 | 0 | old_context = MemoryContextSwitchTo(vac_context); |
558 | 0 | newrels = list_concat(newrels, sublist); |
559 | 0 | MemoryContextSwitchTo(old_context); |
560 | 0 | } |
561 | 0 | relations = newrels; |
562 | 0 | } |
563 | 0 | else |
564 | 0 | relations = get_all_vacuum_rels(vac_context, params->options); |
565 | | |
566 | | /* |
567 | | * Decide whether we need to start/commit our own transactions. |
568 | | * |
569 | | * For VACUUM (with or without ANALYZE): always do so, so that we can |
570 | | * release locks as soon as possible. (We could possibly use the outer |
571 | | * transaction for a one-table VACUUM, but handling TOAST tables would be |
572 | | * problematic.) |
573 | | * |
574 | | * For ANALYZE (no VACUUM): if inside a transaction block, we cannot |
575 | | * start/commit our own transactions. Also, there's no need to do so if |
576 | | * only processing one relation. For multiple relations when not within a |
577 | | * transaction block, and also in an autovacuum worker, use own |
578 | | * transactions so we can release locks sooner. |
579 | | */ |
580 | 0 | if (params->options & VACOPT_VACUUM) |
581 | 0 | use_own_xacts = true; |
582 | 0 | else |
583 | 0 | { |
584 | 0 | Assert(params->options & VACOPT_ANALYZE); |
585 | 0 | if (AmAutoVacuumWorkerProcess()) |
586 | 0 | use_own_xacts = true; |
587 | 0 | else if (in_outer_xact) |
588 | 0 | use_own_xacts = false; |
589 | 0 | else if (list_length(relations) > 1) |
590 | 0 | use_own_xacts = true; |
591 | 0 | else |
592 | 0 | use_own_xacts = false; |
593 | 0 | } |
594 | | |
595 | | /* |
596 | | * vacuum_rel expects to be entered with no transaction active; it will |
597 | | * start and commit its own transaction. But we are called by an SQL |
598 | | * command, and so we are executing inside a transaction already. We |
599 | | * commit the transaction started in PostgresMain() here, and start |
600 | | * another one before exiting to match the commit waiting for us back in |
601 | | * PostgresMain(). |
602 | | */ |
603 | 0 | if (use_own_xacts) |
604 | 0 | { |
605 | 0 | Assert(!in_outer_xact); |
606 | | |
607 | | /* ActiveSnapshot is not set by autovacuum */ |
608 | 0 | if (ActiveSnapshotSet()) |
609 | 0 | PopActiveSnapshot(); |
610 | | |
611 | | /* matches the StartTransaction in PostgresMain() */ |
612 | 0 | CommitTransactionCommand(); |
613 | 0 | } |
614 | | |
615 | | /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */ |
616 | 0 | PG_TRY(); |
617 | 0 | { |
618 | 0 | ListCell *cur; |
619 | |
|
620 | 0 | in_vacuum = true; |
621 | 0 | VacuumFailsafeActive = false; |
622 | 0 | VacuumUpdateCosts(); |
623 | 0 | VacuumCostBalance = 0; |
624 | 0 | VacuumCostBalanceLocal = 0; |
625 | 0 | VacuumSharedCostBalance = NULL; |
626 | 0 | VacuumActiveNWorkers = NULL; |
627 | | |
628 | | /* |
629 | | * Loop to process each selected relation. |
630 | | */ |
631 | 0 | foreach(cur, relations) |
632 | 0 | { |
633 | 0 | VacuumRelation *vrel = lfirst_node(VacuumRelation, cur); |
634 | |
|
635 | 0 | if (params->options & VACOPT_VACUUM) |
636 | 0 | { |
637 | 0 | if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy)) |
638 | 0 | continue; |
639 | 0 | } |
640 | | |
641 | 0 | if (params->options & VACOPT_ANALYZE) |
642 | 0 | { |
643 | | /* |
644 | | * If using separate xacts, start one for analyze. Otherwise, |
645 | | * we can use the outer transaction. |
646 | | */ |
647 | 0 | if (use_own_xacts) |
648 | 0 | { |
649 | 0 | StartTransactionCommand(); |
650 | | /* functions in indexes may want a snapshot set */ |
651 | 0 | PushActiveSnapshot(GetTransactionSnapshot()); |
652 | 0 | } |
653 | |
|
654 | 0 | analyze_rel(vrel->oid, vrel->relation, params, |
655 | 0 | vrel->va_cols, in_outer_xact, bstrategy); |
656 | |
|
657 | 0 | if (use_own_xacts) |
658 | 0 | { |
659 | 0 | PopActiveSnapshot(); |
660 | | /* standard_ProcessUtility() does CCI if !use_own_xacts */ |
661 | 0 | CommandCounterIncrement(); |
662 | 0 | CommitTransactionCommand(); |
663 | 0 | } |
664 | 0 | else |
665 | 0 | { |
666 | | /* |
667 | | * If we're not using separate xacts, better separate the |
668 | | * ANALYZE actions with CCIs. This avoids trouble if user |
669 | | * says "ANALYZE t, t". |
670 | | */ |
671 | 0 | CommandCounterIncrement(); |
672 | 0 | } |
673 | 0 | } |
674 | | |
675 | | /* |
676 | | * Ensure VacuumFailsafeActive has been reset before vacuuming the |
677 | | * next relation. |
678 | | */ |
679 | 0 | VacuumFailsafeActive = false; |
680 | 0 | } |
681 | 0 | } |
682 | 0 | PG_FINALLY(); |
683 | 0 | { |
684 | 0 | in_vacuum = false; |
685 | 0 | VacuumCostActive = false; |
686 | 0 | VacuumFailsafeActive = false; |
687 | 0 | VacuumCostBalance = 0; |
688 | 0 | } |
689 | 0 | PG_END_TRY(); |
690 | | |
691 | | /* |
692 | | * Finish up processing. |
693 | | */ |
694 | 0 | if (use_own_xacts) |
695 | 0 | { |
696 | | /* here, we are not in a transaction */ |
697 | | |
698 | | /* |
699 | | * This matches the CommitTransaction waiting for us in |
700 | | * PostgresMain(). |
701 | | */ |
702 | 0 | StartTransactionCommand(); |
703 | 0 | } |
704 | |
|
705 | 0 | if ((params->options & VACOPT_VACUUM) && |
706 | 0 | !(params->options & VACOPT_SKIP_DATABASE_STATS)) |
707 | 0 | { |
708 | | /* |
709 | | * Update pg_database.datfrozenxid, and truncate pg_xact if possible. |
710 | | */ |
711 | 0 | vac_update_datfrozenxid(); |
712 | 0 | } |
713 | |
|
714 | 0 | } |
715 | | |
716 | | /* |
717 | | * Check if the current user has privileges to vacuum or analyze the relation. |
718 | | * If not, issue a WARNING log message and return false to let the caller |
719 | | * decide what to do with this relation. This routine is used to decide if a |
720 | | * relation can be processed for VACUUM or ANALYZE. |
721 | | */ |
722 | | bool |
723 | | vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple, |
724 | | bits32 options) |
725 | 0 | { |
726 | 0 | char *relname; |
727 | |
|
728 | 0 | Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0); |
729 | | |
730 | | /*---------- |
731 | | * A role has privileges to vacuum or analyze the relation if any of the |
732 | | * following are true: |
733 | | * - the role owns the current database and the relation is not shared |
734 | | * - the role has the MAINTAIN privilege on the relation |
735 | | *---------- |
736 | | */ |
737 | 0 | if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) && |
738 | 0 | !reltuple->relisshared) || |
739 | 0 | pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK) |
740 | 0 | return true; |
741 | | |
742 | 0 | relname = NameStr(reltuple->relname); |
743 | |
|
744 | 0 | if ((options & VACOPT_VACUUM) != 0) |
745 | 0 | { |
746 | 0 | ereport(WARNING, |
747 | 0 | (errmsg("permission denied to vacuum \"%s\", skipping it", |
748 | 0 | relname))); |
749 | | |
750 | | /* |
751 | | * For VACUUM ANALYZE, both logs could show up, but just generate |
752 | | * information for VACUUM as that would be the first one to be |
753 | | * processed. |
754 | | */ |
755 | 0 | return false; |
756 | 0 | } |
757 | | |
758 | 0 | if ((options & VACOPT_ANALYZE) != 0) |
759 | 0 | ereport(WARNING, |
760 | 0 | (errmsg("permission denied to analyze \"%s\", skipping it", |
761 | 0 | relname))); |
762 | | |
763 | 0 | return false; |
764 | 0 | } |
765 | | |
766 | | |
767 | | /* |
768 | | * vacuum_open_relation |
769 | | * |
770 | | * This routine is used for attempting to open and lock a relation which |
771 | | * is going to be vacuumed or analyzed. If the relation cannot be opened |
772 | | * or locked, a log is emitted if possible. |
773 | | */ |
774 | | Relation |
775 | | vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options, |
776 | | bool verbose, LOCKMODE lmode) |
777 | 0 | { |
778 | 0 | Relation rel; |
779 | 0 | bool rel_lock = true; |
780 | 0 | int elevel; |
781 | |
|
782 | 0 | Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0); |
783 | | |
784 | | /* |
785 | | * Open the relation and get the appropriate lock on it. |
786 | | * |
787 | | * There's a race condition here: the relation may have gone away since |
788 | | * the last time we saw it. If so, we don't need to vacuum or analyze it. |
789 | | * |
790 | | * If we've been asked not to wait for the relation lock, acquire it first |
791 | | * in non-blocking mode, before calling try_relation_open(). |
792 | | */ |
793 | 0 | if (!(options & VACOPT_SKIP_LOCKED)) |
794 | 0 | rel = try_relation_open(relid, lmode); |
795 | 0 | else if (ConditionalLockRelationOid(relid, lmode)) |
796 | 0 | rel = try_relation_open(relid, NoLock); |
797 | 0 | else |
798 | 0 | { |
799 | 0 | rel = NULL; |
800 | 0 | rel_lock = false; |
801 | 0 | } |
802 | | |
803 | | /* if relation is opened, leave */ |
804 | 0 | if (rel) |
805 | 0 | return rel; |
806 | | |
807 | | /* |
808 | | * Relation could not be opened, hence generate if possible a log |
809 | | * informing on the situation. |
810 | | * |
811 | | * If the RangeVar is not defined, we do not have enough information to |
812 | | * provide a meaningful log statement. Chances are that the caller has |
813 | | * intentionally not provided this information so that this logging is |
814 | | * skipped, anyway. |
815 | | */ |
816 | 0 | if (relation == NULL) |
817 | 0 | return NULL; |
818 | | |
819 | | /* |
820 | | * Determine the log level. |
821 | | * |
822 | | * For manual VACUUM or ANALYZE, we emit a WARNING to match the log |
823 | | * statements in the permission checks; otherwise, only log if the caller |
824 | | * so requested. |
825 | | */ |
826 | 0 | if (!AmAutoVacuumWorkerProcess()) |
827 | 0 | elevel = WARNING; |
828 | 0 | else if (verbose) |
829 | 0 | elevel = LOG; |
830 | 0 | else |
831 | 0 | return NULL; |
832 | | |
833 | 0 | if ((options & VACOPT_VACUUM) != 0) |
834 | 0 | { |
835 | 0 | if (!rel_lock) |
836 | 0 | ereport(elevel, |
837 | 0 | (errcode(ERRCODE_LOCK_NOT_AVAILABLE), |
838 | 0 | errmsg("skipping vacuum of \"%s\" --- lock not available", |
839 | 0 | relation->relname))); |
840 | 0 | else |
841 | 0 | ereport(elevel, |
842 | 0 | (errcode(ERRCODE_UNDEFINED_TABLE), |
843 | 0 | errmsg("skipping vacuum of \"%s\" --- relation no longer exists", |
844 | 0 | relation->relname))); |
845 | | |
846 | | /* |
847 | | * For VACUUM ANALYZE, both logs could show up, but just generate |
848 | | * information for VACUUM as that would be the first one to be |
849 | | * processed. |
850 | | */ |
851 | 0 | return NULL; |
852 | 0 | } |
853 | | |
854 | 0 | if ((options & VACOPT_ANALYZE) != 0) |
855 | 0 | { |
856 | 0 | if (!rel_lock) |
857 | 0 | ereport(elevel, |
858 | 0 | (errcode(ERRCODE_LOCK_NOT_AVAILABLE), |
859 | 0 | errmsg("skipping analyze of \"%s\" --- lock not available", |
860 | 0 | relation->relname))); |
861 | 0 | else |
862 | 0 | ereport(elevel, |
863 | 0 | (errcode(ERRCODE_UNDEFINED_TABLE), |
864 | 0 | errmsg("skipping analyze of \"%s\" --- relation no longer exists", |
865 | 0 | relation->relname))); |
866 | 0 | } |
867 | | |
868 | 0 | return NULL; |
869 | 0 | } |
870 | | |
871 | | |
/*
 * Given a VacuumRelation, fill in the table OID if it wasn't specified,
 * and optionally add VacuumRelations for partitions or inheritance children.
 *
 * If a VacuumRelation does not have an OID supplied and is a partitioned
 * table, an extra entry will be added to the output for each partition.
 * Presently, only autovacuum supplies OIDs when calling vacuum(), and
 * it does not want us to expand partitioned tables.
 *
 * We take care not to modify the input data structure, but instead build
 * new VacuumRelation(s) to return.  (But note that they will reference
 * unmodified parts of the input, eg column lists.)  New data structures
 * are made in vac_context.
 *
 * Returns a List of VacuumRelation nodes, allocated in vac_context; the
 * list may be empty if the target could not be locked or the user lacks
 * the required privileges.
 */
static List *
expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
				  int options)
{
	List	   *vacrels = NIL;
	MemoryContext oldcontext;

	/*
	 * If caller supplied OID, there's nothing we need do here.  The input
	 * node is reused as-is; only the list cell lives in vac_context.
	 */
	if (OidIsValid(vrel->oid))
	{
		oldcontext = MemoryContextSwitchTo(vac_context);
		vacrels = lappend(vacrels, vrel);
		MemoryContextSwitchTo(oldcontext);
	}
	else
	{
		/*
		 * Process a specific relation, and possibly partitions or child
		 * tables thereof.
		 */
		Oid			relid;
		HeapTuple	tuple;
		Form_pg_class classForm;
		bool		include_children;
		bool		is_partitioned_table;
		int			rvr_opts;

		/*
		 * Since autovacuum workers supply OIDs when calling vacuum(), no
		 * autovacuum worker should reach this code.
		 */
		Assert(!AmAutoVacuumWorkerProcess());

		/*
		 * We transiently take AccessShareLock to protect the syscache lookup
		 * below, as well as find_all_inheritors's expectation that the caller
		 * holds some lock on the starting relation.
		 */
		rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
		relid = RangeVarGetRelidExtended(vrel->relation,
										 AccessShareLock,
										 rvr_opts,
										 NULL, NULL);

		/*
		 * If the lock is unavailable, emit the same log statement that
		 * vacuum_rel() and analyze_rel() would.  (RangeVarGetRelidExtended
		 * returns InvalidOid in the RVR_SKIP_LOCKED case.)
		 */
		if (!OidIsValid(relid))
		{
			if (options & VACOPT_VACUUM)
				ereport(WARNING,
						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
						 errmsg("skipping vacuum of \"%s\" --- lock not available",
								vrel->relation->relname)));
			else
				ereport(WARNING,
						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
						 errmsg("skipping analyze of \"%s\" --- lock not available",
								vrel->relation->relname)));
			return vacrels;
		}

		/*
		 * To check whether the relation is a partitioned table and its
		 * ownership, fetch its syscache entry.
		 */
		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for relation %u", relid);
		classForm = (Form_pg_class) GETSTRUCT(tuple);

		/*
		 * Make a returnable VacuumRelation for this rel if the user has the
		 * required privileges.  The RangeVar is kept so that later failures
		 * to open the relation can be reported by name.
		 */
		if (vacuum_is_permitted_for_relation(relid, classForm, options))
		{
			oldcontext = MemoryContextSwitchTo(vac_context);
			vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
														  relid,
														  vrel->va_cols));
			MemoryContextSwitchTo(oldcontext);
		}

		/*
		 * Vacuuming a partitioned table with ONLY will not do anything since
		 * the partitioned table itself is empty.  Issue a warning if the user
		 * requests this.
		 */
		include_children = vrel->relation->inh;
		is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
		if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
			ereport(WARNING,
					(errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
							vrel->relation->relname)));

		ReleaseSysCache(tuple);

		/*
		 * Unless the user has specified ONLY, make relation list entries for
		 * its partitions or inheritance child tables.  Note that the list
		 * returned by find_all_inheritors() includes the passed-in OID, so we
		 * have to skip that.  There's no point in taking locks on the
		 * individual partitions or child tables yet, and doing so would just
		 * add unnecessary deadlock risk.  For this last reason, we do not yet
		 * check the ownership of the partitions/tables, which get added to
		 * the list to process.  Ownership will be checked later on anyway.
		 */
		if (include_children)
		{
			List	   *part_oids = find_all_inheritors(relid, NoLock, NULL);
			ListCell   *part_lc;

			foreach(part_lc, part_oids)
			{
				Oid			part_oid = lfirst_oid(part_lc);

				if (part_oid == relid)
					continue;	/* ignore original table */

				/*
				 * We omit a RangeVar since it wouldn't be appropriate to
				 * complain about failure to open one of these relations
				 * later.
				 */
				oldcontext = MemoryContextSwitchTo(vac_context);
				vacrels = lappend(vacrels, makeVacuumRelation(NULL,
															  part_oid,
															  vrel->va_cols));
				MemoryContextSwitchTo(oldcontext);
			}
		}

		/*
		 * Release lock again.  This means that by the time we actually try to
		 * process the table, it might be gone or renamed.  In the former case
		 * we'll silently ignore it; in the latter case we'll process it
		 * anyway, but we must beware that the RangeVar doesn't necessarily
		 * identify it anymore.  This isn't ideal, perhaps, but there's little
		 * practical alternative, since we're typically going to commit this
		 * transaction and begin a new one between now and then.  Moreover,
		 * holding locks on multiple relations would create significant risk
		 * of deadlock.
		 */
		UnlockRelationOid(relid, AccessShareLock);
	}

	return vacrels;
}
1036 | | |
1037 | | /* |
1038 | | * Construct a list of VacuumRelations for all vacuumable rels in |
1039 | | * the current database. The list is built in vac_context. |
1040 | | */ |
1041 | | static List * |
1042 | | get_all_vacuum_rels(MemoryContext vac_context, int options) |
1043 | 0 | { |
1044 | 0 | List *vacrels = NIL; |
1045 | 0 | Relation pgclass; |
1046 | 0 | TableScanDesc scan; |
1047 | 0 | HeapTuple tuple; |
1048 | |
|
1049 | 0 | pgclass = table_open(RelationRelationId, AccessShareLock); |
1050 | |
|
1051 | 0 | scan = table_beginscan_catalog(pgclass, 0, NULL); |
1052 | |
|
1053 | 0 | while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) |
1054 | 0 | { |
1055 | 0 | Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); |
1056 | 0 | MemoryContext oldcontext; |
1057 | 0 | Oid relid = classForm->oid; |
1058 | | |
1059 | | /* |
1060 | | * We include partitioned tables here; depending on which operation is |
1061 | | * to be performed, caller will decide whether to process or ignore |
1062 | | * them. |
1063 | | */ |
1064 | 0 | if (classForm->relkind != RELKIND_RELATION && |
1065 | 0 | classForm->relkind != RELKIND_MATVIEW && |
1066 | 0 | classForm->relkind != RELKIND_PARTITIONED_TABLE) |
1067 | 0 | continue; |
1068 | | |
1069 | | /* check permissions of relation */ |
1070 | 0 | if (!vacuum_is_permitted_for_relation(relid, classForm, options)) |
1071 | 0 | continue; |
1072 | | |
1073 | | /* |
1074 | | * Build VacuumRelation(s) specifying the table OIDs to be processed. |
1075 | | * We omit a RangeVar since it wouldn't be appropriate to complain |
1076 | | * about failure to open one of these relations later. |
1077 | | */ |
1078 | 0 | oldcontext = MemoryContextSwitchTo(vac_context); |
1079 | 0 | vacrels = lappend(vacrels, makeVacuumRelation(NULL, |
1080 | 0 | relid, |
1081 | 0 | NIL)); |
1082 | 0 | MemoryContextSwitchTo(oldcontext); |
1083 | 0 | } |
1084 | |
|
1085 | 0 | table_endscan(scan); |
1086 | 0 | table_close(pgclass, AccessShareLock); |
1087 | |
|
1088 | 0 | return vacrels; |
1089 | 0 | } |
1090 | | |
/*
 * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
 *
 * The target relation and VACUUM parameters are our inputs.
 *
 * Output parameters are the cutoffs that VACUUM caller should use.
 *
 * Return value indicates if vacuumlazy.c caller should make its VACUUM
 * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
 * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
 * minimum).
 */
bool
vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
				   struct VacuumCutoffs *cutoffs)
{
	int			freeze_min_age,
				multixact_freeze_min_age,
				freeze_table_age,
				multixact_freeze_table_age,
				effective_multixact_freeze_max_age;
	TransactionId nextXID,
				safeOldestXmin,
				aggressiveXIDCutoff;
	MultiXactId nextMXID,
				safeOldestMxact,
				aggressiveMXIDCutoff;

	/* Use mutable copies of freeze age parameters (clamped below) */
	freeze_min_age = params->freeze_min_age;
	multixact_freeze_min_age = params->multixact_freeze_min_age;
	freeze_table_age = params->freeze_table_age;
	multixact_freeze_table_age = params->multixact_freeze_table_age;

	/* Set pg_class fields in cutoffs */
	cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
	cutoffs->relminmxid = rel->rd_rel->relminmxid;

	/*
	 * Acquire OldestXmin.
	 *
	 * We can always ignore processes running lazy vacuum.  This is because we
	 * use these values only for deciding which tuples we must keep in the
	 * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
	 * XID assigned), it's safe to ignore it.  In theory it could be
	 * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
	 * that only one vacuum process can be working on a particular table at
	 * any time, and that each vacuum is always an independent transaction.
	 */
	cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);

	Assert(TransactionIdIsNormal(cutoffs->OldestXmin));

	/* Acquire OldestMxact */
	cutoffs->OldestMxact = GetOldestMultiXactId();
	Assert(MultiXactIdIsValid(cutoffs->OldestMxact));

	/* Acquire next XID/next MXID values used to apply age-based settings */
	nextXID = ReadNextTransactionId();
	nextMXID = ReadNextMultiXactId();

	/*
	 * Also compute the multixact age for which freezing is urgent.  This is
	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
	 * short of multixact member space.
	 */
	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();

	/*
	 * Almost ready to set freeze output parameters; check if OldestXmin or
	 * OldestMxact are held back to an unsafe degree before we start on that
	 */
	safeOldestXmin = nextXID - autovacuum_freeze_max_age;
	if (!TransactionIdIsNormal(safeOldestXmin))
		safeOldestXmin = FirstNormalTransactionId;	/* subtraction wrapped */
	safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
	if (safeOldestMxact < FirstMultiXactId)
		safeOldestMxact = FirstMultiXactId; /* subtraction wrapped */
	if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
		ereport(WARNING,
				(errmsg("cutoff for removing and freezing tuples is far in the past"),
				 errhint("Close open transactions soon to avoid wraparound problems.\n"
						 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
	if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
		ereport(WARNING,
				(errmsg("cutoff for freezing multixacts is far in the past"),
				 errhint("Close open transactions soon to avoid wraparound problems.\n"
						 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));

	/*
	 * Determine the minimum freeze age to use: as specified by the caller, or
	 * vacuum_freeze_min_age, but in any case not more than half
	 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
	 * wraparound won't occur too frequently.
	 */
	if (freeze_min_age < 0)
		freeze_min_age = vacuum_freeze_min_age;
	freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
	Assert(freeze_min_age >= 0);

	/* Compute FreezeLimit, being careful to generate a normal XID */
	cutoffs->FreezeLimit = nextXID - freeze_min_age;
	if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
		cutoffs->FreezeLimit = FirstNormalTransactionId;
	/* FreezeLimit must always be <= OldestXmin */
	if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
		cutoffs->FreezeLimit = cutoffs->OldestXmin;

	/*
	 * Determine the minimum multixact freeze age to use: as specified by
	 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
	 * than half effective_multixact_freeze_max_age, so that autovacuums to
	 * prevent MultiXact wraparound won't occur too frequently.
	 */
	if (multixact_freeze_min_age < 0)
		multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
	multixact_freeze_min_age = Min(multixact_freeze_min_age,
								   effective_multixact_freeze_max_age / 2);
	Assert(multixact_freeze_min_age >= 0);

	/* Compute MultiXactCutoff, being careful to generate a valid value */
	cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
	if (cutoffs->MultiXactCutoff < FirstMultiXactId)
		cutoffs->MultiXactCutoff = FirstMultiXactId;
	/* MultiXactCutoff must always be <= OldestMxact */
	if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
		cutoffs->MultiXactCutoff = cutoffs->OldestMxact;

	/*
	 * Finally, figure out if caller needs to do an aggressive VACUUM or not.
	 *
	 * Determine the table freeze age to use: as specified by the caller, or
	 * the value of the vacuum_freeze_table_age GUC, but in any case not more
	 * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
	 * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
	 * anti-wraparound autovacuum is launched.
	 */
	if (freeze_table_age < 0)
		freeze_table_age = vacuum_freeze_table_age;
	freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
	Assert(freeze_table_age >= 0);
	aggressiveXIDCutoff = nextXID - freeze_table_age;
	if (!TransactionIdIsNormal(aggressiveXIDCutoff))
		aggressiveXIDCutoff = FirstNormalTransactionId;
	if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
									  aggressiveXIDCutoff))
		return true;			/* relfrozenxid is old: be aggressive */

	/*
	 * Similar to the above, determine the table freeze age to use for
	 * multixacts: as specified by the caller, or the value of the
	 * vacuum_multixact_freeze_table_age GUC, but in any case not more than
	 * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
	 * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
	 * multixacts before anti-wraparound autovacuum is launched.
	 */
	if (multixact_freeze_table_age < 0)
		multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
	multixact_freeze_table_age =
		Min(multixact_freeze_table_age,
			effective_multixact_freeze_max_age * 0.95);
	Assert(multixact_freeze_table_age >= 0);
	aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
	if (aggressiveMXIDCutoff < FirstMultiXactId)
		aggressiveMXIDCutoff = FirstMultiXactId;
	if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
									aggressiveMXIDCutoff))
		return true;			/* relminmxid is old: be aggressive */

	/* Non-aggressive VACUUM */
	return false;
}
1263 | | |
1264 | | /* |
1265 | | * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe |
1266 | | * mechanism to determine if its table's relfrozenxid and relminmxid are now |
1267 | | * dangerously far in the past. |
1268 | | * |
1269 | | * When we return true, VACUUM caller triggers the failsafe. |
1270 | | */ |
1271 | | bool |
1272 | | vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs) |
1273 | 0 | { |
1274 | 0 | TransactionId relfrozenxid = cutoffs->relfrozenxid; |
1275 | 0 | MultiXactId relminmxid = cutoffs->relminmxid; |
1276 | 0 | TransactionId xid_skip_limit; |
1277 | 0 | MultiXactId multi_skip_limit; |
1278 | 0 | int skip_index_vacuum; |
1279 | |
|
1280 | 0 | Assert(TransactionIdIsNormal(relfrozenxid)); |
1281 | 0 | Assert(MultiXactIdIsValid(relminmxid)); |
1282 | | |
1283 | | /* |
1284 | | * Determine the index skipping age to use. In any case no less than |
1285 | | * autovacuum_freeze_max_age * 1.05. |
1286 | | */ |
1287 | 0 | skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05); |
1288 | |
|
1289 | 0 | xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum; |
1290 | 0 | if (!TransactionIdIsNormal(xid_skip_limit)) |
1291 | 0 | xid_skip_limit = FirstNormalTransactionId; |
1292 | |
|
1293 | 0 | if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit)) |
1294 | 0 | { |
1295 | | /* The table's relfrozenxid is too old */ |
1296 | 0 | return true; |
1297 | 0 | } |
1298 | | |
1299 | | /* |
1300 | | * Similar to above, determine the index skipping age to use for |
1301 | | * multixact. In any case no less than autovacuum_multixact_freeze_max_age * |
1302 | | * 1.05. |
1303 | | */ |
1304 | 0 | skip_index_vacuum = Max(vacuum_multixact_failsafe_age, |
1305 | 0 | autovacuum_multixact_freeze_max_age * 1.05); |
1306 | |
|
1307 | 0 | multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum; |
1308 | 0 | if (multi_skip_limit < FirstMultiXactId) |
1309 | 0 | multi_skip_limit = FirstMultiXactId; |
1310 | |
|
1311 | 0 | if (MultiXactIdPrecedes(relminmxid, multi_skip_limit)) |
1312 | 0 | { |
1313 | | /* The table's relminmxid is too old */ |
1314 | 0 | return true; |
1315 | 0 | } |
1316 | | |
1317 | 0 | return false; |
1318 | 0 | } |
1319 | | |
1320 | | /* |
1321 | | * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples |
1322 | | * |
1323 | | * If we scanned the whole relation then we should just use the count of |
1324 | | * live tuples seen; but if we did not, we should not blindly extrapolate |
1325 | | * from that number, since VACUUM may have scanned a quite nonrandom |
1326 | | * subset of the table. When we have only partial information, we take |
1327 | | * the old value of pg_class.reltuples/pg_class.relpages as a measurement |
1328 | | * of the tuple density in the unscanned pages. |
1329 | | * |
1330 | | * Note: scanned_tuples should count only *live* tuples, since |
1331 | | * pg_class.reltuples is defined that way. |
1332 | | */ |
1333 | | double |
1334 | | vac_estimate_reltuples(Relation relation, |
1335 | | BlockNumber total_pages, |
1336 | | BlockNumber scanned_pages, |
1337 | | double scanned_tuples) |
1338 | 0 | { |
1339 | 0 | BlockNumber old_rel_pages = relation->rd_rel->relpages; |
1340 | 0 | double old_rel_tuples = relation->rd_rel->reltuples; |
1341 | 0 | double old_density; |
1342 | 0 | double unscanned_pages; |
1343 | 0 | double total_tuples; |
1344 | | |
1345 | | /* If we did scan the whole table, just use the count as-is */ |
1346 | 0 | if (scanned_pages >= total_pages) |
1347 | 0 | return scanned_tuples; |
1348 | | |
1349 | | /* |
1350 | | * When successive VACUUM commands scan the same few pages again and |
1351 | | * again, without anything from the table really changing, there is a risk |
1352 | | * that our beliefs about tuple density will gradually become distorted. |
1353 | | * This might be caused by vacuumlazy.c implementation details, such as |
1354 | | * its tendency to always scan the last heap page. Handle that here. |
1355 | | * |
1356 | | * If the relation is _exactly_ the same size according to the existing |
1357 | | * pg_class entry, and only a few of its pages (less than 2%) were |
1358 | | * scanned, keep the existing value of reltuples. Also keep the existing |
1359 | | * value when only a subset of rel's pages <= a single page were scanned. |
1360 | | * |
1361 | | * (Note: we might be returning -1 here.) |
1362 | | */ |
1363 | 0 | if (old_rel_pages == total_pages && |
1364 | 0 | scanned_pages < (double) total_pages * 0.02) |
1365 | 0 | return old_rel_tuples; |
1366 | 0 | if (scanned_pages <= 1) |
1367 | 0 | return old_rel_tuples; |
1368 | | |
1369 | | /* |
1370 | | * If old density is unknown, we can't do much except scale up |
1371 | | * scanned_tuples to match total_pages. |
1372 | | */ |
1373 | 0 | if (old_rel_tuples < 0 || old_rel_pages == 0) |
1374 | 0 | return floor((scanned_tuples / scanned_pages) * total_pages + 0.5); |
1375 | | |
1376 | | /* |
1377 | | * Okay, we've covered the corner cases. The normal calculation is to |
1378 | | * convert the old measurement to a density (tuples per page), then |
1379 | | * estimate the number of tuples in the unscanned pages using that figure, |
1380 | | * and finally add on the number of tuples in the scanned pages. |
1381 | | */ |
1382 | 0 | old_density = old_rel_tuples / old_rel_pages; |
1383 | 0 | unscanned_pages = (double) total_pages - (double) scanned_pages; |
1384 | 0 | total_tuples = old_density * unscanned_pages + scanned_tuples; |
1385 | 0 | return floor(total_tuples + 0.5); |
1386 | 0 | } |
1387 | | |
1388 | | |
/*
 * vac_update_relstats() -- update statistics for one relation
 *
 * Update the whole-relation statistics that are kept in its pg_class
 * row.  There are additional stats that will be updated if we are
 * doing ANALYZE, but we always update these stats.  This routine works
 * for both index and heap relation entries in pg_class.
 *
 * We violate transaction semantics here by overwriting the rel's
 * existing pg_class tuple with the new values.  This is reasonably
 * safe as long as we're sure that the new values are correct whether or
 * not this transaction commits.  The reason for doing this is that if
 * we updated these tuples in the usual way, vacuuming pg_class itself
 * wouldn't work very well --- by the time we got done with a vacuum
 * cycle, most of the tuples in pg_class would've been obsoleted.  Of
 * course, this only works for fixed-size not-null columns, but these are.
 *
 * Another reason for doing it this way is that when we are in a lazy
 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
 * Somebody vacuuming pg_class might think they could delete a tuple
 * marked with xmin = our xid.
 *
 * In addition to fundamentally nontransactional statistics such as
 * relpages and relallvisible, we try to maintain certain lazily-updated
 * DDL flags such as relhasindex, by clearing them if no longer correct.
 * It's safe to do this in VACUUM, which can't run in parallel with
 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
 * However, it's *not* safe to do it in an ANALYZE that's within an
 * outer transaction, because for example the current transaction might
 * have dropped the last index; then we'd think relhasindex should be
 * cleared, but if the transaction later rolls back this would be wrong.
 * So we refrain from updating the DDL flags if we're inside an outer
 * transaction.  This is OK since postponing the flag maintenance is
 * always allowable.
 *
 * Note: num_tuples should count only *live* tuples, since
 * pg_class.reltuples is defined that way.
 *
 * This routine is shared by VACUUM and ANALYZE.
 */
void
vac_update_relstats(Relation relation,
					BlockNumber num_pages, double num_tuples,
					BlockNumber num_all_visible_pages,
					BlockNumber num_all_frozen_pages,
					bool hasindex, TransactionId frozenxid,
					MultiXactId minmulti,
					bool *frozenxid_updated, bool *minmulti_updated,
					bool in_outer_xact)
{
	Oid			relid = RelationGetRelid(relation);
	Relation	rd;
	ScanKeyData key[1];
	HeapTuple	ctup;
	void	   *inplace_state;
	Form_pg_class pgcform;
	bool		dirty,			/* did anything actually change? */
				futurexid,
				futuremxid;
	TransactionId oldfrozenxid;
	MultiXactId oldminmulti;

	rd = table_open(RelationRelationId, RowExclusiveLock);

	/*
	 * Fetch a copy of the tuple to scribble on, beginning an in-place
	 * (nontransactional) update of the pg_class row; see header comment.
	 */
	ScanKeyInit(&key[0],
				Anum_pg_class_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));
	systable_inplace_update_begin(rd, ClassOidIndexId, true,
								  NULL, 1, key, &ctup, &inplace_state);
	if (!HeapTupleIsValid(ctup))
		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
			 relid);
	pgcform = (Form_pg_class) GETSTRUCT(ctup);

	/* Apply statistical updates, if any, to copied tuple */

	dirty = false;
	if (pgcform->relpages != (int32) num_pages)
	{
		pgcform->relpages = (int32) num_pages;
		dirty = true;
	}
	if (pgcform->reltuples != (float4) num_tuples)
	{
		pgcform->reltuples = (float4) num_tuples;
		dirty = true;
	}
	if (pgcform->relallvisible != (int32) num_all_visible_pages)
	{
		pgcform->relallvisible = (int32) num_all_visible_pages;
		dirty = true;
	}
	if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
	{
		pgcform->relallfrozen = (int32) num_all_frozen_pages;
		dirty = true;
	}

	/* Apply DDL updates, but not inside an outer transaction (see above) */

	if (!in_outer_xact)
	{
		/*
		 * If we didn't find any indexes, reset relhasindex.
		 */
		if (pgcform->relhasindex && !hasindex)
		{
			pgcform->relhasindex = false;
			dirty = true;
		}

		/* We also clear relhasrules and relhastriggers if needed */
		if (pgcform->relhasrules && relation->rd_rules == NULL)
		{
			pgcform->relhasrules = false;
			dirty = true;
		}
		if (pgcform->relhastriggers && relation->trigdesc == NULL)
		{
			pgcform->relhastriggers = false;
			dirty = true;
		}
	}

	/*
	 * Update relfrozenxid, unless caller passed InvalidTransactionId
	 * indicating it has no new data.
	 *
	 * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
	 * stored relfrozenxid is "in the future" then it seems best to assume
	 * it's corrupt, and overwrite with the oldest remaining XID in the table.
	 * This should match vac_update_datfrozenxid() concerning what we consider
	 * to be "in the future".
	 */
	oldfrozenxid = pgcform->relfrozenxid;
	futurexid = false;
	if (frozenxid_updated)
		*frozenxid_updated = false;
	if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
	{
		bool		update = false;

		if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
			update = true;
		else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
			futurexid = update = true;	/* stored value looks corrupt */

		if (update)
		{
			pgcform->relfrozenxid = frozenxid;
			dirty = true;
			if (frozenxid_updated)
				*frozenxid_updated = true;
		}
	}

	/* Similarly for relminmxid */
	oldminmulti = pgcform->relminmxid;
	futuremxid = false;
	if (minmulti_updated)
		*minmulti_updated = false;
	if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
	{
		bool		update = false;

		if (MultiXactIdPrecedes(oldminmulti, minmulti))
			update = true;
		else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
			futuremxid = update = true; /* stored value looks corrupt */

		if (update)
		{
			pgcform->relminmxid = minmulti;
			dirty = true;
			if (minmulti_updated)
				*minmulti_updated = true;
		}
	}

	/* If anything changed, write out the tuple. */
	if (dirty)
		systable_inplace_update_finish(inplace_state, ctup);
	else
		systable_inplace_update_cancel(inplace_state);

	table_close(rd, RowExclusiveLock);

	/* Complain about replaced "future" values only after pg_class is closed */
	if (futurexid)
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
								 oldfrozenxid, frozenxid,
								 RelationGetRelationName(relation))));
	if (futuremxid)
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
								 oldminmulti, minmulti,
								 RelationGetRelationName(relation))));
}
1591 | | |
1592 | | |
/*
 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 *
 *		Update pg_database's datfrozenxid entry for our database to be the
 *		minimum of the pg_class.relfrozenxid values.
 *
 *		Similarly, update our datminmxid to be the minimum of the
 *		pg_class.relminmxid values.
 *
 *		If we are able to advance either pg_database value, also try to
 *		truncate pg_xact and pg_multixact.
 *
 *		We violate transaction semantics here by overwriting the database's
 *		existing pg_database tuple with the new values.  This is reasonably
 *		safe since the new values are correct whether or not this transaction
 *		commits.  As with vac_update_relstats, this avoids leaving dead tuples
 *		behind after a VACUUM.
 */
void
vac_update_datfrozenxid(void)
{
	HeapTuple	tuple;
	Form_pg_database dbform;
	Relation	relation;
	SysScanDesc scan;
	HeapTuple	classTup;
	TransactionId newFrozenXid;
	MultiXactId newMinMulti;
	TransactionId lastSaneFrozenXid;
	MultiXactId lastSaneMinMulti;
	bool		bogus = false;	/* saw a "future" xid/mxid in pg_class? */
	bool		dirty = false;	/* does the pg_database tuple need rewriting? */
	ScanKeyData key[1];
	void	   *inplace_state;

	/*
	 * Restrict this task to one backend per database.  This avoids race
	 * conditions that would move datfrozenxid or datminmxid backward.  It
	 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
	 * datfrozenxid passed to an earlier vac_truncate_clog() call.
	 */
	LockDatabaseFrozenIds(ExclusiveLock);

	/*
	 * Initialize the "min" calculation with
	 * GetOldestNonRemovableTransactionId(), which is a reasonable
	 * approximation to the minimum relfrozenxid for not-yet-committed
	 * pg_class entries for new tables; see AddNewRelationTuple().  So we
	 * cannot produce a wrong minimum by starting with this.
	 */
	newFrozenXid = GetOldestNonRemovableTransactionId(NULL);

	/*
	 * Similarly, initialize the MultiXact "min" with the value that would be
	 * used on pg_class for new tables.  See AddNewRelationTuple().
	 */
	newMinMulti = GetOldestMultiXactId();

	/*
	 * Identify the latest relfrozenxid and relminmxid values that we could
	 * validly see during the scan.  These are conservative values, but it's
	 * not really worth trying to be more exact.
	 */
	lastSaneFrozenXid = ReadNextTransactionId();
	lastSaneMinMulti = ReadNextMultiXactId();

	/*
	 * We must seqscan pg_class to find the minimum Xid, because there is no
	 * index that can help us here.
	 *
	 * See vac_truncate_clog() for the race condition to prevent.
	 */
	relation = table_open(RelationRelationId, AccessShareLock);

	scan = systable_beginscan(relation, InvalidOid, false,
							  NULL, 0, NULL);

	while ((classTup = systable_getnext(scan)) != NULL)
	{
		/*
		 * The form is declared volatile and each xid/mxid is fetched exactly
		 * once: concurrent in-place updaters could change these fields under
		 * us (see the corresponding comment in vac_truncate_clog()).
		 */
		volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
		TransactionId relfrozenxid = classForm->relfrozenxid;
		TransactionId relminmxid = classForm->relminmxid;

		/*
		 * Only consider relations able to hold unfrozen XIDs (anything else
		 * should have InvalidTransactionId in relfrozenxid anyway).
		 */
		if (classForm->relkind != RELKIND_RELATION &&
			classForm->relkind != RELKIND_MATVIEW &&
			classForm->relkind != RELKIND_TOASTVALUE)
		{
			Assert(!TransactionIdIsValid(relfrozenxid));
			Assert(!MultiXactIdIsValid(relminmxid));
			continue;
		}

		/*
		 * Some table AMs might not need per-relation xid / multixid horizons.
		 * It therefore seems reasonable to allow relfrozenxid and relminmxid
		 * to not be set (i.e. set to their respective Invalid*Id)
		 * independently. Thus validate and compute horizon for each only if
		 * set.
		 *
		 * If things are working properly, no relation should have a
		 * relfrozenxid or relminmxid that is "in the future".  However, such
		 * cases have been known to arise due to bugs in pg_upgrade.  If we
		 * see any entries that are "in the future", chicken out and don't do
		 * anything.  This ensures we won't truncate clog & multixact SLRUs
		 * before those relations have been scanned and cleaned up.
		 */

		if (TransactionIdIsValid(relfrozenxid))
		{
			Assert(TransactionIdIsNormal(relfrozenxid));

			/* check for values in the future */
			if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
			{
				bogus = true;
				break;
			}

			/* determine new horizon */
			if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
				newFrozenXid = relfrozenxid;
		}

		if (MultiXactIdIsValid(relminmxid))
		{
			/* check for values in the future */
			if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
			{
				bogus = true;
				break;
			}

			/* determine new horizon */
			if (MultiXactIdPrecedes(relminmxid, newMinMulti))
				newMinMulti = relminmxid;
		}
	}

	/* we're done with pg_class */
	systable_endscan(scan);
	table_close(relation, AccessShareLock);

	/* chicken out if bogus data found */
	if (bogus)
		return;

	Assert(TransactionIdIsNormal(newFrozenXid));
	Assert(MultiXactIdIsValid(newMinMulti));

	/* Now fetch the pg_database tuple we need to update. */
	relation = table_open(DatabaseRelationId, RowExclusiveLock);

	/*
	 * Fetch a copy of the tuple to scribble on.  We could check the syscache
	 * tuple first.  If that concluded !dirty, we'd avoid waiting on
	 * concurrent heap_update() and would avoid exclusive-locking the buffer.
	 * For now, don't optimize that.
	 */
	ScanKeyInit(&key[0],
				Anum_pg_database_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(MyDatabaseId));

	systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
								  NULL, 1, key, &tuple, &inplace_state);

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);

	dbform = (Form_pg_database) GETSTRUCT(tuple);

	/*
	 * As in vac_update_relstats(), we ordinarily don't want to let
	 * datfrozenxid go backward; but if it's "in the future" then it must be
	 * corrupt and it seems best to overwrite it.
	 */
	if (dbform->datfrozenxid != newFrozenXid &&
		(TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
		 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
	{
		dbform->datfrozenxid = newFrozenXid;
		dirty = true;
	}
	else
		newFrozenXid = dbform->datfrozenxid;

	/* Ditto for datminmxid */
	if (dbform->datminmxid != newMinMulti &&
		(MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
		 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
	{
		dbform->datminmxid = newMinMulti;
		dirty = true;
	}
	else
		newMinMulti = dbform->datminmxid;

	/* Write out the tuple if changed; otherwise release the inplace update. */
	if (dirty)
		systable_inplace_update_finish(inplace_state, tuple);
	else
		systable_inplace_update_cancel(inplace_state);

	heap_freetuple(tuple);
	table_close(relation, RowExclusiveLock);

	/*
	 * If we were able to advance datfrozenxid or datminmxid, see if we can
	 * truncate pg_xact and/or pg_multixact.  Also do it if the shared
	 * XID-wrap-limit info is stale, since this action will update that too.
	 */
	if (dirty || ForceTransactionIdLimitUpdate())
		vac_truncate_clog(newFrozenXid, newMinMulti,
						  lastSaneFrozenXid, lastSaneMinMulti);
}
1811 | | |
1812 | | |
/*
 * vac_truncate_clog() -- attempt to truncate the commit log
 *
 *		Scan pg_database to determine the system-wide oldest datfrozenxid,
 *		and use it to truncate the transaction commit log (pg_xact).
 *		Also update the XID wrap limit info maintained by varsup.c.
 *		Likewise for datminmxid.
 *
 *		The passed frozenXID and minMulti are the updated values for my own
 *		pg_database entry.  They're used to initialize the "min" calculations.
 *		The caller also passes the "last sane" XID and MXID, since it has
 *		those at hand already.
 *
 *		This routine is only invoked when we've managed to change our
 *		DB's datfrozenxid/datminmxid values, or we found that the shared
 *		XID-wrap-limit info is stale.
 */
static void
vac_truncate_clog(TransactionId frozenXID,
				  MultiXactId minMulti,
				  TransactionId lastSaneFrozenXid,
				  MultiXactId lastSaneMinMulti)
{
	TransactionId nextXID = ReadNextTransactionId();
	Relation	relation;
	TableScanDesc scan;
	HeapTuple	tuple;
	Oid			oldestxid_datoid;	/* DB holding the oldest datfrozenxid */
	Oid			minmulti_datoid;	/* DB holding the oldest datminmxid */
	bool		bogus = false;	/* saw a "future" datfrozenxid/datminmxid? */
	bool		frozenAlreadyWrapped = false;

	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
	LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);

	/* init oldest datoids to sync with my frozenXID/minMulti values */
	oldestxid_datoid = MyDatabaseId;
	minmulti_datoid = MyDatabaseId;

	/*
	 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
	 *
	 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
	 * the values could change while we look at them.  Fetch each one just
	 * once to ensure sane behavior of the comparison logic.  (Here, as in
	 * many other places, we assume that fetching or updating an XID in shared
	 * storage is atomic.)
	 *
	 * Note: we need not worry about a race condition with new entries being
	 * inserted by CREATE DATABASE.  Any such entry will have a copy of some
	 * existing DB's datfrozenxid, and that source DB cannot be ours because
	 * of the interlock against copying a DB containing an active backend.
	 * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
	 * concurrently modify the datfrozenxid's of different databases, the
	 * worst possible outcome is that pg_xact is not truncated as aggressively
	 * as it could be.
	 */
	relation = table_open(DatabaseRelationId, AccessShareLock);

	scan = table_beginscan_catalog(relation, 0, NULL);

	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
		TransactionId datfrozenxid = dbform->datfrozenxid;
		TransactionId datminmxid = dbform->datminmxid;

		Assert(TransactionIdIsNormal(datfrozenxid));
		Assert(MultiXactIdIsValid(datminmxid));

		/*
		 * If database is in the process of getting dropped, or has been
		 * interrupted while doing so, no connections to it are possible
		 * anymore.  Therefore we don't need to take it into account here.
		 * Which is good, because it can't be processed by autovacuum either.
		 */
		if (database_is_invalid_form((Form_pg_database) dbform))
		{
			elog(DEBUG2,
				 "skipping invalid database \"%s\" while computing relfrozenxid",
				 NameStr(dbform->datname));
			continue;
		}

		/*
		 * If things are working properly, no database should have a
		 * datfrozenxid or datminmxid that is "in the future".  However, such
		 * cases have been known to arise due to bugs in pg_upgrade.  If we
		 * see any entries that are "in the future", chicken out and don't do
		 * anything.  This ensures we won't truncate clog before those
		 * databases have been scanned and cleaned up.  (We will issue the
		 * "already wrapped" warning if appropriate, though.)
		 */
		if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
			MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
			bogus = true;

		if (TransactionIdPrecedes(nextXID, datfrozenxid))
			frozenAlreadyWrapped = true;
		else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
		{
			frozenXID = datfrozenxid;
			oldestxid_datoid = dbform->oid;
		}

		if (MultiXactIdPrecedes(datminmxid, minMulti))
		{
			minMulti = datminmxid;
			minmulti_datoid = dbform->oid;
		}
	}

	table_endscan(scan);

	table_close(relation, AccessShareLock);

	/*
	 * Do not truncate CLOG if we seem to have suffered wraparound already;
	 * the computed minimum XID might be bogus.  This case should now be
	 * impossible due to the defenses in GetNewTransactionId, but we keep the
	 * test anyway.
	 *
	 * Note: every exit path from here on must release WrapLimitsVacuumLock.
	 */
	if (frozenAlreadyWrapped)
	{
		ereport(WARNING,
				(errmsg("some databases have not been vacuumed in over 2 billion transactions"),
				 errdetail("You might have already suffered transaction-wraparound data loss.")));
		LWLockRelease(WrapLimitsVacuumLock);
		return;
	}

	/* chicken out if data is bogus in any other way */
	if (bogus)
	{
		LWLockRelease(WrapLimitsVacuumLock);
		return;
	}

	/*
	 * Advance the oldest value for commit timestamps before truncating, so
	 * that if a user requests a timestamp for a transaction we're truncating
	 * away right after this point, they get NULL instead of an ugly "file not
	 * found" error from slru.c.  This doesn't matter for xact/multixact
	 * because they are not subject to arbitrary lookups from users.
	 */
	AdvanceOldestCommitTsXid(frozenXID);

	/*
	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
	 */
	TruncateCLOG(frozenXID, oldestxid_datoid);
	TruncateCommitTs(frozenXID);
	TruncateMultiXact(minMulti, minmulti_datoid);

	/*
	 * Update the wrap limit for GetNewTransactionId and creation of new
	 * MultiXactIds.  Note: these functions will also signal the postmaster
	 * for an(other) autovac cycle if needed.  XXX should we avoid possibly
	 * signaling twice?
	 */
	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
	SetMultiXactIdLimit(minMulti, minmulti_datoid, false);

	LWLockRelease(WrapLimitsVacuumLock);
}
1978 | | |
1979 | | |
/*
 *	vacuum_rel() -- vacuum one heap relation
 *
 *		relid identifies the relation to vacuum.  If relation is supplied,
 *		use the name therein for reporting any failure to open/lock the rel;
 *		do not use it once we've successfully opened the rel, since it might
 *		be stale.
 *
 *		Returns true if it's okay to proceed with a requested ANALYZE
 *		operation on this table.
 *
 *		Doing one heap at a time incurs extra overhead, since we need to
 *		check that the heap exists again just before we vacuum it.  The
 *		reason that we do this is so that vacuuming can be spread across
 *		many small transactions.  Otherwise, two-phase locking would require
 *		us to lock the entire database during one pass of the vacuum cleaner.
 *
 *		At entry and exit, we are not inside a transaction.
 */
static bool
vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
		   BufferAccessStrategy bstrategy)
{
	LOCKMODE	lmode;			/* table lock mode, chosen below */
	Relation	rel;
	LockRelId	lockrelid;		/* identity for the session-level lock */
	Oid			priv_relid;		/* relation whose privileges to check */
	Oid			toast_relid;	/* TOAST rel to recurse to, or InvalidOid */
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	Assert(params != NULL);

	/* Begin a transaction for vacuuming this relation */
	StartTransactionCommand();

	if (!(params->options & VACOPT_FULL))
	{
		/*
		 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
		 * other concurrent VACUUMs know that they can ignore this one while
		 * determining their OldestXmin.  (The reason we don't set it during a
		 * full VACUUM is exactly that we may have to run user-defined
		 * functions for functional indexes, and we want to make sure that if
		 * they use the snapshot set above, any tuples it requires can't get
		 * removed from other tables.  An index function that depends on the
		 * contents of other tables is arguably broken, but we won't break it
		 * here by violating transaction semantics.)
		 *
		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
		 * autovacuum; it's used to avoid canceling a vacuum that was invoked
		 * in an emergency.
		 *
		 * Note: these flags remain set until CommitTransaction or
		 * AbortTransaction.  We don't want to clear them until we reset
		 * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
		 * might appear to go backwards, which is probably Not Good.  (We also
		 * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
		 * xmin doesn't become visible ahead of setting the flag.)
		 */
		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
		MyProc->statusFlags |= PROC_IN_VACUUM;
		if (params->is_wraparound)
			MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
		ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
		LWLockRelease(ProcArrayLock);
	}

	/*
	 * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
	 * cutoff xids in local memory wrapping around, and to have updated xmin
	 * horizons.
	 */
	PushActiveSnapshot(GetTransactionSnapshot());

	/*
	 * Check for user-requested abort.  Note we want this to be inside a
	 * transaction, so xact.c doesn't issue useless WARNING.
	 */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Determine the type of lock we want --- hard exclusive lock for a FULL
	 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
	 * way, we can be sure that no other backend is vacuuming the same table.
	 */
	lmode = (params->options & VACOPT_FULL) ?
		AccessExclusiveLock : ShareUpdateExclusiveLock;

	/* open the relation and get the appropriate lock on it */
	rel = vacuum_open_relation(relid, relation, params->options,
							   params->log_min_duration >= 0, lmode);

	/* leave if relation could not be opened or locked */
	if (!rel)
	{
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * When recursing to a TOAST table, check privileges on the parent.  NB:
	 * This is only safe to do because we hold a session lock on the main
	 * relation that prevents concurrent deletion.
	 */
	if (OidIsValid(params->toast_parent))
		priv_relid = params->toast_parent;
	else
		priv_relid = RelationGetRelid(rel);

	/*
	 * Check if relation needs to be skipped based on privileges.  This check
	 * happens also when building the relation list to vacuum for a manual
	 * operation, and needs to be done additionally here as VACUUM could
	 * happen across multiple transactions where privileges could have changed
	 * in-between.  Make sure to only generate logs for VACUUM in this case.
	 */
	if (!vacuum_is_permitted_for_relation(priv_relid,
										  rel->rd_rel,
										  params->options & ~VACOPT_ANALYZE))
	{
		relation_close(rel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Check that it's of a vacuumable relkind.
	 */
	if (rel->rd_rel->relkind != RELKIND_RELATION &&
		rel->rd_rel->relkind != RELKIND_MATVIEW &&
		rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
		rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
	{
		ereport(WARNING,
				(errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
						RelationGetRelationName(rel))));
		relation_close(rel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Silently ignore tables that are temp tables of other backends ---
	 * trying to vacuum these will lead to great unhappiness, since their
	 * contents are probably not up-to-date on disk.  (We don't throw a
	 * warning here; it would just lead to chatter during a database-wide
	 * VACUUM.)
	 */
	if (RELATION_IS_OTHER_TEMP(rel))
	{
		relation_close(rel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Silently ignore partitioned tables as there is no work to be done.  The
	 * useful work is on their child partitions, which have been queued up for
	 * us separately.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		relation_close(rel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		/* It's OK to proceed with ANALYZE on this table */
		return true;
	}

	/*
	 * Get a session-level lock too. This will protect our access to the
	 * relation across multiple transactions, so that we can vacuum the
	 * relation's TOAST table (if any) secure in the knowledge that no one is
	 * deleting the parent relation.
	 *
	 * NOTE: this cannot block, even if someone else is waiting for access,
	 * because the lock manager knows that both lock requests are from the
	 * same process.
	 */
	lockrelid = rel->rd_lockInfo.lockRelId;
	LockRelationIdForSession(&lockrelid, lmode);

	/*
	 * Set index_cleanup option based on index_cleanup reloption if it wasn't
	 * specified in VACUUM command, or when running in an autovacuum worker
	 */
	if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
	{
		StdRdOptIndexCleanup vacuum_index_cleanup;

		if (rel->rd_options == NULL)
			vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
		else
			vacuum_index_cleanup =
				((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;

		if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
			params->index_cleanup = VACOPTVALUE_AUTO;
		else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
			params->index_cleanup = VACOPTVALUE_ENABLED;
		else
		{
			/* the only remaining reloption value is OFF */
			Assert(vacuum_index_cleanup ==
				   STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
			params->index_cleanup = VACOPTVALUE_DISABLED;
		}
	}

	/*
	 * Check if the vacuum_max_eager_freeze_failure_rate table storage
	 * parameter was specified.  This overrides the GUC value.
	 */
	if (rel->rd_options != NULL &&
		((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
		params->max_eager_freeze_failure_rate =
			((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;

	/*
	 * Set truncate option based on truncate reloption or GUC if it wasn't
	 * specified in VACUUM command, or when running in an autovacuum worker
	 */
	if (params->truncate == VACOPTVALUE_UNSPECIFIED)
	{
		StdRdOptions *opts = (StdRdOptions *) rel->rd_options;

		if (opts && opts->vacuum_truncate_set)
		{
			if (opts->vacuum_truncate)
				params->truncate = VACOPTVALUE_ENABLED;
			else
				params->truncate = VACOPTVALUE_DISABLED;
		}
		else if (vacuum_truncate)
			params->truncate = VACOPTVALUE_ENABLED;
		else
			params->truncate = VACOPTVALUE_DISABLED;
	}

	/*
	 * Remember the relation's TOAST relation for later, if the caller asked
	 * us to process it.  In VACUUM FULL, though, the toast table is
	 * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
	 * unless PROCESS_MAIN is disabled.
	 */
	if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
		((params->options & VACOPT_FULL) == 0 ||
		 (params->options & VACOPT_PROCESS_MAIN) == 0))
		toast_relid = rel->rd_rel->reltoastrelid;
	else
		toast_relid = InvalidOid;

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command. (This is
	 * unnecessary, but harmless, for lazy VACUUM.)
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(rel->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();
	RestrictSearchPath();

	/*
	 * If PROCESS_MAIN is set (the default), it's time to vacuum the main
	 * relation.  Otherwise, we can skip this part.  If processing the TOAST
	 * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
	 * to be set when we recurse to the TOAST table.
	 */
	if (params->options & VACOPT_PROCESS_MAIN)
	{
		/*
		 * Do the actual work --- either FULL or "lazy" vacuum
		 */
		if (params->options & VACOPT_FULL)
		{
			ClusterParams cluster_params = {0};

			if ((params->options & VACOPT_VERBOSE) != 0)
				cluster_params.options |= CLUOPT_VERBOSE;

			/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
			cluster_rel(rel, InvalidOid, &cluster_params);
			/* cluster_rel closes the relation, but keeps lock */

			rel = NULL;
		}
		else
			table_relation_vacuum(rel, params, bstrategy);
	}

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* all done with this class, but hold lock until commit */
	if (rel)
		relation_close(rel, NoLock);

	/*
	 * Complete the transaction and free all temporary memory used.
	 */
	PopActiveSnapshot();
	CommitTransactionCommand();

	/*
	 * If the relation has a secondary toast rel, vacuum that too while we
	 * still hold the session lock on the main table.  Note however that
	 * "analyze" will not get done on the toast table.  This is good, because
	 * the toaster always uses hardcoded index access and statistics are
	 * totally unimportant for toast relations.
	 */
	if (toast_relid != InvalidOid)
	{
		VacuumParams toast_vacuum_params;

		/*
		 * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it.  Likewise,
		 * set toast_parent so that the privilege checks are done on the main
		 * relation.  NB: This is only safe to do because we hold a session
		 * lock on the main relation that prevents concurrent deletion.
		 */
		memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
		toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
		toast_vacuum_params.toast_parent = relid;

		/* recursive call runs in its own transaction(s) */
		vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
	}

	/*
	 * Now release the session-level lock on the main table.
	 */
	UnlockRelationIdForSession(&lockrelid, lmode);

	/* Report that we really did it. */
	return true;
}
2325 | | |
2326 | | |
2327 | | /* |
2328 | | * Open all the vacuumable indexes of the given relation, obtaining the |
2329 | | * specified kind of lock on each. Return an array of Relation pointers for |
2330 | | * the indexes into *Irel, and the number of indexes into *nindexes. |
2331 | | * |
2332 | | * We consider an index vacuumable if it is marked insertable (indisready). |
2333 | | * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in |
2334 | | * execution, and what we have is too corrupt to be processable. We will |
2335 | | * vacuum even if the index isn't indisvalid; this is important because in a |
2336 | | * unique index, uniqueness checks will be performed anyway and had better not |
2337 | | * hit dangling index pointers. |
2338 | | */ |
2339 | | void |
2340 | | vac_open_indexes(Relation relation, LOCKMODE lockmode, |
2341 | | int *nindexes, Relation **Irel) |
2342 | 0 | { |
2343 | 0 | List *indexoidlist; |
2344 | 0 | ListCell *indexoidscan; |
2345 | 0 | int i; |
2346 | |
|
2347 | 0 | Assert(lockmode != NoLock); |
2348 | |
|
2349 | 0 | indexoidlist = RelationGetIndexList(relation); |
2350 | | |
2351 | | /* allocate enough memory for all indexes */ |
2352 | 0 | i = list_length(indexoidlist); |
2353 | |
|
2354 | 0 | if (i > 0) |
2355 | 0 | *Irel = (Relation *) palloc(i * sizeof(Relation)); |
2356 | 0 | else |
2357 | 0 | *Irel = NULL; |
2358 | | |
2359 | | /* collect just the ready indexes */ |
2360 | 0 | i = 0; |
2361 | 0 | foreach(indexoidscan, indexoidlist) |
2362 | 0 | { |
2363 | 0 | Oid indexoid = lfirst_oid(indexoidscan); |
2364 | 0 | Relation indrel; |
2365 | |
|
2366 | 0 | indrel = index_open(indexoid, lockmode); |
2367 | 0 | if (indrel->rd_index->indisready) |
2368 | 0 | (*Irel)[i++] = indrel; |
2369 | 0 | else |
2370 | 0 | index_close(indrel, lockmode); |
2371 | 0 | } |
2372 | |
|
2373 | 0 | *nindexes = i; |
2374 | |
|
2375 | 0 | list_free(indexoidlist); |
2376 | 0 | } |
2377 | | |
2378 | | /* |
2379 | | * Release the resources acquired by vac_open_indexes. Optionally release |
2380 | | * the locks (say NoLock to keep 'em). |
2381 | | */ |
2382 | | void |
2383 | | vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) |
2384 | 0 | { |
2385 | 0 | if (Irel == NULL) |
2386 | 0 | return; |
2387 | | |
2388 | 0 | while (nindexes--) |
2389 | 0 | { |
2390 | 0 | Relation ind = Irel[nindexes]; |
2391 | |
|
2392 | 0 | index_close(ind, lockmode); |
2393 | 0 | } |
2394 | 0 | pfree(Irel); |
2395 | 0 | } |
2396 | | |
2397 | | /* |
2398 | | * vacuum_delay_point --- check for interrupts and cost-based delay. |
2399 | | * |
2400 | | * This should be called in each major loop of VACUUM processing, |
2401 | | * typically once per page processed. |
2402 | | */ |
/*
 * vacuum_delay_point --- check for interrupts and cost-based delay.
 *
 * This should be called in each major loop of VACUUM processing,
 * typically once per page processed.
 *
 * is_analyze tells us which progress view (ANALYZE vs. VACUUM) should be
 * charged for any sleep time we accumulate here.
 */
void
vacuum_delay_point(bool is_analyze)
{
	double		msec = 0;

	/* Always check for interrupts */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Fast exit when there is nothing to do: an interrupt is pending (let
	 * the caller reach a point where it can be serviced), or cost-based
	 * delay is inactive and no config reload has been requested.
	 */
	if (InterruptPending ||
		(!VacuumCostActive && !ConfigReloadPending))
		return;

	/*
	 * Autovacuum workers should reload the configuration file if requested.
	 * This allows changes to [autovacuum_]vacuum_cost_limit and
	 * [autovacuum_]vacuum_cost_delay to take effect while a table is being
	 * vacuumed or analyzed.
	 */
	if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
	{
		ConfigReloadPending = false;
		ProcessConfigFile(PGC_SIGHUP);
		VacuumUpdateCosts();
	}

	/*
	 * If we disabled cost-based delays after reloading the config file,
	 * return.
	 */
	if (!VacuumCostActive)
		return;

	/*
	 * For parallel vacuum, the delay is computed based on the shared cost
	 * balance.  See compute_parallel_delay.
	 */
	if (VacuumSharedCostBalance != NULL)
		msec = compute_parallel_delay();
	else if (VacuumCostBalance >= vacuum_cost_limit)
		msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;

	/* Nap if appropriate */
	if (msec > 0)
	{
		instr_time	delay_start;

		/* Clamp the sleep so a huge accumulated balance can't stall us. */
		if (msec > vacuum_cost_delay * 4)
			msec = vacuum_cost_delay * 4;

		if (track_cost_delay_timing)
			INSTR_TIME_SET_CURRENT(delay_start);

		pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
		pg_usleep(msec * 1000);
		pgstat_report_wait_end();

		/* Measure the actual wall-clock time we slept, if requested. */
		if (track_cost_delay_timing)
		{
			instr_time	delay_end;
			instr_time	delay;

			INSTR_TIME_SET_CURRENT(delay_end);
			INSTR_TIME_SET_ZERO(delay);
			INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);

			/*
			 * For parallel workers, we only report the delay time every once
			 * in a while to avoid overloading the leader with messages and
			 * interrupts.
			 */
			if (IsParallelWorker())
			{
				/* Persists across calls within this worker process. */
				static instr_time last_report_time;
				instr_time	time_since_last_report;

				/* ANALYZE never runs in a parallel vacuum worker. */
				Assert(!is_analyze);

				/* Accumulate the delay time */
				parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);

				/* Calculate interval since last report */
				INSTR_TIME_SET_ZERO(time_since_last_report);
				INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);

				/* If we haven't reported in a while, do so now */
				if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
					PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
				{
					pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
														parallel_vacuum_worker_delay_ns);

					/* Reset variables */
					last_report_time = delay_end;
					parallel_vacuum_worker_delay_ns = 0;
				}
			}
			else if (is_analyze)
				pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
										   INSTR_TIME_GET_NANOSEC(delay));
			else
				pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
										   INSTR_TIME_GET_NANOSEC(delay));
		}

		/*
		 * We don't want to ignore postmaster death during very long vacuums
		 * with vacuum_cost_delay configured.  We can't use the usual
		 * WaitLatch() approach here because we want microsecond-based sleep
		 * durations above.
		 */
		if (IsUnderPostmaster && !PostmasterIsAlive())
			exit(1);

		/* We've "paid off" the accumulated cost; start a fresh balance. */
		VacuumCostBalance = 0;

		/*
		 * Balance and update limit values for autovacuum workers.  We must do
		 * this periodically, as the number of workers across which we are
		 * balancing the limit may have changed.
		 *
		 * TODO: There may be better criteria for determining when to do this
		 * besides "check after napping".
		 */
		AutoVacuumUpdateCostLimit();

		/* Might have gotten an interrupt while sleeping */
		CHECK_FOR_INTERRUPTS();
	}
}
2532 | | |
2533 | | /* |
2534 | | * Computes the vacuum delay for parallel workers. |
2535 | | * |
2536 | | * The basic idea of a cost-based delay for parallel vacuum is to allow each |
2537 | | * worker to sleep in proportion to the share of work it's done. We achieve this |
2538 | | * by allowing all parallel vacuum workers including the leader process to |
2539 | | * have a shared view of cost related parameters (mainly VacuumCostBalance). |
2540 | | * We allow each worker to update it as and when it has incurred any cost and |
2541 | | * then based on that decide whether it needs to sleep. We compute the time |
2542 | | * to sleep for a worker based on the cost it has incurred |
2543 | | * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by |
2544 | | * that amount. This avoids putting to sleep those workers which have done less |
2545 | | * I/O than other workers and therefore ensure that workers |
2546 | | * which are doing more I/O got throttled more. |
2547 | | * |
2548 | | * We allow a worker to sleep only if it has performed I/O above a certain |
2549 | | * threshold, which is calculated based on the number of active workers |
2550 | | * (VacuumActiveNWorkers), and the overall cost balance is more than |
2551 | | * VacuumCostLimit set by the system. Testing reveals that we achieve |
2552 | | * the required throttling if we force a worker that has done more than 50% |
2553 | | * of its share of work to sleep. |
2554 | | */ |
/*
 * Computes the vacuum delay for parallel workers.
 *
 * The basic idea of a cost-based delay for parallel vacuum is to allow each
 * worker to sleep in proportion to the share of work it's done.  We achieve
 * this by allowing all parallel vacuum workers including the leader process
 * to have a shared view of cost related parameters (mainly
 * VacuumCostBalance).  We allow each worker to update it as and when it has
 * incurred any cost and then based on that decide whether it needs to sleep.
 * We compute the time to sleep for a worker based on the cost it has incurred
 * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
 * that amount.  This avoids putting to sleep those workers which have done
 * less I/O than other workers and therefore ensure that workers which are
 * doing more I/O got throttled more.
 *
 * We allow a worker to sleep only if it has performed I/O above a certain
 * threshold, which is calculated based on the number of active workers
 * (VacuumActiveNWorkers), and the overall cost balance is more than
 * VacuumCostLimit set by the system.  Testing reveals that we achieve
 * the required throttling if we force a worker that has done more than 50%
 * of its share of work to sleep.
 *
 * Returns the number of milliseconds to sleep (0 if no nap is due).
 */
static double
compute_parallel_delay(void)
{
	double		msec = 0;
	uint32		shared_balance;
	int			nworkers;

	/* Parallel vacuum must be active */
	Assert(VacuumSharedCostBalance);

	nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);

	/* At least count itself */
	Assert(nworkers >= 1);

	/* Update the shared cost balance value atomically */
	shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);

	/* Compute the total local balance for the current worker */
	VacuumCostBalanceLocal += VacuumCostBalance;

	/*
	 * Sleep only if the overall balance has hit the limit AND this worker
	 * has done more than half of its fair share (limit / nworkers) of work.
	 */
	if ((shared_balance >= vacuum_cost_limit) &&
		(VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
	{
		/* Compute sleep time based on the local cost balance */
		msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
		pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
		VacuumCostBalanceLocal = 0;
	}

	/*
	 * Reset the process-local cost counter: its value has already been
	 * accumulated into the shared balance above.
	 */
	VacuumCostBalance = 0;

	return msec;
}
2592 | | |
2593 | | /* |
2594 | | * A wrapper function of defGetBoolean(). |
2595 | | * |
2596 | | * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead |
2597 | | * of true and false. |
2598 | | */ |
2599 | | static VacOptValue |
2600 | | get_vacoptval_from_boolean(DefElem *def) |
2601 | 0 | { |
2602 | 0 | return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED; |
2603 | 0 | } |
2604 | | |
2605 | | /* |
2606 | | * vac_bulkdel_one_index() -- bulk-deletion for index relation. |
2607 | | * |
2608 | | * Returns bulk delete stats derived from input stats |
2609 | | */ |
2610 | | IndexBulkDeleteResult * |
2611 | | vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, |
2612 | | TidStore *dead_items, VacDeadItemsInfo *dead_items_info) |
2613 | 0 | { |
2614 | | /* Do bulk deletion */ |
2615 | 0 | istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped, |
2616 | 0 | dead_items); |
2617 | |
|
2618 | 0 | ereport(ivinfo->message_level, |
2619 | 0 | (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions", |
2620 | 0 | RelationGetRelationName(ivinfo->index), |
2621 | 0 | dead_items_info->num_items))); |
2622 | | |
2623 | 0 | return istat; |
2624 | 0 | } |
2625 | | |
2626 | | /* |
2627 | | * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation. |
2628 | | * |
2629 | | * Returns bulk delete stats derived from input stats |
2630 | | */ |
2631 | | IndexBulkDeleteResult * |
2632 | | vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat) |
2633 | 0 | { |
2634 | 0 | istat = index_vacuum_cleanup(ivinfo, istat); |
2635 | |
|
2636 | 0 | if (istat) |
2637 | 0 | ereport(ivinfo->message_level, |
2638 | 0 | (errmsg("index \"%s\" now contains %.0f row versions in %u pages", |
2639 | 0 | RelationGetRelationName(ivinfo->index), |
2640 | 0 | istat->num_index_tuples, |
2641 | 0 | istat->num_pages), |
2642 | 0 | errdetail("%.0f index row versions were removed.\n" |
2643 | 0 | "%u index pages were newly deleted.\n" |
2644 | 0 | "%u index pages are currently deleted, of which %u are currently reusable.", |
2645 | 0 | istat->tuples_removed, |
2646 | 0 | istat->pages_newly_deleted, |
2647 | 0 | istat->pages_deleted, istat->pages_free))); |
2648 | | |
2649 | 0 | return istat; |
2650 | 0 | } |
2651 | | |
2652 | | /* |
2653 | | * vac_tid_reaped() -- is a particular tid deletable? |
2654 | | * |
2655 | | * This has the right signature to be an IndexBulkDeleteCallback. |
2656 | | */ |
2657 | | static bool |
2658 | | vac_tid_reaped(ItemPointer itemptr, void *state) |
2659 | 0 | { |
2660 | 0 | TidStore *dead_items = (TidStore *) state; |
2661 | |
|
2662 | 0 | return TidStoreIsMember(dead_items, itemptr); |
2663 | 0 | } |