Coverage Report

Created: 2025-06-15 06:31

/src/postgres/src/backend/commands/vacuum.c
Line | Count | Source
(Count is the number of times the line was executed during the coverage run; 0 means never executed, and lines shown without a count are not executable.)
1
/*-------------------------------------------------------------------------
2
 *
3
 * vacuum.c
4
 *    The postgres vacuum cleaner.
5
 *
6
 * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7
 * commands, (b) code to compute various vacuum thresholds, and (c) index
8
 * vacuum code.
9
 *
10
 * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11
 * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12
 * CLUSTER, handled in cluster.c.
13
 *
14
 *
15
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16
 * Portions Copyright (c) 1994, Regents of the University of California
17
 *
18
 *
19
 * IDENTIFICATION
20
 *    src/backend/commands/vacuum.c
21
 *
22
 *-------------------------------------------------------------------------
23
 */
24
#include "postgres.h"
25
26
#include <math.h>
27
28
#include "access/clog.h"
29
#include "access/commit_ts.h"
30
#include "access/genam.h"
31
#include "access/heapam.h"
32
#include "access/htup_details.h"
33
#include "access/multixact.h"
34
#include "access/tableam.h"
35
#include "access/transam.h"
36
#include "access/xact.h"
37
#include "catalog/namespace.h"
38
#include "catalog/pg_database.h"
39
#include "catalog/pg_inherits.h"
40
#include "commands/cluster.h"
41
#include "commands/defrem.h"
42
#include "commands/progress.h"
43
#include "commands/vacuum.h"
44
#include "miscadmin.h"
45
#include "nodes/makefuncs.h"
46
#include "pgstat.h"
47
#include "postmaster/autovacuum.h"
48
#include "postmaster/bgworker_internals.h"
49
#include "postmaster/interrupt.h"
50
#include "storage/bufmgr.h"
51
#include "storage/lmgr.h"
52
#include "storage/pmsignal.h"
53
#include "storage/proc.h"
54
#include "storage/procarray.h"
55
#include "utils/acl.h"
56
#include "utils/fmgroids.h"
57
#include "utils/guc.h"
58
#include "utils/guc_hooks.h"
59
#include "utils/memutils.h"
60
#include "utils/snapmgr.h"
61
#include "utils/syscache.h"
62
63
/*
64
 * Minimum interval for cost-based vacuum delay reports from a parallel worker.
65
 * This aims to avoid sending too many messages and waking up the leader too
66
 * frequently.
67
 */
68
0
#define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS  (NS_PER_S)
69
70
/*
71
 * GUC parameters
72
 */
73
int     vacuum_freeze_min_age;
74
int     vacuum_freeze_table_age;
75
int     vacuum_multixact_freeze_min_age;
76
int     vacuum_multixact_freeze_table_age;
77
int     vacuum_failsafe_age;
78
int     vacuum_multixact_failsafe_age;
79
double    vacuum_max_eager_freeze_failure_rate;
80
bool    track_cost_delay_timing;
81
bool    vacuum_truncate;
82
83
/*
84
 * Variables for cost-based vacuum delay. The defaults differ between
85
 * autovacuum and vacuum. They should be set with the appropriate GUC value in
86
 * vacuum code. They are initialized here to the defaults for client backends
87
 * executing VACUUM or ANALYZE.
88
 */
89
double    vacuum_cost_delay = 0;
90
int     vacuum_cost_limit = 200;
91
92
/* Variable for reporting cost-based vacuum delay from parallel workers. */
93
int64   parallel_vacuum_worker_delay_ns = 0;
94
95
/*
96
 * VacuumFailsafeActive is defined as a global so that we can determine
97
 * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
98
 * If failsafe mode has been engaged, we will not re-enable cost-based delay
99
 * for the table until after vacuuming has completed, regardless of other
100
 * settings.
101
 *
102
 * Only VACUUM code should inspect this variable and only table access methods
103
 * should set it to true. In Table AM-agnostic VACUUM code, this variable is
104
 * inspected to determine whether or not to allow cost-based delays. Table AMs
105
 * are free to set it if they desire this behavior, but it is false by default
106
 * and reset to false in between vacuuming each relation.
107
 */
108
bool    VacuumFailsafeActive = false;
109
110
/*
111
 * Variables for cost-based parallel vacuum.  See comments atop
112
 * compute_parallel_delay to understand how it works.
113
 */
114
pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
115
pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
116
int     VacuumCostBalanceLocal = 0;
117
118
/* non-export function prototypes */
119
static List *expand_vacuum_rel(VacuumRelation *vrel,
120
                 MemoryContext vac_context, int options);
121
static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
122
static void vac_truncate_clog(TransactionId frozenXID,
123
                MultiXactId minMulti,
124
                TransactionId lastSaneFrozenXid,
125
                MultiXactId lastSaneMinMulti);
126
static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
127
             BufferAccessStrategy bstrategy);
128
static double compute_parallel_delay(void);
129
static VacOptValue get_vacoptval_from_boolean(DefElem *def);
130
static bool vac_tid_reaped(ItemPointer itemptr, void *state);
131
132
/*
133
 * GUC check function to ensure GUC value specified is within the allowable
134
 * range.
135
 */
136
bool
137
check_vacuum_buffer_usage_limit(int *newval, void **extra,
138
                GucSource source)
139
0
{
140
  /* Value upper and lower hard limits are inclusive */
141
0
  if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
142
0
             *newval <= MAX_BAS_VAC_RING_SIZE_KB))
143
0
    return true;
144
145
  /* Value does not fall within any allowable range */
146
0
  GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
147
0
            "vacuum_buffer_usage_limit",
148
0
            MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
149
150
0
  return false;
151
0
}
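/*
 * Illustrative note (not part of vacuum.c): the check above accepts either
 * 0, meaning "no ring buffer, use shared buffers freely", or a size within
 * [MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB].  Assuming the usual
 * limits of 128 kB and 16 GB, a setting such as
 *
 *     SET vacuum_buffer_usage_limit = '2MB';   -- accepted (2048 kB)
 *     SET vacuum_buffer_usage_limit = '64kB';  -- rejected, below the minimum
 *
 * would behave as shown; the exact bounds are defined by the two macros and
 * may differ between releases.
 */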
152
153
/*
154
 * Primary entry point for manual VACUUM and ANALYZE commands
155
 *
156
 * This is mainly a preparation wrapper for the real operations that will
157
 * happen in vacuum().
158
 */
159
void
160
ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
161
0
{
162
0
  VacuumParams params;
163
0
  BufferAccessStrategy bstrategy = NULL;
164
0
  bool    verbose = false;
165
0
  bool    skip_locked = false;
166
0
  bool    analyze = false;
167
0
  bool    freeze = false;
168
0
  bool    full = false;
169
0
  bool    disable_page_skipping = false;
170
0
  bool    process_main = true;
171
0
  bool    process_toast = true;
172
0
  int     ring_size;
173
0
  bool    skip_database_stats = false;
174
0
  bool    only_database_stats = false;
175
0
  MemoryContext vac_context;
176
0
  ListCell   *lc;
177
178
  /* index_cleanup and truncate values unspecified for now */
179
0
  params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
180
0
  params.truncate = VACOPTVALUE_UNSPECIFIED;
181
182
  /* By default parallel vacuum is enabled */
183
0
  params.nworkers = 0;
184
185
  /* Will be set later if we recurse to a TOAST table. */
186
0
  params.toast_parent = InvalidOid;
187
188
  /*
189
   * Set this to an invalid value so it is clear whether or not a
190
   * BUFFER_USAGE_LIMIT was specified when making the access strategy.
191
   */
192
0
  ring_size = -1;
193
194
  /* Parse options list */
195
0
  foreach(lc, vacstmt->options)
196
0
  {
197
0
    DefElem    *opt = (DefElem *) lfirst(lc);
198
199
    /* Parse common options for VACUUM and ANALYZE */
200
0
    if (strcmp(opt->defname, "verbose") == 0)
201
0
      verbose = defGetBoolean(opt);
202
0
    else if (strcmp(opt->defname, "skip_locked") == 0)
203
0
      skip_locked = defGetBoolean(opt);
204
0
    else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
205
0
    {
206
0
      const char *hintmsg;
207
0
      int     result;
208
0
      char     *vac_buffer_size;
209
210
0
      vac_buffer_size = defGetString(opt);
211
212
      /*
213
       * Check that the specified value is valid and the size falls
214
       * within the hard upper and lower limits if it is not 0.
215
       */
216
0
      if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
217
0
        (result != 0 &&
218
0
         (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
219
0
      {
220
0
        ereport(ERROR,
221
0
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
222
0
             errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
223
0
                MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
224
0
             hintmsg ? errhint("%s", _(hintmsg)) : 0));
225
0
      }
226
227
0
      ring_size = result;
228
0
    }
229
0
    else if (!vacstmt->is_vacuumcmd)
230
0
      ereport(ERROR,
231
0
          (errcode(ERRCODE_SYNTAX_ERROR),
232
0
           errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
233
0
           parser_errposition(pstate, opt->location)));
234
235
    /* Parse options available on VACUUM */
236
0
    else if (strcmp(opt->defname, "analyze") == 0)
237
0
      analyze = defGetBoolean(opt);
238
0
    else if (strcmp(opt->defname, "freeze") == 0)
239
0
      freeze = defGetBoolean(opt);
240
0
    else if (strcmp(opt->defname, "full") == 0)
241
0
      full = defGetBoolean(opt);
242
0
    else if (strcmp(opt->defname, "disable_page_skipping") == 0)
243
0
      disable_page_skipping = defGetBoolean(opt);
244
0
    else if (strcmp(opt->defname, "index_cleanup") == 0)
245
0
    {
246
      /* Interpret no string as the default, which is 'auto' */
247
0
      if (!opt->arg)
248
0
        params.index_cleanup = VACOPTVALUE_AUTO;
249
0
      else
250
0
      {
251
0
        char     *sval = defGetString(opt);
252
253
        /* Try matching on 'auto' string, or fall back on boolean */
254
0
        if (pg_strcasecmp(sval, "auto") == 0)
255
0
          params.index_cleanup = VACOPTVALUE_AUTO;
256
0
        else
257
0
          params.index_cleanup = get_vacoptval_from_boolean(opt);
258
0
      }
259
0
    }
260
0
    else if (strcmp(opt->defname, "process_main") == 0)
261
0
      process_main = defGetBoolean(opt);
262
0
    else if (strcmp(opt->defname, "process_toast") == 0)
263
0
      process_toast = defGetBoolean(opt);
264
0
    else if (strcmp(opt->defname, "truncate") == 0)
265
0
      params.truncate = get_vacoptval_from_boolean(opt);
266
0
    else if (strcmp(opt->defname, "parallel") == 0)
267
0
    {
268
0
      if (opt->arg == NULL)
269
0
      {
270
0
        ereport(ERROR,
271
0
            (errcode(ERRCODE_SYNTAX_ERROR),
272
0
             errmsg("parallel option requires a value between 0 and %d",
273
0
                MAX_PARALLEL_WORKER_LIMIT),
274
0
             parser_errposition(pstate, opt->location)));
275
0
      }
276
0
      else
277
0
      {
278
0
        int     nworkers;
279
280
0
        nworkers = defGetInt32(opt);
281
0
        if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
282
0
          ereport(ERROR,
283
0
              (errcode(ERRCODE_SYNTAX_ERROR),
284
0
               errmsg("parallel workers for vacuum must be between 0 and %d",
285
0
                  MAX_PARALLEL_WORKER_LIMIT),
286
0
               parser_errposition(pstate, opt->location)));
287
288
        /*
289
         * Disable parallel vacuum, if user has specified parallel
290
         * degree as zero.
291
         */
292
0
        if (nworkers == 0)
293
0
          params.nworkers = -1;
294
0
        else
295
0
          params.nworkers = nworkers;
296
0
      }
297
0
    }
298
0
    else if (strcmp(opt->defname, "skip_database_stats") == 0)
299
0
      skip_database_stats = defGetBoolean(opt);
300
0
    else if (strcmp(opt->defname, "only_database_stats") == 0)
301
0
      only_database_stats = defGetBoolean(opt);
302
0
    else
303
0
      ereport(ERROR,
304
0
          (errcode(ERRCODE_SYNTAX_ERROR),
305
0
           errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
306
0
           parser_errposition(pstate, opt->location)));
307
0
  }
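  /*
   * Illustrative note (not part of vacuum.c): after this loop,
   * params.nworkers encodes the PARALLEL option as follows: 0 (the default)
   * means parallel vacuum is enabled and the worker count is chosen
   * automatically, a positive value is the user-requested degree, and -1
   * (set when PARALLEL 0 was given) disables parallel vacuum entirely.
   */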
308
309
  /* Set vacuum options */
310
0
  params.options =
311
0
    (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
312
0
    (verbose ? VACOPT_VERBOSE : 0) |
313
0
    (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
314
0
    (analyze ? VACOPT_ANALYZE : 0) |
315
0
    (freeze ? VACOPT_FREEZE : 0) |
316
0
    (full ? VACOPT_FULL : 0) |
317
0
    (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
318
0
    (process_main ? VACOPT_PROCESS_MAIN : 0) |
319
0
    (process_toast ? VACOPT_PROCESS_TOAST : 0) |
320
0
    (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
321
0
    (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
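  /*
   * Illustrative note (not part of vacuum.c): for "VACUUM (VERBOSE, ANALYZE)
   * tab", the bitmask built above is VACOPT_VACUUM | VACOPT_VERBOSE |
   * VACOPT_ANALYZE | VACOPT_PROCESS_MAIN | VACOPT_PROCESS_TOAST, because
   * process_main and process_toast default to true while every other
   * boolean option defaults to false.
   */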
322
323
  /* sanity checks on options */
324
0
  Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
325
0
  Assert((params.options & VACOPT_VACUUM) ||
326
0
       !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
327
328
0
  if ((params.options & VACOPT_FULL) && params.nworkers > 0)
329
0
    ereport(ERROR,
330
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
331
0
         errmsg("VACUUM FULL cannot be performed in parallel")));
332
333
  /*
334
   * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
335
   * ERROR for that case.  VACUUM (FULL, ANALYZE) does make use of it, so
336
   * we'll permit that.
337
   */
338
0
  if (ring_size != -1 && (params.options & VACOPT_FULL) &&
339
0
    !(params.options & VACOPT_ANALYZE))
340
0
    ereport(ERROR,
341
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
342
0
         errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
343
344
  /*
345
   * Make sure VACOPT_ANALYZE is specified if any column lists are present.
346
   */
347
0
  if (!(params.options & VACOPT_ANALYZE))
348
0
  {
349
0
    foreach(lc, vacstmt->rels)
350
0
    {
351
0
      VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
352
353
0
      if (vrel->va_cols != NIL)
354
0
        ereport(ERROR,
355
0
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
356
0
             errmsg("ANALYZE option must be specified when a column list is provided")));
357
0
    }
358
0
  }
359
360
361
  /*
362
   * Sanity check DISABLE_PAGE_SKIPPING option.
363
   */
364
0
  if ((params.options & VACOPT_FULL) != 0 &&
365
0
    (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
366
0
    ereport(ERROR,
367
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
368
0
         errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
369
370
  /* sanity check for PROCESS_TOAST */
371
0
  if ((params.options & VACOPT_FULL) != 0 &&
372
0
    (params.options & VACOPT_PROCESS_TOAST) == 0)
373
0
    ereport(ERROR,
374
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
375
0
         errmsg("PROCESS_TOAST required with VACUUM FULL")));
376
377
  /* sanity check for ONLY_DATABASE_STATS */
378
0
  if (params.options & VACOPT_ONLY_DATABASE_STATS)
379
0
  {
380
0
    Assert(params.options & VACOPT_VACUUM);
381
0
    if (vacstmt->rels != NIL)
382
0
      ereport(ERROR,
383
0
          (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384
0
           errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
385
    /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
386
0
    if (params.options & ~(VACOPT_VACUUM |
387
0
                 VACOPT_VERBOSE |
388
0
                 VACOPT_PROCESS_MAIN |
389
0
                 VACOPT_PROCESS_TOAST |
390
0
                 VACOPT_ONLY_DATABASE_STATS))
391
0
      ereport(ERROR,
392
0
          (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
393
0
           errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
394
0
  }
395
396
  /*
397
   * All freeze ages are zero if the FREEZE option is given; otherwise pass
398
   * them as -1 which means to use the default values.
399
   */
400
0
  if (params.options & VACOPT_FREEZE)
401
0
  {
402
0
    params.freeze_min_age = 0;
403
0
    params.freeze_table_age = 0;
404
0
    params.multixact_freeze_min_age = 0;
405
0
    params.multixact_freeze_table_age = 0;
406
0
  }
407
0
  else
408
0
  {
409
0
    params.freeze_min_age = -1;
410
0
    params.freeze_table_age = -1;
411
0
    params.multixact_freeze_min_age = -1;
412
0
    params.multixact_freeze_table_age = -1;
413
0
  }
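  /*
   * Illustrative note (not part of vacuum.c): the FREEZE option is thus
   * equivalent to running the command with vacuum_freeze_min_age,
   * vacuum_freeze_table_age, vacuum_multixact_freeze_min_age and
   * vacuum_multixact_freeze_table_age all set to 0 for this run, which makes
   * vacuum_get_cutoffs() freeze every eligible tuple and choose an
   * aggressive scan.
   */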
414
415
  /* user-invoked vacuum is never "for wraparound" */
416
0
  params.is_wraparound = false;
417
418
  /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
419
0
  params.log_min_duration = -1;
420
421
  /*
422
   * Later, in vacuum_rel(), we check if a reloption override was specified.
423
   */
424
0
  params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
425
426
  /*
427
   * Create special memory context for cross-transaction storage.
428
   *
429
   * Since it is a child of PortalContext, it will go away eventually even
430
   * if we suffer an error; there's no need for special abort cleanup logic.
431
   */
432
0
  vac_context = AllocSetContextCreate(PortalContext,
433
0
                    "Vacuum",
434
0
                    ALLOCSET_DEFAULT_SIZES);
435
436
  /*
437
   * Make a buffer strategy object in the cross-transaction memory context.
438
   * We needn't bother making this for VACUUM (FULL) or VACUUM
439
   * (ONLY_DATABASE_STATS) as they'll not make use of it.  VACUUM (FULL,
440
   * ANALYZE) is possible, so we'd better ensure that we make a strategy
441
   * when we see ANALYZE.
442
   */
443
0
  if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
444
0
               VACOPT_FULL)) == 0 ||
445
0
    (params.options & VACOPT_ANALYZE) != 0)
446
0
  {
447
448
0
    MemoryContext old_context = MemoryContextSwitchTo(vac_context);
449
450
0
    Assert(ring_size >= -1);
451
452
    /*
453
     * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
454
     * command, it overrides the value of VacuumBufferUsageLimit.  Either
455
     * value may be 0, in which case GetAccessStrategyWithSize() will
456
     * return NULL, effectively allowing full use of shared buffers.
457
     */
458
0
    if (ring_size == -1)
459
0
      ring_size = VacuumBufferUsageLimit;
460
461
0
    bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
462
463
0
    MemoryContextSwitchTo(old_context);
464
0
  }
465
466
  /* Now go through the common routine */
467
0
  vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
468
469
  /* Finally, clean up the vacuum memory context */
470
0
  MemoryContextDelete(vac_context);
471
0
}
472
473
/*
474
 * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
475
 *
476
 * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
477
 * we process all relevant tables in the database.  For each VacuumRelation,
478
 * if a valid OID is supplied, the table with that OID is what to process;
479
 * otherwise, the VacuumRelation's RangeVar indicates what to process.
480
 *
481
 * params contains a set of parameters that can be used to customize the
482
 * behavior.
483
 *
484
 * bstrategy may be passed in as NULL when the caller does not want to
485
 * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
486
 * otherwise, the caller must build a BufferAccessStrategy with the number of
487
 * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
488
 * using.
489
 *
490
 * isTopLevel should be passed down from ProcessUtility.
491
 *
492
 * It is the caller's responsibility that all parameters are allocated in a
493
 * memory context that will not disappear at transaction commit.
494
 */
495
void
496
vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
497
     MemoryContext vac_context, bool isTopLevel)
498
0
{
499
0
  static bool in_vacuum = false;
500
501
0
  const char *stmttype;
502
0
  volatile bool in_outer_xact,
503
0
        use_own_xacts;
504
505
0
  Assert(params != NULL);
506
507
0
  stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
508
509
  /*
510
   * We cannot run VACUUM inside a user transaction block; if we were inside
511
   * a transaction, then our commit- and start-transaction-command calls
512
   * would not have the intended effect!  There are numerous other subtle
513
   * dependencies on this, too.
514
   *
515
   * ANALYZE (without VACUUM) can run either way.
516
   */
517
0
  if (params->options & VACOPT_VACUUM)
518
0
  {
519
0
    PreventInTransactionBlock(isTopLevel, stmttype);
520
0
    in_outer_xact = false;
521
0
  }
522
0
  else
523
0
    in_outer_xact = IsInTransactionBlock(isTopLevel);
524
525
  /*
526
   * Check for and disallow recursive calls.  This could happen when VACUUM
527
   * FULL or ANALYZE calls a hostile index expression that itself calls
528
   * ANALYZE.
529
   */
530
0
  if (in_vacuum)
531
0
    ereport(ERROR,
532
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
533
0
         errmsg("%s cannot be executed from VACUUM or ANALYZE",
534
0
            stmttype)));
535
536
  /*
537
   * Build list of relation(s) to process, putting any new data in
538
   * vac_context for safekeeping.
539
   */
540
0
  if (params->options & VACOPT_ONLY_DATABASE_STATS)
541
0
  {
542
    /* We don't process any tables in this case */
543
0
    Assert(relations == NIL);
544
0
  }
545
0
  else if (relations != NIL)
546
0
  {
547
0
    List     *newrels = NIL;
548
0
    ListCell   *lc;
549
550
0
    foreach(lc, relations)
551
0
    {
552
0
      VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
553
0
      List     *sublist;
554
0
      MemoryContext old_context;
555
556
0
      sublist = expand_vacuum_rel(vrel, vac_context, params->options);
557
0
      old_context = MemoryContextSwitchTo(vac_context);
558
0
      newrels = list_concat(newrels, sublist);
559
0
      MemoryContextSwitchTo(old_context);
560
0
    }
561
0
    relations = newrels;
562
0
  }
563
0
  else
564
0
    relations = get_all_vacuum_rels(vac_context, params->options);
565
566
  /*
567
   * Decide whether we need to start/commit our own transactions.
568
   *
569
   * For VACUUM (with or without ANALYZE): always do so, so that we can
570
   * release locks as soon as possible.  (We could possibly use the outer
571
   * transaction for a one-table VACUUM, but handling TOAST tables would be
572
   * problematic.)
573
   *
574
   * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
575
   * start/commit our own transactions.  Also, there's no need to do so if
576
   * only processing one relation.  For multiple relations when not within a
577
   * transaction block, and also in an autovacuum worker, use own
578
   * transactions so we can release locks sooner.
579
   */
580
0
  if (params->options & VACOPT_VACUUM)
581
0
    use_own_xacts = true;
582
0
  else
583
0
  {
584
0
    Assert(params->options & VACOPT_ANALYZE);
585
0
    if (AmAutoVacuumWorkerProcess())
586
0
      use_own_xacts = true;
587
0
    else if (in_outer_xact)
588
0
      use_own_xacts = false;
589
0
    else if (list_length(relations) > 1)
590
0
      use_own_xacts = true;
591
0
    else
592
0
      use_own_xacts = false;
593
0
  }
594
595
  /*
596
   * vacuum_rel expects to be entered with no transaction active; it will
597
   * start and commit its own transaction.  But we are called by an SQL
598
   * command, and so we are executing inside a transaction already. We
599
   * commit the transaction started in PostgresMain() here, and start
600
   * another one before exiting to match the commit waiting for us back in
601
   * PostgresMain().
602
   */
603
0
  if (use_own_xacts)
604
0
  {
605
0
    Assert(!in_outer_xact);
606
607
    /* ActiveSnapshot is not set by autovacuum */
608
0
    if (ActiveSnapshotSet())
609
0
      PopActiveSnapshot();
610
611
    /* matches the StartTransaction in PostgresMain() */
612
0
    CommitTransactionCommand();
613
0
  }
614
615
  /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
616
0
  PG_TRY();
617
0
  {
618
0
    ListCell   *cur;
619
620
0
    in_vacuum = true;
621
0
    VacuumFailsafeActive = false;
622
0
    VacuumUpdateCosts();
623
0
    VacuumCostBalance = 0;
624
0
    VacuumCostBalanceLocal = 0;
625
0
    VacuumSharedCostBalance = NULL;
626
0
    VacuumActiveNWorkers = NULL;
627
628
    /*
629
     * Loop to process each selected relation.
630
     */
631
0
    foreach(cur, relations)
632
0
    {
633
0
      VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
634
635
0
      if (params->options & VACOPT_VACUUM)
636
0
      {
637
0
        if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
638
0
          continue;
639
0
      }
640
641
0
      if (params->options & VACOPT_ANALYZE)
642
0
      {
643
        /*
644
         * If using separate xacts, start one for analyze. Otherwise,
645
         * we can use the outer transaction.
646
         */
647
0
        if (use_own_xacts)
648
0
        {
649
0
          StartTransactionCommand();
650
          /* functions in indexes may want a snapshot set */
651
0
          PushActiveSnapshot(GetTransactionSnapshot());
652
0
        }
653
654
0
        analyze_rel(vrel->oid, vrel->relation, params,
655
0
              vrel->va_cols, in_outer_xact, bstrategy);
656
657
0
        if (use_own_xacts)
658
0
        {
659
0
          PopActiveSnapshot();
660
          /* standard_ProcessUtility() does CCI if !use_own_xacts */
661
0
          CommandCounterIncrement();
662
0
          CommitTransactionCommand();
663
0
        }
664
0
        else
665
0
        {
666
          /*
667
           * If we're not using separate xacts, better separate the
668
           * ANALYZE actions with CCIs.  This avoids trouble if user
669
           * says "ANALYZE t, t".
670
           */
671
0
          CommandCounterIncrement();
672
0
        }
673
0
      }
674
675
      /*
676
       * Ensure VacuumFailsafeActive has been reset before vacuuming the
677
       * next relation.
678
       */
679
0
      VacuumFailsafeActive = false;
680
0
    }
681
0
  }
682
0
  PG_FINALLY();
683
0
  {
684
0
    in_vacuum = false;
685
0
    VacuumCostActive = false;
686
0
    VacuumFailsafeActive = false;
687
0
    VacuumCostBalance = 0;
688
0
  }
689
0
  PG_END_TRY();
690
691
  /*
692
   * Finish up processing.
693
   */
694
0
  if (use_own_xacts)
695
0
  {
696
    /* here, we are not in a transaction */
697
698
    /*
699
     * This matches the CommitTransaction waiting for us in
700
     * PostgresMain().
701
     */
702
0
    StartTransactionCommand();
703
0
  }
704
705
0
  if ((params->options & VACOPT_VACUUM) &&
706
0
    !(params->options & VACOPT_SKIP_DATABASE_STATS))
707
0
  {
708
    /*
709
     * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
710
     */
711
0
    vac_update_datfrozenxid();
712
0
  }
713
714
0
}
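/*
 * Illustrative note (not part of vacuum.c): for "VACUUM a, b" run as a
 * top-level command, the transaction flow above is roughly:
 *
 *     CommitTransactionCommand();    -- end the xact started by PostgresMain
 *     vacuum_rel(a)                  -- starts and commits its own xact
 *     vacuum_rel(b)                  -- likewise
 *     StartTransactionCommand();     -- matches the commit in PostgresMain
 *
 * so locks on each table are released as soon as that table is finished.
 */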
715
716
/*
717
 * Check if the current user has privileges to vacuum or analyze the relation.
718
 * If not, issue a WARNING log message and return false to let the caller
719
 * decide what to do with this relation.  This routine is used to decide if a
720
 * relation can be processed for VACUUM or ANALYZE.
721
 */
722
bool
723
vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
724
                 bits32 options)
725
0
{
726
0
  char     *relname;
727
728
0
  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
729
730
  /*----------
731
   * A role has privileges to vacuum or analyze the relation if any of the
732
   * following are true:
733
   *   - the role owns the current database and the relation is not shared
734
   *   - the role has the MAINTAIN privilege on the relation
735
   *----------
736
   */
737
0
  if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
738
0
     !reltuple->relisshared) ||
739
0
    pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
740
0
    return true;
741
742
0
  relname = NameStr(reltuple->relname);
743
744
0
  if ((options & VACOPT_VACUUM) != 0)
745
0
  {
746
0
    ereport(WARNING,
747
0
        (errmsg("permission denied to vacuum \"%s\", skipping it",
748
0
            relname)));
749
750
    /*
751
     * For VACUUM ANALYZE, both logs could show up, but just generate
752
     * information for VACUUM as that would be the first one to be
753
     * processed.
754
     */
755
0
    return false;
756
0
  }
757
758
0
  if ((options & VACOPT_ANALYZE) != 0)
759
0
    ereport(WARNING,
760
0
        (errmsg("permission denied to analyze \"%s\", skipping it",
761
0
            relname)));
762
763
0
  return false;
764
0
}
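/*
 * Illustrative note (not part of vacuum.c): besides superusers, table owners
 * and database owners (for non-shared relations), a role can be allowed to
 * vacuum or analyze a specific table by granting it the MAINTAIN privilege,
 * e.g.
 *
 *     GRANT MAINTAIN ON TABLE some_table TO maintenance_role;
 *
 * after which the ACL_MAINTAIN check above succeeds for that role.
 */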
765
766
767
/*
768
 * vacuum_open_relation
769
 *
770
 * This routine is used for attempting to open and lock a relation which
771
 * is going to be vacuumed or analyzed.  If the relation cannot be opened
772
 * or locked, a log is emitted if possible.
773
 */
774
Relation
775
vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
776
           bool verbose, LOCKMODE lmode)
777
0
{
778
0
  Relation  rel;
779
0
  bool    rel_lock = true;
780
0
  int     elevel;
781
782
0
  Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
783
784
  /*
785
   * Open the relation and get the appropriate lock on it.
786
   *
787
   * There's a race condition here: the relation may have gone away since
788
   * the last time we saw it.  If so, we don't need to vacuum or analyze it.
789
   *
790
   * If we've been asked not to wait for the relation lock, acquire it first
791
   * in non-blocking mode, before calling try_relation_open().
792
   */
793
0
  if (!(options & VACOPT_SKIP_LOCKED))
794
0
    rel = try_relation_open(relid, lmode);
795
0
  else if (ConditionalLockRelationOid(relid, lmode))
796
0
    rel = try_relation_open(relid, NoLock);
797
0
  else
798
0
  {
799
0
    rel = NULL;
800
0
    rel_lock = false;
801
0
  }
802
803
  /* if relation is opened, leave */
804
0
  if (rel)
805
0
    return rel;
806
807
  /*
808
   * Relation could not be opened, so emit a log message describing the
809
   * situation if we can.
810
   *
811
   * If the RangeVar is not defined, we do not have enough information to
812
   * provide a meaningful log statement.  Chances are that the caller has
813
   * intentionally not provided this information so that this logging is
814
   * skipped, anyway.
815
   */
816
0
  if (relation == NULL)
817
0
    return NULL;
818
819
  /*
820
   * Determine the log level.
821
   *
822
   * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
823
   * statements in the permission checks; otherwise, only log if the caller
824
   * so requested.
825
   */
826
0
  if (!AmAutoVacuumWorkerProcess())
827
0
    elevel = WARNING;
828
0
  else if (verbose)
829
0
    elevel = LOG;
830
0
  else
831
0
    return NULL;
832
833
0
  if ((options & VACOPT_VACUUM) != 0)
834
0
  {
835
0
    if (!rel_lock)
836
0
      ereport(elevel,
837
0
          (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
838
0
           errmsg("skipping vacuum of \"%s\" --- lock not available",
839
0
              relation->relname)));
840
0
    else
841
0
      ereport(elevel,
842
0
          (errcode(ERRCODE_UNDEFINED_TABLE),
843
0
           errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
844
0
              relation->relname)));
845
846
    /*
847
     * For VACUUM ANALYZE, both logs could show up, but just generate
848
     * information for VACUUM as that would be the first one to be
849
     * processed.
850
     */
851
0
    return NULL;
852
0
  }
853
854
0
  if ((options & VACOPT_ANALYZE) != 0)
855
0
  {
856
0
    if (!rel_lock)
857
0
      ereport(elevel,
858
0
          (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
859
0
           errmsg("skipping analyze of \"%s\" --- lock not available",
860
0
              relation->relname)));
861
0
    else
862
0
      ereport(elevel,
863
0
          (errcode(ERRCODE_UNDEFINED_TABLE),
864
0
           errmsg("skipping analyze of \"%s\" --- relation no longer exists",
865
0
              relation->relname)));
866
0
  }
867
868
0
  return NULL;
869
0
}
870
871
872
/*
873
 * Given a VacuumRelation, fill in the table OID if it wasn't specified,
874
 * and optionally add VacuumRelations for partitions or inheritance children.
875
 *
876
 * If a VacuumRelation does not have an OID supplied and is a partitioned
877
 * table, an extra entry will be added to the output for each partition.
878
 * Presently, only autovacuum supplies OIDs when calling vacuum(), and
879
 * it does not want us to expand partitioned tables.
880
 *
881
 * We take care not to modify the input data structure, but instead build
882
 * new VacuumRelation(s) to return.  (But note that they will reference
883
 * unmodified parts of the input, eg column lists.)  New data structures
884
 * are made in vac_context.
885
 */
886
static List *
887
expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
888
          int options)
889
0
{
890
0
  List     *vacrels = NIL;
891
0
  MemoryContext oldcontext;
892
893
  /* If caller supplied OID, there's nothing we need do here. */
894
0
  if (OidIsValid(vrel->oid))
895
0
  {
896
0
    oldcontext = MemoryContextSwitchTo(vac_context);
897
0
    vacrels = lappend(vacrels, vrel);
898
0
    MemoryContextSwitchTo(oldcontext);
899
0
  }
900
0
  else
901
0
  {
902
    /*
903
     * Process a specific relation, and possibly partitions or child
904
     * tables thereof.
905
     */
906
0
    Oid     relid;
907
0
    HeapTuple tuple;
908
0
    Form_pg_class classForm;
909
0
    bool    include_children;
910
0
    bool    is_partitioned_table;
911
0
    int     rvr_opts;
912
913
    /*
914
     * Since autovacuum workers supply OIDs when calling vacuum(), no
915
     * autovacuum worker should reach this code.
916
     */
917
0
    Assert(!AmAutoVacuumWorkerProcess());
918
919
    /*
920
     * We transiently take AccessShareLock to protect the syscache lookup
921
     * below, as well as find_all_inheritors's expectation that the caller
922
     * holds some lock on the starting relation.
923
     */
924
0
    rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
925
0
    relid = RangeVarGetRelidExtended(vrel->relation,
926
0
                     AccessShareLock,
927
0
                     rvr_opts,
928
0
                     NULL, NULL);
929
930
    /*
931
     * If the lock is unavailable, emit the same log statement that
932
     * vacuum_rel() and analyze_rel() would.
933
     */
934
0
    if (!OidIsValid(relid))
935
0
    {
936
0
      if (options & VACOPT_VACUUM)
937
0
        ereport(WARNING,
938
0
            (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
939
0
             errmsg("skipping vacuum of \"%s\" --- lock not available",
940
0
                vrel->relation->relname)));
941
0
      else
942
0
        ereport(WARNING,
943
0
            (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
944
0
             errmsg("skipping analyze of \"%s\" --- lock not available",
945
0
                vrel->relation->relname)));
946
0
      return vacrels;
947
0
    }
948
949
    /*
950
     * To check whether the relation is a partitioned table and its
951
     * ownership, fetch its syscache entry.
952
     */
953
0
    tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
954
0
    if (!HeapTupleIsValid(tuple))
955
0
      elog(ERROR, "cache lookup failed for relation %u", relid);
956
0
    classForm = (Form_pg_class) GETSTRUCT(tuple);
957
958
    /*
959
     * Make a returnable VacuumRelation for this rel if the user has the
960
     * required privileges.
961
     */
962
0
    if (vacuum_is_permitted_for_relation(relid, classForm, options))
963
0
    {
964
0
      oldcontext = MemoryContextSwitchTo(vac_context);
965
0
      vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
966
0
                              relid,
967
0
                              vrel->va_cols));
968
0
      MemoryContextSwitchTo(oldcontext);
969
0
    }
970
971
    /*
972
     * Vacuuming a partitioned table with ONLY will not do anything since
973
     * the partitioned table itself is empty.  Issue a warning if the user
974
     * requests this.
975
     */
976
0
    include_children = vrel->relation->inh;
977
0
    is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
978
0
    if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
979
0
      ereport(WARNING,
980
0
          (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
981
0
              vrel->relation->relname)));
982
983
0
    ReleaseSysCache(tuple);
984
985
    /*
986
     * Unless the user has specified ONLY, make relation list entries for
987
     * its partitions or inheritance child tables.  Note that the list
988
     * returned by find_all_inheritors() includes the passed-in OID, so we
989
     * have to skip that.  There's no point in taking locks on the
990
     * individual partitions or child tables yet, and doing so would just
991
     * add unnecessary deadlock risk.  For this last reason, we do not yet
992
     * check the ownership of the partitions/tables, which get added to
993
     * the list to process.  Ownership will be checked later on anyway.
994
     */
995
0
    if (include_children)
996
0
    {
997
0
      List     *part_oids = find_all_inheritors(relid, NoLock, NULL);
998
0
      ListCell   *part_lc;
999
1000
0
      foreach(part_lc, part_oids)
1001
0
      {
1002
0
        Oid     part_oid = lfirst_oid(part_lc);
1003
1004
0
        if (part_oid == relid)
1005
0
          continue; /* ignore original table */
1006
1007
        /*
1008
         * We omit a RangeVar since it wouldn't be appropriate to
1009
         * complain about failure to open one of these relations
1010
         * later.
1011
         */
1012
0
        oldcontext = MemoryContextSwitchTo(vac_context);
1013
0
        vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1014
0
                                part_oid,
1015
0
                                vrel->va_cols));
1016
0
        MemoryContextSwitchTo(oldcontext);
1017
0
      }
1018
0
    }
1019
1020
    /*
1021
     * Release lock again.  This means that by the time we actually try to
1022
     * process the table, it might be gone or renamed.  In the former case
1023
     * we'll silently ignore it; in the latter case we'll process it
1024
     * anyway, but we must beware that the RangeVar doesn't necessarily
1025
     * identify it anymore.  This isn't ideal, perhaps, but there's little
1026
     * practical alternative, since we're typically going to commit this
1027
     * transaction and begin a new one between now and then.  Moreover,
1028
     * holding locks on multiple relations would create significant risk
1029
     * of deadlock.
1030
     */
1031
0
    UnlockRelationOid(relid, AccessShareLock);
1032
0
  }
1033
1034
0
  return vacrels;
1035
0
}
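/*
 * Illustrative note (not part of vacuum.c): given a partitioned table p with
 * partitions p1 and p2, "VACUUM p" expands here into entries for p, p1 and
 * p2 (found via find_all_inheritors), while "VACUUM ONLY p" keeps just p and
 * draws the "has no effect" warning above, since a partitioned table has no
 * storage of its own.
 */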
1036
1037
/*
1038
 * Construct a list of VacuumRelations for all vacuumable rels in
1039
 * the current database.  The list is built in vac_context.
1040
 */
1041
static List *
1042
get_all_vacuum_rels(MemoryContext vac_context, int options)
1043
0
{
1044
0
  List     *vacrels = NIL;
1045
0
  Relation  pgclass;
1046
0
  TableScanDesc scan;
1047
0
  HeapTuple tuple;
1048
1049
0
  pgclass = table_open(RelationRelationId, AccessShareLock);
1050
1051
0
  scan = table_beginscan_catalog(pgclass, 0, NULL);
1052
1053
0
  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1054
0
  {
1055
0
    Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1056
0
    MemoryContext oldcontext;
1057
0
    Oid     relid = classForm->oid;
1058
1059
    /*
1060
     * We include partitioned tables here; depending on which operation is
1061
     * to be performed, caller will decide whether to process or ignore
1062
     * them.
1063
     */
1064
0
    if (classForm->relkind != RELKIND_RELATION &&
1065
0
      classForm->relkind != RELKIND_MATVIEW &&
1066
0
      classForm->relkind != RELKIND_PARTITIONED_TABLE)
1067
0
      continue;
1068
1069
    /* check permissions of relation */
1070
0
    if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1071
0
      continue;
1072
1073
    /*
1074
     * Build VacuumRelation(s) specifying the table OIDs to be processed.
1075
     * We omit a RangeVar since it wouldn't be appropriate to complain
1076
     * about failure to open one of these relations later.
1077
     */
1078
0
    oldcontext = MemoryContextSwitchTo(vac_context);
1079
0
    vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1080
0
                            relid,
1081
0
                            NIL));
1082
0
    MemoryContextSwitchTo(oldcontext);
1083
0
  }
1084
1085
0
  table_endscan(scan);
1086
0
  table_close(pgclass, AccessShareLock);
1087
1088
0
  return vacrels;
1089
0
}
1090
1091
/*
1092
 * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1093
 *
1094
 * The target relation and VACUUM parameters are our inputs.
1095
 *
1096
 * Output parameters are the cutoffs that VACUUM caller should use.
1097
 *
1098
 * Return value indicates if vacuumlazy.c caller should make its VACUUM
1099
 * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
1100
 * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1101
 * minimum).
1102
 */
1103
bool
1104
vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1105
           struct VacuumCutoffs *cutoffs)
1106
0
{
1107
0
  int     freeze_min_age,
1108
0
        multixact_freeze_min_age,
1109
0
        freeze_table_age,
1110
0
        multixact_freeze_table_age,
1111
0
        effective_multixact_freeze_max_age;
1112
0
  TransactionId nextXID,
1113
0
        safeOldestXmin,
1114
0
        aggressiveXIDCutoff;
1115
0
  MultiXactId nextMXID,
1116
0
        safeOldestMxact,
1117
0
        aggressiveMXIDCutoff;
1118
1119
  /* Use mutable copies of freeze age parameters */
1120
0
  freeze_min_age = params->freeze_min_age;
1121
0
  multixact_freeze_min_age = params->multixact_freeze_min_age;
1122
0
  freeze_table_age = params->freeze_table_age;
1123
0
  multixact_freeze_table_age = params->multixact_freeze_table_age;
1124
1125
  /* Set pg_class fields in cutoffs */
1126
0
  cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1127
0
  cutoffs->relminmxid = rel->rd_rel->relminmxid;
1128
1129
  /*
1130
   * Acquire OldestXmin.
1131
   *
1132
   * We can always ignore processes running lazy vacuum.  This is because we
1133
   * use these values only for deciding which tuples we must keep in the
1134
   * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
1135
   * XID assigned), it's safe to ignore it.  In theory it could be
1136
   * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1137
   * that only one vacuum process can be working on a particular table at
1138
   * any time, and that each vacuum is always an independent transaction.
1139
   */
1140
0
  cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1141
1142
0
  Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1143
1144
  /* Acquire OldestMxact */
1145
0
  cutoffs->OldestMxact = GetOldestMultiXactId();
1146
0
  Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1147
1148
  /* Acquire next XID/next MXID values used to apply age-based settings */
1149
0
  nextXID = ReadNextTransactionId();
1150
0
  nextMXID = ReadNextMultiXactId();
1151
1152
  /*
1153
   * Also compute the multixact age for which freezing is urgent.  This is
1154
   * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1155
   * short of multixact member space.
1156
   */
1157
0
  effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1158
1159
  /*
1160
   * Almost ready to set freeze output parameters; check if OldestXmin or
1161
   * OldestMxact are held back to an unsafe degree before we start on that
1162
   */
1163
0
  safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1164
0
  if (!TransactionIdIsNormal(safeOldestXmin))
1165
0
    safeOldestXmin = FirstNormalTransactionId;
1166
0
  safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1167
0
  if (safeOldestMxact < FirstMultiXactId)
1168
0
    safeOldestMxact = FirstMultiXactId;
1169
0
  if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1170
0
    ereport(WARNING,
1171
0
        (errmsg("cutoff for removing and freezing tuples is far in the past"),
1172
0
         errhint("Close open transactions soon to avoid wraparound problems.\n"
1173
0
             "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1174
0
  if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1175
0
    ereport(WARNING,
1176
0
        (errmsg("cutoff for freezing multixacts is far in the past"),
1177
0
         errhint("Close open transactions soon to avoid wraparound problems.\n"
1178
0
             "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1179
1180
  /*
1181
   * Determine the minimum freeze age to use: as specified by the caller, or
1182
   * vacuum_freeze_min_age, but in any case not more than half
1183
   * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1184
   * wraparound won't occur too frequently.
1185
   */
1186
0
  if (freeze_min_age < 0)
1187
0
    freeze_min_age = vacuum_freeze_min_age;
1188
0
  freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1189
0
  Assert(freeze_min_age >= 0);
1190
1191
  /* Compute FreezeLimit, being careful to generate a normal XID */
1192
0
  cutoffs->FreezeLimit = nextXID - freeze_min_age;
1193
0
  if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1194
0
    cutoffs->FreezeLimit = FirstNormalTransactionId;
1195
  /* FreezeLimit must always be <= OldestXmin */
1196
0
  if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1197
0
    cutoffs->FreezeLimit = cutoffs->OldestXmin;
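  /*
   * Worked example (illustrative, not from the source): with
   * ReadNextTransactionId() returning 100,000,000 and freeze_min_age at its
   * usual default of 50,000,000, FreezeLimit starts out as 50,000,000.  If a
   * long-running snapshot holds OldestXmin back at 40,000,000, the clamp
   * just above lowers FreezeLimit to 40,000,000, since FreezeLimit may never
   * exceed OldestXmin.
   */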
1198
1199
  /*
1200
   * Determine the minimum multixact freeze age to use: as specified by
1201
   * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1202
   * than half effective_multixact_freeze_max_age, so that autovacuums to
1203
   * prevent MultiXact wraparound won't occur too frequently.
1204
   */
1205
0
  if (multixact_freeze_min_age < 0)
1206
0
    multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1207
0
  multixact_freeze_min_age = Min(multixact_freeze_min_age,
1208
0
                   effective_multixact_freeze_max_age / 2);
1209
0
  Assert(multixact_freeze_min_age >= 0);
1210
1211
  /* Compute MultiXactCutoff, being careful to generate a valid value */
1212
0
  cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1213
0
  if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1214
0
    cutoffs->MultiXactCutoff = FirstMultiXactId;
1215
  /* MultiXactCutoff must always be <= OldestMxact */
1216
0
  if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1217
0
    cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1218
1219
  /*
1220
   * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1221
   *
1222
   * Determine the table freeze age to use: as specified by the caller, or
1223
   * the value of the vacuum_freeze_table_age GUC, but in any case not more
1224
   * than autovacuum_freeze_max_age * 0.95, so that if you have e.g. a nightly
1225
   * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1226
   * anti-wraparound autovacuum is launched.
1227
   */
1228
0
  if (freeze_table_age < 0)
1229
0
    freeze_table_age = vacuum_freeze_table_age;
1230
0
  freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1231
0
  Assert(freeze_table_age >= 0);
1232
0
  aggressiveXIDCutoff = nextXID - freeze_table_age;
1233
0
  if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1234
0
    aggressiveXIDCutoff = FirstNormalTransactionId;
1235
0
  if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1236
0
                    aggressiveXIDCutoff))
1237
0
    return true;
1238
1239
  /*
1240
   * Similar to the above, determine the table freeze age to use for
1241
   * multixacts: as specified by the caller, or the value of the
1242
   * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1243
   * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1244
   * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1245
   * multixacts before anti-wraparound autovacuum is launched.
1246
   */
1247
0
  if (multixact_freeze_table_age < 0)
1248
0
    multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1249
0
  multixact_freeze_table_age =
1250
0
    Min(multixact_freeze_table_age,
1251
0
      effective_multixact_freeze_max_age * 0.95);
1252
0
  Assert(multixact_freeze_table_age >= 0);
1253
0
  aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1254
0
  if (aggressiveMXIDCutoff < FirstMultiXactId)
1255
0
    aggressiveMXIDCutoff = FirstMultiXactId;
1256
0
  if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1257
0
                  aggressiveMXIDCutoff))
1258
0
    return true;
1259
1260
  /* Non-aggressive VACUUM */
1261
0
  return false;
1262
0
}
1263
1264
/*
1265
 * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1266
 * mechanism to determine if its table's relfrozenxid and relminmxid are now
1267
 * dangerously far in the past.
1268
 *
1269
 * When we return true, VACUUM caller triggers the failsafe.
1270
 */
1271
bool
1272
vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1273
0
{
1274
0
  TransactionId relfrozenxid = cutoffs->relfrozenxid;
1275
0
  MultiXactId relminmxid = cutoffs->relminmxid;
1276
0
  TransactionId xid_skip_limit;
1277
0
  MultiXactId multi_skip_limit;
1278
0
  int     skip_index_vacuum;
1279
1280
0
  Assert(TransactionIdIsNormal(relfrozenxid));
1281
0
  Assert(MultiXactIdIsValid(relminmxid));
1282
1283
  /*
1284
   * Determine the index skipping age to use. In any case no less than
1285
   * autovacuum_freeze_max_age * 1.05.
1286
   */
1287
0
  skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1288
1289
0
  xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1290
0
  if (!TransactionIdIsNormal(xid_skip_limit))
1291
0
    xid_skip_limit = FirstNormalTransactionId;
1292
1293
0
  if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1294
0
  {
1295
    /* The table's relfrozenxid is too old */
1296
0
    return true;
1297
0
  }
1298
1299
  /*
1300
   * Similar to above, determine the index skipping age to use for
1301
   * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1302
   * 1.05.
1303
   */
1304
0
  skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1305
0
              autovacuum_multixact_freeze_max_age * 1.05);
1306
1307
0
  multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1308
0
  if (multi_skip_limit < FirstMultiXactId)
1309
0
    multi_skip_limit = FirstMultiXactId;
1310
1311
0
  if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1312
0
  {
1313
    /* The table's relminmxid is too old */
1314
0
    return true;
1315
0
  }
1316
1317
0
  return false;
1318
0
}
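/*
 * Worked example (illustrative, not from the source): with the usual
 * defaults of vacuum_failsafe_age = 1.6 billion and
 * autovacuum_freeze_max_age = 200 million, the effective limit is
 * Max(1,600,000,000, 200,000,000 * 1.05) = 1.6 billion, so the failsafe
 * fires once the table's relfrozenxid lags more than about 1.6 billion
 * transactions behind the next XID.
 */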
1319
1320
/*
1321
 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1322
 *
1323
 *    If we scanned the whole relation then we should just use the count of
1324
 *    live tuples seen; but if we did not, we should not blindly extrapolate
1325
 *    from that number, since VACUUM may have scanned a quite nonrandom
1326
 *    subset of the table.  When we have only partial information, we take
1327
 *    the old value of pg_class.reltuples/pg_class.relpages as a measurement
1328
 *    of the tuple density in the unscanned pages.
1329
 *
1330
 *    Note: scanned_tuples should count only *live* tuples, since
1331
 *    pg_class.reltuples is defined that way.
1332
 */
1333
double
1334
vac_estimate_reltuples(Relation relation,
1335
             BlockNumber total_pages,
1336
             BlockNumber scanned_pages,
1337
             double scanned_tuples)
1338
0
{
1339
0
  BlockNumber old_rel_pages = relation->rd_rel->relpages;
1340
0
  double    old_rel_tuples = relation->rd_rel->reltuples;
1341
0
  double    old_density;
1342
0
  double    unscanned_pages;
1343
0
  double    total_tuples;
1344
1345
  /* If we did scan the whole table, just use the count as-is */
1346
0
  if (scanned_pages >= total_pages)
1347
0
    return scanned_tuples;
1348
1349
  /*
1350
   * When successive VACUUM commands scan the same few pages again and
1351
   * again, without anything from the table really changing, there is a risk
1352
   * that our beliefs about tuple density will gradually become distorted.
1353
   * This might be caused by vacuumlazy.c implementation details, such as
1354
   * its tendency to always scan the last heap page.  Handle that here.
1355
   *
1356
   * If the relation is _exactly_ the same size according to the existing
1357
   * pg_class entry, and only a few of its pages (less than 2%) were
1358
   * scanned, keep the existing value of reltuples.  Also keep the existing
1359
   * value when only a subset of rel's pages <= a single page were scanned.
1360
   *
1361
   * (Note: we might be returning -1 here.)
1362
   */
1363
0
  if (old_rel_pages == total_pages &&
1364
0
    scanned_pages < (double) total_pages * 0.02)
1365
0
    return old_rel_tuples;
1366
0
  if (scanned_pages <= 1)
1367
0
    return old_rel_tuples;
1368
1369
  /*
1370
   * If old density is unknown, we can't do much except scale up
1371
   * scanned_tuples to match total_pages.
1372
   */
1373
0
  if (old_rel_tuples < 0 || old_rel_pages == 0)
1374
0
    return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1375
1376
  /*
1377
   * Okay, we've covered the corner cases.  The normal calculation is to
1378
   * convert the old measurement to a density (tuples per page), then
1379
   * estimate the number of tuples in the unscanned pages using that figure,
1380
   * and finally add on the number of tuples in the scanned pages.
1381
   */
1382
0
  old_density = old_rel_tuples / old_rel_pages;
1383
0
  unscanned_pages = (double) total_pages - (double) scanned_pages;
1384
0
  total_tuples = old_density * unscanned_pages + scanned_tuples;
1385
0
  return floor(total_tuples + 0.5);
1386
0
}
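/*
 * Worked example (illustrative, not from the source): suppose the previous
 * pg_class entry recorded relpages = 1000 and reltuples = 100,000 (an old
 * density of 100 tuples per page), and this VACUUM scanned 100 of the
 * table's 1000 pages, counting 9,000 live tuples.  The 900 unscanned pages
 * are then assumed to still hold 900 * 100 = 90,000 tuples, giving a new
 * reltuples estimate of floor(90,000 + 9,000 + 0.5) = 99,000.
 */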
1387
1388
1389
/*
1390
 *  vac_update_relstats() -- update statistics for one relation
1391
 *
1392
 *    Update the whole-relation statistics that are kept in its pg_class
1393
 *    row.  There are additional stats that will be updated if we are
1394
 *    doing ANALYZE, but we always update these stats.  This routine works
1395
 *    for both index and heap relation entries in pg_class.
1396
 *
1397
 *    We violate transaction semantics here by overwriting the rel's
1398
 *    existing pg_class tuple with the new values.  This is reasonably
1399
 *    safe as long as we're sure that the new values are correct whether or
1400
 *    not this transaction commits.  The reason for doing this is that if
1401
 *    we updated these tuples in the usual way, vacuuming pg_class itself
1402
 *    wouldn't work very well --- by the time we got done with a vacuum
1403
 *    cycle, most of the tuples in pg_class would've been obsoleted.  Of
1404
 *    course, this only works for fixed-size not-null columns, but these are.
1405
 *
1406
 *    Another reason for doing it this way is that when we are in a lazy
1407
 *    VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1408
 *    Somebody vacuuming pg_class might think they could delete a tuple
1409
 *    marked with xmin = our xid.
1410
 *
1411
 *    In addition to fundamentally nontransactional statistics such as
1412
 *    relpages and relallvisible, we try to maintain certain lazily-updated
1413
 *    DDL flags such as relhasindex, by clearing them if no longer correct.
1414
 *    It's safe to do this in VACUUM, which can't run in parallel with
1415
 *    CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1416
 *    However, it's *not* safe to do it in an ANALYZE that's within an
1417
 *    outer transaction, because for example the current transaction might
1418
 *    have dropped the last index; then we'd think relhasindex should be
1419
 *    cleared, but if the transaction later rolls back this would be wrong.
1420
 *    So we refrain from updating the DDL flags if we're inside an outer
1421
 *    transaction.  This is OK since postponing the flag maintenance is
1422
 *    always allowable.
1423
 *
1424
 *    Note: num_tuples should count only *live* tuples, since
1425
 *    pg_class.reltuples is defined that way.
1426
 *
1427
 *    This routine is shared by VACUUM and ANALYZE.
1428
 */
1429
void
1430
vac_update_relstats(Relation relation,
1431
          BlockNumber num_pages, double num_tuples,
1432
          BlockNumber num_all_visible_pages,
1433
          BlockNumber num_all_frozen_pages,
1434
          bool hasindex, TransactionId frozenxid,
1435
          MultiXactId minmulti,
1436
          bool *frozenxid_updated, bool *minmulti_updated,
1437
          bool in_outer_xact)
1438
0
{
1439
0
  Oid     relid = RelationGetRelid(relation);
1440
0
  Relation  rd;
1441
0
  ScanKeyData key[1];
1442
0
  HeapTuple ctup;
1443
0
  void     *inplace_state;
1444
0
  Form_pg_class pgcform;
1445
0
  bool    dirty,
1446
0
        futurexid,
1447
0
        futuremxid;
1448
0
  TransactionId oldfrozenxid;
1449
0
  MultiXactId oldminmulti;
1450
1451
0
  rd = table_open(RelationRelationId, RowExclusiveLock);
1452
1453
  /* Fetch a copy of the tuple to scribble on */
1454
0
  ScanKeyInit(&key[0],
1455
0
        Anum_pg_class_oid,
1456
0
        BTEqualStrategyNumber, F_OIDEQ,
1457
0
        ObjectIdGetDatum(relid));
1458
0
  systable_inplace_update_begin(rd, ClassOidIndexId, true,
1459
0
                  NULL, 1, key, &ctup, &inplace_state);
1460
0
  if (!HeapTupleIsValid(ctup))
1461
0
    elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1462
0
       relid);
1463
0
  pgcform = (Form_pg_class) GETSTRUCT(ctup);
1464
1465
  /* Apply statistical updates, if any, to copied tuple */
1466
1467
0
  dirty = false;
1468
0
  if (pgcform->relpages != (int32) num_pages)
1469
0
  {
1470
0
    pgcform->relpages = (int32) num_pages;
1471
0
    dirty = true;
1472
0
  }
1473
0
  if (pgcform->reltuples != (float4) num_tuples)
1474
0
  {
1475
0
    pgcform->reltuples = (float4) num_tuples;
1476
0
    dirty = true;
1477
0
  }
1478
0
  if (pgcform->relallvisible != (int32) num_all_visible_pages)
1479
0
  {
1480
0
    pgcform->relallvisible = (int32) num_all_visible_pages;
1481
0
    dirty = true;
1482
0
  }
1483
0
  if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1484
0
  {
1485
0
    pgcform->relallfrozen = (int32) num_all_frozen_pages;
1486
0
    dirty = true;
1487
0
  }
1488
1489
  /* Apply DDL updates, but not inside an outer transaction (see above) */
1490
1491
0
  if (!in_outer_xact)
1492
0
  {
1493
    /*
1494
     * If we didn't find any indexes, reset relhasindex.
1495
     */
1496
0
    if (pgcform->relhasindex && !hasindex)
1497
0
    {
1498
0
      pgcform->relhasindex = false;
1499
0
      dirty = true;
1500
0
    }
1501
1502
    /* We also clear relhasrules and relhastriggers if needed */
1503
0
    if (pgcform->relhasrules && relation->rd_rules == NULL)
1504
0
    {
1505
0
      pgcform->relhasrules = false;
1506
0
      dirty = true;
1507
0
    }
1508
0
    if (pgcform->relhastriggers && relation->trigdesc == NULL)
1509
0
    {
1510
0
      pgcform->relhastriggers = false;
1511
0
      dirty = true;
1512
0
    }
1513
0
  }
1514
1515
  /*
1516
   * Update relfrozenxid, unless caller passed InvalidTransactionId
1517
   * indicating it has no new data.
1518
   *
1519
   * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
1520
   * stored relfrozenxid is "in the future" then it seems best to assume
1521
   * it's corrupt, and overwrite with the oldest remaining XID in the table.
1522
   * This should match vac_update_datfrozenxid() concerning what we consider
1523
   * to be "in the future".
1524
   */
1525
0
  oldfrozenxid = pgcform->relfrozenxid;
1526
0
  futurexid = false;
1527
0
  if (frozenxid_updated)
1528
0
    *frozenxid_updated = false;
1529
0
  if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1530
0
  {
1531
0
    bool    update = false;
1532
1533
0
    if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1534
0
      update = true;
1535
0
    else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1536
0
      futurexid = update = true;
1537
1538
0
    if (update)
1539
0
    {
1540
0
      pgcform->relfrozenxid = frozenxid;
1541
0
      dirty = true;
1542
0
      if (frozenxid_updated)
1543
0
        *frozenxid_updated = true;
1544
0
    }
1545
0
  }
1546
1547
  /* Similarly for relminmxid */
1548
0
  oldminmulti = pgcform->relminmxid;
1549
0
  futuremxid = false;
1550
0
  if (minmulti_updated)
1551
0
    *minmulti_updated = false;
1552
0
  if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1553
0
  {
1554
0
    bool    update = false;
1555
1556
0
    if (MultiXactIdPrecedes(oldminmulti, minmulti))
1557
0
      update = true;
1558
0
    else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1559
0
      futuremxid = update = true;
1560
1561
0
    if (update)
1562
0
    {
1563
0
      pgcform->relminmxid = minmulti;
1564
0
      dirty = true;
1565
0
      if (minmulti_updated)
1566
0
        *minmulti_updated = true;
1567
0
    }
1568
0
  }
1569
1570
  /* If anything changed, write out the tuple. */
1571
0
  if (dirty)
1572
0
    systable_inplace_update_finish(inplace_state, ctup);
1573
0
  else
1574
0
    systable_inplace_update_cancel(inplace_state);
1575
1576
0
  table_close(rd, RowExclusiveLock);
1577
1578
0
  if (futurexid)
1579
0
    ereport(WARNING,
1580
0
        (errcode(ERRCODE_DATA_CORRUPTED),
1581
0
         errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1582
0
                 oldfrozenxid, frozenxid,
1583
0
                 RelationGetRelationName(relation))));
1584
0
  if (futuremxid)
1585
0
    ereport(WARNING,
1586
0
        (errcode(ERRCODE_DATA_CORRUPTED),
1587
0
         errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1588
0
                 oldminmulti, minmulti,
1589
0
                 RelationGetRelationName(relation))));
1590
0
}
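
Editor's note: the relfrozenxid/relminmxid handling above follows one rule: advance the stored value, and overwrite it only when it appears to be "in the future". Below is a simplified standalone sketch of that rule (illustrative only, not part of vacuum.c; the helper names are invented, and xid_precedes mimics TransactionIdPrecedes for normal XIDs while the real code also skips permanent/invalid XIDs).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified circular comparison, mirroring TransactionIdPrecedes. */
static bool
xid_precedes(uint32_t a, uint32_t b)
{
    return (int32_t) (a - b) < 0;
}

/*
 * Hypothetical helper restating the rule above: advance the stored value,
 * or overwrite it when it is "in the future" relative to next_xid.
 */
static bool
should_update_frozenxid(uint32_t stored, uint32_t candidate, uint32_t next_xid)
{
    if (stored == candidate)
        return false;
    if (xid_precedes(stored, candidate))    /* normal forward advance */
        return true;
    if (xid_precedes(next_xid, stored))     /* stored value looks corrupt */
        return true;
    return false;
}

int
main(void)
{
    printf("%d\n", should_update_frozenxid(800, 900, 1000));  /* 1: advance */
    printf("%d\n", should_update_frozenxid(900, 800, 1000));  /* 0: would go backwards */
    printf("%d\n", should_update_frozenxid(5000, 800, 1000)); /* 1: stored is in the future */
    return 0;
}
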
1591
1592
1593
/*
1594
 *  vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1595
 *
1596
 *    Update pg_database's datfrozenxid entry for our database to be the
1597
 *    minimum of the pg_class.relfrozenxid values.
1598
 *
1599
 *    Similarly, update our datminmxid to be the minimum of the
1600
 *    pg_class.relminmxid values.
1601
 *
1602
 *    If we are able to advance either pg_database value, also try to
1603
 *    truncate pg_xact and pg_multixact.
1604
 *
1605
 *    We violate transaction semantics here by overwriting the database's
1606
 *    existing pg_database tuple with the new values.  This is reasonably
1607
 *    safe since the new values are correct whether or not this transaction
1608
 *    commits.  As with vac_update_relstats, this avoids leaving dead tuples
1609
 *    behind after a VACUUM.
1610
 */
1611
void
1612
vac_update_datfrozenxid(void)
1613
0
{
1614
0
  HeapTuple tuple;
1615
0
  Form_pg_database dbform;
1616
0
  Relation  relation;
1617
0
  SysScanDesc scan;
1618
0
  HeapTuple classTup;
1619
0
  TransactionId newFrozenXid;
1620
0
  MultiXactId newMinMulti;
1621
0
  TransactionId lastSaneFrozenXid;
1622
0
  MultiXactId lastSaneMinMulti;
1623
0
  bool    bogus = false;
1624
0
  bool    dirty = false;
1625
0
  ScanKeyData key[1];
1626
0
  void     *inplace_state;
1627
1628
  /*
1629
   * Restrict this task to one backend per database.  This avoids race
1630
   * conditions that would move datfrozenxid or datminmxid backward.  It
1631
   * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1632
   * datfrozenxid passed to an earlier vac_truncate_clog() call.
1633
   */
1634
0
  LockDatabaseFrozenIds(ExclusiveLock);
1635
1636
  /*
1637
   * Initialize the "min" calculation with
1638
   * GetOldestNonRemovableTransactionId(), which is a reasonable
1639
   * approximation to the minimum relfrozenxid for not-yet-committed
1640
   * pg_class entries for new tables; see AddNewRelationTuple().  So we
1641
   * cannot produce a wrong minimum by starting with this.
1642
   */
1643
0
  newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1644
1645
  /*
1646
   * Similarly, initialize the MultiXact "min" with the value that would be
1647
   * used on pg_class for new tables.  See AddNewRelationTuple().
1648
   */
1649
0
  newMinMulti = GetOldestMultiXactId();
1650
1651
  /*
1652
   * Identify the latest relfrozenxid and relminmxid values that we could
1653
   * validly see during the scan.  These are conservative values, but it's
1654
   * not really worth trying to be more exact.
1655
   */
1656
0
  lastSaneFrozenXid = ReadNextTransactionId();
1657
0
  lastSaneMinMulti = ReadNextMultiXactId();
1658
1659
  /*
1660
   * We must seqscan pg_class to find the minimum Xid, because there is no
1661
   * index that can help us here.
1662
   *
1663
   * See vac_truncate_clog() for the race condition to prevent.
1664
   */
1665
0
  relation = table_open(RelationRelationId, AccessShareLock);
1666
1667
0
  scan = systable_beginscan(relation, InvalidOid, false,
1668
0
                NULL, 0, NULL);
1669
1670
0
  while ((classTup = systable_getnext(scan)) != NULL)
1671
0
  {
1672
0
    volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1673
0
    TransactionId relfrozenxid = classForm->relfrozenxid;
1674
0
    TransactionId relminmxid = classForm->relminmxid;
1675
1676
    /*
1677
     * Only consider relations able to hold unfrozen XIDs (anything else
1678
     * should have InvalidTransactionId in relfrozenxid anyway).
1679
     */
1680
0
    if (classForm->relkind != RELKIND_RELATION &&
1681
0
      classForm->relkind != RELKIND_MATVIEW &&
1682
0
      classForm->relkind != RELKIND_TOASTVALUE)
1683
0
    {
1684
0
      Assert(!TransactionIdIsValid(relfrozenxid));
1685
0
      Assert(!MultiXactIdIsValid(relminmxid));
1686
0
      continue;
1687
0
    }
1688
1689
    /*
1690
     * Some table AMs might not need per-relation xid / multixid horizons.
1691
     * It therefore seems reasonable to allow relfrozenxid and relminmxid
1692
     * to not be set (i.e. set to their respective Invalid*Id)
1693
     * independently. Thus validate and compute horizon for each only if
1694
     * set.
1695
     *
1696
     * If things are working properly, no relation should have a
1697
     * relfrozenxid or relminmxid that is "in the future".  However, such
1698
     * cases have been known to arise due to bugs in pg_upgrade.  If we
1699
     * see any entries that are "in the future", chicken out and don't do
1700
     * anything.  This ensures we won't truncate clog & multixact SLRUs
1701
     * before those relations have been scanned and cleaned up.
1702
     */
1703
1704
0
    if (TransactionIdIsValid(relfrozenxid))
1705
0
    {
1706
0
      Assert(TransactionIdIsNormal(relfrozenxid));
1707
1708
      /* check for values in the future */
1709
0
      if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1710
0
      {
1711
0
        bogus = true;
1712
0
        break;
1713
0
      }
1714
1715
      /* determine new horizon */
1716
0
      if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1717
0
        newFrozenXid = relfrozenxid;
1718
0
    }
1719
1720
0
    if (MultiXactIdIsValid(relminmxid))
1721
0
    {
1722
      /* check for values in the future */
1723
0
      if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1724
0
      {
1725
0
        bogus = true;
1726
0
        break;
1727
0
      }
1728
1729
      /* determine new horizon */
1730
0
      if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1731
0
        newMinMulti = relminmxid;
1732
0
    }
1733
0
  }
1734
1735
  /* we're done with pg_class */
1736
0
  systable_endscan(scan);
1737
0
  table_close(relation, AccessShareLock);
1738
1739
  /* chicken out if bogus data found */
1740
0
  if (bogus)
1741
0
    return;
1742
1743
0
  Assert(TransactionIdIsNormal(newFrozenXid));
1744
0
  Assert(MultiXactIdIsValid(newMinMulti));
1745
1746
  /* Now fetch the pg_database tuple we need to update. */
1747
0
  relation = table_open(DatabaseRelationId, RowExclusiveLock);
1748
1749
  /*
1750
   * Fetch a copy of the tuple to scribble on.  We could check the syscache
1751
   * tuple first.  If that concluded !dirty, we'd avoid waiting on
1752
   * concurrent heap_update() and would avoid exclusive-locking the buffer.
1753
   * For now, don't optimize that.
1754
   */
1755
0
  ScanKeyInit(&key[0],
1756
0
        Anum_pg_database_oid,
1757
0
        BTEqualStrategyNumber, F_OIDEQ,
1758
0
        ObjectIdGetDatum(MyDatabaseId));
1759
1760
0
  systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1761
0
                  NULL, 1, key, &tuple, &inplace_state);
1762
1763
0
  if (!HeapTupleIsValid(tuple))
1764
0
    elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1765
1766
0
  dbform = (Form_pg_database) GETSTRUCT(tuple);
1767
1768
  /*
1769
   * As in vac_update_relstats(), we ordinarily don't want to let
1770
   * datfrozenxid go backward; but if it's "in the future" then it must be
1771
   * corrupt and it seems best to overwrite it.
1772
   */
1773
0
  if (dbform->datfrozenxid != newFrozenXid &&
1774
0
    (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1775
0
     TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1776
0
  {
1777
0
    dbform->datfrozenxid = newFrozenXid;
1778
0
    dirty = true;
1779
0
  }
1780
0
  else
1781
0
    newFrozenXid = dbform->datfrozenxid;
1782
1783
  /* Ditto for datminmxid */
1784
0
  if (dbform->datminmxid != newMinMulti &&
1785
0
    (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1786
0
     MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1787
0
  {
1788
0
    dbform->datminmxid = newMinMulti;
1789
0
    dirty = true;
1790
0
  }
1791
0
  else
1792
0
    newMinMulti = dbform->datminmxid;
1793
1794
0
  if (dirty)
1795
0
    systable_inplace_update_finish(inplace_state, tuple);
1796
0
  else
1797
0
    systable_inplace_update_cancel(inplace_state);
1798
1799
0
  heap_freetuple(tuple);
1800
0
  table_close(relation, RowExclusiveLock);
1801
1802
  /*
1803
   * If we were able to advance datfrozenxid or datminmxid, see if we can
1804
   * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1805
   * XID-wrap-limit info is stale, since this action will update that too.
1806
   */
1807
0
  if (dirty || ForceTransactionIdLimitUpdate())
1808
0
    vac_truncate_clog(newFrozenXid, newMinMulti,
1809
0
              lastSaneFrozenXid, lastSaneMinMulti);
1810
0
}
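
Editor's note: the pg_class scan above boils down to taking a minimum over relfrozenxid values, starting from a conservative seed, and refusing to act if any value is "in the future". Below is a standalone sketch of that pattern (illustrative only, not part of vacuum.c; the helper names and sample values are invented, and the circular comparison mirrors TransactionIdPrecedes for normal XIDs).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified circular comparison, mirroring TransactionIdPrecedes. */
static bool
xid_precedes(uint32_t a, uint32_t b)
{
    return (int32_t) (a - b) < 0;
}

/*
 * Hypothetical helper: take the minimum relfrozenxid, but bail out entirely
 * (return false, leaving *result untouched) if any entry is "in the future".
 */
static bool
compute_min_frozenxid(const uint32_t *relfrozenxids, int n,
                      uint32_t seed, uint32_t last_sane, uint32_t *result)
{
    uint32_t min_xid = seed;

    for (int i = 0; i < n; i++)
    {
        if (xid_precedes(last_sane, relfrozenxids[i]))
            return false;               /* bogus entry: do nothing */
        if (xid_precedes(relfrozenxids[i], min_xid))
            min_xid = relfrozenxids[i];
    }
    *result = min_xid;
    return true;
}

int
main(void)
{
    uint32_t xids[] = {900, 850, 910};
    uint32_t min_xid;

    if (compute_min_frozenxid(xids, 3, 1000, 1200, &min_xid))
        printf("new horizon: %u\n", min_xid);   /* prints 850 */
    return 0;
}
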
1811
1812
1813
/*
1814
 *  vac_truncate_clog() -- attempt to truncate the commit log
1815
 *
1816
 *    Scan pg_database to determine the system-wide oldest datfrozenxid,
1817
 *    and use it to truncate the transaction commit log (pg_xact).
1818
 *    Also update the XID wrap limit info maintained by varsup.c.
1819
 *    Likewise for datminmxid.
1820
 *
1821
 *    The passed frozenXID and minMulti are the updated values for my own
1822
 *    pg_database entry. They're used to initialize the "min" calculations.
1823
 *    The caller also passes the "last sane" XID and MXID, since it has
1824
 *    those at hand already.
1825
 *
1826
 *    This routine is only invoked when we've managed to change our
1827
 *    DB's datfrozenxid/datminmxid values, or we found that the shared
1828
 *    XID-wrap-limit info is stale.
1829
 */
1830
static void
1831
vac_truncate_clog(TransactionId frozenXID,
1832
          MultiXactId minMulti,
1833
          TransactionId lastSaneFrozenXid,
1834
          MultiXactId lastSaneMinMulti)
1835
0
{
1836
0
  TransactionId nextXID = ReadNextTransactionId();
1837
0
  Relation  relation;
1838
0
  TableScanDesc scan;
1839
0
  HeapTuple tuple;
1840
0
  Oid     oldestxid_datoid;
1841
0
  Oid     minmulti_datoid;
1842
0
  bool    bogus = false;
1843
0
  bool    frozenAlreadyWrapped = false;
1844
1845
  /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1846
0
  LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1847
1848
  /* init oldest datoids to sync with my frozenXID/minMulti values */
1849
0
  oldestxid_datoid = MyDatabaseId;
1850
0
  minmulti_datoid = MyDatabaseId;
1851
1852
  /*
1853
   * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1854
   *
1855
   * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1856
   * the values could change while we look at them.  Fetch each one just
1857
   * once to ensure sane behavior of the comparison logic.  (Here, as in
1858
   * many other places, we assume that fetching or updating an XID in shared
1859
   * storage is atomic.)
1860
   *
1861
   * Note: we need not worry about a race condition with new entries being
1862
   * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1863
   * existing DB's datfrozenxid, and that source DB cannot be ours because
1864
   * of the interlock against copying a DB containing an active backend.
1865
   * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1866
   * concurrently modify the datfrozenxid's of different databases, the
1867
   * worst possible outcome is that pg_xact is not truncated as aggressively
1868
   * as it could be.
1869
   */
1870
0
  relation = table_open(DatabaseRelationId, AccessShareLock);
1871
1872
0
  scan = table_beginscan_catalog(relation, 0, NULL);
1873
1874
0
  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1875
0
  {
1876
0
    volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1877
0
    TransactionId datfrozenxid = dbform->datfrozenxid;
1878
0
    TransactionId datminmxid = dbform->datminmxid;
1879
1880
0
    Assert(TransactionIdIsNormal(datfrozenxid));
1881
0
    Assert(MultiXactIdIsValid(datminmxid));
1882
1883
    /*
1884
     * If database is in the process of getting dropped, or has been
1885
     * interrupted while doing so, no connections to it are possible
1886
     * anymore. Therefore we don't need to take it into account here.
1887
     * Which is good, because it can't be processed by autovacuum either.
1888
     */
1889
0
    if (database_is_invalid_form((Form_pg_database) dbform))
1890
0
    {
1891
0
      elog(DEBUG2,
1892
0
         "skipping invalid database \"%s\" while computing relfrozenxid",
1893
0
         NameStr(dbform->datname));
1894
0
      continue;
1895
0
    }
1896
1897
    /*
1898
     * If things are working properly, no database should have a
1899
     * datfrozenxid or datminmxid that is "in the future".  However, such
1900
     * cases have been known to arise due to bugs in pg_upgrade.  If we
1901
     * see any entries that are "in the future", chicken out and don't do
1902
     * anything.  This ensures we won't truncate clog before those
1903
     * databases have been scanned and cleaned up.  (We will issue the
1904
     * "already wrapped" warning if appropriate, though.)
1905
     */
1906
0
    if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1907
0
      MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1908
0
      bogus = true;
1909
1910
0
    if (TransactionIdPrecedes(nextXID, datfrozenxid))
1911
0
      frozenAlreadyWrapped = true;
1912
0
    else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1913
0
    {
1914
0
      frozenXID = datfrozenxid;
1915
0
      oldestxid_datoid = dbform->oid;
1916
0
    }
1917
1918
0
    if (MultiXactIdPrecedes(datminmxid, minMulti))
1919
0
    {
1920
0
      minMulti = datminmxid;
1921
0
      minmulti_datoid = dbform->oid;
1922
0
    }
1923
0
  }
1924
1925
0
  table_endscan(scan);
1926
1927
0
  table_close(relation, AccessShareLock);
1928
1929
  /*
1930
   * Do not truncate CLOG if we seem to have suffered wraparound already;
1931
   * the computed minimum XID might be bogus.  This case should now be
1932
   * impossible due to the defenses in GetNewTransactionId, but we keep the
1933
   * test anyway.
1934
   */
1935
0
  if (frozenAlreadyWrapped)
1936
0
  {
1937
0
    ereport(WARNING,
1938
0
        (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1939
0
         errdetail("You might have already suffered transaction-wraparound data loss.")));
1940
0
    LWLockRelease(WrapLimitsVacuumLock);
1941
0
    return;
1942
0
  }
1943
1944
  /* chicken out if data is bogus in any other way */
1945
0
  if (bogus)
1946
0
  {
1947
0
    LWLockRelease(WrapLimitsVacuumLock);
1948
0
    return;
1949
0
  }
1950
1951
  /*
1952
   * Advance the oldest value for commit timestamps before truncating, so
1953
   * that if a user requests a timestamp for a transaction we're truncating
1954
   * away right after this point, they get NULL instead of an ugly "file not
1955
   * found" error from slru.c.  This doesn't matter for xact/multixact
1956
   * because they are not subject to arbitrary lookups from users.
1957
   */
1958
0
  AdvanceOldestCommitTsXid(frozenXID);
1959
1960
  /*
1961
   * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1962
   */
1963
0
  TruncateCLOG(frozenXID, oldestxid_datoid);
1964
0
  TruncateCommitTs(frozenXID);
1965
0
  TruncateMultiXact(minMulti, minmulti_datoid);
1966
1967
  /*
1968
   * Update the wrap limit for GetNewTransactionId and creation of new
1969
   * MultiXactIds.  Note: these functions will also signal the postmaster
1970
   * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1971
   * signaling twice?
1972
   */
1973
0
  SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1974
0
  SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1975
1976
0
  LWLockRelease(WrapLimitsVacuumLock);
1977
0
}
1978
1979
1980
/*
1981
 *  vacuum_rel() -- vacuum one heap relation
1982
 *
1983
 *    relid identifies the relation to vacuum.  If relation is supplied,
1984
 *    use the name therein for reporting any failure to open/lock the rel;
1985
 *    do not use it once we've successfully opened the rel, since it might
1986
 *    be stale.
1987
 *
1988
 *    Returns true if it's okay to proceed with a requested ANALYZE
1989
 *    operation on this table.
1990
 *
1991
 *    Doing one heap at a time incurs extra overhead, since we need to
1992
 *    check that the heap exists again just before we vacuum it.  The
1993
 *    reason that we do this is so that vacuuming can be spread across
1994
 *    many small transactions.  Otherwise, two-phase locking would require
1995
 *    us to lock the entire database during one pass of the vacuum cleaner.
1996
 *
1997
 *    At entry and exit, we are not inside a transaction.
1998
 */
1999
static bool
2000
vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
2001
       BufferAccessStrategy bstrategy)
2002
0
{
2003
0
  LOCKMODE  lmode;
2004
0
  Relation  rel;
2005
0
  LockRelId lockrelid;
2006
0
  Oid     priv_relid;
2007
0
  Oid     toast_relid;
2008
0
  Oid     save_userid;
2009
0
  int     save_sec_context;
2010
0
  int     save_nestlevel;
2011
2012
0
  Assert(params != NULL);
2013
2014
  /* Begin a transaction for vacuuming this relation */
2015
0
  StartTransactionCommand();
2016
2017
0
  if (!(params->options & VACOPT_FULL))
2018
0
  {
2019
    /*
2020
     * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2021
     * other concurrent VACUUMs know that they can ignore this one while
2022
     * determining their OldestXmin.  (The reason we don't set it during a
2023
     * full VACUUM is exactly that we may have to run user-defined
2024
     * functions for functional indexes, and we want to make sure that if
2025
     * they use the snapshot set above, any tuples it requires can't get
2026
     * removed from other tables.  An index function that depends on the
2027
     * contents of other tables is arguably broken, but we won't break it
2028
     * here by violating transaction semantics.)
2029
     *
2030
     * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2031
     * autovacuum; it's used to avoid canceling a vacuum that was invoked
2032
     * in an emergency.
2033
     *
2034
     * Note: these flags remain set until CommitTransaction or
2035
     * AbortTransaction.  We don't want to clear them until we reset
2036
     * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2037
     * might appear to go backwards, which is probably Not Good.  (We also
2038
     * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2039
     * xmin doesn't become visible ahead of setting the flag.)
2040
     */
2041
0
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2042
0
    MyProc->statusFlags |= PROC_IN_VACUUM;
2043
0
    if (params->is_wraparound)
2044
0
      MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2045
0
    ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2046
0
    LWLockRelease(ProcArrayLock);
2047
0
  }
2048
2049
  /*
2050
   * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2051
   * cutoff xids in local memory wrapping around, and to have updated xmin
2052
   * horizons.
2053
   */
2054
0
  PushActiveSnapshot(GetTransactionSnapshot());
2055
2056
  /*
2057
   * Check for user-requested abort.  Note we want this to be inside a
2058
   * transaction, so xact.c doesn't issue useless WARNING.
2059
   */
2060
0
  CHECK_FOR_INTERRUPTS();
2061
2062
  /*
2063
   * Determine the type of lock we want --- hard exclusive lock for a FULL
2064
   * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2065
   * way, we can be sure that no other backend is vacuuming the same table.
2066
   */
2067
0
  lmode = (params->options & VACOPT_FULL) ?
2068
0
    AccessExclusiveLock : ShareUpdateExclusiveLock;
2069
2070
  /* open the relation and get the appropriate lock on it */
2071
0
  rel = vacuum_open_relation(relid, relation, params->options,
2072
0
                 params->log_min_duration >= 0, lmode);
2073
2074
  /* leave if relation could not be opened or locked */
2075
0
  if (!rel)
2076
0
  {
2077
0
    PopActiveSnapshot();
2078
0
    CommitTransactionCommand();
2079
0
    return false;
2080
0
  }
2081
2082
  /*
2083
   * When recursing to a TOAST table, check privileges on the parent.  NB:
2084
   * This is only safe to do because we hold a session lock on the main
2085
   * relation that prevents concurrent deletion.
2086
   */
2087
0
  if (OidIsValid(params->toast_parent))
2088
0
    priv_relid = params->toast_parent;
2089
0
  else
2090
0
    priv_relid = RelationGetRelid(rel);
2091
2092
  /*
2093
   * Check if relation needs to be skipped based on privileges.  This check
2094
   * happens also when building the relation list to vacuum for a manual
2095
   * operation, and needs to be done additionally here as VACUUM could
2096
   * happen across multiple transactions where privileges could have changed
2097
   * in-between.  Make sure to only generate logs for VACUUM in this case.
2098
   */
2099
0
  if (!vacuum_is_permitted_for_relation(priv_relid,
2100
0
                      rel->rd_rel,
2101
0
                      params->options & ~VACOPT_ANALYZE))
2102
0
  {
2103
0
    relation_close(rel, lmode);
2104
0
    PopActiveSnapshot();
2105
0
    CommitTransactionCommand();
2106
0
    return false;
2107
0
  }
2108
2109
  /*
2110
   * Check that it's of a vacuumable relkind.
2111
   */
2112
0
  if (rel->rd_rel->relkind != RELKIND_RELATION &&
2113
0
    rel->rd_rel->relkind != RELKIND_MATVIEW &&
2114
0
    rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2115
0
    rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2116
0
  {
2117
0
    ereport(WARNING,
2118
0
        (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2119
0
            RelationGetRelationName(rel))));
2120
0
    relation_close(rel, lmode);
2121
0
    PopActiveSnapshot();
2122
0
    CommitTransactionCommand();
2123
0
    return false;
2124
0
  }
2125
2126
  /*
2127
   * Silently ignore tables that are temp tables of other backends ---
2128
   * trying to vacuum these will lead to great unhappiness, since their
2129
   * contents are probably not up-to-date on disk.  (We don't throw a
2130
   * warning here; it would just lead to chatter during a database-wide
2131
   * VACUUM.)
2132
   */
2133
0
  if (RELATION_IS_OTHER_TEMP(rel))
2134
0
  {
2135
0
    relation_close(rel, lmode);
2136
0
    PopActiveSnapshot();
2137
0
    CommitTransactionCommand();
2138
0
    return false;
2139
0
  }
2140
2141
  /*
2142
   * Silently ignore partitioned tables as there is no work to be done.  The
2143
   * useful work is on their child partitions, which have been queued up for
2144
   * us separately.
2145
   */
2146
0
  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2147
0
  {
2148
0
    relation_close(rel, lmode);
2149
0
    PopActiveSnapshot();
2150
0
    CommitTransactionCommand();
2151
    /* It's OK to proceed with ANALYZE on this table */
2152
0
    return true;
2153
0
  }
2154
2155
  /*
2156
   * Get a session-level lock too. This will protect our access to the
2157
   * relation across multiple transactions, so that we can vacuum the
2158
   * relation's TOAST table (if any) secure in the knowledge that no one is
2159
   * deleting the parent relation.
2160
   *
2161
   * NOTE: this cannot block, even if someone else is waiting for access,
2162
   * because the lock manager knows that both lock requests are from the
2163
   * same process.
2164
   */
2165
0
  lockrelid = rel->rd_lockInfo.lockRelId;
2166
0
  LockRelationIdForSession(&lockrelid, lmode);
2167
2168
  /*
2169
   * Set index_cleanup option based on index_cleanup reloption if it wasn't
2170
   * specified in VACUUM command, or when running in an autovacuum worker
2171
   */
2172
0
  if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
2173
0
  {
2174
0
    StdRdOptIndexCleanup vacuum_index_cleanup;
2175
2176
0
    if (rel->rd_options == NULL)
2177
0
      vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2178
0
    else
2179
0
      vacuum_index_cleanup =
2180
0
        ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2181
2182
0
    if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2183
0
      params->index_cleanup = VACOPTVALUE_AUTO;
2184
0
    else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2185
0
      params->index_cleanup = VACOPTVALUE_ENABLED;
2186
0
    else
2187
0
    {
2188
0
      Assert(vacuum_index_cleanup ==
2189
0
           STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2190
0
      params->index_cleanup = VACOPTVALUE_DISABLED;
2191
0
    }
2192
0
  }
2193
2194
  /*
2195
   * Check if the vacuum_max_eager_freeze_failure_rate table storage
2196
   * parameter was specified. This overrides the GUC value.
2197
   */
2198
0
  if (rel->rd_options != NULL &&
2199
0
    ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2200
0
    params->max_eager_freeze_failure_rate =
2201
0
      ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2202
2203
  /*
2204
   * Set truncate option based on truncate reloption or GUC if it wasn't
2205
   * specified in VACUUM command, or when running in an autovacuum worker
2206
   */
2207
0
  if (params->truncate == VACOPTVALUE_UNSPECIFIED)
2208
0
  {
2209
0
    StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2210
2211
0
    if (opts && opts->vacuum_truncate_set)
2212
0
    {
2213
0
      if (opts->vacuum_truncate)
2214
0
        params->truncate = VACOPTVALUE_ENABLED;
2215
0
      else
2216
0
        params->truncate = VACOPTVALUE_DISABLED;
2217
0
    }
2218
0
    else if (vacuum_truncate)
2219
0
      params->truncate = VACOPTVALUE_ENABLED;
2220
0
    else
2221
0
      params->truncate = VACOPTVALUE_DISABLED;
2222
0
  }
2223
2224
  /*
2225
   * Remember the relation's TOAST relation for later, if the caller asked
2226
   * us to process it.  In VACUUM FULL, though, the toast table is
2227
   * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2228
   * unless PROCESS_MAIN is disabled.
2229
   */
2230
0
  if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
2231
0
    ((params->options & VACOPT_FULL) == 0 ||
2232
0
     (params->options & VACOPT_PROCESS_MAIN) == 0))
2233
0
    toast_relid = rel->rd_rel->reltoastrelid;
2234
0
  else
2235
0
    toast_relid = InvalidOid;
2236
2237
  /*
2238
   * Switch to the table owner's userid, so that any index functions are run
2239
   * as that user.  Also lock down security-restricted operations and
2240
   * arrange to make GUC variable changes local to this command. (This is
2241
   * unnecessary, but harmless, for lazy VACUUM.)
2242
   */
2243
0
  GetUserIdAndSecContext(&save_userid, &save_sec_context);
2244
0
  SetUserIdAndSecContext(rel->rd_rel->relowner,
2245
0
               save_sec_context | SECURITY_RESTRICTED_OPERATION);
2246
0
  save_nestlevel = NewGUCNestLevel();
2247
0
  RestrictSearchPath();
2248
2249
  /*
2250
   * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2251
   * relation.  Otherwise, we can skip this part.  If processing the TOAST
2252
   * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2253
   * to be set when we recurse to the TOAST table.
2254
   */
2255
0
  if (params->options & VACOPT_PROCESS_MAIN)
2256
0
  {
2257
    /*
2258
     * Do the actual work --- either FULL or "lazy" vacuum
2259
     */
2260
0
    if (params->options & VACOPT_FULL)
2261
0
    {
2262
0
      ClusterParams cluster_params = {0};
2263
2264
0
      if ((params->options & VACOPT_VERBOSE) != 0)
2265
0
        cluster_params.options |= CLUOPT_VERBOSE;
2266
2267
      /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2268
0
      cluster_rel(rel, InvalidOid, &cluster_params);
2269
      /* cluster_rel closes the relation, but keeps lock */
2270
2271
0
      rel = NULL;
2272
0
    }
2273
0
    else
2274
0
      table_relation_vacuum(rel, params, bstrategy);
2275
0
  }
2276
2277
  /* Roll back any GUC changes executed by index functions */
2278
0
  AtEOXact_GUC(false, save_nestlevel);
2279
2280
  /* Restore userid and security context */
2281
0
  SetUserIdAndSecContext(save_userid, save_sec_context);
2282
2283
  /* all done with this class, but hold lock until commit */
2284
0
  if (rel)
2285
0
    relation_close(rel, NoLock);
2286
2287
  /*
2288
   * Complete the transaction and free all temporary memory used.
2289
   */
2290
0
  PopActiveSnapshot();
2291
0
  CommitTransactionCommand();
2292
2293
  /*
2294
   * If the relation has a secondary toast rel, vacuum that too while we
2295
   * still hold the session lock on the main table.  Note however that
2296
   * "analyze" will not get done on the toast table.  This is good, because
2297
   * the toaster always uses hardcoded index access and statistics are
2298
   * totally unimportant for toast relations.
2299
   */
2300
0
  if (toast_relid != InvalidOid)
2301
0
  {
2302
0
    VacuumParams toast_vacuum_params;
2303
2304
    /*
2305
     * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it.  Likewise,
2306
     * set toast_parent so that the privilege checks are done on the main
2307
     * relation.  NB: This is only safe to do because we hold a session
2308
     * lock on the main relation that prevents concurrent deletion.
2309
     */
2310
0
    memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2311
0
    toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2312
0
    toast_vacuum_params.toast_parent = relid;
2313
2314
0
    vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
2315
0
  }
2316
2317
  /*
2318
   * Now release the session-level lock on the main table.
2319
   */
2320
0
  UnlockRelationIdForSession(&lockrelid, lmode);
2321
2322
  /* Report that we really did it. */
2323
0
  return true;
2324
0
}
2325
2326
2327
/*
2328
 * Open all the vacuumable indexes of the given relation, obtaining the
2329
 * specified kind of lock on each.  Return an array of Relation pointers for
2330
 * the indexes into *Irel, and the number of indexes into *nindexes.
2331
 *
2332
 * We consider an index vacuumable if it is marked insertable (indisready).
2333
 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2334
 * execution, and what we have is too corrupt to be processable.  We will
2335
 * vacuum even if the index isn't indisvalid; this is important because in a
2336
 * unique index, uniqueness checks will be performed anyway and had better not
2337
 * hit dangling index pointers.
2338
 */
2339
void
2340
vac_open_indexes(Relation relation, LOCKMODE lockmode,
2341
         int *nindexes, Relation **Irel)
2342
0
{
2343
0
  List     *indexoidlist;
2344
0
  ListCell   *indexoidscan;
2345
0
  int     i;
2346
2347
0
  Assert(lockmode != NoLock);
2348
2349
0
  indexoidlist = RelationGetIndexList(relation);
2350
2351
  /* allocate enough memory for all indexes */
2352
0
  i = list_length(indexoidlist);
2353
2354
0
  if (i > 0)
2355
0
    *Irel = (Relation *) palloc(i * sizeof(Relation));
2356
0
  else
2357
0
    *Irel = NULL;
2358
2359
  /* collect just the ready indexes */
2360
0
  i = 0;
2361
0
  foreach(indexoidscan, indexoidlist)
2362
0
  {
2363
0
    Oid     indexoid = lfirst_oid(indexoidscan);
2364
0
    Relation  indrel;
2365
2366
0
    indrel = index_open(indexoid, lockmode);
2367
0
    if (indrel->rd_index->indisready)
2368
0
      (*Irel)[i++] = indrel;
2369
0
    else
2370
0
      index_close(indrel, lockmode);
2371
0
  }
2372
2373
0
  *nindexes = i;
2374
2375
0
  list_free(indexoidlist);
2376
0
}
2377
2378
/*
2379
 * Release the resources acquired by vac_open_indexes.  Optionally release
2380
 * the locks (say NoLock to keep 'em).
2381
 */
2382
void
2383
vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2384
0
{
2385
0
  if (Irel == NULL)
2386
0
    return;
2387
2388
0
  while (nindexes--)
2389
0
  {
2390
0
    Relation  ind = Irel[nindexes];
2391
2392
0
    index_close(ind, lockmode);
2393
0
  }
2394
0
  pfree(Irel);
2395
0
}
2396
2397
/*
2398
 * vacuum_delay_point --- check for interrupts and cost-based delay.
2399
 *
2400
 * This should be called in each major loop of VACUUM processing,
2401
 * typically once per page processed.
2402
 */
2403
void
2404
vacuum_delay_point(bool is_analyze)
2405
0
{
2406
0
  double    msec = 0;
2407
2408
  /* Always check for interrupts */
2409
0
  CHECK_FOR_INTERRUPTS();
2410
2411
0
  if (InterruptPending ||
2412
0
    (!VacuumCostActive && !ConfigReloadPending))
2413
0
    return;
2414
2415
  /*
2416
   * Autovacuum workers should reload the configuration file if requested.
2417
   * This allows changes to [autovacuum_]vacuum_cost_limit and
2418
   * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2419
   * vacuumed or analyzed.
2420
   */
2421
0
  if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2422
0
  {
2423
0
    ConfigReloadPending = false;
2424
0
    ProcessConfigFile(PGC_SIGHUP);
2425
0
    VacuumUpdateCosts();
2426
0
  }
2427
2428
  /*
2429
   * If we disabled cost-based delays after reloading the config file,
2430
   * return.
2431
   */
2432
0
  if (!VacuumCostActive)
2433
0
    return;
2434
2435
  /*
2436
   * For parallel vacuum, the delay is computed based on the shared cost
2437
   * balance.  See compute_parallel_delay.
2438
   */
2439
0
  if (VacuumSharedCostBalance != NULL)
2440
0
    msec = compute_parallel_delay();
2441
0
  else if (VacuumCostBalance >= vacuum_cost_limit)
2442
0
    msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2443
2444
  /* Nap if appropriate */
2445
0
  if (msec > 0)
2446
0
  {
2447
0
    instr_time  delay_start;
2448
2449
0
    if (msec > vacuum_cost_delay * 4)
2450
0
      msec = vacuum_cost_delay * 4;
2451
2452
0
    if (track_cost_delay_timing)
2453
0
      INSTR_TIME_SET_CURRENT(delay_start);
2454
2455
0
    pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2456
0
    pg_usleep(msec * 1000);
2457
0
    pgstat_report_wait_end();
2458
2459
0
    if (track_cost_delay_timing)
2460
0
    {
2461
0
      instr_time  delay_end;
2462
0
      instr_time  delay;
2463
2464
0
      INSTR_TIME_SET_CURRENT(delay_end);
2465
0
      INSTR_TIME_SET_ZERO(delay);
2466
0
      INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2467
2468
      /*
2469
       * For parallel workers, we only report the delay time every once
2470
       * in a while to avoid overloading the leader with messages and
2471
       * interrupts.
2472
       */
2473
0
      if (IsParallelWorker())
2474
0
      {
2475
0
        static instr_time last_report_time;
2476
0
        instr_time  time_since_last_report;
2477
2478
0
        Assert(!is_analyze);
2479
2480
        /* Accumulate the delay time */
2481
0
        parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2482
2483
        /* Calculate interval since last report */
2484
0
        INSTR_TIME_SET_ZERO(time_since_last_report);
2485
0
        INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2486
2487
        /* If we haven't reported in a while, do so now */
2488
0
        if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2489
0
          PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2490
0
        {
2491
0
          pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2492
0
                            parallel_vacuum_worker_delay_ns);
2493
2494
          /* Reset variables */
2495
0
          last_report_time = delay_end;
2496
0
          parallel_vacuum_worker_delay_ns = 0;
2497
0
        }
2498
0
      }
2499
0
      else if (is_analyze)
2500
0
        pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2501
0
                       INSTR_TIME_GET_NANOSEC(delay));
2502
0
      else
2503
0
        pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2504
0
                       INSTR_TIME_GET_NANOSEC(delay));
2505
0
    }
2506
2507
    /*
2508
     * We don't want to ignore postmaster death during very long vacuums
2509
     * with vacuum_cost_delay configured.  We can't use the usual
2510
     * WaitLatch() approach here because we want microsecond-based sleep
2511
     * durations above.
2512
     */
2513
0
    if (IsUnderPostmaster && !PostmasterIsAlive())
2514
0
      exit(1);
2515
2516
0
    VacuumCostBalance = 0;
2517
2518
    /*
2519
     * Balance and update limit values for autovacuum workers. We must do
2520
     * this periodically, as the number of workers across which we are
2521
     * balancing the limit may have changed.
2522
     *
2523
     * TODO: There may be better criteria for determining when to do this
2524
     * besides "check after napping".
2525
     */
2526
0
    AutoVacuumUpdateCostLimit();
2527
2528
    /* Might have gotten an interrupt while sleeping */
2529
0
    CHECK_FOR_INTERRUPTS();
2530
0
  }
2531
0
}
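
Editor's note: for a single backend, the nap length above is vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit, clamped to at most four times vacuum_cost_delay. Below is a standalone sketch of that arithmetic with sample settings (illustrative only, not part of vacuum.c; the function name and figures are invented, and none of the GUC or wait-event machinery is modeled).

#include <stdio.h>

/* Hypothetical restatement of the single-backend nap computation above. */
static double
cost_based_delay_msec(int cost_balance, double cost_delay, int cost_limit)
{
    double msec = 0;

    if (cost_balance >= cost_limit)
        msec = cost_delay * cost_balance / cost_limit;

    /* never nap longer than four times the configured delay */
    if (msec > cost_delay * 4)
        msec = cost_delay * 4;

    return msec;
}

int
main(void)
{
    /* e.g. vacuum_cost_delay = 2 ms, vacuum_cost_limit = 200 */
    printf("%.1f ms\n", cost_based_delay_msec(350, 2.0, 200));   /* 3.5 ms */
    printf("%.1f ms\n", cost_based_delay_msec(1500, 2.0, 200));  /* capped at 8.0 ms */
    return 0;
}
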
2532
2533
/*
2534
 * Computes the vacuum delay for parallel workers.
2535
 *
2536
 * The basic idea of a cost-based delay for parallel vacuum is to allow each
2537
 * worker to sleep in proportion to the share of work it's done.  We achieve this
2538
 * by allowing all parallel vacuum workers including the leader process to
2539
 * have a shared view of cost related parameters (mainly VacuumCostBalance).
2540
 * We allow each worker to update it as and when it has incurred any cost and
2541
 * then based on that decide whether it needs to sleep.  We compute the time
2542
 * to sleep for a worker based on the cost it has incurred
2543
 * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2544
 * that amount.  This avoids putting to sleep workers that have done less
2545
 * I/O than other workers, and thereby ensures that workers doing more I/O
2546
 * are throttled more.
2547
 *
2548
 * We allow a worker to sleep only if it has performed I/O above a certain
2549
 * threshold, which is calculated based on the number of active workers
2550
 * (VacuumActiveNWorkers), and the overall cost balance exceeds the
2551
 * vacuum_cost_limit set by the system.  Testing shows that we achieve the
2552
 * required throttling if we force a worker that has done more than 50%
2553
 * of its share of work to sleep.
2554
 */
2555
static double
2556
compute_parallel_delay(void)
2557
0
{
2558
0
  double    msec = 0;
2559
0
  uint32    shared_balance;
2560
0
  int     nworkers;
2561
2562
  /* Parallel vacuum must be active */
2563
0
  Assert(VacuumSharedCostBalance);
2564
2565
0
  nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2566
2567
  /* At least count itself */
2568
0
  Assert(nworkers >= 1);
2569
2570
  /* Update the shared cost balance value atomically */
2571
0
  shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2572
2573
  /* Compute the total local balance for the current worker */
2574
0
  VacuumCostBalanceLocal += VacuumCostBalance;
2575
2576
0
  if ((shared_balance >= vacuum_cost_limit) &&
2577
0
    (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2578
0
  {
2579
    /* Compute sleep time based on the local cost balance */
2580
0
    msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2581
0
    pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2582
0
    VacuumCostBalanceLocal = 0;
2583
0
  }
2584
2585
  /*
2586
   * Reset the local balance as we accumulated it into the shared value.
2587
   */
2588
0
  VacuumCostBalance = 0;
2589
2590
0
  return msec;
2591
0
}
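
Editor's note: the throttling condition above lets a worker nap only when the shared balance has reached the cost limit and the worker's own balance exceeds half of its per-worker share of that limit. Below is a standalone sketch with plain ints in place of the shared atomics (illustrative only, not part of vacuum.c; the function name and sample values are invented).

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical restatement of the parallel-worker throttling condition above. */
static bool
parallel_worker_should_sleep(int shared_balance, int local_balance,
                             int cost_limit, int nworkers)
{
    return shared_balance >= cost_limit &&
           local_balance > 0.5 * ((double) cost_limit / nworkers);
}

int
main(void)
{
    /* cost_limit = 200 with 4 workers: per-worker share is 50, threshold 25 */
    printf("%d\n", parallel_worker_should_sleep(260, 30, 200, 4)); /* 1: naps */
    printf("%d\n", parallel_worker_should_sleep(260, 10, 200, 4)); /* 0: keeps going */
    return 0;
}
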
2592
2593
/*
2594
 * A wrapper function of defGetBoolean().
2595
 *
2596
 * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2597
 * of true and false.
2598
 */
2599
static VacOptValue
2600
get_vacoptval_from_boolean(DefElem *def)
2601
0
{
2602
0
  return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2603
0
}
2604
2605
/*
2606
 *  vac_bulkdel_one_index() -- bulk-deletion for index relation.
2607
 *
2608
 * Returns bulk delete stats derived from input stats
2609
 */
2610
IndexBulkDeleteResult *
2611
vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2612
            TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2613
0
{
2614
  /* Do bulk deletion */
2615
0
  istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2616
0
                dead_items);
2617
2618
0
  ereport(ivinfo->message_level,
2619
0
      (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2620
0
          RelationGetRelationName(ivinfo->index),
2621
0
          dead_items_info->num_items)));
2622
2623
0
  return istat;
2624
0
}
2625
2626
/*
2627
 *  vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2628
 *
2629
 * Returns bulk delete stats derived from input stats
2630
 */
2631
IndexBulkDeleteResult *
2632
vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2633
0
{
2634
0
  istat = index_vacuum_cleanup(ivinfo, istat);
2635
2636
0
  if (istat)
2637
0
    ereport(ivinfo->message_level,
2638
0
        (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2639
0
            RelationGetRelationName(ivinfo->index),
2640
0
            istat->num_index_tuples,
2641
0
            istat->num_pages),
2642
0
         errdetail("%.0f index row versions were removed.\n"
2643
0
               "%u index pages were newly deleted.\n"
2644
0
               "%u index pages are currently deleted, of which %u are currently reusable.",
2645
0
               istat->tuples_removed,
2646
0
               istat->pages_newly_deleted,
2647
0
               istat->pages_deleted, istat->pages_free)));
2648
2649
0
  return istat;
2650
0
}
2651
2652
/*
2653
 *  vac_tid_reaped() -- is a particular tid deletable?
2654
 *
2655
 *    This has the right signature to be an IndexBulkDeleteCallback.
2656
 */
2657
static bool
2658
vac_tid_reaped(ItemPointer itemptr, void *state)
2659
0
{
2660
0
  TidStore   *dead_items = (TidStore *) state;
2661
2662
0
  return TidStoreIsMember(dead_items, itemptr);
2663
0
}