Coverage Report

Created: 2025-10-09 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/utils/activity/pgstat_relation.c
Line
Count
Source
1
/* -------------------------------------------------------------------------
2
 *
3
 * pgstat_relation.c
4
 *    Implementation of relation statistics.
5
 *
6
 * This file contains the implementation of relation statistics. It is kept
7
 * separate from pgstat.c to enforce the line between the statistics access /
8
 * storage implementation and the details about individual types of
9
 * statistics.
10
 *
11
 * Copyright (c) 2001-2025, PostgreSQL Global Development Group
12
 *
13
 * IDENTIFICATION
14
 *    src/backend/utils/activity/pgstat_relation.c
15
 * -------------------------------------------------------------------------
16
 */
17
18
#include "postgres.h"
19
20
#include "access/twophase_rmgr.h"
21
#include "access/xact.h"
22
#include "catalog/catalog.h"
23
#include "utils/memutils.h"
24
#include "utils/pgstat_internal.h"
25
#include "utils/rel.h"
26
#include "utils/timestamp.h"
27
28
29
/* Record that's written to 2PC state file when pgstat state is persisted */
30
typedef struct TwoPhasePgStatRecord
31
{
32
  PgStat_Counter tuples_inserted; /* tuples inserted in xact */
33
  PgStat_Counter tuples_updated;  /* tuples updated in xact */
34
  PgStat_Counter tuples_deleted;  /* tuples deleted in xact */
35
  /* tuples i/u/d prior to truncate/drop */
36
  PgStat_Counter inserted_pre_truncdrop;
37
  PgStat_Counter updated_pre_truncdrop;
38
  PgStat_Counter deleted_pre_truncdrop;
39
  Oid     id;       /* table's OID */
40
  bool    shared;     /* is it a shared catalog? */
41
  bool    truncdropped; /* was the relation truncated/dropped? */
42
} TwoPhasePgStatRecord;
43
44
45
static PgStat_TableStatus *pgstat_prep_relation_pending(Oid rel_id, bool isshared);
46
static void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level);
47
static void ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info);
48
static void save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop);
49
static void restore_truncdrop_counters(PgStat_TableXactStatus *trans);
50
51
52
/*
53
 * Copy stats between relations. This is used for things like REINDEX
54
 * CONCURRENTLY.
55
 */
56
void
57
pgstat_copy_relation_stats(Relation dst, Relation src)
58
0
{
59
0
  PgStat_StatTabEntry *srcstats;
60
0
  PgStatShared_Relation *dstshstats;
61
0
  PgStat_EntryRef *dst_ref;
62
63
0
  srcstats = pgstat_fetch_stat_tabentry_ext(src->rd_rel->relisshared,
64
0
                        RelationGetRelid(src));
65
0
  if (!srcstats)
66
0
    return;
67
68
0
  dst_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION,
69
0
                      dst->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
70
0
                      RelationGetRelid(dst),
71
0
                      false);
72
73
0
  dstshstats = (PgStatShared_Relation *) dst_ref->shared_stats;
74
0
  dstshstats->stats = *srcstats;
75
76
0
  pgstat_unlock_entry(dst_ref);
77
0
}
78
79
/*
80
 * Initialize a relcache entry to count access statistics.  Called whenever a
81
 * relation is opened.
82
 *
83
 * We assume that a relcache entry's pgstat_info field is zeroed by relcache.c
84
 * when the relcache entry is made; thereafter it is long-lived data.
85
 *
86
 * This does not create a reference to a stats entry in shared memory, nor
87
 * allocate memory for the pending stats. That happens in
88
 * pgstat_assoc_relation().
89
 */
90
void
91
pgstat_init_relation(Relation rel)
92
0
{
93
0
  char    relkind = rel->rd_rel->relkind;
94
95
  /*
96
   * We only count stats for relations with storage and partitioned tables
97
   */
98
0
  if (!RELKIND_HAS_STORAGE(relkind) && relkind != RELKIND_PARTITIONED_TABLE)
99
0
  {
100
0
    rel->pgstat_enabled = false;
101
0
    rel->pgstat_info = NULL;
102
0
    return;
103
0
  }
104
105
0
  if (!pgstat_track_counts)
106
0
  {
107
0
    if (rel->pgstat_info)
108
0
      pgstat_unlink_relation(rel);
109
110
    /* We're not counting at all */
111
0
    rel->pgstat_enabled = false;
112
0
    rel->pgstat_info = NULL;
113
0
    return;
114
0
  }
115
116
0
  rel->pgstat_enabled = true;
117
0
}
118
119
/*
120
 * Prepare for statistics for this relation to be collected.
121
 *
122
 * This ensures we have a reference to the stats entry before stats can be
123
 * generated. That is important because a relation drop in another connection
124
 * could otherwise lead to the stats entry being dropped, which then later
125
 * would get recreated when flushing stats.
126
 *
127
 * This is separate from pgstat_init_relation() as it is not uncommon for
128
 * relcache entries to be opened without ever getting stats reported.
129
 */
130
void
131
pgstat_assoc_relation(Relation rel)
132
0
{
133
0
  Assert(rel->pgstat_enabled);
134
0
  Assert(rel->pgstat_info == NULL);
135
136
  /* Else find or make the PgStat_TableStatus entry, and update link */
137
0
  rel->pgstat_info = pgstat_prep_relation_pending(RelationGetRelid(rel),
138
0
                          rel->rd_rel->relisshared);
139
140
  /* don't allow link a stats to multiple relcache entries */
141
0
  Assert(rel->pgstat_info->relation == NULL);
142
143
  /* mark this relation as the owner */
144
0
  rel->pgstat_info->relation = rel;
145
0
}
146
147
/*
148
 * Break the mutual link between a relcache entry and pending stats entry.
149
 * This must be called whenever one end of the link is removed.
150
 */
151
void
152
pgstat_unlink_relation(Relation rel)
153
0
{
154
  /* remove the link to stats info if any */
155
0
  if (rel->pgstat_info == NULL)
156
0
    return;
157
158
  /* link sanity check */
159
0
  Assert(rel->pgstat_info->relation == rel);
160
0
  rel->pgstat_info->relation = NULL;
161
0
  rel->pgstat_info = NULL;
162
0
}
163
164
/*
165
 * Ensure that stats are dropped if transaction aborts.
166
 */
167
void
168
pgstat_create_relation(Relation rel)
169
0
{
170
0
  pgstat_create_transactional(PGSTAT_KIND_RELATION,
171
0
                rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
172
0
                RelationGetRelid(rel));
173
0
}
174
175
/*
176
 * Ensure that stats are dropped if transaction commits.
177
 */
178
void
179
pgstat_drop_relation(Relation rel)
180
0
{
181
0
  int     nest_level = GetCurrentTransactionNestLevel();
182
0
  PgStat_TableStatus *pgstat_info;
183
184
0
  pgstat_drop_transactional(PGSTAT_KIND_RELATION,
185
0
                rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
186
0
                RelationGetRelid(rel));
187
188
0
  if (!pgstat_should_count_relation(rel))
189
0
    return;
190
191
  /*
192
   * Transactionally set counters to 0. That ensures that accesses to
193
   * pg_stat_xact_all_tables inside the transaction show 0.
194
   */
195
0
  pgstat_info = rel->pgstat_info;
196
0
  if (pgstat_info->trans &&
197
0
    pgstat_info->trans->nest_level == nest_level)
198
0
  {
199
0
    save_truncdrop_counters(pgstat_info->trans, true);
200
0
    pgstat_info->trans->tuples_inserted = 0;
201
0
    pgstat_info->trans->tuples_updated = 0;
202
0
    pgstat_info->trans->tuples_deleted = 0;
203
0
  }
204
0
}
205
206
/*
207
 * Report that the table was just vacuumed and flush IO statistics.
208
 */
209
void
210
pgstat_report_vacuum(Oid tableoid, bool shared,
211
           PgStat_Counter livetuples, PgStat_Counter deadtuples,
212
           TimestampTz starttime)
213
0
{
214
0
  PgStat_EntryRef *entry_ref;
215
0
  PgStatShared_Relation *shtabentry;
216
0
  PgStat_StatTabEntry *tabentry;
217
0
  Oid     dboid = (shared ? InvalidOid : MyDatabaseId);
218
0
  TimestampTz ts;
219
0
  PgStat_Counter elapsedtime;
220
221
0
  if (!pgstat_track_counts)
222
0
    return;
223
224
  /* Store the data in the table's hash table entry. */
225
0
  ts = GetCurrentTimestamp();
226
0
  elapsedtime = TimestampDifferenceMilliseconds(starttime, ts);
227
228
  /* block acquiring lock for the same reason as pgstat_report_autovac() */
229
0
  entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION,
230
0
                      dboid, tableoid, false);
231
232
0
  shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats;
233
0
  tabentry = &shtabentry->stats;
234
235
0
  tabentry->live_tuples = livetuples;
236
0
  tabentry->dead_tuples = deadtuples;
237
238
  /*
239
   * It is quite possible that a non-aggressive VACUUM ended up skipping
240
   * various pages, however, we'll zero the insert counter here regardless.
241
   * It's currently used only to track when we need to perform an "insert"
242
   * autovacuum, which are mainly intended to freeze newly inserted tuples.
243
   * Zeroing this may just mean we'll not try to vacuum the table again
244
   * until enough tuples have been inserted to trigger another insert
245
   * autovacuum.  An anti-wraparound autovacuum will catch any persistent
246
   * stragglers.
247
   */
248
0
  tabentry->ins_since_vacuum = 0;
249
250
0
  if (AmAutoVacuumWorkerProcess())
251
0
  {
252
0
    tabentry->last_autovacuum_time = ts;
253
0
    tabentry->autovacuum_count++;
254
0
    tabentry->total_autovacuum_time += elapsedtime;
255
0
  }
256
0
  else
257
0
  {
258
0
    tabentry->last_vacuum_time = ts;
259
0
    tabentry->vacuum_count++;
260
0
    tabentry->total_vacuum_time += elapsedtime;
261
0
  }
262
263
0
  pgstat_unlock_entry(entry_ref);
264
265
  /*
266
   * Flush IO statistics now. pgstat_report_stat() will flush IO stats,
267
   * however this will not be called until after an entire autovacuum cycle
268
   * is done -- which will likely vacuum many relations -- or until the
269
   * VACUUM command has processed all tables and committed.
270
   */
271
0
  pgstat_flush_io(false);
272
0
  (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
273
0
}
274
275
/*
276
 * Report that the table was just analyzed and flush IO statistics.
277
 *
278
 * Caller must provide new live- and dead-tuples estimates, as well as a
279
 * flag indicating whether to reset the mod_since_analyze counter.
280
 */
281
void
282
pgstat_report_analyze(Relation rel,
283
            PgStat_Counter livetuples, PgStat_Counter deadtuples,
284
            bool resetcounter, TimestampTz starttime)
285
0
{
286
0
  PgStat_EntryRef *entry_ref;
287
0
  PgStatShared_Relation *shtabentry;
288
0
  PgStat_StatTabEntry *tabentry;
289
0
  Oid     dboid = (rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId);
290
0
  TimestampTz ts;
291
0
  PgStat_Counter elapsedtime;
292
293
0
  if (!pgstat_track_counts)
294
0
    return;
295
296
  /*
297
   * Unlike VACUUM, ANALYZE might be running inside a transaction that has
298
   * already inserted and/or deleted rows in the target table. ANALYZE will
299
   * have counted such rows as live or dead respectively. Because we will
300
   * report our counts of such rows at transaction end, we should subtract
301
   * off these counts from the update we're making now, else they'll be
302
   * double-counted after commit.  (This approach also ensures that the
303
   * shared stats entry ends up with the right numbers if we abort instead
304
   * of committing.)
305
   *
306
   * Waste no time on partitioned tables, though.
307
   */
308
0
  if (pgstat_should_count_relation(rel) &&
309
0
    rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
310
0
  {
311
0
    PgStat_TableXactStatus *trans;
312
313
0
    for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
314
0
    {
315
0
      livetuples -= trans->tuples_inserted - trans->tuples_deleted;
316
0
      deadtuples -= trans->tuples_updated + trans->tuples_deleted;
317
0
    }
318
    /* count stuff inserted by already-aborted subxacts, too */
319
0
    deadtuples -= rel->pgstat_info->counts.delta_dead_tuples;
320
    /* Since ANALYZE's counts are estimates, we could have underflowed */
321
0
    livetuples = Max(livetuples, 0);
322
0
    deadtuples = Max(deadtuples, 0);
323
0
  }
324
325
  /* Store the data in the table's hash table entry. */
326
0
  ts = GetCurrentTimestamp();
327
0
  elapsedtime = TimestampDifferenceMilliseconds(starttime, ts);
328
329
  /* block acquiring lock for the same reason as pgstat_report_autovac() */
330
0
  entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, dboid,
331
0
                      RelationGetRelid(rel),
332
0
                      false);
333
  /* can't get dropped while accessed */
334
0
  Assert(entry_ref != NULL && entry_ref->shared_stats != NULL);
335
336
0
  shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats;
337
0
  tabentry = &shtabentry->stats;
338
339
0
  tabentry->live_tuples = livetuples;
340
0
  tabentry->dead_tuples = deadtuples;
341
342
  /*
343
   * If commanded, reset mod_since_analyze to zero.  This forgets any
344
   * changes that were committed while the ANALYZE was in progress, but we
345
   * have no good way to estimate how many of those there were.
346
   */
347
0
  if (resetcounter)
348
0
    tabentry->mod_since_analyze = 0;
349
350
0
  if (AmAutoVacuumWorkerProcess())
351
0
  {
352
0
    tabentry->last_autoanalyze_time = ts;
353
0
    tabentry->autoanalyze_count++;
354
0
    tabentry->total_autoanalyze_time += elapsedtime;
355
0
  }
356
0
  else
357
0
  {
358
0
    tabentry->last_analyze_time = ts;
359
0
    tabentry->analyze_count++;
360
0
    tabentry->total_analyze_time += elapsedtime;
361
0
  }
362
363
0
  pgstat_unlock_entry(entry_ref);
364
365
  /* see pgstat_report_vacuum() */
366
0
  pgstat_flush_io(false);
367
0
  (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
368
0
}
369
370
/*
371
 * count a tuple insertion of n tuples
372
 */
373
void
374
pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
375
0
{
376
0
  if (pgstat_should_count_relation(rel))
377
0
  {
378
0
    PgStat_TableStatus *pgstat_info = rel->pgstat_info;
379
380
0
    ensure_tabstat_xact_level(pgstat_info);
381
0
    pgstat_info->trans->tuples_inserted += n;
382
0
  }
383
0
}
384
385
/*
386
 * count a tuple update
387
 */
388
void
389
pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
390
0
{
391
0
  Assert(!(hot && newpage));
392
393
0
  if (pgstat_should_count_relation(rel))
394
0
  {
395
0
    PgStat_TableStatus *pgstat_info = rel->pgstat_info;
396
397
0
    ensure_tabstat_xact_level(pgstat_info);
398
0
    pgstat_info->trans->tuples_updated++;
399
400
    /*
401
     * tuples_hot_updated and tuples_newpage_updated counters are
402
     * nontransactional, so just advance them
403
     */
404
0
    if (hot)
405
0
      pgstat_info->counts.tuples_hot_updated++;
406
0
    else if (newpage)
407
0
      pgstat_info->counts.tuples_newpage_updated++;
408
0
  }
409
0
}
410
411
/*
412
 * count a tuple deletion
413
 */
414
void
415
pgstat_count_heap_delete(Relation rel)
416
0
{
417
0
  if (pgstat_should_count_relation(rel))
418
0
  {
419
0
    PgStat_TableStatus *pgstat_info = rel->pgstat_info;
420
421
0
    ensure_tabstat_xact_level(pgstat_info);
422
0
    pgstat_info->trans->tuples_deleted++;
423
0
  }
424
0
}
425
426
/*
427
 * update tuple counters due to truncate
428
 */
429
void
430
pgstat_count_truncate(Relation rel)
431
0
{
432
0
  if (pgstat_should_count_relation(rel))
433
0
  {
434
0
    PgStat_TableStatus *pgstat_info = rel->pgstat_info;
435
436
0
    ensure_tabstat_xact_level(pgstat_info);
437
0
    save_truncdrop_counters(pgstat_info->trans, false);
438
0
    pgstat_info->trans->tuples_inserted = 0;
439
0
    pgstat_info->trans->tuples_updated = 0;
440
0
    pgstat_info->trans->tuples_deleted = 0;
441
0
  }
442
0
}
443
444
/*
445
 * update dead-tuples count
446
 *
447
 * The semantics of this are that we are reporting the nontransactional
448
 * recovery of "delta" dead tuples; so delta_dead_tuples decreases
449
 * rather than increasing, and the change goes straight into the per-table
450
 * counter, not into transactional state.
451
 */
452
void
453
pgstat_update_heap_dead_tuples(Relation rel, int delta)
454
0
{
455
0
  if (pgstat_should_count_relation(rel))
456
0
  {
457
0
    PgStat_TableStatus *pgstat_info = rel->pgstat_info;
458
459
0
    pgstat_info->counts.delta_dead_tuples -= delta;
460
0
  }
461
0
}
462
463
/*
464
 * Support function for the SQL-callable pgstat* functions. Returns
465
 * the collected statistics for one table or NULL. NULL doesn't mean
466
 * that the table doesn't exist, just that there are no statistics, so the
467
 * caller is better off to report ZERO instead.
468
 */
469
PgStat_StatTabEntry *
470
pgstat_fetch_stat_tabentry(Oid relid)
471
0
{
472
0
  return pgstat_fetch_stat_tabentry_ext(IsSharedRelation(relid), relid);
473
0
}
474
475
/*
476
 * More efficient version of pgstat_fetch_stat_tabentry(), allowing to specify
477
 * whether the to-be-accessed table is a shared relation or not.
478
 */
479
PgStat_StatTabEntry *
480
pgstat_fetch_stat_tabentry_ext(bool shared, Oid reloid)
481
0
{
482
0
  Oid     dboid = (shared ? InvalidOid : MyDatabaseId);
483
484
0
  return (PgStat_StatTabEntry *)
485
0
    pgstat_fetch_entry(PGSTAT_KIND_RELATION, dboid, reloid);
486
0
}
487
488
/*
489
 * find any existing PgStat_TableStatus entry for rel
490
 *
491
 * Find any existing PgStat_TableStatus entry for rel_id in the current
492
 * database. If not found, try finding from shared tables.
493
 *
494
 * If an entry is found, copy it and increment the copy's counters with their
495
 * subtransaction counterparts, then return the copy.  The caller may need to
496
 * pfree() the copy.
497
 *
498
 * If no entry found, return NULL, don't create a new one.
499
 */
500
PgStat_TableStatus *
501
find_tabstat_entry(Oid rel_id)
502
0
{
503
0
  PgStat_EntryRef *entry_ref;
504
0
  PgStat_TableXactStatus *trans;
505
0
  PgStat_TableStatus *tabentry = NULL;
506
0
  PgStat_TableStatus *tablestatus = NULL;
507
508
0
  entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, MyDatabaseId, rel_id);
509
0
  if (!entry_ref)
510
0
  {
511
0
    entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, InvalidOid, rel_id);
512
0
    if (!entry_ref)
513
0
      return tablestatus;
514
0
  }
515
516
0
  tabentry = (PgStat_TableStatus *) entry_ref->pending;
517
0
  tablestatus = palloc(sizeof(PgStat_TableStatus));
518
0
  *tablestatus = *tabentry;
519
520
  /*
521
   * Reset tablestatus->trans in the copy of PgStat_TableStatus as it may
522
   * point to a shared memory area.  Its data is saved below, so removing it
523
   * does not matter.
524
   */
525
0
  tablestatus->trans = NULL;
526
527
  /*
528
   * Live subtransaction counts are not included yet.  This is not a hot
529
   * code path so reconcile tuples_inserted, tuples_updated and
530
   * tuples_deleted even if the caller may not be interested in this data.
531
   */
532
0
  for (trans = tabentry->trans; trans != NULL; trans = trans->upper)
533
0
  {
534
0
    tablestatus->counts.tuples_inserted += trans->tuples_inserted;
535
0
    tablestatus->counts.tuples_updated += trans->tuples_updated;
536
0
    tablestatus->counts.tuples_deleted += trans->tuples_deleted;
537
0
  }
538
539
0
  return tablestatus;
540
0
}
541
542
/*
543
 * Perform relation stats specific end-of-transaction work. Helper for
544
 * AtEOXact_PgStat.
545
 *
546
 * Transfer transactional insert/update counts into the base tabstat entries.
547
 * We don't bother to free any of the transactional state, since it's all in
548
 * TopTransactionContext and will go away anyway.
549
 */
550
void
551
AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit)
552
0
{
553
0
  PgStat_TableXactStatus *trans;
554
555
0
  for (trans = xact_state->first; trans != NULL; trans = trans->next)
556
0
  {
557
0
    PgStat_TableStatus *tabstat;
558
559
0
    Assert(trans->nest_level == 1);
560
0
    Assert(trans->upper == NULL);
561
0
    tabstat = trans->parent;
562
0
    Assert(tabstat->trans == trans);
563
    /* restore pre-truncate/drop stats (if any) in case of aborted xact */
564
0
    if (!isCommit)
565
0
      restore_truncdrop_counters(trans);
566
    /* count attempted actions regardless of commit/abort */
567
0
    tabstat->counts.tuples_inserted += trans->tuples_inserted;
568
0
    tabstat->counts.tuples_updated += trans->tuples_updated;
569
0
    tabstat->counts.tuples_deleted += trans->tuples_deleted;
570
0
    if (isCommit)
571
0
    {
572
0
      tabstat->counts.truncdropped = trans->truncdropped;
573
0
      if (trans->truncdropped)
574
0
      {
575
        /* forget live/dead stats seen by backend thus far */
576
0
        tabstat->counts.delta_live_tuples = 0;
577
0
        tabstat->counts.delta_dead_tuples = 0;
578
0
      }
579
      /* insert adds a live tuple, delete removes one */
580
0
      tabstat->counts.delta_live_tuples +=
581
0
        trans->tuples_inserted - trans->tuples_deleted;
582
      /* update and delete each create a dead tuple */
583
0
      tabstat->counts.delta_dead_tuples +=
584
0
        trans->tuples_updated + trans->tuples_deleted;
585
      /* insert, update, delete each count as one change event */
586
0
      tabstat->counts.changed_tuples +=
587
0
        trans->tuples_inserted + trans->tuples_updated +
588
0
        trans->tuples_deleted;
589
0
    }
590
0
    else
591
0
    {
592
      /* inserted tuples are dead, deleted tuples are unaffected */
593
0
      tabstat->counts.delta_dead_tuples +=
594
0
        trans->tuples_inserted + trans->tuples_updated;
595
      /* an aborted xact generates no changed_tuple events */
596
0
    }
597
0
    tabstat->trans = NULL;
598
0
  }
599
0
}
600
601
/*
602
 * Perform relation stats specific end-of-sub-transaction work. Helper for
603
 * AtEOSubXact_PgStat.
604
 *
605
 * Transfer transactional insert/update counts into the next higher
606
 * subtransaction state.
607
 */
608
void
609
AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth)
610
0
{
611
0
  PgStat_TableXactStatus *trans;
612
0
  PgStat_TableXactStatus *next_trans;
613
614
0
  for (trans = xact_state->first; trans != NULL; trans = next_trans)
615
0
  {
616
0
    PgStat_TableStatus *tabstat;
617
618
0
    next_trans = trans->next;
619
0
    Assert(trans->nest_level == nestDepth);
620
0
    tabstat = trans->parent;
621
0
    Assert(tabstat->trans == trans);
622
623
0
    if (isCommit)
624
0
    {
625
0
      if (trans->upper && trans->upper->nest_level == nestDepth - 1)
626
0
      {
627
0
        if (trans->truncdropped)
628
0
        {
629
          /* propagate the truncate/drop status one level up */
630
0
          save_truncdrop_counters(trans->upper, false);
631
          /* replace upper xact stats with ours */
632
0
          trans->upper->tuples_inserted = trans->tuples_inserted;
633
0
          trans->upper->tuples_updated = trans->tuples_updated;
634
0
          trans->upper->tuples_deleted = trans->tuples_deleted;
635
0
        }
636
0
        else
637
0
        {
638
0
          trans->upper->tuples_inserted += trans->tuples_inserted;
639
0
          trans->upper->tuples_updated += trans->tuples_updated;
640
0
          trans->upper->tuples_deleted += trans->tuples_deleted;
641
0
        }
642
0
        tabstat->trans = trans->upper;
643
0
        pfree(trans);
644
0
      }
645
0
      else
646
0
      {
647
        /*
648
         * When there isn't an immediate parent state, we can just
649
         * reuse the record instead of going through a palloc/pfree
650
         * pushup (this works since it's all in TopTransactionContext
651
         * anyway).  We have to re-link it into the parent level,
652
         * though, and that might mean pushing a new entry into the
653
         * pgStatXactStack.
654
         */
655
0
        PgStat_SubXactStatus *upper_xact_state;
656
657
0
        upper_xact_state = pgstat_get_xact_stack_level(nestDepth - 1);
658
0
        trans->next = upper_xact_state->first;
659
0
        upper_xact_state->first = trans;
660
0
        trans->nest_level = nestDepth - 1;
661
0
      }
662
0
    }
663
0
    else
664
0
    {
665
      /*
666
       * On abort, update top-level tabstat counts, then forget the
667
       * subtransaction
668
       */
669
670
      /* first restore values obliterated by truncate/drop */
671
0
      restore_truncdrop_counters(trans);
672
      /* count attempted actions regardless of commit/abort */
673
0
      tabstat->counts.tuples_inserted += trans->tuples_inserted;
674
0
      tabstat->counts.tuples_updated += trans->tuples_updated;
675
0
      tabstat->counts.tuples_deleted += trans->tuples_deleted;
676
      /* inserted tuples are dead, deleted tuples are unaffected */
677
0
      tabstat->counts.delta_dead_tuples +=
678
0
        trans->tuples_inserted + trans->tuples_updated;
679
0
      tabstat->trans = trans->upper;
680
0
      pfree(trans);
681
0
    }
682
0
  }
683
0
}
684
685
/*
686
 * Generate 2PC records for all the pending transaction-dependent relation
687
 * stats.
688
 */
689
void
690
AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
691
0
{
692
0
  PgStat_TableXactStatus *trans;
693
694
0
  for (trans = xact_state->first; trans != NULL; trans = trans->next)
695
0
  {
696
0
    PgStat_TableStatus *tabstat PG_USED_FOR_ASSERTS_ONLY;
697
0
    TwoPhasePgStatRecord record;
698
699
0
    Assert(trans->nest_level == 1);
700
0
    Assert(trans->upper == NULL);
701
0
    tabstat = trans->parent;
702
0
    Assert(tabstat->trans == trans);
703
704
0
    record.tuples_inserted = trans->tuples_inserted;
705
0
    record.tuples_updated = trans->tuples_updated;
706
0
    record.tuples_deleted = trans->tuples_deleted;
707
0
    record.inserted_pre_truncdrop = trans->inserted_pre_truncdrop;
708
0
    record.updated_pre_truncdrop = trans->updated_pre_truncdrop;
709
0
    record.deleted_pre_truncdrop = trans->deleted_pre_truncdrop;
710
0
    record.id = tabstat->id;
711
0
    record.shared = tabstat->shared;
712
0
    record.truncdropped = trans->truncdropped;
713
714
0
    RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
715
0
                 &record, sizeof(TwoPhasePgStatRecord));
716
0
  }
717
0
}
718
719
/*
720
 * All we need do here is unlink the transaction stats state from the
721
 * nontransactional state.  The nontransactional action counts will be
722
 * reported to the stats system immediately, while the effects on live and
723
 * dead tuple counts are preserved in the 2PC state file.
724
 *
725
 * Note: AtEOXact_PgStat_Relations is not called during PREPARE.
726
 */
727
void
728
PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
729
0
{
730
0
  PgStat_TableXactStatus *trans;
731
732
0
  for (trans = xact_state->first; trans != NULL; trans = trans->next)
733
0
  {
734
0
    PgStat_TableStatus *tabstat;
735
736
0
    tabstat = trans->parent;
737
0
    tabstat->trans = NULL;
738
0
  }
739
0
}
740
741
/*
742
 * 2PC processing routine for COMMIT PREPARED case.
743
 *
744
 * Load the saved counts into our local pgstats state.
745
 */
746
void
747
pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info,
748
               void *recdata, uint32 len)
749
0
{
750
0
  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
751
0
  PgStat_TableStatus *pgstat_info;
752
753
  /* Find or create a tabstat entry for the rel */
754
0
  pgstat_info = pgstat_prep_relation_pending(rec->id, rec->shared);
755
756
  /* Same math as in AtEOXact_PgStat, commit case */
757
0
  pgstat_info->counts.tuples_inserted += rec->tuples_inserted;
758
0
  pgstat_info->counts.tuples_updated += rec->tuples_updated;
759
0
  pgstat_info->counts.tuples_deleted += rec->tuples_deleted;
760
0
  pgstat_info->counts.truncdropped = rec->truncdropped;
761
0
  if (rec->truncdropped)
762
0
  {
763
    /* forget live/dead stats seen by backend thus far */
764
0
    pgstat_info->counts.delta_live_tuples = 0;
765
0
    pgstat_info->counts.delta_dead_tuples = 0;
766
0
  }
767
0
  pgstat_info->counts.delta_live_tuples +=
768
0
    rec->tuples_inserted - rec->tuples_deleted;
769
0
  pgstat_info->counts.delta_dead_tuples +=
770
0
    rec->tuples_updated + rec->tuples_deleted;
771
0
  pgstat_info->counts.changed_tuples +=
772
0
    rec->tuples_inserted + rec->tuples_updated +
773
0
    rec->tuples_deleted;
774
0
}
775
776
/*
777
 * 2PC processing routine for ROLLBACK PREPARED case.
778
 *
779
 * Load the saved counts into our local pgstats state, but treat them
780
 * as aborted.
781
 */
782
void
783
pgstat_twophase_postabort(FullTransactionId fxid, uint16 info,
784
              void *recdata, uint32 len)
785
0
{
786
0
  TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
787
0
  PgStat_TableStatus *pgstat_info;
788
789
  /* Find or create a tabstat entry for the rel */
790
0
  pgstat_info = pgstat_prep_relation_pending(rec->id, rec->shared);
791
792
  /* Same math as in AtEOXact_PgStat, abort case */
793
0
  if (rec->truncdropped)
794
0
  {
795
0
    rec->tuples_inserted = rec->inserted_pre_truncdrop;
796
0
    rec->tuples_updated = rec->updated_pre_truncdrop;
797
0
    rec->tuples_deleted = rec->deleted_pre_truncdrop;
798
0
  }
799
0
  pgstat_info->counts.tuples_inserted += rec->tuples_inserted;
800
0
  pgstat_info->counts.tuples_updated += rec->tuples_updated;
801
0
  pgstat_info->counts.tuples_deleted += rec->tuples_deleted;
802
0
  pgstat_info->counts.delta_dead_tuples +=
803
0
    rec->tuples_inserted + rec->tuples_updated;
804
0
}
805
806
/*
807
 * Flush out pending stats for the entry
808
 *
809
 * If nowait is true and the lock could not be immediately acquired, returns
810
 * false without flushing the entry.  Otherwise returns true.
811
 *
812
 * Some of the stats are copied to the corresponding pending database stats
813
 * entry when successfully flushing.
814
 */
815
bool
816
pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
817
0
{
818
0
  Oid     dboid;
819
0
  PgStat_TableStatus *lstats; /* pending stats entry  */
820
0
  PgStatShared_Relation *shtabstats;
821
0
  PgStat_StatTabEntry *tabentry;  /* table entry of shared stats */
822
0
  PgStat_StatDBEntry *dbentry;  /* pending database entry */
823
824
0
  dboid = entry_ref->shared_entry->key.dboid;
825
0
  lstats = (PgStat_TableStatus *) entry_ref->pending;
826
0
  shtabstats = (PgStatShared_Relation *) entry_ref->shared_stats;
827
828
  /*
829
   * Ignore entries that didn't accumulate any actual counts, such as
830
   * indexes that were opened by the planner but not used.
831
   */
832
0
  if (pg_memory_is_all_zeros(&lstats->counts,
833
0
                 sizeof(struct PgStat_TableCounts)))
834
0
    return true;
835
836
0
  if (!pgstat_lock_entry(entry_ref, nowait))
837
0
    return false;
838
839
  /* add the values to the shared entry. */
840
0
  tabentry = &shtabstats->stats;
841
842
0
  tabentry->numscans += lstats->counts.numscans;
843
0
  if (lstats->counts.numscans)
844
0
  {
845
0
    TimestampTz t = GetCurrentTransactionStopTimestamp();
846
847
0
    if (t > tabentry->lastscan)
848
0
      tabentry->lastscan = t;
849
0
  }
850
0
  tabentry->tuples_returned += lstats->counts.tuples_returned;
851
0
  tabentry->tuples_fetched += lstats->counts.tuples_fetched;
852
0
  tabentry->tuples_inserted += lstats->counts.tuples_inserted;
853
0
  tabentry->tuples_updated += lstats->counts.tuples_updated;
854
0
  tabentry->tuples_deleted += lstats->counts.tuples_deleted;
855
0
  tabentry->tuples_hot_updated += lstats->counts.tuples_hot_updated;
856
0
  tabentry->tuples_newpage_updated += lstats->counts.tuples_newpage_updated;
857
858
  /*
859
   * If table was truncated/dropped, first reset the live/dead counters.
860
   */
861
0
  if (lstats->counts.truncdropped)
862
0
  {
863
0
    tabentry->live_tuples = 0;
864
0
    tabentry->dead_tuples = 0;
865
0
    tabentry->ins_since_vacuum = 0;
866
0
  }
867
868
0
  tabentry->live_tuples += lstats->counts.delta_live_tuples;
869
0
  tabentry->dead_tuples += lstats->counts.delta_dead_tuples;
870
0
  tabentry->mod_since_analyze += lstats->counts.changed_tuples;
871
872
  /*
873
   * Using tuples_inserted to update ins_since_vacuum does mean that we'll
874
   * track aborted inserts too.  This isn't ideal, but otherwise probably
875
   * not worth adding an extra field for.  It may just amount to autovacuums
876
   * triggering for inserts more often than they maybe should, which is
877
   * probably not going to be common enough to be too concerned about here.
878
   */
879
0
  tabentry->ins_since_vacuum += lstats->counts.tuples_inserted;
880
881
0
  tabentry->blocks_fetched += lstats->counts.blocks_fetched;
882
0
  tabentry->blocks_hit += lstats->counts.blocks_hit;
883
884
  /* Clamp live_tuples in case of negative delta_live_tuples */
885
0
  tabentry->live_tuples = Max(tabentry->live_tuples, 0);
886
  /* Likewise for dead_tuples */
887
0
  tabentry->dead_tuples = Max(tabentry->dead_tuples, 0);
888
889
0
  pgstat_unlock_entry(entry_ref);
890
891
  /* The entry was successfully flushed, add the same to database stats */
892
0
  dbentry = pgstat_prep_database_pending(dboid);
893
0
  dbentry->tuples_returned += lstats->counts.tuples_returned;
894
0
  dbentry->tuples_fetched += lstats->counts.tuples_fetched;
895
0
  dbentry->tuples_inserted += lstats->counts.tuples_inserted;
896
0
  dbentry->tuples_updated += lstats->counts.tuples_updated;
897
0
  dbentry->tuples_deleted += lstats->counts.tuples_deleted;
898
0
  dbentry->blocks_fetched += lstats->counts.blocks_fetched;
899
0
  dbentry->blocks_hit += lstats->counts.blocks_hit;
900
901
0
  return true;
902
0
}
903
904
void
905
pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref)
906
0
{
907
0
  PgStat_TableStatus *pending = (PgStat_TableStatus *) entry_ref->pending;
908
909
0
  if (pending->relation)
910
0
    pgstat_unlink_relation(pending->relation);
911
0
}
912
913
void
914
pgstat_relation_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts)
915
0
{
916
0
  ((PgStatShared_Relation *) header)->stats.stat_reset_time = ts;
917
0
}
918
919
/*
920
 * Find or create a PgStat_TableStatus entry for rel. New entry is created and
921
 * initialized if not exists.
922
 */
923
static PgStat_TableStatus *
924
pgstat_prep_relation_pending(Oid rel_id, bool isshared)
925
0
{
926
0
  PgStat_EntryRef *entry_ref;
927
0
  PgStat_TableStatus *pending;
928
929
0
  entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_RELATION,
930
0
                      isshared ? InvalidOid : MyDatabaseId,
931
0
                      rel_id, NULL);
932
0
  pending = entry_ref->pending;
933
0
  pending->id = rel_id;
934
0
  pending->shared = isshared;
935
936
0
  return pending;
937
0
}
938
939
/*
940
 * add a new (sub)transaction state record
941
 */
942
static void
943
add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
944
0
{
945
0
  PgStat_SubXactStatus *xact_state;
946
0
  PgStat_TableXactStatus *trans;
947
948
  /*
949
   * If this is the first rel to be modified at the current nest level, we
950
   * first have to push a transaction stack entry.
951
   */
952
0
  xact_state = pgstat_get_xact_stack_level(nest_level);
953
954
  /* Now make a per-table stack entry */
955
0
  trans = (PgStat_TableXactStatus *)
956
0
    MemoryContextAllocZero(TopTransactionContext,
957
0
                 sizeof(PgStat_TableXactStatus));
958
0
  trans->nest_level = nest_level;
959
0
  trans->upper = pgstat_info->trans;
960
0
  trans->parent = pgstat_info;
961
0
  trans->next = xact_state->first;
962
0
  xact_state->first = trans;
963
0
  pgstat_info->trans = trans;
964
0
}
965
966
/*
967
 * Add a new (sub)transaction record if needed.
968
 */
969
static void
970
ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info)
971
0
{
972
0
  int     nest_level = GetCurrentTransactionNestLevel();
973
974
0
  if (pgstat_info->trans == NULL ||
975
0
    pgstat_info->trans->nest_level != nest_level)
976
0
    add_tabstat_xact_level(pgstat_info, nest_level);
977
0
}
978
979
/*
980
 * Whenever a table is truncated/dropped, we save its i/u/d counters so that
981
 * they can be cleared, and if the (sub)xact that executed the truncate/drop
982
 * later aborts, the counters can be restored to the saved (pre-truncate/drop)
983
 * values.
984
 *
985
 * Note that for truncate we do this on the first truncate in any particular
986
 * subxact level only.
987
 */
988
static void
989
save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop)
990
0
{
991
0
  if (!trans->truncdropped || is_drop)
992
0
  {
993
0
    trans->inserted_pre_truncdrop = trans->tuples_inserted;
994
0
    trans->updated_pre_truncdrop = trans->tuples_updated;
995
0
    trans->deleted_pre_truncdrop = trans->tuples_deleted;
996
0
    trans->truncdropped = true;
997
0
  }
998
0
}
999
1000
/*
1001
 * restore counters when a truncate aborts
1002
 */
1003
static void
1004
restore_truncdrop_counters(PgStat_TableXactStatus *trans)
1005
0
{
1006
0
  if (trans->truncdropped)
1007
0
  {
1008
0
    trans->tuples_inserted = trans->inserted_pre_truncdrop;
1009
0
    trans->tuples_updated = trans->updated_pre_truncdrop;
1010
0
    trans->tuples_deleted = trans->deleted_pre_truncdrop;
1011
0
  }
1012
0
}