Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/utils/activity/pgstat.c
Line
Count
Source
1
/* ----------
2
 * pgstat.c
3
 *    Infrastructure for the cumulative statistics system.
4
 *
5
 * The cumulative statistics system accumulates statistics for different kinds
6
 * of objects. Some kinds of statistics are collected for a fixed number of
7
 * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8
 * statistics are collected for a varying number of objects
9
 * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10
 * statistics.
11
 *
12
 * Statistics are loaded from the filesystem during startup (by the startup
13
 * process), unless preceded by a crash, in which case all stats are
14
 * discarded. They are written out by the checkpointer process just before
15
 * shutting down (if the stats kind allows it), except when shutting down in
16
 * immediate mode.
17
 *
18
 * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19
 *
20
 * Statistics for variable-numbered objects are stored in dynamic shared
21
 * memory and can be found via a dshash hashtable. The statistics counters are
22
 * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23
 * separately allocated (PgStatShared_HashEntry->body). The separate
24
 * allocation allows different kinds of statistics to be stored in the same
25
 * hashtable without wasting space in PgStatShared_HashEntry.
26
 *
27
 * Variable-numbered stats are addressed by PgStat_HashKey while running.  It
28
 * is not possible to have statistics for an object that cannot be addressed
29
 * that way at runtime. A wider identifier can be used when serializing to
30
 * disk (used for replication slot stats).
31
 *
32
 * To avoid contention on the shared hashtable, each backend has a
33
 * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34
 * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35
 * entries. The shared hashtable only needs to be accessed when no prior
36
 * reference is found in the local hashtable. Besides pointing to the
37
 * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38
 * contains a pointer to the shared statistics data, as a process-local
39
 * address, to reduce access costs.
40
 *
41
 * The names for structs stored in shared memory are prefixed with
42
 * PgStatShared instead of PgStat. Each stats entry in shared memory is
43
 * protected by a dedicated lwlock.
44
 *
45
 * Most stats updates are first accumulated locally in each process as pending
46
 * entries, then later flushed to shared memory (just after commit, or by
47
 * idle-timeout). This practically eliminates contention on individual stats
48
 * entries. For most kinds of variable-numbered stats, pending data is stored
49
 * in PgStat_EntryRef->pending. All entries with pending data are in the
50
 * pgStatPending list. Pending statistics updates are flushed out by
51
 * pgstat_report_stat().
52
 *
53
 * It is possible for external modules to define custom statistics kinds,
54
 * that can use the same properties as any built-in stats kinds.  Each custom
55
 * stats kind needs to assign a unique ID to ensure that it does not overlap
56
 * with other extensions.  In order to reserve a unique stats kind ID, refer
57
 * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58
 *
59
 * The behavior of different kinds of statistics is determined by the kind's
60
 * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61
 * defined, and pgstat_kind_custom_infos for custom kinds registered at
62
 * startup by pgstat_register_kind().  See PgStat_KindInfo for details.
63
 *
64
 * The consistency of read accesses to statistics can be configured using the
65
 * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66
 * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67
 * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68
 * pgStatLocal.snapshot.
69
 *
70
 * To keep things manageable, stats handling is split across several
71
 * files. Infrastructure pieces are in:
72
 * - pgstat.c - this file, to tie it all together
73
 * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74
 *   the maintenance of hashtable entries
75
 * - pgstat_xact.c - transactional integration, including the transactional
76
 *   creation and dropping of stats entries
77
 *
78
 * Each statistics kind is handled in a dedicated file:
79
 * - pgstat_archiver.c
80
 * - pgstat_backend.c
81
 * - pgstat_bgwriter.c
82
 * - pgstat_checkpointer.c
83
 * - pgstat_database.c
84
 * - pgstat_function.c
85
 * - pgstat_io.c
86
 * - pgstat_relation.c
87
 * - pgstat_replslot.c
88
 * - pgstat_slru.c
89
 * - pgstat_subscription.c
90
 * - pgstat_wal.c
91
 *
92
 * Whenever possible infrastructure files should not contain code related to
93
 * specific kinds of stats.
94
 *
95
 *
96
 * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97
 *
98
 * IDENTIFICATION
99
 *    src/backend/utils/activity/pgstat.c
100
 * ----------
101
 */
102
#include "postgres.h"
103
104
#include <unistd.h>
105
106
#include "access/xact.h"
107
#include "lib/dshash.h"
108
#include "pgstat.h"
109
#include "storage/fd.h"
110
#include "storage/ipc.h"
111
#include "storage/lwlock.h"
112
#include "utils/guc_hooks.h"
113
#include "utils/memutils.h"
114
#include "utils/pgstat_internal.h"
115
#include "utils/timestamp.h"
116
117
118
/* ----------
119
 * Timer definitions.
120
 *
121
 * In milliseconds.
122
 * ----------
123
 */
124
125
/* minimum interval between non-forced stats flushes */
126
0
#define PGSTAT_MIN_INTERVAL     1000
127
/* how long stats updates may stay pending before a flush is allowed to block */
128
0
#define PGSTAT_MAX_INTERVAL     60000
129
/* when to call pgstat_report_stat() again, even when idle */
130
0
#define PGSTAT_IDLE_INTERVAL    10000
131
132
/* ----------
133
 * Initial size hints for the hash tables used in statistics.
134
 * ----------
135
 */
136
137
0
#define PGSTAT_SNAPSHOT_HASH_SIZE 512
138
139
/* ---------
140
 * Identifiers in stats file.
141
 * ---------
142
 */
143
0
#define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
144
0
#define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
145
0
#define PGSTAT_FILE_ENTRY_NAME  'N' /* stats entry identified by name */
146
0
#define PGSTAT_FILE_ENTRY_HASH  'S' /* stats entry identified by
147
                   * PgStat_HashKey */
148
149
/* hash table for statistics snapshots entry */
150
typedef struct PgStat_SnapshotEntry
151
{
152
  PgStat_HashKey key;
153
  char    status;     /* for simplehash use */
154
  void     *data;     /* the stats data itself */
155
} PgStat_SnapshotEntry;
156
157
158
/* ----------
159
 * Backend-local Hash Table Definitions
160
 * ----------
161
 */
162
163
/*
 * Instantiate a simplehash hash table for stats snapshot entries.
 *
 * The generated functions carry the pgstat_snapshot_ prefix (e.g.
 * pgstat_snapshot_lookup(), pgstat_snapshot_insert()) and operate on
 * PgStat_SnapshotEntry elements keyed by PgStat_HashKey.
 *
 * All SH_* parameters have to be defined before lib/simplehash.h is
 * included. The macro arguments and the SH_EQUAL expansion are
 * parenthesized so the macros stay correct no matter what expression they
 * are embedded in.
 */
#define SH_PREFIX pgstat_snapshot
#define SH_ELEMENT_TYPE PgStat_SnapshotEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
  pgstat_hash_hash_key(&(key), sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
  (pgstat_cmp_hash_key(&(a), &(b), sizeof(PgStat_HashKey), NULL) == 0)
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
176
177
178
/* ----------
179
 * Local function forward declarations
180
 * ----------
181
 */
182
183
static void pgstat_write_statsfile(void);
184
static void pgstat_read_statsfile(void);
185
186
static void pgstat_init_snapshot_fixed(void);
187
188
static void pgstat_reset_after_failure(void);
189
190
static bool pgstat_flush_pending_entries(bool nowait);
191
192
static void pgstat_prep_snapshot(void);
193
static void pgstat_build_snapshot(void);
194
static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195
196
static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
197
198
199
/* ----------
200
 * GUC parameters
201
 * ----------
202
 */
203
204
bool    pgstat_track_counts = false;
205
int     pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
206
207
208
/* ----------
209
 * state shared with pgstat_*.c
210
 * ----------
211
 */
212
213
PgStat_LocalState pgStatLocal;
214
215
/*
216
 * Track pending reports for fixed-numbered stats, used by
217
 * pgstat_report_stat().
218
 */
219
bool    pgstat_report_fixed = false;
220
221
/* ----------
222
 * Local data
223
 *
224
 * NB: There should be only variables related to stats infrastructure here,
225
 * not for specific kinds of stats.
226
 * ----------
227
 */
228
229
/*
230
 * Memory context containing pending stats data. Kept as a separate context
231
 * mostly to make it easier to track / attribute the memory usage of pending
232
 * data.
233
 */
234
235
static MemoryContext pgStatPendingContext = NULL;
236
237
/*
238
 * Backend local list of PgStat_EntryRef with unflushed pending stats.
239
 *
240
 * Newly pending entries should only ever be added to the end of the list,
241
 * otherwise pgstat_flush_pending_entries() might not see them immediately.
242
 */
243
static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
244
245
246
/*
247
 * Force the next stats flush to happen regardless of
248
 * PGSTAT_MIN_INTERVAL. Useful in test scripts.
249
 */
250
static bool pgStatForceNextFlush = false;
251
252
/*
253
 * Force-clear existing snapshot before next use when stats_fetch_consistency
254
 * is changed.
255
 */
256
static bool force_stats_snapshot_clear = false;
257
258
259
/*
260
 * For assertions that check pgstat is not used before initialization / after
261
 * shutdown.
262
 */
263
#ifdef USE_ASSERT_CHECKING
264
static bool pgstat_is_initialized = false;
265
static bool pgstat_is_shutdown = false;
266
#endif
267
268
269
/*
270
 * The different kinds of built-in statistics.
271
 *
272
 * If reasonably possible, handling specific to one kind of stats should go
273
 * through this abstraction, rather than making more of pgstat.c aware.
274
 *
275
 * See comments for struct PgStat_KindInfo for details about the individual
276
 * fields.
277
 *
278
 * XXX: It'd be nicer to define this outside of this file. But there doesn't
279
 * seem to be a great way of doing that, given the split across multiple
280
 * files.
281
 */
282
static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
283
284
  /* stats kinds for variable-numbered objects */
285
286
  [PGSTAT_KIND_DATABASE] = {
287
    .name = "database",
288
289
    .fixed_amount = false,
290
    .write_to_file = true,
291
    /* so pg_stat_database entries can be seen in all databases */
292
    .accessed_across_databases = true,
293
294
    .shared_size = sizeof(PgStatShared_Database),
295
    .shared_data_off = offsetof(PgStatShared_Database, stats),
296
    .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
297
    .pending_size = sizeof(PgStat_StatDBEntry),
298
299
    .flush_pending_cb = pgstat_database_flush_cb,
300
    .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
301
  },
302
303
  [PGSTAT_KIND_RELATION] = {
304
    .name = "relation",
305
306
    .fixed_amount = false,
307
    .write_to_file = true,
308
309
    .shared_size = sizeof(PgStatShared_Relation),
310
    .shared_data_off = offsetof(PgStatShared_Relation, stats),
311
    .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
312
    .pending_size = sizeof(PgStat_TableStatus),
313
314
    .flush_pending_cb = pgstat_relation_flush_cb,
315
    .delete_pending_cb = pgstat_relation_delete_pending_cb,
316
  },
317
318
  [PGSTAT_KIND_FUNCTION] = {
319
    .name = "function",
320
321
    .fixed_amount = false,
322
    .write_to_file = true,
323
324
    .shared_size = sizeof(PgStatShared_Function),
325
    .shared_data_off = offsetof(PgStatShared_Function, stats),
326
    .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
327
    .pending_size = sizeof(PgStat_FunctionCounts),
328
329
    .flush_pending_cb = pgstat_function_flush_cb,
330
  },
331
332
  [PGSTAT_KIND_REPLSLOT] = {
333
    .name = "replslot",
334
335
    .fixed_amount = false,
336
    .write_to_file = true,
337
338
    .accessed_across_databases = true,
339
340
    .shared_size = sizeof(PgStatShared_ReplSlot),
341
    .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
342
    .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
343
344
    .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
    /*
     * Replication slot stats use a wider identifier than PgStat_HashKey
     * when serialized to disk (see the file header comment); these
     * callbacks convert to and from that serialized name.
     */
345
    .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
346
    .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
347
  },
348
349
  [PGSTAT_KIND_SUBSCRIPTION] = {
350
    .name = "subscription",
351
352
    .fixed_amount = false,
353
    .write_to_file = true,
354
    /* so pg_stat_subscription_stats entries can be seen in all databases */
355
    .accessed_across_databases = true,
356
357
    .shared_size = sizeof(PgStatShared_Subscription),
358
    .shared_data_off = offsetof(PgStatShared_Subscription, stats),
359
    .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
360
    .pending_size = sizeof(PgStat_BackendSubEntry),
361
362
    .flush_pending_cb = pgstat_subscription_flush_cb,
363
    .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
364
  },
365
366
  [PGSTAT_KIND_BACKEND] = {
367
    .name = "backend",
368
369
    .fixed_amount = false,
    /* the only built-in kind in this table that is not written to the stats file */
370
    .write_to_file = false,
371
372
    .accessed_across_databases = true,
373
374
    .shared_size = sizeof(PgStatShared_Backend),
375
    .shared_data_off = offsetof(PgStatShared_Backend, stats),
376
    .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
377
    /*
     * flush_static_cb callbacks are invoked by pgstat_report_stat() for
     * every kind that defines one, whenever pgstat_report_fixed is set.
     */
378
    .flush_static_cb = pgstat_backend_flush_cb,
379
    .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
380
  },
381
382
  /* stats for fixed-numbered (mostly 1) objects */
383
384
  [PGSTAT_KIND_ARCHIVER] = {
385
    .name = "archiver",
386
387
    .fixed_amount = true,
388
    .write_to_file = true,
389
390
    .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
391
    .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
392
    .shared_data_off = offsetof(PgStatShared_Archiver, stats),
393
    .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
394
395
    .init_shmem_cb = pgstat_archiver_init_shmem_cb,
    /* reset_all_cb is what pgstat_reset_of_kind() calls for fixed-numbered kinds */
396
    .reset_all_cb = pgstat_archiver_reset_all_cb,
397
    .snapshot_cb = pgstat_archiver_snapshot_cb,
398
  },
399
400
  [PGSTAT_KIND_BGWRITER] = {
401
    .name = "bgwriter",
402
403
    .fixed_amount = true,
404
    .write_to_file = true,
405
406
    .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
407
    .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
408
    .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
409
    .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
410
411
    .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
412
    .reset_all_cb = pgstat_bgwriter_reset_all_cb,
413
    .snapshot_cb = pgstat_bgwriter_snapshot_cb,
414
  },
415
416
  [PGSTAT_KIND_CHECKPOINTER] = {
417
    .name = "checkpointer",
418
419
    .fixed_amount = true,
420
    .write_to_file = true,
421
422
    .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
423
    .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
424
    .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
425
    .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
426
427
    .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
428
    .reset_all_cb = pgstat_checkpointer_reset_all_cb,
429
    .snapshot_cb = pgstat_checkpointer_snapshot_cb,
430
  },
431
432
  [PGSTAT_KIND_IO] = {
433
    .name = "io",
434
435
    .fixed_amount = true,
436
    .write_to_file = true,
437
438
    .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
439
    .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
440
    .shared_data_off = offsetof(PgStatShared_IO, stats),
441
    .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
442
443
    .flush_static_cb = pgstat_io_flush_cb,
444
    .init_shmem_cb = pgstat_io_init_shmem_cb,
445
    .reset_all_cb = pgstat_io_reset_all_cb,
446
    .snapshot_cb = pgstat_io_snapshot_cb,
447
  },
448
449
  [PGSTAT_KIND_SLRU] = {
450
    .name = "slru",
451
452
    .fixed_amount = true,
453
    .write_to_file = true,
454
455
    .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
456
    .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
457
    .shared_data_off = offsetof(PgStatShared_SLRU, stats),
458
    .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
459
460
    .flush_static_cb = pgstat_slru_flush_cb,
461
    .init_shmem_cb = pgstat_slru_init_shmem_cb,
462
    .reset_all_cb = pgstat_slru_reset_all_cb,
463
    .snapshot_cb = pgstat_slru_snapshot_cb,
464
  },
465
466
  [PGSTAT_KIND_WAL] = {
467
    .name = "wal",
468
469
    .fixed_amount = true,
470
    .write_to_file = true,
471
472
    .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
473
    .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
474
    .shared_data_off = offsetof(PgStatShared_Wal, stats),
475
    .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
476
    /* init_backend_cb is called from pgstat_initialize() in each process */
477
    .init_backend_cb = pgstat_wal_init_backend_cb,
478
    .flush_static_cb = pgstat_wal_flush_cb,
479
    .init_shmem_cb = pgstat_wal_init_shmem_cb,
480
    .reset_all_cb = pgstat_wal_reset_all_cb,
481
    .snapshot_cb = pgstat_wal_snapshot_cb,
482
  },
483
};
484
485
/*
486
 * Information about custom statistics kinds.
487
 *
488
 * These are kept in a separate array from the built-in kinds, to keep the
489
 * initialization of the built-in kinds clear.
490
 *
491
 * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
492
 */
493
static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
494
495
/* ------------------------------------------------------------
496
 * Functions managing the state of the stats system for all backends.
497
 * ------------------------------------------------------------
498
 */
499
500
/*
 * Load the on-disk statistics into memory at server start, by reading the
 * stats file via pgstat_read_statsfile().
 *
 * Only the startup process, or a single-user-mode backend, should call
 * this.
 */
void
pgstat_restore_stats(void)
{
  pgstat_read_statsfile();
}
510
511
/*
512
 * Remove the stats file.  This is currently used only if WAL recovery is
513
 * needed after a crash.
514
 *
515
 * Should only be called by the startup process or in single user mode.
516
 */
517
void
518
pgstat_discard_stats(void)
519
{
520
  int     ret;
521
522
  /* NB: this needs to be done even in single user mode */
523
524
  ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
525
  if (ret != 0)
526
  {
527
    if (errno == ENOENT)
528
      elog(DEBUG2,
529
         "didn't need to unlink permanent stats file \"%s\" - didn't exist",
530
         PGSTAT_STAT_PERMANENT_FILENAME);
531
    else
532
      ereport(LOG,
533
          (errcode_for_file_access(),
534
           errmsg("could not unlink permanent statistics file \"%s\": %m",
535
              PGSTAT_STAT_PERMANENT_FILENAME)));
536
  }
537
  else
538
  {
539
    ereport(DEBUG2,
540
        (errcode_for_file_access(),
541
         errmsg_internal("unlinked permanent statistics file \"%s\"",
542
                 PGSTAT_STAT_PERMANENT_FILENAME)));
543
  }
544
545
  /*
546
   * Reset stats contents. This will set reset timestamps of fixed-numbered
547
   * stats to the current time (no variable stats exist).
548
   */
549
  pgstat_reset_after_failure();
550
}
551
552
/*
553
 * pgstat_before_server_shutdown() needs to be called by exactly one process
554
 * during regular server shutdowns. Otherwise all stats will be lost.
555
 *
556
 * We currently only write out stats for proc_exit(0). We might want to change
557
 * that at some point... But right now pgstat_discard_stats() would be called
558
 * during the start after a disorderly shutdown, anyway.
559
 */
560
void
561
pgstat_before_server_shutdown(int code, Datum arg)
562
0
{
563
0
  Assert(pgStatLocal.shmem != NULL);
564
0
  Assert(!pgStatLocal.shmem->is_shutdown);
565
566
  /*
567
   * Stats should only be reported after pgstat_initialize() and before
568
   * pgstat_shutdown(). This is a convenient point to catch most violations
569
   * of this rule.
570
   */
571
0
  Assert(pgstat_is_initialized && !pgstat_is_shutdown);
572
573
  /* flush out our own pending changes before writing out */
574
0
  pgstat_report_stat(true);
575
576
  /*
577
   * Only write out file during normal shutdown. Don't even signal that
578
   * we've shutdown during irregular shutdowns, because the shutdown
579
   * sequence isn't coordinated to ensure this backend shuts down last.
580
   */
581
0
  if (code == 0)
582
0
  {
583
0
    pgStatLocal.shmem->is_shutdown = true;
584
0
    pgstat_write_statsfile();
585
0
  }
586
0
}
587
588
589
/* ------------------------------------------------------------
590
 * Backend initialization / shutdown functions
591
 * ------------------------------------------------------------
592
 */
593
594
/*
595
 * Shut down a single backend's statistics reporting at process exit.
596
 *
597
 * Flush out any remaining statistics counts.  Without this, operations
598
 * triggered during backend exit (such as temp table deletions) won't be
599
 * counted.
600
 */
601
static void
602
pgstat_shutdown_hook(int code, Datum arg)
603
0
{
604
0
  Assert(!pgstat_is_shutdown);
605
0
  Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
606
607
  /*
608
   * If we got as far as discovering our own database ID, we can flush out
609
   * what we did so far.  Otherwise, we'd be reporting an invalid database
610
   * ID, so forget it.  (This means that accesses to pg_database during
611
   * failed backend starts might never get counted.)
612
   */
613
0
  if (OidIsValid(MyDatabaseId))
614
0
    pgstat_report_disconnect(MyDatabaseId);
615
616
0
  pgstat_report_stat(true);
617
618
  /* there shouldn't be any pending changes left */
619
0
  Assert(dlist_is_empty(&pgStatPending));
620
0
  dlist_init(&pgStatPending);
621
622
  /* drop the backend stats entry */
623
0
  if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
624
0
    pgstat_request_entry_refs_gc();
625
626
0
  pgstat_detach_shmem();
627
628
#ifdef USE_ASSERT_CHECKING
629
  pgstat_is_shutdown = true;
630
#endif
631
0
}
632
633
/*
634
 * Initialize pgstats state, and set up our on-proc-exit hook. Called from
635
 * BaseInit().
636
 *
637
 * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
638
 */
639
void
640
pgstat_initialize(void)
641
0
{
642
0
  Assert(!pgstat_is_initialized);
643
644
0
  pgstat_attach_shmem();
645
646
0
  pgstat_init_snapshot_fixed();
647
648
  /* Backend initialization callbacks */
649
0
  for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
650
0
  {
651
0
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
652
653
0
    if (kind_info == NULL || kind_info->init_backend_cb == NULL)
654
0
      continue;
655
656
0
    kind_info->init_backend_cb();
657
0
  }
658
659
  /* Set up a process-exit hook to clean up */
660
0
  before_shmem_exit(pgstat_shutdown_hook, 0);
661
662
#ifdef USE_ASSERT_CHECKING
663
  pgstat_is_initialized = true;
664
#endif
665
0
}
666
667
668
/* ------------------------------------------------------------
669
 * Public functions used by backends follow
670
 * ------------------------------------------------------------
671
 */
672
673
/*
674
 * Must be called by processes that performs DML: tcop/postgres.c, logical
675
 * receiver processes, SPI worker, etc. to flush pending statistics updates to
676
 * shared memory.
677
 *
678
 * Unless called with 'force', pending stats updates are flushed happen once
679
 * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
680
 * block on lock acquisition, except if stats updates have been pending for
681
 * longer than PGSTAT_MAX_INTERVAL (60000ms).
682
 *
683
 * Whenever pending stats updates remain at the end of pgstat_report_stat() a
684
 * suggested idle timeout is returned. Currently this is always
685
 * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
686
 * a timeout after which to call pgstat_report_stat(true), but are not
687
 * required to do so.
688
 *
689
 * Note that this is called only when not within a transaction, so it is fair
690
 * to use transaction stop time as an approximation of current time.
691
 */
692
long
693
pgstat_report_stat(bool force)
694
0
{
695
0
  static TimestampTz pending_since = 0;
696
0
  static TimestampTz last_flush = 0;
697
0
  bool    partial_flush;
698
0
  TimestampTz now;
699
0
  bool    nowait;
700
701
0
  pgstat_assert_is_up();
702
0
  Assert(!IsTransactionOrTransactionBlock());
703
704
  /* "absorb" the forced flush even if there's nothing to flush */
705
0
  if (pgStatForceNextFlush)
706
0
  {
707
0
    force = true;
708
0
    pgStatForceNextFlush = false;
709
0
  }
710
711
  /* Don't expend a clock check if nothing to do */
712
0
  if (dlist_is_empty(&pgStatPending) &&
713
0
    !pgstat_report_fixed)
714
0
  {
715
0
    return 0;
716
0
  }
717
718
  /*
719
   * There should never be stats to report once stats are shut down. Can't
720
   * assert that before the checks above, as there is an unconditional
721
   * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
722
   * the process that ran pgstat_before_server_shutdown() will still call.
723
   */
724
0
  Assert(!pgStatLocal.shmem->is_shutdown);
725
726
0
  if (force)
727
0
  {
728
    /*
729
     * Stats reports are forced either when it's been too long since stats
730
     * have been reported or in processes that force stats reporting to
731
     * happen at specific points (including shutdown). In the former case
732
     * the transaction stop time might be quite old, in the latter it
733
     * would never get cleared.
734
     */
735
0
    now = GetCurrentTimestamp();
736
0
  }
737
0
  else
738
0
  {
739
0
    now = GetCurrentTransactionStopTimestamp();
740
741
0
    if (pending_since > 0 &&
742
0
      TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
743
0
    {
744
      /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
745
0
      force = true;
746
0
    }
747
0
    else if (last_flush > 0 &&
748
0
         !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
749
0
    {
750
      /* don't flush too frequently */
751
0
      if (pending_since == 0)
752
0
        pending_since = now;
753
754
0
      return PGSTAT_IDLE_INTERVAL;
755
0
    }
756
0
  }
757
758
0
  pgstat_update_dbstats(now);
759
760
  /* don't wait for lock acquisition when !force */
761
0
  nowait = !force;
762
763
0
  partial_flush = false;
764
765
  /* flush of variable-numbered stats tracked in pending entries list */
766
0
  partial_flush |= pgstat_flush_pending_entries(nowait);
767
768
  /* flush of other stats kinds */
769
0
  if (pgstat_report_fixed)
770
0
  {
771
0
    for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
772
0
    {
773
0
      const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
774
775
0
      if (!kind_info)
776
0
        continue;
777
0
      if (!kind_info->flush_static_cb)
778
0
        continue;
779
780
0
      partial_flush |= kind_info->flush_static_cb(nowait);
781
0
    }
782
0
  }
783
784
0
  last_flush = now;
785
786
  /*
787
   * If some of the pending stats could not be flushed due to lock
788
   * contention, let the caller know when to retry.
789
   */
790
0
  if (partial_flush)
791
0
  {
792
    /* force should have prevented us from getting here */
793
0
    Assert(!force);
794
795
    /* remember since when stats have been pending */
796
0
    if (pending_since == 0)
797
0
      pending_since = now;
798
799
0
    return PGSTAT_IDLE_INTERVAL;
800
0
  }
801
802
0
  pending_since = 0;
803
0
  pgstat_report_fixed = false;
804
805
0
  return 0;
806
0
}
807
808
/*
809
 * Force locally pending stats to be flushed during the next
810
 * pgstat_report_stat() call. This is useful for writing tests.
811
 */
812
void
813
pgstat_force_next_flush(void)
814
0
{
815
0
  pgStatForceNextFlush = true;
816
0
}
817
818
/*
819
 * Only for use by pgstat_reset_counters()
820
 */
821
static bool
822
match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
823
0
{
824
0
  return entry->key.dboid == MyDatabaseId;
825
0
}
826
827
/*
828
 * Reset counters for our database.
829
 *
830
 * Permission checking for this function is managed through the normal
831
 * GRANT system.
832
 */
833
void
834
pgstat_reset_counters(void)
835
0
{
836
0
  TimestampTz ts = GetCurrentTimestamp();
837
838
0
  pgstat_reset_matching_entries(match_db_entries,
839
0
                  ObjectIdGetDatum(MyDatabaseId),
840
0
                  ts);
841
0
}
842
843
/*
844
 * Reset a single variable-numbered entry.
845
 *
846
 * If the stats kind is within a database, also reset the database's
847
 * stat_reset_timestamp.
848
 *
849
 * Permission checking for this function is managed through the normal
850
 * GRANT system.
851
 */
852
void
853
pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
854
0
{
855
0
  const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
856
0
  TimestampTz ts = GetCurrentTimestamp();
857
858
  /* not needed atm, and doesn't make sense with the current signature */
859
0
  Assert(!pgstat_get_kind_info(kind)->fixed_amount);
860
861
  /* reset the "single counter" */
862
0
  pgstat_reset_entry(kind, dboid, objid, ts);
863
864
0
  if (!kind_info->accessed_across_databases)
865
0
    pgstat_reset_database_timestamp(dboid, ts);
866
0
}
867
868
/*
869
 * Reset stats for all entries of a kind.
870
 *
871
 * Permission checking for this function is managed through the normal
872
 * GRANT system.
873
 */
874
void
875
pgstat_reset_of_kind(PgStat_Kind kind)
876
0
{
877
0
  const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
878
0
  TimestampTz ts = GetCurrentTimestamp();
879
880
0
  if (kind_info->fixed_amount)
881
0
    kind_info->reset_all_cb(ts);
882
0
  else
883
0
    pgstat_reset_entries_of_kind(kind, ts);
884
0
}
885
886
887
/* ------------------------------------------------------------
888
 * Fetching of stats
889
 * ------------------------------------------------------------
890
 */
891
892
/*
893
 * Discard any data collected in the current transaction.  Any subsequent
894
 * request will cause new snapshots to be read.
895
 *
896
 * This is also invoked during transaction commit or abort to discard
897
 * the no-longer-wanted snapshot.  Updates of stats_fetch_consistency can
898
 * cause this routine to be called.
899
 */
900
void
901
pgstat_clear_snapshot(void)
902
0
{
903
0
  pgstat_assert_is_up();
904
905
0
  memset(&pgStatLocal.snapshot.fixed_valid, 0,
906
0
       sizeof(pgStatLocal.snapshot.fixed_valid));
907
0
  memset(&pgStatLocal.snapshot.custom_valid, 0,
908
0
       sizeof(pgStatLocal.snapshot.custom_valid));
909
0
  pgStatLocal.snapshot.stats = NULL;
910
0
  pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
911
912
  /* Release memory, if any was allocated */
913
0
  if (pgStatLocal.snapshot.context)
914
0
  {
915
0
    MemoryContextDelete(pgStatLocal.snapshot.context);
916
917
    /* Reset variables */
918
0
    pgStatLocal.snapshot.context = NULL;
919
0
  }
920
921
  /*
922
   * Historically the backend_status.c facilities lived in this file, and
923
   * were reset with the same function. For now keep it that way, and
924
   * forward the reset request.
925
   */
926
0
  pgstat_clear_backend_activity_snapshot();
927
928
  /* Reset this flag, as it may be possible that a cleanup was forced. */
929
0
  force_stats_snapshot_clear = false;
930
0
}
931
932
void *
933
pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
934
0
{
935
0
  PgStat_HashKey key = {0};
936
0
  PgStat_EntryRef *entry_ref;
937
0
  void     *stats_data;
938
0
  const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
939
940
  /* should be called from backends */
941
0
  Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
942
0
  Assert(!kind_info->fixed_amount);
943
944
0
  pgstat_prep_snapshot();
945
946
0
  key.kind = kind;
947
0
  key.dboid = dboid;
948
0
  key.objid = objid;
949
950
  /* if we need to build a full snapshot, do so */
951
0
  if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
952
0
    pgstat_build_snapshot();
953
954
  /* if caching is desired, look up in cache */
955
0
  if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
956
0
  {
957
0
    PgStat_SnapshotEntry *entry = NULL;
958
959
0
    entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
960
961
0
    if (entry)
962
0
      return entry->data;
963
964
    /*
965
     * If we built a full snapshot and the key is not in
966
     * pgStatLocal.snapshot.stats, there are no matching stats.
967
     */
968
0
    if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
969
0
      return NULL;
970
0
  }
971
972
0
  pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
973
974
0
  entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
975
976
0
  if (entry_ref == NULL || entry_ref->shared_entry->dropped)
977
0
  {
978
    /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
979
0
    if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
980
0
    {
981
0
      PgStat_SnapshotEntry *entry = NULL;
982
0
      bool    found;
983
984
0
      entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
985
0
      Assert(!found);
986
0
      entry->data = NULL;
987
0
    }
988
0
    return NULL;
989
0
  }
990
991
  /*
992
   * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
993
   * otherwise we could quickly end up with a fair bit of memory used due to
994
   * repeated accesses.
995
   */
996
0
  if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
997
0
    stats_data = palloc(kind_info->shared_data_len);
998
0
  else
999
0
    stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1000
0
                    kind_info->shared_data_len);
1001
1002
0
  (void) pgstat_lock_entry_shared(entry_ref, false);
1003
0
  memcpy(stats_data,
1004
0
       pgstat_get_entry_data(kind, entry_ref->shared_stats),
1005
0
       kind_info->shared_data_len);
1006
0
  pgstat_unlock_entry(entry_ref);
1007
1008
0
  if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1009
0
  {
1010
0
    PgStat_SnapshotEntry *entry = NULL;
1011
0
    bool    found;
1012
1013
0
    entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1014
0
    entry->data = stats_data;
1015
0
  }
1016
1017
0
  return stats_data;
1018
0
}
1019
1020
/*
1021
 * If a stats snapshot has been taken, return the timestamp at which that was
1022
 * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1023
 * false.
1024
 */
1025
TimestampTz
1026
pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1027
0
{
1028
0
  if (force_stats_snapshot_clear)
1029
0
    pgstat_clear_snapshot();
1030
1031
0
  if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1032
0
  {
1033
0
    *have_snapshot = true;
1034
0
    return pgStatLocal.snapshot.snapshot_timestamp;
1035
0
  }
1036
1037
0
  *have_snapshot = false;
1038
1039
0
  return 0;
1040
0
}
1041
1042
bool
1043
pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1044
0
{
1045
  /* fixed-numbered stats always exist */
1046
0
  if (pgstat_get_kind_info(kind)->fixed_amount)
1047
0
    return true;
1048
1049
0
  return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1050
0
}
1051
1052
/*
1053
 * Ensure snapshot for fixed-numbered 'kind' exists.
1054
 *
1055
 * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1056
 * massaging the data into the desired format.
1057
 */
1058
void
1059
pgstat_snapshot_fixed(PgStat_Kind kind)
1060
0
{
1061
0
  Assert(pgstat_is_kind_valid(kind));
1062
0
  Assert(pgstat_get_kind_info(kind)->fixed_amount);
1063
1064
0
  if (force_stats_snapshot_clear)
1065
0
    pgstat_clear_snapshot();
1066
1067
0
  if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1068
0
    pgstat_build_snapshot();
1069
0
  else
1070
0
    pgstat_build_snapshot_fixed(kind);
1071
1072
0
  if (pgstat_is_kind_builtin(kind))
1073
0
    Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1074
0
  else if (pgstat_is_kind_custom(kind))
1075
0
    Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1076
0
}
1077
1078
static void
1079
pgstat_init_snapshot_fixed(void)
1080
0
{
1081
  /*
1082
   * Initialize fixed-numbered statistics data in snapshots, only for custom
1083
   * stats kinds.
1084
   */
1085
0
  for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1086
0
  {
1087
0
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1088
1089
0
    if (!kind_info || !kind_info->fixed_amount)
1090
0
      continue;
1091
1092
0
    pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1093
0
      MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1094
0
  }
1095
0
}
1096
1097
static void
1098
pgstat_prep_snapshot(void)
1099
0
{
1100
0
  if (force_stats_snapshot_clear)
1101
0
    pgstat_clear_snapshot();
1102
1103
0
  if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1104
0
    pgStatLocal.snapshot.stats != NULL)
1105
0
    return;
1106
1107
0
  if (!pgStatLocal.snapshot.context)
1108
0
    pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1109
0
                               "PgStat Snapshot",
1110
0
                               ALLOCSET_SMALL_SIZES);
1111
1112
0
  pgStatLocal.snapshot.stats =
1113
0
    pgstat_snapshot_create(pgStatLocal.snapshot.context,
1114
0
                 PGSTAT_SNAPSHOT_HASH_SIZE,
1115
0
                 NULL);
1116
0
}
1117
1118
static void
1119
pgstat_build_snapshot(void)
1120
0
{
1121
0
  dshash_seq_status hstat;
1122
0
  PgStatShared_HashEntry *p;
1123
1124
  /* should only be called when we need a snapshot */
1125
0
  Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1126
1127
  /* snapshot already built */
1128
0
  if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1129
0
    return;
1130
1131
0
  pgstat_prep_snapshot();
1132
1133
0
  Assert(pgStatLocal.snapshot.stats->members == 0);
1134
1135
0
  pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1136
1137
  /*
1138
   * Snapshot all variable stats.
1139
   */
1140
0
  dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1141
0
  while ((p = dshash_seq_next(&hstat)) != NULL)
1142
0
  {
1143
0
    PgStat_Kind kind = p->key.kind;
1144
0
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1145
0
    bool    found;
1146
0
    PgStat_SnapshotEntry *entry;
1147
0
    PgStatShared_Common *stats_data;
1148
1149
    /*
1150
     * Check if the stats object should be included in the snapshot.
1151
     * Unless the stats kind can be accessed from all databases (e.g.,
1152
     * database stats themselves), we only include stats for the current
1153
     * database or objects not associated with a database (e.g. shared
1154
     * relations).
1155
     */
1156
0
    if (p->key.dboid != MyDatabaseId &&
1157
0
      p->key.dboid != InvalidOid &&
1158
0
      !kind_info->accessed_across_databases)
1159
0
      continue;
1160
1161
0
    if (p->dropped)
1162
0
      continue;
1163
1164
0
    Assert(pg_atomic_read_u32(&p->refcount) > 0);
1165
1166
0
    stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1167
0
    Assert(stats_data);
1168
1169
0
    entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1170
0
    Assert(!found);
1171
1172
0
    entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1173
0
                     pgstat_get_entry_len(kind));
1174
1175
    /*
1176
     * Acquire the LWLock directly instead of using
1177
     * pg_stat_lock_entry_shared() which requires a reference.
1178
     */
1179
0
    LWLockAcquire(&stats_data->lock, LW_SHARED);
1180
0
    memcpy(entry->data,
1181
0
         pgstat_get_entry_data(kind, stats_data),
1182
0
         pgstat_get_entry_len(kind));
1183
0
    LWLockRelease(&stats_data->lock);
1184
0
  }
1185
0
  dshash_seq_term(&hstat);
1186
1187
  /*
1188
   * Build snapshot of all fixed-numbered stats.
1189
   */
1190
0
  for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1191
0
  {
1192
0
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1193
1194
0
    if (!kind_info)
1195
0
      continue;
1196
0
    if (!kind_info->fixed_amount)
1197
0
    {
1198
0
      Assert(kind_info->snapshot_cb == NULL);
1199
0
      continue;
1200
0
    }
1201
1202
0
    pgstat_build_snapshot_fixed(kind);
1203
0
  }
1204
1205
0
  pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1206
0
}
1207
1208
static void
1209
pgstat_build_snapshot_fixed(PgStat_Kind kind)
1210
0
{
1211
0
  const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1212
0
  int     idx;
1213
0
  bool     *valid;
1214
1215
  /* Position in fixed_valid or custom_valid */
1216
0
  if (pgstat_is_kind_builtin(kind))
1217
0
  {
1218
0
    idx = kind;
1219
0
    valid = pgStatLocal.snapshot.fixed_valid;
1220
0
  }
1221
0
  else
1222
0
  {
1223
0
    idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1224
0
    valid = pgStatLocal.snapshot.custom_valid;
1225
0
  }
1226
1227
0
  Assert(kind_info->fixed_amount);
1228
0
  Assert(kind_info->snapshot_cb != NULL);
1229
1230
0
  if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1231
0
  {
1232
    /* rebuild every time */
1233
0
    valid[idx] = false;
1234
0
  }
1235
0
  else if (valid[idx])
1236
0
  {
1237
    /* in snapshot mode we shouldn't get called again */
1238
0
    Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1239
0
    return;
1240
0
  }
1241
1242
0
  Assert(!valid[idx]);
1243
1244
0
  kind_info->snapshot_cb();
1245
1246
0
  Assert(!valid[idx]);
1247
0
  valid[idx] = true;
1248
0
}
1249
1250
1251
/* ------------------------------------------------------------
1252
 * Backend-local pending stats infrastructure
1253
 * ------------------------------------------------------------
1254
 */
1255
1256
/*
1257
 * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1258
 * stats if not already done.
1259
 *
1260
 * If created_entry is non-NULL, it'll be set to true if the entry is newly
1261
 * created, false otherwise.
1262
 */
1263
PgStat_EntryRef *
1264
pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1265
0
{
1266
0
  PgStat_EntryRef *entry_ref;
1267
1268
  /* need to be able to flush out */
1269
0
  Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1270
1271
0
  if (unlikely(!pgStatPendingContext))
1272
0
  {
1273
0
    pgStatPendingContext =
1274
0
      AllocSetContextCreate(TopMemoryContext,
1275
0
                  "PgStat Pending",
1276
0
                  ALLOCSET_SMALL_SIZES);
1277
0
  }
1278
1279
0
  entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1280
0
                   true, created_entry);
1281
1282
0
  if (entry_ref->pending == NULL)
1283
0
  {
1284
0
    size_t    entrysize = pgstat_get_kind_info(kind)->pending_size;
1285
1286
0
    Assert(entrysize != (size_t) -1);
1287
1288
0
    entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1289
0
    dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1290
0
  }
1291
1292
0
  return entry_ref;
1293
0
}
1294
1295
/*
1296
 * Return an existing stats entry, or NULL.
1297
 *
1298
 * This should only be used for helper function for pgstatfuncs.c - outside of
1299
 * that it shouldn't be needed.
1300
 */
1301
PgStat_EntryRef *
1302
pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1303
0
{
1304
0
  PgStat_EntryRef *entry_ref;
1305
1306
0
  entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1307
1308
0
  if (entry_ref == NULL || entry_ref->pending == NULL)
1309
0
    return NULL;
1310
1311
0
  return entry_ref;
1312
0
}
1313
1314
void
1315
pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1316
0
{
1317
0
  PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1318
0
  const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1319
0
  void     *pending_data = entry_ref->pending;
1320
1321
0
  Assert(pending_data != NULL);
1322
  /* !fixed_amount stats should be handled explicitly */
1323
0
  Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1324
1325
0
  if (kind_info->delete_pending_cb)
1326
0
    kind_info->delete_pending_cb(entry_ref);
1327
1328
0
  pfree(pending_data);
1329
0
  entry_ref->pending = NULL;
1330
1331
0
  dlist_delete(&entry_ref->pending_node);
1332
0
}
1333
1334
/*
1335
 * Flush out pending variable-numbered stats.
1336
 */
1337
static bool
1338
pgstat_flush_pending_entries(bool nowait)
1339
0
{
1340
0
  bool    have_pending = false;
1341
0
  dlist_node *cur = NULL;
1342
1343
  /*
1344
   * Need to be a bit careful iterating over the list of pending entries.
1345
   * Processing a pending entry may queue further pending entries to the end
1346
   * of the list that we want to process, so a simple iteration won't do.
1347
   * Further complicating matters is that we want to delete the current
1348
   * entry in each iteration from the list if we flushed successfully.
1349
   *
1350
   * So we just keep track of the next pointer in each loop iteration.
1351
   */
1352
0
  if (!dlist_is_empty(&pgStatPending))
1353
0
    cur = dlist_head_node(&pgStatPending);
1354
1355
0
  while (cur)
1356
0
  {
1357
0
    PgStat_EntryRef *entry_ref =
1358
0
      dlist_container(PgStat_EntryRef, pending_node, cur);
1359
0
    PgStat_HashKey key = entry_ref->shared_entry->key;
1360
0
    PgStat_Kind kind = key.kind;
1361
0
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1362
0
    bool    did_flush;
1363
0
    dlist_node *next;
1364
1365
0
    Assert(!kind_info->fixed_amount);
1366
0
    Assert(kind_info->flush_pending_cb != NULL);
1367
1368
    /* flush the stats, if possible */
1369
0
    did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1370
1371
0
    Assert(did_flush || nowait);
1372
1373
    /* determine next entry, before deleting the pending entry */
1374
0
    if (dlist_has_next(&pgStatPending, cur))
1375
0
      next = dlist_next_node(&pgStatPending, cur);
1376
0
    else
1377
0
      next = NULL;
1378
1379
    /* if successfully flushed, remove entry */
1380
0
    if (did_flush)
1381
0
      pgstat_delete_pending_entry(entry_ref);
1382
0
    else
1383
0
      have_pending = true;
1384
1385
0
    cur = next;
1386
0
  }
1387
1388
0
  Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1389
1390
0
  return have_pending;
1391
0
}
1392
1393
1394
/* ------------------------------------------------------------
1395
 * Helper / infrastructure functions
1396
 * ------------------------------------------------------------
1397
 */
1398
1399
PgStat_Kind
1400
pgstat_get_kind_from_str(char *kind_str)
1401
0
{
1402
0
  for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1403
0
  {
1404
0
    if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1405
0
      return kind;
1406
0
  }
1407
1408
  /* Check the custom set of cumulative stats */
1409
0
  if (pgstat_kind_custom_infos)
1410
0
  {
1411
0
    for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1412
0
    {
1413
0
      uint32    idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1414
1415
0
      if (pgstat_kind_custom_infos[idx] &&
1416
0
        pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1417
0
        return kind;
1418
0
    }
1419
0
  }
1420
1421
0
  ereport(ERROR,
1422
0
      (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1423
0
       errmsg("invalid statistics kind: \"%s\"", kind_str)));
1424
0
  return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1425
0
}
1426
1427
static inline bool
1428
pgstat_is_kind_valid(PgStat_Kind kind)
1429
0
{
1430
0
  return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1431
0
}
1432
1433
const PgStat_KindInfo *
1434
pgstat_get_kind_info(PgStat_Kind kind)
1435
0
{
1436
0
  if (pgstat_is_kind_builtin(kind))
1437
0
    return &pgstat_kind_builtin_infos[kind];
1438
1439
0
  if (pgstat_is_kind_custom(kind))
1440
0
  {
1441
0
    uint32    idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1442
1443
0
    if (pgstat_kind_custom_infos == NULL ||
1444
0
      pgstat_kind_custom_infos[idx] == NULL)
1445
0
      return NULL;
1446
0
    return pgstat_kind_custom_infos[idx];
1447
0
  }
1448
1449
0
  return NULL;
1450
0
}
1451
1452
/*
1453
 * Register a new stats kind.
1454
 *
1455
 * PgStat_Kinds must be globally unique across all extensions. Refer
1456
 * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1457
 * unique ID for your extension, to avoid conflicts with other extension
1458
 * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1459
 * needlessly reserving a new ID.
1460
 */
1461
void
1462
pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1463
{
1464
  uint32    idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1465
1466
  if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1467
    ereport(ERROR,
1468
        (errmsg("custom cumulative statistics name is invalid"),
1469
         errhint("Provide a non-empty name for the custom cumulative statistics.")));
1470
1471
  if (!pgstat_is_kind_custom(kind))
1472
    ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1473
            errhint("Provide a custom cumulative statistics ID between %u and %u.",
1474
                PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1475
1476
  if (!process_shared_preload_libraries_in_progress)
1477
    ereport(ERROR,
1478
        (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1479
         errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1480
1481
  /*
1482
   * Check some data for fixed-numbered stats.
1483
   */
1484
  if (kind_info->fixed_amount)
1485
  {
1486
    if (kind_info->shared_size == 0)
1487
      ereport(ERROR,
1488
          (errmsg("custom cumulative statistics property is invalid"),
1489
           errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1490
  }
1491
1492
  /*
1493
   * If pgstat_kind_custom_infos is not available yet, allocate it.
1494
   */
1495
  if (pgstat_kind_custom_infos == NULL)
1496
  {
1497
    pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1498
      MemoryContextAllocZero(TopMemoryContext,
1499
                   sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1500
  }
1501
1502
  if (pgstat_kind_custom_infos[idx] != NULL &&
1503
    pgstat_kind_custom_infos[idx]->name != NULL)
1504
    ereport(ERROR,
1505
        (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1506
         errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1507
               pgstat_kind_custom_infos[idx]->name)));
1508
1509
  /* check for existing custom stats with the same name */
1510
  for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1511
  {
1512
    uint32    existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1513
1514
    if (pgstat_kind_custom_infos[existing_idx] == NULL)
1515
      continue;
1516
    if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1517
      ereport(ERROR,
1518
          (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1519
           errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1520
  }
1521
1522
  /* Register it */
1523
  pgstat_kind_custom_infos[idx] = kind_info;
1524
  ereport(LOG,
1525
      (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1526
          kind_info->name, kind)));
1527
}
1528
1529
/*
 * Assert that the stats system is usable, i.e. that we are past
 * pgstat_initialize() and have not yet gone through pgstat_shutdown().
 *
 * Stats should only be reported within that window. This check is put in a
 * few central places to catch violations of this rule more easily. The
 * function is only compiled when assertions are enabled.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1541
1542
1543
/* ------------------------------------------------------------
1544
 * reading and writing of on-disk stats file
1545
 * ------------------------------------------------------------
1546
 */
1547
1548
/* helpers for pgstat_write_statsfile() */

/*
 * Write 'len' bytes starting at 'ptr' to 'fpout' as a single fwrite() item.
 *
 * Write errors are deliberately not reported here: the caller checks the
 * stream with ferror() once, after all output has been produced.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	size_t		rc;

	/* fwrite() returns a size_t item count; keep it in a matching type */
	rc = fwrite(ptr, len, 1, fpout);

	/* we'll check for errors with ferror once at the end */
	(void) rc;
}

/*
 * Write the object 'ptr' points to, sized by the pointed-to type.
 *
 * The arguments are parenthesized so that pointer expressions such as
 * "buf + 1" are dereferenced as a whole when computing the size.
 */
#define write_chunk_s(fpout, ptr) write_chunk((fpout), (ptr), sizeof(*(ptr)))
1561
1562
/*
1563
 * This function is called in the last process that is accessing the shared
1564
 * stats so locking is not required.
1565
 */
1566
static void
1567
pgstat_write_statsfile(void)
1568
0
{
1569
0
  FILE     *fpout;
1570
0
  int32   format_id;
1571
0
  const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1572
0
  const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1573
0
  dshash_seq_status hstat;
1574
0
  PgStatShared_HashEntry *ps;
1575
1576
0
  pgstat_assert_is_up();
1577
1578
  /* should be called only by the checkpointer or single user mode */
1579
0
  Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1580
1581
  /* we're shutting down, so it's ok to just override this */
1582
0
  pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1583
1584
0
  elog(DEBUG2, "writing stats file \"%s\"", statfile);
1585
1586
  /*
1587
   * Open the statistics temp file to write out the current values.
1588
   */
1589
0
  fpout = AllocateFile(tmpfile, PG_BINARY_W);
1590
0
  if (fpout == NULL)
1591
0
  {
1592
0
    ereport(LOG,
1593
0
        (errcode_for_file_access(),
1594
0
         errmsg("could not open temporary statistics file \"%s\": %m",
1595
0
            tmpfile)));
1596
0
    return;
1597
0
  }
1598
1599
  /*
1600
   * Write the file header --- currently just a format ID.
1601
   */
1602
0
  format_id = PGSTAT_FILE_FORMAT_ID;
1603
0
  write_chunk_s(fpout, &format_id);
1604
1605
  /* Write various stats structs for fixed number of objects */
1606
0
  for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1607
0
  {
1608
0
    char     *ptr;
1609
0
    const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1610
1611
0
    if (!info || !info->fixed_amount)
1612
0
      continue;
1613
1614
0
    if (pgstat_is_kind_builtin(kind))
1615
0
      Assert(info->snapshot_ctl_off != 0);
1616
1617
    /* skip if no need to write to file */
1618
0
    if (!info->write_to_file)
1619
0
      continue;
1620
1621
0
    pgstat_build_snapshot_fixed(kind);
1622
0
    if (pgstat_is_kind_builtin(kind))
1623
0
      ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1624
0
    else
1625
0
      ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1626
1627
0
    fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1628
0
    write_chunk_s(fpout, &kind);
1629
0
    write_chunk(fpout, ptr, info->shared_data_len);
1630
0
  }
1631
1632
  /*
1633
   * Walk through the stats entries
1634
   */
1635
0
  dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1636
0
  while ((ps = dshash_seq_next(&hstat)) != NULL)
1637
0
  {
1638
0
    PgStatShared_Common *shstats;
1639
0
    const PgStat_KindInfo *kind_info = NULL;
1640
1641
0
    CHECK_FOR_INTERRUPTS();
1642
1643
    /*
1644
     * We should not see any "dropped" entries when writing the stats
1645
     * file, as all backends and auxiliary processes should have cleaned
1646
     * up their references before they terminated.
1647
     *
1648
     * However, since we are already shutting down, it is not worth
1649
     * crashing the server over any potential cleanup issues, so we simply
1650
     * skip such entries if encountered.
1651
     */
1652
0
    Assert(!ps->dropped);
1653
0
    if (ps->dropped)
1654
0
      continue;
1655
1656
    /*
1657
     * This discards data related to custom stats kinds that are unknown
1658
     * to this process.
1659
     */
1660
0
    if (!pgstat_is_kind_valid(ps->key.kind))
1661
0
    {
1662
0
      elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
1663
0
         ps->key.kind, ps->key.dboid,
1664
0
         ps->key.objid);
1665
0
      continue;
1666
0
    }
1667
1668
0
    shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1669
1670
0
    kind_info = pgstat_get_kind_info(ps->key.kind);
1671
1672
    /* if not dropped the valid-entry refcount should exist */
1673
0
    Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1674
1675
    /* skip if no need to write to file */
1676
0
    if (!kind_info->write_to_file)
1677
0
      continue;
1678
1679
0
    if (!kind_info->to_serialized_name)
1680
0
    {
1681
      /* normal stats entry, identified by PgStat_HashKey */
1682
0
      fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1683
0
      write_chunk_s(fpout, &ps->key);
1684
0
    }
1685
0
    else
1686
0
    {
1687
      /* stats entry identified by name on disk (e.g. slots) */
1688
0
      NameData  name;
1689
1690
0
      kind_info->to_serialized_name(&ps->key, shstats, &name);
1691
1692
0
      fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1693
0
      write_chunk_s(fpout, &ps->key.kind);
1694
0
      write_chunk_s(fpout, &name);
1695
0
    }
1696
1697
    /* Write except the header part of the entry */
1698
0
    write_chunk(fpout,
1699
0
          pgstat_get_entry_data(ps->key.kind, shstats),
1700
0
          pgstat_get_entry_len(ps->key.kind));
1701
0
  }
1702
0
  dshash_seq_term(&hstat);
1703
1704
  /*
1705
   * No more output to be done. Close the temp file and replace the old
1706
   * pgstat.stat with it.  The ferror() check replaces testing for error
1707
   * after each individual fputc or fwrite (in write_chunk()) above.
1708
   */
1709
0
  fputc(PGSTAT_FILE_ENTRY_END, fpout);
1710
1711
0
  if (ferror(fpout))
1712
0
  {
1713
0
    ereport(LOG,
1714
0
        (errcode_for_file_access(),
1715
0
         errmsg("could not write temporary statistics file \"%s\": %m",
1716
0
            tmpfile)));
1717
0
    FreeFile(fpout);
1718
0
    unlink(tmpfile);
1719
0
  }
1720
0
  else if (FreeFile(fpout) < 0)
1721
0
  {
1722
0
    ereport(LOG,
1723
0
        (errcode_for_file_access(),
1724
0
         errmsg("could not close temporary statistics file \"%s\": %m",
1725
0
            tmpfile)));
1726
0
    unlink(tmpfile);
1727
0
  }
1728
0
  else if (durable_rename(tmpfile, statfile, LOG) < 0)
1729
0
  {
1730
    /* durable_rename already emitted log message */
1731
0
    unlink(tmpfile);
1732
0
  }
1733
0
}
1734
1735
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly 'len' bytes from 'fpin' into 'ptr'.
 *
 * Returns true if all 'len' bytes were read, false on a short read (end of
 * file or read error).
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	return fread(ptr, 1, len, fpin) == len;
}

/*
 * Read into the object 'ptr' points to, sized by the pointed-to type.
 *
 * The arguments are parenthesized so that pointer expressions such as
 * "buf + 1" are dereferenced as a whole when computing the size.
 */
#define read_chunk_s(fpin, ptr) read_chunk((fpin), (ptr), sizeof(*(ptr)))
1743
1744
/*
1745
 * Reads in existing statistics file into memory.
1746
 *
1747
 * This function is called in the only process that is accessing the shared
1748
 * stats so locking is not required.
1749
 */
1750
static void
1751
pgstat_read_statsfile(void)
1752
{
1753
  FILE     *fpin;
1754
  int32   format_id;
1755
  bool    found;
1756
  const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1757
  PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1758
1759
  /* shouldn't be called from postmaster */
1760
  Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1761
1762
  elog(DEBUG2, "reading stats file \"%s\"", statfile);
1763
1764
  /*
1765
   * Try to open the stats file. If it doesn't exist, the backends simply
1766
   * returns zero for anything and statistics simply starts from scratch
1767
   * with empty counters.
1768
   *
1769
   * ENOENT is a possibility if stats collection was previously disabled or
1770
   * has not yet written the stats file for the first time.  Any other
1771
   * failure condition is suspicious.
1772
   */
1773
  if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1774
  {
1775
    if (errno != ENOENT)
1776
      ereport(LOG,
1777
          (errcode_for_file_access(),
1778
           errmsg("could not open statistics file \"%s\": %m",
1779
              statfile)));
1780
    pgstat_reset_after_failure();
1781
    return;
1782
  }
1783
1784
  /*
1785
   * Verify it's of the expected format.
1786
   */
1787
  if (!read_chunk_s(fpin, &format_id))
1788
  {
1789
    elog(WARNING, "could not read format ID");
1790
    goto error;
1791
  }
1792
1793
  if (format_id != PGSTAT_FILE_FORMAT_ID)
1794
  {
1795
    elog(WARNING, "found incorrect format ID %d (expected %d)",
1796
       format_id, PGSTAT_FILE_FORMAT_ID);
1797
    goto error;
1798
  }
1799
1800
  /*
1801
   * We found an existing statistics file. Read it and put all the stats
1802
   * data into place.
1803
   */
1804
  for (;;)
1805
  {
1806
    int     t = fgetc(fpin);
1807
1808
    switch (t)
1809
    {
1810
      case PGSTAT_FILE_ENTRY_FIXED:
1811
        {
1812
          PgStat_Kind kind;
1813
          const PgStat_KindInfo *info;
1814
          char     *ptr;
1815
1816
          /* entry for fixed-numbered stats */
1817
          if (!read_chunk_s(fpin, &kind))
1818
          {
1819
            elog(WARNING, "could not read stats kind for entry of type %c", t);
1820
            goto error;
1821
          }
1822
1823
          if (!pgstat_is_kind_valid(kind))
1824
          {
1825
            elog(WARNING, "invalid stats kind %u for entry of type %c",
1826
               kind, t);
1827
            goto error;
1828
          }
1829
1830
          info = pgstat_get_kind_info(kind);
1831
          if (!info)
1832
          {
1833
            elog(WARNING, "could not find information of kind %u for entry of type %c",
1834
               kind, t);
1835
            goto error;
1836
          }
1837
1838
          if (!info->fixed_amount)
1839
          {
1840
            elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1841
               kind, t);
1842
            goto error;
1843
          }
1844
1845
          /* Load back stats into shared memory */
1846
          if (pgstat_is_kind_builtin(kind))
1847
            ptr = ((char *) shmem) + info->shared_ctl_off +
1848
              info->shared_data_off;
1849
          else
1850
          {
1851
            int     idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1852
1853
            ptr = ((char *) shmem->custom_data[idx]) +
1854
              info->shared_data_off;
1855
          }
1856
1857
          if (!read_chunk(fpin, ptr, info->shared_data_len))
1858
          {
1859
            elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1860
               kind, t, info->shared_data_len);
1861
            goto error;
1862
          }
1863
1864
          break;
1865
        }
1866
      case PGSTAT_FILE_ENTRY_HASH:
1867
      case PGSTAT_FILE_ENTRY_NAME:
1868
        {
1869
          PgStat_HashKey key;
1870
          PgStatShared_HashEntry *p;
1871
          PgStatShared_Common *header;
1872
1873
          CHECK_FOR_INTERRUPTS();
1874
1875
          if (t == PGSTAT_FILE_ENTRY_HASH)
1876
          {
1877
            /* normal stats entry, identified by PgStat_HashKey */
1878
            if (!read_chunk_s(fpin, &key))
1879
            {
1880
              elog(WARNING, "could not read key for entry of type %c", t);
1881
              goto error;
1882
            }
1883
1884
            if (!pgstat_is_kind_valid(key.kind))
1885
            {
1886
              elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
1887
                 key.kind, key.dboid,
1888
                 key.objid, t);
1889
              goto error;
1890
            }
1891
1892
            if (!pgstat_get_kind_info(key.kind))
1893
            {
1894
              elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
1895
                 key.kind, key.dboid,
1896
                 key.objid, t);
1897
              goto error;
1898
            }
1899
          }
1900
          else
1901
          {
1902
            /* stats entry identified by name on disk (e.g. slots) */
1903
            const PgStat_KindInfo *kind_info = NULL;
1904
            PgStat_Kind kind;
1905
            NameData  name;
1906
1907
            if (!read_chunk_s(fpin, &kind))
1908
            {
1909
              elog(WARNING, "could not read stats kind for entry of type %c", t);
1910
              goto error;
1911
            }
1912
            if (!read_chunk_s(fpin, &name))
1913
            {
1914
              elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1915
                 kind, t);
1916
              goto error;
1917
            }
1918
            if (!pgstat_is_kind_valid(kind))
1919
            {
1920
              elog(WARNING, "invalid stats kind %u for entry of type %c",
1921
                 kind, t);
1922
              goto error;
1923
            }
1924
1925
            kind_info = pgstat_get_kind_info(kind);
1926
            if (!kind_info)
1927
            {
1928
              elog(WARNING, "could not find information of kind %u for entry of type %c",
1929
                 kind, t);
1930
              goto error;
1931
            }
1932
1933
            if (!kind_info->from_serialized_name)
1934
            {
1935
              elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1936
                 kind, t);
1937
              goto error;
1938
            }
1939
1940
            if (!kind_info->from_serialized_name(&name, &key))
1941
            {
1942
              /* skip over data for entry we don't care about */
1943
              if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1944
              {
1945
                elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1946
                   NameStr(name), kind, t);
1947
                goto error;
1948
              }
1949
1950
              continue;
1951
            }
1952
1953
            Assert(key.kind == kind);
1954
          }
1955
1956
          /*
1957
           * This intentionally doesn't use pgstat_get_entry_ref() -
1958
           * putting all stats into checkpointer's
1959
           * pgStatEntryRefHash would be wasted effort and memory.
1960
           */
1961
          p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1962
1963
          /* don't allow duplicate entries */
1964
          if (found)
1965
          {
1966
            dshash_release_lock(pgStatLocal.shared_hash, p);
1967
            elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
1968
               key.kind, key.dboid,
1969
               key.objid, t);
1970
            goto error;
1971
          }
1972
1973
          header = pgstat_init_entry(key.kind, p);
1974
          dshash_release_lock(pgStatLocal.shared_hash, p);
1975
          if (header == NULL)
1976
          {
1977
            /*
1978
             * It would be tempting to switch this ERROR to a
1979
             * WARNING, but it would mean that all the statistics
1980
             * are discarded when the environment fails on OOM.
1981
             */
1982
            elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
1983
               key.kind, key.dboid,
1984
               key.objid, t);
1985
          }
1986
1987
          if (!read_chunk(fpin,
1988
                  pgstat_get_entry_data(key.kind, header),
1989
                  pgstat_get_entry_len(key.kind)))
1990
          {
1991
            elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
1992
               key.kind, key.dboid,
1993
               key.objid, t);
1994
            goto error;
1995
          }
1996
1997
          break;
1998
        }
1999
      case PGSTAT_FILE_ENTRY_END:
2000
2001
        /*
2002
         * check that PGSTAT_FILE_ENTRY_END actually signals end of
2003
         * file
2004
         */
2005
        if (fgetc(fpin) != EOF)
2006
        {
2007
          elog(WARNING, "could not read end-of-file");
2008
          goto error;
2009
        }
2010
2011
        goto done;
2012
2013
      default:
2014
        elog(WARNING, "could not read entry of type %c", t);
2015
        goto error;
2016
    }
2017
  }
2018
2019
done:
2020
  FreeFile(fpin);
2021
2022
  elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2023
  unlink(statfile);
2024
2025
  return;
2026
2027
error:
2028
  ereport(LOG,
2029
      (errmsg("corrupted statistics file \"%s\"", statfile)));
2030
2031
  pgstat_reset_after_failure();
2032
2033
  goto done;
2034
}
2035
2036
/*
2037
 * Helper to reset / drop stats after a crash or after restoring stats from
2038
 * disk failed, potentially after already loading parts.
2039
 */
2040
static void
2041
pgstat_reset_after_failure(void)
2042
0
{
2043
0
  TimestampTz ts = GetCurrentTimestamp();
2044
2045
  /* reset fixed-numbered stats */
2046
0
  for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2047
0
  {
2048
0
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2049
2050
0
    if (!kind_info || !kind_info->fixed_amount)
2051
0
      continue;
2052
2053
0
    kind_info->reset_all_cb(ts);
2054
0
  }
2055
2056
  /* and drop variable-numbered ones */
2057
0
  pgstat_drop_all_entries();
2058
0
}
2059
2060
/*
2061
 * GUC assign_hook for stats_fetch_consistency.
2062
 */
2063
void
2064
assign_stats_fetch_consistency(int newval, void *extra)
2065
2
{
2066
  /*
2067
   * Changing this value in a transaction may cause snapshot state
2068
   * inconsistencies, so force a clear of the current snapshot on the next
2069
   * snapshot build attempt.
2070
   */
2071
2
  if (pgstat_fetch_consistency != newval)
2072
0
    force_stats_snapshot_clear = true;
2073
2
}