/src/postgres/src/backend/utils/activity/pgstat_relation.c
Line | Count | Source |
1 | | /* ------------------------------------------------------------------------- |
2 | | * |
3 | | * pgstat_relation.c |
4 | | * Implementation of relation statistics. |
5 | | * |
6 | | * This file contains the implementation of relation statistics. It is kept |
7 | | * separate from pgstat.c to enforce the line between the statistics access / |
8 | | * storage implementation and the details about individual types of |
9 | | * statistics. |
10 | | * |
11 | | * Copyright (c) 2001-2025, PostgreSQL Global Development Group |
12 | | * |
13 | | * IDENTIFICATION |
14 | | * src/backend/utils/activity/pgstat_relation.c |
15 | | * ------------------------------------------------------------------------- |
16 | | */ |
17 | | |
18 | | #include "postgres.h" |
19 | | |
20 | | #include "access/twophase_rmgr.h" |
21 | | #include "access/xact.h" |
22 | | #include "catalog/catalog.h" |
23 | | #include "utils/memutils.h" |
24 | | #include "utils/pgstat_internal.h" |
25 | | #include "utils/rel.h" |
26 | | #include "utils/timestamp.h" |
27 | | |
28 | | |
29 | | /* Record that's written to 2PC state file when pgstat state is persisted */ |
30 | | typedef struct TwoPhasePgStatRecord |
31 | | { |
32 | | PgStat_Counter tuples_inserted; /* tuples inserted in xact */ |
33 | | PgStat_Counter tuples_updated; /* tuples updated in xact */ |
34 | | PgStat_Counter tuples_deleted; /* tuples deleted in xact */ |
35 | | /* tuples i/u/d prior to truncate/drop */ |
36 | | PgStat_Counter inserted_pre_truncdrop; |
37 | | PgStat_Counter updated_pre_truncdrop; |
38 | | PgStat_Counter deleted_pre_truncdrop; |
39 | | Oid id; /* table's OID */ |
40 | | bool shared; /* is it a shared catalog? */ |
41 | | bool truncdropped; /* was the relation truncated/dropped? */ |
42 | | } TwoPhasePgStatRecord; |
43 | | |
44 | | |
45 | | static PgStat_TableStatus *pgstat_prep_relation_pending(Oid rel_id, bool isshared); |
46 | | static void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level); |
47 | | static void ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info); |
48 | | static void save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop); |
49 | | static void restore_truncdrop_counters(PgStat_TableXactStatus *trans); |
50 | | |
51 | | |
52 | | /* |
53 | | * Copy stats between relations. This is used for things like REINDEX |
54 | | * CONCURRENTLY. |
55 | | */ |
56 | | void |
57 | | pgstat_copy_relation_stats(Relation dst, Relation src) |
58 | 0 | { |
59 | 0 | PgStat_StatTabEntry *srcstats; |
60 | 0 | PgStatShared_Relation *dstshstats; |
61 | 0 | PgStat_EntryRef *dst_ref; |
62 | |
|
63 | 0 | srcstats = pgstat_fetch_stat_tabentry_ext(src->rd_rel->relisshared, |
64 | 0 | RelationGetRelid(src)); |
65 | 0 | if (!srcstats) |
66 | 0 | return; |
67 | | |
68 | 0 | dst_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, |
69 | 0 | dst->rd_rel->relisshared ? InvalidOid : MyDatabaseId, |
70 | 0 | RelationGetRelid(dst), |
71 | 0 | false); |
72 | |
|
73 | 0 | dstshstats = (PgStatShared_Relation *) dst_ref->shared_stats; |
74 | 0 | dstshstats->stats = *srcstats; |
75 | |
|
76 | 0 | pgstat_unlock_entry(dst_ref); |
77 | 0 | } |
78 | | |
79 | | /* |
80 | | * Initialize a relcache entry to count access statistics. Called whenever a |
81 | | * relation is opened. |
82 | | * |
83 | | * We assume that a relcache entry's pgstat_info field is zeroed by relcache.c |
84 | | * when the relcache entry is made; thereafter it is long-lived data. |
85 | | * |
86 | | * This does not create a reference to a stats entry in shared memory, nor |
87 | | * allocate memory for the pending stats. That happens in |
88 | | * pgstat_assoc_relation(). |
89 | | */ |
90 | | void |
91 | | pgstat_init_relation(Relation rel) |
92 | 0 | { |
93 | 0 | char relkind = rel->rd_rel->relkind; |
94 | | |
95 | | /* |
96 | | * We only count stats for relations with storage and partitioned tables |
97 | | */ |
98 | 0 | if (!RELKIND_HAS_STORAGE(relkind) && relkind != RELKIND_PARTITIONED_TABLE) |
99 | 0 | { |
100 | 0 | rel->pgstat_enabled = false; |
101 | 0 | rel->pgstat_info = NULL; |
102 | 0 | return; |
103 | 0 | } |
104 | | |
105 | 0 | if (!pgstat_track_counts) |
106 | 0 | { |
107 | 0 | if (rel->pgstat_info) |
108 | 0 | pgstat_unlink_relation(rel); |
109 | | |
110 | | /* We're not counting at all */ |
111 | 0 | rel->pgstat_enabled = false; |
112 | 0 | rel->pgstat_info = NULL; |
113 | 0 | return; |
114 | 0 | } |
115 | | |
116 | 0 | rel->pgstat_enabled = true; |
117 | 0 | } |
118 | | |
119 | | /* |
120 | | * Prepare for statistics for this relation to be collected. |
121 | | * |
122 | | * This ensures we have a reference to the stats entry before stats can be |
123 | | * generated. That is important because a relation drop in another connection |
124 | | * could otherwise lead to the stats entry being dropped, which then later |
125 | | * would get recreated when flushing stats. |
126 | | * |
127 | | * This is separate from pgstat_init_relation() as it is not uncommon for |
128 | | * relcache entries to be opened without ever getting stats reported. |
129 | | */ |
130 | | void |
131 | | pgstat_assoc_relation(Relation rel) |
132 | 0 | { |
133 | 0 | Assert(rel->pgstat_enabled); |
134 | 0 | Assert(rel->pgstat_info == NULL); |
135 | | |
136 | | /* Else find or make the PgStat_TableStatus entry, and update link */ |
137 | 0 | rel->pgstat_info = pgstat_prep_relation_pending(RelationGetRelid(rel), |
138 | 0 | rel->rd_rel->relisshared); |
139 | | |
140 | | /* don't allow link a stats to multiple relcache entries */ |
141 | 0 | Assert(rel->pgstat_info->relation == NULL); |
142 | | |
143 | | /* mark this relation as the owner */ |
144 | 0 | rel->pgstat_info->relation = rel; |
145 | 0 | } |
146 | | |
147 | | /* |
148 | | * Break the mutual link between a relcache entry and pending stats entry. |
149 | | * This must be called whenever one end of the link is removed. |
150 | | */ |
151 | | void |
152 | | pgstat_unlink_relation(Relation rel) |
153 | 0 | { |
154 | | /* remove the link to stats info if any */ |
155 | 0 | if (rel->pgstat_info == NULL) |
156 | 0 | return; |
157 | | |
158 | | /* link sanity check */ |
159 | 0 | Assert(rel->pgstat_info->relation == rel); |
160 | 0 | rel->pgstat_info->relation = NULL; |
161 | 0 | rel->pgstat_info = NULL; |
162 | 0 | } |
163 | | |
164 | | /* |
165 | | * Ensure that stats are dropped if transaction aborts. |
166 | | */ |
167 | | void |
168 | | pgstat_create_relation(Relation rel) |
169 | 0 | { |
170 | 0 | pgstat_create_transactional(PGSTAT_KIND_RELATION, |
171 | 0 | rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId, |
172 | 0 | RelationGetRelid(rel)); |
173 | 0 | } |
174 | | |
175 | | /* |
176 | | * Ensure that stats are dropped if transaction commits. |
177 | | */ |
178 | | void |
179 | | pgstat_drop_relation(Relation rel) |
180 | 0 | { |
181 | 0 | int nest_level = GetCurrentTransactionNestLevel(); |
182 | 0 | PgStat_TableStatus *pgstat_info; |
183 | |
|
184 | 0 | pgstat_drop_transactional(PGSTAT_KIND_RELATION, |
185 | 0 | rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId, |
186 | 0 | RelationGetRelid(rel)); |
187 | |
|
188 | 0 | if (!pgstat_should_count_relation(rel)) |
189 | 0 | return; |
190 | | |
191 | | /* |
192 | | * Transactionally set counters to 0. That ensures that accesses to |
193 | | * pg_stat_xact_all_tables inside the transaction show 0. |
194 | | */ |
195 | 0 | pgstat_info = rel->pgstat_info; |
196 | 0 | if (pgstat_info->trans && |
197 | 0 | pgstat_info->trans->nest_level == nest_level) |
198 | 0 | { |
199 | 0 | save_truncdrop_counters(pgstat_info->trans, true); |
200 | 0 | pgstat_info->trans->tuples_inserted = 0; |
201 | 0 | pgstat_info->trans->tuples_updated = 0; |
202 | 0 | pgstat_info->trans->tuples_deleted = 0; |
203 | 0 | } |
204 | 0 | } |
205 | | |
206 | | /* |
207 | | * Report that the table was just vacuumed and flush IO statistics. |
208 | | */ |
209 | | void |
210 | | pgstat_report_vacuum(Oid tableoid, bool shared, |
211 | | PgStat_Counter livetuples, PgStat_Counter deadtuples, |
212 | | TimestampTz starttime) |
213 | 0 | { |
214 | 0 | PgStat_EntryRef *entry_ref; |
215 | 0 | PgStatShared_Relation *shtabentry; |
216 | 0 | PgStat_StatTabEntry *tabentry; |
217 | 0 | Oid dboid = (shared ? InvalidOid : MyDatabaseId); |
218 | 0 | TimestampTz ts; |
219 | 0 | PgStat_Counter elapsedtime; |
220 | |
|
221 | 0 | if (!pgstat_track_counts) |
222 | 0 | return; |
223 | | |
224 | | /* Store the data in the table's hash table entry. */ |
225 | 0 | ts = GetCurrentTimestamp(); |
226 | 0 | elapsedtime = TimestampDifferenceMilliseconds(starttime, ts); |
227 | | |
228 | | /* block acquiring lock for the same reason as pgstat_report_autovac() */ |
229 | 0 | entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, |
230 | 0 | dboid, tableoid, false); |
231 | |
|
232 | 0 | shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats; |
233 | 0 | tabentry = &shtabentry->stats; |
234 | |
|
235 | 0 | tabentry->live_tuples = livetuples; |
236 | 0 | tabentry->dead_tuples = deadtuples; |
237 | | |
238 | | /* |
239 | | * It is quite possible that a non-aggressive VACUUM ended up skipping |
240 | | * various pages, however, we'll zero the insert counter here regardless. |
241 | | * It's currently used only to track when we need to perform an "insert" |
242 | | * autovacuum, which are mainly intended to freeze newly inserted tuples. |
243 | | * Zeroing this may just mean we'll not try to vacuum the table again |
244 | | * until enough tuples have been inserted to trigger another insert |
245 | | * autovacuum. An anti-wraparound autovacuum will catch any persistent |
246 | | * stragglers. |
247 | | */ |
248 | 0 | tabentry->ins_since_vacuum = 0; |
249 | |
|
250 | 0 | if (AmAutoVacuumWorkerProcess()) |
251 | 0 | { |
252 | 0 | tabentry->last_autovacuum_time = ts; |
253 | 0 | tabentry->autovacuum_count++; |
254 | 0 | tabentry->total_autovacuum_time += elapsedtime; |
255 | 0 | } |
256 | 0 | else |
257 | 0 | { |
258 | 0 | tabentry->last_vacuum_time = ts; |
259 | 0 | tabentry->vacuum_count++; |
260 | 0 | tabentry->total_vacuum_time += elapsedtime; |
261 | 0 | } |
262 | |
|
263 | 0 | pgstat_unlock_entry(entry_ref); |
264 | | |
265 | | /* |
266 | | * Flush IO statistics now. pgstat_report_stat() will flush IO stats, |
267 | | * however this will not be called until after an entire autovacuum cycle |
268 | | * is done -- which will likely vacuum many relations -- or until the |
269 | | * VACUUM command has processed all tables and committed. |
270 | | */ |
271 | 0 | pgstat_flush_io(false); |
272 | 0 | (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); |
273 | 0 | } |
274 | | |
275 | | /* |
276 | | * Report that the table was just analyzed and flush IO statistics. |
277 | | * |
278 | | * Caller must provide new live- and dead-tuples estimates, as well as a |
279 | | * flag indicating whether to reset the mod_since_analyze counter. |
280 | | */ |
281 | | void |
282 | | pgstat_report_analyze(Relation rel, |
283 | | PgStat_Counter livetuples, PgStat_Counter deadtuples, |
284 | | bool resetcounter, TimestampTz starttime) |
285 | 0 | { |
286 | 0 | PgStat_EntryRef *entry_ref; |
287 | 0 | PgStatShared_Relation *shtabentry; |
288 | 0 | PgStat_StatTabEntry *tabentry; |
289 | 0 | Oid dboid = (rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId); |
290 | 0 | TimestampTz ts; |
291 | 0 | PgStat_Counter elapsedtime; |
292 | |
|
293 | 0 | if (!pgstat_track_counts) |
294 | 0 | return; |
295 | | |
296 | | /* |
297 | | * Unlike VACUUM, ANALYZE might be running inside a transaction that has |
298 | | * already inserted and/or deleted rows in the target table. ANALYZE will |
299 | | * have counted such rows as live or dead respectively. Because we will |
300 | | * report our counts of such rows at transaction end, we should subtract |
301 | | * off these counts from the update we're making now, else they'll be |
302 | | * double-counted after commit. (This approach also ensures that the |
303 | | * shared stats entry ends up with the right numbers if we abort instead |
304 | | * of committing.) |
305 | | * |
306 | | * Waste no time on partitioned tables, though. |
307 | | */ |
308 | 0 | if (pgstat_should_count_relation(rel) && |
309 | 0 | rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) |
310 | 0 | { |
311 | 0 | PgStat_TableXactStatus *trans; |
312 | |
|
313 | 0 | for (trans = rel->pgstat_info->trans; trans; trans = trans->upper) |
314 | 0 | { |
315 | 0 | livetuples -= trans->tuples_inserted - trans->tuples_deleted; |
316 | 0 | deadtuples -= trans->tuples_updated + trans->tuples_deleted; |
317 | 0 | } |
318 | | /* count stuff inserted by already-aborted subxacts, too */ |
319 | 0 | deadtuples -= rel->pgstat_info->counts.delta_dead_tuples; |
320 | | /* Since ANALYZE's counts are estimates, we could have underflowed */ |
321 | 0 | livetuples = Max(livetuples, 0); |
322 | 0 | deadtuples = Max(deadtuples, 0); |
323 | 0 | } |
324 | | |
325 | | /* Store the data in the table's hash table entry. */ |
326 | 0 | ts = GetCurrentTimestamp(); |
327 | 0 | elapsedtime = TimestampDifferenceMilliseconds(starttime, ts); |
328 | | |
329 | | /* block acquiring lock for the same reason as pgstat_report_autovac() */ |
330 | 0 | entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, dboid, |
331 | 0 | RelationGetRelid(rel), |
332 | 0 | false); |
333 | | /* can't get dropped while accessed */ |
334 | 0 | Assert(entry_ref != NULL && entry_ref->shared_stats != NULL); |
335 | |
|
336 | 0 | shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats; |
337 | 0 | tabentry = &shtabentry->stats; |
338 | |
|
339 | 0 | tabentry->live_tuples = livetuples; |
340 | 0 | tabentry->dead_tuples = deadtuples; |
341 | | |
342 | | /* |
343 | | * If commanded, reset mod_since_analyze to zero. This forgets any |
344 | | * changes that were committed while the ANALYZE was in progress, but we |
345 | | * have no good way to estimate how many of those there were. |
346 | | */ |
347 | 0 | if (resetcounter) |
348 | 0 | tabentry->mod_since_analyze = 0; |
349 | |
|
350 | 0 | if (AmAutoVacuumWorkerProcess()) |
351 | 0 | { |
352 | 0 | tabentry->last_autoanalyze_time = ts; |
353 | 0 | tabentry->autoanalyze_count++; |
354 | 0 | tabentry->total_autoanalyze_time += elapsedtime; |
355 | 0 | } |
356 | 0 | else |
357 | 0 | { |
358 | 0 | tabentry->last_analyze_time = ts; |
359 | 0 | tabentry->analyze_count++; |
360 | 0 | tabentry->total_analyze_time += elapsedtime; |
361 | 0 | } |
362 | |
|
363 | 0 | pgstat_unlock_entry(entry_ref); |
364 | | |
365 | | /* see pgstat_report_vacuum() */ |
366 | 0 | pgstat_flush_io(false); |
367 | 0 | (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); |
368 | 0 | } |
369 | | |
370 | | /* |
371 | | * count a tuple insertion of n tuples |
372 | | */ |
373 | | void |
374 | | pgstat_count_heap_insert(Relation rel, PgStat_Counter n) |
375 | 0 | { |
376 | 0 | if (pgstat_should_count_relation(rel)) |
377 | 0 | { |
378 | 0 | PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
379 | |
|
380 | 0 | ensure_tabstat_xact_level(pgstat_info); |
381 | 0 | pgstat_info->trans->tuples_inserted += n; |
382 | 0 | } |
383 | 0 | } |
384 | | |
385 | | /* |
386 | | * count a tuple update |
387 | | */ |
388 | | void |
389 | | pgstat_count_heap_update(Relation rel, bool hot, bool newpage) |
390 | 0 | { |
391 | 0 | Assert(!(hot && newpage)); |
392 | |
|
393 | 0 | if (pgstat_should_count_relation(rel)) |
394 | 0 | { |
395 | 0 | PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
396 | |
|
397 | 0 | ensure_tabstat_xact_level(pgstat_info); |
398 | 0 | pgstat_info->trans->tuples_updated++; |
399 | | |
400 | | /* |
401 | | * tuples_hot_updated and tuples_newpage_updated counters are |
402 | | * nontransactional, so just advance them |
403 | | */ |
404 | 0 | if (hot) |
405 | 0 | pgstat_info->counts.tuples_hot_updated++; |
406 | 0 | else if (newpage) |
407 | 0 | pgstat_info->counts.tuples_newpage_updated++; |
408 | 0 | } |
409 | 0 | } |
410 | | |
411 | | /* |
412 | | * count a tuple deletion |
413 | | */ |
414 | | void |
415 | | pgstat_count_heap_delete(Relation rel) |
416 | 0 | { |
417 | 0 | if (pgstat_should_count_relation(rel)) |
418 | 0 | { |
419 | 0 | PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
420 | |
|
421 | 0 | ensure_tabstat_xact_level(pgstat_info); |
422 | 0 | pgstat_info->trans->tuples_deleted++; |
423 | 0 | } |
424 | 0 | } |
425 | | |
426 | | /* |
427 | | * update tuple counters due to truncate |
428 | | */ |
429 | | void |
430 | | pgstat_count_truncate(Relation rel) |
431 | 0 | { |
432 | 0 | if (pgstat_should_count_relation(rel)) |
433 | 0 | { |
434 | 0 | PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
435 | |
|
436 | 0 | ensure_tabstat_xact_level(pgstat_info); |
437 | 0 | save_truncdrop_counters(pgstat_info->trans, false); |
438 | 0 | pgstat_info->trans->tuples_inserted = 0; |
439 | 0 | pgstat_info->trans->tuples_updated = 0; |
440 | 0 | pgstat_info->trans->tuples_deleted = 0; |
441 | 0 | } |
442 | 0 | } |
443 | | |
444 | | /* |
445 | | * update dead-tuples count |
446 | | * |
447 | | * The semantics of this are that we are reporting the nontransactional |
448 | | * recovery of "delta" dead tuples; so delta_dead_tuples decreases |
449 | | * rather than increasing, and the change goes straight into the per-table |
450 | | * counter, not into transactional state. |
451 | | */ |
452 | | void |
453 | | pgstat_update_heap_dead_tuples(Relation rel, int delta) |
454 | 0 | { |
455 | 0 | if (pgstat_should_count_relation(rel)) |
456 | 0 | { |
457 | 0 | PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
458 | |
|
459 | 0 | pgstat_info->counts.delta_dead_tuples -= delta; |
460 | 0 | } |
461 | 0 | } |
462 | | |
463 | | /* |
464 | | * Support function for the SQL-callable pgstat* functions. Returns |
465 | | * the collected statistics for one table or NULL. NULL doesn't mean |
466 | | * that the table doesn't exist, just that there are no statistics, so the |
467 | | * caller is better off to report ZERO instead. |
468 | | */ |
469 | | PgStat_StatTabEntry * |
470 | | pgstat_fetch_stat_tabentry(Oid relid) |
471 | 0 | { |
472 | 0 | return pgstat_fetch_stat_tabentry_ext(IsSharedRelation(relid), relid); |
473 | 0 | } |
474 | | |
475 | | /* |
476 | | * More efficient version of pgstat_fetch_stat_tabentry(), allowing to specify |
477 | | * whether the to-be-accessed table is a shared relation or not. |
478 | | */ |
479 | | PgStat_StatTabEntry * |
480 | | pgstat_fetch_stat_tabentry_ext(bool shared, Oid reloid) |
481 | 0 | { |
482 | 0 | Oid dboid = (shared ? InvalidOid : MyDatabaseId); |
483 | |
|
484 | 0 | return (PgStat_StatTabEntry *) |
485 | 0 | pgstat_fetch_entry(PGSTAT_KIND_RELATION, dboid, reloid); |
486 | 0 | } |
487 | | |
488 | | /* |
489 | | * find any existing PgStat_TableStatus entry for rel |
490 | | * |
491 | | * Find any existing PgStat_TableStatus entry for rel_id in the current |
492 | | * database. If not found, try finding from shared tables. |
493 | | * |
494 | | * If an entry is found, copy it and increment the copy's counters with their |
495 | | * subtransaction counterparts, then return the copy. The caller may need to |
496 | | * pfree() the copy. |
497 | | * |
498 | | * If no entry found, return NULL, don't create a new one. |
499 | | */ |
500 | | PgStat_TableStatus * |
501 | | find_tabstat_entry(Oid rel_id) |
502 | 0 | { |
503 | 0 | PgStat_EntryRef *entry_ref; |
504 | 0 | PgStat_TableXactStatus *trans; |
505 | 0 | PgStat_TableStatus *tabentry = NULL; |
506 | 0 | PgStat_TableStatus *tablestatus = NULL; |
507 | |
|
508 | 0 | entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, MyDatabaseId, rel_id); |
509 | 0 | if (!entry_ref) |
510 | 0 | { |
511 | 0 | entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, InvalidOid, rel_id); |
512 | 0 | if (!entry_ref) |
513 | 0 | return tablestatus; |
514 | 0 | } |
515 | | |
516 | 0 | tabentry = (PgStat_TableStatus *) entry_ref->pending; |
517 | 0 | tablestatus = palloc(sizeof(PgStat_TableStatus)); |
518 | 0 | *tablestatus = *tabentry; |
519 | | |
520 | | /* |
521 | | * Reset tablestatus->trans in the copy of PgStat_TableStatus as it may |
522 | | * point to a shared memory area. Its data is saved below, so removing it |
523 | | * does not matter. |
524 | | */ |
525 | 0 | tablestatus->trans = NULL; |
526 | | |
527 | | /* |
528 | | * Live subtransaction counts are not included yet. This is not a hot |
529 | | * code path so reconcile tuples_inserted, tuples_updated and |
530 | | * tuples_deleted even if the caller may not be interested in this data. |
531 | | */ |
532 | 0 | for (trans = tabentry->trans; trans != NULL; trans = trans->upper) |
533 | 0 | { |
534 | 0 | tablestatus->counts.tuples_inserted += trans->tuples_inserted; |
535 | 0 | tablestatus->counts.tuples_updated += trans->tuples_updated; |
536 | 0 | tablestatus->counts.tuples_deleted += trans->tuples_deleted; |
537 | 0 | } |
538 | |
|
539 | 0 | return tablestatus; |
540 | 0 | } |
541 | | |
542 | | /* |
543 | | * Perform relation stats specific end-of-transaction work. Helper for |
544 | | * AtEOXact_PgStat. |
545 | | * |
546 | | * Transfer transactional insert/update counts into the base tabstat entries. |
547 | | * We don't bother to free any of the transactional state, since it's all in |
548 | | * TopTransactionContext and will go away anyway. |
549 | | */ |
550 | | void |
551 | | AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit) |
552 | 0 | { |
553 | 0 | PgStat_TableXactStatus *trans; |
554 | |
|
555 | 0 | for (trans = xact_state->first; trans != NULL; trans = trans->next) |
556 | 0 | { |
557 | 0 | PgStat_TableStatus *tabstat; |
558 | |
|
559 | 0 | Assert(trans->nest_level == 1); |
560 | 0 | Assert(trans->upper == NULL); |
561 | 0 | tabstat = trans->parent; |
562 | 0 | Assert(tabstat->trans == trans); |
563 | | /* restore pre-truncate/drop stats (if any) in case of aborted xact */ |
564 | 0 | if (!isCommit) |
565 | 0 | restore_truncdrop_counters(trans); |
566 | | /* count attempted actions regardless of commit/abort */ |
567 | 0 | tabstat->counts.tuples_inserted += trans->tuples_inserted; |
568 | 0 | tabstat->counts.tuples_updated += trans->tuples_updated; |
569 | 0 | tabstat->counts.tuples_deleted += trans->tuples_deleted; |
570 | 0 | if (isCommit) |
571 | 0 | { |
572 | 0 | tabstat->counts.truncdropped = trans->truncdropped; |
573 | 0 | if (trans->truncdropped) |
574 | 0 | { |
575 | | /* forget live/dead stats seen by backend thus far */ |
576 | 0 | tabstat->counts.delta_live_tuples = 0; |
577 | 0 | tabstat->counts.delta_dead_tuples = 0; |
578 | 0 | } |
579 | | /* insert adds a live tuple, delete removes one */ |
580 | 0 | tabstat->counts.delta_live_tuples += |
581 | 0 | trans->tuples_inserted - trans->tuples_deleted; |
582 | | /* update and delete each create a dead tuple */ |
583 | 0 | tabstat->counts.delta_dead_tuples += |
584 | 0 | trans->tuples_updated + trans->tuples_deleted; |
585 | | /* insert, update, delete each count as one change event */ |
586 | 0 | tabstat->counts.changed_tuples += |
587 | 0 | trans->tuples_inserted + trans->tuples_updated + |
588 | 0 | trans->tuples_deleted; |
589 | 0 | } |
590 | 0 | else |
591 | 0 | { |
592 | | /* inserted tuples are dead, deleted tuples are unaffected */ |
593 | 0 | tabstat->counts.delta_dead_tuples += |
594 | 0 | trans->tuples_inserted + trans->tuples_updated; |
595 | | /* an aborted xact generates no changed_tuple events */ |
596 | 0 | } |
597 | 0 | tabstat->trans = NULL; |
598 | 0 | } |
599 | 0 | } |
600 | | |
601 | | /* |
602 | | * Perform relation stats specific end-of-sub-transaction work. Helper for |
603 | | * AtEOSubXact_PgStat. |
604 | | * |
605 | | * Transfer transactional insert/update counts into the next higher |
606 | | * subtransaction state. |
607 | | */ |
608 | | void |
609 | | AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth) |
610 | 0 | { |
611 | 0 | PgStat_TableXactStatus *trans; |
612 | 0 | PgStat_TableXactStatus *next_trans; |
613 | |
|
614 | 0 | for (trans = xact_state->first; trans != NULL; trans = next_trans) |
615 | 0 | { |
616 | 0 | PgStat_TableStatus *tabstat; |
617 | |
|
618 | 0 | next_trans = trans->next; |
619 | 0 | Assert(trans->nest_level == nestDepth); |
620 | 0 | tabstat = trans->parent; |
621 | 0 | Assert(tabstat->trans == trans); |
622 | |
|
623 | 0 | if (isCommit) |
624 | 0 | { |
625 | 0 | if (trans->upper && trans->upper->nest_level == nestDepth - 1) |
626 | 0 | { |
627 | 0 | if (trans->truncdropped) |
628 | 0 | { |
629 | | /* propagate the truncate/drop status one level up */ |
630 | 0 | save_truncdrop_counters(trans->upper, false); |
631 | | /* replace upper xact stats with ours */ |
632 | 0 | trans->upper->tuples_inserted = trans->tuples_inserted; |
633 | 0 | trans->upper->tuples_updated = trans->tuples_updated; |
634 | 0 | trans->upper->tuples_deleted = trans->tuples_deleted; |
635 | 0 | } |
636 | 0 | else |
637 | 0 | { |
638 | 0 | trans->upper->tuples_inserted += trans->tuples_inserted; |
639 | 0 | trans->upper->tuples_updated += trans->tuples_updated; |
640 | 0 | trans->upper->tuples_deleted += trans->tuples_deleted; |
641 | 0 | } |
642 | 0 | tabstat->trans = trans->upper; |
643 | 0 | pfree(trans); |
644 | 0 | } |
645 | 0 | else |
646 | 0 | { |
647 | | /* |
648 | | * When there isn't an immediate parent state, we can just |
649 | | * reuse the record instead of going through a palloc/pfree |
650 | | * pushup (this works since it's all in TopTransactionContext |
651 | | * anyway). We have to re-link it into the parent level, |
652 | | * though, and that might mean pushing a new entry into the |
653 | | * pgStatXactStack. |
654 | | */ |
655 | 0 | PgStat_SubXactStatus *upper_xact_state; |
656 | |
|
657 | 0 | upper_xact_state = pgstat_get_xact_stack_level(nestDepth - 1); |
658 | 0 | trans->next = upper_xact_state->first; |
659 | 0 | upper_xact_state->first = trans; |
660 | 0 | trans->nest_level = nestDepth - 1; |
661 | 0 | } |
662 | 0 | } |
663 | 0 | else |
664 | 0 | { |
665 | | /* |
666 | | * On abort, update top-level tabstat counts, then forget the |
667 | | * subtransaction |
668 | | */ |
669 | | |
670 | | /* first restore values obliterated by truncate/drop */ |
671 | 0 | restore_truncdrop_counters(trans); |
672 | | /* count attempted actions regardless of commit/abort */ |
673 | 0 | tabstat->counts.tuples_inserted += trans->tuples_inserted; |
674 | 0 | tabstat->counts.tuples_updated += trans->tuples_updated; |
675 | 0 | tabstat->counts.tuples_deleted += trans->tuples_deleted; |
676 | | /* inserted tuples are dead, deleted tuples are unaffected */ |
677 | 0 | tabstat->counts.delta_dead_tuples += |
678 | 0 | trans->tuples_inserted + trans->tuples_updated; |
679 | 0 | tabstat->trans = trans->upper; |
680 | 0 | pfree(trans); |
681 | 0 | } |
682 | 0 | } |
683 | 0 | } |
684 | | |
685 | | /* |
686 | | * Generate 2PC records for all the pending transaction-dependent relation |
687 | | * stats. |
688 | | */ |
689 | | void |
690 | | AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) |
691 | 0 | { |
692 | 0 | PgStat_TableXactStatus *trans; |
693 | |
|
694 | 0 | for (trans = xact_state->first; trans != NULL; trans = trans->next) |
695 | 0 | { |
696 | 0 | PgStat_TableStatus *tabstat PG_USED_FOR_ASSERTS_ONLY; |
697 | 0 | TwoPhasePgStatRecord record; |
698 | |
|
699 | 0 | Assert(trans->nest_level == 1); |
700 | 0 | Assert(trans->upper == NULL); |
701 | 0 | tabstat = trans->parent; |
702 | 0 | Assert(tabstat->trans == trans); |
703 | |
|
704 | 0 | record.tuples_inserted = trans->tuples_inserted; |
705 | 0 | record.tuples_updated = trans->tuples_updated; |
706 | 0 | record.tuples_deleted = trans->tuples_deleted; |
707 | 0 | record.inserted_pre_truncdrop = trans->inserted_pre_truncdrop; |
708 | 0 | record.updated_pre_truncdrop = trans->updated_pre_truncdrop; |
709 | 0 | record.deleted_pre_truncdrop = trans->deleted_pre_truncdrop; |
710 | 0 | record.id = tabstat->id; |
711 | 0 | record.shared = tabstat->shared; |
712 | 0 | record.truncdropped = trans->truncdropped; |
713 | |
|
714 | 0 | RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, |
715 | 0 | &record, sizeof(TwoPhasePgStatRecord)); |
716 | 0 | } |
717 | 0 | } |
718 | | |
719 | | /* |
720 | | * All we need do here is unlink the transaction stats state from the |
721 | | * nontransactional state. The nontransactional action counts will be |
722 | | * reported to the stats system immediately, while the effects on live and |
723 | | * dead tuple counts are preserved in the 2PC state file. |
724 | | * |
725 | | * Note: AtEOXact_PgStat_Relations is not called during PREPARE. |
726 | | */ |
727 | | void |
728 | | PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) |
729 | 0 | { |
730 | 0 | PgStat_TableXactStatus *trans; |
731 | |
|
732 | 0 | for (trans = xact_state->first; trans != NULL; trans = trans->next) |
733 | 0 | { |
734 | 0 | PgStat_TableStatus *tabstat; |
735 | |
|
736 | 0 | tabstat = trans->parent; |
737 | 0 | tabstat->trans = NULL; |
738 | 0 | } |
739 | 0 | } |
740 | | |
741 | | /* |
742 | | * 2PC processing routine for COMMIT PREPARED case. |
743 | | * |
744 | | * Load the saved counts into our local pgstats state. |
745 | | */ |
746 | | void |
747 | | pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, |
748 | | void *recdata, uint32 len) |
749 | 0 | { |
750 | 0 | TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; |
751 | 0 | PgStat_TableStatus *pgstat_info; |
752 | | |
753 | | /* Find or create a tabstat entry for the rel */ |
754 | 0 | pgstat_info = pgstat_prep_relation_pending(rec->id, rec->shared); |
755 | | |
756 | | /* Same math as in AtEOXact_PgStat, commit case */ |
757 | 0 | pgstat_info->counts.tuples_inserted += rec->tuples_inserted; |
758 | 0 | pgstat_info->counts.tuples_updated += rec->tuples_updated; |
759 | 0 | pgstat_info->counts.tuples_deleted += rec->tuples_deleted; |
760 | 0 | pgstat_info->counts.truncdropped = rec->truncdropped; |
761 | 0 | if (rec->truncdropped) |
762 | 0 | { |
763 | | /* forget live/dead stats seen by backend thus far */ |
764 | 0 | pgstat_info->counts.delta_live_tuples = 0; |
765 | 0 | pgstat_info->counts.delta_dead_tuples = 0; |
766 | 0 | } |
767 | 0 | pgstat_info->counts.delta_live_tuples += |
768 | 0 | rec->tuples_inserted - rec->tuples_deleted; |
769 | 0 | pgstat_info->counts.delta_dead_tuples += |
770 | 0 | rec->tuples_updated + rec->tuples_deleted; |
771 | 0 | pgstat_info->counts.changed_tuples += |
772 | 0 | rec->tuples_inserted + rec->tuples_updated + |
773 | 0 | rec->tuples_deleted; |
774 | 0 | } |
775 | | |
776 | | /* |
777 | | * 2PC processing routine for ROLLBACK PREPARED case. |
778 | | * |
779 | | * Load the saved counts into our local pgstats state, but treat them |
780 | | * as aborted. |
781 | | */ |
782 | | void |
783 | | pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, |
784 | | void *recdata, uint32 len) |
785 | 0 | { |
786 | 0 | TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; |
787 | 0 | PgStat_TableStatus *pgstat_info; |
788 | | |
789 | | /* Find or create a tabstat entry for the rel */ |
790 | 0 | pgstat_info = pgstat_prep_relation_pending(rec->id, rec->shared); |
791 | | |
792 | | /* Same math as in AtEOXact_PgStat, abort case */ |
793 | 0 | if (rec->truncdropped) |
794 | 0 | { |
795 | 0 | rec->tuples_inserted = rec->inserted_pre_truncdrop; |
796 | 0 | rec->tuples_updated = rec->updated_pre_truncdrop; |
797 | 0 | rec->tuples_deleted = rec->deleted_pre_truncdrop; |
798 | 0 | } |
799 | 0 | pgstat_info->counts.tuples_inserted += rec->tuples_inserted; |
800 | 0 | pgstat_info->counts.tuples_updated += rec->tuples_updated; |
801 | 0 | pgstat_info->counts.tuples_deleted += rec->tuples_deleted; |
802 | 0 | pgstat_info->counts.delta_dead_tuples += |
803 | 0 | rec->tuples_inserted + rec->tuples_updated; |
804 | 0 | } |
805 | | |
806 | | /* |
807 | | * Flush out pending stats for the entry |
808 | | * |
809 | | * If nowait is true and the lock could not be immediately acquired, returns |
810 | | * false without flushing the entry. Otherwise returns true. |
811 | | * |
812 | | * Some of the stats are copied to the corresponding pending database stats |
813 | | * entry when successfully flushing. |
814 | | */ |
815 | | bool |
816 | | pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) |
817 | 0 | { |
818 | 0 | Oid dboid; |
819 | 0 | PgStat_TableStatus *lstats; /* pending stats entry */ |
820 | 0 | PgStatShared_Relation *shtabstats; |
821 | 0 | PgStat_StatTabEntry *tabentry; /* table entry of shared stats */ |
822 | 0 | PgStat_StatDBEntry *dbentry; /* pending database entry */ |
823 | |
|
824 | 0 | dboid = entry_ref->shared_entry->key.dboid; |
825 | 0 | lstats = (PgStat_TableStatus *) entry_ref->pending; |
826 | 0 | shtabstats = (PgStatShared_Relation *) entry_ref->shared_stats; |
827 | | |
828 | | /* |
829 | | * Ignore entries that didn't accumulate any actual counts, such as |
830 | | * indexes that were opened by the planner but not used. |
831 | | */ |
832 | 0 | if (pg_memory_is_all_zeros(&lstats->counts, |
833 | 0 | sizeof(struct PgStat_TableCounts))) |
834 | 0 | return true; |
835 | | |
836 | 0 | if (!pgstat_lock_entry(entry_ref, nowait)) |
837 | 0 | return false; |
838 | | |
839 | | /* add the values to the shared entry. */ |
840 | 0 | tabentry = &shtabstats->stats; |
841 | |
|
842 | 0 | tabentry->numscans += lstats->counts.numscans; |
843 | 0 | if (lstats->counts.numscans) |
844 | 0 | { |
845 | 0 | TimestampTz t = GetCurrentTransactionStopTimestamp(); |
846 | |
|
847 | 0 | if (t > tabentry->lastscan) |
848 | 0 | tabentry->lastscan = t; |
849 | 0 | } |
850 | 0 | tabentry->tuples_returned += lstats->counts.tuples_returned; |
851 | 0 | tabentry->tuples_fetched += lstats->counts.tuples_fetched; |
852 | 0 | tabentry->tuples_inserted += lstats->counts.tuples_inserted; |
853 | 0 | tabentry->tuples_updated += lstats->counts.tuples_updated; |
854 | 0 | tabentry->tuples_deleted += lstats->counts.tuples_deleted; |
855 | 0 | tabentry->tuples_hot_updated += lstats->counts.tuples_hot_updated; |
856 | 0 | tabentry->tuples_newpage_updated += lstats->counts.tuples_newpage_updated; |
857 | | |
858 | | /* |
859 | | * If table was truncated/dropped, first reset the live/dead counters. |
860 | | */ |
861 | 0 | if (lstats->counts.truncdropped) |
862 | 0 | { |
863 | 0 | tabentry->live_tuples = 0; |
864 | 0 | tabentry->dead_tuples = 0; |
865 | 0 | tabentry->ins_since_vacuum = 0; |
866 | 0 | } |
867 | |
|
868 | 0 | tabentry->live_tuples += lstats->counts.delta_live_tuples; |
869 | 0 | tabentry->dead_tuples += lstats->counts.delta_dead_tuples; |
870 | 0 | tabentry->mod_since_analyze += lstats->counts.changed_tuples; |
871 | | |
872 | | /* |
873 | | * Using tuples_inserted to update ins_since_vacuum does mean that we'll |
874 | | * track aborted inserts too. This isn't ideal, but otherwise probably |
875 | | * not worth adding an extra field for. It may just amount to autovacuums |
876 | | * triggering for inserts more often than they maybe should, which is |
877 | | * probably not going to be common enough to be too concerned about here. |
878 | | */ |
879 | 0 | tabentry->ins_since_vacuum += lstats->counts.tuples_inserted; |
880 | |
|
881 | 0 | tabentry->blocks_fetched += lstats->counts.blocks_fetched; |
882 | 0 | tabentry->blocks_hit += lstats->counts.blocks_hit; |
883 | | |
884 | | /* Clamp live_tuples in case of negative delta_live_tuples */ |
885 | 0 | tabentry->live_tuples = Max(tabentry->live_tuples, 0); |
886 | | /* Likewise for dead_tuples */ |
887 | 0 | tabentry->dead_tuples = Max(tabentry->dead_tuples, 0); |
888 | |
|
889 | 0 | pgstat_unlock_entry(entry_ref); |
890 | | |
891 | | /* The entry was successfully flushed, add the same to database stats */ |
892 | 0 | dbentry = pgstat_prep_database_pending(dboid); |
893 | 0 | dbentry->tuples_returned += lstats->counts.tuples_returned; |
894 | 0 | dbentry->tuples_fetched += lstats->counts.tuples_fetched; |
895 | 0 | dbentry->tuples_inserted += lstats->counts.tuples_inserted; |
896 | 0 | dbentry->tuples_updated += lstats->counts.tuples_updated; |
897 | 0 | dbentry->tuples_deleted += lstats->counts.tuples_deleted; |
898 | 0 | dbentry->blocks_fetched += lstats->counts.blocks_fetched; |
899 | 0 | dbentry->blocks_hit += lstats->counts.blocks_hit; |
900 | |
|
901 | 0 | return true; |
902 | 0 | } |
903 | | |
904 | | void |
905 | | pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref) |
906 | 0 | { |
907 | 0 | PgStat_TableStatus *pending = (PgStat_TableStatus *) entry_ref->pending; |
908 | |
|
909 | 0 | if (pending->relation) |
910 | 0 | pgstat_unlink_relation(pending->relation); |
911 | 0 | } |
912 | | |
913 | | void |
914 | | pgstat_relation_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts) |
915 | 0 | { |
916 | 0 | ((PgStatShared_Relation *) header)->stats.stat_reset_time = ts; |
917 | 0 | } |
918 | | |
919 | | /* |
920 | | * Find or create a PgStat_TableStatus entry for rel. New entry is created and |
921 | | * initialized if not exists. |
922 | | */ |
923 | | static PgStat_TableStatus * |
924 | | pgstat_prep_relation_pending(Oid rel_id, bool isshared) |
925 | 0 | { |
926 | 0 | PgStat_EntryRef *entry_ref; |
927 | 0 | PgStat_TableStatus *pending; |
928 | |
|
929 | 0 | entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_RELATION, |
930 | 0 | isshared ? InvalidOid : MyDatabaseId, |
931 | 0 | rel_id, NULL); |
932 | 0 | pending = entry_ref->pending; |
933 | 0 | pending->id = rel_id; |
934 | 0 | pending->shared = isshared; |
935 | |
|
936 | 0 | return pending; |
937 | 0 | } |
938 | | |
939 | | /* |
940 | | * add a new (sub)transaction state record |
941 | | */ |
942 | | static void |
943 | | add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level) |
944 | 0 | { |
945 | 0 | PgStat_SubXactStatus *xact_state; |
946 | 0 | PgStat_TableXactStatus *trans; |
947 | | |
948 | | /* |
949 | | * If this is the first rel to be modified at the current nest level, we |
950 | | * first have to push a transaction stack entry. |
951 | | */ |
952 | 0 | xact_state = pgstat_get_xact_stack_level(nest_level); |
953 | | |
954 | | /* Now make a per-table stack entry */ |
955 | 0 | trans = (PgStat_TableXactStatus *) |
956 | 0 | MemoryContextAllocZero(TopTransactionContext, |
957 | 0 | sizeof(PgStat_TableXactStatus)); |
958 | 0 | trans->nest_level = nest_level; |
959 | 0 | trans->upper = pgstat_info->trans; |
960 | 0 | trans->parent = pgstat_info; |
961 | 0 | trans->next = xact_state->first; |
962 | 0 | xact_state->first = trans; |
963 | 0 | pgstat_info->trans = trans; |
964 | 0 | } |
965 | | |
966 | | /* |
967 | | * Add a new (sub)transaction record if needed. |
968 | | */ |
969 | | static void |
970 | | ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info) |
971 | 0 | { |
972 | 0 | int nest_level = GetCurrentTransactionNestLevel(); |
973 | |
|
974 | 0 | if (pgstat_info->trans == NULL || |
975 | 0 | pgstat_info->trans->nest_level != nest_level) |
976 | 0 | add_tabstat_xact_level(pgstat_info, nest_level); |
977 | 0 | } |
978 | | |
979 | | /* |
980 | | * Whenever a table is truncated/dropped, we save its i/u/d counters so that |
981 | | * they can be cleared, and if the (sub)xact that executed the truncate/drop |
982 | | * later aborts, the counters can be restored to the saved (pre-truncate/drop) |
983 | | * values. |
984 | | * |
985 | | * Note that for truncate we do this on the first truncate in any particular |
986 | | * subxact level only. |
987 | | */ |
988 | | static void |
989 | | save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop) |
990 | 0 | { |
991 | 0 | if (!trans->truncdropped || is_drop) |
992 | 0 | { |
993 | 0 | trans->inserted_pre_truncdrop = trans->tuples_inserted; |
994 | 0 | trans->updated_pre_truncdrop = trans->tuples_updated; |
995 | 0 | trans->deleted_pre_truncdrop = trans->tuples_deleted; |
996 | 0 | trans->truncdropped = true; |
997 | 0 | } |
998 | 0 | } |
999 | | |
1000 | | /* |
1001 | | * restore counters when a truncate aborts |
1002 | | */ |
1003 | | static void |
1004 | | restore_truncdrop_counters(PgStat_TableXactStatus *trans) |
1005 | 0 | { |
1006 | 0 | if (trans->truncdropped) |
1007 | 0 | { |
1008 | 0 | trans->tuples_inserted = trans->inserted_pre_truncdrop; |
1009 | 0 | trans->tuples_updated = trans->updated_pre_truncdrop; |
1010 | 0 | trans->tuples_deleted = trans->deleted_pre_truncdrop; |
1011 | 0 | } |
1012 | 0 | } |