/src/postgres/src/backend/executor/instrument.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * instrument.c |
4 | | * functions for instrumentation of plan execution |
5 | | * |
6 | | * |
7 | | * Copyright (c) 2001-2025, PostgreSQL Global Development Group |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/executor/instrument.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | #include "postgres.h" |
15 | | |
16 | | #include <unistd.h> |
17 | | |
18 | | #include "executor/instrument.h" |
19 | | |
20 | | BufferUsage pgBufferUsage; |
21 | | static BufferUsage save_pgBufferUsage; |
22 | | WalUsage pgWalUsage; |
23 | | static WalUsage save_pgWalUsage; |
24 | | |
25 | | static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); |
26 | | static void WalUsageAdd(WalUsage *dst, WalUsage *add); |
27 | | |
28 | | |
29 | | /* Allocate new instrumentation structure(s) */ |
30 | | Instrumentation * |
31 | | InstrAlloc(int n, int instrument_options, bool async_mode) |
32 | 0 | { |
33 | 0 | Instrumentation *instr; |
34 | | |
35 | | /* initialize all fields to zeroes, then modify as needed */ |
36 | 0 | instr = palloc0(n * sizeof(Instrumentation)); |
37 | 0 | if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL)) |
38 | 0 | { |
39 | 0 | bool need_buffers = (instrument_options & INSTRUMENT_BUFFERS) != 0; |
40 | 0 | bool need_wal = (instrument_options & INSTRUMENT_WAL) != 0; |
41 | 0 | bool need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; |
42 | 0 | int i; |
43 | |
|
44 | 0 | for (i = 0; i < n; i++) |
45 | 0 | { |
46 | 0 | instr[i].need_bufusage = need_buffers; |
47 | 0 | instr[i].need_walusage = need_wal; |
48 | 0 | instr[i].need_timer = need_timer; |
49 | 0 | instr[i].async_mode = async_mode; |
50 | 0 | } |
51 | 0 | } |
52 | |
|
53 | 0 | return instr; |
54 | 0 | } |
55 | | |
56 | | /* Initialize a pre-allocated instrumentation structure. */ |
57 | | void |
58 | | InstrInit(Instrumentation *instr, int instrument_options) |
59 | 0 | { |
60 | 0 | memset(instr, 0, sizeof(Instrumentation)); |
61 | 0 | instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0; |
62 | 0 | instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0; |
63 | 0 | instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; |
64 | 0 | } |
65 | | |
66 | | /* Entry to a plan node */ |
67 | | void |
68 | | InstrStartNode(Instrumentation *instr) |
69 | 0 | { |
70 | 0 | if (instr->need_timer && |
71 | 0 | !INSTR_TIME_SET_CURRENT_LAZY(instr->starttime)) |
72 | 0 | elog(ERROR, "InstrStartNode called twice in a row"); |
73 | | |
74 | | /* save buffer usage totals at node entry, if needed */ |
75 | 0 | if (instr->need_bufusage) |
76 | 0 | instr->bufusage_start = pgBufferUsage; |
77 | |
|
78 | 0 | if (instr->need_walusage) |
79 | 0 | instr->walusage_start = pgWalUsage; |
80 | 0 | } |
81 | | |
82 | | /* Exit from a plan node */ |
83 | | void |
84 | | InstrStopNode(Instrumentation *instr, double nTuples) |
85 | 0 | { |
86 | 0 | double save_tuplecount = instr->tuplecount; |
87 | 0 | instr_time endtime; |
88 | | |
89 | | /* count the returned tuples */ |
90 | 0 | instr->tuplecount += nTuples; |
91 | | |
92 | | /* let's update the time only if the timer was requested */ |
93 | 0 | if (instr->need_timer) |
94 | 0 | { |
95 | 0 | if (INSTR_TIME_IS_ZERO(instr->starttime)) |
96 | 0 | elog(ERROR, "InstrStopNode called without start"); |
97 | | |
98 | 0 | INSTR_TIME_SET_CURRENT(endtime); |
99 | 0 | INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->starttime); |
100 | |
|
101 | 0 | INSTR_TIME_SET_ZERO(instr->starttime); |
102 | 0 | } |
103 | | |
104 | | /* Add delta of buffer usage since entry to node's totals */ |
105 | 0 | if (instr->need_bufusage) |
106 | 0 | BufferUsageAccumDiff(&instr->bufusage, |
107 | 0 | &pgBufferUsage, &instr->bufusage_start); |
108 | |
|
109 | 0 | if (instr->need_walusage) |
110 | 0 | WalUsageAccumDiff(&instr->walusage, |
111 | 0 | &pgWalUsage, &instr->walusage_start); |
112 | | |
113 | | /* Is this the first tuple of this cycle? */ |
114 | 0 | if (!instr->running) |
115 | 0 | { |
116 | 0 | instr->running = true; |
117 | 0 | instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter); |
118 | 0 | } |
119 | 0 | else |
120 | 0 | { |
121 | | /* |
122 | | * In async mode, if the plan node hadn't emitted any tuples before, |
123 | | * this might be the first tuple |
124 | | */ |
125 | 0 | if (instr->async_mode && save_tuplecount < 1.0) |
126 | 0 | instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter); |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | | /* Update tuple count */ |
131 | | void |
132 | | InstrUpdateTupleCount(Instrumentation *instr, double nTuples) |
133 | 0 | { |
134 | | /* count the returned tuples */ |
135 | 0 | instr->tuplecount += nTuples; |
136 | 0 | } |
137 | | |
138 | | /* Finish a run cycle for a plan node */ |
139 | | void |
140 | | InstrEndLoop(Instrumentation *instr) |
141 | 0 | { |
142 | 0 | double totaltime; |
143 | | |
144 | | /* Skip if nothing has happened, or already shut down */ |
145 | 0 | if (!instr->running) |
146 | 0 | return; |
147 | | |
148 | 0 | if (!INSTR_TIME_IS_ZERO(instr->starttime)) |
149 | 0 | elog(ERROR, "InstrEndLoop called on running node"); |
150 | | |
151 | | /* Accumulate per-cycle statistics into totals */ |
152 | 0 | totaltime = INSTR_TIME_GET_DOUBLE(instr->counter); |
153 | |
|
154 | 0 | instr->startup += instr->firsttuple; |
155 | 0 | instr->total += totaltime; |
156 | 0 | instr->ntuples += instr->tuplecount; |
157 | 0 | instr->nloops += 1; |
158 | | |
159 | | /* Reset for next cycle (if any) */ |
160 | 0 | instr->running = false; |
161 | 0 | INSTR_TIME_SET_ZERO(instr->starttime); |
162 | 0 | INSTR_TIME_SET_ZERO(instr->counter); |
163 | 0 | instr->firsttuple = 0; |
164 | 0 | instr->tuplecount = 0; |
165 | 0 | } |
166 | | |
167 | | /* aggregate instrumentation information */ |
168 | | void |
169 | | InstrAggNode(Instrumentation *dst, Instrumentation *add) |
170 | 0 | { |
171 | 0 | if (!dst->running && add->running) |
172 | 0 | { |
173 | 0 | dst->running = true; |
174 | 0 | dst->firsttuple = add->firsttuple; |
175 | 0 | } |
176 | 0 | else if (dst->running && add->running && dst->firsttuple > add->firsttuple) |
177 | 0 | dst->firsttuple = add->firsttuple; |
178 | |
|
179 | 0 | INSTR_TIME_ADD(dst->counter, add->counter); |
180 | |
|
181 | 0 | dst->tuplecount += add->tuplecount; |
182 | 0 | dst->startup += add->startup; |
183 | 0 | dst->total += add->total; |
184 | 0 | dst->ntuples += add->ntuples; |
185 | 0 | dst->ntuples2 += add->ntuples2; |
186 | 0 | dst->nloops += add->nloops; |
187 | 0 | dst->nfiltered1 += add->nfiltered1; |
188 | 0 | dst->nfiltered2 += add->nfiltered2; |
189 | | |
190 | | /* Add delta of buffer usage since entry to node's totals */ |
191 | 0 | if (dst->need_bufusage) |
192 | 0 | BufferUsageAdd(&dst->bufusage, &add->bufusage); |
193 | |
|
194 | 0 | if (dst->need_walusage) |
195 | 0 | WalUsageAdd(&dst->walusage, &add->walusage); |
196 | 0 | } |
197 | | |
198 | | /* note current values during parallel executor startup */ |
199 | | void |
200 | | InstrStartParallelQuery(void) |
201 | 0 | { |
202 | 0 | save_pgBufferUsage = pgBufferUsage; |
203 | 0 | save_pgWalUsage = pgWalUsage; |
204 | 0 | } |
205 | | |
206 | | /* report usage after parallel executor shutdown */ |
207 | | void |
208 | | InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage) |
209 | 0 | { |
210 | 0 | memset(bufusage, 0, sizeof(BufferUsage)); |
211 | 0 | BufferUsageAccumDiff(bufusage, &pgBufferUsage, &save_pgBufferUsage); |
212 | 0 | memset(walusage, 0, sizeof(WalUsage)); |
213 | 0 | WalUsageAccumDiff(walusage, &pgWalUsage, &save_pgWalUsage); |
214 | 0 | } |
215 | | |
216 | | /* accumulate work done by workers in leader's stats */ |
217 | | void |
218 | | InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage) |
219 | 0 | { |
220 | 0 | BufferUsageAdd(&pgBufferUsage, bufusage); |
221 | 0 | WalUsageAdd(&pgWalUsage, walusage); |
222 | 0 | } |
223 | | |
224 | | /* dst += add */ |
225 | | static void |
226 | | BufferUsageAdd(BufferUsage *dst, const BufferUsage *add) |
227 | 0 | { |
228 | 0 | dst->shared_blks_hit += add->shared_blks_hit; |
229 | 0 | dst->shared_blks_read += add->shared_blks_read; |
230 | 0 | dst->shared_blks_dirtied += add->shared_blks_dirtied; |
231 | 0 | dst->shared_blks_written += add->shared_blks_written; |
232 | 0 | dst->local_blks_hit += add->local_blks_hit; |
233 | 0 | dst->local_blks_read += add->local_blks_read; |
234 | 0 | dst->local_blks_dirtied += add->local_blks_dirtied; |
235 | 0 | dst->local_blks_written += add->local_blks_written; |
236 | 0 | dst->temp_blks_read += add->temp_blks_read; |
237 | 0 | dst->temp_blks_written += add->temp_blks_written; |
238 | 0 | INSTR_TIME_ADD(dst->shared_blk_read_time, add->shared_blk_read_time); |
239 | 0 | INSTR_TIME_ADD(dst->shared_blk_write_time, add->shared_blk_write_time); |
240 | 0 | INSTR_TIME_ADD(dst->local_blk_read_time, add->local_blk_read_time); |
241 | 0 | INSTR_TIME_ADD(dst->local_blk_write_time, add->local_blk_write_time); |
242 | 0 | INSTR_TIME_ADD(dst->temp_blk_read_time, add->temp_blk_read_time); |
243 | 0 | INSTR_TIME_ADD(dst->temp_blk_write_time, add->temp_blk_write_time); |
244 | 0 | } |
245 | | |
246 | | /* dst += add - sub */ |
247 | | void |
248 | | BufferUsageAccumDiff(BufferUsage *dst, |
249 | | const BufferUsage *add, |
250 | | const BufferUsage *sub) |
251 | 0 | { |
252 | 0 | dst->shared_blks_hit += add->shared_blks_hit - sub->shared_blks_hit; |
253 | 0 | dst->shared_blks_read += add->shared_blks_read - sub->shared_blks_read; |
254 | 0 | dst->shared_blks_dirtied += add->shared_blks_dirtied - sub->shared_blks_dirtied; |
255 | 0 | dst->shared_blks_written += add->shared_blks_written - sub->shared_blks_written; |
256 | 0 | dst->local_blks_hit += add->local_blks_hit - sub->local_blks_hit; |
257 | 0 | dst->local_blks_read += add->local_blks_read - sub->local_blks_read; |
258 | 0 | dst->local_blks_dirtied += add->local_blks_dirtied - sub->local_blks_dirtied; |
259 | 0 | dst->local_blks_written += add->local_blks_written - sub->local_blks_written; |
260 | 0 | dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read; |
261 | 0 | dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written; |
262 | 0 | INSTR_TIME_ACCUM_DIFF(dst->shared_blk_read_time, |
263 | 0 | add->shared_blk_read_time, sub->shared_blk_read_time); |
264 | 0 | INSTR_TIME_ACCUM_DIFF(dst->shared_blk_write_time, |
265 | 0 | add->shared_blk_write_time, sub->shared_blk_write_time); |
266 | 0 | INSTR_TIME_ACCUM_DIFF(dst->local_blk_read_time, |
267 | 0 | add->local_blk_read_time, sub->local_blk_read_time); |
268 | 0 | INSTR_TIME_ACCUM_DIFF(dst->local_blk_write_time, |
269 | 0 | add->local_blk_write_time, sub->local_blk_write_time); |
270 | 0 | INSTR_TIME_ACCUM_DIFF(dst->temp_blk_read_time, |
271 | 0 | add->temp_blk_read_time, sub->temp_blk_read_time); |
272 | 0 | INSTR_TIME_ACCUM_DIFF(dst->temp_blk_write_time, |
273 | 0 | add->temp_blk_write_time, sub->temp_blk_write_time); |
274 | 0 | } |
275 | | |
276 | | /* helper functions for WAL usage accumulation */ |
277 | | static void |
278 | | WalUsageAdd(WalUsage *dst, WalUsage *add) |
279 | 0 | { |
280 | 0 | dst->wal_bytes += add->wal_bytes; |
281 | 0 | dst->wal_records += add->wal_records; |
282 | 0 | dst->wal_fpi += add->wal_fpi; |
283 | 0 | dst->wal_buffers_full += add->wal_buffers_full; |
284 | 0 | } |
285 | | |
286 | | void |
287 | | WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub) |
288 | 0 | { |
289 | 0 | dst->wal_bytes += add->wal_bytes - sub->wal_bytes; |
290 | 0 | dst->wal_records += add->wal_records - sub->wal_records; |
291 | 0 | dst->wal_fpi += add->wal_fpi - sub->wal_fpi; |
292 | 0 | dst->wal_buffers_full += add->wal_buffers_full - sub->wal_buffers_full; |
293 | 0 | } |