/src/postgres/src/backend/storage/ipc/procarray.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * procarray.c |
4 | | * POSTGRES process array code. |
5 | | * |
6 | | * |
7 | | * This module maintains arrays of PGPROC substructures, as well as associated |
8 | | * arrays in ProcGlobal, for all active backends. Although there are several |
9 | | * uses for this, the principal one is as a means of determining the set of |
10 | | * currently running transactions. |
11 | | * |
12 | | * Because of various subtle race conditions it is critical that a backend |
13 | | * hold the correct locks while setting or clearing its xid (in |
14 | | * ProcGlobal->xids[]/MyProc->xid). See notes in |
15 | | * src/backend/access/transam/README. |
16 | | * |
17 | | * The process arrays now also include structures representing prepared |
18 | | * transactions. The xid and subxids fields of these are valid, as are the |
19 | | * myProcLocks lists. They can be distinguished from regular backend PGPROCs |
20 | | * at need by checking for pid == 0. |
21 | | * |
22 | | * During hot standby, we also keep a list of XIDs representing transactions |
23 | | * that are known to be running on the primary (or more precisely, were running |
24 | | * as of the current point in the WAL stream). This list is kept in the |
25 | | * KnownAssignedXids array, and is updated by watching the sequence of |
26 | | * arriving XIDs. This is necessary because if we leave those XIDs out of |
27 | | * snapshots taken for standby queries, then they will appear to be already |
28 | | * complete, leading to MVCC failures. Note that in hot standby, the PGPROC |
29 | | * array represents standby processes, which by definition are not running |
30 | | * transactions that have XIDs. |
31 | | * |
32 | | * It is perhaps possible for a backend on the primary to terminate without |
33 | | * writing an abort record for its transaction. While that shouldn't really |
34 | | * happen, it would tie up KnownAssignedXids indefinitely, so we protect |
35 | | * ourselves by pruning the array when a valid list of running XIDs arrives. |
36 | | * |
37 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
38 | | * Portions Copyright (c) 1994, Regents of the University of California |
39 | | * |
40 | | * |
41 | | * IDENTIFICATION |
42 | | * src/backend/storage/ipc/procarray.c |
43 | | * |
44 | | *------------------------------------------------------------------------- |
45 | | */ |
46 | | #include "postgres.h" |
47 | | |
48 | | #include <signal.h> |
49 | | |
50 | | #include "access/subtrans.h" |
51 | | #include "access/transam.h" |
52 | | #include "access/twophase.h" |
53 | | #include "access/xact.h" |
54 | | #include "access/xlogutils.h" |
55 | | #include "catalog/catalog.h" |
56 | | #include "catalog/pg_authid.h" |
57 | | #include "miscadmin.h" |
58 | | #include "pgstat.h" |
59 | | #include "port/pg_lfind.h" |
60 | | #include "storage/proc.h" |
61 | | #include "storage/procarray.h" |
62 | | #include "utils/acl.h" |
63 | | #include "utils/builtins.h" |
64 | | #include "utils/lsyscache.h" |
65 | | #include "utils/rel.h" |
66 | | #include "utils/snapmgr.h" |
67 | | |
/*
 * Fetch a uint32 lvalue through a volatile-qualified pointer, so that the
 * access is performed as written rather than being cached or repeated by the
 * compiler.
 */
#define UINT32_ACCESS_ONCE(var) \
	((uint32) (*((volatile uint32 *) &(var))))
69 | | |
70 | | /* Our shared memory area */ |
71 | | typedef struct ProcArrayStruct |
72 | | { |
73 | | int numProcs; /* number of valid procs entries */ |
74 | | int maxProcs; /* allocated size of procs array */ |
75 | | |
76 | | /* |
77 | | * Known assigned XIDs handling |
78 | | */ |
79 | | int maxKnownAssignedXids; /* allocated size of array */ |
80 | | int numKnownAssignedXids; /* current # of valid entries */ |
81 | | int tailKnownAssignedXids; /* index of oldest valid element */ |
82 | | int headKnownAssignedXids; /* index of newest element, + 1 */ |
83 | | |
84 | | /* |
85 | | * Highest subxid that has been removed from KnownAssignedXids array to |
86 | | * prevent overflow; or InvalidTransactionId if none. We track this for |
87 | | * similar reasons to tracking overflowing cached subxids in PGPROC |
88 | | * entries. Must hold exclusive ProcArrayLock to change this, and shared |
89 | | * lock to read it. |
90 | | */ |
91 | | TransactionId lastOverflowedXid; |
92 | | |
93 | | /* oldest xmin of any replication slot */ |
94 | | TransactionId replication_slot_xmin; |
95 | | /* oldest catalog xmin of any replication slot */ |
96 | | TransactionId replication_slot_catalog_xmin; |
97 | | |
98 | | /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */ |
99 | | int pgprocnos[FLEXIBLE_ARRAY_MEMBER]; |
100 | | } ProcArrayStruct; |
101 | | |
102 | | /* |
103 | | * State for the GlobalVisTest* family of functions. Those functions can |
104 | | * e.g. be used to decide if a deleted row can be removed without violating |
105 | | * MVCC semantics: If the deleted row's xmax is not considered to be running |
106 | | * by anyone, the row can be removed. |
107 | | * |
108 | | * To avoid slowing down GetSnapshotData(), we don't calculate a precise |
109 | | * cutoff XID while building a snapshot (looking at the frequently changing |
110 | | * xmins scales badly). Instead we compute two boundaries while building the |
111 | | * snapshot: |
112 | | * |
113 | | * 1) definitely_needed, indicating that rows deleted by XIDs >= |
114 | | * definitely_needed are definitely still visible. |
115 | | * |
116 | | * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can |
117 | | * definitely be removed |
118 | | * |
119 | | * When testing an XID that falls in between the two (i.e. XID >= maybe_needed |
120 | | * && XID < definitely_needed), the boundaries can be recomputed (using |
121 | | * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than |
122 | | * maintaining an accurate value all the time. |
123 | | * |
124 | | * As it is not cheap to compute accurate boundaries, we limit the number of |
125 | | * times that happens in short succession. See GlobalVisTestShouldUpdate(). |
126 | | * |
127 | | * |
128 | | * There are four backend lifetime instances of this struct, optimized for |
129 | | * different types of relations. As e.g. a normal user defined table in one |
130 | | * database is inaccessible to backends connected to another database, a test |
131 | | * specific to a relation can be more aggressive than a test for a shared |
132 | | * relation. Currently we track four different states: |
133 | | * |
134 | | * 1) GlobalVisSharedRels, which only considers an XID's |
135 | | * effects visible-to-everyone if neither snapshots in any database, nor a |
136 | | * replication slot's xmin, nor a replication slot's catalog_xmin might |
137 | | * still consider XID as running. |
138 | | * |
139 | | * 2) GlobalVisCatalogRels, which only considers an XID's |
140 | | * effects visible-to-everyone if neither snapshots in the current |
141 | | * database, nor a replication slot's xmin, nor a replication slot's |
142 | | * catalog_xmin might still consider XID as running. |
143 | | * |
144 | | * I.e. the difference to GlobalVisSharedRels is that |
145 | | * snapshots in other databases are ignored. |
146 | | * |
147 | | * 3) GlobalVisDataRels, which only considers an XID's |
148 | | * effects visible-to-everyone if neither snapshots in the current |
149 | | * database, nor a replication slot's xmin consider XID as running. |
150 | | * |
151 | | * I.e. the difference to GlobalVisCatalogRels is that |
152 | | * replication slot's catalog_xmin is not taken into account. |
153 | | * |
154 | | * 4) GlobalVisTempRels, which only considers the current session, as temp |
155 | | * tables are not visible to other sessions. |
156 | | * |
157 | | * GlobalVisTestFor(relation) returns the appropriate state |
158 | | * for the relation. |
159 | | * |
160 | | * The boundaries are FullTransactionIds instead of TransactionIds to avoid |
161 | | * wraparound dangers. Otherwise there would, for example, be no procarray |
162 | | * state preventing maybe_needed from becoming old enough to wrap around |
163 | | * after the GetSnapshotData() call. |
164 | | * |
165 | | * The typedef is in the header. |
166 | | */ |
167 | | struct GlobalVisState |
168 | | { |
169 | | /* XIDs >= are considered running by some backend */ |
170 | | FullTransactionId definitely_needed; |
171 | | |
172 | | /* XIDs < are not considered to be running by any backend */ |
173 | | FullTransactionId maybe_needed; |
174 | | }; |
175 | | |
176 | | /* |
177 | | * Result of ComputeXidHorizons(). |
178 | | */ |
179 | | typedef struct ComputeXidHorizonsResult |
180 | | { |
181 | | /* |
182 | | * The value of TransamVariables->latestCompletedXid when |
183 | | * ComputeXidHorizons() held ProcArrayLock. |
184 | | */ |
185 | | FullTransactionId latest_completed; |
186 | | |
187 | | /* |
188 | | * The same for procArray->replication_slot_xmin and |
189 | | * procArray->replication_slot_catalog_xmin. |
190 | | */ |
191 | | TransactionId slot_xmin; |
192 | | TransactionId slot_catalog_xmin; |
193 | | |
194 | | /* |
195 | | * Oldest xid that any backend might still consider running. This needs to |
196 | | * include processes running VACUUM, in contrast to the normal visibility |
197 | | * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when |
198 | | * determining visibility, but doesn't care about rows above its xmin to |
199 | | * be removed. |
200 | | * |
201 | | * This likely should only be needed to determine whether pg_subtrans can |
202 | | * be truncated. It currently includes the effects of replication slots, |
203 | | * for historical reasons. But that could likely be changed. |
204 | | */ |
205 | | TransactionId oldest_considered_running; |
206 | | |
207 | | /* |
208 | | * Oldest xid for which deleted tuples need to be retained in shared |
209 | | * tables. |
210 | | * |
211 | | * This includes the effects of replication slots. If that's not desired, |
212 | | * look at shared_oldest_nonremovable_raw; |
213 | | */ |
214 | | TransactionId shared_oldest_nonremovable; |
215 | | |
216 | | /* |
217 | | * Oldest xid that may be necessary to retain in shared tables. This is |
218 | | * the same as shared_oldest_nonremovable, except that is not affected by |
219 | | * replication slot's catalog_xmin. |
220 | | * |
221 | | * This is mainly useful to be able to send the catalog_xmin to upstream |
222 | | * streaming replication servers via hot_standby_feedback, so they can |
223 | | * apply the limit only when accessing catalog tables. |
224 | | */ |
225 | | TransactionId shared_oldest_nonremovable_raw; |
226 | | |
227 | | /* |
228 | | * Oldest xid for which deleted tuples need to be retained in non-shared |
229 | | * catalog tables. |
230 | | */ |
231 | | TransactionId catalog_oldest_nonremovable; |
232 | | |
233 | | /* |
234 | | * Oldest xid for which deleted tuples need to be retained in normal user |
235 | | * defined tables. |
236 | | */ |
237 | | TransactionId data_oldest_nonremovable; |
238 | | |
239 | | /* |
240 | | * Oldest xid for which deleted tuples need to be retained in this |
241 | | * session's temporary tables. |
242 | | */ |
243 | | TransactionId temp_oldest_nonremovable; |
244 | | } ComputeXidHorizonsResult; |
245 | | |
/*
 * The kinds of visibility horizon that GlobalVisHorizonKindForRel() can
 * report.
 */
typedef enum GlobalVisHorizonKind
{
	VISHORIZON_SHARED,
	VISHORIZON_CATALOG,
	VISHORIZON_DATA,
	VISHORIZON_TEMP,
} GlobalVisHorizonKind;
256 | | |
/*
 * Why KnownAssignedXidsCompress() is being invoked.
 */
typedef enum KAXCompressReason
{
	KAX_NO_SPACE,				/* space must be freed at the array's end */
	KAX_PRUNE,					/* old entries have just been pruned */
	KAX_TRANSACTION_END,		/* some XIDs were just committed/removed */
	KAX_STARTUP_PROCESS_IDLE,	/* the startup process is about to sleep */
} KAXCompressReason;
267 | | |
268 | | |
269 | | static ProcArrayStruct *procArray; |
270 | | |
271 | | static PGPROC *allProcs; |
272 | | |
273 | | /* |
274 | | * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress() |
275 | | */ |
276 | | static TransactionId cachedXidIsNotInProgress = InvalidTransactionId; |
277 | | |
278 | | /* |
279 | | * Bookkeeping for tracking emulated transactions in recovery |
280 | | */ |
281 | | static TransactionId *KnownAssignedXids; |
282 | | static bool *KnownAssignedXidsValid; |
283 | | static TransactionId latestObservedXid = InvalidTransactionId; |
284 | | |
285 | | /* |
286 | | * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is |
287 | | * the highest xid that might still be running that we don't have in |
288 | | * KnownAssignedXids. |
289 | | */ |
290 | | static TransactionId standbySnapshotPendingXmin; |
291 | | |
292 | | /* |
293 | | * State for visibility checks on different types of relations. See struct |
294 | | * GlobalVisState for details. As shared, catalog, normal and temporary |
295 | | * relations can have different horizons, one such state exists for each. |
296 | | */ |
297 | | static GlobalVisState GlobalVisSharedRels; |
298 | | static GlobalVisState GlobalVisCatalogRels; |
299 | | static GlobalVisState GlobalVisDataRels; |
300 | | static GlobalVisState GlobalVisTempRels; |
301 | | |
302 | | /* |
303 | | * This backend's RecentXmin at the last time the accurate xmin horizon was |
304 | | * recomputed, or InvalidTransactionId if it has not. Used to limit how many |
305 | | * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate(). |
306 | | */ |
307 | | static TransactionId ComputeXidHorizonsResultLastXmin; |
308 | | |
#ifdef XIDCACHE_DEBUG

/*
 * XidCache measurement: each counter is followed by the macro that bumps it.
 */
static long xc_by_recent_xmin = 0;
#define xc_by_recent_xmin_inc()		(xc_by_recent_xmin++)

static long xc_by_known_xact = 0;
#define xc_by_known_xact_inc()		(xc_by_known_xact++)

static long xc_by_my_xact = 0;
#define xc_by_my_xact_inc()			(xc_by_my_xact++)

static long xc_by_latest_xid = 0;
#define xc_by_latest_xid_inc()		(xc_by_latest_xid++)

static long xc_by_main_xid = 0;
#define xc_by_main_xid_inc()		(xc_by_main_xid++)

static long xc_by_child_xid = 0;
#define xc_by_child_xid_inc()		(xc_by_child_xid++)

static long xc_by_known_assigned = 0;
#define xc_by_known_assigned_inc()	(xc_by_known_assigned++)

static long xc_no_overflow = 0;
#define xc_no_overflow_inc()		(xc_no_overflow++)

static long xc_slow_answer = 0;
#define xc_slow_answer_inc()		(xc_slow_answer++)

static void DisplayXidCache(void);

#else							/* !XIDCACHE_DEBUG */

/* Measurement is compiled out: every increment macro expands to a no-op. */
#define xc_by_recent_xmin_inc()		((void) 0)
#define xc_by_known_xact_inc()		((void) 0)
#define xc_by_my_xact_inc()			((void) 0)
#define xc_by_latest_xid_inc()		((void) 0)
#define xc_by_main_xid_inc()		((void) 0)
#define xc_by_child_xid_inc()		((void) 0)
#define xc_by_known_assigned_inc()	((void) 0)
#define xc_no_overflow_inc()		((void) 0)
#define xc_slow_answer_inc()		((void) 0)

#endif							/* XIDCACHE_DEBUG */
345 | | |
346 | | /* Primitives for KnownAssignedXids array handling for standby */ |
347 | | static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock); |
348 | | static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, |
349 | | bool exclusive_lock); |
350 | | static bool KnownAssignedXidsSearch(TransactionId xid, bool remove); |
351 | | static bool KnownAssignedXidExists(TransactionId xid); |
352 | | static void KnownAssignedXidsRemove(TransactionId xid); |
353 | | static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, |
354 | | TransactionId *subxids); |
355 | | static void KnownAssignedXidsRemovePreceding(TransactionId removeXid); |
356 | | static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax); |
357 | | static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, |
358 | | TransactionId *xmin, |
359 | | TransactionId xmax); |
360 | | static TransactionId KnownAssignedXidsGetOldestXmin(void); |
361 | | static void KnownAssignedXidsDisplay(int trace_level); |
362 | | static void KnownAssignedXidsReset(void); |
363 | | static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid); |
364 | | static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); |
365 | | static void MaintainLatestCompletedXid(TransactionId latestXid); |
366 | | static void MaintainLatestCompletedXidRecovery(TransactionId latestXid); |
367 | | |
368 | | static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel, |
369 | | TransactionId xid); |
370 | | static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons); |
371 | | |
372 | | /* |
373 | | * Report shared-memory space needed by ProcArrayShmemInit |
374 | | */ |
375 | | Size |
376 | | ProcArrayShmemSize(void) |
377 | 0 | { |
378 | 0 | Size size; |
379 | | |
380 | | /* Size of the ProcArray structure itself */ |
381 | 0 | #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts) |
382 | |
|
383 | 0 | size = offsetof(ProcArrayStruct, pgprocnos); |
384 | 0 | size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS)); |
385 | | |
386 | | /* |
387 | | * During Hot Standby processing we have a data structure called |
388 | | * KnownAssignedXids, created in shared memory. Local data structures are |
389 | | * also created in various backends during GetSnapshotData(), |
390 | | * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the |
391 | | * main structures created in those functions must be identically sized, |
392 | | * since we may at times copy the whole of the data structures around. We |
393 | | * refer to this size as TOTAL_MAX_CACHED_SUBXIDS. |
394 | | * |
395 | | * Ideally we'd only create this structure if we were actually doing hot |
396 | | * standby in the current run, but we don't know that yet at the time |
397 | | * shared memory is being set up. |
398 | | */ |
399 | 0 | #define TOTAL_MAX_CACHED_SUBXIDS \ |
400 | 0 | ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS) |
401 | |
|
402 | 0 | if (EnableHotStandby) |
403 | 0 | { |
404 | 0 | size = add_size(size, |
405 | 0 | mul_size(sizeof(TransactionId), |
406 | 0 | TOTAL_MAX_CACHED_SUBXIDS)); |
407 | 0 | size = add_size(size, |
408 | 0 | mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS)); |
409 | 0 | } |
410 | |
|
411 | 0 | return size; |
412 | 0 | } |
413 | | |
414 | | /* |
415 | | * Initialize the shared PGPROC array during postmaster startup. |
416 | | */ |
417 | | void |
418 | | ProcArrayShmemInit(void) |
419 | 0 | { |
420 | 0 | bool found; |
421 | | |
422 | | /* Create or attach to the ProcArray shared structure */ |
423 | 0 | procArray = (ProcArrayStruct *) |
424 | 0 | ShmemInitStruct("Proc Array", |
425 | 0 | add_size(offsetof(ProcArrayStruct, pgprocnos), |
426 | 0 | mul_size(sizeof(int), |
427 | 0 | PROCARRAY_MAXPROCS)), |
428 | 0 | &found); |
429 | |
|
430 | 0 | if (!found) |
431 | 0 | { |
432 | | /* |
433 | | * We're the first - initialize. |
434 | | */ |
435 | 0 | procArray->numProcs = 0; |
436 | 0 | procArray->maxProcs = PROCARRAY_MAXPROCS; |
437 | 0 | procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS; |
438 | 0 | procArray->numKnownAssignedXids = 0; |
439 | 0 | procArray->tailKnownAssignedXids = 0; |
440 | 0 | procArray->headKnownAssignedXids = 0; |
441 | 0 | procArray->lastOverflowedXid = InvalidTransactionId; |
442 | 0 | procArray->replication_slot_xmin = InvalidTransactionId; |
443 | 0 | procArray->replication_slot_catalog_xmin = InvalidTransactionId; |
444 | 0 | TransamVariables->xactCompletionCount = 1; |
445 | 0 | } |
446 | |
|
447 | 0 | allProcs = ProcGlobal->allProcs; |
448 | | |
449 | | /* Create or attach to the KnownAssignedXids arrays too, if needed */ |
450 | 0 | if (EnableHotStandby) |
451 | 0 | { |
452 | 0 | KnownAssignedXids = (TransactionId *) |
453 | 0 | ShmemInitStruct("KnownAssignedXids", |
454 | 0 | mul_size(sizeof(TransactionId), |
455 | 0 | TOTAL_MAX_CACHED_SUBXIDS), |
456 | 0 | &found); |
457 | 0 | KnownAssignedXidsValid = (bool *) |
458 | 0 | ShmemInitStruct("KnownAssignedXidsValid", |
459 | 0 | mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS), |
460 | 0 | &found); |
461 | 0 | } |
462 | 0 | } |
463 | | |
464 | | /* |
465 | | * Add the specified PGPROC to the shared array. |
466 | | */ |
467 | | void |
468 | | ProcArrayAdd(PGPROC *proc) |
469 | 0 | { |
470 | 0 | int pgprocno = GetNumberFromPGProc(proc); |
471 | 0 | ProcArrayStruct *arrayP = procArray; |
472 | 0 | int index; |
473 | 0 | int movecount; |
474 | | |
475 | | /* See ProcGlobal comment explaining why both locks are held */ |
476 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
477 | 0 | LWLockAcquire(XidGenLock, LW_EXCLUSIVE); |
478 | |
|
479 | 0 | if (arrayP->numProcs >= arrayP->maxProcs) |
480 | 0 | { |
481 | | /* |
482 | | * Oops, no room. (This really shouldn't happen, since there is a |
483 | | * fixed supply of PGPROC structs too, and so we should have failed |
484 | | * earlier.) |
485 | | */ |
486 | 0 | ereport(FATAL, |
487 | 0 | (errcode(ERRCODE_TOO_MANY_CONNECTIONS), |
488 | 0 | errmsg("sorry, too many clients already"))); |
489 | 0 | } |
490 | | |
491 | | /* |
492 | | * Keep the procs array sorted by (PGPROC *) so that we can utilize |
493 | | * locality of references much better. This is useful while traversing the |
494 | | * ProcArray because there is an increased likelihood of finding the next |
495 | | * PGPROC structure in the cache. |
496 | | * |
497 | | * Since the occurrence of adding/removing a proc is much lower than the |
498 | | * access to the ProcArray itself, the overhead should be marginal |
499 | | */ |
500 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
501 | 0 | { |
502 | 0 | int this_procno = arrayP->pgprocnos[index]; |
503 | |
|
504 | 0 | Assert(this_procno >= 0 && this_procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS)); |
505 | 0 | Assert(allProcs[this_procno].pgxactoff == index); |
506 | | |
507 | | /* If we have found our right position in the array, break */ |
508 | 0 | if (this_procno > pgprocno) |
509 | 0 | break; |
510 | 0 | } |
511 | |
|
512 | 0 | movecount = arrayP->numProcs - index; |
513 | 0 | memmove(&arrayP->pgprocnos[index + 1], |
514 | 0 | &arrayP->pgprocnos[index], |
515 | 0 | movecount * sizeof(*arrayP->pgprocnos)); |
516 | 0 | memmove(&ProcGlobal->xids[index + 1], |
517 | 0 | &ProcGlobal->xids[index], |
518 | 0 | movecount * sizeof(*ProcGlobal->xids)); |
519 | 0 | memmove(&ProcGlobal->subxidStates[index + 1], |
520 | 0 | &ProcGlobal->subxidStates[index], |
521 | 0 | movecount * sizeof(*ProcGlobal->subxidStates)); |
522 | 0 | memmove(&ProcGlobal->statusFlags[index + 1], |
523 | 0 | &ProcGlobal->statusFlags[index], |
524 | 0 | movecount * sizeof(*ProcGlobal->statusFlags)); |
525 | |
|
526 | 0 | arrayP->pgprocnos[index] = GetNumberFromPGProc(proc); |
527 | 0 | proc->pgxactoff = index; |
528 | 0 | ProcGlobal->xids[index] = proc->xid; |
529 | 0 | ProcGlobal->subxidStates[index] = proc->subxidStatus; |
530 | 0 | ProcGlobal->statusFlags[index] = proc->statusFlags; |
531 | |
|
532 | 0 | arrayP->numProcs++; |
533 | | |
534 | | /* adjust pgxactoff for all following PGPROCs */ |
535 | 0 | index++; |
536 | 0 | for (; index < arrayP->numProcs; index++) |
537 | 0 | { |
538 | 0 | int procno = arrayP->pgprocnos[index]; |
539 | |
|
540 | 0 | Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS)); |
541 | 0 | Assert(allProcs[procno].pgxactoff == index - 1); |
542 | |
|
543 | 0 | allProcs[procno].pgxactoff = index; |
544 | 0 | } |
545 | | |
546 | | /* |
547 | | * Release in reversed acquisition order, to reduce frequency of having to |
548 | | * wait for XidGenLock while holding ProcArrayLock. |
549 | | */ |
550 | 0 | LWLockRelease(XidGenLock); |
551 | 0 | LWLockRelease(ProcArrayLock); |
552 | 0 | } |
553 | | |
554 | | /* |
555 | | * Remove the specified PGPROC from the shared array. |
556 | | * |
557 | | * When latestXid is a valid XID, we are removing a live 2PC gxact from the |
558 | | * array, and thus causing it to appear as "not running" anymore. In this |
559 | | * case we must advance latestCompletedXid. (This is essentially the same |
560 | | * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take |
561 | | * the ProcArrayLock only once, and don't damage the content of the PGPROC; |
562 | | * twophase.c depends on the latter.) |
563 | | */ |
564 | | void |
565 | | ProcArrayRemove(PGPROC *proc, TransactionId latestXid) |
566 | 0 | { |
567 | 0 | ProcArrayStruct *arrayP = procArray; |
568 | 0 | int myoff; |
569 | 0 | int movecount; |
570 | |
|
571 | | #ifdef XIDCACHE_DEBUG |
572 | | /* dump stats at backend shutdown, but not prepared-xact end */ |
573 | | if (proc->pid != 0) |
574 | | DisplayXidCache(); |
575 | | #endif |
576 | | |
577 | | /* See ProcGlobal comment explaining why both locks are held */ |
578 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
579 | 0 | LWLockAcquire(XidGenLock, LW_EXCLUSIVE); |
580 | |
|
581 | 0 | myoff = proc->pgxactoff; |
582 | |
|
583 | 0 | Assert(myoff >= 0 && myoff < arrayP->numProcs); |
584 | 0 | Assert(ProcGlobal->allProcs[arrayP->pgprocnos[myoff]].pgxactoff == myoff); |
585 | |
|
586 | 0 | if (TransactionIdIsValid(latestXid)) |
587 | 0 | { |
588 | 0 | Assert(TransactionIdIsValid(ProcGlobal->xids[myoff])); |
589 | | |
590 | | /* Advance global latestCompletedXid while holding the lock */ |
591 | 0 | MaintainLatestCompletedXid(latestXid); |
592 | | |
593 | | /* Same with xactCompletionCount */ |
594 | 0 | TransamVariables->xactCompletionCount++; |
595 | |
|
596 | 0 | ProcGlobal->xids[myoff] = InvalidTransactionId; |
597 | 0 | ProcGlobal->subxidStates[myoff].overflowed = false; |
598 | 0 | ProcGlobal->subxidStates[myoff].count = 0; |
599 | 0 | } |
600 | 0 | else |
601 | 0 | { |
602 | | /* Shouldn't be trying to remove a live transaction here */ |
603 | 0 | Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff])); |
604 | 0 | } |
605 | |
|
606 | 0 | Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff])); |
607 | 0 | Assert(ProcGlobal->subxidStates[myoff].count == 0); |
608 | 0 | Assert(ProcGlobal->subxidStates[myoff].overflowed == false); |
609 | |
|
610 | 0 | ProcGlobal->statusFlags[myoff] = 0; |
611 | | |
612 | | /* Keep the PGPROC array sorted. See notes above */ |
613 | 0 | movecount = arrayP->numProcs - myoff - 1; |
614 | 0 | memmove(&arrayP->pgprocnos[myoff], |
615 | 0 | &arrayP->pgprocnos[myoff + 1], |
616 | 0 | movecount * sizeof(*arrayP->pgprocnos)); |
617 | 0 | memmove(&ProcGlobal->xids[myoff], |
618 | 0 | &ProcGlobal->xids[myoff + 1], |
619 | 0 | movecount * sizeof(*ProcGlobal->xids)); |
620 | 0 | memmove(&ProcGlobal->subxidStates[myoff], |
621 | 0 | &ProcGlobal->subxidStates[myoff + 1], |
622 | 0 | movecount * sizeof(*ProcGlobal->subxidStates)); |
623 | 0 | memmove(&ProcGlobal->statusFlags[myoff], |
624 | 0 | &ProcGlobal->statusFlags[myoff + 1], |
625 | 0 | movecount * sizeof(*ProcGlobal->statusFlags)); |
626 | |
|
627 | 0 | arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */ |
628 | 0 | arrayP->numProcs--; |
629 | | |
630 | | /* |
631 | | * Adjust pgxactoff of following procs for removed PGPROC (note that |
632 | | * numProcs already has been decremented). |
633 | | */ |
634 | 0 | for (int index = myoff; index < arrayP->numProcs; index++) |
635 | 0 | { |
636 | 0 | int procno = arrayP->pgprocnos[index]; |
637 | |
|
638 | 0 | Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS)); |
639 | 0 | Assert(allProcs[procno].pgxactoff - 1 == index); |
640 | |
|
641 | 0 | allProcs[procno].pgxactoff = index; |
642 | 0 | } |
643 | | |
644 | | /* |
645 | | * Release in reversed acquisition order, to reduce frequency of having to |
646 | | * wait for XidGenLock while holding ProcArrayLock. |
647 | | */ |
648 | 0 | LWLockRelease(XidGenLock); |
649 | 0 | LWLockRelease(ProcArrayLock); |
650 | 0 | } |
651 | | |
652 | | |
653 | | /* |
654 | | * ProcArrayEndTransaction -- mark a transaction as no longer running |
655 | | * |
656 | | * This is used interchangeably for commit and abort cases. The transaction |
657 | | * commit/abort must already be reported to WAL and pg_xact. |
658 | | * |
659 | | * proc is currently always MyProc, but we pass it explicitly for flexibility. |
660 | | * latestXid is the latest Xid among the transaction's main XID and |
661 | | * subtransactions, or InvalidTransactionId if it has no XID. (We must ask |
662 | | * the caller to pass latestXid, instead of computing it from the PGPROC's |
663 | | * contents, because the subxid information in the PGPROC might be |
664 | | * incomplete.) |
665 | | */ |
666 | | void |
667 | | ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) |
668 | 0 | { |
669 | 0 | if (TransactionIdIsValid(latestXid)) |
670 | 0 | { |
671 | | /* |
672 | | * We must lock ProcArrayLock while clearing our advertised XID, so |
673 | | * that we do not exit the set of "running" transactions while someone |
674 | | * else is taking a snapshot. See discussion in |
675 | | * src/backend/access/transam/README. |
676 | | */ |
677 | 0 | Assert(TransactionIdIsValid(proc->xid)); |
678 | | |
679 | | /* |
680 | | * If we can immediately acquire ProcArrayLock, we clear our own XID |
681 | | * and release the lock. If not, use group XID clearing to improve |
682 | | * efficiency. |
683 | | */ |
684 | 0 | if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE)) |
685 | 0 | { |
686 | 0 | ProcArrayEndTransactionInternal(proc, latestXid); |
687 | 0 | LWLockRelease(ProcArrayLock); |
688 | 0 | } |
689 | 0 | else |
690 | 0 | ProcArrayGroupClearXid(proc, latestXid); |
691 | 0 | } |
692 | 0 | else |
693 | 0 | { |
694 | | /* |
695 | | * If we have no XID, we don't need to lock, since we won't affect |
696 | | * anyone else's calculation of a snapshot. We might change their |
697 | | * estimate of global xmin, but that's OK. |
698 | | */ |
699 | 0 | Assert(!TransactionIdIsValid(proc->xid)); |
700 | 0 | Assert(proc->subxidStatus.count == 0); |
701 | 0 | Assert(!proc->subxidStatus.overflowed); |
702 | |
|
703 | 0 | proc->vxid.lxid = InvalidLocalTransactionId; |
704 | 0 | proc->xmin = InvalidTransactionId; |
705 | | |
706 | | /* be sure this is cleared in abort */ |
707 | 0 | proc->delayChkptFlags = 0; |
708 | |
|
709 | 0 | proc->recoveryConflictPending = false; |
710 | | |
711 | | /* must be cleared with xid/xmin: */ |
712 | | /* avoid unnecessarily dirtying shared cachelines */ |
713 | 0 | if (proc->statusFlags & PROC_VACUUM_STATE_MASK) |
714 | 0 | { |
715 | 0 | Assert(!LWLockHeldByMe(ProcArrayLock)); |
716 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
717 | 0 | Assert(proc->statusFlags == ProcGlobal->statusFlags[proc->pgxactoff]); |
718 | 0 | proc->statusFlags &= ~PROC_VACUUM_STATE_MASK; |
719 | 0 | ProcGlobal->statusFlags[proc->pgxactoff] = proc->statusFlags; |
720 | 0 | LWLockRelease(ProcArrayLock); |
721 | 0 | } |
722 | 0 | } |
723 | 0 | } |
724 | | |
725 | | /* |
726 | | * Mark a write transaction as no longer running. |
727 | | * |
728 | | * We don't do any locking here; caller must handle that. |
729 | | */ |
730 | | static inline void |
731 | | ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid) |
732 | 0 | { |
733 | 0 | int pgxactoff = proc->pgxactoff; |
734 | | |
735 | | /* |
736 | | * Note: we need exclusive lock here because we're going to change other |
737 | | * processes' PGPROC entries. |
738 | | */ |
739 | 0 | Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE)); |
740 | 0 | Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff])); |
741 | 0 | Assert(ProcGlobal->xids[pgxactoff] == proc->xid); |
742 | |
|
743 | 0 | ProcGlobal->xids[pgxactoff] = InvalidTransactionId; |
744 | 0 | proc->xid = InvalidTransactionId; |
745 | 0 | proc->vxid.lxid = InvalidLocalTransactionId; |
746 | 0 | proc->xmin = InvalidTransactionId; |
747 | | |
748 | | /* be sure this is cleared in abort */ |
749 | 0 | proc->delayChkptFlags = 0; |
750 | |
|
751 | 0 | proc->recoveryConflictPending = false; |
752 | | |
753 | | /* must be cleared with xid/xmin: */ |
754 | | /* avoid unnecessarily dirtying shared cachelines */ |
755 | 0 | if (proc->statusFlags & PROC_VACUUM_STATE_MASK) |
756 | 0 | { |
757 | 0 | proc->statusFlags &= ~PROC_VACUUM_STATE_MASK; |
758 | 0 | ProcGlobal->statusFlags[proc->pgxactoff] = proc->statusFlags; |
759 | 0 | } |
760 | | |
761 | | /* Clear the subtransaction-XID cache too while holding the lock */ |
762 | 0 | Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count && |
763 | 0 | ProcGlobal->subxidStates[pgxactoff].overflowed == proc->subxidStatus.overflowed); |
764 | 0 | if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed) |
765 | 0 | { |
766 | 0 | ProcGlobal->subxidStates[pgxactoff].count = 0; |
767 | 0 | ProcGlobal->subxidStates[pgxactoff].overflowed = false; |
768 | 0 | proc->subxidStatus.count = 0; |
769 | 0 | proc->subxidStatus.overflowed = false; |
770 | 0 | } |
771 | | |
772 | | /* Also advance global latestCompletedXid while holding the lock */ |
773 | 0 | MaintainLatestCompletedXid(latestXid); |
774 | | |
775 | | /* Same with xactCompletionCount */ |
776 | 0 | TransamVariables->xactCompletionCount++; |
777 | 0 | } |
778 | | |
779 | | /* |
780 | | * ProcArrayGroupClearXid -- group XID clearing |
781 | | * |
782 | | * When we cannot immediately acquire ProcArrayLock in exclusive mode at |
783 | | * commit time, add ourselves to a list of processes that need their XIDs |
784 | | * cleared. The first process to add itself to the list will acquire |
785 | | * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal |
786 | | * on behalf of all group members. This avoids a great deal of contention |
787 | | * around ProcArrayLock when many processes are trying to commit at once, |
788 | | * since the lock need not be repeatedly handed off from one committing |
789 | | * process to the next. |
790 | | */ |
791 | | static void |
792 | | ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid) |
793 | 0 | { |
794 | 0 | int pgprocno = GetNumberFromPGProc(proc); |
795 | 0 | PROC_HDR *procglobal = ProcGlobal; |
796 | 0 | uint32 nextidx; |
797 | 0 | uint32 wakeidx; |
798 | | |
799 | | /* We should definitely have an XID to clear. */ |
800 | 0 | Assert(TransactionIdIsValid(proc->xid)); |
801 | | |
802 | | /* Add ourselves to the list of processes needing a group XID clear. */ |
803 | 0 | proc->procArrayGroupMember = true; |
804 | 0 | proc->procArrayGroupMemberXid = latestXid; |
805 | 0 | nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst); |
806 | 0 | while (true) |
807 | 0 | { |
808 | 0 | pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx); |
809 | |
|
810 | 0 | if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst, |
811 | 0 | &nextidx, |
812 | 0 | (uint32) pgprocno)) |
813 | 0 | break; |
814 | 0 | } |
815 | | |
816 | | /* |
817 | | * If the list was not empty, the leader will clear our XID. It is |
818 | | * impossible to have followers without a leader because the first process |
819 | | * that has added itself to the list will always have nextidx as |
820 | | * INVALID_PROC_NUMBER. |
821 | | */ |
822 | 0 | if (nextidx != INVALID_PROC_NUMBER) |
823 | 0 | { |
824 | 0 | int extraWaits = 0; |
825 | | |
826 | | /* Sleep until the leader clears our XID. */ |
827 | 0 | pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE); |
828 | 0 | for (;;) |
829 | 0 | { |
830 | | /* acts as a read barrier */ |
831 | 0 | PGSemaphoreLock(proc->sem); |
832 | 0 | if (!proc->procArrayGroupMember) |
833 | 0 | break; |
834 | 0 | extraWaits++; |
835 | 0 | } |
836 | 0 | pgstat_report_wait_end(); |
837 | |
|
838 | 0 | Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PROC_NUMBER); |
839 | | |
840 | | /* Fix semaphore count for any absorbed wakeups */ |
841 | 0 | while (extraWaits-- > 0) |
842 | 0 | PGSemaphoreUnlock(proc->sem); |
843 | 0 | return; |
844 | 0 | } |
845 | | |
846 | | /* We are the leader. Acquire the lock on behalf of everyone. */ |
847 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
848 | | |
849 | | /* |
850 | | * Now that we've got the lock, clear the list of processes waiting for |
851 | | * group XID clearing, saving a pointer to the head of the list. Trying |
852 | | * to pop elements one at a time could lead to an ABA problem. |
853 | | */ |
854 | 0 | nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst, |
855 | 0 | INVALID_PROC_NUMBER); |
856 | | |
857 | | /* Remember head of list so we can perform wakeups after dropping lock. */ |
858 | 0 | wakeidx = nextidx; |
859 | | |
860 | | /* Walk the list and clear all XIDs. */ |
861 | 0 | while (nextidx != INVALID_PROC_NUMBER) |
862 | 0 | { |
863 | 0 | PGPROC *nextproc = &allProcs[nextidx]; |
864 | |
|
865 | 0 | ProcArrayEndTransactionInternal(nextproc, nextproc->procArrayGroupMemberXid); |
866 | | |
867 | | /* Move to next proc in list. */ |
868 | 0 | nextidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext); |
869 | 0 | } |
870 | | |
871 | | /* We're done with the lock now. */ |
872 | 0 | LWLockRelease(ProcArrayLock); |
873 | | |
874 | | /* |
875 | | * Now that we've released the lock, go back and wake everybody up. We |
876 | | * don't do this under the lock so as to keep lock hold times to a |
877 | | * minimum. The system calls we need to perform to wake other processes |
878 | | * up are probably much slower than the simple memory writes we did while |
879 | | * holding the lock. |
880 | | */ |
881 | 0 | while (wakeidx != INVALID_PROC_NUMBER) |
882 | 0 | { |
883 | 0 | PGPROC *nextproc = &allProcs[wakeidx]; |
884 | |
|
885 | 0 | wakeidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext); |
886 | 0 | pg_atomic_write_u32(&nextproc->procArrayGroupNext, INVALID_PROC_NUMBER); |
887 | | |
888 | | /* ensure all previous writes are visible before follower continues. */ |
889 | 0 | pg_write_barrier(); |
890 | |
|
891 | 0 | nextproc->procArrayGroupMember = false; |
892 | |
|
893 | 0 | if (nextproc != MyProc) |
894 | 0 | PGSemaphoreUnlock(nextproc->sem); |
895 | 0 | } |
896 | 0 | } |
897 | | |
898 | | /* |
899 | | * ProcArrayClearTransaction -- clear the transaction fields |
900 | | * |
901 | | * This is used after successfully preparing a 2-phase transaction. We are |
902 | | * not actually reporting the transaction's XID as no longer running --- it |
903 | | * will still appear as running because the 2PC's gxact is in the ProcArray |
904 | | * too. We just have to clear out our own PGPROC. |
905 | | */ |
906 | | void |
907 | | ProcArrayClearTransaction(PGPROC *proc) |
908 | 0 | { |
909 | 0 | int pgxactoff; |
910 | | |
911 | | /* |
912 | | * Currently we need to lock ProcArrayLock exclusively here, as we |
913 | | * increment xactCompletionCount below. We also need it at least in shared |
914 | | * mode for pgproc->pgxactoff to stay the same below. |
915 | | * |
916 | | * We could however, as this action does not actually change anyone's view |
917 | | * of the set of running XIDs (our entry is duplicate with the gxact that |
918 | | * has already been inserted into the ProcArray), lower the lock level to |
919 | | * shared if we were to make xactCompletionCount an atomic variable. But |
920 | | * that doesn't seem worth it currently, as a 2PC commit is heavyweight |
921 | | * enough for this not to be the bottleneck. If it ever becomes a |
922 | | * bottleneck it may also be worth considering to combine this with the |
923 | | * subsequent ProcArrayRemove() |
924 | | */ |
925 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
926 | |
|
927 | 0 | pgxactoff = proc->pgxactoff; |
928 | |
|
929 | 0 | ProcGlobal->xids[pgxactoff] = InvalidTransactionId; |
930 | 0 | proc->xid = InvalidTransactionId; |
931 | |
|
932 | 0 | proc->vxid.lxid = InvalidLocalTransactionId; |
933 | 0 | proc->xmin = InvalidTransactionId; |
934 | 0 | proc->recoveryConflictPending = false; |
935 | |
|
936 | 0 | Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK)); |
937 | 0 | Assert(!proc->delayChkptFlags); |
938 | | |
939 | | /* |
940 | | * Need to increment completion count even though transaction hasn't |
941 | | * really committed yet. The reason for that is that GetSnapshotData() |
942 | | * omits the xid of the current transaction, thus without the increment we |
943 | | * otherwise could end up reusing the snapshot later. Which would be bad, |
944 | | * because it might not count the prepared transaction as running. |
945 | | */ |
946 | 0 | TransamVariables->xactCompletionCount++; |
947 | | |
948 | | /* Clear the subtransaction-XID cache too */ |
949 | 0 | Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count && |
950 | 0 | ProcGlobal->subxidStates[pgxactoff].overflowed == proc->subxidStatus.overflowed); |
951 | 0 | if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed) |
952 | 0 | { |
953 | 0 | ProcGlobal->subxidStates[pgxactoff].count = 0; |
954 | 0 | ProcGlobal->subxidStates[pgxactoff].overflowed = false; |
955 | 0 | proc->subxidStatus.count = 0; |
956 | 0 | proc->subxidStatus.overflowed = false; |
957 | 0 | } |
958 | |
|
959 | 0 | LWLockRelease(ProcArrayLock); |
960 | 0 | } |
961 | | |
962 | | /* |
963 | | * Update TransamVariables->latestCompletedXid to point to latestXid if |
964 | | * currently older. |
965 | | */ |
966 | | static void |
967 | | MaintainLatestCompletedXid(TransactionId latestXid) |
968 | 0 | { |
969 | 0 | FullTransactionId cur_latest = TransamVariables->latestCompletedXid; |
970 | |
|
971 | 0 | Assert(FullTransactionIdIsValid(cur_latest)); |
972 | 0 | Assert(!RecoveryInProgress()); |
973 | 0 | Assert(LWLockHeldByMe(ProcArrayLock)); |
974 | |
|
975 | 0 | if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) |
976 | 0 | { |
977 | 0 | TransamVariables->latestCompletedXid = |
978 | 0 | FullXidRelativeTo(cur_latest, latestXid); |
979 | 0 | } |
980 | |
|
981 | 0 | Assert(IsBootstrapProcessingMode() || |
982 | 0 | FullTransactionIdIsNormal(TransamVariables->latestCompletedXid)); |
983 | 0 | } |
984 | | |
985 | | /* |
986 | | * Same as MaintainLatestCompletedXid, except for use during WAL replay. |
987 | | */ |
988 | | static void |
989 | | MaintainLatestCompletedXidRecovery(TransactionId latestXid) |
990 | 0 | { |
991 | 0 | FullTransactionId cur_latest = TransamVariables->latestCompletedXid; |
992 | 0 | FullTransactionId rel; |
993 | |
|
994 | 0 | Assert(AmStartupProcess() || !IsUnderPostmaster); |
995 | 0 | Assert(LWLockHeldByMe(ProcArrayLock)); |
996 | | |
997 | | /* |
998 | | * Need a FullTransactionId to compare latestXid with. Can't rely on |
999 | | * latestCompletedXid to be initialized in recovery. But in recovery it's |
1000 | | * safe to access nextXid without a lock for the startup process. |
1001 | | */ |
1002 | 0 | rel = TransamVariables->nextXid; |
1003 | 0 | Assert(FullTransactionIdIsValid(TransamVariables->nextXid)); |
1004 | |
|
1005 | 0 | if (!FullTransactionIdIsValid(cur_latest) || |
1006 | 0 | TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) |
1007 | 0 | { |
1008 | 0 | TransamVariables->latestCompletedXid = |
1009 | 0 | FullXidRelativeTo(rel, latestXid); |
1010 | 0 | } |
1011 | |
|
1012 | 0 | Assert(FullTransactionIdIsNormal(TransamVariables->latestCompletedXid)); |
1013 | 0 | } |
1014 | | |
1015 | | /* |
1016 | | * ProcArrayInitRecovery -- initialize recovery xid mgmt environment |
1017 | | * |
1018 | | * Remember up to where the startup process initialized the CLOG and subtrans |
1019 | | * so we can ensure it's initialized gaplessly up to the point where necessary |
1020 | | * while in recovery. |
1021 | | */ |
1022 | | void |
1023 | | ProcArrayInitRecovery(TransactionId initializedUptoXID) |
1024 | 0 | { |
1025 | 0 | Assert(standbyState == STANDBY_INITIALIZED); |
1026 | 0 | Assert(TransactionIdIsNormal(initializedUptoXID)); |
1027 | | |
1028 | | /* |
1029 | | * we set latestObservedXid to the xid SUBTRANS has been initialized up |
1030 | | * to, so we can extend it from that point onwards in |
1031 | | * RecordKnownAssignedTransactionIds, and when we get consistent in |
1032 | | * ProcArrayApplyRecoveryInfo(). |
1033 | | */ |
1034 | 0 | latestObservedXid = initializedUptoXID; |
1035 | 0 | TransactionIdRetreat(latestObservedXid); |
1036 | 0 | } |
1037 | | |
1038 | | /* |
1039 | | * ProcArrayApplyRecoveryInfo -- apply recovery info about xids |
1040 | | * |
1041 | | * Takes us through 3 states: Initialized, Pending and Ready. |
1042 | | * Normal case is to go all the way to Ready straight away, though there |
1043 | | * are atypical cases where we need to take it in steps. |
1044 | | * |
1045 | | * Use the data about running transactions on the primary to create the initial |
1046 | | * state of KnownAssignedXids. We also use these records to regularly prune |
1047 | | * KnownAssignedXids because we know it is possible that some transactions |
1048 | | * with FATAL errors fail to write abort records, which could cause eventual |
1049 | | * overflow. |
1050 | | * |
1051 | | * See comments for LogStandbySnapshot(). |
1052 | | */ |
1053 | | void |
1054 | | ProcArrayApplyRecoveryInfo(RunningTransactions running) |
1055 | | { |
1056 | | TransactionId *xids; |
1057 | | TransactionId advanceNextXid; |
1058 | | int nxids; |
1059 | | int i; |
1060 | | |
1061 | | Assert(standbyState >= STANDBY_INITIALIZED); |
1062 | | Assert(TransactionIdIsValid(running->nextXid)); |
1063 | | Assert(TransactionIdIsValid(running->oldestRunningXid)); |
1064 | | Assert(TransactionIdIsNormal(running->latestCompletedXid)); |
1065 | | |
1066 | | /* |
1067 | | * Remove stale transactions, if any. |
1068 | | */ |
1069 | | ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid); |
1070 | | |
1071 | | /* |
1072 | | * Adjust TransamVariables->nextXid before StandbyReleaseOldLocks(), |
1073 | | * because we will need it up to date for accessing two-phase transactions |
1074 | | * in StandbyReleaseOldLocks(). |
1075 | | */ |
1076 | | advanceNextXid = running->nextXid; |
1077 | | TransactionIdRetreat(advanceNextXid); |
1078 | | AdvanceNextFullTransactionIdPastXid(advanceNextXid); |
1079 | | Assert(FullTransactionIdIsValid(TransamVariables->nextXid)); |
1080 | | |
1081 | | /* |
1082 | | * Remove stale locks, if any. |
1083 | | */ |
1084 | | StandbyReleaseOldLocks(running->oldestRunningXid); |
1085 | | |
1086 | | /* |
1087 | | * If our snapshot is already valid, nothing else to do... |
1088 | | */ |
1089 | | if (standbyState == STANDBY_SNAPSHOT_READY) |
1090 | | return; |
1091 | | |
1092 | | /* |
1093 | | * If our initial RunningTransactionsData had an overflowed snapshot then |
1094 | | * we knew we were missing some subxids from our snapshot. If we continue |
1095 | | * to see overflowed snapshots then we might never be able to start up, so |
1096 | | * we make another test to see if our snapshot is now valid. We know that |
1097 | | * the missing subxids are equal to or earlier than nextXid. After we |
1098 | | * initialise we continue to apply changes during recovery, so once the |
1099 | | * oldestRunningXid is later than the nextXid from the initial snapshot we |
1100 | | * know that we no longer have missing information and can mark the |
1101 | | * snapshot as valid. |
1102 | | */ |
1103 | | if (standbyState == STANDBY_SNAPSHOT_PENDING) |
1104 | | { |
1105 | | /* |
1106 | | * If the snapshot isn't overflowed or if its empty we can reset our |
1107 | | * pending state and use this snapshot instead. |
1108 | | */ |
1109 | | if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0) |
1110 | | { |
1111 | | /* |
1112 | | * If we have already collected known assigned xids, we need to |
1113 | | * throw them away before we apply the recovery snapshot. |
1114 | | */ |
1115 | | KnownAssignedXidsReset(); |
1116 | | standbyState = STANDBY_INITIALIZED; |
1117 | | } |
1118 | | else |
1119 | | { |
1120 | | if (TransactionIdPrecedes(standbySnapshotPendingXmin, |
1121 | | running->oldestRunningXid)) |
1122 | | { |
1123 | | standbyState = STANDBY_SNAPSHOT_READY; |
1124 | | elog(DEBUG1, |
1125 | | "recovery snapshots are now enabled"); |
1126 | | } |
1127 | | else |
1128 | | elog(DEBUG1, |
1129 | | "recovery snapshot waiting for non-overflowed snapshot or " |
1130 | | "until oldest active xid on standby is at least %u (now %u)", |
1131 | | standbySnapshotPendingXmin, |
1132 | | running->oldestRunningXid); |
1133 | | return; |
1134 | | } |
1135 | | } |
1136 | | |
1137 | | Assert(standbyState == STANDBY_INITIALIZED); |
1138 | | |
1139 | | /* |
1140 | | * NB: this can be reached at least twice, so make sure new code can deal |
1141 | | * with that. |
1142 | | */ |
1143 | | |
1144 | | /* |
1145 | | * Nobody else is running yet, but take locks anyhow |
1146 | | */ |
1147 | | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
1148 | | |
1149 | | /* |
1150 | | * KnownAssignedXids is sorted so we cannot just add the xids, we have to |
1151 | | * sort them first. |
1152 | | * |
1153 | | * Some of the new xids are top-level xids and some are subtransactions. |
1154 | | * We don't call SubTransSetParent because it doesn't matter yet. If we |
1155 | | * aren't overflowed then all xids will fit in snapshot and so we don't |
1156 | | * need subtrans. If we later overflow, an xid assignment record will add |
1157 | | * xids to subtrans. If RunningTransactionsData is overflowed then we |
1158 | | * don't have enough information to correctly update subtrans anyway. |
1159 | | */ |
1160 | | |
1161 | | /* |
1162 | | * Allocate a temporary array to avoid modifying the array passed as |
1163 | | * argument. |
1164 | | */ |
1165 | | xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt)); |
1166 | | |
1167 | | /* |
1168 | | * Add to the temp array any xids which have not already completed. |
1169 | | */ |
1170 | | nxids = 0; |
1171 | | for (i = 0; i < running->xcnt + running->subxcnt; i++) |
1172 | | { |
1173 | | TransactionId xid = running->xids[i]; |
1174 | | |
1175 | | /* |
1176 | | * The running-xacts snapshot can contain xids that were still visible |
1177 | | * in the procarray when the snapshot was taken, but were already |
1178 | | * WAL-logged as completed. They're not running anymore, so ignore |
1179 | | * them. |
1180 | | */ |
1181 | | if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) |
1182 | | continue; |
1183 | | |
1184 | | xids[nxids++] = xid; |
1185 | | } |
1186 | | |
1187 | | if (nxids > 0) |
1188 | | { |
1189 | | if (procArray->numKnownAssignedXids != 0) |
1190 | | { |
1191 | | LWLockRelease(ProcArrayLock); |
1192 | | elog(ERROR, "KnownAssignedXids is not empty"); |
1193 | | } |
1194 | | |
1195 | | /* |
1196 | | * Sort the array so that we can add them safely into |
1197 | | * KnownAssignedXids. |
1198 | | * |
1199 | | * We have to sort them logically, because in KnownAssignedXidsAdd we |
1200 | | * call TransactionIdFollowsOrEquals and so on. But we know these XIDs |
1201 | | * come from RUNNING_XACTS, which means there are only normal XIDs |
1202 | | * from the same epoch, so this is safe. |
1203 | | */ |
1204 | | qsort(xids, nxids, sizeof(TransactionId), xidLogicalComparator); |
1205 | | |
1206 | | /* |
1207 | | * Add the sorted snapshot into KnownAssignedXids. The running-xacts |
1208 | | * snapshot may include duplicated xids because of prepared |
1209 | | * transactions, so ignore them. |
1210 | | */ |
1211 | | for (i = 0; i < nxids; i++) |
1212 | | { |
1213 | | if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i])) |
1214 | | { |
1215 | | elog(DEBUG1, |
1216 | | "found duplicated transaction %u for KnownAssignedXids insertion", |
1217 | | xids[i]); |
1218 | | continue; |
1219 | | } |
1220 | | KnownAssignedXidsAdd(xids[i], xids[i], true); |
1221 | | } |
1222 | | |
1223 | | KnownAssignedXidsDisplay(DEBUG3); |
1224 | | } |
1225 | | |
1226 | | pfree(xids); |
1227 | | |
1228 | | /* |
1229 | | * latestObservedXid is at least set to the point where SUBTRANS was |
1230 | | * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid |
1231 | | * RecordKnownAssignedTransactionIds() was called for. Initialize |
1232 | | * subtrans from thereon, up to nextXid - 1. |
1233 | | * |
1234 | | * We need to duplicate parts of RecordKnownAssignedTransactionId() here, |
1235 | | * because we've just added xids to the known assigned xids machinery that |
1236 | | * haven't gone through RecordKnownAssignedTransactionId(). |
1237 | | */ |
1238 | | Assert(TransactionIdIsNormal(latestObservedXid)); |
1239 | | TransactionIdAdvance(latestObservedXid); |
1240 | | while (TransactionIdPrecedes(latestObservedXid, running->nextXid)) |
1241 | | { |
1242 | | ExtendSUBTRANS(latestObservedXid); |
1243 | | TransactionIdAdvance(latestObservedXid); |
1244 | | } |
1245 | | TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */ |
1246 | | |
1247 | | /* ---------- |
1248 | | * Now we've got the running xids we need to set the global values that |
1249 | | * are used to track snapshots as they evolve further. |
1250 | | * |
1251 | | * - latestCompletedXid which will be the xmax for snapshots |
1252 | | * - lastOverflowedXid which shows whether snapshots overflow |
1253 | | * - nextXid |
1254 | | * |
1255 | | * If the snapshot overflowed, then we still initialise with what we know, |
1256 | | * but the recovery snapshot isn't fully valid yet because we know there |
1257 | | * are some subxids missing. We don't know the specific subxids that are |
1258 | | * missing, so conservatively assume the last one is latestObservedXid. |
1259 | | * ---------- |
1260 | | */ |
1261 | | if (running->subxid_status == SUBXIDS_MISSING) |
1262 | | { |
1263 | | standbyState = STANDBY_SNAPSHOT_PENDING; |
1264 | | |
1265 | | standbySnapshotPendingXmin = latestObservedXid; |
1266 | | procArray->lastOverflowedXid = latestObservedXid; |
1267 | | } |
1268 | | else |
1269 | | { |
1270 | | standbyState = STANDBY_SNAPSHOT_READY; |
1271 | | |
1272 | | standbySnapshotPendingXmin = InvalidTransactionId; |
1273 | | |
1274 | | /* |
1275 | | * If the 'xids' array didn't include all subtransactions, we have to |
1276 | | * mark any snapshots taken as overflowed. |
1277 | | */ |
1278 | | if (running->subxid_status == SUBXIDS_IN_SUBTRANS) |
1279 | | procArray->lastOverflowedXid = latestObservedXid; |
1280 | | else |
1281 | | { |
1282 | | Assert(running->subxid_status == SUBXIDS_IN_ARRAY); |
1283 | | procArray->lastOverflowedXid = InvalidTransactionId; |
1284 | | } |
1285 | | } |
1286 | | |
1287 | | /* |
1288 | | * If a transaction wrote a commit record in the gap between taking and |
1289 | | * logging the snapshot then latestCompletedXid may already be higher than |
1290 | | * the value from the snapshot, so check before we use the incoming value. |
1291 | | * It also might not yet be set at all. |
1292 | | */ |
1293 | | MaintainLatestCompletedXidRecovery(running->latestCompletedXid); |
1294 | | |
1295 | | /* |
1296 | | * NB: No need to increment TransamVariables->xactCompletionCount here, |
1297 | | * nobody can see it yet. |
1298 | | */ |
1299 | | |
1300 | | LWLockRelease(ProcArrayLock); |
1301 | | |
1302 | | KnownAssignedXidsDisplay(DEBUG3); |
1303 | | if (standbyState == STANDBY_SNAPSHOT_READY) |
1304 | | elog(DEBUG1, "recovery snapshots are now enabled"); |
1305 | | else |
1306 | | elog(DEBUG1, |
1307 | | "recovery snapshot waiting for non-overflowed snapshot or " |
1308 | | "until oldest active xid on standby is at least %u (now %u)", |
1309 | | standbySnapshotPendingXmin, |
1310 | | running->oldestRunningXid); |
1311 | | } |
1312 | | |
1313 | | /* |
1314 | | * ProcArrayApplyXidAssignment |
1315 | | * Process an XLOG_XACT_ASSIGNMENT WAL record |
1316 | | */ |
1317 | | void |
1318 | | ProcArrayApplyXidAssignment(TransactionId topxid, |
1319 | | int nsubxids, TransactionId *subxids) |
1320 | 0 | { |
1321 | 0 | TransactionId max_xid; |
1322 | 0 | int i; |
1323 | |
|
1324 | 0 | Assert(standbyState >= STANDBY_INITIALIZED); |
1325 | |
|
1326 | 0 | max_xid = TransactionIdLatest(topxid, nsubxids, subxids); |
1327 | | |
1328 | | /* |
1329 | | * Mark all the subtransactions as observed. |
1330 | | * |
1331 | | * NOTE: This will fail if the subxid contains too many previously |
1332 | | * unobserved xids to fit into known-assigned-xids. That shouldn't happen |
1333 | | * as the code stands, because xid-assignment records should never contain |
1334 | | * more than PGPROC_MAX_CACHED_SUBXIDS entries. |
1335 | | */ |
1336 | 0 | RecordKnownAssignedTransactionIds(max_xid); |
1337 | | |
1338 | | /* |
1339 | | * Notice that we update pg_subtrans with the top-level xid, rather than |
1340 | | * the parent xid. This is a difference between normal processing and |
1341 | | * recovery, yet is still correct in all cases. The reason is that |
1342 | | * subtransaction commit is not marked in clog until commit processing, so |
1343 | | * all aborted subtransactions have already been clearly marked in clog. |
1344 | | * As a result we are able to refer directly to the top-level |
1345 | | * transaction's state rather than skipping through all the intermediate |
1346 | | * states in the subtransaction tree. This should be the first time we |
1347 | | * have attempted to SubTransSetParent(). |
1348 | | */ |
1349 | 0 | for (i = 0; i < nsubxids; i++) |
1350 | 0 | SubTransSetParent(subxids[i], topxid); |
1351 | | |
1352 | | /* KnownAssignedXids isn't maintained yet, so we're done for now */ |
1353 | 0 | if (standbyState == STANDBY_INITIALIZED) |
1354 | 0 | return; |
1355 | | |
1356 | | /* |
1357 | | * Uses same locking as transaction commit |
1358 | | */ |
1359 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
1360 | | |
1361 | | /* |
1362 | | * Remove subxids from known-assigned-xacts. |
1363 | | */ |
1364 | 0 | KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids); |
1365 | | |
1366 | | /* |
1367 | | * Advance lastOverflowedXid to be at least the last of these subxids. |
1368 | | */ |
1369 | 0 | if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid)) |
1370 | 0 | procArray->lastOverflowedXid = max_xid; |
1371 | |
|
1372 | 0 | LWLockRelease(ProcArrayLock); |
1373 | 0 | } |
1374 | | |
1375 | | /* |
1376 | | * TransactionIdIsInProgress -- is given transaction running in some backend |
1377 | | * |
1378 | | * Aside from some shortcuts such as checking RecentXmin and our own Xid, |
1379 | | * there are four possibilities for finding a running transaction: |
1380 | | * |
1381 | | * 1. The given Xid is a main transaction Id. We will find this out cheaply |
1382 | | * by looking at ProcGlobal->xids. |
1383 | | * |
1384 | | * 2. The given Xid is one of the cached subxact Xids in the PGPROC array. |
1385 | | * We can find this out cheaply too. |
1386 | | * |
1387 | | * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see |
1388 | | * if the Xid is running on the primary. |
1389 | | * |
1390 | | * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see |
1391 | | * if that is running according to ProcGlobal->xids[] or KnownAssignedXids. |
1392 | | * This is the slowest way, but sadly it has to be done always if the others |
1393 | | * failed, unless we see that the cached subxact sets are complete (none have |
1394 | | * overflowed). |
1395 | | * |
1396 | | * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids |
1397 | | * while doing 1 and 3, we can release the ProcArrayLock while we do 4. |
1398 | | * This buys back some concurrency (and we can't retrieve the main Xids from |
1399 | | * ProcGlobal->xids[] again anyway; see GetNewTransactionId). |
1400 | | */ |
1401 | | bool |
1402 | | TransactionIdIsInProgress(TransactionId xid) |
1403 | 0 | { |
1404 | 0 | static TransactionId *xids = NULL; |
1405 | 0 | static TransactionId *other_xids; |
1406 | 0 | XidCacheStatus *other_subxidstates; |
1407 | 0 | int nxids = 0; |
1408 | 0 | ProcArrayStruct *arrayP = procArray; |
1409 | 0 | TransactionId topxid; |
1410 | 0 | TransactionId latestCompletedXid; |
1411 | 0 | int mypgxactoff; |
1412 | 0 | int numProcs; |
1413 | 0 | int j; |
1414 | | |
1415 | | /* |
1416 | | * Don't bother checking a transaction older than RecentXmin; it could not |
1417 | | * possibly still be running. (Note: in particular, this guarantees that |
1418 | | * we reject InvalidTransactionId, FrozenTransactionId, etc as not |
1419 | | * running.) |
1420 | | */ |
1421 | 0 | if (TransactionIdPrecedes(xid, RecentXmin)) |
1422 | 0 | { |
1423 | 0 | xc_by_recent_xmin_inc(); |
1424 | 0 | return false; |
1425 | 0 | } |
1426 | | |
1427 | | /* |
1428 | | * We may have just checked the status of this transaction, so if it is |
1429 | | * already known to be completed, we can fall out without any access to |
1430 | | * shared memory. |
1431 | | */ |
1432 | 0 | if (TransactionIdEquals(cachedXidIsNotInProgress, xid)) |
1433 | 0 | { |
1434 | 0 | xc_by_known_xact_inc(); |
1435 | 0 | return false; |
1436 | 0 | } |
1437 | | |
1438 | | /* |
1439 | | * Also, we can handle our own transaction (and subtransactions) without |
1440 | | * any access to shared memory. |
1441 | | */ |
1442 | 0 | if (TransactionIdIsCurrentTransactionId(xid)) |
1443 | 0 | { |
1444 | 0 | xc_by_my_xact_inc(); |
1445 | 0 | return true; |
1446 | 0 | } |
1447 | | |
1448 | | /* |
1449 | | * If first time through, get workspace to remember main XIDs in. We |
1450 | | * malloc it permanently to avoid repeated palloc/pfree overhead. |
1451 | | */ |
1452 | 0 | if (xids == NULL) |
1453 | 0 | { |
1454 | | /* |
1455 | | * In hot standby mode, reserve enough space to hold all xids in the |
1456 | | * known-assigned list. If we later finish recovery, we no longer need |
1457 | | * the bigger array, but we don't bother to shrink it. |
1458 | | */ |
1459 | 0 | int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs; |
1460 | |
|
1461 | 0 | xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId)); |
1462 | 0 | if (xids == NULL) |
1463 | 0 | ereport(ERROR, |
1464 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY), |
1465 | 0 | errmsg("out of memory"))); |
1466 | 0 | } |
1467 | | |
1468 | 0 | other_xids = ProcGlobal->xids; |
1469 | 0 | other_subxidstates = ProcGlobal->subxidStates; |
1470 | |
|
1471 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
1472 | | |
1473 | | /* |
1474 | | * Now that we have the lock, we can check latestCompletedXid; if the |
1475 | | * target Xid is after that, it's surely still running. |
1476 | | */ |
1477 | 0 | latestCompletedXid = |
1478 | 0 | XidFromFullTransactionId(TransamVariables->latestCompletedXid); |
1479 | 0 | if (TransactionIdPrecedes(latestCompletedXid, xid)) |
1480 | 0 | { |
1481 | 0 | LWLockRelease(ProcArrayLock); |
1482 | 0 | xc_by_latest_xid_inc(); |
1483 | 0 | return true; |
1484 | 0 | } |
1485 | | |
1486 | | /* No shortcuts, gotta grovel through the array */ |
1487 | 0 | mypgxactoff = MyProc->pgxactoff; |
1488 | 0 | numProcs = arrayP->numProcs; |
1489 | 0 | for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++) |
1490 | 0 | { |
1491 | 0 | int pgprocno; |
1492 | 0 | PGPROC *proc; |
1493 | 0 | TransactionId pxid; |
1494 | 0 | int pxids; |
1495 | | |
1496 | | /* Ignore ourselves --- dealt with it above */ |
1497 | 0 | if (pgxactoff == mypgxactoff) |
1498 | 0 | continue; |
1499 | | |
1500 | | /* Fetch xid just once - see GetNewTransactionId */ |
1501 | 0 | pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); |
1502 | |
|
1503 | 0 | if (!TransactionIdIsValid(pxid)) |
1504 | 0 | continue; |
1505 | | |
1506 | | /* |
1507 | | * Step 1: check the main Xid |
1508 | | */ |
1509 | 0 | if (TransactionIdEquals(pxid, xid)) |
1510 | 0 | { |
1511 | 0 | LWLockRelease(ProcArrayLock); |
1512 | 0 | xc_by_main_xid_inc(); |
1513 | 0 | return true; |
1514 | 0 | } |
1515 | | |
1516 | | /* |
1517 | | * We can ignore main Xids that are younger than the target Xid, since |
1518 | | * the target could not possibly be their child. |
1519 | | */ |
1520 | 0 | if (TransactionIdPrecedes(xid, pxid)) |
1521 | 0 | continue; |
1522 | | |
1523 | | /* |
1524 | | * Step 2: check the cached child-Xids arrays |
1525 | | */ |
1526 | 0 | pxids = other_subxidstates[pgxactoff].count; |
1527 | 0 | pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */ |
1528 | 0 | pgprocno = arrayP->pgprocnos[pgxactoff]; |
1529 | 0 | proc = &allProcs[pgprocno]; |
1530 | 0 | for (j = pxids - 1; j >= 0; j--) |
1531 | 0 | { |
1532 | | /* Fetch xid just once - see GetNewTransactionId */ |
1533 | 0 | TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]); |
1534 | |
|
1535 | 0 | if (TransactionIdEquals(cxid, xid)) |
1536 | 0 | { |
1537 | 0 | LWLockRelease(ProcArrayLock); |
1538 | 0 | xc_by_child_xid_inc(); |
1539 | 0 | return true; |
1540 | 0 | } |
1541 | 0 | } |
1542 | | |
1543 | | /* |
1544 | | * Save the main Xid for step 4. We only need to remember main Xids |
1545 | | * that have uncached children. (Note: there is no race condition |
1546 | | * here because the overflowed flag cannot be cleared, only set, while |
1547 | | * we hold ProcArrayLock. So we can't miss an Xid that we need to |
1548 | | * worry about.) |
1549 | | */ |
1550 | 0 | if (other_subxidstates[pgxactoff].overflowed) |
1551 | 0 | xids[nxids++] = pxid; |
1552 | 0 | } |
1553 | | |
1554 | | /* |
1555 | | * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs |
1556 | | * in the list must be treated as running. |
1557 | | */ |
1558 | 0 | if (RecoveryInProgress()) |
1559 | 0 | { |
1560 | | /* none of the PGPROC entries should have XIDs in hot standby mode */ |
1561 | 0 | Assert(nxids == 0); |
1562 | |
|
1563 | 0 | if (KnownAssignedXidExists(xid)) |
1564 | 0 | { |
1565 | 0 | LWLockRelease(ProcArrayLock); |
1566 | 0 | xc_by_known_assigned_inc(); |
1567 | 0 | return true; |
1568 | 0 | } |
1569 | | |
1570 | | /* |
1571 | | * If the KnownAssignedXids overflowed, we have to check pg_subtrans |
1572 | | * too. Fetch all xids from KnownAssignedXids that are lower than |
1573 | | * xid, since if xid is a subtransaction its parent will always have a |
1574 | | * lower value. Note we will collect both main and subXIDs here, but |
1575 | | * there's no help for it. |
1576 | | */ |
1577 | 0 | if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid)) |
1578 | 0 | nxids = KnownAssignedXidsGet(xids, xid); |
1579 | 0 | } |
1580 | | |
1581 | 0 | LWLockRelease(ProcArrayLock); |
1582 | | |
1583 | | /* |
1584 | | * If none of the relevant caches overflowed, we know the Xid is not |
1585 | | * running without even looking at pg_subtrans. |
1586 | | */ |
1587 | 0 | if (nxids == 0) |
1588 | 0 | { |
1589 | 0 | xc_no_overflow_inc(); |
1590 | 0 | cachedXidIsNotInProgress = xid; |
1591 | 0 | return false; |
1592 | 0 | } |
1593 | | |
1594 | | /* |
1595 | | * Step 4: have to check pg_subtrans. |
1596 | | * |
1597 | | * At this point, we know it's either a subtransaction of one of the Xids |
1598 | | * in xids[], or it's not running. If it's an already-failed |
1599 | | * subtransaction, we want to say "not running" even though its parent may |
1600 | | * still be running. So first, check pg_xact to see if it's been aborted. |
1601 | | */ |
1602 | 0 | xc_slow_answer_inc(); |
1603 | |
|
1604 | 0 | if (TransactionIdDidAbort(xid)) |
1605 | 0 | { |
1606 | 0 | cachedXidIsNotInProgress = xid; |
1607 | 0 | return false; |
1608 | 0 | } |
1609 | | |
1610 | | /* |
1611 | | * It isn't aborted, so check whether the transaction tree it belongs to |
1612 | | * is still running (or, more precisely, whether it was running when we |
1613 | | * held ProcArrayLock). |
1614 | | */ |
1615 | 0 | topxid = SubTransGetTopmostTransaction(xid); |
1616 | 0 | Assert(TransactionIdIsValid(topxid)); |
1617 | 0 | if (!TransactionIdEquals(topxid, xid) && |
1618 | 0 | pg_lfind32(topxid, xids, nxids)) |
1619 | 0 | return true; |
1620 | | |
1621 | 0 | cachedXidIsNotInProgress = xid; |
1622 | 0 | return false; |
1623 | 0 | } |
1624 | | |
1625 | | |
1626 | | /* |
1627 | | * Determine XID horizons. |
1628 | | * |
1629 | | * This is used by wrapper functions like GetOldestNonRemovableTransactionId() |
1630 | | * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as |
1631 | | * well as "internally" by GlobalVisUpdate() (see comment above struct |
1632 | | * GlobalVisState). |
1633 | | * |
1634 | | * See the definition of ComputeXidHorizonsResult for the various computed |
1635 | | * horizons. |
1636 | | * |
1637 | | * For VACUUM separate horizons (used to decide which deleted tuples must |
1638 | | * be preserved), for shared and non-shared tables are computed. For shared |
1639 | | * relations backends in all databases must be considered, but for non-shared |
1640 | | * relations that's not required, since only backends in my own database could |
1641 | | * ever see the tuples in them. Also, we can ignore concurrently running lazy |
1642 | | * VACUUMs because (a) they must be working on other tables, and (b) they |
1643 | | * don't need to do snapshot-based lookups. |
1644 | | * |
1645 | | * This also computes a horizon used to truncate pg_subtrans. For that |
1646 | | * backends in all databases have to be considered, and concurrently running |
1647 | | * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans |
1648 | | * accesses. |
1649 | | * |
1650 | | * Note: we include all currently running xids in the set of considered xids. |
1651 | | * This ensures that if a just-started xact has not yet set its snapshot, |
1652 | | * when it does set the snapshot it cannot set xmin less than what we compute. |
1653 | | * See notes in src/backend/access/transam/README. |
1654 | | * |
1655 | | * Note: despite the above, it's possible for the calculated values to move |
1656 | | * backwards on repeated calls. The calculated values are conservative, so |
1657 | | * that anything older is definitely not considered as running by anyone |
1658 | | * anymore, but the exact values calculated depend on a number of things. For |
1659 | | * example, if there are no transactions running in the current database, the |
1660 | | * horizon for normal tables will be latestCompletedXid. If a transaction |
1661 | | * begins after that, its xmin will include in-progress transactions in other |
1662 | | * databases that started earlier, so another call will return a lower value. |
1663 | | * Nonetheless it is safe to vacuum a table in the current database with the |
1664 | | * first result. There are also replication-related effects: a walsender |
1665 | | * process can set its xmin based on transactions that are no longer running |
1666 | | * on the primary but are still being replayed on the standby, thus possibly |
1667 | | * making the values go backwards. In this case there is a possibility that |
1668 | | * we lose data that the standby would like to have, but unless the standby |
1669 | | * uses a replication slot to make its xmin persistent there is little we can |
1670 | | * do about that --- data is only protected if the walsender runs continuously |
1671 | | * while queries are executed on the standby. (The Hot Standby code deals |
1672 | | * with such cases by failing standby queries that needed to access |
1673 | | * already-removed data, so there's no integrity bug.) |
1674 | | * |
1675 | | * Note: the approximate horizons (see definition of GlobalVisState) are |
1676 | | * updated by the computations done here. That's currently required for |
1677 | | * correctness and a small optimization. Without doing so it's possible that |
1678 | | * heap vacuum's call to heap_page_prune_and_freeze() uses a more conservative |
1679 | | * horizon than later when deciding which tuples can be removed - which the |
1680 | | * code doesn't expect (breaking HOT). |
1681 | | */ |
1682 | | static void |
1683 | | ComputeXidHorizons(ComputeXidHorizonsResult *h) |
1684 | 0 | { |
1685 | 0 | ProcArrayStruct *arrayP = procArray; |
1686 | 0 | TransactionId kaxmin; |
1687 | 0 | bool in_recovery = RecoveryInProgress(); |
1688 | 0 | TransactionId *other_xids = ProcGlobal->xids; |
1689 | | |
1690 | | /* inferred after ProcArrayLock is released */ |
1691 | 0 | h->catalog_oldest_nonremovable = InvalidTransactionId; |
1692 | |
|
1693 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
1694 | |
|
1695 | 0 | h->latest_completed = TransamVariables->latestCompletedXid; |
1696 | | |
1697 | | /* |
1698 | | * We initialize the MIN() calculation with latestCompletedXid + 1. This |
1699 | | * is a lower bound for the XIDs that might appear in the ProcArray later, |
1700 | | * and so protects us against overestimating the result due to future |
1701 | | * additions. |
1702 | | */ |
1703 | 0 | { |
1704 | 0 | TransactionId initial; |
1705 | |
|
1706 | 0 | initial = XidFromFullTransactionId(h->latest_completed); |
1707 | 0 | Assert(TransactionIdIsValid(initial)); |
1708 | 0 | TransactionIdAdvance(initial); |
1709 | |
|
1710 | 0 | h->oldest_considered_running = initial; |
1711 | 0 | h->shared_oldest_nonremovable = initial; |
1712 | 0 | h->data_oldest_nonremovable = initial; |
1713 | | |
1714 | | /* |
1715 | | * Only modifications made by this backend affect the horizon for |
1716 | | * temporary relations. Instead of a check in each iteration of the |
1717 | | * loop over all PGPROCs it is cheaper to just initialize to the |
1718 | | * current top-level xid any. |
1719 | | * |
1720 | | * Without an assigned xid we could use a horizon as aggressive as |
1721 | | * GetNewTransactionId(), but we can get away with the much cheaper |
1722 | | * latestCompletedXid + 1: If this backend has no xid there, by |
1723 | | * definition, can't be any newer changes in the temp table than |
1724 | | * latestCompletedXid. |
1725 | | */ |
1726 | 0 | if (TransactionIdIsValid(MyProc->xid)) |
1727 | 0 | h->temp_oldest_nonremovable = MyProc->xid; |
1728 | 0 | else |
1729 | 0 | h->temp_oldest_nonremovable = initial; |
1730 | 0 | } |
1731 | | |
1732 | | /* |
1733 | | * Fetch slot horizons while ProcArrayLock is held - the |
1734 | | * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside |
1735 | | * the lock. |
1736 | | */ |
1737 | 0 | h->slot_xmin = procArray->replication_slot_xmin; |
1738 | 0 | h->slot_catalog_xmin = procArray->replication_slot_catalog_xmin; |
1739 | |
|
1740 | 0 | for (int index = 0; index < arrayP->numProcs; index++) |
1741 | 0 | { |
1742 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
1743 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
1744 | 0 | int8 statusFlags = ProcGlobal->statusFlags[index]; |
1745 | 0 | TransactionId xid; |
1746 | 0 | TransactionId xmin; |
1747 | | |
1748 | | /* Fetch xid just once - see GetNewTransactionId */ |
1749 | 0 | xid = UINT32_ACCESS_ONCE(other_xids[index]); |
1750 | 0 | xmin = UINT32_ACCESS_ONCE(proc->xmin); |
1751 | | |
1752 | | /* |
1753 | | * Consider both the transaction's Xmin, and its Xid. |
1754 | | * |
1755 | | * We must check both because a transaction might have an Xmin but not |
1756 | | * (yet) an Xid; conversely, if it has an Xid, that could determine |
1757 | | * some not-yet-set Xmin. |
1758 | | */ |
1759 | 0 | xmin = TransactionIdOlder(xmin, xid); |
1760 | | |
1761 | | /* if neither is set, this proc doesn't influence the horizon */ |
1762 | 0 | if (!TransactionIdIsValid(xmin)) |
1763 | 0 | continue; |
1764 | | |
1765 | | /* |
1766 | | * Don't ignore any procs when determining which transactions might be |
1767 | | * considered running. While slots should ensure logical decoding |
1768 | | * backends are protected even without this check, it can't hurt to |
1769 | | * include them here as well.. |
1770 | | */ |
1771 | 0 | h->oldest_considered_running = |
1772 | 0 | TransactionIdOlder(h->oldest_considered_running, xmin); |
1773 | | |
1774 | | /* |
1775 | | * Skip over backends either vacuuming (which is ok with rows being |
1776 | | * removed, as long as pg_subtrans is not truncated) or doing logical |
1777 | | * decoding (which manages xmin separately, check below). |
1778 | | */ |
1779 | 0 | if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING)) |
1780 | 0 | continue; |
1781 | | |
1782 | | /* shared tables need to take backends in all databases into account */ |
1783 | 0 | h->shared_oldest_nonremovable = |
1784 | 0 | TransactionIdOlder(h->shared_oldest_nonremovable, xmin); |
1785 | | |
1786 | | /* |
1787 | | * Normally sessions in other databases are ignored for anything but |
1788 | | * the shared horizon. |
1789 | | * |
1790 | | * However, include them when MyDatabaseId is not (yet) set. A |
1791 | | * backend in the process of starting up must not compute a "too |
1792 | | * aggressive" horizon, otherwise we could end up using it to prune |
1793 | | * still-needed data away. If the current backend never connects to a |
1794 | | * database this is harmless, because data_oldest_nonremovable will |
1795 | | * never be utilized. |
1796 | | * |
1797 | | * Also, sessions marked with PROC_AFFECTS_ALL_HORIZONS should always |
1798 | | * be included. (This flag is used for hot standby feedback, which |
1799 | | * can't be tied to a specific database.) |
1800 | | * |
1801 | | * Also, while in recovery we cannot compute an accurate per-database |
1802 | | * horizon, as all xids are managed via the KnownAssignedXids |
1803 | | * machinery. |
1804 | | */ |
1805 | 0 | if (proc->databaseId == MyDatabaseId || |
1806 | 0 | MyDatabaseId == InvalidOid || |
1807 | 0 | (statusFlags & PROC_AFFECTS_ALL_HORIZONS) || |
1808 | 0 | in_recovery) |
1809 | 0 | { |
1810 | 0 | h->data_oldest_nonremovable = |
1811 | 0 | TransactionIdOlder(h->data_oldest_nonremovable, xmin); |
1812 | 0 | } |
1813 | 0 | } |
1814 | | |
1815 | | /* |
1816 | | * If in recovery fetch oldest xid in KnownAssignedXids, will be applied |
1817 | | * after lock is released. |
1818 | | */ |
1819 | 0 | if (in_recovery) |
1820 | 0 | kaxmin = KnownAssignedXidsGetOldestXmin(); |
1821 | | |
1822 | | /* |
1823 | | * No other information from shared state is needed, release the lock |
1824 | | * immediately. The rest of the computations can be done without a lock. |
1825 | | */ |
1826 | 0 | LWLockRelease(ProcArrayLock); |
1827 | |
|
1828 | 0 | if (in_recovery) |
1829 | 0 | { |
1830 | 0 | h->oldest_considered_running = |
1831 | 0 | TransactionIdOlder(h->oldest_considered_running, kaxmin); |
1832 | 0 | h->shared_oldest_nonremovable = |
1833 | 0 | TransactionIdOlder(h->shared_oldest_nonremovable, kaxmin); |
1834 | 0 | h->data_oldest_nonremovable = |
1835 | 0 | TransactionIdOlder(h->data_oldest_nonremovable, kaxmin); |
1836 | | /* temp relations cannot be accessed in recovery */ |
1837 | 0 | } |
1838 | |
|
1839 | 0 | Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1840 | 0 | h->shared_oldest_nonremovable)); |
1841 | 0 | Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable, |
1842 | 0 | h->data_oldest_nonremovable)); |
1843 | | |
1844 | | /* |
1845 | | * Check whether there are replication slots requiring an older xmin. |
1846 | | */ |
1847 | 0 | h->shared_oldest_nonremovable = |
1848 | 0 | TransactionIdOlder(h->shared_oldest_nonremovable, h->slot_xmin); |
1849 | 0 | h->data_oldest_nonremovable = |
1850 | 0 | TransactionIdOlder(h->data_oldest_nonremovable, h->slot_xmin); |
1851 | | |
1852 | | /* |
1853 | | * The only difference between catalog / data horizons is that the slot's |
1854 | | * catalog xmin is applied to the catalog one (so catalogs can be accessed |
1855 | | * for logical decoding). Initialize with data horizon, and then back up |
1856 | | * further if necessary. Have to back up the shared horizon as well, since |
1857 | | * that also can contain catalogs. |
1858 | | */ |
1859 | 0 | h->shared_oldest_nonremovable_raw = h->shared_oldest_nonremovable; |
1860 | 0 | h->shared_oldest_nonremovable = |
1861 | 0 | TransactionIdOlder(h->shared_oldest_nonremovable, |
1862 | 0 | h->slot_catalog_xmin); |
1863 | 0 | h->catalog_oldest_nonremovable = h->data_oldest_nonremovable; |
1864 | 0 | h->catalog_oldest_nonremovable = |
1865 | 0 | TransactionIdOlder(h->catalog_oldest_nonremovable, |
1866 | 0 | h->slot_catalog_xmin); |
1867 | | |
1868 | | /* |
1869 | | * It's possible that slots backed up the horizons further than |
1870 | | * oldest_considered_running. Fix. |
1871 | | */ |
1872 | 0 | h->oldest_considered_running = |
1873 | 0 | TransactionIdOlder(h->oldest_considered_running, |
1874 | 0 | h->shared_oldest_nonremovable); |
1875 | 0 | h->oldest_considered_running = |
1876 | 0 | TransactionIdOlder(h->oldest_considered_running, |
1877 | 0 | h->catalog_oldest_nonremovable); |
1878 | 0 | h->oldest_considered_running = |
1879 | 0 | TransactionIdOlder(h->oldest_considered_running, |
1880 | 0 | h->data_oldest_nonremovable); |
1881 | | |
1882 | | /* |
1883 | | * shared horizons have to be at least as old as the oldest visible in |
1884 | | * current db |
1885 | | */ |
1886 | 0 | Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable, |
1887 | 0 | h->data_oldest_nonremovable)); |
1888 | 0 | Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable, |
1889 | 0 | h->catalog_oldest_nonremovable)); |
1890 | | |
1891 | | /* |
1892 | | * Horizons need to ensure that pg_subtrans access is still possible for |
1893 | | * the relevant backends. |
1894 | | */ |
1895 | 0 | Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1896 | 0 | h->shared_oldest_nonremovable)); |
1897 | 0 | Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1898 | 0 | h->catalog_oldest_nonremovable)); |
1899 | 0 | Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1900 | 0 | h->data_oldest_nonremovable)); |
1901 | 0 | Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1902 | 0 | h->temp_oldest_nonremovable)); |
1903 | 0 | Assert(!TransactionIdIsValid(h->slot_xmin) || |
1904 | 0 | TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1905 | 0 | h->slot_xmin)); |
1906 | 0 | Assert(!TransactionIdIsValid(h->slot_catalog_xmin) || |
1907 | 0 | TransactionIdPrecedesOrEquals(h->oldest_considered_running, |
1908 | 0 | h->slot_catalog_xmin)); |
1909 | | |
1910 | | /* update approximate horizons with the computed horizons */ |
1911 | 0 | GlobalVisUpdateApply(h); |
1912 | 0 | } |
1913 | | |
1914 | | /* |
1915 | | * Determine what kind of visibility horizon needs to be used for a |
1916 | | * relation. If rel is NULL, the most conservative horizon is used. |
1917 | | */ |
1918 | | static inline GlobalVisHorizonKind |
1919 | | GlobalVisHorizonKindForRel(Relation rel) |
1920 | 0 | { |
1921 | | /* |
1922 | | * Other relkinds currently don't contain xids, nor always the necessary |
1923 | | * logical decoding markers. |
1924 | | */ |
1925 | 0 | Assert(!rel || |
1926 | 0 | rel->rd_rel->relkind == RELKIND_RELATION || |
1927 | 0 | rel->rd_rel->relkind == RELKIND_MATVIEW || |
1928 | 0 | rel->rd_rel->relkind == RELKIND_TOASTVALUE); |
1929 | |
|
1930 | 0 | if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress()) |
1931 | 0 | return VISHORIZON_SHARED; |
1932 | 0 | else if (IsCatalogRelation(rel) || |
1933 | 0 | RelationIsAccessibleInLogicalDecoding(rel)) |
1934 | 0 | return VISHORIZON_CATALOG; |
1935 | 0 | else if (!RELATION_IS_LOCAL(rel)) |
1936 | 0 | return VISHORIZON_DATA; |
1937 | 0 | else |
1938 | 0 | return VISHORIZON_TEMP; |
1939 | 0 | } |
1940 | | |
1941 | | /* |
1942 | | * Return the oldest XID for which deleted tuples must be preserved in the |
1943 | | * passed table. |
1944 | | * |
1945 | | * If rel is not NULL the horizon may be considerably more recent than |
1946 | | * otherwise (i.e. fewer tuples will be removable). In the NULL case a horizon |
1947 | | * that is correct (but not optimal) for all relations will be returned. |
1948 | | * |
1949 | | * This is used by VACUUM to decide which deleted tuples must be preserved in |
1950 | | * the passed in table. |
1951 | | */ |
1952 | | TransactionId |
1953 | | GetOldestNonRemovableTransactionId(Relation rel) |
1954 | 0 | { |
1955 | 0 | ComputeXidHorizonsResult horizons; |
1956 | |
|
1957 | 0 | ComputeXidHorizons(&horizons); |
1958 | |
|
1959 | 0 | switch (GlobalVisHorizonKindForRel(rel)) |
1960 | 0 | { |
1961 | 0 | case VISHORIZON_SHARED: |
1962 | 0 | return horizons.shared_oldest_nonremovable; |
1963 | 0 | case VISHORIZON_CATALOG: |
1964 | 0 | return horizons.catalog_oldest_nonremovable; |
1965 | 0 | case VISHORIZON_DATA: |
1966 | 0 | return horizons.data_oldest_nonremovable; |
1967 | 0 | case VISHORIZON_TEMP: |
1968 | 0 | return horizons.temp_oldest_nonremovable; |
1969 | 0 | } |
1970 | | |
1971 | | /* just to prevent compiler warnings */ |
1972 | 0 | return InvalidTransactionId; |
1973 | 0 | } |
1974 | | |
1975 | | /* |
1976 | | * Return the oldest transaction id any currently running backend might still |
1977 | | * consider running. This should not be used for visibility / pruning |
1978 | | * determinations (see GetOldestNonRemovableTransactionId()), but for |
1979 | | * decisions like up to where pg_subtrans can be truncated. |
1980 | | */ |
1981 | | TransactionId |
1982 | | GetOldestTransactionIdConsideredRunning(void) |
1983 | 0 | { |
1984 | 0 | ComputeXidHorizonsResult horizons; |
1985 | |
|
1986 | 0 | ComputeXidHorizons(&horizons); |
1987 | |
|
1988 | 0 | return horizons.oldest_considered_running; |
1989 | 0 | } |
1990 | | |
1991 | | /* |
1992 | | * Return the visibility horizons for a hot standby feedback message. |
1993 | | */ |
1994 | | void |
1995 | | GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin) |
1996 | 0 | { |
1997 | 0 | ComputeXidHorizonsResult horizons; |
1998 | |
|
1999 | 0 | ComputeXidHorizons(&horizons); |
2000 | | |
2001 | | /* |
2002 | | * Don't want to use shared_oldest_nonremovable here, as that contains the |
2003 | | * effect of replication slot's catalog_xmin. We want to send a separate |
2004 | | * feedback for the catalog horizon, so the primary can remove data table |
2005 | | * contents more aggressively. |
2006 | | */ |
2007 | 0 | *xmin = horizons.shared_oldest_nonremovable_raw; |
2008 | 0 | *catalog_xmin = horizons.slot_catalog_xmin; |
2009 | 0 | } |
2010 | | |
2011 | | /* |
2012 | | * GetMaxSnapshotXidCount -- get max size for snapshot XID array |
2013 | | * |
2014 | | * We have to export this for use by snapmgr.c. |
2015 | | */ |
2016 | | int |
2017 | | GetMaxSnapshotXidCount(void) |
2018 | 0 | { |
2019 | 0 | return procArray->maxProcs; |
2020 | 0 | } |
2021 | | |
2022 | | /* |
2023 | | * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array |
2024 | | * |
2025 | | * We have to export this for use by snapmgr.c. |
2026 | | */ |
2027 | | int |
2028 | | GetMaxSnapshotSubxidCount(void) |
2029 | 0 | { |
2030 | 0 | return TOTAL_MAX_CACHED_SUBXIDS; |
2031 | 0 | } |
2032 | | |
2033 | | /* |
2034 | | * Helper function for GetSnapshotData() that checks if the bulk of the |
2035 | | * visibility information in the snapshot is still valid. If so, it updates |
2036 | | * the fields that need to change and returns true. Otherwise it returns |
2037 | | * false. |
2038 | | * |
2039 | | * This very likely can be evolved to not need ProcArrayLock held (at very |
2040 | | * least in the case we already hold a snapshot), but that's for another day. |
2041 | | */ |
2042 | | static bool |
2043 | | GetSnapshotDataReuse(Snapshot snapshot) |
2044 | 0 | { |
2045 | 0 | uint64 curXactCompletionCount; |
2046 | |
|
2047 | 0 | Assert(LWLockHeldByMe(ProcArrayLock)); |
2048 | |
|
2049 | 0 | if (unlikely(snapshot->snapXactCompletionCount == 0)) |
2050 | 0 | return false; |
2051 | | |
2052 | 0 | curXactCompletionCount = TransamVariables->xactCompletionCount; |
2053 | 0 | if (curXactCompletionCount != snapshot->snapXactCompletionCount) |
2054 | 0 | return false; |
2055 | | |
2056 | | /* |
2057 | | * If the current xactCompletionCount is still the same as it was at the |
2058 | | * time the snapshot was built, we can be sure that rebuilding the |
2059 | | * contents of the snapshot the hard way would result in the same snapshot |
2060 | | * contents: |
2061 | | * |
2062 | | * As explained in transam/README, the set of xids considered running by |
2063 | | * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot |
2064 | | * contents only depend on transactions with xids and xactCompletionCount |
2065 | | * is incremented whenever a transaction with an xid finishes (while |
2066 | | * holding ProcArrayLock exclusively). Thus the xactCompletionCount check |
2067 | | * ensures we would detect if the snapshot would have changed. |
2068 | | * |
2069 | | * As the snapshot contents are the same as it was before, it is safe to |
2070 | | * re-enter the snapshot's xmin into the PGPROC array. None of the rows |
2071 | | * visible under the snapshot could already have been removed (that'd |
2072 | | * require the set of running transactions to change) and it fulfills the |
2073 | | * requirement that concurrent GetSnapshotData() calls yield the same |
2074 | | * xmin. |
2075 | | */ |
2076 | 0 | if (!TransactionIdIsValid(MyProc->xmin)) |
2077 | 0 | MyProc->xmin = TransactionXmin = snapshot->xmin; |
2078 | |
|
2079 | 0 | RecentXmin = snapshot->xmin; |
2080 | 0 | Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin)); |
2081 | |
|
2082 | 0 | snapshot->curcid = GetCurrentCommandId(false); |
2083 | 0 | snapshot->active_count = 0; |
2084 | 0 | snapshot->regd_count = 0; |
2085 | 0 | snapshot->copied = false; |
2086 | |
|
2087 | 0 | return true; |
2088 | 0 | } |
2089 | | |
2090 | | /* |
2091 | | * GetSnapshotData -- returns information about running transactions. |
2092 | | * |
2093 | | * The returned snapshot includes xmin (lowest still-running xact ID), |
2094 | | * xmax (highest completed xact ID + 1), and a list of running xact IDs |
2095 | | * in the range xmin <= xid < xmax. It is used as follows: |
2096 | | * All xact IDs < xmin are considered finished. |
2097 | | * All xact IDs >= xmax are considered still running. |
2098 | | * For an xact ID xmin <= xid < xmax, consult list to see whether |
2099 | | * it is considered running or not. |
2100 | | * This ensures that the set of transactions seen as "running" by the |
2101 | | * current xact will not change after it takes the snapshot. |
2102 | | * |
2103 | | * All running top-level XIDs are included in the snapshot, except for lazy |
2104 | | * VACUUM processes. We also try to include running subtransaction XIDs, |
2105 | | * but since PGPROC has only a limited cache area for subxact XIDs, full |
2106 | | * information may not be available. If we find any overflowed subxid arrays, |
2107 | | * we have to mark the snapshot's subxid data as overflowed, and extra work |
2108 | | * *may* need to be done to determine what's running (see XidInMVCCSnapshot()). |
2109 | | * |
2110 | | * We also update the following backend-global variables: |
2111 | | * TransactionXmin: the oldest xmin of any snapshot in use in the |
2112 | | * current transaction (this is the same as MyProc->xmin). |
2113 | | * RecentXmin: the xmin computed for the most recent snapshot. XIDs |
2114 | | * older than this are known not running any more. |
2115 | | * |
2116 | | * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels |
2117 | | * for the benefit of the GlobalVisTest* family of functions. |
2118 | | * |
2119 | | * Note: this function should probably not be called with an argument that's |
2120 | | * not statically allocated (see xip allocation below). |
2121 | | */ |
2122 | | Snapshot |
2123 | | GetSnapshotData(Snapshot snapshot) |
2124 | 0 | { |
2125 | 0 | ProcArrayStruct *arrayP = procArray; |
2126 | 0 | TransactionId *other_xids = ProcGlobal->xids; |
2127 | 0 | TransactionId xmin; |
2128 | 0 | TransactionId xmax; |
2129 | 0 | int count = 0; |
2130 | 0 | int subcount = 0; |
2131 | 0 | bool suboverflowed = false; |
2132 | 0 | FullTransactionId latest_completed; |
2133 | 0 | TransactionId oldestxid; |
2134 | 0 | int mypgxactoff; |
2135 | 0 | TransactionId myxid; |
2136 | 0 | uint64 curXactCompletionCount; |
2137 | |
|
2138 | 0 | TransactionId replication_slot_xmin = InvalidTransactionId; |
2139 | 0 | TransactionId replication_slot_catalog_xmin = InvalidTransactionId; |
2140 | |
|
2141 | 0 | Assert(snapshot != NULL); |
2142 | | |
2143 | | /* |
2144 | | * Allocating space for maxProcs xids is usually overkill; numProcs would |
2145 | | * be sufficient. But it seems better to do the malloc while not holding |
2146 | | * the lock, so we can't look at numProcs. Likewise, we allocate much |
2147 | | * more subxip storage than is probably needed. |
2148 | | * |
2149 | | * This does open a possibility for avoiding repeated malloc/free: since |
2150 | | * maxProcs does not change at runtime, we can simply reuse the previous |
2151 | | * xip arrays if any. (This relies on the fact that all callers pass |
2152 | | * static SnapshotData structs.) |
2153 | | */ |
2154 | 0 | if (snapshot->xip == NULL) |
2155 | 0 | { |
2156 | | /* |
2157 | | * First call for this snapshot. Snapshot is same size whether or not |
2158 | | * we are in recovery, see later comments. |
2159 | | */ |
2160 | 0 | snapshot->xip = (TransactionId *) |
2161 | 0 | malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId)); |
2162 | 0 | if (snapshot->xip == NULL) |
2163 | 0 | ereport(ERROR, |
2164 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY), |
2165 | 0 | errmsg("out of memory"))); |
2166 | 0 | Assert(snapshot->subxip == NULL); |
2167 | 0 | snapshot->subxip = (TransactionId *) |
2168 | 0 | malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId)); |
2169 | 0 | if (snapshot->subxip == NULL) |
2170 | 0 | ereport(ERROR, |
2171 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY), |
2172 | 0 | errmsg("out of memory"))); |
2173 | 0 | } |
2174 | | |
2175 | | /* |
2176 | | * It is sufficient to get shared lock on ProcArrayLock, even if we are |
2177 | | * going to set MyProc->xmin. |
2178 | | */ |
2179 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2180 | |
|
2181 | 0 | if (GetSnapshotDataReuse(snapshot)) |
2182 | 0 | { |
2183 | 0 | LWLockRelease(ProcArrayLock); |
2184 | 0 | return snapshot; |
2185 | 0 | } |
2186 | | |
2187 | 0 | latest_completed = TransamVariables->latestCompletedXid; |
2188 | 0 | mypgxactoff = MyProc->pgxactoff; |
2189 | 0 | myxid = other_xids[mypgxactoff]; |
2190 | 0 | Assert(myxid == MyProc->xid); |
2191 | |
|
2192 | 0 | oldestxid = TransamVariables->oldestXid; |
2193 | 0 | curXactCompletionCount = TransamVariables->xactCompletionCount; |
2194 | | |
2195 | | /* xmax is always latestCompletedXid + 1 */ |
2196 | 0 | xmax = XidFromFullTransactionId(latest_completed); |
2197 | 0 | TransactionIdAdvance(xmax); |
2198 | 0 | Assert(TransactionIdIsNormal(xmax)); |
2199 | | |
2200 | | /* initialize xmin calculation with xmax */ |
2201 | 0 | xmin = xmax; |
2202 | | |
2203 | | /* take own xid into account, saves a check inside the loop */ |
2204 | 0 | if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin)) |
2205 | 0 | xmin = myxid; |
2206 | |
|
2207 | 0 | snapshot->takenDuringRecovery = RecoveryInProgress(); |
2208 | |
|
2209 | 0 | if (!snapshot->takenDuringRecovery) |
2210 | 0 | { |
2211 | 0 | int numProcs = arrayP->numProcs; |
2212 | 0 | TransactionId *xip = snapshot->xip; |
2213 | 0 | int *pgprocnos = arrayP->pgprocnos; |
2214 | 0 | XidCacheStatus *subxidStates = ProcGlobal->subxidStates; |
2215 | 0 | uint8 *allStatusFlags = ProcGlobal->statusFlags; |
2216 | | |
2217 | | /* |
2218 | | * First collect set of pgxactoff/xids that need to be included in the |
2219 | | * snapshot. |
2220 | | */ |
2221 | 0 | for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++) |
2222 | 0 | { |
2223 | | /* Fetch xid just once - see GetNewTransactionId */ |
2224 | 0 | TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); |
2225 | 0 | uint8 statusFlags; |
2226 | |
|
2227 | 0 | Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff); |
2228 | | |
2229 | | /* |
2230 | | * If the transaction has no XID assigned, we can skip it; it |
2231 | | * won't have sub-XIDs either. |
2232 | | */ |
2233 | 0 | if (likely(xid == InvalidTransactionId)) |
2234 | 0 | continue; |
2235 | | |
2236 | | /* |
2237 | | * We don't include our own XIDs (if any) in the snapshot. It |
2238 | | * needs to be included in the xmin computation, but we did so |
2239 | | * outside the loop. |
2240 | | */ |
2241 | 0 | if (pgxactoff == mypgxactoff) |
2242 | 0 | continue; |
2243 | | |
2244 | | /* |
2245 | | * The only way we are able to get here with a non-normal xid is |
2246 | | * during bootstrap - with this backend using |
2247 | | * BootstrapTransactionId. But the above test should filter that |
2248 | | * out. |
2249 | | */ |
2250 | 0 | Assert(TransactionIdIsNormal(xid)); |
2251 | | |
2252 | | /* |
2253 | | * If the XID is >= xmax, we can skip it; such transactions will |
2254 | | * be treated as running anyway (and any sub-XIDs will also be >= |
2255 | | * xmax). |
2256 | | */ |
2257 | 0 | if (!NormalTransactionIdPrecedes(xid, xmax)) |
2258 | 0 | continue; |
2259 | | |
2260 | | /* |
2261 | | * Skip over backends doing logical decoding which manages xmin |
2262 | | * separately (check below) and ones running LAZY VACUUM. |
2263 | | */ |
2264 | 0 | statusFlags = allStatusFlags[pgxactoff]; |
2265 | 0 | if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM)) |
2266 | 0 | continue; |
2267 | | |
2268 | 0 | if (NormalTransactionIdPrecedes(xid, xmin)) |
2269 | 0 | xmin = xid; |
2270 | | |
2271 | | /* Add XID to snapshot. */ |
2272 | 0 | xip[count++] = xid; |
2273 | | |
2274 | | /* |
2275 | | * Save subtransaction XIDs if possible (if we've already |
2276 | | * overflowed, there's no point). Note that the subxact XIDs must |
2277 | | * be later than their parent, so no need to check them against |
2278 | | * xmin. We could filter against xmax, but it seems better not to |
2279 | | * do that much work while holding the ProcArrayLock. |
2280 | | * |
2281 | | * The other backend can add more subxids concurrently, but cannot |
2282 | | * remove any. Hence it's important to fetch nxids just once. |
2283 | | * Should be safe to use memcpy, though. (We needn't worry about |
2284 | | * missing any xids added concurrently, because they must postdate |
2285 | | * xmax.) |
2286 | | * |
2287 | | * Again, our own XIDs are not included in the snapshot. |
2288 | | */ |
2289 | 0 | if (!suboverflowed) |
2290 | 0 | { |
2291 | |
|
2292 | 0 | if (subxidStates[pgxactoff].overflowed) |
2293 | 0 | suboverflowed = true; |
2294 | 0 | else |
2295 | 0 | { |
2296 | 0 | int nsubxids = subxidStates[pgxactoff].count; |
2297 | |
|
2298 | 0 | if (nsubxids > 0) |
2299 | 0 | { |
2300 | 0 | int pgprocno = pgprocnos[pgxactoff]; |
2301 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
2302 | |
|
2303 | 0 | pg_read_barrier(); /* pairs with GetNewTransactionId */ |
2304 | |
|
2305 | 0 | memcpy(snapshot->subxip + subcount, |
2306 | 0 | proc->subxids.xids, |
2307 | 0 | nsubxids * sizeof(TransactionId)); |
2308 | 0 | subcount += nsubxids; |
2309 | 0 | } |
2310 | 0 | } |
2311 | 0 | } |
2312 | 0 | } |
2313 | 0 | } |
2314 | 0 | else |
2315 | 0 | { |
2316 | | /* |
2317 | | * We're in hot standby, so get XIDs from KnownAssignedXids. |
2318 | | * |
2319 | | * We store all xids directly into subxip[]. Here's why: |
2320 | | * |
2321 | | * In recovery we don't know which xids are top-level and which are |
2322 | | * subxacts, a design choice that greatly simplifies xid processing. |
2323 | | * |
2324 | | * It seems like we would want to try to put xids into xip[] only, but |
2325 | | * that is fairly small. We would either need to make that bigger or |
2326 | | * to increase the rate at which we WAL-log xid assignment; neither is |
2327 | | * an appealing choice. |
2328 | | * |
2329 | | * We could try to store xids into xip[] first and then into subxip[] |
2330 | | * if there are too many xids. That only works if the snapshot doesn't |
2331 | | * overflow because we do not search subxip[] in that case. A simpler |
2332 | | * way is to just store all xids in the subxip array because this is |
2333 | | * by far the bigger array. We just leave the xip array empty. |
2334 | | * |
2335 | | * Either way we need to change the way XidInMVCCSnapshot() works |
2336 | | * depending upon when the snapshot was taken, or change normal |
2337 | | * snapshot processing so it matches. |
2338 | | * |
2339 | | * Note: It is possible for recovery to end before we finish taking |
2340 | | * the snapshot, and for newly assigned transaction ids to be added to |
2341 | | * the ProcArray. xmax cannot change while we hold ProcArrayLock, so |
2342 | | * those newly added transaction ids would be filtered away, so we |
2343 | | * need not be concerned about them. |
2344 | | */ |
2345 | 0 | subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin, |
2346 | 0 | xmax); |
2347 | |
|
2348 | 0 | if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid)) |
2349 | 0 | suboverflowed = true; |
2350 | 0 | } |
2351 | | |
2352 | | |
2353 | | /* |
2354 | | * Fetch into local variable while ProcArrayLock is held - the |
2355 | | * LWLockRelease below is a barrier, ensuring this happens inside the |
2356 | | * lock. |
2357 | | */ |
2358 | 0 | replication_slot_xmin = procArray->replication_slot_xmin; |
2359 | 0 | replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; |
2360 | |
|
2361 | 0 | if (!TransactionIdIsValid(MyProc->xmin)) |
2362 | 0 | MyProc->xmin = TransactionXmin = xmin; |
2363 | |
|
2364 | 0 | LWLockRelease(ProcArrayLock); |
2365 | | |
2366 | | /* maintain state for GlobalVis* */ |
2367 | 0 | { |
2368 | 0 | TransactionId def_vis_xid; |
2369 | 0 | TransactionId def_vis_xid_data; |
2370 | 0 | FullTransactionId def_vis_fxid; |
2371 | 0 | FullTransactionId def_vis_fxid_data; |
2372 | 0 | FullTransactionId oldestfxid; |
2373 | | |
2374 | | /* |
2375 | | * Converting oldestXid is only safe when xid horizon cannot advance, |
2376 | | * i.e. holding locks. While we don't hold the lock anymore, all the |
2377 | | * necessary data has been gathered with lock held. |
2378 | | */ |
2379 | 0 | oldestfxid = FullXidRelativeTo(latest_completed, oldestxid); |
2380 | | |
2381 | | /* Check whether there's a replication slot requiring an older xmin. */ |
2382 | 0 | def_vis_xid_data = |
2383 | 0 | TransactionIdOlder(xmin, replication_slot_xmin); |
2384 | | |
2385 | | /* |
2386 | | * Rows in non-shared, non-catalog tables possibly could be vacuumed |
2387 | | * if older than this xid. |
2388 | | */ |
2389 | 0 | def_vis_xid = def_vis_xid_data; |
2390 | | |
2391 | | /* |
2392 | | * Check whether there's a replication slot requiring an older catalog |
2393 | | * xmin. |
2394 | | */ |
2395 | 0 | def_vis_xid = |
2396 | 0 | TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid); |
2397 | |
|
2398 | 0 | def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid); |
2399 | 0 | def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data); |
2400 | | |
2401 | | /* |
2402 | | * Check if we can increase upper bound. As a previous |
2403 | | * GlobalVisUpdate() might have computed more aggressive values, don't |
2404 | | * overwrite them if so. |
2405 | | */ |
2406 | 0 | GlobalVisSharedRels.definitely_needed = |
2407 | 0 | FullTransactionIdNewer(def_vis_fxid, |
2408 | 0 | GlobalVisSharedRels.definitely_needed); |
2409 | 0 | GlobalVisCatalogRels.definitely_needed = |
2410 | 0 | FullTransactionIdNewer(def_vis_fxid, |
2411 | 0 | GlobalVisCatalogRels.definitely_needed); |
2412 | 0 | GlobalVisDataRels.definitely_needed = |
2413 | 0 | FullTransactionIdNewer(def_vis_fxid_data, |
2414 | 0 | GlobalVisDataRels.definitely_needed); |
2415 | | /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */ |
2416 | 0 | if (TransactionIdIsNormal(myxid)) |
2417 | 0 | GlobalVisTempRels.definitely_needed = |
2418 | 0 | FullXidRelativeTo(latest_completed, myxid); |
2419 | 0 | else |
2420 | 0 | { |
2421 | 0 | GlobalVisTempRels.definitely_needed = latest_completed; |
2422 | 0 | FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed); |
2423 | 0 | } |
2424 | | |
2425 | | /* |
2426 | | * Check if we know that we can initialize or increase the lower |
2427 | | * bound. Currently the only cheap way to do so is to use |
2428 | | * TransamVariables->oldestXid as input. |
2429 | | * |
2430 | | * We should definitely be able to do better. We could e.g. put a |
2431 | | * global lower bound value into TransamVariables. |
2432 | | */ |
2433 | 0 | GlobalVisSharedRels.maybe_needed = |
2434 | 0 | FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed, |
2435 | 0 | oldestfxid); |
2436 | 0 | GlobalVisCatalogRels.maybe_needed = |
2437 | 0 | FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed, |
2438 | 0 | oldestfxid); |
2439 | 0 | GlobalVisDataRels.maybe_needed = |
2440 | 0 | FullTransactionIdNewer(GlobalVisDataRels.maybe_needed, |
2441 | 0 | oldestfxid); |
2442 | | /* accurate value known */ |
2443 | 0 | GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed; |
2444 | 0 | } |
2445 | |
|
2446 | 0 | RecentXmin = xmin; |
2447 | 0 | Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin)); |
2448 | |
|
2449 | 0 | snapshot->xmin = xmin; |
2450 | 0 | snapshot->xmax = xmax; |
2451 | 0 | snapshot->xcnt = count; |
2452 | 0 | snapshot->subxcnt = subcount; |
2453 | 0 | snapshot->suboverflowed = suboverflowed; |
2454 | 0 | snapshot->snapXactCompletionCount = curXactCompletionCount; |
2455 | |
|
2456 | 0 | snapshot->curcid = GetCurrentCommandId(false); |
2457 | | |
2458 | | /* |
2459 | | * This is a new snapshot, so set both refcounts to zero, and mark it as |
2460 | | * not copied in persistent memory. |
2461 | | */ |
2462 | 0 | snapshot->active_count = 0; |
2463 | 0 | snapshot->regd_count = 0; |
2464 | 0 | snapshot->copied = false; |
2465 | |
|
2466 | 0 | return snapshot; |
2467 | 0 | } |
2468 | | |
2469 | | /* |
2470 | | * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin |
2471 | | * |
2472 | | * This is called when installing a snapshot imported from another |
2473 | | * transaction. To ensure that OldestXmin doesn't go backwards, we must |
2474 | | * check that the source transaction is still running, and we'd better do |
2475 | | * that atomically with installing the new xmin. |
2476 | | * |
2477 | | * Returns true if successful, false if sourcevxid is NULL or no qualifying source xact is found (e.g. it is no longer running). |
2478 | | */ |
2479 | | bool |
2480 | | ProcArrayInstallImportedXmin(TransactionId xmin, |
2481 | | VirtualTransactionId *sourcevxid) |
2482 | 0 | { |
2483 | 0 | bool result = false; /* stays false unless a qualifying source proc is found */ |
2484 | 0 | ProcArrayStruct *arrayP = procArray; |
2485 | 0 | int index; |
2486 | |

2487 | 0 | Assert(TransactionIdIsNormal(xmin)); |
2488 | 0 | if (!sourcevxid) |
2489 | 0 | return false; /* no source vxid supplied; bail out before taking the lock */ |
2490 | | |
2491 | | /* Get lock so source xact can't end while we're doing this */ |
2492 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2493 | | |
2494 | | /* |
2495 | | * Find the PGPROC entry of the source transaction. (This could use |
2496 | | * GetPGProcByNumber(), unless it's a prepared xact. But this isn't |
2497 | | * performance critical.) |
2498 | | */ |
2499 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2500 | 0 | { |
2501 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2502 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
2503 | 0 | int statusFlags = ProcGlobal->statusFlags[index]; |
2504 | 0 | TransactionId xid; |
2505 | | |
2506 | | /* Ignore procs running LAZY VACUUM */ |
2507 | 0 | if (statusFlags & PROC_IN_VACUUM) |
2508 | 0 | continue; |
2509 | | |
2510 | | /* We are only interested in the specific virtual transaction. */ |
2511 | 0 | if (proc->vxid.procNumber != sourcevxid->procNumber) |
2512 | 0 | continue; |
2513 | 0 | if (proc->vxid.lxid != sourcevxid->localTransactionId) |
2514 | 0 | continue; |
2515 | | |
2516 | | /* |
2517 | | * We check the transaction's database ID for paranoia's sake: if it's |
2518 | | * in another DB then its xmin does not cover us. Caller should have |
2519 | | * detected this already, so we just treat any funny cases as |
2520 | | * "transaction not found". |
2521 | | */ |
2522 | 0 | if (proc->databaseId != MyDatabaseId) |
2523 | 0 | continue; |
2524 | | |
2525 | | /* |
2526 | | * Likewise, let's just make real sure its xmin does cover us, i.e. that |
2527 | | * it is a normal xid that precedes or equals the one being installed. |
2528 | 0 | xid = UINT32_ACCESS_ONCE(proc->xmin); |
2529 | 0 | if (!TransactionIdIsNormal(xid) || |
2530 | 0 | !TransactionIdPrecedesOrEquals(xid, xmin)) |
2531 | 0 | continue; |
2532 | | |
2533 | | /* |
2534 | | * We're good. Install the new xmin. As in GetSnapshotData, set |
2535 | | * TransactionXmin too. (Note that because snapmgr.c called |
2536 | | * GetSnapshotData first, we'll be overwriting a valid xmin here, so |
2537 | | * we don't check that.) |
2538 | | */ |
2539 | 0 | MyProc->xmin = TransactionXmin = xmin; |
2540 | |

2541 | 0 | result = true; |
2542 | 0 | break; /* source proc found and xmin installed; stop scanning */ |
2543 | 0 | } |
2544 | |

2545 | 0 | LWLockRelease(ProcArrayLock); |
2546 | |

2547 | 0 | return result; |
2548 | 0 | } |
2549 | | |
2550 | | /* |
2551 | | * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin |
2552 | | * |
2553 | | * This is like ProcArrayInstallImportedXmin, but we have a pointer to the |
2554 | | * PGPROC of the transaction from which we imported the snapshot, rather than |
2555 | | * a virtual transaction ID. |
2556 | | * |
2557 | | * Note that this function also copies the PROC_XMIN_FLAGS bits of statusFlags |
2558 | | * from the source `proc` in order to avoid the case where MyProc's xmin needs |
2559 | | * to be skipped for computing xid horizon. |
2560 | | * |
2561 | | * Returns true if successful, false if the source proc is in another database or does not advertise a normal xmin <= the one given (e.g. source xact is no longer running). |
2562 | | */ |
2563 | | bool |
2564 | | ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) |
2565 | 0 | { |
2566 | 0 | bool result = false; /* stays false unless the checks below pass */ |
2567 | 0 | TransactionId xid; |
2568 | |

2569 | 0 | Assert(TransactionIdIsNormal(xmin)); |
2570 | 0 | Assert(proc != NULL); |
2571 | | |
2572 | | /* |
2573 | | * Get an exclusive lock so that we can copy statusFlags from source proc. |
2574 | | */ |
2575 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
2576 | | |
2577 | | /* |
2578 | | * Be certain that the referenced PGPROC has an advertised xmin which is |
2579 | | * no later than the one we're installing, so that the system-wide xmin |
2580 | | * can't go backwards. Also, make sure it's running in the same database, |
2581 | | * so that the per-database xmin cannot go backwards. |
2582 | | */ |
2583 | 0 | xid = UINT32_ACCESS_ONCE(proc->xmin); |
2584 | 0 | if (proc->databaseId == MyDatabaseId && |
2585 | 0 | TransactionIdIsNormal(xid) && |
2586 | 0 | TransactionIdPrecedesOrEquals(xid, xmin)) |
2587 | 0 | { |
2588 | | /* |
2589 | | * Install xmin and propagate the statusFlags that affect how the |
2590 | | * value is interpreted by vacuum. |
2591 | | */ |
2592 | 0 | MyProc->xmin = TransactionXmin = xmin; |
2593 | 0 | MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) | |
2594 | 0 | (proc->statusFlags & PROC_XMIN_FLAGS); |
2595 | 0 | ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; /* keep ProcGlobal's copy of our flags in sync */ |
2596 | |

2597 | 0 | result = true; |
2598 | 0 | } |
2599 | |

2600 | 0 | LWLockRelease(ProcArrayLock); |
2601 | |

2602 | 0 | return result; |
2603 | 0 | } |
2604 | | |
2605 | | /* |
2606 | | * GetRunningTransactionData -- returns information about running transactions. |
2607 | | * |
2608 | | * Similar to GetSnapshotData but returns more information. We include |
2609 | | * all PGPROCs with an assigned TransactionId, even VACUUM processes and |
2610 | | * prepared transactions. |
2611 | | * |
2612 | | * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for |
2613 | | * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc |
2614 | | * array until the caller has WAL-logged this snapshot, and releases the |
2615 | | * lock. Acquiring ProcArrayLock ensures that no transactions commit until the |
2616 | | * lock is released. |
2617 | | * |
2618 | | * The returned data structure is statically allocated; caller should not |
2619 | | * modify it, and must not assume it is valid past the next call. |
2620 | | * |
2621 | | * This is never executed during recovery so there is no need to look at |
2622 | | * KnownAssignedXids. |
2623 | | * |
2624 | | * Dummy PGPROCs from prepared transactions are included, meaning that this |
2625 | | * may return entries with duplicated TransactionId values coming from a |
2626 | | * transaction that is finishing its prepare. Nothing is done about duplicated |
2627 | | * entries here, so as not to hold ProcArrayLock longer than necessary. |
2628 | | * |
2629 | | * We don't worry about updating other counters, we want to keep this as |
2630 | | * simple as possible and leave GetSnapshotData() as the primary code for |
2631 | | * that bookkeeping. |
2632 | | * |
2633 | | * Note that if any transaction has overflowed its cached subtransactions |
2634 | | * then there is no real need to include any subtransactions. |
2635 | | */ |
2636 | | RunningTransactions |
2637 | | GetRunningTransactionData(void) |
2638 | 0 | { |
2639 | | /* result workspace; static, so it is reused by every call */ |
2640 | 0 | static RunningTransactionsData CurrentRunningXactsData; |
2641 | |

2642 | 0 | ProcArrayStruct *arrayP = procArray; |
2643 | 0 | TransactionId *other_xids = ProcGlobal->xids; |
2644 | 0 | RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData; |
2645 | 0 | TransactionId latestCompletedXid; |
2646 | 0 | TransactionId oldestRunningXid; |
2647 | 0 | TransactionId oldestDatabaseRunningXid; |
2648 | 0 | TransactionId *xids; |
2649 | 0 | int index; |
2650 | 0 | int count; |
2651 | 0 | int subcount; |
2652 | 0 | bool suboverflowed; |
2653 | |

2654 | 0 | Assert(!RecoveryInProgress()); |
2655 | | |
2656 | | /* |
2657 | | * Allocating space for maxProcs xids is usually overkill; numProcs would |
2658 | | * be sufficient. But it seems better to do the malloc while not holding |
2659 | | * the lock, so we can't look at numProcs. Likewise, we allocate much |
2660 | | * more subxip storage than is probably needed. |
2661 | | * |
2662 | | * Should only be allocated in bgwriter, since only ever executed during |
2663 | | * checkpoints. |
2664 | | */ |
2665 | 0 | if (CurrentRunningXacts->xids == NULL) |
2666 | 0 | { |
2667 | | /* |
2668 | | * First call: allocate the xid array once; it is never freed here. |
2669 | | */ |
2670 | 0 | CurrentRunningXacts->xids = (TransactionId *) |
2671 | 0 | malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId)); |
2672 | 0 | if (CurrentRunningXacts->xids == NULL) |
2673 | 0 | ereport(ERROR, |
2674 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY), |
2675 | 0 | errmsg("out of memory"))); |
2676 | 0 | } |
2677 | | |
2678 | 0 | xids = CurrentRunningXacts->xids; |
2679 | |

2680 | 0 | count = subcount = 0; |
2681 | 0 | suboverflowed = false; |
2682 | | |
2683 | | /* |
2684 | | * Ensure that no xids enter or leave the procarray while we obtain |
2685 | | * snapshot. |
2686 | | */ |
2687 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2688 | 0 | LWLockAcquire(XidGenLock, LW_SHARED); /* both locks are still held when we return */ |
2689 | |

2690 | 0 | latestCompletedXid = |
2691 | 0 | XidFromFullTransactionId(TransamVariables->latestCompletedXid); |
2692 | 0 | oldestDatabaseRunningXid = oldestRunningXid = |
2693 | 0 | XidFromFullTransactionId(TransamVariables->nextXid); |
2694 | | |
2695 | | /* |
2696 | | * Spin over procArray collecting all top-level xids |
2697 | | */ |
2698 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2699 | 0 | { |
2700 | 0 | TransactionId xid; |
2701 | | |
2702 | | /* Fetch xid just once - see GetNewTransactionId */ |
2703 | 0 | xid = UINT32_ACCESS_ONCE(other_xids[index]); |
2704 | | |
2705 | | /* |
2706 | | * We don't need to store transactions that don't have a TransactionId |
2707 | | * yet because they will not show as running on a standby server. |
2708 | | */ |
2709 | 0 | if (!TransactionIdIsValid(xid)) |
2710 | 0 | continue; |
2711 | | |
2712 | | /* |
2713 | | * Be careful not to exclude any xids before calculating the values of |
2714 | | * oldestRunningXid and suboverflowed, since these are used to clean |
2715 | | * up transaction information held on standbys. |
2716 | | */ |
2717 | 0 | if (TransactionIdPrecedes(xid, oldestRunningXid)) |
2718 | 0 | oldestRunningXid = xid; |
2719 | | |
2720 | | /* |
2721 | | * Also, update the oldest running xid within the current database. As |
2722 | | * fetching pgprocno and PGPROC could cause cache misses, we do cheap |
2723 | | * TransactionId comparison first. |
2724 | | */ |
2725 | 0 | if (TransactionIdPrecedes(xid, oldestDatabaseRunningXid)) |
2726 | 0 | { |
2727 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2728 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
2729 | |

2730 | 0 | if (proc->databaseId == MyDatabaseId) |
2731 | 0 | oldestDatabaseRunningXid = xid; |
2732 | 0 | } |
2733 | |

2734 | 0 | if (ProcGlobal->subxidStates[index].overflowed) |
2735 | 0 | suboverflowed = true; |
2736 | | |
2737 | | /* |
2738 | | * If we wished to exclude xids this would be the right place for it. |
2739 | | * Procs with the PROC_IN_VACUUM flag set don't usually assign xids, |
2740 | | * but they do during truncation at the end when they get the lock and |
2741 | | * truncate, so it is not much of a problem to include them if they |
2742 | | * are seen and it is cleaner to include them. |
2743 | | */ |
2744 | |

2745 | 0 | xids[count++] = xid; |
2746 | 0 | } |
2747 | | |
2748 | | /* |
2749 | | * Spin over procArray collecting all subxids, but only if there hasn't |
2750 | | * been a suboverflow. |
2751 | | */ |
2752 | 0 | if (!suboverflowed) |
2753 | 0 | { |
2754 | 0 | XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates; |
2755 | |

2756 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2757 | 0 | { |
2758 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2759 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
2760 | 0 | int nsubxids; |
2761 | | |
2762 | | /* |
2763 | | * Save subtransaction XIDs. Other backends can't add or remove |
2764 | | * entries while we're holding XidGenLock. |
2765 | | */ |
2766 | 0 | nsubxids = other_subxidstates[index].count; |
2767 | 0 | if (nsubxids > 0) |
2768 | 0 | { |
2769 | | /* barrier not really required, as XidGenLock is held, but ... */ |
2770 | 0 | pg_read_barrier(); /* pairs with GetNewTransactionId */ |
2771 | |

2772 | 0 | memcpy(&xids[count], proc->subxids.xids, |
2773 | 0 | nsubxids * sizeof(TransactionId)); |
2774 | 0 | count += nsubxids; |
2775 | 0 | subcount += nsubxids; |
2776 | | |
2777 | | /* |
2778 | | * Top-level XID of a transaction is always less than any of |
2779 | | * its subxids, so we don't need to check if any of the |
2780 | | * subxids are smaller than oldestRunningXid |
2781 | | */ |
2782 | 0 | } |
2783 | 0 | } |
2784 | 0 | } |
2785 | | |
2786 | | /* |
2787 | | * It's important *not* to include the limits set by slots here because |
2788 | | * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those |
2789 | | * were to be included here the initial value could never increase because |
2790 | | * of a circular dependency where slots only increase their limits when |
2791 | | * running xacts increases oldestRunningXid and running xacts only |
2792 | | * increases if slots do. |
2793 | | */ |
2794 | |

2795 | 0 | CurrentRunningXacts->xcnt = count - subcount; /* count also covers the subxids appended above */ |
2796 | 0 | CurrentRunningXacts->subxcnt = subcount; |
2797 | 0 | CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY; |
2798 | 0 | CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid); |
2799 | 0 | CurrentRunningXacts->oldestRunningXid = oldestRunningXid; |
2800 | 0 | CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid; |
2801 | 0 | CurrentRunningXacts->latestCompletedXid = latestCompletedXid; |
2802 | |

2803 | 0 | Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); |
2804 | 0 | Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); |
2805 | 0 | Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid)); |
2806 | | |
2807 | | /* We don't release the locks here, the caller is responsible for that */ |
2808 | |

2809 | 0 | return CurrentRunningXacts; |
2810 | 0 | } |
2811 | | |
2812 | | /* |
2813 | | * GetOldestActiveTransactionId() |
2814 | | * |
2815 | | * Similar to GetSnapshotData but returns just oldestActiveXid. We include |
2816 | | * all PGPROCs with an assigned TransactionId, even VACUUM processes. |
2817 | | * |
2818 | | * If allDbs is true, we look at all databases, though there is no need to |
2819 | | * include WALSender since this has no effect on hot standby conflicts. If |
2820 | | * allDbs is false, skip processes attached to other databases. |
2821 | | * |
2822 | | * This is never executed during recovery so there is no need to look at |
2823 | | * KnownAssignedXids. |
2824 | | * |
2825 | | * We don't worry about updating other counters, we want to keep this as |
2826 | | * simple as possible and leave GetSnapshotData() as the primary code for |
2827 | | * that bookkeeping. |
2828 | | * |
2829 | | * If inCommitOnly is true, only transactions currently in the commit critical |
2830 | | * section (DELAY_CHKPT_IN_COMMIT set) are considered. If none qualifies, nextXid is returned. |
2831 | | */ |
2832 | | TransactionId |
2833 | | GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs) |
2834 | 0 | { |
2835 | 0 | ProcArrayStruct *arrayP = procArray; |
2836 | 0 | TransactionId *other_xids = ProcGlobal->xids; |
2837 | 0 | TransactionId oldestRunningXid; |
2838 | 0 | int index; |
2839 | |

2840 | 0 | Assert(!RecoveryInProgress()); |
2841 | | |
2842 | | /* |
2843 | | * Read nextXid, as the upper bound of what's still active. |
2844 | | * |
2845 | | * Reading a TransactionId is atomic, but we must grab the lock to make |
2846 | | * sure that all XIDs < nextXid are already present in the proc array (or |
2847 | | * have already completed), when we spin over it. |
2848 | | */ |
2849 | 0 | LWLockAcquire(XidGenLock, LW_SHARED); |
2850 | 0 | oldestRunningXid = XidFromFullTransactionId(TransamVariables->nextXid); |
2851 | 0 | LWLockRelease(XidGenLock); |
2852 | | |
2853 | | /* |
2854 | | * Spin over procArray looking for the oldest qualifying top-level xid. |
2855 | | */ |
2856 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
2857 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2858 | 0 | { |
2859 | 0 | TransactionId xid; |
2860 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
2861 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
2862 | | |
2863 | | /* Fetch xid just once - see GetNewTransactionId */ |
2864 | 0 | xid = UINT32_ACCESS_ONCE(other_xids[index]); |
2865 | |

2866 | 0 | if (!TransactionIdIsNormal(xid)) |
2867 | 0 | continue; |
2868 | | |
2869 | 0 | if (inCommitOnly && |
2870 | 0 | (proc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0) |
2871 | 0 | continue; /* caller wants only xacts in the commit critical section */ |
2872 | | |
2873 | 0 | if (!allDbs && proc->databaseId != MyDatabaseId) |
2874 | 0 | continue; /* caller wants only xacts in our own database */ |
2875 | | |
2876 | 0 | if (TransactionIdPrecedes(xid, oldestRunningXid)) |
2877 | 0 | oldestRunningXid = xid; |
2878 | | |
2879 | | /* |
2880 | | * Top-level XID of a transaction is always less than any of its |
2881 | | * subxids, so we don't need to check if any of the subxids are |
2882 | | * smaller than oldestRunningXid |
2883 | | */ |
2884 | 0 | } |
2885 | 0 | LWLockRelease(ProcArrayLock); |
2886 | |

2887 | 0 | return oldestRunningXid; |
2888 | 0 | } |
2889 | | |
2890 | | /* |
2891 | | * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum |
2892 | | * |
2893 | | * Returns the oldest xid that we can guarantee not to have been affected by |
2894 | | * vacuum, i.e. no rows >= that xid have been vacuumed away unless the |
2895 | | * transaction aborted. Note that the value can (and most of the time will) be |
2896 | | * much more conservative than what really has been affected by vacuum, but we |
2897 | | * currently don't have better data available. |
2898 | | * |
2899 | | * This is useful to initialize the cutoff xid after which a new changeset |
2900 | | * extraction replication slot can start decoding changes. |
2901 | | * |
2902 | | * Must be called with ProcArrayLock held either shared or exclusively, |
2903 | | * although most callers will want to use exclusive mode since it is expected |
2904 | | * that the caller will immediately use the xid to peg the xmin horizon. |
2905 | | */ |
2906 | | TransactionId |
2907 | | GetOldestSafeDecodingTransactionId(bool catalogOnly) |
2908 | 0 | { |
2909 | 0 | ProcArrayStruct *arrayP = procArray; |
2910 | 0 | TransactionId oldestSafeXid; |
2911 | 0 | int index; |
2912 | 0 | bool recovery_in_progress = RecoveryInProgress(); |
2913 | |

2914 | 0 | Assert(LWLockHeldByMe(ProcArrayLock)); |
2915 | | |
2916 | | /* |
2917 | | * Acquire XidGenLock, so no transactions can acquire an xid while we're |
2918 | | * running. If no transaction with an xid were running concurrently, a new |
2919 | | * xid could influence RecentXmin et al. |
2920 | | * |
2921 | | * We initialize the computation to nextXid since that's guaranteed to be |
2922 | | * a safe, albeit pessimal, value. |
2923 | | */ |
2924 | 0 | LWLockAcquire(XidGenLock, LW_SHARED); |
2925 | 0 | oldestSafeXid = XidFromFullTransactionId(TransamVariables->nextXid); |
2926 | | |
2927 | | /* |
2928 | | * If there's already a slot pegging the xmin horizon, we can start with |
2929 | | * that value, it's guaranteed to be safe since it's computed by this |
2930 | | * routine initially and has been enforced since. We can always use the |
2931 | | * slot's general xmin horizon, but the catalog horizon is only usable |
2932 | | * when only catalog data is going to be looked at (catalogOnly is true). |
2933 | | */ |
2934 | 0 | if (TransactionIdIsValid(procArray->replication_slot_xmin) && |
2935 | 0 | TransactionIdPrecedes(procArray->replication_slot_xmin, |
2936 | 0 | oldestSafeXid)) |
2937 | 0 | oldestSafeXid = procArray->replication_slot_xmin; |
2938 | |

2939 | 0 | if (catalogOnly && |
2940 | 0 | TransactionIdIsValid(procArray->replication_slot_catalog_xmin) && |
2941 | 0 | TransactionIdPrecedes(procArray->replication_slot_catalog_xmin, |
2942 | 0 | oldestSafeXid)) |
2943 | 0 | oldestSafeXid = procArray->replication_slot_catalog_xmin; |
2944 | | |
2945 | | /* |
2946 | | * If we're not in recovery, we walk over the procarray and collect the |
2947 | | * lowest xid. Since we're called with ProcArrayLock held and have |
2948 | | * acquired XidGenLock, no entries can vanish concurrently, since |
2949 | | * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared |
2950 | | * with ProcArrayLock held. |
2951 | | * |
2952 | | * In recovery we can't lower the safe value besides what we've computed |
2953 | | * above, so we'll have to wait a bit longer there. We unfortunately can |
2954 | | * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids |
2955 | | * machinery can miss values and return an older value than is safe. |
2956 | | */ |
2957 | 0 | if (!recovery_in_progress) |
2958 | 0 | { |
2959 | 0 | TransactionId *other_xids = ProcGlobal->xids; |
2960 | | |
2961 | | /* |
2962 | | * Spin over procArray collecting min(ProcGlobal->xids[i]) |
2963 | | */ |
2964 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
2965 | 0 | { |
2966 | 0 | TransactionId xid; |
2967 | | |
2968 | | /* Fetch xid just once - see GetNewTransactionId */ |
2969 | 0 | xid = UINT32_ACCESS_ONCE(other_xids[index]); |
2970 | |

2971 | 0 | if (!TransactionIdIsNormal(xid)) |
2972 | 0 | continue; |
2973 | | |
2974 | 0 | if (TransactionIdPrecedes(xid, oldestSafeXid)) |
2975 | 0 | oldestSafeXid = xid; |
2976 | 0 | } |
2977 | 0 | } |
2978 | |

2979 | 0 | LWLockRelease(XidGenLock); /* ProcArrayLock is not released here; the caller took it */ |
2980 | |

2981 | 0 | return oldestSafeXid; |
2982 | 0 | } |
2983 | | |
2984 | | /* |
2985 | | * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are |
2986 | | * delaying checkpoint because they have critical actions in progress. |
2987 | | * |
2988 | | * Constructs an array of VXIDs of transactions that are currently in commit |
2989 | | * critical sections, as shown by having specified delayChkptFlags bits set |
2990 | | * in their PGPROC. 'type' is the bitmask of delayChkptFlags bits to test for; it is asserted to be nonzero. |
2991 | | * |
2992 | | * Returns a palloc'd array that should be freed by the caller. |
2993 | | * *nvxids is the number of valid entries; procs whose VXID is not valid are left out. |
2994 | | * |
2995 | | * Note that because backends set or clear delayChkptFlags without holding any |
2996 | | * lock, the result is somewhat indeterminate, but we don't really care. Even |
2997 | | * in a multiprocessor with delayed writes to shared memory, it should be |
2998 | | * certain that setting of delayChkptFlags will propagate to shared memory |
2999 | | * when the backend takes a lock, so we cannot fail to see a virtual xact as |
3000 | | * delaying if it has already inserted its commit record. Whether it |
3001 | | * takes a little while for clearing of delayChkptFlags to propagate is |
3002 | | * unimportant for correctness. |
3003 | | */ |
3004 | | VirtualTransactionId * |
3005 | | GetVirtualXIDsDelayingChkpt(int *nvxids, int type) |
3006 | 0 | { |
3007 | 0 | VirtualTransactionId *vxids; |
3008 | 0 | ProcArrayStruct *arrayP = procArray; |
3009 | 0 | int count = 0; |
3010 | 0 | int index; |
3011 |

3012 | 0 | Assert(type != 0); |
3013 | | |
3014 | | /* allocate what's certainly enough result space: one slot per possible procarray entry (maxProcs) */ |
3015 | 0 | vxids = (VirtualTransactionId *) |
3016 | 0 | palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); |
3017 |

3018 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3019 |

3020 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3021 | 0 | { |
3022 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3023 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3024 |

3025 | 0 | if ((proc->delayChkptFlags & type) != 0) |
3026 | 0 | { |
3027 | 0 | VirtualTransactionId vxid; |
3028 |

3029 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
3030 | 0 | if (VirtualTransactionIdIsValid(vxid)) |
3031 | 0 | vxids[count++] = vxid; |
3032 | 0 | } |
3033 | 0 | } |
3034 |

3035 | 0 | LWLockRelease(ProcArrayLock); |
3036 |

3037 | 0 | *nvxids = count; |
3038 | 0 | return vxids; |
3039 | 0 | } |
3040 | | |
3041 | | /* |
3042 | | * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying? |
3043 | | * |
3044 | | * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any |
3045 | | * of the specified VXIDs are still in critical sections of code, i.e. belong to a proc that still has any of the 'type' bits set in delayChkptFlags. |
3046 | | * |
3047 | | * Note: this is O(N^2) in the number of vxacts that are/were delaying, but |
3048 | | * those numbers should be small enough for it not to be a problem. |
3049 | | */ |
3050 | | bool |
3051 | | HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type) |
3052 | 0 | { |
3053 | 0 | bool result = false; |
3054 | 0 | ProcArrayStruct *arrayP = procArray; |
3055 | 0 | int index; |
3056 |

3057 | 0 | Assert(type != 0); |
3058 |

3059 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3060 |

3061 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3062 | 0 | { |
3063 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3064 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3065 | 0 | VirtualTransactionId vxid; |
3066 |

3067 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
3068 |

3069 | 0 | if ((proc->delayChkptFlags & type) != 0 && |
3070 | 0 | VirtualTransactionIdIsValid(vxid)) |
3071 | 0 | { |
3072 | 0 | int i; |
3073 |

3074 | 0 | for (i = 0; i < nvxids; i++) |
3075 | 0 | { |
3076 | 0 | if (VirtualTransactionIdEquals(vxid, vxids[i])) |
3077 | 0 | { |
3078 | 0 | result = true; |
3079 | 0 | break; |
3080 | 0 | } |
3081 | 0 | } |
3082 | 0 | if (result) /* first match settles the answer; stop scanning the procarray too */ |
3083 | 0 | break; |
3084 | 0 | } |
3085 | 0 | } |
3086 |

3087 | 0 | LWLockRelease(ProcArrayLock); |
3088 |

3089 | 0 | return result; |
3090 | 0 | } |
3091 | | |
3092 | | /* |
3093 | | * ProcNumberGetProc -- get a backend's PGPROC given its proc number |
3094 | | * |
3095 | | * The result may be out of date arbitrarily quickly, so the caller |
3096 | | * must be careful about how this information is used. NULL is |
3097 | | * returned if procNumber is out of range or the PGPROC's pid is 0 (backend not active). No lock is taken here. |
3098 | | */ |
3099 | | PGPROC * |
3100 | | ProcNumberGetProc(ProcNumber procNumber) |
3101 | 0 | { |
3102 | 0 | PGPROC *result; |
3103 |

3104 | 0 | if (procNumber < 0 || procNumber >= ProcGlobal->allProcCount) |
3105 | 0 | return NULL; |
3106 | 0 | result = GetPGProcByNumber(procNumber); |
3107 |

3108 | 0 | if (result->pid == 0) |
3109 | 0 | return NULL; |
3110 | | |
3111 | 0 | return result; |
3112 | 0 | } |
3113 | | |
3114 | | /* |
3115 | | * ProcNumberGetTransactionIds -- get a backend's transaction status |
3116 | | * |
3117 | | * Get the xid, xmin, nsubxid and overflow status of the backend. The |
3118 | | * result may be out of date arbitrarily quickly, so the caller must be |
3119 | | * careful about how this information is used. If procNumber is out of range or the PGPROC's pid is 0, the outputs keep their defaults: invalid xid and xmin, nsubxid 0, overflowed false. |
3120 | | */ |
3121 | | void |
3122 | | ProcNumberGetTransactionIds(ProcNumber procNumber, TransactionId *xid, |
3123 | | TransactionId *xmin, int *nsubxid, bool *overflowed) |
3124 | 0 | { |
3125 | 0 | PGPROC *proc; |
3126 |

3127 | 0 | *xid = InvalidTransactionId; |
3128 | 0 | *xmin = InvalidTransactionId; |
3129 | 0 | *nsubxid = 0; |
3130 | 0 | *overflowed = false; |
3131 |

3132 | 0 | if (procNumber < 0 || procNumber >= ProcGlobal->allProcCount) |
3133 | 0 | return; |
3134 | 0 | proc = GetPGProcByNumber(procNumber); |
3135 | | |
3136 | | /* Need to lock out additions/removals of backends */ |
3137 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3138 |

3139 | 0 | if (proc->pid != 0) |
3140 | 0 | { |
3141 | 0 | *xid = proc->xid; |
3142 | 0 | *xmin = proc->xmin; |
3143 | 0 | *nsubxid = proc->subxidStatus.count; |
3144 | 0 | *overflowed = proc->subxidStatus.overflowed; |
3145 | 0 | } |
3146 |

3147 | 0 | LWLockRelease(ProcArrayLock); |
3148 | 0 | } |
3149 | | |
3150 | | /* |
3151 | | * BackendPidGetProc -- get a backend's PGPROC given its PID |
3152 | | * |
3153 | | * Returns NULL if not found. Note that it is up to the caller to be |
3154 | | * sure that the question remains meaningful for long enough for the |
3155 | | * answer to be used ... ProcArrayLock is held (shared) only while the lookup runs and is released before returning. |
3156 | | */ |
3157 | | PGPROC * |
3158 | | BackendPidGetProc(int pid) |
3159 | 0 | { |
3160 | 0 | PGPROC *result; |
3161 |

3162 | 0 | if (pid == 0) /* never match dummy PGPROCs */ |
3163 | 0 | return NULL; |
3164 | | |
3165 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3166 |

3167 | 0 | result = BackendPidGetProcWithLock(pid); |
3168 |

3169 | 0 | LWLockRelease(ProcArrayLock); |
3170 |

3171 | 0 | return result; |
3172 | 0 | } |
3173 | | |
3174 | | /* |
3175 | | * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID |
3176 | | * |
3177 | | * Same as above, except caller must be holding ProcArrayLock. The found |
3178 | | * entry, if any, can be assumed to be valid as long as the lock remains held. Returns NULL if pid is 0 or no procarray entry has that pid. |
3179 | | */ |
3180 | | PGPROC * |
3181 | | BackendPidGetProcWithLock(int pid) |
3182 | 0 | { |
3183 | 0 | PGPROC *result = NULL; |
3184 | 0 | ProcArrayStruct *arrayP = procArray; |
3185 | 0 | int index; |
3186 |

3187 | 0 | if (pid == 0) /* never match dummy PGPROCs */ |
3188 | 0 | return NULL; |
3189 | | |
3190 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3191 | 0 | { |
3192 | 0 | PGPROC *proc = &allProcs[arrayP->pgprocnos[index]]; |
3193 |

3194 | 0 | if (proc->pid == pid) |
3195 | 0 | { |
3196 | 0 | result = proc; |
3197 | 0 | break; |
3198 | 0 | } |
3199 | 0 | } |
3200 |

3201 | 0 | return result; |
3202 | 0 | } |
3203 | | |
3204 | | /* |
3205 | | * BackendXidGetPid -- get a backend's pid given its XID |
3206 | | * |
3207 | | * Returns 0 if xid is invalid, not found, or it's a prepared transaction. Note that |
3208 | | * it is up to the caller to be sure that the question remains |
3209 | | * meaningful for long enough for the answer to be used ... |
3210 | | * |
3211 | | * Only main transaction Ids are considered. This function is mainly |
3212 | | * useful for determining what backend owns a lock. |
3213 | | * |
3214 | | * Beware that not every xact has an XID assigned. However, as long as you |
3215 | | * only call this using an XID found on disk, you're safe. |
3216 | | */ |
3217 | | int |
3218 | | BackendXidGetPid(TransactionId xid) |
3219 | 0 | { |
3220 | 0 | int result = 0; |
3221 | 0 | ProcArrayStruct *arrayP = procArray; |
3222 | 0 | TransactionId *other_xids = ProcGlobal->xids; |
3223 | 0 | int index; |
3224 |

3225 | 0 | if (xid == InvalidTransactionId) /* never match invalid xid */ |
3226 | 0 | return 0; |
3227 | | |
3228 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3229 |

3230 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3231 | 0 | { |
3232 | 0 | if (other_xids[index] == xid) /* the same index selects the pgprocnos[] entry below */ |
3233 | 0 | { |
3234 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3235 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3236 |

3237 | 0 | result = proc->pid; |
3238 | 0 | break; |
3239 | 0 | } |
3240 | 0 | } |
3241 |

3242 | 0 | LWLockRelease(ProcArrayLock); |
3243 |

3244 | 0 | return result; |
3245 | 0 | } |
3246 | | |
3247 | | /* |
3248 | | * IsBackendPid -- is a given pid a running backend |
3249 | | * |
3250 | | * This is not called by the backend, but is called by external modules. It simply reports whether BackendPidGetProc(pid) finds an entry. |
3251 | | */ |
3252 | | bool |
3253 | | IsBackendPid(int pid) |
3254 | 0 | { |
3255 | 0 | return (BackendPidGetProc(pid) != NULL); |
3256 | 0 | } |
3257 | | |
3258 | | |
3259 | | /* |
3260 | | * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs. |
3261 | | * |
3262 | | * The array is palloc'd. The number of valid entries is returned into *nvxids. |
3263 | | * |
3264 | | * The arguments allow filtering the set of VXIDs returned. Our own process |
3265 | | * is always skipped, as is any process without a valid VXID. In addition: |
3266 | | * If limitXmin is not InvalidTransactionId, skip processes with |
3267 | | * xmin > limitXmin. |
3268 | | * If excludeXmin0 is true, skip processes with xmin = 0. |
3269 | | * If allDbs is false, skip processes attached to other databases. |
3270 | | * If excludeVacuum isn't zero, skip processes for which |
3271 | | * (statusFlags & excludeVacuum) is not zero. |
3272 | | * |
3273 | | * Note: the purpose of the limitXmin and excludeXmin0 parameters is to |
3274 | | * allow skipping backends whose oldest live snapshot is no older than |
3275 | | * some snapshot we have. Since we examine the procarray with only shared |
3276 | | * lock, there are race conditions: a backend could set its xmin just after |
3277 | | * we look. Indeed, on multiprocessors with weak memory ordering, the |
3278 | | * other backend could have set its xmin *before* we look. We know however |
3279 | | * that such a backend must have held shared ProcArrayLock overlapping our |
3280 | | * own hold of ProcArrayLock, else we would see its xmin update. Therefore, |
3281 | | * any snapshot the other backend is taking concurrently with our scan cannot |
3282 | | * consider any transactions as still running that we think are committed |
3283 | | * (since backends must hold ProcArrayLock exclusive to commit). |
3284 | | */ |
3285 | | VirtualTransactionId * |
3286 | | GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, |
3287 | | bool allDbs, int excludeVacuum, |
3288 | | int *nvxids) |
3289 | 0 | { |
3290 | 0 | VirtualTransactionId *vxids; |
3291 | 0 | ProcArrayStruct *arrayP = procArray; |
3292 | 0 | int count = 0; |
3293 | 0 | int index; |
3294 | | |
3295 | | /* allocate what's certainly enough result space: one slot per possible procarray entry (maxProcs) */ |
3296 | 0 | vxids = (VirtualTransactionId *) |
3297 | 0 | palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); |
3298 |

3299 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3300 |

3301 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3302 | 0 | { |
3303 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3304 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3305 | 0 | uint8 statusFlags = ProcGlobal->statusFlags[index]; /* looked up by procarray position (index), not by pgprocno */ |
3306 |

3307 | 0 | if (proc == MyProc) |
3308 | 0 | continue; |
3309 | | |
3310 | 0 | if (excludeVacuum & statusFlags) |
3311 | 0 | continue; |
3312 | | |
3313 | 0 | if (allDbs || proc->databaseId == MyDatabaseId) |
3314 | 0 | { |
3315 | | /* Fetch xmin just once - might change on us */ |
3316 | 0 | TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); |
3317 |

3318 | 0 | if (excludeXmin0 && !TransactionIdIsValid(pxmin)) |
3319 | 0 | continue; |
3320 | | |
3321 | | /* |
3322 | | * InvalidTransactionId precedes all other XIDs, so a proc that |
3323 | | * hasn't set xmin yet will not be rejected by this test. |
3324 | | */ |
3325 | 0 | if (!TransactionIdIsValid(limitXmin) || |
3326 | 0 | TransactionIdPrecedesOrEquals(pxmin, limitXmin)) |
3327 | 0 | { |
3328 | 0 | VirtualTransactionId vxid; |
3329 |

3330 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
3331 | 0 | if (VirtualTransactionIdIsValid(vxid)) |
3332 | 0 | vxids[count++] = vxid; |
3333 | 0 | } |
3334 | 0 | } |
3335 | 0 | } |
3336 |

3337 | 0 | LWLockRelease(ProcArrayLock); |
3338 |

3339 | 0 | *nvxids = count; |
3340 | 0 | return vxids; |
3341 | 0 | } |
3342 | | |
3343 | | /* |
3344 | | * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs. |
3345 | | * |
3346 | | * Usage is limited to conflict resolution during recovery on standby servers. |
3347 | | * limitXmin is supplied as either a cutoff with snapshotConflictHorizon |
3348 | | * semantics, or InvalidTransactionId in cases where caller cannot accurately |
3349 | | * determine a safe snapshotConflictHorizon value. |
3350 | | * |
3351 | | * If limitXmin is InvalidTransactionId then we want to kill everybody, |
3352 | | * so we're not worried if they have a snapshot or not, nor does it really |
3353 | | * matter what type of lock we hold. Caller must avoid calling here with |
3354 | | * snapshotConflictHorizon style cutoffs that were set to InvalidTransactionId |
3355 | | * during original execution, since that actually indicates that there is |
3356 | | * definitely no need for a recovery conflict (the snapshotConflictHorizon |
3357 | | * convention for InvalidTransactionId values is the opposite of our own!). |
3358 | | * |
3359 | | * All callers that are checking xmins always now supply a valid and useful |
3360 | | * value for limitXmin. The limitXmin is always lower than the lowest |
3361 | | * numbered KnownAssignedXid that is not already a FATAL error. This is |
3362 | | * because we only care about cleanup records that are cleaning up tuple |
3363 | | * versions from committed transactions. In that case they will only occur |
3364 | | * at the point where the record is less than the lowest running xid. That |
3365 | | * allows us to say that if any backend takes a snapshot concurrently with |
3366 | | * us then the conflict assessment made here would never include the snapshot |
3367 | | * that is being derived. So we take LW_SHARED on the ProcArray and allow |
3368 | | * concurrent snapshots when limitXmin is valid. We might think about adding |
3369 | | * Assert(limitXmin < lowest(KnownAssignedXids)) |
3370 | | * but that would not be true in the case of FATAL errors lagging in array, |
3371 | | * but we already know those are bogus anyway, so we skip that test. |
3372 | | * |
3373 | | * If dbOid is valid we skip backends attached to other databases. |
3374 | | * |
3375 | | * Be careful to *not* pfree the result from this function. We reuse |
3376 | | * this array sufficiently often that we use malloc for the result. The same static buffer is refilled on every call, and the list ends with an entry whose procNumber is INVALID_PROC_NUMBER and whose localTransactionId is InvalidLocalTransactionId. |
3377 | | */ |
3378 | | VirtualTransactionId * |
3379 | | GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) |
3380 | 0 | { |
3381 | 0 | static VirtualTransactionId *vxids; |
3382 | 0 | ProcArrayStruct *arrayP = procArray; |
3383 | 0 | int count = 0; |
3384 | 0 | int index; |
3385 | | |
3386 | | /* |
3387 | | * If first time through, get workspace to hold the result VXIDs in. We |
3388 | | * malloc it permanently to avoid repeated palloc/pfree overhead. Allow |
3389 | | * result space for maxProcs entries, plus room for a terminator. |
3390 | | */ |
3391 | 0 | if (vxids == NULL) |
3392 | 0 | { |
3393 | 0 | vxids = (VirtualTransactionId *) |
3394 | 0 | malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1)); |
3395 | 0 | if (vxids == NULL) |
3396 | 0 | ereport(ERROR, |
3397 | 0 | (errcode(ERRCODE_OUT_OF_MEMORY), |
3398 | 0 | errmsg("out of memory"))); |
3399 | 0 | } |
3400 | | |
3401 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3402 |

3403 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3404 | 0 | { |
3405 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3406 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3407 | | |
3408 | | /* Exclude prepared transactions */ |
3409 | 0 | if (proc->pid == 0) |
3410 | 0 | continue; |
3411 | | |
3412 | 0 | if (!OidIsValid(dbOid) || |
3413 | 0 | proc->databaseId == dbOid) |
3414 | 0 | { |
3415 | | /* Fetch xmin just once - can't change on us, but good coding */ |
3416 | 0 | TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); |
3417 | | |
3418 | | /* |
3419 | | * We ignore an invalid pxmin because this means that backend has |
3420 | | * no snapshot currently. We hold a Share lock to avoid contention |
3421 | | * with users taking snapshots. That is not a problem because the |
3422 | | * current xmin is always at least one higher than the latest |
3423 | | * removed xid, so any new snapshot would never conflict with the |
3424 | | * test here. |
3425 | | */ |
3426 | 0 | if (!TransactionIdIsValid(limitXmin) || |
3427 | 0 | (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin))) |
3428 | 0 | { |
3429 | 0 | VirtualTransactionId vxid; |
3430 |

3431 | 0 | GET_VXID_FROM_PGPROC(vxid, *proc); |
3432 | 0 | if (VirtualTransactionIdIsValid(vxid)) |
3433 | 0 | vxids[count++] = vxid; |
3434 | 0 | } |
3435 | 0 | } |
3436 | 0 | } |
3437 |

3438 | 0 | LWLockRelease(ProcArrayLock); |
3439 | | |
3440 | | /* add the terminator */ |
3441 | 0 | vxids[count].procNumber = INVALID_PROC_NUMBER; |
3442 | 0 | vxids[count].localTransactionId = InvalidLocalTransactionId; |
3443 |

3444 | 0 | return vxids; |
3445 | 0 | } |
3446 | | |
3447 | | /* |
3448 | | * CancelVirtualTransaction - used in recovery conflict processing |
3449 | | * |
3450 | | * Returns pid of the process signaled, or 0 if not found. Thin wrapper: calls SignalVirtualTransaction with conflictPending = true. |
3451 | | */ |
3452 | | pid_t |
3453 | | CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) |
3454 | 0 | { |
3455 | 0 | return SignalVirtualTransaction(vxid, sigmode, true); |
3456 | 0 | } |
3457 | | /* SignalVirtualTransaction -- find the procarray entry whose VXID equals vxid, set its recoveryConflictPending to conflictPending and, if its pid is nonzero, send it sigmode via SendProcSignal. Returns that entry's pid (which may be 0), or 0 if no entry matched. */ |
3458 | | pid_t |
3459 | | SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode, |
3460 | | bool conflictPending) |
3461 | 0 | { |
3462 | 0 | ProcArrayStruct *arrayP = procArray; |
3463 | 0 | int index; |
3464 | 0 | pid_t pid = 0; |
3465 |

3466 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3467 |

3468 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3469 | 0 | { |
3470 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3471 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3472 | 0 | VirtualTransactionId procvxid; |
3473 |

3474 | 0 | GET_VXID_FROM_PGPROC(procvxid, *proc); |
3475 |

3476 | 0 | if (procvxid.procNumber == vxid.procNumber && |
3477 | 0 | procvxid.localTransactionId == vxid.localTransactionId) |
3478 | 0 | { |
3479 | 0 | proc->recoveryConflictPending = conflictPending; |
3480 | 0 | pid = proc->pid; |
3481 | 0 | if (pid != 0) |
3482 | 0 | { |
3483 | | /* |
3484 | | * Kill the pid if it's still here. If not, that's what we |
3485 | | * wanted so ignore any errors. |
3486 | | */ |
3487 | 0 | (void) SendProcSignal(pid, sigmode, vxid.procNumber); |
3488 | 0 | } |
3489 | 0 | break; |
3490 | 0 | } |
3491 | 0 | } |
3492 |

3493 | 0 | LWLockRelease(ProcArrayLock); |
3494 |

3495 | 0 | return pid; |
3496 | 0 | } |
3497 | | |
3498 | | /* |
3499 | | * MinimumActiveBackends --- count backends (other than myself) that are |
3500 | | * in active transactions. Return true if the count reaches the |
3501 | | * minimum threshold passed (count >= min). This is used as a heuristic to decide if |
3502 | | * a pre-XLOG-flush delay is worthwhile during commit. |
3503 | | * |
3504 | | * Do not count backends that are blocked waiting for locks, since they are |
3505 | | * not going to get to run until someone else commits. |
3506 | | */ |
3507 | | bool |
3508 | | MinimumActiveBackends(int min) |
3509 | 0 | { |
3510 | 0 | ProcArrayStruct *arrayP = procArray; |
3511 | 0 | int count = 0; |
3512 | 0 | int index; |
3513 | | |
3514 | | /* Quick short-circuit if no minimum is specified */ |
3515 | 0 | if (min == 0) |
3516 | 0 | return true; |
3517 | | |
3518 | | /* |
3519 | | * Note: for speed, we don't acquire ProcArrayLock. This is a little bit |
3520 | | * bogus, but since we are only testing fields for zero or nonzero, it |
3521 | | * should be OK. The result is only used for heuristic purposes anyway... |
3522 | | */ |
3523 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3524 | 0 | { |
3525 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3526 | 0 | PGPROC *proc = &allProcs[pgprocno]; /* address only; not dereferenced until after the pgprocno == -1 check below */ |
3527 | | |
3528 | | /* |
3529 | | * Since we're not holding a lock, need to be prepared to deal with |
3530 | | * garbage, as someone could have incremented numProcs but not yet |
3531 | | * filled the structure. |
3532 | | * |
3533 | | * If someone just decremented numProcs, 'proc' could also point to a |
3534 | | * PGPROC entry that's no longer in the array. It still points to a |
3535 | | * PGPROC struct, though, because freed PGPROC entries just go to the |
3536 | | * free list and are recycled. Its contents are nonsense in that case, |
3537 | | * but that's acceptable for this function. |
3538 | | */ |
3539 | 0 | if (pgprocno == -1) |
3540 | 0 | continue; /* do not count deleted entries */ |
3541 | 0 | if (proc == MyProc) |
3542 | 0 | continue; /* do not count myself */ |
3543 | 0 | if (proc->xid == InvalidTransactionId) |
3544 | 0 | continue; /* do not count if no XID assigned */ |
3545 | 0 | if (proc->pid == 0) |
3546 | 0 | continue; /* do not count prepared xacts */ |
3547 | 0 | if (proc->waitLock != NULL) |
3548 | 0 | continue; /* do not count if blocked on a lock */ |
3549 | 0 | count++; |
3550 | 0 | if (count >= min) /* threshold reached; no need to look further */ |
3551 | 0 | break; |
3552 | 0 | } |
3553 |

3554 | 0 | return count >= min; |
3555 | 0 | } |
3556 | | |
3557 | | /* |
3558 | | * CountDBBackends --- count backends that are using specified database (every backend is counted when databaseid is not a valid OID; prepared xacts are never counted) |
3559 | | */ |
3560 | | int |
3561 | | CountDBBackends(Oid databaseid) |
3562 | 0 | { |
3563 | 0 | ProcArrayStruct *arrayP = procArray; |
3564 | 0 | int count = 0; |
3565 | 0 | int index; |
3566 |

3567 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3568 |

3569 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3570 | 0 | { |
3571 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3572 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3573 |

3574 | 0 | if (proc->pid == 0) |
3575 | 0 | continue; /* do not count prepared xacts */ |
3576 | 0 | if (!OidIsValid(databaseid) || |
3577 | 0 | proc->databaseId == databaseid) |
3578 | 0 | count++; |
3579 | 0 | } |
3580 |

3581 | 0 | LWLockRelease(ProcArrayLock); |
3582 |

3583 | 0 | return count; |
3584 | 0 | } |
3585 | | |
3586 | | /* |
3587 | | * CountDBConnections --- counts database backends (only regular backends, i.e. entries with isRegularBackend set; all databases are counted when databaseid is not a valid OID) |
3588 | | */ |
3589 | | int |
3590 | | CountDBConnections(Oid databaseid) |
3591 | 0 | { |
3592 | 0 | ProcArrayStruct *arrayP = procArray; |
3593 | 0 | int count = 0; |
3594 | 0 | int index; |
3595 |

3596 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3597 |

3598 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3599 | 0 | { |
3600 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3601 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3602 |

3603 | 0 | if (proc->pid == 0) |
3604 | 0 | continue; /* do not count prepared xacts */ |
3605 | 0 | if (!proc->isRegularBackend) |
3606 | 0 | continue; /* count only regular backend processes */ |
3607 | 0 | if (!OidIsValid(databaseid) || |
3608 | 0 | proc->databaseId == databaseid) |
3609 | 0 | count++; |
3610 | 0 | } |
3611 |

3612 | 0 | LWLockRelease(ProcArrayLock); |
3613 |

3614 | 0 | return count; |
3615 | 0 | } |
3616 | | |
3617 | | /* |
3618 | | * CancelDBBackends --- cancel backends that are using specified database (every procarray entry when databaseid is InvalidOid). Each matching entry gets recoveryConflictPending = conflictPending, and sigmode is sent to those with a nonzero pid. |
3619 | | */ |
3620 | | void |
3621 | | CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) |
3622 | 0 | { |
3623 | 0 | ProcArrayStruct *arrayP = procArray; |
3624 | 0 | int index; |
3625 | | |
3626 | | /* signal every matching backend; note ProcArrayLock is taken in exclusive mode here */ |
3627 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3628 |

3629 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3630 | 0 | { |
3631 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3632 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3633 |

3634 | 0 | if (databaseid == InvalidOid || proc->databaseId == databaseid) |
3635 | 0 | { |
3636 | 0 | VirtualTransactionId procvxid; |
3637 | 0 | pid_t pid; |
3638 |

3639 | 0 | GET_VXID_FROM_PGPROC(procvxid, *proc); |
3640 |

3641 | 0 | proc->recoveryConflictPending = conflictPending; |
3642 | 0 | pid = proc->pid; |
3643 | 0 | if (pid != 0) |
3644 | 0 | { |
3645 | | /* |
3646 | | * Kill the pid if it's still here. If not, that's what we |
3647 | | * wanted so ignore any errors. |
3648 | | */ |
3649 | 0 | (void) SendProcSignal(pid, sigmode, procvxid.procNumber); |
3650 | 0 | } |
3651 | 0 | } |
3652 | 0 | } |
3653 |

3654 | 0 | LWLockRelease(ProcArrayLock); |
3655 | 0 | } |
3656 | | |
3657 | | /* |
3658 | | * CountUserBackends --- count backends that are used by specified user |
3659 | | * (only regular backends, not any type of background worker). Entries with pid 0 (prepared xacts) are skipped; the match is on proc->roleId == roleid. |
3660 | | */ |
3661 | | int |
3662 | | CountUserBackends(Oid roleid) |
3663 | 0 | { |
3664 | 0 | ProcArrayStruct *arrayP = procArray; |
3665 | 0 | int count = 0; |
3666 | 0 | int index; |
3667 |

3668 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3669 |

3670 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3671 | 0 | { |
3672 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3673 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3674 |

3675 | 0 | if (proc->pid == 0) |
3676 | 0 | continue; /* do not count prepared xacts */ |
3677 | 0 | if (!proc->isRegularBackend) |
3678 | 0 | continue; /* count only regular backend processes */ |
3679 | 0 | if (proc->roleId == roleid) |
3680 | 0 | count++; |
3681 | 0 | } |
3682 |

3683 | 0 | LWLockRelease(ProcArrayLock); |
3684 |

3685 | 0 | return count; |
3686 | 0 | } |
3687 | | |
3688 | | /* |
3689 | | * CountOtherDBBackends -- check for other backends running in the given DB |
3690 | | * |
3691 | | * If there are other backends in the DB, we will wait a maximum of 5 seconds |
3692 | | * for them to exit. Autovacuum backends are encouraged to exit early by |
3693 | | * sending them SIGTERM, but normal user backends are just waited for. |
3694 | | * |
3695 | | * The current backend is always ignored; it is caller's responsibility to |
3696 | | * check whether the current backend uses the given DB, if it's important. |
3697 | | * |
3698 | | * Returns true if there are (still) other backends in the DB, false if not. |
3699 | | * Also, *nbackends and *nprepared are set to the number of other backends |
3700 | | * and prepared transactions in the DB, respectively, as counted by the most recent scan (both are reset at the start of every try). |
3701 | | * |
3702 | | * This function is used to interlock DROP DATABASE and related commands |
3703 | | * against there being any active backends in the target DB --- dropping the |
3704 | | * DB while active backends remain would be a Bad Thing. Note that we cannot |
3705 | | * detect here the possibility of a newly-started backend that is trying to |
3706 | | * connect to the doomed database, so additional interlocking is needed during |
3707 | | * backend startup. The caller should normally hold an exclusive lock on the |
3708 | | * target DB before calling this, which is one reason we mustn't wait |
3709 | | * indefinitely. |
3710 | | */ |
3711 | | bool |
3712 | | CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) |
3713 | 0 | { |
3714 | 0 | ProcArrayStruct *arrayP = procArray; |
3715 |

3716 | 0 | #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */ |
3717 | 0 | int autovac_pids[MAXAUTOVACPIDS]; |
3718 | 0 | int tries; |
3719 | | |
3720 | | /* 50 tries with 100ms sleep between tries makes 5 sec total wait */ |
3721 | 0 | for (tries = 0; tries < 50; tries++) |
3722 | 0 | { |
3723 | 0 | int nautovacs = 0; |
3724 | 0 | bool found = false; |
3725 | 0 | int index; |
3726 |

3727 | 0 | CHECK_FOR_INTERRUPTS(); |
3728 |

3729 | 0 | *nbackends = *nprepared = 0; |
3730 |

3731 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3732 |

3733 | 0 | for (index = 0; index < arrayP->numProcs; index++) |
3734 | 0 | { |
3735 | 0 | int pgprocno = arrayP->pgprocnos[index]; |
3736 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3737 | 0 | uint8 statusFlags = ProcGlobal->statusFlags[index]; |
3738 |

3739 | 0 | if (proc->databaseId != databaseId) |
3740 | 0 | continue; |
3741 | 0 | if (proc == MyProc) |
3742 | 0 | continue; |
3743 | | |
3744 | 0 | found = true; |
3745 |

3746 | 0 | if (proc->pid == 0) |
3747 | 0 | (*nprepared)++; |
3748 | 0 | else |
3749 | 0 | { |
3750 | 0 | (*nbackends)++; |
3751 | 0 | if ((statusFlags & PROC_IS_AUTOVACUUM) && |
3752 | 0 | nautovacs < MAXAUTOVACPIDS) |
3753 | 0 | autovac_pids[nautovacs++] = proc->pid; |
3754 | 0 | } |
3755 | 0 | } |
3756 |

3757 | 0 | LWLockRelease(ProcArrayLock); |
3758 |

3759 | 0 | if (!found) |
3760 | 0 | return false; /* no conflicting backends, so done */ |
3761 | | |
3762 | | /* |
3763 | | * Send SIGTERM to any conflicting autovacuums before sleeping. We |
3764 | | * postpone this step until after the loop because we don't want to |
3765 | | * hold ProcArrayLock while issuing kill(). We have no idea what might |
3766 | | * block kill() inside the kernel... |
3767 | | */ |
3768 | 0 | for (index = 0; index < nautovacs; index++) |
3769 | 0 | (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */ |
3770 | | |
3771 | | /* sleep, then try again */ |
3772 | 0 | pg_usleep(100 * 1000L); /* 100ms */ |
3773 | 0 | } |
3774 | | |
3775 | 0 | return true; /* timed out, still conflicts */ |
3776 | 0 | } |
3777 | | |
3778 | | /* |
3779 | | * Terminate existing connections to the specified database. This routine |
3780 | | * is used by the DROP DATABASE command when user has asked to forcefully |
3781 | | * drop the database. |
3782 | | * |
3783 | | * The current backend is always ignored; it is caller's responsibility to |
3784 | | * check whether the current backend uses the given DB, if it's important. |
3785 | | * |
3786 | | * If the target database has a prepared transaction or permissions checks |
3787 | | * fail for a connection, this fails without terminating anything. |
3788 | | */ |
3789 | | void |
3790 | | TerminateOtherDBBackends(Oid databaseId) |
3791 | 0 | { |
3792 | 0 | ProcArrayStruct *arrayP = procArray; |
3793 | 0 | List *pids = NIL; |
3794 | 0 | int nprepared = 0; |
3795 | 0 | int i; |
3796 | |
|
3797 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3798 | |
|
3799 | 0 | for (i = 0; i < procArray->numProcs; i++) |
3800 | 0 | { |
3801 | 0 | int pgprocno = arrayP->pgprocnos[i]; |
3802 | 0 | PGPROC *proc = &allProcs[pgprocno]; |
3803 | |
|
3804 | 0 | if (proc->databaseId != databaseId) |
3805 | 0 | continue; |
3806 | 0 | if (proc == MyProc) |
3807 | 0 | continue; |
3808 | | |
3809 | 0 | if (proc->pid != 0) |
3810 | 0 | pids = lappend_int(pids, proc->pid); |
3811 | 0 | else |
3812 | 0 | nprepared++; |
3813 | 0 | } |
3814 | |
|
3815 | 0 | LWLockRelease(ProcArrayLock); |
3816 | |
|
3817 | 0 | if (nprepared > 0) |
3818 | 0 | ereport(ERROR, |
3819 | 0 | (errcode(ERRCODE_OBJECT_IN_USE), |
3820 | 0 | errmsg("database \"%s\" is being used by prepared transactions", |
3821 | 0 | get_database_name(databaseId)), |
3822 | 0 | errdetail_plural("There is %d prepared transaction using the database.", |
3823 | 0 | "There are %d prepared transactions using the database.", |
3824 | 0 | nprepared, |
3825 | 0 | nprepared))); |
3826 | | |
3827 | 0 | if (pids) |
3828 | 0 | { |
3829 | 0 | ListCell *lc; |
3830 | | |
3831 | | /* |
3832 | | * Permissions checks relax the pg_terminate_backend checks in two |
3833 | | * ways, both by omitting the !OidIsValid(proc->roleId) check: |
3834 | | * |
3835 | | * - Accept terminating autovacuum workers, since DROP DATABASE |
3836 | | * without FORCE terminates them. |
3837 | | * |
3838 | | * - Accept terminating bgworkers. For bgworker authors, it's |
3839 | | * convenient to be able to recommend FORCE if a worker is blocking |
3840 | | * DROP DATABASE unexpectedly. |
3841 | | * |
3842 | | * Unlike pg_terminate_backend, we don't raise some warnings - like |
3843 | | * "PID %d is not a PostgreSQL server process", because for us already |
3844 | | * finished session is not a problem. |
3845 | | */ |
3846 | 0 | foreach(lc, pids) |
3847 | 0 | { |
3848 | 0 | int pid = lfirst_int(lc); |
3849 | 0 | PGPROC *proc = BackendPidGetProc(pid); |
3850 | |
|
3851 | 0 | if (proc != NULL) |
3852 | 0 | { |
3853 | 0 | if (superuser_arg(proc->roleId) && !superuser()) |
3854 | 0 | ereport(ERROR, |
3855 | 0 | (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
3856 | 0 | errmsg("permission denied to terminate process"), |
3857 | 0 | errdetail("Only roles with the %s attribute may terminate processes of roles with the %s attribute.", |
3858 | 0 | "SUPERUSER", "SUPERUSER"))); |
3859 | | |
3860 | 0 | if (!has_privs_of_role(GetUserId(), proc->roleId) && |
3861 | 0 | !has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_BACKEND)) |
3862 | 0 | ereport(ERROR, |
3863 | 0 | (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
3864 | 0 | errmsg("permission denied to terminate process"), |
3865 | 0 | errdetail("Only roles with privileges of the role whose process is being terminated or with privileges of the \"%s\" role may terminate this process.", |
3866 | 0 | "pg_signal_backend"))); |
3867 | 0 | } |
3868 | 0 | } |
3869 | | |
3870 | | /* |
3871 | | * There's a race condition here: once we release the ProcArrayLock, |
3872 | | * it's possible for the session to exit before we issue kill. That |
3873 | | * race condition possibility seems too unlikely to worry about. See |
3874 | | * pg_signal_backend. |
3875 | | */ |
3876 | 0 | foreach(lc, pids) |
3877 | 0 | { |
3878 | 0 | int pid = lfirst_int(lc); |
3879 | 0 | PGPROC *proc = BackendPidGetProc(pid); |
3880 | |
|
3881 | 0 | if (proc != NULL) |
3882 | 0 | { |
3883 | | /* |
3884 | | * If we have setsid(), signal the backend's whole process |
3885 | | * group |
3886 | | */ |
3887 | 0 | #ifdef HAVE_SETSID |
3888 | 0 | (void) kill(-pid, SIGTERM); |
3889 | | #else |
3890 | | (void) kill(pid, SIGTERM); |
3891 | | #endif |
3892 | 0 | } |
3893 | 0 | } |
3894 | 0 | } |
3895 | 0 | } |
3896 | | |
3897 | | /* |
3898 | | * ProcArraySetReplicationSlotXmin |
3899 | | * |
3900 | | * Install limits to future computations of the xmin horizon to prevent vacuum |
3901 | | * and HOT pruning from removing affected rows still needed by clients with |
3902 | | * replication slots. |
3903 | | */ |
3904 | | void |
3905 | | ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, |
3906 | | bool already_locked) |
3907 | | { |
3908 | | Assert(!already_locked || LWLockHeldByMe(ProcArrayLock)); |
3909 | | |
3910 | | if (!already_locked) |
3911 | | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3912 | | |
3913 | | procArray->replication_slot_xmin = xmin; |
3914 | | procArray->replication_slot_catalog_xmin = catalog_xmin; |
3915 | | |
3916 | | if (!already_locked) |
3917 | | LWLockRelease(ProcArrayLock); |
3918 | | |
3919 | | elog(DEBUG1, "xmin required by slots: data %u, catalog %u", |
3920 | | xmin, catalog_xmin); |
3921 | | } |
3922 | | |
3923 | | /* |
3924 | | * ProcArrayGetReplicationSlotXmin |
3925 | | * |
3926 | | * Return the current slot xmin limits. That's useful to be able to remove |
3927 | | * data that's older than those limits. |
3928 | | */ |
3929 | | void |
3930 | | ProcArrayGetReplicationSlotXmin(TransactionId *xmin, |
3931 | | TransactionId *catalog_xmin) |
3932 | 0 | { |
3933 | 0 | LWLockAcquire(ProcArrayLock, LW_SHARED); |
3934 | |
|
3935 | 0 | if (xmin != NULL) |
3936 | 0 | *xmin = procArray->replication_slot_xmin; |
3937 | |
|
3938 | 0 | if (catalog_xmin != NULL) |
3939 | 0 | *catalog_xmin = procArray->replication_slot_catalog_xmin; |
3940 | |
|
3941 | 0 | LWLockRelease(ProcArrayLock); |
3942 | 0 | } |
3943 | | |
3944 | | /* |
3945 | | * XidCacheRemoveRunningXids |
3946 | | * |
3947 | | * Remove a bunch of TransactionIds from the list of known-running |
3948 | | * subtransactions for my backend. Both the specified xid and those in |
3949 | | * the xids[] array (of length nxids) are removed from the subxids cache. |
3950 | | * latestXid must be the latest XID among the group. |
3951 | | */ |
3952 | | void |
3953 | | XidCacheRemoveRunningXids(TransactionId xid, |
3954 | | int nxids, const TransactionId *xids, |
3955 | | TransactionId latestXid) |
3956 | | { |
3957 | | int i, |
3958 | | j; |
3959 | | XidCacheStatus *mysubxidstat; |
3960 | | |
3961 | | Assert(TransactionIdIsValid(xid)); |
3962 | | |
3963 | | /* |
3964 | | * We must hold ProcArrayLock exclusively in order to remove transactions |
3965 | | * from the PGPROC array. (See src/backend/access/transam/README.) It's |
3966 | | * possible this could be relaxed since we know this routine is only used |
3967 | | * to abort subtransactions, but pending closer analysis we'd best be |
3968 | | * conservative. |
3969 | | * |
3970 | | * Note that we do not have to be careful about memory ordering of our own |
3971 | | * reads wrt. GetNewTransactionId() here - only this process can modify |
3972 | | * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be |
3973 | | * careful about our own writes being well ordered. |
3974 | | */ |
3975 | | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
3976 | | |
3977 | | mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff]; |
3978 | | |
3979 | | /* |
3980 | | * Under normal circumstances xid and xids[] will be in increasing order, |
3981 | | * as will be the entries in subxids. Scan backwards to avoid O(N^2) |
3982 | | * behavior when removing a lot of xids. |
3983 | | */ |
3984 | | for (i = nxids - 1; i >= 0; i--) |
3985 | | { |
3986 | | TransactionId anxid = xids[i]; |
3987 | | |
3988 | | for (j = MyProc->subxidStatus.count - 1; j >= 0; j--) |
3989 | | { |
3990 | | if (TransactionIdEquals(MyProc->subxids.xids[j], anxid)) |
3991 | | { |
3992 | | MyProc->subxids.xids[j] = MyProc->subxids.xids[MyProc->subxidStatus.count - 1]; |
3993 | | pg_write_barrier(); |
3994 | | mysubxidstat->count--; |
3995 | | MyProc->subxidStatus.count--; |
3996 | | break; |
3997 | | } |
3998 | | } |
3999 | | |
4000 | | /* |
4001 | | * Ordinarily we should have found it, unless the cache has |
4002 | | * overflowed. However it's also possible for this routine to be |
4003 | | * invoked multiple times for the same subtransaction, in case of an |
4004 | | * error during AbortSubTransaction. So instead of Assert, emit a |
4005 | | * debug warning. |
4006 | | */ |
4007 | | if (j < 0 && !MyProc->subxidStatus.overflowed) |
4008 | | elog(WARNING, "did not find subXID %u in MyProc", anxid); |
4009 | | } |
4010 | | |
4011 | | for (j = MyProc->subxidStatus.count - 1; j >= 0; j--) |
4012 | | { |
4013 | | if (TransactionIdEquals(MyProc->subxids.xids[j], xid)) |
4014 | | { |
4015 | | MyProc->subxids.xids[j] = MyProc->subxids.xids[MyProc->subxidStatus.count - 1]; |
4016 | | pg_write_barrier(); |
4017 | | mysubxidstat->count--; |
4018 | | MyProc->subxidStatus.count--; |
4019 | | break; |
4020 | | } |
4021 | | } |
4022 | | /* Ordinarily we should have found it, unless the cache has overflowed */ |
4023 | | if (j < 0 && !MyProc->subxidStatus.overflowed) |
4024 | | elog(WARNING, "did not find subXID %u in MyProc", xid); |
4025 | | |
4026 | | /* Also advance global latestCompletedXid while holding the lock */ |
4027 | | MaintainLatestCompletedXid(latestXid); |
4028 | | |
4029 | | /* ... and xactCompletionCount */ |
4030 | | TransamVariables->xactCompletionCount++; |
4031 | | |
4032 | | LWLockRelease(ProcArrayLock); |
4033 | | } |
4034 | | |
#ifdef XIDCACHE_DEBUG

/*
 * DisplayXidCache
 *		Dump the XID cache effectiveness counters to stderr on one line.
 *
 * Only compiled when XIDCACHE_DEBUG is defined.
 */
static void
DisplayXidCache(void)
{
	fprintf(stderr,
			"XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
			xc_by_recent_xmin, xc_by_known_xact, xc_by_my_xact,
			xc_by_latest_xid, xc_by_main_xid, xc_by_child_xid,
			xc_by_known_assigned, xc_no_overflow, xc_slow_answer);
}
#endif							/* XIDCACHE_DEBUG */
4056 | | |
4057 | | /* |
4058 | | * If rel != NULL, return test state appropriate for relation, otherwise |
4059 | | * return state usable for all relations. The latter may consider XIDs as |
4060 | | * not-yet-visible-to-everyone that a state for a specific relation would |
4061 | | * already consider visible-to-everyone. |
4062 | | * |
4063 | | * This needs to be called while a snapshot is active or registered, otherwise |
4064 | | * there are wraparound and other dangers. |
4065 | | * |
4066 | | * See comment for GlobalVisState for details. |
4067 | | */ |
4068 | | GlobalVisState * |
4069 | | GlobalVisTestFor(Relation rel) |
4070 | 0 | { |
4071 | 0 | GlobalVisState *state = NULL; |
4072 | | |
4073 | | /* XXX: we should assert that a snapshot is pushed or registered */ |
4074 | 0 | Assert(RecentXmin); |
4075 | |
|
4076 | 0 | switch (GlobalVisHorizonKindForRel(rel)) |
4077 | 0 | { |
4078 | 0 | case VISHORIZON_SHARED: |
4079 | 0 | state = &GlobalVisSharedRels; |
4080 | 0 | break; |
4081 | 0 | case VISHORIZON_CATALOG: |
4082 | 0 | state = &GlobalVisCatalogRels; |
4083 | 0 | break; |
4084 | 0 | case VISHORIZON_DATA: |
4085 | 0 | state = &GlobalVisDataRels; |
4086 | 0 | break; |
4087 | 0 | case VISHORIZON_TEMP: |
4088 | 0 | state = &GlobalVisTempRels; |
4089 | 0 | break; |
4090 | 0 | } |
4091 | | |
4092 | 0 | Assert(FullTransactionIdIsValid(state->definitely_needed) && |
4093 | 0 | FullTransactionIdIsValid(state->maybe_needed)); |
4094 | |
|
4095 | 0 | return state; |
4096 | 0 | } |
4097 | | |
4098 | | /* |
4099 | | * Return true if it's worth updating the accurate maybe_needed boundary. |
4100 | | * |
4101 | | * As it is somewhat expensive to determine xmin horizons, we don't want to |
4102 | | * repeatedly do so when there is a low likelihood of it being beneficial. |
4103 | | * |
4104 | | * The current heuristic is that we update only if RecentXmin has changed |
4105 | | * since the last update. If the oldest currently running transaction has not |
4106 | | * finished, it is unlikely that recomputing the horizon would be useful. |
4107 | | */ |
4108 | | static bool |
4109 | | GlobalVisTestShouldUpdate(GlobalVisState *state) |
4110 | 0 | { |
4111 | | /* hasn't been updated yet */ |
4112 | 0 | if (!TransactionIdIsValid(ComputeXidHorizonsResultLastXmin)) |
4113 | 0 | return true; |
4114 | | |
4115 | | /* |
4116 | | * If the maybe_needed/definitely_needed boundaries are the same, it's |
4117 | | * unlikely to be beneficial to refresh boundaries. |
4118 | | */ |
4119 | 0 | if (FullTransactionIdFollowsOrEquals(state->maybe_needed, |
4120 | 0 | state->definitely_needed)) |
4121 | 0 | return false; |
4122 | | |
4123 | | /* does the last snapshot built have a different xmin? */ |
4124 | 0 | return RecentXmin != ComputeXidHorizonsResultLastXmin; |
4125 | 0 | } |
4126 | | |
4127 | | static void |
4128 | | GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) |
4129 | 0 | { |
4130 | 0 | GlobalVisSharedRels.maybe_needed = |
4131 | 0 | FullXidRelativeTo(horizons->latest_completed, |
4132 | 0 | horizons->shared_oldest_nonremovable); |
4133 | 0 | GlobalVisCatalogRels.maybe_needed = |
4134 | 0 | FullXidRelativeTo(horizons->latest_completed, |
4135 | 0 | horizons->catalog_oldest_nonremovable); |
4136 | 0 | GlobalVisDataRels.maybe_needed = |
4137 | 0 | FullXidRelativeTo(horizons->latest_completed, |
4138 | 0 | horizons->data_oldest_nonremovable); |
4139 | 0 | GlobalVisTempRels.maybe_needed = |
4140 | 0 | FullXidRelativeTo(horizons->latest_completed, |
4141 | 0 | horizons->temp_oldest_nonremovable); |
4142 | | |
4143 | | /* |
4144 | | * In longer running transactions it's possible that transactions we |
4145 | | * previously needed to treat as running aren't around anymore. So update |
4146 | | * definitely_needed to not be earlier than maybe_needed. |
4147 | | */ |
4148 | 0 | GlobalVisSharedRels.definitely_needed = |
4149 | 0 | FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed, |
4150 | 0 | GlobalVisSharedRels.definitely_needed); |
4151 | 0 | GlobalVisCatalogRels.definitely_needed = |
4152 | 0 | FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed, |
4153 | 0 | GlobalVisCatalogRels.definitely_needed); |
4154 | 0 | GlobalVisDataRels.definitely_needed = |
4155 | 0 | FullTransactionIdNewer(GlobalVisDataRels.maybe_needed, |
4156 | 0 | GlobalVisDataRels.definitely_needed); |
4157 | 0 | GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed; |
4158 | |
|
4159 | 0 | ComputeXidHorizonsResultLastXmin = RecentXmin; |
4160 | 0 | } |
4161 | | |
4162 | | /* |
4163 | | * Update boundaries in GlobalVis{Shared,Catalog, Data}Rels |
4164 | | * using ComputeXidHorizons(). |
4165 | | */ |
4166 | | static void |
4167 | | GlobalVisUpdate(void) |
4168 | 0 | { |
4169 | 0 | ComputeXidHorizonsResult horizons; |
4170 | | |
4171 | | /* updates the horizons as a side-effect */ |
4172 | 0 | ComputeXidHorizons(&horizons); |
4173 | 0 | } |
4174 | | |
4175 | | /* |
4176 | | * Return true if no snapshot still considers fxid to be running. |
4177 | | * |
4178 | | * The state passed needs to have been initialized for the relation fxid is |
4179 | | * from (NULL is also OK), otherwise the result may not be correct. |
4180 | | * |
4181 | | * See comment for GlobalVisState for details. |
4182 | | */ |
4183 | | bool |
4184 | | GlobalVisTestIsRemovableFullXid(GlobalVisState *state, |
4185 | | FullTransactionId fxid) |
4186 | 0 | { |
4187 | | /* |
4188 | | * If fxid is older than maybe_needed bound, it definitely is visible to |
4189 | | * everyone. |
4190 | | */ |
4191 | 0 | if (FullTransactionIdPrecedes(fxid, state->maybe_needed)) |
4192 | 0 | return true; |
4193 | | |
4194 | | /* |
4195 | | * If fxid is >= definitely_needed bound, it is very likely to still be |
4196 | | * considered running. |
4197 | | */ |
4198 | 0 | if (FullTransactionIdFollowsOrEquals(fxid, state->definitely_needed)) |
4199 | 0 | return false; |
4200 | | |
4201 | | /* |
4202 | | * fxid is between maybe_needed and definitely_needed, i.e. there might or |
4203 | | * might not exist a snapshot considering fxid running. If it makes sense, |
4204 | | * update boundaries and recheck. |
4205 | | */ |
4206 | 0 | if (GlobalVisTestShouldUpdate(state)) |
4207 | 0 | { |
4208 | 0 | GlobalVisUpdate(); |
4209 | |
|
4210 | 0 | Assert(FullTransactionIdPrecedes(fxid, state->definitely_needed)); |
4211 | |
|
4212 | 0 | return FullTransactionIdPrecedes(fxid, state->maybe_needed); |
4213 | 0 | } |
4214 | 0 | else |
4215 | 0 | return false; |
4216 | 0 | } |
4217 | | |
4218 | | /* |
4219 | | * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids. |
4220 | | * |
4221 | | * It is crucial that this only gets called for xids from a source that |
4222 | | * protects against xid wraparounds (e.g. from a table and thus protected by |
4223 | | * relfrozenxid). |
4224 | | */ |
4225 | | bool |
4226 | | GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid) |
4227 | 0 | { |
4228 | 0 | FullTransactionId fxid; |
4229 | | |
4230 | | /* |
4231 | | * Convert 32 bit argument to FullTransactionId. We can do so safely |
4232 | | * because we know the xid has to, at the very least, be between |
4233 | | * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a |
4234 | | * lock to determine either, we can just compare with |
4235 | | * state->definitely_needed, which was based on those value at the time |
4236 | | * the current snapshot was built. |
4237 | | */ |
4238 | 0 | fxid = FullXidRelativeTo(state->definitely_needed, xid); |
4239 | |
|
4240 | 0 | return GlobalVisTestIsRemovableFullXid(state, fxid); |
4241 | 0 | } |
4242 | | |
4243 | | /* |
4244 | | * Convenience wrapper around GlobalVisTestFor() and |
4245 | | * GlobalVisTestIsRemovableFullXid(), see their comments. |
4246 | | */ |
4247 | | bool |
4248 | | GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid) |
4249 | 0 | { |
4250 | 0 | GlobalVisState *state; |
4251 | |
|
4252 | 0 | state = GlobalVisTestFor(rel); |
4253 | |
|
4254 | 0 | return GlobalVisTestIsRemovableFullXid(state, fxid); |
4255 | 0 | } |
4256 | | |
4257 | | /* |
4258 | | * Convenience wrapper around GlobalVisTestFor() and |
4259 | | * GlobalVisTestIsRemovableXid(), see their comments. |
4260 | | */ |
4261 | | bool |
4262 | | GlobalVisCheckRemovableXid(Relation rel, TransactionId xid) |
4263 | 0 | { |
4264 | 0 | GlobalVisState *state; |
4265 | |
|
4266 | 0 | state = GlobalVisTestFor(rel); |
4267 | |
|
4268 | 0 | return GlobalVisTestIsRemovableXid(state, xid); |
4269 | 0 | } |
4270 | | |
4271 | | /* |
4272 | | * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it |
4273 | | * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel). |
4274 | | * |
4275 | | * Be very careful about when to use this function. It can only safely be used |
4276 | | * when there is a guarantee that xid is within MaxTransactionId / 2 xids of |
4277 | | * rel. That e.g. can be guaranteed if the caller assures a snapshot is |
4278 | | * held by the backend and xid is from a table (where vacuum/freezing ensures |
4279 | | * the xid has to be within that range), or if xid is from the procarray and |
4280 | | * prevents xid wraparound that way. |
4281 | | */ |
4282 | | static inline FullTransactionId |
4283 | | FullXidRelativeTo(FullTransactionId rel, TransactionId xid) |
4284 | 0 | { |
4285 | 0 | TransactionId rel_xid = XidFromFullTransactionId(rel); |
4286 | |
|
4287 | 0 | Assert(TransactionIdIsValid(xid)); |
4288 | 0 | Assert(TransactionIdIsValid(rel_xid)); |
4289 | | |
4290 | | /* not guaranteed to find issues, but likely to catch mistakes */ |
4291 | 0 | AssertTransactionIdInAllowableRange(xid); |
4292 | |
|
4293 | 0 | return FullTransactionIdFromU64(U64FromFullTransactionId(rel) |
4294 | 0 | + (int32) (xid - rel_xid)); |
4295 | 0 | } |
4296 | | |
4297 | | |
4298 | | /* ---------------------------------------------- |
4299 | | * KnownAssignedTransactionIds sub-module |
4300 | | * ---------------------------------------------- |
4301 | | */ |
4302 | | |
4303 | | /* |
4304 | | * In Hot Standby mode, we maintain a list of transactions that are (or were) |
4305 | | * running on the primary at the current point in WAL. These XIDs must be |
4306 | | * treated as running by standby transactions, even though they are not in |
4307 | | * the standby server's PGPROC array. |
4308 | | * |
4309 | | * We record all XIDs that we know have been assigned. That includes all the |
4310 | | * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have |
4311 | | * been assigned. We can deduce the existence of unobserved XIDs because we |
4312 | | * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids |
4313 | | * list expands as new XIDs are observed or inferred, and contracts when |
4314 | | * transaction completion records arrive. |
4315 | | * |
4316 | | * During hot standby we do not fret too much about the distinction between |
4317 | | * top-level XIDs and subtransaction XIDs. We store both together in the |
4318 | | * KnownAssignedXids list. In backends, this is copied into snapshots in |
4319 | | * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot() |
4320 | | * doesn't care about the distinction either. Subtransaction XIDs are |
4321 | | * effectively treated as top-level XIDs and in the typical case pg_subtrans |
4322 | | * links are *not* maintained (which does not affect visibility). |
4323 | | * |
4324 | | * We have room in KnownAssignedXids and in snapshots to hold maxProcs * |
4325 | | * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must |
4326 | | * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at |
4327 | | * least every PGPROC_MAX_CACHED_SUBXIDS. When we receive one of these |
4328 | | * records, we mark the subXIDs as children of the top XID in pg_subtrans, |
4329 | | * and then remove them from KnownAssignedXids. This prevents overflow of |
4330 | | * KnownAssignedXids and snapshots, at the cost that status checks for these |
4331 | | * subXIDs will take a slower path through TransactionIdIsInProgress(). |
4332 | | * This means that KnownAssignedXids is not necessarily complete for subXIDs, |
4333 | | * though it should be complete for top-level XIDs; this is the same situation |
4334 | | * that holds with respect to the PGPROC entries in normal running. |
4335 | | * |
4336 | | * When we throw away subXIDs from KnownAssignedXids, we need to keep track of |
4337 | | * that, similarly to tracking overflow of a PGPROC's subxids array. We do |
4338 | | * that by remembering the lastOverflowedXid, ie the last thrown-away subXID. |
4339 | | * As long as that is within the range of interesting XIDs, we have to assume |
4340 | | * that subXIDs are missing from snapshots. (Note that subXID overflow occurs |
4341 | | * on primary when 65th subXID arrives, whereas on standby it occurs when 64th |
4342 | | * subXID arrives - that is not an error.) |
4343 | | * |
4344 | | * Should a backend on primary somehow disappear before it can write an abort |
4345 | | * record, then we just leave those XIDs in KnownAssignedXids. They actually |
4346 | | * aborted but we think they were running; the distinction is irrelevant |
4347 | | * because either way any changes done by the transaction are not visible to |
4348 | | * backends in the standby. We prune KnownAssignedXids when |
4349 | | * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the |
4350 | | * array due to such dead XIDs. |
4351 | | */ |
4352 | | |
4353 | | /* |
4354 | | * RecordKnownAssignedTransactionIds |
4355 | | * Record the given XID in KnownAssignedXids, as well as any preceding |
4356 | | * unobserved XIDs. |
4357 | | * |
4358 | | * RecordKnownAssignedTransactionIds() should be run for *every* WAL record |
4359 | | * associated with a transaction. Must be called for each record after we |
4360 | | * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.. |
4361 | | * |
4362 | | * Called during recovery in analogy with and in place of GetNewTransactionId() |
4363 | | */ |
4364 | | void |
4365 | | RecordKnownAssignedTransactionIds(TransactionId xid) |
4366 | 0 | { |
4367 | 0 | Assert(standbyState >= STANDBY_INITIALIZED); |
4368 | 0 | Assert(TransactionIdIsValid(xid)); |
4369 | 0 | Assert(TransactionIdIsValid(latestObservedXid)); |
4370 | |
|
4371 | 0 | elog(DEBUG4, "record known xact %u latestObservedXid %u", |
4372 | 0 | xid, latestObservedXid); |
4373 | | |
4374 | | /* |
4375 | | * When a newly observed xid arrives, it is frequently the case that it is |
4376 | | * *not* the next xid in sequence. When this occurs, we must treat the |
4377 | | * intervening xids as running also. |
4378 | | */ |
4379 | 0 | if (TransactionIdFollows(xid, latestObservedXid)) |
4380 | 0 | { |
4381 | 0 | TransactionId next_expected_xid; |
4382 | | |
4383 | | /* |
4384 | | * Extend subtrans like we do in GetNewTransactionId() during normal |
4385 | | * operation using individual extend steps. Note that we do not need |
4386 | | * to extend clog since its extensions are WAL logged. |
4387 | | * |
4388 | | * This part has to be done regardless of standbyState since we |
4389 | | * immediately start assigning subtransactions to their toplevel |
4390 | | * transactions. |
4391 | | */ |
4392 | 0 | next_expected_xid = latestObservedXid; |
4393 | 0 | while (TransactionIdPrecedes(next_expected_xid, xid)) |
4394 | 0 | { |
4395 | 0 | TransactionIdAdvance(next_expected_xid); |
4396 | 0 | ExtendSUBTRANS(next_expected_xid); |
4397 | 0 | } |
4398 | 0 | Assert(next_expected_xid == xid); |
4399 | | |
4400 | | /* |
4401 | | * If the KnownAssignedXids machinery isn't up yet, there's nothing |
4402 | | * more to do since we don't track assigned xids yet. |
4403 | | */ |
4404 | 0 | if (standbyState <= STANDBY_INITIALIZED) |
4405 | 0 | { |
4406 | 0 | latestObservedXid = xid; |
4407 | 0 | return; |
4408 | 0 | } |
4409 | | |
4410 | | /* |
4411 | | * Add (latestObservedXid, xid] onto the KnownAssignedXids array. |
4412 | | */ |
4413 | 0 | next_expected_xid = latestObservedXid; |
4414 | 0 | TransactionIdAdvance(next_expected_xid); |
4415 | 0 | KnownAssignedXidsAdd(next_expected_xid, xid, false); |
4416 | | |
4417 | | /* |
4418 | | * Now we can advance latestObservedXid |
4419 | | */ |
4420 | 0 | latestObservedXid = xid; |
4421 | | |
4422 | | /* TransamVariables->nextXid must be beyond any observed xid */ |
4423 | 0 | AdvanceNextFullTransactionIdPastXid(latestObservedXid); |
4424 | 0 | } |
4425 | 0 | } |
4426 | | |
4427 | | /* |
4428 | | * ExpireTreeKnownAssignedTransactionIds |
4429 | | * Remove the given XIDs from KnownAssignedXids. |
4430 | | * |
4431 | | * Called during recovery in analogy with and in place of ProcArrayEndTransaction() |
4432 | | */ |
4433 | | void |
4434 | | ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, |
4435 | | TransactionId *subxids, TransactionId max_xid) |
4436 | 0 | { |
4437 | 0 | Assert(standbyState >= STANDBY_INITIALIZED); |
4438 | | |
4439 | | /* |
4440 | | * Uses same locking as transaction commit |
4441 | | */ |
4442 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
4443 | |
|
4444 | 0 | KnownAssignedXidsRemoveTree(xid, nsubxids, subxids); |
4445 | | |
4446 | | /* As in ProcArrayEndTransaction, advance latestCompletedXid */ |
4447 | 0 | MaintainLatestCompletedXidRecovery(max_xid); |
4448 | | |
4449 | | /* ... and xactCompletionCount */ |
4450 | 0 | TransamVariables->xactCompletionCount++; |
4451 | |
|
4452 | 0 | LWLockRelease(ProcArrayLock); |
4453 | 0 | } |
4454 | | |
4455 | | /* |
4456 | | * ExpireAllKnownAssignedTransactionIds |
4457 | | * Remove all entries in KnownAssignedXids and reset lastOverflowedXid. |
4458 | | */ |
4459 | | void |
4460 | | ExpireAllKnownAssignedTransactionIds(void) |
4461 | 0 | { |
4462 | 0 | FullTransactionId latestXid; |
4463 | |
|
4464 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
4465 | 0 | KnownAssignedXidsRemovePreceding(InvalidTransactionId); |
4466 | | |
4467 | | /* Reset latestCompletedXid to nextXid - 1 */ |
4468 | 0 | Assert(FullTransactionIdIsValid(TransamVariables->nextXid)); |
4469 | 0 | latestXid = TransamVariables->nextXid; |
4470 | 0 | FullTransactionIdRetreat(&latestXid); |
4471 | 0 | TransamVariables->latestCompletedXid = latestXid; |
4472 | | |
4473 | | /* |
4474 | | * Any transactions that were in-progress were effectively aborted, so |
4475 | | * advance xactCompletionCount. |
4476 | | */ |
4477 | 0 | TransamVariables->xactCompletionCount++; |
4478 | | |
4479 | | /* |
4480 | | * Reset lastOverflowedXid. Currently, lastOverflowedXid has no use after |
4481 | | * the call of this function. But do this for unification with what |
4482 | | * ExpireOldKnownAssignedTransactionIds() do. |
4483 | | */ |
4484 | 0 | procArray->lastOverflowedXid = InvalidTransactionId; |
4485 | 0 | LWLockRelease(ProcArrayLock); |
4486 | 0 | } |
4487 | | |
4488 | | /* |
4489 | | * ExpireOldKnownAssignedTransactionIds |
4490 | | * Remove KnownAssignedXids entries preceding the given XID and |
4491 | | * potentially reset lastOverflowedXid. |
4492 | | */ |
4493 | | void |
4494 | | ExpireOldKnownAssignedTransactionIds(TransactionId xid) |
4495 | 0 | { |
4496 | 0 | TransactionId latestXid; |
4497 | |
|
4498 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
4499 | | |
4500 | | /* As in ProcArrayEndTransaction, advance latestCompletedXid */ |
4501 | 0 | latestXid = xid; |
4502 | 0 | TransactionIdRetreat(latestXid); |
4503 | 0 | MaintainLatestCompletedXidRecovery(latestXid); |
4504 | | |
4505 | | /* ... and xactCompletionCount */ |
4506 | 0 | TransamVariables->xactCompletionCount++; |
4507 | | |
4508 | | /* |
4509 | | * Reset lastOverflowedXid if we know all transactions that have been |
4510 | | * possibly running are being gone. Not doing so could cause an incorrect |
4511 | | * lastOverflowedXid value, which makes extra snapshots be marked as |
4512 | | * suboverflowed. |
4513 | | */ |
4514 | 0 | if (TransactionIdPrecedes(procArray->lastOverflowedXid, xid)) |
4515 | 0 | procArray->lastOverflowedXid = InvalidTransactionId; |
4516 | 0 | KnownAssignedXidsRemovePreceding(xid); |
4517 | 0 | LWLockRelease(ProcArrayLock); |
4518 | 0 | } |
4519 | | |
4520 | | /* |
4521 | | * KnownAssignedTransactionIdsIdleMaintenance |
4522 | | * Opportunistically do maintenance work when the startup process |
4523 | | * is about to go idle. |
4524 | | */ |
4525 | | void |
4526 | | KnownAssignedTransactionIdsIdleMaintenance(void) |
4527 | 0 | { |
4528 | 0 | KnownAssignedXidsCompress(KAX_STARTUP_PROCESS_IDLE, false); |
4529 | 0 | } |
4530 | | |
4531 | | |
4532 | | /* |
4533 | | * Private module functions to manipulate KnownAssignedXids |
4534 | | * |
4535 | | * There are 5 main uses of the KnownAssignedXids data structure: |
4536 | | * |
4537 | | * * backends taking snapshots - all valid XIDs need to be copied out |
4538 | | * * backends seeking to determine presence of a specific XID |
4539 | | * * startup process adding new known-assigned XIDs |
4540 | | * * startup process removing specific XIDs as transactions end |
4541 | | * * startup process pruning array when special WAL records arrive |
4542 | | * |
4543 | | * This data structure is known to be a hot spot during Hot Standby, so we |
4544 | | * go to some lengths to make these operations as efficient and as concurrent |
4545 | | * as possible. |
4546 | | * |
4547 | | * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes |
4548 | | * order, to be exact --- to allow binary search for specific XIDs. Note: |
4549 | | * in general TransactionIdPrecedes would not provide a total order, but |
4550 | | * we know that the entries present at any instant should not extend across |
4551 | | * a large enough fraction of XID space to wrap around (the primary would |
4552 | | * shut down for fear of XID wrap long before that happens). So it's OK to |
4553 | | * use TransactionIdPrecedes as a binary-search comparator. |
4554 | | * |
4555 | | * It's cheap to maintain the sortedness during insertions, since new known |
4556 | | * XIDs are always reported in XID order; we just append them at the right. |
4557 | | * |
4558 | | * To keep individual deletions cheap, we need to allow gaps in the array. |
4559 | | * This is implemented by marking array elements as valid or invalid using |
4560 | | * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done |
4561 | | * by setting KnownAssignedXidsValid[i] to false, *without* clearing the |
4562 | | * XID entry itself. This preserves the property that the XID entries are |
4563 | | * sorted, so we can do binary searches easily. Periodically we compress |
4564 | | * out the unused entries; that's much cheaper than having to compress the |
4565 | | * array immediately on every deletion. |
4566 | | * |
4567 | | * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[] |
4568 | | * are those with indexes tail <= i < head; items outside this subscript range |
4569 | | * have unspecified contents. When head reaches the end of the array, we |
4570 | | * force compression of unused entries rather than wrapping around, since |
4571 | | * allowing wraparound would greatly complicate the search logic. We maintain |
4572 | | * an explicit tail pointer so that pruning of old XIDs can be done without |
4573 | | * immediately moving the array contents. In most cases only a small fraction |
4574 | | * of the array contains valid entries at any instant. |
4575 | | * |
4576 | | * Although only the startup process can ever change the KnownAssignedXids |
4577 | | * data structure, we still need interlocking so that standby backends will |
4578 | | * not observe invalid intermediate states. The convention is that backends |
4579 | | * must hold shared ProcArrayLock to examine the array. To remove XIDs from |
4580 | | * the array, the startup process must hold ProcArrayLock exclusively, for |
4581 | | * the usual transactional reasons (compare commit/abort of a transaction |
4582 | | * during normal running). Compressing unused entries out of the array |
4583 | | * likewise requires exclusive lock. To add XIDs to the array, we just insert |
4584 | | * them into slots to the right of the head pointer and then advance the head |
4585 | | * pointer. This doesn't require any lock at all, but on machines with weak |
4586 | | * memory ordering, we need to be careful that other processors see the array |
4587 | | * element changes before they see the head pointer change. We handle this by |
4588 | | * using memory barriers when reading or writing the head/tail pointers (unless |
4589 | | * the caller holds ProcArrayLock exclusively). |
4590 | | * |
4591 | | * Algorithmic analysis: |
4592 | | * |
4593 | | * If we have a maximum of M slots, with N XIDs currently spread across |
4594 | | * S elements then we have N <= S <= M always. |
4595 | | * |
4596 | | * * Adding a new XID is O(1) and needs no lock (unless compression must |
4597 | | * happen) |
4598 | | * * Compressing the array is O(S) and requires exclusive lock |
4599 | | * * Removing an XID is O(logS) and requires exclusive lock |
4600 | | * * Taking a snapshot is O(S) and requires shared lock |
4601 | | * * Checking for an XID is O(logS) and requires shared lock |
4602 | | * |
4603 | | * In comparison, using a hash table for KnownAssignedXids would mean that |
4604 | | * taking snapshots would be O(M). If we can maintain S << M then the |
4605 | | * sorted array technique will deliver significantly faster snapshots. |
4606 | | * If we try to keep S too small then we will spend too much time compressing, |
4607 | | * so there is an optimal point for any workload mix. We use a heuristic to |
4608 | | * decide when to compress the array, though trimming also helps reduce |
4609 | | * frequency of compressing. The heuristic requires us to track the number of |
4610 | | * currently valid XIDs in the array (N). Except in special cases, we'll |
4611 | | * compress when S >= 2N. Bounding S at 2N in turn bounds the time for |
4612 | | * taking a snapshot to be O(N), which it would have to be anyway. |
4613 | | */ |
4614 | | |
4615 | | |
4616 | | /* |
4617 | | * Compress KnownAssignedXids by shifting valid data down to the start of the |
4618 | | * array, removing any gaps. |
4619 | | * |
4620 | | * A compression step is forced if "reason" is KAX_NO_SPACE, otherwise |
4621 | | * we do it only if a heuristic indicates it's a good time to do it. |
4622 | | * |
4623 | | * Compression requires holding ProcArrayLock in exclusive mode. |
4624 | | * Caller must pass haveLock = true if it already holds the lock. |
4625 | | */ |
4626 | | static void |
4627 | | KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock) |
4628 | 0 | { |
4629 | 0 | ProcArrayStruct *pArray = procArray; |
4630 | 0 | int head, |
4631 | 0 | tail, |
4632 | 0 | nelements; |
4633 | 0 | int compress_index; |
4634 | 0 | int i; |
4635 | | |
4636 | | /* Counters for compression heuristics */ |
4637 | 0 | static unsigned int transactionEndsCounter; |
4638 | 0 | static TimestampTz lastCompressTs; |
4639 | | |
4640 | | /* Tuning constants */ |
4641 | 0 | #define KAX_COMPRESS_FREQUENCY 128 /* in transactions */ |
4642 | 0 | #define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */ |
4643 | | |
4644 | | /* |
4645 | | * Since only the startup process modifies the head/tail pointers, we |
4646 | | * don't need a lock to read them here. |
4647 | | */ |
4648 | 0 | head = pArray->headKnownAssignedXids; |
4649 | 0 | tail = pArray->tailKnownAssignedXids; |
4650 | 0 | nelements = head - tail; |
4651 | | |
4652 | | /* |
4653 | | * If we can choose whether to compress, use a heuristic to avoid |
4654 | | * compressing too often or not often enough. "Compress" here simply |
4655 | | * means moving the values to the beginning of the array, so it is not as |
4656 | | * complex or costly as typical data compression algorithms. |
4657 | | */ |
4658 | 0 | if (nelements == pArray->numKnownAssignedXids) |
4659 | 0 | { |
4660 | | /* |
4661 | | * When there are no gaps between head and tail, don't bother to |
4662 | | * compress, except in the KAX_NO_SPACE case where we must compress to |
4663 | | * create some space after the head. |
4664 | | */ |
4665 | 0 | if (reason != KAX_NO_SPACE) |
4666 | 0 | return; |
4667 | 0 | } |
4668 | 0 | else if (reason == KAX_TRANSACTION_END) |
4669 | 0 | { |
4670 | | /* |
4671 | | * Consider compressing only once every so many commits. Frequency |
4672 | | * determined by benchmarks. |
4673 | | */ |
4674 | 0 | if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0) |
4675 | 0 | return; |
4676 | | |
4677 | | /* |
4678 | | * Furthermore, compress only if the used part of the array is less |
4679 | | * than 50% full (see comments above). |
4680 | | */ |
4681 | 0 | if (nelements < 2 * pArray->numKnownAssignedXids) |
4682 | 0 | return; |
4683 | 0 | } |
4684 | 0 | else if (reason == KAX_STARTUP_PROCESS_IDLE) |
4685 | 0 | { |
4686 | | /* |
4687 | | * We're about to go idle for lack of new WAL, so we might as well |
4688 | | * compress. But not too often, to avoid ProcArray lock contention |
4689 | | * with readers. |
4690 | | */ |
4691 | 0 | if (lastCompressTs != 0) |
4692 | 0 | { |
4693 | 0 | TimestampTz compress_after; |
4694 | |
|
4695 | 0 | compress_after = TimestampTzPlusMilliseconds(lastCompressTs, |
4696 | 0 | KAX_COMPRESS_IDLE_INTERVAL); |
4697 | 0 | if (GetCurrentTimestamp() < compress_after) |
4698 | 0 | return; |
4699 | 0 | } |
4700 | 0 | } |
4701 | | |
4702 | | /* Need to compress, so get the lock if we don't have it. */ |
4703 | 0 | if (!haveLock) |
4704 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
4705 | | |
4706 | | /* |
4707 | | * We compress the array by reading the valid values from tail to head, |
4708 | | * re-aligning data to 0th element. |
4709 | | */ |
4710 | 0 | compress_index = 0; |
4711 | 0 | for (i = tail; i < head; i++) |
4712 | 0 | { |
4713 | 0 | if (KnownAssignedXidsValid[i]) |
4714 | 0 | { |
4715 | 0 | KnownAssignedXids[compress_index] = KnownAssignedXids[i]; |
4716 | 0 | KnownAssignedXidsValid[compress_index] = true; |
4717 | 0 | compress_index++; |
4718 | 0 | } |
4719 | 0 | } |
4720 | 0 | Assert(compress_index == pArray->numKnownAssignedXids); |
4721 | |
|
4722 | 0 | pArray->tailKnownAssignedXids = 0; |
4723 | 0 | pArray->headKnownAssignedXids = compress_index; |
4724 | |
|
4725 | 0 | if (!haveLock) |
4726 | 0 | LWLockRelease(ProcArrayLock); |
4727 | | |
4728 | | /* Update timestamp for maintenance. No need to hold lock for this. */ |
4729 | 0 | lastCompressTs = GetCurrentTimestamp(); |
4730 | 0 | } |
4731 | | |
4732 | | /* |
4733 | | * Add xids into KnownAssignedXids at the head of the array. |
4734 | | * |
4735 | | * xids from from_xid to to_xid, inclusive, are added to the array. |
4736 | | * |
4737 | | * If exclusive_lock is true then caller already holds ProcArrayLock in |
4738 | | * exclusive mode, so we need no extra locking here. Else caller holds no |
4739 | | * lock, so we need to be sure we maintain sufficient interlocks against |
4740 | | * concurrent readers. (Only the startup process ever calls this, so no need |
4741 | | * to worry about concurrent writers.) |
4742 | | */ |
4743 | | static void |
4744 | | KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, |
4745 | | bool exclusive_lock) |
4746 | 0 | { |
4747 | 0 | ProcArrayStruct *pArray = procArray; |
4748 | 0 | TransactionId next_xid; |
4749 | 0 | int head, |
4750 | 0 | tail; |
4751 | 0 | int nxids; |
4752 | 0 | int i; |
4753 | |
|
4754 | 0 | Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid)); |
4755 | | |
4756 | | /* |
4757 | | * Calculate how many array slots we'll need. Normally this is cheap; in |
4758 | | * the unusual case where the XIDs cross the wrap point, we do it the hard |
4759 | | * way. |
4760 | | */ |
4761 | 0 | if (to_xid >= from_xid) |
4762 | 0 | nxids = to_xid - from_xid + 1; |
4763 | 0 | else |
4764 | 0 | { |
4765 | 0 | nxids = 1; |
4766 | 0 | next_xid = from_xid; |
4767 | 0 | while (TransactionIdPrecedes(next_xid, to_xid)) |
4768 | 0 | { |
4769 | 0 | nxids++; |
4770 | 0 | TransactionIdAdvance(next_xid); |
4771 | 0 | } |
4772 | 0 | } |
4773 | | |
4774 | | /* |
4775 | | * Since only the startup process modifies the head/tail pointers, we |
4776 | | * don't need a lock to read them here. |
4777 | | */ |
4778 | 0 | head = pArray->headKnownAssignedXids; |
4779 | 0 | tail = pArray->tailKnownAssignedXids; |
4780 | |
|
4781 | 0 | Assert(head >= 0 && head <= pArray->maxKnownAssignedXids); |
4782 | 0 | Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids); |
4783 | | |
4784 | | /* |
4785 | | * Verify that insertions occur in TransactionId sequence. Note that even |
4786 | | * if the last existing element is marked invalid, it must still have a |
4787 | | * correctly sequenced XID value. |
4788 | | */ |
4789 | 0 | if (head > tail && |
4790 | 0 | TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid)) |
4791 | 0 | { |
4792 | 0 | KnownAssignedXidsDisplay(LOG); |
4793 | 0 | elog(ERROR, "out-of-order XID insertion in KnownAssignedXids"); |
4794 | 0 | } |
4795 | | |
4796 | | /* |
4797 | | * If our xids won't fit in the remaining space, compress out free space |
4798 | | */ |
4799 | 0 | if (head + nxids > pArray->maxKnownAssignedXids) |
4800 | 0 | { |
4801 | 0 | KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock); |
4802 | |
|
4803 | 0 | head = pArray->headKnownAssignedXids; |
4804 | | /* note: we no longer care about the tail pointer */ |
4805 | | |
4806 | | /* |
4807 | | * If it still won't fit then we're out of memory |
4808 | | */ |
4809 | 0 | if (head + nxids > pArray->maxKnownAssignedXids) |
4810 | 0 | elog(ERROR, "too many KnownAssignedXids"); |
4811 | 0 | } |
4812 | | |
4813 | | /* Now we can insert the xids into the space starting at head */ |
4814 | 0 | next_xid = from_xid; |
4815 | 0 | for (i = 0; i < nxids; i++) |
4816 | 0 | { |
4817 | 0 | KnownAssignedXids[head] = next_xid; |
4818 | 0 | KnownAssignedXidsValid[head] = true; |
4819 | 0 | TransactionIdAdvance(next_xid); |
4820 | 0 | head++; |
4821 | 0 | } |
4822 | | |
4823 | | /* Adjust count of number of valid entries */ |
4824 | 0 | pArray->numKnownAssignedXids += nxids; |
4825 | | |
4826 | | /* |
4827 | | * Now update the head pointer. We use a write barrier to ensure that |
4828 | | * other processors see the above array updates before they see the head |
4829 | | * pointer change. The barrier isn't required if we're holding |
4830 | | * ProcArrayLock exclusively. |
4831 | | */ |
4832 | 0 | if (!exclusive_lock) |
4833 | 0 | pg_write_barrier(); |
4834 | |
|
4835 | 0 | pArray->headKnownAssignedXids = head; |
4836 | 0 | } |
4837 | | |
4838 | | /* |
4839 | | * KnownAssignedXidsSearch |
4840 | | * |
4841 | | * Searches KnownAssignedXids for a specific xid and optionally removes it. |
4842 | | * Returns true if it was found, false if not. |
4843 | | * |
4844 | | * Caller must hold ProcArrayLock in shared or exclusive mode. |
4845 | | * Exclusive lock must be held for remove = true. |
4846 | | */ |
4847 | | static bool |
4848 | | KnownAssignedXidsSearch(TransactionId xid, bool remove) |
4849 | 0 | { |
4850 | 0 | ProcArrayStruct *pArray = procArray; |
4851 | 0 | int first, |
4852 | 0 | last; |
4853 | 0 | int head; |
4854 | 0 | int tail; |
4855 | 0 | int result_index = -1; |
4856 | |
|
4857 | 0 | tail = pArray->tailKnownAssignedXids; |
4858 | 0 | head = pArray->headKnownAssignedXids; |
4859 | | |
4860 | | /* |
4861 | | * Only the startup process removes entries, so we don't need the read |
4862 | | * barrier in that case. |
4863 | | */ |
4864 | 0 | if (!remove) |
4865 | 0 | pg_read_barrier(); /* pairs with KnownAssignedXidsAdd */ |
4866 | | |
4867 | | /* |
4868 | | * Standard binary search. Note we can ignore the KnownAssignedXidsValid |
4869 | | * array here, since even invalid entries will contain sorted XIDs. |
4870 | | */ |
4871 | 0 | first = tail; |
4872 | 0 | last = head - 1; |
4873 | 0 | while (first <= last) |
4874 | 0 | { |
4875 | 0 | int mid_index; |
4876 | 0 | TransactionId mid_xid; |
4877 | |
|
4878 | 0 | mid_index = (first + last) / 2; |
4879 | 0 | mid_xid = KnownAssignedXids[mid_index]; |
4880 | |
|
4881 | 0 | if (xid == mid_xid) |
4882 | 0 | { |
4883 | 0 | result_index = mid_index; |
4884 | 0 | break; |
4885 | 0 | } |
4886 | 0 | else if (TransactionIdPrecedes(xid, mid_xid)) |
4887 | 0 | last = mid_index - 1; |
4888 | 0 | else |
4889 | 0 | first = mid_index + 1; |
4890 | 0 | } |
4891 | |
|
4892 | 0 | if (result_index < 0) |
4893 | 0 | return false; /* not in array */ |
4894 | | |
4895 | 0 | if (!KnownAssignedXidsValid[result_index]) |
4896 | 0 | return false; /* in array, but invalid */ |
4897 | | |
4898 | 0 | if (remove) |
4899 | 0 | { |
4900 | 0 | KnownAssignedXidsValid[result_index] = false; |
4901 | |
|
4902 | 0 | pArray->numKnownAssignedXids--; |
4903 | 0 | Assert(pArray->numKnownAssignedXids >= 0); |
4904 | | |
4905 | | /* |
4906 | | * If we're removing the tail element then advance tail pointer over |
4907 | | * any invalid elements. This will speed future searches. |
4908 | | */ |
4909 | 0 | if (result_index == tail) |
4910 | 0 | { |
4911 | 0 | tail++; |
4912 | 0 | while (tail < head && !KnownAssignedXidsValid[tail]) |
4913 | 0 | tail++; |
4914 | 0 | if (tail >= head) |
4915 | 0 | { |
4916 | | /* Array is empty, so we can reset both pointers */ |
4917 | 0 | pArray->headKnownAssignedXids = 0; |
4918 | 0 | pArray->tailKnownAssignedXids = 0; |
4919 | 0 | } |
4920 | 0 | else |
4921 | 0 | { |
4922 | 0 | pArray->tailKnownAssignedXids = tail; |
4923 | 0 | } |
4924 | 0 | } |
4925 | 0 | } |
4926 | |
|
4927 | 0 | return true; |
4928 | 0 | } |
4929 | | |
4930 | | /* |
4931 | | * Is the specified XID present in KnownAssignedXids[]? |
4932 | | * |
4933 | | * Caller must hold ProcArrayLock in shared or exclusive mode. |
4934 | | */ |
4935 | | static bool |
4936 | | KnownAssignedXidExists(TransactionId xid) |
4937 | 0 | { |
4938 | 0 | Assert(TransactionIdIsValid(xid)); |
4939 | |
|
4940 | 0 | return KnownAssignedXidsSearch(xid, false); |
4941 | 0 | } |
4942 | | |
4943 | | /* |
4944 | | * Remove the specified XID from KnownAssignedXids[]. |
4945 | | * |
4946 | | * Caller must hold ProcArrayLock in exclusive mode. |
4947 | | */ |
4948 | | static void |
4949 | | KnownAssignedXidsRemove(TransactionId xid) |
4950 | | { |
4951 | | Assert(TransactionIdIsValid(xid)); |
4952 | | |
4953 | | elog(DEBUG4, "remove KnownAssignedXid %u", xid); |
4954 | | |
4955 | | /* |
4956 | | * Note: we cannot consider it an error to remove an XID that's not |
4957 | | * present. We intentionally remove subxact IDs while processing |
4958 | | * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be |
4959 | | * removed again when the top-level xact commits or aborts. |
4960 | | * |
4961 | | * It might be possible to track such XIDs to distinguish this case from |
4962 | | * actual errors, but it would be complicated and probably not worth it. |
4963 | | * So, just ignore the search result. |
4964 | | */ |
4965 | | (void) KnownAssignedXidsSearch(xid, true); |
4966 | | } |
4967 | | |
4968 | | /* |
4969 | | * KnownAssignedXidsRemoveTree |
4970 | | * Remove xid (if it's not InvalidTransactionId) and all the subxids. |
4971 | | * |
4972 | | * Caller must hold ProcArrayLock in exclusive mode. |
4973 | | */ |
4974 | | static void |
4975 | | KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, |
4976 | | TransactionId *subxids) |
4977 | 0 | { |
4978 | 0 | int i; |
4979 | |
|
4980 | 0 | if (TransactionIdIsValid(xid)) |
4981 | 0 | KnownAssignedXidsRemove(xid); |
4982 | |
|
4983 | 0 | for (i = 0; i < nsubxids; i++) |
4984 | 0 | KnownAssignedXidsRemove(subxids[i]); |
4985 | | |
4986 | | /* Opportunistically compress the array */ |
4987 | 0 | KnownAssignedXidsCompress(KAX_TRANSACTION_END, true); |
4988 | 0 | } |
4989 | | |
4990 | | /* |
4991 | | * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid |
4992 | | * then clear the whole table. |
4993 | | * |
4994 | | * Caller must hold ProcArrayLock in exclusive mode. |
4995 | | */ |
4996 | | static void |
4997 | | KnownAssignedXidsRemovePreceding(TransactionId removeXid) |
4998 | 0 | { |
4999 | 0 | ProcArrayStruct *pArray = procArray; |
5000 | 0 | int count = 0; |
5001 | 0 | int head, |
5002 | 0 | tail, |
5003 | 0 | i; |
5004 | |
|
5005 | 0 | if (!TransactionIdIsValid(removeXid)) |
5006 | 0 | { |
5007 | 0 | elog(DEBUG4, "removing all KnownAssignedXids"); |
5008 | 0 | pArray->numKnownAssignedXids = 0; |
5009 | 0 | pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0; |
5010 | 0 | return; |
5011 | 0 | } |
5012 | | |
5013 | 0 | elog(DEBUG4, "prune KnownAssignedXids to %u", removeXid); |
5014 | | |
5015 | | /* |
5016 | | * Mark entries invalid starting at the tail. Since array is sorted, we |
5017 | | * can stop as soon as we reach an entry >= removeXid. |
5018 | | */ |
5019 | 0 | tail = pArray->tailKnownAssignedXids; |
5020 | 0 | head = pArray->headKnownAssignedXids; |
5021 | |
|
5022 | 0 | for (i = tail; i < head; i++) |
5023 | 0 | { |
5024 | 0 | if (KnownAssignedXidsValid[i]) |
5025 | 0 | { |
5026 | 0 | TransactionId knownXid = KnownAssignedXids[i]; |
5027 | |
|
5028 | 0 | if (TransactionIdFollowsOrEquals(knownXid, removeXid)) |
5029 | 0 | break; |
5030 | | |
5031 | 0 | if (!StandbyTransactionIdIsPrepared(knownXid)) |
5032 | 0 | { |
5033 | 0 | KnownAssignedXidsValid[i] = false; |
5034 | 0 | count++; |
5035 | 0 | } |
5036 | 0 | } |
5037 | 0 | } |
5038 | |
|
5039 | 0 | pArray->numKnownAssignedXids -= count; |
5040 | 0 | Assert(pArray->numKnownAssignedXids >= 0); |
5041 | | |
5042 | | /* |
5043 | | * Advance the tail pointer if we've marked the tail item invalid. |
5044 | | */ |
5045 | 0 | for (i = tail; i < head; i++) |
5046 | 0 | { |
5047 | 0 | if (KnownAssignedXidsValid[i]) |
5048 | 0 | break; |
5049 | 0 | } |
5050 | 0 | if (i >= head) |
5051 | 0 | { |
5052 | | /* Array is empty, so we can reset both pointers */ |
5053 | 0 | pArray->headKnownAssignedXids = 0; |
5054 | 0 | pArray->tailKnownAssignedXids = 0; |
5055 | 0 | } |
5056 | 0 | else |
5057 | 0 | { |
5058 | 0 | pArray->tailKnownAssignedXids = i; |
5059 | 0 | } |
5060 | | |
5061 | | /* Opportunistically compress the array */ |
5062 | 0 | KnownAssignedXidsCompress(KAX_PRUNE, true); |
5063 | 0 | } |
5064 | | |
5065 | | /* |
5066 | | * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids. |
5067 | | * We filter out anything >= xmax. |
5068 | | * |
5069 | | * Returns the number of XIDs stored into xarray[]. Caller is responsible |
5070 | | * that array is large enough. |
5071 | | * |
5072 | | * Caller must hold ProcArrayLock in (at least) shared mode. |
5073 | | */ |
5074 | | static int |
5075 | | KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax) |
5076 | 0 | { |
5077 | 0 | TransactionId xtmp = InvalidTransactionId; |
5078 | |
|
5079 | 0 | return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax); |
5080 | 0 | } |
5081 | | |
5082 | | /* |
5083 | | * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus |
5084 | | * we reduce *xmin to the lowest xid value seen if not already lower. |
5085 | | * |
5086 | | * Caller must hold ProcArrayLock in (at least) shared mode. |
5087 | | */ |
5088 | | static int |
5089 | | KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, |
5090 | | TransactionId xmax) |
5091 | 0 | { |
5092 | 0 | int count = 0; |
5093 | 0 | int head, |
5094 | 0 | tail; |
5095 | 0 | int i; |
5096 | | |
5097 | | /* |
5098 | | * Fetch head just once, since it may change while we loop. We can stop |
5099 | | * once we reach the initially seen head, since we are certain that an xid |
5100 | | * cannot enter and then leave the array while we hold ProcArrayLock. We |
5101 | | * might miss newly-added xids, but they should be >= xmax so irrelevant |
5102 | | * anyway. |
5103 | | */ |
5104 | 0 | tail = procArray->tailKnownAssignedXids; |
5105 | 0 | head = procArray->headKnownAssignedXids; |
5106 | |
|
5107 | 0 | pg_read_barrier(); /* pairs with KnownAssignedXidsAdd */ |
5108 | |
|
5109 | 0 | for (i = tail; i < head; i++) |
5110 | 0 | { |
5111 | | /* Skip any gaps in the array */ |
5112 | 0 | if (KnownAssignedXidsValid[i]) |
5113 | 0 | { |
5114 | 0 | TransactionId knownXid = KnownAssignedXids[i]; |
5115 | | |
5116 | | /* |
5117 | | * Update xmin if required. Only the first XID need be checked, |
5118 | | * since the array is sorted. |
5119 | | */ |
5120 | 0 | if (count == 0 && |
5121 | 0 | TransactionIdPrecedes(knownXid, *xmin)) |
5122 | 0 | *xmin = knownXid; |
5123 | | |
5124 | | /* |
5125 | | * Filter out anything >= xmax, again relying on sorted property |
5126 | | * of array. |
5127 | | */ |
5128 | 0 | if (TransactionIdIsValid(xmax) && |
5129 | 0 | TransactionIdFollowsOrEquals(knownXid, xmax)) |
5130 | 0 | break; |
5131 | | |
5132 | | /* Add knownXid into output array */ |
5133 | 0 | xarray[count++] = knownXid; |
5134 | 0 | } |
5135 | 0 | } |
5136 | |
|
5137 | 0 | return count; |
5138 | 0 | } |
5139 | | |
5140 | | /* |
5141 | | * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId |
5142 | | * if nothing there. |
5143 | | */ |
5144 | | static TransactionId |
5145 | | KnownAssignedXidsGetOldestXmin(void) |
5146 | 0 | { |
5147 | 0 | int head, |
5148 | 0 | tail; |
5149 | 0 | int i; |
5150 | | |
5151 | | /* |
5152 | | * Fetch head just once, since it may change while we loop. |
5153 | | */ |
5154 | 0 | tail = procArray->tailKnownAssignedXids; |
5155 | 0 | head = procArray->headKnownAssignedXids; |
5156 | |
|
5157 | 0 | pg_read_barrier(); /* pairs with KnownAssignedXidsAdd */ |
5158 | |
|
5159 | 0 | for (i = tail; i < head; i++) |
5160 | 0 | { |
5161 | | /* Skip any gaps in the array */ |
5162 | 0 | if (KnownAssignedXidsValid[i]) |
5163 | 0 | return KnownAssignedXids[i]; |
5164 | 0 | } |
5165 | | |
5166 | 0 | return InvalidTransactionId; |
5167 | 0 | } |
5168 | | |
5169 | | /* |
5170 | | * Display KnownAssignedXids to provide debug trail |
5171 | | * |
5172 | | * Currently this is only called within startup process, so we need no |
5173 | | * special locking. |
5174 | | * |
5175 | | * Note this is pretty expensive, and much of the expense will be incurred |
5176 | | * even if the elog message will get discarded. It's not currently called |
5177 | | * in any performance-critical places, however, so no need to be tenser. |
5178 | | */ |
5179 | | static void |
5180 | | KnownAssignedXidsDisplay(int trace_level) |
5181 | 0 | { |
5182 | 0 | ProcArrayStruct *pArray = procArray; |
5183 | 0 | StringInfoData buf; |
5184 | 0 | int head, |
5185 | 0 | tail, |
5186 | 0 | i; |
5187 | 0 | int nxids = 0; |
5188 | |
|
5189 | 0 | tail = pArray->tailKnownAssignedXids; |
5190 | 0 | head = pArray->headKnownAssignedXids; |
5191 | |
|
5192 | 0 | initStringInfo(&buf); |
5193 | |
|
5194 | 0 | for (i = tail; i < head; i++) |
5195 | 0 | { |
5196 | 0 | if (KnownAssignedXidsValid[i]) |
5197 | 0 | { |
5198 | 0 | nxids++; |
5199 | 0 | appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]); |
5200 | 0 | } |
5201 | 0 | } |
5202 | |
|
5203 | 0 | elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s", |
5204 | 0 | nxids, |
5205 | 0 | pArray->numKnownAssignedXids, |
5206 | 0 | pArray->tailKnownAssignedXids, |
5207 | 0 | pArray->headKnownAssignedXids, |
5208 | 0 | buf.data); |
5209 | | |
5210 | 0 | pfree(buf.data); |
5211 | 0 | } |
5212 | | |
5213 | | /* |
5214 | | * KnownAssignedXidsReset |
5215 | | * Resets KnownAssignedXids to be empty |
5216 | | */ |
5217 | | static void |
5218 | | KnownAssignedXidsReset(void) |
5219 | 0 | { |
5220 | 0 | ProcArrayStruct *pArray = procArray; |
5221 | |
|
5222 | 0 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
5223 | |
|
5224 | 0 | pArray->numKnownAssignedXids = 0; |
5225 | 0 | pArray->tailKnownAssignedXids = 0; |
5226 | 0 | pArray->headKnownAssignedXids = 0; |
5227 | |
|
5228 | 0 | LWLockRelease(ProcArrayLock); |
5229 | 0 | } |