/src/postgres/src/backend/utils/time/snapmgr.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * snapmgr.c |
4 | | * PostgreSQL snapshot manager |
5 | | * |
6 | | * The following functions return an MVCC snapshot that can be used in tuple |
7 | | * visibility checks: |
8 | | * |
9 | | * - GetTransactionSnapshot |
10 | | * - GetLatestSnapshot |
11 | | * - GetCatalogSnapshot |
12 | | * - GetNonHistoricCatalogSnapshot |
13 | | * |
14 | | * Each of these functions returns a reference to a statically allocated |
15 | | * snapshot. The statically allocated snapshot is subject to change on any |
16 | | * snapshot-related function call, and should not be used directly. Instead, |
17 | | * call PushActiveSnapshot() or RegisterSnapshot() to create a longer-lived |
18 | | * copy and use that. |
19 | | * |
20 | | * We keep track of snapshots in two ways: those "registered" by resowner.c, |
21 | | * and the "active snapshot" stack. All snapshots in either of them live in |
22 | | * persistent memory. When a snapshot is no longer in any of these lists |
23 | | * (tracked by separate refcounts on each snapshot), its memory can be freed. |
24 | | * |
25 | | * In addition to the above-mentioned MVCC snapshots, there are some special |
26 | | * snapshots like SnapshotSelf, SnapshotAny, and "dirty" snapshots. They can |
27 | | * only be used in limited contexts and cannot be registered or pushed to the |
28 | | * active stack. |
29 | | * |
30 | | * ActiveSnapshot stack |
31 | | * -------------------- |
32 | | * |
33 | | * Most visibility checks use the current "active snapshot" returned by |
34 | | * GetActiveSnapshot(). When running normal queries, the active snapshot is |
35 | | * set when query execution begins based on the transaction isolation level. |
36 | | * |
37 | | * The active snapshot is tracked in a stack so that the currently active one |
38 | | * is at the top of the stack. It mirrors the process call stack: whenever we |
39 | | * recurse or switch context to fetch rows from a different portal for |
40 | | * example, the appropriate snapshot is pushed to become the active snapshot, |
41 | | * and popped on return. Once upon a time, ActiveSnapshot was just a global |
42 | | * variable that was saved and restored similar to CurrentMemoryContext, but |
43 | | * nowadays it's managed as a separate data structure so that we can keep |
44 | | * track of which snapshots are in use and reset MyProc->xmin when there is no |
45 | | * active snapshot. |
46 | | * |
47 | | * However, there are a couple of exceptions where the active snapshot stack |
48 | | * does not strictly mirror the call stack: |
49 | | * |
50 | | * - VACUUM and a few other utility commands manage their own transactions, |
51 | | * which take their own snapshots. They are called with an active snapshot |
52 | | * set, like most utility commands, but they pop the active snapshot that |
53 | | * was pushed by the caller. PortalRunUtility knows about the possibility |
54 | | * that the snapshot it pushed is no longer active on return. |
55 | | * |
56 | | * - When COMMIT or ROLLBACK is executed within a procedure or DO-block, the |
57 | | * active snapshot stack is destroyed, and re-established later when |
58 | | * subsequent statements in the procedure are executed. There are many |
59 | | * limitations on when in-procedure COMMIT/ROLLBACK is allowed; one such |
60 | | * limitation is that all the snapshots on the active snapshot stack are |
61 | | * known to portals that are being executed, which makes it safe to reset |
62 | | * the stack. See EnsurePortalSnapshotExists(). |
63 | | * |
64 | | * Registered snapshots |
65 | | * -------------------- |
66 | | * |
67 | | * In addition to snapshots pushed to the active snapshot stack, a snapshot |
68 | | * can be registered with a resource owner. |
69 | | * |
70 | | * The FirstXactSnapshot, if any, is treated a bit specially: we increment its |
71 | | * regd_count and list it in RegisteredSnapshots, but this reference is not |
72 | | * tracked by a resource owner. We used to use the TopTransactionResourceOwner |
73 | | * to track this snapshot reference, but that introduces logical circularity |
74 | | * and thus makes it impossible to clean up in a sane fashion. It's better to |
75 | | * handle this reference as an internally-tracked registration, so that this |
76 | | * module is entirely lower-level than ResourceOwners. |
77 | | * |
78 | | * Likewise, any snapshots that have been exported by pg_export_snapshot |
79 | | * have regd_count = 1 and are listed in RegisteredSnapshots, but are not |
80 | | * tracked by any resource owner. |
81 | | * |
82 | | * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it |
83 | | * is valid, but is not tracked by any resource owner. |
84 | | * |
85 | | * The same is true for historic snapshots used during logical decoding: |
86 | | * their lifetime is managed separately (as they live longer than one xact.c |
87 | | * transaction). |
88 | | * |
89 | | * These arrangements let us reset MyProc->xmin when there are no snapshots |
90 | | * referenced by this transaction, and advance it when the one with oldest |
91 | | * Xmin is no longer referenced. For simplicity, however, only registered |
92 | | * snapshots (not active snapshots) participate in tracking which one is oldest; |
93 | | * we don't try to change MyProc->xmin except when the active-snapshot |
94 | | * stack is empty. |
95 | | * |
96 | | * |
97 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
98 | | * Portions Copyright (c) 1994, Regents of the University of California |
99 | | * |
100 | | * IDENTIFICATION |
101 | | * src/backend/utils/time/snapmgr.c |
102 | | * |
103 | | *------------------------------------------------------------------------- |
104 | | */ |
105 | | #include "postgres.h" |
106 | | |
107 | | #include <sys/stat.h> |
108 | | #include <unistd.h> |
109 | | |
110 | | #include "access/subtrans.h" |
111 | | #include "access/transam.h" |
112 | | #include "access/xact.h" |
113 | | #include "datatype/timestamp.h" |
114 | | #include "lib/pairingheap.h" |
115 | | #include "miscadmin.h" |
116 | | #include "port/pg_lfind.h" |
117 | | #include "storage/fd.h" |
118 | | #include "storage/predicate.h" |
119 | | #include "storage/proc.h" |
120 | | #include "storage/procarray.h" |
121 | | #include "utils/builtins.h" |
122 | | #include "utils/memutils.h" |
123 | | #include "utils/resowner.h" |
124 | | #include "utils/snapmgr.h" |
125 | | #include "utils/syscache.h" |
126 | | |
127 | | |
/*
 * Statically allocated snapshot storage.
 *
 * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
 * mode, and to the latest one taken in a read-committed transaction.
 * SecondarySnapshot is a snapshot that's always up-to-date as of the current
 * instant, even in transaction-snapshot mode.  It should only be used for
 * special-purpose code (say, RI checking.)  CatalogSnapshot points to an
 * MVCC snapshot intended to be used for catalog scans; we must invalidate it
 * whenever a system catalog change occurs.
 *
 * These SnapshotData structs are static to simplify memory allocation
 * (see the hack in GetSnapshotData to avoid repeated malloc/free).
 *
 * SnapshotSelfData, SnapshotAnyData and SnapshotToastData are not declared
 * static.  They are initialized with the SNAPSHOT_SELF, SNAPSHOT_ANY and
 * SNAPSHOT_TOAST types respectively; presumably they back the special
 * (non-MVCC) snapshots mentioned in the file header -- confirm against the
 * header that exports them.
 */
static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};
static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};
static SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};
SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};
SnapshotData SnapshotToastData = {SNAPSHOT_TOAST};
146 | | |
/*
 * Pointers to valid snapshots.  All of them start out NULL.
 *
 * CurrentSnapshot is assigned by GetTransactionSnapshot() and
 * SetTransactionSnapshot(), SecondarySnapshot by GetLatestSnapshot(), and
 * CatalogSnapshot by GetNonHistoricCatalogSnapshot() (reset to NULL again by
 * InvalidateCatalogSnapshot()).  HistoricSnapshot is what
 * GetTransactionSnapshot() and GetCatalogSnapshot() hand out while
 * HistoricSnapshotActive() is true.
 */
static Snapshot CurrentSnapshot = NULL;
static Snapshot SecondarySnapshot = NULL;
static Snapshot CatalogSnapshot = NULL;
static Snapshot HistoricSnapshot = NULL;
152 | | |
/*
 * These are updated by GetSnapshotData.  We initialize them this way
 * for the convenience of TransactionIdIsInProgress: even in bootstrap
 * mode, we don't want it to say that BootstrapTransactionId is in progress.
 *
 * Neither variable is static, so both are visible outside this file.
 */
TransactionId TransactionXmin = FirstNormalTransactionId;
TransactionId RecentXmin = FirstNormalTransactionId;
160 | | |
/*
 * (table, ctid) => (cmin, cmax) mapping during timetravel.
 *
 * Starts out NULL, i.e. no mapping installed.
 * NOTE(review): presumably installed and removed together with the historic
 * snapshot -- confirm against the code that assigns it.
 */
static HTAB *tuplecid_data = NULL;
163 | | |
/*
 * Elements of the active snapshot stack.
 *
 * Each element here accounts for exactly one active_count on SnapshotData.
 *
 * NB: the code assumes that elements in this list are in non-increasing
 * order of as_level; also, the list must be NULL-terminated.
 */
typedef struct ActiveSnapshotElt
{
	/* the snapshot this stack entry refers to */
	Snapshot	as_snap;
	/* transaction nesting level that "owns" this entry */
	int			as_level;
	/* entry directly below this one on the stack, or NULL at the bottom */
	struct ActiveSnapshotElt *as_next;
} ActiveSnapshotElt;
178 | | |
/*
 * Top of the stack of active snapshots; NULL when the stack is empty.
 * Entries are linked downwards through as_next.
 */
static ActiveSnapshotElt *ActiveSnapshot = NULL;
181 | | |
/*
 * Currently registered Snapshots.  Ordered in a heap by xmin, so that we can
 * quickly find the one with lowest xmin, to advance our MyProc->xmin.
 *
 * Besides snapshots registered with a resource owner, the heap also holds
 * FirstXactSnapshot and CatalogSnapshot while they are valid; those are
 * inserted directly with pairingheap_add().
 *
 * xmin_cmp is the heap's comparison callback; only its declaration appears
 * here.
 */
static int	xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
					 void *arg);

static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
190 | | |
/*
 * Has a transaction snapshot already been set in this transaction?  False
 * until the first GetTransactionSnapshot() or SetTransactionSnapshot() call
 * sets it.  Not static, so visible outside this file.
 */
bool		FirstSnapshotSet = false;

/*
 * Remember the serializable transaction snapshot, if any.  We cannot trust
 * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
 * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
 *
 * Non-NULL only after a first snapshot was taken or imported while
 * IsolationUsesXactSnapshot() was true; that snapshot carries one regd_count
 * and sits in RegisteredSnapshots without being tracked by a resource owner.
 */
static Snapshot FirstXactSnapshot = NULL;
200 | | |
/* Define pathname of exported-snapshot files */
#define SNAPSHOT_EXPORT_DIR "pg_snapshots"

/* Structure holding info about exported snapshot. */
typedef struct ExportedSnapshot
{
	/* name of the snapshot's file -- presumably under SNAPSHOT_EXPORT_DIR */
	char	   *snapfile;
	/* the exported snapshot itself */
	Snapshot	snapshot;
} ExportedSnapshot;

/*
 * Current xact's exported snapshots (a list of ExportedSnapshot structs).
 * NIL when nothing has been exported.
 */
static List *exportedSnapshots = NIL;
213 | | |
/*
 * Prototypes for local functions.
 *
 * CopySnapshot makes a palloc'd copy with zeroed refcounts; FreeSnapshot
 * releases such a copy once both refcounts are zero.  The other two are
 * defined later in the file.
 */
static Snapshot CopySnapshot(Snapshot snapshot);
static void UnregisterSnapshotNoOwner(Snapshot snapshot);
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
219 | | |
/* ResourceOwner callbacks to track snapshot references */
static void ResOwnerReleaseSnapshot(Datum res);

/*
 * Descriptor handed to ResourceOwnerRemember/Forget for every snapshot
 * reference tracked by a resource owner.  It names the resource kind, says
 * in which release phase and at what priority such references are released,
 * and supplies the release callback.
 */
static const ResourceOwnerDesc snapshot_resowner_desc =
{
	.name = "snapshot reference",
	.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
	.release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
	.ReleaseResource = ResOwnerReleaseSnapshot,
	.DebugPrint = NULL			/* the default message is fine */
};
231 | | |
232 | | /* Convenience wrappers over ResourceOwnerRemember/Forget */ |
233 | | static inline void |
234 | | ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap) |
235 | 0 | { |
236 | 0 | ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc); |
237 | 0 | } |
238 | | static inline void |
239 | | ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap) |
240 | 0 | { |
241 | 0 | ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc); |
242 | 0 | } |
243 | | |
/*
 * Snapshot fields to be serialized.
 *
 * Only these fields need to be sent to the cooperating backend; the
 * remaining ones can (and must) be set by the receiver upon restore.
 *
 * The member names match the SnapshotData fields of the same name that the
 * rest of this file reads and writes.  Note that the XID arrays themselves
 * are not members of this struct; only their element counts are.
 */
typedef struct SerializedSnapshotData
{
	TransactionId xmin;
	TransactionId xmax;
	uint32		xcnt;			/* number of entries in the xip array */
	int32		subxcnt;		/* number of entries in the subxip array */
	bool		suboverflowed;
	bool		takenDuringRecovery;
	CommandId	curcid;
} SerializedSnapshotData;
260 | | |
261 | | /* |
262 | | * GetTransactionSnapshot |
263 | | * Get the appropriate snapshot for a new query in a transaction. |
264 | | * |
265 | | * Note that the return value points at static storage that will be modified |
266 | | * by future calls and by CommandCounterIncrement(). Callers must call |
267 | | * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing |
268 | | * any other non-trivial work that could invalidate it. |
269 | | */ |
270 | | Snapshot |
271 | | GetTransactionSnapshot(void) |
272 | 0 | { |
273 | | /* |
274 | | * Return historic snapshot if doing logical decoding. |
275 | | * |
276 | | * Historic snapshots are only usable for catalog access, not for |
277 | | * general-purpose queries. The caller is responsible for ensuring that |
278 | | * the snapshot is used correctly! (PostgreSQL code never calls this |
279 | | * during logical decoding, but extensions can do it.) |
280 | | */ |
281 | 0 | if (HistoricSnapshotActive()) |
282 | 0 | { |
283 | | /* |
284 | | * We'll never need a non-historic transaction snapshot in this |
285 | | * (sub-)transaction, so there's no need to be careful to set one up |
286 | | * for later calls to GetTransactionSnapshot(). |
287 | | */ |
288 | 0 | Assert(!FirstSnapshotSet); |
289 | 0 | return HistoricSnapshot; |
290 | 0 | } |
291 | | |
292 | | /* First call in transaction? */ |
293 | 0 | if (!FirstSnapshotSet) |
294 | 0 | { |
295 | | /* |
296 | | * Don't allow catalog snapshot to be older than xact snapshot. Must |
297 | | * do this first to allow the empty-heap Assert to succeed. |
298 | | */ |
299 | 0 | InvalidateCatalogSnapshot(); |
300 | |
|
301 | 0 | Assert(pairingheap_is_empty(&RegisteredSnapshots)); |
302 | 0 | Assert(FirstXactSnapshot == NULL); |
303 | |
|
304 | 0 | if (IsInParallelMode()) |
305 | 0 | elog(ERROR, |
306 | 0 | "cannot take query snapshot during a parallel operation"); |
307 | | |
308 | | /* |
309 | | * In transaction-snapshot mode, the first snapshot must live until |
310 | | * end of xact regardless of what the caller does with it, so we must |
311 | | * make a copy of it rather than returning CurrentSnapshotData |
312 | | * directly. Furthermore, if we're running in serializable mode, |
313 | | * predicate.c needs to wrap the snapshot fetch in its own processing. |
314 | | */ |
315 | 0 | if (IsolationUsesXactSnapshot()) |
316 | 0 | { |
317 | | /* First, create the snapshot in CurrentSnapshotData */ |
318 | 0 | if (IsolationIsSerializable()) |
319 | 0 | CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData); |
320 | 0 | else |
321 | 0 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
322 | | /* Make a saved copy */ |
323 | 0 | CurrentSnapshot = CopySnapshot(CurrentSnapshot); |
324 | 0 | FirstXactSnapshot = CurrentSnapshot; |
325 | | /* Mark it as "registered" in FirstXactSnapshot */ |
326 | 0 | FirstXactSnapshot->regd_count++; |
327 | 0 | pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); |
328 | 0 | } |
329 | 0 | else |
330 | 0 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
331 | |
|
332 | 0 | FirstSnapshotSet = true; |
333 | 0 | return CurrentSnapshot; |
334 | 0 | } |
335 | | |
336 | 0 | if (IsolationUsesXactSnapshot()) |
337 | 0 | return CurrentSnapshot; |
338 | | |
339 | | /* Don't allow catalog snapshot to be older than xact snapshot. */ |
340 | 0 | InvalidateCatalogSnapshot(); |
341 | |
|
342 | 0 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
343 | |
|
344 | 0 | return CurrentSnapshot; |
345 | 0 | } |
346 | | |
347 | | /* |
348 | | * GetLatestSnapshot |
349 | | * Get a snapshot that is up-to-date as of the current instant, |
350 | | * even if we are executing in transaction-snapshot mode. |
351 | | */ |
352 | | Snapshot |
353 | | GetLatestSnapshot(void) |
354 | 0 | { |
355 | | /* |
356 | | * We might be able to relax this, but nothing that could otherwise work |
357 | | * needs it. |
358 | | */ |
359 | 0 | if (IsInParallelMode()) |
360 | 0 | elog(ERROR, |
361 | 0 | "cannot update SecondarySnapshot during a parallel operation"); |
362 | | |
363 | | /* |
364 | | * So far there are no cases requiring support for GetLatestSnapshot() |
365 | | * during logical decoding, but it wouldn't be hard to add if required. |
366 | | */ |
367 | 0 | Assert(!HistoricSnapshotActive()); |
368 | | |
369 | | /* If first call in transaction, go ahead and set the xact snapshot */ |
370 | 0 | if (!FirstSnapshotSet) |
371 | 0 | return GetTransactionSnapshot(); |
372 | | |
373 | 0 | SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData); |
374 | |
|
375 | 0 | return SecondarySnapshot; |
376 | 0 | } |
377 | | |
378 | | /* |
379 | | * GetCatalogSnapshot |
380 | | * Get a snapshot that is sufficiently up-to-date for scan of the |
381 | | * system catalog with the specified OID. |
382 | | */ |
383 | | Snapshot |
384 | | GetCatalogSnapshot(Oid relid) |
385 | 0 | { |
386 | | /* |
387 | | * Return historic snapshot while we're doing logical decoding, so we can |
388 | | * see the appropriate state of the catalog. |
389 | | * |
390 | | * This is the primary reason for needing to reset the system caches after |
391 | | * finishing decoding. |
392 | | */ |
393 | 0 | if (HistoricSnapshotActive()) |
394 | 0 | return HistoricSnapshot; |
395 | | |
396 | 0 | return GetNonHistoricCatalogSnapshot(relid); |
397 | 0 | } |
398 | | |
399 | | /* |
400 | | * GetNonHistoricCatalogSnapshot |
401 | | * Get a snapshot that is sufficiently up-to-date for scan of the system |
402 | | * catalog with the specified OID, even while historic snapshots are set |
403 | | * up. |
404 | | */ |
405 | | Snapshot |
406 | | GetNonHistoricCatalogSnapshot(Oid relid) |
407 | 0 | { |
408 | | /* |
409 | | * If the caller is trying to scan a relation that has no syscache, no |
410 | | * catcache invalidations will be sent when it is updated. For a few key |
411 | | * relations, snapshot invalidations are sent instead. If we're trying to |
412 | | * scan a relation for which neither catcache nor snapshot invalidations |
413 | | * are sent, we must refresh the snapshot every time. |
414 | | */ |
415 | 0 | if (CatalogSnapshot && |
416 | 0 | !RelationInvalidatesSnapshotsOnly(relid) && |
417 | 0 | !RelationHasSysCache(relid)) |
418 | 0 | InvalidateCatalogSnapshot(); |
419 | |
|
420 | 0 | if (CatalogSnapshot == NULL) |
421 | 0 | { |
422 | | /* Get new snapshot. */ |
423 | 0 | CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData); |
424 | | |
425 | | /* |
426 | | * Make sure the catalog snapshot will be accounted for in decisions |
427 | | * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but |
428 | | * that would result in making a physical copy, which is overkill; and |
429 | | * it would also create a dependency on some resource owner, which we |
430 | | * do not want for reasons explained at the head of this file. Instead |
431 | | * just shove the CatalogSnapshot into the pairing heap manually. This |
432 | | * has to be reversed in InvalidateCatalogSnapshot, of course. |
433 | | * |
434 | | * NB: it had better be impossible for this to throw error, since the |
435 | | * CatalogSnapshot pointer is already valid. |
436 | | */ |
437 | 0 | pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node); |
438 | 0 | } |
439 | |
|
440 | 0 | return CatalogSnapshot; |
441 | 0 | } |
442 | | |
443 | | /* |
444 | | * InvalidateCatalogSnapshot |
445 | | * Mark the current catalog snapshot, if any, as invalid |
446 | | * |
447 | | * We could change this API to allow the caller to provide more fine-grained |
448 | | * invalidation details, so that a change to relation A wouldn't prevent us |
449 | | * from using our cached snapshot to scan relation B, but so far there's no |
450 | | * evidence that the CPU cycles we spent tracking such fine details would be |
451 | | * well-spent. |
452 | | */ |
453 | | void |
454 | | InvalidateCatalogSnapshot(void) |
455 | 0 | { |
456 | 0 | if (CatalogSnapshot) |
457 | 0 | { |
458 | 0 | pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node); |
459 | 0 | CatalogSnapshot = NULL; |
460 | 0 | SnapshotResetXmin(); |
461 | 0 | } |
462 | 0 | } |
463 | | |
464 | | /* |
465 | | * InvalidateCatalogSnapshotConditionally |
466 | | * Drop catalog snapshot if it's the only one we have |
467 | | * |
468 | | * This is called when we are about to wait for client input, so we don't |
469 | | * want to continue holding the catalog snapshot if it might mean that the |
470 | | * global xmin horizon can't advance. However, if there are other snapshots |
471 | | * still active or registered, the catalog snapshot isn't likely to be the |
472 | | * oldest one, so we might as well keep it. |
473 | | */ |
474 | | void |
475 | | InvalidateCatalogSnapshotConditionally(void) |
476 | 0 | { |
477 | 0 | if (CatalogSnapshot && |
478 | 0 | ActiveSnapshot == NULL && |
479 | 0 | pairingheap_is_singular(&RegisteredSnapshots)) |
480 | 0 | InvalidateCatalogSnapshot(); |
481 | 0 | } |
482 | | |
483 | | /* |
484 | | * SnapshotSetCommandId |
485 | | * Propagate CommandCounterIncrement into the static snapshots, if set |
486 | | */ |
487 | | void |
488 | | SnapshotSetCommandId(CommandId curcid) |
489 | 0 | { |
490 | 0 | if (!FirstSnapshotSet) |
491 | 0 | return; |
492 | | |
493 | 0 | if (CurrentSnapshot) |
494 | 0 | CurrentSnapshot->curcid = curcid; |
495 | 0 | if (SecondarySnapshot) |
496 | 0 | SecondarySnapshot->curcid = curcid; |
497 | | /* Should we do the same with CatalogSnapshot? */ |
498 | 0 | } |
499 | | |
500 | | /* |
501 | | * SetTransactionSnapshot |
502 | | * Set the transaction's snapshot from an imported MVCC snapshot. |
503 | | * |
504 | | * Note that this is very closely tied to GetTransactionSnapshot --- it |
505 | | * must take care of all the same considerations as the first-snapshot case |
506 | | * in GetTransactionSnapshot. |
507 | | */ |
508 | | static void |
509 | | SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, |
510 | | int sourcepid, PGPROC *sourceproc) |
511 | 0 | { |
512 | | /* Caller should have checked this already */ |
513 | 0 | Assert(!FirstSnapshotSet); |
514 | | |
515 | | /* Better do this to ensure following Assert succeeds. */ |
516 | 0 | InvalidateCatalogSnapshot(); |
517 | |
|
518 | 0 | Assert(pairingheap_is_empty(&RegisteredSnapshots)); |
519 | 0 | Assert(FirstXactSnapshot == NULL); |
520 | 0 | Assert(!HistoricSnapshotActive()); |
521 | | |
522 | | /* |
523 | | * Even though we are not going to use the snapshot it computes, we must |
524 | | * call GetSnapshotData, for two reasons: (1) to be sure that |
525 | | * CurrentSnapshotData's XID arrays have been allocated, and (2) to update |
526 | | * the state for GlobalVis*. |
527 | | */ |
528 | 0 | CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); |
529 | | |
530 | | /* |
531 | | * Now copy appropriate fields from the source snapshot. |
532 | | */ |
533 | 0 | CurrentSnapshot->xmin = sourcesnap->xmin; |
534 | 0 | CurrentSnapshot->xmax = sourcesnap->xmax; |
535 | 0 | CurrentSnapshot->xcnt = sourcesnap->xcnt; |
536 | 0 | Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount()); |
537 | 0 | if (sourcesnap->xcnt > 0) |
538 | 0 | memcpy(CurrentSnapshot->xip, sourcesnap->xip, |
539 | 0 | sourcesnap->xcnt * sizeof(TransactionId)); |
540 | 0 | CurrentSnapshot->subxcnt = sourcesnap->subxcnt; |
541 | 0 | Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount()); |
542 | 0 | if (sourcesnap->subxcnt > 0) |
543 | 0 | memcpy(CurrentSnapshot->subxip, sourcesnap->subxip, |
544 | 0 | sourcesnap->subxcnt * sizeof(TransactionId)); |
545 | 0 | CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed; |
546 | 0 | CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery; |
547 | | /* NB: curcid should NOT be copied, it's a local matter */ |
548 | |
|
549 | 0 | CurrentSnapshot->snapXactCompletionCount = 0; |
550 | | |
551 | | /* |
552 | | * Now we have to fix what GetSnapshotData did with MyProc->xmin and |
553 | | * TransactionXmin. There is a race condition: to make sure we are not |
554 | | * causing the global xmin to go backwards, we have to test that the |
555 | | * source transaction is still running, and that has to be done |
556 | | * atomically. So let procarray.c do it. |
557 | | * |
558 | | * Note: in serializable mode, predicate.c will do this a second time. It |
559 | | * doesn't seem worth contorting the logic here to avoid two calls, |
560 | | * especially since it's not clear that predicate.c *must* do this. |
561 | | */ |
562 | 0 | if (sourceproc != NULL) |
563 | 0 | { |
564 | 0 | if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc)) |
565 | 0 | ereport(ERROR, |
566 | 0 | (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
567 | 0 | errmsg("could not import the requested snapshot"), |
568 | 0 | errdetail("The source transaction is not running anymore."))); |
569 | 0 | } |
570 | 0 | else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid)) |
571 | 0 | ereport(ERROR, |
572 | 0 | (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
573 | 0 | errmsg("could not import the requested snapshot"), |
574 | 0 | errdetail("The source process with PID %d is not running anymore.", |
575 | 0 | sourcepid))); |
576 | | |
577 | | /* |
578 | | * In transaction-snapshot mode, the first snapshot must live until end of |
579 | | * xact, so we must make a copy of it. Furthermore, if we're running in |
580 | | * serializable mode, predicate.c needs to do its own processing. |
581 | | */ |
582 | 0 | if (IsolationUsesXactSnapshot()) |
583 | 0 | { |
584 | 0 | if (IsolationIsSerializable()) |
585 | 0 | SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid, |
586 | 0 | sourcepid); |
587 | | /* Make a saved copy */ |
588 | 0 | CurrentSnapshot = CopySnapshot(CurrentSnapshot); |
589 | 0 | FirstXactSnapshot = CurrentSnapshot; |
590 | | /* Mark it as "registered" in FirstXactSnapshot */ |
591 | 0 | FirstXactSnapshot->regd_count++; |
592 | 0 | pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); |
593 | 0 | } |
594 | |
|
595 | 0 | FirstSnapshotSet = true; |
596 | 0 | } |
597 | | |
598 | | /* |
599 | | * CopySnapshot |
600 | | * Copy the given snapshot. |
601 | | * |
602 | | * The copy is palloc'd in TopTransactionContext and has initial refcounts set |
603 | | * to 0. The returned snapshot has the copied flag set. |
604 | | */ |
605 | | static Snapshot |
606 | | CopySnapshot(Snapshot snapshot) |
607 | 0 | { |
608 | 0 | Snapshot newsnap; |
609 | 0 | Size subxipoff; |
610 | 0 | Size size; |
611 | |
|
612 | 0 | Assert(snapshot != InvalidSnapshot); |
613 | | |
614 | | /* We allocate any XID arrays needed in the same palloc block. */ |
615 | 0 | size = subxipoff = sizeof(SnapshotData) + |
616 | 0 | snapshot->xcnt * sizeof(TransactionId); |
617 | 0 | if (snapshot->subxcnt > 0) |
618 | 0 | size += snapshot->subxcnt * sizeof(TransactionId); |
619 | |
|
620 | 0 | newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); |
621 | 0 | memcpy(newsnap, snapshot, sizeof(SnapshotData)); |
622 | |
|
623 | 0 | newsnap->regd_count = 0; |
624 | 0 | newsnap->active_count = 0; |
625 | 0 | newsnap->copied = true; |
626 | 0 | newsnap->snapXactCompletionCount = 0; |
627 | | |
628 | | /* setup XID array */ |
629 | 0 | if (snapshot->xcnt > 0) |
630 | 0 | { |
631 | 0 | newsnap->xip = (TransactionId *) (newsnap + 1); |
632 | 0 | memcpy(newsnap->xip, snapshot->xip, |
633 | 0 | snapshot->xcnt * sizeof(TransactionId)); |
634 | 0 | } |
635 | 0 | else |
636 | 0 | newsnap->xip = NULL; |
637 | | |
638 | | /* |
639 | | * Setup subXID array. Don't bother to copy it if it had overflowed, |
640 | | * though, because it's not used anywhere in that case. Except if it's a |
641 | | * snapshot taken during recovery; all the top-level XIDs are in subxip as |
642 | | * well in that case, so we mustn't lose them. |
643 | | */ |
644 | 0 | if (snapshot->subxcnt > 0 && |
645 | 0 | (!snapshot->suboverflowed || snapshot->takenDuringRecovery)) |
646 | 0 | { |
647 | 0 | newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff); |
648 | 0 | memcpy(newsnap->subxip, snapshot->subxip, |
649 | 0 | snapshot->subxcnt * sizeof(TransactionId)); |
650 | 0 | } |
651 | 0 | else |
652 | 0 | newsnap->subxip = NULL; |
653 | |
|
654 | 0 | return newsnap; |
655 | 0 | } |
656 | | |
657 | | /* |
658 | | * FreeSnapshot |
659 | | * Free the memory associated with a snapshot. |
660 | | */ |
661 | | static void |
662 | | FreeSnapshot(Snapshot snapshot) |
663 | 0 | { |
664 | 0 | Assert(snapshot->regd_count == 0); |
665 | 0 | Assert(snapshot->active_count == 0); |
666 | 0 | Assert(snapshot->copied); |
667 | |
|
668 | 0 | pfree(snapshot); |
669 | 0 | } |
670 | | |
671 | | /* |
672 | | * PushActiveSnapshot |
673 | | * Set the given snapshot as the current active snapshot |
674 | | * |
675 | | * If the passed snapshot is a statically-allocated one, or it is possibly |
676 | | * subject to a future command counter update, create a new long-lived copy |
677 | | * with active refcount=1. Otherwise, only increment the refcount. |
678 | | */ |
679 | | void |
680 | | PushActiveSnapshot(Snapshot snapshot) |
681 | 0 | { |
682 | 0 | PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel()); |
683 | 0 | } |
684 | | |
685 | | /* |
686 | | * PushActiveSnapshotWithLevel |
687 | | * Set the given snapshot as the current active snapshot |
688 | | * |
689 | | * Same as PushActiveSnapshot except that caller can specify the |
690 | | * transaction nesting level that "owns" the snapshot. This level |
691 | | * must not be deeper than the current top of the snapshot stack. |
692 | | */ |
693 | | void |
694 | | PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level) |
695 | 0 | { |
696 | 0 | ActiveSnapshotElt *newactive; |
697 | |
|
698 | 0 | Assert(snapshot != InvalidSnapshot); |
699 | 0 | Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level); |
700 | |
|
701 | 0 | newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt)); |
702 | | |
703 | | /* |
704 | | * Checking SecondarySnapshot is probably useless here, but it seems |
705 | | * better to be sure. |
706 | | */ |
707 | 0 | if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot || |
708 | 0 | !snapshot->copied) |
709 | 0 | newactive->as_snap = CopySnapshot(snapshot); |
710 | 0 | else |
711 | 0 | newactive->as_snap = snapshot; |
712 | |
|
713 | 0 | newactive->as_next = ActiveSnapshot; |
714 | 0 | newactive->as_level = snap_level; |
715 | |
|
716 | 0 | newactive->as_snap->active_count++; |
717 | |
|
718 | 0 | ActiveSnapshot = newactive; |
719 | 0 | } |
720 | | |
721 | | /* |
722 | | * PushCopiedSnapshot |
723 | | * As above, except forcibly copy the presented snapshot. |
724 | | * |
725 | | * This should be used when the ActiveSnapshot has to be modifiable, for |
726 | | * example if the caller intends to call UpdateActiveSnapshotCommandId. |
727 | | * The new snapshot will be released when popped from the stack. |
728 | | */ |
729 | | void |
730 | | PushCopiedSnapshot(Snapshot snapshot) |
731 | 0 | { |
732 | 0 | PushActiveSnapshot(CopySnapshot(snapshot)); |
733 | 0 | } |
734 | | |
735 | | /* |
736 | | * UpdateActiveSnapshotCommandId |
737 | | * |
738 | | * Update the current CID of the active snapshot. This can only be applied |
739 | | * to a snapshot that is not referenced elsewhere. |
740 | | */ |
741 | | void |
742 | | UpdateActiveSnapshotCommandId(void) |
743 | 0 | { |
744 | 0 | CommandId save_curcid, |
745 | 0 | curcid; |
746 | |
|
747 | 0 | Assert(ActiveSnapshot != NULL); |
748 | 0 | Assert(ActiveSnapshot->as_snap->active_count == 1); |
749 | 0 | Assert(ActiveSnapshot->as_snap->regd_count == 0); |
750 | | |
751 | | /* |
752 | | * Don't allow modification of the active snapshot during parallel |
753 | | * operation. We share the snapshot to worker backends at the beginning |
754 | | * of parallel operation, so any change to the snapshot can lead to |
755 | | * inconsistencies. We have other defenses against |
756 | | * CommandCounterIncrement, but there are a few places that call this |
757 | | * directly, so we put an additional guard here. |
758 | | */ |
759 | 0 | save_curcid = ActiveSnapshot->as_snap->curcid; |
760 | 0 | curcid = GetCurrentCommandId(false); |
761 | 0 | if (IsInParallelMode() && save_curcid != curcid) |
762 | 0 | elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation"); |
763 | 0 | ActiveSnapshot->as_snap->curcid = curcid; |
764 | 0 | } |
765 | | |
766 | | /* |
767 | | * PopActiveSnapshot |
768 | | * |
769 | | * Remove the topmost snapshot from the active snapshot stack, decrementing the |
770 | | * reference count, and free it if this was the last reference. |
771 | | */ |
772 | | void |
773 | | PopActiveSnapshot(void) |
774 | 0 | { |
775 | 0 | ActiveSnapshotElt *newstack; |
776 | |
|
777 | 0 | newstack = ActiveSnapshot->as_next; |
778 | |
|
779 | 0 | Assert(ActiveSnapshot->as_snap->active_count > 0); |
780 | |
|
781 | 0 | ActiveSnapshot->as_snap->active_count--; |
782 | |
|
783 | 0 | if (ActiveSnapshot->as_snap->active_count == 0 && |
784 | 0 | ActiveSnapshot->as_snap->regd_count == 0) |
785 | 0 | FreeSnapshot(ActiveSnapshot->as_snap); |
786 | |
|
787 | 0 | pfree(ActiveSnapshot); |
788 | 0 | ActiveSnapshot = newstack; |
789 | |
|
790 | 0 | SnapshotResetXmin(); |
791 | 0 | } |
792 | | |
793 | | /* |
794 | | * GetActiveSnapshot |
795 | | * Return the topmost snapshot in the Active stack. |
796 | | */ |
797 | | Snapshot |
798 | | GetActiveSnapshot(void) |
799 | 0 | { |
800 | 0 | Assert(ActiveSnapshot != NULL); |
801 | |
|
802 | 0 | return ActiveSnapshot->as_snap; |
803 | 0 | } |
804 | | |
805 | | /* |
806 | | * ActiveSnapshotSet |
807 | | * Return whether there is at least one snapshot in the Active stack |
808 | | */ |
809 | | bool |
810 | | ActiveSnapshotSet(void) |
811 | 0 | { |
812 | 0 | return ActiveSnapshot != NULL; |
813 | 0 | } |
814 | | |
815 | | /* |
816 | | * RegisterSnapshot |
817 | | * Register a snapshot as being in use by the current resource owner |
818 | | * |
819 | | * If InvalidSnapshot is passed, it is not registered. |
820 | | */ |
821 | | Snapshot |
822 | | RegisterSnapshot(Snapshot snapshot) |
823 | 0 | { |
824 | 0 | if (snapshot == InvalidSnapshot) |
825 | 0 | return InvalidSnapshot; |
826 | | |
827 | 0 | return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner); |
828 | 0 | } |
829 | | |
830 | | /* |
831 | | * RegisterSnapshotOnOwner |
832 | | * As above, but use the specified resource owner |
833 | | */ |
834 | | Snapshot |
835 | | RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner) |
836 | 0 | { |
837 | 0 | Snapshot snap; |
838 | |
|
839 | 0 | if (snapshot == InvalidSnapshot) |
840 | 0 | return InvalidSnapshot; |
841 | | |
842 | | /* Static snapshot? Create a persistent copy */ |
843 | 0 | snap = snapshot->copied ? snapshot : CopySnapshot(snapshot); |
844 | | |
845 | | /* and tell resowner.c about it */ |
846 | 0 | ResourceOwnerEnlarge(owner); |
847 | 0 | snap->regd_count++; |
848 | 0 | ResourceOwnerRememberSnapshot(owner, snap); |
849 | |
|
850 | 0 | if (snap->regd_count == 1) |
851 | 0 | pairingheap_add(&RegisteredSnapshots, &snap->ph_node); |
852 | |
|
853 | 0 | return snap; |
854 | 0 | } |
855 | | |
856 | | /* |
857 | | * UnregisterSnapshot |
858 | | * |
859 | | * Decrement the reference count of a snapshot, remove the corresponding |
860 | | * reference from CurrentResourceOwner, and free the snapshot if no more |
861 | | * references remain. |
862 | | */ |
863 | | void |
864 | | UnregisterSnapshot(Snapshot snapshot) |
865 | 0 | { |
866 | 0 | if (snapshot == NULL) |
867 | 0 | return; |
868 | | |
869 | 0 | UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner); |
870 | 0 | } |
871 | | |
872 | | /* |
873 | | * UnregisterSnapshotFromOwner |
874 | | * As above, but use the specified resource owner |
875 | | */ |
876 | | void |
877 | | UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) |
878 | 0 | { |
879 | 0 | if (snapshot == NULL) |
880 | 0 | return; |
881 | | |
882 | 0 | ResourceOwnerForgetSnapshot(owner, snapshot); |
883 | 0 | UnregisterSnapshotNoOwner(snapshot); |
884 | 0 | } |
885 | | |
886 | | static void |
887 | | UnregisterSnapshotNoOwner(Snapshot snapshot) |
888 | 0 | { |
889 | 0 | Assert(snapshot->regd_count > 0); |
890 | 0 | Assert(!pairingheap_is_empty(&RegisteredSnapshots)); |
891 | |
|
892 | 0 | snapshot->regd_count--; |
893 | 0 | if (snapshot->regd_count == 0) |
894 | 0 | pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node); |
895 | |
|
896 | 0 | if (snapshot->regd_count == 0 && snapshot->active_count == 0) |
897 | 0 | { |
898 | 0 | FreeSnapshot(snapshot); |
899 | 0 | SnapshotResetXmin(); |
900 | 0 | } |
901 | 0 | } |
902 | | |
903 | | /* |
904 | | * Comparison function for RegisteredSnapshots heap. Snapshots are ordered |
905 | | * by xmin, so that the snapshot with smallest xmin is at the top. |
906 | | */ |
907 | | static int |
908 | | xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg) |
909 | 0 | { |
910 | 0 | const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a); |
911 | 0 | const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b); |
912 | |
|
913 | 0 | if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin)) |
914 | 0 | return 1; |
915 | 0 | else if (TransactionIdFollows(asnap->xmin, bsnap->xmin)) |
916 | 0 | return -1; |
917 | 0 | else |
918 | 0 | return 0; |
919 | 0 | } |
920 | | |
921 | | /* |
922 | | * SnapshotResetXmin |
923 | | * |
924 | | * If there are no more snapshots, we can reset our PGPROC->xmin to |
925 | | * InvalidTransactionId. Note we can do this without locking because we assume |
926 | | * that storing an Xid is atomic. |
927 | | * |
928 | | * Even if there are some remaining snapshots, we may be able to advance our |
929 | | * PGPROC->xmin to some degree. This typically happens when a portal is |
930 | | * dropped. For efficiency, we only consider recomputing PGPROC->xmin when |
931 | | * the active snapshot stack is empty; this allows us not to need to track |
932 | | * which active snapshot is oldest. |
933 | | */ |
934 | | static void |
935 | | SnapshotResetXmin(void) |
936 | 0 | { |
937 | 0 | Snapshot minSnapshot; |
938 | |
|
939 | 0 | if (ActiveSnapshot != NULL) |
940 | 0 | return; |
941 | | |
942 | 0 | if (pairingheap_is_empty(&RegisteredSnapshots)) |
943 | 0 | { |
944 | 0 | MyProc->xmin = TransactionXmin = InvalidTransactionId; |
945 | 0 | return; |
946 | 0 | } |
947 | | |
948 | 0 | minSnapshot = pairingheap_container(SnapshotData, ph_node, |
949 | 0 | pairingheap_first(&RegisteredSnapshots)); |
950 | |
|
951 | 0 | if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin)) |
952 | 0 | MyProc->xmin = TransactionXmin = minSnapshot->xmin; |
953 | 0 | } |
954 | | |
955 | | /* |
956 | | * AtSubCommit_Snapshot |
957 | | */ |
958 | | void |
959 | | AtSubCommit_Snapshot(int level) |
960 | 0 | { |
961 | 0 | ActiveSnapshotElt *active; |
962 | | |
963 | | /* |
964 | | * Relabel the active snapshots set in this subtransaction as though they |
965 | | * are owned by the parent subxact. |
966 | | */ |
967 | 0 | for (active = ActiveSnapshot; active != NULL; active = active->as_next) |
968 | 0 | { |
969 | 0 | if (active->as_level < level) |
970 | 0 | break; |
971 | 0 | active->as_level = level - 1; |
972 | 0 | } |
973 | 0 | } |
974 | | |
975 | | /* |
976 | | * AtSubAbort_Snapshot |
977 | | * Clean up snapshots after a subtransaction abort |
978 | | */ |
979 | | void |
980 | | AtSubAbort_Snapshot(int level) |
981 | 0 | { |
982 | | /* Forget the active snapshots set by this subtransaction */ |
983 | 0 | while (ActiveSnapshot && ActiveSnapshot->as_level >= level) |
984 | 0 | { |
985 | 0 | ActiveSnapshotElt *next; |
986 | |
|
987 | 0 | next = ActiveSnapshot->as_next; |
988 | | |
989 | | /* |
990 | | * Decrement the snapshot's active count. If it's still registered or |
991 | | * marked as active by an outer subtransaction, we can't free it yet. |
992 | | */ |
993 | 0 | Assert(ActiveSnapshot->as_snap->active_count >= 1); |
994 | 0 | ActiveSnapshot->as_snap->active_count -= 1; |
995 | |
|
996 | 0 | if (ActiveSnapshot->as_snap->active_count == 0 && |
997 | 0 | ActiveSnapshot->as_snap->regd_count == 0) |
998 | 0 | FreeSnapshot(ActiveSnapshot->as_snap); |
999 | | |
1000 | | /* and free the stack element */ |
1001 | 0 | pfree(ActiveSnapshot); |
1002 | |
|
1003 | 0 | ActiveSnapshot = next; |
1004 | 0 | } |
1005 | |
|
1006 | 0 | SnapshotResetXmin(); |
1007 | 0 | } |
1008 | | |
1009 | | /* |
1010 | | * AtEOXact_Snapshot |
1011 | | * Snapshot manager's cleanup function for end of transaction |
1012 | | */ |
1013 | | void |
1014 | | AtEOXact_Snapshot(bool isCommit, bool resetXmin) |
1015 | 0 | { |
1016 | | /* |
1017 | | * In transaction-snapshot mode we must release our privately-managed |
1018 | | * reference to the transaction snapshot. We must remove it from |
1019 | | * RegisteredSnapshots to keep the check below happy. But we don't bother |
1020 | | * to do FreeSnapshot, for two reasons: the memory will go away with |
1021 | | * TopTransactionContext anyway, and if someone has left the snapshot |
1022 | | * stacked as active, we don't want the code below to be chasing through a |
1023 | | * dangling pointer. |
1024 | | */ |
1025 | 0 | if (FirstXactSnapshot != NULL) |
1026 | 0 | { |
1027 | 0 | Assert(FirstXactSnapshot->regd_count > 0); |
1028 | 0 | Assert(!pairingheap_is_empty(&RegisteredSnapshots)); |
1029 | 0 | pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); |
1030 | 0 | } |
1031 | 0 | FirstXactSnapshot = NULL; |
1032 | | |
1033 | | /* |
1034 | | * If we exported any snapshots, clean them up. |
1035 | | */ |
1036 | 0 | if (exportedSnapshots != NIL) |
1037 | 0 | { |
1038 | 0 | ListCell *lc; |
1039 | | |
1040 | | /* |
1041 | | * Get rid of the files. Unlink failure is only a WARNING because (1) |
1042 | | * it's too late to abort the transaction, and (2) leaving a leaked |
1043 | | * file around has little real consequence anyway. |
1044 | | * |
1045 | | * We also need to remove the snapshots from RegisteredSnapshots to |
1046 | | * prevent a warning below. |
1047 | | * |
1048 | | * As with the FirstXactSnapshot, we don't need to free resources of |
1049 | | * the snapshot itself as it will go away with the memory context. |
1050 | | */ |
1051 | 0 | foreach(lc, exportedSnapshots) |
1052 | 0 | { |
1053 | 0 | ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc); |
1054 | |
|
1055 | 0 | if (unlink(esnap->snapfile)) |
1056 | 0 | elog(WARNING, "could not unlink file \"%s\": %m", |
1057 | 0 | esnap->snapfile); |
1058 | | |
1059 | 0 | pairingheap_remove(&RegisteredSnapshots, |
1060 | 0 | &esnap->snapshot->ph_node); |
1061 | 0 | } |
1062 | | |
1063 | 0 | exportedSnapshots = NIL; |
1064 | 0 | } |
1065 | | |
1066 | | /* Drop catalog snapshot if any */ |
1067 | 0 | InvalidateCatalogSnapshot(); |
1068 | | |
1069 | | /* On commit, complain about leftover snapshots */ |
1070 | 0 | if (isCommit) |
1071 | 0 | { |
1072 | 0 | ActiveSnapshotElt *active; |
1073 | |
|
1074 | 0 | if (!pairingheap_is_empty(&RegisteredSnapshots)) |
1075 | 0 | elog(WARNING, "registered snapshots seem to remain after cleanup"); |
1076 | | |
1077 | | /* complain about unpopped active snapshots */ |
1078 | 0 | for (active = ActiveSnapshot; active != NULL; active = active->as_next) |
1079 | 0 | elog(WARNING, "snapshot %p still active", active); |
1080 | 0 | } |
1081 | | |
1082 | | /* |
1083 | | * And reset our state. We don't need to free the memory explicitly -- |
1084 | | * it'll go away with TopTransactionContext. |
1085 | | */ |
1086 | 0 | ActiveSnapshot = NULL; |
1087 | 0 | pairingheap_reset(&RegisteredSnapshots); |
1088 | |
|
1089 | 0 | CurrentSnapshot = NULL; |
1090 | 0 | SecondarySnapshot = NULL; |
1091 | |
|
1092 | 0 | FirstSnapshotSet = false; |
1093 | | |
1094 | | /* |
1095 | | * During normal commit processing, we call ProcArrayEndTransaction() to |
1096 | | * reset the MyProc->xmin. That call happens prior to the call to |
1097 | | * AtEOXact_Snapshot(), so we need not touch xmin here at all. |
1098 | | */ |
1099 | 0 | if (resetXmin) |
1100 | 0 | SnapshotResetXmin(); |
1101 | |
|
1102 | 0 | Assert(resetXmin || MyProc->xmin == 0); |
1103 | 0 | } |
1104 | | |
1105 | | |
1106 | | /* |
1107 | | * ExportSnapshot |
1108 | | * Export the snapshot to a file so that other backends can import it. |
1109 | | * Returns the token (the file name) that can be used to import this |
1110 | | * snapshot. |
1111 | | */ |
1112 | | char * |
1113 | | ExportSnapshot(Snapshot snapshot) |
1114 | 0 | { |
1115 | 0 | TransactionId topXid; |
1116 | 0 | TransactionId *children; |
1117 | 0 | ExportedSnapshot *esnap; |
1118 | 0 | int nchildren; |
1119 | 0 | int addTopXid; |
1120 | 0 | StringInfoData buf; |
1121 | 0 | FILE *f; |
1122 | 0 | int i; |
1123 | 0 | MemoryContext oldcxt; |
1124 | 0 | char path[MAXPGPATH]; |
1125 | 0 | char pathtmp[MAXPGPATH]; |
1126 | | |
1127 | | /* |
1128 | | * It's tempting to call RequireTransactionBlock here, since it's not very |
1129 | | * useful to export a snapshot that will disappear immediately afterwards. |
1130 | | * However, we haven't got enough information to do that, since we don't |
1131 | | * know if we're at top level or not. For example, we could be inside a |
1132 | | * plpgsql function that is going to fire off other transactions via |
1133 | | * dblink. Rather than disallow perfectly legitimate usages, don't make a |
1134 | | * check. |
1135 | | * |
1136 | | * Also note that we don't make any restriction on the transaction's |
1137 | | * isolation level; however, importers must check the level if they are |
1138 | | * serializable. |
1139 | | */ |
1140 | | |
1141 | | /* |
1142 | | * Get our transaction ID if there is one, to include in the snapshot. |
1143 | | */ |
1144 | 0 | topXid = GetTopTransactionIdIfAny(); |
1145 | | |
1146 | | /* |
1147 | | * We cannot export a snapshot from a subtransaction because there's no |
1148 | | * easy way for importers to verify that the same subtransaction is still |
1149 | | * running. |
1150 | | */ |
1151 | 0 | if (IsSubTransaction()) |
1152 | 0 | ereport(ERROR, |
1153 | 0 | (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), |
1154 | 0 | errmsg("cannot export a snapshot from a subtransaction"))); |
1155 | | |
1156 | | /* |
1157 | | * We do however allow previous committed subtransactions to exist. |
1158 | | * Importers of the snapshot must see them as still running, so get their |
1159 | | * XIDs to add them to the snapshot. |
1160 | | */ |
1161 | 0 | nchildren = xactGetCommittedChildren(&children); |
1162 | | |
1163 | | /* |
1164 | | * Generate file path for the snapshot. We start numbering of snapshots |
1165 | | * inside the transaction from 1. |
1166 | | */ |
1167 | 0 | snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d", |
1168 | 0 | MyProc->vxid.procNumber, MyProc->vxid.lxid, |
1169 | 0 | list_length(exportedSnapshots) + 1); |
1170 | | |
1171 | | /* |
1172 | | * Copy the snapshot into TopTransactionContext, add it to the |
1173 | | * exportedSnapshots list, and mark it pseudo-registered. We do this to |
1174 | | * ensure that the snapshot's xmin is honored for the rest of the |
1175 | | * transaction. |
1176 | | */ |
1177 | 0 | snapshot = CopySnapshot(snapshot); |
1178 | |
|
1179 | 0 | oldcxt = MemoryContextSwitchTo(TopTransactionContext); |
1180 | 0 | esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot)); |
1181 | 0 | esnap->snapfile = pstrdup(path); |
1182 | 0 | esnap->snapshot = snapshot; |
1183 | 0 | exportedSnapshots = lappend(exportedSnapshots, esnap); |
1184 | 0 | MemoryContextSwitchTo(oldcxt); |
1185 | |
|
1186 | 0 | snapshot->regd_count++; |
1187 | 0 | pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node); |
1188 | | |
1189 | | /* |
1190 | | * Fill buf with a text serialization of the snapshot, plus identification |
1191 | | * data about this transaction. The format expected by ImportSnapshot is |
1192 | | * pretty rigid: each line must be fieldname:value. |
1193 | | */ |
1194 | 0 | initStringInfo(&buf); |
1195 | |
|
1196 | 0 | appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid); |
1197 | 0 | appendStringInfo(&buf, "pid:%d\n", MyProcPid); |
1198 | 0 | appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId); |
1199 | 0 | appendStringInfo(&buf, "iso:%d\n", XactIsoLevel); |
1200 | 0 | appendStringInfo(&buf, "ro:%d\n", XactReadOnly); |
1201 | |
|
1202 | 0 | appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin); |
1203 | 0 | appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax); |
1204 | | |
1205 | | /* |
1206 | | * We must include our own top transaction ID in the top-xid data, since |
1207 | | * by definition we will still be running when the importing transaction |
1208 | | * adopts the snapshot, but GetSnapshotData never includes our own XID in |
1209 | | * the snapshot. (There must, therefore, be enough room to add it.) |
1210 | | * |
1211 | | * However, it could be that our topXid is after the xmax, in which case |
1212 | | * we shouldn't include it because xip[] members are expected to be before |
1213 | | * xmax. (We need not make the same check for subxip[] members, see |
1214 | | * snapshot.h.) |
1215 | | */ |
1216 | 0 | addTopXid = (TransactionIdIsValid(topXid) && |
1217 | 0 | TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0; |
1218 | 0 | appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid); |
1219 | 0 | for (i = 0; i < snapshot->xcnt; i++) |
1220 | 0 | appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]); |
1221 | 0 | if (addTopXid) |
1222 | 0 | appendStringInfo(&buf, "xip:%u\n", topXid); |
1223 | | |
1224 | | /* |
1225 | | * Similarly, we add our subcommitted child XIDs to the subxid data. Here, |
1226 | | * we have to cope with possible overflow. |
1227 | | */ |
1228 | 0 | if (snapshot->suboverflowed || |
1229 | 0 | snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount()) |
1230 | 0 | appendStringInfoString(&buf, "sof:1\n"); |
1231 | 0 | else |
1232 | 0 | { |
1233 | 0 | appendStringInfoString(&buf, "sof:0\n"); |
1234 | 0 | appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren); |
1235 | 0 | for (i = 0; i < snapshot->subxcnt; i++) |
1236 | 0 | appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]); |
1237 | 0 | for (i = 0; i < nchildren; i++) |
1238 | 0 | appendStringInfo(&buf, "sxp:%u\n", children[i]); |
1239 | 0 | } |
1240 | 0 | appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery); |
1241 | | |
1242 | | /* |
1243 | | * Now write the text representation into a file. We first write to a |
1244 | | * ".tmp" filename, and rename to final filename if no error. This |
1245 | | * ensures that no other backend can read an incomplete file |
1246 | | * (ImportSnapshot won't allow it because of its valid-characters check). |
1247 | | */ |
1248 | 0 | snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path); |
1249 | 0 | if (!(f = AllocateFile(pathtmp, PG_BINARY_W))) |
1250 | 0 | ereport(ERROR, |
1251 | 0 | (errcode_for_file_access(), |
1252 | 0 | errmsg("could not create file \"%s\": %m", pathtmp))); |
1253 | | |
1254 | 0 | if (fwrite(buf.data, buf.len, 1, f) != 1) |
1255 | 0 | ereport(ERROR, |
1256 | 0 | (errcode_for_file_access(), |
1257 | 0 | errmsg("could not write to file \"%s\": %m", pathtmp))); |
1258 | | |
1259 | | /* no fsync() since file need not survive a system crash */ |
1260 | | |
1261 | 0 | if (FreeFile(f)) |
1262 | 0 | ereport(ERROR, |
1263 | 0 | (errcode_for_file_access(), |
1264 | 0 | errmsg("could not write to file \"%s\": %m", pathtmp))); |
1265 | | |
1266 | | /* |
1267 | | * Now that we have written everything into a .tmp file, rename the file |
1268 | | * to remove the .tmp suffix. |
1269 | | */ |
1270 | 0 | if (rename(pathtmp, path) < 0) |
1271 | 0 | ereport(ERROR, |
1272 | 0 | (errcode_for_file_access(), |
1273 | 0 | errmsg("could not rename file \"%s\" to \"%s\": %m", |
1274 | 0 | pathtmp, path))); |
1275 | | |
1276 | | /* |
1277 | | * The basename of the file is what we return from pg_export_snapshot(). |
1278 | | * It's already in path in a textual format and we know that the path |
1279 | | * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash |
1280 | | * and pstrdup it so as not to return the address of a local variable. |
1281 | | */ |
1282 | 0 | return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1); |
1283 | 0 | } |
1284 | | |
1285 | | /* |
1286 | | * pg_export_snapshot |
1287 | | * SQL-callable wrapper for ExportSnapshot. |
1288 | | */ |
1289 | | Datum |
1290 | | pg_export_snapshot(PG_FUNCTION_ARGS) |
1291 | 0 | { |
1292 | 0 | char *snapshotName; |
1293 | |
|
1294 | 0 | snapshotName = ExportSnapshot(GetActiveSnapshot()); |
1295 | 0 | PG_RETURN_TEXT_P(cstring_to_text(snapshotName)); |
1296 | 0 | } |
1297 | | |
1298 | | |
1299 | | /* |
1300 | | * Parsing subroutines for ImportSnapshot: parse a line with the given |
1301 | | * prefix followed by a value, and advance *s to the next line. The |
1302 | | * filename is provided for use in error messages. |
1303 | | */ |
1304 | | static int |
1305 | | parseIntFromText(const char *prefix, char **s, const char *filename) |
1306 | 0 | { |
1307 | 0 | char *ptr = *s; |
1308 | 0 | int prefixlen = strlen(prefix); |
1309 | 0 | int val; |
1310 | |
|
1311 | 0 | if (strncmp(ptr, prefix, prefixlen) != 0) |
1312 | 0 | ereport(ERROR, |
1313 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1314 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1315 | 0 | ptr += prefixlen; |
1316 | 0 | if (sscanf(ptr, "%d", &val) != 1) |
1317 | 0 | ereport(ERROR, |
1318 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1319 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1320 | 0 | ptr = strchr(ptr, '\n'); |
1321 | 0 | if (!ptr) |
1322 | 0 | ereport(ERROR, |
1323 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1324 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1325 | 0 | *s = ptr + 1; |
1326 | 0 | return val; |
1327 | 0 | } |
1328 | | |
1329 | | static TransactionId |
1330 | | parseXidFromText(const char *prefix, char **s, const char *filename) |
1331 | 0 | { |
1332 | 0 | char *ptr = *s; |
1333 | 0 | int prefixlen = strlen(prefix); |
1334 | 0 | TransactionId val; |
1335 | |
|
1336 | 0 | if (strncmp(ptr, prefix, prefixlen) != 0) |
1337 | 0 | ereport(ERROR, |
1338 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1339 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1340 | 0 | ptr += prefixlen; |
1341 | 0 | if (sscanf(ptr, "%u", &val) != 1) |
1342 | 0 | ereport(ERROR, |
1343 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1344 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1345 | 0 | ptr = strchr(ptr, '\n'); |
1346 | 0 | if (!ptr) |
1347 | 0 | ereport(ERROR, |
1348 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1349 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1350 | 0 | *s = ptr + 1; |
1351 | 0 | return val; |
1352 | 0 | } |
1353 | | |
1354 | | static void |
1355 | | parseVxidFromText(const char *prefix, char **s, const char *filename, |
1356 | | VirtualTransactionId *vxid) |
1357 | 0 | { |
1358 | 0 | char *ptr = *s; |
1359 | 0 | int prefixlen = strlen(prefix); |
1360 | |
|
1361 | 0 | if (strncmp(ptr, prefix, prefixlen) != 0) |
1362 | 0 | ereport(ERROR, |
1363 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1364 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1365 | 0 | ptr += prefixlen; |
1366 | 0 | if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2) |
1367 | 0 | ereport(ERROR, |
1368 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1369 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1370 | 0 | ptr = strchr(ptr, '\n'); |
1371 | 0 | if (!ptr) |
1372 | 0 | ereport(ERROR, |
1373 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1374 | 0 | errmsg("invalid snapshot data in file \"%s\"", filename))); |
1375 | 0 | *s = ptr + 1; |
1376 | 0 | } |
1377 | | |
1378 | | /* |
1379 | | * ImportSnapshot |
1380 | | * Import a previously exported snapshot. The argument should be a |
1381 | | * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file. |
1382 | | * This is called by "SET TRANSACTION SNAPSHOT 'foo'". |
1383 | | */ |
1384 | | void |
1385 | | ImportSnapshot(const char *idstr) |
1386 | 0 | { |
1387 | 0 | char path[MAXPGPATH]; |
1388 | 0 | FILE *f; |
1389 | 0 | struct stat stat_buf; |
1390 | 0 | char *filebuf; |
1391 | 0 | int xcnt; |
1392 | 0 | int i; |
1393 | 0 | VirtualTransactionId src_vxid; |
1394 | 0 | int src_pid; |
1395 | 0 | Oid src_dbid; |
1396 | 0 | int src_isolevel; |
1397 | 0 | bool src_readonly; |
1398 | 0 | SnapshotData snapshot; |
1399 | | |
1400 | | /* |
1401 | | * Must be at top level of a fresh transaction. Note in particular that |
1402 | | * we check we haven't acquired an XID --- if we have, it's conceivable |
1403 | | * that the snapshot would show it as not running, making for very screwy |
1404 | | * behavior. |
1405 | | */ |
1406 | 0 | if (FirstSnapshotSet || |
1407 | 0 | GetTopTransactionIdIfAny() != InvalidTransactionId || |
1408 | 0 | IsSubTransaction()) |
1409 | 0 | ereport(ERROR, |
1410 | 0 | (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), |
1411 | 0 | errmsg("SET TRANSACTION SNAPSHOT must be called before any query"))); |
1412 | | |
1413 | | /* |
1414 | | * If we are in read committed mode then the next query would execute with |
1415 | | * a new snapshot thus making this function call quite useless. |
1416 | | */ |
1417 | 0 | if (!IsolationUsesXactSnapshot()) |
1418 | 0 | ereport(ERROR, |
1419 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1420 | 0 | errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ"))); |
1421 | | |
1422 | | /* |
1423 | | * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do |
1424 | | * this mainly to prevent reading arbitrary files. |
1425 | | */ |
1426 | 0 | if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr)) |
1427 | 0 | ereport(ERROR, |
1428 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
1429 | 0 | errmsg("invalid snapshot identifier: \"%s\"", idstr))); |
1430 | | |
1431 | | /* OK, read the file */ |
1432 | 0 | snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr); |
1433 | |
|
1434 | 0 | f = AllocateFile(path, PG_BINARY_R); |
1435 | 0 | if (!f) |
1436 | 0 | { |
1437 | | /* |
1438 | | * If file is missing while identifier has a correct format, avoid |
1439 | | * system errors. |
1440 | | */ |
1441 | 0 | if (errno == ENOENT) |
1442 | 0 | ereport(ERROR, |
1443 | 0 | (errcode(ERRCODE_UNDEFINED_OBJECT), |
1444 | 0 | errmsg("snapshot \"%s\" does not exist", idstr))); |
1445 | 0 | else |
1446 | 0 | ereport(ERROR, |
1447 | 0 | (errcode_for_file_access(), |
1448 | 0 | errmsg("could not open file \"%s\" for reading: %m", |
1449 | 0 | path))); |
1450 | 0 | } |
1451 | | |
1452 | | /* get the size of the file so that we know how much memory we need */ |
1453 | 0 | if (fstat(fileno(f), &stat_buf)) |
1454 | 0 | elog(ERROR, "could not stat file \"%s\": %m", path); |
1455 | | |
1456 | | /* and read the file into a palloc'd string */ |
1457 | 0 | filebuf = (char *) palloc(stat_buf.st_size + 1); |
1458 | 0 | if (fread(filebuf, stat_buf.st_size, 1, f) != 1) |
1459 | 0 | elog(ERROR, "could not read file \"%s\": %m", path); |
1460 | | |
1461 | 0 | filebuf[stat_buf.st_size] = '\0'; |
1462 | |
|
1463 | 0 | FreeFile(f); |
1464 | | |
1465 | | /* |
1466 | | * Construct a snapshot struct by parsing the file content. |
1467 | | */ |
1468 | 0 | memset(&snapshot, 0, sizeof(snapshot)); |
1469 | |
|
1470 | 0 | parseVxidFromText("vxid:", &filebuf, path, &src_vxid); |
1471 | 0 | src_pid = parseIntFromText("pid:", &filebuf, path); |
1472 | | /* we abuse parseXidFromText a bit here ... */ |
1473 | 0 | src_dbid = parseXidFromText("dbid:", &filebuf, path); |
1474 | 0 | src_isolevel = parseIntFromText("iso:", &filebuf, path); |
1475 | 0 | src_readonly = parseIntFromText("ro:", &filebuf, path); |
1476 | |
|
1477 | 0 | snapshot.snapshot_type = SNAPSHOT_MVCC; |
1478 | |
|
1479 | 0 | snapshot.xmin = parseXidFromText("xmin:", &filebuf, path); |
1480 | 0 | snapshot.xmax = parseXidFromText("xmax:", &filebuf, path); |
1481 | |
|
1482 | 0 | snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path); |
1483 | | |
1484 | | /* sanity-check the xid count before palloc */ |
1485 | 0 | if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount()) |
1486 | 0 | ereport(ERROR, |
1487 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1488 | 0 | errmsg("invalid snapshot data in file \"%s\"", path))); |
1489 | | |
1490 | 0 | snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId)); |
1491 | 0 | for (i = 0; i < xcnt; i++) |
1492 | 0 | snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path); |
1493 | |
|
1494 | 0 | snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path); |
1495 | |
|
1496 | 0 | if (!snapshot.suboverflowed) |
1497 | 0 | { |
1498 | 0 | snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path); |
1499 | | |
1500 | | /* sanity-check the xid count before palloc */ |
1501 | 0 | if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount()) |
1502 | 0 | ereport(ERROR, |
1503 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1504 | 0 | errmsg("invalid snapshot data in file \"%s\"", path))); |
1505 | | |
1506 | 0 | snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId)); |
1507 | 0 | for (i = 0; i < xcnt; i++) |
1508 | 0 | snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path); |
1509 | 0 | } |
1510 | 0 | else |
1511 | 0 | { |
1512 | 0 | snapshot.subxcnt = 0; |
1513 | 0 | snapshot.subxip = NULL; |
1514 | 0 | } |
1515 | | |
1516 | 0 | snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path); |
1517 | | |
1518 | | /* |
1519 | | * Do some additional sanity checking, just to protect ourselves. We |
1520 | | * don't trouble to check the array elements, just the most critical |
1521 | | * fields. |
1522 | | */ |
1523 | 0 | if (!VirtualTransactionIdIsValid(src_vxid) || |
1524 | 0 | !OidIsValid(src_dbid) || |
1525 | 0 | !TransactionIdIsNormal(snapshot.xmin) || |
1526 | 0 | !TransactionIdIsNormal(snapshot.xmax)) |
1527 | 0 | ereport(ERROR, |
1528 | 0 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
1529 | 0 | errmsg("invalid snapshot data in file \"%s\"", path))); |
1530 | | |
1531 | | /* |
1532 | | * If we're serializable, the source transaction must be too, otherwise |
1533 | | * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a |
1534 | | * non-read-only transaction can't adopt a snapshot from a read-only |
1535 | | * transaction, as predicate.c handles the cases very differently. |
1536 | | */ |
1537 | 0 | if (IsolationIsSerializable()) |
1538 | 0 | { |
1539 | 0 | if (src_isolevel != XACT_SERIALIZABLE) |
1540 | 0 | ereport(ERROR, |
1541 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1542 | 0 | errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction"))); |
1543 | 0 | if (src_readonly && !XactReadOnly) |
1544 | 0 | ereport(ERROR, |
1545 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1546 | 0 | errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction"))); |
1547 | 0 | } |
1548 | | |
1549 | | /* |
1550 | | * We cannot import a snapshot that was taken in a different database, |
1551 | | * because vacuum calculates OldestXmin on a per-database basis; so the |
1552 | | * source transaction's xmin doesn't protect us from data loss. This |
1553 | | * restriction could be removed if the source transaction were to mark its |
1554 | | * xmin as being globally applicable. But that would require some |
1555 | | * additional syntax, since that has to be known when the snapshot is |
1556 | | * initially taken. (See pgsql-hackers discussion of 2011-10-21.) |
1557 | | */ |
1558 | 0 | if (src_dbid != MyDatabaseId) |
1559 | 0 | ereport(ERROR, |
1560 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1561 | 0 | errmsg("cannot import a snapshot from a different database"))); |
1562 | | |
1563 | | /* OK, install the snapshot */ |
1564 | 0 | SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL); |
1565 | 0 | } |
1566 | | |
1567 | | /* |
1568 | | * XactHasExportedSnapshots |
1569 | | * Test whether current transaction has exported any snapshots. |
1570 | | */ |
1571 | | bool |
1572 | | XactHasExportedSnapshots(void) |
1573 | 0 | { |
1574 | 0 | return (exportedSnapshots != NIL); |
1575 | 0 | } |
1576 | | |
1577 | | /* |
1578 | | * DeleteAllExportedSnapshotFiles |
1579 | | * Clean up any files that have been left behind by a crashed backend |
1580 | | * that had exported snapshots before it died. |
1581 | | * |
1582 | | * This should be called during database startup or crash recovery. |
1583 | | */ |
1584 | | void |
1585 | | DeleteAllExportedSnapshotFiles(void) |
1586 | | { |
1587 | | char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)]; |
1588 | | DIR *s_dir; |
1589 | | struct dirent *s_de; |
1590 | | |
1591 | | /* |
1592 | | * Problems in reading the directory, or unlinking files, are reported at |
1593 | | * LOG level. Since we're running in the startup process, ERROR level |
1594 | | * would prevent database start, and it's not important enough for that. |
1595 | | */ |
1596 | | s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR); |
1597 | | |
1598 | | while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL) |
1599 | | { |
1600 | | if (strcmp(s_de->d_name, ".") == 0 || |
1601 | | strcmp(s_de->d_name, "..") == 0) |
1602 | | continue; |
1603 | | |
1604 | | snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name); |
1605 | | |
1606 | | if (unlink(buf) != 0) |
1607 | | ereport(LOG, |
1608 | | (errcode_for_file_access(), |
1609 | | errmsg("could not remove file \"%s\": %m", buf))); |
1610 | | } |
1611 | | |
1612 | | FreeDir(s_dir); |
1613 | | } |
1614 | | |
1615 | | /* |
1616 | | * ThereAreNoPriorRegisteredSnapshots |
1617 | | * Is the registered snapshot count less than or equal to one? |
1618 | | * |
1619 | | * Don't use this to settle important decisions. While zero registrations and |
1620 | | * no ActiveSnapshot would confirm a certain idleness, the system makes no |
1621 | | * guarantees about the significance of one registered snapshot. |
1622 | | */ |
1623 | | bool |
1624 | | ThereAreNoPriorRegisteredSnapshots(void) |
1625 | 0 | { |
1626 | 0 | if (pairingheap_is_empty(&RegisteredSnapshots) || |
1627 | 0 | pairingheap_is_singular(&RegisteredSnapshots)) |
1628 | 0 | return true; |
1629 | | |
1630 | 0 | return false; |
1631 | 0 | } |
1632 | | |
1633 | | /* |
1634 | | * HaveRegisteredOrActiveSnapshot |
1635 | | * Is there any registered or active snapshot? |
1636 | | * |
1637 | | * NB: Unless pushed or active, the cached catalog snapshot will not cause |
1638 | | * this function to return true. That allows this function to be used in |
1639 | | * checks enforcing a longer-lived snapshot. |
1640 | | */ |
1641 | | bool |
1642 | | HaveRegisteredOrActiveSnapshot(void) |
1643 | 0 | { |
1644 | 0 | if (ActiveSnapshot != NULL) |
1645 | 0 | return true; |
1646 | | |
1647 | | /* |
1648 | | * The catalog snapshot is in RegisteredSnapshots when valid, but can be |
1649 | | * removed at any time due to invalidation processing. If explicitly |
1650 | | * registered more than one snapshot has to be in RegisteredSnapshots. |
1651 | | */ |
1652 | 0 | if (CatalogSnapshot != NULL && |
1653 | 0 | pairingheap_is_singular(&RegisteredSnapshots)) |
1654 | 0 | return false; |
1655 | | |
1656 | 0 | return !pairingheap_is_empty(&RegisteredSnapshots); |
1657 | 0 | } |
1658 | | |
1659 | | |
1660 | | /* |
1661 | | * Setup a snapshot that replaces normal catalog snapshots that allows catalog |
1662 | | * access to behave just like it did at a certain point in the past. |
1663 | | * |
1664 | | * Needed for logical decoding. |
1665 | | */ |
1666 | | void |
1667 | | SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids) |
1668 | 0 | { |
1669 | 0 | Assert(historic_snapshot != NULL); |
1670 | | |
1671 | | /* setup the timetravel snapshot */ |
1672 | 0 | HistoricSnapshot = historic_snapshot; |
1673 | | |
1674 | | /* setup (cmin, cmax) lookup hash */ |
1675 | 0 | tuplecid_data = tuplecids; |
1676 | 0 | } |
1677 | | |
1678 | | |
1679 | | /* |
1680 | | * Make catalog snapshots behave normally again. |
1681 | | */ |
1682 | | void |
1683 | | TeardownHistoricSnapshot(bool is_error) |
1684 | 0 | { |
1685 | 0 | HistoricSnapshot = NULL; |
1686 | 0 | tuplecid_data = NULL; |
1687 | 0 | } |
1688 | | |
1689 | | bool |
1690 | | HistoricSnapshotActive(void) |
1691 | 0 | { |
1692 | 0 | return HistoricSnapshot != NULL; |
1693 | 0 | } |
1694 | | |
1695 | | HTAB * |
1696 | | HistoricSnapshotGetTupleCids(void) |
1697 | 0 | { |
1698 | 0 | Assert(HistoricSnapshotActive()); |
1699 | 0 | return tuplecid_data; |
1700 | 0 | } |
1701 | | |
1702 | | /* |
1703 | | * EstimateSnapshotSpace |
1704 | | * Returns the size needed to store the given snapshot. |
1705 | | * |
1706 | | * We are exporting only required fields from the Snapshot, stored in |
1707 | | * SerializedSnapshotData. |
1708 | | */ |
1709 | | Size |
1710 | | EstimateSnapshotSpace(Snapshot snapshot) |
1711 | 0 | { |
1712 | 0 | Size size; |
1713 | |
|
1714 | 0 | Assert(snapshot != InvalidSnapshot); |
1715 | 0 | Assert(snapshot->snapshot_type == SNAPSHOT_MVCC); |
1716 | | |
1717 | | /* We allocate any XID arrays needed in the same palloc block. */ |
1718 | 0 | size = add_size(sizeof(SerializedSnapshotData), |
1719 | 0 | mul_size(snapshot->xcnt, sizeof(TransactionId))); |
1720 | 0 | if (snapshot->subxcnt > 0 && |
1721 | 0 | (!snapshot->suboverflowed || snapshot->takenDuringRecovery)) |
1722 | 0 | size = add_size(size, |
1723 | 0 | mul_size(snapshot->subxcnt, sizeof(TransactionId))); |
1724 | |
|
1725 | 0 | return size; |
1726 | 0 | } |
1727 | | |
1728 | | /* |
1729 | | * SerializeSnapshot |
1730 | | * Dumps the serialized snapshot (extracted from given snapshot) onto the |
1731 | | * memory location at start_address. |
1732 | | */ |
1733 | | void |
1734 | | SerializeSnapshot(Snapshot snapshot, char *start_address) |
1735 | 0 | { |
1736 | 0 | SerializedSnapshotData serialized_snapshot; |
1737 | |
|
1738 | 0 | Assert(snapshot->subxcnt >= 0); |
1739 | | |
1740 | | /* Copy all required fields */ |
1741 | 0 | serialized_snapshot.xmin = snapshot->xmin; |
1742 | 0 | serialized_snapshot.xmax = snapshot->xmax; |
1743 | 0 | serialized_snapshot.xcnt = snapshot->xcnt; |
1744 | 0 | serialized_snapshot.subxcnt = snapshot->subxcnt; |
1745 | 0 | serialized_snapshot.suboverflowed = snapshot->suboverflowed; |
1746 | 0 | serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery; |
1747 | 0 | serialized_snapshot.curcid = snapshot->curcid; |
1748 | | |
1749 | | /* |
1750 | | * Ignore the SubXID array if it has overflowed, unless the snapshot was |
1751 | | * taken during recovery - in that case, top-level XIDs are in subxip as |
1752 | | * well, and we mustn't lose them. |
1753 | | */ |
1754 | 0 | if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery) |
1755 | 0 | serialized_snapshot.subxcnt = 0; |
1756 | | |
1757 | | /* Copy struct to possibly-unaligned buffer */ |
1758 | 0 | memcpy(start_address, |
1759 | 0 | &serialized_snapshot, sizeof(SerializedSnapshotData)); |
1760 | | |
1761 | | /* Copy XID array */ |
1762 | 0 | if (snapshot->xcnt > 0) |
1763 | 0 | memcpy((TransactionId *) (start_address + |
1764 | 0 | sizeof(SerializedSnapshotData)), |
1765 | 0 | snapshot->xip, snapshot->xcnt * sizeof(TransactionId)); |
1766 | | |
1767 | | /* |
1768 | | * Copy SubXID array. Don't bother to copy it if it had overflowed, |
1769 | | * though, because it's not used anywhere in that case. Except if it's a |
1770 | | * snapshot taken during recovery; all the top-level XIDs are in subxip as |
1771 | | * well in that case, so we mustn't lose them. |
1772 | | */ |
1773 | 0 | if (serialized_snapshot.subxcnt > 0) |
1774 | 0 | { |
1775 | 0 | Size subxipoff = sizeof(SerializedSnapshotData) + |
1776 | 0 | snapshot->xcnt * sizeof(TransactionId); |
1777 | |
|
1778 | 0 | memcpy((TransactionId *) (start_address + subxipoff), |
1779 | 0 | snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId)); |
1780 | 0 | } |
1781 | 0 | } |
1782 | | |
1783 | | /* |
1784 | | * RestoreSnapshot |
1785 | | * Restore a serialized snapshot from the specified address. |
1786 | | * |
1787 | | * The copy is palloc'd in TopTransactionContext and has initial refcounts set |
1788 | | * to 0. The returned snapshot has the copied flag set. |
1789 | | */ |
1790 | | Snapshot |
1791 | | RestoreSnapshot(char *start_address) |
1792 | 0 | { |
1793 | 0 | SerializedSnapshotData serialized_snapshot; |
1794 | 0 | Size size; |
1795 | 0 | Snapshot snapshot; |
1796 | 0 | TransactionId *serialized_xids; |
1797 | |
|
1798 | 0 | memcpy(&serialized_snapshot, start_address, |
1799 | 0 | sizeof(SerializedSnapshotData)); |
1800 | 0 | serialized_xids = (TransactionId *) |
1801 | 0 | (start_address + sizeof(SerializedSnapshotData)); |
1802 | | |
1803 | | /* We allocate any XID arrays needed in the same palloc block. */ |
1804 | 0 | size = sizeof(SnapshotData) |
1805 | 0 | + serialized_snapshot.xcnt * sizeof(TransactionId) |
1806 | 0 | + serialized_snapshot.subxcnt * sizeof(TransactionId); |
1807 | | |
1808 | | /* Copy all required fields */ |
1809 | 0 | snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); |
1810 | 0 | snapshot->snapshot_type = SNAPSHOT_MVCC; |
1811 | 0 | snapshot->xmin = serialized_snapshot.xmin; |
1812 | 0 | snapshot->xmax = serialized_snapshot.xmax; |
1813 | 0 | snapshot->xip = NULL; |
1814 | 0 | snapshot->xcnt = serialized_snapshot.xcnt; |
1815 | 0 | snapshot->subxip = NULL; |
1816 | 0 | snapshot->subxcnt = serialized_snapshot.subxcnt; |
1817 | 0 | snapshot->suboverflowed = serialized_snapshot.suboverflowed; |
1818 | 0 | snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery; |
1819 | 0 | snapshot->curcid = serialized_snapshot.curcid; |
1820 | 0 | snapshot->snapXactCompletionCount = 0; |
1821 | | |
1822 | | /* Copy XIDs, if present. */ |
1823 | 0 | if (serialized_snapshot.xcnt > 0) |
1824 | 0 | { |
1825 | 0 | snapshot->xip = (TransactionId *) (snapshot + 1); |
1826 | 0 | memcpy(snapshot->xip, serialized_xids, |
1827 | 0 | serialized_snapshot.xcnt * sizeof(TransactionId)); |
1828 | 0 | } |
1829 | | |
1830 | | /* Copy SubXIDs, if present. */ |
1831 | 0 | if (serialized_snapshot.subxcnt > 0) |
1832 | 0 | { |
1833 | 0 | snapshot->subxip = ((TransactionId *) (snapshot + 1)) + |
1834 | 0 | serialized_snapshot.xcnt; |
1835 | 0 | memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt, |
1836 | 0 | serialized_snapshot.subxcnt * sizeof(TransactionId)); |
1837 | 0 | } |
1838 | | |
1839 | | /* Set the copied flag so that the caller will set refcounts correctly. */ |
1840 | 0 | snapshot->regd_count = 0; |
1841 | 0 | snapshot->active_count = 0; |
1842 | 0 | snapshot->copied = true; |
1843 | |
|
1844 | 0 | return snapshot; |
1845 | 0 | } |
1846 | | |
1847 | | /* |
1848 | | * Install a restored snapshot as the transaction snapshot. |
1849 | | * |
1850 | | * The second argument is of type void * so that snapmgr.h need not include |
1851 | | * the declaration for PGPROC. |
1852 | | */ |
1853 | | void |
1854 | | RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) |
1855 | 0 | { |
1856 | 0 | SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc); |
1857 | 0 | } |
1858 | | |
1859 | | /* |
1860 | | * XidInMVCCSnapshot |
1861 | | * Is the given XID still-in-progress according to the snapshot? |
1862 | | * |
1863 | | * Note: GetSnapshotData never stores either top xid or subxids of our own |
1864 | | * backend into a snapshot, so these xids will not be reported as "running" |
1865 | | * by this function. This is OK for current uses, because we always check |
1866 | | * TransactionIdIsCurrentTransactionId first, except when it's known the |
1867 | | * XID could not be ours anyway. |
1868 | | */ |
1869 | | bool |
1870 | | XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) |
1871 | 0 | { |
1872 | | /* |
1873 | | * Make a quick range check to eliminate most XIDs without looking at the |
1874 | | * xip arrays. Note that this is OK even if we convert a subxact XID to |
1875 | | * its parent below, because a subxact with XID < xmin has surely also got |
1876 | | * a parent with XID < xmin, while one with XID >= xmax must belong to a |
1877 | | * parent that was not yet committed at the time of this snapshot. |
1878 | | */ |
1879 | | |
1880 | | /* Any xid < xmin is not in-progress */ |
1881 | 0 | if (TransactionIdPrecedes(xid, snapshot->xmin)) |
1882 | 0 | return false; |
1883 | | /* Any xid >= xmax is in-progress */ |
1884 | 0 | if (TransactionIdFollowsOrEquals(xid, snapshot->xmax)) |
1885 | 0 | return true; |
1886 | | |
1887 | | /* |
1888 | | * Snapshot information is stored slightly differently in snapshots taken |
1889 | | * during recovery. |
1890 | | */ |
1891 | 0 | if (!snapshot->takenDuringRecovery) |
1892 | 0 | { |
1893 | | /* |
1894 | | * If the snapshot contains full subxact data, the fastest way to |
1895 | | * check things is just to compare the given XID against both subxact |
1896 | | * XIDs and top-level XIDs. If the snapshot overflowed, we have to |
1897 | | * use pg_subtrans to convert a subxact XID to its parent XID, but |
1898 | | * then we need only look at top-level XIDs not subxacts. |
1899 | | */ |
1900 | 0 | if (!snapshot->suboverflowed) |
1901 | 0 | { |
1902 | | /* we have full data, so search subxip */ |
1903 | 0 | if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) |
1904 | 0 | return true; |
1905 | | |
1906 | | /* not there, fall through to search xip[] */ |
1907 | 0 | } |
1908 | 0 | else |
1909 | 0 | { |
1910 | | /* |
1911 | | * Snapshot overflowed, so convert xid to top-level. This is safe |
1912 | | * because we eliminated too-old XIDs above. |
1913 | | */ |
1914 | 0 | xid = SubTransGetTopmostTransaction(xid); |
1915 | | |
1916 | | /* |
1917 | | * If xid was indeed a subxact, we might now have an xid < xmin, |
1918 | | * so recheck to avoid an array scan. No point in rechecking |
1919 | | * xmax. |
1920 | | */ |
1921 | 0 | if (TransactionIdPrecedes(xid, snapshot->xmin)) |
1922 | 0 | return false; |
1923 | 0 | } |
1924 | | |
1925 | 0 | if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) |
1926 | 0 | return true; |
1927 | 0 | } |
1928 | 0 | else |
1929 | 0 | { |
1930 | | /* |
1931 | | * In recovery we store all xids in the subxip array because it is by |
1932 | | * far the bigger array, and we mostly don't know which xids are |
1933 | | * top-level and which are subxacts. The xip array is empty. |
1934 | | * |
1935 | | * We start by searching subtrans, if we overflowed. |
1936 | | */ |
1937 | 0 | if (snapshot->suboverflowed) |
1938 | 0 | { |
1939 | | /* |
1940 | | * Snapshot overflowed, so convert xid to top-level. This is safe |
1941 | | * because we eliminated too-old XIDs above. |
1942 | | */ |
1943 | 0 | xid = SubTransGetTopmostTransaction(xid); |
1944 | | |
1945 | | /* |
1946 | | * If xid was indeed a subxact, we might now have an xid < xmin, |
1947 | | * so recheck to avoid an array scan. No point in rechecking |
1948 | | * xmax. |
1949 | | */ |
1950 | 0 | if (TransactionIdPrecedes(xid, snapshot->xmin)) |
1951 | 0 | return false; |
1952 | 0 | } |
1953 | | |
1954 | | /* |
1955 | | * We now have either a top-level xid higher than xmin or an |
1956 | | * indeterminate xid. We don't know whether it's top level or subxact |
1957 | | * but it doesn't matter. If it's present, the xid is visible. |
1958 | | */ |
1959 | 0 | if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) |
1960 | 0 | return true; |
1961 | 0 | } |
1962 | | |
1963 | 0 | return false; |
1964 | 0 | } |
1965 | | |
1966 | | /* ResourceOwner callbacks */ |
1967 | | |
1968 | | static void |
1969 | | ResOwnerReleaseSnapshot(Datum res) |
1970 | 0 | { |
1971 | 0 | UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res)); |
1972 | 0 | } |