Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/utils/time/snapmgr.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * snapmgr.c
4
 *    PostgreSQL snapshot manager
5
 *
6
 * The following functions return an MVCC snapshot that can be used in tuple
7
 * visibility checks:
8
 *
9
 * - GetTransactionSnapshot
10
 * - GetLatestSnapshot
11
 * - GetCatalogSnapshot
12
 * - GetNonHistoricCatalogSnapshot
13
 *
14
 * Each of these functions returns a reference to a statically allocated
15
 * snapshot.  The statically allocated snapshot is subject to change on any
16
 * snapshot-related function call, and should not be used directly.  Instead,
17
 * call PushActiveSnapshot() or RegisterSnapshot() to create a longer-lived
18
 * copy and use that.
19
 *
20
 * We keep track of snapshots in two ways: those "registered" by resowner.c,
21
 * and the "active snapshot" stack.  All snapshots in either of them live in
22
 * persistent memory.  When a snapshot is no longer in any of these lists
23
 * (tracked by separate refcounts on each snapshot), its memory can be freed.
24
 *
25
 * In addition to the above-mentioned MVCC snapshots, there are some special
26
 * snapshots like SnapshotSelf, SnapshotAny, and "dirty" snapshots.  They can
27
 * only be used in limited contexts and cannot be registered or pushed to the
28
 * active stack.
29
 *
30
 * ActiveSnapshot stack
31
 * --------------------
32
 *
33
 * Most visibility checks use the current "active snapshot" returned by
34
 * GetActiveSnapshot().  When running normal queries, the active snapshot is
35
 * set when query execution begins based on the transaction isolation level.
36
 *
37
 * The active snapshot is tracked in a stack so that the currently active one
38
 * is at the top of the stack.  It mirrors the process call stack: whenever we
39
 * recurse or switch context to fetch rows from a different portal for
40
 * example, the appropriate snapshot is pushed to become the active snapshot,
41
 * and popped on return.  Once upon a time, ActiveSnapshot was just a global
42
 * variable that was saved and restored similar to CurrentMemoryContext, but
43
 * nowadays it's managed as a separate data structure so that we can keep
44
 * track of which snapshots are in use and reset MyProc->xmin when there is no
45
 * active snapshot.
46
 *
47
 * However, there are a couple of exceptions where the active snapshot stack
48
 * does not strictly mirror the call stack:
49
 *
50
 * - VACUUM and a few other utility commands manage their own transactions,
51
 *   which take their own snapshots.  They are called with an active snapshot
52
 *   set, like most utility commands, but they pop the active snapshot that
53
 *   was pushed by the caller.  PortalRunUtility knows about the possibility
54
 *   that the snapshot it pushed is no longer active on return.
55
 *
56
 * - When COMMIT or ROLLBACK is executed within a procedure or DO-block, the
57
 *   active snapshot stack is destroyed, and re-established later when
58
 *   subsequent statements in the procedure are executed.  There are many
59
 *   limitations on when in-procedure COMMIT/ROLLBACK is allowed; one such
60
 *   limitation is that all the snapshots on the active snapshot stack are
61
 *   known to portals that are being executed, which makes it safe to reset
62
 *   the stack.  See EnsurePortalSnapshotExists().
63
 *
64
 * Registered snapshots
65
 * --------------------
66
 *
67
 * In addition to snapshots pushed to the active snapshot stack, a snapshot
68
 * can be registered with a resource owner.
69
 *
70
 * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
71
 * regd_count and list it in RegisteredSnapshots, but this reference is not
72
 * tracked by a resource owner. We used to use the TopTransactionResourceOwner
73
 * to track this snapshot reference, but that introduces logical circularity
74
 * and thus makes it impossible to clean up in a sane fashion.  It's better to
75
 * handle this reference as an internally-tracked registration, so that this
76
 * module is entirely lower-level than ResourceOwners.
77
 *
78
 * Likewise, any snapshots that have been exported by pg_export_snapshot
79
 * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
80
 * tracked by any resource owner.
81
 *
82
 * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
83
 * is valid, but is not tracked by any resource owner.
84
 *
85
 * The same is true for historic snapshots used during logical decoding;
86
 * their lifetime is managed separately (as they live longer than one xact.c
87
 * transaction).
88
 *
89
 * These arrangements let us reset MyProc->xmin when there are no snapshots
90
 * referenced by this transaction, and advance it when the one with oldest
91
 * Xmin is no longer referenced.  For simplicity however, only registered
92
 * snapshots, not active snapshots, participate in tracking which one is oldest;
93
 * we don't try to change MyProc->xmin except when the active-snapshot
94
 * stack is empty.
95
 *
96
 *
97
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
98
 * Portions Copyright (c) 1994, Regents of the University of California
99
 *
100
 * IDENTIFICATION
101
 *    src/backend/utils/time/snapmgr.c
102
 *
103
 *-------------------------------------------------------------------------
104
 */
105
#include "postgres.h"
106
107
#include <sys/stat.h>
108
#include <unistd.h>
109
110
#include "access/subtrans.h"
111
#include "access/transam.h"
112
#include "access/xact.h"
113
#include "datatype/timestamp.h"
114
#include "lib/pairingheap.h"
115
#include "miscadmin.h"
116
#include "port/pg_lfind.h"
117
#include "storage/fd.h"
118
#include "storage/predicate.h"
119
#include "storage/proc.h"
120
#include "storage/procarray.h"
121
#include "utils/builtins.h"
122
#include "utils/memutils.h"
123
#include "utils/resowner.h"
124
#include "utils/snapmgr.h"
125
#include "utils/syscache.h"
126
127
128
/*
129
 * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
130
 * mode, and to the latest one taken in a read-committed transaction.
131
 * SecondarySnapshot is a snapshot that's always up-to-date as of the current
132
 * instant, even in transaction-snapshot mode.  It should only be used for
133
 * special-purpose code (say, RI checking.)  CatalogSnapshot points to an
134
 * MVCC snapshot intended to be used for catalog scans; we must invalidate it
135
 * whenever a system catalog change occurs.
136
 *
137
 * These SnapshotData structs are static to simplify memory allocation
138
 * (see the hack in GetSnapshotData to avoid repeated malloc/free).
139
 */
140
static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};
141
static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};
142
static SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};
143
SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
144
SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};
145
SnapshotData SnapshotToastData = {SNAPSHOT_TOAST};
146
147
/* Pointers to valid snapshots */
148
static Snapshot CurrentSnapshot = NULL;
149
static Snapshot SecondarySnapshot = NULL;
150
static Snapshot CatalogSnapshot = NULL;
151
static Snapshot HistoricSnapshot = NULL;
152
153
/*
154
 * These are updated by GetSnapshotData.  We initialize them this way
155
 * for the convenience of TransactionIdIsInProgress: even in bootstrap
156
 * mode, we don't want it to say that BootstrapTransactionId is in progress.
157
 */
158
TransactionId TransactionXmin = FirstNormalTransactionId;
159
TransactionId RecentXmin = FirstNormalTransactionId;
160
161
/* (table, ctid) => (cmin, cmax) mapping during timetravel */
162
static HTAB *tuplecid_data = NULL;
163
164
/*
165
 * Elements of the active snapshot stack.
166
 *
167
 * Each element here accounts for exactly one active_count on SnapshotData.
168
 *
169
 * NB: the code assumes that elements in this list are in non-increasing
170
 * order of as_level; also, the list must be NULL-terminated.
171
 */
172
typedef struct ActiveSnapshotElt
173
{
174
  Snapshot  as_snap;
175
  int     as_level;
176
  struct ActiveSnapshotElt *as_next;
177
} ActiveSnapshotElt;
178
179
/* Top of the stack of active snapshots */
180
static ActiveSnapshotElt *ActiveSnapshot = NULL;
181
182
/*
183
 * Currently registered Snapshots.  Ordered in a heap by xmin, so that we can
184
 * quickly find the one with lowest xmin, to advance our MyProc->xmin.
185
 */
186
static int  xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
187
           void *arg);
188
189
static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
190
191
/* first GetTransactionSnapshot call in a transaction? */
192
bool    FirstSnapshotSet = false;
193
194
/*
195
 * Remember the serializable transaction snapshot, if any.  We cannot trust
196
 * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
197
 * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
198
 */
199
static Snapshot FirstXactSnapshot = NULL;
200
201
/* Define pathname of exported-snapshot files */
202
0
#define SNAPSHOT_EXPORT_DIR "pg_snapshots"
203
204
/* Structure holding info about exported snapshot. */
205
typedef struct ExportedSnapshot
206
{
207
  char     *snapfile;
208
  Snapshot  snapshot;
209
} ExportedSnapshot;
210
211
/* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
212
static List *exportedSnapshots = NIL;
213
214
/* Prototypes for local functions */
215
static Snapshot CopySnapshot(Snapshot snapshot);
216
static void UnregisterSnapshotNoOwner(Snapshot snapshot);
217
static void FreeSnapshot(Snapshot snapshot);
218
static void SnapshotResetXmin(void);
219
220
/* ResourceOwner callbacks to track snapshot references */
221
static void ResOwnerReleaseSnapshot(Datum res);
222
223
static const ResourceOwnerDesc snapshot_resowner_desc =
224
{
225
  .name = "snapshot reference",
226
  .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
227
  .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
228
  .ReleaseResource = ResOwnerReleaseSnapshot,
229
  .DebugPrint = NULL      /* the default message is fine */
230
};
231
232
/* Convenience wrappers over ResourceOwnerRemember/Forget */
233
static inline void
234
ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
235
0
{
236
0
  ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
237
0
}
238
static inline void
239
ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
240
0
{
241
0
  ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
242
0
}
243
244
/*
245
 * Snapshot fields to be serialized.
246
 *
247
 * Only these fields need to be sent to the cooperating backend; the
248
 * remaining ones can (and must) be set by the receiver upon restore.
249
 */
250
typedef struct SerializedSnapshotData
251
{
252
  TransactionId xmin;
253
  TransactionId xmax;
254
  uint32    xcnt;
255
  int32   subxcnt;
256
  bool    suboverflowed;
257
  bool    takenDuringRecovery;
258
  CommandId curcid;
259
} SerializedSnapshotData;
260
261
/*
262
 * GetTransactionSnapshot
263
 *    Get the appropriate snapshot for a new query in a transaction.
264
 *
265
 * Note that the return value points at static storage that will be modified
266
 * by future calls and by CommandCounterIncrement().  Callers must call
267
 * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing
268
 * any other non-trivial work that could invalidate it.
269
 */
270
Snapshot
271
GetTransactionSnapshot(void)
272
0
{
273
  /*
274
   * Return historic snapshot if doing logical decoding.
275
   *
276
   * Historic snapshots are only usable for catalog access, not for
277
   * general-purpose queries.  The caller is responsible for ensuring that
278
   * the snapshot is used correctly! (PostgreSQL code never calls this
279
   * during logical decoding, but extensions can do it.)
280
   */
281
0
  if (HistoricSnapshotActive())
282
0
  {
283
    /*
284
     * We'll never need a non-historic transaction snapshot in this
285
     * (sub-)transaction, so there's no need to be careful to set one up
286
     * for later calls to GetTransactionSnapshot().
287
     */
288
0
    Assert(!FirstSnapshotSet);
289
0
    return HistoricSnapshot;
290
0
  }
291
292
  /* First call in transaction? */
293
0
  if (!FirstSnapshotSet)
294
0
  {
295
    /*
296
     * Don't allow catalog snapshot to be older than xact snapshot.  Must
297
     * do this first to allow the empty-heap Assert to succeed.
298
     */
299
0
    InvalidateCatalogSnapshot();
300
301
0
    Assert(pairingheap_is_empty(&RegisteredSnapshots));
302
0
    Assert(FirstXactSnapshot == NULL);
303
304
0
    if (IsInParallelMode())
305
0
      elog(ERROR,
306
0
         "cannot take query snapshot during a parallel operation");
307
308
    /*
309
     * In transaction-snapshot mode, the first snapshot must live until
310
     * end of xact regardless of what the caller does with it, so we must
311
     * make a copy of it rather than returning CurrentSnapshotData
312
     * directly.  Furthermore, if we're running in serializable mode,
313
     * predicate.c needs to wrap the snapshot fetch in its own processing.
314
     */
315
0
    if (IsolationUsesXactSnapshot())
316
0
    {
317
      /* First, create the snapshot in CurrentSnapshotData */
318
0
      if (IsolationIsSerializable())
319
0
        CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
320
0
      else
321
0
        CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
322
      /* Make a saved copy */
323
0
      CurrentSnapshot = CopySnapshot(CurrentSnapshot);
324
0
      FirstXactSnapshot = CurrentSnapshot;
325
      /* Mark it as "registered" in FirstXactSnapshot */
326
0
      FirstXactSnapshot->regd_count++;
327
0
      pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
328
0
    }
329
0
    else
330
0
      CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
331
332
0
    FirstSnapshotSet = true;
333
0
    return CurrentSnapshot;
334
0
  }
335
336
0
  if (IsolationUsesXactSnapshot())
337
0
    return CurrentSnapshot;
338
339
  /* Don't allow catalog snapshot to be older than xact snapshot. */
340
0
  InvalidateCatalogSnapshot();
341
342
0
  CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
343
344
0
  return CurrentSnapshot;
345
0
}
346
347
/*
348
 * GetLatestSnapshot
349
 *    Get a snapshot that is up-to-date as of the current instant,
350
 *    even if we are executing in transaction-snapshot mode.
351
 */
352
Snapshot
353
GetLatestSnapshot(void)
354
0
{
355
  /*
356
   * We might be able to relax this, but nothing that could otherwise work
357
   * needs it.
358
   */
359
0
  if (IsInParallelMode())
360
0
    elog(ERROR,
361
0
       "cannot update SecondarySnapshot during a parallel operation");
362
363
  /*
364
   * So far there are no cases requiring support for GetLatestSnapshot()
365
   * during logical decoding, but it wouldn't be hard to add if required.
366
   */
367
0
  Assert(!HistoricSnapshotActive());
368
369
  /* If first call in transaction, go ahead and set the xact snapshot */
370
0
  if (!FirstSnapshotSet)
371
0
    return GetTransactionSnapshot();
372
373
0
  SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
374
375
0
  return SecondarySnapshot;
376
0
}
377
378
/*
379
 * GetCatalogSnapshot
380
 *    Get a snapshot that is sufficiently up-to-date for scan of the
381
 *    system catalog with the specified OID.
382
 */
383
Snapshot
384
GetCatalogSnapshot(Oid relid)
385
0
{
386
  /*
387
   * Return historic snapshot while we're doing logical decoding, so we can
388
   * see the appropriate state of the catalog.
389
   *
390
   * This is the primary reason for needing to reset the system caches after
391
   * finishing decoding.
392
   */
393
0
  if (HistoricSnapshotActive())
394
0
    return HistoricSnapshot;
395
396
0
  return GetNonHistoricCatalogSnapshot(relid);
397
0
}
398
399
/*
400
 * GetNonHistoricCatalogSnapshot
401
 *    Get a snapshot that is sufficiently up-to-date for scan of the system
402
 *    catalog with the specified OID, even while historic snapshots are set
403
 *    up.
404
 */
405
Snapshot
406
GetNonHistoricCatalogSnapshot(Oid relid)
407
0
{
408
  /*
409
   * If the caller is trying to scan a relation that has no syscache, no
410
   * catcache invalidations will be sent when it is updated.  For a few key
411
   * relations, snapshot invalidations are sent instead.  If we're trying to
412
   * scan a relation for which neither catcache nor snapshot invalidations
413
   * are sent, we must refresh the snapshot every time.
414
   */
415
0
  if (CatalogSnapshot &&
416
0
    !RelationInvalidatesSnapshotsOnly(relid) &&
417
0
    !RelationHasSysCache(relid))
418
0
    InvalidateCatalogSnapshot();
419
420
0
  if (CatalogSnapshot == NULL)
421
0
  {
422
    /* Get new snapshot. */
423
0
    CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
424
425
    /*
426
     * Make sure the catalog snapshot will be accounted for in decisions
427
     * about advancing PGPROC->xmin.  We could apply RegisterSnapshot, but
428
     * that would result in making a physical copy, which is overkill; and
429
     * it would also create a dependency on some resource owner, which we
430
     * do not want for reasons explained at the head of this file. Instead
431
     * just shove the CatalogSnapshot into the pairing heap manually. This
432
     * has to be reversed in InvalidateCatalogSnapshot, of course.
433
     *
434
     * NB: it had better be impossible for this to throw error, since the
435
     * CatalogSnapshot pointer is already valid.
436
     */
437
0
    pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
438
0
  }
439
440
0
  return CatalogSnapshot;
441
0
}
442
443
/*
444
 * InvalidateCatalogSnapshot
445
 *    Mark the current catalog snapshot, if any, as invalid
446
 *
447
 * We could change this API to allow the caller to provide more fine-grained
448
 * invalidation details, so that a change to relation A wouldn't prevent us
449
 * from using our cached snapshot to scan relation B, but so far there's no
450
 * evidence that the CPU cycles we spent tracking such fine details would be
451
 * well-spent.
452
 */
453
void
454
InvalidateCatalogSnapshot(void)
455
0
{
456
0
  if (CatalogSnapshot)
457
0
  {
458
0
    pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
459
0
    CatalogSnapshot = NULL;
460
0
    SnapshotResetXmin();
461
0
  }
462
0
}
463
464
/*
465
 * InvalidateCatalogSnapshotConditionally
466
 *    Drop catalog snapshot if it's the only one we have
467
 *
468
 * This is called when we are about to wait for client input, so we don't
469
 * want to continue holding the catalog snapshot if it might mean that the
470
 * global xmin horizon can't advance.  However, if there are other snapshots
471
 * still active or registered, the catalog snapshot isn't likely to be the
472
 * oldest one, so we might as well keep it.
473
 */
474
void
475
InvalidateCatalogSnapshotConditionally(void)
476
0
{
477
0
  if (CatalogSnapshot &&
478
0
    ActiveSnapshot == NULL &&
479
0
    pairingheap_is_singular(&RegisteredSnapshots))
480
0
    InvalidateCatalogSnapshot();
481
0
}
482
483
/*
484
 * SnapshotSetCommandId
485
 *    Propagate CommandCounterIncrement into the static snapshots, if set
486
 */
487
void
488
SnapshotSetCommandId(CommandId curcid)
489
0
{
490
0
  if (!FirstSnapshotSet)
491
0
    return;
492
493
0
  if (CurrentSnapshot)
494
0
    CurrentSnapshot->curcid = curcid;
495
0
  if (SecondarySnapshot)
496
0
    SecondarySnapshot->curcid = curcid;
497
  /* Should we do the same with CatalogSnapshot? */
498
0
}
499
500
/*
501
 * SetTransactionSnapshot
502
 *    Set the transaction's snapshot from an imported MVCC snapshot.
503
 *
504
 * Note that this is very closely tied to GetTransactionSnapshot --- it
505
 * must take care of all the same considerations as the first-snapshot case
506
 * in GetTransactionSnapshot.
507
 */
508
static void
509
SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
510
             int sourcepid, PGPROC *sourceproc)
511
0
{
512
  /* Caller should have checked this already */
513
0
  Assert(!FirstSnapshotSet);
514
515
  /* Better do this to ensure following Assert succeeds. */
516
0
  InvalidateCatalogSnapshot();
517
518
0
  Assert(pairingheap_is_empty(&RegisteredSnapshots));
519
0
  Assert(FirstXactSnapshot == NULL);
520
0
  Assert(!HistoricSnapshotActive());
521
522
  /*
523
   * Even though we are not going to use the snapshot it computes, we must
524
   * call GetSnapshotData, for two reasons: (1) to be sure that
525
   * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
526
   * the state for GlobalVis*.
527
   */
528
0
  CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
529
530
  /*
531
   * Now copy appropriate fields from the source snapshot.
532
   */
533
0
  CurrentSnapshot->xmin = sourcesnap->xmin;
534
0
  CurrentSnapshot->xmax = sourcesnap->xmax;
535
0
  CurrentSnapshot->xcnt = sourcesnap->xcnt;
536
0
  Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
537
0
  if (sourcesnap->xcnt > 0)
538
0
    memcpy(CurrentSnapshot->xip, sourcesnap->xip,
539
0
         sourcesnap->xcnt * sizeof(TransactionId));
540
0
  CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
541
0
  Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
542
0
  if (sourcesnap->subxcnt > 0)
543
0
    memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
544
0
         sourcesnap->subxcnt * sizeof(TransactionId));
545
0
  CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
546
0
  CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
547
  /* NB: curcid should NOT be copied, it's a local matter */
548
549
0
  CurrentSnapshot->snapXactCompletionCount = 0;
550
551
  /*
552
   * Now we have to fix what GetSnapshotData did with MyProc->xmin and
553
   * TransactionXmin.  There is a race condition: to make sure we are not
554
   * causing the global xmin to go backwards, we have to test that the
555
   * source transaction is still running, and that has to be done
556
   * atomically. So let procarray.c do it.
557
   *
558
   * Note: in serializable mode, predicate.c will do this a second time. It
559
   * doesn't seem worth contorting the logic here to avoid two calls,
560
   * especially since it's not clear that predicate.c *must* do this.
561
   */
562
0
  if (sourceproc != NULL)
563
0
  {
564
0
    if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
565
0
      ereport(ERROR,
566
0
          (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
567
0
           errmsg("could not import the requested snapshot"),
568
0
           errdetail("The source transaction is not running anymore.")));
569
0
  }
570
0
  else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
571
0
    ereport(ERROR,
572
0
        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
573
0
         errmsg("could not import the requested snapshot"),
574
0
         errdetail("The source process with PID %d is not running anymore.",
575
0
               sourcepid)));
576
577
  /*
578
   * In transaction-snapshot mode, the first snapshot must live until end of
579
   * xact, so we must make a copy of it.  Furthermore, if we're running in
580
   * serializable mode, predicate.c needs to do its own processing.
581
   */
582
0
  if (IsolationUsesXactSnapshot())
583
0
  {
584
0
    if (IsolationIsSerializable())
585
0
      SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
586
0
                         sourcepid);
587
    /* Make a saved copy */
588
0
    CurrentSnapshot = CopySnapshot(CurrentSnapshot);
589
0
    FirstXactSnapshot = CurrentSnapshot;
590
    /* Mark it as "registered" in FirstXactSnapshot */
591
0
    FirstXactSnapshot->regd_count++;
592
0
    pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
593
0
  }
594
595
0
  FirstSnapshotSet = true;
596
0
}
597
598
/*
599
 * CopySnapshot
600
 *    Copy the given snapshot.
601
 *
602
 * The copy is palloc'd in TopTransactionContext and has initial refcounts set
603
 * to 0.  The returned snapshot has the copied flag set.
604
 */
605
static Snapshot
606
CopySnapshot(Snapshot snapshot)
607
0
{
608
0
  Snapshot  newsnap;
609
0
  Size    subxipoff;
610
0
  Size    size;
611
612
0
  Assert(snapshot != InvalidSnapshot);
613
614
  /* We allocate any XID arrays needed in the same palloc block. */
615
0
  size = subxipoff = sizeof(SnapshotData) +
616
0
    snapshot->xcnt * sizeof(TransactionId);
617
0
  if (snapshot->subxcnt > 0)
618
0
    size += snapshot->subxcnt * sizeof(TransactionId);
619
620
0
  newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
621
0
  memcpy(newsnap, snapshot, sizeof(SnapshotData));
622
623
0
  newsnap->regd_count = 0;
624
0
  newsnap->active_count = 0;
625
0
  newsnap->copied = true;
626
0
  newsnap->snapXactCompletionCount = 0;
627
628
  /* setup XID array */
629
0
  if (snapshot->xcnt > 0)
630
0
  {
631
0
    newsnap->xip = (TransactionId *) (newsnap + 1);
632
0
    memcpy(newsnap->xip, snapshot->xip,
633
0
         snapshot->xcnt * sizeof(TransactionId));
634
0
  }
635
0
  else
636
0
    newsnap->xip = NULL;
637
638
  /*
639
   * Setup subXID array. Don't bother to copy it if it had overflowed,
640
   * though, because it's not used anywhere in that case. Except if it's a
641
   * snapshot taken during recovery; all the top-level XIDs are in subxip as
642
   * well in that case, so we mustn't lose them.
643
   */
644
0
  if (snapshot->subxcnt > 0 &&
645
0
    (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
646
0
  {
647
0
    newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
648
0
    memcpy(newsnap->subxip, snapshot->subxip,
649
0
         snapshot->subxcnt * sizeof(TransactionId));
650
0
  }
651
0
  else
652
0
    newsnap->subxip = NULL;
653
654
0
  return newsnap;
655
0
}
656
657
/*
658
 * FreeSnapshot
659
 *    Free the memory associated with a snapshot.
660
 */
661
static void
662
FreeSnapshot(Snapshot snapshot)
663
0
{
664
0
  Assert(snapshot->regd_count == 0);
665
0
  Assert(snapshot->active_count == 0);
666
0
  Assert(snapshot->copied);
667
668
0
  pfree(snapshot);
669
0
}
670
671
/*
672
 * PushActiveSnapshot
673
 *    Set the given snapshot as the current active snapshot
674
 *
675
 * If the passed snapshot is a statically-allocated one, or it is possibly
676
 * subject to a future command counter update, create a new long-lived copy
677
 * with active refcount=1.  Otherwise, only increment the refcount.
678
 */
679
void
680
PushActiveSnapshot(Snapshot snapshot)
681
0
{
682
0
  PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
683
0
}
684
685
/*
686
 * PushActiveSnapshotWithLevel
687
 *    Set the given snapshot as the current active snapshot
688
 *
689
 * Same as PushActiveSnapshot except that caller can specify the
690
 * transaction nesting level that "owns" the snapshot.  This level
691
 * must not be deeper than the current top of the snapshot stack.
692
 */
693
void
694
PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
695
0
{
696
0
  ActiveSnapshotElt *newactive;
697
698
0
  Assert(snapshot != InvalidSnapshot);
699
0
  Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
700
701
0
  newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
702
703
  /*
704
   * Checking SecondarySnapshot is probably useless here, but it seems
705
   * better to be sure.
706
   */
707
0
  if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
708
0
    !snapshot->copied)
709
0
    newactive->as_snap = CopySnapshot(snapshot);
710
0
  else
711
0
    newactive->as_snap = snapshot;
712
713
0
  newactive->as_next = ActiveSnapshot;
714
0
  newactive->as_level = snap_level;
715
716
0
  newactive->as_snap->active_count++;
717
718
0
  ActiveSnapshot = newactive;
719
0
}
720
721
/*
722
 * PushCopiedSnapshot
723
 *    As above, except forcibly copy the presented snapshot.
724
 *
725
 * This should be used when the ActiveSnapshot has to be modifiable, for
726
 * example if the caller intends to call UpdateActiveSnapshotCommandId.
727
 * The new snapshot will be released when popped from the stack.
728
 */
729
void
730
PushCopiedSnapshot(Snapshot snapshot)
731
0
{
732
0
  PushActiveSnapshot(CopySnapshot(snapshot));
733
0
}
734
735
/*
736
 * UpdateActiveSnapshotCommandId
737
 *
738
 * Update the current CID of the active snapshot.  This can only be applied
739
 * to a snapshot that is not referenced elsewhere.
740
 */
741
void
742
UpdateActiveSnapshotCommandId(void)
743
0
{
744
0
  CommandId save_curcid,
745
0
        curcid;
746
747
0
  Assert(ActiveSnapshot != NULL);
748
0
  Assert(ActiveSnapshot->as_snap->active_count == 1);
749
0
  Assert(ActiveSnapshot->as_snap->regd_count == 0);
750
751
  /*
752
   * Don't allow modification of the active snapshot during parallel
753
   * operation.  We share the snapshot to worker backends at the beginning
754
   * of parallel operation, so any change to the snapshot can lead to
755
   * inconsistencies.  We have other defenses against
756
   * CommandCounterIncrement, but there are a few places that call this
757
   * directly, so we put an additional guard here.
758
   */
759
0
  save_curcid = ActiveSnapshot->as_snap->curcid;
760
0
  curcid = GetCurrentCommandId(false);
761
0
  if (IsInParallelMode() && save_curcid != curcid)
762
0
    elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
763
0
  ActiveSnapshot->as_snap->curcid = curcid;
764
0
}
765
766
/*
767
 * PopActiveSnapshot
768
 *
769
 * Remove the topmost snapshot from the active snapshot stack, decrementing the
770
 * reference count, and free it if this was the last reference.
771
 */
772
void
773
PopActiveSnapshot(void)
774
0
{
775
0
  ActiveSnapshotElt *newstack;
776
777
0
  newstack = ActiveSnapshot->as_next;
778
779
0
  Assert(ActiveSnapshot->as_snap->active_count > 0);
780
781
0
  ActiveSnapshot->as_snap->active_count--;
782
783
0
  if (ActiveSnapshot->as_snap->active_count == 0 &&
784
0
    ActiveSnapshot->as_snap->regd_count == 0)
785
0
    FreeSnapshot(ActiveSnapshot->as_snap);
786
787
0
  pfree(ActiveSnapshot);
788
0
  ActiveSnapshot = newstack;
789
790
0
  SnapshotResetXmin();
791
0
}
792
793
/*
794
 * GetActiveSnapshot
795
 *    Return the topmost snapshot in the Active stack.
796
 */
797
Snapshot
798
GetActiveSnapshot(void)
799
0
{
800
0
  Assert(ActiveSnapshot != NULL);
801
802
0
  return ActiveSnapshot->as_snap;
803
0
}
804
805
/*
806
 * ActiveSnapshotSet
807
 *    Return whether there is at least one snapshot in the Active stack
808
 */
809
bool
810
ActiveSnapshotSet(void)
811
0
{
812
0
  return ActiveSnapshot != NULL;
813
0
}
814
815
/*
816
 * RegisterSnapshot
817
 *    Register a snapshot as being in use by the current resource owner
818
 *
819
 * If InvalidSnapshot is passed, it is not registered.
820
 */
821
Snapshot
822
RegisterSnapshot(Snapshot snapshot)
823
0
{
824
0
  if (snapshot == InvalidSnapshot)
825
0
    return InvalidSnapshot;
826
827
0
  return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
828
0
}
829
830
/*
831
 * RegisterSnapshotOnOwner
832
 *    As above, but use the specified resource owner
833
 */
834
Snapshot
835
RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
836
0
{
837
0
  Snapshot  snap;
838
839
0
  if (snapshot == InvalidSnapshot)
840
0
    return InvalidSnapshot;
841
842
  /* Static snapshot?  Create a persistent copy */
843
0
  snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
844
845
  /* and tell resowner.c about it */
846
0
  ResourceOwnerEnlarge(owner);
847
0
  snap->regd_count++;
848
0
  ResourceOwnerRememberSnapshot(owner, snap);
849
850
0
  if (snap->regd_count == 1)
851
0
    pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
852
853
0
  return snap;
854
0
}
855
856
/*
857
 * UnregisterSnapshot
858
 *
859
 * Decrement the reference count of a snapshot, remove the corresponding
860
 * reference from CurrentResourceOwner, and free the snapshot if no more
861
 * references remain.
862
 */
863
void
864
UnregisterSnapshot(Snapshot snapshot)
865
0
{
866
0
  if (snapshot == NULL)
867
0
    return;
868
869
0
  UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
870
0
}
871
872
/*
873
 * UnregisterSnapshotFromOwner
874
 *    As above, but use the specified resource owner
875
 */
876
void
877
UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
878
0
{
879
0
  if (snapshot == NULL)
880
0
    return;
881
882
0
  ResourceOwnerForgetSnapshot(owner, snapshot);
883
0
  UnregisterSnapshotNoOwner(snapshot);
884
0
}
885
886
static void
887
UnregisterSnapshotNoOwner(Snapshot snapshot)
888
0
{
889
0
  Assert(snapshot->regd_count > 0);
890
0
  Assert(!pairingheap_is_empty(&RegisteredSnapshots));
891
892
0
  snapshot->regd_count--;
893
0
  if (snapshot->regd_count == 0)
894
0
    pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
895
896
0
  if (snapshot->regd_count == 0 && snapshot->active_count == 0)
897
0
  {
898
0
    FreeSnapshot(snapshot);
899
0
    SnapshotResetXmin();
900
0
  }
901
0
}
902
903
/*
904
 * Comparison function for RegisteredSnapshots heap.  Snapshots are ordered
905
 * by xmin, so that the snapshot with smallest xmin is at the top.
906
 */
907
static int
908
xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
909
0
{
910
0
  const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
911
0
  const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
912
913
0
  if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
914
0
    return 1;
915
0
  else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
916
0
    return -1;
917
0
  else
918
0
    return 0;
919
0
}
920
921
/*
922
 * SnapshotResetXmin
923
 *
924
 * If there are no more snapshots, we can reset our PGPROC->xmin to
925
 * InvalidTransactionId. Note we can do this without locking because we assume
926
 * that storing an Xid is atomic.
927
 *
928
 * Even if there are some remaining snapshots, we may be able to advance our
929
 * PGPROC->xmin to some degree.  This typically happens when a portal is
930
 * dropped.  For efficiency, we only consider recomputing PGPROC->xmin when
931
 * the active snapshot stack is empty; this allows us not to need to track
932
 * which active snapshot is oldest.
933
 */
934
static void
935
SnapshotResetXmin(void)
936
0
{
937
0
  Snapshot  minSnapshot;
938
939
0
  if (ActiveSnapshot != NULL)
940
0
    return;
941
942
0
  if (pairingheap_is_empty(&RegisteredSnapshots))
943
0
  {
944
0
    MyProc->xmin = TransactionXmin = InvalidTransactionId;
945
0
    return;
946
0
  }
947
948
0
  minSnapshot = pairingheap_container(SnapshotData, ph_node,
949
0
                    pairingheap_first(&RegisteredSnapshots));
950
951
0
  if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
952
0
    MyProc->xmin = TransactionXmin = minSnapshot->xmin;
953
0
}
954
955
/*
956
 * AtSubCommit_Snapshot
957
 */
958
void
959
AtSubCommit_Snapshot(int level)
960
0
{
961
0
  ActiveSnapshotElt *active;
962
963
  /*
964
   * Relabel the active snapshots set in this subtransaction as though they
965
   * are owned by the parent subxact.
966
   */
967
0
  for (active = ActiveSnapshot; active != NULL; active = active->as_next)
968
0
  {
969
0
    if (active->as_level < level)
970
0
      break;
971
0
    active->as_level = level - 1;
972
0
  }
973
0
}
974
975
/*
976
 * AtSubAbort_Snapshot
977
 *    Clean up snapshots after a subtransaction abort
978
 */
979
void
980
AtSubAbort_Snapshot(int level)
981
0
{
982
  /* Forget the active snapshots set by this subtransaction */
983
0
  while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
984
0
  {
985
0
    ActiveSnapshotElt *next;
986
987
0
    next = ActiveSnapshot->as_next;
988
989
    /*
990
     * Decrement the snapshot's active count.  If it's still registered or
991
     * marked as active by an outer subtransaction, we can't free it yet.
992
     */
993
0
    Assert(ActiveSnapshot->as_snap->active_count >= 1);
994
0
    ActiveSnapshot->as_snap->active_count -= 1;
995
996
0
    if (ActiveSnapshot->as_snap->active_count == 0 &&
997
0
      ActiveSnapshot->as_snap->regd_count == 0)
998
0
      FreeSnapshot(ActiveSnapshot->as_snap);
999
1000
    /* and free the stack element */
1001
0
    pfree(ActiveSnapshot);
1002
1003
0
    ActiveSnapshot = next;
1004
0
  }
1005
1006
0
  SnapshotResetXmin();
1007
0
}
1008
1009
/*
1010
 * AtEOXact_Snapshot
1011
 *    Snapshot manager's cleanup function for end of transaction
1012
 */
1013
void
1014
AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1015
0
{
1016
  /*
1017
   * In transaction-snapshot mode we must release our privately-managed
1018
   * reference to the transaction snapshot.  We must remove it from
1019
   * RegisteredSnapshots to keep the check below happy.  But we don't bother
1020
   * to do FreeSnapshot, for two reasons: the memory will go away with
1021
   * TopTransactionContext anyway, and if someone has left the snapshot
1022
   * stacked as active, we don't want the code below to be chasing through a
1023
   * dangling pointer.
1024
   */
1025
0
  if (FirstXactSnapshot != NULL)
1026
0
  {
1027
0
    Assert(FirstXactSnapshot->regd_count > 0);
1028
0
    Assert(!pairingheap_is_empty(&RegisteredSnapshots));
1029
0
    pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
1030
0
  }
1031
0
  FirstXactSnapshot = NULL;
1032
1033
  /*
1034
   * If we exported any snapshots, clean them up.
1035
   */
1036
0
  if (exportedSnapshots != NIL)
1037
0
  {
1038
0
    ListCell   *lc;
1039
1040
    /*
1041
     * Get rid of the files.  Unlink failure is only a WARNING because (1)
1042
     * it's too late to abort the transaction, and (2) leaving a leaked
1043
     * file around has little real consequence anyway.
1044
     *
1045
     * We also need to remove the snapshots from RegisteredSnapshots to
1046
     * prevent a warning below.
1047
     *
1048
     * As with the FirstXactSnapshot, we don't need to free resources of
1049
     * the snapshot itself as it will go away with the memory context.
1050
     */
1051
0
    foreach(lc, exportedSnapshots)
1052
0
    {
1053
0
      ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1054
1055
0
      if (unlink(esnap->snapfile))
1056
0
        elog(WARNING, "could not unlink file \"%s\": %m",
1057
0
           esnap->snapfile);
1058
1059
0
      pairingheap_remove(&RegisteredSnapshots,
1060
0
                 &esnap->snapshot->ph_node);
1061
0
    }
1062
1063
0
    exportedSnapshots = NIL;
1064
0
  }
1065
1066
  /* Drop catalog snapshot if any */
1067
0
  InvalidateCatalogSnapshot();
1068
1069
  /* On commit, complain about leftover snapshots */
1070
0
  if (isCommit)
1071
0
  {
1072
0
    ActiveSnapshotElt *active;
1073
1074
0
    if (!pairingheap_is_empty(&RegisteredSnapshots))
1075
0
      elog(WARNING, "registered snapshots seem to remain after cleanup");
1076
1077
    /* complain about unpopped active snapshots */
1078
0
    for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1079
0
      elog(WARNING, "snapshot %p still active", active);
1080
0
  }
1081
1082
  /*
1083
   * And reset our state.  We don't need to free the memory explicitly --
1084
   * it'll go away with TopTransactionContext.
1085
   */
1086
0
  ActiveSnapshot = NULL;
1087
0
  pairingheap_reset(&RegisteredSnapshots);
1088
1089
0
  CurrentSnapshot = NULL;
1090
0
  SecondarySnapshot = NULL;
1091
1092
0
  FirstSnapshotSet = false;
1093
1094
  /*
1095
   * During normal commit processing, we call ProcArrayEndTransaction() to
1096
   * reset the MyProc->xmin. That call happens prior to the call to
1097
   * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1098
   */
1099
0
  if (resetXmin)
1100
0
    SnapshotResetXmin();
1101
1102
0
  Assert(resetXmin || MyProc->xmin == 0);
1103
0
}
1104
1105
1106
/*
1107
 * ExportSnapshot
1108
 *    Export the snapshot to a file so that other backends can import it.
1109
 *    Returns the token (the file name) that can be used to import this
1110
 *    snapshot.
1111
 */
1112
char *
1113
ExportSnapshot(Snapshot snapshot)
1114
0
{
1115
0
  TransactionId topXid;
1116
0
  TransactionId *children;
1117
0
  ExportedSnapshot *esnap;
1118
0
  int     nchildren;
1119
0
  int     addTopXid;
1120
0
  StringInfoData buf;
1121
0
  FILE     *f;
1122
0
  int     i;
1123
0
  MemoryContext oldcxt;
1124
0
  char    path[MAXPGPATH];
1125
0
  char    pathtmp[MAXPGPATH];
1126
1127
  /*
1128
   * It's tempting to call RequireTransactionBlock here, since it's not very
1129
   * useful to export a snapshot that will disappear immediately afterwards.
1130
   * However, we haven't got enough information to do that, since we don't
1131
   * know if we're at top level or not.  For example, we could be inside a
1132
   * plpgsql function that is going to fire off other transactions via
1133
   * dblink.  Rather than disallow perfectly legitimate usages, don't make a
1134
   * check.
1135
   *
1136
   * Also note that we don't make any restriction on the transaction's
1137
   * isolation level; however, importers must check the level if they are
1138
   * serializable.
1139
   */
1140
1141
  /*
1142
   * Get our transaction ID if there is one, to include in the snapshot.
1143
   */
1144
0
  topXid = GetTopTransactionIdIfAny();
1145
1146
  /*
1147
   * We cannot export a snapshot from a subtransaction because there's no
1148
   * easy way for importers to verify that the same subtransaction is still
1149
   * running.
1150
   */
1151
0
  if (IsSubTransaction())
1152
0
    ereport(ERROR,
1153
0
        (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1154
0
         errmsg("cannot export a snapshot from a subtransaction")));
1155
1156
  /*
1157
   * We do however allow previous committed subtransactions to exist.
1158
   * Importers of the snapshot must see them as still running, so get their
1159
   * XIDs to add them to the snapshot.
1160
   */
1161
0
  nchildren = xactGetCommittedChildren(&children);
1162
1163
  /*
1164
   * Generate file path for the snapshot.  We start numbering of snapshots
1165
   * inside the transaction from 1.
1166
   */
1167
0
  snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1168
0
       MyProc->vxid.procNumber, MyProc->vxid.lxid,
1169
0
       list_length(exportedSnapshots) + 1);
1170
1171
  /*
1172
   * Copy the snapshot into TopTransactionContext, add it to the
1173
   * exportedSnapshots list, and mark it pseudo-registered.  We do this to
1174
   * ensure that the snapshot's xmin is honored for the rest of the
1175
   * transaction.
1176
   */
1177
0
  snapshot = CopySnapshot(snapshot);
1178
1179
0
  oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1180
0
  esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1181
0
  esnap->snapfile = pstrdup(path);
1182
0
  esnap->snapshot = snapshot;
1183
0
  exportedSnapshots = lappend(exportedSnapshots, esnap);
1184
0
  MemoryContextSwitchTo(oldcxt);
1185
1186
0
  snapshot->regd_count++;
1187
0
  pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1188
1189
  /*
1190
   * Fill buf with a text serialization of the snapshot, plus identification
1191
   * data about this transaction.  The format expected by ImportSnapshot is
1192
   * pretty rigid: each line must be fieldname:value.
1193
   */
1194
0
  initStringInfo(&buf);
1195
1196
0
  appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1197
0
  appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1198
0
  appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1199
0
  appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1200
0
  appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1201
1202
0
  appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1203
0
  appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1204
1205
  /*
1206
   * We must include our own top transaction ID in the top-xid data, since
1207
   * by definition we will still be running when the importing transaction
1208
   * adopts the snapshot, but GetSnapshotData never includes our own XID in
1209
   * the snapshot.  (There must, therefore, be enough room to add it.)
1210
   *
1211
   * However, it could be that our topXid is after the xmax, in which case
1212
   * we shouldn't include it because xip[] members are expected to be before
1213
   * xmax.  (We need not make the same check for subxip[] members, see
1214
   * snapshot.h.)
1215
   */
1216
0
  addTopXid = (TransactionIdIsValid(topXid) &&
1217
0
         TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1218
0
  appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1219
0
  for (i = 0; i < snapshot->xcnt; i++)
1220
0
    appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1221
0
  if (addTopXid)
1222
0
    appendStringInfo(&buf, "xip:%u\n", topXid);
1223
1224
  /*
1225
   * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1226
   * we have to cope with possible overflow.
1227
   */
1228
0
  if (snapshot->suboverflowed ||
1229
0
    snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1230
0
    appendStringInfoString(&buf, "sof:1\n");
1231
0
  else
1232
0
  {
1233
0
    appendStringInfoString(&buf, "sof:0\n");
1234
0
    appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1235
0
    for (i = 0; i < snapshot->subxcnt; i++)
1236
0
      appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1237
0
    for (i = 0; i < nchildren; i++)
1238
0
      appendStringInfo(&buf, "sxp:%u\n", children[i]);
1239
0
  }
1240
0
  appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1241
1242
  /*
1243
   * Now write the text representation into a file.  We first write to a
1244
   * ".tmp" filename, and rename to final filename if no error.  This
1245
   * ensures that no other backend can read an incomplete file
1246
   * (ImportSnapshot won't allow it because of its valid-characters check).
1247
   */
1248
0
  snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1249
0
  if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1250
0
    ereport(ERROR,
1251
0
        (errcode_for_file_access(),
1252
0
         errmsg("could not create file \"%s\": %m", pathtmp)));
1253
1254
0
  if (fwrite(buf.data, buf.len, 1, f) != 1)
1255
0
    ereport(ERROR,
1256
0
        (errcode_for_file_access(),
1257
0
         errmsg("could not write to file \"%s\": %m", pathtmp)));
1258
1259
  /* no fsync() since file need not survive a system crash */
1260
1261
0
  if (FreeFile(f))
1262
0
    ereport(ERROR,
1263
0
        (errcode_for_file_access(),
1264
0
         errmsg("could not write to file \"%s\": %m", pathtmp)));
1265
1266
  /*
1267
   * Now that we have written everything into a .tmp file, rename the file
1268
   * to remove the .tmp suffix.
1269
   */
1270
0
  if (rename(pathtmp, path) < 0)
1271
0
    ereport(ERROR,
1272
0
        (errcode_for_file_access(),
1273
0
         errmsg("could not rename file \"%s\" to \"%s\": %m",
1274
0
            pathtmp, path)));
1275
1276
  /*
1277
   * The basename of the file is what we return from pg_export_snapshot().
1278
   * It's already in path in a textual format and we know that the path
1279
   * starts with SNAPSHOT_EXPORT_DIR.  Skip over the prefix and the slash
1280
   * and pstrdup it so as not to return the address of a local variable.
1281
   */
1282
0
  return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1283
0
}
1284
1285
/*
1286
 * pg_export_snapshot
1287
 *    SQL-callable wrapper for ExportSnapshot.
1288
 */
1289
Datum
1290
pg_export_snapshot(PG_FUNCTION_ARGS)
1291
0
{
1292
0
  char     *snapshotName;
1293
1294
0
  snapshotName = ExportSnapshot(GetActiveSnapshot());
1295
0
  PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1296
0
}
1297
1298
1299
/*
1300
 * Parsing subroutines for ImportSnapshot: parse a line with the given
1301
 * prefix followed by a value, and advance *s to the next line.  The
1302
 * filename is provided for use in error messages.
1303
 */
1304
static int
1305
parseIntFromText(const char *prefix, char **s, const char *filename)
1306
0
{
1307
0
  char     *ptr = *s;
1308
0
  int     prefixlen = strlen(prefix);
1309
0
  int     val;
1310
1311
0
  if (strncmp(ptr, prefix, prefixlen) != 0)
1312
0
    ereport(ERROR,
1313
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1314
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1315
0
  ptr += prefixlen;
1316
0
  if (sscanf(ptr, "%d", &val) != 1)
1317
0
    ereport(ERROR,
1318
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1319
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1320
0
  ptr = strchr(ptr, '\n');
1321
0
  if (!ptr)
1322
0
    ereport(ERROR,
1323
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1324
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1325
0
  *s = ptr + 1;
1326
0
  return val;
1327
0
}
1328
1329
static TransactionId
1330
parseXidFromText(const char *prefix, char **s, const char *filename)
1331
0
{
1332
0
  char     *ptr = *s;
1333
0
  int     prefixlen = strlen(prefix);
1334
0
  TransactionId val;
1335
1336
0
  if (strncmp(ptr, prefix, prefixlen) != 0)
1337
0
    ereport(ERROR,
1338
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1339
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1340
0
  ptr += prefixlen;
1341
0
  if (sscanf(ptr, "%u", &val) != 1)
1342
0
    ereport(ERROR,
1343
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1344
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1345
0
  ptr = strchr(ptr, '\n');
1346
0
  if (!ptr)
1347
0
    ereport(ERROR,
1348
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1349
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1350
0
  *s = ptr + 1;
1351
0
  return val;
1352
0
}
1353
1354
static void
1355
parseVxidFromText(const char *prefix, char **s, const char *filename,
1356
          VirtualTransactionId *vxid)
1357
0
{
1358
0
  char     *ptr = *s;
1359
0
  int     prefixlen = strlen(prefix);
1360
1361
0
  if (strncmp(ptr, prefix, prefixlen) != 0)
1362
0
    ereport(ERROR,
1363
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1364
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1365
0
  ptr += prefixlen;
1366
0
  if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1367
0
    ereport(ERROR,
1368
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1369
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1370
0
  ptr = strchr(ptr, '\n');
1371
0
  if (!ptr)
1372
0
    ereport(ERROR,
1373
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1374
0
         errmsg("invalid snapshot data in file \"%s\"", filename)));
1375
0
  *s = ptr + 1;
1376
0
}
1377
1378
/*
1379
 * ImportSnapshot
1380
 *    Import a previously exported snapshot.  The argument should be a
1381
 *    filename in SNAPSHOT_EXPORT_DIR.  Load the snapshot from that file.
1382
 *    This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1383
 */
1384
void
1385
ImportSnapshot(const char *idstr)
1386
0
{
1387
0
  char    path[MAXPGPATH];
1388
0
  FILE     *f;
1389
0
  struct stat stat_buf;
1390
0
  char     *filebuf;
1391
0
  int     xcnt;
1392
0
  int     i;
1393
0
  VirtualTransactionId src_vxid;
1394
0
  int     src_pid;
1395
0
  Oid     src_dbid;
1396
0
  int     src_isolevel;
1397
0
  bool    src_readonly;
1398
0
  SnapshotData snapshot;
1399
1400
  /*
1401
   * Must be at top level of a fresh transaction.  Note in particular that
1402
   * we check we haven't acquired an XID --- if we have, it's conceivable
1403
   * that the snapshot would show it as not running, making for very screwy
1404
   * behavior.
1405
   */
1406
0
  if (FirstSnapshotSet ||
1407
0
    GetTopTransactionIdIfAny() != InvalidTransactionId ||
1408
0
    IsSubTransaction())
1409
0
    ereport(ERROR,
1410
0
        (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1411
0
         errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1412
1413
  /*
1414
   * If we are in read committed mode then the next query would execute with
1415
   * a new snapshot thus making this function call quite useless.
1416
   */
1417
0
  if (!IsolationUsesXactSnapshot())
1418
0
    ereport(ERROR,
1419
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1420
0
         errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1421
1422
  /*
1423
   * Verify the identifier: only 0-9, A-F and hyphens are allowed.  We do
1424
   * this mainly to prevent reading arbitrary files.
1425
   */
1426
0
  if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1427
0
    ereport(ERROR,
1428
0
        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1429
0
         errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1430
1431
  /* OK, read the file */
1432
0
  snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1433
1434
0
  f = AllocateFile(path, PG_BINARY_R);
1435
0
  if (!f)
1436
0
  {
1437
    /*
1438
     * If file is missing while identifier has a correct format, avoid
1439
     * system errors.
1440
     */
1441
0
    if (errno == ENOENT)
1442
0
      ereport(ERROR,
1443
0
          (errcode(ERRCODE_UNDEFINED_OBJECT),
1444
0
           errmsg("snapshot \"%s\" does not exist", idstr)));
1445
0
    else
1446
0
      ereport(ERROR,
1447
0
          (errcode_for_file_access(),
1448
0
           errmsg("could not open file \"%s\" for reading: %m",
1449
0
              path)));
1450
0
  }
1451
1452
  /* get the size of the file so that we know how much memory we need */
1453
0
  if (fstat(fileno(f), &stat_buf))
1454
0
    elog(ERROR, "could not stat file \"%s\": %m", path);
1455
1456
  /* and read the file into a palloc'd string */
1457
0
  filebuf = (char *) palloc(stat_buf.st_size + 1);
1458
0
  if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1459
0
    elog(ERROR, "could not read file \"%s\": %m", path);
1460
1461
0
  filebuf[stat_buf.st_size] = '\0';
1462
1463
0
  FreeFile(f);
1464
1465
  /*
1466
   * Construct a snapshot struct by parsing the file content.
1467
   */
1468
0
  memset(&snapshot, 0, sizeof(snapshot));
1469
1470
0
  parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1471
0
  src_pid = parseIntFromText("pid:", &filebuf, path);
1472
  /* we abuse parseXidFromText a bit here ... */
1473
0
  src_dbid = parseXidFromText("dbid:", &filebuf, path);
1474
0
  src_isolevel = parseIntFromText("iso:", &filebuf, path);
1475
0
  src_readonly = parseIntFromText("ro:", &filebuf, path);
1476
1477
0
  snapshot.snapshot_type = SNAPSHOT_MVCC;
1478
1479
0
  snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1480
0
  snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1481
1482
0
  snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1483
1484
  /* sanity-check the xid count before palloc */
1485
0
  if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1486
0
    ereport(ERROR,
1487
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1488
0
         errmsg("invalid snapshot data in file \"%s\"", path)));
1489
1490
0
  snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1491
0
  for (i = 0; i < xcnt; i++)
1492
0
    snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1493
1494
0
  snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1495
1496
0
  if (!snapshot.suboverflowed)
1497
0
  {
1498
0
    snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1499
1500
    /* sanity-check the xid count before palloc */
1501
0
    if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1502
0
      ereport(ERROR,
1503
0
          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1504
0
           errmsg("invalid snapshot data in file \"%s\"", path)));
1505
1506
0
    snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1507
0
    for (i = 0; i < xcnt; i++)
1508
0
      snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1509
0
  }
1510
0
  else
1511
0
  {
1512
0
    snapshot.subxcnt = 0;
1513
0
    snapshot.subxip = NULL;
1514
0
  }
1515
1516
0
  snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1517
1518
  /*
1519
   * Do some additional sanity checking, just to protect ourselves.  We
1520
   * don't trouble to check the array elements, just the most critical
1521
   * fields.
1522
   */
1523
0
  if (!VirtualTransactionIdIsValid(src_vxid) ||
1524
0
    !OidIsValid(src_dbid) ||
1525
0
    !TransactionIdIsNormal(snapshot.xmin) ||
1526
0
    !TransactionIdIsNormal(snapshot.xmax))
1527
0
    ereport(ERROR,
1528
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1529
0
         errmsg("invalid snapshot data in file \"%s\"", path)));
1530
1531
  /*
1532
   * If we're serializable, the source transaction must be too, otherwise
1533
   * predicate.c has problems (SxactGlobalXmin could go backwards).  Also, a
1534
   * non-read-only transaction can't adopt a snapshot from a read-only
1535
   * transaction, as predicate.c handles the cases very differently.
1536
   */
1537
0
  if (IsolationIsSerializable())
1538
0
  {
1539
0
    if (src_isolevel != XACT_SERIALIZABLE)
1540
0
      ereport(ERROR,
1541
0
          (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1542
0
           errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1543
0
    if (src_readonly && !XactReadOnly)
1544
0
      ereport(ERROR,
1545
0
          (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1546
0
           errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1547
0
  }
1548
1549
  /*
1550
   * We cannot import a snapshot that was taken in a different database,
1551
   * because vacuum calculates OldestXmin on a per-database basis; so the
1552
   * source transaction's xmin doesn't protect us from data loss.  This
1553
   * restriction could be removed if the source transaction were to mark its
1554
   * xmin as being globally applicable.  But that would require some
1555
   * additional syntax, since that has to be known when the snapshot is
1556
   * initially taken.  (See pgsql-hackers discussion of 2011-10-21.)
1557
   */
1558
0
  if (src_dbid != MyDatabaseId)
1559
0
    ereport(ERROR,
1560
0
        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1561
0
         errmsg("cannot import a snapshot from a different database")));
1562
1563
  /* OK, install the snapshot */
1564
0
  SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1565
0
}
1566
1567
/*
1568
 * XactHasExportedSnapshots
1569
 *    Test whether current transaction has exported any snapshots.
1570
 */
1571
bool
1572
XactHasExportedSnapshots(void)
1573
0
{
1574
0
  return (exportedSnapshots != NIL);
1575
0
}
1576
1577
/*
1578
 * DeleteAllExportedSnapshotFiles
1579
 *    Clean up any files that have been left behind by a crashed backend
1580
 *    that had exported snapshots before it died.
1581
 *
1582
 * This should be called during database startup or crash recovery.
1583
 */
1584
void
1585
DeleteAllExportedSnapshotFiles(void)
1586
{
1587
  char    buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1588
  DIR      *s_dir;
1589
  struct dirent *s_de;
1590
1591
  /*
1592
   * Problems in reading the directory, or unlinking files, are reported at
1593
   * LOG level.  Since we're running in the startup process, ERROR level
1594
   * would prevent database start, and it's not important enough for that.
1595
   */
1596
  s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1597
1598
  while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1599
  {
1600
    if (strcmp(s_de->d_name, ".") == 0 ||
1601
      strcmp(s_de->d_name, "..") == 0)
1602
      continue;
1603
1604
    snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1605
1606
    if (unlink(buf) != 0)
1607
      ereport(LOG,
1608
          (errcode_for_file_access(),
1609
           errmsg("could not remove file \"%s\": %m", buf)));
1610
  }
1611
1612
  FreeDir(s_dir);
1613
}
1614
1615
/*
1616
 * ThereAreNoPriorRegisteredSnapshots
1617
 *    Is the registered snapshot count less than or equal to one?
1618
 *
1619
 * Don't use this to settle important decisions.  While zero registrations and
1620
 * no ActiveSnapshot would confirm a certain idleness, the system makes no
1621
 * guarantees about the significance of one registered snapshot.
1622
 */
1623
bool
1624
ThereAreNoPriorRegisteredSnapshots(void)
1625
0
{
1626
0
  if (pairingheap_is_empty(&RegisteredSnapshots) ||
1627
0
    pairingheap_is_singular(&RegisteredSnapshots))
1628
0
    return true;
1629
1630
0
  return false;
1631
0
}
1632
1633
/*
1634
 * HaveRegisteredOrActiveSnapshot
1635
 *    Is there any registered or active snapshot?
1636
 *
1637
 * NB: Unless pushed or active, the cached catalog snapshot will not cause
1638
 * this function to return true. That allows this function to be used in
1639
 * checks enforcing a longer-lived snapshot.
1640
 */
1641
bool
1642
HaveRegisteredOrActiveSnapshot(void)
1643
0
{
1644
0
  if (ActiveSnapshot != NULL)
1645
0
    return true;
1646
1647
  /*
1648
   * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1649
   * removed at any time due to invalidation processing. If explicitly
1650
   * registered more than one snapshot has to be in RegisteredSnapshots.
1651
   */
1652
0
  if (CatalogSnapshot != NULL &&
1653
0
    pairingheap_is_singular(&RegisteredSnapshots))
1654
0
    return false;
1655
1656
0
  return !pairingheap_is_empty(&RegisteredSnapshots);
1657
0
}
1658
1659
1660
/*
1661
 * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1662
 * access to behave just like it did at a certain point in the past.
1663
 *
1664
 * Needed for logical decoding.
1665
 */
1666
void
1667
SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1668
0
{
1669
0
  Assert(historic_snapshot != NULL);
1670
1671
  /* setup the timetravel snapshot */
1672
0
  HistoricSnapshot = historic_snapshot;
1673
1674
  /* setup (cmin, cmax) lookup hash */
1675
0
  tuplecid_data = tuplecids;
1676
0
}
1677
1678
1679
/*
1680
 * Make catalog snapshots behave normally again.
1681
 */
1682
void
1683
TeardownHistoricSnapshot(bool is_error)
1684
0
{
1685
0
  HistoricSnapshot = NULL;
1686
0
  tuplecid_data = NULL;
1687
0
}
1688
1689
bool
1690
HistoricSnapshotActive(void)
1691
0
{
1692
0
  return HistoricSnapshot != NULL;
1693
0
}
1694
1695
HTAB *
1696
HistoricSnapshotGetTupleCids(void)
1697
0
{
1698
0
  Assert(HistoricSnapshotActive());
1699
0
  return tuplecid_data;
1700
0
}
1701
1702
/*
1703
 * EstimateSnapshotSpace
1704
 *    Returns the size needed to store the given snapshot.
1705
 *
1706
 * We are exporting only required fields from the Snapshot, stored in
1707
 * SerializedSnapshotData.
1708
 */
1709
Size
1710
EstimateSnapshotSpace(Snapshot snapshot)
1711
0
{
1712
0
  Size    size;
1713
1714
0
  Assert(snapshot != InvalidSnapshot);
1715
0
  Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1716
1717
  /* We allocate any XID arrays needed in the same palloc block. */
1718
0
  size = add_size(sizeof(SerializedSnapshotData),
1719
0
          mul_size(snapshot->xcnt, sizeof(TransactionId)));
1720
0
  if (snapshot->subxcnt > 0 &&
1721
0
    (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1722
0
    size = add_size(size,
1723
0
            mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1724
1725
0
  return size;
1726
0
}
1727
1728
/*
1729
 * SerializeSnapshot
1730
 *    Dumps the serialized snapshot (extracted from given snapshot) onto the
1731
 *    memory location at start_address.
1732
 */
1733
void
1734
SerializeSnapshot(Snapshot snapshot, char *start_address)
1735
0
{
1736
0
  SerializedSnapshotData serialized_snapshot;
1737
1738
0
  Assert(snapshot->subxcnt >= 0);
1739
1740
  /* Copy all required fields */
1741
0
  serialized_snapshot.xmin = snapshot->xmin;
1742
0
  serialized_snapshot.xmax = snapshot->xmax;
1743
0
  serialized_snapshot.xcnt = snapshot->xcnt;
1744
0
  serialized_snapshot.subxcnt = snapshot->subxcnt;
1745
0
  serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1746
0
  serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1747
0
  serialized_snapshot.curcid = snapshot->curcid;
1748
1749
  /*
1750
   * Ignore the SubXID array if it has overflowed, unless the snapshot was
1751
   * taken during recovery - in that case, top-level XIDs are in subxip as
1752
   * well, and we mustn't lose them.
1753
   */
1754
0
  if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1755
0
    serialized_snapshot.subxcnt = 0;
1756
1757
  /* Copy struct to possibly-unaligned buffer */
1758
0
  memcpy(start_address,
1759
0
       &serialized_snapshot, sizeof(SerializedSnapshotData));
1760
1761
  /* Copy XID array */
1762
0
  if (snapshot->xcnt > 0)
1763
0
    memcpy((TransactionId *) (start_address +
1764
0
                  sizeof(SerializedSnapshotData)),
1765
0
         snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1766
1767
  /*
1768
   * Copy SubXID array. Don't bother to copy it if it had overflowed,
1769
   * though, because it's not used anywhere in that case. Except if it's a
1770
   * snapshot taken during recovery; all the top-level XIDs are in subxip as
1771
   * well in that case, so we mustn't lose them.
1772
   */
1773
0
  if (serialized_snapshot.subxcnt > 0)
1774
0
  {
1775
0
    Size    subxipoff = sizeof(SerializedSnapshotData) +
1776
0
      snapshot->xcnt * sizeof(TransactionId);
1777
1778
0
    memcpy((TransactionId *) (start_address + subxipoff),
1779
0
         snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1780
0
  }
1781
0
}
1782
1783
/*
1784
 * RestoreSnapshot
1785
 *    Restore a serialized snapshot from the specified address.
1786
 *
1787
 * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1788
 * to 0.  The returned snapshot has the copied flag set.
1789
 */
1790
Snapshot
1791
RestoreSnapshot(char *start_address)
1792
0
{
1793
0
  SerializedSnapshotData serialized_snapshot;
1794
0
  Size    size;
1795
0
  Snapshot  snapshot;
1796
0
  TransactionId *serialized_xids;
1797
1798
0
  memcpy(&serialized_snapshot, start_address,
1799
0
       sizeof(SerializedSnapshotData));
1800
0
  serialized_xids = (TransactionId *)
1801
0
    (start_address + sizeof(SerializedSnapshotData));
1802
1803
  /* We allocate any XID arrays needed in the same palloc block. */
1804
0
  size = sizeof(SnapshotData)
1805
0
    + serialized_snapshot.xcnt * sizeof(TransactionId)
1806
0
    + serialized_snapshot.subxcnt * sizeof(TransactionId);
1807
1808
  /* Copy all required fields */
1809
0
  snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1810
0
  snapshot->snapshot_type = SNAPSHOT_MVCC;
1811
0
  snapshot->xmin = serialized_snapshot.xmin;
1812
0
  snapshot->xmax = serialized_snapshot.xmax;
1813
0
  snapshot->xip = NULL;
1814
0
  snapshot->xcnt = serialized_snapshot.xcnt;
1815
0
  snapshot->subxip = NULL;
1816
0
  snapshot->subxcnt = serialized_snapshot.subxcnt;
1817
0
  snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1818
0
  snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1819
0
  snapshot->curcid = serialized_snapshot.curcid;
1820
0
  snapshot->snapXactCompletionCount = 0;
1821
1822
  /* Copy XIDs, if present. */
1823
0
  if (serialized_snapshot.xcnt > 0)
1824
0
  {
1825
0
    snapshot->xip = (TransactionId *) (snapshot + 1);
1826
0
    memcpy(snapshot->xip, serialized_xids,
1827
0
         serialized_snapshot.xcnt * sizeof(TransactionId));
1828
0
  }
1829
1830
  /* Copy SubXIDs, if present. */
1831
0
  if (serialized_snapshot.subxcnt > 0)
1832
0
  {
1833
0
    snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1834
0
      serialized_snapshot.xcnt;
1835
0
    memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
1836
0
         serialized_snapshot.subxcnt * sizeof(TransactionId));
1837
0
  }
1838
1839
  /* Set the copied flag so that the caller will set refcounts correctly. */
1840
0
  snapshot->regd_count = 0;
1841
0
  snapshot->active_count = 0;
1842
0
  snapshot->copied = true;
1843
1844
0
  return snapshot;
1845
0
}
1846
1847
/*
1848
 * Install a restored snapshot as the transaction snapshot.
1849
 *
1850
 * The second argument is of type void * so that snapmgr.h need not include
1851
 * the declaration for PGPROC.
1852
 */
1853
void
1854
RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
1855
0
{
1856
0
  SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
1857
0
}
1858
1859
/*
1860
 * XidInMVCCSnapshot
1861
 *    Is the given XID still-in-progress according to the snapshot?
1862
 *
1863
 * Note: GetSnapshotData never stores either top xid or subxids of our own
1864
 * backend into a snapshot, so these xids will not be reported as "running"
1865
 * by this function.  This is OK for current uses, because we always check
1866
 * TransactionIdIsCurrentTransactionId first, except when it's known the
1867
 * XID could not be ours anyway.
1868
 */
1869
bool
1870
XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1871
0
{
1872
  /*
1873
   * Make a quick range check to eliminate most XIDs without looking at the
1874
   * xip arrays.  Note that this is OK even if we convert a subxact XID to
1875
   * its parent below, because a subxact with XID < xmin has surely also got
1876
   * a parent with XID < xmin, while one with XID >= xmax must belong to a
1877
   * parent that was not yet committed at the time of this snapshot.
1878
   */
1879
1880
  /* Any xid < xmin is not in-progress */
1881
0
  if (TransactionIdPrecedes(xid, snapshot->xmin))
1882
0
    return false;
1883
  /* Any xid >= xmax is in-progress */
1884
0
  if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1885
0
    return true;
1886
1887
  /*
1888
   * Snapshot information is stored slightly differently in snapshots taken
1889
   * during recovery.
1890
   */
1891
0
  if (!snapshot->takenDuringRecovery)
1892
0
  {
1893
    /*
1894
     * If the snapshot contains full subxact data, the fastest way to
1895
     * check things is just to compare the given XID against both subxact
1896
     * XIDs and top-level XIDs.  If the snapshot overflowed, we have to
1897
     * use pg_subtrans to convert a subxact XID to its parent XID, but
1898
     * then we need only look at top-level XIDs not subxacts.
1899
     */
1900
0
    if (!snapshot->suboverflowed)
1901
0
    {
1902
      /* we have full data, so search subxip */
1903
0
      if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1904
0
        return true;
1905
1906
      /* not there, fall through to search xip[] */
1907
0
    }
1908
0
    else
1909
0
    {
1910
      /*
1911
       * Snapshot overflowed, so convert xid to top-level.  This is safe
1912
       * because we eliminated too-old XIDs above.
1913
       */
1914
0
      xid = SubTransGetTopmostTransaction(xid);
1915
1916
      /*
1917
       * If xid was indeed a subxact, we might now have an xid < xmin,
1918
       * so recheck to avoid an array scan.  No point in rechecking
1919
       * xmax.
1920
       */
1921
0
      if (TransactionIdPrecedes(xid, snapshot->xmin))
1922
0
        return false;
1923
0
    }
1924
1925
0
    if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1926
0
      return true;
1927
0
  }
1928
0
  else
1929
0
  {
1930
    /*
1931
     * In recovery we store all xids in the subxip array because it is by
1932
     * far the bigger array, and we mostly don't know which xids are
1933
     * top-level and which are subxacts. The xip array is empty.
1934
     *
1935
     * We start by searching subtrans, if we overflowed.
1936
     */
1937
0
    if (snapshot->suboverflowed)
1938
0
    {
1939
      /*
1940
       * Snapshot overflowed, so convert xid to top-level.  This is safe
1941
       * because we eliminated too-old XIDs above.
1942
       */
1943
0
      xid = SubTransGetTopmostTransaction(xid);
1944
1945
      /*
1946
       * If xid was indeed a subxact, we might now have an xid < xmin,
1947
       * so recheck to avoid an array scan.  No point in rechecking
1948
       * xmax.
1949
       */
1950
0
      if (TransactionIdPrecedes(xid, snapshot->xmin))
1951
0
        return false;
1952
0
    }
1953
1954
    /*
1955
     * We now have either a top-level xid higher than xmin or an
1956
     * indeterminate xid. We don't know whether it's top level or subxact
1957
     * but it doesn't matter. If it's present, the xid is visible.
1958
     */
1959
0
    if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1960
0
      return true;
1961
0
  }
1962
1963
0
  return false;
1964
0
}
1965
1966
/* ResourceOwner callbacks */
1967
1968
static void
1969
ResOwnerReleaseSnapshot(Datum res)
1970
0
{
1971
0
  UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));
1972
0
}