Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/backup/basebackup_incremental.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * basebackup_incremental.c
4
 *    code for incremental backup support
5
 *
6
 * This code isn't actually in charge of taking an incremental backup;
7
 * the actual construction of the incremental backup happens in
8
 * basebackup.c. Here, we're concerned with providing the necessary
9
 * supports for that operation. In particular, we need to parse the
10
 * backup manifest supplied by the user taking the incremental backup
11
 * and extract the required information from it.
12
 *
13
 * Portions Copyright (c) 2010-2025, PostgreSQL Global Development Group
14
 *
15
 * IDENTIFICATION
16
 *    src/backend/backup/basebackup_incremental.c
17
 *
18
 *-------------------------------------------------------------------------
19
 */
20
#include "postgres.h"
21
22
#include "access/timeline.h"
23
#include "access/xlog.h"
24
#include "backup/basebackup_incremental.h"
25
#include "backup/walsummary.h"
26
#include "common/blkreftable.h"
27
#include "common/hashfn.h"
28
#include "common/int.h"
29
#include "common/parse_manifest.h"
30
#include "postmaster/walsummarizer.h"
31
32
0
/*
 * Upper bound on the number of block numbers requested from a block
 * reference table reader in a single call; also the length of the scratch
 * array that receives them.
 */
#define BLOCKS_PER_READ			512

/*
 * The closing lines of the manifest, checksum included, are expected to fit
 * within its final MIN_CHUNK bytes. Whenever buffering more input would push
 * us past MAX_CHUNK bytes, an incremental parse step is run first.
 */
#define MIN_CHUNK				1024
#define MAX_CHUNK				(128 * 1024)
41
42
/*
43
 * Details extracted from the WAL ranges present in the supplied backup manifest.
44
 */
45
typedef struct
46
{
47
  TimeLineID  tli;
48
  XLogRecPtr  start_lsn;
49
  XLogRecPtr  end_lsn;
50
} backup_wal_range;
51
52
/*
53
 * Details extracted from the file list present in the supplied backup manifest.
54
 */
55
typedef struct
56
{
57
  uint32    status;
58
  const char *path;
59
  uint64    size;
60
} backup_file_entry;
61
62
/*
 * Hash function for the string keys of the table below. It is static, so
 * its definition lives elsewhere in this translation unit.
 */
static uint32 hash_string_pointer(const char *s);

/*
 * Instantiate a simplehash table whose generated names are prefixed with
 * "backup_file". Elements are backup_file_entry structs keyed by their
 * "path" member; keys are hashed with hash_string_pointer() and compared
 * with strcmp().
 */
#define SH_PREFIX backup_file
#define SH_ELEMENT_TYPE backup_file_entry
#define SH_KEY_TYPE const char *
#define SH_KEY path
#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE static inline
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"
73
74
struct IncrementalBackupInfo
75
{
76
  /* Memory context for this object and its subsidiary objects. */
77
  MemoryContext mcxt;
78
79
  /* Temporary buffer for storing the manifest while parsing it. */
80
  StringInfoData buf;
81
82
  /* WAL ranges extracted from the backup manifest. */
83
  List     *manifest_wal_ranges;
84
85
  /*
86
   * Files extracted from the backup manifest.
87
   *
88
   * We don't really need this information, because we use WAL summaries to
89
   * figure out what's changed. It would be unsafe to just rely on the list
90
   * of files that existed before, because it's possible for a file to be
91
   * removed and a new one created with the same name and different
92
   * contents. In such cases, the whole file must still be sent. We can tell
93
   * from the WAL summaries whether that happened, but not from the file
94
   * list.
95
   *
96
   * Nonetheless, this data is useful for sanity checking. If a file that we
97
   * think we shouldn't need to send is not present in the manifest for the
98
   * prior backup, something has gone terribly wrong. We retain the file
99
   * names and sizes, but not the checksums or last modified times, for
100
   * which we have no use.
101
   *
102
   * One significant downside of storing this data is that it consumes
103
   * memory. If that turns out to be a problem, we might have to decide not
104
   * to retain this information, or to make it optional.
105
   */
106
  backup_file_hash *manifest_files;
107
108
  /*
109
   * Block-reference table for the incremental backup.
110
   *
111
   * It's possible that storing the entire block-reference table in memory
112
   * will be a problem for some users. The in-memory format that we're using
113
   * here is pretty efficient, converging to little more than 1 bit per
114
   * block for relation forks with large numbers of modified blocks. It's
115
   * possible, however, that if you try to perform an incremental backup of
116
   * a database with a sufficiently large number of relations on a
117
   * sufficiently small machine, you could run out of memory here. If that
118
   * turns out to be a problem in practice, we'll need to be more clever.
119
   */
120
  BlockRefTable *brtab;
121
122
  /*
123
   * State object for incremental JSON parsing
124
   */
125
  JsonManifestParseIncrementalState *inc_state;
126
};
127
128
static void manifest_process_version(JsonManifestParseContext *context,
129
                   int manifest_version);
130
static void manifest_process_system_identifier(JsonManifestParseContext *context,
131
                         uint64 manifest_system_identifier);
132
static void manifest_process_file(JsonManifestParseContext *context,
133
                  const char *pathname,
134
                  uint64 size,
135
                  pg_checksum_type checksum_type,
136
                  int checksum_length,
137
                  uint8 *checksum_payload);
138
static void manifest_process_wal_range(JsonManifestParseContext *context,
139
                     TimeLineID tli,
140
                     XLogRecPtr start_lsn,
141
                     XLogRecPtr end_lsn);
142
pg_noreturn static void manifest_report_error(JsonManifestParseContext *context,
143
                        const char *fmt,...)
144
      pg_attribute_printf(2, 3);
145
static int  compare_block_numbers(const void *a, const void *b);
146
147
/*
148
 * Create a new object for storing information extracted from the manifest
149
 * supplied when creating an incremental backup.
150
 */
151
IncrementalBackupInfo *
152
CreateIncrementalBackupInfo(MemoryContext mcxt)
153
0
{
154
0
  IncrementalBackupInfo *ib;
155
0
  MemoryContext oldcontext;
156
0
  JsonManifestParseContext *context;
157
158
0
  oldcontext = MemoryContextSwitchTo(mcxt);
159
160
0
  ib = palloc0(sizeof(IncrementalBackupInfo));
161
0
  ib->mcxt = mcxt;
162
0
  initStringInfo(&ib->buf);
163
164
  /*
165
   * It's hard to guess how many files a "typical" installation will have in
166
   * the data directory, but a fresh initdb creates almost 1000 files as of
167
   * this writing, so it seems to make sense for our estimate to
168
   * substantially higher.
169
   */
170
0
  ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
171
172
0
  context = palloc0(sizeof(JsonManifestParseContext));
173
  /* Parse the manifest. */
174
0
  context->private_data = ib;
175
0
  context->version_cb = manifest_process_version;
176
0
  context->system_identifier_cb = manifest_process_system_identifier;
177
0
  context->per_file_cb = manifest_process_file;
178
0
  context->per_wal_range_cb = manifest_process_wal_range;
179
0
  context->error_cb = manifest_report_error;
180
181
0
  ib->inc_state = json_parse_manifest_incremental_init(context);
182
183
0
  MemoryContextSwitchTo(oldcontext);
184
185
0
  return ib;
186
0
}
187
188
/*
189
 * Before taking an incremental backup, the caller must supply the backup
190
 * manifest from a prior backup. Each chunk of manifest data received
191
 * from the client should be passed to this function.
192
 */
193
void
194
AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
195
                int len)
196
0
{
197
0
  MemoryContext oldcontext;
198
199
  /* Switch to our memory context. */
200
0
  oldcontext = MemoryContextSwitchTo(ib->mcxt);
201
202
0
  if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
203
0
  {
204
    /*
205
     * time for an incremental parse. We'll do all but the last MIN_CHUNK
206
     * so that we have enough left for the final piece.
207
     */
208
0
    json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
209
0
                        ib->buf.len - MIN_CHUNK, false);
210
    /* now remove what we just parsed  */
211
0
    memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
212
0
        MIN_CHUNK + 1);
213
0
    ib->buf.len = MIN_CHUNK;
214
0
  }
215
216
0
  appendBinaryStringInfo(&ib->buf, data, len);
217
218
  /* Switch back to previous memory context. */
219
0
  MemoryContextSwitchTo(oldcontext);
220
0
}
221
222
/*
223
 * Finalize an IncrementalBackupInfo object after all manifest data has
224
 * been supplied via calls to AppendIncrementalManifestData.
225
 */
226
void
227
FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
228
0
{
229
0
  MemoryContext oldcontext;
230
231
  /* Switch to our memory context. */
232
0
  oldcontext = MemoryContextSwitchTo(ib->mcxt);
233
234
  /* Parse the last chunk of the manifest */
235
0
  json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
236
0
                      ib->buf.len, true);
237
238
  /* Done with the buffer, so release memory. */
239
0
  pfree(ib->buf.data);
240
0
  ib->buf.data = NULL;
241
242
  /* Done with inc_state, so release that memory too */
243
0
  json_parse_manifest_incremental_shutdown(ib->inc_state);
244
245
  /* Switch back to previous memory context. */
246
0
  MemoryContextSwitchTo(oldcontext);
247
0
}
248
249
/*
250
 * Prepare to take an incremental backup.
251
 *
252
 * Before this function is called, AppendIncrementalManifestData and
253
 * FinalizeIncrementalManifest should have already been called to pass all
254
 * the manifest data to this object.
255
 *
256
 * This function performs sanity checks on the data extracted from the
257
 * manifest and figures out for which WAL ranges we need summaries, and
258
 * whether those summaries are available. Then, it reads and combines the
259
 * data from those summary files. It also updates the backup_state with the
260
 * reference TLI and LSN for the prior backup.
261
 */
262
void
263
PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
264
              BackupState *backup_state)
265
0
{
266
0
  MemoryContext oldcontext;
267
0
  List     *expectedTLEs;
268
0
  List     *all_wslist,
269
0
         *required_wslist = NIL;
270
0
  ListCell   *lc;
271
0
  TimeLineHistoryEntry **tlep;
272
0
  int     num_wal_ranges;
273
0
  int     i;
274
0
  bool    found_backup_start_tli = false;
275
0
  TimeLineID  earliest_wal_range_tli = 0;
276
0
  XLogRecPtr  earliest_wal_range_start_lsn = InvalidXLogRecPtr;
277
0
  TimeLineID  latest_wal_range_tli = 0;
278
279
0
  Assert(ib->buf.data == NULL);
280
281
  /* Switch to our memory context. */
282
0
  oldcontext = MemoryContextSwitchTo(ib->mcxt);
283
284
  /*
285
   * A valid backup manifest must always contain at least one WAL range
286
   * (usually exactly one, unless the backup spanned a timeline switch).
287
   */
288
0
  num_wal_ranges = list_length(ib->manifest_wal_ranges);
289
0
  if (num_wal_ranges == 0)
290
0
    ereport(ERROR,
291
0
        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
292
0
         errmsg("manifest contains no required WAL ranges")));
293
294
  /*
295
   * Match up the TLIs that appear in the WAL ranges of the backup manifest
296
   * with those that appear in this server's timeline history. We expect
297
   * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
298
   * not, that's an error.
299
   *
300
   * This loop also decides which of the WAL ranges is the manifest is most
301
   * ancient and which one is the newest, according to the timeline history
302
   * of this server, and stores TLIs of those WAL ranges into
303
   * earliest_wal_range_tli and latest_wal_range_tli. It also updates
304
   * earliest_wal_range_start_lsn to the start LSN of the WAL range for
305
   * earliest_wal_range_tli.
306
   *
307
   * Note that the return value of readTimeLineHistory puts the latest
308
   * timeline at the beginning of the list, not the end. Hence, the earliest
309
   * TLI is the one that occurs nearest the end of the list returned by
310
   * readTimeLineHistory, and the latest TLI is the one that occurs closest
311
   * to the beginning.
312
   */
313
0
  expectedTLEs = readTimeLineHistory(backup_state->starttli);
314
0
  tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
315
0
  for (i = 0; i < num_wal_ranges; ++i)
316
0
  {
317
0
    backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
318
0
    bool    saw_earliest_wal_range_tli = false;
319
0
    bool    saw_latest_wal_range_tli = false;
320
321
    /* Search this server's history for this WAL range's TLI. */
322
0
    foreach(lc, expectedTLEs)
323
0
    {
324
0
      TimeLineHistoryEntry *tle = lfirst(lc);
325
326
0
      if (tle->tli == range->tli)
327
0
      {
328
0
        tlep[i] = tle;
329
0
        break;
330
0
      }
331
332
0
      if (tle->tli == earliest_wal_range_tli)
333
0
        saw_earliest_wal_range_tli = true;
334
0
      if (tle->tli == latest_wal_range_tli)
335
0
        saw_latest_wal_range_tli = true;
336
0
    }
337
338
    /*
339
     * An incremental backup can only be taken relative to a backup that
340
     * represents a previous state of this server. If the backup requires
341
     * WAL from a timeline that's not in our history, that definitely
342
     * isn't the case.
343
     */
344
0
    if (tlep[i] == NULL)
345
0
      ereport(ERROR,
346
0
          (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
347
0
           errmsg("timeline %u found in manifest, but not in this server's history",
348
0
              range->tli)));
349
350
    /*
351
     * If we found this TLI in the server's history before encountering
352
     * the latest TLI seen so far in the server's history, then this TLI
353
     * is the latest one seen so far.
354
     *
355
     * If on the other hand we saw the earliest TLI seen so far before
356
     * finding this TLI, this TLI is earlier than the earliest one seen so
357
     * far. And if this is the first TLI for which we've searched, it's
358
     * also the earliest one seen so far.
359
     *
360
     * On the first loop iteration, both things should necessarily be
361
     * true.
362
     */
363
0
    if (!saw_latest_wal_range_tli)
364
0
      latest_wal_range_tli = range->tli;
365
0
    if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
366
0
    {
367
0
      earliest_wal_range_tli = range->tli;
368
0
      earliest_wal_range_start_lsn = range->start_lsn;
369
0
    }
370
0
  }
371
372
  /*
373
   * Propagate information about the prior backup into the backup_label that
374
   * will be generated for this backup.
375
   */
376
0
  backup_state->istartpoint = earliest_wal_range_start_lsn;
377
0
  backup_state->istarttli = earliest_wal_range_tli;
378
379
  /*
380
   * Sanity check start and end LSNs for the WAL ranges in the manifest.
381
   *
382
   * Commonly, there won't be any timeline switches during the prior backup
383
   * at all, but if there are, they should happen at the same LSNs that this
384
   * server switched timelines.
385
   *
386
   * Whether there are any timeline switches during the prior backup or not,
387
   * the prior backup shouldn't require any WAL from a timeline prior to the
388
   * start of that timeline. It also shouldn't require any WAL from later
389
   * than the start of this backup.
390
   *
391
   * If any of these sanity checks fail, one possible explanation is that
392
   * the user has generated WAL on the same timeline with the same LSNs more
393
   * than once. For instance, if two standbys running on timeline 1 were
394
   * both promoted and (due to a broken archiving setup) both selected new
395
   * timeline ID 2, then it's possible that one of these checks might trip.
396
   *
397
   * Note that there are lots of ways for the user to do something very bad
398
   * without tripping any of these checks, and they are not intended to be
399
   * comprehensive. It's pretty hard to see how we could be certain of
400
   * anything here. However, if there's a problem staring us right in the
401
   * face, it's best to report it, so we do.
402
   */
403
0
  for (i = 0; i < num_wal_ranges; ++i)
404
0
  {
405
0
    backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
406
407
0
    if (range->tli == earliest_wal_range_tli)
408
0
    {
409
0
      if (range->start_lsn < tlep[i]->begin)
410
0
        ereport(ERROR,
411
0
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
412
0
             errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at %X/%X",
413
0
                range->tli,
414
0
                LSN_FORMAT_ARGS(range->start_lsn),
415
0
                LSN_FORMAT_ARGS(tlep[i]->begin))));
416
0
    }
417
0
    else
418
0
    {
419
0
      if (range->start_lsn != tlep[i]->begin)
420
0
        ereport(ERROR,
421
0
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
422
0
             errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X",
423
0
                range->tli,
424
0
                LSN_FORMAT_ARGS(range->start_lsn),
425
0
                LSN_FORMAT_ARGS(tlep[i]->begin))));
426
0
    }
427
428
0
    if (range->tli == latest_wal_range_tli)
429
0
    {
430
0
      if (range->end_lsn > backup_state->startpoint)
431
0
        ereport(ERROR,
432
0
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
433
0
             errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X",
434
0
                range->tli,
435
0
                LSN_FORMAT_ARGS(range->end_lsn),
436
0
                LSN_FORMAT_ARGS(backup_state->startpoint)),
437
0
             errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
438
0
    }
439
0
    else
440
0
    {
441
0
      if (range->end_lsn != tlep[i]->end)
442
0
        ereport(ERROR,
443
0
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
444
0
             errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X",
445
0
                range->tli,
446
0
                LSN_FORMAT_ARGS(range->end_lsn),
447
0
                LSN_FORMAT_ARGS(tlep[i]->end))));
448
0
    }
449
450
0
  }
451
452
  /*
453
   * Wait for WAL summarization to catch up to the backup start LSN. This
454
   * will throw an error if the WAL summarizer appears to be stuck. If WAL
455
   * summarization gets disabled while we're waiting, this will return
456
   * immediately, and we'll error out further down if the WAL summaries are
457
   * incomplete.
458
   */
459
0
  WaitForWalSummarization(backup_state->startpoint);
460
461
  /*
462
   * Retrieve a list of all WAL summaries on any timeline that overlap with
463
   * the LSN range of interest. We could instead call GetWalSummaries() once
464
   * per timeline in the loop that follows, but that would involve reading
465
   * the directory multiple times. It should be mildly faster - and perhaps
466
   * a bit safer - to do it just once.
467
   */
468
0
  all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
469
0
                 backup_state->startpoint);
470
471
  /*
472
   * We need WAL summaries for everything that happened during the prior
473
   * backup and everything that happened afterward up until the point where
474
   * the current backup started.
475
   */
476
0
  foreach(lc, expectedTLEs)
477
0
  {
478
0
    TimeLineHistoryEntry *tle = lfirst(lc);
479
0
    XLogRecPtr  tli_start_lsn = tle->begin;
480
0
    XLogRecPtr  tli_end_lsn = tle->end;
481
0
    XLogRecPtr  tli_missing_lsn = InvalidXLogRecPtr;
482
0
    List     *tli_wslist;
483
484
    /*
485
     * Working through the history of this server from the current
486
     * timeline backwards, we skip everything until we find the timeline
487
     * where this backup started. Most of the time, this means we won't
488
     * skip anything at all, as it's unlikely that the timeline has
489
     * changed since the beginning of the backup moments ago.
490
     */
491
0
    if (tle->tli == backup_state->starttli)
492
0
    {
493
0
      found_backup_start_tli = true;
494
0
      tli_end_lsn = backup_state->startpoint;
495
0
    }
496
0
    else if (!found_backup_start_tli)
497
0
      continue;
498
499
    /*
500
     * Find the summaries that overlap the LSN range of interest for this
501
     * timeline. If this is the earliest timeline involved, the range of
502
     * interest begins with the start LSN of the prior backup; otherwise,
503
     * it begins at the LSN at which this timeline came into existence. If
504
     * this is the latest TLI involved, the range of interest ends at the
505
     * start LSN of the current backup; otherwise, it ends at the point
506
     * where we switched from this timeline to the next one.
507
     */
508
0
    if (tle->tli == earliest_wal_range_tli)
509
0
      tli_start_lsn = earliest_wal_range_start_lsn;
510
0
    tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
511
0
                    tli_start_lsn, tli_end_lsn);
512
513
    /*
514
     * There is no guarantee that the WAL summaries we found cover the
515
     * entire range of LSNs for which summaries are required, or indeed
516
     * that we found any WAL summaries at all. Check whether we have a
517
     * problem of that sort.
518
     */
519
0
    if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
520
0
                   &tli_missing_lsn))
521
0
    {
522
0
      if (XLogRecPtrIsInvalid(tli_missing_lsn))
523
0
        ereport(ERROR,
524
0
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
525
0
             errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that timeline and LSN range exist",
526
0
                tle->tli,
527
0
                LSN_FORMAT_ARGS(tli_start_lsn),
528
0
                LSN_FORMAT_ARGS(tli_end_lsn))));
529
0
      else
530
0
        ereport(ERROR,
531
0
            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
532
0
             errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete",
533
0
                tle->tli,
534
0
                LSN_FORMAT_ARGS(tli_start_lsn),
535
0
                LSN_FORMAT_ARGS(tli_end_lsn)),
536
0
             errdetail("The first unsummarized LSN in this range is %X/%X.",
537
0
                   LSN_FORMAT_ARGS(tli_missing_lsn))));
538
0
    }
539
540
    /*
541
     * Remember that we need to read these summaries.
542
     *
543
     * Technically, it's possible that this could read more files than
544
     * required, since tli_wslist in theory could contain redundant
545
     * summaries. For instance, if we have a summary from 0/10000000 to
546
     * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
547
     * latter subsumes the former and the former could be ignored.
548
     *
549
     * We ignore this possibility because the WAL summarizer only tries to
550
     * generate summaries that do not overlap. If somehow they exist,
551
     * we'll do a bit of extra work but the results should still be
552
     * correct.
553
     */
554
0
    required_wslist = list_concat(required_wslist, tli_wslist);
555
556
    /*
557
     * Timelines earlier than the one in which the prior backup began are
558
     * not relevant.
559
     */
560
0
    if (tle->tli == earliest_wal_range_tli)
561
0
      break;
562
0
  }
563
564
  /*
565
   * Read all of the required block reference table files and merge all of
566
   * the data into a single in-memory block reference table.
567
   *
568
   * See the comments for struct IncrementalBackupInfo for some thoughts on
569
   * memory usage.
570
   */
571
0
  ib->brtab = CreateEmptyBlockRefTable();
572
0
  foreach(lc, required_wslist)
573
0
  {
574
0
    WalSummaryFile *ws = lfirst(lc);
575
0
    WalSummaryIO wsio;
576
0
    BlockRefTableReader *reader;
577
0
    RelFileLocator rlocator;
578
0
    ForkNumber  forknum;
579
0
    BlockNumber limit_block;
580
0
    BlockNumber blocks[BLOCKS_PER_READ];
581
582
0
    wsio.file = OpenWalSummaryFile(ws, false);
583
0
    wsio.filepos = 0;
584
0
    ereport(DEBUG1,
585
0
        (errmsg_internal("reading WAL summary file \"%s\"",
586
0
                 FilePathName(wsio.file))));
587
0
    reader = CreateBlockRefTableReader(ReadWalSummary, &wsio,
588
0
                       FilePathName(wsio.file),
589
0
                       ReportWalSummaryError, NULL);
590
0
    while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
591
0
                         &limit_block))
592
0
    {
593
0
      BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
594
0
                     forknum, limit_block);
595
596
0
      while (1)
597
0
      {
598
0
        unsigned  nblocks;
599
0
        unsigned  i;
600
601
0
        nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
602
0
                             BLOCKS_PER_READ);
603
0
        if (nblocks == 0)
604
0
          break;
605
606
0
        for (i = 0; i < nblocks; ++i)
607
0
          BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
608
0
                           forknum, blocks[i]);
609
0
      }
610
0
    }
611
0
    DestroyBlockRefTableReader(reader);
612
0
    FileClose(wsio.file);
613
0
  }
614
615
  /* Switch back to previous memory context. */
616
0
  MemoryContextSwitchTo(oldcontext);
617
0
}
618
619
/*
620
 * Get the pathname that should be used when a file is sent incrementally.
621
 *
622
 * The result is a palloc'd string.
623
 */
624
char *
625
GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
626
             ForkNumber forknum, unsigned segno)
627
0
{
628
0
  RelPathStr  path;
629
0
  char     *lastslash;
630
0
  char     *ipath;
631
632
0
  path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
633
0
               forknum);
634
635
0
  lastslash = strrchr(path.str, '/');
636
0
  Assert(lastslash != NULL);
637
0
  *lastslash = '\0';
638
639
0
  if (segno > 0)
640
0
    ipath = psprintf("%s/INCREMENTAL.%s.%u", path.str, lastslash + 1, segno);
641
0
  else
642
0
    ipath = psprintf("%s/INCREMENTAL.%s", path.str, lastslash + 1);
643
644
0
  return ipath;
645
0
}
646
647
/*
648
 * How should we back up a particular file as part of an incremental backup?
649
 *
650
 * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
651
 * file just as if this were not an incremental backup.  The contents of the
652
 * relative_block_numbers array are unspecified in this case.
653
 *
654
 * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
655
 * an incremental file in the backup instead of the entire file. On return,
656
 * *num_blocks_required will be set to the number of blocks that need to be
657
 * sent, and the actual block numbers will have been stored in
658
 * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
659
 * In addition, *truncation_block_length will be set to the value that should
660
 * be included in the incremental file.
661
 */
662
FileBackupMethod
663
GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
664
          Oid dboid, Oid spcoid,
665
          RelFileNumber relfilenumber, ForkNumber forknum,
666
          unsigned segno, size_t size,
667
          unsigned *num_blocks_required,
668
          BlockNumber *relative_block_numbers,
669
          unsigned *truncation_block_length)
670
0
{
671
0
  BlockNumber limit_block;
672
0
  BlockNumber start_blkno;
673
0
  BlockNumber stop_blkno;
674
0
  RelFileLocator rlocator;
675
0
  BlockRefTableEntry *brtentry;
676
0
  unsigned  i;
677
0
  unsigned  nblocks;
678
679
  /* Should only be called after PrepareForIncrementalBackup. */
680
0
  Assert(ib->buf.data == NULL);
681
682
  /*
683
   * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
684
   * should have legal values.
685
   */
686
0
  Assert(OidIsValid(spcoid));
687
0
  Assert(RelFileNumberIsValid(relfilenumber));
688
689
  /*
690
   * If the file size is too large or not a multiple of BLCKSZ, then
691
   * something weird is happening, so give up and send the whole file.
692
   */
693
0
  if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
694
0
    return BACK_UP_FILE_FULLY;
695
696
  /*
697
   * The free-space map fork is not properly WAL-logged, so we need to
698
   * backup the entire file every time.
699
   */
700
0
  if (forknum == FSM_FORKNUM)
701
0
    return BACK_UP_FILE_FULLY;
702
703
  /*
704
   * If this file was not part of the prior backup, back it up fully.
705
   *
706
   * If this file was created after the prior backup and before the start of
707
   * the current backup, then the WAL summary information will tell us to
708
   * back up the whole file. However, if this file was created after the
709
   * start of the current backup, then the WAL summary won't know anything
710
   * about it. Without this logic, we would erroneously conclude that it was
711
   * OK to send it incrementally.
712
   *
713
   * Note that the file could have existed at the time of the prior backup,
714
   * gotten deleted, and then a new file with the same name could have been
715
   * created.  In that case, this logic won't prevent the file from being
716
   * backed up incrementally. But, if the deletion happened before the start
717
   * of the current backup, the limit block will be 0, inducing a full
718
   * backup. If the deletion happened after the start of the current backup,
719
   * reconstruction will erroneously combine blocks from the current
720
   * lifespan of the file with blocks from the previous lifespan -- but in
721
   * this type of case, WAL replay to reach backup consistency should remove
722
   * and recreate the file anyway, so the initial bogus contents should not
723
   * matter.
724
   */
725
0
  if (backup_file_lookup(ib->manifest_files, path) == NULL)
726
0
  {
727
0
    char     *ipath;
728
729
0
    ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
730
0
                     forknum, segno);
731
0
    if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
732
0
      return BACK_UP_FILE_FULLY;
733
0
  }
734
735
  /*
736
   * Look up the special block reference table entry for the database as a
737
   * whole.
738
   */
739
0
  rlocator.spcOid = spcoid;
740
0
  rlocator.dbOid = dboid;
741
0
  rlocator.relNumber = 0;
742
0
  if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
743
0
                &limit_block) != NULL)
744
0
  {
745
    /*
746
     * According to the WAL summary, this database OID/tablespace OID
747
     * pairing has been created since the previous backup. So, everything
748
     * in it must be backed up fully.
749
     */
750
0
    return BACK_UP_FILE_FULLY;
751
0
  }
752
753
  /* Look up the block reference table entry for this relfilenode. */
754
0
  rlocator.relNumber = relfilenumber;
755
0
  brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
756
0
                   &limit_block);
757
758
  /*
759
   * If there is no entry, then there have been no WAL-logged changes to the
760
   * relation since the predecessor backup was taken, so we can back it up
761
   * incrementally and need not include any modified blocks.
762
   *
763
   * However, if the file is zero-length, we should do a full backup,
764
   * because an incremental file is always more than zero length, and it's
765
   * silly to take an incremental backup when a full backup would be
766
   * smaller.
767
   */
768
0
  if (brtentry == NULL)
769
0
  {
770
0
    if (size == 0)
771
0
      return BACK_UP_FILE_FULLY;
772
0
    *num_blocks_required = 0;
773
0
    *truncation_block_length = size / BLCKSZ;
774
0
    return BACK_UP_FILE_INCREMENTALLY;
775
0
  }
776
777
  /*
778
   * If the limit_block is less than or equal to the point where this
779
   * segment starts, send the whole file.
780
   */
781
0
  if (limit_block <= segno * RELSEG_SIZE)
782
0
    return BACK_UP_FILE_FULLY;
783
784
  /*
785
   * Get relevant entries from the block reference table entry.
786
   *
787
   * We shouldn't overflow computing the start or stop block numbers, but if
788
   * it manages to happen somehow, detect it and throw an error.
789
   */
790
0
  start_blkno = segno * RELSEG_SIZE;
791
0
  stop_blkno = start_blkno + (size / BLCKSZ);
792
0
  if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
793
0
    ereport(ERROR,
794
0
        errcode(ERRCODE_INTERNAL_ERROR),
795
0
        errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
796
0
                segno, size));
797
798
  /*
799
   * This will write *absolute* block numbers into the output array, but
800
   * we'll transpose them below.
801
   */
802
0
  nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
803
0
                      relative_block_numbers, RELSEG_SIZE);
804
0
  Assert(nblocks <= RELSEG_SIZE);
805
806
  /*
807
   * If we're going to have to send nearly all of the blocks, then just send
808
   * the whole file, because that won't require much extra storage or
809
   * transfer and will speed up and simplify backup restoration. It's not
810
   * clear what threshold is most appropriate here and perhaps it ought to
811
   * be configurable, but for now we're just going to say that if we'd need
812
   * to send 90% of the blocks anyway, give up and send the whole file.
813
   *
814
   * NB: If you change the threshold here, at least make sure to back up the
815
   * file fully when every single block must be sent, because there's
816
   * nothing good about sending an incremental file in that case.
817
   */
818
0
  if (nblocks * BLCKSZ > size * 0.9)
819
0
    return BACK_UP_FILE_FULLY;
820
821
  /*
822
   * Looks like we can send an incremental file, so sort the block numbers
823
   * and then transpose them from absolute block numbers to relative block
824
   * numbers if necessary.
825
   *
826
   * NB: If the block reference table was using the bitmap representation
827
   * for a given chunk, the block numbers in that chunk will already be
828
   * sorted, but when the array-of-offsets representation is used, we can
829
   * receive block numbers here out of order.
830
   */
831
0
  qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
832
0
      compare_block_numbers);
833
0
  if (start_blkno != 0)
834
0
  {
835
0
    for (i = 0; i < nblocks; ++i)
836
0
      relative_block_numbers[i] -= start_blkno;
837
0
  }
838
0
  *num_blocks_required = nblocks;
839
840
  /*
841
   * The truncation block length is the minimum length of the reconstructed
842
   * file. Any block numbers below this threshold that are not present in
843
   * the backup need to be fetched from the prior backup. At or above this
844
   * threshold, blocks should only be included in the result if they are
845
   * present in the backup. (This may require inserting zero blocks if the
846
   * blocks included in the backup are non-consecutive.)
847
   */
848
0
  *truncation_block_length = size / BLCKSZ;
849
0
  if (BlockNumberIsValid(limit_block))
850
0
  {
851
0
    unsigned  relative_limit = limit_block - segno * RELSEG_SIZE;
852
853
0
    if (*truncation_block_length < relative_limit)
854
0
      *truncation_block_length = relative_limit;
855
0
  }
856
857
  /* Send it incrementally. */
858
0
  return BACK_UP_FILE_INCREMENTALLY;
859
0
}
860
861
/*
862
 * Compute the size for a header of an incremental file containing a given
863
 * number of blocks. The header is rounded to a multiple of BLCKSZ, but
864
 * only if the file will store some block data.
865
 */
866
size_t
867
GetIncrementalHeaderSize(unsigned num_blocks_required)
868
0
{
869
0
  size_t    result;
870
871
  /* Make sure we're not going to overflow. */
872
0
  Assert(num_blocks_required <= RELSEG_SIZE);
873
874
  /*
875
   * Three four byte quantities (magic number, truncation block length,
876
   * block count) followed by block numbers.
877
   */
878
0
  result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);
879
880
  /*
881
   * Round the header size to a multiple of BLCKSZ - when not a multiple of
882
   * BLCKSZ, add the missing fraction of a block. But do this only if the
883
   * file will store data for some blocks, otherwise keep it small.
884
   */
885
0
  if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
886
0
    result += BLCKSZ - (result % BLCKSZ);
887
888
0
  return result;
889
0
}
890
891
/*
892
 * Compute the size for an incremental file containing a given number of blocks.
893
 */
894
size_t
895
GetIncrementalFileSize(unsigned num_blocks_required)
896
0
{
897
0
  size_t    result;
898
899
  /* Make sure we're not going to overflow. */
900
0
  Assert(num_blocks_required <= RELSEG_SIZE);
901
902
  /*
903
   * Header with three four byte quantities (magic number, truncation block
904
   * length, block count) followed by block numbers, rounded to a multiple
905
   * of BLCKSZ (for files with block data), followed by block contents.
906
   */
907
0
  result = GetIncrementalHeaderSize(num_blocks_required);
908
0
  result += BLCKSZ * num_blocks_required;
909
910
0
  return result;
911
0
}
912
913
/*
914
 * Helper function for filemap hash table.
915
 */
916
static uint32
917
hash_string_pointer(const char *s)
918
0
{
919
0
  unsigned char *ss = (unsigned char *) s;
920
921
0
  return hash_bytes(ss, strlen(s));
922
0
}
923
924
/*
925
 * This callback to validate the manifest version for incremental backup.
926
 */
927
static void
928
manifest_process_version(JsonManifestParseContext *context,
929
             int manifest_version)
930
0
{
931
  /* Incremental backups don't work with manifest version 1 */
932
0
  if (manifest_version == 1)
933
0
    context->error_cb(context,
934
0
              "backup manifest version 1 does not support incremental backup");
935
0
}
936
937
/*
938
 * This callback to validate the manifest system identifier against the current
939
 * database server.
940
 */
941
static void
942
manifest_process_system_identifier(JsonManifestParseContext *context,
943
                   uint64 manifest_system_identifier)
944
0
{
945
0
  uint64    system_identifier;
946
947
  /* Get system identifier of current system */
948
0
  system_identifier = GetSystemIdentifier();
949
950
0
  if (manifest_system_identifier != system_identifier)
951
0
    context->error_cb(context,
952
0
              "system identifier in backup manifest is %" PRIu64 ", but database system identifier is %" PRIu64,
953
0
              manifest_system_identifier,
954
0
              system_identifier);
955
0
}
956
957
/*
958
 * This callback is invoked for each file mentioned in the backup manifest.
959
 *
960
 * We store the path to each file and the size of each file for sanity-checking
961
 * purposes. For further details, see comments for IncrementalBackupInfo.
962
 */
963
static void
964
manifest_process_file(JsonManifestParseContext *context,
965
            const char *pathname, uint64 size,
966
            pg_checksum_type checksum_type,
967
            int checksum_length,
968
            uint8 *checksum_payload)
969
0
{
970
0
  IncrementalBackupInfo *ib = context->private_data;
971
0
  backup_file_entry *entry;
972
0
  bool    found;
973
974
0
  entry = backup_file_insert(ib->manifest_files, pathname, &found);
975
0
  if (!found)
976
0
  {
977
0
    entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
978
0
                      pathname);
979
0
    entry->size = size;
980
0
  }
981
0
}
982
983
/*
984
 * This callback is invoked for each WAL range mentioned in the backup
985
 * manifest.
986
 *
987
 * We're just interested in learning the oldest LSN and the corresponding TLI
988
 * that appear in any WAL range.
989
 */
990
static void
991
manifest_process_wal_range(JsonManifestParseContext *context,
992
               TimeLineID tli, XLogRecPtr start_lsn,
993
               XLogRecPtr end_lsn)
994
0
{
995
0
  IncrementalBackupInfo *ib = context->private_data;
996
0
  backup_wal_range *range = palloc(sizeof(backup_wal_range));
997
998
0
  range->tli = tli;
999
0
  range->start_lsn = start_lsn;
1000
0
  range->end_lsn = end_lsn;
1001
0
  ib->manifest_wal_ranges = lappend(ib->manifest_wal_ranges, range);
1002
0
}
1003
1004
/*
1005
 * This callback is invoked if an error occurs while parsing the backup
1006
 * manifest.
1007
 */
1008
static void
1009
manifest_report_error(JsonManifestParseContext *context, const char *fmt,...)
1010
0
{
1011
0
  StringInfoData errbuf;
1012
1013
0
  initStringInfo(&errbuf);
1014
1015
0
  for (;;)
1016
0
  {
1017
0
    va_list   ap;
1018
0
    int     needed;
1019
1020
0
    va_start(ap, fmt);
1021
0
    needed = appendStringInfoVA(&errbuf, fmt, ap);
1022
0
    va_end(ap);
1023
0
    if (needed == 0)
1024
0
      break;
1025
0
    enlargeStringInfo(&errbuf, needed);
1026
0
  }
1027
1028
0
  ereport(ERROR,
1029
0
      errmsg_internal("%s", errbuf.data));
1030
0
}
1031
1032
/*
1033
 * Quicksort comparator for block numbers.
1034
 */
1035
static int
1036
compare_block_numbers(const void *a, const void *b)
1037
0
{
1038
0
  BlockNumber aa = *(BlockNumber *) a;
1039
0
  BlockNumber bb = *(BlockNumber *) b;
1040
1041
0
  return pg_cmp_u32(aa, bb);
1042
0
}