Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/common/blkreftable.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * blkreftable.c
4
 *    Block reference tables.
5
 *
6
 * A block reference table is used to keep track of which blocks have
7
 * been modified by WAL records within a certain LSN range.
8
 *
9
 * For each relation fork, we keep track of all blocks that have appeared
10
 * in block reference in the WAL. We also keep track of the "limit block",
11
 * which is the smallest relation length in blocks known to have occurred
12
 * during that range of WAL records.  This should be set to 0 if the relation
13
 * fork is created or destroyed, and to the post-truncation length if
14
 * truncated.
15
 *
16
 * Whenever we set the limit block, we also forget about any modified blocks
17
 * beyond that point. Those blocks don't exist any more. Such blocks can
18
 * later be marked as modified again; if that happens, it means the relation
19
 * was re-extended.
20
 *
21
 * Portions Copyright (c) 2010-2025, PostgreSQL Global Development Group
22
 *
23
 * src/common/blkreftable.c
24
 *
25
 *-------------------------------------------------------------------------
26
 */
27
28
29
#ifndef FRONTEND
30
#include "postgres.h"
31
#else
32
#include "postgres_fe.h"
33
#endif
34
35
#ifdef FRONTEND
36
#include "common/logging.h"
37
#endif
38
39
#include "common/blkreftable.h"
40
#include "common/hashfn.h"
41
#include "port/pg_crc32c.h"
42
43
/*
44
 * A block reference table keeps track of the status of each relation
45
 * fork individually.
46
 */
47
typedef struct BlockRefTableKey
48
{
49
  RelFileLocator rlocator;
50
  ForkNumber  forknum;
51
} BlockRefTableKey;
52
53
/*
54
 * We could need to store data either for a relation in which only a
55
 * tiny fraction of the blocks have been modified or for a relation in
56
 * which nearly every block has been modified, and we want a
57
 * space-efficient representation in both cases. To accomplish this,
58
 * we divide the relation into chunks of 2^16 blocks and choose between
59
 * an array representation and a bitmap representation for each chunk.
60
 *
61
 * When the number of modified blocks in a given chunk is small, we
62
 * essentially store an array of block numbers, but we need not store the
63
 * entire block number: instead, we store each block number as a 2-byte
64
 * offset from the start of the chunk.
65
 *
66
 * When the number of modified blocks in a given chunk is large, we switch
67
 * to a bitmap representation.
68
 *
69
 * These same basic representational choices are used both when a block
70
 * reference table is stored in memory and when it is serialized to disk.
71
 *
72
 * In the in-memory representation, we initially allocate each chunk with
73
 * space for a number of entries given by INITIAL_ENTRIES_PER_CHUNK and
74
 * increase that as necessary until we reach MAX_ENTRIES_PER_CHUNK.
75
 * Any chunk whose allocated size reaches MAX_ENTRIES_PER_CHUNK is converted
76
 * to a bitmap, and thus never needs to grow further.
77
 */
78
0
#define BLOCKS_PER_CHUNK    (1 << 16)
79
0
#define BLOCKS_PER_ENTRY    (BITS_PER_BYTE * sizeof(uint16))
80
0
#define MAX_ENTRIES_PER_CHUNK (BLOCKS_PER_CHUNK / BLOCKS_PER_ENTRY)
81
0
#define INITIAL_ENTRIES_PER_CHUNK 16
82
typedef uint16 *BlockRefTableChunk;
83
84
/*
85
 * State for one relation fork.
86
 *
87
 * 'rlocator' and 'forknum' identify the relation fork to which this entry
88
 * pertains.
89
 *
90
 * 'limit_block' is the shortest known length of the relation in blocks
91
 * within the LSN range covered by a particular block reference table.
92
 * It should be set to 0 if the relation fork is created or dropped. If the
93
 * relation fork is truncated, it should be set to the number of blocks that
94
 * remain after truncation.
95
 *
96
 * 'nchunks' is the allocated length of each of the three arrays that follow.
97
 * We can only represent the status of block numbers less than nchunks *
98
 * BLOCKS_PER_CHUNK.
99
 *
100
 * 'chunk_size' is an array storing the allocated size of each chunk.
101
 *
102
 * 'chunk_usage' is an array storing the number of elements used in each
103
 * chunk. If that value is less than MAX_ENTRIES_PER_CHUNK, the corresponding
104
 * chunk is used as an array; else the corresponding chunk is used as a bitmap.
105
 * When used as a bitmap, the least significant bit of the first array element
106
 * is the status of the lowest-numbered block covered by this chunk.
107
 *
108
 * 'chunk_data' is the array of chunks.
109
 */
110
struct BlockRefTableEntry
111
{
112
  BlockRefTableKey key;
113
  BlockNumber limit_block;
114
  char    status;
115
  uint32    nchunks;
116
  uint16     *chunk_size;
117
  uint16     *chunk_usage;
118
  BlockRefTableChunk *chunk_data;
119
};
120
121
/* Declare and define a hash table over type BlockRefTableEntry. */
122
#define SH_PREFIX blockreftable
123
0
#define SH_ELEMENT_TYPE BlockRefTableEntry
124
#define SH_KEY_TYPE BlockRefTableKey
125
0
#define SH_KEY key
126
#define SH_HASH_KEY(tb, key) \
127
0
  hash_bytes((const unsigned char *) &key, sizeof(BlockRefTableKey))
128
0
#define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(BlockRefTableKey)) == 0)
129
#define SH_SCOPE static inline
130
#ifdef FRONTEND
131
#define SH_RAW_ALLOCATOR pg_malloc0
132
#endif
133
#define SH_DEFINE
134
#define SH_DECLARE
135
#include "lib/simplehash.h"
136
137
/*
138
 * A block reference table is basically just the hash table, but we don't
139
 * want to expose that to outside callers.
140
 *
141
 * We keep track of the memory context in use explicitly too, so that it's
142
 * easy to place all of our allocations in the same context.
143
 */
144
struct BlockRefTable
145
{
146
  blockreftable_hash *hash;
147
#ifndef FRONTEND
148
  MemoryContext mcxt;
149
#endif
150
};
151
152
/*
153
 * On-disk serialization format for block reference table entries.
154
 */
155
typedef struct BlockRefTableSerializedEntry
156
{
157
  RelFileLocator rlocator;
158
  ForkNumber  forknum;
159
  BlockNumber limit_block;
160
  uint32    nchunks;
161
} BlockRefTableSerializedEntry;
162
163
/*
164
 * Buffer size, so that we avoid doing many small I/Os.
165
 */
166
0
#define BUFSIZE         65536
167
168
/*
169
 * Ad-hoc buffer for file I/O.
170
 */
171
typedef struct BlockRefTableBuffer
172
{
173
  io_callback_fn io_callback;
174
  void     *io_callback_arg;
175
  char    data[BUFSIZE];
176
  int     used;
177
  int     cursor;
178
  pg_crc32c crc;
179
} BlockRefTableBuffer;
180
181
/*
182
 * State for keeping track of progress while incrementally reading a block
183
 * table reference file from disk.
184
 *
185
 * total_chunks means the number of chunks for the RelFileLocator/ForkNumber
186
 * combination that is currently being read, and consumed_chunks is the number
187
 * of those that have been read. (We always read all the information for
188
 * a single chunk at one time, so we don't need to be able to represent the
189
 * state where a chunk has been partially read.)
190
 *
191
 * chunk_size is the array of chunk sizes. The length is given by total_chunks.
192
 *
193
 * chunk_data holds the current chunk.
194
 *
195
 * chunk_position helps us figure out how much progress we've made in returning
196
 * the block numbers for the current chunk to the caller. If the chunk is a
197
 * bitmap, it's the number of bits we've scanned; otherwise, it's the number
198
 * of chunk entries we've scanned.
199
 */
200
struct BlockRefTableReader
201
{
202
  BlockRefTableBuffer buffer;
203
  char     *error_filename;
204
  report_error_fn error_callback;
205
  void     *error_callback_arg;
206
  uint32    total_chunks;
207
  uint32    consumed_chunks;
208
  uint16     *chunk_size;
209
  uint16    chunk_data[MAX_ENTRIES_PER_CHUNK];
210
  uint32    chunk_position;
211
};
212
213
/*
214
 * State for keeping track of progress while incrementally writing a block
215
 * reference table file to disk.
216
 */
217
struct BlockRefTableWriter
218
{
219
  BlockRefTableBuffer buffer;
220
};
221
222
/* Function prototypes. */
223
static int  BlockRefTableComparator(const void *a, const void *b);
224
static void BlockRefTableFlush(BlockRefTableBuffer *buffer);
225
static void BlockRefTableRead(BlockRefTableReader *reader, void *data,
226
                int length);
227
static void BlockRefTableWrite(BlockRefTableBuffer *buffer, void *data,
228
                 int length);
229
static void BlockRefTableFileTerminate(BlockRefTableBuffer *buffer);
230
231
/*
232
 * Create an empty block reference table.
233
 */
234
BlockRefTable *
235
CreateEmptyBlockRefTable(void)
236
0
{
237
0
  BlockRefTable *brtab = palloc(sizeof(BlockRefTable));
238
239
  /*
240
   * Even completely empty database has a few hundred relation forks, so it
241
   * seems best to size the hash on the assumption that we're going to have
242
   * at least a few thousand entries.
243
   */
244
#ifdef FRONTEND
245
  brtab->hash = blockreftable_create(4096, NULL);
246
#else
247
0
  brtab->mcxt = CurrentMemoryContext;
248
0
  brtab->hash = blockreftable_create(brtab->mcxt, 4096, NULL);
249
0
#endif
250
251
0
  return brtab;
252
0
}
253
254
/*
255
 * Set the "limit block" for a relation fork and forget any modified blocks
256
 * with equal or higher block numbers.
257
 *
258
 * The "limit block" is the shortest known length of the relation within the
259
 * range of WAL records covered by this block reference table.
260
 */
261
void
262
BlockRefTableSetLimitBlock(BlockRefTable *brtab,
263
               const RelFileLocator *rlocator,
264
               ForkNumber forknum,
265
               BlockNumber limit_block)
266
0
{
267
0
  BlockRefTableEntry *brtentry;
268
0
  BlockRefTableKey key = {0}; /* make sure any padding is zero */
269
0
  bool    found;
270
271
0
  memcpy(&key.rlocator, rlocator, sizeof(RelFileLocator));
272
0
  key.forknum = forknum;
273
0
  brtentry = blockreftable_insert(brtab->hash, key, &found);
274
275
0
  if (!found)
276
0
  {
277
    /*
278
     * We have no existing data about this relation fork, so just record
279
     * the limit_block value supplied by the caller, and make sure other
280
     * parts of the entry are properly initialized.
281
     */
282
0
    brtentry->limit_block = limit_block;
283
0
    brtentry->nchunks = 0;
284
0
    brtentry->chunk_size = NULL;
285
0
    brtentry->chunk_usage = NULL;
286
0
    brtentry->chunk_data = NULL;
287
0
    return;
288
0
  }
289
290
0
  BlockRefTableEntrySetLimitBlock(brtentry, limit_block);
291
0
}
292
293
/*
294
 * Mark a block in a given relation fork as known to have been modified.
295
 */
296
void
297
BlockRefTableMarkBlockModified(BlockRefTable *brtab,
298
                 const RelFileLocator *rlocator,
299
                 ForkNumber forknum,
300
                 BlockNumber blknum)
301
0
{
302
0
  BlockRefTableEntry *brtentry;
303
0
  BlockRefTableKey key = {0}; /* make sure any padding is zero */
304
0
  bool    found;
305
0
#ifndef FRONTEND
306
0
  MemoryContext oldcontext = MemoryContextSwitchTo(brtab->mcxt);
307
0
#endif
308
309
0
  memcpy(&key.rlocator, rlocator, sizeof(RelFileLocator));
310
0
  key.forknum = forknum;
311
0
  brtentry = blockreftable_insert(brtab->hash, key, &found);
312
313
0
  if (!found)
314
0
  {
315
    /*
316
     * We want to set the initial limit block value to something higher
317
     * than any legal block number. InvalidBlockNumber fits the bill.
318
     */
319
0
    brtentry->limit_block = InvalidBlockNumber;
320
0
    brtentry->nchunks = 0;
321
0
    brtentry->chunk_size = NULL;
322
0
    brtentry->chunk_usage = NULL;
323
0
    brtentry->chunk_data = NULL;
324
0
  }
325
326
0
  BlockRefTableEntryMarkBlockModified(brtentry, forknum, blknum);
327
328
0
#ifndef FRONTEND
329
0
  MemoryContextSwitchTo(oldcontext);
330
0
#endif
331
0
}
332
333
/*
334
 * Get an entry from a block reference table.
335
 *
336
 * If the entry does not exist, this function returns NULL. Otherwise, it
337
 * returns the entry and sets *limit_block to the value from the entry.
338
 */
339
BlockRefTableEntry *
340
BlockRefTableGetEntry(BlockRefTable *brtab, const RelFileLocator *rlocator,
341
            ForkNumber forknum, BlockNumber *limit_block)
342
0
{
343
0
  BlockRefTableKey key = {0}; /* make sure any padding is zero */
344
0
  BlockRefTableEntry *entry;
345
346
0
  Assert(limit_block != NULL);
347
348
0
  memcpy(&key.rlocator, rlocator, sizeof(RelFileLocator));
349
0
  key.forknum = forknum;
350
0
  entry = blockreftable_lookup(brtab->hash, key);
351
352
0
  if (entry != NULL)
353
0
    *limit_block = entry->limit_block;
354
355
0
  return entry;
356
0
}
357
358
/*
359
 * Get block numbers from a table entry.
360
 *
361
 * 'blocks' must point to enough space to hold at least 'nblocks' block
362
 * numbers, and any block numbers we manage to get will be written there.
363
 * The return value is the number of block numbers actually written.
364
 *
365
 * We do not return block numbers unless they are greater than or equal to
366
 * start_blkno and strictly less than stop_blkno.
367
 */
368
int
369
BlockRefTableEntryGetBlocks(BlockRefTableEntry *entry,
370
              BlockNumber start_blkno,
371
              BlockNumber stop_blkno,
372
              BlockNumber *blocks,
373
              int nblocks)
374
0
{
375
0
  uint32    start_chunkno;
376
0
  uint32    stop_chunkno;
377
0
  uint32    chunkno;
378
0
  int     nresults = 0;
379
380
0
  Assert(entry != NULL);
381
382
  /*
383
   * Figure out which chunks could potentially contain blocks of interest.
384
   *
385
   * We need to be careful about overflow here, because stop_blkno could be
386
   * InvalidBlockNumber or something very close to it.
387
   */
388
0
  start_chunkno = start_blkno / BLOCKS_PER_CHUNK;
389
0
  stop_chunkno = stop_blkno / BLOCKS_PER_CHUNK;
390
0
  if ((stop_blkno % BLOCKS_PER_CHUNK) != 0)
391
0
    ++stop_chunkno;
392
0
  if (stop_chunkno > entry->nchunks)
393
0
    stop_chunkno = entry->nchunks;
394
395
  /*
396
   * Loop over chunks.
397
   */
398
0
  for (chunkno = start_chunkno; chunkno < stop_chunkno; ++chunkno)
399
0
  {
400
0
    uint16    chunk_usage = entry->chunk_usage[chunkno];
401
0
    BlockRefTableChunk chunk_data = entry->chunk_data[chunkno];
402
0
    unsigned  start_offset = 0;
403
0
    unsigned  stop_offset = BLOCKS_PER_CHUNK;
404
405
    /*
406
     * If the start and/or stop block number falls within this chunk, the
407
     * whole chunk may not be of interest. Figure out which portion we
408
     * care about, if it's not the whole thing.
409
     */
410
0
    if (chunkno == start_chunkno)
411
0
      start_offset = start_blkno % BLOCKS_PER_CHUNK;
412
0
    if (chunkno == stop_chunkno - 1)
413
0
    {
414
0
      Assert(stop_blkno > chunkno * BLOCKS_PER_CHUNK);
415
0
      stop_offset = stop_blkno - (chunkno * BLOCKS_PER_CHUNK);
416
0
      Assert(stop_offset <= BLOCKS_PER_CHUNK);
417
0
    }
418
419
    /*
420
     * Handling differs depending on whether this is an array of offsets
421
     * or a bitmap.
422
     */
423
0
    if (chunk_usage == MAX_ENTRIES_PER_CHUNK)
424
0
    {
425
0
      unsigned  i;
426
427
      /* It's a bitmap, so test every relevant bit. */
428
0
      for (i = start_offset; i < stop_offset; ++i)
429
0
      {
430
0
        uint16    w = chunk_data[i / BLOCKS_PER_ENTRY];
431
432
0
        if ((w & (1 << (i % BLOCKS_PER_ENTRY))) != 0)
433
0
        {
434
0
          BlockNumber blkno = chunkno * BLOCKS_PER_CHUNK + i;
435
436
0
          blocks[nresults++] = blkno;
437
438
          /* Early exit if we run out of output space. */
439
0
          if (nresults == nblocks)
440
0
            return nresults;
441
0
        }
442
0
      }
443
0
    }
444
0
    else
445
0
    {
446
0
      unsigned  i;
447
448
      /* It's an array of offsets, so check each one. */
449
0
      for (i = 0; i < chunk_usage; ++i)
450
0
      {
451
0
        uint16    offset = chunk_data[i];
452
453
0
        if (offset >= start_offset && offset < stop_offset)
454
0
        {
455
0
          BlockNumber blkno = chunkno * BLOCKS_PER_CHUNK + offset;
456
457
0
          blocks[nresults++] = blkno;
458
459
          /* Early exit if we run out of output space. */
460
0
          if (nresults == nblocks)
461
0
            return nresults;
462
0
        }
463
0
      }
464
0
    }
465
0
  }
466
467
0
  return nresults;
468
0
}
469
470
/*
471
 * Serialize a block reference table to a file.
472
 */
473
void
474
WriteBlockRefTable(BlockRefTable *brtab,
475
           io_callback_fn write_callback,
476
           void *write_callback_arg)
477
0
{
478
0
  BlockRefTableSerializedEntry *sdata = NULL;
479
0
  BlockRefTableBuffer buffer;
480
0
  uint32    magic = BLOCKREFTABLE_MAGIC;
481
482
  /* Prepare buffer. */
483
0
  memset(&buffer, 0, sizeof(BlockRefTableBuffer));
484
0
  buffer.io_callback = write_callback;
485
0
  buffer.io_callback_arg = write_callback_arg;
486
0
  INIT_CRC32C(buffer.crc);
487
488
  /* Write magic number. */
489
0
  BlockRefTableWrite(&buffer, &magic, sizeof(uint32));
490
491
  /* Write the entries, assuming there are some. */
492
0
  if (brtab->hash->members > 0)
493
0
  {
494
0
    unsigned  i = 0;
495
0
    blockreftable_iterator it;
496
0
    BlockRefTableEntry *brtentry;
497
498
    /* Extract entries into serializable format and sort them. */
499
0
    sdata =
500
0
      palloc(brtab->hash->members * sizeof(BlockRefTableSerializedEntry));
501
0
    blockreftable_start_iterate(brtab->hash, &it);
502
0
    while ((brtentry = blockreftable_iterate(brtab->hash, &it)) != NULL)
503
0
    {
504
0
      BlockRefTableSerializedEntry *sentry = &sdata[i++];
505
506
0
      sentry->rlocator = brtentry->key.rlocator;
507
0
      sentry->forknum = brtentry->key.forknum;
508
0
      sentry->limit_block = brtentry->limit_block;
509
0
      sentry->nchunks = brtentry->nchunks;
510
511
      /* trim trailing zero entries */
512
0
      while (sentry->nchunks > 0 &&
513
0
           brtentry->chunk_usage[sentry->nchunks - 1] == 0)
514
0
        sentry->nchunks--;
515
0
    }
516
0
    Assert(i == brtab->hash->members);
517
0
    qsort(sdata, i, sizeof(BlockRefTableSerializedEntry),
518
0
        BlockRefTableComparator);
519
520
    /* Loop over entries in sorted order and serialize each one. */
521
0
    for (i = 0; i < brtab->hash->members; ++i)
522
0
    {
523
0
      BlockRefTableSerializedEntry *sentry = &sdata[i];
524
0
      BlockRefTableKey key = {0}; /* make sure any padding is zero */
525
0
      unsigned  j;
526
527
      /* Write the serialized entry itself. */
528
0
      BlockRefTableWrite(&buffer, sentry,
529
0
                 sizeof(BlockRefTableSerializedEntry));
530
531
      /* Look up the original entry so we can access the chunks. */
532
0
      memcpy(&key.rlocator, &sentry->rlocator, sizeof(RelFileLocator));
533
0
      key.forknum = sentry->forknum;
534
0
      brtentry = blockreftable_lookup(brtab->hash, key);
535
0
      Assert(brtentry != NULL);
536
537
      /* Write the untruncated portion of the chunk length array. */
538
0
      if (sentry->nchunks != 0)
539
0
        BlockRefTableWrite(&buffer, brtentry->chunk_usage,
540
0
                   sentry->nchunks * sizeof(uint16));
541
542
      /* Write the contents of each chunk. */
543
0
      for (j = 0; j < brtentry->nchunks; ++j)
544
0
      {
545
0
        if (brtentry->chunk_usage[j] == 0)
546
0
          continue;
547
0
        BlockRefTableWrite(&buffer, brtentry->chunk_data[j],
548
0
                   brtentry->chunk_usage[j] * sizeof(uint16));
549
0
      }
550
0
    }
551
0
  }
552
553
  /* Write out appropriate terminator and CRC and flush buffer. */
554
0
  BlockRefTableFileTerminate(&buffer);
555
0
}
556
557
/*
558
 * Prepare to incrementally read a block reference table file.
559
 *
560
 * 'read_callback' is a function that can be called to read data from the
561
 * underlying file (or other data source) into our internal buffer.
562
 *
563
 * 'read_callback_arg' is an opaque argument to be passed to read_callback.
564
 *
565
 * 'error_filename' is the filename that should be included in error messages
566
 * if the file is found to be malformed. The value is not copied, so the
567
 * caller should ensure that it remains valid until done with this
568
 * BlockRefTableReader.
569
 *
570
 * 'error_callback' is a function to be called if the file is found to be
571
 * malformed. This is not used for I/O errors, which must be handled internally
572
 * by read_callback.
573
 *
574
 * 'error_callback_arg' is an opaque argument to be passed to error_callback.
575
 */
576
BlockRefTableReader *
577
CreateBlockRefTableReader(io_callback_fn read_callback,
578
              void *read_callback_arg,
579
              char *error_filename,
580
              report_error_fn error_callback,
581
              void *error_callback_arg)
582
0
{
583
0
  BlockRefTableReader *reader;
584
0
  uint32    magic;
585
586
  /* Initialize data structure. */
587
0
  reader = palloc0(sizeof(BlockRefTableReader));
588
0
  reader->buffer.io_callback = read_callback;
589
0
  reader->buffer.io_callback_arg = read_callback_arg;
590
0
  reader->error_filename = error_filename;
591
0
  reader->error_callback = error_callback;
592
0
  reader->error_callback_arg = error_callback_arg;
593
0
  INIT_CRC32C(reader->buffer.crc);
594
595
  /* Verify magic number. */
596
0
  BlockRefTableRead(reader, &magic, sizeof(uint32));
597
0
  if (magic != BLOCKREFTABLE_MAGIC)
598
0
    error_callback(error_callback_arg,
599
0
             "file \"%s\" has wrong magic number: expected %u, found %u",
600
0
             error_filename,
601
0
             BLOCKREFTABLE_MAGIC, magic);
602
603
0
  return reader;
604
0
}
605
606
/*
607
 * Read next relation fork covered by this block reference table file.
608
 *
609
 * After calling this function, you must call BlockRefTableReaderGetBlocks
610
 * until it returns 0 before calling it again.
611
 */
612
bool
613
BlockRefTableReaderNextRelation(BlockRefTableReader *reader,
614
                RelFileLocator *rlocator,
615
                ForkNumber *forknum,
616
                BlockNumber *limit_block)
617
0
{
618
0
  BlockRefTableSerializedEntry sentry;
619
0
  BlockRefTableSerializedEntry zentry = {0};
620
621
  /*
622
   * Sanity check: caller must read all blocks from all chunks before moving
623
   * on to the next relation.
624
   */
625
0
  Assert(reader->total_chunks == reader->consumed_chunks);
626
627
  /* Read serialized entry. */
628
0
  BlockRefTableRead(reader, &sentry,
629
0
            sizeof(BlockRefTableSerializedEntry));
630
631
  /*
632
   * If we just read the sentinel entry indicating that we've reached the
633
   * end, read and check the CRC.
634
   */
635
0
  if (memcmp(&sentry, &zentry, sizeof(BlockRefTableSerializedEntry)) == 0)
636
0
  {
637
0
    pg_crc32c expected_crc;
638
0
    pg_crc32c actual_crc;
639
640
    /*
641
     * We want to know the CRC of the file excluding the 4-byte CRC
642
     * itself, so copy the current value of the CRC accumulator before
643
     * reading those bytes, and use the copy to finalize the calculation.
644
     */
645
0
    expected_crc = reader->buffer.crc;
646
0
    FIN_CRC32C(expected_crc);
647
648
    /* Now we can read the actual value. */
649
0
    BlockRefTableRead(reader, &actual_crc, sizeof(pg_crc32c));
650
651
    /* Throw an error if there is a mismatch. */
652
0
    if (!EQ_CRC32C(expected_crc, actual_crc))
653
0
      reader->error_callback(reader->error_callback_arg,
654
0
                   "file \"%s\" has wrong checksum: expected %08X, found %08X",
655
0
                   reader->error_filename, expected_crc, actual_crc);
656
657
0
    return false;
658
0
  }
659
660
  /* Read chunk size array. */
661
0
  if (reader->chunk_size != NULL)
662
0
    pfree(reader->chunk_size);
663
0
  reader->chunk_size = palloc(sentry.nchunks * sizeof(uint16));
664
0
  BlockRefTableRead(reader, reader->chunk_size,
665
0
            sentry.nchunks * sizeof(uint16));
666
667
  /* Set up for chunk scan. */
668
0
  reader->total_chunks = sentry.nchunks;
669
0
  reader->consumed_chunks = 0;
670
671
  /* Return data to caller. */
672
0
  memcpy(rlocator, &sentry.rlocator, sizeof(RelFileLocator));
673
0
  *forknum = sentry.forknum;
674
0
  *limit_block = sentry.limit_block;
675
0
  return true;
676
0
}
677
678
/*
679
 * Get modified blocks associated with the relation fork returned by
680
 * the most recent call to BlockRefTableReaderNextRelation.
681
 *
682
 * On return, block numbers will be written into the 'blocks' array, whose
683
 * length should be passed via 'nblocks'. The return value is the number of
684
 * entries actually written into the 'blocks' array, which may be less than
685
 * 'nblocks' if we run out of modified blocks in the relation fork before
686
 * we run out of room in the array.
687
 */
688
unsigned
689
BlockRefTableReaderGetBlocks(BlockRefTableReader *reader,
690
               BlockNumber *blocks,
691
               int nblocks)
692
0
{
693
0
  unsigned  blocks_found = 0;
694
695
  /* Must provide space for at least one block number to be returned. */
696
0
  Assert(nblocks > 0);
697
698
  /* Loop collecting blocks to return to caller. */
699
0
  for (;;)
700
0
  {
701
0
    uint16    next_chunk_size;
702
703
    /*
704
     * If we've read at least one chunk, maybe it contains some block
705
     * numbers that could satisfy caller's request.
706
     */
707
0
    if (reader->consumed_chunks > 0)
708
0
    {
709
0
      uint32    chunkno = reader->consumed_chunks - 1;
710
0
      uint16    chunk_size = reader->chunk_size[chunkno];
711
712
0
      if (chunk_size == MAX_ENTRIES_PER_CHUNK)
713
0
      {
714
        /* Bitmap format, so search for bits that are set. */
715
0
        while (reader->chunk_position < BLOCKS_PER_CHUNK &&
716
0
             blocks_found < nblocks)
717
0
        {
718
0
          uint16    chunkoffset = reader->chunk_position;
719
0
          uint16    w;
720
721
0
          w = reader->chunk_data[chunkoffset / BLOCKS_PER_ENTRY];
722
0
          if ((w & (1u << (chunkoffset % BLOCKS_PER_ENTRY))) != 0)
723
0
            blocks[blocks_found++] =
724
0
              chunkno * BLOCKS_PER_CHUNK + chunkoffset;
725
0
          ++reader->chunk_position;
726
0
        }
727
0
      }
728
0
      else
729
0
      {
730
        /* Not in bitmap format, so each entry is a 2-byte offset. */
731
0
        while (reader->chunk_position < chunk_size &&
732
0
             blocks_found < nblocks)
733
0
        {
734
0
          blocks[blocks_found++] = chunkno * BLOCKS_PER_CHUNK
735
0
            + reader->chunk_data[reader->chunk_position];
736
0
          ++reader->chunk_position;
737
0
        }
738
0
      }
739
0
    }
740
741
    /* We found enough blocks, so we're done. */
742
0
    if (blocks_found >= nblocks)
743
0
      break;
744
745
    /*
746
     * We didn't find enough blocks, so we must need the next chunk. If
747
     * there are none left, though, then we're done anyway.
748
     */
749
0
    if (reader->consumed_chunks == reader->total_chunks)
750
0
      break;
751
752
    /*
753
     * Read data for next chunk and reset scan position to beginning of
754
     * chunk. Note that the next chunk might be empty, in which case we
755
     * consume the chunk without actually consuming any bytes from the
756
     * underlying file.
757
     */
758
0
    next_chunk_size = reader->chunk_size[reader->consumed_chunks];
759
0
    if (next_chunk_size > 0)
760
0
      BlockRefTableRead(reader, reader->chunk_data,
761
0
                next_chunk_size * sizeof(uint16));
762
0
    ++reader->consumed_chunks;
763
0
    reader->chunk_position = 0;
764
0
  }
765
766
0
  return blocks_found;
767
0
}
768
769
/*
770
 * Release memory used while reading a block reference table from a file.
771
 */
772
void
773
DestroyBlockRefTableReader(BlockRefTableReader *reader)
774
0
{
775
0
  if (reader->chunk_size != NULL)
776
0
  {
777
0
    pfree(reader->chunk_size);
778
0
    reader->chunk_size = NULL;
779
0
  }
780
0
  pfree(reader);
781
0
}
782
783
/*
784
 * Prepare to write a block reference table file incrementally.
785
 *
786
 * Caller must be able to supply BlockRefTableEntry objects sorted in the
787
 * appropriate order.
788
 */
789
BlockRefTableWriter *
790
CreateBlockRefTableWriter(io_callback_fn write_callback,
791
              void *write_callback_arg)
792
0
{
793
0
  BlockRefTableWriter *writer;
794
0
  uint32    magic = BLOCKREFTABLE_MAGIC;
795
796
  /* Prepare buffer and CRC check and save callbacks. */
797
0
  writer = palloc0(sizeof(BlockRefTableWriter));
798
0
  writer->buffer.io_callback = write_callback;
799
0
  writer->buffer.io_callback_arg = write_callback_arg;
800
0
  INIT_CRC32C(writer->buffer.crc);
801
802
  /* Write magic number. */
803
0
  BlockRefTableWrite(&writer->buffer, &magic, sizeof(uint32));
804
805
0
  return writer;
806
0
}
807
808
/*
809
 * Append one entry to a block reference table file.
810
 *
811
 * Note that entries must be written in the proper order, that is, sorted by
812
 * tablespace, then database, then relfilenumber, then fork number. Caller
813
 * is responsible for supplying data in the correct order. If that seems hard,
814
 * use an in-memory BlockRefTable instead.
815
 */
816
void
817
BlockRefTableWriteEntry(BlockRefTableWriter *writer, BlockRefTableEntry *entry)
818
0
{
819
0
  BlockRefTableSerializedEntry sentry;
820
0
  unsigned  j;
821
822
  /* Convert to serialized entry format. */
823
0
  sentry.rlocator = entry->key.rlocator;
824
0
  sentry.forknum = entry->key.forknum;
825
0
  sentry.limit_block = entry->limit_block;
826
0
  sentry.nchunks = entry->nchunks;
827
828
  /* Trim trailing zero entries. */
829
0
  while (sentry.nchunks > 0 && entry->chunk_usage[sentry.nchunks - 1] == 0)
830
0
    sentry.nchunks--;
831
832
  /* Write the serialized entry itself. */
833
0
  BlockRefTableWrite(&writer->buffer, &sentry,
834
0
             sizeof(BlockRefTableSerializedEntry));
835
836
  /* Write the untruncated portion of the chunk length array. */
837
0
  if (sentry.nchunks != 0)
838
0
    BlockRefTableWrite(&writer->buffer, entry->chunk_usage,
839
0
               sentry.nchunks * sizeof(uint16));
840
841
  /* Write the contents of each chunk. */
842
0
  for (j = 0; j < entry->nchunks; ++j)
843
0
  {
844
0
    if (entry->chunk_usage[j] == 0)
845
0
      continue;
846
0
    BlockRefTableWrite(&writer->buffer, entry->chunk_data[j],
847
0
               entry->chunk_usage[j] * sizeof(uint16));
848
0
  }
849
0
}
850
851
/*
852
 * Finalize an incremental write of a block reference table file.
853
 */
854
void
855
DestroyBlockRefTableWriter(BlockRefTableWriter *writer)
856
0
{
857
0
  BlockRefTableFileTerminate(&writer->buffer);
858
0
  pfree(writer);
859
0
}
860
861
/*
862
 * Allocate a standalone BlockRefTableEntry.
863
 *
864
 * When we're manipulating a full in-memory BlockRefTable, the entries are
865
 * part of the hash table and are allocated by simplehash. This routine is
866
 * used by callers that want to write out a BlockRefTable to a file without
867
 * needing to store the whole thing in memory at once.
868
 *
869
 * Entries allocated by this function can be manipulated using the functions
870
 * BlockRefTableEntrySetLimitBlock and BlockRefTableEntryMarkBlockModified
871
 * and then written using BlockRefTableWriteEntry and freed using
872
 * BlockRefTableFreeEntry.
873
 */
874
BlockRefTableEntry *
875
CreateBlockRefTableEntry(RelFileLocator rlocator, ForkNumber forknum)
876
0
{
877
0
  BlockRefTableEntry *entry = palloc0(sizeof(BlockRefTableEntry));
878
879
0
  memcpy(&entry->key.rlocator, &rlocator, sizeof(RelFileLocator));
880
0
  entry->key.forknum = forknum;
881
0
  entry->limit_block = InvalidBlockNumber;
882
883
0
  return entry;
884
0
}
885
886
/*
887
 * Update a BlockRefTableEntry with a new value for the "limit block" and
888
 * forget any equal-or-higher-numbered modified blocks.
889
 *
890
 * The "limit block" is the shortest known length of the relation within the
891
 * range of WAL records covered by this block reference table.
892
 */
893
void
894
BlockRefTableEntrySetLimitBlock(BlockRefTableEntry *entry,
895
                BlockNumber limit_block)
896
0
{
897
0
  unsigned  chunkno;
898
0
  unsigned  limit_chunkno;
899
0
  unsigned  limit_chunkoffset;
900
0
  BlockRefTableChunk limit_chunk;
901
902
  /* If we already have an equal or lower limit block, do nothing. */
903
0
  if (limit_block >= entry->limit_block)
904
0
    return;
905
906
  /* Record the new limit block value. */
907
0
  entry->limit_block = limit_block;
908
909
  /*
910
   * Figure out which chunk would store the state of the new limit block,
911
   * and which offset within that chunk.
912
   */
913
0
  limit_chunkno = limit_block / BLOCKS_PER_CHUNK;
914
0
  limit_chunkoffset = limit_block % BLOCKS_PER_CHUNK;
915
916
  /*
917
   * If the number of chunks is not large enough for any blocks with equal
918
   * or higher block numbers to exist, then there is nothing further to do.
919
   */
920
0
  if (limit_chunkno >= entry->nchunks)
921
0
    return;
922
923
  /* Discard entire contents of any higher-numbered chunks. */
924
0
  for (chunkno = limit_chunkno + 1; chunkno < entry->nchunks; ++chunkno)
925
0
    entry->chunk_usage[chunkno] = 0;
926
927
  /*
928
   * Next, we need to discard any offsets within the chunk that would
929
   * contain the limit_block. We must handle this differently depending on
930
   * whether the chunk that would contain limit_block is a bitmap or an
931
   * array of offsets.
932
   */
933
0
  limit_chunk = entry->chunk_data[limit_chunkno];
934
0
  if (entry->chunk_usage[limit_chunkno] == MAX_ENTRIES_PER_CHUNK)
935
0
  {
936
0
    unsigned  chunkoffset;
937
938
    /* It's a bitmap. Unset bits. */
939
0
    for (chunkoffset = limit_chunkoffset; chunkoffset < BLOCKS_PER_CHUNK;
940
0
       ++chunkoffset)
941
0
      limit_chunk[chunkoffset / BLOCKS_PER_ENTRY] &=
942
0
        ~(1 << (chunkoffset % BLOCKS_PER_ENTRY));
943
0
  }
944
0
  else
945
0
  {
946
0
    unsigned  i,
947
0
          j = 0;
948
949
    /* It's an offset array. Filter out large offsets. */
950
0
    for (i = 0; i < entry->chunk_usage[limit_chunkno]; ++i)
951
0
    {
952
0
      Assert(j <= i);
953
0
      if (limit_chunk[i] < limit_chunkoffset)
954
0
        limit_chunk[j++] = limit_chunk[i];
955
0
    }
956
0
    Assert(j <= entry->chunk_usage[limit_chunkno]);
957
0
    entry->chunk_usage[limit_chunkno] = j;
958
0
  }
959
0
}
960
961
/*
962
 * Mark a block in a given BlockRefTableEntry as known to have been modified.
963
 */
964
void
965
BlockRefTableEntryMarkBlockModified(BlockRefTableEntry *entry,
966
                  ForkNumber forknum,
967
                  BlockNumber blknum)
968
0
{
969
0
  unsigned  chunkno;
970
0
  unsigned  chunkoffset;
971
0
  unsigned  i;
972
973
  /*
974
   * Which chunk should store the state of this block? And what is the
975
   * offset of this block relative to the start of that chunk?
976
   */
977
0
  chunkno = blknum / BLOCKS_PER_CHUNK;
978
0
  chunkoffset = blknum % BLOCKS_PER_CHUNK;
979
980
  /*
981
   * If 'nchunks' isn't big enough for us to be able to represent the state
982
   * of this block, we need to enlarge our arrays.
983
   */
984
0
  if (chunkno >= entry->nchunks)
985
0
  {
986
0
    unsigned  max_chunks;
987
0
    unsigned  extra_chunks;
988
989
    /*
990
     * New array size is a power of 2, at least 16, big enough so that
991
     * chunkno will be a valid array index.
992
     */
993
0
    max_chunks = Max(16, entry->nchunks);
994
0
    while (max_chunks < chunkno + 1)
995
0
      max_chunks *= 2;
996
0
    extra_chunks = max_chunks - entry->nchunks;
997
998
0
    if (entry->nchunks == 0)
999
0
    {
1000
0
      entry->chunk_size = palloc0(sizeof(uint16) * max_chunks);
1001
0
      entry->chunk_usage = palloc0(sizeof(uint16) * max_chunks);
1002
0
      entry->chunk_data =
1003
0
        palloc0(sizeof(BlockRefTableChunk) * max_chunks);
1004
0
    }
1005
0
    else
1006
0
    {
1007
0
      entry->chunk_size = repalloc(entry->chunk_size,
1008
0
                     sizeof(uint16) * max_chunks);
1009
0
      memset(&entry->chunk_size[entry->nchunks], 0,
1010
0
           extra_chunks * sizeof(uint16));
1011
0
      entry->chunk_usage = repalloc(entry->chunk_usage,
1012
0
                      sizeof(uint16) * max_chunks);
1013
0
      memset(&entry->chunk_usage[entry->nchunks], 0,
1014
0
           extra_chunks * sizeof(uint16));
1015
0
      entry->chunk_data = repalloc(entry->chunk_data,
1016
0
                     sizeof(BlockRefTableChunk) * max_chunks);
1017
0
      memset(&entry->chunk_data[entry->nchunks], 0,
1018
0
           extra_chunks * sizeof(BlockRefTableChunk));
1019
0
    }
1020
0
    entry->nchunks = max_chunks;
1021
0
  }
1022
1023
  /*
1024
   * If the chunk that covers this block number doesn't exist yet, create it
1025
   * as an array and add the appropriate offset to it. We make it pretty
1026
   * small initially, because there might only be 1 or a few block
1027
   * references in this chunk and we don't want to use up too much memory.
1028
   */
1029
0
  if (entry->chunk_size[chunkno] == 0)
1030
0
  {
1031
0
    entry->chunk_data[chunkno] =
1032
0
      palloc(sizeof(uint16) * INITIAL_ENTRIES_PER_CHUNK);
1033
0
    entry->chunk_size[chunkno] = INITIAL_ENTRIES_PER_CHUNK;
1034
0
    entry->chunk_data[chunkno][0] = chunkoffset;
1035
0
    entry->chunk_usage[chunkno] = 1;
1036
0
    return;
1037
0
  }
1038
1039
  /*
1040
   * If the number of entries in this chunk is already maximum, it must be a
1041
   * bitmap. Just set the appropriate bit.
1042
   */
1043
0
  if (entry->chunk_usage[chunkno] == MAX_ENTRIES_PER_CHUNK)
1044
0
  {
1045
0
    BlockRefTableChunk chunk = entry->chunk_data[chunkno];
1046
1047
0
    chunk[chunkoffset / BLOCKS_PER_ENTRY] |=
1048
0
      1 << (chunkoffset % BLOCKS_PER_ENTRY);
1049
0
    return;
1050
0
  }
1051
1052
  /*
1053
   * There is an existing chunk and it's in array format. Let's find out
1054
   * whether it already has an entry for this block. If so, we do not need
1055
   * to do anything.
1056
   */
1057
0
  for (i = 0; i < entry->chunk_usage[chunkno]; ++i)
1058
0
  {
1059
0
    if (entry->chunk_data[chunkno][i] == chunkoffset)
1060
0
      return;
1061
0
  }
1062
1063
  /*
1064
   * If the number of entries currently used is one less than the maximum,
1065
   * it's time to convert to bitmap format.
1066
   */
1067
0
  if (entry->chunk_usage[chunkno] == MAX_ENTRIES_PER_CHUNK - 1)
1068
0
  {
1069
0
    BlockRefTableChunk newchunk;
1070
0
    unsigned  j;
1071
1072
    /* Allocate a new chunk. */
1073
0
    newchunk = palloc0(MAX_ENTRIES_PER_CHUNK * sizeof(uint16));
1074
1075
    /* Set the bit for each existing entry. */
1076
0
    for (j = 0; j < entry->chunk_usage[chunkno]; ++j)
1077
0
    {
1078
0
      unsigned  coff = entry->chunk_data[chunkno][j];
1079
1080
0
      newchunk[coff / BLOCKS_PER_ENTRY] |=
1081
0
        1 << (coff % BLOCKS_PER_ENTRY);
1082
0
    }
1083
1084
    /* Set the bit for the new entry. */
1085
0
    newchunk[chunkoffset / BLOCKS_PER_ENTRY] |=
1086
0
      1 << (chunkoffset % BLOCKS_PER_ENTRY);
1087
1088
    /* Swap the new chunk into place and update metadata. */
1089
0
    pfree(entry->chunk_data[chunkno]);
1090
0
    entry->chunk_data[chunkno] = newchunk;
1091
0
    entry->chunk_size[chunkno] = MAX_ENTRIES_PER_CHUNK;
1092
0
    entry->chunk_usage[chunkno] = MAX_ENTRIES_PER_CHUNK;
1093
0
    return;
1094
0
  }
1095
1096
  /*
1097
   * OK, we currently have an array, and we don't need to convert to a
1098
   * bitmap, but we do need to add a new element. If there's not enough
1099
   * room, we'll have to expand the array.
1100
   */
1101
0
  if (entry->chunk_usage[chunkno] == entry->chunk_size[chunkno])
1102
0
  {
1103
0
    unsigned  newsize = entry->chunk_size[chunkno] * 2;
1104
1105
0
    Assert(newsize <= MAX_ENTRIES_PER_CHUNK);
1106
0
    entry->chunk_data[chunkno] = repalloc(entry->chunk_data[chunkno],
1107
0
                        newsize * sizeof(uint16));
1108
0
    entry->chunk_size[chunkno] = newsize;
1109
0
  }
1110
1111
  /* Now we can add the new entry. */
1112
0
  entry->chunk_data[chunkno][entry->chunk_usage[chunkno]] =
1113
0
    chunkoffset;
1114
0
  entry->chunk_usage[chunkno]++;
1115
0
}
1116
1117
/*
1118
 * Release memory for a BlockRefTableEntry that was created by
1119
 * CreateBlockRefTableEntry.
1120
 */
1121
void
1122
BlockRefTableFreeEntry(BlockRefTableEntry *entry)
1123
0
{
1124
0
  if (entry->chunk_size != NULL)
1125
0
  {
1126
0
    pfree(entry->chunk_size);
1127
0
    entry->chunk_size = NULL;
1128
0
  }
1129
1130
0
  if (entry->chunk_usage != NULL)
1131
0
  {
1132
0
    pfree(entry->chunk_usage);
1133
0
    entry->chunk_usage = NULL;
1134
0
  }
1135
1136
0
  if (entry->chunk_data != NULL)
1137
0
  {
1138
0
    pfree(entry->chunk_data);
1139
0
    entry->chunk_data = NULL;
1140
0
  }
1141
1142
0
  pfree(entry);
1143
0
}
1144
1145
/*
1146
 * Comparator for BlockRefTableSerializedEntry objects.
1147
 *
1148
 * We make the tablespace OID the first column of the sort key to match
1149
 * the on-disk tree structure.
1150
 */
1151
static int
1152
BlockRefTableComparator(const void *a, const void *b)
1153
0
{
1154
0
  const BlockRefTableSerializedEntry *sa = a;
1155
0
  const BlockRefTableSerializedEntry *sb = b;
1156
1157
0
  if (sa->rlocator.spcOid > sb->rlocator.spcOid)
1158
0
    return 1;
1159
0
  if (sa->rlocator.spcOid < sb->rlocator.spcOid)
1160
0
    return -1;
1161
1162
0
  if (sa->rlocator.dbOid > sb->rlocator.dbOid)
1163
0
    return 1;
1164
0
  if (sa->rlocator.dbOid < sb->rlocator.dbOid)
1165
0
    return -1;
1166
1167
0
  if (sa->rlocator.relNumber > sb->rlocator.relNumber)
1168
0
    return 1;
1169
0
  if (sa->rlocator.relNumber < sb->rlocator.relNumber)
1170
0
    return -1;
1171
1172
0
  if (sa->forknum > sb->forknum)
1173
0
    return 1;
1174
0
  if (sa->forknum < sb->forknum)
1175
0
    return -1;
1176
1177
0
  return 0;
1178
0
}
1179
1180
/*
1181
 * Flush any buffered data out of a BlockRefTableBuffer.
1182
 */
1183
static void
1184
BlockRefTableFlush(BlockRefTableBuffer *buffer)
1185
0
{
1186
0
  buffer->io_callback(buffer->io_callback_arg, buffer->data, buffer->used);
1187
0
  buffer->used = 0;
1188
0
}
1189
1190
/*
1191
 * Read data from a BlockRefTableBuffer, and update the running CRC
1192
 * calculation for the returned data (but not any data that we may have
1193
 * buffered but not yet actually returned).
1194
 */
1195
static void
1196
BlockRefTableRead(BlockRefTableReader *reader, void *data, int length)
1197
0
{
1198
0
  BlockRefTableBuffer *buffer = &reader->buffer;
1199
1200
  /* Loop until read is fully satisfied. */
1201
0
  while (length > 0)
1202
0
  {
1203
0
    if (buffer->cursor < buffer->used)
1204
0
    {
1205
      /*
1206
       * If any buffered data is available, use that to satisfy as much
1207
       * of the request as possible.
1208
       */
1209
0
      int     bytes_to_copy = Min(length, buffer->used - buffer->cursor);
1210
1211
0
      memcpy(data, &buffer->data[buffer->cursor], bytes_to_copy);
1212
0
      COMP_CRC32C(buffer->crc, &buffer->data[buffer->cursor],
1213
0
            bytes_to_copy);
1214
0
      buffer->cursor += bytes_to_copy;
1215
0
      data = ((char *) data) + bytes_to_copy;
1216
0
      length -= bytes_to_copy;
1217
0
    }
1218
0
    else if (length >= BUFSIZE)
1219
0
    {
1220
      /*
1221
       * If the request length is long, read directly into caller's
1222
       * buffer.
1223
       */
1224
0
      int     bytes_read;
1225
1226
0
      bytes_read = buffer->io_callback(buffer->io_callback_arg,
1227
0
                       data, length);
1228
0
      COMP_CRC32C(buffer->crc, data, bytes_read);
1229
0
      data = ((char *) data) + bytes_read;
1230
0
      length -= bytes_read;
1231
1232
      /* If we didn't get anything, that's bad. */
1233
0
      if (bytes_read == 0)
1234
0
        reader->error_callback(reader->error_callback_arg,
1235
0
                     "file \"%s\" ends unexpectedly",
1236
0
                     reader->error_filename);
1237
0
    }
1238
0
    else
1239
0
    {
1240
      /*
1241
       * Refill our buffer.
1242
       */
1243
0
      buffer->used = buffer->io_callback(buffer->io_callback_arg,
1244
0
                         buffer->data, BUFSIZE);
1245
0
      buffer->cursor = 0;
1246
1247
      /* If we didn't get anything, that's bad. */
1248
0
      if (buffer->used == 0)
1249
0
        reader->error_callback(reader->error_callback_arg,
1250
0
                     "file \"%s\" ends unexpectedly",
1251
0
                     reader->error_filename);
1252
0
    }
1253
0
  }
1254
0
}
1255
1256
/*
1257
 * Supply data to a BlockRefTableBuffer for write to the underlying File,
1258
 * and update the running CRC calculation for that data.
1259
 */
1260
static void
1261
BlockRefTableWrite(BlockRefTableBuffer *buffer, void *data, int length)
1262
0
{
1263
  /* Update running CRC calculation. */
1264
0
  COMP_CRC32C(buffer->crc, data, length);
1265
1266
  /* If the new data can't fit into the buffer, flush the buffer. */
1267
0
  if (buffer->used + length > BUFSIZE)
1268
0
  {
1269
0
    buffer->io_callback(buffer->io_callback_arg, buffer->data,
1270
0
              buffer->used);
1271
0
    buffer->used = 0;
1272
0
  }
1273
1274
  /* If the new data would fill the buffer, or more, write it directly. */
1275
0
  if (length >= BUFSIZE)
1276
0
  {
1277
0
    buffer->io_callback(buffer->io_callback_arg, data, length);
1278
0
    return;
1279
0
  }
1280
1281
  /* Otherwise, copy the new data into the buffer. */
1282
0
  memcpy(&buffer->data[buffer->used], data, length);
1283
0
  buffer->used += length;
1284
0
  Assert(buffer->used <= BUFSIZE);
1285
0
}
1286
1287
/*
1288
 * Generate the sentinel and CRC required at the end of a block reference
1289
 * table file and flush them out of our internal buffer.
1290
 */
1291
static void
1292
BlockRefTableFileTerminate(BlockRefTableBuffer *buffer)
1293
0
{
1294
0
  BlockRefTableSerializedEntry zentry = {0};
1295
0
  pg_crc32c crc;
1296
1297
  /* Write a sentinel indicating that there are no more entries. */
1298
0
  BlockRefTableWrite(buffer, &zentry,
1299
0
             sizeof(BlockRefTableSerializedEntry));
1300
1301
  /*
1302
   * Writing the checksum will perturb the ongoing checksum calculation, so
1303
   * copy the state first and finalize the computation using the copy.
1304
   */
1305
0
  crc = buffer->crc;
1306
0
  FIN_CRC32C(crc);
1307
0
  BlockRefTableWrite(buffer, &crc, sizeof(pg_crc32c));
1308
1309
  /* Flush any leftover data out of our buffer. */
1310
0
  BlockRefTableFlush(buffer);
1311
0
}