Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/storage/freespace/fsmpage.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * fsmpage.c
4
 *    routines to search and manipulate one FSM page.
5
 *
6
 *
7
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 * IDENTIFICATION
11
 *    src/backend/storage/freespace/fsmpage.c
12
 *
13
 * NOTES:
14
 *
15
 *  The public functions in this file form an API that hides the internal
16
 *  structure of a FSM page. This allows freespace.c to treat each FSM page
17
 *  as a black box with SlotsPerPage "slots". fsm_set_avail() and
18
 *  fsm_get_avail() let you get/set the value of a slot, and
19
 *  fsm_search_avail() lets you search for a slot with value >= X.
20
 *
21
 *-------------------------------------------------------------------------
22
 */
23
#include "postgres.h"
24
25
#include "storage/bufmgr.h"
26
#include "storage/fsm_internals.h"
27
28
/* Macros to navigate the tree within a page. Root has index zero. */
29
0
#define leftchild(x)  (2 * (x) + 1)
30
#define rightchild(x) (2 * (x) + 2)
31
0
#define parentof(x)   (((x) - 1) / 2)
32
33
/*
34
 * Find right neighbor of x, wrapping around within the level
35
 */
36
static int
37
rightneighbor(int x)
38
0
{
39
  /*
40
   * Move right. This might wrap around, stepping to the leftmost node at
41
   * the next level.
42
   */
43
0
  x++;
44
45
  /*
46
   * Check if we stepped to the leftmost node at next level, and correct if
47
   * so. The leftmost nodes at each level are numbered x = 2^level - 1, so
48
   * check if (x + 1) is a power of two, using a standard
49
   * twos-complement-arithmetic trick.
50
   */
51
0
  if (((x + 1) & x) == 0)
52
0
    x = parentof(x);
53
54
0
  return x;
55
0
}
56
57
/*
58
 * Sets the value of a slot on page. Returns true if the page was modified.
59
 *
60
 * The caller must hold an exclusive lock on the page.
61
 */
62
bool
63
fsm_set_avail(Page page, int slot, uint8 value)
64
0
{
65
0
  int     nodeno = NonLeafNodesPerPage + slot;
66
0
  FSMPage   fsmpage = (FSMPage) PageGetContents(page);
67
0
  uint8   oldvalue;
68
69
0
  Assert(slot < LeafNodesPerPage);
70
71
0
  oldvalue = fsmpage->fp_nodes[nodeno];
72
73
  /* If the value hasn't changed, we don't need to do anything */
74
0
  if (oldvalue == value && value <= fsmpage->fp_nodes[0])
75
0
    return false;
76
77
0
  fsmpage->fp_nodes[nodeno] = value;
78
79
  /*
80
   * Propagate up, until we hit the root or a node that doesn't need to be
81
   * updated.
82
   */
83
0
  do
84
0
  {
85
0
    uint8   newvalue = 0;
86
0
    int     lchild;
87
0
    int     rchild;
88
89
0
    nodeno = parentof(nodeno);
90
0
    lchild = leftchild(nodeno);
91
0
    rchild = lchild + 1;
92
93
0
    newvalue = fsmpage->fp_nodes[lchild];
94
0
    if (rchild < NodesPerPage)
95
0
      newvalue = Max(newvalue,
96
0
               fsmpage->fp_nodes[rchild]);
97
98
0
    oldvalue = fsmpage->fp_nodes[nodeno];
99
0
    if (oldvalue == newvalue)
100
0
      break;
101
102
0
    fsmpage->fp_nodes[nodeno] = newvalue;
103
0
  } while (nodeno > 0);
104
105
  /*
106
   * sanity check: if the new value is (still) higher than the value at the
107
   * top, the tree is corrupt.  If so, rebuild.
108
   */
109
0
  if (value > fsmpage->fp_nodes[0])
110
0
    fsm_rebuild_page(page);
111
112
0
  return true;
113
0
}
114
115
/*
116
 * Returns the value of given slot on page.
117
 *
118
 * Since this is just a read-only access of a single byte, the page doesn't
119
 * need to be locked.
120
 */
121
uint8
122
fsm_get_avail(Page page, int slot)
123
0
{
124
0
  FSMPage   fsmpage = (FSMPage) PageGetContents(page);
125
126
0
  Assert(slot < LeafNodesPerPage);
127
128
0
  return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
129
0
}
130
131
/*
132
 * Returns the value at the root of a page.
133
 *
134
 * Since this is just a read-only access of a single byte, the page doesn't
135
 * need to be locked.
136
 */
137
uint8
138
fsm_get_max_avail(Page page)
139
0
{
140
0
  FSMPage   fsmpage = (FSMPage) PageGetContents(page);
141
142
0
  return fsmpage->fp_nodes[0];
143
0
}
144
145
/*
146
 * Searches for a slot with category at least minvalue.
147
 * Returns slot number, or -1 if none found.
148
 *
149
 * The caller must hold at least a shared lock on the page, and this
150
 * function can unlock and lock the page again in exclusive mode if it
151
 * needs to be updated. exclusive_lock_held should be set to true if the
152
 * caller is already holding an exclusive lock, to avoid extra work.
153
 *
154
 * If advancenext is false, fp_next_slot is set to point to the returned
155
 * slot, and if it's true, to the slot after the returned slot.
156
 */
157
int
158
fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
159
         bool exclusive_lock_held)
160
0
{
161
0
  Page    page = BufferGetPage(buf);
162
0
  FSMPage   fsmpage = (FSMPage) PageGetContents(page);
163
0
  int     nodeno;
164
0
  int     target;
165
0
  uint16    slot;
166
167
0
restart:
168
169
  /*
170
   * Check the root first, and exit quickly if there's no leaf with enough
171
   * free space
172
   */
173
0
  if (fsmpage->fp_nodes[0] < minvalue)
174
0
    return -1;
175
176
  /*
177
   * Start search using fp_next_slot.  It's just a hint, so check that it's
178
   * sane.  (This also handles wrapping around when the prior call returned
179
   * the last slot on the page.)
180
   */
181
0
  target = fsmpage->fp_next_slot;
182
0
  if (target < 0 || target >= LeafNodesPerPage)
183
0
    target = 0;
184
0
  target += NonLeafNodesPerPage;
185
186
  /*----------
187
   * Start the search from the target slot.  At every step, move one
188
   * node to the right, then climb up to the parent.  Stop when we reach
189
   * a node with enough free space (as we must, since the root has enough
190
   * space).
191
   *
192
   * The idea is to gradually expand our "search triangle", that is, all
193
   * nodes covered by the current node, and to be sure we search to the
194
   * right from the start point.  At the first step, only the target slot
195
   * is examined.  When we move up from a left child to its parent, we are
196
   * adding the right-hand subtree of that parent to the search triangle.
197
   * When we move right then up from a right child, we are dropping the
198
   * current search triangle (which we know doesn't contain any suitable
199
   * page) and instead looking at the next-larger-size triangle to its
200
   * right.  So we never look left from our original start point, and at
201
   * each step the size of the search triangle doubles, ensuring it takes
202
   * only log2(N) work to search N pages.
203
   *
204
   * The "move right" operation will wrap around if it hits the right edge
205
   * of the tree, so the behavior is still good if we start near the right.
206
   * Note also that the move-and-climb behavior ensures that we can't end
207
   * up on one of the missing nodes at the right of the leaf level.
208
   *
209
   * For example, consider this tree:
210
   *
211
   *       7
212
   *     7     6
213
   *   5   7   6   5
214
   *  4 5 5 7 2 6 5 2
215
   *        T
216
   *
217
   * Assume that the target node is the node indicated by the letter T,
218
   * and we're searching for a node with value of 6 or higher. The search
219
   * begins at T. At the first iteration, we move to the right, then to the
220
   * parent, arriving at the rightmost 5. At the second iteration, we move
221
   * to the right, wrapping around, then climb up, arriving at the 7 on the
222
   * third level.  7 satisfies our search, so we descend down to the bottom,
223
   * following the path of sevens.  This is in fact the first suitable page
224
   * to the right of (allowing for wraparound) our start point.
225
   *----------
226
   */
227
0
  nodeno = target;
228
0
  while (nodeno > 0)
229
0
  {
230
0
    if (fsmpage->fp_nodes[nodeno] >= minvalue)
231
0
      break;
232
233
    /*
234
     * Move to the right, wrapping around on same level if necessary, then
235
     * climb up.
236
     */
237
0
    nodeno = parentof(rightneighbor(nodeno));
238
0
  }
239
240
  /*
241
   * We're now at a node with enough free space, somewhere in the middle of
242
   * the tree. Descend to the bottom, following a path with enough free
243
   * space, preferring to move left if there's a choice.
244
   */
245
0
  while (nodeno < NonLeafNodesPerPage)
246
0
  {
247
0
    int     childnodeno = leftchild(nodeno);
248
249
0
    if (childnodeno < NodesPerPage &&
250
0
      fsmpage->fp_nodes[childnodeno] >= minvalue)
251
0
    {
252
0
      nodeno = childnodeno;
253
0
      continue;
254
0
    }
255
0
    childnodeno++;      /* point to right child */
256
0
    if (childnodeno < NodesPerPage &&
257
0
      fsmpage->fp_nodes[childnodeno] >= minvalue)
258
0
    {
259
0
      nodeno = childnodeno;
260
0
    }
261
0
    else
262
0
    {
263
      /*
264
       * Oops. The parent node promised that either left or right child
265
       * has enough space, but neither actually did. This can happen in
266
       * case of a "torn page", IOW if we crashed earlier while writing
267
       * the page to disk, and only part of the page made it to disk.
268
       *
269
       * Fix the corruption and restart.
270
       */
271
0
      RelFileLocator rlocator;
272
0
      ForkNumber  forknum;
273
0
      BlockNumber blknum;
274
275
0
      BufferGetTag(buf, &rlocator, &forknum, &blknum);
276
0
      elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
277
0
         blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
278
279
      /* make sure we hold an exclusive lock */
280
0
      if (!exclusive_lock_held)
281
0
      {
282
0
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
283
0
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
284
0
        exclusive_lock_held = true;
285
0
      }
286
0
      fsm_rebuild_page(page);
287
0
      MarkBufferDirtyHint(buf, false);
288
0
      goto restart;
289
0
    }
290
0
  }
291
292
  /* We're now at the bottom level, at a node with enough space. */
293
0
  slot = nodeno - NonLeafNodesPerPage;
294
295
  /*
296
   * Update the next-target pointer. Note that we do this even if we're only
297
   * holding a shared lock, on the grounds that it's better to use a shared
298
   * lock and get a garbled next pointer every now and then, than take the
299
   * concurrency hit of an exclusive lock.
300
   *
301
   * Wrap-around is handled at the beginning of this function.
302
   */
303
0
  fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
304
305
0
  return slot;
306
0
}
307
308
/*
309
 * Sets the available space to zero for all slots numbered >= nslots.
310
 * Returns true if the page was modified.
311
 */
312
bool
313
fsm_truncate_avail(Page page, int nslots)
314
0
{
315
0
  FSMPage   fsmpage = (FSMPage) PageGetContents(page);
316
0
  uint8    *ptr;
317
0
  bool    changed = false;
318
319
0
  Assert(nslots >= 0 && nslots < LeafNodesPerPage);
320
321
  /* Clear all truncated leaf nodes */
322
0
  ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
323
0
  for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
324
0
  {
325
0
    if (*ptr != 0)
326
0
      changed = true;
327
0
    *ptr = 0;
328
0
  }
329
330
  /* Fix upper nodes. */
331
0
  if (changed)
332
0
    fsm_rebuild_page(page);
333
334
0
  return changed;
335
0
}
336
337
/*
338
 * Reconstructs the upper levels of a page. Returns true if the page
339
 * was modified.
340
 */
341
bool
342
fsm_rebuild_page(Page page)
343
0
{
344
0
  FSMPage   fsmpage = (FSMPage) PageGetContents(page);
345
0
  bool    changed = false;
346
0
  int     nodeno;
347
348
  /*
349
   * Start from the lowest non-leaf level, at last node, working our way
350
   * backwards, through all non-leaf nodes at all levels, up to the root.
351
   */
352
0
  for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
353
0
  {
354
0
    int     lchild = leftchild(nodeno);
355
0
    int     rchild = lchild + 1;
356
0
    uint8   newvalue = 0;
357
358
    /* The first few nodes we examine might have zero or one child. */
359
0
    if (lchild < NodesPerPage)
360
0
      newvalue = fsmpage->fp_nodes[lchild];
361
362
0
    if (rchild < NodesPerPage)
363
0
      newvalue = Max(newvalue,
364
0
               fsmpage->fp_nodes[rchild]);
365
366
0
    if (fsmpage->fp_nodes[nodeno] != newvalue)
367
0
    {
368
0
      fsmpage->fp_nodes[nodeno] = newvalue;
369
0
      changed = true;
370
0
    }
371
0
  }
372
373
0
  return changed;
374
0
}