/src/postgres/src/include/access/nbtree.h
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * nbtree.h |
4 | | * header file for postgres btree access method implementation. |
5 | | * |
6 | | * |
7 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
8 | | * Portions Copyright (c) 1994, Regents of the University of California |
9 | | * |
10 | | * src/include/access/nbtree.h |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | #ifndef NBTREE_H |
15 | | #define NBTREE_H |
16 | | |
17 | | #include "access/amapi.h" |
18 | | #include "access/itup.h" |
19 | | #include "access/sdir.h" |
20 | | #include "catalog/pg_am_d.h" |
21 | | #include "catalog/pg_class.h" |
22 | | #include "catalog/pg_index.h" |
23 | | #include "lib/stringinfo.h" |
24 | | #include "storage/bufmgr.h" |
25 | | #include "storage/shm_toc.h" |
26 | | #include "utils/skipsupport.h" |
27 | | |
28 | | /* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */ |
29 | | typedef uint16 BTCycleId; |
30 | | |
31 | | /* |
32 | | * BTPageOpaqueData -- At the end of every page, we store a pointer |
33 | | * to both siblings in the tree. This is used to do forward/backward |
34 | | * index scans. The next-page link is also critical for recovery when |
35 | | * a search has navigated to the wrong page due to concurrent page splits |
36 | | * or deletions; see src/backend/access/nbtree/README for more info. |
37 | | * |
38 | | * In addition, we store the page's btree level (counting upwards from |
39 | | * zero at a leaf page) as well as some flag bits indicating the page type |
40 | | * and status. If the page is deleted, a BTDeletedPageData struct is stored |
41 | | * in the page's tuple area, while a standard BTPageOpaqueData struct is |
42 | | * stored in the page special area. |
43 | | * |
44 | | * We also store a "vacuum cycle ID". When a page is split while VACUUM is |
45 | | * processing the index, a nonzero value associated with the VACUUM run is |
46 | | * stored into both halves of the split page. (If VACUUM is not running, |
47 | | * both pages receive zero cycleids.) This allows VACUUM to detect whether |
48 | | * a page was split since it started, with a small probability of false match |
49 | | * if the page was last split some exact multiple of MAX_BT_CYCLE_ID VACUUMs |
50 | | * ago. Also, during a split, the BTP_SPLIT_END flag is cleared in the left |
51 | | * (original) page, and set in the right page, but only if the next page |
52 | | * to its right has a different cycleid. |
53 | | * |
54 | | * NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested |
55 | | * instead. |
56 | | * |
57 | | * NOTE: the btpo_level field used to be a union type in order to allow |
58 | | * deleted pages to store a 32-bit safexid in the same field. We now store |
59 | | * 64-bit/full safexid values using BTDeletedPageData instead. |
60 | | */ |
61 | | |
62 | | typedef struct BTPageOpaqueData |
63 | | { |
64 | | BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ |
65 | | BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ |
66 | | uint32 btpo_level; /* tree level --- zero for leaf pages */ |
67 | | uint16 btpo_flags; /* flag bits, see below */ |
68 | | BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */ |
69 | | } BTPageOpaqueData; |
70 | | |
71 | | typedef BTPageOpaqueData *BTPageOpaque; |
72 | | |
73 | 0 | #define BTPageGetOpaque(page) ((BTPageOpaque) PageGetSpecialPointer(page)) |
74 | | |
/* Bits defined in btpo_flags */
#define BTP_LEAF		(1 << 0)	/* leaf page, i.e. not internal page */
#define BTP_ROOT		(1 << 1)	/* root page (has no parent) */
#define BTP_DELETED		(1 << 2)	/* page has been deleted from tree */
#define BTP_META		(1 << 3)	/* meta-page */
#define BTP_HALF_DEAD	(1 << 4)	/* empty, but still in tree */
#define BTP_SPLIT_END	(1 << 5)	/* rightmost page of split group */
#define BTP_HAS_GARBAGE (1 << 6)	/* page has LP_DEAD tuples (deprecated) */
#define BTP_INCOMPLETE_SPLIT (1 << 7)	/* right sibling's downlink is missing */
#define BTP_HAS_FULLXID	(1 << 8)	/* contains BTDeletedPageData */

/*
 * The max allowed value of a cycle ID is a bit less than 64K.  This is
 * for convenience of pg_filedump and similar utilities: we want to use
 * the last 2 bytes of special space as an index type indicator, and
 * restricting cycle ID lets btree use that space for vacuum cycle IDs
 * while still allowing index type to be identified.
 */
#define MAX_BT_CYCLE_ID		0xFF7F
94 | | |
95 | | |
96 | | /* |
97 | | * The Meta page is always the first page in the btree index. |
98 | | * Its primary purpose is to point to the location of the btree root page. |
99 | | * We also point to the "fast" root, which is the current effective root; |
100 | | * see README for discussion. |
101 | | */ |
102 | | |
103 | | typedef struct BTMetaPageData |
104 | | { |
105 | | uint32 btm_magic; /* should contain BTREE_MAGIC */ |
106 | | uint32 btm_version; /* nbtree version (always <= BTREE_VERSION) */ |
107 | | BlockNumber btm_root; /* current root location */ |
108 | | uint32 btm_level; /* tree level of the root page */ |
109 | | BlockNumber btm_fastroot; /* current "fast" root location */ |
110 | | uint32 btm_fastlevel; /* tree level of the "fast" root page */ |
111 | | /* remaining fields only valid when btm_version >= BTREE_NOVAC_VERSION */ |
112 | | |
113 | | /* number of deleted, non-recyclable pages during last cleanup */ |
114 | | uint32 btm_last_cleanup_num_delpages; |
115 | | /* number of heap tuples during last cleanup (deprecated) */ |
116 | | float8 btm_last_cleanup_num_heap_tuples; |
117 | | |
118 | | bool btm_allequalimage; /* are all columns "equalimage"? */ |
119 | | } BTMetaPageData; |
120 | | |
121 | | #define BTPageGetMeta(p) \ |
122 | 0 | ((BTMetaPageData *) PageGetContents(p)) |
123 | | |
/*
 * The current Btree version is 4.  That's what you'll get when you create
 * a new index.
 *
 * Btree version 3 was used in PostgreSQL v11.  It is mostly the same as
 * version 4, but heap TIDs were not part of the keyspace.  Index tuples
 * with duplicate keys could be stored in any order.  We continue to
 * support reading and writing Btree versions 2 and 3, so that they don't
 * need to be immediately re-indexed at pg_upgrade.  In order to get the
 * new heapkeyspace semantics, however, a REINDEX is needed.
 *
 * Deduplication is safe to use when the btm_allequalimage field is set to
 * true.  It's safe to read the btm_allequalimage field on version 3, but
 * only version 4 indexes make use of deduplication.  Even version 4
 * indexes created on PostgreSQL v12 will need a REINDEX to make use of
 * deduplication, though, since there is no other way to set
 * btm_allequalimage to true (pg_upgrade hasn't been taught to set the
 * metapage field).
 *
 * Btree version 2 is mostly the same as version 3.  There are two new
 * fields in the metapage that were introduced in version 3.  A version 2
 * metapage will be automatically upgraded to version 3 on the first
 * insert to it.  INCLUDE indexes cannot use version 2.
 */
#define BTREE_METAPAGE	0		/* first page is meta */
#define BTREE_MAGIC		0x053162	/* magic number in metapage */
#define BTREE_VERSION	4		/* current version number */
#define BTREE_MIN_VERSION	2	/* minimum supported version */
#define BTREE_NOVAC_VERSION	3	/* version with all meta fields set */
153 | | |
/*
 * Maximum size of a btree index entry, including its tuple header.
 *
 * We actually need to be able to fit three items on every page,
 * so restrict any one item to 1/3 the per-page available space.
 *
 * There are rare cases where _bt_truncate() will need to enlarge
 * a heap index tuple to make space for a tiebreaker heap TID
 * attribute, which we account for here.
 */
#define BTMaxItemSize \
	(MAXALIGN_DOWN((BLCKSZ - \
					MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
					MAXALIGN(sizeof(BTPageOpaqueData))) / 3) - \
					MAXALIGN(sizeof(ItemPointerData)))
#define BTMaxItemSizeNoHeapTid \
	MAXALIGN_DOWN((BLCKSZ - \
				   MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
				   MAXALIGN(sizeof(BTPageOpaqueData))) / 3)
173 | | |
/*
 * MaxTIDsPerBTreePage is an upper bound on the number of heap TIDs tuples
 * that may be stored on a btree leaf page.  It is used to size the
 * per-page temporary buffers.
 *
 * Note: we don't bother considering per-tuple overheads here to keep
 * things simple (value is based on how many elements a single array of
 * heap TIDs must have to fill the space between the page header and
 * special area).  The value is slightly higher (i.e. more conservative)
 * than necessary as a result, which is considered acceptable.
 */
#define MaxTIDsPerBTreePage \
	(int) ((BLCKSZ - SizeOfPageHeaderData - sizeof(BTPageOpaqueData)) / \
		   sizeof(ItemPointerData))
188 | | |
/*
 * The leaf-page fillfactor defaults to 90% but is user-adjustable.
 * For pages above the leaf level, we use a fixed 70% fillfactor.
 * The fillfactor is applied during index build and when splitting
 * a rightmost page; when splitting non-rightmost pages we try to
 * divide the data equally.  When splitting a page that's entirely
 * filled with a single value (duplicates), the effective leaf-page
 * fillfactor is 96%, regardless of whether the page is a rightmost
 * page.
 */
#define BTREE_MIN_FILLFACTOR		10
#define BTREE_DEFAULT_FILLFACTOR	90
#define BTREE_NONLEAF_FILLFACTOR	70
#define BTREE_SINGLEVAL_FILLFACTOR	96
203 | | |
/*
 * In general, the btree code tries to localize its knowledge about
 * page layout to a couple of routines.  However, we need a special
 * value to indicate "no page number" in those places where we expect
 * page numbers.  We can use zero for this because we never need to
 * make a pointer to the metadata page.
 */

#define P_NONE			0

/*
 * Macros to test whether a page is leftmost or rightmost on its tree level,
 * as well as other state info kept in the opaque data.
 */
#define P_LEFTMOST(opaque)		((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque)		((opaque)->btpo_next == P_NONE)
#define P_ISLEAF(opaque)		(((opaque)->btpo_flags & BTP_LEAF) != 0)
#define P_ISROOT(opaque)		(((opaque)->btpo_flags & BTP_ROOT) != 0)
#define P_ISDELETED(opaque)		(((opaque)->btpo_flags & BTP_DELETED) != 0)
#define P_ISMETA(opaque)		(((opaque)->btpo_flags & BTP_META) != 0)
#define P_ISHALFDEAD(opaque)	(((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)
#define P_IGNORE(opaque)		(((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0)
#define P_HAS_GARBAGE(opaque)	(((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)
#define P_INCOMPLETE_SPLIT(opaque)	(((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0)
#define P_HAS_FULLXID(opaque)	(((opaque)->btpo_flags & BTP_HAS_FULLXID) != 0)
229 | | |
230 | | /* |
231 | | * BTDeletedPageData is the page contents of a deleted page |
232 | | */ |
233 | | typedef struct BTDeletedPageData |
234 | | { |
235 | | FullTransactionId safexid; /* See BTPageIsRecyclable() */ |
236 | | } BTDeletedPageData; |
237 | | |
238 | | static inline void |
239 | | BTPageSetDeleted(Page page, FullTransactionId safexid) |
240 | 0 | { |
241 | 0 | BTPageOpaque opaque; |
242 | 0 | PageHeader header; |
243 | 0 | BTDeletedPageData *contents; |
244 | |
|
245 | 0 | opaque = BTPageGetOpaque(page); |
246 | 0 | header = ((PageHeader) page); |
247 | |
|
248 | 0 | opaque->btpo_flags &= ~BTP_HALF_DEAD; |
249 | 0 | opaque->btpo_flags |= BTP_DELETED | BTP_HAS_FULLXID; |
250 | 0 | header->pd_lower = MAXALIGN(SizeOfPageHeaderData) + |
251 | 0 | sizeof(BTDeletedPageData); |
252 | 0 | header->pd_upper = header->pd_special; |
253 | | |
254 | | /* Set safexid in deleted page */ |
255 | 0 | contents = ((BTDeletedPageData *) PageGetContents(page)); |
256 | 0 | contents->safexid = safexid; |
257 | 0 | } Unexecuted instantiation: reloptions.c:BTPageSetDeleted Unexecuted instantiation: nbtdedup.c:BTPageSetDeleted Unexecuted instantiation: nbtinsert.c:BTPageSetDeleted Unexecuted instantiation: nbtpage.c:BTPageSetDeleted Unexecuted instantiation: nbtpreprocesskeys.c:BTPageSetDeleted Unexecuted instantiation: nbtree.c:BTPageSetDeleted Unexecuted instantiation: nbtsearch.c:BTPageSetDeleted Unexecuted instantiation: nbtsort.c:BTPageSetDeleted Unexecuted instantiation: nbtsplitloc.c:BTPageSetDeleted Unexecuted instantiation: nbtutils.c:BTPageSetDeleted Unexecuted instantiation: nbtvalidate.c:BTPageSetDeleted Unexecuted instantiation: nbtxlog.c:BTPageSetDeleted Unexecuted instantiation: parallel.c:BTPageSetDeleted Unexecuted instantiation: parse_clause.c:BTPageSetDeleted Unexecuted instantiation: opclasscmds.c:BTPageSetDeleted Unexecuted instantiation: execExpr.c:BTPageSetDeleted Unexecuted instantiation: nodeIndexscan.c:BTPageSetDeleted Unexecuted instantiation: nodeMergejoin.c:BTPageSetDeleted Unexecuted instantiation: plancat.c:BTPageSetDeleted Unexecuted instantiation: partprune.c:BTPageSetDeleted Unexecuted instantiation: ipci.c:BTPageSetDeleted Unexecuted instantiation: skipsupport.c:BTPageSetDeleted Unexecuted instantiation: partcache.c:BTPageSetDeleted Unexecuted instantiation: typcache.c:BTPageSetDeleted Unexecuted instantiation: sortsupport.c:BTPageSetDeleted Unexecuted instantiation: tuplesortvariants.c:BTPageSetDeleted |
258 | | |
259 | | static inline FullTransactionId |
260 | | BTPageGetDeleteXid(Page page) |
261 | 0 | { |
262 | 0 | BTPageOpaque opaque; |
263 | 0 | BTDeletedPageData *contents; |
264 | | |
265 | | /* We only expect to be called with a deleted page */ |
266 | 0 | Assert(!PageIsNew(page)); |
267 | 0 | opaque = BTPageGetOpaque(page); |
268 | 0 | Assert(P_ISDELETED(opaque)); |
269 | | |
270 | | /* pg_upgrade'd deleted page -- must be safe to recycle now */ |
271 | 0 | if (!P_HAS_FULLXID(opaque)) |
272 | 0 | return FirstNormalFullTransactionId; |
273 | | |
274 | | /* Get safexid from deleted page */ |
275 | 0 | contents = ((BTDeletedPageData *) PageGetContents(page)); |
276 | 0 | return contents->safexid; |
277 | 0 | } Unexecuted instantiation: reloptions.c:BTPageGetDeleteXid Unexecuted instantiation: nbtdedup.c:BTPageGetDeleteXid Unexecuted instantiation: nbtinsert.c:BTPageGetDeleteXid Unexecuted instantiation: nbtpage.c:BTPageGetDeleteXid Unexecuted instantiation: nbtpreprocesskeys.c:BTPageGetDeleteXid Unexecuted instantiation: nbtree.c:BTPageGetDeleteXid Unexecuted instantiation: nbtsearch.c:BTPageGetDeleteXid Unexecuted instantiation: nbtsort.c:BTPageGetDeleteXid Unexecuted instantiation: nbtsplitloc.c:BTPageGetDeleteXid Unexecuted instantiation: nbtutils.c:BTPageGetDeleteXid Unexecuted instantiation: nbtvalidate.c:BTPageGetDeleteXid Unexecuted instantiation: nbtxlog.c:BTPageGetDeleteXid Unexecuted instantiation: parallel.c:BTPageGetDeleteXid Unexecuted instantiation: parse_clause.c:BTPageGetDeleteXid Unexecuted instantiation: opclasscmds.c:BTPageGetDeleteXid Unexecuted instantiation: execExpr.c:BTPageGetDeleteXid Unexecuted instantiation: nodeIndexscan.c:BTPageGetDeleteXid Unexecuted instantiation: nodeMergejoin.c:BTPageGetDeleteXid Unexecuted instantiation: plancat.c:BTPageGetDeleteXid Unexecuted instantiation: partprune.c:BTPageGetDeleteXid Unexecuted instantiation: ipci.c:BTPageGetDeleteXid Unexecuted instantiation: skipsupport.c:BTPageGetDeleteXid Unexecuted instantiation: partcache.c:BTPageGetDeleteXid Unexecuted instantiation: typcache.c:BTPageGetDeleteXid Unexecuted instantiation: sortsupport.c:BTPageGetDeleteXid Unexecuted instantiation: tuplesortvariants.c:BTPageGetDeleteXid |
278 | | |
279 | | /* |
280 | | * Is an existing page recyclable? |
281 | | * |
282 | | * This exists to centralize the policy on which deleted pages are now safe to |
283 | | * re-use. However, _bt_pendingfsm_finalize() duplicates some of the same |
284 | | * logic because it doesn't work directly with pages -- keep the two in sync. |
285 | | * |
286 | | * Note: PageIsNew() pages are always safe to recycle, but we can't deal with |
287 | | * them here (caller is responsible for that case themselves). Caller might |
288 | | * well need special handling for new pages anyway. |
289 | | */ |
290 | | static inline bool |
291 | | BTPageIsRecyclable(Page page, Relation heaprel) |
292 | 0 | { |
293 | 0 | BTPageOpaque opaque; |
294 | |
|
295 | 0 | Assert(!PageIsNew(page)); |
296 | 0 | Assert(heaprel != NULL); |
297 | | |
298 | | /* Recycling okay iff page is deleted and safexid is old enough */ |
299 | 0 | opaque = BTPageGetOpaque(page); |
300 | 0 | if (P_ISDELETED(opaque)) |
301 | 0 | { |
302 | 0 | FullTransactionId safexid = BTPageGetDeleteXid(page); |
303 | | |
304 | | /* |
305 | | * The page was deleted, but when? If it was just deleted, a scan |
306 | | * might have seen the downlink to it, and will read the page later. |
307 | | * As long as that can happen, we must keep the deleted page around as |
308 | | * a tombstone. |
309 | | * |
310 | | * For that check if the deletion XID could still be visible to |
311 | | * anyone. If not, then no scan that's still in progress could have |
312 | | * seen its downlink, and we can recycle it. |
313 | | */ |
314 | 0 | return GlobalVisCheckRemovableFullXid(heaprel, safexid); |
315 | 0 | } |
316 | | |
317 | 0 | return false; |
318 | 0 | } Unexecuted instantiation: reloptions.c:BTPageIsRecyclable Unexecuted instantiation: nbtdedup.c:BTPageIsRecyclable Unexecuted instantiation: nbtinsert.c:BTPageIsRecyclable Unexecuted instantiation: nbtpage.c:BTPageIsRecyclable Unexecuted instantiation: nbtpreprocesskeys.c:BTPageIsRecyclable Unexecuted instantiation: nbtree.c:BTPageIsRecyclable Unexecuted instantiation: nbtsearch.c:BTPageIsRecyclable Unexecuted instantiation: nbtsort.c:BTPageIsRecyclable Unexecuted instantiation: nbtsplitloc.c:BTPageIsRecyclable Unexecuted instantiation: nbtutils.c:BTPageIsRecyclable Unexecuted instantiation: nbtvalidate.c:BTPageIsRecyclable Unexecuted instantiation: nbtxlog.c:BTPageIsRecyclable Unexecuted instantiation: parallel.c:BTPageIsRecyclable Unexecuted instantiation: parse_clause.c:BTPageIsRecyclable Unexecuted instantiation: opclasscmds.c:BTPageIsRecyclable Unexecuted instantiation: execExpr.c:BTPageIsRecyclable Unexecuted instantiation: nodeIndexscan.c:BTPageIsRecyclable Unexecuted instantiation: nodeMergejoin.c:BTPageIsRecyclable Unexecuted instantiation: plancat.c:BTPageIsRecyclable Unexecuted instantiation: partprune.c:BTPageIsRecyclable Unexecuted instantiation: ipci.c:BTPageIsRecyclable Unexecuted instantiation: skipsupport.c:BTPageIsRecyclable Unexecuted instantiation: partcache.c:BTPageIsRecyclable Unexecuted instantiation: typcache.c:BTPageIsRecyclable Unexecuted instantiation: sortsupport.c:BTPageIsRecyclable Unexecuted instantiation: tuplesortvariants.c:BTPageIsRecyclable |
319 | | |
320 | | /* |
321 | | * BTVacState and BTPendingFSM are private nbtree.c state used during VACUUM. |
322 | | * They are exported for use by page deletion related code in nbtpage.c. |
323 | | */ |
324 | | typedef struct BTPendingFSM |
325 | | { |
326 | | BlockNumber target; /* Page deleted by current VACUUM */ |
327 | | FullTransactionId safexid; /* Page's BTDeletedPageData.safexid */ |
328 | | } BTPendingFSM; |
329 | | |
330 | | typedef struct BTVacState |
331 | | { |
332 | | IndexVacuumInfo *info; |
333 | | IndexBulkDeleteResult *stats; |
334 | | IndexBulkDeleteCallback callback; |
335 | | void *callback_state; |
336 | | BTCycleId cycleid; |
337 | | MemoryContext pagedelcontext; |
338 | | |
339 | | /* |
340 | | * _bt_pendingfsm_finalize() state |
341 | | */ |
342 | | int bufsize; /* pendingpages space (in # elements) */ |
343 | | int maxbufsize; /* max bufsize that respects work_mem */ |
344 | | BTPendingFSM *pendingpages; /* One entry per newly deleted page */ |
345 | | int npendingpages; /* current # valid pendingpages */ |
346 | | } BTVacState; |
347 | | |
/*
 * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
 * page.  The high key is not a tuple that is used to visit the heap.  It is
 * a pivot tuple (see "Notes on B-Tree tuple format" below for definition).
 * The high key on a page is required to be greater than or equal to any
 * other key that appears on the page.  If we find ourselves trying to
 * insert a key that is strictly > high key, we know we need to move right
 * (this should only happen if the page was split since we examined the
 * parent page).
 *
 * Our insertion algorithm guarantees that we can use the initial least key
 * on our right sibling as the high key.  Once a page is created, its high
 * key changes only if the page is split.
 *
 * On a non-rightmost page, the high key lives in item 1 and data items
 * start in item 2.  Rightmost pages have no high key, so we store data
 * items beginning in item 1.
 */

#define P_HIKEY				((OffsetNumber) 1)
#define P_FIRSTKEY			((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque)	(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
370 | | |
371 | | /* |
372 | | * Notes on B-Tree tuple format, and key and non-key attributes: |
373 | | * |
374 | | * INCLUDE B-Tree indexes have non-key attributes. These are extra |
375 | | * attributes that may be returned by index-only scans, but do not influence |
376 | | * the order of items in the index (formally, non-key attributes are not |
377 | | * considered to be part of the key space). Non-key attributes are only |
378 | | * present in leaf index tuples whose item pointers actually point to heap |
379 | | * tuples (non-pivot tuples). _bt_check_natts() enforces the rules |
380 | | * described here. |
381 | | * |
382 | | * Non-pivot tuple format (plain/non-posting variant): |
383 | | * |
384 | | * t_tid | t_info | key values | INCLUDE columns, if any |
385 | | * |
386 | | * t_tid points to the heap TID, which is a tiebreaker key column as of |
387 | | * BTREE_VERSION 4. |
388 | | * |
389 | | * Non-pivot tuples complement pivot tuples, which only have key columns. |
390 | | * The sole purpose of pivot tuples is to represent how the key space is |
391 | | * separated. In general, any B-Tree index that has more than one level |
392 | | * (i.e. any index that does not just consist of a metapage and a single |
393 | | * leaf root page) must have some number of pivot tuples, since pivot |
394 | | * tuples are used for traversing the tree. Suffix truncation can omit |
395 | | * trailing key columns when a new pivot is formed, which makes minus |
396 | | * infinity their logical value. Since BTREE_VERSION 4 indexes treat heap |
397 | | * TID as a trailing key column that ensures that all index tuples are |
398 | | * physically unique, it is necessary to represent heap TID as a trailing |
399 | | * key column in pivot tuples, though very often this can be truncated |
400 | | * away, just like any other key column. (Actually, the heap TID is |
401 | | * omitted rather than truncated, since its representation is different to |
402 | | * the non-pivot representation.) |
403 | | * |
404 | | * Pivot tuple format: |
405 | | * |
406 | | * t_tid | t_info | key values | [heap TID] |
407 | | * |
408 | | * We store the number of columns present inside pivot tuples by abusing |
409 | | * their t_tid offset field, since pivot tuples never need to store a real |
410 | | * offset (pivot tuples generally store a downlink in t_tid, though). The |
411 | | * offset field only stores the number of columns/attributes when the |
412 | | * INDEX_ALT_TID_MASK bit is set, which doesn't count the trailing heap |
413 | | * TID column sometimes stored in pivot tuples -- that's represented by |
414 | | * the presence of BT_PIVOT_HEAP_TID_ATTR. The INDEX_ALT_TID_MASK bit in |
415 | | * t_info is always set on BTREE_VERSION 4 pivot tuples, since |
416 | | * BTreeTupleIsPivot() must work reliably on heapkeyspace versions. |
417 | | * |
418 | | * In version 2 or version 3 (!heapkeyspace) indexes, INDEX_ALT_TID_MASK |
419 | | * might not be set in pivot tuples. BTreeTupleIsPivot() won't work |
420 | | * reliably as a result. The number of columns stored is implicitly the |
421 | | * same as the number of columns in the index, just like any non-pivot |
422 | | * tuple. (The number of columns stored should not vary, since suffix |
423 | | * truncation of key columns is unsafe within any !heapkeyspace index.) |
424 | | * |
425 | | * The 12 least significant bits from t_tid's offset number are used to |
426 | | * represent the number of key columns within a pivot tuple. This leaves 4 |
427 | | * status bits (BT_STATUS_OFFSET_MASK bits), which are shared by all tuples |
428 | | * that have the INDEX_ALT_TID_MASK bit set (set in t_info) to store basic |
429 | | * tuple metadata. BTreeTupleIsPivot() and BTreeTupleIsPosting() use the |
430 | | * BT_STATUS_OFFSET_MASK bits. |
431 | | * |
432 | | * Sometimes non-pivot tuples also use a representation that repurposes |
433 | | * t_tid to store metadata rather than a TID. PostgreSQL v13 introduced a |
434 | | * new non-pivot tuple format to support deduplication: posting list |
435 | | * tuples. Deduplication merges together multiple equal non-pivot tuples |
436 | | * into a logically equivalent, space efficient representation. A posting |
437 | | * list is an array of ItemPointerData elements. Non-pivot tuples are |
438 | | * merged together to form posting list tuples lazily, at the point where |
439 | | * we'd otherwise have to split a leaf page. |
440 | | * |
441 | | * Posting tuple format (alternative non-pivot tuple representation): |
442 | | * |
443 | | * t_tid | t_info | key values | posting list (TID array) |
444 | | * |
445 | | * Posting list tuples are recognized as such by having the |
446 | | * INDEX_ALT_TID_MASK status bit set in t_info and the BT_IS_POSTING status |
447 | | * bit set in t_tid's offset number. These flags redefine the content of |
448 | | * the posting tuple's t_tid to store the location of the posting list |
449 | | * (instead of a block number), as well as the total number of heap TIDs |
450 | | * present in the tuple (instead of a real offset number). |
451 | | * |
452 | | * The 12 least significant bits from t_tid's offset number are used to |
453 | | * represent the number of heap TIDs present in the tuple, leaving 4 status |
454 | | * bits (the BT_STATUS_OFFSET_MASK bits). Like any non-pivot tuple, the |
455 | | * number of columns stored is always implicitly the total number in the |
456 | | * index (in practice there can never be non-key columns stored, since |
457 | | * deduplication is not supported with INCLUDE indexes). |
458 | | */ |
459 | 0 | #define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT |
460 | | |
461 | | /* Item pointer offset bit masks */ |
462 | 0 | #define BT_OFFSET_MASK 0x0FFF |
463 | | #define BT_STATUS_OFFSET_MASK 0xF000 |
464 | | /* BT_STATUS_OFFSET_MASK status bits */ |
465 | 0 | #define BT_PIVOT_HEAP_TID_ATTR 0x1000 |
466 | 0 | #define BT_IS_POSTING 0x2000 |
467 | | |
468 | | /* |
469 | | * Mask allocated for number of keys in index tuple must be able to fit |
470 | | * maximum possible number of index attributes |
471 | | */ |
472 | | StaticAssertDecl(BT_OFFSET_MASK >= INDEX_MAX_KEYS, |
473 | | "BT_OFFSET_MASK can't fit INDEX_MAX_KEYS"); |
474 | | |
475 | | /* |
476 | | * Note: BTreeTupleIsPivot() can have false negatives (but not false |
477 | | * positives) when used with !heapkeyspace indexes |
478 | | */ |
479 | | static inline bool |
480 | | BTreeTupleIsPivot(IndexTuple itup) |
481 | 0 | { |
482 | 0 | if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) |
483 | 0 | return false; |
484 | | /* absence of BT_IS_POSTING in offset number indicates pivot tuple */ |
485 | 0 | if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) != 0) |
486 | 0 | return false; |
487 | | |
488 | 0 | return true; |
489 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleIsPivot Unexecuted instantiation: nbtdedup.c:BTreeTupleIsPivot Unexecuted instantiation: nbtinsert.c:BTreeTupleIsPivot Unexecuted instantiation: nbtpage.c:BTreeTupleIsPivot Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleIsPivot Unexecuted instantiation: nbtree.c:BTreeTupleIsPivot Unexecuted instantiation: nbtsearch.c:BTreeTupleIsPivot Unexecuted instantiation: nbtsort.c:BTreeTupleIsPivot Unexecuted instantiation: nbtsplitloc.c:BTreeTupleIsPivot Unexecuted instantiation: nbtutils.c:BTreeTupleIsPivot Unexecuted instantiation: nbtvalidate.c:BTreeTupleIsPivot Unexecuted instantiation: nbtxlog.c:BTreeTupleIsPivot Unexecuted instantiation: parallel.c:BTreeTupleIsPivot Unexecuted instantiation: parse_clause.c:BTreeTupleIsPivot Unexecuted instantiation: opclasscmds.c:BTreeTupleIsPivot Unexecuted instantiation: execExpr.c:BTreeTupleIsPivot Unexecuted instantiation: nodeIndexscan.c:BTreeTupleIsPivot Unexecuted instantiation: nodeMergejoin.c:BTreeTupleIsPivot Unexecuted instantiation: plancat.c:BTreeTupleIsPivot Unexecuted instantiation: partprune.c:BTreeTupleIsPivot Unexecuted instantiation: ipci.c:BTreeTupleIsPivot Unexecuted instantiation: skipsupport.c:BTreeTupleIsPivot Unexecuted instantiation: partcache.c:BTreeTupleIsPivot Unexecuted instantiation: typcache.c:BTreeTupleIsPivot Unexecuted instantiation: sortsupport.c:BTreeTupleIsPivot Unexecuted instantiation: tuplesortvariants.c:BTreeTupleIsPivot |
490 | | |
491 | | static inline bool |
492 | | BTreeTupleIsPosting(IndexTuple itup) |
493 | 0 | { |
494 | 0 | if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) |
495 | 0 | return false; |
496 | | /* presence of BT_IS_POSTING in offset number indicates posting tuple */ |
497 | 0 | if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) == 0) |
498 | 0 | return false; |
499 | | |
500 | 0 | return true; |
501 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleIsPosting Unexecuted instantiation: nbtdedup.c:BTreeTupleIsPosting Unexecuted instantiation: nbtinsert.c:BTreeTupleIsPosting Unexecuted instantiation: nbtpage.c:BTreeTupleIsPosting Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleIsPosting Unexecuted instantiation: nbtree.c:BTreeTupleIsPosting Unexecuted instantiation: nbtsearch.c:BTreeTupleIsPosting Unexecuted instantiation: nbtsort.c:BTreeTupleIsPosting Unexecuted instantiation: nbtsplitloc.c:BTreeTupleIsPosting Unexecuted instantiation: nbtutils.c:BTreeTupleIsPosting Unexecuted instantiation: nbtvalidate.c:BTreeTupleIsPosting Unexecuted instantiation: nbtxlog.c:BTreeTupleIsPosting Unexecuted instantiation: parallel.c:BTreeTupleIsPosting Unexecuted instantiation: parse_clause.c:BTreeTupleIsPosting Unexecuted instantiation: opclasscmds.c:BTreeTupleIsPosting Unexecuted instantiation: execExpr.c:BTreeTupleIsPosting Unexecuted instantiation: nodeIndexscan.c:BTreeTupleIsPosting Unexecuted instantiation: nodeMergejoin.c:BTreeTupleIsPosting Unexecuted instantiation: plancat.c:BTreeTupleIsPosting Unexecuted instantiation: partprune.c:BTreeTupleIsPosting Unexecuted instantiation: ipci.c:BTreeTupleIsPosting Unexecuted instantiation: skipsupport.c:BTreeTupleIsPosting Unexecuted instantiation: partcache.c:BTreeTupleIsPosting Unexecuted instantiation: typcache.c:BTreeTupleIsPosting Unexecuted instantiation: sortsupport.c:BTreeTupleIsPosting Unexecuted instantiation: tuplesortvariants.c:BTreeTupleIsPosting |
502 | | |
/*
 * Convert itup into a posting list tuple with nhtids heap TIDs.  The t_tid
 * offset number field stores nhtids (with the BT_IS_POSTING status bit set),
 * while the block number field stores postingoffset, the byte offset of the
 * posting list within the tuple.
 */
static inline void
BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
{
	Assert(nhtids > 1);			/* single-TID tuples use plain representation */
	Assert((nhtids & BT_STATUS_OFFSET_MASK) == 0);	/* must fit below status bits */
	Assert((size_t) postingoffset == MAXALIGN(postingoffset));
	Assert(postingoffset < INDEX_SIZE_MASK);
	Assert(!BTreeTupleIsPivot(itup));

	itup->t_info |= INDEX_ALT_TID_MASK;
	ItemPointerSetOffsetNumber(&itup->t_tid, (nhtids | BT_IS_POSTING));
	ItemPointerSetBlockNumber(&itup->t_tid, postingoffset);
}
516 | | |
517 | | static inline uint16 |
518 | | BTreeTupleGetNPosting(IndexTuple posting) |
519 | 0 | { |
520 | 0 | OffsetNumber existing; |
521 | |
|
522 | 0 | Assert(BTreeTupleIsPosting(posting)); |
523 | |
|
524 | 0 | existing = ItemPointerGetOffsetNumberNoCheck(&posting->t_tid); |
525 | 0 | return (existing & BT_OFFSET_MASK); |
526 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtdedup.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtinsert.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtpage.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtree.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtsearch.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtsort.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtsplitloc.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtutils.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtvalidate.c:BTreeTupleGetNPosting Unexecuted instantiation: nbtxlog.c:BTreeTupleGetNPosting Unexecuted instantiation: parallel.c:BTreeTupleGetNPosting Unexecuted instantiation: parse_clause.c:BTreeTupleGetNPosting Unexecuted instantiation: opclasscmds.c:BTreeTupleGetNPosting Unexecuted instantiation: execExpr.c:BTreeTupleGetNPosting Unexecuted instantiation: nodeIndexscan.c:BTreeTupleGetNPosting Unexecuted instantiation: nodeMergejoin.c:BTreeTupleGetNPosting Unexecuted instantiation: plancat.c:BTreeTupleGetNPosting Unexecuted instantiation: partprune.c:BTreeTupleGetNPosting Unexecuted instantiation: ipci.c:BTreeTupleGetNPosting Unexecuted instantiation: skipsupport.c:BTreeTupleGetNPosting Unexecuted instantiation: partcache.c:BTreeTupleGetNPosting Unexecuted instantiation: typcache.c:BTreeTupleGetNPosting Unexecuted instantiation: sortsupport.c:BTreeTupleGetNPosting Unexecuted instantiation: tuplesortvariants.c:BTreeTupleGetNPosting |
527 | | |
528 | | static inline uint32 |
529 | | BTreeTupleGetPostingOffset(IndexTuple posting) |
530 | 0 | { |
531 | 0 | Assert(BTreeTupleIsPosting(posting)); |
532 | |
|
533 | 0 | return ItemPointerGetBlockNumberNoCheck(&posting->t_tid); |
534 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtdedup.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtinsert.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtpage.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtree.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtsearch.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtsort.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtsplitloc.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtutils.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtvalidate.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nbtxlog.c:BTreeTupleGetPostingOffset Unexecuted instantiation: parallel.c:BTreeTupleGetPostingOffset Unexecuted instantiation: parse_clause.c:BTreeTupleGetPostingOffset Unexecuted instantiation: opclasscmds.c:BTreeTupleGetPostingOffset Unexecuted instantiation: execExpr.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nodeIndexscan.c:BTreeTupleGetPostingOffset Unexecuted instantiation: nodeMergejoin.c:BTreeTupleGetPostingOffset Unexecuted instantiation: plancat.c:BTreeTupleGetPostingOffset Unexecuted instantiation: partprune.c:BTreeTupleGetPostingOffset Unexecuted instantiation: ipci.c:BTreeTupleGetPostingOffset Unexecuted instantiation: skipsupport.c:BTreeTupleGetPostingOffset Unexecuted instantiation: partcache.c:BTreeTupleGetPostingOffset Unexecuted instantiation: typcache.c:BTreeTupleGetPostingOffset Unexecuted instantiation: sortsupport.c:BTreeTupleGetPostingOffset Unexecuted instantiation: tuplesortvariants.c:BTreeTupleGetPostingOffset |
535 | | |
536 | | static inline ItemPointer |
537 | | BTreeTupleGetPosting(IndexTuple posting) |
538 | 0 | { |
539 | 0 | return (ItemPointer) ((char *) posting + |
540 | 0 | BTreeTupleGetPostingOffset(posting)); |
541 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleGetPosting Unexecuted instantiation: nbtdedup.c:BTreeTupleGetPosting Unexecuted instantiation: nbtinsert.c:BTreeTupleGetPosting Unexecuted instantiation: nbtpage.c:BTreeTupleGetPosting Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleGetPosting Unexecuted instantiation: nbtree.c:BTreeTupleGetPosting Unexecuted instantiation: nbtsearch.c:BTreeTupleGetPosting Unexecuted instantiation: nbtsort.c:BTreeTupleGetPosting Unexecuted instantiation: nbtsplitloc.c:BTreeTupleGetPosting Unexecuted instantiation: nbtutils.c:BTreeTupleGetPosting Unexecuted instantiation: nbtvalidate.c:BTreeTupleGetPosting Unexecuted instantiation: nbtxlog.c:BTreeTupleGetPosting Unexecuted instantiation: parallel.c:BTreeTupleGetPosting Unexecuted instantiation: parse_clause.c:BTreeTupleGetPosting Unexecuted instantiation: opclasscmds.c:BTreeTupleGetPosting Unexecuted instantiation: execExpr.c:BTreeTupleGetPosting Unexecuted instantiation: nodeIndexscan.c:BTreeTupleGetPosting Unexecuted instantiation: nodeMergejoin.c:BTreeTupleGetPosting Unexecuted instantiation: plancat.c:BTreeTupleGetPosting Unexecuted instantiation: partprune.c:BTreeTupleGetPosting Unexecuted instantiation: ipci.c:BTreeTupleGetPosting Unexecuted instantiation: skipsupport.c:BTreeTupleGetPosting Unexecuted instantiation: partcache.c:BTreeTupleGetPosting Unexecuted instantiation: typcache.c:BTreeTupleGetPosting Unexecuted instantiation: sortsupport.c:BTreeTupleGetPosting Unexecuted instantiation: tuplesortvariants.c:BTreeTupleGetPosting |
542 | | |
543 | | static inline ItemPointer |
544 | | BTreeTupleGetPostingN(IndexTuple posting, int n) |
545 | 0 | { |
546 | 0 | return BTreeTupleGetPosting(posting) + n; |
547 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtdedup.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtinsert.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtpage.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtree.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtsearch.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtsort.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtsplitloc.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtutils.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtvalidate.c:BTreeTupleGetPostingN Unexecuted instantiation: nbtxlog.c:BTreeTupleGetPostingN Unexecuted instantiation: parallel.c:BTreeTupleGetPostingN Unexecuted instantiation: parse_clause.c:BTreeTupleGetPostingN Unexecuted instantiation: opclasscmds.c:BTreeTupleGetPostingN Unexecuted instantiation: execExpr.c:BTreeTupleGetPostingN Unexecuted instantiation: nodeIndexscan.c:BTreeTupleGetPostingN Unexecuted instantiation: nodeMergejoin.c:BTreeTupleGetPostingN Unexecuted instantiation: plancat.c:BTreeTupleGetPostingN Unexecuted instantiation: partprune.c:BTreeTupleGetPostingN Unexecuted instantiation: ipci.c:BTreeTupleGetPostingN Unexecuted instantiation: skipsupport.c:BTreeTupleGetPostingN Unexecuted instantiation: partcache.c:BTreeTupleGetPostingN Unexecuted instantiation: typcache.c:BTreeTupleGetPostingN Unexecuted instantiation: sortsupport.c:BTreeTupleGetPostingN Unexecuted instantiation: tuplesortvariants.c:BTreeTupleGetPostingN |
548 | | |
549 | | /* |
550 | | * Get/set downlink block number in pivot tuple. |
551 | | * |
552 | | * Note: Cannot assert that tuple is a pivot tuple. If we did so then |
553 | | * !heapkeyspace indexes would exhibit false positive assertion failures. |
554 | | */ |
555 | | static inline BlockNumber |
556 | | BTreeTupleGetDownLink(IndexTuple pivot) |
557 | 0 | { |
558 | 0 | return ItemPointerGetBlockNumberNoCheck(&pivot->t_tid); |
559 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtdedup.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtinsert.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtpage.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtree.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtsearch.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtsort.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtsplitloc.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtutils.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtvalidate.c:BTreeTupleGetDownLink Unexecuted instantiation: nbtxlog.c:BTreeTupleGetDownLink Unexecuted instantiation: parallel.c:BTreeTupleGetDownLink Unexecuted instantiation: parse_clause.c:BTreeTupleGetDownLink Unexecuted instantiation: opclasscmds.c:BTreeTupleGetDownLink Unexecuted instantiation: execExpr.c:BTreeTupleGetDownLink Unexecuted instantiation: nodeIndexscan.c:BTreeTupleGetDownLink Unexecuted instantiation: nodeMergejoin.c:BTreeTupleGetDownLink Unexecuted instantiation: plancat.c:BTreeTupleGetDownLink Unexecuted instantiation: partprune.c:BTreeTupleGetDownLink Unexecuted instantiation: ipci.c:BTreeTupleGetDownLink Unexecuted instantiation: skipsupport.c:BTreeTupleGetDownLink Unexecuted instantiation: partcache.c:BTreeTupleGetDownLink Unexecuted instantiation: typcache.c:BTreeTupleGetDownLink Unexecuted instantiation: sortsupport.c:BTreeTupleGetDownLink Unexecuted instantiation: tuplesortvariants.c:BTreeTupleGetDownLink |
560 | | |
static inline void
BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
{
	/* Downlink block number is stored in t_tid's block number field */
	ItemPointerSetBlockNumber(&pivot->t_tid, blkno);
}
566 | | |
/*
 * Get number of attributes within tuple.
 *
 * Pivot tuples store their attribute count explicitly in t_tid's offset
 * number field; non-pivot tuples implicitly contain every index attribute.
 *
 * Note that this does not include an implicit tiebreaker heap TID
 * attribute, if any.  Note also that the number of key attributes must be
 * explicitly represented in all heapkeyspace pivot tuples.
 *
 * Note: This is defined as a macro rather than an inline function to
 * avoid including rel.h.
 */
#define BTreeTupleGetNAtts(itup, rel)	\
	( \
		(BTreeTupleIsPivot(itup)) ? \
		( \
			ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_OFFSET_MASK \
		) \
		: \
		IndexRelationGetNumberOfAttributes(rel) \
	)
586 | | |
/*
 * Set number of key attributes in tuple.
 *
 * The heap TID tiebreaker attribute bit may also be set here, indicating that
 * a heap TID value will be stored at the end of the tuple (i.e. using the
 * special pivot tuple representation).
 */
static inline void
BTreeTupleSetNAtts(IndexTuple itup, uint16 nkeyatts, bool heaptid)
{
	Assert(nkeyatts <= INDEX_MAX_KEYS);
	Assert((nkeyatts & BT_STATUS_OFFSET_MASK) == 0);	/* must fit below status bits */
	Assert(!heaptid || nkeyatts > 0);	/* can't keep heap TID without keys */
	Assert(!BTreeTupleIsPivot(itup) || nkeyatts == 0);

	/* Mark tuple as using the alternative t_tid representation */
	itup->t_info |= INDEX_ALT_TID_MASK;

	if (heaptid)
		nkeyatts |= BT_PIVOT_HEAP_TID_ATTR;

	/* BT_IS_POSTING bit is deliberately unset here */
	ItemPointerSetOffsetNumber(&itup->t_tid, nkeyatts);
	Assert(BTreeTupleIsPivot(itup));
}
611 | | |
612 | | /* |
613 | | * Get/set leaf page's "top parent" link from its high key. Used during page |
614 | | * deletion. |
615 | | * |
616 | | * Note: Cannot assert that tuple is a pivot tuple. If we did so then |
617 | | * !heapkeyspace indexes would exhibit false positive assertion failures. |
618 | | */ |
static inline BlockNumber
BTreeTupleGetTopParent(IndexTuple leafhikey)
{
	/* Top parent link is stored in high key's t_tid block number field */
	return ItemPointerGetBlockNumberNoCheck(&leafhikey->t_tid);
}
624 | | |
static inline void
BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
{
	ItemPointerSetBlockNumber(&leafhikey->t_tid, blkno);
	/* Reset offset number to zero attributes, without BT_PIVOT_HEAP_TID_ATTR */
	BTreeTupleSetNAtts(leafhikey, 0, false);
}
631 | | |
/*
 * Get tiebreaker heap TID attribute, if any.  Returns NULL for pivot tuples
 * whose heap TID attribute was truncated away.
 *
 * This returns the first/lowest heap TID in the case of a posting list tuple.
 */
static inline ItemPointer
BTreeTupleGetHeapTID(IndexTuple itup)
{
	if (BTreeTupleIsPivot(itup))
	{
		/* Pivot tuple heap TID representation? */
		if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) &
			 BT_PIVOT_HEAP_TID_ATTR) != 0)
			/* Heap TID is stored in the final bytes of the tuple */
			return (ItemPointer) ((char *) itup + IndexTupleSize(itup) -
								  sizeof(ItemPointerData));

		/* Heap TID attribute was truncated */
		return NULL;
	}
	else if (BTreeTupleIsPosting(itup))
		return BTreeTupleGetPosting(itup);

	/* Plain non-pivot tuple: t_tid is the heap TID itself */
	return &itup->t_tid;
}
656 | | |
657 | | /* |
658 | | * Get maximum heap TID attribute, which could be the only TID in the case of |
659 | | * a non-pivot tuple that does not have a posting list. |
660 | | * |
661 | | * Works with non-pivot tuples only. |
662 | | */ |
663 | | static inline ItemPointer |
664 | | BTreeTupleGetMaxHeapTID(IndexTuple itup) |
665 | 0 | { |
666 | 0 | Assert(!BTreeTupleIsPivot(itup)); |
667 | |
|
668 | 0 | if (BTreeTupleIsPosting(itup)) |
669 | 0 | { |
670 | 0 | uint16 nposting = BTreeTupleGetNPosting(itup); |
671 | |
|
672 | 0 | return BTreeTupleGetPostingN(itup, nposting - 1); |
673 | 0 | } |
674 | | |
675 | 0 | return &itup->t_tid; |
676 | 0 | } Unexecuted instantiation: reloptions.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtdedup.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtinsert.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtpage.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtpreprocesskeys.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtree.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtsearch.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtsort.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtsplitloc.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtutils.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtvalidate.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nbtxlog.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: parallel.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: parse_clause.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: opclasscmds.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: execExpr.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nodeIndexscan.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: nodeMergejoin.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: plancat.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: partprune.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: ipci.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: skipsupport.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: partcache.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: typcache.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: sortsupport.c:BTreeTupleGetMaxHeapTID Unexecuted instantiation: tuplesortvariants.c:BTreeTupleGetMaxHeapTID |
677 | | |
/*
 * Operator strategy numbers for B-tree have been moved to access/stratnum.h,
 * because many places need to use them in ScanKeyInit() calls.
 *
 * The strategy numbers are chosen so that we can commute them by
 * subtraction, thus (e.g. "less than" commutes to "greater than"):
 */
#define BTCommuteStrategyNumber(strat)	(BTMaxStrategyNumber + 1 - (strat))
686 | | |
687 | | /* |
688 | | * When a new operator class is declared, we require that the user |
689 | | * supply us with an amproc procedure (BTORDER_PROC) for determining |
690 | | * whether, for two keys a and b, a < b, a = b, or a > b. This routine |
691 | | * must return < 0, 0, > 0, respectively, in these three cases. |
692 | | * |
693 | | * To facilitate accelerated sorting, an operator class may choose to |
694 | | * offer a second procedure (BTSORTSUPPORT_PROC). For full details, see |
695 | | * src/include/utils/sortsupport.h. |
696 | | * |
697 | | * To support window frames defined by "RANGE offset PRECEDING/FOLLOWING", |
698 | | * an operator class may choose to offer a third amproc procedure |
699 | | * (BTINRANGE_PROC), independently of whether it offers sortsupport. |
700 | | * For full details, see doc/src/sgml/btree.sgml. |
701 | | * |
702 | | * To facilitate B-Tree deduplication, an operator class may choose to |
703 | | * offer a forth amproc procedure (BTEQUALIMAGE_PROC). For full details, |
704 | | * see doc/src/sgml/btree.sgml. |
705 | | * |
706 | | * An operator class may choose to offer a fifth amproc procedure |
707 | | * (BTOPTIONS_PROC). These procedures define a set of user-visible |
708 | | * parameters that can be used to control operator class behavior. None of |
709 | | * the built-in B-Tree operator classes currently register an "options" proc. |
710 | | * |
711 | | * To facilitate more efficient B-Tree skip scans, an operator class may |
712 | | * choose to offer a sixth amproc procedure (BTSKIPSUPPORT_PROC). For full |
713 | | * details, see src/include/utils/skipsupport.h. |
714 | | */ |
715 | | |
#define BTORDER_PROC		1	/* required: 3-way comparison proc */
#define BTSORTSUPPORT_PROC	2	/* optional: accelerated sorting */
#define BTINRANGE_PROC		3	/* optional: RANGE offset window frames */
#define BTEQUALIMAGE_PROC	4	/* optional: deduplication safety */
#define BTOPTIONS_PROC		5	/* optional: opclass parameters */
#define BTSKIPSUPPORT_PROC	6	/* optional: skip scan support */
#define BTNProcs			6
723 | | |
/*
 * We need to be able to tell the difference between read and write
 * requests for pages, in order to do locking correctly.
 */
#define BT_READ			BUFFER_LOCK_SHARE		/* shared buffer lock */
#define BT_WRITE		BUFFER_LOCK_EXCLUSIVE	/* exclusive buffer lock */
731 | | |
732 | | /* |
733 | | * BTStackData -- As we descend a tree, we push the location of pivot |
734 | | * tuples whose downlink we are about to follow onto a private stack. If |
735 | | * we split a leaf, we use this stack to walk back up the tree and insert |
736 | | * data into its parent page at the correct location. We also have to |
737 | | * recursively insert into the grandparent page if and when the parent page |
738 | | * splits. Our private stack can become stale due to concurrent page |
739 | | * splits and page deletions, but it should never give us an irredeemably |
740 | | * bad picture. |
741 | | */ |
typedef struct BTStackData
{
	BlockNumber bts_blkno;		/* block containing the pivot tuple */
	OffsetNumber bts_offset;	/* offset of pivot whose downlink we followed */
	struct BTStackData *bts_parent; /* stack entry for next level up */
} BTStackData;

typedef BTStackData *BTStack;
750 | | |
751 | | /* |
752 | | * BTScanInsertData is the btree-private state needed to find an initial |
753 | | * position for an indexscan, or to insert new tuples -- an "insertion |
754 | | * scankey" (not to be confused with a search scankey). It's used to descend |
755 | | * a B-Tree using _bt_search. |
756 | | * |
757 | | * heapkeyspace indicates if we expect all keys in the index to be physically |
758 | | * unique because heap TID is used as a tiebreaker attribute, and if index may |
759 | | * have truncated key attributes in pivot tuples. This is actually a property |
760 | | * of the index relation itself (not an indexscan). heapkeyspace indexes are |
761 | | * indexes whose version is >= version 4. It's convenient to keep this close |
762 | | * by, rather than accessing the metapage repeatedly. |
763 | | * |
764 | | * allequalimage is set to indicate that deduplication is safe for the index. |
765 | | * This is also a property of the index relation rather than an indexscan. |
766 | | * |
767 | | * anynullkeys indicates if any of the keys had NULL value when scankey was |
768 | | * built from index tuple (note that already-truncated tuple key attributes |
769 | | * set NULL as a placeholder key value, which also affects value of |
770 | | * anynullkeys). This is a convenience for unique index non-pivot tuple |
771 | | * insertion, which usually temporarily unsets scantid, but shouldn't iff |
772 | | * anynullkeys is true. Value generally matches non-pivot tuple's HasNulls |
773 | | * bit, but may not when inserting into an INCLUDE index (tuple header value |
774 | | * is affected by the NULL-ness of both key and non-key attributes). |
775 | | * |
776 | | * See comments in _bt_first for an explanation of the nextkey and backward |
777 | | * fields. |
778 | | * |
779 | | * scantid is the heap TID that is used as a final tiebreaker attribute. It |
780 | | * is set to NULL when index scan doesn't need to find a position for a |
781 | | * specific physical tuple. Must be set when inserting new tuples into |
782 | | * heapkeyspace indexes, since every tuple in the tree unambiguously belongs |
783 | | * in one exact position (it's never set with !heapkeyspace indexes, though). |
784 | | * Despite the representational difference, nbtree search code considers |
785 | | * scantid to be just another insertion scankey attribute. |
786 | | * |
787 | | * scankeys is an array of scan key entries for attributes that are compared |
788 | | * before scantid (user-visible attributes). keysz is the size of the array. |
789 | | * During insertion, there must be a scan key for every attribute, but when |
790 | | * starting a regular index scan some can be omitted. The array is used as a |
791 | | * flexible array member, though it's sized in a way that makes it possible to |
792 | | * use stack allocations. See nbtree/README for full details. |
793 | | */ |
typedef struct BTScanInsertData
{
	bool		heapkeyspace;	/* version >= 4 index with heap TID tiebreaker? */
	bool		allequalimage;	/* deduplication safe for index? */
	bool		anynullkeys;	/* any NULL key values? (see comments above) */
	bool		nextkey;		/* see _bt_first for nextkey/backward semantics */
	bool		backward;		/* backward index scan? */
	ItemPointer scantid;		/* tiebreaker for scankeys */
	int			keysz;			/* Size of scankeys array */
	ScanKeyData scankeys[INDEX_MAX_KEYS];	/* Must appear last */
} BTScanInsertData;

typedef BTScanInsertData *BTScanInsert;
807 | | |
808 | | /* |
809 | | * BTInsertStateData is a working area used during insertion. |
810 | | * |
811 | | * This is filled in after descending the tree to the first leaf page the new |
812 | | * tuple might belong on. Tracks the current position while performing |
813 | | * uniqueness check, before we have determined which exact page to insert |
814 | | * to. |
815 | | * |
816 | | * (This should be private to nbtinsert.c, but it's also used by |
817 | | * _bt_binsrch_insert) |
818 | | */ |
819 | | typedef struct BTInsertStateData |
820 | | { |
821 | | IndexTuple itup; /* Item we're inserting */ |
822 | | Size itemsz; /* Size of itup -- should be MAXALIGN()'d */ |
823 | | BTScanInsert itup_key; /* Insertion scankey */ |
824 | | |
825 | | /* Buffer containing leaf page we're likely to insert itup on */ |
826 | | Buffer buf; |
827 | | |
828 | | /* |
829 | | * Cache of bounds within the current buffer. Only used for insertions |
830 | | * where _bt_check_unique is called. See _bt_binsrch_insert and |
831 | | * _bt_findinsertloc for details. |
832 | | */ |
833 | | bool bounds_valid; |
834 | | OffsetNumber low; |
835 | | OffsetNumber stricthigh; |
836 | | |
837 | | /* |
838 | | * if _bt_binsrch_insert found the location inside existing posting list, |
839 | | * save the position inside the list. -1 sentinel value indicates overlap |
840 | | * with an existing posting list tuple that has its LP_DEAD bit set. |
841 | | */ |
842 | | int postingoff; |
843 | | } BTInsertStateData; |
844 | | |
845 | | typedef BTInsertStateData *BTInsertState; |
846 | | |
847 | | /* |
848 | | * State used to representing an individual pending tuple during |
849 | | * deduplication. |
850 | | */ |
851 | | typedef struct BTDedupInterval |
852 | | { |
853 | | OffsetNumber baseoff; |
854 | | uint16 nitems; |
855 | | } BTDedupInterval; |
856 | | |
857 | | /* |
858 | | * BTDedupStateData is a working area used during deduplication. |
859 | | * |
860 | | * The status info fields track the state of a whole-page deduplication pass. |
861 | | * State about the current pending posting list is also tracked. |
862 | | * |
863 | | * A pending posting list is comprised of a contiguous group of equal items |
864 | | * from the page, starting from page offset number 'baseoff'. This is the |
865 | | * offset number of the "base" tuple for new posting list. 'nitems' is the |
866 | | * current total number of existing items from the page that will be merged to |
867 | | * make a new posting list tuple, including the base tuple item. (Existing |
868 | | * items may themselves be posting list tuples, or regular non-pivot tuples.) |
869 | | * |
870 | | * The total size of the existing tuples to be freed when pending posting list |
871 | | * is processed gets tracked by 'phystupsize'. This information allows |
872 | | * deduplication to calculate the space saving for each new posting list |
873 | | * tuple, and for the entire pass over the page as a whole. |
874 | | */ |
875 | | typedef struct BTDedupStateData |
876 | | { |
877 | | /* Deduplication status info for entire pass over page */ |
878 | | bool deduplicate; /* Still deduplicating page? */ |
879 | | int nmaxitems; /* Number of max-sized tuples so far */ |
880 | | Size maxpostingsize; /* Limit on size of final tuple */ |
881 | | |
882 | | /* Metadata about base tuple of current pending posting list */ |
883 | | IndexTuple base; /* Use to form new posting list */ |
884 | | OffsetNumber baseoff; /* page offset of base */ |
885 | | Size basetupsize; /* base size without original posting list */ |
886 | | |
887 | | /* Other metadata about pending posting list */ |
888 | | ItemPointer htids; /* Heap TIDs in pending posting list */ |
889 | | int nhtids; /* Number of heap TIDs in htids array */ |
890 | | int nitems; /* Number of existing tuples/line pointers */ |
891 | | Size phystupsize; /* Includes line pointer overhead */ |
892 | | |
893 | | /* |
894 | | * Array of tuples to go on new version of the page. Contains one entry |
895 | | * for each group of consecutive items. Note that existing tuples that |
896 | | * will not become posting list tuples do not appear in the array (they |
897 | | * are implicitly unchanged by deduplication pass). |
898 | | */ |
899 | | int nintervals; /* current number of intervals in array */ |
900 | | BTDedupInterval intervals[MaxIndexTuplesPerPage]; |
901 | | } BTDedupStateData; |
902 | | |
903 | | typedef BTDedupStateData *BTDedupState; |
904 | | |
905 | | /* |
906 | | * BTVacuumPostingData is state that represents how to VACUUM (or delete) a |
907 | | * posting list tuple when some (though not all) of its TIDs are to be |
908 | | * deleted. |
909 | | * |
910 | | * Convention is that itup field is the original posting list tuple on input, |
911 | | * and palloc()'d final tuple used to overwrite existing tuple on output. |
912 | | */ |
913 | | typedef struct BTVacuumPostingData |
914 | | { |
915 | | /* Tuple that will be/was updated */ |
916 | | IndexTuple itup; |
917 | | OffsetNumber updatedoffset; |
918 | | |
919 | | /* State needed to describe final itup in WAL */ |
920 | | uint16 ndeletedtids; |
921 | | uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]; |
922 | | } BTVacuumPostingData; |
923 | | |
924 | | typedef BTVacuumPostingData *BTVacuumPosting; |
925 | | |
926 | | /* |
927 | | * BTScanOpaqueData is the btree-private state needed for an indexscan. |
928 | | * This consists of preprocessed scan keys (see _bt_preprocess_keys() for |
929 | | * details of the preprocessing), information about the current location |
930 | | * of the scan, and information about the marked location, if any. (We use |
931 | | * BTScanPosData to represent the data needed for each of current and marked |
932 | | * locations.) In addition we can remember some known-killed index entries |
933 | | * that must be marked before we can move off the current page. |
934 | | * |
935 | | * Index scans work a page at a time: we pin and read-lock the page, identify |
936 | | * all the matching items on the page and save them in BTScanPosData, then |
937 | | * release the read-lock while returning the items to the caller for |
938 | | * processing. This approach minimizes lock/unlock traffic. We must always |
939 | | * drop the lock to make it okay for caller to process the returned items. |
940 | | * Whether or not we can also release the pin during this window will vary. |
941 | | * We drop the pin (when so->dropPin) to avoid blocking progress by VACUUM |
942 | | * (see nbtree/README section about making concurrent TID recycling safe). |
943 | | * We'll always release both the lock and the pin on the current page before |
944 | | * moving on to its sibling page. |
945 | | * |
946 | | * If we are doing an index-only scan, we save the entire IndexTuple for each |
947 | | * matched item, otherwise only its heap TID and offset. The IndexTuples go |
948 | | * into a separate workspace array; each BTScanPosItem stores its tuple's |
949 | | * offset within that array. Posting list tuples store a "base" tuple once, |
950 | | * allowing the same key to be returned for each TID in the posting list |
951 | | * tuple. |
952 | | */ |
953 | | |
954 | | typedef struct BTScanPosItem /* what we remember about each match */ |
955 | | { |
956 | | ItemPointerData heapTid; /* TID of referenced heap item */ |
957 | | OffsetNumber indexOffset; /* index item's location within page */ |
958 | | LocationIndex tupleOffset; /* IndexTuple's offset in workspace, if any */ |
959 | | } BTScanPosItem; |
960 | | |
961 | | typedef struct BTScanPosData |
962 | | { |
963 | | Buffer buf; /* currPage buf (invalid means unpinned) */ |
964 | | |
965 | | /* page details as of the saved position's call to _bt_readpage */ |
966 | | BlockNumber currPage; /* page referenced by items array */ |
967 | | BlockNumber prevPage; /* currPage's left link */ |
968 | | BlockNumber nextPage; /* currPage's right link */ |
969 | | XLogRecPtr lsn; /* currPage's LSN (when so->dropPin) */ |
970 | | |
971 | | /* scan direction for the saved position's call to _bt_readpage */ |
972 | | ScanDirection dir; |
973 | | |
974 | | /* |
975 | | * If we are doing an index-only scan, nextTupleOffset is the first free |
976 | | * location in the associated tuple storage workspace. |
977 | | */ |
978 | | int nextTupleOffset; |
979 | | |
980 | | /* |
981 | | * moreLeft and moreRight track whether we think there may be matching |
982 | | * index entries to the left and right of the current page, respectively. |
983 | | */ |
984 | | bool moreLeft; |
985 | | bool moreRight; |
986 | | |
987 | | /* |
988 | | * The items array is always ordered in index order (ie, increasing |
989 | | * indexoffset). When scanning backwards it is convenient to fill the |
990 | | * array back-to-front, so we start at the last slot and fill downwards. |
991 | | * Hence we need both a first-valid-entry and a last-valid-entry counter. |
992 | | * itemIndex is a cursor showing which entry was last returned to caller. |
993 | | */ |
994 | | int firstItem; /* first valid index in items[] */ |
995 | | int lastItem; /* last valid index in items[] */ |
996 | | int itemIndex; /* current index in items[] */ |
997 | | |
998 | | BTScanPosItem items[MaxTIDsPerBTreePage]; /* MUST BE LAST */ |
999 | | } BTScanPosData; |
1000 | | |
1001 | | typedef BTScanPosData *BTScanPos; |
1002 | | |
1003 | 0 | #define BTScanPosIsPinned(scanpos) \ |
1004 | 0 | ( \ |
1005 | 0 | AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ |
1006 | 0 | !BufferIsValid((scanpos).buf)), \ |
1007 | 0 | BufferIsValid((scanpos).buf) \ |
1008 | 0 | ) |
1009 | | #define BTScanPosUnpin(scanpos) \ |
1010 | 0 | do { \ |
1011 | 0 | ReleaseBuffer((scanpos).buf); \ |
1012 | 0 | (scanpos).buf = InvalidBuffer; \ |
1013 | 0 | } while (0) |
1014 | | #define BTScanPosUnpinIfPinned(scanpos) \ |
1015 | 0 | do { \ |
1016 | 0 | if (BTScanPosIsPinned(scanpos)) \ |
1017 | 0 | BTScanPosUnpin(scanpos); \ |
1018 | 0 | } while (0) |
1019 | | |
1020 | 0 | #define BTScanPosIsValid(scanpos) \ |
1021 | 0 | ( \ |
1022 | 0 | AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ |
1023 | 0 | !BufferIsValid((scanpos).buf)), \ |
1024 | 0 | BlockNumberIsValid((scanpos).currPage) \ |
1025 | 0 | ) |
1026 | | #define BTScanPosInvalidate(scanpos) \ |
1027 | 0 | do { \ |
1028 | 0 | (scanpos).buf = InvalidBuffer; \ |
1029 | 0 | (scanpos).currPage = InvalidBlockNumber; \ |
1030 | 0 | } while (0) |
1031 | | |
1032 | | /* We need one of these for each equality-type SK_SEARCHARRAY scan key */ |
1033 | | typedef struct BTArrayKeyInfo |
1034 | | { |
1035 | | /* fields set for both kinds of array (SAOP arrays and skip arrays) */ |
1036 | | int scan_key; /* index of associated key in keyData */ |
1037 | | int num_elems; /* number of elems (-1 means skip array) */ |
1038 | | |
1039 | | /* fields set for ScalarArrayOpExpr arrays only */ |
1040 | | Datum *elem_values; /* array of num_elems Datums */ |
1041 | | int cur_elem; /* index of current element in elem_values */ |
1042 | | |
1043 | | /* fields set for skip arrays only */ |
1044 | | int16 attlen; /* attr's length, in bytes */ |
1045 | | bool attbyval; /* attr's FormData_pg_attribute.attbyval */ |
1046 | | bool null_elem; /* NULL is lowest/highest element? */ |
1047 | | SkipSupport sksup; /* skip support (NULL if opclass lacks it) */ |
1048 | | ScanKey low_compare; /* array's > or >= lower bound */ |
1049 | | ScanKey high_compare; /* array's < or <= upper bound */ |
1050 | | } BTArrayKeyInfo; |
1051 | | |
1052 | | typedef struct BTScanOpaqueData |
1053 | | { |
1054 | | /* these fields are set by _bt_preprocess_keys(): */ |
1055 | | bool qual_ok; /* false if qual can never be satisfied */ |
1056 | | int numberOfKeys; /* number of preprocessed scan keys */ |
1057 | | ScanKey keyData; /* array of preprocessed scan keys */ |
1058 | | |
1059 | | /* workspace for SK_SEARCHARRAY support */ |
1060 | | int numArrayKeys; /* number of equality-type array keys */ |
1061 | | bool skipScan; /* At least one skip array in arrayKeys[]? */ |
1062 | | bool needPrimScan; /* New prim scan to continue in current dir? */ |
1063 | | bool scanBehind; /* Check scan not still behind on next page? */ |
1064 | | bool oppositeDirCheck; /* scanBehind opposite-scan-dir check? */ |
1065 | | BTArrayKeyInfo *arrayKeys; /* info about each equality-type array key */ |
1066 | | FmgrInfo *orderProcs; /* ORDER procs for required equality keys */ |
1067 | | MemoryContext arrayContext; /* scan-lifespan context for array data */ |
1068 | | |
1069 | | /* info about killed items if any (killedItems is NULL if never used) */ |
1070 | | int *killedItems; /* currPos.items indexes of killed items */ |
1071 | | int numKilled; /* number of currently stored items */ |
1072 | | bool dropPin; /* drop leaf pin before btgettuple returns? */ |
1073 | | |
1074 | | /* |
1075 | | * If we are doing an index-only scan, these are the tuple storage |
1076 | | * workspaces for the currPos and markPos respectively. Each is of size |
1077 | | * BLCKSZ, so it can hold as much as a full page's worth of tuples. |
1078 | | */ |
1079 | | char *currTuples; /* tuple storage for currPos */ |
1080 | | char *markTuples; /* tuple storage for markPos */ |
1081 | | |
1082 | | /* |
1083 | | * If the marked position is on the same page as current position, we |
1084 | | * don't use markPos, but just keep the marked itemIndex in markItemIndex |
1085 | | * (all the rest of currPos is valid for the mark position). Hence, to |
1086 | | * determine if there is a mark, first look at markItemIndex, then at |
1087 | | * markPos. |
1088 | | */ |
1089 | | int markItemIndex; /* itemIndex, or -1 if not valid */ |
1090 | | |
1091 | | /* keep these last in struct for efficiency */ |
1092 | | BTScanPosData currPos; /* current position data */ |
1093 | | BTScanPosData markPos; /* marked position, if any */ |
1094 | | } BTScanOpaqueData; |
1095 | | |
1096 | | typedef BTScanOpaqueData *BTScanOpaque; |
1097 | | |
1098 | | /* |
1099 | | * _bt_readpage state used across _bt_checkkeys calls for a page |
1100 | | */ |
1101 | | typedef struct BTReadPageState |
1102 | | { |
1103 | | /* Input parameters, set by _bt_readpage for _bt_checkkeys */ |
1104 | | OffsetNumber minoff; /* Lowest non-pivot tuple's offset */ |
1105 | | OffsetNumber maxoff; /* Highest non-pivot tuple's offset */ |
1106 | | IndexTuple finaltup; /* Needed by scans with array keys */ |
1107 | | Page page; /* Page being read */ |
1108 | | bool firstpage; /* page is first for primitive scan? */ |
1109 | | bool forcenonrequired; /* treat all keys as nonrequired? */ |
1110 | | int startikey; /* start comparisons from this scan key */ |
1111 | | |
1112 | | /* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */ |
1113 | | OffsetNumber offnum; /* current tuple's page offset number */ |
1114 | | |
1115 | | /* Output parameters, set by _bt_checkkeys for _bt_readpage */ |
1116 | | OffsetNumber skip; /* Array keys "look ahead" skip offnum */ |
1117 | | bool continuescan; /* Terminate ongoing (primitive) index scan? */ |
1118 | | |
1119 | | /* |
1120 | | * Private _bt_checkkeys state used to manage "look ahead" optimization |
1121 | | * and primscan scheduling (only used during scans with array keys) |
1122 | | */ |
1123 | | int16 rechecks; |
1124 | | int16 targetdistance; |
1125 | | int16 nskipadvances; |
1126 | | |
1127 | | } BTReadPageState; |
1128 | | |
1129 | | /* |
1130 | | * We use some private sk_flags bits in preprocessed scan keys. We're allowed |
1131 | | * to use bits 16-31 (see skey.h). The uppermost bits are copied from the |
1132 | | * index's indoption[] array entry for the index attribute. |
1133 | | */ |
1134 | 0 | #define SK_BT_REQFWD 0x00010000 /* required to continue forward scan */ |
1135 | 0 | #define SK_BT_REQBKWD 0x00020000 /* required to continue backward scan */ |
1136 | 0 | #define SK_BT_SKIP 0x00040000 /* skip array on column without input = */ |
1137 | | |
1138 | | /* SK_BT_SKIP-only flags (set and unset by array advancement) */ |
1139 | 0 | #define SK_BT_MINVAL 0x00080000 /* invalid sk_argument, use low_compare */ |
1140 | 0 | #define SK_BT_MAXVAL 0x00100000 /* invalid sk_argument, use high_compare */ |
1141 | 0 | #define SK_BT_NEXT 0x00200000 /* positions the scan > sk_argument */ |
1142 | 0 | #define SK_BT_PRIOR 0x00400000 /* positions the scan < sk_argument */ |
1143 | | |
1144 | | /* Remaps pg_index flag bits to uppermost SK_BT_* byte */ |
1145 | 0 | #define SK_BT_INDOPTION_SHIFT 24 /* must clear the above bits */ |
1146 | 0 | #define SK_BT_DESC (INDOPTION_DESC << SK_BT_INDOPTION_SHIFT) |
1147 | 0 | #define SK_BT_NULLS_FIRST (INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT) |
1148 | | |
1149 | | typedef struct BTOptions |
1150 | | { |
1151 | | int32 varlena_header_; /* varlena header (do not touch directly!) */ |
1152 | | int fillfactor; /* page fill factor in percent (0..100) */ |
1153 | | float8 vacuum_cleanup_index_scale_factor; /* deprecated */ |
1154 | | bool deduplicate_items; /* Try to deduplicate items? */ |
1155 | | } BTOptions; |
1156 | | |
1157 | | #define BTGetFillFactor(relation) \ |
1158 | 0 | (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ |
1159 | 0 | relation->rd_rel->relam == BTREE_AM_OID), \ |
1160 | 0 | (relation)->rd_options ? \ |
1161 | 0 | ((BTOptions *) (relation)->rd_options)->fillfactor : \ |
1162 | 0 | BTREE_DEFAULT_FILLFACTOR) |
1163 | | #define BTGetTargetPageFreeSpace(relation) \ |
1164 | 0 | (BLCKSZ * (100 - BTGetFillFactor(relation)) / 100) |
1165 | | #define BTGetDeduplicateItems(relation) \ |
1166 | 0 | (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ |
1167 | 0 | relation->rd_rel->relam == BTREE_AM_OID), \ |
1168 | 0 | ((relation)->rd_options ? \ |
1169 | 0 | ((BTOptions *) (relation)->rd_options)->deduplicate_items : true)) |
1170 | | |
1171 | | /* |
1172 | | * Constant definition for progress reporting. Phase numbers must match |
1173 | | * btbuildphasename. |
1174 | | */ |
1175 | | /* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */ |
1176 | 0 | #define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN 2 |
1177 | 0 | #define PROGRESS_BTREE_PHASE_PERFORMSORT_1 3 |
1178 | 0 | #define PROGRESS_BTREE_PHASE_PERFORMSORT_2 4 |
1179 | 0 | #define PROGRESS_BTREE_PHASE_LEAF_LOAD 5 |
1180 | | |
1181 | | /* |
1182 | | * external entry points for btree, in nbtree.c |
1183 | | */ |
1184 | | extern void btbuildempty(Relation index); |
1185 | | extern bool btinsert(Relation rel, Datum *values, bool *isnull, |
1186 | | ItemPointer ht_ctid, Relation heapRel, |
1187 | | IndexUniqueCheck checkUnique, |
1188 | | bool indexUnchanged, |
1189 | | struct IndexInfo *indexInfo); |
1190 | | extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys); |
1191 | | extern Size btestimateparallelscan(Relation rel, int nkeys, int norderbys); |
1192 | | extern void btinitparallelscan(void *target); |
1193 | | extern bool btgettuple(IndexScanDesc scan, ScanDirection dir); |
1194 | | extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); |
1195 | | extern void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, |
1196 | | ScanKey orderbys, int norderbys); |
1197 | | extern void btparallelrescan(IndexScanDesc scan); |
1198 | | extern void btendscan(IndexScanDesc scan); |
1199 | | extern void btmarkpos(IndexScanDesc scan); |
1200 | | extern void btrestrpos(IndexScanDesc scan); |
1201 | | extern IndexBulkDeleteResult *btbulkdelete(IndexVacuumInfo *info, |
1202 | | IndexBulkDeleteResult *stats, |
1203 | | IndexBulkDeleteCallback callback, |
1204 | | void *callback_state); |
1205 | | extern IndexBulkDeleteResult *btvacuumcleanup(IndexVacuumInfo *info, |
1206 | | IndexBulkDeleteResult *stats); |
1207 | | extern bool btcanreturn(Relation index, int attno); |
1208 | | extern int btgettreeheight(Relation rel); |
1209 | | |
1210 | | extern CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily); |
1211 | | extern StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily); |
1212 | | |
1213 | | /* |
1214 | | * prototypes for internal functions in nbtree.c |
1215 | | */ |
1216 | | extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, |
1217 | | BlockNumber *last_curr_page, bool first); |
1218 | | extern void _bt_parallel_release(IndexScanDesc scan, |
1219 | | BlockNumber next_scan_page, |
1220 | | BlockNumber curr_page); |
1221 | | extern void _bt_parallel_done(IndexScanDesc scan); |
1222 | | extern void _bt_parallel_primscan_schedule(IndexScanDesc scan, |
1223 | | BlockNumber curr_page); |
1224 | | |
1225 | | /* |
1226 | | * prototypes for functions in nbtdedup.c |
1227 | | */ |
1228 | | extern void _bt_dedup_pass(Relation rel, Buffer buf, IndexTuple newitem, |
1229 | | Size newitemsz, bool bottomupdedup); |
1230 | | extern bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, |
1231 | | Size newitemsz); |
1232 | | extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, |
1233 | | OffsetNumber baseoff); |
1234 | | extern bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup); |
1235 | | extern Size _bt_dedup_finish_pending(Page newpage, BTDedupState state); |
1236 | | extern IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, |
1237 | | int nhtids); |
1238 | | extern void _bt_update_posting(BTVacuumPosting vacposting); |
1239 | | extern IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting, |
1240 | | int postingoff); |
1241 | | |
1242 | | /* |
1243 | | * prototypes for functions in nbtinsert.c |
1244 | | */ |
1245 | | extern bool _bt_doinsert(Relation rel, IndexTuple itup, |
1246 | | IndexUniqueCheck checkUnique, bool indexUnchanged, |
1247 | | Relation heapRel); |
1248 | | extern void _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf, |
1249 | | BTStack stack); |
1250 | | extern Buffer _bt_getstackbuf(Relation rel, Relation heaprel, BTStack stack, |
1251 | | BlockNumber child); |
1252 | | |
1253 | | /* |
1254 | | * prototypes for functions in nbtsplitloc.c |
1255 | | */ |
1256 | | extern OffsetNumber _bt_findsplitloc(Relation rel, Page origpage, |
1257 | | OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, |
1258 | | bool *newitemonleft); |
1259 | | |
1260 | | /* |
1261 | | * prototypes for functions in nbtpage.c |
1262 | | */ |
1263 | | extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, |
1264 | | bool allequalimage); |
1265 | | extern bool _bt_vacuum_needs_cleanup(Relation rel); |
1266 | | extern void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages); |
1267 | | extern void _bt_upgrademetapage(Page page); |
1268 | | extern Buffer _bt_getroot(Relation rel, Relation heaprel, int access); |
1269 | | extern Buffer _bt_gettrueroot(Relation rel); |
1270 | | extern int _bt_getrootheight(Relation rel); |
1271 | | extern void _bt_metaversion(Relation rel, bool *heapkeyspace, |
1272 | | bool *allequalimage); |
1273 | | extern void _bt_checkpage(Relation rel, Buffer buf); |
1274 | | extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access); |
1275 | | extern Buffer _bt_allocbuf(Relation rel, Relation heaprel); |
1276 | | extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, |
1277 | | BlockNumber blkno, int access); |
1278 | | extern void _bt_relbuf(Relation rel, Buffer buf); |
1279 | | extern void _bt_lockbuf(Relation rel, Buffer buf, int access); |
1280 | | extern void _bt_unlockbuf(Relation rel, Buffer buf); |
1281 | | extern bool _bt_conditionallockbuf(Relation rel, Buffer buf); |
1282 | | extern void _bt_upgradelockbufcleanup(Relation rel, Buffer buf); |
1283 | | extern void _bt_pageinit(Page page, Size size); |
1284 | | extern void _bt_delitems_vacuum(Relation rel, Buffer buf, |
1285 | | OffsetNumber *deletable, int ndeletable, |
1286 | | BTVacuumPosting *updatable, int nupdatable); |
1287 | | struct TM_IndexDeleteOp; /* avoid including tableam.h here */ |
1288 | | extern void _bt_delitems_delete_check(Relation rel, Buffer buf, |
1289 | | Relation heapRel, |
1290 | | struct TM_IndexDeleteOp *delstate); |
1291 | | extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate); |
1292 | | extern void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, |
1293 | | bool cleanuponly); |
1294 | | extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate); |
1295 | | |
1296 | | /* |
1297 | | * prototypes for functions in nbtpreprocesskeys.c |
1298 | | */ |
1299 | | extern void _bt_preprocess_keys(IndexScanDesc scan); |
1300 | | |
1301 | | /* |
1302 | | * prototypes for functions in nbtsearch.c |
1303 | | */ |
1304 | | extern BTStack _bt_search(Relation rel, Relation heaprel, BTScanInsert key, |
1305 | | Buffer *bufP, int access); |
1306 | | extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate); |
1307 | | extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum); |
1308 | | extern bool _bt_first(IndexScanDesc scan, ScanDirection dir); |
1309 | | extern bool _bt_next(IndexScanDesc scan, ScanDirection dir); |
1310 | | extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost); |
1311 | | |
1312 | | /* |
1313 | | * prototypes for functions in nbtutils.c |
1314 | | */ |
1315 | | extern BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup); |
1316 | | extern void _bt_freestack(BTStack stack); |
1317 | | extern bool _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir); |
1318 | | extern int _bt_binsrch_array_skey(FmgrInfo *orderproc, |
1319 | | bool cur_elem_trig, ScanDirection dir, |
1320 | | Datum tupdatum, bool tupnull, |
1321 | | BTArrayKeyInfo *array, ScanKey cur, |
1322 | | int32 *set_elem_result); |
1323 | | extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir); |
1324 | | extern bool _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys, |
1325 | | IndexTuple tuple, int tupnatts); |
1326 | | extern bool _bt_scanbehind_checkkeys(IndexScanDesc scan, ScanDirection dir, |
1327 | | IndexTuple finaltup); |
1328 | | extern void _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate); |
1329 | | extern void _bt_killitems(IndexScanDesc scan); |
1330 | | extern BTCycleId _bt_vacuum_cycleid(Relation rel); |
1331 | | extern BTCycleId _bt_start_vacuum(Relation rel); |
1332 | | extern void _bt_end_vacuum(Relation rel); |
1333 | | extern void _bt_end_vacuum_callback(int code, Datum arg); |
1334 | | extern Size BTreeShmemSize(void); |
1335 | | extern void BTreeShmemInit(void); |
1336 | | extern bytea *btoptions(Datum reloptions, bool validate); |
1337 | | extern bool btproperty(Oid index_oid, int attno, |
1338 | | IndexAMProperty prop, const char *propname, |
1339 | | bool *res, bool *isnull); |
1340 | | extern char *btbuildphasename(int64 phasenum); |
1341 | | extern IndexTuple _bt_truncate(Relation rel, IndexTuple lastleft, |
1342 | | IndexTuple firstright, BTScanInsert itup_key); |
1343 | | extern int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, |
1344 | | IndexTuple firstright); |
1345 | | extern bool _bt_check_natts(Relation rel, bool heapkeyspace, Page page, |
1346 | | OffsetNumber offnum); |
1347 | | extern void _bt_check_third_page(Relation rel, Relation heap, |
1348 | | bool needheaptidspace, Page page, IndexTuple newtup); |
1349 | | extern bool _bt_allequalimage(Relation rel, bool debugmessage); |
1350 | | |
1351 | | /* |
1352 | | * prototypes for functions in nbtvalidate.c |
1353 | | */ |
1354 | | extern bool btvalidate(Oid opclassoid); |
1355 | | extern void btadjustmembers(Oid opfamilyoid, |
1356 | | Oid opclassoid, |
1357 | | List *operators, |
1358 | | List *functions); |
1359 | | |
1360 | | /* |
1361 | | * prototypes for functions in nbtsort.c |
1362 | | */ |
1363 | | extern IndexBuildResult *btbuild(Relation heap, Relation index, |
1364 | | struct IndexInfo *indexInfo); |
1365 | | extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc); |
1366 | | |
1367 | | #endif /* NBTREE_H */ |