/src/postgres/src/backend/access/gist/gistsplit.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * gistsplit.c |
4 | | * Multi-column page splitting algorithm |
5 | | * |
6 | | * This file is concerned with making good page-split decisions in multi-column |
7 | | * GiST indexes. The opclass-specific picksplit functions can only be expected |
8 | | * to produce answers based on a single column. We first run the picksplit |
9 | | * function for column 1; then, if there are more columns, we check if any of |
10 | | * the tuples are "don't cares" so far as the column 1 split is concerned |
11 | | * (that is, they could go to either side for no additional penalty). If so, |
12 | | * we try to redistribute those tuples on the basis of the next column. |
13 | | * Repeat till we're out of columns. |
14 | | * |
15 | | * gistSplitByKey() is the entry point to this file. |
16 | | * |
17 | | * |
18 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
19 | | * Portions Copyright (c) 1994, Regents of the University of California |
20 | | * |
21 | | * IDENTIFICATION |
22 | | * src/backend/access/gist/gistsplit.c |
23 | | * |
24 | | *------------------------------------------------------------------------- |
25 | | */ |
26 | | #include "postgres.h" |
27 | | |
28 | | #include "access/gist_private.h" |
29 | | #include "utils/rel.h" |
30 | | |
31 | | typedef struct |
32 | | { |
33 | | OffsetNumber *entries; |
34 | | int len; |
35 | | Datum *attr; |
36 | | bool *isnull; |
37 | | bool *dontcare; |
38 | | } GistSplitUnion; |
39 | | |
40 | | |
41 | | /* |
42 | | * Form unions of subkeys in itvec[] entries listed in gsvp->entries[], |
43 | | * ignoring any tuples that are marked in gsvp->dontcare[]. Subroutine for |
44 | | * gistunionsubkey. |
45 | | */ |
46 | | static void |
47 | | gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec, |
48 | | GistSplitUnion *gsvp) |
49 | 0 | { |
50 | 0 | IndexTuple *cleanedItVec; |
51 | 0 | int i, |
52 | 0 | cleanedLen = 0; |
53 | |
|
54 | 0 | cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len); |
55 | |
|
56 | 0 | for (i = 0; i < gsvp->len; i++) |
57 | 0 | { |
58 | 0 | if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]]) |
59 | 0 | continue; |
60 | | |
61 | 0 | cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1]; |
62 | 0 | } |
63 | |
|
64 | 0 | gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, |
65 | 0 | gsvp->attr, gsvp->isnull); |
66 | |
|
67 | 0 | pfree(cleanedItVec); |
68 | 0 | } |
69 | | |
70 | | /* |
71 | | * Recompute unions of left- and right-side subkeys after a page split, |
72 | | * ignoring any tuples that are marked in spl->spl_dontcare[]. |
73 | | * |
74 | | * Note: we always recompute union keys for all index columns. In some cases |
75 | | * this might represent duplicate work for the leftmost column(s), but it's |
76 | | * not safe to assume that "zero penalty to move a tuple" means "the union |
77 | | * key doesn't change at all". Penalty functions aren't 100% accurate. |
78 | | */ |
79 | | static void |
80 | | gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl) |
81 | 0 | { |
82 | 0 | GistSplitUnion gsvp; |
83 | |
|
84 | 0 | gsvp.dontcare = spl->spl_dontcare; |
85 | |
|
86 | 0 | gsvp.entries = spl->splitVector.spl_left; |
87 | 0 | gsvp.len = spl->splitVector.spl_nleft; |
88 | 0 | gsvp.attr = spl->spl_lattr; |
89 | 0 | gsvp.isnull = spl->spl_lisnull; |
90 | |
|
91 | 0 | gistunionsubkeyvec(giststate, itvec, &gsvp); |
92 | |
|
93 | 0 | gsvp.entries = spl->splitVector.spl_right; |
94 | 0 | gsvp.len = spl->splitVector.spl_nright; |
95 | 0 | gsvp.attr = spl->spl_rattr; |
96 | 0 | gsvp.isnull = spl->spl_risnull; |
97 | |
|
98 | 0 | gistunionsubkeyvec(giststate, itvec, &gsvp); |
99 | 0 | } |
100 | | |
101 | | /* |
102 | | * Find tuples that are "don't cares", that is could be moved to the other |
103 | | * side of the split with zero penalty, so far as the attno column is |
104 | | * concerned. |
105 | | * |
106 | | * Don't-care tuples are marked by setting the corresponding entry in |
107 | | * spl->spl_dontcare[] to "true". Caller must have initialized that array |
108 | | * to zeroes. |
109 | | * |
110 | | * Returns number of don't-cares found. |
111 | | */ |
112 | | static int |
113 | | findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, |
114 | | GistSplitVector *spl, int attno) |
115 | 0 | { |
116 | 0 | int i; |
117 | 0 | GISTENTRY entry; |
118 | 0 | int NumDontCare = 0; |
119 | | |
120 | | /* |
121 | | * First, search the left-side tuples to see if any have zero penalty to |
122 | | * be added to the right-side union key. |
123 | | * |
124 | | * attno column is known all-not-null (see gistSplitByKey), so we need not |
125 | | * check for nulls |
126 | | */ |
127 | 0 | gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, |
128 | 0 | (OffsetNumber) 0, false); |
129 | 0 | for (i = 0; i < spl->splitVector.spl_nleft; i++) |
130 | 0 | { |
131 | 0 | int j = spl->splitVector.spl_left[i]; |
132 | 0 | float penalty = gistpenalty(giststate, attno, &entry, false, |
133 | 0 | &valvec[j], false); |
134 | |
|
135 | 0 | if (penalty == 0.0) |
136 | 0 | { |
137 | 0 | spl->spl_dontcare[j] = true; |
138 | 0 | NumDontCare++; |
139 | 0 | } |
140 | 0 | } |
141 | | |
142 | | /* And conversely for the right-side tuples */ |
143 | 0 | gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, |
144 | 0 | (OffsetNumber) 0, false); |
145 | 0 | for (i = 0; i < spl->splitVector.spl_nright; i++) |
146 | 0 | { |
147 | 0 | int j = spl->splitVector.spl_right[i]; |
148 | 0 | float penalty = gistpenalty(giststate, attno, &entry, false, |
149 | 0 | &valvec[j], false); |
150 | |
|
151 | 0 | if (penalty == 0.0) |
152 | 0 | { |
153 | 0 | spl->spl_dontcare[j] = true; |
154 | 0 | NumDontCare++; |
155 | 0 | } |
156 | 0 | } |
157 | |
|
158 | 0 | return NumDontCare; |
159 | 0 | } |
160 | | |
161 | | /* |
162 | | * Remove tuples that are marked don't-cares from the tuple index array a[] |
163 | | * of length *len. This is applied separately to the spl_left and spl_right |
164 | | * arrays. |
165 | | */ |
166 | | static void |
167 | | removeDontCares(OffsetNumber *a, int *len, const bool *dontcare) |
168 | 0 | { |
169 | 0 | int origlen, |
170 | 0 | newlen, |
171 | 0 | i; |
172 | 0 | OffsetNumber *curwpos; |
173 | |
|
174 | 0 | origlen = newlen = *len; |
175 | 0 | curwpos = a; |
176 | 0 | for (i = 0; i < origlen; i++) |
177 | 0 | { |
178 | 0 | OffsetNumber ai = a[i]; |
179 | |
|
180 | 0 | if (dontcare[ai] == false) |
181 | 0 | { |
182 | | /* re-emit item into a[] */ |
183 | 0 | *curwpos = ai; |
184 | 0 | curwpos++; |
185 | 0 | } |
186 | 0 | else |
187 | 0 | newlen--; |
188 | 0 | } |
189 | |
|
190 | 0 | *len = newlen; |
191 | 0 | } |
192 | | |
193 | | /* |
194 | | * Place a single don't-care tuple into either the left or right side of the |
195 | | * split, according to which has least penalty for merging the tuple into |
196 | | * the previously-computed union keys. We need consider only columns starting |
197 | | * at attno. |
198 | | */ |
199 | | static void |
200 | | placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, |
201 | | IndexTuple itup, OffsetNumber off, int attno) |
202 | 0 | { |
203 | 0 | GISTENTRY identry[INDEX_MAX_KEYS]; |
204 | 0 | bool isnull[INDEX_MAX_KEYS]; |
205 | 0 | bool toLeft = true; |
206 | |
|
207 | 0 | gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, |
208 | 0 | identry, isnull); |
209 | |
|
210 | 0 | for (; attno < giststate->nonLeafTupdesc->natts; attno++) |
211 | 0 | { |
212 | 0 | float lpenalty, |
213 | 0 | rpenalty; |
214 | 0 | GISTENTRY entry; |
215 | |
|
216 | 0 | gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false); |
217 | 0 | lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], |
218 | 0 | identry + attno, isnull[attno]); |
219 | 0 | gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false); |
220 | 0 | rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], |
221 | 0 | identry + attno, isnull[attno]); |
222 | |
|
223 | 0 | if (lpenalty != rpenalty) |
224 | 0 | { |
225 | 0 | if (lpenalty > rpenalty) |
226 | 0 | toLeft = false; |
227 | 0 | break; |
228 | 0 | } |
229 | 0 | } |
230 | |
|
231 | 0 | if (toLeft) |
232 | 0 | v->splitVector.spl_left[v->splitVector.spl_nleft++] = off; |
233 | 0 | else |
234 | 0 | v->splitVector.spl_right[v->splitVector.spl_nright++] = off; |
235 | 0 | } |
236 | | |
237 | 0 | #define SWAPVAR( s, d, t ) \ |
238 | 0 | do { \ |
239 | 0 | (t) = (s); \ |
240 | 0 | (s) = (d); \ |
241 | 0 | (d) = (t); \ |
242 | 0 | } while(0) |
243 | | |
244 | | /* |
245 | | * Clean up when we did a secondary split but the user-defined PickSplit |
246 | | * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists |
247 | | * true). |
248 | | * |
249 | | * We consider whether to swap the left and right outputs of the secondary |
250 | | * split; this can be worthwhile if the penalty for merging those tuples into |
251 | | * the previously chosen sets is less that way. |
252 | | * |
253 | | * In any case we must update the union datums for the current column by |
254 | | * adding in the previous union keys (oldL/oldR), since the user-defined |
255 | | * PickSplit method didn't do so. |
256 | | */ |
257 | | static void |
258 | | supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, |
259 | | GIST_SPLITVEC *sv, Datum oldL, Datum oldR) |
260 | 0 | { |
261 | 0 | bool leaveOnLeft = true, |
262 | 0 | tmpBool; |
263 | 0 | GISTENTRY entryL, |
264 | 0 | entryR, |
265 | 0 | entrySL, |
266 | 0 | entrySR; |
267 | |
|
268 | 0 | gistentryinit(entryL, oldL, r, NULL, 0, false); |
269 | 0 | gistentryinit(entryR, oldR, r, NULL, 0, false); |
270 | 0 | gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false); |
271 | 0 | gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false); |
272 | |
|
273 | 0 | if (sv->spl_ldatum_exists && sv->spl_rdatum_exists) |
274 | 0 | { |
275 | 0 | float penalty1, |
276 | 0 | penalty2; |
277 | |
|
278 | 0 | penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) + |
279 | 0 | gistpenalty(giststate, attno, &entryR, false, &entrySR, false); |
280 | 0 | penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) + |
281 | 0 | gistpenalty(giststate, attno, &entryR, false, &entrySL, false); |
282 | |
|
283 | 0 | if (penalty1 > penalty2) |
284 | 0 | leaveOnLeft = false; |
285 | 0 | } |
286 | 0 | else |
287 | 0 | { |
288 | 0 | GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR; |
289 | 0 | float penalty1, |
290 | 0 | penalty2; |
291 | | |
292 | | /* |
293 | | * There is only one previously defined union, so we just choose swap |
294 | | * or not by lowest penalty for that side. We can only get here if a |
295 | | * secondary split happened to have all NULLs in its column in the |
296 | | * tuples that the outer recursion level had assigned to one side. |
297 | | * (Note that the null checks in gistSplitByKey don't prevent the |
298 | | * case, because they'll only be checking tuples that were considered |
299 | | * don't-cares at the outer recursion level, not the tuples that went |
300 | | * into determining the passed-down left and right union keys.) |
301 | | */ |
302 | 0 | penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false); |
303 | 0 | penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false); |
304 | |
|
305 | 0 | if (penalty1 < penalty2) |
306 | 0 | leaveOnLeft = sv->spl_ldatum_exists; |
307 | 0 | else |
308 | 0 | leaveOnLeft = sv->spl_rdatum_exists; |
309 | 0 | } |
310 | |
|
311 | 0 | if (leaveOnLeft == false) |
312 | 0 | { |
313 | | /* |
314 | | * swap left and right |
315 | | */ |
316 | 0 | OffsetNumber *off, |
317 | 0 | noff; |
318 | 0 | Datum datum; |
319 | |
|
320 | 0 | SWAPVAR(sv->spl_left, sv->spl_right, off); |
321 | 0 | SWAPVAR(sv->spl_nleft, sv->spl_nright, noff); |
322 | 0 | SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum); |
323 | 0 | gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false); |
324 | 0 | gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false); |
325 | 0 | } |
326 | |
|
327 | 0 | if (sv->spl_ldatum_exists) |
328 | 0 | gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false, |
329 | 0 | &sv->spl_ldatum, &tmpBool); |
330 | |
|
331 | 0 | if (sv->spl_rdatum_exists) |
332 | 0 | gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false, |
333 | 0 | &sv->spl_rdatum, &tmpBool); |
334 | |
|
335 | 0 | sv->spl_ldatum_exists = sv->spl_rdatum_exists = false; |
336 | 0 | } |
337 | | |
338 | | /* |
339 | | * Trivial picksplit implementation. Function called only |
340 | | * if user-defined picksplit puts all keys on the same side of the split. |
341 | | * That is a bug of user-defined picksplit but we don't want to fail. |
342 | | */ |
343 | | static void |
344 | | genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno) |
345 | 0 | { |
346 | 0 | OffsetNumber i, |
347 | 0 | maxoff; |
348 | 0 | int nbytes; |
349 | 0 | GistEntryVector *evec; |
350 | |
|
351 | 0 | maxoff = entryvec->n - 1; |
352 | |
|
353 | 0 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
354 | |
|
355 | 0 | v->spl_left = (OffsetNumber *) palloc(nbytes); |
356 | 0 | v->spl_right = (OffsetNumber *) palloc(nbytes); |
357 | 0 | v->spl_nleft = v->spl_nright = 0; |
358 | |
|
359 | 0 | for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
360 | 0 | { |
361 | 0 | if (i <= (maxoff - FirstOffsetNumber + 1) / 2) |
362 | 0 | { |
363 | 0 | v->spl_left[v->spl_nleft] = i; |
364 | 0 | v->spl_nleft++; |
365 | 0 | } |
366 | 0 | else |
367 | 0 | { |
368 | 0 | v->spl_right[v->spl_nright] = i; |
369 | 0 | v->spl_nright++; |
370 | 0 | } |
371 | 0 | } |
372 | | |
373 | | /* |
374 | | * Form union datums for each side |
375 | | */ |
376 | 0 | evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ); |
377 | |
|
378 | 0 | evec->n = v->spl_nleft; |
379 | 0 | memcpy(evec->vector, entryvec->vector + FirstOffsetNumber, |
380 | 0 | sizeof(GISTENTRY) * evec->n); |
381 | 0 | v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno], |
382 | 0 | giststate->supportCollation[attno], |
383 | 0 | PointerGetDatum(evec), |
384 | 0 | PointerGetDatum(&nbytes)); |
385 | |
|
386 | 0 | evec->n = v->spl_nright; |
387 | 0 | memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft, |
388 | 0 | sizeof(GISTENTRY) * evec->n); |
389 | 0 | v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno], |
390 | 0 | giststate->supportCollation[attno], |
391 | 0 | PointerGetDatum(evec), |
392 | 0 | PointerGetDatum(&nbytes)); |
393 | 0 | } |
394 | | |
395 | | /* |
396 | | * Calls user picksplit method for attno column to split tuples into |
397 | | * two vectors. |
398 | | * |
399 | | * Returns false if split is complete (there are no more index columns, or |
400 | | * there is no need to consider them because split is optimal already). |
401 | | * |
402 | | * Returns true and v->spl_dontcare = NULL if the picksplit result is |
403 | | * degenerate (all tuples seem to be don't-cares), so we should just |
404 | | * disregard this column and split on the next column(s) instead. |
405 | | * |
406 | | * Returns true and v->spl_dontcare != NULL if there are don't-care tuples |
407 | | * that could be relocated based on the next column(s). The don't-care |
408 | | * tuples have been removed from the split and must be reinserted by caller. |
409 | | * There is at least one non-don't-care tuple on each side of the split, |
410 | | * and union keys for all columns are updated to include just those tuples. |
411 | | * |
412 | | * A true result implies there is at least one more index column. |
413 | | */ |
414 | | static bool |
415 | | gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v, |
416 | | IndexTuple *itup, int len, GISTSTATE *giststate) |
417 | 0 | { |
418 | 0 | GIST_SPLITVEC *sv = &v->splitVector; |
419 | | |
420 | | /* |
421 | | * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in |
422 | | * case we are doing a secondary split (see comments in gist.h). |
423 | | */ |
424 | 0 | sv->spl_ldatum_exists = !(v->spl_lisnull[attno]); |
425 | 0 | sv->spl_rdatum_exists = !(v->spl_risnull[attno]); |
426 | 0 | sv->spl_ldatum = v->spl_lattr[attno]; |
427 | 0 | sv->spl_rdatum = v->spl_rattr[attno]; |
428 | | |
429 | | /* |
430 | | * Let the opclass-specific PickSplit method do its thing. Note that at |
431 | | * this point we know there are no null keys in the entryvec. |
432 | | */ |
433 | 0 | FunctionCall2Coll(&giststate->picksplitFn[attno], |
434 | 0 | giststate->supportCollation[attno], |
435 | 0 | PointerGetDatum(entryvec), |
436 | 0 | PointerGetDatum(sv)); |
437 | |
|
438 | 0 | if (sv->spl_nleft == 0 || sv->spl_nright == 0) |
439 | 0 | { |
440 | | /* |
441 | | * User-defined picksplit failed to create an actual split, ie it put |
442 | | * everything on the same side. Complain but cope. |
443 | | */ |
444 | 0 | ereport(DEBUG1, |
445 | 0 | (errcode(ERRCODE_INTERNAL_ERROR), |
446 | 0 | errmsg("picksplit method for column %d of index \"%s\" failed", |
447 | 0 | attno + 1, RelationGetRelationName(r)), |
448 | 0 | errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command."))); |
449 | | |
450 | | /* |
451 | | * Reinit GIST_SPLITVEC. Although these fields are not used by |
452 | | * genericPickSplit(), set them up for further processing |
453 | | */ |
454 | 0 | sv->spl_ldatum_exists = !(v->spl_lisnull[attno]); |
455 | 0 | sv->spl_rdatum_exists = !(v->spl_risnull[attno]); |
456 | 0 | sv->spl_ldatum = v->spl_lattr[attno]; |
457 | 0 | sv->spl_rdatum = v->spl_rattr[attno]; |
458 | | |
459 | | /* Do a generic split */ |
460 | 0 | genericPickSplit(giststate, entryvec, sv, attno); |
461 | 0 | } |
462 | 0 | else |
463 | 0 | { |
464 | | /* hack for compatibility with old picksplit API */ |
465 | 0 | if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber) |
466 | 0 | sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1); |
467 | 0 | if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber) |
468 | 0 | sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1); |
469 | 0 | } |
470 | | |
471 | | /* Clean up if PickSplit didn't take care of a secondary split */ |
472 | 0 | if (sv->spl_ldatum_exists || sv->spl_rdatum_exists) |
473 | 0 | supportSecondarySplit(r, giststate, attno, sv, |
474 | 0 | v->spl_lattr[attno], v->spl_rattr[attno]); |
475 | | |
476 | | /* emit union datums computed by PickSplit back to v arrays */ |
477 | 0 | v->spl_lattr[attno] = sv->spl_ldatum; |
478 | 0 | v->spl_rattr[attno] = sv->spl_rdatum; |
479 | 0 | v->spl_lisnull[attno] = false; |
480 | 0 | v->spl_risnull[attno] = false; |
481 | | |
482 | | /* |
483 | | * If index columns remain, then consider whether we can improve the split |
484 | | * by using them. |
485 | | */ |
486 | 0 | v->spl_dontcare = NULL; |
487 | |
|
488 | 0 | if (attno + 1 < giststate->nonLeafTupdesc->natts) |
489 | 0 | { |
490 | 0 | int NumDontCare; |
491 | | |
492 | | /* |
493 | | * Make a quick check to see if left and right union keys are equal; |
494 | | * if so, the split is certainly degenerate, so tell caller to |
495 | | * re-split with the next column. |
496 | | */ |
497 | 0 | if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum)) |
498 | 0 | return true; |
499 | | |
500 | | /* |
501 | | * Locate don't-care tuples, if any. If there are none, the split is |
502 | | * optimal, so just fall out and return false. |
503 | | */ |
504 | 0 | v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1)); |
505 | |
|
506 | 0 | NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno); |
507 | |
|
508 | 0 | if (NumDontCare > 0) |
509 | 0 | { |
510 | | /* |
511 | | * Remove don't-cares from spl_left[] and spl_right[]. |
512 | | */ |
513 | 0 | removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare); |
514 | 0 | removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare); |
515 | | |
516 | | /* |
517 | | * If all tuples on either side were don't-cares, the split is |
518 | | * degenerate, and we're best off to ignore it and split on the |
519 | | * next column. (We used to try to press on with a secondary |
520 | | * split by forcing a random tuple on each side to be treated as |
521 | | * non-don't-care, but it seems unlikely that that technique |
522 | | * really gives a better result. Note that we don't want to try a |
523 | | * secondary split with empty left or right primary split sides, |
524 | | * because then there is no union key on that side for the |
525 | | * PickSplit function to try to expand, so it can have no good |
526 | | * figure of merit for what it's doing. Also note that this check |
527 | | * ensures we can't produce a bogus one-side-only split in the |
528 | | * NumDontCare == 1 special case below.) |
529 | | */ |
530 | 0 | if (sv->spl_nleft == 0 || sv->spl_nright == 0) |
531 | 0 | { |
532 | 0 | v->spl_dontcare = NULL; |
533 | 0 | return true; |
534 | 0 | } |
535 | | |
536 | | /* |
537 | | * Recompute union keys, considering only non-don't-care tuples. |
538 | | * NOTE: this will set union keys for remaining index columns, |
539 | | * which will cause later calls of gistUserPicksplit to pass those |
540 | | * values down to user-defined PickSplit methods with |
541 | | * spl_ldatum_exists/spl_rdatum_exists set true. |
542 | | */ |
543 | 0 | gistunionsubkey(giststate, itup, v); |
544 | |
|
545 | 0 | if (NumDontCare == 1) |
546 | 0 | { |
547 | | /* |
548 | | * If there's only one don't-care tuple then we can't do a |
549 | | * PickSplit on it, so just choose whether to send it left or |
550 | | * right by comparing penalties. We needed the |
551 | | * gistunionsubkey step anyway so that we have appropriate |
552 | | * union keys for figuring the penalties. |
553 | | */ |
554 | 0 | OffsetNumber toMove; |
555 | | |
556 | | /* find it ... */ |
557 | 0 | for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++) |
558 | 0 | { |
559 | 0 | if (v->spl_dontcare[toMove]) |
560 | 0 | break; |
561 | 0 | } |
562 | 0 | Assert(toMove < entryvec->n); |
563 | | |
564 | | /* ... and assign it to cheaper side */ |
565 | 0 | placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1); |
566 | | |
567 | | /* |
568 | | * At this point the union keys are wrong, but we don't care |
569 | | * because we're done splitting. The outermost recursion |
570 | | * level of gistSplitByKey will fix things before returning. |
571 | | */ |
572 | 0 | } |
573 | 0 | else |
574 | 0 | return true; |
575 | 0 | } |
576 | 0 | } |
577 | | |
578 | 0 | return false; |
579 | 0 | } |
580 | | |
581 | | /* |
582 | | * simply split page in half |
583 | | */ |
584 | | static void |
585 | | gistSplitHalf(GIST_SPLITVEC *v, int len) |
586 | 0 | { |
587 | 0 | int i; |
588 | |
|
589 | 0 | v->spl_nright = v->spl_nleft = 0; |
590 | 0 | v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
591 | 0 | v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
592 | 0 | for (i = 1; i <= len; i++) |
593 | 0 | if (i < len / 2) |
594 | 0 | v->spl_right[v->spl_nright++] = i; |
595 | 0 | else |
596 | 0 | v->spl_left[v->spl_nleft++] = i; |
597 | | |
598 | | /* we need not compute union keys, caller took care of it */ |
599 | 0 | } |
600 | | |
601 | | /* |
602 | | * gistSplitByKey: main entry point for page-splitting algorithm |
603 | | * |
604 | | * r: index relation |
605 | | * page: page being split |
606 | | * itup: array of IndexTuples to be processed |
607 | | * len: number of IndexTuples to be processed (must be at least 2) |
608 | | * giststate: additional info about index |
609 | | * v: working state and output area |
610 | | * attno: column we are working on (zero-based index) |
611 | | * |
612 | | * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays |
613 | | * to all-true. On return, spl_left/spl_nleft contain indexes of tuples |
614 | | * to go left, spl_right/spl_nright contain indexes of tuples to go right, |
615 | | * spl_lattr/spl_lisnull contain left-side union key values, and |
616 | | * spl_rattr/spl_risnull contain right-side union key values. Other fields |
617 | | * in this struct are workspace for this file. |
618 | | * |
619 | | * Outside caller must pass zero for attno. The function may internally |
620 | | * recurse to the next column by passing attno+1. |
621 | | */ |
622 | | void |
623 | | gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, |
624 | | GISTSTATE *giststate, GistSplitVector *v, int attno) |
625 | 0 | { |
626 | 0 | GistEntryVector *entryvec; |
627 | 0 | OffsetNumber *offNullTuples; |
628 | 0 | int nOffNullTuples = 0; |
629 | 0 | int i; |
630 | | |
631 | | /* generate the item array, and identify tuples with null keys */ |
632 | | /* note that entryvec->vector[0] goes unused in this code */ |
633 | 0 | entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY)); |
634 | 0 | entryvec->n = len + 1; |
635 | 0 | offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
636 | |
|
637 | 0 | for (i = 1; i <= len; i++) |
638 | 0 | { |
639 | 0 | Datum datum; |
640 | 0 | bool IsNull; |
641 | |
|
642 | 0 | datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc, |
643 | 0 | &IsNull); |
644 | 0 | gistdentryinit(giststate, attno, &(entryvec->vector[i]), |
645 | 0 | datum, r, page, i, |
646 | 0 | false, IsNull); |
647 | 0 | if (IsNull) |
648 | 0 | offNullTuples[nOffNullTuples++] = i; |
649 | 0 | } |
650 | |
|
651 | 0 | if (nOffNullTuples == len) |
652 | 0 | { |
653 | | /* |
654 | | * Corner case: All keys in attno column are null, so just transfer |
655 | | * our attention to the next column. If there's no next column, just |
656 | | * split page in half. |
657 | | */ |
658 | 0 | v->spl_risnull[attno] = v->spl_lisnull[attno] = true; |
659 | |
|
660 | 0 | if (attno + 1 < giststate->nonLeafTupdesc->natts) |
661 | 0 | gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); |
662 | 0 | else |
663 | 0 | gistSplitHalf(&v->splitVector, len); |
664 | 0 | } |
665 | 0 | else if (nOffNullTuples > 0) |
666 | 0 | { |
667 | 0 | int j = 0; |
668 | | |
669 | | /* |
670 | | * We don't want to mix NULL and not-NULL keys on one page, so split |
671 | | * nulls to right page and not-nulls to left. |
672 | | */ |
673 | 0 | v->splitVector.spl_right = offNullTuples; |
674 | 0 | v->splitVector.spl_nright = nOffNullTuples; |
675 | 0 | v->spl_risnull[attno] = true; |
676 | |
|
677 | 0 | v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
678 | 0 | v->splitVector.spl_nleft = 0; |
679 | 0 | for (i = 1; i <= len; i++) |
680 | 0 | if (j < v->splitVector.spl_nright && offNullTuples[j] == i) |
681 | 0 | j++; |
682 | 0 | else |
683 | 0 | v->splitVector.spl_left[v->splitVector.spl_nleft++] = i; |
684 | | |
685 | | /* Compute union keys, unless outer recursion level will handle it */ |
686 | 0 | if (attno == 0 && giststate->nonLeafTupdesc->natts == 1) |
687 | 0 | { |
688 | 0 | v->spl_dontcare = NULL; |
689 | 0 | gistunionsubkey(giststate, itup, v); |
690 | 0 | } |
691 | 0 | } |
692 | 0 | else |
693 | 0 | { |
694 | | /* |
695 | | * All keys are not-null, so apply user-defined PickSplit method |
696 | | */ |
697 | 0 | if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate)) |
698 | 0 | { |
699 | | /* |
700 | | * Splitting on attno column is not optimal, so consider |
701 | | * redistributing don't-care tuples according to the next column |
702 | | */ |
703 | 0 | Assert(attno + 1 < giststate->nonLeafTupdesc->natts); |
704 | |
|
705 | 0 | if (v->spl_dontcare == NULL) |
706 | 0 | { |
707 | | /* |
708 | | * This split was actually degenerate, so ignore it altogether |
709 | | * and just split according to the next column. |
710 | | */ |
711 | 0 | gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); |
712 | 0 | } |
713 | 0 | else |
714 | 0 | { |
715 | | /* |
716 | | * Form an array of just the don't-care tuples to pass to a |
717 | | * recursive invocation of this function for the next column. |
718 | | */ |
719 | 0 | IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple)); |
720 | 0 | OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
721 | 0 | int newlen = 0; |
722 | 0 | GIST_SPLITVEC backupSplit; |
723 | |
|
724 | 0 | for (i = 0; i < len; i++) |
725 | 0 | { |
726 | 0 | if (v->spl_dontcare[i + 1]) |
727 | 0 | { |
728 | 0 | newitup[newlen] = itup[i]; |
729 | 0 | map[newlen] = i + 1; |
730 | 0 | newlen++; |
731 | 0 | } |
732 | 0 | } |
733 | |
|
734 | 0 | Assert(newlen > 0); |
735 | | |
736 | | /* |
737 | | * Make a backup copy of v->splitVector, since the recursive |
738 | | * call will overwrite that with its own result. |
739 | | */ |
740 | 0 | backupSplit = v->splitVector; |
741 | 0 | backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len); |
742 | 0 | memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft); |
743 | 0 | backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len); |
744 | 0 | memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright); |
745 | | |
746 | | /* Recursively decide how to split the don't-care tuples */ |
747 | 0 | gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1); |
748 | | |
749 | | /* Merge result of subsplit with non-don't-care tuples */ |
750 | 0 | for (i = 0; i < v->splitVector.spl_nleft; i++) |
751 | 0 | backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1]; |
752 | 0 | for (i = 0; i < v->splitVector.spl_nright; i++) |
753 | 0 | backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1]; |
754 | |
|
755 | 0 | v->splitVector = backupSplit; |
756 | 0 | } |
757 | 0 | } |
758 | 0 | } |
759 | | |
760 | | /* |
761 | | * If we're handling a multicolumn index, at the end of the recursion |
762 | | * recompute the left and right union datums for all index columns. This |
763 | | * makes sure we hand back correct union datums in all corner cases, |
764 | | * including when we haven't processed all columns to start with, or when |
765 | | * a secondary split moved "don't care" tuples from one side to the other |
766 | | * (we really shouldn't assume that that didn't change the union datums). |
767 | | * |
768 | | * Note: when we're in an internal recursion (attno > 0), we do not worry |
769 | | * about whether the union datums we return with are sensible, since |
770 | | * calling levels won't care. Also, in a single-column index, we expect |
771 | | * that PickSplit (or the special cases above) produced correct union |
772 | | * datums. |
773 | | */ |
774 | 0 | if (attno == 0 && giststate->nonLeafTupdesc->natts > 1) |
775 | 0 | { |
776 | 0 | v->spl_dontcare = NULL; |
777 | 0 | gistunionsubkey(giststate, itup, v); |
778 | 0 | } |
779 | 0 | } |