/src/postgres/src/backend/utils/adt/tsgistidx.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * tsgistidx.c |
4 | | * GiST support functions for tsvector_ops |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/utils/adt/tsgistidx.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | |
15 | | #include "postgres.h" |
16 | | |
17 | | #include "access/gist.h" |
18 | | #include "access/heaptoast.h" |
19 | | #include "access/reloptions.h" |
20 | | #include "common/int.h" |
21 | | #include "lib/qunique.h" |
22 | | #include "port/pg_bitutils.h" |
23 | | #include "tsearch/ts_utils.h" |
24 | | #include "utils/fmgrprotos.h" |
25 | | #include "utils/pg_crc.h" |
26 | | |
27 | | |
28 | | /* tsvector_ops opclass options */ |
29 | | typedef struct |
30 | | { |
31 | | int32 vl_len_; /* varlena header (do not touch directly!) */ |
32 | | int siglen; /* signature length */ |
33 | | } GistTsVectorOptions; |
34 | | |
35 | 0 | #define SIGLEN_DEFAULT (31 * 4) |
36 | 0 | #define SIGLEN_MAX GISTMaxIndexKeySize |
37 | 0 | #define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \ |
38 | 0 | ((GistTsVectorOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \ |
39 | 0 | SIGLEN_DEFAULT) |
40 | | |
41 | 0 | #define SIGLENBIT(siglen) ((siglen) * BITS_PER_BYTE) |
42 | | |
43 | | typedef char *BITVECP; |
44 | | |
45 | | #define LOOPBYTE(siglen) \ |
46 | 0 | for (i = 0; i < siglen; i++) |
47 | | |
48 | 0 | #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) ) |
49 | | #define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 ) |
50 | | #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) ) |
51 | 0 | #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) ) |
52 | 0 | #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 ) |
53 | | |
54 | | #define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen)) |
55 | 0 | #define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen)) |
56 | | |
57 | 0 | #define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key)) |
58 | | |
59 | | /* |
60 | | * type of GiST index key |
61 | | */ |
62 | | |
63 | | typedef struct |
64 | | { |
65 | | int32 vl_len_; /* varlena header (do not touch directly!) */ |
66 | | int32 flag; |
67 | | char data[FLEXIBLE_ARRAY_MEMBER]; |
68 | | } SignTSVector; |
69 | | |
70 | 0 | #define ARRKEY 0x01 |
71 | 0 | #define SIGNKEY 0x02 |
72 | 0 | #define ALLISTRUE 0x04 |
73 | | |
74 | 0 | #define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY ) |
75 | 0 | #define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY ) |
76 | 0 | #define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE ) |
77 | | |
78 | 0 | #define GTHDRSIZE ( VARHDRSZ + sizeof(int32) ) |
79 | 0 | #define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) ) |
80 | | |
81 | 0 | #define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) ) |
82 | 0 | #define GETSIGLEN(x)( VARSIZE(x) - GTHDRSIZE ) |
83 | 0 | #define GETARR(x) ( (int32*)( (char*)(x)+GTHDRSIZE ) ) |
84 | 0 | #define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) ) |
85 | | |
86 | | static int32 sizebitvec(BITVECP sign, int siglen); |
87 | | |
88 | | Datum |
89 | | gtsvectorin(PG_FUNCTION_ARGS) |
90 | 0 | { |
91 | | /* There's no need to support input of gtsvectors */ |
92 | 0 | ereport(ERROR, |
93 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
94 | 0 | errmsg("cannot accept a value of type %s", "gtsvector"))); |
95 | | |
96 | 0 | PG_RETURN_VOID(); /* keep compiler quiet */ |
97 | 0 | } |
98 | | |
99 | | Datum |
100 | | gtsvectorout(PG_FUNCTION_ARGS) |
101 | 0 | { |
102 | 0 | SignTSVector *key = (SignTSVector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); |
103 | 0 | char *outbuf; |
104 | |
|
105 | 0 | if (ISARRKEY(key)) |
106 | 0 | outbuf = psprintf("%d unique words", (int) ARRNELEM(key)); |
107 | 0 | else |
108 | 0 | { |
109 | 0 | if (ISALLTRUE(key)) |
110 | 0 | outbuf = pstrdup("all true bits"); |
111 | 0 | else |
112 | 0 | { |
113 | 0 | int siglen = GETSIGLEN(key); |
114 | 0 | int cnttrue = sizebitvec(GETSIGN(key), siglen); |
115 | |
|
116 | 0 | outbuf = psprintf("%d true bits, %d false bits", |
117 | 0 | cnttrue, (int) SIGLENBIT(siglen) - cnttrue); |
118 | 0 | } |
119 | 0 | } |
120 | |
|
121 | 0 | PG_FREE_IF_COPY(key, 0); |
122 | 0 | PG_RETURN_POINTER(outbuf); |
123 | 0 | } |
124 | | |
125 | | static int |
126 | | compareint(const void *va, const void *vb) |
127 | 0 | { |
128 | 0 | int32 a = *((const int32 *) va); |
129 | 0 | int32 b = *((const int32 *) vb); |
130 | |
|
131 | 0 | return pg_cmp_s32(a, b); |
132 | 0 | } |
133 | | |
134 | | static void |
135 | | makesign(BITVECP sign, SignTSVector *a, int siglen) |
136 | 0 | { |
137 | 0 | int32 k, |
138 | 0 | len = ARRNELEM(a); |
139 | 0 | int32 *ptr = GETARR(a); |
140 | |
|
141 | 0 | MemSet(sign, 0, siglen); |
142 | 0 | for (k = 0; k < len; k++) |
143 | 0 | HASH(sign, ptr[k], siglen); |
144 | 0 | } |
145 | | |
146 | | static SignTSVector * |
147 | | gtsvector_alloc(int flag, int len, BITVECP sign) |
148 | 0 | { |
149 | 0 | int size = CALCGTSIZE(flag, len); |
150 | 0 | SignTSVector *res = palloc(size); |
151 | |
|
152 | 0 | SET_VARSIZE(res, size); |
153 | 0 | res->flag = flag; |
154 | |
|
155 | 0 | if ((flag & (SIGNKEY | ALLISTRUE)) == SIGNKEY && sign) |
156 | 0 | memcpy(GETSIGN(res), sign, len); |
157 | |
|
158 | 0 | return res; |
159 | 0 | } |
160 | | |
161 | | |
162 | | Datum |
163 | | gtsvector_compress(PG_FUNCTION_ARGS) |
164 | 0 | { |
165 | 0 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
166 | 0 | int siglen = GET_SIGLEN(); |
167 | 0 | GISTENTRY *retval = entry; |
168 | |
|
169 | 0 | if (entry->leafkey) |
170 | 0 | { /* tsvector */ |
171 | 0 | TSVector val = DatumGetTSVector(entry->key); |
172 | 0 | SignTSVector *res = gtsvector_alloc(ARRKEY, val->size, NULL); |
173 | 0 | int32 len; |
174 | 0 | int32 *arr; |
175 | 0 | WordEntry *ptr = ARRPTR(val); |
176 | 0 | char *words = STRPTR(val); |
177 | |
|
178 | 0 | arr = GETARR(res); |
179 | 0 | len = val->size; |
180 | 0 | while (len--) |
181 | 0 | { |
182 | 0 | pg_crc32 c; |
183 | |
|
184 | 0 | INIT_LEGACY_CRC32(c); |
185 | 0 | COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len); |
186 | 0 | FIN_LEGACY_CRC32(c); |
187 | |
|
188 | 0 | *arr = *(int32 *) &c; |
189 | 0 | arr++; |
190 | 0 | ptr++; |
191 | 0 | } |
192 | |
|
193 | 0 | qsort(GETARR(res), val->size, sizeof(int), compareint); |
194 | 0 | len = qunique(GETARR(res), val->size, sizeof(int), compareint); |
195 | 0 | if (len != val->size) |
196 | 0 | { |
197 | | /* |
198 | | * there is a collision of hash-function; len is always less than |
199 | | * val->size |
200 | | */ |
201 | 0 | len = CALCGTSIZE(ARRKEY, len); |
202 | 0 | res = (SignTSVector *) repalloc(res, len); |
203 | 0 | SET_VARSIZE(res, len); |
204 | 0 | } |
205 | | |
206 | | /* make signature, if array is too long */ |
207 | 0 | if (VARSIZE(res) > TOAST_INDEX_TARGET) |
208 | 0 | { |
209 | 0 | SignTSVector *ressign = gtsvector_alloc(SIGNKEY, siglen, NULL); |
210 | |
|
211 | 0 | makesign(GETSIGN(ressign), res, siglen); |
212 | 0 | res = ressign; |
213 | 0 | } |
214 | |
|
215 | 0 | retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); |
216 | 0 | gistentryinit(*retval, PointerGetDatum(res), |
217 | 0 | entry->rel, entry->page, |
218 | 0 | entry->offset, false); |
219 | 0 | } |
220 | 0 | else if (ISSIGNKEY(DatumGetPointer(entry->key)) && |
221 | 0 | !ISALLTRUE(DatumGetPointer(entry->key))) |
222 | 0 | { |
223 | 0 | int32 i; |
224 | 0 | SignTSVector *res; |
225 | 0 | BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); |
226 | |
|
227 | 0 | LOOPBYTE(siglen) |
228 | 0 | { |
229 | 0 | if ((sign[i] & 0xff) != 0xff) |
230 | 0 | PG_RETURN_POINTER(retval); |
231 | 0 | } |
232 | | |
233 | 0 | res = gtsvector_alloc(SIGNKEY | ALLISTRUE, siglen, sign); |
234 | 0 | retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); |
235 | 0 | gistentryinit(*retval, PointerGetDatum(res), |
236 | 0 | entry->rel, entry->page, |
237 | 0 | entry->offset, false); |
238 | 0 | } |
239 | 0 | PG_RETURN_POINTER(retval); |
240 | 0 | } |
241 | | |
242 | | Datum |
243 | | gtsvector_decompress(PG_FUNCTION_ARGS) |
244 | 0 | { |
245 | | /* |
246 | | * We need to detoast the stored value, because the other gtsvector |
247 | | * support functions don't cope with toasted values. |
248 | | */ |
249 | 0 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
250 | 0 | SignTSVector *key = (SignTSVector *) PG_DETOAST_DATUM(entry->key); |
251 | |
|
252 | 0 | if (key != (SignTSVector *) DatumGetPointer(entry->key)) |
253 | 0 | { |
254 | 0 | GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); |
255 | |
|
256 | 0 | gistentryinit(*retval, PointerGetDatum(key), |
257 | 0 | entry->rel, entry->page, |
258 | 0 | entry->offset, false); |
259 | |
|
260 | 0 | PG_RETURN_POINTER(retval); |
261 | 0 | } |
262 | | |
263 | 0 | PG_RETURN_POINTER(entry); |
264 | 0 | } |
265 | | |
266 | | typedef struct |
267 | | { |
268 | | int32 *arrb; |
269 | | int32 *arre; |
270 | | } CHKVAL; |
271 | | |
272 | | /* |
273 | | * TS_execute callback for matching a tsquery operand to GIST leaf-page data |
274 | | */ |
275 | | static TSTernaryValue |
276 | | checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) |
277 | 0 | { |
278 | 0 | int32 *StopLow = ((CHKVAL *) checkval)->arrb; |
279 | 0 | int32 *StopHigh = ((CHKVAL *) checkval)->arre; |
280 | 0 | int32 *StopMiddle; |
281 | | |
282 | | /* Loop invariant: StopLow <= val < StopHigh */ |
283 | | |
284 | | /* |
285 | | * we are not able to find a prefix by hash value |
286 | | */ |
287 | 0 | if (val->prefix) |
288 | 0 | return TS_MAYBE; |
289 | | |
290 | 0 | while (StopLow < StopHigh) |
291 | 0 | { |
292 | 0 | StopMiddle = StopLow + (StopHigh - StopLow) / 2; |
293 | 0 | if (*StopMiddle == val->valcrc) |
294 | 0 | return TS_MAYBE; |
295 | 0 | else if (*StopMiddle < val->valcrc) |
296 | 0 | StopLow = StopMiddle + 1; |
297 | 0 | else |
298 | 0 | StopHigh = StopMiddle; |
299 | 0 | } |
300 | | |
301 | 0 | return TS_NO; |
302 | 0 | } |
303 | | |
304 | | /* |
305 | | * TS_execute callback for matching a tsquery operand to GIST non-leaf data |
306 | | */ |
307 | | static TSTernaryValue |
308 | | checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) |
309 | 0 | { |
310 | 0 | void *key = (SignTSVector *) checkval; |
311 | | |
312 | | /* |
313 | | * we are not able to find a prefix in signature tree |
314 | | */ |
315 | 0 | if (val->prefix) |
316 | 0 | return TS_MAYBE; |
317 | | |
318 | 0 | if (GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key)))) |
319 | 0 | return TS_MAYBE; |
320 | 0 | else |
321 | 0 | return TS_NO; |
322 | 0 | } |
323 | | |
324 | | Datum |
325 | | gtsvector_consistent(PG_FUNCTION_ARGS) |
326 | 0 | { |
327 | 0 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
328 | 0 | TSQuery query = PG_GETARG_TSQUERY(1); |
329 | | |
330 | | /* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */ |
331 | | /* Oid subtype = PG_GETARG_OID(3); */ |
332 | 0 | bool *recheck = (bool *) PG_GETARG_POINTER(4); |
333 | 0 | SignTSVector *key = (SignTSVector *) DatumGetPointer(entry->key); |
334 | | |
335 | | /* All cases served by this function are inexact */ |
336 | 0 | *recheck = true; |
337 | |
|
338 | 0 | if (!query->size) |
339 | 0 | PG_RETURN_BOOL(false); |
340 | | |
341 | 0 | if (ISSIGNKEY(key)) |
342 | 0 | { |
343 | 0 | if (ISALLTRUE(key)) |
344 | 0 | PG_RETURN_BOOL(true); |
345 | | |
346 | 0 | PG_RETURN_BOOL(TS_execute(GETQUERY(query), |
347 | 0 | key, |
348 | 0 | TS_EXEC_PHRASE_NO_POS, |
349 | 0 | checkcondition_bit)); |
350 | 0 | } |
351 | 0 | else |
352 | 0 | { /* only leaf pages */ |
353 | 0 | CHKVAL chkval; |
354 | |
|
355 | 0 | chkval.arrb = GETARR(key); |
356 | 0 | chkval.arre = chkval.arrb + ARRNELEM(key); |
357 | 0 | PG_RETURN_BOOL(TS_execute(GETQUERY(query), |
358 | 0 | &chkval, |
359 | 0 | TS_EXEC_PHRASE_NO_POS, |
360 | 0 | checkcondition_arr)); |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | | static int32 |
365 | | unionkey(BITVECP sbase, SignTSVector *add, int siglen) |
366 | 0 | { |
367 | 0 | int32 i; |
368 | |
|
369 | 0 | if (ISSIGNKEY(add)) |
370 | 0 | { |
371 | 0 | BITVECP sadd = GETSIGN(add); |
372 | |
|
373 | 0 | if (ISALLTRUE(add)) |
374 | 0 | return 1; |
375 | | |
376 | 0 | Assert(GETSIGLEN(add) == siglen); |
377 | |
|
378 | 0 | LOOPBYTE(siglen) |
379 | 0 | sbase[i] |= sadd[i]; |
380 | 0 | } |
381 | 0 | else |
382 | 0 | { |
383 | 0 | int32 *ptr = GETARR(add); |
384 | |
|
385 | 0 | for (i = 0; i < ARRNELEM(add); i++) |
386 | 0 | HASH(sbase, ptr[i], siglen); |
387 | 0 | } |
388 | 0 | return 0; |
389 | 0 | } |
390 | | |
391 | | |
392 | | Datum |
393 | | gtsvector_union(PG_FUNCTION_ARGS) |
394 | 0 | { |
395 | 0 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); |
396 | 0 | int *size = (int *) PG_GETARG_POINTER(1); |
397 | 0 | int siglen = GET_SIGLEN(); |
398 | 0 | SignTSVector *result = gtsvector_alloc(SIGNKEY, siglen, NULL); |
399 | 0 | BITVECP base = GETSIGN(result); |
400 | 0 | int32 i; |
401 | |
|
402 | 0 | memset(base, 0, siglen); |
403 | |
|
404 | 0 | for (i = 0; i < entryvec->n; i++) |
405 | 0 | { |
406 | 0 | if (unionkey(base, GETENTRY(entryvec, i), siglen)) |
407 | 0 | { |
408 | 0 | result->flag |= ALLISTRUE; |
409 | 0 | SET_VARSIZE(result, CALCGTSIZE(result->flag, siglen)); |
410 | 0 | break; |
411 | 0 | } |
412 | 0 | } |
413 | |
|
414 | 0 | *size = VARSIZE(result); |
415 | |
|
416 | 0 | PG_RETURN_POINTER(result); |
417 | 0 | } |
418 | | |
419 | | Datum |
420 | | gtsvector_same(PG_FUNCTION_ARGS) |
421 | 0 | { |
422 | 0 | SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0); |
423 | 0 | SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1); |
424 | 0 | bool *result = (bool *) PG_GETARG_POINTER(2); |
425 | 0 | int siglen = GET_SIGLEN(); |
426 | |
|
427 | 0 | if (ISSIGNKEY(a)) |
428 | 0 | { /* then b also ISSIGNKEY */ |
429 | 0 | if (ISALLTRUE(a) && ISALLTRUE(b)) |
430 | 0 | *result = true; |
431 | 0 | else if (ISALLTRUE(a)) |
432 | 0 | *result = false; |
433 | 0 | else if (ISALLTRUE(b)) |
434 | 0 | *result = false; |
435 | 0 | else |
436 | 0 | { |
437 | 0 | int32 i; |
438 | 0 | BITVECP sa = GETSIGN(a), |
439 | 0 | sb = GETSIGN(b); |
440 | |
|
441 | 0 | Assert(GETSIGLEN(a) == siglen && GETSIGLEN(b) == siglen); |
442 | |
|
443 | 0 | *result = true; |
444 | 0 | LOOPBYTE(siglen) |
445 | 0 | { |
446 | 0 | if (sa[i] != sb[i]) |
447 | 0 | { |
448 | 0 | *result = false; |
449 | 0 | break; |
450 | 0 | } |
451 | 0 | } |
452 | 0 | } |
453 | 0 | } |
454 | 0 | else |
455 | 0 | { /* a and b ISARRKEY */ |
456 | 0 | int32 lena = ARRNELEM(a), |
457 | 0 | lenb = ARRNELEM(b); |
458 | |
|
459 | 0 | if (lena != lenb) |
460 | 0 | *result = false; |
461 | 0 | else |
462 | 0 | { |
463 | 0 | int32 *ptra = GETARR(a), |
464 | 0 | *ptrb = GETARR(b); |
465 | 0 | int32 i; |
466 | |
|
467 | 0 | *result = true; |
468 | 0 | for (i = 0; i < lena; i++) |
469 | 0 | if (ptra[i] != ptrb[i]) |
470 | 0 | { |
471 | 0 | *result = false; |
472 | 0 | break; |
473 | 0 | } |
474 | 0 | } |
475 | 0 | } |
476 | |
|
477 | 0 | PG_RETURN_POINTER(result); |
478 | 0 | } |
479 | | |
480 | | static int32 |
481 | | sizebitvec(BITVECP sign, int siglen) |
482 | 0 | { |
483 | 0 | return pg_popcount(sign, siglen); |
484 | 0 | } |
485 | | |
486 | | static int |
487 | | hemdistsign(BITVECP a, BITVECP b, int siglen) |
488 | 0 | { |
489 | 0 | int i, |
490 | 0 | diff, |
491 | 0 | dist = 0; |
492 | |
|
493 | 0 | LOOPBYTE(siglen) |
494 | 0 | { |
495 | 0 | diff = (unsigned char) (a[i] ^ b[i]); |
496 | | /* Using the popcount functions here isn't likely to win */ |
497 | 0 | dist += pg_number_of_ones[diff]; |
498 | 0 | } |
499 | 0 | return dist; |
500 | 0 | } |
501 | | |
502 | | static int |
503 | | hemdist(SignTSVector *a, SignTSVector *b) |
504 | 0 | { |
505 | 0 | int siglena = GETSIGLEN(a); |
506 | 0 | int siglenb = GETSIGLEN(b); |
507 | |
|
508 | 0 | if (ISALLTRUE(a)) |
509 | 0 | { |
510 | 0 | if (ISALLTRUE(b)) |
511 | 0 | return 0; |
512 | 0 | else |
513 | 0 | return SIGLENBIT(siglenb) - sizebitvec(GETSIGN(b), siglenb); |
514 | 0 | } |
515 | 0 | else if (ISALLTRUE(b)) |
516 | 0 | return SIGLENBIT(siglena) - sizebitvec(GETSIGN(a), siglena); |
517 | | |
518 | 0 | Assert(siglena == siglenb); |
519 | |
|
520 | 0 | return hemdistsign(GETSIGN(a), GETSIGN(b), siglena); |
521 | 0 | } |
522 | | |
523 | | Datum |
524 | | gtsvector_penalty(PG_FUNCTION_ARGS) |
525 | 0 | { |
526 | 0 | GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ |
527 | 0 | GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); |
528 | 0 | float *penalty = (float *) PG_GETARG_POINTER(2); |
529 | 0 | int siglen = GET_SIGLEN(); |
530 | 0 | SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key); |
531 | 0 | SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key); |
532 | 0 | BITVECP orig = GETSIGN(origval); |
533 | |
|
534 | 0 | *penalty = 0.0; |
535 | |
|
536 | 0 | if (ISARRKEY(newval)) |
537 | 0 | { |
538 | 0 | BITVECP sign = palloc(siglen); |
539 | |
|
540 | 0 | makesign(sign, newval, siglen); |
541 | |
|
542 | 0 | if (ISALLTRUE(origval)) |
543 | 0 | { |
544 | 0 | int siglenbit = SIGLENBIT(siglen); |
545 | |
|
546 | 0 | *penalty = |
547 | 0 | (float) (siglenbit - sizebitvec(sign, siglen)) / |
548 | 0 | (float) (siglenbit + 1); |
549 | 0 | } |
550 | 0 | else |
551 | 0 | *penalty = hemdistsign(sign, orig, siglen); |
552 | |
|
553 | 0 | pfree(sign); |
554 | 0 | } |
555 | 0 | else |
556 | 0 | *penalty = hemdist(origval, newval); |
557 | 0 | PG_RETURN_POINTER(penalty); |
558 | 0 | } |
559 | | |
560 | | typedef struct |
561 | | { |
562 | | bool allistrue; |
563 | | BITVECP sign; |
564 | | } CACHESIGN; |
565 | | |
566 | | static void |
567 | | fillcache(CACHESIGN *item, SignTSVector *key, int siglen) |
568 | 0 | { |
569 | 0 | item->allistrue = false; |
570 | 0 | if (ISARRKEY(key)) |
571 | 0 | makesign(item->sign, key, siglen); |
572 | 0 | else if (ISALLTRUE(key)) |
573 | 0 | item->allistrue = true; |
574 | 0 | else |
575 | 0 | memcpy(item->sign, GETSIGN(key), siglen); |
576 | 0 | } |
577 | | |
578 | 0 | #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) |
579 | | typedef struct |
580 | | { |
581 | | OffsetNumber pos; |
582 | | int32 cost; |
583 | | } SPLITCOST; |
584 | | |
585 | | static int |
586 | | comparecost(const void *va, const void *vb) |
587 | 0 | { |
588 | 0 | const SPLITCOST *a = (const SPLITCOST *) va; |
589 | 0 | const SPLITCOST *b = (const SPLITCOST *) vb; |
590 | |
|
591 | 0 | return pg_cmp_s32(a->cost, b->cost); |
592 | 0 | } |
593 | | |
594 | | |
595 | | static int |
596 | | hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen) |
597 | 0 | { |
598 | 0 | if (a->allistrue) |
599 | 0 | { |
600 | 0 | if (b->allistrue) |
601 | 0 | return 0; |
602 | 0 | else |
603 | 0 | return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen); |
604 | 0 | } |
605 | 0 | else if (b->allistrue) |
606 | 0 | return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen); |
607 | | |
608 | 0 | return hemdistsign(a->sign, b->sign, siglen); |
609 | 0 | } |
610 | | |
611 | | Datum |
612 | | gtsvector_picksplit(PG_FUNCTION_ARGS) |
613 | 0 | { |
614 | 0 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); |
615 | 0 | GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); |
616 | 0 | int siglen = GET_SIGLEN(); |
617 | 0 | OffsetNumber k, |
618 | 0 | j; |
619 | 0 | SignTSVector *datum_l, |
620 | 0 | *datum_r; |
621 | 0 | BITVECP union_l, |
622 | 0 | union_r; |
623 | 0 | int32 size_alpha, |
624 | 0 | size_beta; |
625 | 0 | int32 size_waste, |
626 | 0 | waste = -1; |
627 | 0 | int32 nbytes; |
628 | 0 | OffsetNumber seed_1 = 0, |
629 | 0 | seed_2 = 0; |
630 | 0 | OffsetNumber *left, |
631 | 0 | *right; |
632 | 0 | OffsetNumber maxoff; |
633 | 0 | BITVECP ptr; |
634 | 0 | int i; |
635 | 0 | CACHESIGN *cache; |
636 | 0 | char *cache_sign; |
637 | 0 | SPLITCOST *costvector; |
638 | |
|
639 | 0 | maxoff = entryvec->n - 2; |
640 | 0 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
641 | 0 | v->spl_left = (OffsetNumber *) palloc(nbytes); |
642 | 0 | v->spl_right = (OffsetNumber *) palloc(nbytes); |
643 | |
|
644 | 0 | cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); |
645 | 0 | cache_sign = palloc(siglen * (maxoff + 2)); |
646 | |
|
647 | 0 | for (j = 0; j < maxoff + 2; j++) |
648 | 0 | cache[j].sign = &cache_sign[siglen * j]; |
649 | |
|
650 | 0 | fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber), |
651 | 0 | siglen); |
652 | |
|
653 | 0 | for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) |
654 | 0 | { |
655 | 0 | for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) |
656 | 0 | { |
657 | 0 | if (k == FirstOffsetNumber) |
658 | 0 | fillcache(&cache[j], GETENTRY(entryvec, j), siglen); |
659 | |
|
660 | 0 | size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen); |
661 | 0 | if (size_waste > waste) |
662 | 0 | { |
663 | 0 | waste = size_waste; |
664 | 0 | seed_1 = k; |
665 | 0 | seed_2 = j; |
666 | 0 | } |
667 | 0 | } |
668 | 0 | } |
669 | |
|
670 | 0 | left = v->spl_left; |
671 | 0 | v->spl_nleft = 0; |
672 | 0 | right = v->spl_right; |
673 | 0 | v->spl_nright = 0; |
674 | |
|
675 | 0 | if (seed_1 == 0 || seed_2 == 0) |
676 | 0 | { |
677 | 0 | seed_1 = 1; |
678 | 0 | seed_2 = 2; |
679 | 0 | } |
680 | | |
681 | | /* form initial .. */ |
682 | 0 | datum_l = gtsvector_alloc(SIGNKEY | (cache[seed_1].allistrue ? ALLISTRUE : 0), |
683 | 0 | siglen, cache[seed_1].sign); |
684 | 0 | datum_r = gtsvector_alloc(SIGNKEY | (cache[seed_2].allistrue ? ALLISTRUE : 0), |
685 | 0 | siglen, cache[seed_2].sign); |
686 | 0 | union_l = GETSIGN(datum_l); |
687 | 0 | union_r = GETSIGN(datum_r); |
688 | 0 | maxoff = OffsetNumberNext(maxoff); |
689 | 0 | fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff), siglen); |
690 | | /* sort before ... */ |
691 | 0 | costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); |
692 | 0 | for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) |
693 | 0 | { |
694 | 0 | costvector[j - 1].pos = j; |
695 | 0 | size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen); |
696 | 0 | size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen); |
697 | 0 | costvector[j - 1].cost = abs(size_alpha - size_beta); |
698 | 0 | } |
699 | 0 | qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost); |
700 | |
|
701 | 0 | for (k = 0; k < maxoff; k++) |
702 | 0 | { |
703 | 0 | j = costvector[k].pos; |
704 | 0 | if (j == seed_1) |
705 | 0 | { |
706 | 0 | *left++ = j; |
707 | 0 | v->spl_nleft++; |
708 | 0 | continue; |
709 | 0 | } |
710 | 0 | else if (j == seed_2) |
711 | 0 | { |
712 | 0 | *right++ = j; |
713 | 0 | v->spl_nright++; |
714 | 0 | continue; |
715 | 0 | } |
716 | | |
717 | 0 | if (ISALLTRUE(datum_l) || cache[j].allistrue) |
718 | 0 | { |
719 | 0 | if (ISALLTRUE(datum_l) && cache[j].allistrue) |
720 | 0 | size_alpha = 0; |
721 | 0 | else |
722 | 0 | size_alpha = SIGLENBIT(siglen) - |
723 | 0 | sizebitvec((cache[j].allistrue) ? |
724 | 0 | GETSIGN(datum_l) : |
725 | 0 | cache[j].sign, |
726 | 0 | siglen); |
727 | 0 | } |
728 | 0 | else |
729 | 0 | size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen); |
730 | |
|
731 | 0 | if (ISALLTRUE(datum_r) || cache[j].allistrue) |
732 | 0 | { |
733 | 0 | if (ISALLTRUE(datum_r) && cache[j].allistrue) |
734 | 0 | size_beta = 0; |
735 | 0 | else |
736 | 0 | size_beta = SIGLENBIT(siglen) - |
737 | 0 | sizebitvec((cache[j].allistrue) ? |
738 | 0 | GETSIGN(datum_r) : |
739 | 0 | cache[j].sign, |
740 | 0 | siglen); |
741 | 0 | } |
742 | 0 | else |
743 | 0 | size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen); |
744 | |
|
745 | 0 | if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) |
746 | 0 | { |
747 | 0 | if (ISALLTRUE(datum_l) || cache[j].allistrue) |
748 | 0 | { |
749 | 0 | if (!ISALLTRUE(datum_l)) |
750 | 0 | memset(GETSIGN(datum_l), 0xff, siglen); |
751 | 0 | } |
752 | 0 | else |
753 | 0 | { |
754 | 0 | ptr = cache[j].sign; |
755 | 0 | LOOPBYTE(siglen) |
756 | 0 | union_l[i] |= ptr[i]; |
757 | 0 | } |
758 | 0 | *left++ = j; |
759 | 0 | v->spl_nleft++; |
760 | 0 | } |
761 | 0 | else |
762 | 0 | { |
763 | 0 | if (ISALLTRUE(datum_r) || cache[j].allistrue) |
764 | 0 | { |
765 | 0 | if (!ISALLTRUE(datum_r)) |
766 | 0 | memset(GETSIGN(datum_r), 0xff, siglen); |
767 | 0 | } |
768 | 0 | else |
769 | 0 | { |
770 | 0 | ptr = cache[j].sign; |
771 | 0 | LOOPBYTE(siglen) |
772 | 0 | union_r[i] |= ptr[i]; |
773 | 0 | } |
774 | 0 | *right++ = j; |
775 | 0 | v->spl_nright++; |
776 | 0 | } |
777 | 0 | } |
778 | |
|
779 | 0 | *right = *left = FirstOffsetNumber; |
780 | 0 | v->spl_ldatum = PointerGetDatum(datum_l); |
781 | 0 | v->spl_rdatum = PointerGetDatum(datum_r); |
782 | |
|
783 | 0 | PG_RETURN_POINTER(v); |
784 | 0 | } |
785 | | |
786 | | /* |
787 | | * Formerly, gtsvector_consistent was declared in pg_proc.h with arguments |
788 | | * that did not match the documented conventions for GiST support functions. |
789 | | * We fixed that, but we still need a pg_proc entry with the old signature |
790 | | * to support reloading pre-9.6 contrib/tsearch2 opclass declarations. |
791 | | * This compatibility function should go away eventually. |
792 | | */ |
793 | | Datum |
794 | | gtsvector_consistent_oldsig(PG_FUNCTION_ARGS) |
795 | 0 | { |
796 | 0 | return gtsvector_consistent(fcinfo); |
797 | 0 | } |
798 | | |
799 | | Datum |
800 | | gtsvector_options(PG_FUNCTION_ARGS) |
801 | 0 | { |
802 | 0 | local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); |
803 | |
|
804 | 0 | init_local_reloptions(relopts, sizeof(GistTsVectorOptions)); |
805 | 0 | add_local_int_reloption(relopts, "siglen", "signature length", |
806 | 0 | SIGLEN_DEFAULT, 1, SIGLEN_MAX, |
807 | 0 | offsetof(GistTsVectorOptions, siglen)); |
808 | |
|
809 | 0 | PG_RETURN_VOID(); |
810 | 0 | } |