/src/postgres/src/backend/utils/adt/tsginidx.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * tsginidx.c |
4 | | * GIN support functions for tsvector_ops |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/utils/adt/tsginidx.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | #include "postgres.h" |
15 | | |
16 | | #include "access/gin.h" |
17 | | #include "tsearch/ts_type.h" |
18 | | #include "tsearch/ts_utils.h" |
19 | | #include "utils/builtins.h" |
20 | | #include "varatt.h" |
21 | | |
22 | | |
23 | | Datum |
24 | | gin_cmp_tslexeme(PG_FUNCTION_ARGS) |
25 | 0 | { |
26 | 0 | text *a = PG_GETARG_TEXT_PP(0); |
27 | 0 | text *b = PG_GETARG_TEXT_PP(1); |
28 | 0 | int cmp; |
29 | |
|
30 | 0 | cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), |
31 | 0 | VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), |
32 | 0 | false); |
33 | |
|
34 | 0 | PG_FREE_IF_COPY(a, 0); |
35 | 0 | PG_FREE_IF_COPY(b, 1); |
36 | 0 | PG_RETURN_INT32(cmp); |
37 | 0 | } |
38 | | |
39 | | Datum |
40 | | gin_cmp_prefix(PG_FUNCTION_ARGS) |
41 | 0 | { |
42 | 0 | text *a = PG_GETARG_TEXT_PP(0); |
43 | 0 | text *b = PG_GETARG_TEXT_PP(1); |
44 | |
|
45 | | #ifdef NOT_USED |
46 | | StrategyNumber strategy = PG_GETARG_UINT16(2); |
47 | | Pointer extra_data = PG_GETARG_POINTER(3); |
48 | | #endif |
49 | 0 | int cmp; |
50 | |
|
51 | 0 | cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), |
52 | 0 | VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), |
53 | 0 | true); |
54 | |
|
55 | 0 | if (cmp < 0) |
56 | 0 | cmp = 1; /* prevent continue scan */ |
57 | |
|
58 | 0 | PG_FREE_IF_COPY(a, 0); |
59 | 0 | PG_FREE_IF_COPY(b, 1); |
60 | 0 | PG_RETURN_INT32(cmp); |
61 | 0 | } |
62 | | |
63 | | Datum |
64 | | gin_extract_tsvector(PG_FUNCTION_ARGS) |
65 | 0 | { |
66 | 0 | TSVector vector = PG_GETARG_TSVECTOR(0); |
67 | 0 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
68 | 0 | Datum *entries = NULL; |
69 | |
|
70 | 0 | *nentries = vector->size; |
71 | 0 | if (vector->size > 0) |
72 | 0 | { |
73 | 0 | int i; |
74 | 0 | WordEntry *we = ARRPTR(vector); |
75 | |
|
76 | 0 | entries = (Datum *) palloc(sizeof(Datum) * vector->size); |
77 | |
|
78 | 0 | for (i = 0; i < vector->size; i++) |
79 | 0 | { |
80 | 0 | text *txt; |
81 | |
|
82 | 0 | txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len); |
83 | 0 | entries[i] = PointerGetDatum(txt); |
84 | |
|
85 | 0 | we++; |
86 | 0 | } |
87 | 0 | } |
88 | |
|
89 | 0 | PG_FREE_IF_COPY(vector, 0); |
90 | 0 | PG_RETURN_POINTER(entries); |
91 | 0 | } |
92 | | |
93 | | Datum |
94 | | gin_extract_tsquery(PG_FUNCTION_ARGS) |
95 | 0 | { |
96 | 0 | TSQuery query = PG_GETARG_TSQUERY(0); |
97 | 0 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
98 | | |
99 | | /* StrategyNumber strategy = PG_GETARG_UINT16(2); */ |
100 | 0 | bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3); |
101 | 0 | Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); |
102 | | |
103 | | /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ |
104 | 0 | int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
105 | 0 | Datum *entries = NULL; |
106 | |
|
107 | 0 | *nentries = 0; |
108 | |
|
109 | 0 | if (query->size > 0) |
110 | 0 | { |
111 | 0 | QueryItem *item = GETQUERY(query); |
112 | 0 | int32 i, |
113 | 0 | j; |
114 | 0 | bool *partialmatch; |
115 | 0 | int *map_item_operand; |
116 | | |
117 | | /* |
118 | | * If the query doesn't have any required positive matches (for |
119 | | * instance, it's something like '! foo'), we have to do a full index |
120 | | * scan. |
121 | | */ |
122 | 0 | if (tsquery_requires_match(item)) |
123 | 0 | *searchMode = GIN_SEARCH_MODE_DEFAULT; |
124 | 0 | else |
125 | 0 | *searchMode = GIN_SEARCH_MODE_ALL; |
126 | | |
127 | | /* count number of VAL items */ |
128 | 0 | j = 0; |
129 | 0 | for (i = 0; i < query->size; i++) |
130 | 0 | { |
131 | 0 | if (item[i].type == QI_VAL) |
132 | 0 | j++; |
133 | 0 | } |
134 | 0 | *nentries = j; |
135 | |
|
136 | 0 | entries = (Datum *) palloc(sizeof(Datum) * j); |
137 | 0 | partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j); |
138 | | |
139 | | /* |
140 | | * Make map to convert item's number to corresponding operand's (the |
141 | | * same, entry's) number. Entry's number is used in check array in |
142 | | * consistent method. We use the same map for each entry. |
143 | | */ |
144 | 0 | *extra_data = (Pointer *) palloc(sizeof(Pointer) * j); |
145 | 0 | map_item_operand = (int *) palloc0(sizeof(int) * query->size); |
146 | | |
147 | | /* Now rescan the VAL items and fill in the arrays */ |
148 | 0 | j = 0; |
149 | 0 | for (i = 0; i < query->size; i++) |
150 | 0 | { |
151 | 0 | if (item[i].type == QI_VAL) |
152 | 0 | { |
153 | 0 | QueryOperand *val = &item[i].qoperand; |
154 | 0 | text *txt; |
155 | |
|
156 | 0 | txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, |
157 | 0 | val->length); |
158 | 0 | entries[j] = PointerGetDatum(txt); |
159 | 0 | partialmatch[j] = val->prefix; |
160 | 0 | (*extra_data)[j] = (Pointer) map_item_operand; |
161 | 0 | map_item_operand[i] = j; |
162 | 0 | j++; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | } |
166 | |
|
167 | 0 | PG_FREE_IF_COPY(query, 0); |
168 | |
|
169 | 0 | PG_RETURN_POINTER(entries); |
170 | 0 | } |
171 | | |
172 | | typedef struct |
173 | | { |
174 | | QueryItem *first_item; |
175 | | GinTernaryValue *check; |
176 | | int *map_item_operand; |
177 | | } GinChkVal; |
178 | | |
179 | | /* |
180 | | * TS_execute callback for matching a tsquery operand to GIN index data |
181 | | */ |
182 | | static TSTernaryValue |
183 | | checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) |
184 | 0 | { |
185 | 0 | GinChkVal *gcv = (GinChkVal *) checkval; |
186 | 0 | int j; |
187 | 0 | GinTernaryValue result; |
188 | | |
189 | | /* convert item's number to corresponding entry's (operand's) number */ |
190 | 0 | j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; |
191 | | |
192 | | /* determine presence of current entry in indexed value */ |
193 | 0 | result = gcv->check[j]; |
194 | | |
195 | | /* |
196 | | * If any val requiring a weight is used or caller needs position |
197 | | * information then we must recheck, so replace TRUE with MAYBE. |
198 | | */ |
199 | 0 | if (result == GIN_TRUE) |
200 | 0 | { |
201 | 0 | if (val->weight != 0 || data != NULL) |
202 | 0 | result = GIN_MAYBE; |
203 | 0 | } |
204 | | |
205 | | /* |
206 | | * We rely on GinTernaryValue and TSTernaryValue using equivalent value |
207 | | * assignments. We could use a switch statement to map the values if that |
208 | | * ever stops being true, but it seems unlikely to happen. |
209 | | */ |
210 | 0 | return (TSTernaryValue) result; |
211 | 0 | } |
212 | | |
213 | | Datum |
214 | | gin_tsquery_consistent(PG_FUNCTION_ARGS) |
215 | 0 | { |
216 | 0 | bool *check = (bool *) PG_GETARG_POINTER(0); |
217 | | |
218 | | /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ |
219 | 0 | TSQuery query = PG_GETARG_TSQUERY(2); |
220 | | |
221 | | /* int32 nkeys = PG_GETARG_INT32(3); */ |
222 | 0 | Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); |
223 | 0 | bool *recheck = (bool *) PG_GETARG_POINTER(5); |
224 | 0 | bool res = false; |
225 | | |
226 | | /* Initially assume query doesn't require recheck */ |
227 | 0 | *recheck = false; |
228 | |
|
229 | 0 | if (query->size > 0) |
230 | 0 | { |
231 | 0 | GinChkVal gcv; |
232 | | |
233 | | /* |
234 | | * check-parameter array has one entry for each value (operand) in the |
235 | | * query. |
236 | | */ |
237 | 0 | gcv.first_item = GETQUERY(query); |
238 | 0 | gcv.check = (GinTernaryValue *) check; |
239 | 0 | gcv.map_item_operand = (int *) (extra_data[0]); |
240 | |
|
241 | 0 | switch (TS_execute_ternary(GETQUERY(query), |
242 | 0 | &gcv, |
243 | 0 | TS_EXEC_PHRASE_NO_POS, |
244 | 0 | checkcondition_gin)) |
245 | 0 | { |
246 | 0 | case TS_NO: |
247 | 0 | res = false; |
248 | 0 | break; |
249 | 0 | case TS_YES: |
250 | 0 | res = true; |
251 | 0 | break; |
252 | 0 | case TS_MAYBE: |
253 | 0 | res = true; |
254 | 0 | *recheck = true; |
255 | 0 | break; |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | 0 | PG_RETURN_BOOL(res); |
260 | 0 | } |
261 | | |
262 | | Datum |
263 | | gin_tsquery_triconsistent(PG_FUNCTION_ARGS) |
264 | 0 | { |
265 | 0 | GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
266 | | |
267 | | /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ |
268 | 0 | TSQuery query = PG_GETARG_TSQUERY(2); |
269 | | |
270 | | /* int32 nkeys = PG_GETARG_INT32(3); */ |
271 | 0 | Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); |
272 | 0 | GinTernaryValue res = GIN_FALSE; |
273 | |
|
274 | 0 | if (query->size > 0) |
275 | 0 | { |
276 | 0 | GinChkVal gcv; |
277 | | |
278 | | /* |
279 | | * check-parameter array has one entry for each value (operand) in the |
280 | | * query. |
281 | | */ |
282 | 0 | gcv.first_item = GETQUERY(query); |
283 | 0 | gcv.check = check; |
284 | 0 | gcv.map_item_operand = (int *) (extra_data[0]); |
285 | |
|
286 | 0 | res = TS_execute_ternary(GETQUERY(query), |
287 | 0 | &gcv, |
288 | 0 | TS_EXEC_PHRASE_NO_POS, |
289 | 0 | checkcondition_gin); |
290 | 0 | } |
291 | |
|
292 | 0 | PG_RETURN_GIN_TERNARY_VALUE(res); |
293 | 0 | } |
294 | | |
295 | | /* |
296 | | * Formerly, gin_extract_tsvector had only two arguments. Now it has three, |
297 | | * but we still need a pg_proc entry with two args to support reloading |
298 | | * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility |
299 | | * function should go away eventually. (Note: you might say "hey, but the |
300 | | * code above is only *using* two args, so let's just declare it that way". |
301 | | * If you try that you'll find the opr_sanity regression test complains.) |
302 | | */ |
303 | | Datum |
304 | | gin_extract_tsvector_2args(PG_FUNCTION_ARGS) |
305 | 0 | { |
306 | 0 | if (PG_NARGS() < 3) /* should not happen */ |
307 | 0 | elog(ERROR, "gin_extract_tsvector requires three arguments"); |
308 | 0 | return gin_extract_tsvector(fcinfo); |
309 | 0 | } |
310 | | |
311 | | /* |
312 | | * Likewise, we need a stub version of gin_extract_tsquery declared with |
313 | | * only five arguments. |
314 | | */ |
315 | | Datum |
316 | | gin_extract_tsquery_5args(PG_FUNCTION_ARGS) |
317 | 0 | { |
318 | 0 | if (PG_NARGS() < 7) /* should not happen */ |
319 | 0 | elog(ERROR, "gin_extract_tsquery requires seven arguments"); |
320 | 0 | return gin_extract_tsquery(fcinfo); |
321 | 0 | } |
322 | | |
323 | | /* |
324 | | * Likewise, we need a stub version of gin_tsquery_consistent declared with |
325 | | * only six arguments. |
326 | | */ |
327 | | Datum |
328 | | gin_tsquery_consistent_6args(PG_FUNCTION_ARGS) |
329 | 0 | { |
330 | 0 | if (PG_NARGS() < 8) /* should not happen */ |
331 | 0 | elog(ERROR, "gin_tsquery_consistent requires eight arguments"); |
332 | 0 | return gin_tsquery_consistent(fcinfo); |
333 | 0 | } |
334 | | |
335 | | /* |
336 | | * Likewise, a stub version of gin_extract_tsquery declared with argument |
337 | | * types that are no longer considered appropriate. |
338 | | */ |
339 | | Datum |
340 | | gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS) |
341 | 0 | { |
342 | 0 | return gin_extract_tsquery(fcinfo); |
343 | 0 | } |
344 | | |
345 | | /* |
346 | | * Likewise, a stub version of gin_tsquery_consistent declared with argument |
347 | | * types that are no longer considered appropriate. |
348 | | */ |
349 | | Datum |
350 | | gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS) |
351 | 0 | { |
352 | 0 | return gin_tsquery_consistent(fcinfo); |
353 | 0 | } |