/src/postgres/src/backend/tsearch/to_tsany.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * to_tsany.c |
4 | | * to_ts* function definitions |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/tsearch/to_tsany.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | #include "postgres.h" |
15 | | |
16 | | #include "tsearch/ts_cache.h" |
17 | | #include "tsearch/ts_utils.h" |
18 | | #include "utils/builtins.h" |
19 | | #include "utils/jsonfuncs.h" |
20 | | |
21 | | |
22 | | /* |
23 | | * Opaque data structure, which is passed by parse_tsquery() to pushval_morph(). |
24 | | */ |
25 | | typedef struct MorphOpaque |
26 | | { |
27 | | Oid cfg_id; |
28 | | |
29 | | /* |
30 | | * Single tsquery morph could be parsed into multiple words. When these |
31 | | * words reside in adjacent positions, they are connected using this |
32 | | * operator. Usually, that is OP_PHRASE, which requires word positions of |
33 | | * a complex morph to exactly match the tsvector. |
34 | | */ |
35 | | int qoperator; |
36 | | } MorphOpaque; |
37 | | |
38 | | typedef struct TSVectorBuildState |
39 | | { |
40 | | ParsedText *prs; |
41 | | Oid cfgId; |
42 | | } TSVectorBuildState; |
43 | | |
44 | | static void add_to_tsvector(void *_state, char *elem_value, int elem_len); |
45 | | |
46 | | |
47 | | Datum |
48 | | get_current_ts_config(PG_FUNCTION_ARGS) |
49 | 0 | { |
50 | 0 | PG_RETURN_OID(getTSCurrentConfig(true)); |
51 | 0 | } |
52 | | |
53 | | /* |
54 | | * to_tsvector |
55 | | */ |
56 | | static int |
57 | | compareWORD(const void *a, const void *b) |
58 | 0 | { |
59 | 0 | int res; |
60 | |
|
61 | 0 | res = tsCompareString(((const ParsedWord *) a)->word, ((const ParsedWord *) a)->len, |
62 | 0 | ((const ParsedWord *) b)->word, ((const ParsedWord *) b)->len, |
63 | 0 | false); |
64 | |
|
65 | 0 | if (res == 0) |
66 | 0 | { |
67 | 0 | if (((const ParsedWord *) a)->pos.pos == ((const ParsedWord *) b)->pos.pos) |
68 | 0 | return 0; |
69 | | |
70 | 0 | res = (((const ParsedWord *) a)->pos.pos > ((const ParsedWord *) b)->pos.pos) ? 1 : -1; |
71 | 0 | } |
72 | | |
73 | 0 | return res; |
74 | 0 | } |
75 | | |
76 | | static int |
77 | | uniqueWORD(ParsedWord *a, int32 l) |
78 | 0 | { |
79 | 0 | ParsedWord *ptr, |
80 | 0 | *res; |
81 | 0 | int tmppos; |
82 | |
|
83 | 0 | if (l == 1) |
84 | 0 | { |
85 | 0 | tmppos = LIMITPOS(a->pos.pos); |
86 | 0 | a->alen = 2; |
87 | 0 | a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); |
88 | 0 | a->pos.apos[0] = 1; |
89 | 0 | a->pos.apos[1] = tmppos; |
90 | 0 | return l; |
91 | 0 | } |
92 | | |
93 | 0 | res = a; |
94 | 0 | ptr = a + 1; |
95 | | |
96 | | /* |
97 | | * Sort words with its positions |
98 | | */ |
99 | 0 | qsort(a, l, sizeof(ParsedWord), compareWORD); |
100 | | |
101 | | /* |
102 | | * Initialize first word and its first position |
103 | | */ |
104 | 0 | tmppos = LIMITPOS(a->pos.pos); |
105 | 0 | a->alen = 2; |
106 | 0 | a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); |
107 | 0 | a->pos.apos[0] = 1; |
108 | 0 | a->pos.apos[1] = tmppos; |
109 | | |
110 | | /* |
111 | | * Summarize position information for each word |
112 | | */ |
113 | 0 | while (ptr - a < l) |
114 | 0 | { |
115 | 0 | if (!(ptr->len == res->len && |
116 | 0 | strncmp(ptr->word, res->word, res->len) == 0)) |
117 | 0 | { |
118 | | /* |
119 | | * Got a new word, so put it in result |
120 | | */ |
121 | 0 | res++; |
122 | 0 | res->len = ptr->len; |
123 | 0 | res->word = ptr->word; |
124 | 0 | tmppos = LIMITPOS(ptr->pos.pos); |
125 | 0 | res->alen = 2; |
126 | 0 | res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen); |
127 | 0 | res->pos.apos[0] = 1; |
128 | 0 | res->pos.apos[1] = tmppos; |
129 | 0 | } |
130 | 0 | else |
131 | 0 | { |
132 | | /* |
133 | | * The word already exists, so adjust position information. But |
134 | | * before we should check size of position's array, max allowed |
135 | | * value for position and uniqueness of position |
136 | | */ |
137 | 0 | pfree(ptr->word); |
138 | 0 | if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 && |
139 | 0 | res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos)) |
140 | 0 | { |
141 | 0 | if (res->pos.apos[0] + 1 >= res->alen) |
142 | 0 | { |
143 | 0 | res->alen *= 2; |
144 | 0 | res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen); |
145 | 0 | } |
146 | 0 | if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos)) |
147 | 0 | { |
148 | 0 | res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos); |
149 | 0 | res->pos.apos[0]++; |
150 | 0 | } |
151 | 0 | } |
152 | 0 | } |
153 | 0 | ptr++; |
154 | 0 | } |
155 | |
|
156 | 0 | return res + 1 - a; |
157 | 0 | } |
158 | | |
159 | | /* |
160 | | * make value of tsvector, given parsed text |
161 | | * |
162 | | * Note: frees prs->words and subsidiary data. |
163 | | */ |
164 | | TSVector |
165 | | make_tsvector(ParsedText *prs) |
166 | 0 | { |
167 | 0 | int i, |
168 | 0 | j, |
169 | 0 | lenstr = 0, |
170 | 0 | totallen; |
171 | 0 | TSVector in; |
172 | 0 | WordEntry *ptr; |
173 | 0 | char *str; |
174 | 0 | int stroff; |
175 | | |
176 | | /* Merge duplicate words */ |
177 | 0 | if (prs->curwords > 0) |
178 | 0 | prs->curwords = uniqueWORD(prs->words, prs->curwords); |
179 | | |
180 | | /* Determine space needed */ |
181 | 0 | for (i = 0; i < prs->curwords; i++) |
182 | 0 | { |
183 | 0 | lenstr += prs->words[i].len; |
184 | 0 | if (prs->words[i].alen) |
185 | 0 | { |
186 | 0 | lenstr = SHORTALIGN(lenstr); |
187 | 0 | lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); |
188 | 0 | } |
189 | 0 | } |
190 | |
|
191 | 0 | if (lenstr > MAXSTRPOS) |
192 | 0 | ereport(ERROR, |
193 | 0 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
194 | 0 | errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS))); |
195 | | |
196 | 0 | totallen = CALCDATASIZE(prs->curwords, lenstr); |
197 | 0 | in = (TSVector) palloc0(totallen); |
198 | 0 | SET_VARSIZE(in, totallen); |
199 | 0 | in->size = prs->curwords; |
200 | |
|
201 | 0 | ptr = ARRPTR(in); |
202 | 0 | str = STRPTR(in); |
203 | 0 | stroff = 0; |
204 | 0 | for (i = 0; i < prs->curwords; i++) |
205 | 0 | { |
206 | 0 | ptr->len = prs->words[i].len; |
207 | 0 | ptr->pos = stroff; |
208 | 0 | memcpy(str + stroff, prs->words[i].word, prs->words[i].len); |
209 | 0 | stroff += prs->words[i].len; |
210 | 0 | pfree(prs->words[i].word); |
211 | 0 | if (prs->words[i].alen) |
212 | 0 | { |
213 | 0 | int k = prs->words[i].pos.apos[0]; |
214 | 0 | WordEntryPos *wptr; |
215 | |
|
216 | 0 | if (k > 0xFFFF) |
217 | 0 | elog(ERROR, "positions array too long"); |
218 | | |
219 | 0 | ptr->haspos = 1; |
220 | 0 | stroff = SHORTALIGN(stroff); |
221 | 0 | *(uint16 *) (str + stroff) = (uint16) k; |
222 | 0 | wptr = POSDATAPTR(in, ptr); |
223 | 0 | for (j = 0; j < k; j++) |
224 | 0 | { |
225 | 0 | WEP_SETWEIGHT(wptr[j], 0); |
226 | 0 | WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]); |
227 | 0 | } |
228 | 0 | stroff += sizeof(uint16) + k * sizeof(WordEntryPos); |
229 | 0 | pfree(prs->words[i].pos.apos); |
230 | 0 | } |
231 | 0 | else |
232 | 0 | ptr->haspos = 0; |
233 | 0 | ptr++; |
234 | 0 | } |
235 | | |
236 | 0 | if (prs->words) |
237 | 0 | pfree(prs->words); |
238 | |
|
239 | 0 | return in; |
240 | 0 | } |
241 | | |
242 | | Datum |
243 | | to_tsvector_byid(PG_FUNCTION_ARGS) |
244 | 0 | { |
245 | 0 | Oid cfgId = PG_GETARG_OID(0); |
246 | 0 | text *in = PG_GETARG_TEXT_PP(1); |
247 | 0 | ParsedText prs; |
248 | 0 | TSVector out; |
249 | |
|
250 | 0 | prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's |
251 | | * number */ |
252 | 0 | if (prs.lenwords < 2) |
253 | 0 | prs.lenwords = 2; |
254 | 0 | else if (prs.lenwords > MaxAllocSize / sizeof(ParsedWord)) |
255 | 0 | prs.lenwords = MaxAllocSize / sizeof(ParsedWord); |
256 | 0 | prs.curwords = 0; |
257 | 0 | prs.pos = 0; |
258 | 0 | prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); |
259 | |
|
260 | 0 | parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in)); |
261 | |
|
262 | 0 | PG_FREE_IF_COPY(in, 1); |
263 | |
|
264 | 0 | out = make_tsvector(&prs); |
265 | |
|
266 | 0 | PG_RETURN_TSVECTOR(out); |
267 | 0 | } |
268 | | |
269 | | Datum |
270 | | to_tsvector(PG_FUNCTION_ARGS) |
271 | 0 | { |
272 | 0 | text *in = PG_GETARG_TEXT_PP(0); |
273 | 0 | Oid cfgId; |
274 | |
|
275 | 0 | cfgId = getTSCurrentConfig(true); |
276 | 0 | PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid, |
277 | 0 | ObjectIdGetDatum(cfgId), |
278 | 0 | PointerGetDatum(in))); |
279 | 0 | } |
280 | | |
281 | | /* |
282 | | * Worker function for jsonb(_string)_to_tsvector(_byid) |
283 | | */ |
284 | | static TSVector |
285 | | jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags) |
286 | 0 | { |
287 | 0 | TSVectorBuildState state; |
288 | 0 | ParsedText prs; |
289 | |
|
290 | 0 | prs.words = NULL; |
291 | 0 | prs.curwords = 0; |
292 | 0 | state.prs = &prs; |
293 | 0 | state.cfgId = cfgId; |
294 | |
|
295 | 0 | iterate_jsonb_values(jb, flags, &state, add_to_tsvector); |
296 | |
|
297 | 0 | return make_tsvector(&prs); |
298 | 0 | } |
299 | | |
300 | | Datum |
301 | | jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS) |
302 | 0 | { |
303 | 0 | Oid cfgId = PG_GETARG_OID(0); |
304 | 0 | Jsonb *jb = PG_GETARG_JSONB_P(1); |
305 | 0 | TSVector result; |
306 | |
|
307 | 0 | result = jsonb_to_tsvector_worker(cfgId, jb, jtiString); |
308 | 0 | PG_FREE_IF_COPY(jb, 1); |
309 | |
|
310 | 0 | PG_RETURN_TSVECTOR(result); |
311 | 0 | } |
312 | | |
313 | | Datum |
314 | | jsonb_string_to_tsvector(PG_FUNCTION_ARGS) |
315 | 0 | { |
316 | 0 | Jsonb *jb = PG_GETARG_JSONB_P(0); |
317 | 0 | Oid cfgId; |
318 | 0 | TSVector result; |
319 | |
|
320 | 0 | cfgId = getTSCurrentConfig(true); |
321 | 0 | result = jsonb_to_tsvector_worker(cfgId, jb, jtiString); |
322 | 0 | PG_FREE_IF_COPY(jb, 0); |
323 | |
|
324 | 0 | PG_RETURN_TSVECTOR(result); |
325 | 0 | } |
326 | | |
327 | | Datum |
328 | | jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) |
329 | 0 | { |
330 | 0 | Oid cfgId = PG_GETARG_OID(0); |
331 | 0 | Jsonb *jb = PG_GETARG_JSONB_P(1); |
332 | 0 | Jsonb *jbFlags = PG_GETARG_JSONB_P(2); |
333 | 0 | TSVector result; |
334 | 0 | uint32 flags = parse_jsonb_index_flags(jbFlags); |
335 | |
|
336 | 0 | result = jsonb_to_tsvector_worker(cfgId, jb, flags); |
337 | 0 | PG_FREE_IF_COPY(jb, 1); |
338 | 0 | PG_FREE_IF_COPY(jbFlags, 2); |
339 | |
|
340 | 0 | PG_RETURN_TSVECTOR(result); |
341 | 0 | } |
342 | | |
343 | | Datum |
344 | | jsonb_to_tsvector(PG_FUNCTION_ARGS) |
345 | 0 | { |
346 | 0 | Jsonb *jb = PG_GETARG_JSONB_P(0); |
347 | 0 | Jsonb *jbFlags = PG_GETARG_JSONB_P(1); |
348 | 0 | Oid cfgId; |
349 | 0 | TSVector result; |
350 | 0 | uint32 flags = parse_jsonb_index_flags(jbFlags); |
351 | |
|
352 | 0 | cfgId = getTSCurrentConfig(true); |
353 | 0 | result = jsonb_to_tsvector_worker(cfgId, jb, flags); |
354 | 0 | PG_FREE_IF_COPY(jb, 0); |
355 | 0 | PG_FREE_IF_COPY(jbFlags, 1); |
356 | |
|
357 | 0 | PG_RETURN_TSVECTOR(result); |
358 | 0 | } |
359 | | |
360 | | /* |
361 | | * Worker function for json(_string)_to_tsvector(_byid) |
362 | | */ |
363 | | static TSVector |
364 | | json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags) |
365 | 0 | { |
366 | 0 | TSVectorBuildState state; |
367 | 0 | ParsedText prs; |
368 | |
|
369 | 0 | prs.words = NULL; |
370 | 0 | prs.curwords = 0; |
371 | 0 | state.prs = &prs; |
372 | 0 | state.cfgId = cfgId; |
373 | |
|
374 | 0 | iterate_json_values(json, flags, &state, add_to_tsvector); |
375 | |
|
376 | 0 | return make_tsvector(&prs); |
377 | 0 | } |
378 | | |
379 | | Datum |
380 | | json_string_to_tsvector_byid(PG_FUNCTION_ARGS) |
381 | 0 | { |
382 | 0 | Oid cfgId = PG_GETARG_OID(0); |
383 | 0 | text *json = PG_GETARG_TEXT_P(1); |
384 | 0 | TSVector result; |
385 | |
|
386 | 0 | result = json_to_tsvector_worker(cfgId, json, jtiString); |
387 | 0 | PG_FREE_IF_COPY(json, 1); |
388 | |
|
389 | 0 | PG_RETURN_TSVECTOR(result); |
390 | 0 | } |
391 | | |
392 | | Datum |
393 | | json_string_to_tsvector(PG_FUNCTION_ARGS) |
394 | 0 | { |
395 | 0 | text *json = PG_GETARG_TEXT_P(0); |
396 | 0 | Oid cfgId; |
397 | 0 | TSVector result; |
398 | |
|
399 | 0 | cfgId = getTSCurrentConfig(true); |
400 | 0 | result = json_to_tsvector_worker(cfgId, json, jtiString); |
401 | 0 | PG_FREE_IF_COPY(json, 0); |
402 | |
|
403 | 0 | PG_RETURN_TSVECTOR(result); |
404 | 0 | } |
405 | | |
406 | | Datum |
407 | | json_to_tsvector_byid(PG_FUNCTION_ARGS) |
408 | 0 | { |
409 | 0 | Oid cfgId = PG_GETARG_OID(0); |
410 | 0 | text *json = PG_GETARG_TEXT_P(1); |
411 | 0 | Jsonb *jbFlags = PG_GETARG_JSONB_P(2); |
412 | 0 | TSVector result; |
413 | 0 | uint32 flags = parse_jsonb_index_flags(jbFlags); |
414 | |
|
415 | 0 | result = json_to_tsvector_worker(cfgId, json, flags); |
416 | 0 | PG_FREE_IF_COPY(json, 1); |
417 | 0 | PG_FREE_IF_COPY(jbFlags, 2); |
418 | |
|
419 | 0 | PG_RETURN_TSVECTOR(result); |
420 | 0 | } |
421 | | |
422 | | Datum |
423 | | json_to_tsvector(PG_FUNCTION_ARGS) |
424 | 0 | { |
425 | 0 | text *json = PG_GETARG_TEXT_P(0); |
426 | 0 | Jsonb *jbFlags = PG_GETARG_JSONB_P(1); |
427 | 0 | Oid cfgId; |
428 | 0 | TSVector result; |
429 | 0 | uint32 flags = parse_jsonb_index_flags(jbFlags); |
430 | |
|
431 | 0 | cfgId = getTSCurrentConfig(true); |
432 | 0 | result = json_to_tsvector_worker(cfgId, json, flags); |
433 | 0 | PG_FREE_IF_COPY(json, 0); |
434 | 0 | PG_FREE_IF_COPY(jbFlags, 1); |
435 | |
|
436 | 0 | PG_RETURN_TSVECTOR(result); |
437 | 0 | } |
438 | | |
439 | | /* |
440 | | * Parse lexemes in an element of a json(b) value, add to TSVectorBuildState. |
441 | | */ |
442 | | static void |
443 | | add_to_tsvector(void *_state, char *elem_value, int elem_len) |
444 | 0 | { |
445 | 0 | TSVectorBuildState *state = (TSVectorBuildState *) _state; |
446 | 0 | ParsedText *prs = state->prs; |
447 | 0 | int32 prevwords; |
448 | |
|
449 | 0 | if (prs->words == NULL) |
450 | 0 | { |
451 | | /* |
452 | | * First time through: initialize words array to a reasonable size. |
453 | | * (parsetext() will realloc it bigger as needed.) |
454 | | */ |
455 | 0 | prs->lenwords = 16; |
456 | 0 | prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords); |
457 | 0 | prs->curwords = 0; |
458 | 0 | prs->pos = 0; |
459 | 0 | } |
460 | |
|
461 | 0 | prevwords = prs->curwords; |
462 | |
|
463 | 0 | parsetext(state->cfgId, prs, elem_value, elem_len); |
464 | | |
465 | | /* |
466 | | * If we extracted any words from this JSON element, advance pos to create |
467 | | * an artificial break between elements. This is because we don't want |
468 | | * phrase searches to think that the last word in this element is adjacent |
469 | | * to the first word in the next one. |
470 | | */ |
471 | 0 | if (prs->curwords > prevwords) |
472 | 0 | prs->pos += 1; |
473 | 0 | } |
474 | | |
475 | | |
476 | | /* |
477 | | * to_tsquery |
478 | | */ |
479 | | |
480 | | |
481 | | /* |
482 | | * This function is used for morph parsing. |
483 | | * |
484 | | * The value is passed to parsetext which will call the right dictionary to |
485 | | * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP |
486 | | * to the stack. |
487 | | * |
488 | | * All words belonging to the same variant are pushed as an ANDed list, |
489 | | * and different variants are ORed together. |
490 | | */ |
491 | | static void |
492 | | pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix) |
493 | 0 | { |
494 | 0 | int32 count = 0; |
495 | 0 | ParsedText prs; |
496 | 0 | uint32 variant, |
497 | 0 | pos = 0, |
498 | 0 | cntvar = 0, |
499 | 0 | cntpos = 0, |
500 | 0 | cnt = 0; |
501 | 0 | MorphOpaque *data = (MorphOpaque *) DatumGetPointer(opaque); |
502 | |
|
503 | 0 | prs.lenwords = 4; |
504 | 0 | prs.curwords = 0; |
505 | 0 | prs.pos = 0; |
506 | 0 | prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); |
507 | |
|
508 | 0 | parsetext(data->cfg_id, &prs, strval, lenval); |
509 | |
|
510 | 0 | if (prs.curwords > 0) |
511 | 0 | { |
512 | 0 | while (count < prs.curwords) |
513 | 0 | { |
514 | | /* |
515 | | * Were any stop words removed? If so, fill empty positions with |
516 | | * placeholders linked by an appropriate operator. |
517 | | */ |
518 | 0 | if (pos > 0 && pos + 1 < prs.words[count].pos.pos) |
519 | 0 | { |
520 | 0 | while (pos + 1 < prs.words[count].pos.pos) |
521 | 0 | { |
522 | | /* put placeholders for each missing stop word */ |
523 | 0 | pushStop(state); |
524 | 0 | if (cntpos) |
525 | 0 | pushOperator(state, data->qoperator, 1); |
526 | 0 | cntpos++; |
527 | 0 | pos++; |
528 | 0 | } |
529 | 0 | } |
530 | | |
531 | | /* save current word's position */ |
532 | 0 | pos = prs.words[count].pos.pos; |
533 | | |
534 | | /* Go through all variants obtained from this token */ |
535 | 0 | cntvar = 0; |
536 | 0 | while (count < prs.curwords && pos == prs.words[count].pos.pos) |
537 | 0 | { |
538 | 0 | variant = prs.words[count].nvariant; |
539 | | |
540 | | /* Push all words belonging to the same variant */ |
541 | 0 | cnt = 0; |
542 | 0 | while (count < prs.curwords && |
543 | 0 | pos == prs.words[count].pos.pos && |
544 | 0 | variant == prs.words[count].nvariant) |
545 | 0 | { |
546 | 0 | pushValue(state, |
547 | 0 | prs.words[count].word, |
548 | 0 | prs.words[count].len, |
549 | 0 | weight, |
550 | 0 | ((prs.words[count].flags & TSL_PREFIX) || prefix)); |
551 | 0 | pfree(prs.words[count].word); |
552 | 0 | if (cnt) |
553 | 0 | pushOperator(state, OP_AND, 0); |
554 | 0 | cnt++; |
555 | 0 | count++; |
556 | 0 | } |
557 | |
|
558 | 0 | if (cntvar) |
559 | 0 | pushOperator(state, OP_OR, 0); |
560 | 0 | cntvar++; |
561 | 0 | } |
562 | |
|
563 | 0 | if (cntpos) |
564 | 0 | { |
565 | | /* distance may be useful */ |
566 | 0 | pushOperator(state, data->qoperator, 1); |
567 | 0 | } |
568 | |
|
569 | 0 | cntpos++; |
570 | 0 | } |
571 | |
|
572 | 0 | pfree(prs.words); |
573 | 0 | } |
574 | 0 | else |
575 | 0 | pushStop(state); |
576 | 0 | } |
577 | | |
578 | | Datum |
579 | | to_tsquery_byid(PG_FUNCTION_ARGS) |
580 | 0 | { |
581 | 0 | text *in = PG_GETARG_TEXT_PP(1); |
582 | 0 | TSQuery query; |
583 | 0 | MorphOpaque data; |
584 | |
|
585 | 0 | data.cfg_id = PG_GETARG_OID(0); |
586 | | |
587 | | /* |
588 | | * Passing OP_PHRASE as a qoperator makes tsquery require matching of word |
589 | | * positions of a complex morph exactly match the tsvector. Also, when |
590 | | * the complex morphs are connected with OP_PHRASE operator, we connect |
591 | | * all their words into the OP_PHRASE sequence. |
592 | | */ |
593 | 0 | data.qoperator = OP_PHRASE; |
594 | |
|
595 | 0 | query = parse_tsquery(text_to_cstring(in), |
596 | 0 | pushval_morph, |
597 | 0 | PointerGetDatum(&data), |
598 | 0 | 0, |
599 | 0 | NULL); |
600 | |
|
601 | 0 | PG_RETURN_TSQUERY(query); |
602 | 0 | } |
603 | | |
604 | | Datum |
605 | | to_tsquery(PG_FUNCTION_ARGS) |
606 | 0 | { |
607 | 0 | text *in = PG_GETARG_TEXT_PP(0); |
608 | 0 | Oid cfgId; |
609 | |
|
610 | 0 | cfgId = getTSCurrentConfig(true); |
611 | 0 | PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid, |
612 | 0 | ObjectIdGetDatum(cfgId), |
613 | 0 | PointerGetDatum(in))); |
614 | 0 | } |
615 | | |
616 | | Datum |
617 | | plainto_tsquery_byid(PG_FUNCTION_ARGS) |
618 | 0 | { |
619 | 0 | text *in = PG_GETARG_TEXT_PP(1); |
620 | 0 | TSQuery query; |
621 | 0 | MorphOpaque data; |
622 | |
|
623 | 0 | data.cfg_id = PG_GETARG_OID(0); |
624 | | |
625 | | /* |
626 | | * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a |
627 | | * single morph. Passing OP_PHRASE as a qoperator makes tsquery require |
628 | | * matching of all words independently on their positions. |
629 | | */ |
630 | 0 | data.qoperator = OP_AND; |
631 | |
|
632 | 0 | query = parse_tsquery(text_to_cstring(in), |
633 | 0 | pushval_morph, |
634 | 0 | PointerGetDatum(&data), |
635 | 0 | P_TSQ_PLAIN, |
636 | 0 | NULL); |
637 | |
|
638 | 0 | PG_RETURN_POINTER(query); |
639 | 0 | } |
640 | | |
641 | | Datum |
642 | | plainto_tsquery(PG_FUNCTION_ARGS) |
643 | 0 | { |
644 | 0 | text *in = PG_GETARG_TEXT_PP(0); |
645 | 0 | Oid cfgId; |
646 | |
|
647 | 0 | cfgId = getTSCurrentConfig(true); |
648 | 0 | PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid, |
649 | 0 | ObjectIdGetDatum(cfgId), |
650 | 0 | PointerGetDatum(in))); |
651 | 0 | } |
652 | | |
653 | | |
654 | | Datum |
655 | | phraseto_tsquery_byid(PG_FUNCTION_ARGS) |
656 | 0 | { |
657 | 0 | text *in = PG_GETARG_TEXT_PP(1); |
658 | 0 | TSQuery query; |
659 | 0 | MorphOpaque data; |
660 | |
|
661 | 0 | data.cfg_id = PG_GETARG_OID(0); |
662 | | |
663 | | /* |
664 | | * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a |
665 | | * single morph. Passing OP_PHRASE as a qoperator makes tsquery require |
666 | | * matching of word positions. |
667 | | */ |
668 | 0 | data.qoperator = OP_PHRASE; |
669 | |
|
670 | 0 | query = parse_tsquery(text_to_cstring(in), |
671 | 0 | pushval_morph, |
672 | 0 | PointerGetDatum(&data), |
673 | 0 | P_TSQ_PLAIN, |
674 | 0 | NULL); |
675 | |
|
676 | 0 | PG_RETURN_TSQUERY(query); |
677 | 0 | } |
678 | | |
679 | | Datum |
680 | | phraseto_tsquery(PG_FUNCTION_ARGS) |
681 | 0 | { |
682 | 0 | text *in = PG_GETARG_TEXT_PP(0); |
683 | 0 | Oid cfgId; |
684 | |
|
685 | 0 | cfgId = getTSCurrentConfig(true); |
686 | 0 | PG_RETURN_DATUM(DirectFunctionCall2(phraseto_tsquery_byid, |
687 | 0 | ObjectIdGetDatum(cfgId), |
688 | 0 | PointerGetDatum(in))); |
689 | 0 | } |
690 | | |
691 | | Datum |
692 | | websearch_to_tsquery_byid(PG_FUNCTION_ARGS) |
693 | 0 | { |
694 | 0 | text *in = PG_GETARG_TEXT_PP(1); |
695 | 0 | MorphOpaque data; |
696 | 0 | TSQuery query = NULL; |
697 | |
|
698 | 0 | data.cfg_id = PG_GETARG_OID(0); |
699 | | |
700 | | /* |
701 | | * Passing OP_PHRASE as a qoperator makes tsquery require matching of word |
702 | | * positions of a complex morph exactly match the tsvector. Also, when |
703 | | * the complex morphs are given in quotes, we connect all their words into |
704 | | * the OP_PHRASE sequence. |
705 | | */ |
706 | 0 | data.qoperator = OP_PHRASE; |
707 | |
|
708 | 0 | query = parse_tsquery(text_to_cstring(in), |
709 | 0 | pushval_morph, |
710 | 0 | PointerGetDatum(&data), |
711 | 0 | P_TSQ_WEB, |
712 | 0 | NULL); |
713 | |
|
714 | 0 | PG_RETURN_TSQUERY(query); |
715 | 0 | } |
716 | | |
717 | | Datum |
718 | | websearch_to_tsquery(PG_FUNCTION_ARGS) |
719 | 0 | { |
720 | 0 | text *in = PG_GETARG_TEXT_PP(0); |
721 | 0 | Oid cfgId; |
722 | |
|
723 | 0 | cfgId = getTSCurrentConfig(true); |
724 | 0 | PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid, |
725 | 0 | ObjectIdGetDatum(cfgId), |
726 | 0 | PointerGetDatum(in))); |
727 | 0 | } |