/src/postgres/src/backend/utils/adt/tsquery_util.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * tsquery_util.c |
4 | | * Utilities for tsquery datatype |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/utils/adt/tsquery_util.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | |
15 | | #include "postgres.h" |
16 | | |
17 | | #include "miscadmin.h" |
18 | | #include "tsearch/ts_utils.h" |
19 | | #include "varatt.h" |
20 | | |
21 | | /* |
22 | | * Build QTNode tree for a tsquery given in QueryItem array format. |
23 | | */ |
24 | | QTNode * |
25 | | QT2QTN(QueryItem *in, char *operand) |
26 | 0 | { |
27 | 0 | QTNode *node = (QTNode *) palloc0(sizeof(QTNode)); |
28 | | |
29 | | /* since this function recurses, it could be driven to stack overflow. */ |
30 | 0 | check_stack_depth(); |
31 | |
|
32 | 0 | node->valnode = in; |
33 | |
|
34 | 0 | if (in->type == QI_OPR) |
35 | 0 | { |
36 | 0 | node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); |
37 | 0 | node->child[0] = QT2QTN(in + 1, operand); |
38 | 0 | node->sign = node->child[0]->sign; |
39 | 0 | if (in->qoperator.oper == OP_NOT) |
40 | 0 | node->nchild = 1; |
41 | 0 | else |
42 | 0 | { |
43 | 0 | node->nchild = 2; |
44 | 0 | node->child[1] = QT2QTN(in + in->qoperator.left, operand); |
45 | 0 | node->sign |= node->child[1]->sign; |
46 | 0 | } |
47 | 0 | } |
48 | 0 | else if (operand) |
49 | 0 | { |
50 | 0 | node->word = operand + in->qoperand.distance; |
51 | 0 | node->sign = ((uint32) 1) << (((unsigned int) in->qoperand.valcrc) % 32); |
52 | 0 | } |
53 | |
|
54 | 0 | return node; |
55 | 0 | } |
56 | | |
57 | | /* |
58 | | * Free a QTNode tree. |
59 | | * |
60 | | * Referenced "word" and "valnode" items are freed if marked as transient |
61 | | * by flags. |
62 | | */ |
63 | | void |
64 | | QTNFree(QTNode *in) |
65 | 0 | { |
66 | 0 | if (!in) |
67 | 0 | return; |
68 | | |
69 | | /* since this function recurses, it could be driven to stack overflow. */ |
70 | 0 | check_stack_depth(); |
71 | |
|
72 | 0 | if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0) |
73 | 0 | pfree(in->word); |
74 | |
|
75 | 0 | if (in->valnode->type == QI_OPR) |
76 | 0 | { |
77 | 0 | int i; |
78 | |
|
79 | 0 | for (i = 0; i < in->nchild; i++) |
80 | 0 | QTNFree(in->child[i]); |
81 | 0 | } |
82 | 0 | if (in->child) |
83 | 0 | pfree(in->child); |
84 | |
|
85 | 0 | if (in->flags & QTN_NEEDFREE) |
86 | 0 | pfree(in->valnode); |
87 | |
|
88 | 0 | pfree(in); |
89 | 0 | } |
90 | | |
91 | | /* |
92 | | * Sort comparator for QTNodes. |
93 | | * |
94 | | * The sort order is somewhat arbitrary. |
95 | | */ |
96 | | int |
97 | | QTNodeCompare(QTNode *an, QTNode *bn) |
98 | 0 | { |
99 | | /* since this function recurses, it could be driven to stack overflow. */ |
100 | 0 | check_stack_depth(); |
101 | |
|
102 | 0 | if (an->valnode->type != bn->valnode->type) |
103 | 0 | return (an->valnode->type > bn->valnode->type) ? -1 : 1; |
104 | | |
105 | 0 | if (an->valnode->type == QI_OPR) |
106 | 0 | { |
107 | 0 | QueryOperator *ao = &an->valnode->qoperator; |
108 | 0 | QueryOperator *bo = &bn->valnode->qoperator; |
109 | |
|
110 | 0 | if (ao->oper != bo->oper) |
111 | 0 | return (ao->oper > bo->oper) ? -1 : 1; |
112 | | |
113 | 0 | if (an->nchild != bn->nchild) |
114 | 0 | return (an->nchild > bn->nchild) ? -1 : 1; |
115 | | |
116 | 0 | { |
117 | 0 | int i, |
118 | 0 | res; |
119 | |
|
120 | 0 | for (i = 0; i < an->nchild; i++) |
121 | 0 | if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0) |
122 | 0 | return res; |
123 | 0 | } |
124 | | |
125 | 0 | if (ao->oper == OP_PHRASE && ao->distance != bo->distance) |
126 | 0 | return (ao->distance > bo->distance) ? -1 : 1; |
127 | | |
128 | 0 | return 0; |
129 | 0 | } |
130 | 0 | else if (an->valnode->type == QI_VAL) |
131 | 0 | { |
132 | 0 | QueryOperand *ao = &an->valnode->qoperand; |
133 | 0 | QueryOperand *bo = &bn->valnode->qoperand; |
134 | |
|
135 | 0 | if (ao->valcrc != bo->valcrc) |
136 | 0 | { |
137 | 0 | return (ao->valcrc > bo->valcrc) ? -1 : 1; |
138 | 0 | } |
139 | | |
140 | 0 | return tsCompareString(an->word, ao->length, bn->word, bo->length, false); |
141 | 0 | } |
142 | 0 | else |
143 | 0 | { |
144 | 0 | elog(ERROR, "unrecognized QueryItem type: %d", an->valnode->type); |
145 | 0 | return 0; /* keep compiler quiet */ |
146 | 0 | } |
147 | 0 | } |
148 | | |
149 | | /* |
150 | | * qsort comparator for QTNode pointers. |
151 | | */ |
152 | | static int |
153 | | cmpQTN(const void *a, const void *b) |
154 | 0 | { |
155 | 0 | return QTNodeCompare(*(QTNode *const *) a, *(QTNode *const *) b); |
156 | 0 | } |
157 | | |
158 | | /* |
159 | | * Canonicalize a QTNode tree by sorting the children of AND/OR nodes |
160 | | * into an arbitrary but well-defined order. |
161 | | */ |
162 | | void |
163 | | QTNSort(QTNode *in) |
164 | 0 | { |
165 | 0 | int i; |
166 | | |
167 | | /* since this function recurses, it could be driven to stack overflow. */ |
168 | 0 | check_stack_depth(); |
169 | |
|
170 | 0 | if (in->valnode->type != QI_OPR) |
171 | 0 | return; |
172 | | |
173 | 0 | for (i = 0; i < in->nchild; i++) |
174 | 0 | QTNSort(in->child[i]); |
175 | 0 | if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE) |
176 | 0 | qsort(in->child, in->nchild, sizeof(QTNode *), cmpQTN); |
177 | 0 | } |
178 | | |
179 | | /* |
180 | | * Are two QTNode trees equal according to QTNodeCompare? |
181 | | */ |
182 | | bool |
183 | | QTNEq(QTNode *a, QTNode *b) |
184 | 0 | { |
185 | 0 | uint32 sign = a->sign & b->sign; |
186 | |
|
187 | 0 | if (!(sign == a->sign && sign == b->sign)) |
188 | 0 | return false; |
189 | | |
190 | 0 | return (QTNodeCompare(a, b) == 0); |
191 | 0 | } |
192 | | |
193 | | /* |
194 | | * Remove unnecessary intermediate nodes. For example: |
195 | | * |
196 | | * OR OR |
197 | | * a OR -> a b c |
198 | | * b c |
199 | | */ |
200 | | void |
201 | | QTNTernary(QTNode *in) |
202 | 0 | { |
203 | 0 | int i; |
204 | | |
205 | | /* since this function recurses, it could be driven to stack overflow. */ |
206 | 0 | check_stack_depth(); |
207 | |
|
208 | 0 | if (in->valnode->type != QI_OPR) |
209 | 0 | return; |
210 | | |
211 | 0 | for (i = 0; i < in->nchild; i++) |
212 | 0 | QTNTernary(in->child[i]); |
213 | | |
214 | | /* Only AND and OR are associative, so don't flatten other node types */ |
215 | 0 | if (in->valnode->qoperator.oper != OP_AND && |
216 | 0 | in->valnode->qoperator.oper != OP_OR) |
217 | 0 | return; |
218 | | |
219 | 0 | for (i = 0; i < in->nchild; i++) |
220 | 0 | { |
221 | 0 | QTNode *cc = in->child[i]; |
222 | |
|
223 | 0 | if (cc->valnode->type == QI_OPR && |
224 | 0 | in->valnode->qoperator.oper == cc->valnode->qoperator.oper) |
225 | 0 | { |
226 | 0 | int oldnchild = in->nchild; |
227 | |
|
228 | 0 | in->nchild += cc->nchild - 1; |
229 | 0 | in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *)); |
230 | |
|
231 | 0 | if (i + 1 != oldnchild) |
232 | 0 | memmove(in->child + i + cc->nchild, in->child + i + 1, |
233 | 0 | (oldnchild - i - 1) * sizeof(QTNode *)); |
234 | |
|
235 | 0 | memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *)); |
236 | 0 | i += cc->nchild - 1; |
237 | |
|
238 | 0 | if (cc->flags & QTN_NEEDFREE) |
239 | 0 | pfree(cc->valnode); |
240 | 0 | pfree(cc); |
241 | 0 | } |
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | | /* |
246 | | * Convert a tree to binary tree by inserting intermediate nodes. |
247 | | * (Opposite of QTNTernary) |
248 | | */ |
249 | | void |
250 | | QTNBinary(QTNode *in) |
251 | 0 | { |
252 | 0 | int i; |
253 | | |
254 | | /* since this function recurses, it could be driven to stack overflow. */ |
255 | 0 | check_stack_depth(); |
256 | |
|
257 | 0 | if (in->valnode->type != QI_OPR) |
258 | 0 | return; |
259 | | |
260 | 0 | for (i = 0; i < in->nchild; i++) |
261 | 0 | QTNBinary(in->child[i]); |
262 | |
|
263 | 0 | while (in->nchild > 2) |
264 | 0 | { |
265 | 0 | QTNode *nn = (QTNode *) palloc0(sizeof(QTNode)); |
266 | |
|
267 | 0 | nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); |
268 | 0 | nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); |
269 | |
|
270 | 0 | nn->nchild = 2; |
271 | 0 | nn->flags = QTN_NEEDFREE; |
272 | |
|
273 | 0 | nn->child[0] = in->child[0]; |
274 | 0 | nn->child[1] = in->child[1]; |
275 | 0 | nn->sign = nn->child[0]->sign | nn->child[1]->sign; |
276 | |
|
277 | 0 | nn->valnode->type = in->valnode->type; |
278 | 0 | nn->valnode->qoperator.oper = in->valnode->qoperator.oper; |
279 | |
|
280 | 0 | in->child[0] = nn; |
281 | 0 | in->child[1] = in->child[in->nchild - 1]; |
282 | 0 | in->nchild--; |
283 | 0 | } |
284 | 0 | } |
285 | | |
286 | | /* |
287 | | * Count the total length of operand strings in tree (including '\0'- |
288 | | * terminators) and the total number of nodes. |
289 | | * Caller must initialize *sumlen and *nnode to zeroes. |
290 | | */ |
291 | | static void |
292 | | cntsize(QTNode *in, int *sumlen, int *nnode) |
293 | 0 | { |
294 | | /* since this function recurses, it could be driven to stack overflow. */ |
295 | 0 | check_stack_depth(); |
296 | |
|
297 | 0 | *nnode += 1; |
298 | 0 | if (in->valnode->type == QI_OPR) |
299 | 0 | { |
300 | 0 | int i; |
301 | |
|
302 | 0 | for (i = 0; i < in->nchild; i++) |
303 | 0 | cntsize(in->child[i], sumlen, nnode); |
304 | 0 | } |
305 | 0 | else |
306 | 0 | { |
307 | 0 | *sumlen += in->valnode->qoperand.length + 1; |
308 | 0 | } |
309 | 0 | } |
310 | | |
311 | | typedef struct |
312 | | { |
313 | | QueryItem *curitem; |
314 | | char *operand; |
315 | | char *curoperand; |
316 | | } QTN2QTState; |
317 | | |
318 | | /* |
319 | | * Recursively convert a QTNode tree into flat tsquery format. |
320 | | * Caller must have allocated arrays of the correct size. |
321 | | */ |
322 | | static void |
323 | | fillQT(QTN2QTState *state, QTNode *in) |
324 | 0 | { |
325 | | /* since this function recurses, it could be driven to stack overflow. */ |
326 | 0 | check_stack_depth(); |
327 | |
|
328 | 0 | if (in->valnode->type == QI_VAL) |
329 | 0 | { |
330 | 0 | memcpy(state->curitem, in->valnode, sizeof(QueryOperand)); |
331 | |
|
332 | 0 | memcpy(state->curoperand, in->word, in->valnode->qoperand.length); |
333 | 0 | state->curitem->qoperand.distance = state->curoperand - state->operand; |
334 | 0 | state->curoperand[in->valnode->qoperand.length] = '\0'; |
335 | 0 | state->curoperand += in->valnode->qoperand.length + 1; |
336 | 0 | state->curitem++; |
337 | 0 | } |
338 | 0 | else |
339 | 0 | { |
340 | 0 | QueryItem *curitem = state->curitem; |
341 | |
|
342 | 0 | Assert(in->valnode->type == QI_OPR); |
343 | |
|
344 | 0 | memcpy(state->curitem, in->valnode, sizeof(QueryOperator)); |
345 | |
|
346 | 0 | Assert(in->nchild <= 2); |
347 | 0 | state->curitem++; |
348 | |
|
349 | 0 | fillQT(state, in->child[0]); |
350 | |
|
351 | 0 | if (in->nchild == 2) |
352 | 0 | { |
353 | 0 | curitem->qoperator.left = state->curitem - curitem; |
354 | 0 | fillQT(state, in->child[1]); |
355 | 0 | } |
356 | 0 | } |
357 | 0 | } |
358 | | |
359 | | /* |
360 | | * Build flat tsquery from a QTNode tree. |
361 | | */ |
362 | | TSQuery |
363 | | QTN2QT(QTNode *in) |
364 | 0 | { |
365 | 0 | TSQuery out; |
366 | 0 | int len; |
367 | 0 | int sumlen = 0, |
368 | 0 | nnode = 0; |
369 | 0 | QTN2QTState state; |
370 | |
|
371 | 0 | cntsize(in, &sumlen, &nnode); |
372 | |
|
373 | 0 | if (TSQUERY_TOO_BIG(nnode, sumlen)) |
374 | 0 | ereport(ERROR, |
375 | 0 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
376 | 0 | errmsg("tsquery is too large"))); |
377 | 0 | len = COMPUTESIZE(nnode, sumlen); |
378 | |
|
379 | 0 | out = (TSQuery) palloc0(len); |
380 | 0 | SET_VARSIZE(out, len); |
381 | 0 | out->size = nnode; |
382 | |
|
383 | 0 | state.curitem = GETQUERY(out); |
384 | 0 | state.operand = state.curoperand = GETOPERAND(out); |
385 | |
|
386 | 0 | fillQT(&state, in); |
387 | 0 | return out; |
388 | 0 | } |
389 | | |
390 | | /* |
391 | | * Copy a QTNode tree. |
392 | | * |
393 | | * Modifiable copies of the words and valnodes are made, too. |
394 | | */ |
395 | | QTNode * |
396 | | QTNCopy(QTNode *in) |
397 | 0 | { |
398 | 0 | QTNode *out; |
399 | | |
400 | | /* since this function recurses, it could be driven to stack overflow. */ |
401 | 0 | check_stack_depth(); |
402 | |
|
403 | 0 | out = (QTNode *) palloc(sizeof(QTNode)); |
404 | |
|
405 | 0 | *out = *in; |
406 | 0 | out->valnode = (QueryItem *) palloc(sizeof(QueryItem)); |
407 | 0 | *(out->valnode) = *(in->valnode); |
408 | 0 | out->flags |= QTN_NEEDFREE; |
409 | |
|
410 | 0 | if (in->valnode->type == QI_VAL) |
411 | 0 | { |
412 | 0 | out->word = palloc(in->valnode->qoperand.length + 1); |
413 | 0 | memcpy(out->word, in->word, in->valnode->qoperand.length); |
414 | 0 | out->word[in->valnode->qoperand.length] = '\0'; |
415 | 0 | out->flags |= QTN_WORDFREE; |
416 | 0 | } |
417 | 0 | else |
418 | 0 | { |
419 | 0 | int i; |
420 | |
|
421 | 0 | out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild); |
422 | |
|
423 | 0 | for (i = 0; i < in->nchild; i++) |
424 | 0 | out->child[i] = QTNCopy(in->child[i]); |
425 | 0 | } |
426 | |
|
427 | 0 | return out; |
428 | 0 | } |
429 | | |
430 | | /* |
431 | | * Clear the specified flag bit(s) in all nodes of a QTNode tree. |
432 | | */ |
433 | | void |
434 | | QTNClearFlags(QTNode *in, uint32 flags) |
435 | 0 | { |
436 | | /* since this function recurses, it could be driven to stack overflow. */ |
437 | 0 | check_stack_depth(); |
438 | |
|
439 | 0 | in->flags &= ~flags; |
440 | |
|
441 | 0 | if (in->valnode->type != QI_VAL) |
442 | 0 | { |
443 | 0 | int i; |
444 | |
|
445 | 0 | for (i = 0; i < in->nchild; i++) |
446 | 0 | QTNClearFlags(in->child[i], flags); |
447 | 0 | } |
448 | 0 | } |