/src/postgres/src/backend/utils/adt/tsquery_rewrite.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * tsquery_rewrite.c |
4 | | * Utilities for reconstructing tsquery |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/utils/adt/tsquery_rewrite.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | |
15 | | #include "postgres.h" |
16 | | |
17 | | #include "catalog/pg_type.h" |
18 | | #include "executor/spi.h" |
19 | | #include "miscadmin.h" |
20 | | #include "tsearch/ts_utils.h" |
21 | | #include "utils/builtins.h" |
22 | | |
23 | | |
24 | | /* |
25 | | * If "node" is equal to "ex", return a copy of "subs" instead. |
26 | | * If "ex" matches a subset of node's children, return a modified version |
27 | | * of "node" in which those children are replaced with a copy of "subs". |
28 | | * Otherwise return "node" unmodified. |
29 | | * |
30 | | * The QTN_NOCHANGE bit is set in successfully modified nodes, so that |
31 | | * we won't uselessly recurse into them. |
32 | | * Also, set *isfind true if we make a replacement. |
33 | | */ |
34 | | static QTNode * |
35 | | findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) |
36 | 0 | { |
37 | | /* Can't match unless signature matches and node type matches. */ |
38 | 0 | if ((node->sign & ex->sign) != ex->sign || |
39 | 0 | node->valnode->type != ex->valnode->type) |
40 | 0 | return node; |
41 | | |
42 | | /* Ignore nodes marked NOCHANGE, too. */ |
43 | 0 | if (node->flags & QTN_NOCHANGE) |
44 | 0 | return node; |
45 | | |
46 | 0 | if (node->valnode->type == QI_OPR) |
47 | 0 | { |
48 | | /* Must be same operator. */ |
49 | 0 | if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper) |
50 | 0 | return node; |
51 | | |
52 | 0 | if (node->nchild == ex->nchild) |
53 | 0 | { |
54 | | /* |
55 | | * Simple case: when same number of children, match if equal. |
56 | | * (This is reliable when the children were sorted earlier.) |
57 | | */ |
58 | 0 | if (QTNEq(node, ex)) |
59 | 0 | { |
60 | | /* Match; delete node and return a copy of subs instead. */ |
61 | 0 | QTNFree(node); |
62 | 0 | if (subs) |
63 | 0 | { |
64 | 0 | node = QTNCopy(subs); |
65 | 0 | node->flags |= QTN_NOCHANGE; |
66 | 0 | } |
67 | 0 | else |
68 | 0 | node = NULL; |
69 | 0 | *isfind = true; |
70 | 0 | } |
71 | 0 | } |
72 | 0 | else if (node->nchild > ex->nchild && ex->nchild > 0) |
73 | 0 | { |
74 | | /* |
75 | | * AND and OR are commutative/associative, so we should check if a |
76 | | * subset of the children match. For example, if node is A|B|C, |
77 | | * and ex is B|C, we have a match after we notionally convert node |
78 | | * to A|(B|C). This does not work for NOT or PHRASE nodes, but we |
79 | | * can't get here for those node types because they have a fixed |
80 | | * number of children. |
81 | | * |
82 | | * Because we expect that the children are sorted, it suffices to |
83 | | * make one pass through the two lists to find the matches. |
84 | | */ |
85 | 0 | bool *matched; |
86 | 0 | int nmatched; |
87 | 0 | int i, |
88 | 0 | j; |
89 | | |
90 | | /* Assert that the subset rule is OK */ |
91 | 0 | Assert(node->valnode->qoperator.oper == OP_AND || |
92 | 0 | node->valnode->qoperator.oper == OP_OR); |
93 | | |
94 | | /* matched[] will record which children of node matched */ |
95 | 0 | matched = (bool *) palloc0(node->nchild * sizeof(bool)); |
96 | 0 | nmatched = 0; |
97 | 0 | i = j = 0; |
98 | 0 | while (i < node->nchild && j < ex->nchild) |
99 | 0 | { |
100 | 0 | int cmp = QTNodeCompare(node->child[i], ex->child[j]); |
101 | |
|
102 | 0 | if (cmp == 0) |
103 | 0 | { |
104 | | /* match! */ |
105 | 0 | matched[i] = true; |
106 | 0 | nmatched++; |
107 | 0 | i++, j++; |
108 | 0 | } |
109 | 0 | else if (cmp < 0) |
110 | 0 | { |
111 | | /* node->child[i] has no match, ignore it */ |
112 | 0 | i++; |
113 | 0 | } |
114 | 0 | else |
115 | 0 | { |
116 | | /* ex->child[j] has no match; we can give up immediately */ |
117 | 0 | break; |
118 | 0 | } |
119 | 0 | } |
120 | |
|
121 | 0 | if (nmatched == ex->nchild) |
122 | 0 | { |
123 | | /* collapse out the matched children of node */ |
124 | 0 | j = 0; |
125 | 0 | for (i = 0; i < node->nchild; i++) |
126 | 0 | { |
127 | 0 | if (matched[i]) |
128 | 0 | QTNFree(node->child[i]); |
129 | 0 | else |
130 | 0 | node->child[j++] = node->child[i]; |
131 | 0 | } |
132 | | |
133 | | /* and instead insert a copy of subs */ |
134 | 0 | if (subs) |
135 | 0 | { |
136 | 0 | subs = QTNCopy(subs); |
137 | 0 | subs->flags |= QTN_NOCHANGE; |
138 | 0 | node->child[j++] = subs; |
139 | 0 | } |
140 | |
|
141 | 0 | node->nchild = j; |
142 | | |
143 | | /* |
144 | | * At this point we might have a node with zero or one child, |
145 | | * which should be simplified. But we leave it to our caller |
146 | | * (dofindsubquery) to take care of that. |
147 | | */ |
148 | | |
149 | | /* |
150 | | * Re-sort the node to put new child in the right place. This |
151 | | * is a bit bogus, because it won't matter for findsubquery's |
152 | | * remaining processing, and it's insufficient to prepare the |
153 | | * tree for another search (we would need to re-flatten as |
154 | | * well, and we don't want to do that because we'd lose the |
155 | | * QTN_NOCHANGE marking on the new child). But it's needed to |
156 | | * keep the results the same as the regression tests expect. |
157 | | */ |
158 | 0 | QTNSort(node); |
159 | |
|
160 | 0 | *isfind = true; |
161 | 0 | } |
162 | |
|
163 | 0 | pfree(matched); |
164 | 0 | } |
165 | 0 | } |
166 | 0 | else |
167 | 0 | { |
168 | 0 | Assert(node->valnode->type == QI_VAL); |
169 | |
|
170 | 0 | if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc) |
171 | 0 | return node; |
172 | 0 | else if (QTNEq(node, ex)) |
173 | 0 | { |
174 | 0 | QTNFree(node); |
175 | 0 | if (subs) |
176 | 0 | { |
177 | 0 | node = QTNCopy(subs); |
178 | 0 | node->flags |= QTN_NOCHANGE; |
179 | 0 | } |
180 | 0 | else |
181 | 0 | { |
182 | 0 | node = NULL; |
183 | 0 | } |
184 | 0 | *isfind = true; |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | 0 | return node; |
189 | 0 | } |
190 | | |
191 | | /* |
192 | | * Recursive guts of findsubquery(): attempt to replace "ex" with "subs" |
193 | | * at the root node, and if we failed to do so, recursively match against |
194 | | * child nodes. |
195 | | * |
196 | | * Delete any void subtrees resulting from the replacement. |
197 | | * In the following example '5' is replaced by empty operand: |
198 | | * |
199 | | * AND -> 6 |
200 | | * / \ |
201 | | * 5 OR |
202 | | * / \ |
203 | | * 6 5 |
204 | | */ |
205 | | static QTNode * |
206 | | dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) |
207 | 0 | { |
208 | | /* since this function recurses, it could be driven to stack overflow. */ |
209 | 0 | check_stack_depth(); |
210 | | |
211 | | /* also, since it's a bit expensive, let's check for query cancel. */ |
212 | 0 | CHECK_FOR_INTERRUPTS(); |
213 | | |
214 | | /* match at the node itself */ |
215 | 0 | root = findeq(root, ex, subs, isfind); |
216 | | |
217 | | /* unless we matched here, consider matches at child nodes */ |
218 | 0 | if (root && (root->flags & QTN_NOCHANGE) == 0 && |
219 | 0 | root->valnode->type == QI_OPR) |
220 | 0 | { |
221 | 0 | int i, |
222 | 0 | j = 0; |
223 | | |
224 | | /* |
225 | | * Any subtrees that are replaced by NULL must be dropped from the |
226 | | * tree. |
227 | | */ |
228 | 0 | for (i = 0; i < root->nchild; i++) |
229 | 0 | { |
230 | 0 | root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind); |
231 | 0 | if (root->child[j]) |
232 | 0 | j++; |
233 | 0 | } |
234 | |
|
235 | 0 | root->nchild = j; |
236 | | |
237 | | /* |
238 | | * If we have just zero or one remaining child node, simplify out this |
239 | | * operator node. |
240 | | */ |
241 | 0 | if (root->nchild == 0) |
242 | 0 | { |
243 | 0 | QTNFree(root); |
244 | 0 | root = NULL; |
245 | 0 | } |
246 | 0 | else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT) |
247 | 0 | { |
248 | 0 | QTNode *nroot = root->child[0]; |
249 | |
|
250 | 0 | pfree(root); |
251 | 0 | root = nroot; |
252 | 0 | } |
253 | 0 | } |
254 | |
|
255 | 0 | return root; |
256 | 0 | } |
257 | | |
258 | | /* |
259 | | * Substitute "subs" for "ex" throughout the QTNode tree at root. |
260 | | * |
261 | | * If isfind isn't NULL, set *isfind to show whether we made any substitution. |
262 | | * |
263 | | * Both "root" and "ex" must have been through QTNTernary and QTNSort |
264 | | * to ensure reliable matching. |
265 | | */ |
266 | | QTNode * |
267 | | findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) |
268 | 0 | { |
269 | 0 | bool DidFind = false; |
270 | |
|
271 | 0 | root = dofindsubquery(root, ex, subs, &DidFind); |
272 | |
|
273 | 0 | if (isfind) |
274 | 0 | *isfind = DidFind; |
275 | |
|
276 | 0 | return root; |
277 | 0 | } |
278 | | |
279 | | Datum |
280 | | tsquery_rewrite_query(PG_FUNCTION_ARGS) |
281 | 0 | { |
282 | 0 | TSQuery query = PG_GETARG_TSQUERY_COPY(0); |
283 | 0 | text *in = PG_GETARG_TEXT_PP(1); |
284 | 0 | TSQuery rewritten = query; |
285 | 0 | MemoryContext outercontext = CurrentMemoryContext; |
286 | 0 | MemoryContext oldcontext; |
287 | 0 | QTNode *tree; |
288 | 0 | char *buf; |
289 | 0 | SPIPlanPtr plan; |
290 | 0 | Portal portal; |
291 | 0 | bool isnull; |
292 | |
|
293 | 0 | if (query->size == 0) |
294 | 0 | { |
295 | 0 | PG_FREE_IF_COPY(in, 1); |
296 | 0 | PG_RETURN_POINTER(rewritten); |
297 | 0 | } |
298 | | |
299 | 0 | tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); |
300 | 0 | QTNTernary(tree); |
301 | 0 | QTNSort(tree); |
302 | |
|
303 | 0 | buf = text_to_cstring(in); |
304 | |
|
305 | 0 | SPI_connect(); |
306 | |
|
307 | 0 | if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) |
308 | 0 | elog(ERROR, "SPI_prepare(\"%s\") failed", buf); |
309 | | |
310 | 0 | if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) |
311 | 0 | elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf); |
312 | | |
313 | 0 | SPI_cursor_fetch(portal, true, 100); |
314 | |
|
315 | 0 | if (SPI_tuptable == NULL || |
316 | 0 | SPI_tuptable->tupdesc->natts != 2 || |
317 | 0 | SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID || |
318 | 0 | SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID) |
319 | 0 | ereport(ERROR, |
320 | 0 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
321 | 0 | errmsg("ts_rewrite query must return two tsquery columns"))); |
322 | | |
323 | 0 | while (SPI_processed > 0 && tree) |
324 | 0 | { |
325 | 0 | uint64 i; |
326 | |
|
327 | 0 | for (i = 0; i < SPI_processed && tree; i++) |
328 | 0 | { |
329 | 0 | Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); |
330 | 0 | Datum sdata; |
331 | |
|
332 | 0 | if (isnull) |
333 | 0 | continue; |
334 | | |
335 | 0 | sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); |
336 | |
|
337 | 0 | if (!isnull) |
338 | 0 | { |
339 | 0 | TSQuery qtex = DatumGetTSQuery(qdata); |
340 | 0 | TSQuery qtsubs = DatumGetTSQuery(sdata); |
341 | 0 | QTNode *qex, |
342 | 0 | *qsubs = NULL; |
343 | |
|
344 | 0 | if (qtex->size == 0) |
345 | 0 | { |
346 | 0 | if (qtex != (TSQuery) DatumGetPointer(qdata)) |
347 | 0 | pfree(qtex); |
348 | 0 | if (qtsubs != (TSQuery) DatumGetPointer(sdata)) |
349 | 0 | pfree(qtsubs); |
350 | 0 | continue; |
351 | 0 | } |
352 | | |
353 | 0 | qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); |
354 | |
|
355 | 0 | QTNTernary(qex); |
356 | 0 | QTNSort(qex); |
357 | |
|
358 | 0 | if (qtsubs->size) |
359 | 0 | qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); |
360 | |
|
361 | 0 | oldcontext = MemoryContextSwitchTo(outercontext); |
362 | 0 | tree = findsubquery(tree, qex, qsubs, NULL); |
363 | 0 | MemoryContextSwitchTo(oldcontext); |
364 | |
|
365 | 0 | QTNFree(qex); |
366 | 0 | if (qtex != (TSQuery) DatumGetPointer(qdata)) |
367 | 0 | pfree(qtex); |
368 | 0 | QTNFree(qsubs); |
369 | 0 | if (qtsubs != (TSQuery) DatumGetPointer(sdata)) |
370 | 0 | pfree(qtsubs); |
371 | |
|
372 | 0 | if (tree) |
373 | 0 | { |
374 | | /* ready the tree for another pass */ |
375 | 0 | QTNClearFlags(tree, QTN_NOCHANGE); |
376 | 0 | QTNTernary(tree); |
377 | 0 | QTNSort(tree); |
378 | 0 | } |
379 | 0 | } |
380 | 0 | } |
381 | |
|
382 | 0 | SPI_freetuptable(SPI_tuptable); |
383 | 0 | SPI_cursor_fetch(portal, true, 100); |
384 | 0 | } |
385 | |
|
386 | 0 | SPI_freetuptable(SPI_tuptable); |
387 | 0 | SPI_cursor_close(portal); |
388 | 0 | SPI_freeplan(plan); |
389 | 0 | SPI_finish(); |
390 | |
|
391 | 0 | if (tree) |
392 | 0 | { |
393 | 0 | QTNBinary(tree); |
394 | 0 | rewritten = QTN2QT(tree); |
395 | 0 | QTNFree(tree); |
396 | 0 | PG_FREE_IF_COPY(query, 0); |
397 | 0 | } |
398 | 0 | else |
399 | 0 | { |
400 | 0 | SET_VARSIZE(rewritten, HDRSIZETQ); |
401 | 0 | rewritten->size = 0; |
402 | 0 | } |
403 | |
|
404 | 0 | pfree(buf); |
405 | 0 | PG_FREE_IF_COPY(in, 1); |
406 | 0 | PG_RETURN_POINTER(rewritten); |
407 | 0 | } |
408 | | |
409 | | Datum |
410 | | tsquery_rewrite(PG_FUNCTION_ARGS) |
411 | 0 | { |
412 | 0 | TSQuery query = PG_GETARG_TSQUERY_COPY(0); |
413 | 0 | TSQuery ex = PG_GETARG_TSQUERY(1); |
414 | 0 | TSQuery subst = PG_GETARG_TSQUERY(2); |
415 | 0 | TSQuery rewritten = query; |
416 | 0 | QTNode *tree, |
417 | 0 | *qex, |
418 | 0 | *subs = NULL; |
419 | |
|
420 | 0 | if (query->size == 0 || ex->size == 0) |
421 | 0 | { |
422 | 0 | PG_FREE_IF_COPY(ex, 1); |
423 | 0 | PG_FREE_IF_COPY(subst, 2); |
424 | 0 | PG_RETURN_POINTER(rewritten); |
425 | 0 | } |
426 | | |
427 | 0 | tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); |
428 | 0 | QTNTernary(tree); |
429 | 0 | QTNSort(tree); |
430 | |
|
431 | 0 | qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex)); |
432 | 0 | QTNTernary(qex); |
433 | 0 | QTNSort(qex); |
434 | |
|
435 | 0 | if (subst->size) |
436 | 0 | subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); |
437 | |
|
438 | 0 | tree = findsubquery(tree, qex, subs, NULL); |
439 | |
|
440 | 0 | QTNFree(qex); |
441 | 0 | QTNFree(subs); |
442 | |
|
443 | 0 | if (!tree) |
444 | 0 | { |
445 | 0 | SET_VARSIZE(rewritten, HDRSIZETQ); |
446 | 0 | rewritten->size = 0; |
447 | 0 | PG_FREE_IF_COPY(ex, 1); |
448 | 0 | PG_FREE_IF_COPY(subst, 2); |
449 | 0 | PG_RETURN_POINTER(rewritten); |
450 | 0 | } |
451 | 0 | else |
452 | 0 | { |
453 | 0 | QTNBinary(tree); |
454 | 0 | rewritten = QTN2QT(tree); |
455 | 0 | QTNFree(tree); |
456 | 0 | } |
457 | | |
458 | 0 | PG_FREE_IF_COPY(query, 0); |
459 | 0 | PG_FREE_IF_COPY(ex, 1); |
460 | 0 | PG_FREE_IF_COPY(subst, 2); |
461 | 0 | PG_RETURN_POINTER(rewritten); |
462 | 0 | } |