/src/postgres/src/backend/optimizer/path/pathkeys.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * pathkeys.c |
4 | | * Utilities for matching and building path keys |
5 | | * |
6 | | * See src/backend/optimizer/README for a great deal of information about |
7 | | * the nature and use of path keys. |
8 | | * |
9 | | * |
10 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
11 | | * Portions Copyright (c) 1994, Regents of the University of California |
12 | | * |
13 | | * IDENTIFICATION |
14 | | * src/backend/optimizer/path/pathkeys.c |
15 | | * |
16 | | *------------------------------------------------------------------------- |
17 | | */ |
18 | | #include "postgres.h" |
19 | | |
20 | | #include "access/stratnum.h" |
21 | | #include "catalog/pg_opfamily.h" |
22 | | #include "nodes/nodeFuncs.h" |
23 | | #include "optimizer/cost.h" |
24 | | #include "optimizer/optimizer.h" |
25 | | #include "optimizer/pathnode.h" |
26 | | #include "optimizer/paths.h" |
27 | | #include "partitioning/partbounds.h" |
28 | | #include "rewrite/rewriteManip.h" |
29 | | #include "utils/lsyscache.h" |
30 | | |
31 | | /* Consider reordering of GROUP BY keys? */ |
32 | | bool enable_group_by_reordering = true; |
33 | | |
34 | | static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); |
35 | | static bool matches_boolean_partition_clause(RestrictInfo *rinfo, |
36 | | RelOptInfo *partrel, |
37 | | int partkeycol); |
38 | | static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); |
39 | | static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); |
40 | | |
41 | | |
42 | | /**************************************************************************** |
43 | | * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING |
44 | | ****************************************************************************/ |
45 | | |
46 | | /* |
47 | | * make_canonical_pathkey |
48 | | * Given the parameters for a PathKey, find any pre-existing matching |
49 | | * pathkey in the query's list of "canonical" pathkeys. Make a new |
50 | | * entry if there's not one already. |
51 | | * |
52 | | * Note that this function must not be used until after we have completed |
53 | | * merging EquivalenceClasses. |
54 | | */ |
55 | | PathKey * |
56 | | make_canonical_pathkey(PlannerInfo *root, |
57 | | EquivalenceClass *eclass, Oid opfamily, |
58 | | CompareType cmptype, bool nulls_first) |
59 | 0 | { |
60 | 0 | PathKey *pk; |
61 | 0 | ListCell *lc; |
62 | 0 | MemoryContext oldcontext; |
63 | | |
64 | | /* Can't make canonical pathkeys if the set of ECs might still change */ |
65 | 0 | if (!root->ec_merging_done) |
66 | 0 | elog(ERROR, "too soon to build canonical pathkeys"); |
67 | | |
68 | | /* The passed eclass might be non-canonical, so chase up to the top */ |
69 | 0 | while (eclass->ec_merged) |
70 | 0 | eclass = eclass->ec_merged; |
71 | |
|
72 | 0 | foreach(lc, root->canon_pathkeys) |
73 | 0 | { |
74 | 0 | pk = (PathKey *) lfirst(lc); |
75 | 0 | if (eclass == pk->pk_eclass && |
76 | 0 | opfamily == pk->pk_opfamily && |
77 | 0 | cmptype == pk->pk_cmptype && |
78 | 0 | nulls_first == pk->pk_nulls_first) |
79 | 0 | return pk; |
80 | 0 | } |
81 | | |
82 | | /* |
83 | | * Be sure canonical pathkeys are allocated in the main planning context. |
84 | | * Not an issue in normal planning, but it is for GEQO. |
85 | | */ |
86 | 0 | oldcontext = MemoryContextSwitchTo(root->planner_cxt); |
87 | |
|
88 | 0 | pk = makeNode(PathKey); |
89 | 0 | pk->pk_eclass = eclass; |
90 | 0 | pk->pk_opfamily = opfamily; |
91 | 0 | pk->pk_cmptype = cmptype; |
92 | 0 | pk->pk_nulls_first = nulls_first; |
93 | |
|
94 | 0 | root->canon_pathkeys = lappend(root->canon_pathkeys, pk); |
95 | |
|
96 | 0 | MemoryContextSwitchTo(oldcontext); |
97 | |
|
98 | 0 | return pk; |
99 | 0 | } |
100 | | |
101 | | /* |
102 | | * append_pathkeys |
103 | | * Append all non-redundant PathKeys in 'source' onto 'target' and |
104 | | * returns the updated 'target' list. |
105 | | */ |
106 | | List * |
107 | | append_pathkeys(List *target, List *source) |
108 | 0 | { |
109 | 0 | ListCell *lc; |
110 | |
|
111 | 0 | Assert(target != NIL); |
112 | |
|
113 | 0 | foreach(lc, source) |
114 | 0 | { |
115 | 0 | PathKey *pk = lfirst_node(PathKey, lc); |
116 | |
|
117 | 0 | if (!pathkey_is_redundant(pk, target)) |
118 | 0 | target = lappend(target, pk); |
119 | 0 | } |
120 | 0 | return target; |
121 | 0 | } |
122 | | |
123 | | /* |
124 | | * pathkey_is_redundant |
125 | | * Is a pathkey redundant with one already in the given list? |
126 | | * |
127 | | * We detect two cases: |
128 | | * |
129 | | * 1. If the new pathkey's equivalence class contains a constant, and isn't |
130 | | * below an outer join, then we can disregard it as a sort key. An example: |
131 | | * SELECT ... WHERE x = 42 ORDER BY x, y; |
132 | | * We may as well just sort by y. Note that because of opfamily matching, |
133 | | * this is semantically correct: we know that the equality constraint is one |
134 | | * that actually binds the variable to a single value in the terms of any |
135 | | * ordering operator that might go with the eclass. This rule not only lets |
136 | | * us simplify (or even skip) explicit sorts, but also allows matching index |
137 | | * sort orders to a query when there are don't-care index columns. |
138 | | * |
139 | | * 2. If the new pathkey's equivalence class is the same as that of any |
140 | | * existing member of the pathkey list, then it is redundant. Some examples: |
141 | | * SELECT ... ORDER BY x, x; |
142 | | * SELECT ... ORDER BY x, x DESC; |
143 | | * SELECT ... WHERE x = y ORDER BY x, y; |
144 | | * In all these cases the second sort key cannot distinguish values that are |
145 | | * considered equal by the first, and so there's no point in using it. |
146 | | * Note in particular that we need not compare opfamily (all the opfamilies |
147 | | * of the EC have the same notion of equality) nor sort direction. |
148 | | * |
149 | | * Both the given pathkey and the list members must be canonical for this |
150 | | * to work properly, but that's okay since we no longer ever construct any |
151 | | * non-canonical pathkeys. (Note: the notion of a pathkey *list* being |
152 | | * canonical includes the additional requirement of no redundant entries, |
153 | | * which is exactly what we are checking for here.) |
154 | | * |
155 | | * Because the equivclass.c machinery forms only one copy of any EC per query, |
156 | | * pointer comparison is enough to decide whether canonical ECs are the same. |
157 | | */ |
158 | | static bool |
159 | | pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) |
160 | 0 | { |
161 | 0 | EquivalenceClass *new_ec = new_pathkey->pk_eclass; |
162 | 0 | ListCell *lc; |
163 | | |
164 | | /* Check for EC containing a constant --- unconditionally redundant */ |
165 | 0 | if (EC_MUST_BE_REDUNDANT(new_ec)) |
166 | 0 | return true; |
167 | | |
168 | | /* If same EC already used in list, then redundant */ |
169 | 0 | foreach(lc, pathkeys) |
170 | 0 | { |
171 | 0 | PathKey *old_pathkey = (PathKey *) lfirst(lc); |
172 | |
|
173 | 0 | if (new_ec == old_pathkey->pk_eclass) |
174 | 0 | return true; |
175 | 0 | } |
176 | | |
177 | 0 | return false; |
178 | 0 | } |
179 | | |
180 | | /* |
181 | | * make_pathkey_from_sortinfo |
182 | | * Given an expression and sort-order information, create a PathKey. |
183 | | * The result is always a "canonical" PathKey, but it might be redundant. |
184 | | * |
185 | | * If the PathKey is being generated from a SortGroupClause, sortref should be |
186 | | * the SortGroupClause's SortGroupRef; otherwise zero. |
187 | | * |
188 | | * If rel is not NULL, it identifies a specific relation we're considering |
189 | | * a path for, and indicates that child EC members for that relation can be |
190 | | * considered. Otherwise child members are ignored. (See the comments for |
191 | | * get_eclass_for_sort_expr.) |
192 | | * |
193 | | * create_it is true if we should create any missing EquivalenceClass |
194 | | * needed to represent the sort key. If it's false, we return NULL if the |
195 | | * sort key isn't already present in any EquivalenceClass. |
196 | | */ |
197 | | static PathKey * |
198 | | make_pathkey_from_sortinfo(PlannerInfo *root, |
199 | | Expr *expr, |
200 | | Oid opfamily, |
201 | | Oid opcintype, |
202 | | Oid collation, |
203 | | bool reverse_sort, |
204 | | bool nulls_first, |
205 | | Index sortref, |
206 | | Relids rel, |
207 | | bool create_it) |
208 | 0 | { |
209 | 0 | CompareType cmptype; |
210 | 0 | Oid equality_op; |
211 | 0 | List *opfamilies; |
212 | 0 | EquivalenceClass *eclass; |
213 | |
|
214 | 0 | cmptype = reverse_sort ? COMPARE_GT : COMPARE_LT; |
215 | | |
216 | | /* |
217 | | * EquivalenceClasses need to contain opfamily lists based on the family |
218 | | * membership of mergejoinable equality operators, which could belong to |
219 | | * more than one opfamily. So we have to look up the opfamily's equality |
220 | | * operator and get its membership. |
221 | | */ |
222 | 0 | equality_op = get_opfamily_member_for_cmptype(opfamily, |
223 | 0 | opcintype, |
224 | 0 | opcintype, |
225 | 0 | COMPARE_EQ); |
226 | 0 | if (!OidIsValid(equality_op)) /* shouldn't happen */ |
227 | 0 | elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", |
228 | 0 | COMPARE_EQ, opcintype, opcintype, opfamily); |
229 | 0 | opfamilies = get_mergejoin_opfamilies(equality_op); |
230 | 0 | if (!opfamilies) /* certainly should find some */ |
231 | 0 | elog(ERROR, "could not find opfamilies for equality operator %u", |
232 | 0 | equality_op); |
233 | | |
234 | | /* Now find or (optionally) create a matching EquivalenceClass */ |
235 | 0 | eclass = get_eclass_for_sort_expr(root, expr, |
236 | 0 | opfamilies, opcintype, collation, |
237 | 0 | sortref, rel, create_it); |
238 | | |
239 | | /* Fail if no EC and !create_it */ |
240 | 0 | if (!eclass) |
241 | 0 | return NULL; |
242 | | |
243 | | /* And finally we can find or create a PathKey node */ |
244 | 0 | return make_canonical_pathkey(root, eclass, opfamily, |
245 | 0 | cmptype, nulls_first); |
246 | 0 | } |
247 | | |
248 | | /* |
249 | | * make_pathkey_from_sortop |
250 | | * Like make_pathkey_from_sortinfo, but work from a sort operator. |
251 | | * |
252 | | * This should eventually go away, but we need to restructure SortGroupClause |
253 | | * first. |
254 | | */ |
255 | | static PathKey * |
256 | | make_pathkey_from_sortop(PlannerInfo *root, |
257 | | Expr *expr, |
258 | | Oid ordering_op, |
259 | | bool reverse_sort, |
260 | | bool nulls_first, |
261 | | Index sortref, |
262 | | bool create_it) |
263 | 0 | { |
264 | 0 | Oid opfamily, |
265 | 0 | opcintype, |
266 | 0 | collation; |
267 | 0 | CompareType cmptype; |
268 | | |
269 | | /* Find the operator in pg_amop --- failure shouldn't happen */ |
270 | 0 | if (!get_ordering_op_properties(ordering_op, |
271 | 0 | &opfamily, &opcintype, &cmptype)) |
272 | 0 | elog(ERROR, "operator %u is not a valid ordering operator", |
273 | 0 | ordering_op); |
274 | | |
275 | | /* Because SortGroupClause doesn't carry collation, consult the expr */ |
276 | 0 | collation = exprCollation((Node *) expr); |
277 | |
|
278 | 0 | return make_pathkey_from_sortinfo(root, |
279 | 0 | expr, |
280 | 0 | opfamily, |
281 | 0 | opcintype, |
282 | 0 | collation, |
283 | 0 | reverse_sort, |
284 | 0 | nulls_first, |
285 | 0 | sortref, |
286 | 0 | NULL, |
287 | 0 | create_it); |
288 | 0 | } |
289 | | |
290 | | |
291 | | /**************************************************************************** |
292 | | * PATHKEY COMPARISONS |
293 | | ****************************************************************************/ |
294 | | |
295 | | /* |
296 | | * compare_pathkeys |
297 | | * Compare two pathkeys to see if they are equivalent, and if not whether |
298 | | * one is "better" than the other. |
299 | | * |
300 | | * We assume the pathkeys are canonical, and so they can be checked for |
301 | | * equality by simple pointer comparison. |
302 | | */ |
303 | | PathKeysComparison |
304 | | compare_pathkeys(List *keys1, List *keys2) |
305 | 0 | { |
306 | 0 | ListCell *key1, |
307 | 0 | *key2; |
308 | | |
309 | | /* |
310 | | * Fall out quickly if we are passed two identical lists. This mostly |
311 | | * catches the case where both are NIL, but that's common enough to |
312 | | * warrant the test. |
313 | | */ |
314 | 0 | if (keys1 == keys2) |
315 | 0 | return PATHKEYS_EQUAL; |
316 | | |
317 | 0 | forboth(key1, keys1, key2, keys2) |
318 | 0 | { |
319 | 0 | PathKey *pathkey1 = (PathKey *) lfirst(key1); |
320 | 0 | PathKey *pathkey2 = (PathKey *) lfirst(key2); |
321 | |
|
322 | 0 | if (pathkey1 != pathkey2) |
323 | 0 | return PATHKEYS_DIFFERENT; /* no need to keep looking */ |
324 | 0 | } |
325 | | |
326 | | /* |
327 | | * If we reached the end of only one list, the other is longer and |
328 | | * therefore not a subset. |
329 | | */ |
330 | 0 | if (key1 != NULL) |
331 | 0 | return PATHKEYS_BETTER1; /* key1 is longer */ |
332 | 0 | if (key2 != NULL) |
333 | 0 | return PATHKEYS_BETTER2; /* key2 is longer */ |
334 | 0 | return PATHKEYS_EQUAL; |
335 | 0 | } |
336 | | |
337 | | /* |
338 | | * pathkeys_contained_in |
339 | | * Common special case of compare_pathkeys: we just want to know |
340 | | * if keys2 are at least as well sorted as keys1. |
341 | | */ |
342 | | bool |
343 | | pathkeys_contained_in(List *keys1, List *keys2) |
344 | 0 | { |
345 | 0 | switch (compare_pathkeys(keys1, keys2)) |
346 | 0 | { |
347 | 0 | case PATHKEYS_EQUAL: |
348 | 0 | case PATHKEYS_BETTER2: |
349 | 0 | return true; |
350 | 0 | default: |
351 | 0 | break; |
352 | 0 | } |
353 | 0 | return false; |
354 | 0 | } |
355 | | |
356 | | /* |
357 | | * group_keys_reorder_by_pathkeys |
358 | | * Reorder GROUP BY pathkeys and clauses to match the input pathkeys. |
359 | | * |
360 | | * 'pathkeys' is an input list of pathkeys |
361 | | * '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to |
362 | | * reorder. The pointers are redirected to new lists, original lists |
363 | | * stay untouched. |
364 | | * 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to |
365 | | * search matching pathkeys. |
366 | | * |
367 | | * Returns the number of GROUP BY keys with a matching pathkey. |
368 | | */ |
369 | | static int |
370 | | group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, |
371 | | List **group_clauses, |
372 | | int num_groupby_pathkeys) |
373 | 0 | { |
374 | 0 | List *new_group_pathkeys = NIL, |
375 | 0 | *new_group_clauses = NIL; |
376 | 0 | List *grouping_pathkeys; |
377 | 0 | ListCell *lc; |
378 | 0 | int n; |
379 | |
|
380 | 0 | if (pathkeys == NIL || *group_pathkeys == NIL) |
381 | 0 | return 0; |
382 | | |
383 | | /* |
384 | | * We're going to search within just the first num_groupby_pathkeys of |
385 | | * *group_pathkeys. The thing is that root->group_pathkeys is passed as |
386 | | * *group_pathkeys containing grouping pathkeys altogether with aggregate |
387 | | * pathkeys. If we process aggregate pathkeys we could get an invalid |
388 | | * result of get_sortgroupref_clause_noerr(), because their |
389 | | * pathkey->pk_eclass->ec_sortref doesn't reference query targetlist. So, |
390 | | * we allocate a separate list of pathkeys for lookups. |
391 | | */ |
392 | 0 | grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys); |
393 | | |
394 | | /* |
395 | | * Walk the pathkeys (determining ordering of the input path) and see if |
396 | | * there's a matching GROUP BY key. If we find one, we append it to the |
397 | | * list, and do the same for the clauses. |
398 | | * |
399 | | * Once we find the first pathkey without a matching GROUP BY key, the |
400 | | * rest of the pathkeys are useless and can't be used to evaluate the |
401 | | * grouping, so we abort the loop and ignore the remaining pathkeys. |
402 | | */ |
403 | 0 | foreach(lc, pathkeys) |
404 | 0 | { |
405 | 0 | PathKey *pathkey = (PathKey *) lfirst(lc); |
406 | 0 | SortGroupClause *sgc; |
407 | | |
408 | | /* |
409 | | * Pathkeys are built in a way that allows simply comparing pointers. |
410 | | * Give up if we can't find the matching pointer. Also give up if |
411 | | * there is no sortclause reference for some reason. |
412 | | */ |
413 | 0 | if (foreach_current_index(lc) >= num_groupby_pathkeys || |
414 | 0 | !list_member_ptr(grouping_pathkeys, pathkey) || |
415 | 0 | pathkey->pk_eclass->ec_sortref == 0) |
416 | 0 | break; |
417 | | |
418 | | /* |
419 | | * Since 1349d27 pathkey coming from underlying node can be in the |
420 | | * root->group_pathkeys but not in the processed_groupClause. So, we |
421 | | * should be careful here. |
422 | | */ |
423 | 0 | sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref, |
424 | 0 | *group_clauses); |
425 | 0 | if (!sgc) |
426 | | /* The grouping clause does not cover this pathkey */ |
427 | 0 | break; |
428 | | |
429 | | /* |
430 | | * Sort group clause should have an ordering operator as long as there |
431 | | * is an associated pathkey. |
432 | | */ |
433 | 0 | Assert(OidIsValid(sgc->sortop)); |
434 | |
|
435 | 0 | new_group_pathkeys = lappend(new_group_pathkeys, pathkey); |
436 | 0 | new_group_clauses = lappend(new_group_clauses, sgc); |
437 | 0 | } |
438 | | |
439 | | /* remember the number of pathkeys with a matching GROUP BY key */ |
440 | 0 | n = list_length(new_group_pathkeys); |
441 | | |
442 | | /* append the remaining group pathkeys (will be treated as not sorted) */ |
443 | 0 | *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, |
444 | 0 | *group_pathkeys); |
445 | 0 | *group_clauses = list_concat_unique_ptr(new_group_clauses, |
446 | 0 | *group_clauses); |
447 | |
|
448 | 0 | list_free(grouping_pathkeys); |
449 | 0 | return n; |
450 | 0 | } |
451 | | |
452 | | /* |
453 | | * get_useful_group_keys_orderings |
454 | | * Determine which orderings of GROUP BY keys are potentially interesting. |
455 | | * |
456 | | * Returns a list of GroupByOrdering items, each representing an interesting |
457 | | * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the |
458 | | * matching order. |
459 | | * |
460 | | * The function considers (and keeps) following GROUP BY orderings: |
461 | | * |
462 | | * - GROUP BY keys as ordered by preprocess_groupclause() to match target |
463 | | * ORDER BY clause (as much as possible), |
464 | | * - GROUP BY keys reordered to match 'path' ordering (as much as possible). |
465 | | */ |
466 | | List * |
467 | | get_useful_group_keys_orderings(PlannerInfo *root, Path *path) |
468 | 0 | { |
469 | 0 | Query *parse = root->parse; |
470 | 0 | List *infos = NIL; |
471 | 0 | GroupByOrdering *info; |
472 | |
|
473 | 0 | List *pathkeys = root->group_pathkeys; |
474 | 0 | List *clauses = root->processed_groupClause; |
475 | | |
476 | | /* always return at least the original pathkeys/clauses */ |
477 | 0 | info = makeNode(GroupByOrdering); |
478 | 0 | info->pathkeys = pathkeys; |
479 | 0 | info->clauses = clauses; |
480 | 0 | infos = lappend(infos, info); |
481 | | |
482 | | /* |
483 | | * Should we try generating alternative orderings of the group keys? If |
484 | | * not, we produce only the order specified in the query, i.e. the |
485 | | * optimization is effectively disabled. |
486 | | */ |
487 | 0 | if (!enable_group_by_reordering) |
488 | 0 | return infos; |
489 | | |
490 | | /* |
491 | | * Grouping sets have own and more complex logic to decide the ordering. |
492 | | */ |
493 | 0 | if (parse->groupingSets) |
494 | 0 | return infos; |
495 | | |
496 | | /* |
497 | | * If the path is sorted in some way, try reordering the group keys to |
498 | | * match the path as much of the ordering as possible. Then thanks to |
499 | | * incremental sort we would get this sort as cheap as possible. |
500 | | */ |
501 | 0 | if (path->pathkeys && |
502 | 0 | !pathkeys_contained_in(path->pathkeys, root->group_pathkeys)) |
503 | 0 | { |
504 | 0 | int n; |
505 | |
|
506 | 0 | n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses, |
507 | 0 | root->num_groupby_pathkeys); |
508 | |
|
509 | 0 | if (n > 0 && |
510 | 0 | (enable_incremental_sort || n == root->num_groupby_pathkeys) && |
511 | 0 | compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL) |
512 | 0 | { |
513 | 0 | info = makeNode(GroupByOrdering); |
514 | 0 | info->pathkeys = pathkeys; |
515 | 0 | info->clauses = clauses; |
516 | |
|
517 | 0 | infos = lappend(infos, info); |
518 | 0 | } |
519 | 0 | } |
520 | |
|
521 | | #ifdef USE_ASSERT_CHECKING |
522 | | { |
523 | | GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos); |
524 | | ListCell *lc; |
525 | | |
526 | | /* Test consistency of info structures */ |
527 | | for_each_from(lc, infos, 1) |
528 | | { |
529 | | ListCell *lc1, |
530 | | *lc2; |
531 | | |
532 | | info = lfirst_node(GroupByOrdering, lc); |
533 | | |
534 | | Assert(list_length(info->clauses) == list_length(pinfo->clauses)); |
535 | | Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys)); |
536 | | Assert(list_difference(info->clauses, pinfo->clauses) == NIL); |
537 | | Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL); |
538 | | |
539 | | forboth(lc1, info->clauses, lc2, info->pathkeys) |
540 | | { |
541 | | SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1); |
542 | | PathKey *pk = lfirst_node(PathKey, lc2); |
543 | | |
544 | | Assert(pk->pk_eclass->ec_sortref == sgc->tleSortGroupRef); |
545 | | } |
546 | | } |
547 | | } |
548 | | #endif |
549 | 0 | return infos; |
550 | 0 | } |
551 | | |
552 | | /* |
553 | | * pathkeys_count_contained_in |
554 | | * Same as pathkeys_contained_in, but also sets length of longest |
555 | | * common prefix of keys1 and keys2. |
556 | | */ |
557 | | bool |
558 | | pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common) |
559 | 0 | { |
560 | 0 | int n = 0; |
561 | 0 | ListCell *key1, |
562 | 0 | *key2; |
563 | | |
564 | | /* |
565 | | * See if we can avoiding looping through both lists. This optimization |
566 | | * gains us several percent in planning time in a worst-case test. |
567 | | */ |
568 | 0 | if (keys1 == keys2) |
569 | 0 | { |
570 | 0 | *n_common = list_length(keys1); |
571 | 0 | return true; |
572 | 0 | } |
573 | 0 | else if (keys1 == NIL) |
574 | 0 | { |
575 | 0 | *n_common = 0; |
576 | 0 | return true; |
577 | 0 | } |
578 | 0 | else if (keys2 == NIL) |
579 | 0 | { |
580 | 0 | *n_common = 0; |
581 | 0 | return false; |
582 | 0 | } |
583 | | |
584 | | /* |
585 | | * If both lists are non-empty, iterate through both to find out how many |
586 | | * items are shared. |
587 | | */ |
588 | 0 | forboth(key1, keys1, key2, keys2) |
589 | 0 | { |
590 | 0 | PathKey *pathkey1 = (PathKey *) lfirst(key1); |
591 | 0 | PathKey *pathkey2 = (PathKey *) lfirst(key2); |
592 | |
|
593 | 0 | if (pathkey1 != pathkey2) |
594 | 0 | { |
595 | 0 | *n_common = n; |
596 | 0 | return false; |
597 | 0 | } |
598 | 0 | n++; |
599 | 0 | } |
600 | | |
601 | | /* If we ended with a null value, then we've processed the whole list. */ |
602 | 0 | *n_common = n; |
603 | 0 | return (key1 == NULL); |
604 | 0 | } |
605 | | |
606 | | /* |
607 | | * get_cheapest_path_for_pathkeys |
608 | | * Find the cheapest path (according to the specified criterion) that |
609 | | * satisfies the given pathkeys and parameterization, and is parallel-safe |
610 | | * if required. |
611 | | * Return NULL if no such path. |
612 | | * |
613 | | * 'paths' is a list of possible paths that all generate the same relation |
614 | | * 'pathkeys' represents a required ordering (in canonical form!) |
615 | | * 'required_outer' denotes allowable outer relations for parameterized paths |
616 | | * 'cost_criterion' is STARTUP_COST or TOTAL_COST |
617 | | * 'require_parallel_safe' causes us to consider only parallel-safe paths |
618 | | */ |
619 | | Path * |
620 | | get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, |
621 | | Relids required_outer, |
622 | | CostSelector cost_criterion, |
623 | | bool require_parallel_safe) |
624 | 0 | { |
625 | 0 | Path *matched_path = NULL; |
626 | 0 | ListCell *l; |
627 | |
|
628 | 0 | foreach(l, paths) |
629 | 0 | { |
630 | 0 | Path *path = (Path *) lfirst(l); |
631 | | |
632 | | /* If required, reject paths that are not parallel-safe */ |
633 | 0 | if (require_parallel_safe && !path->parallel_safe) |
634 | 0 | continue; |
635 | | |
636 | | /* |
637 | | * Since cost comparison is a lot cheaper than pathkey comparison, do |
638 | | * that first. (XXX is that still true?) |
639 | | */ |
640 | 0 | if (matched_path != NULL && |
641 | 0 | compare_path_costs(matched_path, path, cost_criterion) <= 0) |
642 | 0 | continue; |
643 | | |
644 | 0 | if (pathkeys_contained_in(pathkeys, path->pathkeys) && |
645 | 0 | bms_is_subset(PATH_REQ_OUTER(path), required_outer)) |
646 | 0 | matched_path = path; |
647 | 0 | } |
648 | 0 | return matched_path; |
649 | 0 | } |
650 | | |
651 | | /* |
652 | | * get_cheapest_fractional_path_for_pathkeys |
653 | | * Find the cheapest path (for retrieving a specified fraction of all |
654 | | * the tuples) that satisfies the given pathkeys and parameterization. |
655 | | * Return NULL if no such path. |
656 | | * |
657 | | * See compare_fractional_path_costs() for the interpretation of the fraction |
658 | | * parameter. |
659 | | * |
660 | | * 'paths' is a list of possible paths that all generate the same relation |
661 | | * 'pathkeys' represents a required ordering (in canonical form!) |
662 | | * 'required_outer' denotes allowable outer relations for parameterized paths |
663 | | * 'fraction' is the fraction of the total tuples expected to be retrieved |
664 | | */ |
665 | | Path * |
666 | | get_cheapest_fractional_path_for_pathkeys(List *paths, |
667 | | List *pathkeys, |
668 | | Relids required_outer, |
669 | | double fraction) |
670 | 0 | { |
671 | 0 | Path *matched_path = NULL; |
672 | 0 | ListCell *l; |
673 | |
|
674 | 0 | foreach(l, paths) |
675 | 0 | { |
676 | 0 | Path *path = (Path *) lfirst(l); |
677 | | |
678 | | /* |
679 | | * Since cost comparison is a lot cheaper than pathkey comparison, do |
680 | | * that first. (XXX is that still true?) |
681 | | */ |
682 | 0 | if (matched_path != NULL && |
683 | 0 | compare_fractional_path_costs(matched_path, path, fraction) <= 0) |
684 | 0 | continue; |
685 | | |
686 | 0 | if (pathkeys_contained_in(pathkeys, path->pathkeys) && |
687 | 0 | bms_is_subset(PATH_REQ_OUTER(path), required_outer)) |
688 | 0 | matched_path = path; |
689 | 0 | } |
690 | 0 | return matched_path; |
691 | 0 | } |
692 | | |
693 | | |
694 | | /* |
695 | | * get_cheapest_parallel_safe_total_inner |
696 | | * Find the unparameterized parallel-safe path with the least total cost. |
697 | | */ |
698 | | Path * |
699 | | get_cheapest_parallel_safe_total_inner(List *paths) |
700 | 0 | { |
701 | 0 | ListCell *l; |
702 | |
|
703 | 0 | foreach(l, paths) |
704 | 0 | { |
705 | 0 | Path *innerpath = (Path *) lfirst(l); |
706 | |
|
707 | 0 | if (innerpath->parallel_safe && |
708 | 0 | bms_is_empty(PATH_REQ_OUTER(innerpath))) |
709 | 0 | return innerpath; |
710 | 0 | } |
711 | | |
712 | 0 | return NULL; |
713 | 0 | } |
714 | | |
715 | | /**************************************************************************** |
716 | | * NEW PATHKEY FORMATION |
717 | | ****************************************************************************/ |
718 | | |
719 | | /* |
720 | | * build_index_pathkeys |
721 | | * Build a pathkeys list that describes the ordering induced by an index |
722 | | * scan using the given index. (Note that an unordered index doesn't |
723 | | * induce any ordering, so we return NIL.) |
724 | | * |
725 | | * If 'scandir' is BackwardScanDirection, build pathkeys representing a |
726 | | * backwards scan of the index. |
727 | | * |
728 | | * We iterate only key columns of covering indexes, since non-key columns |
729 | | * don't influence index ordering. The result is canonical, meaning that |
730 | | * redundant pathkeys are removed; it may therefore have fewer entries than |
731 | | * there are key columns in the index. |
732 | | * |
733 | | * Another reason for stopping early is that we may be able to tell that |
734 | | * an index column's sort order is uninteresting for this query. However, |
735 | | * that test is just based on the existence of an EquivalenceClass and not |
736 | | * on position in pathkey lists, so it's not complete. Caller should call |
737 | | * truncate_useless_pathkeys() to possibly remove more pathkeys. |
738 | | */ |
739 | | List * |
740 | | build_index_pathkeys(PlannerInfo *root, |
741 | | IndexOptInfo *index, |
742 | | ScanDirection scandir) |
743 | 0 | { |
744 | 0 | List *retval = NIL; |
745 | 0 | ListCell *lc; |
746 | 0 | int i; |
747 | |
|
748 | 0 | if (index->sortopfamily == NULL) |
749 | 0 | return NIL; /* non-orderable index */ |
750 | | |
751 | 0 | i = 0; |
752 | 0 | foreach(lc, index->indextlist) |
753 | 0 | { |
754 | 0 | TargetEntry *indextle = (TargetEntry *) lfirst(lc); |
755 | 0 | Expr *indexkey; |
756 | 0 | bool reverse_sort; |
757 | 0 | bool nulls_first; |
758 | 0 | PathKey *cpathkey; |
759 | | |
760 | | /* |
761 | | * INCLUDE columns are stored in index unordered, so they don't |
762 | | * support ordered index scan. |
763 | | */ |
764 | 0 | if (i >= index->nkeycolumns) |
765 | 0 | break; |
766 | | |
767 | | /* We assume we don't need to make a copy of the tlist item */ |
768 | 0 | indexkey = indextle->expr; |
769 | |
|
770 | 0 | if (ScanDirectionIsBackward(scandir)) |
771 | 0 | { |
772 | 0 | reverse_sort = !index->reverse_sort[i]; |
773 | 0 | nulls_first = !index->nulls_first[i]; |
774 | 0 | } |
775 | 0 | else |
776 | 0 | { |
777 | 0 | reverse_sort = index->reverse_sort[i]; |
778 | 0 | nulls_first = index->nulls_first[i]; |
779 | 0 | } |
780 | | |
781 | | /* |
782 | | * OK, try to make a canonical pathkey for this sort key. |
783 | | */ |
784 | 0 | cpathkey = make_pathkey_from_sortinfo(root, |
785 | 0 | indexkey, |
786 | 0 | index->sortopfamily[i], |
787 | 0 | index->opcintype[i], |
788 | 0 | index->indexcollations[i], |
789 | 0 | reverse_sort, |
790 | 0 | nulls_first, |
791 | 0 | 0, |
792 | 0 | index->rel->relids, |
793 | 0 | false); |
794 | |
|
795 | 0 | if (cpathkey) |
796 | 0 | { |
797 | | /* |
798 | | * We found the sort key in an EquivalenceClass, so it's relevant |
799 | | * for this query. Add it to list, unless it's redundant. |
800 | | */ |
801 | 0 | if (!pathkey_is_redundant(cpathkey, retval)) |
802 | 0 | retval = lappend(retval, cpathkey); |
803 | 0 | } |
804 | 0 | else |
805 | 0 | { |
806 | | /* |
807 | | * Boolean index keys might be redundant even if they do not |
808 | | * appear in an EquivalenceClass, because of our special treatment |
809 | | * of boolean equality conditions --- see the comment for |
810 | | * indexcol_is_bool_constant_for_query(). If that applies, we can |
811 | | * continue to examine lower-order index columns. Otherwise, the |
812 | | * sort key is not an interesting sort order for this query, so we |
813 | | * should stop considering index columns; any lower-order sort |
814 | | * keys won't be useful either. |
815 | | */ |
816 | 0 | if (!indexcol_is_bool_constant_for_query(root, index, i)) |
817 | 0 | break; |
818 | 0 | } |
819 | | |
820 | 0 | i++; |
821 | 0 | } |
822 | |
|
823 | 0 | return retval; |
824 | 0 | } |
825 | | |
826 | | /* |
827 | | * partkey_is_bool_constant_for_query |
828 | | * |
829 | | * If a partition key column is constrained to have a constant value by the |
830 | | * query's WHERE conditions, then it's irrelevant for sort-order |
831 | | * considerations. Usually that means we have a restriction clause |
832 | | * WHERE partkeycol = constant, which gets turned into an EquivalenceClass |
833 | | * containing a constant, which is recognized as redundant by |
834 | | * build_partition_pathkeys(). But if the partition key column is a |
835 | | * boolean variable (or expression), then we are not going to see such a |
836 | | * WHERE clause, because expression preprocessing will have simplified it |
837 | | * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going |
838 | | * to have a matching EquivalenceClass (unless the query also contains |
839 | | * "ORDER BY partkeycol"). To allow such cases to work the same as they would |
840 | | * for non-boolean values, this function is provided to detect whether the |
841 | | * specified partition key column matches a boolean restriction clause. |
842 | | */ |
843 | | static bool |
844 | | partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol) |
845 | 0 | { |
846 | 0 | PartitionScheme partscheme = partrel->part_scheme; |
847 | 0 | ListCell *lc; |
848 | | |
849 | | /* |
850 | | * If the partkey isn't boolean, we can't possibly get a match. |
851 | | * |
852 | | * Partitioning currently can only use built-in AMs, so checking for |
853 | | * built-in boolean opfamilies is good enough. |
854 | | */ |
855 | 0 | if (!IsBuiltinBooleanOpfamily(partscheme->partopfamily[partkeycol])) |
856 | 0 | return false; |
857 | | |
858 | | /* Check each restriction clause for the partitioned rel */ |
859 | 0 | foreach(lc, partrel->baserestrictinfo) |
860 | 0 | { |
861 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); |
862 | | |
863 | | /* Ignore pseudoconstant quals, they won't match */ |
864 | 0 | if (rinfo->pseudoconstant) |
865 | 0 | continue; |
866 | | |
867 | | /* See if we can match the clause's expression to the partkey column */ |
868 | 0 | if (matches_boolean_partition_clause(rinfo, partrel, partkeycol)) |
869 | 0 | return true; |
870 | 0 | } |
871 | | |
872 | 0 | return false; |
873 | 0 | } |
874 | | |
875 | | /* |
876 | | * matches_boolean_partition_clause |
877 | | * Determine if the boolean clause described by rinfo matches |
878 | | * partrel's partkeycol-th partition key column. |
879 | | * |
880 | | * "Matches" can be either an exact match (equivalent to partkey = true), |
881 | | * or a NOT above an exact match (equivalent to partkey = false). |
882 | | */ |
883 | | static bool |
884 | | matches_boolean_partition_clause(RestrictInfo *rinfo, |
885 | | RelOptInfo *partrel, int partkeycol) |
886 | 0 | { |
887 | 0 | Node *clause = (Node *) rinfo->clause; |
888 | 0 | Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]); |
889 | | |
890 | | /* Direct match? */ |
891 | 0 | if (equal(partexpr, clause)) |
892 | 0 | return true; |
893 | | /* NOT clause? */ |
894 | 0 | else if (is_notclause(clause)) |
895 | 0 | { |
896 | 0 | Node *arg = (Node *) get_notclausearg((Expr *) clause); |
897 | |
|
898 | 0 | if (equal(partexpr, arg)) |
899 | 0 | return true; |
900 | 0 | } |
901 | | |
902 | 0 | return false; |
903 | 0 | } |
904 | | |
905 | | /* |
906 | | * build_partition_pathkeys |
907 | | * Build a pathkeys list that describes the ordering induced by the |
908 | | * partitions of partrel, under either forward or backward scan |
909 | | * as per scandir. |
910 | | * |
911 | | * Caller must have checked that the partitions are properly ordered, |
912 | | * as detected by partitions_are_ordered(). |
913 | | * |
914 | | * Sets *partialkeys to true if pathkeys were only built for a prefix of the |
915 | | * partition key, or false if the pathkeys include all columns of the |
916 | | * partition key. |
917 | | */ |
918 | | List * |
919 | | build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, |
920 | | ScanDirection scandir, bool *partialkeys) |
921 | 0 | { |
922 | 0 | List *retval = NIL; |
923 | 0 | PartitionScheme partscheme = partrel->part_scheme; |
924 | 0 | int i; |
925 | |
|
926 | 0 | Assert(partscheme != NULL); |
927 | 0 | Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts)); |
928 | | /* For now, we can only cope with baserels */ |
929 | 0 | Assert(IS_SIMPLE_REL(partrel)); |
930 | |
|
931 | 0 | for (i = 0; i < partscheme->partnatts; i++) |
932 | 0 | { |
933 | 0 | PathKey *cpathkey; |
934 | 0 | Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]); |
935 | | |
936 | | /* |
937 | | * Try to make a canonical pathkey for this partkey. |
938 | | * |
939 | | * We assume the PartitionDesc lists any NULL partition last, so we |
940 | | * treat the scan like a NULLS LAST index: we have nulls_first for |
941 | | * backwards scan only. |
942 | | */ |
943 | 0 | cpathkey = make_pathkey_from_sortinfo(root, |
944 | 0 | keyCol, |
945 | 0 | partscheme->partopfamily[i], |
946 | 0 | partscheme->partopcintype[i], |
947 | 0 | partscheme->partcollation[i], |
948 | 0 | ScanDirectionIsBackward(scandir), |
949 | 0 | ScanDirectionIsBackward(scandir), |
950 | 0 | 0, |
951 | 0 | partrel->relids, |
952 | 0 | false); |
953 | | |
954 | |
|
955 | 0 | if (cpathkey) |
956 | 0 | { |
957 | | /* |
958 | | * We found the sort key in an EquivalenceClass, so it's relevant |
959 | | * for this query. Add it to list, unless it's redundant. |
960 | | */ |
961 | 0 | if (!pathkey_is_redundant(cpathkey, retval)) |
962 | 0 | retval = lappend(retval, cpathkey); |
963 | 0 | } |
964 | 0 | else |
965 | 0 | { |
966 | | /* |
967 | | * Boolean partition keys might be redundant even if they do not |
968 | | * appear in an EquivalenceClass, because of our special treatment |
969 | | * of boolean equality conditions --- see the comment for |
970 | | * partkey_is_bool_constant_for_query(). If that applies, we can |
971 | | * continue to examine lower-order partition keys. Otherwise, the |
972 | | * sort key is not an interesting sort order for this query, so we |
973 | | * should stop considering partition columns; any lower-order sort |
974 | | * keys won't be useful either. |
975 | | */ |
976 | 0 | if (!partkey_is_bool_constant_for_query(partrel, i)) |
977 | 0 | { |
978 | 0 | *partialkeys = true; |
979 | 0 | return retval; |
980 | 0 | } |
981 | 0 | } |
982 | 0 | } |
983 | | |
984 | 0 | *partialkeys = false; |
985 | 0 | return retval; |
986 | 0 | } |
987 | | |
988 | | /* |
989 | | * build_expression_pathkey |
990 | | * Build a pathkeys list that describes an ordering by a single expression |
991 | | * using the given sort operator. |
992 | | * |
993 | | * expr and rel are as for make_pathkey_from_sortinfo. |
994 | | * We induce the other arguments assuming default sort order for the operator. |
995 | | * |
996 | | * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it |
997 | | * is false and the expression isn't already in some EquivalenceClass. |
998 | | */ |
999 | | List * |
1000 | | build_expression_pathkey(PlannerInfo *root, |
1001 | | Expr *expr, |
1002 | | Oid opno, |
1003 | | Relids rel, |
1004 | | bool create_it) |
1005 | 0 | { |
1006 | 0 | List *pathkeys; |
1007 | 0 | Oid opfamily, |
1008 | 0 | opcintype; |
1009 | 0 | CompareType cmptype; |
1010 | 0 | PathKey *cpathkey; |
1011 | | |
1012 | | /* Find the operator in pg_amop --- failure shouldn't happen */ |
1013 | 0 | if (!get_ordering_op_properties(opno, |
1014 | 0 | &opfamily, &opcintype, &cmptype)) |
1015 | 0 | elog(ERROR, "operator %u is not a valid ordering operator", |
1016 | 0 | opno); |
1017 | | |
1018 | 0 | cpathkey = make_pathkey_from_sortinfo(root, |
1019 | 0 | expr, |
1020 | 0 | opfamily, |
1021 | 0 | opcintype, |
1022 | 0 | exprCollation((Node *) expr), |
1023 | 0 | (cmptype == COMPARE_GT), |
1024 | 0 | (cmptype == COMPARE_GT), |
1025 | 0 | 0, |
1026 | 0 | rel, |
1027 | 0 | create_it); |
1028 | |
|
1029 | 0 | if (cpathkey) |
1030 | 0 | pathkeys = list_make1(cpathkey); |
1031 | 0 | else |
1032 | 0 | pathkeys = NIL; |
1033 | |
|
1034 | 0 | return pathkeys; |
1035 | 0 | } |
1036 | | |
1037 | | /* |
1038 | | * convert_subquery_pathkeys |
1039 | | * Build a pathkeys list that describes the ordering of a subquery's |
1040 | | * result, in the terms of the outer query. This is essentially a |
1041 | | * task of conversion. |
1042 | | * |
1043 | | * 'rel': outer query's RelOptInfo for the subquery relation. |
1044 | | * 'subquery_pathkeys': the subquery's output pathkeys, in its terms. |
1045 | | * 'subquery_tlist': the subquery's output targetlist, in its terms. |
1046 | | * |
1047 | | * We intentionally don't do truncate_useless_pathkeys() here, because there |
1048 | | * are situations where seeing the raw ordering of the subquery is helpful. |
1049 | | * For example, if it returns ORDER BY x DESC, that may prompt us to |
1050 | | * construct a mergejoin using DESC order rather than ASC order; but the |
1051 | | * right_merge_direction heuristic would have us throw the knowledge away. |
1052 | | */ |
1053 | | List * |
1054 | | convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, |
1055 | | List *subquery_pathkeys, |
1056 | | List *subquery_tlist) |
1057 | 0 | { |
1058 | 0 | List *retval = NIL; |
1059 | 0 | int retvallen = 0; |
1060 | 0 | int outer_query_keys = list_length(root->query_pathkeys); |
1061 | 0 | ListCell *i; |
1062 | |
|
1063 | 0 | foreach(i, subquery_pathkeys) |
1064 | 0 | { |
1065 | 0 | PathKey *sub_pathkey = (PathKey *) lfirst(i); |
1066 | 0 | EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass; |
1067 | 0 | PathKey *best_pathkey = NULL; |
1068 | |
|
1069 | 0 | if (sub_eclass->ec_has_volatile) |
1070 | 0 | { |
1071 | | /* |
1072 | | * If the sub_pathkey's EquivalenceClass is volatile, then it must |
1073 | | * have come from an ORDER BY clause, and we have to match it to |
1074 | | * that same targetlist entry. |
1075 | | */ |
1076 | 0 | TargetEntry *tle; |
1077 | 0 | Var *outer_var; |
1078 | |
|
1079 | 0 | if (sub_eclass->ec_sortref == 0) /* can't happen */ |
1080 | 0 | elog(ERROR, "volatile EquivalenceClass has no sortref"); |
1081 | 0 | tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist); |
1082 | 0 | Assert(tle); |
1083 | | /* Is TLE actually available to the outer query? */ |
1084 | 0 | outer_var = find_var_for_subquery_tle(rel, tle); |
1085 | 0 | if (outer_var) |
1086 | 0 | { |
1087 | | /* We can represent this sub_pathkey */ |
1088 | 0 | EquivalenceMember *sub_member; |
1089 | 0 | EquivalenceClass *outer_ec; |
1090 | |
|
1091 | 0 | Assert(list_length(sub_eclass->ec_members) == 1); |
1092 | 0 | sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members); |
1093 | | |
1094 | | /* |
1095 | | * Note: it might look funny to be setting sortref = 0 for a |
1096 | | * reference to a volatile sub_eclass. However, the |
1097 | | * expression is *not* volatile in the outer query: it's just |
1098 | | * a Var referencing whatever the subquery emitted. (IOW, the |
1099 | | * outer query isn't going to re-execute the volatile |
1100 | | * expression itself.) So this is okay. |
1101 | | */ |
1102 | 0 | outer_ec = |
1103 | 0 | get_eclass_for_sort_expr(root, |
1104 | 0 | (Expr *) outer_var, |
1105 | 0 | sub_eclass->ec_opfamilies, |
1106 | 0 | sub_member->em_datatype, |
1107 | 0 | sub_eclass->ec_collation, |
1108 | 0 | 0, |
1109 | 0 | rel->relids, |
1110 | 0 | false); |
1111 | | |
1112 | | /* |
1113 | | * If we don't find a matching EC, sub-pathkey isn't |
1114 | | * interesting to the outer query |
1115 | | */ |
1116 | 0 | if (outer_ec) |
1117 | 0 | best_pathkey = |
1118 | 0 | make_canonical_pathkey(root, |
1119 | 0 | outer_ec, |
1120 | 0 | sub_pathkey->pk_opfamily, |
1121 | 0 | sub_pathkey->pk_cmptype, |
1122 | 0 | sub_pathkey->pk_nulls_first); |
1123 | 0 | } |
1124 | 0 | } |
1125 | 0 | else |
1126 | 0 | { |
1127 | | /* |
1128 | | * Otherwise, the sub_pathkey's EquivalenceClass could contain |
1129 | | * multiple elements (representing knowledge that multiple items |
1130 | | * are effectively equal). Each element might match none, one, or |
1131 | | * more of the output columns that are visible to the outer query. |
1132 | | * This means we may have multiple possible representations of the |
1133 | | * sub_pathkey in the context of the outer query. Ideally we |
1134 | | * would generate them all and put them all into an EC of the |
1135 | | * outer query, thereby propagating equality knowledge up to the |
1136 | | * outer query. Right now we cannot do so, because the outer |
1137 | | * query's EquivalenceClasses are already frozen when this is |
1138 | | * called. Instead we prefer the one that has the highest "score" |
1139 | | * (number of EC peers, plus one if it matches the outer |
1140 | | * query_pathkeys). This is the most likely to be useful in the |
1141 | | * outer query. |
1142 | | */ |
1143 | 0 | int best_score = -1; |
1144 | 0 | ListCell *j; |
1145 | | |
1146 | | /* Ignore children here */ |
1147 | 0 | foreach(j, sub_eclass->ec_members) |
1148 | 0 | { |
1149 | 0 | EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j); |
1150 | 0 | Expr *sub_expr = sub_member->em_expr; |
1151 | 0 | Oid sub_expr_type = sub_member->em_datatype; |
1152 | 0 | Oid sub_expr_coll = sub_eclass->ec_collation; |
1153 | 0 | ListCell *k; |
1154 | | |
1155 | | /* Child members should not exist in ec_members */ |
1156 | 0 | Assert(!sub_member->em_is_child); |
1157 | |
|
1158 | 0 | foreach(k, subquery_tlist) |
1159 | 0 | { |
1160 | 0 | TargetEntry *tle = (TargetEntry *) lfirst(k); |
1161 | 0 | Var *outer_var; |
1162 | 0 | Expr *tle_expr; |
1163 | 0 | EquivalenceClass *outer_ec; |
1164 | 0 | PathKey *outer_pk; |
1165 | 0 | int score; |
1166 | | |
1167 | | /* Is TLE actually available to the outer query? */ |
1168 | 0 | outer_var = find_var_for_subquery_tle(rel, tle); |
1169 | 0 | if (!outer_var) |
1170 | 0 | continue; |
1171 | | |
1172 | | /* |
1173 | | * The targetlist entry is considered to match if it |
1174 | | * matches after sort-key canonicalization. That is |
1175 | | * needed since the sub_expr has been through the same |
1176 | | * process. |
1177 | | */ |
1178 | 0 | tle_expr = canonicalize_ec_expression(tle->expr, |
1179 | 0 | sub_expr_type, |
1180 | 0 | sub_expr_coll); |
1181 | 0 | if (!equal(tle_expr, sub_expr)) |
1182 | 0 | continue; |
1183 | | |
1184 | | /* See if we have a matching EC for the TLE */ |
1185 | 0 | outer_ec = get_eclass_for_sort_expr(root, |
1186 | 0 | (Expr *) outer_var, |
1187 | 0 | sub_eclass->ec_opfamilies, |
1188 | 0 | sub_expr_type, |
1189 | 0 | sub_expr_coll, |
1190 | 0 | 0, |
1191 | 0 | rel->relids, |
1192 | 0 | false); |
1193 | | |
1194 | | /* |
1195 | | * If we don't find a matching EC, this sub-pathkey isn't |
1196 | | * interesting to the outer query |
1197 | | */ |
1198 | 0 | if (!outer_ec) |
1199 | 0 | continue; |
1200 | | |
1201 | 0 | outer_pk = make_canonical_pathkey(root, |
1202 | 0 | outer_ec, |
1203 | 0 | sub_pathkey->pk_opfamily, |
1204 | 0 | sub_pathkey->pk_cmptype, |
1205 | 0 | sub_pathkey->pk_nulls_first); |
1206 | | /* score = # of equivalence peers */ |
1207 | 0 | score = list_length(outer_ec->ec_members) - 1; |
1208 | | /* +1 if it matches the proper query_pathkeys item */ |
1209 | 0 | if (retvallen < outer_query_keys && |
1210 | 0 | list_nth(root->query_pathkeys, retvallen) == outer_pk) |
1211 | 0 | score++; |
1212 | 0 | if (score > best_score) |
1213 | 0 | { |
1214 | 0 | best_pathkey = outer_pk; |
1215 | 0 | best_score = score; |
1216 | 0 | } |
1217 | 0 | } |
1218 | 0 | } |
1219 | 0 | } |
1220 | | |
1221 | | /* |
1222 | | * If we couldn't find a representation of this sub_pathkey, we're |
1223 | | * done (we can't use the ones to its right, either). |
1224 | | */ |
1225 | 0 | if (!best_pathkey) |
1226 | 0 | break; |
1227 | | |
1228 | | /* |
1229 | | * Eliminate redundant ordering info; could happen if outer query |
1230 | | * equivalences subquery keys... |
1231 | | */ |
1232 | 0 | if (!pathkey_is_redundant(best_pathkey, retval)) |
1233 | 0 | { |
1234 | 0 | retval = lappend(retval, best_pathkey); |
1235 | 0 | retvallen++; |
1236 | 0 | } |
1237 | 0 | } |
1238 | | |
1239 | 0 | return retval; |
1240 | 0 | } |
1241 | | |
1242 | | /* |
1243 | | * find_var_for_subquery_tle |
1244 | | * |
1245 | | * If the given subquery tlist entry is due to be emitted by the subquery's |
1246 | | * scan node, return a Var for it, else return NULL. |
1247 | | * |
1248 | | * We need this to ensure that we don't return pathkeys describing values |
1249 | | * that are unavailable above the level of the subquery scan. |
1250 | | */ |
1251 | | static Var * |
1252 | | find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle) |
1253 | 0 | { |
1254 | 0 | ListCell *lc; |
1255 | | |
1256 | | /* If the TLE is resjunk, it's certainly not visible to the outer query */ |
1257 | 0 | if (tle->resjunk) |
1258 | 0 | return NULL; |
1259 | | |
1260 | | /* Search the rel's targetlist to see what it will return */ |
1261 | 0 | foreach(lc, rel->reltarget->exprs) |
1262 | 0 | { |
1263 | 0 | Var *var = (Var *) lfirst(lc); |
1264 | | |
1265 | | /* Ignore placeholders */ |
1266 | 0 | if (!IsA(var, Var)) |
1267 | 0 | continue; |
1268 | 0 | Assert(var->varno == rel->relid); |
1269 | | |
1270 | | /* If we find a Var referencing this TLE, we're good */ |
1271 | 0 | if (var->varattno == tle->resno) |
1272 | 0 | return copyObject(var); /* Make a copy for safety */ |
1273 | 0 | } |
1274 | 0 | return NULL; |
1275 | 0 | } |
1276 | | |
1277 | | /* |
1278 | | * build_join_pathkeys |
1279 | | * Build the path keys for a join relation constructed by mergejoin or |
1280 | | * nestloop join. This is normally the same as the outer path's keys. |
1281 | | * |
1282 | | * EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the |
1283 | | * result as having the outer path's path keys, because null lefthand rows |
1284 | | * may be inserted at random points. It must be treated as unsorted. |
1285 | | * |
1286 | | * We truncate away any pathkeys that are uninteresting for higher joins. |
1287 | | * |
1288 | | * 'joinrel' is the join relation that paths are being formed for |
1289 | | * 'jointype' is the join type (inner, left, full, etc) |
1290 | | * 'outer_pathkeys' is the list of the current outer path's path keys |
1291 | | * |
1292 | | * Returns the list of new path keys. |
1293 | | */ |
1294 | | List * |
1295 | | build_join_pathkeys(PlannerInfo *root, |
1296 | | RelOptInfo *joinrel, |
1297 | | JoinType jointype, |
1298 | | List *outer_pathkeys) |
1299 | 0 | { |
1300 | | /* RIGHT_SEMI should not come here */ |
1301 | 0 | Assert(jointype != JOIN_RIGHT_SEMI); |
1302 | |
|
1303 | 0 | if (jointype == JOIN_FULL || |
1304 | 0 | jointype == JOIN_RIGHT || |
1305 | 0 | jointype == JOIN_RIGHT_ANTI) |
1306 | 0 | return NIL; |
1307 | | |
1308 | | /* |
1309 | | * This used to be quite a complex bit of code, but now that all pathkey |
1310 | | * sublists start out life canonicalized, we don't have to do a darn thing |
1311 | | * here! |
1312 | | * |
1313 | | * We do, however, need to truncate the pathkeys list, since it may |
1314 | | * contain pathkeys that were useful for forming this joinrel but are |
1315 | | * uninteresting to higher levels. |
1316 | | */ |
1317 | 0 | return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); |
1318 | 0 | } |
1319 | | |
1320 | | /**************************************************************************** |
1321 | | * PATHKEYS AND SORT CLAUSES |
1322 | | ****************************************************************************/ |
1323 | | |
1324 | | /* |
1325 | | * make_pathkeys_for_sortclauses |
1326 | | * Generate a pathkeys list that represents the sort order specified |
1327 | | * by a list of SortGroupClauses |
1328 | | * |
1329 | | * The resulting PathKeys are always in canonical form. (Actually, there |
1330 | | * is no longer any code anywhere that creates non-canonical PathKeys.) |
1331 | | * |
1332 | | * 'sortclauses' is a list of SortGroupClause nodes |
1333 | | * 'tlist' is the targetlist to find the referenced tlist entries in |
1334 | | */ |
1335 | | List * |
1336 | | make_pathkeys_for_sortclauses(PlannerInfo *root, |
1337 | | List *sortclauses, |
1338 | | List *tlist) |
1339 | 0 | { |
1340 | 0 | List *result; |
1341 | 0 | bool sortable; |
1342 | |
|
1343 | 0 | result = make_pathkeys_for_sortclauses_extended(root, |
1344 | 0 | &sortclauses, |
1345 | 0 | tlist, |
1346 | 0 | false, |
1347 | 0 | false, |
1348 | 0 | &sortable, |
1349 | 0 | false); |
1350 | | /* It's caller error if not all clauses were sortable */ |
1351 | 0 | Assert(sortable); |
1352 | 0 | return result; |
1353 | 0 | } |
1354 | | |
1355 | | /* |
1356 | | * make_pathkeys_for_sortclauses_extended |
1357 | | * Generate a pathkeys list that represents the sort order specified |
1358 | | * by a list of SortGroupClauses |
1359 | | * |
1360 | | * The comments for make_pathkeys_for_sortclauses apply here too. In addition: |
1361 | | * |
1362 | | * If remove_redundant is true, then any sort clauses that are found to |
1363 | | * give rise to redundant pathkeys are removed from the sortclauses list |
1364 | | * (which therefore must be pass-by-reference in this version). |
1365 | | * |
1366 | | * If remove_group_rtindex is true, then we need to remove the RT index of the |
1367 | | * grouping step from the sort expressions before we make PathKeys for them. |
1368 | | * |
1369 | | * *sortable is set to true if all the sort clauses are in fact sortable. |
1370 | | * If any are not, they are ignored except for setting *sortable false. |
1371 | | * (In that case, the output pathkey list isn't really useful. However, |
1372 | | * we process the whole sortclauses list anyway, because it's still valid |
1373 | | * to remove any clauses that can be proven redundant via the eclass logic. |
1374 | | * Even though we'll have to hash in that case, we might as well not hash |
1375 | | * redundant columns.) |
1376 | | * |
1377 | | * If set_ec_sortref is true then sets the value of the pathkey's |
1378 | | * EquivalenceClass unless it's already initialized. |
1379 | | */ |
1380 | | List * |
1381 | | make_pathkeys_for_sortclauses_extended(PlannerInfo *root, |
1382 | | List **sortclauses, |
1383 | | List *tlist, |
1384 | | bool remove_redundant, |
1385 | | bool remove_group_rtindex, |
1386 | | bool *sortable, |
1387 | | bool set_ec_sortref) |
1388 | 0 | { |
1389 | 0 | List *pathkeys = NIL; |
1390 | 0 | ListCell *l; |
1391 | |
|
1392 | 0 | *sortable = true; |
1393 | 0 | foreach(l, *sortclauses) |
1394 | 0 | { |
1395 | 0 | SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); |
1396 | 0 | Expr *sortkey; |
1397 | 0 | PathKey *pathkey; |
1398 | |
|
1399 | 0 | sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); |
1400 | 0 | if (!OidIsValid(sortcl->sortop)) |
1401 | 0 | { |
1402 | 0 | *sortable = false; |
1403 | 0 | continue; |
1404 | 0 | } |
1405 | 0 | if (remove_group_rtindex) |
1406 | 0 | { |
1407 | 0 | Assert(root->group_rtindex > 0); |
1408 | 0 | sortkey = (Expr *) |
1409 | 0 | remove_nulling_relids((Node *) sortkey, |
1410 | 0 | bms_make_singleton(root->group_rtindex), |
1411 | 0 | NULL); |
1412 | 0 | } |
1413 | 0 | pathkey = make_pathkey_from_sortop(root, |
1414 | 0 | sortkey, |
1415 | 0 | sortcl->sortop, |
1416 | 0 | sortcl->reverse_sort, |
1417 | 0 | sortcl->nulls_first, |
1418 | 0 | sortcl->tleSortGroupRef, |
1419 | 0 | true); |
1420 | 0 | if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref) |
1421 | 0 | { |
1422 | | /* |
1423 | | * Copy the sortref if it hasn't been set yet. That may happen if |
1424 | | * the EquivalenceClass was constructed from a WHERE clause, i.e. |
1425 | | * it doesn't have a target reference at all. |
1426 | | */ |
1427 | 0 | pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef; |
1428 | 0 | } |
1429 | | |
1430 | | /* Canonical form eliminates redundant ordering keys */ |
1431 | 0 | if (!pathkey_is_redundant(pathkey, pathkeys)) |
1432 | 0 | pathkeys = lappend(pathkeys, pathkey); |
1433 | 0 | else if (remove_redundant) |
1434 | 0 | *sortclauses = foreach_delete_current(*sortclauses, l); |
1435 | 0 | } |
1436 | 0 | return pathkeys; |
1437 | 0 | } |
1438 | | |
1439 | | /**************************************************************************** |
1440 | | * PATHKEYS AND MERGECLAUSES |
1441 | | ****************************************************************************/ |
1442 | | |
1443 | | /* |
1444 | | * initialize_mergeclause_eclasses |
1445 | | * Set the EquivalenceClass links in a mergeclause restrictinfo. |
1446 | | * |
1447 | | * RestrictInfo contains fields in which we may cache pointers to |
1448 | | * EquivalenceClasses for the left and right inputs of the mergeclause. |
1449 | | * (If the mergeclause is a true equivalence clause these will be the |
1450 | | * same EquivalenceClass, otherwise not.) If the mergeclause is either |
1451 | | * used to generate an EquivalenceClass, or derived from an EquivalenceClass, |
1452 | | * then it's easy to set up the left_ec and right_ec members --- otherwise, |
1453 | | * this function should be called to set them up. We will generate new |
1454 | | * EquivalenceClauses if necessary to represent the mergeclause's left and |
1455 | | * right sides. |
1456 | | * |
1457 | | * Note this is called before EC merging is complete, so the links won't |
1458 | | * necessarily point to canonical ECs. Before they are actually used for |
1459 | | * anything, update_mergeclause_eclasses must be called to ensure that |
1460 | | * they've been updated to point to canonical ECs. |
1461 | | */ |
1462 | | void |
1463 | | initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) |
1464 | 0 | { |
1465 | 0 | Expr *clause = restrictinfo->clause; |
1466 | 0 | Oid lefttype, |
1467 | 0 | righttype; |
1468 | | |
1469 | | /* Should be a mergeclause ... */ |
1470 | 0 | Assert(restrictinfo->mergeopfamilies != NIL); |
1471 | | /* ... with links not yet set */ |
1472 | 0 | Assert(restrictinfo->left_ec == NULL); |
1473 | 0 | Assert(restrictinfo->right_ec == NULL); |
1474 | | |
1475 | | /* Need the declared input types of the operator */ |
1476 | 0 | op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); |
1477 | | |
1478 | | /* Find or create a matching EquivalenceClass for each side */ |
1479 | 0 | restrictinfo->left_ec = |
1480 | 0 | get_eclass_for_sort_expr(root, |
1481 | 0 | (Expr *) get_leftop(clause), |
1482 | 0 | restrictinfo->mergeopfamilies, |
1483 | 0 | lefttype, |
1484 | 0 | ((OpExpr *) clause)->inputcollid, |
1485 | 0 | 0, |
1486 | 0 | NULL, |
1487 | 0 | true); |
1488 | 0 | restrictinfo->right_ec = |
1489 | 0 | get_eclass_for_sort_expr(root, |
1490 | 0 | (Expr *) get_rightop(clause), |
1491 | 0 | restrictinfo->mergeopfamilies, |
1492 | 0 | righttype, |
1493 | 0 | ((OpExpr *) clause)->inputcollid, |
1494 | 0 | 0, |
1495 | 0 | NULL, |
1496 | 0 | true); |
1497 | 0 | } |
1498 | | |
1499 | | /* |
1500 | | * update_mergeclause_eclasses |
1501 | | * Make the cached EquivalenceClass links valid in a mergeclause |
1502 | | * restrictinfo. |
1503 | | * |
1504 | | * These pointers should have been set by process_equivalence or |
1505 | | * initialize_mergeclause_eclasses, but they might have been set to |
1506 | | * non-canonical ECs that got merged later. Chase up to the canonical |
1507 | | * merged parent if so. |
1508 | | */ |
1509 | | void |
1510 | | update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) |
1511 | 0 | { |
1512 | | /* Should be a merge clause ... */ |
1513 | 0 | Assert(restrictinfo->mergeopfamilies != NIL); |
1514 | | /* ... with pointers already set */ |
1515 | 0 | Assert(restrictinfo->left_ec != NULL); |
1516 | 0 | Assert(restrictinfo->right_ec != NULL); |
1517 | | |
1518 | | /* Chase up to the top as needed */ |
1519 | 0 | while (restrictinfo->left_ec->ec_merged) |
1520 | 0 | restrictinfo->left_ec = restrictinfo->left_ec->ec_merged; |
1521 | 0 | while (restrictinfo->right_ec->ec_merged) |
1522 | 0 | restrictinfo->right_ec = restrictinfo->right_ec->ec_merged; |
1523 | 0 | } |
1524 | | |
1525 | | /* |
1526 | | * find_mergeclauses_for_outer_pathkeys |
1527 | | * This routine attempts to find a list of mergeclauses that can be |
1528 | | * used with a specified ordering for the join's outer relation. |
1529 | | * If successful, it returns a list of mergeclauses. |
1530 | | * |
1531 | | * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path. |
1532 | | * 'restrictinfos' is a list of mergejoinable restriction clauses for the |
1533 | | * join relation being formed, in no particular order. |
1534 | | * |
1535 | | * The restrictinfos must be marked (via outer_is_left) to show which side |
1536 | | * of each clause is associated with the current outer path. (See |
1537 | | * select_mergejoin_clauses()) |
1538 | | * |
1539 | | * The result is NIL if no merge can be done, else a maximal list of |
1540 | | * usable mergeclauses (represented as a list of their restrictinfo nodes). |
1541 | | * The list is ordered to match the pathkeys, as required for execution. |
1542 | | */ |
1543 | | List * |
1544 | | find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, |
1545 | | List *pathkeys, |
1546 | | List *restrictinfos) |
1547 | 0 | { |
1548 | 0 | List *mergeclauses = NIL; |
1549 | 0 | ListCell *i; |
1550 | | |
1551 | | /* make sure we have eclasses cached in the clauses */ |
1552 | 0 | foreach(i, restrictinfos) |
1553 | 0 | { |
1554 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); |
1555 | |
|
1556 | 0 | update_mergeclause_eclasses(root, rinfo); |
1557 | 0 | } |
1558 | |
|
1559 | 0 | foreach(i, pathkeys) |
1560 | 0 | { |
1561 | 0 | PathKey *pathkey = (PathKey *) lfirst(i); |
1562 | 0 | EquivalenceClass *pathkey_ec = pathkey->pk_eclass; |
1563 | 0 | List *matched_restrictinfos = NIL; |
1564 | 0 | ListCell *j; |
1565 | | |
1566 | | /*---------- |
1567 | | * A mergejoin clause matches a pathkey if it has the same EC. |
1568 | | * If there are multiple matching clauses, take them all. In plain |
1569 | | * inner-join scenarios we expect only one match, because |
1570 | | * equivalence-class processing will have removed any redundant |
1571 | | * mergeclauses. However, in outer-join scenarios there might be |
1572 | | * multiple matches. An example is |
1573 | | * |
1574 | | * select * from a full join b |
1575 | | * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2; |
1576 | | * |
1577 | | * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three |
1578 | | * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed |
1579 | | * we *must* do so or we will be unable to form a valid plan. |
1580 | | * |
1581 | | * We expect that the given pathkeys list is canonical, which means |
1582 | | * no two members have the same EC, so it's not possible for this |
1583 | | * code to enter the same mergeclause into the result list twice. |
1584 | | * |
1585 | | * It's possible that multiple matching clauses might have different |
1586 | | * ECs on the other side, in which case the order we put them into our |
1587 | | * result makes a difference in the pathkeys required for the inner |
1588 | | * input rel. However this routine hasn't got any info about which |
1589 | | * order would be best, so we don't worry about that. |
1590 | | * |
1591 | | * It's also possible that the selected mergejoin clauses produce |
1592 | | * a noncanonical ordering of pathkeys for the inner side, ie, we |
1593 | | * might select clauses that reference b.v1, b.v2, b.v1 in that |
1594 | | * order. This is not harmful in itself, though it suggests that |
1595 | | * the clauses are partially redundant. Since the alternative is |
1596 | | * to omit mergejoin clauses and thereby possibly fail to generate a |
1597 | | * plan altogether, we live with it. make_inner_pathkeys_for_merge() |
1598 | | * has to delete duplicates when it constructs the inner pathkeys |
1599 | | * list, and we also have to deal with such cases specially in |
1600 | | * create_mergejoin_plan(). |
1601 | | *---------- |
1602 | | */ |
1603 | 0 | foreach(j, restrictinfos) |
1604 | 0 | { |
1605 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); |
1606 | 0 | EquivalenceClass *clause_ec; |
1607 | |
|
1608 | 0 | clause_ec = rinfo->outer_is_left ? |
1609 | 0 | rinfo->left_ec : rinfo->right_ec; |
1610 | 0 | if (clause_ec == pathkey_ec) |
1611 | 0 | matched_restrictinfos = lappend(matched_restrictinfos, rinfo); |
1612 | 0 | } |
1613 | | |
1614 | | /* |
1615 | | * If we didn't find a mergeclause, we're done --- any additional |
1616 | | * sort-key positions in the pathkeys are useless. (But we can still |
1617 | | * mergejoin if we found at least one mergeclause.) |
1618 | | */ |
1619 | 0 | if (matched_restrictinfos == NIL) |
1620 | 0 | break; |
1621 | | |
1622 | | /* |
1623 | | * If we did find usable mergeclause(s) for this sort-key position, |
1624 | | * add them to result list. |
1625 | | */ |
1626 | 0 | mergeclauses = list_concat(mergeclauses, matched_restrictinfos); |
1627 | 0 | } |
1628 | |
|
1629 | 0 | return mergeclauses; |
1630 | 0 | } |
1631 | | |
1632 | | /* |
1633 | | * select_outer_pathkeys_for_merge |
1634 | | * Builds a pathkey list representing a possible sort ordering |
1635 | | * that can be used with the given mergeclauses. |
1636 | | * |
1637 | | * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses |
1638 | | * that will be used in a merge join. |
1639 | | * 'joinrel' is the join relation we are trying to construct. |
1640 | | * |
1641 | | * The restrictinfos must be marked (via outer_is_left) to show which side |
1642 | | * of each clause is associated with the current outer path. (See |
1643 | | * select_mergejoin_clauses()) |
1644 | | * |
1645 | | * Returns a pathkeys list that can be applied to the outer relation. |
1646 | | * |
1647 | | * Since we assume here that a sort is required, there is no particular use |
1648 | | * in matching any available ordering of the outerrel. (joinpath.c has an |
1649 | | * entirely separate code path for considering sort-free mergejoins.) Rather, |
1650 | | * it's interesting to try to match, or match a prefix of the requested |
1651 | | * query_pathkeys so that a second output sort may be avoided or an |
1652 | | * incremental sort may be done instead. We can get away with just a prefix |
1653 | | * of the query_pathkeys when that prefix covers the entire join condition. |
1654 | | * Failing that, we try to list "more popular" keys (those with the most |
1655 | | * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting |
1656 | | * ordering useful for as many higher-level mergejoins as possible. |
1657 | | */ |
1658 | | List * |
1659 | | select_outer_pathkeys_for_merge(PlannerInfo *root, |
1660 | | List *mergeclauses, |
1661 | | RelOptInfo *joinrel) |
1662 | 0 | { |
1663 | 0 | List *pathkeys = NIL; |
1664 | 0 | int nClauses = list_length(mergeclauses); |
1665 | 0 | EquivalenceClass **ecs; |
1666 | 0 | int *scores; |
1667 | 0 | int necs; |
1668 | 0 | ListCell *lc; |
1669 | 0 | int j; |
1670 | | |
1671 | | /* Might have no mergeclauses */ |
1672 | 0 | if (nClauses == 0) |
1673 | 0 | return NIL; |
1674 | | |
1675 | | /* |
1676 | | * Make arrays of the ECs used by the mergeclauses (dropping any |
1677 | | * duplicates) and their "popularity" scores. |
1678 | | */ |
1679 | 0 | ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *)); |
1680 | 0 | scores = (int *) palloc(nClauses * sizeof(int)); |
1681 | 0 | necs = 0; |
1682 | |
|
1683 | 0 | foreach(lc, mergeclauses) |
1684 | 0 | { |
1685 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); |
1686 | 0 | EquivalenceClass *oeclass; |
1687 | 0 | int score; |
1688 | 0 | ListCell *lc2; |
1689 | | |
1690 | | /* get the outer eclass */ |
1691 | 0 | update_mergeclause_eclasses(root, rinfo); |
1692 | |
|
1693 | 0 | if (rinfo->outer_is_left) |
1694 | 0 | oeclass = rinfo->left_ec; |
1695 | 0 | else |
1696 | 0 | oeclass = rinfo->right_ec; |
1697 | | |
1698 | | /* reject duplicates */ |
1699 | 0 | for (j = 0; j < necs; j++) |
1700 | 0 | { |
1701 | 0 | if (ecs[j] == oeclass) |
1702 | 0 | break; |
1703 | 0 | } |
1704 | 0 | if (j < necs) |
1705 | 0 | continue; |
1706 | | |
1707 | | /* compute score */ |
1708 | 0 | score = 0; |
1709 | 0 | foreach(lc2, oeclass->ec_members) |
1710 | 0 | { |
1711 | 0 | EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); |
1712 | | |
1713 | | /* Child members should not exist in ec_members */ |
1714 | 0 | Assert(!em->em_is_child); |
1715 | | |
1716 | | /* Potential future join partner? */ |
1717 | 0 | if (!em->em_is_const && |
1718 | 0 | !bms_overlap(em->em_relids, joinrel->relids)) |
1719 | 0 | score++; |
1720 | 0 | } |
1721 | |
|
1722 | 0 | ecs[necs] = oeclass; |
1723 | 0 | scores[necs] = score; |
1724 | 0 | necs++; |
1725 | 0 | } |
1726 | | |
1727 | | /* |
1728 | | * Find out if we have all the ECs mentioned in query_pathkeys; if so we |
1729 | | * can generate a sort order that's also useful for final output. If we |
1730 | | * only have a prefix of the query_pathkeys, and that prefix is the entire |
1731 | | * join condition, then it's useful to use the prefix as the pathkeys as |
1732 | | * this increases the chances that an incremental sort will be able to be |
1733 | | * used by the upper planner. |
1734 | | */ |
1735 | 0 | if (root->query_pathkeys) |
1736 | 0 | { |
1737 | 0 | int matches = 0; |
1738 | |
|
1739 | 0 | foreach(lc, root->query_pathkeys) |
1740 | 0 | { |
1741 | 0 | PathKey *query_pathkey = (PathKey *) lfirst(lc); |
1742 | 0 | EquivalenceClass *query_ec = query_pathkey->pk_eclass; |
1743 | |
|
1744 | 0 | for (j = 0; j < necs; j++) |
1745 | 0 | { |
1746 | 0 | if (ecs[j] == query_ec) |
1747 | 0 | break; /* found match */ |
1748 | 0 | } |
1749 | 0 | if (j >= necs) |
1750 | 0 | break; /* didn't find match */ |
1751 | | |
1752 | 0 | matches++; |
1753 | 0 | } |
1754 | | /* if we got to the end of the list, we have them all */ |
1755 | 0 | if (lc == NULL) |
1756 | 0 | { |
1757 | | /* copy query_pathkeys as starting point for our output */ |
1758 | 0 | pathkeys = list_copy(root->query_pathkeys); |
1759 | | /* mark their ECs as already-emitted */ |
1760 | 0 | foreach(lc, root->query_pathkeys) |
1761 | 0 | { |
1762 | 0 | PathKey *query_pathkey = (PathKey *) lfirst(lc); |
1763 | 0 | EquivalenceClass *query_ec = query_pathkey->pk_eclass; |
1764 | |
|
1765 | 0 | for (j = 0; j < necs; j++) |
1766 | 0 | { |
1767 | 0 | if (ecs[j] == query_ec) |
1768 | 0 | { |
1769 | 0 | scores[j] = -1; |
1770 | 0 | break; |
1771 | 0 | } |
1772 | 0 | } |
1773 | 0 | } |
1774 | 0 | } |
1775 | | |
1776 | | /* |
1777 | | * If we didn't match to all of the query_pathkeys, but did match to |
1778 | | * all of the join clauses then we'll make use of these as partially |
1779 | | * sorted input is better than nothing for the upper planner as it may |
1780 | | * lead to incremental sorts instead of full sorts. |
1781 | | */ |
1782 | 0 | else if (matches == nClauses) |
1783 | 0 | { |
1784 | 0 | pathkeys = list_copy_head(root->query_pathkeys, matches); |
1785 | | |
1786 | | /* we have all of the join pathkeys, so nothing more to do */ |
1787 | 0 | pfree(ecs); |
1788 | 0 | pfree(scores); |
1789 | |
|
1790 | 0 | return pathkeys; |
1791 | 0 | } |
1792 | 0 | } |
1793 | | |
1794 | | /* |
1795 | | * Add remaining ECs to the list in popularity order, using a default sort |
1796 | | * ordering. (We could use qsort() here, but the list length is usually |
1797 | | * so small it's not worth it.) |
1798 | | */ |
1799 | 0 | for (;;) |
1800 | 0 | { |
1801 | 0 | int best_j; |
1802 | 0 | int best_score; |
1803 | 0 | EquivalenceClass *ec; |
1804 | 0 | PathKey *pathkey; |
1805 | |
|
1806 | 0 | best_j = 0; |
1807 | 0 | best_score = scores[0]; |
1808 | 0 | for (j = 1; j < necs; j++) |
1809 | 0 | { |
1810 | 0 | if (scores[j] > best_score) |
1811 | 0 | { |
1812 | 0 | best_j = j; |
1813 | 0 | best_score = scores[j]; |
1814 | 0 | } |
1815 | 0 | } |
1816 | 0 | if (best_score < 0) |
1817 | 0 | break; /* all done */ |
1818 | 0 | ec = ecs[best_j]; |
1819 | 0 | scores[best_j] = -1; |
1820 | 0 | pathkey = make_canonical_pathkey(root, |
1821 | 0 | ec, |
1822 | 0 | linitial_oid(ec->ec_opfamilies), |
1823 | 0 | COMPARE_LT, |
1824 | 0 | false); |
1825 | | /* can't be redundant because no duplicate ECs */ |
1826 | 0 | Assert(!pathkey_is_redundant(pathkey, pathkeys)); |
1827 | 0 | pathkeys = lappend(pathkeys, pathkey); |
1828 | 0 | } |
1829 | |
|
1830 | 0 | pfree(ecs); |
1831 | 0 | pfree(scores); |
1832 | |
|
1833 | 0 | return pathkeys; |
1834 | 0 | } |
1835 | | |
1836 | | /* |
1837 | | * make_inner_pathkeys_for_merge |
1838 | | * Builds a pathkey list representing the explicit sort order that |
1839 | | * must be applied to an inner path to make it usable with the |
1840 | | * given mergeclauses. |
1841 | | * |
1842 | | * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses |
1843 | | * that will be used in a merge join, in order. |
1844 | | * 'outer_pathkeys' are the already-known canonical pathkeys for the outer |
1845 | | * side of the join. |
1846 | | * |
1847 | | * The restrictinfos must be marked (via outer_is_left) to show which side |
1848 | | * of each clause is associated with the current outer path. (See |
1849 | | * select_mergejoin_clauses()) |
1850 | | * |
1851 | | * Returns a pathkeys list that can be applied to the inner relation. |
1852 | | * |
1853 | | * Note that it is not this routine's job to decide whether sorting is |
1854 | | * actually needed for a particular input path. Assume a sort is necessary; |
1855 | | * just make the keys, eh? |
1856 | | */ |
1857 | | List * |
1858 | | make_inner_pathkeys_for_merge(PlannerInfo *root, |
1859 | | List *mergeclauses, |
1860 | | List *outer_pathkeys) |
1861 | 0 | { |
1862 | 0 | List *pathkeys = NIL; |
1863 | 0 | EquivalenceClass *lastoeclass; |
1864 | 0 | PathKey *opathkey; |
1865 | 0 | ListCell *lc; |
1866 | 0 | ListCell *lop; |
1867 | |
|
1868 | 0 | lastoeclass = NULL; |
1869 | 0 | opathkey = NULL; |
1870 | 0 | lop = list_head(outer_pathkeys); |
1871 | |
|
1872 | 0 | foreach(lc, mergeclauses) |
1873 | 0 | { |
1874 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); |
1875 | 0 | EquivalenceClass *oeclass; |
1876 | 0 | EquivalenceClass *ieclass; |
1877 | 0 | PathKey *pathkey; |
1878 | |
|
1879 | 0 | update_mergeclause_eclasses(root, rinfo); |
1880 | |
|
1881 | 0 | if (rinfo->outer_is_left) |
1882 | 0 | { |
1883 | 0 | oeclass = rinfo->left_ec; |
1884 | 0 | ieclass = rinfo->right_ec; |
1885 | 0 | } |
1886 | 0 | else |
1887 | 0 | { |
1888 | 0 | oeclass = rinfo->right_ec; |
1889 | 0 | ieclass = rinfo->left_ec; |
1890 | 0 | } |
1891 | | |
1892 | | /* outer eclass should match current or next pathkeys */ |
1893 | | /* we check this carefully for debugging reasons */ |
1894 | 0 | if (oeclass != lastoeclass) |
1895 | 0 | { |
1896 | 0 | if (!lop) |
1897 | 0 | elog(ERROR, "too few pathkeys for mergeclauses"); |
1898 | 0 | opathkey = (PathKey *) lfirst(lop); |
1899 | 0 | lop = lnext(outer_pathkeys, lop); |
1900 | 0 | lastoeclass = opathkey->pk_eclass; |
1901 | 0 | if (oeclass != lastoeclass) |
1902 | 0 | elog(ERROR, "outer pathkeys do not match mergeclause"); |
1903 | 0 | } |
1904 | | |
1905 | | /* |
1906 | | * Often, we'll have same EC on both sides, in which case the outer |
1907 | | * pathkey is also canonical for the inner side, and we can skip a |
1908 | | * useless search. |
1909 | | */ |
1910 | 0 | if (ieclass == oeclass) |
1911 | 0 | pathkey = opathkey; |
1912 | 0 | else |
1913 | 0 | pathkey = make_canonical_pathkey(root, |
1914 | 0 | ieclass, |
1915 | 0 | opathkey->pk_opfamily, |
1916 | 0 | opathkey->pk_cmptype, |
1917 | 0 | opathkey->pk_nulls_first); |
1918 | | |
1919 | | /* |
1920 | | * Don't generate redundant pathkeys (which can happen if multiple |
1921 | | * mergeclauses refer to the same EC). Because we do this, the output |
1922 | | * pathkey list isn't necessarily ordered like the mergeclauses, which |
1923 | | * complicates life for create_mergejoin_plan(). But if we didn't, |
1924 | | * we'd have a noncanonical sort key list, which would be bad; for one |
1925 | | * reason, it certainly wouldn't match any available sort order for |
1926 | | * the input relation. |
1927 | | */ |
1928 | 0 | if (!pathkey_is_redundant(pathkey, pathkeys)) |
1929 | 0 | pathkeys = lappend(pathkeys, pathkey); |
1930 | 0 | } |
1931 | | |
1932 | 0 | return pathkeys; |
1933 | 0 | } |
1934 | | |
1935 | | /* |
1936 | | * trim_mergeclauses_for_inner_pathkeys |
1937 | | * This routine trims a list of mergeclauses to include just those that |
1938 | | * work with a specified ordering for the join's inner relation. |
1939 | | * |
1940 | | * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the |
1941 | | * join relation being formed, in an order known to work for the |
1942 | | * currently-considered sort ordering of the join's outer rel. |
1943 | | * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path; |
1944 | | * it should be equal to, or a truncation of, the result of |
1945 | | * make_inner_pathkeys_for_merge for these mergeclauses. |
1946 | | * |
1947 | | * What we return will be a prefix of the given mergeclauses list. |
1948 | | * |
1949 | | * We need this logic because make_inner_pathkeys_for_merge's result isn't |
1950 | | * necessarily in the same order as the mergeclauses. That means that if we |
1951 | | * consider an inner-rel pathkey list that is a truncation of that result, |
1952 | | * we might need to drop mergeclauses even though they match a surviving inner |
1953 | | * pathkey. This happens when they are to the right of a mergeclause that |
1954 | | * matches a removed inner pathkey. |
1955 | | * |
1956 | | * The mergeclauses must be marked (via outer_is_left) to show which side |
1957 | | * of each clause is associated with the current outer path. (See |
1958 | | * select_mergejoin_clauses()) |
1959 | | */ |
1960 | | List * |
1961 | | trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, |
1962 | | List *mergeclauses, |
1963 | | List *pathkeys) |
1964 | 0 | { |
1965 | 0 | List *new_mergeclauses = NIL; |
1966 | 0 | PathKey *pathkey; |
1967 | 0 | EquivalenceClass *pathkey_ec; |
1968 | 0 | bool matched_pathkey; |
1969 | 0 | ListCell *lip; |
1970 | 0 | ListCell *i; |
1971 | | |
1972 | | /* No pathkeys => no mergeclauses (though we don't expect this case) */ |
1973 | 0 | if (pathkeys == NIL) |
1974 | 0 | return NIL; |
1975 | | /* Initialize to consider first pathkey */ |
1976 | 0 | lip = list_head(pathkeys); |
1977 | 0 | pathkey = (PathKey *) lfirst(lip); |
1978 | 0 | pathkey_ec = pathkey->pk_eclass; |
1979 | 0 | lip = lnext(pathkeys, lip); |
1980 | 0 | matched_pathkey = false; |
1981 | | |
1982 | | /* Scan mergeclauses to see how many we can use */ |
1983 | 0 | foreach(i, mergeclauses) |
1984 | 0 | { |
1985 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); |
1986 | 0 | EquivalenceClass *clause_ec; |
1987 | | |
1988 | | /* Assume we needn't do update_mergeclause_eclasses again here */ |
1989 | | |
1990 | | /* Check clause's inner-rel EC against current pathkey */ |
1991 | 0 | clause_ec = rinfo->outer_is_left ? |
1992 | 0 | rinfo->right_ec : rinfo->left_ec; |
1993 | | |
1994 | | /* If we don't have a match, attempt to advance to next pathkey */ |
1995 | 0 | if (clause_ec != pathkey_ec) |
1996 | 0 | { |
1997 | | /* If we had no clauses matching this inner pathkey, must stop */ |
1998 | 0 | if (!matched_pathkey) |
1999 | 0 | break; |
2000 | | |
2001 | | /* Advance to next inner pathkey, if any */ |
2002 | 0 | if (lip == NULL) |
2003 | 0 | break; |
2004 | 0 | pathkey = (PathKey *) lfirst(lip); |
2005 | 0 | pathkey_ec = pathkey->pk_eclass; |
2006 | 0 | lip = lnext(pathkeys, lip); |
2007 | 0 | matched_pathkey = false; |
2008 | 0 | } |
2009 | | |
2010 | | /* If mergeclause matches current inner pathkey, we can use it */ |
2011 | 0 | if (clause_ec == pathkey_ec) |
2012 | 0 | { |
2013 | 0 | new_mergeclauses = lappend(new_mergeclauses, rinfo); |
2014 | 0 | matched_pathkey = true; |
2015 | 0 | } |
2016 | 0 | else |
2017 | 0 | { |
2018 | | /* Else, no hope of adding any more mergeclauses */ |
2019 | 0 | break; |
2020 | 0 | } |
2021 | 0 | } |
2022 | |
|
2023 | 0 | return new_mergeclauses; |
2024 | 0 | } |
2025 | | |
2026 | | |
2027 | | /**************************************************************************** |
2028 | | * PATHKEY USEFULNESS CHECKS |
2029 | | * |
2030 | | * We only want to remember as many of the pathkeys of a path as have some |
2031 | | * potential use, either for subsequent mergejoins or for meeting the query's |
2032 | | * requested output ordering. This ensures that add_path() won't consider |
2033 | | * a path to have a usefully different ordering unless it really is useful. |
2034 | | * These routines check for usefulness of given pathkeys. |
2035 | | ****************************************************************************/ |
2036 | | |
2037 | | /* |
2038 | | * pathkeys_useful_for_merging |
2039 | | * Count the number of pathkeys that may be useful for mergejoins |
2040 | | * above the given relation. |
2041 | | * |
2042 | | * We consider a pathkey potentially useful if it corresponds to the merge |
2043 | | * ordering of either side of any joinclause for the rel. This might be |
2044 | | * overoptimistic, since joinclauses that require different other relations |
2045 | | * might never be usable at the same time, but trying to be exact is likely |
2046 | | * to be more trouble than it's worth. |
2047 | | * |
2048 | | * To avoid doubling the number of mergejoin paths considered, we would like |
2049 | | * to consider only one of the two scan directions (ASC or DESC) as useful |
2050 | | * for merging for any given target column. The choice is arbitrary unless |
2051 | | * one of the directions happens to match an ORDER BY key, in which case |
2052 | | * that direction should be preferred, in hopes of avoiding a final sort step. |
2053 | | * right_merge_direction() implements this heuristic. |
2054 | | */ |
2055 | | static int |
2056 | | pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys) |
2057 | 0 | { |
2058 | 0 | int useful = 0; |
2059 | 0 | ListCell *i; |
2060 | |
|
2061 | 0 | foreach(i, pathkeys) |
2062 | 0 | { |
2063 | 0 | PathKey *pathkey = (PathKey *) lfirst(i); |
2064 | 0 | bool matched = false; |
2065 | 0 | ListCell *j; |
2066 | | |
2067 | | /* If "wrong" direction, not useful for merging */ |
2068 | 0 | if (!right_merge_direction(root, pathkey)) |
2069 | 0 | break; |
2070 | | |
2071 | | /* |
2072 | | * First look into the EquivalenceClass of the pathkey, to see if |
2073 | | * there are any members not yet joined to the rel. If so, it's |
2074 | | * surely possible to generate a mergejoin clause using them. |
2075 | | */ |
2076 | 0 | if (rel->has_eclass_joins && |
2077 | 0 | eclass_useful_for_merging(root, pathkey->pk_eclass, rel)) |
2078 | 0 | matched = true; |
2079 | 0 | else |
2080 | 0 | { |
2081 | | /* |
2082 | | * Otherwise search the rel's joininfo list, which contains |
2083 | | * non-EquivalenceClass-derivable join clauses that might |
2084 | | * nonetheless be mergejoinable. |
2085 | | */ |
2086 | 0 | foreach(j, rel->joininfo) |
2087 | 0 | { |
2088 | 0 | RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); |
2089 | |
|
2090 | 0 | if (restrictinfo->mergeopfamilies == NIL) |
2091 | 0 | continue; |
2092 | 0 | update_mergeclause_eclasses(root, restrictinfo); |
2093 | |
|
2094 | 0 | if (pathkey->pk_eclass == restrictinfo->left_ec || |
2095 | 0 | pathkey->pk_eclass == restrictinfo->right_ec) |
2096 | 0 | { |
2097 | 0 | matched = true; |
2098 | 0 | break; |
2099 | 0 | } |
2100 | 0 | } |
2101 | 0 | } |
2102 | | |
2103 | | /* |
2104 | | * If we didn't find a mergeclause, we're done --- any additional |
2105 | | * sort-key positions in the pathkeys are useless. (But we can still |
2106 | | * mergejoin if we found at least one mergeclause.) |
2107 | | */ |
2108 | 0 | if (matched) |
2109 | 0 | useful++; |
2110 | 0 | else |
2111 | 0 | break; |
2112 | 0 | } |
2113 | |
|
2114 | 0 | return useful; |
2115 | 0 | } |
2116 | | |
2117 | | /* |
2118 | | * right_merge_direction |
2119 | | * Check whether the pathkey embodies the preferred sort direction |
2120 | | * for merging its target column. |
2121 | | */ |
2122 | | static bool |
2123 | | right_merge_direction(PlannerInfo *root, PathKey *pathkey) |
2124 | 0 | { |
2125 | 0 | ListCell *l; |
2126 | |
|
2127 | 0 | foreach(l, root->query_pathkeys) |
2128 | 0 | { |
2129 | 0 | PathKey *query_pathkey = (PathKey *) lfirst(l); |
2130 | |
|
2131 | 0 | if (pathkey->pk_eclass == query_pathkey->pk_eclass && |
2132 | 0 | pathkey->pk_opfamily == query_pathkey->pk_opfamily) |
2133 | 0 | { |
2134 | | /* |
2135 | | * Found a matching query sort column. Prefer this pathkey's |
2136 | | * direction iff it matches. Note that we ignore pk_nulls_first, |
2137 | | * which means that a sort might be needed anyway ... but we still |
2138 | | * want to prefer only one of the two possible directions, and we |
2139 | | * might as well use this one. |
2140 | | */ |
2141 | 0 | return (pathkey->pk_cmptype == query_pathkey->pk_cmptype); |
2142 | 0 | } |
2143 | 0 | } |
2144 | | |
2145 | | /* If no matching ORDER BY request, prefer the ASC direction */ |
2146 | 0 | return (pathkey->pk_cmptype == COMPARE_LT); |
2147 | 0 | } |
2148 | | |
2149 | | /* |
2150 | | * pathkeys_useful_for_ordering |
2151 | | * Count the number of pathkeys that are useful for meeting the |
2152 | | * query's requested output ordering. |
2153 | | * |
2154 | | * Because we the have the possibility of incremental sort, a prefix list of |
2155 | | * keys is potentially useful for improving the performance of the requested |
2156 | | * ordering. Thus we return 0, if no valuable keys are found, or the number |
2157 | | * of leading keys shared by the list and the requested ordering.. |
2158 | | */ |
2159 | | static int |
2160 | | pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) |
2161 | 0 | { |
2162 | 0 | int n_common_pathkeys; |
2163 | |
|
2164 | 0 | (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys, |
2165 | 0 | &n_common_pathkeys); |
2166 | |
|
2167 | 0 | return n_common_pathkeys; |
2168 | 0 | } |
2169 | | |
2170 | | /* |
2171 | | * pathkeys_useful_for_grouping |
2172 | | * Count the number of pathkeys that are useful for grouping (instead of |
2173 | | * explicit sort) |
2174 | | * |
2175 | | * Group pathkeys could be reordered to benefit from the ordering. The |
2176 | | * ordering may not be "complete" and may require incremental sort, but that's |
2177 | | * fine. So we simply count prefix pathkeys with a matching group key, and |
2178 | | * stop once we find the first pathkey without a match. |
2179 | | * |
2180 | | * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) |
2181 | | * pathkeys are useful for grouping, and we might do incremental sort to get |
2182 | | * path ordered by (a,b,e). |
2183 | | * |
2184 | | * This logic is necessary to retain paths with ordering not matching grouping |
2185 | | * keys directly, without the reordering. |
2186 | | * |
2187 | | * Returns the length of pathkey prefix with matching group keys. |
2188 | | */ |
2189 | | static int |
2190 | | pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) |
2191 | 0 | { |
2192 | 0 | ListCell *key; |
2193 | 0 | int n = 0; |
2194 | | |
2195 | | /* no special ordering requested for grouping */ |
2196 | 0 | if (root->group_pathkeys == NIL) |
2197 | 0 | return 0; |
2198 | | |
2199 | | /* walk the pathkeys and search for matching group key */ |
2200 | 0 | foreach(key, pathkeys) |
2201 | 0 | { |
2202 | 0 | PathKey *pathkey = (PathKey *) lfirst(key); |
2203 | | |
2204 | | /* no matching group key, we're done */ |
2205 | 0 | if (!list_member_ptr(root->group_pathkeys, pathkey)) |
2206 | 0 | break; |
2207 | | |
2208 | 0 | n++; |
2209 | 0 | } |
2210 | |
|
2211 | 0 | return n; |
2212 | 0 | } |
2213 | | |
2214 | | /* |
2215 | | * pathkeys_useful_for_distinct |
2216 | | * Count the number of pathkeys that are useful for DISTINCT or DISTINCT |
2217 | | * ON clause. |
2218 | | * |
2219 | | * DISTINCT keys could be reordered to benefit from the given pathkey list. As |
2220 | | * with pathkeys_useful_for_grouping, we return the number of leading keys in |
2221 | | * the list that are shared by the distinctClause pathkeys. |
2222 | | */ |
2223 | | static int |
2224 | | pathkeys_useful_for_distinct(PlannerInfo *root, List *pathkeys) |
2225 | 0 | { |
2226 | 0 | int n_common_pathkeys; |
2227 | | |
2228 | | /* |
2229 | | * distinct_pathkeys may have become empty if all of the pathkeys were |
2230 | | * determined to be redundant. Return 0 in this case. |
2231 | | */ |
2232 | 0 | if (root->distinct_pathkeys == NIL) |
2233 | 0 | return 0; |
2234 | | |
2235 | | /* walk the pathkeys and search for matching DISTINCT key */ |
2236 | 0 | n_common_pathkeys = 0; |
2237 | 0 | foreach_node(PathKey, pathkey, pathkeys) |
2238 | 0 | { |
2239 | | /* no matching DISTINCT key, we're done */ |
2240 | 0 | if (!list_member_ptr(root->distinct_pathkeys, pathkey)) |
2241 | 0 | break; |
2242 | | |
2243 | 0 | n_common_pathkeys++; |
2244 | 0 | } |
2245 | |
|
2246 | 0 | return n_common_pathkeys; |
2247 | 0 | } |
2248 | | |
2249 | | /* |
2250 | | * pathkeys_useful_for_setop |
2251 | | * Count the number of leading common pathkeys root's 'setop_pathkeys' in |
2252 | | * 'pathkeys'. |
2253 | | */ |
2254 | | static int |
2255 | | pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys) |
2256 | 0 | { |
2257 | 0 | int n_common_pathkeys; |
2258 | |
|
2259 | 0 | (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys, |
2260 | 0 | &n_common_pathkeys); |
2261 | |
|
2262 | 0 | return n_common_pathkeys; |
2263 | 0 | } |
2264 | | |
2265 | | /* |
2266 | | * truncate_useless_pathkeys |
2267 | | * Shorten the given pathkey list to just the useful pathkeys. |
2268 | | */ |
2269 | | List * |
2270 | | truncate_useless_pathkeys(PlannerInfo *root, |
2271 | | RelOptInfo *rel, |
2272 | | List *pathkeys) |
2273 | 0 | { |
2274 | 0 | int nuseful; |
2275 | 0 | int nuseful2; |
2276 | |
|
2277 | 0 | nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); |
2278 | 0 | nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); |
2279 | 0 | if (nuseful2 > nuseful) |
2280 | 0 | nuseful = nuseful2; |
2281 | 0 | nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); |
2282 | 0 | if (nuseful2 > nuseful) |
2283 | 0 | nuseful = nuseful2; |
2284 | 0 | nuseful2 = pathkeys_useful_for_distinct(root, pathkeys); |
2285 | 0 | if (nuseful2 > nuseful) |
2286 | 0 | nuseful = nuseful2; |
2287 | 0 | nuseful2 = pathkeys_useful_for_setop(root, pathkeys); |
2288 | 0 | if (nuseful2 > nuseful) |
2289 | 0 | nuseful = nuseful2; |
2290 | | |
2291 | | /* |
2292 | | * Note: not safe to modify input list destructively, but we can avoid |
2293 | | * copying the list if we're not actually going to change it |
2294 | | */ |
2295 | 0 | if (nuseful == 0) |
2296 | 0 | return NIL; |
2297 | 0 | else if (nuseful == list_length(pathkeys)) |
2298 | 0 | return pathkeys; |
2299 | 0 | else |
2300 | 0 | return list_copy_head(pathkeys, nuseful); |
2301 | 0 | } |
2302 | | |
2303 | | /* |
2304 | | * has_useful_pathkeys |
2305 | | * Detect whether the specified rel could have any pathkeys that are |
2306 | | * useful according to truncate_useless_pathkeys(). |
2307 | | * |
2308 | | * This is a cheap test that lets us skip building pathkeys at all in very |
2309 | | * simple queries. It's OK to err in the direction of returning "true" when |
2310 | | * there really aren't any usable pathkeys, but erring in the other direction |
2311 | | * is bad --- so keep this in sync with the routines above! |
2312 | | * |
2313 | | * We could make the test more complex, for example checking to see if any of |
2314 | | * the joinclauses are really mergejoinable, but that likely wouldn't win |
2315 | | * often enough to repay the extra cycles. Queries with neither a join nor |
2316 | | * a sort are reasonably common, though, so this much work seems worthwhile. |
2317 | | */ |
2318 | | bool |
2319 | | has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) |
2320 | 0 | { |
2321 | 0 | if (rel->joininfo != NIL || rel->has_eclass_joins) |
2322 | 0 | return true; /* might be able to use pathkeys for merging */ |
2323 | 0 | if (root->group_pathkeys != NIL) |
2324 | 0 | return true; /* might be able to use pathkeys for grouping */ |
2325 | 0 | if (root->query_pathkeys != NIL) |
2326 | 0 | return true; /* might be able to use them for ordering */ |
2327 | 0 | return false; /* definitely useless */ |
2328 | 0 | } |