/src/postgres/src/backend/optimizer/plan/analyzejoins.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * analyzejoins.c |
4 | | * Routines for simplifying joins after initial query analysis |
5 | | * |
6 | | * While we do a great deal of join simplification in prep/prepjointree.c, |
7 | | * certain optimizations cannot be performed at that stage for lack of |
8 | | * detailed information about the query. The routines here are invoked |
9 | | * after initsplan.c has done its work, and can do additional join removal |
10 | | * and simplification steps based on the information extracted. The penalty |
11 | | * is that we have to work harder to clean up after ourselves when we modify |
12 | | * the query, since the derived data structures have to be updated too. |
13 | | * |
14 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
15 | | * Portions Copyright (c) 1994, Regents of the University of California |
16 | | * |
17 | | * |
18 | | * IDENTIFICATION |
19 | | * src/backend/optimizer/plan/analyzejoins.c |
20 | | * |
21 | | *------------------------------------------------------------------------- |
22 | | */ |
23 | | #include "postgres.h" |
24 | | |
25 | | #include "catalog/pg_class.h" |
26 | | #include "nodes/nodeFuncs.h" |
27 | | #include "optimizer/joininfo.h" |
28 | | #include "optimizer/optimizer.h" |
29 | | #include "optimizer/pathnode.h" |
30 | | #include "optimizer/paths.h" |
31 | | #include "optimizer/placeholder.h" |
32 | | #include "optimizer/planmain.h" |
33 | | #include "optimizer/restrictinfo.h" |
34 | | #include "rewrite/rewriteManip.h" |
35 | | #include "utils/lsyscache.h" |
36 | | |
37 | | /* |
38 | | * Utility structure. A sorting procedure is needed to simplify the search |
39 | | * of SJE-candidate baserels referencing the same database relation. Having |
40 | | * collected all baserels from the query jointree, the planner sorts them |
41 | | * according to the reloid value, groups them with the next pass and attempts |
42 | | * to remove self-joins. |
43 | | * |
44 | | * Preliminary sorting prevents quadratic behavior that can be harmful in the |
45 | | * case of numerous joins. |
46 | | */ |
47 | | typedef struct |
48 | | { |
49 | | int relid; |
50 | | Oid reloid; |
51 | | } SelfJoinCandidate; |
52 | | |
53 | | bool enable_self_join_elimination; |
54 | | |
55 | | /* local functions */ |
56 | | static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); |
57 | | static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid, |
58 | | SpecialJoinInfo *sjinfo); |
59 | | static void remove_rel_from_restrictinfo(RestrictInfo *rinfo, |
60 | | int relid, int ojrelid); |
61 | | static void remove_rel_from_eclass(EquivalenceClass *ec, |
62 | | SpecialJoinInfo *sjinfo, |
63 | | int relid, int subst); |
64 | | static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); |
65 | | static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); |
66 | | static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, |
67 | | List *clause_list, List **extra_clauses); |
68 | | static Oid distinct_col_search(int colno, List *colnos, List *opids); |
69 | | static bool is_innerrel_unique_for(PlannerInfo *root, |
70 | | Relids joinrelids, |
71 | | Relids outerrelids, |
72 | | RelOptInfo *innerrel, |
73 | | JoinType jointype, |
74 | | List *restrictlist, |
75 | | List **extra_clauses); |
76 | | static int self_join_candidates_cmp(const void *a, const void *b); |
77 | | static bool replace_relid_callback(Node *node, |
78 | | ChangeVarNodes_context *context); |
79 | | |
80 | | |
81 | | /* |
82 | | * remove_useless_joins |
83 | | * Check for relations that don't actually need to be joined at all, |
84 | | * and remove them from the query. |
85 | | * |
86 | | * We are passed the current joinlist and return the updated list. Other |
87 | | * data structures that have to be updated are accessible via "root". |
88 | | */ |
89 | | List * |
90 | | remove_useless_joins(PlannerInfo *root, List *joinlist) |
91 | 0 | { |
92 | 0 | ListCell *lc; |
93 | | |
94 | | /* |
95 | | * We are only interested in relations that are left-joined to, so we can |
96 | | * scan the join_info_list to find them easily. |
97 | | */ |
98 | 0 | restart: |
99 | 0 | foreach(lc, root->join_info_list) |
100 | 0 | { |
101 | 0 | SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); |
102 | 0 | int innerrelid; |
103 | 0 | int nremoved; |
104 | | |
105 | | /* Skip if not removable */ |
106 | 0 | if (!join_is_removable(root, sjinfo)) |
107 | 0 | continue; |
108 | | |
109 | | /* |
110 | | * Currently, join_is_removable can only succeed when the sjinfo's |
111 | | * righthand is a single baserel. Remove that rel from the query and |
112 | | * joinlist. |
113 | | */ |
114 | 0 | innerrelid = bms_singleton_member(sjinfo->min_righthand); |
115 | |
|
116 | 0 | remove_leftjoinrel_from_query(root, innerrelid, sjinfo); |
117 | | |
118 | | /* We verify that exactly one reference gets removed from joinlist */ |
119 | 0 | nremoved = 0; |
120 | 0 | joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved); |
121 | 0 | if (nremoved != 1) |
122 | 0 | elog(ERROR, "failed to find relation %d in joinlist", innerrelid); |
123 | | |
124 | | /* |
125 | | * We can delete this SpecialJoinInfo from the list too, since it's no |
126 | | * longer of interest. (Since we'll restart the foreach loop |
127 | | * immediately, we don't bother with foreach_delete_current.) |
128 | | */ |
129 | 0 | root->join_info_list = list_delete_cell(root->join_info_list, lc); |
130 | | |
131 | | /* |
132 | | * Restart the scan. This is necessary to ensure we find all |
133 | | * removable joins independently of ordering of the join_info_list |
134 | | * (note that removal of attr_needed bits may make a join appear |
135 | | * removable that did not before). |
136 | | */ |
137 | 0 | goto restart; |
138 | 0 | } |
139 | | |
140 | 0 | return joinlist; |
141 | 0 | } |
142 | | |
143 | | /* |
144 | | * join_is_removable |
145 | | * Check whether we need not perform this special join at all, because |
146 | | * it will just duplicate its left input. |
147 | | * |
148 | | * This is true for a left join for which the join condition cannot match |
149 | | * more than one inner-side row. (There are other possibly interesting |
150 | | * cases, but we don't have the infrastructure to prove them.) We also |
151 | | * have to check that the inner side doesn't generate any variables needed |
152 | | * above the join. |
153 | | */ |
154 | | static bool |
155 | | join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) |
156 | 0 | { |
157 | 0 | int innerrelid; |
158 | 0 | RelOptInfo *innerrel; |
159 | 0 | Relids inputrelids; |
160 | 0 | Relids joinrelids; |
161 | 0 | List *clause_list = NIL; |
162 | 0 | ListCell *l; |
163 | 0 | int attroff; |
164 | | |
165 | | /* |
166 | | * Must be a left join to a single baserel, else we aren't going to be |
167 | | * able to do anything with it. |
168 | | */ |
169 | 0 | if (sjinfo->jointype != JOIN_LEFT) |
170 | 0 | return false; |
171 | | |
172 | 0 | if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) |
173 | 0 | return false; |
174 | | |
175 | | /* |
176 | | * Never try to eliminate a left join to the query result rel. Although |
177 | | * the case is syntactically impossible in standard SQL, MERGE will build |
178 | | * a join tree that looks exactly like that. |
179 | | */ |
180 | 0 | if (innerrelid == root->parse->resultRelation) |
181 | 0 | return false; |
182 | | |
183 | 0 | innerrel = find_base_rel(root, innerrelid); |
184 | | |
185 | | /* |
186 | | * Before we go to the effort of checking whether any innerrel variables |
187 | | * are needed above the join, make a quick check to eliminate cases in |
188 | | * which we will surely be unable to prove uniqueness of the innerrel. |
189 | | */ |
190 | 0 | if (!rel_supports_distinctness(root, innerrel)) |
191 | 0 | return false; |
192 | | |
193 | | /* Compute the relid set for the join we are considering */ |
194 | 0 | inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); |
195 | 0 | Assert(sjinfo->ojrelid != 0); |
196 | 0 | joinrelids = bms_copy(inputrelids); |
197 | 0 | joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid); |
198 | | |
199 | | /* |
200 | | * We can't remove the join if any inner-rel attributes are used above the |
201 | | * join. Here, "above" the join includes pushed-down conditions, so we |
202 | | * should reject if attr_needed includes the OJ's own relid; therefore, |
203 | | * compare to inputrelids not joinrelids. |
204 | | * |
205 | | * As a micro-optimization, it seems better to start with max_attr and |
206 | | * count down rather than starting with min_attr and counting up, on the |
207 | | * theory that the system attributes are somewhat less likely to be wanted |
208 | | * and should be tested last. |
209 | | */ |
210 | 0 | for (attroff = innerrel->max_attr - innerrel->min_attr; |
211 | 0 | attroff >= 0; |
212 | 0 | attroff--) |
213 | 0 | { |
214 | 0 | if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids)) |
215 | 0 | return false; |
216 | 0 | } |
217 | | |
218 | | /* |
219 | | * Similarly check that the inner rel isn't needed by any PlaceHolderVars |
220 | | * that will be used above the join. The PHV case is a little bit more |
221 | | * complicated, because PHVs may have been assigned a ph_eval_at location |
222 | | * that includes the innerrel, yet their contained expression might not |
223 | | * actually reference the innerrel (it could be just a constant, for |
224 | | * instance). If such a PHV is due to be evaluated above the join then it |
225 | | * needn't prevent join removal. |
226 | | */ |
227 | 0 | foreach(l, root->placeholder_list) |
228 | 0 | { |
229 | 0 | PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); |
230 | |
|
231 | 0 | if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) |
232 | 0 | return false; /* it references innerrel laterally */ |
233 | 0 | if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) |
234 | 0 | continue; /* it definitely doesn't reference innerrel */ |
235 | 0 | if (bms_is_subset(phinfo->ph_needed, inputrelids)) |
236 | 0 | continue; /* PHV is not used above the join */ |
237 | 0 | if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at)) |
238 | 0 | return false; /* it has to be evaluated below the join */ |
239 | | |
240 | | /* |
241 | | * We need to be sure there will still be a place to evaluate the PHV |
242 | | * if we remove the join, ie that ph_eval_at wouldn't become empty. |
243 | | */ |
244 | 0 | if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at)) |
245 | 0 | return false; /* there isn't any other place to eval PHV */ |
246 | | /* Check contained expression last, since this is a bit expensive */ |
247 | 0 | if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr), |
248 | 0 | innerrel->relids)) |
249 | 0 | return false; /* contained expression references innerrel */ |
250 | 0 | } |
251 | | |
252 | | /* |
253 | | * Search for mergejoinable clauses that constrain the inner rel against |
254 | | * either the outer rel or a pseudoconstant. If an operator is |
255 | | * mergejoinable then it behaves like equality for some btree opclass, so |
256 | | * it's what we want. The mergejoinability test also eliminates clauses |
257 | | * containing volatile functions, which we couldn't depend on. |
258 | | */ |
259 | 0 | foreach(l, innerrel->joininfo) |
260 | 0 | { |
261 | 0 | RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); |
262 | | |
263 | | /* |
264 | | * If the current join commutes with some other outer join(s) via |
265 | | * outer join identity 3, there will be multiple clones of its join |
266 | | * clauses in the joininfo list. We want to consider only the |
267 | | * has_clone form of such clauses. Processing more than one form |
268 | | * would be wasteful, and also some of the others would confuse the |
269 | | * RINFO_IS_PUSHED_DOWN test below. |
270 | | */ |
271 | 0 | if (restrictinfo->is_clone) |
272 | 0 | continue; /* ignore it */ |
273 | | |
274 | | /* |
275 | | * If it's not a join clause for this outer join, we can't use it. |
276 | | * Note that if the clause is pushed-down, then it is logically from |
277 | | * above the outer join, even if it references no other rels (it might |
278 | | * be from WHERE, for example). |
279 | | */ |
280 | 0 | if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) |
281 | 0 | continue; /* ignore; not useful here */ |
282 | | |
283 | | /* Ignore if it's not a mergejoinable clause */ |
284 | 0 | if (!restrictinfo->can_join || |
285 | 0 | restrictinfo->mergeopfamilies == NIL) |
286 | 0 | continue; /* not mergejoinable */ |
287 | | |
288 | | /* |
289 | | * Check if the clause has the form "outer op inner" or "inner op |
290 | | * outer", and if so mark which side is inner. |
291 | | */ |
292 | 0 | if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, |
293 | 0 | innerrel->relids)) |
294 | 0 | continue; /* no good for these input relations */ |
295 | | |
296 | | /* OK, add to list */ |
297 | 0 | clause_list = lappend(clause_list, restrictinfo); |
298 | 0 | } |
299 | | |
300 | | /* |
301 | | * Now that we have the relevant equality join clauses, try to prove the |
302 | | * innerrel distinct. |
303 | | */ |
304 | 0 | if (rel_is_distinct_for(root, innerrel, clause_list, NULL)) |
305 | 0 | return true; |
306 | | |
307 | | /* |
308 | | * Some day it would be nice to check for other methods of establishing |
309 | | * distinctness. |
310 | | */ |
311 | 0 | return false; |
312 | 0 | } |
313 | | |
314 | | |
315 | | /* |
316 | | * Remove the target rel->relid and references to the target join from the |
317 | | * planner's data structures, having determined that there is no need |
318 | | * to include them in the query. Optionally replace them with subst if subst |
319 | | * is non-negative. |
320 | | * |
321 | | * This function updates only parts needed for both left-join removal and |
322 | | * self-join removal. |
323 | | */ |
324 | | static void |
325 | | remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel, |
326 | | int subst, SpecialJoinInfo *sjinfo, |
327 | | Relids joinrelids) |
328 | 0 | { |
329 | 0 | int relid = rel->relid; |
330 | 0 | Index rti; |
331 | 0 | ListCell *l; |
332 | | |
333 | | /* |
334 | | * Update all_baserels and related relid sets. |
335 | | */ |
336 | 0 | root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst); |
337 | 0 | root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst); |
338 | |
|
339 | 0 | if (sjinfo != NULL) |
340 | 0 | { |
341 | 0 | root->outer_join_rels = bms_del_member(root->outer_join_rels, |
342 | 0 | sjinfo->ojrelid); |
343 | 0 | root->all_query_rels = bms_del_member(root->all_query_rels, |
344 | 0 | sjinfo->ojrelid); |
345 | 0 | } |
346 | | |
347 | | /* |
348 | | * Likewise remove references from SpecialJoinInfo data structures. |
349 | | * |
350 | | * This is relevant in case the outer join we're deleting is nested inside |
351 | | * other outer joins: the upper joins' relid sets have to be adjusted. The |
352 | | * RHS of the target outer join will be made empty here, but that's OK |
353 | | * since caller will delete that SpecialJoinInfo entirely. |
354 | | */ |
355 | 0 | foreach(l, root->join_info_list) |
356 | 0 | { |
357 | 0 | SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l); |
358 | | |
359 | | /* |
360 | | * initsplan.c is fairly cavalier about allowing SpecialJoinInfos' |
361 | | * lefthand/righthand relid sets to be shared with other data |
362 | | * structures. Ensure that we don't modify the original relid sets. |
363 | | * (The commute_xxx sets are always per-SpecialJoinInfo though.) |
364 | | */ |
365 | 0 | sjinf->min_lefthand = bms_copy(sjinf->min_lefthand); |
366 | 0 | sjinf->min_righthand = bms_copy(sjinf->min_righthand); |
367 | 0 | sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand); |
368 | 0 | sjinf->syn_righthand = bms_copy(sjinf->syn_righthand); |
369 | | /* Now remove relid from the sets: */ |
370 | 0 | sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst); |
371 | 0 | sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst); |
372 | 0 | sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst); |
373 | 0 | sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst); |
374 | |
|
375 | 0 | if (sjinfo != NULL) |
376 | 0 | { |
377 | 0 | Assert(subst <= 0); |
378 | | |
379 | | /* Remove sjinfo->ojrelid bits from the sets: */ |
380 | 0 | sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, |
381 | 0 | sjinfo->ojrelid); |
382 | 0 | sjinf->min_righthand = bms_del_member(sjinf->min_righthand, |
383 | 0 | sjinfo->ojrelid); |
384 | 0 | sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, |
385 | 0 | sjinfo->ojrelid); |
386 | 0 | sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, |
387 | 0 | sjinfo->ojrelid); |
388 | | /* relid cannot appear in these fields, but ojrelid can: */ |
389 | 0 | sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, |
390 | 0 | sjinfo->ojrelid); |
391 | 0 | sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, |
392 | 0 | sjinfo->ojrelid); |
393 | 0 | sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, |
394 | 0 | sjinfo->ojrelid); |
395 | 0 | sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, |
396 | 0 | sjinfo->ojrelid); |
397 | 0 | } |
398 | 0 | else |
399 | 0 | { |
400 | 0 | Assert(subst > 0); |
401 | |
|
402 | 0 | ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst, |
403 | 0 | 0, replace_relid_callback); |
404 | 0 | } |
405 | 0 | } |
406 | | |
407 | | /* |
408 | | * Likewise remove references from PlaceHolderVar data structures, |
409 | | * removing any no-longer-needed placeholders entirely. We remove PHV |
410 | | * only for left-join removal. With self-join elimination, PHVs already |
411 | | * get moved to the remaining relation, where they might still be needed. |
412 | | * It might also happen that we skip the removal of some PHVs that could |
413 | | * be removed. However, the overhead of extra PHVs is small compared to |
414 | | * the complexity of analysis needed to remove them. |
415 | | * |
416 | | * Removal is a bit trickier than it might seem: we can remove PHVs that |
417 | | * are used at the target rel and/or in the join qual, but not those that |
418 | | * are used at join partner rels or above the join. It's not that easy to |
419 | | * distinguish PHVs used at partner rels from those used in the join qual, |
420 | | * since they will both have ph_needed sets that are subsets of |
421 | | * joinrelids. However, a PHV used at a partner rel could not have the |
422 | | * target rel in ph_eval_at, so we check that while deciding whether to |
423 | | * remove or just update the PHV. There is no corresponding test in |
424 | | * join_is_removable because it doesn't need to distinguish those cases. |
425 | | */ |
426 | 0 | foreach(l, root->placeholder_list) |
427 | 0 | { |
428 | 0 | PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); |
429 | |
|
430 | 0 | Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral)); |
431 | 0 | if (sjinfo != NULL && |
432 | 0 | bms_is_subset(phinfo->ph_needed, joinrelids) && |
433 | 0 | bms_is_member(relid, phinfo->ph_eval_at) && |
434 | 0 | !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at)) |
435 | 0 | { |
436 | | /* |
437 | | * This code shouldn't be executed if one relation is substituted |
438 | | * with another: in this case, the placeholder may be employed in |
439 | | * a filter inside the scan node the SJE removes. |
440 | | */ |
441 | 0 | root->placeholder_list = foreach_delete_current(root->placeholder_list, |
442 | 0 | l); |
443 | 0 | root->placeholder_array[phinfo->phid] = NULL; |
444 | 0 | } |
445 | 0 | else |
446 | 0 | { |
447 | 0 | PlaceHolderVar *phv = phinfo->ph_var; |
448 | |
|
449 | 0 | phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst); |
450 | 0 | if (sjinfo != NULL) |
451 | 0 | phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, |
452 | 0 | sjinfo->ojrelid, subst); |
453 | 0 | Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */ |
454 | | /* Reduce ph_needed to contain only "relation 0"; see below */ |
455 | 0 | if (bms_is_member(0, phinfo->ph_needed)) |
456 | 0 | phinfo->ph_needed = bms_make_singleton(0); |
457 | 0 | else |
458 | 0 | phinfo->ph_needed = NULL; |
459 | |
|
460 | 0 | phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst); |
461 | | |
462 | | /* |
463 | | * ph_lateral might contain rels mentioned in ph_eval_at after the |
464 | | * replacement, remove them. |
465 | | */ |
466 | 0 | phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at); |
467 | | /* ph_lateral might or might not be empty */ |
468 | |
|
469 | 0 | phv->phrels = adjust_relid_set(phv->phrels, relid, subst); |
470 | 0 | if (sjinfo != NULL) |
471 | 0 | phv->phrels = adjust_relid_set(phv->phrels, |
472 | 0 | sjinfo->ojrelid, subst); |
473 | 0 | Assert(!bms_is_empty(phv->phrels)); |
474 | |
|
475 | 0 | ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0, |
476 | 0 | replace_relid_callback); |
477 | |
|
478 | 0 | Assert(phv->phnullingrels == NULL); /* no need to adjust */ |
479 | 0 | } |
480 | 0 | } |
481 | | |
482 | | /* |
483 | | * Likewise remove references from EquivalenceClasses. |
484 | | */ |
485 | 0 | foreach(l, root->eq_classes) |
486 | 0 | { |
487 | 0 | EquivalenceClass *ec = (EquivalenceClass *) lfirst(l); |
488 | |
|
489 | 0 | if (bms_is_member(relid, ec->ec_relids) || |
490 | 0 | (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids))) |
491 | 0 | remove_rel_from_eclass(ec, sjinfo, relid, subst); |
492 | 0 | } |
493 | | |
494 | | /* |
495 | | * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar |
496 | | * ph_needed relid sets. These have to be known accurately, else we may |
497 | | * fail to remove other now-removable outer joins. And our removal of the |
498 | | * join clause(s) for this outer join may mean that Vars that were |
499 | | * formerly needed no longer are. So we have to do this honestly by |
500 | | * repeating the construction of those relid sets. We can cheat to one |
501 | | * small extent: we can avoid re-examining the targetlist and HAVING qual |
502 | | * by preserving "relation 0" bits from the existing relid sets. This is |
503 | | * safe because we'd never remove such references. |
504 | | * |
505 | | * So, start by removing all other bits from attr_needed sets and |
506 | | * lateral_vars lists. (We already did this above for ph_needed.) |
507 | | */ |
508 | 0 | for (rti = 1; rti < root->simple_rel_array_size; rti++) |
509 | 0 | { |
510 | 0 | RelOptInfo *otherrel = root->simple_rel_array[rti]; |
511 | 0 | int attroff; |
512 | | |
513 | | /* there may be empty slots corresponding to non-baserel RTEs */ |
514 | 0 | if (otherrel == NULL) |
515 | 0 | continue; |
516 | | |
517 | 0 | Assert(otherrel->relid == rti); /* sanity check on array */ |
518 | |
|
519 | 0 | for (attroff = otherrel->max_attr - otherrel->min_attr; |
520 | 0 | attroff >= 0; |
521 | 0 | attroff--) |
522 | 0 | { |
523 | 0 | if (bms_is_member(0, otherrel->attr_needed[attroff])) |
524 | 0 | otherrel->attr_needed[attroff] = bms_make_singleton(0); |
525 | 0 | else |
526 | 0 | otherrel->attr_needed[attroff] = NULL; |
527 | 0 | } |
528 | |
|
529 | 0 | if (subst > 0) |
530 | 0 | ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid, |
531 | 0 | subst, 0, replace_relid_callback); |
532 | 0 | } |
533 | 0 | } |
534 | | |
535 | | /* |
536 | | * Remove the target relid and references to the target join from the |
537 | | * planner's data structures, having determined that there is no need |
538 | | * to include them in the query. |
539 | | * |
540 | | * We are not terribly thorough here. We only bother to update parts of |
541 | | * the planner's data structures that will actually be consulted later. |
542 | | */ |
543 | | static void |
544 | | remove_leftjoinrel_from_query(PlannerInfo *root, int relid, |
545 | | SpecialJoinInfo *sjinfo) |
546 | 0 | { |
547 | 0 | RelOptInfo *rel = find_base_rel(root, relid); |
548 | 0 | int ojrelid = sjinfo->ojrelid; |
549 | 0 | Relids joinrelids; |
550 | 0 | Relids join_plus_commute; |
551 | 0 | List *joininfos; |
552 | 0 | ListCell *l; |
553 | | |
554 | | /* Compute the relid set for the join we are considering */ |
555 | 0 | joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); |
556 | 0 | Assert(ojrelid != 0); |
557 | 0 | joinrelids = bms_add_member(joinrelids, ojrelid); |
558 | |
|
559 | 0 | remove_rel_from_query(root, rel, -1, sjinfo, joinrelids); |
560 | | |
561 | | /* |
562 | | * Remove any joinquals referencing the rel from the joininfo lists. |
563 | | * |
564 | | * In some cases, a joinqual has to be put back after deleting its |
565 | | * reference to the target rel. This can occur for pseudoconstant and |
566 | | * outerjoin-delayed quals, which can get marked as requiring the rel in |
567 | | * order to force them to be evaluated at or above the join. We can't |
568 | | * just discard them, though. Only quals that logically belonged to the |
569 | | * outer join being discarded should be removed from the query. |
570 | | * |
571 | | * We might encounter a qual that is a clone of a deletable qual with some |
572 | | * outer-join relids added (see deconstruct_distribute_oj_quals). To |
573 | | * ensure we get rid of such clones as well, add the relids of all OJs |
574 | | * commutable with this one to the set we test against for |
575 | | * pushed-down-ness. |
576 | | */ |
577 | 0 | join_plus_commute = bms_union(joinrelids, |
578 | 0 | sjinfo->commute_above_r); |
579 | 0 | join_plus_commute = bms_add_members(join_plus_commute, |
580 | 0 | sjinfo->commute_below_l); |
581 | | |
582 | | /* |
583 | | * We must make a copy of the rel's old joininfo list before starting the |
584 | | * loop, because otherwise remove_join_clause_from_rels would destroy the |
585 | | * list while we're scanning it. |
586 | | */ |
587 | 0 | joininfos = list_copy(rel->joininfo); |
588 | 0 | foreach(l, joininfos) |
589 | 0 | { |
590 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); |
591 | |
|
592 | 0 | remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); |
593 | |
|
594 | 0 | if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute)) |
595 | 0 | { |
596 | | /* |
597 | | * There might be references to relid or ojrelid in the |
598 | | * RestrictInfo's relid sets, as a consequence of PHVs having had |
599 | | * ph_eval_at sets that include those. We already checked above |
600 | | * that any such PHV is safe (and updated its ph_eval_at), so we |
601 | | * can just drop those references. |
602 | | */ |
603 | 0 | remove_rel_from_restrictinfo(rinfo, relid, ojrelid); |
604 | | |
605 | | /* |
606 | | * Cross-check that the clause itself does not reference the |
607 | | * target rel or join. |
608 | | */ |
609 | | #ifdef USE_ASSERT_CHECKING |
610 | | { |
611 | | Relids clause_varnos = pull_varnos(root, |
612 | | (Node *) rinfo->clause); |
613 | | |
614 | | Assert(!bms_is_member(relid, clause_varnos)); |
615 | | Assert(!bms_is_member(ojrelid, clause_varnos)); |
616 | | } |
617 | | #endif |
618 | | /* Now throw it back into the joininfo lists */ |
619 | 0 | distribute_restrictinfo_to_rels(root, rinfo); |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | | /* |
624 | | * There may be references to the rel in root->fkey_list, but if so, |
625 | | * match_foreign_keys_to_quals() will get rid of them. |
626 | | */ |
627 | | |
628 | | /* |
629 | | * Now remove the rel from the baserel array to prevent it from being |
630 | | * referenced again. (We can't do this earlier because |
631 | | * remove_join_clause_from_rels will touch it.) |
632 | | */ |
633 | 0 | root->simple_rel_array[relid] = NULL; |
634 | 0 | root->simple_rte_array[relid] = NULL; |
635 | | |
636 | | /* And nuke the RelOptInfo, just in case there's another access path */ |
637 | 0 | pfree(rel); |
638 | | |
639 | | /* |
640 | | * Now repeat construction of attr_needed bits coming from all other |
641 | | * sources. |
642 | | */ |
643 | 0 | rebuild_placeholder_attr_needed(root); |
644 | 0 | rebuild_joinclause_attr_needed(root); |
645 | 0 | rebuild_eclass_attr_needed(root); |
646 | 0 | rebuild_lateral_attr_needed(root); |
647 | 0 | } |
648 | | |
649 | | /* |
650 | | * Remove any references to relid or ojrelid from the RestrictInfo. |
651 | | * |
652 | | * We only bother to clean out bits in clause_relids and required_relids, |
653 | | * not nullingrel bits in contained Vars and PHVs. (This might have to be |
654 | | * improved sometime.) However, if the RestrictInfo contains an OR clause |
655 | | * we have to also clean up the sub-clauses. |
656 | | */ |
657 | | static void |
658 | | remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid) |
659 | 0 | { |
660 | | /* |
661 | | * initsplan.c is fairly cavalier about allowing RestrictInfos to share |
662 | | * relid sets with other RestrictInfos, and SpecialJoinInfos too. Make |
663 | | * sure this RestrictInfo has its own relid sets before we modify them. |
664 | | * (In present usage, clause_relids is probably not shared, but |
665 | | * required_relids could be; let's not assume anything.) |
666 | | */ |
667 | 0 | rinfo->clause_relids = bms_copy(rinfo->clause_relids); |
668 | 0 | rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid); |
669 | 0 | rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid); |
670 | | /* Likewise for required_relids */ |
671 | 0 | rinfo->required_relids = bms_copy(rinfo->required_relids); |
672 | 0 | rinfo->required_relids = bms_del_member(rinfo->required_relids, relid); |
673 | 0 | rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid); |
674 | | |
675 | | /* If it's an OR, recurse to clean up sub-clauses */ |
676 | 0 | if (restriction_is_or_clause(rinfo)) |
677 | 0 | { |
678 | 0 | ListCell *lc; |
679 | |
|
680 | 0 | Assert(is_orclause(rinfo->orclause)); |
681 | 0 | foreach(lc, ((BoolExpr *) rinfo->orclause)->args) |
682 | 0 | { |
683 | 0 | Node *orarg = (Node *) lfirst(lc); |
684 | | |
685 | | /* OR arguments should be ANDs or sub-RestrictInfos */ |
686 | 0 | if (is_andclause(orarg)) |
687 | 0 | { |
688 | 0 | List *andargs = ((BoolExpr *) orarg)->args; |
689 | 0 | ListCell *lc2; |
690 | |
|
691 | 0 | foreach(lc2, andargs) |
692 | 0 | { |
693 | 0 | RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2); |
694 | |
|
695 | 0 | remove_rel_from_restrictinfo(rinfo2, relid, ojrelid); |
696 | 0 | } |
697 | 0 | } |
698 | 0 | else |
699 | 0 | { |
700 | 0 | RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg); |
701 | |
|
702 | 0 | remove_rel_from_restrictinfo(rinfo2, relid, ojrelid); |
703 | 0 | } |
704 | 0 | } |
705 | 0 | } |
706 | 0 | } |
707 | | |
708 | | /* |
709 | | * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL) |
710 | | * from the EquivalenceClass. |
711 | | * |
712 | | * Like remove_rel_from_restrictinfo, we don't worry about cleaning out |
713 | | * any nullingrel bits in contained Vars and PHVs. (This might have to be |
714 | | * improved sometime.) We do need to fix the EC and EM relid sets to ensure |
715 | | * that implied join equalities will be generated at the appropriate join |
716 | | * level(s). |
717 | | */ |
718 | | static void |
719 | | remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo, |
720 | | int relid, int subst) |
721 | 0 | { |
722 | 0 | ListCell *lc; |
723 | | |
724 | | /* Fix up the EC's overall relids */ |
725 | 0 | ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst); |
726 | 0 | if (sjinfo != NULL) |
727 | 0 | ec->ec_relids = adjust_relid_set(ec->ec_relids, |
728 | 0 | sjinfo->ojrelid, subst); |
729 | | |
730 | | /* |
731 | | * We don't expect any EC child members to exist at this point. Ensure |
732 | | * that's the case, otherwise, we might be getting asked to do something |
733 | | * this function hasn't been coded for. |
734 | | */ |
735 | 0 | Assert(ec->ec_childmembers == NULL); |
736 | | |
737 | | /* |
738 | | * Fix up the member expressions. Any non-const member that ends with |
739 | | * empty em_relids must be a Var or PHV of the removed relation. We don't |
740 | | * need it anymore, so we can drop it. |
741 | | */ |
742 | 0 | foreach(lc, ec->ec_members) |
743 | 0 | { |
744 | 0 | EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); |
745 | |
|
746 | 0 | if (bms_is_member(relid, cur_em->em_relids) || |
747 | 0 | (sjinfo != NULL && bms_is_member(sjinfo->ojrelid, |
748 | 0 | cur_em->em_relids))) |
749 | 0 | { |
750 | 0 | Assert(!cur_em->em_is_const); |
751 | 0 | cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst); |
752 | 0 | if (sjinfo != NULL) |
753 | 0 | cur_em->em_relids = adjust_relid_set(cur_em->em_relids, |
754 | 0 | sjinfo->ojrelid, subst); |
755 | 0 | if (bms_is_empty(cur_em->em_relids)) |
756 | 0 | ec->ec_members = foreach_delete_current(ec->ec_members, lc); |
757 | 0 | } |
758 | 0 | } |
759 | | |
760 | | /* Fix up the source clauses, in case we can re-use them later */ |
761 | 0 | foreach(lc, ec->ec_sources) |
762 | 0 | { |
763 | 0 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); |
764 | |
|
765 | 0 | if (sjinfo == NULL) |
766 | 0 | ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0, |
767 | 0 | replace_relid_callback); |
768 | 0 | else |
769 | 0 | remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid); |
770 | 0 | } |
771 | | |
772 | | /* |
773 | | * Rather than expend code on fixing up any already-derived clauses, just |
774 | | * drop them. (At this point, any such clauses would be base restriction |
775 | | * clauses, which we'd not need anymore anyway.) |
776 | | */ |
777 | 0 | ec_clear_derived_clauses(ec); |
778 | 0 | } |
779 | | |
780 | | /* |
781 | | * Remove any occurrences of the target relid from a joinlist structure. |
782 | | * |
783 | | * It's easiest to build a whole new list structure, so we handle it that |
784 | | * way. Efficiency is not a big deal here. |
785 | | * |
786 | | * *nremoved is incremented by the number of occurrences removed (there |
787 | | * should be exactly one, but the caller checks that). |
788 | | */ |
789 | | static List * |
790 | | remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) |
791 | 0 | { |
792 | 0 | List *result = NIL; |
793 | 0 | ListCell *jl; |
794 | |
|
795 | 0 | foreach(jl, joinlist) |
796 | 0 | { |
797 | 0 | Node *jlnode = (Node *) lfirst(jl); |
798 | |
|
799 | 0 | if (IsA(jlnode, RangeTblRef)) |
800 | 0 | { |
801 | 0 | int varno = ((RangeTblRef *) jlnode)->rtindex; |
802 | |
|
803 | 0 | if (varno == relid) |
804 | 0 | (*nremoved)++; |
805 | 0 | else |
806 | 0 | result = lappend(result, jlnode); |
807 | 0 | } |
808 | 0 | else if (IsA(jlnode, List)) |
809 | 0 | { |
810 | | /* Recurse to handle subproblem */ |
811 | 0 | List *sublist; |
812 | |
|
813 | 0 | sublist = remove_rel_from_joinlist((List *) jlnode, |
814 | 0 | relid, nremoved); |
815 | | /* Avoid including empty sub-lists in the result */ |
816 | 0 | if (sublist) |
817 | 0 | result = lappend(result, sublist); |
818 | 0 | } |
819 | 0 | else |
820 | 0 | { |
821 | 0 | elog(ERROR, "unrecognized joinlist node type: %d", |
822 | 0 | (int) nodeTag(jlnode)); |
823 | 0 | } |
824 | 0 | } |
825 | | |
826 | 0 | return result; |
827 | 0 | } |
828 | | |
829 | | |
830 | | /* |
831 | | * reduce_unique_semijoins |
832 | | * Check for semijoins that can be simplified to plain inner joins |
833 | | * because the inner relation is provably unique for the join clauses. |
834 | | * |
835 | | * Ideally this would happen during reduce_outer_joins, but we don't have |
836 | | * enough information at that point. |
837 | | * |
838 | | * To perform the strength reduction when applicable, we need only delete |
839 | | * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't |
840 | | * bother fixing the join type attributed to it in the query jointree, |
841 | | * since that won't be consulted again.) |
842 | | */ |
843 | | void |
844 | | reduce_unique_semijoins(PlannerInfo *root) |
845 | 0 | { |
846 | 0 | ListCell *lc; |
847 | | |
848 | | /* |
849 | | * Scan the join_info_list to find semijoins. |
850 | | */ |
851 | 0 | foreach(lc, root->join_info_list) |
852 | 0 | { |
853 | 0 | SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); |
854 | 0 | int innerrelid; |
855 | 0 | RelOptInfo *innerrel; |
856 | 0 | Relids joinrelids; |
857 | 0 | List *restrictlist; |
858 | | |
859 | | /* |
860 | | * Must be a semijoin to a single baserel, else we aren't going to be |
861 | | * able to do anything with it. |
862 | | */ |
863 | 0 | if (sjinfo->jointype != JOIN_SEMI) |
864 | 0 | continue; |
865 | | |
866 | 0 | if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) |
867 | 0 | continue; |
868 | | |
869 | 0 | innerrel = find_base_rel(root, innerrelid); |
870 | | |
871 | | /* |
872 | | * Before we trouble to run generate_join_implied_equalities, make a |
873 | | * quick check to eliminate cases in which we will surely be unable to |
874 | | * prove uniqueness of the innerrel. |
875 | | */ |
876 | 0 | if (!rel_supports_distinctness(root, innerrel)) |
877 | 0 | continue; |
878 | | |
879 | | /* Compute the relid set for the join we are considering */ |
880 | 0 | joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); |
881 | 0 | Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */ |
882 | | |
883 | | /* |
884 | | * Since we're only considering a single-rel RHS, any join clauses it |
885 | | * has must be clauses linking it to the semijoin's min_lefthand. We |
886 | | * can also consider EC-derived join clauses. |
887 | | */ |
888 | 0 | restrictlist = |
889 | 0 | list_concat(generate_join_implied_equalities(root, |
890 | 0 | joinrelids, |
891 | 0 | sjinfo->min_lefthand, |
892 | 0 | innerrel, |
893 | 0 | NULL), |
894 | 0 | innerrel->joininfo); |
895 | | |
896 | | /* Test whether the innerrel is unique for those clauses. */ |
897 | 0 | if (!innerrel_is_unique(root, |
898 | 0 | joinrelids, sjinfo->min_lefthand, innerrel, |
899 | 0 | JOIN_SEMI, restrictlist, true)) |
900 | 0 | continue; |
901 | | |
902 | | /* OK, remove the SpecialJoinInfo from the list. */ |
903 | 0 | root->join_info_list = foreach_delete_current(root->join_info_list, lc); |
904 | 0 | } |
905 | 0 | } |
906 | | |
907 | | |
908 | | /* |
909 | | * rel_supports_distinctness |
910 | | * Could the relation possibly be proven distinct on some set of columns? |
911 | | * |
912 | | * This is effectively a pre-checking function for rel_is_distinct_for(). |
913 | | * It must return true if rel_is_distinct_for() could possibly return true |
914 | | * with this rel, but it should not expend a lot of cycles. The idea is |
915 | | * that callers can avoid doing possibly-expensive processing to compute |
916 | | * rel_is_distinct_for()'s argument lists if the call could not possibly |
917 | | * succeed. |
918 | | */ |
919 | | static bool |
920 | | rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) |
921 | 0 | { |
922 | | /* We only know about baserels ... */ |
923 | 0 | if (rel->reloptkind != RELOPT_BASEREL) |
924 | 0 | return false; |
925 | 0 | if (rel->rtekind == RTE_RELATION) |
926 | 0 | { |
927 | | /* |
928 | | * For a plain relation, we only know how to prove uniqueness by |
929 | | * reference to unique indexes. Make sure there's at least one |
930 | | * suitable unique index. It must be immediately enforced, and not a |
931 | | * partial index. (Keep these conditions in sync with |
932 | | * relation_has_unique_index_for!) |
933 | | */ |
934 | 0 | ListCell *lc; |
935 | |
|
936 | 0 | foreach(lc, rel->indexlist) |
937 | 0 | { |
938 | 0 | IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); |
939 | |
|
940 | 0 | if (ind->unique && ind->immediate && ind->indpred == NIL) |
941 | 0 | return true; |
942 | 0 | } |
943 | 0 | } |
944 | 0 | else if (rel->rtekind == RTE_SUBQUERY) |
945 | 0 | { |
946 | 0 | Query *subquery = root->simple_rte_array[rel->relid]->subquery; |
947 | | |
948 | | /* Check if the subquery has any qualities that support distinctness */ |
949 | 0 | if (query_supports_distinctness(subquery)) |
950 | 0 | return true; |
951 | 0 | } |
952 | | /* We have no proof rules for any other rtekinds. */ |
953 | 0 | return false; |
954 | 0 | } |
955 | | |
956 | | /* |
957 | | * rel_is_distinct_for |
958 | | * Does the relation return only distinct rows according to clause_list? |
959 | | * |
960 | | * clause_list is a list of join restriction clauses involving this rel and |
961 | | * some other one. Return true if no two rows emitted by this rel could |
962 | | * possibly join to the same row of the other rel. |
963 | | * |
964 | | * The caller must have already determined that each condition is a |
965 | | * mergejoinable equality with an expression in this relation on one side, and |
966 | | * an expression not involving this relation on the other. The transient |
967 | | * outer_is_left flag is used to identify which side references this relation: |
968 | | * left side if outer_is_left is false, right side if it is true. |
969 | | * |
970 | | * Note that the passed-in clause_list may be destructively modified! This |
971 | | * is OK for current uses, because the clause_list is built by the caller for |
972 | | * the sole purpose of passing to this function. |
973 | | * |
974 | | * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses, |
975 | | * looking like "x = const" if distinctness is derived from such clauses, not |
976 | | * joininfo clauses. Pass NULL to the extra_clauses if this value is not |
977 | | * needed. |
978 | | */ |
979 | | static bool |
980 | | rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, |
981 | | List **extra_clauses) |
982 | 0 | { |
983 | | /* |
984 | | * We could skip a couple of tests here if we assume all callers checked |
985 | | * rel_supports_distinctness first, but it doesn't seem worth taking any |
986 | | * risk for. |
987 | | */ |
988 | 0 | if (rel->reloptkind != RELOPT_BASEREL) |
989 | 0 | return false; |
990 | 0 | if (rel->rtekind == RTE_RELATION) |
991 | 0 | { |
992 | | /* |
993 | | * Examine the indexes to see if we have a matching unique index. |
994 | | * relation_has_unique_index_for automatically adds any usable |
995 | | * restriction clauses for the rel, so we needn't do that here. |
996 | | */ |
997 | 0 | if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses)) |
998 | 0 | return true; |
999 | 0 | } |
1000 | 0 | else if (rel->rtekind == RTE_SUBQUERY) |
1001 | 0 | { |
1002 | 0 | Index relid = rel->relid; |
1003 | 0 | Query *subquery = root->simple_rte_array[relid]->subquery; |
1004 | 0 | List *colnos = NIL; |
1005 | 0 | List *opids = NIL; |
1006 | 0 | ListCell *l; |
1007 | | |
1008 | | /* |
1009 | | * Build the argument lists for query_is_distinct_for: a list of |
1010 | | * output column numbers that the query needs to be distinct over, and |
1011 | | * a list of equality operators that the output columns need to be |
1012 | | * distinct according to. |
1013 | | * |
1014 | | * (XXX we are not considering restriction clauses attached to the |
1015 | | * subquery; is that worth doing?) |
1016 | | */ |
1017 | 0 | foreach(l, clause_list) |
1018 | 0 | { |
1019 | 0 | RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); |
1020 | 0 | Oid op; |
1021 | 0 | Var *var; |
1022 | | |
1023 | | /* |
1024 | | * Get the equality operator we need uniqueness according to. |
1025 | | * (This might be a cross-type operator and thus not exactly the |
1026 | | * same operator the subquery would consider; that's all right |
1027 | | * since query_is_distinct_for can resolve such cases.) The |
1028 | | * caller's mergejoinability test should have selected only |
1029 | | * OpExprs. |
1030 | | */ |
1031 | 0 | op = castNode(OpExpr, rinfo->clause)->opno; |
1032 | | |
1033 | | /* caller identified the inner side for us */ |
1034 | 0 | if (rinfo->outer_is_left) |
1035 | 0 | var = (Var *) get_rightop(rinfo->clause); |
1036 | 0 | else |
1037 | 0 | var = (Var *) get_leftop(rinfo->clause); |
1038 | | |
1039 | | /* |
1040 | | * We may ignore any RelabelType node above the operand. (There |
1041 | | * won't be more than one, since eval_const_expressions() has been |
1042 | | * applied already.) |
1043 | | */ |
1044 | 0 | if (var && IsA(var, RelabelType)) |
1045 | 0 | var = (Var *) ((RelabelType *) var)->arg; |
1046 | | |
1047 | | /* |
1048 | | * If inner side isn't a Var referencing a subquery output column, |
1049 | | * this clause doesn't help us. |
1050 | | */ |
1051 | 0 | if (!var || !IsA(var, Var) || |
1052 | 0 | var->varno != relid || var->varlevelsup != 0) |
1053 | 0 | continue; |
1054 | | |
1055 | 0 | colnos = lappend_int(colnos, var->varattno); |
1056 | 0 | opids = lappend_oid(opids, op); |
1057 | 0 | } |
1058 | |
|
1059 | 0 | if (query_is_distinct_for(subquery, colnos, opids)) |
1060 | 0 | return true; |
1061 | 0 | } |
1062 | 0 | return false; |
1063 | 0 | } |
1064 | | |
1065 | | |
1066 | | /* |
1067 | | * query_supports_distinctness - could the query possibly be proven distinct |
1068 | | * on some set of output columns? |
1069 | | * |
1070 | | * This is effectively a pre-checking function for query_is_distinct_for(). |
1071 | | * It must return true if query_is_distinct_for() could possibly return true |
1072 | | * with this query, but it should not expend a lot of cycles. The idea is |
1073 | | * that callers can avoid doing possibly-expensive processing to compute |
1074 | | * query_is_distinct_for()'s argument lists if the call could not possibly |
1075 | | * succeed. |
1076 | | */ |
1077 | | bool |
1078 | | query_supports_distinctness(Query *query) |
1079 | 0 | { |
1080 | | /* SRFs break distinctness except with DISTINCT, see below */ |
1081 | 0 | if (query->hasTargetSRFs && query->distinctClause == NIL) |
1082 | 0 | return false; |
1083 | | |
1084 | | /* check for features we can prove distinctness with */ |
1085 | 0 | if (query->distinctClause != NIL || |
1086 | 0 | query->groupClause != NIL || |
1087 | 0 | query->groupingSets != NIL || |
1088 | 0 | query->hasAggs || |
1089 | 0 | query->havingQual || |
1090 | 0 | query->setOperations) |
1091 | 0 | return true; |
1092 | | |
1093 | 0 | return false; |
1094 | 0 | } |
1095 | | |
1096 | | /* |
1097 | | * query_is_distinct_for - does query never return duplicates of the |
1098 | | * specified columns? |
1099 | | * |
1100 | | * query is a not-yet-planned subquery (in current usage, it's always from |
1101 | | * a subquery RTE, which the planner avoids scribbling on). |
1102 | | * |
1103 | | * colnos is an integer list of output column numbers (resno's). We are |
1104 | | * interested in whether rows consisting of just these columns are certain |
1105 | | * to be distinct. "Distinctness" is defined according to whether the |
1106 | | * corresponding upper-level equality operators listed in opids would think |
1107 | | * the values are distinct. (Note: the opids entries could be cross-type |
1108 | | * operators, and thus not exactly the equality operators that the subquery |
1109 | | * would use itself. We use equality_ops_are_compatible() to check |
1110 | | * compatibility. That looks at opfamily membership for index AMs that have |
1111 | | * declared that they support consistent equality semantics within an |
1112 | | * opfamily, and so should give trustworthy answers for all operators that we |
1113 | | * might need to deal with here.) |
1114 | | */ |
1115 | | bool |
1116 | | query_is_distinct_for(Query *query, List *colnos, List *opids) |
1117 | 0 | { |
1118 | 0 | ListCell *l; |
1119 | 0 | Oid opid; |
1120 | |
|
1121 | 0 | Assert(list_length(colnos) == list_length(opids)); |
1122 | | |
1123 | | /* |
1124 | | * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the |
1125 | | * columns in the DISTINCT clause appear in colnos and operator semantics |
1126 | | * match. This is true even if there are SRFs in the DISTINCT columns or |
1127 | | * elsewhere in the tlist. |
1128 | | */ |
1129 | 0 | if (query->distinctClause) |
1130 | 0 | { |
1131 | 0 | foreach(l, query->distinctClause) |
1132 | 0 | { |
1133 | 0 | SortGroupClause *sgc = (SortGroupClause *) lfirst(l); |
1134 | 0 | TargetEntry *tle = get_sortgroupclause_tle(sgc, |
1135 | 0 | query->targetList); |
1136 | |
|
1137 | 0 | opid = distinct_col_search(tle->resno, colnos, opids); |
1138 | 0 | if (!OidIsValid(opid) || |
1139 | 0 | !equality_ops_are_compatible(opid, sgc->eqop)) |
1140 | 0 | break; /* exit early if no match */ |
1141 | 0 | } |
1142 | 0 | if (l == NULL) /* had matches for all? */ |
1143 | 0 | return true; |
1144 | 0 | } |
1145 | | |
1146 | | /* |
1147 | | * Otherwise, a set-returning function in the query's targetlist can |
1148 | | * result in returning duplicate rows, despite any grouping that might |
1149 | | * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY |
1150 | | * columns, it would be safe because they'd be expanded before grouping. |
1151 | | * But it doesn't currently seem worth the effort to check for that.) |
1152 | | */ |
1153 | 0 | if (query->hasTargetSRFs) |
1154 | 0 | return false; |
1155 | | |
1156 | | /* |
1157 | | * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all |
1158 | | * the grouped columns appear in colnos and operator semantics match. |
1159 | | */ |
1160 | 0 | if (query->groupClause && !query->groupingSets) |
1161 | 0 | { |
1162 | 0 | foreach(l, query->groupClause) |
1163 | 0 | { |
1164 | 0 | SortGroupClause *sgc = (SortGroupClause *) lfirst(l); |
1165 | 0 | TargetEntry *tle = get_sortgroupclause_tle(sgc, |
1166 | 0 | query->targetList); |
1167 | |
|
1168 | 0 | opid = distinct_col_search(tle->resno, colnos, opids); |
1169 | 0 | if (!OidIsValid(opid) || |
1170 | 0 | !equality_ops_are_compatible(opid, sgc->eqop)) |
1171 | 0 | break; /* exit early if no match */ |
1172 | 0 | } |
1173 | 0 | if (l == NULL) /* had matches for all? */ |
1174 | 0 | return true; |
1175 | 0 | } |
1176 | 0 | else if (query->groupingSets) |
1177 | 0 | { |
1178 | | /* |
1179 | | * If we have grouping sets with expressions, we probably don't have |
1180 | | * uniqueness and analysis would be hard. Punt. |
1181 | | */ |
1182 | 0 | if (query->groupClause) |
1183 | 0 | return false; |
1184 | | |
1185 | | /* |
1186 | | * If we have no groupClause (therefore no grouping expressions), we |
1187 | | * might have one or many empty grouping sets. If there's just one, |
1188 | | * then we're returning only one row and are certainly unique. But |
1189 | | * otherwise, we know we're certainly not unique. |
1190 | | */ |
1191 | 0 | if (list_length(query->groupingSets) == 1 && |
1192 | 0 | ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY) |
1193 | 0 | return true; |
1194 | 0 | else |
1195 | 0 | return false; |
1196 | 0 | } |
1197 | 0 | else |
1198 | 0 | { |
1199 | | /* |
1200 | | * If we have no GROUP BY, but do have aggregates or HAVING, then the |
1201 | | * result is at most one row so it's surely unique, for any operators. |
1202 | | */ |
1203 | 0 | if (query->hasAggs || query->havingQual) |
1204 | 0 | return true; |
1205 | 0 | } |
1206 | | |
1207 | | /* |
1208 | | * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, |
1209 | | * except with ALL. |
1210 | | */ |
1211 | 0 | if (query->setOperations) |
1212 | 0 | { |
1213 | 0 | SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations); |
1214 | |
|
1215 | 0 | Assert(topop->op != SETOP_NONE); |
1216 | |
|
1217 | 0 | if (!topop->all) |
1218 | 0 | { |
1219 | 0 | ListCell *lg; |
1220 | | |
1221 | | /* We're good if all the nonjunk output columns are in colnos */ |
1222 | 0 | lg = list_head(topop->groupClauses); |
1223 | 0 | foreach(l, query->targetList) |
1224 | 0 | { |
1225 | 0 | TargetEntry *tle = (TargetEntry *) lfirst(l); |
1226 | 0 | SortGroupClause *sgc; |
1227 | |
|
1228 | 0 | if (tle->resjunk) |
1229 | 0 | continue; /* ignore resjunk columns */ |
1230 | | |
1231 | | /* non-resjunk columns should have grouping clauses */ |
1232 | 0 | Assert(lg != NULL); |
1233 | 0 | sgc = (SortGroupClause *) lfirst(lg); |
1234 | 0 | lg = lnext(topop->groupClauses, lg); |
1235 | |
|
1236 | 0 | opid = distinct_col_search(tle->resno, colnos, opids); |
1237 | 0 | if (!OidIsValid(opid) || |
1238 | 0 | !equality_ops_are_compatible(opid, sgc->eqop)) |
1239 | 0 | break; /* exit early if no match */ |
1240 | 0 | } |
1241 | 0 | if (l == NULL) /* had matches for all? */ |
1242 | 0 | return true; |
1243 | 0 | } |
1244 | 0 | } |
1245 | | |
1246 | | /* |
1247 | | * XXX Are there any other cases in which we can easily see the result |
1248 | | * must be distinct? |
1249 | | * |
1250 | | * If you do add more smarts to this function, be sure to update |
1251 | | * query_supports_distinctness() to match. |
1252 | | */ |
1253 | | |
1254 | 0 | return false; |
1255 | 0 | } |
1256 | | |
1257 | | /* |
1258 | | * distinct_col_search - subroutine for query_is_distinct_for |
1259 | | * |
1260 | | * If colno is in colnos, return the corresponding element of opids, |
1261 | | * else return InvalidOid. (Ordinarily colnos would not contain duplicates, |
1262 | | * but if it does, we arbitrarily select the first match.) |
1263 | | */ |
1264 | | static Oid |
1265 | | distinct_col_search(int colno, List *colnos, List *opids) |
1266 | 0 | { |
1267 | 0 | ListCell *lc1, |
1268 | 0 | *lc2; |
1269 | |
|
1270 | 0 | forboth(lc1, colnos, lc2, opids) |
1271 | 0 | { |
1272 | 0 | if (colno == lfirst_int(lc1)) |
1273 | 0 | return lfirst_oid(lc2); |
1274 | 0 | } |
1275 | 0 | return InvalidOid; |
1276 | 0 | } |
1277 | | |
1278 | | |
1279 | | /* |
1280 | | * innerrel_is_unique |
1281 | | * Check if the innerrel provably contains at most one tuple matching any |
1282 | | * tuple from the outerrel, based on join clauses in the 'restrictlist'. |
1283 | | * |
1284 | | * We need an actual RelOptInfo for the innerrel, but it's sufficient to |
1285 | | * identify the outerrel by its Relids. This asymmetry supports use of this |
1286 | | * function before joinrels have been built. (The caller is expected to |
1287 | | * also supply the joinrelids, just to save recalculating that.) |
1288 | | * |
1289 | | * The proof must be made based only on clauses that will be "joinquals" |
1290 | | * rather than "otherquals" at execution. For an inner join there's no |
1291 | | * difference; but if the join is outer, we must ignore pushed-down quals, |
1292 | | * as those will become "otherquals". Note that this means the answer might |
1293 | | * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the |
1294 | | * answer without regard to that, callers must take care not to call this |
1295 | | * with jointypes that would be classified differently by IS_OUTER_JOIN(). |
1296 | | * |
1297 | | * The actual proof is undertaken by is_innerrel_unique_for(); this function |
1298 | | * is a frontend that is mainly concerned with caching the answers. |
1299 | | * In particular, the force_cache argument allows overriding the internal |
1300 | | * heuristic about whether to cache negative answers; it should be "true" |
1301 | | * if making an inquiry that is not part of the normal bottom-up join search |
1302 | | * sequence. |
1303 | | */ |
1304 | | bool |
1305 | | innerrel_is_unique(PlannerInfo *root, |
1306 | | Relids joinrelids, |
1307 | | Relids outerrelids, |
1308 | | RelOptInfo *innerrel, |
1309 | | JoinType jointype, |
1310 | | List *restrictlist, |
1311 | | bool force_cache) |
1312 | 0 | { |
1313 | 0 | return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel, |
1314 | 0 | jointype, restrictlist, force_cache, NULL); |
1315 | 0 | } |
1316 | | |
1317 | | /* |
1318 | | * innerrel_is_unique_ext |
1319 | | * Do the same as innerrel_is_unique(), but also set to (*extra_clauses) |
1320 | | * additional clauses from a baserestrictinfo list used to prove the |
1321 | | * uniqueness. |
1322 | | * |
1323 | | * A non-NULL extra_clauses indicates that we're checking for self-join and |
1324 | | * correspondingly dealing with filtered clauses. |
1325 | | */ |
1326 | | bool |
1327 | | innerrel_is_unique_ext(PlannerInfo *root, |
1328 | | Relids joinrelids, |
1329 | | Relids outerrelids, |
1330 | | RelOptInfo *innerrel, |
1331 | | JoinType jointype, |
1332 | | List *restrictlist, |
1333 | | bool force_cache, |
1334 | | List **extra_clauses) |
1335 | 0 | { |
1336 | 0 | MemoryContext old_context; |
1337 | 0 | ListCell *lc; |
1338 | 0 | UniqueRelInfo *uniqueRelInfo; |
1339 | 0 | List *outer_exprs = NIL; |
1340 | 0 | bool self_join = (extra_clauses != NULL); |
1341 | | |
1342 | | /* Certainly can't prove uniqueness when there are no joinclauses */ |
1343 | 0 | if (restrictlist == NIL) |
1344 | 0 | return false; |
1345 | | |
1346 | | /* |
1347 | | * Make a quick check to eliminate cases in which we will surely be unable |
1348 | | * to prove uniqueness of the innerrel. |
1349 | | */ |
1350 | 0 | if (!rel_supports_distinctness(root, innerrel)) |
1351 | 0 | return false; |
1352 | | |
1353 | | /* |
1354 | | * Query the cache to see if we've managed to prove that innerrel is |
1355 | | * unique for any subset of this outerrel. For non-self-join search, we |
1356 | | * don't need an exact match, as extra outerrels can't make the innerrel |
1357 | | * any less unique (or more formally, the restrictlist for a join to a |
1358 | | * superset outerrel must be a superset of the conditions we successfully |
1359 | | * used before). For self-join search, we require an exact match of |
1360 | | * outerrels because we need extra clauses to be valid for our case. Also, |
1361 | | * for self-join checking we've filtered the clauses list. Thus, we can |
1362 | | * match only the result cached for a self-join search for another |
1363 | | * self-join check. |
1364 | | */ |
1365 | 0 | foreach(lc, innerrel->unique_for_rels) |
1366 | 0 | { |
1367 | 0 | uniqueRelInfo = (UniqueRelInfo *) lfirst(lc); |
1368 | |
|
1369 | 0 | if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) || |
1370 | 0 | (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) && |
1371 | 0 | uniqueRelInfo->self_join)) |
1372 | 0 | { |
1373 | 0 | if (extra_clauses) |
1374 | 0 | *extra_clauses = uniqueRelInfo->extra_clauses; |
1375 | 0 | return true; /* Success! */ |
1376 | 0 | } |
1377 | 0 | } |
1378 | | |
1379 | | /* |
1380 | | * Conversely, we may have already determined that this outerrel, or some |
1381 | | * superset thereof, cannot prove this innerrel to be unique. |
1382 | | */ |
1383 | 0 | foreach(lc, innerrel->non_unique_for_rels) |
1384 | 0 | { |
1385 | 0 | Relids unique_for_rels = (Relids) lfirst(lc); |
1386 | |
|
1387 | 0 | if (bms_is_subset(outerrelids, unique_for_rels)) |
1388 | 0 | return false; |
1389 | 0 | } |
1390 | | |
1391 | | /* No cached information, so try to make the proof. */ |
1392 | 0 | if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, |
1393 | 0 | jointype, restrictlist, |
1394 | 0 | self_join ? &outer_exprs : NULL)) |
1395 | 0 | { |
1396 | | /* |
1397 | | * Cache the positive result for future probes, being sure to keep it |
1398 | | * in the planner_cxt even if we are working in GEQO. |
1399 | | * |
1400 | | * Note: one might consider trying to isolate the minimal subset of |
1401 | | * the outerrels that proved the innerrel unique. But it's not worth |
1402 | | * the trouble, because the planner builds up joinrels incrementally |
1403 | | * and so we'll see the minimally sufficient outerrels before any |
1404 | | * supersets of them anyway. |
1405 | | */ |
1406 | 0 | old_context = MemoryContextSwitchTo(root->planner_cxt); |
1407 | 0 | uniqueRelInfo = makeNode(UniqueRelInfo); |
1408 | 0 | uniqueRelInfo->outerrelids = bms_copy(outerrelids); |
1409 | 0 | uniqueRelInfo->self_join = self_join; |
1410 | 0 | uniqueRelInfo->extra_clauses = outer_exprs; |
1411 | 0 | innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, |
1412 | 0 | uniqueRelInfo); |
1413 | 0 | MemoryContextSwitchTo(old_context); |
1414 | |
|
1415 | 0 | if (extra_clauses) |
1416 | 0 | *extra_clauses = outer_exprs; |
1417 | 0 | return true; /* Success! */ |
1418 | 0 | } |
1419 | 0 | else |
1420 | 0 | { |
1421 | | /* |
1422 | | * None of the join conditions for outerrel proved innerrel unique, so |
1423 | | * we can safely reject this outerrel or any subset of it in future |
1424 | | * checks. |
1425 | | * |
1426 | | * However, in normal planning mode, caching this knowledge is totally |
1427 | | * pointless; it won't be queried again, because we build up joinrels |
1428 | | * from smaller to larger. It's only useful when using GEQO or |
1429 | | * another planner extension that attempts planning multiple times. |
1430 | | * |
1431 | | * Also, allow callers to override that heuristic and force caching; |
1432 | | * that's useful for reduce_unique_semijoins, which calls here before |
1433 | | * the normal join search starts. |
1434 | | */ |
1435 | 0 | if (force_cache || root->assumeReplanning) |
1436 | 0 | { |
1437 | 0 | old_context = MemoryContextSwitchTo(root->planner_cxt); |
1438 | 0 | innerrel->non_unique_for_rels = |
1439 | 0 | lappend(innerrel->non_unique_for_rels, |
1440 | 0 | bms_copy(outerrelids)); |
1441 | 0 | MemoryContextSwitchTo(old_context); |
1442 | 0 | } |
1443 | |
|
1444 | 0 | return false; |
1445 | 0 | } |
1446 | 0 | } |
1447 | | |
1448 | | /* |
1449 | | * is_innerrel_unique_for |
1450 | | * Check if the innerrel provably contains at most one tuple matching any |
1451 | | * tuple from the outerrel, based on join clauses in the 'restrictlist'. |
1452 | | */ |
1453 | | static bool |
1454 | | is_innerrel_unique_for(PlannerInfo *root, |
1455 | | Relids joinrelids, |
1456 | | Relids outerrelids, |
1457 | | RelOptInfo *innerrel, |
1458 | | JoinType jointype, |
1459 | | List *restrictlist, |
1460 | | List **extra_clauses) |
1461 | 0 | { |
1462 | 0 | List *clause_list = NIL; |
1463 | 0 | ListCell *lc; |
1464 | | |
1465 | | /* |
1466 | | * Search for mergejoinable clauses that constrain the inner rel against |
1467 | | * the outer rel. If an operator is mergejoinable then it behaves like |
1468 | | * equality for some btree opclass, so it's what we want. The |
1469 | | * mergejoinability test also eliminates clauses containing volatile |
1470 | | * functions, which we couldn't depend on. |
1471 | | */ |
1472 | 0 | foreach(lc, restrictlist) |
1473 | 0 | { |
1474 | 0 | RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); |
1475 | | |
1476 | | /* |
1477 | | * As noted above, if it's a pushed-down clause and we're at an outer |
1478 | | * join, we can't use it. |
1479 | | */ |
1480 | 0 | if (IS_OUTER_JOIN(jointype) && |
1481 | 0 | RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) |
1482 | 0 | continue; |
1483 | | |
1484 | | /* Ignore if it's not a mergejoinable clause */ |
1485 | 0 | if (!restrictinfo->can_join || |
1486 | 0 | restrictinfo->mergeopfamilies == NIL) |
1487 | 0 | continue; /* not mergejoinable */ |
1488 | | |
1489 | | /* |
1490 | | * Check if the clause has the form "outer op inner" or "inner op |
1491 | | * outer", and if so mark which side is inner. |
1492 | | */ |
1493 | 0 | if (!clause_sides_match_join(restrictinfo, outerrelids, |
1494 | 0 | innerrel->relids)) |
1495 | 0 | continue; /* no good for these input relations */ |
1496 | | |
1497 | | /* OK, add to the list */ |
1498 | 0 | clause_list = lappend(clause_list, restrictinfo); |
1499 | 0 | } |
1500 | | |
1501 | | /* Let rel_is_distinct_for() do the hard work */ |
1502 | 0 | return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses); |
1503 | 0 | } |
1504 | | |
1505 | | /* |
1506 | | * Update EC members to point to the remaining relation instead of the removed |
1507 | | * one, removing duplicates. |
1508 | | * |
1509 | | * Restriction clauses for base relations are already distributed to |
1510 | | * the respective baserestrictinfo lists (see |
1511 | | * generate_implied_equalities_for_column). The above code has already processed |
1512 | | * this list and updated these clauses to reference the remaining |
1513 | | * relation, so that we can skip them here based on their relids. |
1514 | | * |
1515 | | * Likewise, we have already processed the join clauses that join the |
1516 | | * removed relation to the remaining one. |
1517 | | * |
1518 | | * Finally, there might be join clauses tying the removed relation to |
1519 | | * some third relation. We can't just delete the source clauses and |
1520 | | * regenerate them from the EC because the corresponding equality |
1521 | | * operators might be missing (see the handling of ec_broken). |
1522 | | * Therefore, we will update the references in the source clauses. |
1523 | | * |
1524 | | * Derived clauses can be generated again, so it is simpler just to |
1525 | | * delete them. |
1526 | | */ |
1527 | | static void |
1528 | | update_eclasses(EquivalenceClass *ec, int from, int to) |
1529 | 0 | { |
1530 | 0 | List *new_members = NIL; |
1531 | 0 | List *new_sources = NIL; |
1532 | | |
1533 | | /* |
1534 | | * We don't expect any EC child members to exist at this point. Ensure |
1535 | | * that's the case, otherwise, we might be getting asked to do something |
1536 | | * this function hasn't been coded for. |
1537 | | */ |
1538 | 0 | Assert(ec->ec_childmembers == NULL); |
1539 | |
|
1540 | 0 | foreach_node(EquivalenceMember, em, ec->ec_members) |
1541 | 0 | { |
1542 | 0 | bool is_redundant = false; |
1543 | |
|
1544 | 0 | if (!bms_is_member(from, em->em_relids)) |
1545 | 0 | { |
1546 | 0 | new_members = lappend(new_members, em); |
1547 | 0 | continue; |
1548 | 0 | } |
1549 | | |
1550 | 0 | em->em_relids = adjust_relid_set(em->em_relids, from, to); |
1551 | 0 | em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to); |
1552 | | |
1553 | | /* We only process inner joins */ |
1554 | 0 | ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0, |
1555 | 0 | replace_relid_callback); |
1556 | |
|
1557 | 0 | foreach_node(EquivalenceMember, other, new_members) |
1558 | 0 | { |
1559 | 0 | if (!equal(em->em_relids, other->em_relids)) |
1560 | 0 | continue; |
1561 | | |
1562 | 0 | if (equal(em->em_expr, other->em_expr)) |
1563 | 0 | { |
1564 | 0 | is_redundant = true; |
1565 | 0 | break; |
1566 | 0 | } |
1567 | 0 | } |
1568 | |
|
1569 | 0 | if (!is_redundant) |
1570 | 0 | new_members = lappend(new_members, em); |
1571 | 0 | } |
1572 | |
|
1573 | 0 | list_free(ec->ec_members); |
1574 | 0 | ec->ec_members = new_members; |
1575 | |
|
1576 | 0 | ec_clear_derived_clauses(ec); |
1577 | | |
1578 | | /* Update EC source expressions */ |
1579 | 0 | foreach_node(RestrictInfo, rinfo, ec->ec_sources) |
1580 | 0 | { |
1581 | 0 | bool is_redundant = false; |
1582 | |
|
1583 | 0 | if (!bms_is_member(from, rinfo->required_relids)) |
1584 | 0 | { |
1585 | 0 | new_sources = lappend(new_sources, rinfo); |
1586 | 0 | continue; |
1587 | 0 | } |
1588 | | |
1589 | 0 | ChangeVarNodesExtended((Node *) rinfo, from, to, 0, |
1590 | 0 | replace_relid_callback); |
1591 | | |
1592 | | /* |
1593 | | * After switching the clause to the remaining relation, check it for |
1594 | | * redundancy with existing ones. We don't have to check for |
1595 | | * redundancy with derived clauses, because we've just deleted them. |
1596 | | */ |
1597 | 0 | foreach_node(RestrictInfo, other, new_sources) |
1598 | 0 | { |
1599 | 0 | if (!equal(rinfo->clause_relids, other->clause_relids)) |
1600 | 0 | continue; |
1601 | | |
1602 | 0 | if (equal(rinfo->clause, other->clause)) |
1603 | 0 | { |
1604 | 0 | is_redundant = true; |
1605 | 0 | break; |
1606 | 0 | } |
1607 | 0 | } |
1608 | |
|
1609 | 0 | if (!is_redundant) |
1610 | 0 | new_sources = lappend(new_sources, rinfo); |
1611 | 0 | } |
1612 | |
|
1613 | 0 | list_free(ec->ec_sources); |
1614 | 0 | ec->ec_sources = new_sources; |
1615 | 0 | ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to); |
1616 | 0 | } |
1617 | | |
1618 | | /* |
1619 | | * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field, |
1620 | | * which makes almost every RestrictInfo unique. This type of comparison is |
1621 | | * useful when removing duplicates while moving RestrictInfo's from removed |
1622 | | * relation to remaining relation during self-join elimination. |
1623 | | * |
1624 | | * XXX: In the future, we might remove the 'rinfo_serial' field completely and |
1625 | | * get rid of this function. |
1626 | | */ |
1627 | | static bool |
1628 | | restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) |
1629 | 0 | { |
1630 | 0 | int saved_rinfo_serial = a->rinfo_serial; |
1631 | 0 | bool result; |
1632 | |
|
1633 | 0 | a->rinfo_serial = b->rinfo_serial; |
1634 | 0 | result = equal(a, b); |
1635 | 0 | a->rinfo_serial = saved_rinfo_serial; |
1636 | |
|
1637 | 0 | return result; |
1638 | 0 | } |
1639 | | |
1640 | | /* |
1641 | | * This function adds all non-redundant clauses to the keeping relation |
1642 | | * during self-join elimination. That is a contradictory operation. On the |
1643 | | * one hand, we reduce the length of the `restrict` lists, which can |
1644 | | * impact planning or executing time. Additionally, we improve the |
1645 | | * accuracy of cardinality estimation. On the other hand, it is one more |
1646 | | * place that can make planning time much longer in specific cases. It |
1647 | | * would have been better to avoid calling the equal() function here, but |
1648 | | * it's the only way to detect duplicated inequality expressions. |
1649 | | * |
1650 | | * (*keep_rinfo_list) is given by pointer because it might be altered by |
1651 | | * distribute_restrictinfo_to_rels(). |
1652 | | */ |
1653 | | static void |
1654 | | add_non_redundant_clauses(PlannerInfo *root, |
1655 | | List *rinfo_candidates, |
1656 | | List **keep_rinfo_list, |
1657 | | Index removed_relid) |
1658 | 0 | { |
1659 | 0 | foreach_node(RestrictInfo, rinfo, rinfo_candidates) |
1660 | 0 | { |
1661 | 0 | bool is_redundant = false; |
1662 | |
|
1663 | 0 | Assert(!bms_is_member(removed_relid, rinfo->required_relids)); |
1664 | |
|
1665 | 0 | foreach_node(RestrictInfo, src, (*keep_rinfo_list)) |
1666 | 0 | { |
1667 | 0 | if (!bms_equal(src->clause_relids, rinfo->clause_relids)) |
1668 | | /* Can't compare trivially different clauses */ |
1669 | 0 | continue; |
1670 | | |
1671 | 0 | if (src == rinfo || |
1672 | 0 | (rinfo->parent_ec != NULL && |
1673 | 0 | src->parent_ec == rinfo->parent_ec) || |
1674 | 0 | restrict_infos_logically_equal(rinfo, src)) |
1675 | 0 | { |
1676 | 0 | is_redundant = true; |
1677 | 0 | break; |
1678 | 0 | } |
1679 | 0 | } |
1680 | 0 | if (!is_redundant) |
1681 | 0 | distribute_restrictinfo_to_rels(root, rinfo); |
1682 | 0 | } |
1683 | 0 | } |
1684 | | |
1685 | | /* |
1686 | | * A custom callback for ChangeVarNodesExtended() providing |
1687 | | * Self-join elimination (SJE) related functionality |
1688 | | * |
1689 | | * SJE needs to skip the RangeTblRef node |
1690 | | * type. During SJE's last step, remove_rel_from_joinlist() removes |
1691 | | * remaining RangeTblRefs with target relid. If ChangeVarNodes() replaces |
1692 | | * the target relid before, remove_rel_from_joinlist() fails to identify |
1693 | | * the nodes to delete. |
1694 | | * |
1695 | | * SJE also needs to change the relids within RestrictInfo's. |
1696 | | */ |
1697 | | static bool |
1698 | | replace_relid_callback(Node *node, ChangeVarNodes_context *context) |
1699 | 0 | { |
1700 | 0 | if (IsA(node, RangeTblRef)) |
1701 | 0 | { |
1702 | 0 | return true; |
1703 | 0 | } |
1704 | 0 | else if (IsA(node, RestrictInfo)) |
1705 | 0 | { |
1706 | 0 | RestrictInfo *rinfo = (RestrictInfo *) node; |
1707 | 0 | int relid = -1; |
1708 | 0 | bool is_req_equal = |
1709 | 0 | (rinfo->required_relids == rinfo->clause_relids); |
1710 | 0 | bool clause_relids_is_multiple = |
1711 | 0 | (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); |
1712 | | |
1713 | | /* |
1714 | | * Recurse down into clauses if the target relation is present in |
1715 | | * clause_relids or required_relids. We must check required_relids |
1716 | | * because the relation not present in clause_relids might still be |
1717 | | * present somewhere in orclause. |
1718 | | */ |
1719 | 0 | if (bms_is_member(context->rt_index, rinfo->clause_relids) || |
1720 | 0 | bms_is_member(context->rt_index, rinfo->required_relids)) |
1721 | 0 | { |
1722 | 0 | Relids new_clause_relids; |
1723 | |
|
1724 | 0 | ChangeVarNodesWalkExpression((Node *) rinfo->clause, context); |
1725 | 0 | ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context); |
1726 | |
|
1727 | 0 | new_clause_relids = adjust_relid_set(rinfo->clause_relids, |
1728 | 0 | context->rt_index, |
1729 | 0 | context->new_index); |
1730 | | |
1731 | | /* |
1732 | | * Incrementally adjust num_base_rels based on the change of |
1733 | | * clause_relids, which could contain both base relids and |
1734 | | * outer-join relids. This operation is legal until we remove |
1735 | | * only baserels. |
1736 | | */ |
1737 | 0 | rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) - |
1738 | 0 | bms_num_members(new_clause_relids); |
1739 | |
|
1740 | 0 | rinfo->clause_relids = new_clause_relids; |
1741 | 0 | rinfo->left_relids = |
1742 | 0 | adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index); |
1743 | 0 | rinfo->right_relids = |
1744 | 0 | adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index); |
1745 | 0 | } |
1746 | |
|
1747 | 0 | if (is_req_equal) |
1748 | 0 | rinfo->required_relids = rinfo->clause_relids; |
1749 | 0 | else |
1750 | 0 | rinfo->required_relids = |
1751 | 0 | adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index); |
1752 | |
|
1753 | 0 | rinfo->outer_relids = |
1754 | 0 | adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index); |
1755 | 0 | rinfo->incompatible_relids = |
1756 | 0 | adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index); |
1757 | |
|
1758 | 0 | if (rinfo->mergeopfamilies && |
1759 | 0 | bms_get_singleton_member(rinfo->clause_relids, &relid) && |
1760 | 0 | clause_relids_is_multiple && |
1761 | 0 | relid == context->new_index && IsA(rinfo->clause, OpExpr)) |
1762 | 0 | { |
1763 | 0 | Expr *leftOp; |
1764 | 0 | Expr *rightOp; |
1765 | |
|
1766 | 0 | leftOp = (Expr *) get_leftop(rinfo->clause); |
1767 | 0 | rightOp = (Expr *) get_rightop(rinfo->clause); |
1768 | | |
1769 | | /* |
1770 | | * For self-join elimination, changing varnos could transform |
1771 | | * "t1.a = t2.a" into "t1.a = t1.a". That is always true as long |
1772 | | * as "t1.a" is not null. We use qual() to check for such a case, |
1773 | | * and then we replace the qual for a check for not null |
1774 | | * (NullTest). |
1775 | | */ |
1776 | 0 | if (leftOp != NULL && equal(leftOp, rightOp)) |
1777 | 0 | { |
1778 | 0 | NullTest *ntest = makeNode(NullTest); |
1779 | |
|
1780 | 0 | ntest->arg = leftOp; |
1781 | 0 | ntest->nulltesttype = IS_NOT_NULL; |
1782 | 0 | ntest->argisrow = false; |
1783 | 0 | ntest->location = -1; |
1784 | 0 | rinfo->clause = (Expr *) ntest; |
1785 | 0 | rinfo->mergeopfamilies = NIL; |
1786 | 0 | rinfo->left_em = NULL; |
1787 | 0 | rinfo->right_em = NULL; |
1788 | 0 | } |
1789 | 0 | Assert(rinfo->orclause == NULL); |
1790 | 0 | } |
1791 | 0 | return true; |
1792 | 0 | } |
1793 | | |
1794 | 0 | return false; |
1795 | 0 | } |
1796 | | |
1797 | | /* |
1798 | | * Remove a relation after we have proven that it participates only in an |
1799 | | * unneeded unique self-join. |
1800 | | * |
1801 | | * Replace any links in planner info structures. |
1802 | | * |
1803 | | * Transfer join and restriction clauses from the removed relation to the |
1804 | | * remaining one. We change the Vars of the clause to point to the |
1805 | | * remaining relation instead of the removed one. The clauses that require |
1806 | | * a subset of joinrelids become restriction clauses of the remaining |
1807 | | * relation, and others remain join clauses. We append them to |
1808 | | * baserestrictinfo and joininfo, respectively, trying not to introduce |
1809 | | * duplicates. |
1810 | | * |
1811 | | * We also have to process the 'joinclauses' list here, because it |
1812 | | * contains EC-derived join clauses which must become filter clauses. It |
1813 | | * is not enough to just correct the ECs because the EC-derived |
1814 | | * restrictions are generated before join removal (see |
1815 | | * generate_base_implied_equalities). |
1816 | | * |
1817 | | * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all |
1818 | | * cached relids and relid bitmapsets can be correctly cleaned during the |
1819 | | * self-join elimination procedure. |
1820 | | */ |
1821 | | static void |
1822 | | remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, |
1823 | | RelOptInfo *toKeep, RelOptInfo *toRemove, |
1824 | | List *restrictlist) |
1825 | 0 | { |
1826 | 0 | List *joininfos; |
1827 | 0 | ListCell *lc; |
1828 | 0 | int i; |
1829 | 0 | List *jinfo_candidates = NIL; |
1830 | 0 | List *binfo_candidates = NIL; |
1831 | |
|
1832 | 0 | Assert(toKeep->relid > 0); |
1833 | 0 | Assert(toRemove->relid > 0); |
1834 | | |
1835 | | /* |
1836 | | * Replace the index of the removing table with the keeping one. The |
1837 | | * technique of removing/distributing restrictinfo is used here to attach |
1838 | | * just appeared (for keeping relation) join clauses and avoid adding |
1839 | | * duplicates of those that already exist in the joininfo list. |
1840 | | */ |
1841 | 0 | joininfos = list_copy(toRemove->joininfo); |
1842 | 0 | foreach_node(RestrictInfo, rinfo, joininfos) |
1843 | 0 | { |
1844 | 0 | remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); |
1845 | 0 | ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid, |
1846 | 0 | 0, replace_relid_callback); |
1847 | |
|
1848 | 0 | if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) |
1849 | 0 | jinfo_candidates = lappend(jinfo_candidates, rinfo); |
1850 | 0 | else |
1851 | 0 | binfo_candidates = lappend(binfo_candidates, rinfo); |
1852 | 0 | } |
1853 | | |
1854 | | /* |
1855 | | * Concatenate restrictlist to the list of base restrictions of the |
1856 | | * removing table just to simplify the replacement procedure: all of them |
1857 | | * weren't connected to any keeping relations and need to be added to some |
1858 | | * rels. |
1859 | | */ |
1860 | 0 | toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo, |
1861 | 0 | restrictlist); |
1862 | 0 | foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo) |
1863 | 0 | { |
1864 | 0 | ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid, |
1865 | 0 | 0, replace_relid_callback); |
1866 | |
|
1867 | 0 | if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) |
1868 | 0 | jinfo_candidates = lappend(jinfo_candidates, rinfo); |
1869 | 0 | else |
1870 | 0 | binfo_candidates = lappend(binfo_candidates, rinfo); |
1871 | 0 | } |
1872 | | |
1873 | | /* |
1874 | | * Now, add all non-redundant clauses to the keeping relation. |
1875 | | */ |
1876 | 0 | add_non_redundant_clauses(root, binfo_candidates, |
1877 | 0 | &toKeep->baserestrictinfo, toRemove->relid); |
1878 | 0 | add_non_redundant_clauses(root, jinfo_candidates, |
1879 | 0 | &toKeep->joininfo, toRemove->relid); |
1880 | |
|
1881 | 0 | list_free(binfo_candidates); |
1882 | 0 | list_free(jinfo_candidates); |
1883 | | |
1884 | | /* |
1885 | | * Arrange equivalence classes, mentioned removing a table, with the |
1886 | | * keeping one: varno of removing table should be replaced in members and |
1887 | | * sources lists. Also, remove duplicated elements if this replacement |
1888 | | * procedure created them. |
1889 | | */ |
1890 | 0 | i = -1; |
1891 | 0 | while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0) |
1892 | 0 | { |
1893 | 0 | EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); |
1894 | |
|
1895 | 0 | update_eclasses(ec, toRemove->relid, toKeep->relid); |
1896 | 0 | toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i); |
1897 | 0 | } |
1898 | | |
1899 | | /* |
1900 | | * Transfer the targetlist and attr_needed flags. |
1901 | | */ |
1902 | |
|
1903 | 0 | foreach(lc, toRemove->reltarget->exprs) |
1904 | 0 | { |
1905 | 0 | Node *node = lfirst(lc); |
1906 | |
|
1907 | 0 | ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0, |
1908 | 0 | replace_relid_callback); |
1909 | 0 | if (!list_member(toKeep->reltarget->exprs, node)) |
1910 | 0 | toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node); |
1911 | 0 | } |
1912 | |
|
1913 | 0 | for (i = toKeep->min_attr; i <= toKeep->max_attr; i++) |
1914 | 0 | { |
1915 | 0 | int attno = i - toKeep->min_attr; |
1916 | |
|
1917 | 0 | toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno], |
1918 | 0 | toRemove->relid, toKeep->relid); |
1919 | 0 | toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno], |
1920 | 0 | toRemove->attr_needed[attno]); |
1921 | 0 | } |
1922 | | |
1923 | | /* |
1924 | | * If the removed relation has a row mark, transfer it to the remaining |
1925 | | * one. |
1926 | | * |
1927 | | * If both rels have row marks, just keep the one corresponding to the |
1928 | | * remaining relation because we verified earlier that they have the same |
1929 | | * strength. |
1930 | | */ |
1931 | 0 | if (rmark) |
1932 | 0 | { |
1933 | 0 | if (kmark) |
1934 | 0 | { |
1935 | 0 | Assert(kmark->markType == rmark->markType); |
1936 | |
|
1937 | 0 | root->rowMarks = list_delete_ptr(root->rowMarks, rmark); |
1938 | 0 | } |
1939 | 0 | else |
1940 | 0 | { |
1941 | | /* Shouldn't have inheritance children here. */ |
1942 | 0 | Assert(rmark->rti == rmark->prti); |
1943 | |
|
1944 | 0 | rmark->rti = rmark->prti = toKeep->relid; |
1945 | 0 | } |
1946 | 0 | } |
1947 | | |
1948 | | /* |
1949 | | * Replace varno in all the query structures, except nodes RangeTblRef |
1950 | | * otherwise later remove_rel_from_joinlist will yield errors. |
1951 | | */ |
1952 | 0 | ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, |
1953 | 0 | 0, replace_relid_callback); |
1954 | | |
1955 | | /* Replace links in the planner info */ |
1956 | 0 | remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL); |
1957 | | |
1958 | | /* At last, replace varno in root targetlist and HAVING clause */ |
1959 | 0 | ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid, |
1960 | 0 | toKeep->relid, 0, replace_relid_callback); |
1961 | 0 | ChangeVarNodesExtended((Node *) root->processed_groupClause, |
1962 | 0 | toRemove->relid, toKeep->relid, 0, |
1963 | 0 | replace_relid_callback); |
1964 | |
|
1965 | 0 | adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid); |
1966 | 0 | adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid); |
1967 | | |
1968 | | /* |
1969 | | * There may be references to the rel in root->fkey_list, but if so, |
1970 | | * match_foreign_keys_to_quals() will get rid of them. |
1971 | | */ |
1972 | | |
1973 | | /* |
1974 | | * Finally, remove the rel from the baserel array to prevent it from being |
1975 | | * referenced again. (We can't do this earlier because |
1976 | | * remove_join_clause_from_rels will touch it.) |
1977 | | */ |
1978 | 0 | root->simple_rel_array[toRemove->relid] = NULL; |
1979 | 0 | root->simple_rte_array[toRemove->relid] = NULL; |
1980 | | |
1981 | | /* And nuke the RelOptInfo, just in case there's another access path. */ |
1982 | 0 | pfree(toRemove); |
1983 | | |
1984 | | |
1985 | | /* |
1986 | | * Now repeat construction of attr_needed bits coming from all other |
1987 | | * sources. |
1988 | | */ |
1989 | 0 | rebuild_placeholder_attr_needed(root); |
1990 | 0 | rebuild_joinclause_attr_needed(root); |
1991 | 0 | rebuild_eclass_attr_needed(root); |
1992 | 0 | rebuild_lateral_attr_needed(root); |
1993 | 0 | } |
1994 | | |
1995 | | /* |
1996 | | * split_selfjoin_quals |
1997 | | * Processes 'joinquals' by building two lists: one containing the quals |
1998 | | * where the columns/exprs are on either side of the join match and |
1999 | | * another one containing the remaining quals. |
2000 | | * |
2001 | | * 'joinquals' must only contain quals for a RTE_RELATION being joined to |
2002 | | * itself. |
2003 | | */ |
2004 | | static void |
2005 | | split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals, |
2006 | | List **otherjoinquals, int from, int to) |
2007 | 0 | { |
2008 | 0 | List *sjoinquals = NIL; |
2009 | 0 | List *ojoinquals = NIL; |
2010 | |
|
2011 | 0 | foreach_node(RestrictInfo, rinfo, joinquals) |
2012 | 0 | { |
2013 | 0 | OpExpr *expr; |
2014 | 0 | Node *leftexpr; |
2015 | 0 | Node *rightexpr; |
2016 | | |
2017 | | /* In general, clause looks like F(arg1) = G(arg2) */ |
2018 | 0 | if (!rinfo->mergeopfamilies || |
2019 | 0 | bms_num_members(rinfo->clause_relids) != 2 || |
2020 | 0 | bms_membership(rinfo->left_relids) != BMS_SINGLETON || |
2021 | 0 | bms_membership(rinfo->right_relids) != BMS_SINGLETON) |
2022 | 0 | { |
2023 | 0 | ojoinquals = lappend(ojoinquals, rinfo); |
2024 | 0 | continue; |
2025 | 0 | } |
2026 | | |
2027 | 0 | expr = (OpExpr *) rinfo->clause; |
2028 | |
|
2029 | 0 | if (!IsA(expr, OpExpr) || list_length(expr->args) != 2) |
2030 | 0 | { |
2031 | 0 | ojoinquals = lappend(ojoinquals, rinfo); |
2032 | 0 | continue; |
2033 | 0 | } |
2034 | | |
2035 | 0 | leftexpr = get_leftop(rinfo->clause); |
2036 | 0 | rightexpr = copyObject(get_rightop(rinfo->clause)); |
2037 | |
|
2038 | 0 | if (leftexpr && IsA(leftexpr, RelabelType)) |
2039 | 0 | leftexpr = (Node *) ((RelabelType *) leftexpr)->arg; |
2040 | 0 | if (rightexpr && IsA(rightexpr, RelabelType)) |
2041 | 0 | rightexpr = (Node *) ((RelabelType *) rightexpr)->arg; |
2042 | | |
2043 | | /* |
2044 | | * Quite an expensive operation, narrowing the use case. For example, |
2045 | | * when we have cast of the same var to different (but compatible) |
2046 | | * types. |
2047 | | */ |
2048 | 0 | ChangeVarNodesExtended(rightexpr, |
2049 | 0 | bms_singleton_member(rinfo->right_relids), |
2050 | 0 | bms_singleton_member(rinfo->left_relids), 0, |
2051 | 0 | replace_relid_callback); |
2052 | |
|
2053 | 0 | if (equal(leftexpr, rightexpr)) |
2054 | 0 | sjoinquals = lappend(sjoinquals, rinfo); |
2055 | 0 | else |
2056 | 0 | ojoinquals = lappend(ojoinquals, rinfo); |
2057 | 0 | } |
2058 | |
|
2059 | 0 | *selfjoinquals = sjoinquals; |
2060 | 0 | *otherjoinquals = ojoinquals; |
2061 | 0 | } |
2062 | | |
2063 | | /* |
2064 | | * Check for a case when uniqueness is at least partly derived from a |
2065 | | * baserestrictinfo clause. In this case, we have a chance to return only |
2066 | | * one row (if such clauses on both sides of SJ are equal) or nothing (if they |
2067 | | * are different). |
2068 | | */ |
2069 | | static bool |
2070 | | match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses, |
2071 | | Index relid) |
2072 | 0 | { |
2073 | 0 | foreach_node(RestrictInfo, rinfo, uclauses) |
2074 | 0 | { |
2075 | 0 | Expr *clause; |
2076 | 0 | Node *iclause; |
2077 | 0 | Node *c1; |
2078 | 0 | bool matched = false; |
2079 | |
|
2080 | 0 | Assert(outer->relid > 0 && relid > 0); |
2081 | | |
2082 | | /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */ |
2083 | 0 | Assert(bms_is_empty(rinfo->left_relids) ^ |
2084 | 0 | bms_is_empty(rinfo->right_relids)); |
2085 | |
|
2086 | 0 | clause = (Expr *) copyObject(rinfo->clause); |
2087 | 0 | ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0, |
2088 | 0 | replace_relid_callback); |
2089 | |
|
2090 | 0 | iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) : |
2091 | 0 | get_leftop(clause); |
2092 | 0 | c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) : |
2093 | 0 | get_rightop(clause); |
2094 | | |
2095 | | /* |
2096 | | * Compare these left and right sides with the corresponding sides of |
2097 | | * the outer's filters. If no one is detected - return immediately. |
2098 | | */ |
2099 | 0 | foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo) |
2100 | 0 | { |
2101 | 0 | Node *oclause; |
2102 | 0 | Node *c2; |
2103 | |
|
2104 | 0 | if (orinfo->mergeopfamilies == NIL) |
2105 | | /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */ |
2106 | 0 | continue; |
2107 | | |
2108 | 0 | Assert(is_opclause(orinfo->clause)); |
2109 | |
|
2110 | 0 | oclause = bms_is_empty(orinfo->left_relids) ? |
2111 | 0 | get_rightop(orinfo->clause) : get_leftop(orinfo->clause); |
2112 | 0 | c2 = (bms_is_empty(orinfo->left_relids) ? |
2113 | 0 | get_leftop(orinfo->clause) : get_rightop(orinfo->clause)); |
2114 | |
|
2115 | 0 | if (equal(iclause, oclause) && equal(c1, c2)) |
2116 | 0 | { |
2117 | 0 | matched = true; |
2118 | 0 | break; |
2119 | 0 | } |
2120 | 0 | } |
2121 | |
|
2122 | 0 | if (!matched) |
2123 | 0 | return false; |
2124 | 0 | } |
2125 | | |
2126 | 0 | return true; |
2127 | 0 | } |
2128 | | |
2129 | | /* |
2130 | | * Find and remove unique self-joins in a group of base relations that have |
2131 | | * the same Oid. |
2132 | | * |
2133 | | * Returns a set of relids that were removed. |
2134 | | */ |
2135 | | static Relids |
2136 | | remove_self_joins_one_group(PlannerInfo *root, Relids relids) |
2137 | 0 | { |
2138 | 0 | Relids result = NULL; |
2139 | 0 | int k; /* Index of kept relation */ |
2140 | 0 | int r = -1; /* Index of removed relation */ |
2141 | |
|
2142 | 0 | while ((r = bms_next_member(relids, r)) > 0) |
2143 | 0 | { |
2144 | 0 | RelOptInfo *rrel = root->simple_rel_array[r]; |
2145 | |
|
2146 | 0 | k = r; |
2147 | |
|
2148 | 0 | while ((k = bms_next_member(relids, k)) > 0) |
2149 | 0 | { |
2150 | 0 | Relids joinrelids = NULL; |
2151 | 0 | RelOptInfo *krel = root->simple_rel_array[k]; |
2152 | 0 | List *restrictlist; |
2153 | 0 | List *selfjoinquals; |
2154 | 0 | List *otherjoinquals; |
2155 | 0 | ListCell *lc; |
2156 | 0 | bool jinfo_check = true; |
2157 | 0 | PlanRowMark *kmark = NULL; |
2158 | 0 | PlanRowMark *rmark = NULL; |
2159 | 0 | List *uclauses = NIL; |
2160 | | |
2161 | | /* A sanity check: the relations have the same Oid. */ |
2162 | 0 | Assert(root->simple_rte_array[k]->relid == |
2163 | 0 | root->simple_rte_array[r]->relid); |
2164 | | |
2165 | | /* |
2166 | | * It is impossible to eliminate the join of two relations if they |
2167 | | * belong to different rules of order. Otherwise, the planner |
2168 | | * can't find any variants of the correct query plan. |
2169 | | */ |
2170 | 0 | foreach(lc, root->join_info_list) |
2171 | 0 | { |
2172 | 0 | SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc); |
2173 | |
|
2174 | 0 | if ((bms_is_member(k, info->syn_lefthand) ^ |
2175 | 0 | bms_is_member(r, info->syn_lefthand)) || |
2176 | 0 | (bms_is_member(k, info->syn_righthand) ^ |
2177 | 0 | bms_is_member(r, info->syn_righthand))) |
2178 | 0 | { |
2179 | 0 | jinfo_check = false; |
2180 | 0 | break; |
2181 | 0 | } |
2182 | 0 | } |
2183 | 0 | if (!jinfo_check) |
2184 | 0 | continue; |
2185 | | |
2186 | | /* |
2187 | | * Check Row Marks equivalence. We can't remove the join if the |
2188 | | * relations have row marks of different strength (e.g., one is |
2189 | | * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for |
2190 | | * EvalPlanQual rechecking). |
2191 | | */ |
2192 | 0 | foreach(lc, root->rowMarks) |
2193 | 0 | { |
2194 | 0 | PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); |
2195 | |
|
2196 | 0 | if (rowMark->rti == r) |
2197 | 0 | { |
2198 | 0 | Assert(rmark == NULL); |
2199 | 0 | rmark = rowMark; |
2200 | 0 | } |
2201 | 0 | else if (rowMark->rti == k) |
2202 | 0 | { |
2203 | 0 | Assert(kmark == NULL); |
2204 | 0 | kmark = rowMark; |
2205 | 0 | } |
2206 | |
|
2207 | 0 | if (kmark && rmark) |
2208 | 0 | break; |
2209 | 0 | } |
2210 | 0 | if (kmark && rmark && kmark->markType != rmark->markType) |
2211 | 0 | continue; |
2212 | | |
2213 | | /* |
2214 | | * We only deal with base rels here, so their relids bitset |
2215 | | * contains only one member -- their relid. |
2216 | | */ |
2217 | 0 | joinrelids = bms_add_member(joinrelids, r); |
2218 | 0 | joinrelids = bms_add_member(joinrelids, k); |
2219 | | |
2220 | | /* |
2221 | | * PHVs should not impose any constraints on removing self-joins. |
2222 | | */ |
2223 | | |
2224 | | /* |
2225 | | * At this stage, joininfo lists of inner and outer can contain |
2226 | | * only clauses required for a superior outer join that can't |
2227 | | * influence this optimization. So, we can avoid to call the |
2228 | | * build_joinrel_restrictlist() routine. |
2229 | | */ |
2230 | 0 | restrictlist = generate_join_implied_equalities(root, joinrelids, |
2231 | 0 | rrel->relids, |
2232 | 0 | krel, NULL); |
2233 | 0 | if (restrictlist == NIL) |
2234 | 0 | continue; |
2235 | | |
2236 | | /* |
2237 | | * Process restrictlist to separate the self-join quals from the |
2238 | | * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to |
2239 | | * otherjoinquals. |
2240 | | */ |
2241 | 0 | split_selfjoin_quals(root, restrictlist, &selfjoinquals, |
2242 | 0 | &otherjoinquals, rrel->relid, krel->relid); |
2243 | |
|
2244 | 0 | Assert(list_length(restrictlist) == |
2245 | 0 | (list_length(selfjoinquals) + list_length(otherjoinquals))); |
2246 | | |
2247 | | /* |
2248 | | * To enable SJE for the only degenerate case without any self |
2249 | | * join clauses at all, add baserestrictinfo to this list. The |
2250 | | * degenerate case works only if both sides have the same clause. |
2251 | | * So doesn't matter which side to add. |
2252 | | */ |
2253 | 0 | selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo); |
2254 | | |
2255 | | /* |
2256 | | * Determine if the rrel can duplicate outer rows. We must bypass |
2257 | | * the unique rel cache here since we're possibly using a subset |
2258 | | * of join quals. We can use 'force_cache' == true when all join |
2259 | | * quals are self-join quals. Otherwise, we could end up putting |
2260 | | * false negatives in the cache. |
2261 | | */ |
2262 | 0 | if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids, |
2263 | 0 | krel, JOIN_INNER, selfjoinquals, |
2264 | 0 | list_length(otherjoinquals) == 0, |
2265 | 0 | &uclauses)) |
2266 | 0 | continue; |
2267 | | |
2268 | | /* |
2269 | | * 'uclauses' is the copy of outer->baserestrictinfo that are |
2270 | | * associated with an index. We proved by matching selfjoinquals |
2271 | | * to a unique index that the outer relation has at most one |
2272 | | * matching row for each inner row. Sometimes that is not enough. |
2273 | | * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the |
2274 | | * unique index is (a,b). Having non-empty uclauses, we must |
2275 | | * validate that the inner baserestrictinfo contains the same |
2276 | | * expressions, or we won't match the same row on each side of the |
2277 | | * join. |
2278 | | */ |
2279 | 0 | if (!match_unique_clauses(root, rrel, uclauses, krel->relid)) |
2280 | 0 | continue; |
2281 | | |
2282 | | /* |
2283 | | * Remove rrel ReloptInfo from the planner structures and the |
2284 | | * corresponding row mark. |
2285 | | */ |
2286 | 0 | remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist); |
2287 | |
|
2288 | 0 | result = bms_add_member(result, r); |
2289 | | |
2290 | | /* We have removed the outer relation, try the next one. */ |
2291 | 0 | break; |
2292 | 0 | } |
2293 | 0 | } |
2294 | |
|
2295 | 0 | return result; |
2296 | 0 | } |
2297 | | |
2298 | | /* |
2299 | | * Gather indexes of base relations from the joinlist and try to eliminate self |
2300 | | * joins. |
2301 | | */ |
2302 | | static Relids |
2303 | | remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove) |
2304 | 0 | { |
2305 | 0 | ListCell *jl; |
2306 | 0 | Relids relids = NULL; |
2307 | 0 | SelfJoinCandidate *candidates = NULL; |
2308 | 0 | int i; |
2309 | 0 | int j; |
2310 | 0 | int numRels; |
2311 | | |
2312 | | /* Collect indexes of base relations of the join tree */ |
2313 | 0 | foreach(jl, joinlist) |
2314 | 0 | { |
2315 | 0 | Node *jlnode = (Node *) lfirst(jl); |
2316 | |
|
2317 | 0 | if (IsA(jlnode, RangeTblRef)) |
2318 | 0 | { |
2319 | 0 | int varno = ((RangeTblRef *) jlnode)->rtindex; |
2320 | 0 | RangeTblEntry *rte = root->simple_rte_array[varno]; |
2321 | | |
2322 | | /* |
2323 | | * We only consider ordinary relations as candidates to be |
2324 | | * removed, and these relations should not have TABLESAMPLE |
2325 | | * clauses specified. Removing a relation with TABLESAMPLE clause |
2326 | | * could potentially change the syntax of the query. Because of |
2327 | | * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or |
2328 | | * Query->mergeTargetRelation associated rel cannot be eliminated. |
2329 | | */ |
2330 | 0 | if (rte->rtekind == RTE_RELATION && |
2331 | 0 | rte->relkind == RELKIND_RELATION && |
2332 | 0 | rte->tablesample == NULL && |
2333 | 0 | varno != root->parse->resultRelation && |
2334 | 0 | varno != root->parse->mergeTargetRelation) |
2335 | 0 | { |
2336 | 0 | Assert(!bms_is_member(varno, relids)); |
2337 | 0 | relids = bms_add_member(relids, varno); |
2338 | 0 | } |
2339 | 0 | } |
2340 | 0 | else if (IsA(jlnode, List)) |
2341 | 0 | { |
2342 | | /* Recursively go inside the sub-joinlist */ |
2343 | 0 | toRemove = remove_self_joins_recurse(root, (List *) jlnode, |
2344 | 0 | toRemove); |
2345 | 0 | } |
2346 | 0 | else |
2347 | 0 | elog(ERROR, "unrecognized joinlist node type: %d", |
2348 | 0 | (int) nodeTag(jlnode)); |
2349 | 0 | } |
2350 | | |
2351 | 0 | numRels = bms_num_members(relids); |
2352 | | |
2353 | | /* Need at least two relations for the join */ |
2354 | 0 | if (numRels < 2) |
2355 | 0 | return toRemove; |
2356 | | |
2357 | | /* |
2358 | | * In order to find relations with the same oid we first build an array of |
2359 | | * candidates and then sort it by oid. |
2360 | | */ |
2361 | 0 | candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) * |
2362 | 0 | numRels); |
2363 | 0 | i = -1; |
2364 | 0 | j = 0; |
2365 | 0 | while ((i = bms_next_member(relids, i)) >= 0) |
2366 | 0 | { |
2367 | 0 | candidates[j].relid = i; |
2368 | 0 | candidates[j].reloid = root->simple_rte_array[i]->relid; |
2369 | 0 | j++; |
2370 | 0 | } |
2371 | |
|
2372 | 0 | qsort(candidates, numRels, sizeof(SelfJoinCandidate), |
2373 | 0 | self_join_candidates_cmp); |
2374 | | |
2375 | | /* |
2376 | | * Iteratively form a group of relation indexes with the same oid and |
2377 | | * launch the routine that detects self-joins in this group and removes |
2378 | | * excessive range table entries. |
2379 | | * |
2380 | | * At the end of the iteration, exclude the group from the overall relids |
2381 | | * list. So each next iteration of the cycle will involve less and less |
2382 | | * value of relids. |
2383 | | */ |
2384 | 0 | i = 0; |
2385 | 0 | for (j = 1; j < numRels + 1; j++) |
2386 | 0 | { |
2387 | 0 | if (j == numRels || candidates[j].reloid != candidates[i].reloid) |
2388 | 0 | { |
2389 | 0 | if (j - i >= 2) |
2390 | 0 | { |
2391 | | /* Create a group of relation indexes with the same oid */ |
2392 | 0 | Relids group = NULL; |
2393 | 0 | Relids removed; |
2394 | |
|
2395 | 0 | while (i < j) |
2396 | 0 | { |
2397 | 0 | group = bms_add_member(group, candidates[i].relid); |
2398 | 0 | i++; |
2399 | 0 | } |
2400 | 0 | relids = bms_del_members(relids, group); |
2401 | | |
2402 | | /* |
2403 | | * Try to remove self-joins from a group of identical entries. |
2404 | | * Make the next attempt iteratively - if something is deleted |
2405 | | * from a group, changes in clauses and equivalence classes |
2406 | | * can give us a chance to find more candidates. |
2407 | | */ |
2408 | 0 | do |
2409 | 0 | { |
2410 | 0 | Assert(!bms_overlap(group, toRemove)); |
2411 | 0 | removed = remove_self_joins_one_group(root, group); |
2412 | 0 | toRemove = bms_add_members(toRemove, removed); |
2413 | 0 | group = bms_del_members(group, removed); |
2414 | 0 | } while (!bms_is_empty(removed) && |
2415 | 0 | bms_membership(group) == BMS_MULTIPLE); |
2416 | 0 | bms_free(removed); |
2417 | 0 | bms_free(group); |
2418 | 0 | } |
2419 | 0 | else |
2420 | 0 | { |
2421 | | /* Single relation, just remove it from the set */ |
2422 | 0 | relids = bms_del_member(relids, candidates[i].relid); |
2423 | 0 | i = j; |
2424 | 0 | } |
2425 | 0 | } |
2426 | 0 | } |
2427 | |
|
2428 | 0 | Assert(bms_is_empty(relids)); |
2429 | |
|
2430 | 0 | return toRemove; |
2431 | 0 | } |
2432 | | |
2433 | | /* |
2434 | | * Compare self-join candidates by their oids. |
2435 | | */ |
2436 | | static int |
2437 | | self_join_candidates_cmp(const void *a, const void *b) |
2438 | 0 | { |
2439 | 0 | const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a; |
2440 | 0 | const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b; |
2441 | |
|
2442 | 0 | if (ca->reloid != cb->reloid) |
2443 | 0 | return (ca->reloid < cb->reloid ? -1 : 1); |
2444 | 0 | else |
2445 | 0 | return 0; |
2446 | 0 | } |
2447 | | |
2448 | | /* |
2449 | | * Find and remove useless self joins. |
2450 | | * |
2451 | | * Search for joins where a relation is joined to itself. If the join clause |
2452 | | * for each tuple from one side of the join is proven to match the same |
2453 | | * physical row (or nothing) on the other side, that self-join can be |
2454 | | * eliminated from the query. Suitable join clauses are assumed to be in the |
2455 | | * form of X = X, and can be replaced with NOT NULL clauses. |
2456 | | * |
2457 | | * For the sake of simplicity, we don't apply this optimization to special |
2458 | | * joins. Here is a list of what we could do in some particular cases: |
2459 | | * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins, |
2460 | | * and then removed normally. |
2461 | | * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND |
2462 | | * (IS NULL on join columns OR NOT inner quals)'. |
2463 | | * 'a a1 left join a a2': could simplify to a scan like inner but without |
2464 | | * NOT NULL conditions on join columns. |
2465 | | * 'a a1 left join (a a2 join b)': can't simplify this, because join to b |
2466 | | * can both remove rows and introduce duplicates. |
2467 | | * |
2468 | | * To search for removable joins, we order all the relations on their Oid, |
2469 | | * go over each set with the same Oid, and consider each pair of relations |
2470 | | * in this set. |
2471 | | * |
2472 | | * To remove the join, we mark one of the participating relations as dead |
2473 | | * and rewrite all references to it to point to the remaining relation. |
2474 | | * This includes modifying RestrictInfos, EquivalenceClasses, and |
2475 | | * EquivalenceMembers. We also have to modify the row marks. The join clauses |
2476 | | * of the removed relation become either restriction or join clauses, based on |
2477 | | * whether they reference any relations not participating in the removed join. |
2478 | | * |
2479 | | * 'joinlist' is the top-level joinlist of the query. If it has any |
2480 | | * references to the removed relations, we update them to point to the |
2481 | | * remaining ones. |
2482 | | */ |
2483 | | List * |
2484 | | remove_useless_self_joins(PlannerInfo *root, List *joinlist) |
2485 | 0 | { |
2486 | 0 | Relids toRemove = NULL; |
2487 | 0 | int relid = -1; |
2488 | |
|
2489 | 0 | if (!enable_self_join_elimination || joinlist == NIL || |
2490 | 0 | (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List))) |
2491 | 0 | return joinlist; |
2492 | | |
2493 | | /* |
2494 | | * Merge pairs of relations participated in self-join. Remove unnecessary |
2495 | | * range table entries. |
2496 | | */ |
2497 | 0 | toRemove = remove_self_joins_recurse(root, joinlist, toRemove); |
2498 | |
|
2499 | 0 | if (unlikely(toRemove != NULL)) |
2500 | 0 | { |
2501 | | /* At the end, remove orphaned relation links */ |
2502 | 0 | while ((relid = bms_next_member(toRemove, relid)) >= 0) |
2503 | 0 | { |
2504 | 0 | int nremoved = 0; |
2505 | |
|
2506 | 0 | joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved); |
2507 | 0 | if (nremoved != 1) |
2508 | 0 | elog(ERROR, "failed to find relation %d in joinlist", relid); |
2509 | 0 | } |
2510 | 0 | } |
2511 | | |
2512 | 0 | return joinlist; |
2513 | 0 | } |