Coverage Report

Created: 2025-10-09 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/optimizer/plan/analyzejoins.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * analyzejoins.c
4
 *    Routines for simplifying joins after initial query analysis
5
 *
6
 * While we do a great deal of join simplification in prep/prepjointree.c,
7
 * certain optimizations cannot be performed at that stage for lack of
8
 * detailed information about the query.  The routines here are invoked
9
 * after initsplan.c has done its work, and can do additional join removal
10
 * and simplification steps based on the information extracted.  The penalty
11
 * is that we have to work harder to clean up after ourselves when we modify
12
 * the query, since the derived data structures have to be updated too.
13
 *
14
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15
 * Portions Copyright (c) 1994, Regents of the University of California
16
 *
17
 *
18
 * IDENTIFICATION
19
 *    src/backend/optimizer/plan/analyzejoins.c
20
 *
21
 *-------------------------------------------------------------------------
22
 */
23
#include "postgres.h"
24
25
#include "catalog/pg_class.h"
26
#include "nodes/nodeFuncs.h"
27
#include "optimizer/joininfo.h"
28
#include "optimizer/optimizer.h"
29
#include "optimizer/pathnode.h"
30
#include "optimizer/paths.h"
31
#include "optimizer/placeholder.h"
32
#include "optimizer/planmain.h"
33
#include "optimizer/restrictinfo.h"
34
#include "rewrite/rewriteManip.h"
35
#include "utils/lsyscache.h"
36
37
/*
38
 * Utility structure.  A sorting procedure is needed to simplify the search
39
 * of SJE-candidate baserels referencing the same database relation.  Having
40
 * collected all baserels from the query jointree, the planner sorts them
41
 * according to the reloid value, groups them with the next pass and attempts
42
 * to remove self-joins.
43
 *
44
 * Preliminary sorting prevents quadratic behavior that can be harmful in the
45
 * case of numerous joins.
46
 */
47
typedef struct
48
{
49
  int     relid;
50
  Oid     reloid;
51
} SelfJoinCandidate;
52
53
bool    enable_self_join_elimination;
54
55
/* local functions */
56
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
57
static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
58
                      SpecialJoinInfo *sjinfo);
59
static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
60
                     int relid, int ojrelid);
61
static void remove_rel_from_eclass(EquivalenceClass *ec,
62
                   SpecialJoinInfo *sjinfo,
63
                   int relid, int subst);
64
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
65
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
66
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
67
                List *clause_list, List **extra_clauses);
68
static Oid  distinct_col_search(int colno, List *colnos, List *opids);
69
static bool is_innerrel_unique_for(PlannerInfo *root,
70
                   Relids joinrelids,
71
                   Relids outerrelids,
72
                   RelOptInfo *innerrel,
73
                   JoinType jointype,
74
                   List *restrictlist,
75
                   List **extra_clauses);
76
static int  self_join_candidates_cmp(const void *a, const void *b);
77
static bool replace_relid_callback(Node *node,
78
                   ChangeVarNodes_context *context);
79
80
81
/*
82
 * remove_useless_joins
83
 *    Check for relations that don't actually need to be joined at all,
84
 *    and remove them from the query.
85
 *
86
 * We are passed the current joinlist and return the updated list.  Other
87
 * data structures that have to be updated are accessible via "root".
88
 */
89
List *
90
remove_useless_joins(PlannerInfo *root, List *joinlist)
91
0
{
92
0
  ListCell   *lc;
93
94
  /*
95
   * We are only interested in relations that are left-joined to, so we can
96
   * scan the join_info_list to find them easily.
97
   */
98
0
restart:
99
0
  foreach(lc, root->join_info_list)
100
0
  {
101
0
    SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
102
0
    int     innerrelid;
103
0
    int     nremoved;
104
105
    /* Skip if not removable */
106
0
    if (!join_is_removable(root, sjinfo))
107
0
      continue;
108
109
    /*
110
     * Currently, join_is_removable can only succeed when the sjinfo's
111
     * righthand is a single baserel.  Remove that rel from the query and
112
     * joinlist.
113
     */
114
0
    innerrelid = bms_singleton_member(sjinfo->min_righthand);
115
116
0
    remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
117
118
    /* We verify that exactly one reference gets removed from joinlist */
119
0
    nremoved = 0;
120
0
    joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
121
0
    if (nremoved != 1)
122
0
      elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
123
124
    /*
125
     * We can delete this SpecialJoinInfo from the list too, since it's no
126
     * longer of interest.  (Since we'll restart the foreach loop
127
     * immediately, we don't bother with foreach_delete_current.)
128
     */
129
0
    root->join_info_list = list_delete_cell(root->join_info_list, lc);
130
131
    /*
132
     * Restart the scan.  This is necessary to ensure we find all
133
     * removable joins independently of ordering of the join_info_list
134
     * (note that removal of attr_needed bits may make a join appear
135
     * removable that did not before).
136
     */
137
0
    goto restart;
138
0
  }
139
140
0
  return joinlist;
141
0
}
142
143
/*
144
 * join_is_removable
145
 *    Check whether we need not perform this special join at all, because
146
 *    it will just duplicate its left input.
147
 *
148
 * This is true for a left join for which the join condition cannot match
149
 * more than one inner-side row.  (There are other possibly interesting
150
 * cases, but we don't have the infrastructure to prove them.)  We also
151
 * have to check that the inner side doesn't generate any variables needed
152
 * above the join.
153
 */
154
static bool
155
join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
156
0
{
157
0
  int     innerrelid;
158
0
  RelOptInfo *innerrel;
159
0
  Relids    inputrelids;
160
0
  Relids    joinrelids;
161
0
  List     *clause_list = NIL;
162
0
  ListCell   *l;
163
0
  int     attroff;
164
165
  /*
166
   * Must be a left join to a single baserel, else we aren't going to be
167
   * able to do anything with it.
168
   */
169
0
  if (sjinfo->jointype != JOIN_LEFT)
170
0
    return false;
171
172
0
  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
173
0
    return false;
174
175
  /*
176
   * Never try to eliminate a left join to the query result rel.  Although
177
   * the case is syntactically impossible in standard SQL, MERGE will build
178
   * a join tree that looks exactly like that.
179
   */
180
0
  if (innerrelid == root->parse->resultRelation)
181
0
    return false;
182
183
0
  innerrel = find_base_rel(root, innerrelid);
184
185
  /*
186
   * Before we go to the effort of checking whether any innerrel variables
187
   * are needed above the join, make a quick check to eliminate cases in
188
   * which we will surely be unable to prove uniqueness of the innerrel.
189
   */
190
0
  if (!rel_supports_distinctness(root, innerrel))
191
0
    return false;
192
193
  /* Compute the relid set for the join we are considering */
194
0
  inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
195
0
  Assert(sjinfo->ojrelid != 0);
196
0
  joinrelids = bms_copy(inputrelids);
197
0
  joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
198
199
  /*
200
   * We can't remove the join if any inner-rel attributes are used above the
201
   * join.  Here, "above" the join includes pushed-down conditions, so we
202
   * should reject if attr_needed includes the OJ's own relid; therefore,
203
   * compare to inputrelids not joinrelids.
204
   *
205
   * As a micro-optimization, it seems better to start with max_attr and
206
   * count down rather than starting with min_attr and counting up, on the
207
   * theory that the system attributes are somewhat less likely to be wanted
208
   * and should be tested last.
209
   */
210
0
  for (attroff = innerrel->max_attr - innerrel->min_attr;
211
0
     attroff >= 0;
212
0
     attroff--)
213
0
  {
214
0
    if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
215
0
      return false;
216
0
  }
217
218
  /*
219
   * Similarly check that the inner rel isn't needed by any PlaceHolderVars
220
   * that will be used above the join.  The PHV case is a little bit more
221
   * complicated, because PHVs may have been assigned a ph_eval_at location
222
   * that includes the innerrel, yet their contained expression might not
223
   * actually reference the innerrel (it could be just a constant, for
224
   * instance).  If such a PHV is due to be evaluated above the join then it
225
   * needn't prevent join removal.
226
   */
227
0
  foreach(l, root->placeholder_list)
228
0
  {
229
0
    PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
230
231
0
    if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
232
0
      return false;   /* it references innerrel laterally */
233
0
    if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
234
0
      continue;     /* it definitely doesn't reference innerrel */
235
0
    if (bms_is_subset(phinfo->ph_needed, inputrelids))
236
0
      continue;     /* PHV is not used above the join */
237
0
    if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
238
0
      return false;   /* it has to be evaluated below the join */
239
240
    /*
241
     * We need to be sure there will still be a place to evaluate the PHV
242
     * if we remove the join, ie that ph_eval_at wouldn't become empty.
243
     */
244
0
    if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
245
0
      return false;   /* there isn't any other place to eval PHV */
246
    /* Check contained expression last, since this is a bit expensive */
247
0
    if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
248
0
            innerrel->relids))
249
0
      return false;   /* contained expression references innerrel */
250
0
  }
251
252
  /*
253
   * Search for mergejoinable clauses that constrain the inner rel against
254
   * either the outer rel or a pseudoconstant.  If an operator is
255
   * mergejoinable then it behaves like equality for some btree opclass, so
256
   * it's what we want.  The mergejoinability test also eliminates clauses
257
   * containing volatile functions, which we couldn't depend on.
258
   */
259
0
  foreach(l, innerrel->joininfo)
260
0
  {
261
0
    RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
262
263
    /*
264
     * If the current join commutes with some other outer join(s) via
265
     * outer join identity 3, there will be multiple clones of its join
266
     * clauses in the joininfo list.  We want to consider only the
267
     * has_clone form of such clauses.  Processing more than one form
268
     * would be wasteful, and also some of the others would confuse the
269
     * RINFO_IS_PUSHED_DOWN test below.
270
     */
271
0
    if (restrictinfo->is_clone)
272
0
      continue;     /* ignore it */
273
274
    /*
275
     * If it's not a join clause for this outer join, we can't use it.
276
     * Note that if the clause is pushed-down, then it is logically from
277
     * above the outer join, even if it references no other rels (it might
278
     * be from WHERE, for example).
279
     */
280
0
    if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
281
0
      continue;     /* ignore; not useful here */
282
283
    /* Ignore if it's not a mergejoinable clause */
284
0
    if (!restrictinfo->can_join ||
285
0
      restrictinfo->mergeopfamilies == NIL)
286
0
      continue;     /* not mergejoinable */
287
288
    /*
289
     * Check if the clause has the form "outer op inner" or "inner op
290
     * outer", and if so mark which side is inner.
291
     */
292
0
    if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
293
0
                   innerrel->relids))
294
0
      continue;     /* no good for these input relations */
295
296
    /* OK, add to list */
297
0
    clause_list = lappend(clause_list, restrictinfo);
298
0
  }
299
300
  /*
301
   * Now that we have the relevant equality join clauses, try to prove the
302
   * innerrel distinct.
303
   */
304
0
  if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
305
0
    return true;
306
307
  /*
308
   * Some day it would be nice to check for other methods of establishing
309
   * distinctness.
310
   */
311
0
  return false;
312
0
}
313
314
315
/*
316
 * Remove the target rel->relid and references to the target join from the
317
 * planner's data structures, having determined that there is no need
318
 * to include them in the query. Optionally replace them with subst if subst
319
 * is non-negative.
320
 *
321
 * This function updates only parts needed for both left-join removal and
322
 * self-join removal.
323
 */
324
static void
325
remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
326
            int subst, SpecialJoinInfo *sjinfo,
327
            Relids joinrelids)
328
0
{
329
0
  int     relid = rel->relid;
330
0
  Index   rti;
331
0
  ListCell   *l;
332
333
  /*
334
   * Update all_baserels and related relid sets.
335
   */
336
0
  root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
337
0
  root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
338
339
0
  if (sjinfo != NULL)
340
0
  {
341
0
    root->outer_join_rels = bms_del_member(root->outer_join_rels,
342
0
                         sjinfo->ojrelid);
343
0
    root->all_query_rels = bms_del_member(root->all_query_rels,
344
0
                        sjinfo->ojrelid);
345
0
  }
346
347
  /*
348
   * Likewise remove references from SpecialJoinInfo data structures.
349
   *
350
   * This is relevant in case the outer join we're deleting is nested inside
351
   * other outer joins: the upper joins' relid sets have to be adjusted. The
352
   * RHS of the target outer join will be made empty here, but that's OK
353
   * since caller will delete that SpecialJoinInfo entirely.
354
   */
355
0
  foreach(l, root->join_info_list)
356
0
  {
357
0
    SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
358
359
    /*
360
     * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
361
     * lefthand/righthand relid sets to be shared with other data
362
     * structures.  Ensure that we don't modify the original relid sets.
363
     * (The commute_xxx sets are always per-SpecialJoinInfo though.)
364
     */
365
0
    sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
366
0
    sjinf->min_righthand = bms_copy(sjinf->min_righthand);
367
0
    sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
368
0
    sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
369
    /* Now remove relid from the sets: */
370
0
    sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
371
0
    sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
372
0
    sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
373
0
    sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
374
375
0
    if (sjinfo != NULL)
376
0
    {
377
0
      Assert(subst <= 0);
378
379
      /* Remove sjinfo->ojrelid bits from the sets: */
380
0
      sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand,
381
0
                         sjinfo->ojrelid);
382
0
      sjinf->min_righthand = bms_del_member(sjinf->min_righthand,
383
0
                          sjinfo->ojrelid);
384
0
      sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand,
385
0
                         sjinfo->ojrelid);
386
0
      sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand,
387
0
                          sjinfo->ojrelid);
388
      /* relid cannot appear in these fields, but ojrelid can: */
389
0
      sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l,
390
0
                          sjinfo->ojrelid);
391
0
      sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r,
392
0
                          sjinfo->ojrelid);
393
0
      sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l,
394
0
                          sjinfo->ojrelid);
395
0
      sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r,
396
0
                          sjinfo->ojrelid);
397
0
    }
398
0
    else
399
0
    {
400
0
      Assert(subst > 0);
401
402
0
      ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
403
0
                   0, replace_relid_callback);
404
0
    }
405
0
  }
406
407
  /*
408
   * Likewise remove references from PlaceHolderVar data structures,
409
   * removing any no-longer-needed placeholders entirely.  We remove PHV
410
   * only for left-join removal.  With self-join elimination, PHVs already
411
   * get moved to the remaining relation, where they might still be needed.
412
   * It might also happen that we skip the removal of some PHVs that could
413
   * be removed.  However, the overhead of extra PHVs is small compared to
414
   * the complexity of analysis needed to remove them.
415
   *
416
   * Removal is a bit trickier than it might seem: we can remove PHVs that
417
   * are used at the target rel and/or in the join qual, but not those that
418
   * are used at join partner rels or above the join.  It's not that easy to
419
   * distinguish PHVs used at partner rels from those used in the join qual,
420
   * since they will both have ph_needed sets that are subsets of
421
   * joinrelids.  However, a PHV used at a partner rel could not have the
422
   * target rel in ph_eval_at, so we check that while deciding whether to
423
   * remove or just update the PHV.  There is no corresponding test in
424
   * join_is_removable because it doesn't need to distinguish those cases.
425
   */
426
0
  foreach(l, root->placeholder_list)
427
0
  {
428
0
    PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
429
430
0
    Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
431
0
    if (sjinfo != NULL &&
432
0
      bms_is_subset(phinfo->ph_needed, joinrelids) &&
433
0
      bms_is_member(relid, phinfo->ph_eval_at) &&
434
0
      !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
435
0
    {
436
      /*
437
       * This code shouldn't be executed if one relation is substituted
438
       * with another: in this case, the placeholder may be employed in
439
       * a filter inside the scan node the SJE removes.
440
       */
441
0
      root->placeholder_list = foreach_delete_current(root->placeholder_list,
442
0
                              l);
443
0
      root->placeholder_array[phinfo->phid] = NULL;
444
0
    }
445
0
    else
446
0
    {
447
0
      PlaceHolderVar *phv = phinfo->ph_var;
448
449
0
      phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
450
0
      if (sjinfo != NULL)
451
0
        phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at,
452
0
                            sjinfo->ojrelid, subst);
453
0
      Assert(!bms_is_empty(phinfo->ph_eval_at));  /* checked previously */
454
      /* Reduce ph_needed to contain only "relation 0"; see below */
455
0
      if (bms_is_member(0, phinfo->ph_needed))
456
0
        phinfo->ph_needed = bms_make_singleton(0);
457
0
      else
458
0
        phinfo->ph_needed = NULL;
459
460
0
      phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
461
462
      /*
463
       * ph_lateral might contain rels mentioned in ph_eval_at after the
464
       * replacement, remove them.
465
       */
466
0
      phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
467
      /* ph_lateral might or might not be empty */
468
469
0
      phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
470
0
      if (sjinfo != NULL)
471
0
        phv->phrels = adjust_relid_set(phv->phrels,
472
0
                         sjinfo->ojrelid, subst);
473
0
      Assert(!bms_is_empty(phv->phrels));
474
475
0
      ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
476
0
                   replace_relid_callback);
477
478
0
      Assert(phv->phnullingrels == NULL); /* no need to adjust */
479
0
    }
480
0
  }
481
482
  /*
483
   * Likewise remove references from EquivalenceClasses.
484
   */
485
0
  foreach(l, root->eq_classes)
486
0
  {
487
0
    EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
488
489
0
    if (bms_is_member(relid, ec->ec_relids) ||
490
0
      (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids)))
491
0
      remove_rel_from_eclass(ec, sjinfo, relid, subst);
492
0
  }
493
494
  /*
495
   * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
496
   * ph_needed relid sets.  These have to be known accurately, else we may
497
   * fail to remove other now-removable outer joins.  And our removal of the
498
   * join clause(s) for this outer join may mean that Vars that were
499
   * formerly needed no longer are.  So we have to do this honestly by
500
   * repeating the construction of those relid sets.  We can cheat to one
501
   * small extent: we can avoid re-examining the targetlist and HAVING qual
502
   * by preserving "relation 0" bits from the existing relid sets.  This is
503
   * safe because we'd never remove such references.
504
   *
505
   * So, start by removing all other bits from attr_needed sets and
506
   * lateral_vars lists.  (We already did this above for ph_needed.)
507
   */
508
0
  for (rti = 1; rti < root->simple_rel_array_size; rti++)
509
0
  {
510
0
    RelOptInfo *otherrel = root->simple_rel_array[rti];
511
0
    int     attroff;
512
513
    /* there may be empty slots corresponding to non-baserel RTEs */
514
0
    if (otherrel == NULL)
515
0
      continue;
516
517
0
    Assert(otherrel->relid == rti); /* sanity check on array */
518
519
0
    for (attroff = otherrel->max_attr - otherrel->min_attr;
520
0
       attroff >= 0;
521
0
       attroff--)
522
0
    {
523
0
      if (bms_is_member(0, otherrel->attr_needed[attroff]))
524
0
        otherrel->attr_needed[attroff] = bms_make_singleton(0);
525
0
      else
526
0
        otherrel->attr_needed[attroff] = NULL;
527
0
    }
528
529
0
    if (subst > 0)
530
0
      ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
531
0
                   subst, 0, replace_relid_callback);
532
0
  }
533
0
}
534
535
/*
536
 * Remove the target relid and references to the target join from the
537
 * planner's data structures, having determined that there is no need
538
 * to include them in the query.
539
 *
540
 * We are not terribly thorough here.  We only bother to update parts of
541
 * the planner's data structures that will actually be consulted later.
542
 */
543
static void
544
remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
545
                SpecialJoinInfo *sjinfo)
546
0
{
547
0
  RelOptInfo *rel = find_base_rel(root, relid);
548
0
  int     ojrelid = sjinfo->ojrelid;
549
0
  Relids    joinrelids;
550
0
  Relids    join_plus_commute;
551
0
  List     *joininfos;
552
0
  ListCell   *l;
553
554
  /* Compute the relid set for the join we are considering */
555
0
  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
556
0
  Assert(ojrelid != 0);
557
0
  joinrelids = bms_add_member(joinrelids, ojrelid);
558
559
0
  remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
560
561
  /*
562
   * Remove any joinquals referencing the rel from the joininfo lists.
563
   *
564
   * In some cases, a joinqual has to be put back after deleting its
565
   * reference to the target rel.  This can occur for pseudoconstant and
566
   * outerjoin-delayed quals, which can get marked as requiring the rel in
567
   * order to force them to be evaluated at or above the join.  We can't
568
   * just discard them, though.  Only quals that logically belonged to the
569
   * outer join being discarded should be removed from the query.
570
   *
571
   * We might encounter a qual that is a clone of a deletable qual with some
572
   * outer-join relids added (see deconstruct_distribute_oj_quals).  To
573
   * ensure we get rid of such clones as well, add the relids of all OJs
574
   * commutable with this one to the set we test against for
575
   * pushed-down-ness.
576
   */
577
0
  join_plus_commute = bms_union(joinrelids,
578
0
                  sjinfo->commute_above_r);
579
0
  join_plus_commute = bms_add_members(join_plus_commute,
580
0
                    sjinfo->commute_below_l);
581
582
  /*
583
   * We must make a copy of the rel's old joininfo list before starting the
584
   * loop, because otherwise remove_join_clause_from_rels would destroy the
585
   * list while we're scanning it.
586
   */
587
0
  joininfos = list_copy(rel->joininfo);
588
0
  foreach(l, joininfos)
589
0
  {
590
0
    RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
591
592
0
    remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
593
594
0
    if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
595
0
    {
596
      /*
597
       * There might be references to relid or ojrelid in the
598
       * RestrictInfo's relid sets, as a consequence of PHVs having had
599
       * ph_eval_at sets that include those.  We already checked above
600
       * that any such PHV is safe (and updated its ph_eval_at), so we
601
       * can just drop those references.
602
       */
603
0
      remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
604
605
      /*
606
       * Cross-check that the clause itself does not reference the
607
       * target rel or join.
608
       */
609
#ifdef USE_ASSERT_CHECKING
610
      {
611
        Relids    clause_varnos = pull_varnos(root,
612
                            (Node *) rinfo->clause);
613
614
        Assert(!bms_is_member(relid, clause_varnos));
615
        Assert(!bms_is_member(ojrelid, clause_varnos));
616
      }
617
#endif
618
      /* Now throw it back into the joininfo lists */
619
0
      distribute_restrictinfo_to_rels(root, rinfo);
620
0
    }
621
0
  }
622
623
  /*
624
   * There may be references to the rel in root->fkey_list, but if so,
625
   * match_foreign_keys_to_quals() will get rid of them.
626
   */
627
628
  /*
629
   * Now remove the rel from the baserel array to prevent it from being
630
   * referenced again.  (We can't do this earlier because
631
   * remove_join_clause_from_rels will touch it.)
632
   */
633
0
  root->simple_rel_array[relid] = NULL;
634
0
  root->simple_rte_array[relid] = NULL;
635
636
  /* And nuke the RelOptInfo, just in case there's another access path */
637
0
  pfree(rel);
638
639
  /*
640
   * Now repeat construction of attr_needed bits coming from all other
641
   * sources.
642
   */
643
0
  rebuild_placeholder_attr_needed(root);
644
0
  rebuild_joinclause_attr_needed(root);
645
0
  rebuild_eclass_attr_needed(root);
646
0
  rebuild_lateral_attr_needed(root);
647
0
}
648
649
/*
650
 * Remove any references to relid or ojrelid from the RestrictInfo.
651
 *
652
 * We only bother to clean out bits in clause_relids and required_relids,
653
 * not nullingrel bits in contained Vars and PHVs.  (This might have to be
654
 * improved sometime.)  However, if the RestrictInfo contains an OR clause
655
 * we have to also clean up the sub-clauses.
656
 */
657
static void
658
remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
659
0
{
660
  /*
661
   * initsplan.c is fairly cavalier about allowing RestrictInfos to share
662
   * relid sets with other RestrictInfos, and SpecialJoinInfos too.  Make
663
   * sure this RestrictInfo has its own relid sets before we modify them.
664
   * (In present usage, clause_relids is probably not shared, but
665
   * required_relids could be; let's not assume anything.)
666
   */
667
0
  rinfo->clause_relids = bms_copy(rinfo->clause_relids);
668
0
  rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
669
0
  rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
670
  /* Likewise for required_relids */
671
0
  rinfo->required_relids = bms_copy(rinfo->required_relids);
672
0
  rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
673
0
  rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
674
675
  /* If it's an OR, recurse to clean up sub-clauses */
676
0
  if (restriction_is_or_clause(rinfo))
677
0
  {
678
0
    ListCell   *lc;
679
680
0
    Assert(is_orclause(rinfo->orclause));
681
0
    foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
682
0
    {
683
0
      Node     *orarg = (Node *) lfirst(lc);
684
685
      /* OR arguments should be ANDs or sub-RestrictInfos */
686
0
      if (is_andclause(orarg))
687
0
      {
688
0
        List     *andargs = ((BoolExpr *) orarg)->args;
689
0
        ListCell   *lc2;
690
691
0
        foreach(lc2, andargs)
692
0
        {
693
0
          RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
694
695
0
          remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
696
0
        }
697
0
      }
698
0
      else
699
0
      {
700
0
        RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
701
702
0
        remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
703
0
      }
704
0
    }
705
0
  }
706
0
}
707
708
/*
709
 * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL)
710
 * from the EquivalenceClass.
711
 *
712
 * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
713
 * any nullingrel bits in contained Vars and PHVs.  (This might have to be
714
 * improved sometime.)  We do need to fix the EC and EM relid sets to ensure
715
 * that implied join equalities will be generated at the appropriate join
716
 * level(s).
717
 */
718
static void
719
remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo,
720
             int relid, int subst)
721
0
{
722
0
  ListCell   *lc;
723
724
  /* Fix up the EC's overall relids */
725
0
  ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
726
0
  if (sjinfo != NULL)
727
0
    ec->ec_relids = adjust_relid_set(ec->ec_relids,
728
0
                     sjinfo->ojrelid, subst);
729
730
  /*
731
   * We don't expect any EC child members to exist at this point.  Ensure
732
   * that's the case, otherwise, we might be getting asked to do something
733
   * this function hasn't been coded for.
734
   */
735
0
  Assert(ec->ec_childmembers == NULL);
736
737
  /*
738
   * Fix up the member expressions.  Any non-const member that ends with
739
   * empty em_relids must be a Var or PHV of the removed relation.  We don't
740
   * need it anymore, so we can drop it.
741
   */
742
0
  foreach(lc, ec->ec_members)
743
0
  {
744
0
    EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
745
746
0
    if (bms_is_member(relid, cur_em->em_relids) ||
747
0
      (sjinfo != NULL && bms_is_member(sjinfo->ojrelid,
748
0
                       cur_em->em_relids)))
749
0
    {
750
0
      Assert(!cur_em->em_is_const);
751
0
      cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
752
0
      if (sjinfo != NULL)
753
0
        cur_em->em_relids = adjust_relid_set(cur_em->em_relids,
754
0
                           sjinfo->ojrelid, subst);
755
0
      if (bms_is_empty(cur_em->em_relids))
756
0
        ec->ec_members = foreach_delete_current(ec->ec_members, lc);
757
0
    }
758
0
  }
759
760
  /* Fix up the source clauses, in case we can re-use them later */
761
0
  foreach(lc, ec->ec_sources)
762
0
  {
763
0
    RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
764
765
0
    if (sjinfo == NULL)
766
0
      ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0,
767
0
                   replace_relid_callback);
768
0
    else
769
0
      remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid);
770
0
  }
771
772
  /*
773
   * Rather than expend code on fixing up any already-derived clauses, just
774
   * drop them.  (At this point, any such clauses would be base restriction
775
   * clauses, which we'd not need anymore anyway.)
776
   */
777
0
  ec_clear_derived_clauses(ec);
778
0
}
779
780
/*
781
 * Remove any occurrences of the target relid from a joinlist structure.
782
 *
783
 * It's easiest to build a whole new list structure, so we handle it that
784
 * way.  Efficiency is not a big deal here.
785
 *
786
 * *nremoved is incremented by the number of occurrences removed (there
787
 * should be exactly one, but the caller checks that).
788
 */
789
static List *
790
remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
791
0
{
792
0
  List     *result = NIL;
793
0
  ListCell   *jl;
794
795
0
  foreach(jl, joinlist)
796
0
  {
797
0
    Node     *jlnode = (Node *) lfirst(jl);
798
799
0
    if (IsA(jlnode, RangeTblRef))
800
0
    {
801
0
      int     varno = ((RangeTblRef *) jlnode)->rtindex;
802
803
0
      if (varno == relid)
804
0
        (*nremoved)++;
805
0
      else
806
0
        result = lappend(result, jlnode);
807
0
    }
808
0
    else if (IsA(jlnode, List))
809
0
    {
810
      /* Recurse to handle subproblem */
811
0
      List     *sublist;
812
813
0
      sublist = remove_rel_from_joinlist((List *) jlnode,
814
0
                         relid, nremoved);
815
      /* Avoid including empty sub-lists in the result */
816
0
      if (sublist)
817
0
        result = lappend(result, sublist);
818
0
    }
819
0
    else
820
0
    {
821
0
      elog(ERROR, "unrecognized joinlist node type: %d",
822
0
         (int) nodeTag(jlnode));
823
0
    }
824
0
  }
825
826
0
  return result;
827
0
}
828
829
830
/*
831
 * reduce_unique_semijoins
832
 *    Check for semijoins that can be simplified to plain inner joins
833
 *    because the inner relation is provably unique for the join clauses.
834
 *
835
 * Ideally this would happen during reduce_outer_joins, but we don't have
836
 * enough information at that point.
837
 *
838
 * To perform the strength reduction when applicable, we need only delete
839
 * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
840
 * bother fixing the join type attributed to it in the query jointree,
841
 * since that won't be consulted again.)
842
 */
843
void
844
reduce_unique_semijoins(PlannerInfo *root)
845
0
{
846
0
  ListCell   *lc;
847
848
  /*
849
   * Scan the join_info_list to find semijoins.
850
   */
851
0
  foreach(lc, root->join_info_list)
852
0
  {
853
0
    SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
854
0
    int     innerrelid;
855
0
    RelOptInfo *innerrel;
856
0
    Relids    joinrelids;
857
0
    List     *restrictlist;
858
859
    /*
860
     * Must be a semijoin to a single baserel, else we aren't going to be
861
     * able to do anything with it.
862
     */
863
0
    if (sjinfo->jointype != JOIN_SEMI)
864
0
      continue;
865
866
0
    if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
867
0
      continue;
868
869
0
    innerrel = find_base_rel(root, innerrelid);
870
871
    /*
872
     * Before we trouble to run generate_join_implied_equalities, make a
873
     * quick check to eliminate cases in which we will surely be unable to
874
     * prove uniqueness of the innerrel.
875
     */
876
0
    if (!rel_supports_distinctness(root, innerrel))
877
0
      continue;
878
879
    /* Compute the relid set for the join we are considering */
880
0
    joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
881
0
    Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
882
883
    /*
884
     * Since we're only considering a single-rel RHS, any join clauses it
885
     * has must be clauses linking it to the semijoin's min_lefthand.  We
886
     * can also consider EC-derived join clauses.
887
     */
888
0
    restrictlist =
889
0
      list_concat(generate_join_implied_equalities(root,
890
0
                             joinrelids,
891
0
                             sjinfo->min_lefthand,
892
0
                             innerrel,
893
0
                             NULL),
894
0
            innerrel->joininfo);
895
896
    /* Test whether the innerrel is unique for those clauses. */
897
0
    if (!innerrel_is_unique(root,
898
0
                joinrelids, sjinfo->min_lefthand, innerrel,
899
0
                JOIN_SEMI, restrictlist, true))
900
0
      continue;
901
902
    /* OK, remove the SpecialJoinInfo from the list. */
903
0
    root->join_info_list = foreach_delete_current(root->join_info_list, lc);
904
0
  }
905
0
}
906
907
908
/*
909
 * rel_supports_distinctness
910
 *    Could the relation possibly be proven distinct on some set of columns?
911
 *
912
 * This is effectively a pre-checking function for rel_is_distinct_for().
913
 * It must return true if rel_is_distinct_for() could possibly return true
914
 * with this rel, but it should not expend a lot of cycles.  The idea is
915
 * that callers can avoid doing possibly-expensive processing to compute
916
 * rel_is_distinct_for()'s argument lists if the call could not possibly
917
 * succeed.
918
 */
919
static bool
920
rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
921
0
{
922
  /* We only know about baserels ... */
923
0
  if (rel->reloptkind != RELOPT_BASEREL)
924
0
    return false;
925
0
  if (rel->rtekind == RTE_RELATION)
926
0
  {
927
    /*
928
     * For a plain relation, we only know how to prove uniqueness by
929
     * reference to unique indexes.  Make sure there's at least one
930
     * suitable unique index.  It must be immediately enforced, and not a
931
     * partial index. (Keep these conditions in sync with
932
     * relation_has_unique_index_for!)
933
     */
934
0
    ListCell   *lc;
935
936
0
    foreach(lc, rel->indexlist)
937
0
    {
938
0
      IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
939
940
0
      if (ind->unique && ind->immediate && ind->indpred == NIL)
941
0
        return true;
942
0
    }
943
0
  }
944
0
  else if (rel->rtekind == RTE_SUBQUERY)
945
0
  {
946
0
    Query    *subquery = root->simple_rte_array[rel->relid]->subquery;
947
948
    /* Check if the subquery has any qualities that support distinctness */
949
0
    if (query_supports_distinctness(subquery))
950
0
      return true;
951
0
  }
952
  /* We have no proof rules for any other rtekinds. */
953
0
  return false;
954
0
}
955
956
/*
957
 * rel_is_distinct_for
958
 *    Does the relation return only distinct rows according to clause_list?
959
 *
960
 * clause_list is a list of join restriction clauses involving this rel and
961
 * some other one.  Return true if no two rows emitted by this rel could
962
 * possibly join to the same row of the other rel.
963
 *
964
 * The caller must have already determined that each condition is a
965
 * mergejoinable equality with an expression in this relation on one side, and
966
 * an expression not involving this relation on the other.  The transient
967
 * outer_is_left flag is used to identify which side references this relation:
968
 * left side if outer_is_left is false, right side if it is true.
969
 *
970
 * Note that the passed-in clause_list may be destructively modified!  This
971
 * is OK for current uses, because the clause_list is built by the caller for
972
 * the sole purpose of passing to this function.
973
 *
974
 * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
975
 * looking like "x = const" if distinctness is derived from such clauses, not
976
 * joininfo clauses.  Pass NULL to the extra_clauses if this value is not
977
 * needed.
978
 */
979
static bool
980
rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
981
          List **extra_clauses)
982
0
{
983
  /*
984
   * We could skip a couple of tests here if we assume all callers checked
985
   * rel_supports_distinctness first, but it doesn't seem worth taking any
986
   * risk for.
987
   */
988
0
  if (rel->reloptkind != RELOPT_BASEREL)
989
0
    return false;
990
0
  if (rel->rtekind == RTE_RELATION)
991
0
  {
992
    /*
993
     * Examine the indexes to see if we have a matching unique index.
994
     * relation_has_unique_index_for automatically adds any usable
995
     * restriction clauses for the rel, so we needn't do that here.
996
     */
997
0
    if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses))
998
0
      return true;
999
0
  }
1000
0
  else if (rel->rtekind == RTE_SUBQUERY)
1001
0
  {
1002
0
    Index   relid = rel->relid;
1003
0
    Query    *subquery = root->simple_rte_array[relid]->subquery;
1004
0
    List     *colnos = NIL;
1005
0
    List     *opids = NIL;
1006
0
    ListCell   *l;
1007
1008
    /*
1009
     * Build the argument lists for query_is_distinct_for: a list of
1010
     * output column numbers that the query needs to be distinct over, and
1011
     * a list of equality operators that the output columns need to be
1012
     * distinct according to.
1013
     *
1014
     * (XXX we are not considering restriction clauses attached to the
1015
     * subquery; is that worth doing?)
1016
     */
1017
0
    foreach(l, clause_list)
1018
0
    {
1019
0
      RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
1020
0
      Oid     op;
1021
0
      Var      *var;
1022
1023
      /*
1024
       * Get the equality operator we need uniqueness according to.
1025
       * (This might be a cross-type operator and thus not exactly the
1026
       * same operator the subquery would consider; that's all right
1027
       * since query_is_distinct_for can resolve such cases.)  The
1028
       * caller's mergejoinability test should have selected only
1029
       * OpExprs.
1030
       */
1031
0
      op = castNode(OpExpr, rinfo->clause)->opno;
1032
1033
      /* caller identified the inner side for us */
1034
0
      if (rinfo->outer_is_left)
1035
0
        var = (Var *) get_rightop(rinfo->clause);
1036
0
      else
1037
0
        var = (Var *) get_leftop(rinfo->clause);
1038
1039
      /*
1040
       * We may ignore any RelabelType node above the operand.  (There
1041
       * won't be more than one, since eval_const_expressions() has been
1042
       * applied already.)
1043
       */
1044
0
      if (var && IsA(var, RelabelType))
1045
0
        var = (Var *) ((RelabelType *) var)->arg;
1046
1047
      /*
1048
       * If inner side isn't a Var referencing a subquery output column,
1049
       * this clause doesn't help us.
1050
       */
1051
0
      if (!var || !IsA(var, Var) ||
1052
0
        var->varno != relid || var->varlevelsup != 0)
1053
0
        continue;
1054
1055
0
      colnos = lappend_int(colnos, var->varattno);
1056
0
      opids = lappend_oid(opids, op);
1057
0
    }
1058
1059
0
    if (query_is_distinct_for(subquery, colnos, opids))
1060
0
      return true;
1061
0
  }
1062
0
  return false;
1063
0
}
1064
1065
1066
/*
1067
 * query_supports_distinctness - could the query possibly be proven distinct
1068
 *    on some set of output columns?
1069
 *
1070
 * This is effectively a pre-checking function for query_is_distinct_for().
1071
 * It must return true if query_is_distinct_for() could possibly return true
1072
 * with this query, but it should not expend a lot of cycles.  The idea is
1073
 * that callers can avoid doing possibly-expensive processing to compute
1074
 * query_is_distinct_for()'s argument lists if the call could not possibly
1075
 * succeed.
1076
 */
1077
bool
1078
query_supports_distinctness(Query *query)
1079
0
{
1080
  /* SRFs break distinctness except with DISTINCT, see below */
1081
0
  if (query->hasTargetSRFs && query->distinctClause == NIL)
1082
0
    return false;
1083
1084
  /* check for features we can prove distinctness with */
1085
0
  if (query->distinctClause != NIL ||
1086
0
    query->groupClause != NIL ||
1087
0
    query->groupingSets != NIL ||
1088
0
    query->hasAggs ||
1089
0
    query->havingQual ||
1090
0
    query->setOperations)
1091
0
    return true;
1092
1093
0
  return false;
1094
0
}
1095
1096
/*
1097
 * query_is_distinct_for - does query never return duplicates of the
1098
 *    specified columns?
1099
 *
1100
 * query is a not-yet-planned subquery (in current usage, it's always from
1101
 * a subquery RTE, which the planner avoids scribbling on).
1102
 *
1103
 * colnos is an integer list of output column numbers (resno's).  We are
1104
 * interested in whether rows consisting of just these columns are certain
1105
 * to be distinct.  "Distinctness" is defined according to whether the
1106
 * corresponding upper-level equality operators listed in opids would think
1107
 * the values are distinct.  (Note: the opids entries could be cross-type
1108
 * operators, and thus not exactly the equality operators that the subquery
1109
 * would use itself.  We use equality_ops_are_compatible() to check
1110
 * compatibility.  That looks at opfamily membership for index AMs that have
1111
 * declared that they support consistent equality semantics within an
1112
 * opfamily, and so should give trustworthy answers for all operators that we
1113
 * might need to deal with here.)
1114
 */
1115
bool
1116
query_is_distinct_for(Query *query, List *colnos, List *opids)
1117
0
{
1118
0
  ListCell   *l;
1119
0
  Oid     opid;
1120
1121
0
  Assert(list_length(colnos) == list_length(opids));
1122
1123
  /*
1124
   * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1125
   * columns in the DISTINCT clause appear in colnos and operator semantics
1126
   * match.  This is true even if there are SRFs in the DISTINCT columns or
1127
   * elsewhere in the tlist.
1128
   */
1129
0
  if (query->distinctClause)
1130
0
  {
1131
0
    foreach(l, query->distinctClause)
1132
0
    {
1133
0
      SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1134
0
      TargetEntry *tle = get_sortgroupclause_tle(sgc,
1135
0
                             query->targetList);
1136
1137
0
      opid = distinct_col_search(tle->resno, colnos, opids);
1138
0
      if (!OidIsValid(opid) ||
1139
0
        !equality_ops_are_compatible(opid, sgc->eqop))
1140
0
        break;     /* exit early if no match */
1141
0
    }
1142
0
    if (l == NULL)     /* had matches for all? */
1143
0
      return true;
1144
0
  }
1145
1146
  /*
1147
   * Otherwise, a set-returning function in the query's targetlist can
1148
   * result in returning duplicate rows, despite any grouping that might
1149
   * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
1150
   * columns, it would be safe because they'd be expanded before grouping.
1151
   * But it doesn't currently seem worth the effort to check for that.)
1152
   */
1153
0
  if (query->hasTargetSRFs)
1154
0
    return false;
1155
1156
  /*
1157
   * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1158
   * the grouped columns appear in colnos and operator semantics match.
1159
   */
1160
0
  if (query->groupClause && !query->groupingSets)
1161
0
  {
1162
0
    foreach(l, query->groupClause)
1163
0
    {
1164
0
      SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1165
0
      TargetEntry *tle = get_sortgroupclause_tle(sgc,
1166
0
                             query->targetList);
1167
1168
0
      opid = distinct_col_search(tle->resno, colnos, opids);
1169
0
      if (!OidIsValid(opid) ||
1170
0
        !equality_ops_are_compatible(opid, sgc->eqop))
1171
0
        break;     /* exit early if no match */
1172
0
    }
1173
0
    if (l == NULL)     /* had matches for all? */
1174
0
      return true;
1175
0
  }
1176
0
  else if (query->groupingSets)
1177
0
  {
1178
    /*
1179
     * If we have grouping sets with expressions, we probably don't have
1180
     * uniqueness and analysis would be hard. Punt.
1181
     */
1182
0
    if (query->groupClause)
1183
0
      return false;
1184
1185
    /*
1186
     * If we have no groupClause (therefore no grouping expressions), we
1187
     * might have one or many empty grouping sets. If there's just one,
1188
     * then we're returning only one row and are certainly unique. But
1189
     * otherwise, we know we're certainly not unique.
1190
     */
1191
0
    if (list_length(query->groupingSets) == 1 &&
1192
0
      ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1193
0
      return true;
1194
0
    else
1195
0
      return false;
1196
0
  }
1197
0
  else
1198
0
  {
1199
    /*
1200
     * If we have no GROUP BY, but do have aggregates or HAVING, then the
1201
     * result is at most one row so it's surely unique, for any operators.
1202
     */
1203
0
    if (query->hasAggs || query->havingQual)
1204
0
      return true;
1205
0
  }
1206
1207
  /*
1208
   * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1209
   * except with ALL.
1210
   */
1211
0
  if (query->setOperations)
1212
0
  {
1213
0
    SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1214
1215
0
    Assert(topop->op != SETOP_NONE);
1216
1217
0
    if (!topop->all)
1218
0
    {
1219
0
      ListCell   *lg;
1220
1221
      /* We're good if all the nonjunk output columns are in colnos */
1222
0
      lg = list_head(topop->groupClauses);
1223
0
      foreach(l, query->targetList)
1224
0
      {
1225
0
        TargetEntry *tle = (TargetEntry *) lfirst(l);
1226
0
        SortGroupClause *sgc;
1227
1228
0
        if (tle->resjunk)
1229
0
          continue; /* ignore resjunk columns */
1230
1231
        /* non-resjunk columns should have grouping clauses */
1232
0
        Assert(lg != NULL);
1233
0
        sgc = (SortGroupClause *) lfirst(lg);
1234
0
        lg = lnext(topop->groupClauses, lg);
1235
1236
0
        opid = distinct_col_search(tle->resno, colnos, opids);
1237
0
        if (!OidIsValid(opid) ||
1238
0
          !equality_ops_are_compatible(opid, sgc->eqop))
1239
0
          break;   /* exit early if no match */
1240
0
      }
1241
0
      if (l == NULL)   /* had matches for all? */
1242
0
        return true;
1243
0
    }
1244
0
  }
1245
1246
  /*
1247
   * XXX Are there any other cases in which we can easily see the result
1248
   * must be distinct?
1249
   *
1250
   * If you do add more smarts to this function, be sure to update
1251
   * query_supports_distinctness() to match.
1252
   */
1253
1254
0
  return false;
1255
0
}
1256
1257
/*
1258
 * distinct_col_search - subroutine for query_is_distinct_for
1259
 *
1260
 * If colno is in colnos, return the corresponding element of opids,
1261
 * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
1262
 * but if it does, we arbitrarily select the first match.)
1263
 */
1264
static Oid
1265
distinct_col_search(int colno, List *colnos, List *opids)
1266
0
{
1267
0
  ListCell   *lc1,
1268
0
         *lc2;
1269
1270
0
  forboth(lc1, colnos, lc2, opids)
1271
0
  {
1272
0
    if (colno == lfirst_int(lc1))
1273
0
      return lfirst_oid(lc2);
1274
0
  }
1275
0
  return InvalidOid;
1276
0
}
1277
1278
1279
/*
1280
 * innerrel_is_unique
1281
 *    Check if the innerrel provably contains at most one tuple matching any
1282
 *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
1283
 *
1284
 * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1285
 * identify the outerrel by its Relids.  This asymmetry supports use of this
1286
 * function before joinrels have been built.  (The caller is expected to
1287
 * also supply the joinrelids, just to save recalculating that.)
1288
 *
1289
 * The proof must be made based only on clauses that will be "joinquals"
1290
 * rather than "otherquals" at execution.  For an inner join there's no
1291
 * difference; but if the join is outer, we must ignore pushed-down quals,
1292
 * as those will become "otherquals".  Note that this means the answer might
1293
 * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1294
 * answer without regard to that, callers must take care not to call this
1295
 * with jointypes that would be classified differently by IS_OUTER_JOIN().
1296
 *
1297
 * The actual proof is undertaken by is_innerrel_unique_for(); this function
1298
 * is a frontend that is mainly concerned with caching the answers.
1299
 * In particular, the force_cache argument allows overriding the internal
1300
 * heuristic about whether to cache negative answers; it should be "true"
1301
 * if making an inquiry that is not part of the normal bottom-up join search
1302
 * sequence.
1303
 */
1304
bool
1305
innerrel_is_unique(PlannerInfo *root,
1306
           Relids joinrelids,
1307
           Relids outerrelids,
1308
           RelOptInfo *innerrel,
1309
           JoinType jointype,
1310
           List *restrictlist,
1311
           bool force_cache)
1312
0
{
1313
0
  return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1314
0
                  jointype, restrictlist, force_cache, NULL);
1315
0
}
1316
1317
/*
1318
 * innerrel_is_unique_ext
1319
 *    Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
1320
 *    additional clauses from a baserestrictinfo list used to prove the
1321
 *    uniqueness.
1322
 *
1323
 * A non-NULL extra_clauses indicates that we're checking for self-join and
1324
 * correspondingly dealing with filtered clauses.
1325
 */
1326
bool
1327
innerrel_is_unique_ext(PlannerInfo *root,
1328
             Relids joinrelids,
1329
             Relids outerrelids,
1330
             RelOptInfo *innerrel,
1331
             JoinType jointype,
1332
             List *restrictlist,
1333
             bool force_cache,
1334
             List **extra_clauses)
1335
0
{
1336
0
  MemoryContext old_context;
1337
0
  ListCell   *lc;
1338
0
  UniqueRelInfo *uniqueRelInfo;
1339
0
  List     *outer_exprs = NIL;
1340
0
  bool    self_join = (extra_clauses != NULL);
1341
1342
  /* Certainly can't prove uniqueness when there are no joinclauses */
1343
0
  if (restrictlist == NIL)
1344
0
    return false;
1345
1346
  /*
1347
   * Make a quick check to eliminate cases in which we will surely be unable
1348
   * to prove uniqueness of the innerrel.
1349
   */
1350
0
  if (!rel_supports_distinctness(root, innerrel))
1351
0
    return false;
1352
1353
  /*
1354
   * Query the cache to see if we've managed to prove that innerrel is
1355
   * unique for any subset of this outerrel.  For non-self-join search, we
1356
   * don't need an exact match, as extra outerrels can't make the innerrel
1357
   * any less unique (or more formally, the restrictlist for a join to a
1358
   * superset outerrel must be a superset of the conditions we successfully
1359
   * used before). For self-join search, we require an exact match of
1360
   * outerrels because we need extra clauses to be valid for our case. Also,
1361
   * for self-join checking we've filtered the clauses list.  Thus, we can
1362
   * match only the result cached for a self-join search for another
1363
   * self-join check.
1364
   */
1365
0
  foreach(lc, innerrel->unique_for_rels)
1366
0
  {
1367
0
    uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1368
1369
0
    if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
1370
0
      (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1371
0
       uniqueRelInfo->self_join))
1372
0
    {
1373
0
      if (extra_clauses)
1374
0
        *extra_clauses = uniqueRelInfo->extra_clauses;
1375
0
      return true;   /* Success! */
1376
0
    }
1377
0
  }
1378
1379
  /*
1380
   * Conversely, we may have already determined that this outerrel, or some
1381
   * superset thereof, cannot prove this innerrel to be unique.
1382
   */
1383
0
  foreach(lc, innerrel->non_unique_for_rels)
1384
0
  {
1385
0
    Relids    unique_for_rels = (Relids) lfirst(lc);
1386
1387
0
    if (bms_is_subset(outerrelids, unique_for_rels))
1388
0
      return false;
1389
0
  }
1390
1391
  /* No cached information, so try to make the proof. */
1392
0
  if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1393
0
                 jointype, restrictlist,
1394
0
                 self_join ? &outer_exprs : NULL))
1395
0
  {
1396
    /*
1397
     * Cache the positive result for future probes, being sure to keep it
1398
     * in the planner_cxt even if we are working in GEQO.
1399
     *
1400
     * Note: one might consider trying to isolate the minimal subset of
1401
     * the outerrels that proved the innerrel unique.  But it's not worth
1402
     * the trouble, because the planner builds up joinrels incrementally
1403
     * and so we'll see the minimally sufficient outerrels before any
1404
     * supersets of them anyway.
1405
     */
1406
0
    old_context = MemoryContextSwitchTo(root->planner_cxt);
1407
0
    uniqueRelInfo = makeNode(UniqueRelInfo);
1408
0
    uniqueRelInfo->outerrelids = bms_copy(outerrelids);
1409
0
    uniqueRelInfo->self_join = self_join;
1410
0
    uniqueRelInfo->extra_clauses = outer_exprs;
1411
0
    innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1412
0
                      uniqueRelInfo);
1413
0
    MemoryContextSwitchTo(old_context);
1414
1415
0
    if (extra_clauses)
1416
0
      *extra_clauses = outer_exprs;
1417
0
    return true;     /* Success! */
1418
0
  }
1419
0
  else
1420
0
  {
1421
    /*
1422
     * None of the join conditions for outerrel proved innerrel unique, so
1423
     * we can safely reject this outerrel or any subset of it in future
1424
     * checks.
1425
     *
1426
     * However, in normal planning mode, caching this knowledge is totally
1427
     * pointless; it won't be queried again, because we build up joinrels
1428
     * from smaller to larger.  It's only useful when using GEQO or
1429
     * another planner extension that attempts planning multiple times.
1430
     *
1431
     * Also, allow callers to override that heuristic and force caching;
1432
     * that's useful for reduce_unique_semijoins, which calls here before
1433
     * the normal join search starts.
1434
     */
1435
0
    if (force_cache || root->assumeReplanning)
1436
0
    {
1437
0
      old_context = MemoryContextSwitchTo(root->planner_cxt);
1438
0
      innerrel->non_unique_for_rels =
1439
0
        lappend(innerrel->non_unique_for_rels,
1440
0
            bms_copy(outerrelids));
1441
0
      MemoryContextSwitchTo(old_context);
1442
0
    }
1443
1444
0
    return false;
1445
0
  }
1446
0
}
1447
1448
/*
1449
 * is_innerrel_unique_for
1450
 *    Check if the innerrel provably contains at most one tuple matching any
1451
 *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
1452
 */
1453
static bool
1454
is_innerrel_unique_for(PlannerInfo *root,
1455
             Relids joinrelids,
1456
             Relids outerrelids,
1457
             RelOptInfo *innerrel,
1458
             JoinType jointype,
1459
             List *restrictlist,
1460
             List **extra_clauses)
1461
0
{
1462
0
  List     *clause_list = NIL;
1463
0
  ListCell   *lc;
1464
1465
  /*
1466
   * Search for mergejoinable clauses that constrain the inner rel against
1467
   * the outer rel.  If an operator is mergejoinable then it behaves like
1468
   * equality for some btree opclass, so it's what we want.  The
1469
   * mergejoinability test also eliminates clauses containing volatile
1470
   * functions, which we couldn't depend on.
1471
   */
1472
0
  foreach(lc, restrictlist)
1473
0
  {
1474
0
    RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1475
1476
    /*
1477
     * As noted above, if it's a pushed-down clause and we're at an outer
1478
     * join, we can't use it.
1479
     */
1480
0
    if (IS_OUTER_JOIN(jointype) &&
1481
0
      RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1482
0
      continue;
1483
1484
    /* Ignore if it's not a mergejoinable clause */
1485
0
    if (!restrictinfo->can_join ||
1486
0
      restrictinfo->mergeopfamilies == NIL)
1487
0
      continue;     /* not mergejoinable */
1488
1489
    /*
1490
     * Check if the clause has the form "outer op inner" or "inner op
1491
     * outer", and if so mark which side is inner.
1492
     */
1493
0
    if (!clause_sides_match_join(restrictinfo, outerrelids,
1494
0
                   innerrel->relids))
1495
0
      continue;     /* no good for these input relations */
1496
1497
    /* OK, add to the list */
1498
0
    clause_list = lappend(clause_list, restrictinfo);
1499
0
  }
1500
1501
  /* Let rel_is_distinct_for() do the hard work */
1502
0
  return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1503
0
}
1504
1505
/*
1506
 * Update EC members to point to the remaining relation instead of the removed
1507
 * one, removing duplicates.
1508
 *
1509
 * Restriction clauses for base relations are already distributed to
1510
 * the respective baserestrictinfo lists (see
1511
 * generate_implied_equalities_for_column). The above code has already processed
1512
 * this list and updated these clauses to reference the remaining
1513
 * relation, so that we can skip them here based on their relids.
1514
 *
1515
 * Likewise, we have already processed the join clauses that join the
1516
 * removed relation to the remaining one.
1517
 *
1518
 * Finally, there might be join clauses tying the removed relation to
1519
 * some third relation.  We can't just delete the source clauses and
1520
 * regenerate them from the EC because the corresponding equality
1521
 * operators might be missing (see the handling of ec_broken).
1522
 * Therefore, we will update the references in the source clauses.
1523
 *
1524
 * Derived clauses can be generated again, so it is simpler just to
1525
 * delete them.
1526
 */
1527
static void
1528
update_eclasses(EquivalenceClass *ec, int from, int to)
1529
0
{
1530
0
  List     *new_members = NIL;
1531
0
  List     *new_sources = NIL;
1532
1533
  /*
1534
   * We don't expect any EC child members to exist at this point.  Ensure
1535
   * that's the case, otherwise, we might be getting asked to do something
1536
   * this function hasn't been coded for.
1537
   */
1538
0
  Assert(ec->ec_childmembers == NULL);
1539
1540
0
  foreach_node(EquivalenceMember, em, ec->ec_members)
1541
0
  {
1542
0
    bool    is_redundant = false;
1543
1544
0
    if (!bms_is_member(from, em->em_relids))
1545
0
    {
1546
0
      new_members = lappend(new_members, em);
1547
0
      continue;
1548
0
    }
1549
1550
0
    em->em_relids = adjust_relid_set(em->em_relids, from, to);
1551
0
    em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
1552
1553
    /* We only process inner joins */
1554
0
    ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
1555
0
                 replace_relid_callback);
1556
1557
0
    foreach_node(EquivalenceMember, other, new_members)
1558
0
    {
1559
0
      if (!equal(em->em_relids, other->em_relids))
1560
0
        continue;
1561
1562
0
      if (equal(em->em_expr, other->em_expr))
1563
0
      {
1564
0
        is_redundant = true;
1565
0
        break;
1566
0
      }
1567
0
    }
1568
1569
0
    if (!is_redundant)
1570
0
      new_members = lappend(new_members, em);
1571
0
  }
1572
1573
0
  list_free(ec->ec_members);
1574
0
  ec->ec_members = new_members;
1575
1576
0
  ec_clear_derived_clauses(ec);
1577
1578
  /* Update EC source expressions */
1579
0
  foreach_node(RestrictInfo, rinfo, ec->ec_sources)
1580
0
  {
1581
0
    bool    is_redundant = false;
1582
1583
0
    if (!bms_is_member(from, rinfo->required_relids))
1584
0
    {
1585
0
      new_sources = lappend(new_sources, rinfo);
1586
0
      continue;
1587
0
    }
1588
1589
0
    ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
1590
0
                 replace_relid_callback);
1591
1592
    /*
1593
     * After switching the clause to the remaining relation, check it for
1594
     * redundancy with existing ones. We don't have to check for
1595
     * redundancy with derived clauses, because we've just deleted them.
1596
     */
1597
0
    foreach_node(RestrictInfo, other, new_sources)
1598
0
    {
1599
0
      if (!equal(rinfo->clause_relids, other->clause_relids))
1600
0
        continue;
1601
1602
0
      if (equal(rinfo->clause, other->clause))
1603
0
      {
1604
0
        is_redundant = true;
1605
0
        break;
1606
0
      }
1607
0
    }
1608
1609
0
    if (!is_redundant)
1610
0
      new_sources = lappend(new_sources, rinfo);
1611
0
  }
1612
1613
0
  list_free(ec->ec_sources);
1614
0
  ec->ec_sources = new_sources;
1615
0
  ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
1616
0
}
1617
1618
/*
1619
 * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1620
 * which makes almost every RestrictInfo unique.  This type of comparison is
1621
 * useful when removing duplicates while moving RestrictInfo's from removed
1622
 * relation to remaining relation during self-join elimination.
1623
 *
1624
 * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1625
 * get rid of this function.
1626
 */
1627
static bool
1628
restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1629
0
{
1630
0
  int     saved_rinfo_serial = a->rinfo_serial;
1631
0
  bool    result;
1632
1633
0
  a->rinfo_serial = b->rinfo_serial;
1634
0
  result = equal(a, b);
1635
0
  a->rinfo_serial = saved_rinfo_serial;
1636
1637
0
  return result;
1638
0
}
1639
1640
/*
1641
 * This function adds all non-redundant clauses to the keeping relation
1642
 * during self-join elimination.  That is a contradictory operation. On the
1643
 * one hand, we reduce the length of the `restrict` lists, which can
1644
 * impact planning or executing time.  Additionally, we improve the
1645
 * accuracy of cardinality estimation.  On the other hand, it is one more
1646
 * place that can make planning time much longer in specific cases.  It
1647
 * would have been better to avoid calling the equal() function here, but
1648
 * it's the only way to detect duplicated inequality expressions.
1649
 *
1650
 * (*keep_rinfo_list) is given by pointer because it might be altered by
1651
 * distribute_restrictinfo_to_rels().
1652
 */
1653
static void
1654
add_non_redundant_clauses(PlannerInfo *root,
1655
              List *rinfo_candidates,
1656
              List **keep_rinfo_list,
1657
              Index removed_relid)
1658
0
{
1659
0
  foreach_node(RestrictInfo, rinfo, rinfo_candidates)
1660
0
  {
1661
0
    bool    is_redundant = false;
1662
1663
0
    Assert(!bms_is_member(removed_relid, rinfo->required_relids));
1664
1665
0
    foreach_node(RestrictInfo, src, (*keep_rinfo_list))
1666
0
    {
1667
0
      if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1668
        /* Can't compare trivially different clauses */
1669
0
        continue;
1670
1671
0
      if (src == rinfo ||
1672
0
        (rinfo->parent_ec != NULL &&
1673
0
         src->parent_ec == rinfo->parent_ec) ||
1674
0
        restrict_infos_logically_equal(rinfo, src))
1675
0
      {
1676
0
        is_redundant = true;
1677
0
        break;
1678
0
      }
1679
0
    }
1680
0
    if (!is_redundant)
1681
0
      distribute_restrictinfo_to_rels(root, rinfo);
1682
0
  }
1683
0
}
1684
1685
/*
1686
 * A custom callback for ChangeVarNodesExtended() providing
1687
 * Self-join elimination (SJE) related functionality
1688
 *
1689
 * SJE needs to skip the RangeTblRef node
1690
 * type.  During SJE's last step, remove_rel_from_joinlist() removes
1691
 * remaining RangeTblRefs with target relid.  If ChangeVarNodes() replaces
1692
 * the target relid before, remove_rel_from_joinlist() fails to identify
1693
 * the nodes to delete.
1694
 *
1695
 * SJE also needs to change the relids within RestrictInfo's.
1696
 */
1697
static bool
1698
replace_relid_callback(Node *node, ChangeVarNodes_context *context)
1699
0
{
1700
0
  if (IsA(node, RangeTblRef))
1701
0
  {
1702
0
    return true;
1703
0
  }
1704
0
  else if (IsA(node, RestrictInfo))
1705
0
  {
1706
0
    RestrictInfo *rinfo = (RestrictInfo *) node;
1707
0
    int     relid = -1;
1708
0
    bool    is_req_equal =
1709
0
      (rinfo->required_relids == rinfo->clause_relids);
1710
0
    bool    clause_relids_is_multiple =
1711
0
      (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
1712
1713
    /*
1714
     * Recurse down into clauses if the target relation is present in
1715
     * clause_relids or required_relids.  We must check required_relids
1716
     * because the relation not present in clause_relids might still be
1717
     * present somewhere in orclause.
1718
     */
1719
0
    if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
1720
0
      bms_is_member(context->rt_index, rinfo->required_relids))
1721
0
    {
1722
0
      Relids    new_clause_relids;
1723
1724
0
      ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
1725
0
      ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
1726
1727
0
      new_clause_relids = adjust_relid_set(rinfo->clause_relids,
1728
0
                         context->rt_index,
1729
0
                         context->new_index);
1730
1731
      /*
1732
       * Incrementally adjust num_base_rels based on the change of
1733
       * clause_relids, which could contain both base relids and
1734
       * outer-join relids.  This operation is legal until we remove
1735
       * only baserels.
1736
       */
1737
0
      rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
1738
0
        bms_num_members(new_clause_relids);
1739
1740
0
      rinfo->clause_relids = new_clause_relids;
1741
0
      rinfo->left_relids =
1742
0
        adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
1743
0
      rinfo->right_relids =
1744
0
        adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
1745
0
    }
1746
1747
0
    if (is_req_equal)
1748
0
      rinfo->required_relids = rinfo->clause_relids;
1749
0
    else
1750
0
      rinfo->required_relids =
1751
0
        adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
1752
1753
0
    rinfo->outer_relids =
1754
0
      adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
1755
0
    rinfo->incompatible_relids =
1756
0
      adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
1757
1758
0
    if (rinfo->mergeopfamilies &&
1759
0
      bms_get_singleton_member(rinfo->clause_relids, &relid) &&
1760
0
      clause_relids_is_multiple &&
1761
0
      relid == context->new_index && IsA(rinfo->clause, OpExpr))
1762
0
    {
1763
0
      Expr     *leftOp;
1764
0
      Expr     *rightOp;
1765
1766
0
      leftOp = (Expr *) get_leftop(rinfo->clause);
1767
0
      rightOp = (Expr *) get_rightop(rinfo->clause);
1768
1769
      /*
1770
       * For self-join elimination, changing varnos could transform
1771
       * "t1.a = t2.a" into "t1.a = t1.a".  That is always true as long
1772
       * as "t1.a" is not null.  We use qual() to check for such a case,
1773
       * and then we replace the qual for a check for not null
1774
       * (NullTest).
1775
       */
1776
0
      if (leftOp != NULL && equal(leftOp, rightOp))
1777
0
      {
1778
0
        NullTest   *ntest = makeNode(NullTest);
1779
1780
0
        ntest->arg = leftOp;
1781
0
        ntest->nulltesttype = IS_NOT_NULL;
1782
0
        ntest->argisrow = false;
1783
0
        ntest->location = -1;
1784
0
        rinfo->clause = (Expr *) ntest;
1785
0
        rinfo->mergeopfamilies = NIL;
1786
0
        rinfo->left_em = NULL;
1787
0
        rinfo->right_em = NULL;
1788
0
      }
1789
0
      Assert(rinfo->orclause == NULL);
1790
0
    }
1791
0
    return true;
1792
0
  }
1793
1794
0
  return false;
1795
0
}
1796
1797
/*
1798
 * Remove a relation after we have proven that it participates only in an
1799
 * unneeded unique self-join.
1800
 *
1801
 * Replace any links in planner info structures.
1802
 *
1803
 * Transfer join and restriction clauses from the removed relation to the
1804
 * remaining one. We change the Vars of the clause to point to the
1805
 * remaining relation instead of the removed one. The clauses that require
1806
 * a subset of joinrelids become restriction clauses of the remaining
1807
 * relation, and others remain join clauses. We append them to
1808
 * baserestrictinfo and joininfo, respectively, trying not to introduce
1809
 * duplicates.
1810
 *
1811
 * We also have to process the 'joinclauses' list here, because it
1812
 * contains EC-derived join clauses which must become filter clauses. It
1813
 * is not enough to just correct the ECs because the EC-derived
1814
 * restrictions are generated before join removal (see
1815
 * generate_base_implied_equalities).
1816
 *
1817
 * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
1818
 * cached relids and relid bitmapsets can be correctly cleaned during the
1819
 * self-join elimination procedure.
1820
 */
1821
static void
1822
remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1823
           RelOptInfo *toKeep, RelOptInfo *toRemove,
1824
           List *restrictlist)
1825
0
{
1826
0
  List     *joininfos;
1827
0
  ListCell   *lc;
1828
0
  int     i;
1829
0
  List     *jinfo_candidates = NIL;
1830
0
  List     *binfo_candidates = NIL;
1831
1832
0
  Assert(toKeep->relid > 0);
1833
0
  Assert(toRemove->relid > 0);
1834
1835
  /*
1836
   * Replace the index of the removing table with the keeping one. The
1837
   * technique of removing/distributing restrictinfo is used here to attach
1838
   * just appeared (for keeping relation) join clauses and avoid adding
1839
   * duplicates of those that already exist in the joininfo list.
1840
   */
1841
0
  joininfos = list_copy(toRemove->joininfo);
1842
0
  foreach_node(RestrictInfo, rinfo, joininfos)
1843
0
  {
1844
0
    remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
1845
0
    ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1846
0
                 0, replace_relid_callback);
1847
1848
0
    if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1849
0
      jinfo_candidates = lappend(jinfo_candidates, rinfo);
1850
0
    else
1851
0
      binfo_candidates = lappend(binfo_candidates, rinfo);
1852
0
  }
1853
1854
  /*
1855
   * Concatenate restrictlist to the list of base restrictions of the
1856
   * removing table just to simplify the replacement procedure: all of them
1857
   * weren't connected to any keeping relations and need to be added to some
1858
   * rels.
1859
   */
1860
0
  toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1861
0
                       restrictlist);
1862
0
  foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
1863
0
  {
1864
0
    ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1865
0
                 0, replace_relid_callback);
1866
1867
0
    if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1868
0
      jinfo_candidates = lappend(jinfo_candidates, rinfo);
1869
0
    else
1870
0
      binfo_candidates = lappend(binfo_candidates, rinfo);
1871
0
  }
1872
1873
  /*
1874
   * Now, add all non-redundant clauses to the keeping relation.
1875
   */
1876
0
  add_non_redundant_clauses(root, binfo_candidates,
1877
0
                &toKeep->baserestrictinfo, toRemove->relid);
1878
0
  add_non_redundant_clauses(root, jinfo_candidates,
1879
0
                &toKeep->joininfo, toRemove->relid);
1880
1881
0
  list_free(binfo_candidates);
1882
0
  list_free(jinfo_candidates);
1883
1884
  /*
1885
   * Arrange equivalence classes, mentioned removing a table, with the
1886
   * keeping one: varno of removing table should be replaced in members and
1887
   * sources lists. Also, remove duplicated elements if this replacement
1888
   * procedure created them.
1889
   */
1890
0
  i = -1;
1891
0
  while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1892
0
  {
1893
0
    EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1894
1895
0
    update_eclasses(ec, toRemove->relid, toKeep->relid);
1896
0
    toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1897
0
  }
1898
1899
  /*
1900
   * Transfer the targetlist and attr_needed flags.
1901
   */
1902
1903
0
  foreach(lc, toRemove->reltarget->exprs)
1904
0
  {
1905
0
    Node     *node = lfirst(lc);
1906
1907
0
    ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
1908
0
                 replace_relid_callback);
1909
0
    if (!list_member(toKeep->reltarget->exprs, node))
1910
0
      toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1911
0
  }
1912
1913
0
  for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1914
0
  {
1915
0
    int     attno = i - toKeep->min_attr;
1916
1917
0
    toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
1918
0
                            toRemove->relid, toKeep->relid);
1919
0
    toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1920
0
                           toRemove->attr_needed[attno]);
1921
0
  }
1922
1923
  /*
1924
   * If the removed relation has a row mark, transfer it to the remaining
1925
   * one.
1926
   *
1927
   * If both rels have row marks, just keep the one corresponding to the
1928
   * remaining relation because we verified earlier that they have the same
1929
   * strength.
1930
   */
1931
0
  if (rmark)
1932
0
  {
1933
0
    if (kmark)
1934
0
    {
1935
0
      Assert(kmark->markType == rmark->markType);
1936
1937
0
      root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1938
0
    }
1939
0
    else
1940
0
    {
1941
      /* Shouldn't have inheritance children here. */
1942
0
      Assert(rmark->rti == rmark->prti);
1943
1944
0
      rmark->rti = rmark->prti = toKeep->relid;
1945
0
    }
1946
0
  }
1947
1948
  /*
1949
   * Replace varno in all the query structures, except nodes RangeTblRef
1950
   * otherwise later remove_rel_from_joinlist will yield errors.
1951
   */
1952
0
  ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
1953
0
               0, replace_relid_callback);
1954
1955
  /* Replace links in the planner info */
1956
0
  remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1957
1958
  /* At last, replace varno in root targetlist and HAVING clause */
1959
0
  ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
1960
0
               toKeep->relid, 0, replace_relid_callback);
1961
0
  ChangeVarNodesExtended((Node *) root->processed_groupClause,
1962
0
               toRemove->relid, toKeep->relid, 0,
1963
0
               replace_relid_callback);
1964
1965
0
  adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
1966
0
  adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1967
1968
  /*
1969
   * There may be references to the rel in root->fkey_list, but if so,
1970
   * match_foreign_keys_to_quals() will get rid of them.
1971
   */
1972
1973
  /*
1974
   * Finally, remove the rel from the baserel array to prevent it from being
1975
   * referenced again.  (We can't do this earlier because
1976
   * remove_join_clause_from_rels will touch it.)
1977
   */
1978
0
  root->simple_rel_array[toRemove->relid] = NULL;
1979
0
  root->simple_rte_array[toRemove->relid] = NULL;
1980
1981
  /* And nuke the RelOptInfo, just in case there's another access path. */
1982
0
  pfree(toRemove);
1983
1984
1985
  /*
1986
   * Now repeat construction of attr_needed bits coming from all other
1987
   * sources.
1988
   */
1989
0
  rebuild_placeholder_attr_needed(root);
1990
0
  rebuild_joinclause_attr_needed(root);
1991
0
  rebuild_eclass_attr_needed(root);
1992
0
  rebuild_lateral_attr_needed(root);
1993
0
}
1994
1995
/*
1996
 * split_selfjoin_quals
1997
 *    Processes 'joinquals' by building two lists: one containing the quals
1998
 *    where the columns/exprs are on either side of the join match and
1999
 *    another one containing the remaining quals.
2000
 *
2001
 * 'joinquals' must only contain quals for a RTE_RELATION being joined to
2002
 * itself.
2003
 */
2004
static void
2005
split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
2006
           List **otherjoinquals, int from, int to)
2007
0
{
2008
0
  List     *sjoinquals = NIL;
2009
0
  List     *ojoinquals = NIL;
2010
2011
0
  foreach_node(RestrictInfo, rinfo, joinquals)
2012
0
  {
2013
0
    OpExpr     *expr;
2014
0
    Node     *leftexpr;
2015
0
    Node     *rightexpr;
2016
2017
    /* In general, clause looks like F(arg1) = G(arg2) */
2018
0
    if (!rinfo->mergeopfamilies ||
2019
0
      bms_num_members(rinfo->clause_relids) != 2 ||
2020
0
      bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
2021
0
      bms_membership(rinfo->right_relids) != BMS_SINGLETON)
2022
0
    {
2023
0
      ojoinquals = lappend(ojoinquals, rinfo);
2024
0
      continue;
2025
0
    }
2026
2027
0
    expr = (OpExpr *) rinfo->clause;
2028
2029
0
    if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
2030
0
    {
2031
0
      ojoinquals = lappend(ojoinquals, rinfo);
2032
0
      continue;
2033
0
    }
2034
2035
0
    leftexpr = get_leftop(rinfo->clause);
2036
0
    rightexpr = copyObject(get_rightop(rinfo->clause));
2037
2038
0
    if (leftexpr && IsA(leftexpr, RelabelType))
2039
0
      leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
2040
0
    if (rightexpr && IsA(rightexpr, RelabelType))
2041
0
      rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
2042
2043
    /*
2044
     * Quite an expensive operation, narrowing the use case. For example,
2045
     * when we have cast of the same var to different (but compatible)
2046
     * types.
2047
     */
2048
0
    ChangeVarNodesExtended(rightexpr,
2049
0
                 bms_singleton_member(rinfo->right_relids),
2050
0
                 bms_singleton_member(rinfo->left_relids), 0,
2051
0
                 replace_relid_callback);
2052
2053
0
    if (equal(leftexpr, rightexpr))
2054
0
      sjoinquals = lappend(sjoinquals, rinfo);
2055
0
    else
2056
0
      ojoinquals = lappend(ojoinquals, rinfo);
2057
0
  }
2058
2059
0
  *selfjoinquals = sjoinquals;
2060
0
  *otherjoinquals = ojoinquals;
2061
0
}
2062
2063
/*
2064
 * Check for a case when uniqueness is at least partly derived from a
2065
 * baserestrictinfo clause. In this case, we have a chance to return only
2066
 * one row (if such clauses on both sides of SJ are equal) or nothing (if they
2067
 * are different).
2068
 */
2069
static bool
2070
match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
2071
           Index relid)
2072
0
{
2073
0
  foreach_node(RestrictInfo, rinfo, uclauses)
2074
0
  {
2075
0
    Expr     *clause;
2076
0
    Node     *iclause;
2077
0
    Node     *c1;
2078
0
    bool    matched = false;
2079
2080
0
    Assert(outer->relid > 0 && relid > 0);
2081
2082
    /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
2083
0
    Assert(bms_is_empty(rinfo->left_relids) ^
2084
0
         bms_is_empty(rinfo->right_relids));
2085
2086
0
    clause = (Expr *) copyObject(rinfo->clause);
2087
0
    ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
2088
0
                 replace_relid_callback);
2089
2090
0
    iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
2091
0
      get_leftop(clause);
2092
0
    c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
2093
0
      get_rightop(clause);
2094
2095
    /*
2096
     * Compare these left and right sides with the corresponding sides of
2097
     * the outer's filters. If no one is detected - return immediately.
2098
     */
2099
0
    foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
2100
0
    {
2101
0
      Node     *oclause;
2102
0
      Node     *c2;
2103
2104
0
      if (orinfo->mergeopfamilies == NIL)
2105
        /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
2106
0
        continue;
2107
2108
0
      Assert(is_opclause(orinfo->clause));
2109
2110
0
      oclause = bms_is_empty(orinfo->left_relids) ?
2111
0
        get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
2112
0
      c2 = (bms_is_empty(orinfo->left_relids) ?
2113
0
          get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
2114
2115
0
      if (equal(iclause, oclause) && equal(c1, c2))
2116
0
      {
2117
0
        matched = true;
2118
0
        break;
2119
0
      }
2120
0
    }
2121
2122
0
    if (!matched)
2123
0
      return false;
2124
0
  }
2125
2126
0
  return true;
2127
0
}
2128
2129
/*
2130
 * Find and remove unique self-joins in a group of base relations that have
2131
 * the same Oid.
2132
 *
2133
 * Returns a set of relids that were removed.
2134
 */
2135
static Relids
2136
remove_self_joins_one_group(PlannerInfo *root, Relids relids)
2137
0
{
2138
0
  Relids    result = NULL;
2139
0
  int     k;        /* Index of kept relation */
2140
0
  int     r = -1;     /* Index of removed relation */
2141
2142
0
  while ((r = bms_next_member(relids, r)) > 0)
2143
0
  {
2144
0
    RelOptInfo *rrel = root->simple_rel_array[r];
2145
2146
0
    k = r;
2147
2148
0
    while ((k = bms_next_member(relids, k)) > 0)
2149
0
    {
2150
0
      Relids    joinrelids = NULL;
2151
0
      RelOptInfo *krel = root->simple_rel_array[k];
2152
0
      List     *restrictlist;
2153
0
      List     *selfjoinquals;
2154
0
      List     *otherjoinquals;
2155
0
      ListCell   *lc;
2156
0
      bool    jinfo_check = true;
2157
0
      PlanRowMark *kmark = NULL;
2158
0
      PlanRowMark *rmark = NULL;
2159
0
      List     *uclauses = NIL;
2160
2161
      /* A sanity check: the relations have the same Oid. */
2162
0
      Assert(root->simple_rte_array[k]->relid ==
2163
0
           root->simple_rte_array[r]->relid);
2164
2165
      /*
2166
       * It is impossible to eliminate the join of two relations if they
2167
       * belong to different rules of order. Otherwise, the planner
2168
       * can't find any variants of the correct query plan.
2169
       */
2170
0
      foreach(lc, root->join_info_list)
2171
0
      {
2172
0
        SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
2173
2174
0
        if ((bms_is_member(k, info->syn_lefthand) ^
2175
0
           bms_is_member(r, info->syn_lefthand)) ||
2176
0
          (bms_is_member(k, info->syn_righthand) ^
2177
0
           bms_is_member(r, info->syn_righthand)))
2178
0
        {
2179
0
          jinfo_check = false;
2180
0
          break;
2181
0
        }
2182
0
      }
2183
0
      if (!jinfo_check)
2184
0
        continue;
2185
2186
      /*
2187
       * Check Row Marks equivalence. We can't remove the join if the
2188
       * relations have row marks of different strength (e.g., one is
2189
       * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
2190
       * EvalPlanQual rechecking).
2191
       */
2192
0
      foreach(lc, root->rowMarks)
2193
0
      {
2194
0
        PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2195
2196
0
        if (rowMark->rti == r)
2197
0
        {
2198
0
          Assert(rmark == NULL);
2199
0
          rmark = rowMark;
2200
0
        }
2201
0
        else if (rowMark->rti == k)
2202
0
        {
2203
0
          Assert(kmark == NULL);
2204
0
          kmark = rowMark;
2205
0
        }
2206
2207
0
        if (kmark && rmark)
2208
0
          break;
2209
0
      }
2210
0
      if (kmark && rmark && kmark->markType != rmark->markType)
2211
0
        continue;
2212
2213
      /*
2214
       * We only deal with base rels here, so their relids bitset
2215
       * contains only one member -- their relid.
2216
       */
2217
0
      joinrelids = bms_add_member(joinrelids, r);
2218
0
      joinrelids = bms_add_member(joinrelids, k);
2219
2220
      /*
2221
       * PHVs should not impose any constraints on removing self-joins.
2222
       */
2223
2224
      /*
2225
       * At this stage, joininfo lists of inner and outer can contain
2226
       * only clauses required for a superior outer join that can't
2227
       * influence this optimization. So, we can avoid to call the
2228
       * build_joinrel_restrictlist() routine.
2229
       */
2230
0
      restrictlist = generate_join_implied_equalities(root, joinrelids,
2231
0
                              rrel->relids,
2232
0
                              krel, NULL);
2233
0
      if (restrictlist == NIL)
2234
0
        continue;
2235
2236
      /*
2237
       * Process restrictlist to separate the self-join quals from the
2238
       * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
2239
       * otherjoinquals.
2240
       */
2241
0
      split_selfjoin_quals(root, restrictlist, &selfjoinquals,
2242
0
                 &otherjoinquals, rrel->relid, krel->relid);
2243
2244
0
      Assert(list_length(restrictlist) ==
2245
0
           (list_length(selfjoinquals) + list_length(otherjoinquals)));
2246
2247
      /*
2248
       * To enable SJE for the only degenerate case without any self
2249
       * join clauses at all, add baserestrictinfo to this list. The
2250
       * degenerate case works only if both sides have the same clause.
2251
       * So doesn't matter which side to add.
2252
       */
2253
0
      selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo);
2254
2255
      /*
2256
       * Determine if the rrel can duplicate outer rows. We must bypass
2257
       * the unique rel cache here since we're possibly using a subset
2258
       * of join quals. We can use 'force_cache' == true when all join
2259
       * quals are self-join quals.  Otherwise, we could end up putting
2260
       * false negatives in the cache.
2261
       */
2262
0
      if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
2263
0
                    krel, JOIN_INNER, selfjoinquals,
2264
0
                    list_length(otherjoinquals) == 0,
2265
0
                    &uclauses))
2266
0
        continue;
2267
2268
      /*
2269
       * 'uclauses' is the copy of outer->baserestrictinfo that are
2270
       * associated with an index.  We proved by matching selfjoinquals
2271
       * to a unique index that the outer relation has at most one
2272
       * matching row for each inner row.  Sometimes that is not enough.
2273
       * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
2274
       * unique index is (a,b).  Having non-empty uclauses, we must
2275
       * validate that the inner baserestrictinfo contains the same
2276
       * expressions, or we won't match the same row on each side of the
2277
       * join.
2278
       */
2279
0
      if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
2280
0
        continue;
2281
2282
      /*
2283
       * Remove rrel ReloptInfo from the planner structures and the
2284
       * corresponding row mark.
2285
       */
2286
0
      remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
2287
2288
0
      result = bms_add_member(result, r);
2289
2290
      /* We have removed the outer relation, try the next one. */
2291
0
      break;
2292
0
    }
2293
0
  }
2294
2295
0
  return result;
2296
0
}
2297
2298
/*
2299
 * Gather indexes of base relations from the joinlist and try to eliminate self
2300
 * joins.
2301
 */
2302
static Relids
2303
remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2304
0
{
2305
0
  ListCell   *jl;
2306
0
  Relids    relids = NULL;
2307
0
  SelfJoinCandidate *candidates = NULL;
2308
0
  int     i;
2309
0
  int     j;
2310
0
  int     numRels;
2311
2312
  /* Collect indexes of base relations of the join tree */
2313
0
  foreach(jl, joinlist)
2314
0
  {
2315
0
    Node     *jlnode = (Node *) lfirst(jl);
2316
2317
0
    if (IsA(jlnode, RangeTblRef))
2318
0
    {
2319
0
      int     varno = ((RangeTblRef *) jlnode)->rtindex;
2320
0
      RangeTblEntry *rte = root->simple_rte_array[varno];
2321
2322
      /*
2323
       * We only consider ordinary relations as candidates to be
2324
       * removed, and these relations should not have TABLESAMPLE
2325
       * clauses specified.  Removing a relation with TABLESAMPLE clause
2326
       * could potentially change the syntax of the query. Because of
2327
       * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
2328
       * Query->mergeTargetRelation associated rel cannot be eliminated.
2329
       */
2330
0
      if (rte->rtekind == RTE_RELATION &&
2331
0
        rte->relkind == RELKIND_RELATION &&
2332
0
        rte->tablesample == NULL &&
2333
0
        varno != root->parse->resultRelation &&
2334
0
        varno != root->parse->mergeTargetRelation)
2335
0
      {
2336
0
        Assert(!bms_is_member(varno, relids));
2337
0
        relids = bms_add_member(relids, varno);
2338
0
      }
2339
0
    }
2340
0
    else if (IsA(jlnode, List))
2341
0
    {
2342
      /* Recursively go inside the sub-joinlist */
2343
0
      toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2344
0
                         toRemove);
2345
0
    }
2346
0
    else
2347
0
      elog(ERROR, "unrecognized joinlist node type: %d",
2348
0
         (int) nodeTag(jlnode));
2349
0
  }
2350
2351
0
  numRels = bms_num_members(relids);
2352
2353
  /* Need at least two relations for the join */
2354
0
  if (numRels < 2)
2355
0
    return toRemove;
2356
2357
  /*
2358
   * In order to find relations with the same oid we first build an array of
2359
   * candidates and then sort it by oid.
2360
   */
2361
0
  candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
2362
0
                        numRels);
2363
0
  i = -1;
2364
0
  j = 0;
2365
0
  while ((i = bms_next_member(relids, i)) >= 0)
2366
0
  {
2367
0
    candidates[j].relid = i;
2368
0
    candidates[j].reloid = root->simple_rte_array[i]->relid;
2369
0
    j++;
2370
0
  }
2371
2372
0
  qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2373
0
      self_join_candidates_cmp);
2374
2375
  /*
2376
   * Iteratively form a group of relation indexes with the same oid and
2377
   * launch the routine that detects self-joins in this group and removes
2378
   * excessive range table entries.
2379
   *
2380
   * At the end of the iteration, exclude the group from the overall relids
2381
   * list. So each next iteration of the cycle will involve less and less
2382
   * value of relids.
2383
   */
2384
0
  i = 0;
2385
0
  for (j = 1; j < numRels + 1; j++)
2386
0
  {
2387
0
    if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2388
0
    {
2389
0
      if (j - i >= 2)
2390
0
      {
2391
        /* Create a group of relation indexes with the same oid */
2392
0
        Relids    group = NULL;
2393
0
        Relids    removed;
2394
2395
0
        while (i < j)
2396
0
        {
2397
0
          group = bms_add_member(group, candidates[i].relid);
2398
0
          i++;
2399
0
        }
2400
0
        relids = bms_del_members(relids, group);
2401
2402
        /*
2403
         * Try to remove self-joins from a group of identical entries.
2404
         * Make the next attempt iteratively - if something is deleted
2405
         * from a group, changes in clauses and equivalence classes
2406
         * can give us a chance to find more candidates.
2407
         */
2408
0
        do
2409
0
        {
2410
0
          Assert(!bms_overlap(group, toRemove));
2411
0
          removed = remove_self_joins_one_group(root, group);
2412
0
          toRemove = bms_add_members(toRemove, removed);
2413
0
          group = bms_del_members(group, removed);
2414
0
        } while (!bms_is_empty(removed) &&
2415
0
             bms_membership(group) == BMS_MULTIPLE);
2416
0
        bms_free(removed);
2417
0
        bms_free(group);
2418
0
      }
2419
0
      else
2420
0
      {
2421
        /* Single relation, just remove it from the set */
2422
0
        relids = bms_del_member(relids, candidates[i].relid);
2423
0
        i = j;
2424
0
      }
2425
0
    }
2426
0
  }
2427
2428
0
  Assert(bms_is_empty(relids));
2429
2430
0
  return toRemove;
2431
0
}
2432
2433
/*
2434
 * Compare self-join candidates by their oids.
2435
 */
2436
static int
2437
self_join_candidates_cmp(const void *a, const void *b)
2438
0
{
2439
0
  const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2440
0
  const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2441
2442
0
  if (ca->reloid != cb->reloid)
2443
0
    return (ca->reloid < cb->reloid ? -1 : 1);
2444
0
  else
2445
0
    return 0;
2446
0
}
2447
2448
/*
2449
 * Find and remove useless self joins.
2450
 *
2451
 * Search for joins where a relation is joined to itself. If the join clause
2452
 * for each tuple from one side of the join is proven to match the same
2453
 * physical row (or nothing) on the other side, that self-join can be
2454
 * eliminated from the query.  Suitable join clauses are assumed to be in the
2455
 * form of X = X, and can be replaced with NOT NULL clauses.
2456
 *
2457
 * For the sake of simplicity, we don't apply this optimization to special
2458
 * joins. Here is a list of what we could do in some particular cases:
2459
 * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2460
 * and then removed normally.
2461
 * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2462
 * (IS NULL on join columns OR NOT inner quals)'.
2463
 * 'a a1 left join a a2': could simplify to a scan like inner but without
2464
 * NOT NULL conditions on join columns.
2465
 * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2466
 * can both remove rows and introduce duplicates.
2467
 *
2468
 * To search for removable joins, we order all the relations on their Oid,
2469
 * go over each set with the same Oid, and consider each pair of relations
2470
 * in this set.
2471
 *
2472
 * To remove the join, we mark one of the participating relations as dead
2473
 * and rewrite all references to it to point to the remaining relation.
2474
 * This includes modifying RestrictInfos, EquivalenceClasses, and
2475
 * EquivalenceMembers. We also have to modify the row marks. The join clauses
2476
 * of the removed relation become either restriction or join clauses, based on
2477
 * whether they reference any relations not participating in the removed join.
2478
 *
2479
 * 'joinlist' is the top-level joinlist of the query. If it has any
2480
 * references to the removed relations, we update them to point to the
2481
 * remaining ones.
2482
 */
2483
List *
2484
remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2485
0
{
2486
0
  Relids    toRemove = NULL;
2487
0
  int     relid = -1;
2488
2489
0
  if (!enable_self_join_elimination || joinlist == NIL ||
2490
0
    (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2491
0
    return joinlist;
2492
2493
  /*
2494
   * Merge pairs of relations participated in self-join. Remove unnecessary
2495
   * range table entries.
2496
   */
2497
0
  toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2498
2499
0
  if (unlikely(toRemove != NULL))
2500
0
  {
2501
    /* At the end, remove orphaned relation links */
2502
0
    while ((relid = bms_next_member(toRemove, relid)) >= 0)
2503
0
    {
2504
0
      int     nremoved = 0;
2505
2506
0
      joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2507
0
      if (nremoved != 1)
2508
0
        elog(ERROR, "failed to find relation %d in joinlist", relid);
2509
0
    }
2510
0
  }
2511
2512
0
  return joinlist;
2513
0
}