Coverage Report

Created: 2026-05-14 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/duckdb/src/optimizer/deliminator.cpp
Line
Count
Source
1
#include "duckdb/optimizer/deliminator.hpp"
2
3
#include "duckdb/planner/expression/bound_cast_expression.hpp"
4
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
5
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"
6
#include "duckdb/planner/expression/bound_operator_expression.hpp"
7
#include "duckdb/planner/filter/expression_filter.hpp"
8
#include "duckdb/planner/operator/logical_aggregate.hpp"
9
#include "duckdb/planner/operator/logical_comparison_join.hpp"
10
#include "duckdb/planner/operator/logical_delim_get.hpp"
11
#include "duckdb/planner/operator/logical_filter.hpp"
12
#include "duckdb/planner/operator/logical_get.hpp"
13
#include "duckdb/planner/table_filter.hpp"
14
15
#include <algorithm>
16
17
namespace duckdb {
18
19
struct JoinWithDelimGet {
20
0
  JoinWithDelimGet(unique_ptr<LogicalOperator> &join_p, idx_t depth_p) : join(join_p), depth(depth_p) {
21
0
  }
22
  reference<unique_ptr<LogicalOperator>> join;
23
  idx_t depth;
24
};
25
26
struct DelimCandidate {
27
public:
28
  explicit DelimCandidate(unique_ptr<LogicalOperator> &op, LogicalComparisonJoin &delim_join)
29
0
      : op(op), delim_join(delim_join), delim_get_count(0) {
30
0
  }
31
32
public:
33
  unique_ptr<LogicalOperator> &op;
34
  LogicalComparisonJoin &delim_join;
35
  vector<JoinWithDelimGet> joins;
36
  idx_t delim_get_count;
37
};
38
39
0
static bool IsEqualityJoinCondition(const JoinCondition &cond) {
40
0
  if (!cond.IsComparison()) {
41
0
    return false;
42
0
  }
43
0
  switch (cond.GetComparisonType()) {
44
0
  case ExpressionType::COMPARE_EQUAL:
45
0
  case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
46
0
    return true;
47
0
  default:
48
0
    return false;
49
0
  }
50
0
}
51
52
8
unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op) {
53
8
  root = op;
54
55
8
  vector<DelimCandidate> candidates;
56
8
  FindCandidates(op, candidates);
57
58
8
  if (candidates.empty()) {
59
8
    return op;
60
8
  }
61
62
0
  for (auto &candidate : candidates) {
63
0
    auto &delim_join = candidate.delim_join;
64
65
    // Sort these so the deepest are first
66
0
    std::sort(candidate.joins.begin(), candidate.joins.end(),
67
0
              [](const JoinWithDelimGet &lhs, const JoinWithDelimGet &rhs) { return lhs.depth > rhs.depth; });
68
69
0
    bool all_removed = true;
70
0
    if (!candidate.joins.empty() && HasSelection(delim_join)) {
71
      // Keep the deepest join with DelimGet in these cases,
72
      // as the selection can greatly reduce the cost of the RHS child of the DelimJoin
73
0
      candidate.joins.erase(candidate.joins.begin());
74
0
      all_removed = false;
75
0
    }
76
77
0
    bool all_equality_conditions = true;
78
0
    for (auto &join : candidate.joins) {
79
0
      all_removed = RemoveJoinWithDelimGet(delim_join, candidate.delim_get_count, join.join.get(),
80
0
                                           all_equality_conditions) &&
81
0
                    all_removed;
82
0
    }
83
84
    // Change type if there are no more duplicate-eliminated columns
85
0
    if (candidate.joins.size() == candidate.delim_get_count && all_removed) {
86
0
      delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
87
0
      delim_join.duplicate_eliminated_columns.clear();
88
0
    }
89
90
    // Only DelimJoins are ever created as SINGLE joins,
91
    // and we can switch from SINGLE to LEFT if the RHS is de-duplicated by an aggr
92
0
    if (delim_join.join_type == JoinType::SINGLE) {
93
0
      TrySwitchSingleToLeft(delim_join);
94
0
    }
95
0
  }
96
97
0
  return op;
98
8
}
99
100
76
void Deliminator::FindCandidates(unique_ptr<LogicalOperator> &op, vector<DelimCandidate> &candidates) {
101
76
  for (auto &child : op->children) {
102
68
    FindCandidates(child, candidates);
103
68
  }
104
105
76
  if (op->type != LogicalOperatorType::LOGICAL_DELIM_JOIN) {
106
76
    return;
107
76
  }
108
109
0
  candidates.emplace_back(op, op->Cast<LogicalComparisonJoin>());
110
0
  auto &candidate = candidates.back();
111
112
  // DelimGets are in the RHS
113
0
  FindJoinWithDelimGet(op->children[1], candidate);
114
0
}
115
116
0
bool Deliminator::HasSelection(const LogicalOperator &op) {
117
  // TODO once we implement selectivity estimation using samples we need to use that here
118
0
  switch (op.type) {
119
0
  case LogicalOperatorType::LOGICAL_GET: {
120
0
    auto &get = op.Cast<LogicalGet>();
121
0
    for (const auto &entry : get.table_filters) {
122
0
      auto &expr_filter = ExpressionFilter::GetExpressionFilter(entry.Filter(), "Deliminator::HasSelection");
123
0
      auto &expr = *expr_filter.expr;
124
0
      if (expr.GetExpressionClass() != ExpressionClass::BOUND_OPERATOR ||
125
0
          expr.GetExpressionType() != ExpressionType::OPERATOR_IS_NOT_NULL) {
126
0
        return true;
127
0
      }
128
0
    }
129
0
    break;
130
0
  }
131
0
  case LogicalOperatorType::LOGICAL_FILTER:
132
0
    return true;
133
0
  default:
134
0
    break;
135
0
  }
136
137
0
  for (auto &child : op.children) {
138
0
    if (HasSelection(*child)) {
139
0
      return true;
140
0
    }
141
0
  }
142
143
0
  return false;
144
0
}
145
146
0
static bool OperatorIsDelimGet(LogicalOperator &op) {
147
0
  if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
148
0
    return true;
149
0
  }
150
0
  if (op.type == LogicalOperatorType::LOGICAL_FILTER &&
151
0
      op.children[0]->type == LogicalOperatorType::LOGICAL_DELIM_GET) {
152
0
    return true;
153
0
  }
154
0
  return false;
155
0
}
156
157
0
void Deliminator::FindJoinWithDelimGet(unique_ptr<LogicalOperator> &op, DelimCandidate &candidate, idx_t depth) {
158
0
  if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
159
0
    FindJoinWithDelimGet(op->children[0], candidate, depth + 1);
160
0
  } else if (op->type == LogicalOperatorType::LOGICAL_DELIM_GET) {
161
0
    candidate.delim_get_count++;
162
0
  } else {
163
0
    for (auto &child : op->children) {
164
0
      FindJoinWithDelimGet(child, candidate, depth + 1);
165
0
    }
166
0
  }
167
168
0
  if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN &&
169
0
      (OperatorIsDelimGet(*op->children[0]) || OperatorIsDelimGet(*op->children[1]))) {
170
0
    candidate.joins.emplace_back(op, depth);
171
0
  }
172
0
}
173
174
0
static bool ChildJoinTypeCanBeDeliminated(JoinType &join_type) {
175
0
  switch (join_type) {
176
0
  case JoinType::INNER:
177
0
  case JoinType::SEMI:
178
0
    return true;
179
0
  default:
180
0
    return false;
181
0
  }
182
0
}
183
184
bool Deliminator::RemoveJoinWithDelimGet(LogicalComparisonJoin &delim_join, const idx_t delim_get_count,
185
0
                                         unique_ptr<LogicalOperator> &join, bool &all_equality_conditions) {
186
0
  auto &comparison_join = join->Cast<LogicalComparisonJoin>();
187
0
  if (!ChildJoinTypeCanBeDeliminated(comparison_join.join_type)) {
188
0
    return false;
189
0
  }
190
191
  // Get the index (left or right) of the DelimGet side of the join
192
0
  const idx_t delim_idx = OperatorIsDelimGet(*join->children[0]) ? 0 : 1;
193
194
  // Get the filter (if any)
195
0
  optional_ptr<LogicalFilter> filter;
196
0
  vector<unique_ptr<Expression>> filter_expressions;
197
0
  if (join->children[delim_idx]->type == LogicalOperatorType::LOGICAL_FILTER) {
198
0
    filter = &join->children[delim_idx]->Cast<LogicalFilter>();
199
0
    for (auto &expr : filter->expressions) {
200
0
      filter_expressions.emplace_back(expr->Copy());
201
0
    }
202
0
  }
203
204
0
  auto &delim_get = (filter ? filter->children[0] : join->children[delim_idx])->Cast<LogicalDelimGet>();
205
0
  if (comparison_join.conditions.size() != delim_get.chunk_types.size()) {
206
0
    return false; // Joining with DelimGet adds new information
207
0
  }
208
209
  // Check if joining with the DelimGet is redundant, and collect relevant column information
210
0
  ColumnBindingReplacer replacer;
211
0
  auto &replacement_bindings = replacer.replacement_bindings;
212
0
  for (auto &cond : comparison_join.conditions) {
213
0
    if (!cond.IsComparison()) {
214
0
      return false;
215
0
    }
216
0
    all_equality_conditions = all_equality_conditions && IsEqualityJoinCondition(cond);
217
0
    auto &delim_side = delim_idx == 0 ? cond.GetLHS() : cond.GetRHS();
218
0
    auto &other_side = delim_idx == 0 ? cond.GetRHS() : cond.GetLHS();
219
0
    if (delim_side.GetExpressionType() != ExpressionType::BOUND_COLUMN_REF ||
220
0
        other_side.GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) {
221
0
      return false;
222
0
    }
223
0
    auto &delim_colref = delim_side.Cast<BoundColumnRefExpression>();
224
0
    auto &other_colref = other_side.Cast<BoundColumnRefExpression>();
225
0
    replacement_bindings.emplace_back(delim_colref.binding, other_colref.binding);
226
227
    // Only add IS NOT NULL filter for regular equality/inequality comparisons
228
    // Do NOT add for DISTINCT FROM variants, as they handle NULL correctly
229
0
    if (cond.GetComparisonType() != ExpressionType::COMPARE_NOT_DISTINCT_FROM &&
230
0
        cond.GetComparisonType() != ExpressionType::COMPARE_DISTINCT_FROM) {
231
0
      auto is_not_null_expr =
232
0
          make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
233
0
      is_not_null_expr->children.push_back(other_side.Copy());
234
0
      filter_expressions.push_back(std::move(is_not_null_expr));
235
0
    }
236
0
  }
237
238
0
  if (!all_equality_conditions &&
239
0
      !RemoveInequalityJoinWithDelimGet(delim_join, delim_get_count, join, replacement_bindings)) {
240
0
    return false;
241
0
  }
242
243
0
  unique_ptr<LogicalOperator> replacement_op = std::move(comparison_join.children[1 - delim_idx]);
244
0
  if (!filter_expressions.empty()) { // Create filter if necessary
245
0
    auto new_filter = make_uniq<LogicalFilter>();
246
0
    new_filter->expressions = std::move(filter_expressions);
247
0
    new_filter->children.emplace_back(std::move(replacement_op));
248
0
    replacement_op = std::move(new_filter);
249
0
  }
250
251
0
  join = std::move(replacement_op);
252
253
  // TODO: Maybe go from delim join instead to save work
254
0
  replacer.VisitOperator(*root);
255
0
  return true;
256
0
}
257
258
0
static bool InequalityDelimJoinCanBeEliminated(JoinType &join_type) {
259
0
  return join_type == JoinType::ANTI || join_type == JoinType::MARK || join_type == JoinType::SEMI ||
260
0
         join_type == JoinType::SINGLE;
261
0
}
262
263
bool FindAndReplaceBindings(vector<ColumnBinding> &traced_bindings, const vector<unique_ptr<Expression>> &expressions,
264
0
                            const vector<ColumnBinding> &current_bindings) {
265
0
  for (auto &binding : traced_bindings) {
266
0
    idx_t current_idx;
267
0
    for (current_idx = 0; current_idx < expressions.size(); current_idx++) {
268
0
      if (binding == current_bindings[current_idx]) {
269
0
        break;
270
0
      }
271
0
    }
272
273
0
    if (current_idx == expressions.size() ||
274
0
        expressions[current_idx]->GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) {
275
0
      return false; // Didn't find / can't deal with non-colref
276
0
    }
277
278
0
    auto &colref = expressions[current_idx]->Cast<BoundColumnRefExpression>();
279
0
    binding = colref.binding;
280
0
  }
281
0
  return true;
282
0
}
283
284
bool Deliminator::RemoveInequalityJoinWithDelimGet(LogicalComparisonJoin &delim_join, const idx_t delim_get_count,
285
                                                   unique_ptr<LogicalOperator> &join,
286
0
                                                   const vector<ReplacementBinding> &replacement_bindings) {
287
0
  auto &comparison_join = join->Cast<LogicalComparisonJoin>();
288
0
  auto &delim_conditions = delim_join.conditions;
289
0
  const auto &join_conditions = comparison_join.conditions;
290
0
  if (delim_get_count != 1 || !InequalityDelimJoinCanBeEliminated(delim_join.join_type) ||
291
0
      delim_conditions.size() != join_conditions.size()) {
292
0
    return false;
293
0
  }
294
295
  // TODO: we cannot perform the optimization here because our pure inequality joins don't implement
296
  //  JoinType::SINGLE yet, and JoinType::MARK is a special case
297
0
  if (delim_join.join_type == JoinType::SINGLE || delim_join.join_type == JoinType::MARK) {
298
0
    bool has_one_equality = false;
299
0
    for (auto &cond : join_conditions) {
300
0
      has_one_equality = has_one_equality || IsEqualityJoinCondition(cond);
301
0
    }
302
0
    if (!has_one_equality) {
303
0
      return false;
304
0
    }
305
0
  }
306
307
  // We only support colref's
308
0
  vector<ColumnBinding> traced_bindings;
309
0
  for (const auto &cond : delim_conditions) {
310
0
    if (!cond.IsComparison() || cond.GetRHS().GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) {
311
0
      return false;
312
0
    }
313
0
    auto &colref = cond.GetRHS().Cast<BoundColumnRefExpression>();
314
0
    traced_bindings.emplace_back(colref.binding);
315
0
  }
316
317
  // Now we trace down the bindings to the join (for now, we only trace it through a few operators)
318
0
  reference<LogicalOperator> current_op = *delim_join.children[1];
319
0
  while (&current_op.get() != join.get()) {
320
0
    if (current_op.get().children.size() != 1) {
321
0
      return false;
322
0
    }
323
324
0
    switch (current_op.get().type) {
325
0
    case LogicalOperatorType::LOGICAL_PROJECTION:
326
0
      FindAndReplaceBindings(traced_bindings, current_op.get().expressions, current_op.get().GetColumnBindings());
327
0
      break;
328
0
    case LogicalOperatorType::LOGICAL_FILTER:
329
0
      break; // Doesn't change bindings
330
0
    default:
331
0
      return false;
332
0
    }
333
0
    current_op = *current_op.get().children[0];
334
0
  }
335
336
  // Get the index (left or right) of the DelimGet side of the join
337
0
  const idx_t delim_idx = OperatorIsDelimGet(*join->children[0]) ? 0 : 1;
338
339
0
  bool found_all = true;
340
0
  for (idx_t cond_idx = 0; cond_idx < delim_conditions.size(); cond_idx++) {
341
0
    auto &delim_condition = delim_conditions[cond_idx];
342
0
    if (!delim_condition.IsComparison()) {
343
0
      continue;
344
0
    }
345
0
    const auto &traced_binding = traced_bindings[cond_idx];
346
347
0
    bool found = false;
348
0
    for (auto &join_condition : join_conditions) {
349
0
      auto &delim_side = delim_idx == 0 ? join_condition.GetLHS() : join_condition.GetRHS();
350
0
      auto &colref = delim_side.Cast<BoundColumnRefExpression>();
351
0
      if (colref.binding == traced_binding) {
352
0
        if (!join_condition.IsComparison()) {
353
0
          continue;
354
0
        }
355
0
        auto join_comparison = join_condition.GetComparisonType();
356
0
        auto original_join_comparison = join_condition.GetComparisonType(); // Save original for later check
357
0
        if (delim_condition.GetComparisonType() == ExpressionType::COMPARE_DISTINCT_FROM ||
358
0
            delim_condition.GetComparisonType() == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
359
          // We need to compare NULL values
360
0
          if (join_comparison == ExpressionType::COMPARE_EQUAL) {
361
0
            join_comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
362
0
          } else if (join_comparison == ExpressionType::COMPARE_NOTEQUAL) {
363
0
            join_comparison = ExpressionType::COMPARE_DISTINCT_FROM;
364
0
          } else if (join_comparison != ExpressionType::COMPARE_DISTINCT_FROM &&
365
0
                     join_comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
366
            // The optimization does not work here
367
0
            found = false;
368
0
            break;
369
0
          }
370
0
        }
371
0
        auto left_copy = delim_condition.LeftReference()->Copy();
372
0
        auto right_copy = delim_condition.RightReference()->Copy();
373
374
0
        delim_conditions[cond_idx] = JoinCondition(std::move(left_copy), std::move(right_copy),
375
0
                                                   FlipComparisonExpression(join_comparison));
376
        // join condition was a not equal and filtered out all NULLS.
377
        // DELIM JOIN need to do that for not DELIM_GET side. Easiest way is to change the
378
        // comparison expression type. See duckdb/duckdb#16803
379
        // Only convert if the ORIGINAL join had != or = (not DISTINCT FROM variants)
380
0
        if (delim_join.join_type != JoinType::MARK &&
381
0
            original_join_comparison != ExpressionType::COMPARE_DISTINCT_FROM &&
382
0
            original_join_comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
383
0
          auto final_comparison = delim_conditions[cond_idx].GetComparisonType();
384
0
          if (final_comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
385
0
            final_comparison = ExpressionType::COMPARE_NOTEQUAL;
386
0
          } else if (final_comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
387
0
            final_comparison = ExpressionType::COMPARE_EQUAL;
388
0
          }
389
0
          delim_conditions[cond_idx] =
390
0
              JoinCondition(delim_conditions[cond_idx].LeftReference()->Copy(),
391
0
                            delim_conditions[cond_idx].RightReference()->Copy(), final_comparison);
392
0
        }
393
0
        found = true;
394
0
        break;
395
0
      }
396
0
    }
397
0
    found_all = found_all && found;
398
0
  }
399
400
0
  return found_all;
401
0
}
402
403
0
void Deliminator::TrySwitchSingleToLeft(LogicalComparisonJoin &delim_join) {
404
0
  D_ASSERT(delim_join.join_type == JoinType::SINGLE);
405
406
  // Collect RHS bindings
407
0
  vector<ColumnBinding> join_bindings;
408
0
  for (const auto &cond : delim_join.conditions) {
409
0
    if (!IsEqualityJoinCondition(cond)) {
410
0
      return;
411
0
    }
412
0
    if (!cond.IsComparison() || cond.GetRHS().GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) {
413
0
      return;
414
0
    }
415
0
    auto &colref = cond.GetRHS().Cast<BoundColumnRefExpression>();
416
0
    join_bindings.emplace_back(colref.binding);
417
0
  }
418
419
  // Now try to find an aggr in the RHS such that the join_column_bindings is a superset of the groups
420
0
  reference<LogicalOperator> current_op = *delim_join.children[1];
421
0
  while (current_op.get().type != LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY) {
422
0
    if (current_op.get().children.size() != 1) {
423
0
      return;
424
0
    }
425
426
0
    switch (current_op.get().type) {
427
0
    case LogicalOperatorType::LOGICAL_PROJECTION:
428
0
      FindAndReplaceBindings(join_bindings, current_op.get().expressions, current_op.get().GetColumnBindings());
429
0
      break;
430
0
    case LogicalOperatorType::LOGICAL_FILTER:
431
0
      break; // Doesn't change bindings
432
0
    default:
433
0
      return;
434
0
    }
435
0
    current_op = *current_op.get().children[0];
436
0
  }
437
438
0
  D_ASSERT(current_op.get().type == LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY);
439
0
  const auto &aggr = current_op.get().Cast<LogicalAggregate>();
440
0
  if (!aggr.grouping_functions.empty()) {
441
0
    return;
442
0
  }
443
444
0
  for (idx_t group_idx = 0; group_idx < aggr.groups.size(); group_idx++) {
445
0
    if (std::find(join_bindings.begin(), join_bindings.end(),
446
0
                  ColumnBinding(aggr.group_index, ProjectionIndex(group_idx))) == join_bindings.end()) {
447
0
      return;
448
0
    }
449
0
  }
450
451
0
  delim_join.join_type = JoinType::LEFT;
452
0
}
453
454
} // namespace duckdb