Coverage Report

Created: 2025-10-09 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/optimizer/plan/planagg.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * planagg.c
4
 *    Special planning for aggregate queries.
5
 *
6
 * This module tries to replace MIN/MAX aggregate functions by subqueries
7
 * of the form
8
 *    (SELECT col FROM tab
9
 *     WHERE col IS NOT NULL AND existing-quals
10
 *     ORDER BY col ASC/DESC
11
 *     LIMIT 1)
12
 * Given a suitable index on tab.col, this can be much faster than the
13
 * generic scan-all-the-rows aggregation plan.  We can handle multiple
14
 * MIN/MAX aggregates by generating multiple subqueries, and their
15
 * orderings can be different.  However, if the query contains any
16
 * non-optimizable aggregates, there's no point since we'll have to
17
 * scan all the rows anyway.
18
 *
19
 *
20
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
21
 * Portions Copyright (c) 1994, Regents of the University of California
22
 *
23
 *
24
 * IDENTIFICATION
25
 *    src/backend/optimizer/plan/planagg.c
26
 *
27
 *-------------------------------------------------------------------------
28
 */
29
#include "postgres.h"
30
31
#include "access/htup_details.h"
32
#include "catalog/pg_aggregate.h"
33
#include "catalog/pg_type.h"
34
#include "nodes/makefuncs.h"
35
#include "nodes/nodeFuncs.h"
36
#include "optimizer/cost.h"
37
#include "optimizer/optimizer.h"
38
#include "optimizer/pathnode.h"
39
#include "optimizer/paths.h"
40
#include "optimizer/planmain.h"
41
#include "optimizer/planner.h"
42
#include "optimizer/subselect.h"
43
#include "optimizer/tlist.h"
44
#include "parser/parse_clause.h"
45
#include "parser/parsetree.h"
46
#include "rewrite/rewriteManip.h"
47
#include "utils/lsyscache.h"
48
#include "utils/syscache.h"
49
50
static bool can_minmax_aggs(PlannerInfo *root, List **context);
51
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
52
                Oid eqop, Oid sortop, bool reverse_sort,
53
                bool nulls_first);
54
static void minmax_qp_callback(PlannerInfo *root, void *extra);
55
static Oid  fetch_agg_sort_op(Oid aggfnoid);
56
57
58
/*
59
 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
60
 *
61
 * Check to see whether the query contains MIN/MAX aggregate functions that
62
 * might be optimizable via indexscans.  If it does, and all the aggregates
63
 * are potentially optimizable, then create a MinMaxAggPath and add it to
64
 * the (UPPERREL_GROUP_AGG, NULL) upperrel.
65
 *
66
 * This should be called by grouping_planner() just before it's ready to call
67
 * query_planner(), because we generate indexscan paths by cloning the
68
 * planner's state and invoking query_planner() on a modified version of
69
 * the query parsetree.  Thus, all preprocessing needed before query_planner()
70
 * must already be done.  This relies on the list of aggregates in
71
 * root->agginfos, so preprocess_aggrefs() must have been called already, too.
72
 */
73
void
74
preprocess_minmax_aggregates(PlannerInfo *root)
75
0
{
76
0
  Query    *parse = root->parse;
77
0
  FromExpr   *jtnode;
78
0
  RangeTblRef *rtr;
79
0
  RangeTblEntry *rte;
80
0
  List     *aggs_list;
81
0
  RelOptInfo *grouped_rel;
82
0
  ListCell   *lc;
83
84
  /* minmax_aggs list should be empty at this point */
85
0
  Assert(root->minmax_aggs == NIL);
86
87
  /* Nothing to do if query has no aggregates */
88
0
  if (!parse->hasAggs)
89
0
    return;
90
91
0
  Assert(!parse->setOperations);  /* shouldn't get here if a setop */
92
0
  Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
93
94
  /*
95
   * Reject unoptimizable cases.
96
   *
97
   * We don't handle GROUP BY or windowing, because our current
98
   * implementations of grouping require looking at all the rows anyway, and
99
   * so there's not much point in optimizing MIN/MAX.
100
   */
101
0
  if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
102
0
    parse->hasWindowFuncs)
103
0
    return;
104
105
  /*
106
   * Reject if query contains any CTEs; there's no way to build an indexscan
107
   * on one so we couldn't succeed here.  (If the CTEs are unreferenced,
108
   * that's not true, but it doesn't seem worth expending cycles to check.)
109
   */
110
0
  if (parse->cteList)
111
0
    return;
112
113
  /*
114
   * We also restrict the query to reference exactly one table, since join
115
   * conditions can't be handled reasonably.  (We could perhaps handle a
116
   * query containing cartesian-product joins, but it hardly seems worth the
117
   * trouble.)  However, the single table could be buried in several levels
118
   * of FromExpr due to subqueries.  Note the "single" table could be an
119
   * inheritance parent, too, including the case of a UNION ALL subquery
120
   * that's been flattened to an appendrel.
121
   */
122
0
  jtnode = parse->jointree;
123
0
  while (IsA(jtnode, FromExpr))
124
0
  {
125
0
    if (list_length(jtnode->fromlist) != 1)
126
0
      return;
127
0
    jtnode = linitial(jtnode->fromlist);
128
0
  }
129
0
  if (!IsA(jtnode, RangeTblRef))
130
0
    return;
131
0
  rtr = (RangeTblRef *) jtnode;
132
0
  rte = planner_rt_fetch(rtr->rtindex, root);
133
0
  if (rte->rtekind == RTE_RELATION)
134
0
     /* ordinary relation, ok */ ;
135
0
  else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
136
0
     /* flattened UNION ALL subquery, ok */ ;
137
0
  else
138
0
    return;
139
140
  /*
141
   * Examine all the aggregates and verify all are MIN/MAX aggregates.  Stop
142
   * as soon as we find one that isn't.
143
   */
144
0
  aggs_list = NIL;
145
0
  if (!can_minmax_aggs(root, &aggs_list))
146
0
    return;
147
148
  /*
149
   * OK, there is at least the possibility of performing the optimization.
150
   * Build an access path for each aggregate.  If any of the aggregates
151
   * prove to be non-indexable, give up; there is no point in optimizing
152
   * just some of them.
153
   */
154
0
  foreach(lc, aggs_list)
155
0
  {
156
0
    MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
157
0
    Oid     eqop;
158
0
    bool    reverse;
159
160
    /*
161
     * We'll need the equality operator that goes with the aggregate's
162
     * ordering operator.
163
     */
164
0
    eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
165
0
    if (!OidIsValid(eqop)) /* shouldn't happen */
166
0
      elog(ERROR, "could not find equality operator for ordering operator %u",
167
0
         mminfo->aggsortop);
168
169
    /*
170
     * We can use either an ordering that gives NULLS FIRST or one that
171
     * gives NULLS LAST; furthermore there's unlikely to be much
172
     * performance difference between them, so it doesn't seem worth
173
     * costing out both ways if we get a hit on the first one.  NULLS
174
     * FIRST is more likely to be available if the operator is a
175
     * reverse-sort operator, so try that first if reverse.
176
     */
177
0
    if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, reverse))
178
0
      continue;
179
0
    if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, !reverse))
180
0
      continue;
181
182
    /* No indexable path for this aggregate, so fail */
183
0
    return;
184
0
  }
185
186
  /*
187
   * OK, we can do the query this way.  Prepare to create a MinMaxAggPath
188
   * node.
189
   *
190
   * First, create an output Param node for each agg.  (If we end up not
191
   * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
192
   * which is not worth worrying about.  We can't wait till create_plan time
193
   * to decide whether to make the Param, unfortunately.)
194
   */
195
0
  foreach(lc, aggs_list)
196
0
  {
197
0
    MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
198
199
0
    mminfo->param =
200
0
      SS_make_initplan_output_param(root,
201
0
                      exprType((Node *) mminfo->target),
202
0
                      -1,
203
0
                      exprCollation((Node *) mminfo->target));
204
0
  }
205
206
  /*
207
   * Create a MinMaxAggPath node with the appropriate estimated costs and
208
   * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
209
   * it will compete against the standard aggregate implementation.  (It
210
   * will likely always win, but we need not assume that here.)
211
   *
212
   * Note: grouping_planner won't have created this upperrel yet, but it's
213
   * fine for us to create it first.  We will not have inserted the correct
214
   * consider_parallel value in it, but MinMaxAggPath paths are currently
215
   * never parallel-safe anyway, so that doesn't matter.  Likewise, it
216
   * doesn't matter that we haven't filled FDW-related fields in the rel.
217
   * Also, because there are no rowmarks, we know that the processed_tlist
218
   * doesn't need to change anymore, so making the pathtarget now is safe.
219
   */
220
0
  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
221
0
  add_path(grouped_rel, (Path *)
222
0
       create_minmaxagg_path(root, grouped_rel,
223
0
                   create_pathtarget(root,
224
0
                           root->processed_tlist),
225
0
                   aggs_list,
226
0
                   (List *) parse->havingQual));
227
0
}
228
229
/*
230
 * can_minmax_aggs
231
 *    Examine all the aggregates in the query, and check if they are
232
 *    all MIN/MAX aggregates.  If so, build a list of MinMaxAggInfo
233
 *    nodes for them.
234
 *
235
 * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
236
 */
237
static bool
238
can_minmax_aggs(PlannerInfo *root, List **context)
239
0
{
240
0
  ListCell   *lc;
241
242
  /*
243
   * This function used to have to scan the query for itself, but now we can
244
   * just thumb through the AggInfo list made by preprocess_aggrefs.
245
   */
246
0
  foreach(lc, root->agginfos)
247
0
  {
248
0
    AggInfo    *agginfo = lfirst_node(AggInfo, lc);
249
0
    Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
250
0
    Oid     aggsortop;
251
0
    TargetEntry *curTarget;
252
0
    MinMaxAggInfo *mminfo;
253
254
0
    Assert(aggref->agglevelsup == 0);
255
0
    if (list_length(aggref->args) != 1)
256
0
      return false;   /* it couldn't be MIN/MAX */
257
258
    /*
259
     * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
260
     * outcome if the aggsortop's operator class recognizes non-identical
261
     * values as equal.  For example, 4.0 and 4.00 are equal according to
262
     * numeric_ops, yet distinguishable.  If MIN() receives more than one
263
     * value equal to 4.0 and no value less than 4.0, it is unspecified
264
     * which of those equal values MIN() returns.  An ORDER BY expression
265
     * that differs for each of those equal values of the argument
266
     * expression makes the result predictable once again.  This is a
267
     * niche requirement, and we do not implement it with subquery paths.
268
     * In any case, this test lets us reject ordered-set aggregates
269
     * quickly.
270
     */
271
0
    if (aggref->aggorder != NIL)
272
0
      return false;
273
    /* note: we do not care if DISTINCT is mentioned ... */
274
275
    /*
276
     * We might implement the optimization when a FILTER clause is present
277
     * by adding the filter to the quals of the generated subquery.  For
278
     * now, just punt.
279
     */
280
0
    if (aggref->aggfilter != NULL)
281
0
      return false;
282
283
0
    aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
284
0
    if (!OidIsValid(aggsortop))
285
0
      return false;   /* not a MIN/MAX aggregate */
286
287
0
    curTarget = (TargetEntry *) linitial(aggref->args);
288
289
0
    if (contain_mutable_functions((Node *) curTarget->expr))
290
0
      return false;   /* not potentially indexable */
291
292
0
    if (type_is_rowtype(exprType((Node *) curTarget->expr)))
293
0
      return false;   /* IS NOT NULL would have weird semantics */
294
295
0
    mminfo = makeNode(MinMaxAggInfo);
296
0
    mminfo->aggfnoid = aggref->aggfnoid;
297
0
    mminfo->aggsortop = aggsortop;
298
0
    mminfo->target = curTarget->expr;
299
0
    mminfo->subroot = NULL; /* don't compute path yet */
300
0
    mminfo->path = NULL;
301
0
    mminfo->pathcost = 0;
302
0
    mminfo->param = NULL;
303
304
0
    *context = lappend(*context, mminfo);
305
0
  }
306
0
  return true;
307
0
}
308
309
/*
310
 * build_minmax_path
311
 *    Given a MIN/MAX aggregate, try to build an indexscan Path it can be
312
 *    optimized with.
313
 *
314
 * If successful, stash the best path in *mminfo and return true.
315
 * Otherwise, return false.
316
 */
317
static bool
318
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
319
          Oid eqop, Oid sortop, bool reverse_sort, bool nulls_first)
320
0
{
321
0
  PlannerInfo *subroot;
322
0
  Query    *parse;
323
0
  TargetEntry *tle;
324
0
  List     *tlist;
325
0
  NullTest   *ntest;
326
0
  SortGroupClause *sortcl;
327
0
  RelOptInfo *final_rel;
328
0
  Path     *sorted_path;
329
0
  Cost    path_cost;
330
0
  double    path_fraction;
331
332
  /*
333
   * We are going to construct what is effectively a sub-SELECT query, so
334
   * clone the current query level's state and adjust it to make it look
335
   * like a subquery.  Any outer references will now be one level higher
336
   * than before.  (This means that when we are done, there will be no Vars
337
   * of level 1, which is why the subquery can become an initplan.)
338
   */
339
0
  subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
340
0
  memcpy(subroot, root, sizeof(PlannerInfo));
341
0
  subroot->query_level++;
342
0
  subroot->parent_root = root;
343
0
  subroot->plan_name = choose_plan_name(root->glob, "minmax", true);
344
345
  /* reset subplan-related stuff */
346
0
  subroot->plan_params = NIL;
347
0
  subroot->outer_params = NULL;
348
0
  subroot->init_plans = NIL;
349
0
  subroot->agginfos = NIL;
350
0
  subroot->aggtransinfos = NIL;
351
352
0
  subroot->parse = parse = copyObject(root->parse);
353
0
  IncrementVarSublevelsUp((Node *) parse, 1, 1);
354
355
  /* append_rel_list might contain outer Vars? */
356
0
  subroot->append_rel_list = copyObject(root->append_rel_list);
357
0
  IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
358
  /* There shouldn't be any OJ info to translate, as yet */
359
0
  Assert(subroot->join_info_list == NIL);
360
  /* and we haven't made equivalence classes, either */
361
0
  Assert(subroot->eq_classes == NIL);
362
  /* and we haven't created PlaceHolderInfos, either */
363
0
  Assert(subroot->placeholder_list == NIL);
364
365
  /*----------
366
   * Generate modified query of the form
367
   *    (SELECT col FROM tab
368
   *     WHERE col IS NOT NULL AND existing-quals
369
   *     ORDER BY col ASC/DESC
370
   *     LIMIT 1)
371
   *----------
372
   */
373
  /* single tlist entry that is the aggregate target */
374
0
  tle = makeTargetEntry(copyObject(mminfo->target),
375
0
              (AttrNumber) 1,
376
0
              pstrdup("agg_target"),
377
0
              false);
378
0
  tlist = list_make1(tle);
379
0
  subroot->processed_tlist = parse->targetList = tlist;
380
381
  /* No HAVING, no DISTINCT, no aggregates anymore */
382
0
  parse->havingQual = NULL;
383
0
  subroot->hasHavingQual = false;
384
0
  parse->distinctClause = NIL;
385
0
  parse->hasDistinctOn = false;
386
0
  parse->hasAggs = false;
387
388
  /* Build "target IS NOT NULL" expression */
389
0
  ntest = makeNode(NullTest);
390
0
  ntest->nulltesttype = IS_NOT_NULL;
391
0
  ntest->arg = copyObject(mminfo->target);
392
  /* we checked it wasn't a rowtype in can_minmax_aggs */
393
0
  ntest->argisrow = false;
394
0
  ntest->location = -1;
395
396
  /* User might have had that in WHERE already */
397
0
  if (!list_member((List *) parse->jointree->quals, ntest))
398
0
    parse->jointree->quals = (Node *)
399
0
      lcons(ntest, (List *) parse->jointree->quals);
400
401
  /* Build suitable ORDER BY clause */
402
0
  sortcl = makeNode(SortGroupClause);
403
0
  sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
404
0
  sortcl->eqop = eqop;
405
0
  sortcl->sortop = sortop;
406
0
  sortcl->reverse_sort = reverse_sort;
407
0
  sortcl->nulls_first = nulls_first;
408
0
  sortcl->hashable = false;  /* no need to make this accurate */
409
0
  parse->sortClause = list_make1(sortcl);
410
411
  /* set up expressions for LIMIT 1 */
412
0
  parse->limitOffset = NULL;
413
0
  parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
414
0
                       sizeof(int64),
415
0
                       Int64GetDatum(1), false,
416
0
                       true);
417
418
  /*
419
   * Generate the best paths for this query, telling query_planner that we
420
   * have LIMIT 1.
421
   */
422
0
  subroot->tuple_fraction = 1.0;
423
0
  subroot->limit_tuples = 1.0;
424
425
0
  final_rel = query_planner(subroot, minmax_qp_callback, NULL);
426
427
  /*
428
   * Since we didn't go through subquery_planner() to handle the subquery,
429
   * we have to do some of the same cleanup it would do, in particular cope
430
   * with params and initplans used within this subquery.  (This won't
431
   * matter if we end up not using the subplan.)
432
   */
433
0
  SS_identify_outer_params(subroot);
434
0
  SS_charge_for_initplans(subroot, final_rel);
435
436
  /*
437
   * Get the best presorted path, that being the one that's cheapest for
438
   * fetching just one row.  If there's no such path, fail.
439
   */
440
0
  if (final_rel->rows > 1.0)
441
0
    path_fraction = 1.0 / final_rel->rows;
442
0
  else
443
0
    path_fraction = 1.0;
444
445
0
  sorted_path =
446
0
    get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
447
0
                          subroot->query_pathkeys,
448
0
                          NULL,
449
0
                          path_fraction);
450
0
  if (!sorted_path)
451
0
    return false;
452
453
  /*
454
   * The path might not return exactly what we want, so fix that.  (We
455
   * assume that this won't change any conclusions about which was the
456
   * cheapest path.)
457
   */
458
0
  sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
459
0
                       create_pathtarget(subroot,
460
0
                               subroot->processed_tlist));
461
462
  /*
463
   * Determine cost to get just the first row of the presorted path.
464
   *
465
   * Note: cost calculation here should match
466
   * compare_fractional_path_costs().
467
   */
468
0
  path_cost = sorted_path->startup_cost +
469
0
    path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
470
471
  /* Save state for further processing */
472
0
  mminfo->subroot = subroot;
473
0
  mminfo->path = sorted_path;
474
0
  mminfo->pathcost = path_cost;
475
476
0
  return true;
477
0
}
478
479
/*
480
 * Compute query_pathkeys and other pathkeys during query_planner()
481
 */
482
static void
483
minmax_qp_callback(PlannerInfo *root, void *extra)
484
0
{
485
0
  root->group_pathkeys = NIL;
486
0
  root->window_pathkeys = NIL;
487
0
  root->distinct_pathkeys = NIL;
488
489
0
  root->sort_pathkeys =
490
0
    make_pathkeys_for_sortclauses(root,
491
0
                    root->parse->sortClause,
492
0
                    root->parse->targetList);
493
494
0
  root->query_pathkeys = root->sort_pathkeys;
495
0
}
496
497
/*
498
 * Get the OID of the sort operator, if any, associated with an aggregate.
499
 * Returns InvalidOid if there is no such operator.
500
 */
501
static Oid
502
fetch_agg_sort_op(Oid aggfnoid)
503
0
{
504
0
  HeapTuple aggTuple;
505
0
  Form_pg_aggregate aggform;
506
0
  Oid     aggsortop;
507
508
  /* fetch aggregate entry from pg_aggregate */
509
0
  aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
510
0
  if (!HeapTupleIsValid(aggTuple))
511
0
    return InvalidOid;
512
0
  aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
513
0
  aggsortop = aggform->aggsortop;
514
0
  ReleaseSysCache(aggTuple);
515
516
0
  return aggsortop;
517
0
}