/src/postgres/src/backend/optimizer/plan/planagg.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 *
 * planagg.c
 *    Special planning for aggregate queries.
 *
 * This module tries to replace MIN/MAX aggregate functions by subqueries
 * of the form
 *    (SELECT col FROM tab
 *     WHERE col IS NOT NULL AND existing-quals
 *     ORDER BY col ASC/DESC
 *     LIMIT 1)
 * Given a suitable index on tab.col, this can be much faster than the
 * generic scan-all-the-rows aggregation plan.  We can handle multiple
 * MIN/MAX aggregates by generating multiple subqueries, and their
 * orderings can be different.  However, if the query contains any
 * non-optimizable aggregates, there's no point since we'll have to
 * scan all the rows anyway.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/optimizer/plan/planagg.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/subselect.h"
#include "optimizer/tlist.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"

static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
                Oid eqop, Oid sortop, bool reverse_sort,
                bool nulls_first);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid  fetch_agg_sort_op(Oid aggfnoid);


/*
 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
 *
 * Check to see whether the query contains MIN/MAX aggregate functions that
 * might be optimizable via indexscans.  If it does, and all the aggregates
 * are potentially optimizable, then create a MinMaxAggPath and add it to
 * the (UPPERREL_GROUP_AGG, NULL) upperrel.
 *
 * This should be called by grouping_planner() just before it's ready to call
 * query_planner(), because we generate indexscan paths by cloning the
 * planner's state and invoking query_planner() on a modified version of
 * the query parsetree.  Thus, all preprocessing needed before query_planner()
 * must already be done.  This relies on the list of aggregates in
 * root->agginfos, so preprocess_aggrefs() must have been called already, too.
 */
void
preprocess_minmax_aggregates(PlannerInfo *root)
{
  Query    *parse = root->parse;
  FromExpr   *jtnode;
  RangeTblRef *rtr;
  RangeTblEntry *rte;
  List     *aggs_list;
  RelOptInfo *grouped_rel;
  ListCell   *lc;

  /* minmax_aggs list should be empty at this point */
  Assert(root->minmax_aggs == NIL);

  /* Nothing to do if query has no aggregates */
  if (!parse->hasAggs)
    return;

  Assert(!parse->setOperations);  /* shouldn't get here if a setop */
  Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */

  /*
   * Reject unoptimizable cases.
   *
   * We don't handle GROUP BY or windowing, because our current
   * implementations of grouping require looking at all the rows anyway, and
   * so there's not much point in optimizing MIN/MAX.
   */
  if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
    parse->hasWindowFuncs)
    return;

  /*
   * Reject if query contains any CTEs; there's no way to build an indexscan
   * on one so we couldn't succeed here.  (If the CTEs are unreferenced,
   * that's not true, but it doesn't seem worth expending cycles to check.)
   */
  if (parse->cteList)
    return;

  /*
   * We also restrict the query to reference exactly one table, since join
   * conditions can't be handled reasonably.  (We could perhaps handle a
   * query containing cartesian-product joins, but it hardly seems worth the
   * trouble.)  However, the single table could be buried in several levels
   * of FromExpr due to subqueries.  Note the "single" table could be an
   * inheritance parent, too, including the case of a UNION ALL subquery
   * that's been flattened to an appendrel.
   */
  jtnode = parse->jointree;
  while (IsA(jtnode, FromExpr))
  {
    if (list_length(jtnode->fromlist) != 1)
      return;
    jtnode = linitial(jtnode->fromlist);
  }
  if (!IsA(jtnode, RangeTblRef))
    return;
  rtr = (RangeTblRef *) jtnode;
  rte = planner_rt_fetch(rtr->rtindex, root);
  if (rte->rtekind == RTE_RELATION)
     /* ordinary relation, ok */ ;
  else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
     /* flattened UNION ALL subquery, ok */ ;
  else
    return;

  /*
   * Examine all the aggregates and verify all are MIN/MAX aggregates.  Stop
   * as soon as we find one that isn't.
   */
  aggs_list = NIL;
  if (!can_minmax_aggs(root, &aggs_list))
    return;

  /*
   * OK, there is at least the possibility of performing the optimization.
   * Build an access path for each aggregate.  If any of the aggregates
   * prove to be non-indexable, give up; there is no point in optimizing
   * just some of them.
   */
  foreach(lc, aggs_list)
  {
    MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
    Oid     eqop;
    bool    reverse;

    /*
     * We'll need the equality operator that goes with the aggregate's
     * ordering operator.
     */
    eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
    if (!OidIsValid(eqop)) /* shouldn't happen */
      elog(ERROR, "could not find equality operator for ordering operator %u",
         mminfo->aggsortop);

    /*
     * We can use either an ordering that gives NULLS FIRST or one that
     * gives NULLS LAST; furthermore there's unlikely to be much
     * performance difference between them, so it doesn't seem worth
     * costing out both ways if we get a hit on the first one.  NULLS
     * FIRST is more likely to be available if the operator is a
     * reverse-sort operator, so try that first if reverse.
     */
    if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, reverse))
      continue;
    if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, !reverse))
      continue;

    /* No indexable path for this aggregate, so fail */
    return;
  }

  /*
   * OK, we can do the query this way.  Prepare to create a MinMaxAggPath
   * node.
   *
   * First, create an output Param node for each agg.  (If we end up not
   * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
   * which is not worth worrying about.  We can't wait till create_plan time
   * to decide whether to make the Param, unfortunately.)
   */
  foreach(lc, aggs_list)
  {
    MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);

    mminfo->param =
      SS_make_initplan_output_param(root,
                      exprType((Node *) mminfo->target),
                      -1,
                      exprCollation((Node *) mminfo->target));
  }

  /*
   * Create a MinMaxAggPath node with the appropriate estimated costs and
   * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
   * it will compete against the standard aggregate implementation.  (It
   * will likely always win, but we need not assume that here.)
   *
   * Note: grouping_planner won't have created this upperrel yet, but it's
   * fine for us to create it first.  We will not have inserted the correct
   * consider_parallel value in it, but MinMaxAggPath paths are currently
   * never parallel-safe anyway, so that doesn't matter.  Likewise, it
   * doesn't matter that we haven't filled FDW-related fields in the rel.
   * Also, because there are no rowmarks, we know that the processed_tlist
   * doesn't need to change anymore, so making the pathtarget now is safe.
   */
  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
  add_path(grouped_rel, (Path *)
       create_minmaxagg_path(root, grouped_rel,
                   create_pathtarget(root,
                           root->processed_tlist),
                   aggs_list,
                   (List *) parse->havingQual));
}

/*
 * can_minmax_aggs
 *    Examine all the aggregates in the query, and check if they are
 *    all MIN/MAX aggregates.  If so, build a list of MinMaxAggInfo
 *    nodes for them.
 *
 * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
 */
static bool
can_minmax_aggs(PlannerInfo *root, List **context)
{
  ListCell   *lc;

  /*
   * This function used to have to scan the query for itself, but now we can
   * just thumb through the AggInfo list made by preprocess_aggrefs.
   */
  foreach(lc, root->agginfos)
  {
    AggInfo    *agginfo = lfirst_node(AggInfo, lc);
    Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
    Oid     aggsortop;
    TargetEntry *curTarget;
    MinMaxAggInfo *mminfo;

    Assert(aggref->agglevelsup == 0);
    if (list_length(aggref->args) != 1)
      return false;   /* it couldn't be MIN/MAX */

    /*
     * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
     * outcome if the aggsortop's operator class recognizes non-identical
     * values as equal.  For example, 4.0 and 4.00 are equal according to
     * numeric_ops, yet distinguishable.  If MIN() receives more than one
     * value equal to 4.0 and no value less than 4.0, it is unspecified
     * which of those equal values MIN() returns.  An ORDER BY expression
     * that differs for each of those equal values of the argument
     * expression makes the result predictable once again.  This is a
     * niche requirement, and we do not implement it with subquery paths.
     * In any case, this test lets us reject ordered-set aggregates
     * quickly.
     */
    if (aggref->aggorder != NIL)
      return false;
    /* note: we do not care if DISTINCT is mentioned ... */

    /*
     * We might implement the optimization when a FILTER clause is present
     * by adding the filter to the quals of the generated subquery.  For
     * now, just punt.
     */
    if (aggref->aggfilter != NULL)
      return false;

    aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
    if (!OidIsValid(aggsortop))
      return false;   /* not a MIN/MAX aggregate */

    curTarget = (TargetEntry *) linitial(aggref->args);

    if (contain_mutable_functions((Node *) curTarget->expr))
      return false;   /* not potentially indexable */

    if (type_is_rowtype(exprType((Node *) curTarget->expr)))
      return false;   /* IS NOT NULL would have weird semantics */

    mminfo = makeNode(MinMaxAggInfo);
    mminfo->aggfnoid = aggref->aggfnoid;
    mminfo->aggsortop = aggsortop;
    mminfo->target = curTarget->expr;
    mminfo->subroot = NULL; /* don't compute path yet */
    mminfo->path = NULL;
    mminfo->pathcost = 0;
    mminfo->param = NULL;

    *context = lappend(*context, mminfo);
  }
  return true;
}

/*
 * build_minmax_path
 *    Given a MIN/MAX aggregate, try to build an indexscan Path it can be
 *    optimized with.
 *
 * If successful, stash the best path in *mminfo and return true.
 * Otherwise, return false.
 */
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
          Oid eqop, Oid sortop, bool reverse_sort, bool nulls_first)
{
  PlannerInfo *subroot;
  Query    *parse;
  TargetEntry *tle;
  List     *tlist;
  NullTest   *ntest;
  SortGroupClause *sortcl;
  RelOptInfo *final_rel;
  Path     *sorted_path;
  Cost    path_cost;
  double    path_fraction;

  /*
   * We are going to construct what is effectively a sub-SELECT query, so
   * clone the current query level's state and adjust it to make it look
   * like a subquery.  Any outer references will now be one level higher
   * than before.  (This means that when we are done, there will be no Vars
   * of level 1, which is why the subquery can become an initplan.)
   */
  subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
  memcpy(subroot, root, sizeof(PlannerInfo));
  subroot->query_level++;
  subroot->parent_root = root;
  /* reset subplan-related stuff */
  subroot->plan_params = NIL;
  subroot->outer_params = NULL;
  subroot->init_plans = NIL;
  subroot->agginfos = NIL;
  subroot->aggtransinfos = NIL;

  subroot->parse = parse = copyObject(root->parse);
  IncrementVarSublevelsUp((Node *) parse, 1, 1);

  /* append_rel_list might contain outer Vars? */
  subroot->append_rel_list = copyObject(root->append_rel_list);
  IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
  /* There shouldn't be any OJ info to translate, as yet */
  Assert(subroot->join_info_list == NIL);
  /* and we haven't made equivalence classes, either */
  Assert(subroot->eq_classes == NIL);
  /* and we haven't created PlaceHolderInfos, either */
  Assert(subroot->placeholder_list == NIL);

  /*----------
   * Generate modified query of the form
   *    (SELECT col FROM tab
   *     WHERE col IS NOT NULL AND existing-quals
   *     ORDER BY col ASC/DESC
   *     LIMIT 1)
   *----------
   */
  /* single tlist entry that is the aggregate target */
  tle = makeTargetEntry(copyObject(mminfo->target),
              (AttrNumber) 1,
              pstrdup("agg_target"),
              false);
  tlist = list_make1(tle);
  subroot->processed_tlist = parse->targetList = tlist;

  /* No HAVING, no DISTINCT, no aggregates anymore */
  parse->havingQual = NULL;
  subroot->hasHavingQual = false;
  parse->distinctClause = NIL;
  parse->hasDistinctOn = false;
  parse->hasAggs = false;

  /* Build "target IS NOT NULL" expression */
  ntest = makeNode(NullTest);
  ntest->nulltesttype = IS_NOT_NULL;
  ntest->arg = copyObject(mminfo->target);
  /* we checked it wasn't a rowtype in can_minmax_aggs */
  ntest->argisrow = false;
  ntest->location = -1;

  /* User might have had that in WHERE already */
  if (!list_member((List *) parse->jointree->quals, ntest))
    parse->jointree->quals = (Node *)
      lcons(ntest, (List *) parse->jointree->quals);

  /* Build suitable ORDER BY clause */
  sortcl = makeNode(SortGroupClause);
  sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
  sortcl->eqop = eqop;
  sortcl->sortop = sortop;
  sortcl->reverse_sort = reverse_sort;
  sortcl->nulls_first = nulls_first;
  sortcl->hashable = false; /* no need to make this accurate */
  parse->sortClause = list_make1(sortcl);

  /* set up expressions for LIMIT 1 */
  parse->limitOffset = NULL;
  parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
                       sizeof(int64),
                       Int64GetDatum(1), false,
                       FLOAT8PASSBYVAL);

  /*
   * Generate the best paths for this query, telling query_planner that we
   * have LIMIT 1.
   */
  subroot->tuple_fraction = 1.0;
  subroot->limit_tuples = 1.0;

  final_rel = query_planner(subroot, minmax_qp_callback, NULL);

  /*
   * Since we didn't go through subquery_planner() to handle the subquery,
   * we have to do some of the same cleanup it would do, in particular cope
   * with params and initplans used within this subquery.  (This won't
   * matter if we end up not using the subplan.)
   */
  SS_identify_outer_params(subroot);
  SS_charge_for_initplans(subroot, final_rel);

  /*
   * Get the best presorted path, that being the one that's cheapest for
   * fetching just one row.  If there's no such path, fail.
   */
  if (final_rel->rows > 1.0)
    path_fraction = 1.0 / final_rel->rows;
  else
    path_fraction = 1.0;

  sorted_path =
    get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
                          subroot->query_pathkeys,
                          NULL,
                          path_fraction);
  if (!sorted_path)
    return false;

  /*
   * The path might not return exactly what we want, so fix that.  (We
   * assume that this won't change any conclusions about which was the
   * cheapest path.)
   */
  sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
                       create_pathtarget(subroot,
                               subroot->processed_tlist));

  /*
   * Determine cost to get just the first row of the presorted path.
   *
   * Note: cost calculation here should match
   * compare_fractional_path_costs().
   */
  path_cost = sorted_path->startup_cost +
    path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);

  /* Save state for further processing */
  mminfo->subroot = subroot;
  mminfo->path = sorted_path;
  mminfo->pathcost = path_cost;

  return true;
}

/*
 * Compute query_pathkeys and other pathkeys during query_planner()
 */
static void
minmax_qp_callback(PlannerInfo *root, void *extra)
{
  root->group_pathkeys = NIL;
  root->window_pathkeys = NIL;
  root->distinct_pathkeys = NIL;

  root->sort_pathkeys =
    make_pathkeys_for_sortclauses(root,
                    root->parse->sortClause,
                    root->parse->targetList);

  root->query_pathkeys = root->sort_pathkeys;
}

/*
 * Get the OID of the sort operator, if any, associated with an aggregate.
 * Returns InvalidOid if there is no such operator.
 */
static Oid
fetch_agg_sort_op(Oid aggfnoid)
{
  HeapTuple aggTuple;
  Form_pg_aggregate aggform;
  Oid     aggsortop;

  /* fetch aggregate entry from pg_aggregate */
  aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
  if (!HeapTupleIsValid(aggTuple))
    return InvalidOid;
  aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
  aggsortop = aggform->aggsortop;
  ReleaseSysCache(aggTuple);

  return aggsortop;
}

Coverage Report

Created: 2025-07-03 06:49

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		*
3		* planagg.c
4		* Special planning for aggregate queries.
5		*
6		* This module tries to replace MIN/MAX aggregate functions by subqueries
7		* of the form
8		* (SELECT col FROM tab
9		* WHERE col IS NOT NULL AND existing-quals
10		* ORDER BY col ASC/DESC
11		* LIMIT 1)
12		* Given a suitable index on tab.col, this can be much faster than the
13		* generic scan-all-the-rows aggregation plan. We can handle multiple
14		* MIN/MAX aggregates by generating multiple subqueries, and their
15		* orderings can be different. However, if the query contains any
16		* non-optimizable aggregates, there's no point since we'll have to
17		* scan all the rows anyway.
18		*
19		*
20		* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
21		* Portions Copyright (c) 1994, Regents of the University of California
22		*
23		*
24		* IDENTIFICATION
25		* src/backend/optimizer/plan/planagg.c
26		*
27		*-------------------------------------------------------------------------
28		*/
29		#include "postgres.h"
30
31		#include "access/htup_details.h"
32		#include "catalog/pg_aggregate.h"
33		#include "catalog/pg_type.h"
34		#include "nodes/makefuncs.h"
35		#include "nodes/nodeFuncs.h"
36		#include "optimizer/cost.h"
37		#include "optimizer/optimizer.h"
38		#include "optimizer/pathnode.h"
39		#include "optimizer/paths.h"
40		#include "optimizer/planmain.h"
41		#include "optimizer/subselect.h"
42		#include "optimizer/tlist.h"
43		#include "parser/parse_clause.h"
44		#include "parser/parsetree.h"
45		#include "rewrite/rewriteManip.h"
46		#include "utils/lsyscache.h"
47		#include "utils/syscache.h"
48
49		static bool can_minmax_aggs(PlannerInfo root, List *context);
50		static bool build_minmax_path(PlannerInfo root, MinMaxAggInfo mminfo,
51		Oid eqop, Oid sortop, bool reverse_sort,
52		bool nulls_first);
53		static void minmax_qp_callback(PlannerInfo root, void extra);
54		static Oid fetch_agg_sort_op(Oid aggfnoid);
55
56
57		/*
58		* preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
59		*
60		* Check to see whether the query contains MIN/MAX aggregate functions that
61		* might be optimizable via indexscans. If it does, and all the aggregates
62		* are potentially optimizable, then create a MinMaxAggPath and add it to
63		* the (UPPERREL_GROUP_AGG, NULL) upperrel.
64		*
65		* This should be called by grouping_planner() just before it's ready to call
66		* query_planner(), because we generate indexscan paths by cloning the
67		* planner's state and invoking query_planner() on a modified version of
68		* the query parsetree. Thus, all preprocessing needed before query_planner()
69		* must already be done. This relies on the list of aggregates in
70		* root->agginfos, so preprocess_aggrefs() must have been called already, too.
71		*/
72		void
73		preprocess_minmax_aggregates(PlannerInfo *root)
74	0	{
75	0	Query *parse = root->parse;
76	0	FromExpr *jtnode;
77	0	RangeTblRef *rtr;
78	0	RangeTblEntry *rte;
79	0	List *aggs_list;
80	0	RelOptInfo *grouped_rel;
81	0	ListCell *lc;
82
83		/* minmax_aggs list should be empty at this point */
84	0	Assert(root->minmax_aggs == NIL);
85
86		/* Nothing to do if query has no aggregates */
87	0	if (!parse->hasAggs)
88	0	return;
89
90	0	Assert(!parse->setOperations); /* shouldn't get here if a setop */
91	0	Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
92
93		/*
94		* Reject unoptimizable cases.
95		*
96		* We don't handle GROUP BY or windowing, because our current
97		* implementations of grouping require looking at all the rows anyway, and
98		* so there's not much point in optimizing MIN/MAX.
99		*/
100	0	if (parse->groupClause \|\| list_length(parse->groupingSets) > 1 \|\|
101	0	parse->hasWindowFuncs)
102	0	return;
103
104		/*
105		* Reject if query contains any CTEs; there's no way to build an indexscan
106		* on one so we couldn't succeed here. (If the CTEs are unreferenced,
107		* that's not true, but it doesn't seem worth expending cycles to check.)
108		*/
109	0	if (parse->cteList)
110	0	return;
111
112		/*
113		* We also restrict the query to reference exactly one table, since join
114		* conditions can't be handled reasonably. (We could perhaps handle a
115		* query containing cartesian-product joins, but it hardly seems worth the
116		* trouble.) However, the single table could be buried in several levels
117		* of FromExpr due to subqueries. Note the "single" table could be an
118		* inheritance parent, too, including the case of a UNION ALL subquery
119		* that's been flattened to an appendrel.
120		*/
121	0	jtnode = parse->jointree;
122	0	while (IsA(jtnode, FromExpr))
123	0	{
124	0	if (list_length(jtnode->fromlist) != 1)
125	0	return;
126	0	jtnode = linitial(jtnode->fromlist);
127	0	}
128	0	if (!IsA(jtnode, RangeTblRef))
129	0	return;
130	0	rtr = (RangeTblRef *) jtnode;
131	0	rte = planner_rt_fetch(rtr->rtindex, root);
132	0	if (rte->rtekind == RTE_RELATION)
133	0	/* ordinary relation, ok */ ;
134	0	else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
135	0	/* flattened UNION ALL subquery, ok */ ;
136	0	else
137	0	return;
138
139		/*
140		* Examine all the aggregates and verify all are MIN/MAX aggregates. Stop
141		* as soon as we find one that isn't.
142		*/
143	0	aggs_list = NIL;
144	0	if (!can_minmax_aggs(root, &aggs_list))
145	0	return;
146
147		/*
148		* OK, there is at least the possibility of performing the optimization.
149		* Build an access path for each aggregate. If any of the aggregates
150		* prove to be non-indexable, give up; there is no point in optimizing
151		* just some of them.
152		*/
153	0	foreach(lc, aggs_list)
154	0	{
155	0	MinMaxAggInfo mminfo = (MinMaxAggInfo ) lfirst(lc);
156	0	Oid eqop;
157	0	bool reverse;
158
159		/*
160		* We'll need the equality operator that goes with the aggregate's
161		* ordering operator.
162		*/
163	0	eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
164	0	if (!OidIsValid(eqop)) /* shouldn't happen */
165	0	elog(ERROR, "could not find equality operator for ordering operator %u",
166	0	mminfo->aggsortop);
167
168		/*
169		* We can use either an ordering that gives NULLS FIRST or one that
170		* gives NULLS LAST; furthermore there's unlikely to be much
171		* performance difference between them, so it doesn't seem worth
172		* costing out both ways if we get a hit on the first one. NULLS
173		* FIRST is more likely to be available if the operator is a
174		* reverse-sort operator, so try that first if reverse.
175		*/
176	0	if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, reverse))
177	0	continue;
178	0	if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, !reverse))
179	0	continue;
180
181		/* No indexable path for this aggregate, so fail */
182	0	return;
183	0	}
184
185		/*
186		* OK, we can do the query this way. Prepare to create a MinMaxAggPath
187		* node.
188		*
189		* First, create an output Param node for each agg. (If we end up not
190		* using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
191		* which is not worth worrying about. We can't wait till create_plan time
192		* to decide whether to make the Param, unfortunately.)
193		*/
194	0	foreach(lc, aggs_list)
195	0	{
196	0	MinMaxAggInfo mminfo = (MinMaxAggInfo ) lfirst(lc);
197
198	0	mminfo->param =
199	0	SS_make_initplan_output_param(root,
200	0	exprType((Node *) mminfo->target),
201	0	-1,
202	0	exprCollation((Node *) mminfo->target));
203	0	}
204
205		/*
206		* Create a MinMaxAggPath node with the appropriate estimated costs and
207		* other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
208		* it will compete against the standard aggregate implementation. (It
209		* will likely always win, but we need not assume that here.)
210		*
211		* Note: grouping_planner won't have created this upperrel yet, but it's
212		* fine for us to create it first. We will not have inserted the correct
213		* consider_parallel value in it, but MinMaxAggPath paths are currently
214		* never parallel-safe anyway, so that doesn't matter. Likewise, it
215		* doesn't matter that we haven't filled FDW-related fields in the rel.
216		* Also, because there are no rowmarks, we know that the processed_tlist
217		* doesn't need to change anymore, so making the pathtarget now is safe.
218		*/
219	0	grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
220	0	add_path(grouped_rel, (Path *)
221	0	create_minmaxagg_path(root, grouped_rel,
222	0	create_pathtarget(root,
223	0	root->processed_tlist),
224	0	aggs_list,
225	0	(List *) parse->havingQual));
226	0	}
227
228		/*
229		* can_minmax_aggs
230		* Examine all the aggregates in the query, and check if they are
231		* all MIN/MAX aggregates. If so, build a list of MinMaxAggInfo
232		* nodes for them.
233		*
234		* Returns false if a non-MIN/MAX aggregate is found, true otherwise.
235		*/
236		static bool
237		can_minmax_aggs(PlannerInfo root, List *context)
238	0	{
239	0	ListCell *lc;
240
241		/*
242		* This function used to have to scan the query for itself, but now we can
243		* just thumb through the AggInfo list made by preprocess_aggrefs.
244		*/
245	0	foreach(lc, root->agginfos)
246	0	{
247	0	AggInfo *agginfo = lfirst_node(AggInfo, lc);
248	0	Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
249	0	Oid aggsortop;
250	0	TargetEntry *curTarget;
251	0	MinMaxAggInfo *mminfo;
252
253	0	Assert(aggref->agglevelsup == 0);
254	0	if (list_length(aggref->args) != 1)
255	0	return false; /* it couldn't be MIN/MAX */
256
257		/*
258		* ORDER BY is usually irrelevant for MIN/MAX, but it can change the
259		* outcome if the aggsortop's operator class recognizes non-identical
260		* values as equal. For example, 4.0 and 4.00 are equal according to
261		* numeric_ops, yet distinguishable. If MIN() receives more than one
262		* value equal to 4.0 and no value less than 4.0, it is unspecified
263		* which of those equal values MIN() returns. An ORDER BY expression
264		* that differs for each of those equal values of the argument
265		* expression makes the result predictable once again. This is a
266		* niche requirement, and we do not implement it with subquery paths.
267		* In any case, this test lets us reject ordered-set aggregates
268		* quickly.
269		*/
270	0	if (aggref->aggorder != NIL)
271	0	return false;
272		/* note: we do not care if DISTINCT is mentioned ... */
273
274		/*
275		* We might implement the optimization when a FILTER clause is present
276		* by adding the filter to the quals of the generated subquery. For
277		* now, just punt.
278		*/
279	0	if (aggref->aggfilter != NULL)
280	0	return false;
281
282	0	aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
283	0	if (!OidIsValid(aggsortop))
284	0	return false; /* not a MIN/MAX aggregate */
285
286	0	curTarget = (TargetEntry *) linitial(aggref->args);
287
288	0	if (contain_mutable_functions((Node *) curTarget->expr))
289	0	return false; /* not potentially indexable */
290
291	0	if (type_is_rowtype(exprType((Node *) curTarget->expr)))
292	0	return false; /* IS NOT NULL would have weird semantics */
293
294	0	mminfo = makeNode(MinMaxAggInfo);
295	0	mminfo->aggfnoid = aggref->aggfnoid;
296	0	mminfo->aggsortop = aggsortop;
297	0	mminfo->target = curTarget->expr;
298	0	mminfo->subroot = NULL; /* don't compute path yet */
299	0	mminfo->path = NULL;
300	0	mminfo->pathcost = 0;
301	0	mminfo->param = NULL;
302
303	0	context = lappend(context, mminfo);
304	0	}
305	0	return true;
306	0	}
307
308		/*
309		* build_minmax_path
310		* Given a MIN/MAX aggregate, try to build an indexscan Path it can be
311		* optimized with.
312		*
313		* If successful, stash the best path in *mminfo and return true.
314		* Otherwise, return false.
315		*/
316		static bool
317		build_minmax_path(PlannerInfo root, MinMaxAggInfo mminfo,
318		Oid eqop, Oid sortop, bool reverse_sort, bool nulls_first)
319	0	{
320	0	PlannerInfo *subroot;
321	0	Query *parse;
322	0	TargetEntry *tle;
323	0	List *tlist;
324	0	NullTest *ntest;
325	0	SortGroupClause *sortcl;
326	0	RelOptInfo *final_rel;
327	0	Path *sorted_path;
328	0	Cost path_cost;
329	0	double path_fraction;
330
331		/*
332		* We are going to construct what is effectively a sub-SELECT query, so
333		* clone the current query level's state and adjust it to make it look
334		* like a subquery. Any outer references will now be one level higher
335		* than before. (This means that when we are done, there will be no Vars
336		* of level 1, which is why the subquery can become an initplan.)
337		*/
338	0	subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
339	0	memcpy(subroot, root, sizeof(PlannerInfo));
340	0	subroot->query_level++;
341	0	subroot->parent_root = root;
342		/* reset subplan-related stuff */
343	0	subroot->plan_params = NIL;
344	0	subroot->outer_params = NULL;
345	0	subroot->init_plans = NIL;
346	0	subroot->agginfos = NIL;
347	0	subroot->aggtransinfos = NIL;
348
349	0	subroot->parse = parse = copyObject(root->parse);
350	0	IncrementVarSublevelsUp((Node *) parse, 1, 1);
351
352		/* append_rel_list might contain outer Vars? */
353	0	subroot->append_rel_list = copyObject(root->append_rel_list);
354	0	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
355		/* There shouldn't be any OJ info to translate, as yet */
356	0	Assert(subroot->join_info_list == NIL);
357		/* and we haven't made equivalence classes, either */
358	0	Assert(subroot->eq_classes == NIL);
359		/* and we haven't created PlaceHolderInfos, either */
360	0	Assert(subroot->placeholder_list == NIL);
361
362		/*----------
363		* Generate modified query of the form
364		* (SELECT col FROM tab
365		* WHERE col IS NOT NULL AND existing-quals
366		* ORDER BY col ASC/DESC
367		* LIMIT 1)
368		*----------
369		*/
370		/* single tlist entry that is the aggregate target */
371	0	tle = makeTargetEntry(copyObject(mminfo->target),
372	0	(AttrNumber) 1,
373	0	pstrdup("agg_target"),
374	0	false);
375	0	tlist = list_make1(tle);
376	0	subroot->processed_tlist = parse->targetList = tlist;
377
378		/* No HAVING, no DISTINCT, no aggregates anymore */
379	0	parse->havingQual = NULL;
380	0	subroot->hasHavingQual = false;
381	0	parse->distinctClause = NIL;
382	0	parse->hasDistinctOn = false;
383	0	parse->hasAggs = false;
384
385		/* Build "target IS NOT NULL" expression */
386	0	ntest = makeNode(NullTest);
387	0	ntest->nulltesttype = IS_NOT_NULL;
388	0	ntest->arg = copyObject(mminfo->target);
389		/* we checked it wasn't a rowtype in can_minmax_aggs */
390	0	ntest->argisrow = false;
391	0	ntest->location = -1;
392
393		/* User might have had that in WHERE already */
394	0	if (!list_member((List *) parse->jointree->quals, ntest))
395	0	parse->jointree->quals = (Node *)
396	0	lcons(ntest, (List *) parse->jointree->quals);
397
398		/* Build suitable ORDER BY clause */
399	0	sortcl = makeNode(SortGroupClause);
400	0	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
401	0	sortcl->eqop = eqop;
402	0	sortcl->sortop = sortop;
403	0	sortcl->reverse_sort = reverse_sort;
404	0	sortcl->nulls_first = nulls_first;
405	0	sortcl->hashable = false; /* no need to make this accurate */
406	0	parse->sortClause = list_make1(sortcl);
407
408		/* set up expressions for LIMIT 1 */
409	0	parse->limitOffset = NULL;
410	0	parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
411	0	sizeof(int64),
412	0	Int64GetDatum(1), false,
413	0	FLOAT8PASSBYVAL);
414
415		/*
416		* Generate the best paths for this query, telling query_planner that we
417		* have LIMIT 1.
418		*/
419	0	subroot->tuple_fraction = 1.0;
420	0	subroot->limit_tuples = 1.0;
421
422	0	final_rel = query_planner(subroot, minmax_qp_callback, NULL);
423
424		/*
425		* Since we didn't go through subquery_planner() to handle the subquery,
426		* we have to do some of the same cleanup it would do, in particular cope
427		* with params and initplans used within this subquery. (This won't
428		* matter if we end up not using the subplan.)
429		*/
430	0	SS_identify_outer_params(subroot);
431	0	SS_charge_for_initplans(subroot, final_rel);
432
433		/*
434		* Get the best presorted path, that being the one that's cheapest for
435		* fetching just one row. If there's no such path, fail.
436		*/
437	0	if (final_rel->rows > 1.0)
438	0	path_fraction = 1.0 / final_rel->rows;
439	0	else
440	0	path_fraction = 1.0;
441
442	0	sorted_path =
443	0	get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
444	0	subroot->query_pathkeys,
445	0	NULL,
446	0	path_fraction);
447	0	if (!sorted_path)
448	0	return false;
449
450		/*
451		* The path might not return exactly what we want, so fix that. (We
452		* assume that this won't change any conclusions about which was the
453		* cheapest path.)
454		*/
455	0	sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
456	0	create_pathtarget(subroot,
457	0	subroot->processed_tlist));
458
459		/*
460		* Determine cost to get just the first row of the presorted path.
461		*
462		* Note: cost calculation here should match
463		* compare_fractional_path_costs().
464		*/
465	0	path_cost = sorted_path->startup_cost +
466	0	path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
467
468		/* Save state for further processing */
469	0	mminfo->subroot = subroot;
470	0	mminfo->path = sorted_path;
471	0	mminfo->pathcost = path_cost;
472
473	0	return true;
474	0	}
475
476		/*
477		* Compute query_pathkeys and other pathkeys during query_planner()
478		*/
479		static void
480		minmax_qp_callback(PlannerInfo root, void extra)
481	0	{
482	0	root->group_pathkeys = NIL;
483	0	root->window_pathkeys = NIL;
484	0	root->distinct_pathkeys = NIL;
485
486	0	root->sort_pathkeys =
487	0	make_pathkeys_for_sortclauses(root,
488	0	root->parse->sortClause,
489	0	root->parse->targetList);
490
491	0	root->query_pathkeys = root->sort_pathkeys;
492	0	}
493
494		/*
495		* Get the OID of the sort operator, if any, associated with an aggregate.
496		* Returns InvalidOid if there is no such operator.
497		*/
498		static Oid
499		fetch_agg_sort_op(Oid aggfnoid)
500	0	{
501	0	HeapTuple aggTuple;
502	0	Form_pg_aggregate aggform;
503	0	Oid aggsortop;
504
505		/* fetch aggregate entry from pg_aggregate */
506	0	aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
507	0	if (!HeapTupleIsValid(aggTuple))
508	0	return InvalidOid;
509	0	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
510	0	aggsortop = aggform->aggsortop;
511	0	ReleaseSysCache(aggTuple);
512
513	0	return aggsortop;
514	0	}