/src/postgres/src/backend/optimizer/prep/prepqual.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 *
 * prepqual.c
 *    Routines for preprocessing qualification expressions
 *
 *
 * While the parser will produce flattened (N-argument) AND/OR trees from
 * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
 * directly underneath another AND, or OR underneath OR, if the input was
 * oddly parenthesized.  Also, rule expansion and subquery flattening could
 * produce such parsetrees.  The planner wants to flatten all such cases
 * to ensure consistent optimization behavior.
 *
 * Formerly, this module was responsible for doing the initial flattening,
 * but now we leave it to eval_const_expressions to do that since it has to
 * make a complete pass over the expression tree anyway.  Instead, we just
 * have to ensure that our manipulations preserve AND/OR flatness.
 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
 * tree after local transformations that might introduce nested AND/ORs.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/optimizer/prep/prepqual.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "utils/lsyscache.h"


static List *pull_ands(List *andlist);
static List *pull_ors(List *orlist);
static Expr *find_duplicate_ors(Expr *qual, bool is_check);
static Expr *process_duplicate_ors(List *orlist);


/*
 * negate_clause
 *    Negate a Boolean expression.
 *
 * Input is a clause to be negated (e.g., the argument of a NOT clause).
 * Returns a new clause equivalent to the negation of the given clause.
 *
 * Although this can be invoked on its own, it's mainly intended as a helper
 * for eval_const_expressions(), and that context drives several design
 * decisions.  In particular, if the input is already AND/OR flat, we must
 * preserve that property.  We also don't bother to recurse in situations
 * where we can assume that lower-level executions of eval_const_expressions
 * would already have simplified sub-clauses of the input.
 *
 * The difference between this and a simple make_notclause() is that this
 * tries to get rid of the NOT node by logical simplification.  It's clearly
 * always a win if the NOT node can be eliminated altogether.  However, our
 * use of DeMorgan's laws could result in having more NOT nodes rather than
 * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
 * important to expose as much top-level AND/OR structure as possible.
 * Also, eliminating an intermediate NOT may allow us to flatten two levels
 * of AND or OR together that we couldn't have otherwise.  Finally, one of
 * the motivations for doing this is to ensure that logically equivalent
 * expressions will be seen as physically equal(), so we should always apply
 * the same transformations.
 */
Node *
negate_clause(Node *node)
{
  if (node == NULL)     /* should not happen */
    elog(ERROR, "can't negate an empty subexpression");
  switch (nodeTag(node))
  {
    case T_Const:
      {
        Const    *c = (Const *) node;

        /* NOT NULL is still NULL */
        if (c->constisnull)
          return makeBoolConst(false, true);
        /* otherwise pretty easy */
        return makeBoolConst(!DatumGetBool(c->constvalue), false);
      }
      break;
    case T_OpExpr:
      {
        /*
         * Negate operator if possible: (NOT (< A B)) => (>= A B)
         */
        OpExpr     *opexpr = (OpExpr *) node;
        Oid     negator = get_negator(opexpr->opno);

        if (negator)
        {
          OpExpr     *newopexpr = makeNode(OpExpr);

          newopexpr->opno = negator;
          newopexpr->opfuncid = InvalidOid;
          newopexpr->opresulttype = opexpr->opresulttype;
          newopexpr->opretset = opexpr->opretset;
          newopexpr->opcollid = opexpr->opcollid;
          newopexpr->inputcollid = opexpr->inputcollid;
          newopexpr->args = opexpr->args;
          newopexpr->location = opexpr->location;
          return (Node *) newopexpr;
        }
      }
      break;
    case T_ScalarArrayOpExpr:
      {
        /*
         * Negate a ScalarArrayOpExpr if its operator has a negator;
         * for example x = ANY (list) becomes x <> ALL (list)
         */
        ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
        Oid     negator = get_negator(saopexpr->opno);

        if (negator)
        {
          ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);

          newopexpr->opno = negator;
          newopexpr->opfuncid = InvalidOid;
          newopexpr->hashfuncid = InvalidOid;
          newopexpr->negfuncid = InvalidOid;
          newopexpr->useOr = !saopexpr->useOr;
          newopexpr->inputcollid = saopexpr->inputcollid;
          newopexpr->args = saopexpr->args;
          newopexpr->location = saopexpr->location;
          return (Node *) newopexpr;
        }
      }
      break;
    case T_BoolExpr:
      {
        BoolExpr   *expr = (BoolExpr *) node;

        switch (expr->boolop)
        {
            /*--------------------
             * Apply DeMorgan's Laws:
             *    (NOT (AND A B)) => (OR (NOT A) (NOT B))
             *    (NOT (OR A B))  => (AND (NOT A) (NOT B))
             * i.e., swap AND for OR and negate each subclause.
             *
             * If the input is already AND/OR flat and has no NOT
             * directly above AND or OR, this transformation preserves
             * those properties.  For example, if no direct child of
             * the given AND clause is an AND or a NOT-above-OR, then
             * the recursive calls of negate_clause() can't return any
             * OR clauses.  So we needn't call pull_ors() before
             * building a new OR clause.  Similarly for the OR case.
             *--------------------
             */
          case AND_EXPR:
            {
              List     *nargs = NIL;
              ListCell   *lc;

              foreach(lc, expr->args)
              {
                nargs = lappend(nargs,
                        negate_clause(lfirst(lc)));
              }
              return (Node *) make_orclause(nargs);
            }
            break;
          case OR_EXPR:
            {
              List     *nargs = NIL;
              ListCell   *lc;

              foreach(lc, expr->args)
              {
                nargs = lappend(nargs,
                        negate_clause(lfirst(lc)));
              }
              return (Node *) make_andclause(nargs);
            }
            break;
          case NOT_EXPR:

            /*
             * NOT underneath NOT: they cancel.  We assume the
             * input is already simplified, so no need to recurse.
             */
            return (Node *) linitial(expr->args);
          default:
            elog(ERROR, "unrecognized boolop: %d",
               (int) expr->boolop);
            break;
        }
      }
      break;
    case T_NullTest:
      {
        NullTest   *expr = (NullTest *) node;

        /*
         * In the rowtype case, the two flavors of NullTest are *not*
         * logical inverses, so we can't simplify.  But it does work
         * for scalar datatypes.
         */
        if (!expr->argisrow)
        {
          NullTest   *newexpr = makeNode(NullTest);

          newexpr->arg = expr->arg;
          newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
                       IS_NOT_NULL : IS_NULL);
          newexpr->argisrow = expr->argisrow;
          newexpr->location = expr->location;
          return (Node *) newexpr;
        }
      }
      break;
    case T_BooleanTest:
      {
        BooleanTest *expr = (BooleanTest *) node;
        BooleanTest *newexpr = makeNode(BooleanTest);

        newexpr->arg = expr->arg;
        switch (expr->booltesttype)
        {
          case IS_TRUE:
            newexpr->booltesttype = IS_NOT_TRUE;
            break;
          case IS_NOT_TRUE:
            newexpr->booltesttype = IS_TRUE;
            break;
          case IS_FALSE:
            newexpr->booltesttype = IS_NOT_FALSE;
            break;
          case IS_NOT_FALSE:
            newexpr->booltesttype = IS_FALSE;
            break;
          case IS_UNKNOWN:
            newexpr->booltesttype = IS_NOT_UNKNOWN;
            break;
          case IS_NOT_UNKNOWN:
            newexpr->booltesttype = IS_UNKNOWN;
            break;
          default:
            elog(ERROR, "unrecognized booltesttype: %d",
               (int) expr->booltesttype);
            break;
        }
        newexpr->location = expr->location;
        return (Node *) newexpr;
      }
      break;
    default:
      /* else fall through */
      break;
  }

  /*
   * Otherwise we don't know how to simplify this, so just tack on an
   * explicit NOT node.
   */
  return (Node *) make_notclause((Expr *) node);
}


/*
 * canonicalize_qual
 *    Convert a qualification expression to the most useful form.
 *
 * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
 * clauses.  It can also be used on top-level CHECK constraints, for which
 * pass is_check = true.  DO NOT call it on any expression that is not known
 * to be one or the other, as it might apply inappropriate simplifications.
 *
 * The name of this routine is a holdover from a time when it would try to
 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
 * Eventually, we recognized that that had more theoretical purity than
 * actual usefulness, and so now the transformation doesn't involve any
 * notion of reaching a canonical form.
 *
 * NOTE: we assume the input has already been through eval_const_expressions
 * and therefore possesses AND/OR flatness.  Formerly this function included
 * its own flattening logic, but that requires a useless extra pass over the
 * tree.
 *
 * Returns the modified qualification.
 */
Expr *
canonicalize_qual(Expr *qual, bool is_check)
{
  Expr     *newqual;

  /* Quick exit for empty qual */
  if (qual == NULL)
    return NULL;

  /* This should not be invoked on quals in implicit-AND format */
  Assert(!IsA(qual, List));

  /*
   * Pull up redundant subclauses in OR-of-AND trees.  We do this only
   * within the top-level AND/OR structure; there's no point in looking
   * deeper.  Also remove any NULL constants in the top-level structure.
   */
  newqual = find_duplicate_ors(qual, is_check);

  return newqual;
}


/*
 * pull_ands
 *    Recursively flatten nested AND clauses into a single and-clause list.
 *
 * Input is the arglist of an AND clause.
 * Returns the rebuilt arglist (note original list structure is not touched).
 */
static List *
pull_ands(List *andlist)
{
  List     *out_list = NIL;
  ListCell   *arg;

  foreach(arg, andlist)
  {
    Node     *subexpr = (Node *) lfirst(arg);

    if (is_andclause(subexpr))
      out_list = list_concat(out_list,
                   pull_ands(((BoolExpr *) subexpr)->args));
    else
      out_list = lappend(out_list, subexpr);
  }
  return out_list;
}

/*
 * pull_ors
 *    Recursively flatten nested OR clauses into a single or-clause list.
 *
 * Input is the arglist of an OR clause.
 * Returns the rebuilt arglist (note original list structure is not touched).
 */
static List *
pull_ors(List *orlist)
{
  List     *out_list = NIL;
  ListCell   *arg;

  foreach(arg, orlist)
  {
    Node     *subexpr = (Node *) lfirst(arg);

    if (is_orclause(subexpr))
      out_list = list_concat(out_list,
                   pull_ors(((BoolExpr *) subexpr)->args));
    else
      out_list = lappend(out_list, subexpr);
  }
  return out_list;
}


/*--------------------
 * The following code attempts to apply the inverse OR distributive law:
 *    ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
 * That is, locate OR clauses in which every subclause contains an
 * identical term, and pull out the duplicated terms.
 *
 * This may seem like a fairly useless activity, but it turns out to be
 * applicable to many machine-generated queries, and there are also queries
 * in some of the TPC benchmarks that need it.  This was in fact almost the
 * sole useful side-effect of the old prepqual code that tried to force
 * the query into canonical AND-of-ORs form: the canonical equivalent of
 *    ((A AND B) OR (A AND C))
 * is
 *    ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
 * which the code was able to simplify to
 *    (A AND (A OR C) AND (B OR A) AND (B OR C))
 * thus successfully extracting the common condition A --- but at the cost
 * of cluttering the qual with many redundant clauses.
 *--------------------
 */

/*
 * find_duplicate_ors
 *    Given a qualification tree with the NOTs pushed down, search for
 *    OR clauses to which the inverse OR distributive law might apply.
 *    Only the top-level AND/OR structure is searched.
 *
 * While at it, we remove any NULL constants within the top-level AND/OR
 * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
 * In general that would change the result, so eval_const_expressions can't
 * do it; but at top level of WHERE, we don't need to distinguish between
 * FALSE and NULL results, so it's valid to treat NULL::boolean the same
 * as FALSE and then simplify AND/OR accordingly.  Conversely, in a top-level
 * CHECK constraint, we may treat a NULL the same as TRUE.
 *
 * Returns the modified qualification.  AND/OR flatness is preserved.
 */
static Expr *
find_duplicate_ors(Expr *qual, bool is_check)
{
  if (is_orclause(qual))
  {
    List     *orlist = NIL;
    ListCell   *temp;

    /* Recurse */
    foreach(temp, ((BoolExpr *) qual)->args)
    {
      Expr     *arg = (Expr *) lfirst(temp);

      arg = find_duplicate_ors(arg, is_check);

      /* Get rid of any constant inputs */
      if (arg && IsA(arg, Const))
      {
        Const    *carg = (Const *) arg;

        if (is_check)
        {
          /* Within OR in CHECK, drop constant FALSE */
          if (!carg->constisnull && !DatumGetBool(carg->constvalue))
            continue;
          /* Constant TRUE or NULL, so OR reduces to TRUE */
          return (Expr *) makeBoolConst(true, false);
        }
        else
        {
          /* Within OR in WHERE, drop constant FALSE or NULL */
          if (carg->constisnull || !DatumGetBool(carg->constvalue))
            continue;
          /* Constant TRUE, so OR reduces to TRUE */
          return arg;
        }
      }

      orlist = lappend(orlist, arg);
    }

    /* Flatten any ORs pulled up to just below here */
    orlist = pull_ors(orlist);

    /* Now we can look for duplicate ORs */
    return process_duplicate_ors(orlist);
  }
  else if (is_andclause(qual))
  {
    List     *andlist = NIL;
    ListCell   *temp;

    /* Recurse */
    foreach(temp, ((BoolExpr *) qual)->args)
    {
      Expr     *arg = (Expr *) lfirst(temp);

      arg = find_duplicate_ors(arg, is_check);

      /* Get rid of any constant inputs */
      if (arg && IsA(arg, Const))
      {
        Const    *carg = (Const *) arg;

        if (is_check)
        {
          /* Within AND in CHECK, drop constant TRUE or NULL */
          if (carg->constisnull || DatumGetBool(carg->constvalue))
            continue;
          /* Constant FALSE, so AND reduces to FALSE */
          return arg;
        }
        else
        {
          /* Within AND in WHERE, drop constant TRUE */
          if (!carg->constisnull && DatumGetBool(carg->constvalue))
            continue;
          /* Constant FALSE or NULL, so AND reduces to FALSE */
          return (Expr *) makeBoolConst(false, false);
        }
      }

      andlist = lappend(andlist, arg);
    }

    /* Flatten any ANDs introduced just below here */
    andlist = pull_ands(andlist);

    /* AND of no inputs reduces to TRUE */
    if (andlist == NIL)
      return (Expr *) makeBoolConst(true, false);

    /* Single-expression AND just reduces to that expression */
    if (list_length(andlist) == 1)
      return (Expr *) linitial(andlist);

    /* Else we still need an AND node */
    return make_andclause(andlist);
  }
  else
    return qual;
}

/*
 * process_duplicate_ors
 *    Given a list of exprs which are ORed together, try to apply
 *    the inverse OR distributive law.
 *
 * Returns the resulting expression (could be an AND clause, an OR
 * clause, or maybe even a single subexpression).
 */
static Expr *
process_duplicate_ors(List *orlist)
{
  List     *reference = NIL;
  int     num_subclauses = 0;
  List     *winners;
  List     *neworlist;
  ListCell   *temp;

  /* OR of no inputs reduces to FALSE */
  if (orlist == NIL)
    return (Expr *) makeBoolConst(false, false);

  /* Single-expression OR just reduces to that expression */
  if (list_length(orlist) == 1)
    return (Expr *) linitial(orlist);

  /*
   * Choose the shortest AND clause as the reference list --- obviously, any
   * subclause not in this clause isn't in all the clauses. If we find a
   * clause that's not an AND, we can treat it as a one-element AND clause,
   * which necessarily wins as shortest.
   */
  foreach(temp, orlist)
  {
    Expr     *clause = (Expr *) lfirst(temp);

    if (is_andclause(clause))
    {
      List     *subclauses = ((BoolExpr *) clause)->args;
      int     nclauses = list_length(subclauses);

      if (reference == NIL || nclauses < num_subclauses)
      {
        reference = subclauses;
        num_subclauses = nclauses;
      }
    }
    else
    {
      reference = list_make1(clause);
      break;
    }
  }

  /*
   * Just in case, eliminate any duplicates in the reference list.
   */
  reference = list_union(NIL, reference);

  /*
   * Check each element of the reference list to see if it's in all the OR
   * clauses.  Build a new list of winning clauses.
   */
  winners = NIL;
  foreach(temp, reference)
  {
    Expr     *refclause = (Expr *) lfirst(temp);
    bool    win = true;
    ListCell   *temp2;

    foreach(temp2, orlist)
    {
      Expr     *clause = (Expr *) lfirst(temp2);

      if (is_andclause(clause))
      {
        if (!list_member(((BoolExpr *) clause)->args, refclause))
        {
          win = false;
          break;
        }
      }
      else
      {
        if (!equal(refclause, clause))
        {
          win = false;
          break;
        }
      }
    }

    if (win)
      winners = lappend(winners, refclause);
  }

  /*
   * If no winners, we can't transform the OR
   */
  if (winners == NIL)
    return make_orclause(orlist);

  /*
   * Generate new OR list consisting of the remaining sub-clauses.
   *
   * If any clause degenerates to empty, then we have a situation like (A
   * AND B) OR (A), which can be reduced to just A --- that is, the
   * additional conditions in other arms of the OR are irrelevant.
   *
   * Note that because we use list_difference, any multiple occurrences of a
   * winning clause in an AND sub-clause will be removed automatically.
   */
  neworlist = NIL;
  foreach(temp, orlist)
  {
    Expr     *clause = (Expr *) lfirst(temp);

    if (is_andclause(clause))
    {
      List     *subclauses = ((BoolExpr *) clause)->args;

      subclauses = list_difference(subclauses, winners);
      if (subclauses != NIL)
      {
        if (list_length(subclauses) == 1)
          neworlist = lappend(neworlist, linitial(subclauses));
        else
          neworlist = lappend(neworlist, make_andclause(subclauses));
      }
      else
      {
        neworlist = NIL; /* degenerate case, see above */
        break;
      }
    }
    else
    {
      if (!list_member(winners, clause))
        neworlist = lappend(neworlist, clause);
      else
      {
        neworlist = NIL; /* degenerate case, see above */
        break;
      }
    }
  }

  /*
   * Append reduced OR to the winners list, if it's not degenerate, handling
   * the special case of one element correctly (can that really happen?).
   * Also be careful to maintain AND/OR flatness in case we pulled up a
   * sub-sub-OR-clause.
   */
  if (neworlist != NIL)
  {
    if (list_length(neworlist) == 1)
      winners = lappend(winners, linitial(neworlist));
    else
      winners = lappend(winners, make_orclause(pull_ors(neworlist)));
  }

  /*
   * And return the constructed AND clause, again being wary of a single
   * element and AND/OR flatness.
   */
  if (list_length(winners) == 1)
    return (Expr *) linitial(winners);
  else
    return make_andclause(pull_ands(winners));
}

Coverage Report

Created: 2025-06-15 06:31

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		*
3		* prepqual.c
4		* Routines for preprocessing qualification expressions
5		*
6		*
7		* While the parser will produce flattened (N-argument) AND/OR trees from
8		* simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9		* directly underneath another AND, or OR underneath OR, if the input was
10		* oddly parenthesized. Also, rule expansion and subquery flattening could
11		* produce such parsetrees. The planner wants to flatten all such cases
12		* to ensure consistent optimization behavior.
13		*
14		* Formerly, this module was responsible for doing the initial flattening,
15		* but now we leave it to eval_const_expressions to do that since it has to
16		* make a complete pass over the expression tree anyway. Instead, we just
17		* have to ensure that our manipulations preserve AND/OR flatness.
18		* pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19		* tree after local transformations that might introduce nested AND/ORs.
20		*
21		*
22		* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
23		* Portions Copyright (c) 1994, Regents of the University of California
24		*
25		*
26		* IDENTIFICATION
27		* src/backend/optimizer/prep/prepqual.c
28		*
29		*-------------------------------------------------------------------------
30		*/
31
32		#include "postgres.h"
33
34		#include "nodes/makefuncs.h"
35		#include "nodes/nodeFuncs.h"
36		#include "optimizer/optimizer.h"
37		#include "utils/lsyscache.h"
38
39
40		static List pull_ands(List andlist);
41		static List pull_ors(List orlist);
42		static Expr find_duplicate_ors(Expr qual, bool is_check);
43		static Expr process_duplicate_ors(List orlist);
44
45
46		/*
47		* negate_clause
48		* Negate a Boolean expression.
49		*
50		* Input is a clause to be negated (e.g., the argument of a NOT clause).
51		* Returns a new clause equivalent to the negation of the given clause.
52		*
53		* Although this can be invoked on its own, it's mainly intended as a helper
54		* for eval_const_expressions(), and that context drives several design
55		* decisions. In particular, if the input is already AND/OR flat, we must
56		* preserve that property. We also don't bother to recurse in situations
57		* where we can assume that lower-level executions of eval_const_expressions
58		* would already have simplified sub-clauses of the input.
59		*
60		* The difference between this and a simple make_notclause() is that this
61		* tries to get rid of the NOT node by logical simplification. It's clearly
62		* always a win if the NOT node can be eliminated altogether. However, our
63		* use of DeMorgan's laws could result in having more NOT nodes rather than
64		* fewer. We do that unconditionally anyway, because in WHERE clauses it's
65		* important to expose as much top-level AND/OR structure as possible.
66		* Also, eliminating an intermediate NOT may allow us to flatten two levels
67		* of AND or OR together that we couldn't have otherwise. Finally, one of
68		* the motivations for doing this is to ensure that logically equivalent
69		* expressions will be seen as physically equal(), so we should always apply
70		* the same transformations.
71		*/
72		Node *
73		negate_clause(Node *node)
74	0	{
75	0	if (node == NULL) /* should not happen */
76	0	elog(ERROR, "can't negate an empty subexpression");
77	0	switch (nodeTag(node))
78	0	{
79	0	case T_Const:
80	0	{
81	0	Const c = (Const ) node;
82
83		/* NOT NULL is still NULL */
84	0	if (c->constisnull)
85	0	return makeBoolConst(false, true);
86		/* otherwise pretty easy */
87	0	return makeBoolConst(!DatumGetBool(c->constvalue), false);
88	0	}
89	0	break;
90	0	case T_OpExpr:
91	0	{
92		/*
93		* Negate operator if possible: (NOT (< A B)) => (>= A B)
94		*/
95	0	OpExpr opexpr = (OpExpr ) node;
96	0	Oid negator = get_negator(opexpr->opno);
97
98	0	if (negator)
99	0	{
100	0	OpExpr *newopexpr = makeNode(OpExpr);
101
102	0	newopexpr->opno = negator;
103	0	newopexpr->opfuncid = InvalidOid;
104	0	newopexpr->opresulttype = opexpr->opresulttype;
105	0	newopexpr->opretset = opexpr->opretset;
106	0	newopexpr->opcollid = opexpr->opcollid;
107	0	newopexpr->inputcollid = opexpr->inputcollid;
108	0	newopexpr->args = opexpr->args;
109	0	newopexpr->location = opexpr->location;
110	0	return (Node *) newopexpr;
111	0	}
112	0	}
113	0	break;
114	0	case T_ScalarArrayOpExpr:
115	0	{
116		/*
117		* Negate a ScalarArrayOpExpr if its operator has a negator;
118		* for example x = ANY (list) becomes x <> ALL (list)
119		*/
120	0	ScalarArrayOpExpr saopexpr = (ScalarArrayOpExpr ) node;
121	0	Oid negator = get_negator(saopexpr->opno);
122
123	0	if (negator)
124	0	{
125	0	ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
126
127	0	newopexpr->opno = negator;
128	0	newopexpr->opfuncid = InvalidOid;
129	0	newopexpr->hashfuncid = InvalidOid;
130	0	newopexpr->negfuncid = InvalidOid;
131	0	newopexpr->useOr = !saopexpr->useOr;
132	0	newopexpr->inputcollid = saopexpr->inputcollid;
133	0	newopexpr->args = saopexpr->args;
134	0	newopexpr->location = saopexpr->location;
135	0	return (Node *) newopexpr;
136	0	}
137	0	}
138	0	break;
139	0	case T_BoolExpr:
140	0	{
141	0	BoolExpr expr = (BoolExpr ) node;
142
143	0	switch (expr->boolop)
144	0	{
145		/*--------------------
146		* Apply DeMorgan's Laws:
147		* (NOT (AND A B)) => (OR (NOT A) (NOT B))
148		* (NOT (OR A B)) => (AND (NOT A) (NOT B))
149		* i.e., swap AND for OR and negate each subclause.
150		*
151		* If the input is already AND/OR flat and has no NOT
152		* directly above AND or OR, this transformation preserves
153		* those properties. For example, if no direct child of
154		* the given AND clause is an AND or a NOT-above-OR, then
155		* the recursive calls of negate_clause() can't return any
156		* OR clauses. So we needn't call pull_ors() before
157		* building a new OR clause. Similarly for the OR case.
158		*--------------------
159		*/
160	0	case AND_EXPR:
161	0	{
162	0	List *nargs = NIL;
163	0	ListCell *lc;
164
165	0	foreach(lc, expr->args)
166	0	{
167	0	nargs = lappend(nargs,
168	0	negate_clause(lfirst(lc)));
169	0	}
170	0	return (Node *) make_orclause(nargs);
171	0	}
172	0	break;
173	0	case OR_EXPR:
174	0	{
175	0	List *nargs = NIL;
176	0	ListCell *lc;
177
178	0	foreach(lc, expr->args)
179	0	{
180	0	nargs = lappend(nargs,
181	0	negate_clause(lfirst(lc)));
182	0	}
183	0	return (Node *) make_andclause(nargs);
184	0	}
185	0	break;
186	0	case NOT_EXPR:
187
188		/*
189		* NOT underneath NOT: they cancel. We assume the
190		* input is already simplified, so no need to recurse.
191		*/
192	0	return (Node *) linitial(expr->args);
193	0	default:
194	0	elog(ERROR, "unrecognized boolop: %d",
195	0	(int) expr->boolop);
196	0	break;
197	0	}
198	0	}
199	0	break;
200	0	case T_NullTest:
201	0	{
202	0	NullTest expr = (NullTest ) node;
203
204		/*
205		* In the rowtype case, the two flavors of NullTest are not
206		* logical inverses, so we can't simplify. But it does work
207		* for scalar datatypes.
208		*/
209	0	if (!expr->argisrow)
210	0	{
211	0	NullTest *newexpr = makeNode(NullTest);
212
213	0	newexpr->arg = expr->arg;
214	0	newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
215	0	IS_NOT_NULL : IS_NULL);
216	0	newexpr->argisrow = expr->argisrow;
217	0	newexpr->location = expr->location;
218	0	return (Node *) newexpr;
219	0	}
220	0	}
221	0	break;
222	0	case T_BooleanTest:
223	0	{
224	0	BooleanTest expr = (BooleanTest ) node;
225	0	BooleanTest *newexpr = makeNode(BooleanTest);
226
227	0	newexpr->arg = expr->arg;
228	0	switch (expr->booltesttype)
229	0	{
230	0	case IS_TRUE:
231	0	newexpr->booltesttype = IS_NOT_TRUE;
232	0	break;
233	0	case IS_NOT_TRUE:
234	0	newexpr->booltesttype = IS_TRUE;
235	0	break;
236	0	case IS_FALSE:
237	0	newexpr->booltesttype = IS_NOT_FALSE;
238	0	break;
239	0	case IS_NOT_FALSE:
240	0	newexpr->booltesttype = IS_FALSE;
241	0	break;
242	0	case IS_UNKNOWN:
243	0	newexpr->booltesttype = IS_NOT_UNKNOWN;
244	0	break;
245	0	case IS_NOT_UNKNOWN:
246	0	newexpr->booltesttype = IS_UNKNOWN;
247	0	break;
248	0	default:
249	0	elog(ERROR, "unrecognized booltesttype: %d",
250	0	(int) expr->booltesttype);
251	0	break;
252	0	}
253	0	newexpr->location = expr->location;
254	0	return (Node *) newexpr;
255	0	}
256	0	break;
257	0	default:
258		/* else fall through */
259	0	break;
260	0	}
261
262		/*
263		* Otherwise we don't know how to simplify this, so just tack on an
264		* explicit NOT node.
265		*/
266	0	return (Node ) make_notclause((Expr ) node);
267	0	}
268
269
270		/*
271		* canonicalize_qual
272		* Convert a qualification expression to the most useful form.
273		*
274		* This is primarily intended to be used on top-level WHERE (or JOIN/ON)
275		* clauses. It can also be used on top-level CHECK constraints, for which
276		* pass is_check = true. DO NOT call it on any expression that is not known
277		* to be one or the other, as it might apply inappropriate simplifications.
278		*
279		* The name of this routine is a holdover from a time when it would try to
280		* force the expression into canonical AND-of-ORs or OR-of-ANDs form.
281		* Eventually, we recognized that that had more theoretical purity than
282		* actual usefulness, and so now the transformation doesn't involve any
283		* notion of reaching a canonical form.
284		*
285		* NOTE: we assume the input has already been through eval_const_expressions
286		* and therefore possesses AND/OR flatness. Formerly this function included
287		* its own flattening logic, but that requires a useless extra pass over the
288		* tree.
289		*
290		* Returns the modified qualification.
291		*/
292		Expr *
293		canonicalize_qual(Expr *qual, bool is_check)
294	0	{
295	0	Expr *newqual;
296
297		/* Quick exit for empty qual */
298	0	if (qual == NULL)
299	0	return NULL;
300
301		/* This should not be invoked on quals in implicit-AND format */
302	0	Assert(!IsA(qual, List));
303
304		/*
305		* Pull up redundant subclauses in OR-of-AND trees. We do this only
306		* within the top-level AND/OR structure; there's no point in looking
307		* deeper. Also remove any NULL constants in the top-level structure.
308		*/
309	0	newqual = find_duplicate_ors(qual, is_check);
310
311	0	return newqual;
312	0	}
313
314
315		/*
316		* pull_ands
317		* Recursively flatten nested AND clauses into a single and-clause list.
318		*
319		* Input is the arglist of an AND clause.
320		* Returns the rebuilt arglist (note original list structure is not touched).
321		*/
322		static List *
323		pull_ands(List *andlist)
324	0	{
325	0	List *out_list = NIL;
326	0	ListCell *arg;
327
328	0	foreach(arg, andlist)
329	0	{
330	0	Node subexpr = (Node ) lfirst(arg);
331
332	0	if (is_andclause(subexpr))
333	0	out_list = list_concat(out_list,
334	0	pull_ands(((BoolExpr *) subexpr)->args));
335	0	else
336	0	out_list = lappend(out_list, subexpr);
337	0	}
338	0	return out_list;
339	0	}
340
341		/*
342		* pull_ors
343		* Recursively flatten nested OR clauses into a single or-clause list.
344		*
345		* Input is the arglist of an OR clause.
346		* Returns the rebuilt arglist (note original list structure is not touched).
347		*/
348		static List *
349		pull_ors(List *orlist)
350	0	{
351	0	List *out_list = NIL;
352	0	ListCell *arg;
353
354	0	foreach(arg, orlist)
355	0	{
356	0	Node subexpr = (Node ) lfirst(arg);
357
358	0	if (is_orclause(subexpr))
359	0	out_list = list_concat(out_list,
360	0	pull_ors(((BoolExpr *) subexpr)->args));
361	0	else
362	0	out_list = lappend(out_list, subexpr);
363	0	}
364	0	return out_list;
365	0	}
366
367
368		/*--------------------
369		* The following code attempts to apply the inverse OR distributive law:
370		* ((A AND B) OR (A AND C)) => (A AND (B OR C))
371		* That is, locate OR clauses in which every subclause contains an
372		* identical term, and pull out the duplicated terms.
373		*
374		* This may seem like a fairly useless activity, but it turns out to be
375		* applicable to many machine-generated queries, and there are also queries
376		* in some of the TPC benchmarks that need it. This was in fact almost the
377		* sole useful side-effect of the old prepqual code that tried to force
378		* the query into canonical AND-of-ORs form: the canonical equivalent of
379		* ((A AND B) OR (A AND C))
380		* is
381		* ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
382		* which the code was able to simplify to
383		* (A AND (A OR C) AND (B OR A) AND (B OR C))
384		* thus successfully extracting the common condition A --- but at the cost
385		* of cluttering the qual with many redundant clauses.
386		*--------------------
387		*/
388
389		/*
390		* find_duplicate_ors
391		* Given a qualification tree with the NOTs pushed down, search for
392		* OR clauses to which the inverse OR distributive law might apply.
393		* Only the top-level AND/OR structure is searched.
394		*
395		* While at it, we remove any NULL constants within the top-level AND/OR
396		* structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
397		* In general that would change the result, so eval_const_expressions can't
398		* do it; but at top level of WHERE, we don't need to distinguish between
399		* FALSE and NULL results, so it's valid to treat NULL::boolean the same
400		* as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
401		* CHECK constraint, we may treat a NULL the same as TRUE.
402		*
403		* Returns the modified qualification. AND/OR flatness is preserved.
404		*/
405		static Expr *
406		find_duplicate_ors(Expr *qual, bool is_check)
407	0	{
408	0	if (is_orclause(qual))
409	0	{
410	0	List *orlist = NIL;
411	0	ListCell *temp;
412
413		/* Recurse */
414	0	foreach(temp, ((BoolExpr *) qual)->args)
415	0	{
416	0	Expr arg = (Expr ) lfirst(temp);
417
418	0	arg = find_duplicate_ors(arg, is_check);
419
420		/* Get rid of any constant inputs */
421	0	if (arg && IsA(arg, Const))
422	0	{
423	0	Const carg = (Const ) arg;
424
425	0	if (is_check)
426	0	{
427		/* Within OR in CHECK, drop constant FALSE */
428	0	if (!carg->constisnull && !DatumGetBool(carg->constvalue))
429	0	continue;
430		/* Constant TRUE or NULL, so OR reduces to TRUE */
431	0	return (Expr *) makeBoolConst(true, false);
432	0	}
433	0	else
434	0	{
435		/* Within OR in WHERE, drop constant FALSE or NULL */
436	0	if (carg->constisnull \|\| !DatumGetBool(carg->constvalue))
437	0	continue;
438		/* Constant TRUE, so OR reduces to TRUE */
439	0	return arg;
440	0	}
441	0	}
442
443	0	orlist = lappend(orlist, arg);
444	0	}
445
446		/* Flatten any ORs pulled up to just below here */
447	0	orlist = pull_ors(orlist);
448
449		/* Now we can look for duplicate ORs */
450	0	return process_duplicate_ors(orlist);
451	0	}
452	0	else if (is_andclause(qual))
453	0	{
454	0	List *andlist = NIL;
455	0	ListCell *temp;
456
457		/* Recurse */
458	0	foreach(temp, ((BoolExpr *) qual)->args)
459	0	{
460	0	Expr arg = (Expr ) lfirst(temp);
461
462	0	arg = find_duplicate_ors(arg, is_check);
463
464		/* Get rid of any constant inputs */
465	0	if (arg && IsA(arg, Const))
466	0	{
467	0	Const carg = (Const ) arg;
468
469	0	if (is_check)
470	0	{
471		/* Within AND in CHECK, drop constant TRUE or NULL */
472	0	if (carg->constisnull \|\| DatumGetBool(carg->constvalue))
473	0	continue;
474		/* Constant FALSE, so AND reduces to FALSE */
475	0	return arg;
476	0	}
477	0	else
478	0	{
479		/* Within AND in WHERE, drop constant TRUE */
480	0	if (!carg->constisnull && DatumGetBool(carg->constvalue))
481	0	continue;
482		/* Constant FALSE or NULL, so AND reduces to FALSE */
483	0	return (Expr *) makeBoolConst(false, false);
484	0	}
485	0	}
486
487	0	andlist = lappend(andlist, arg);
488	0	}
489
490		/* Flatten any ANDs introduced just below here */
491	0	andlist = pull_ands(andlist);
492
493		/* AND of no inputs reduces to TRUE */
494	0	if (andlist == NIL)
495	0	return (Expr *) makeBoolConst(true, false);
496
497		/* Single-expression AND just reduces to that expression */
498	0	if (list_length(andlist) == 1)
499	0	return (Expr *) linitial(andlist);
500
501		/* Else we still need an AND node */
502	0	return make_andclause(andlist);
503	0	}
504	0	else
505	0	return qual;
506	0	}
507
508		/*
509		* process_duplicate_ors
510		* Given a list of exprs which are ORed together, try to apply
511		* the inverse OR distributive law.
512		*
513		* Returns the resulting expression (could be an AND clause, an OR
514		* clause, or maybe even a single subexpression).
515		*/
516		static Expr *
517		process_duplicate_ors(List *orlist)
518	0	{
519	0	List *reference = NIL;
520	0	int num_subclauses = 0;
521	0	List *winners;
522	0	List *neworlist;
523	0	ListCell *temp;
524
525		/* OR of no inputs reduces to FALSE */
526	0	if (orlist == NIL)
527	0	return (Expr *) makeBoolConst(false, false);
528
529		/* Single-expression OR just reduces to that expression */
530	0	if (list_length(orlist) == 1)
531	0	return (Expr *) linitial(orlist);
532
533		/*
534		* Choose the shortest AND clause as the reference list --- obviously, any
535		* subclause not in this clause isn't in all the clauses. If we find a
536		* clause that's not an AND, we can treat it as a one-element AND clause,
537		* which necessarily wins as shortest.
538		*/
539	0	foreach(temp, orlist)
540	0	{
541	0	Expr clause = (Expr ) lfirst(temp);
542
543	0	if (is_andclause(clause))
544	0	{
545	0	List subclauses = ((BoolExpr ) clause)->args;
546	0	int nclauses = list_length(subclauses);
547
548	0	if (reference == NIL \|\| nclauses < num_subclauses)
549	0	{
550	0	reference = subclauses;
551	0	num_subclauses = nclauses;
552	0	}
553	0	}
554	0	else
555	0	{
556	0	reference = list_make1(clause);
557	0	break;
558	0	}
559	0	}
560
561		/*
562		* Just in case, eliminate any duplicates in the reference list.
563		*/
564	0	reference = list_union(NIL, reference);
565
566		/*
567		* Check each element of the reference list to see if it's in all the OR
568		* clauses. Build a new list of winning clauses.
569		*/
570	0	winners = NIL;
571	0	foreach(temp, reference)
572	0	{
573	0	Expr refclause = (Expr ) lfirst(temp);
574	0	bool win = true;
575	0	ListCell *temp2;
576
577	0	foreach(temp2, orlist)
578	0	{
579	0	Expr clause = (Expr ) lfirst(temp2);
580
581	0	if (is_andclause(clause))
582	0	{
583	0	if (!list_member(((BoolExpr *) clause)->args, refclause))
584	0	{
585	0	win = false;
586	0	break;
587	0	}
588	0	}
589	0	else
590	0	{
591	0	if (!equal(refclause, clause))
592	0	{
593	0	win = false;
594	0	break;
595	0	}
596	0	}
597	0	}
598
599	0	if (win)
600	0	winners = lappend(winners, refclause);
601	0	}
602
603		/*
604		* If no winners, we can't transform the OR
605		*/
606	0	if (winners == NIL)
607	0	return make_orclause(orlist);
608
609		/*
610		* Generate new OR list consisting of the remaining sub-clauses.
611		*
612		* If any clause degenerates to empty, then we have a situation like (A
613		* AND B) OR (A), which can be reduced to just A --- that is, the
614		* additional conditions in other arms of the OR are irrelevant.
615		*
616		* Note that because we use list_difference, any multiple occurrences of a
617		* winning clause in an AND sub-clause will be removed automatically.
618		*/
619	0	neworlist = NIL;
620	0	foreach(temp, orlist)
621	0	{
622	0	Expr clause = (Expr ) lfirst(temp);
623
624	0	if (is_andclause(clause))
625	0	{
626	0	List subclauses = ((BoolExpr ) clause)->args;
627
628	0	subclauses = list_difference(subclauses, winners);
629	0	if (subclauses != NIL)
630	0	{
631	0	if (list_length(subclauses) == 1)
632	0	neworlist = lappend(neworlist, linitial(subclauses));
633	0	else
634	0	neworlist = lappend(neworlist, make_andclause(subclauses));
635	0	}
636	0	else
637	0	{
638	0	neworlist = NIL; /* degenerate case, see above */
639	0	break;
640	0	}
641	0	}
642	0	else
643	0	{
644	0	if (!list_member(winners, clause))
645	0	neworlist = lappend(neworlist, clause);
646	0	else
647	0	{
648	0	neworlist = NIL; /* degenerate case, see above */
649	0	break;
650	0	}
651	0	}
652	0	}
653
654		/*
655		* Append reduced OR to the winners list, if it's not degenerate, handling
656		* the special case of one element correctly (can that really happen?).
657		* Also be careful to maintain AND/OR flatness in case we pulled up a
658		* sub-sub-OR-clause.
659		*/
660	0	if (neworlist != NIL)
661	0	{
662	0	if (list_length(neworlist) == 1)
663	0	winners = lappend(winners, linitial(neworlist));
664	0	else
665	0	winners = lappend(winners, make_orclause(pull_ors(neworlist)));
666	0	}
667
668		/*
669		* And return the constructed AND clause, again being wary of a single
670		* element and AND/OR flatness.
671		*/
672	0	if (list_length(winners) == 1)
673	0	return (Expr *) linitial(winners);
674	0	else
675	0	return make_andclause(pull_ands(winners));
676	0	}