Header And Logo

PostgreSQL
| The world's most advanced open source database.

prepqual.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * prepqual.c
00004  *    Routines for preprocessing qualification expressions
00005  *
00006  *
00007  * The parser regards AND and OR as purely binary operators, so a qual like
00008  *      (A = 1) OR (A = 2) OR (A = 3) ...
00009  * will produce a nested parsetree
00010  *      (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
00011  * In reality, the optimizer and executor regard AND and OR as N-argument
00012  * operators, so this tree can be flattened to
00013  *      (OR (A = 1) (A = 2) (A = 3) ...)
00014  *
00015  * Formerly, this module was responsible for doing the initial flattening,
00016  * but now we leave it to eval_const_expressions to do that since it has to
00017  * make a complete pass over the expression tree anyway.  Instead, we just
00018  * have to ensure that our manipulations preserve AND/OR flatness.
00019  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
00020  * tree after local transformations that might introduce nested AND/ORs.
00021  *
00022  *
00023  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00024  * Portions Copyright (c) 1994, Regents of the University of California
00025  *
00026  *
00027  * IDENTIFICATION
00028  *    src/backend/optimizer/prep/prepqual.c
00029  *
00030  *-------------------------------------------------------------------------
00031  */
00032 
00033 #include "postgres.h"
00034 
00035 #include "nodes/makefuncs.h"
00036 #include "optimizer/clauses.h"
00037 #include "optimizer/prep.h"
00038 #include "utils/lsyscache.h"
00039 
00040 
00041 static List *pull_ands(List *andlist);
00042 static List *pull_ors(List *orlist);
00043 static Expr *find_duplicate_ors(Expr *qual);
00044 static Expr *process_duplicate_ors(List *orlist);
00045 
00046 
00047 /*
00048  * negate_clause
00049  *    Negate a Boolean expression.
00050  *
00051  * Input is a clause to be negated (e.g., the argument of a NOT clause).
00052  * Returns a new clause equivalent to the negation of the given clause.
00053  *
00054  * Although this can be invoked on its own, it's mainly intended as a helper
00055  * for eval_const_expressions(), and that context drives several design
00056  * decisions.  In particular, if the input is already AND/OR flat, we must
00057  * preserve that property.  We also don't bother to recurse in situations
00058  * where we can assume that lower-level executions of eval_const_expressions
00059  * would already have simplified sub-clauses of the input.
00060  *
00061  * The difference between this and a simple make_notclause() is that this
00062  * tries to get rid of the NOT node by logical simplification.  It's clearly
00063  * always a win if the NOT node can be eliminated altogether.  However, our
00064  * use of DeMorgan's laws could result in having more NOT nodes rather than
00065  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
00066  * important to expose as much top-level AND/OR structure as possible.
00067  * Also, eliminating an intermediate NOT may allow us to flatten two levels
00068  * of AND or OR together that we couldn't have otherwise.  Finally, one of
00069  * the motivations for doing this is to ensure that logically equivalent
00070  * expressions will be seen as physically equal(), so we should always apply
00071  * the same transformations.
00072  */
00073 Node *
00074 negate_clause(Node *node)
00075 {
00076     if (node == NULL)           /* should not happen */
00077         elog(ERROR, "can't negate an empty subexpression");
00078     switch (nodeTag(node))
00079     {
00080         case T_Const:
00081             {
00082                 Const      *c = (Const *) node;
00083 
00084                 /* NOT NULL is still NULL */
00085                 if (c->constisnull)
00086                     return makeBoolConst(false, true);
00087                 /* otherwise pretty easy */
00088                 return makeBoolConst(!DatumGetBool(c->constvalue), false);
00089             }
00090             break;
00091         case T_OpExpr:
00092             {
00093                 /*
00094                  * Negate operator if possible: (NOT (< A B)) => (>= A B)
00095                  */
00096                 OpExpr     *opexpr = (OpExpr *) node;
00097                 Oid         negator = get_negator(opexpr->opno);
00098 
00099                 if (negator)
00100                 {
00101                     OpExpr     *newopexpr = makeNode(OpExpr);
00102 
00103                     newopexpr->opno = negator;
00104                     newopexpr->opfuncid = InvalidOid;
00105                     newopexpr->opresulttype = opexpr->opresulttype;
00106                     newopexpr->opretset = opexpr->opretset;
00107                     newopexpr->opcollid = opexpr->opcollid;
00108                     newopexpr->inputcollid = opexpr->inputcollid;
00109                     newopexpr->args = opexpr->args;
00110                     newopexpr->location = opexpr->location;
00111                     return (Node *) newopexpr;
00112                 }
00113             }
00114             break;
00115         case T_ScalarArrayOpExpr:
00116             {
00117                 /*
00118                  * Negate a ScalarArrayOpExpr if its operator has a negator;
00119                  * for example x = ANY (list) becomes x <> ALL (list)
00120                  */
00121                 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
00122                 Oid         negator = get_negator(saopexpr->opno);
00123 
00124                 if (negator)
00125                 {
00126                     ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
00127 
00128                     newopexpr->opno = negator;
00129                     newopexpr->opfuncid = InvalidOid;
00130                     newopexpr->useOr = !saopexpr->useOr;
00131                     newopexpr->inputcollid = saopexpr->inputcollid;
00132                     newopexpr->args = saopexpr->args;
00133                     newopexpr->location = saopexpr->location;
00134                     return (Node *) newopexpr;
00135                 }
00136             }
00137             break;
00138         case T_BoolExpr:
00139             {
00140                 BoolExpr   *expr = (BoolExpr *) node;
00141 
00142                 switch (expr->boolop)
00143                 {
00144                         /*--------------------
00145                          * Apply DeMorgan's Laws:
00146                          *      (NOT (AND A B)) => (OR (NOT A) (NOT B))
00147                          *      (NOT (OR A B))  => (AND (NOT A) (NOT B))
00148                          * i.e., swap AND for OR and negate each subclause.
00149                          *
00150                          * If the input is already AND/OR flat and has no NOT
00151                          * directly above AND or OR, this transformation preserves
00152                          * those properties.  For example, if no direct child of
00153                          * the given AND clause is an AND or a NOT-above-OR, then
00154                          * the recursive calls of negate_clause() can't return any
00155                          * OR clauses.  So we needn't call pull_ors() before
00156                          * building a new OR clause.  Similarly for the OR case.
00157                          *--------------------
00158                          */
00159                     case AND_EXPR:
00160                         {
00161                             List       *nargs = NIL;
00162                             ListCell   *lc;
00163 
00164                             foreach(lc, expr->args)
00165                             {
00166                                 nargs = lappend(nargs,
00167                                                 negate_clause(lfirst(lc)));
00168                             }
00169                             return (Node *) make_orclause(nargs);
00170                         }
00171                         break;
00172                     case OR_EXPR:
00173                         {
00174                             List       *nargs = NIL;
00175                             ListCell   *lc;
00176 
00177                             foreach(lc, expr->args)
00178                             {
00179                                 nargs = lappend(nargs,
00180                                                 negate_clause(lfirst(lc)));
00181                             }
00182                             return (Node *) make_andclause(nargs);
00183                         }
00184                         break;
00185                     case NOT_EXPR:
00186 
00187                         /*
00188                          * NOT underneath NOT: they cancel.  We assume the
00189                          * input is already simplified, so no need to recurse.
00190                          */
00191                         return (Node *) linitial(expr->args);
00192                     default:
00193                         elog(ERROR, "unrecognized boolop: %d",
00194                              (int) expr->boolop);
00195                         break;
00196                 }
00197             }
00198             break;
00199         case T_NullTest:
00200             {
00201                 NullTest   *expr = (NullTest *) node;
00202 
00203                 /*
00204                  * In the rowtype case, the two flavors of NullTest are *not*
00205                  * logical inverses, so we can't simplify.  But it does work
00206                  * for scalar datatypes.
00207                  */
00208                 if (!expr->argisrow)
00209                 {
00210                     NullTest   *newexpr = makeNode(NullTest);
00211 
00212                     newexpr->arg = expr->arg;
00213                     newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
00214                                              IS_NOT_NULL : IS_NULL);
00215                     newexpr->argisrow = expr->argisrow;
00216                     return (Node *) newexpr;
00217                 }
00218             }
00219             break;
00220         case T_BooleanTest:
00221             {
00222                 BooleanTest *expr = (BooleanTest *) node;
00223                 BooleanTest *newexpr = makeNode(BooleanTest);
00224 
00225                 newexpr->arg = expr->arg;
00226                 switch (expr->booltesttype)
00227                 {
00228                     case IS_TRUE:
00229                         newexpr->booltesttype = IS_NOT_TRUE;
00230                         break;
00231                     case IS_NOT_TRUE:
00232                         newexpr->booltesttype = IS_TRUE;
00233                         break;
00234                     case IS_FALSE:
00235                         newexpr->booltesttype = IS_NOT_FALSE;
00236                         break;
00237                     case IS_NOT_FALSE:
00238                         newexpr->booltesttype = IS_FALSE;
00239                         break;
00240                     case IS_UNKNOWN:
00241                         newexpr->booltesttype = IS_NOT_UNKNOWN;
00242                         break;
00243                     case IS_NOT_UNKNOWN:
00244                         newexpr->booltesttype = IS_UNKNOWN;
00245                         break;
00246                     default:
00247                         elog(ERROR, "unrecognized booltesttype: %d",
00248                              (int) expr->booltesttype);
00249                         break;
00250                 }
00251                 return (Node *) newexpr;
00252             }
00253             break;
00254         default:
00255             /* else fall through */
00256             break;
00257     }
00258 
00259     /*
00260      * Otherwise we don't know how to simplify this, so just tack on an
00261      * explicit NOT node.
00262      */
00263     return (Node *) make_notclause((Expr *) node);
00264 }
00265 
00266 
00267 /*
00268  * canonicalize_qual
00269  *    Convert a qualification expression to the most useful form.
00270  *
00271  * The name of this routine is a holdover from a time when it would try to
00272  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
00273  * Eventually, we recognized that that had more theoretical purity than
00274  * actual usefulness, and so now the transformation doesn't involve any
00275  * notion of reaching a canonical form.
00276  *
00277  * NOTE: we assume the input has already been through eval_const_expressions
00278  * and therefore possesses AND/OR flatness.  Formerly this function included
00279  * its own flattening logic, but that requires a useless extra pass over the
00280  * tree.
00281  *
00282  * Returns the modified qualification.
00283  */
00284 Expr *
00285 canonicalize_qual(Expr *qual)
00286 {
00287     Expr       *newqual;
00288 
00289     /* Quick exit for empty qual */
00290     if (qual == NULL)
00291         return NULL;
00292 
00293     /*
00294      * Pull up redundant subclauses in OR-of-AND trees.  We do this only
00295      * within the top-level AND/OR structure; there's no point in looking
00296      * deeper.
00297      */
00298     newqual = find_duplicate_ors(qual);
00299 
00300     return newqual;
00301 }
00302 
00303 
00304 /*
00305  * pull_ands
00306  *    Recursively flatten nested AND clauses into a single and-clause list.
00307  *
00308  * Input is the arglist of an AND clause.
00309  * Returns the rebuilt arglist (note original list structure is not touched).
00310  */
00311 static List *
00312 pull_ands(List *andlist)
00313 {
00314     List       *out_list = NIL;
00315     ListCell   *arg;
00316 
00317     foreach(arg, andlist)
00318     {
00319         Node       *subexpr = (Node *) lfirst(arg);
00320 
00321         /*
00322          * Note: we can destructively concat the subexpression's arglist
00323          * because we know the recursive invocation of pull_ands will have
00324          * built a new arglist not shared with any other expr. Otherwise we'd
00325          * need a list_copy here.
00326          */
00327         if (and_clause(subexpr))
00328             out_list = list_concat(out_list,
00329                                    pull_ands(((BoolExpr *) subexpr)->args));
00330         else
00331             out_list = lappend(out_list, subexpr);
00332     }
00333     return out_list;
00334 }
00335 
00336 /*
00337  * pull_ors
00338  *    Recursively flatten nested OR clauses into a single or-clause list.
00339  *
00340  * Input is the arglist of an OR clause.
00341  * Returns the rebuilt arglist (note original list structure is not touched).
00342  */
00343 static List *
00344 pull_ors(List *orlist)
00345 {
00346     List       *out_list = NIL;
00347     ListCell   *arg;
00348 
00349     foreach(arg, orlist)
00350     {
00351         Node       *subexpr = (Node *) lfirst(arg);
00352 
00353         /*
00354          * Note: we can destructively concat the subexpression's arglist
00355          * because we know the recursive invocation of pull_ors will have
00356          * built a new arglist not shared with any other expr. Otherwise we'd
00357          * need a list_copy here.
00358          */
00359         if (or_clause(subexpr))
00360             out_list = list_concat(out_list,
00361                                    pull_ors(((BoolExpr *) subexpr)->args));
00362         else
00363             out_list = lappend(out_list, subexpr);
00364     }
00365     return out_list;
00366 }
00367 
00368 
00369 /*--------------------
00370  * The following code attempts to apply the inverse OR distributive law:
00371  *      ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
00372  * That is, locate OR clauses in which every subclause contains an
00373  * identical term, and pull out the duplicated terms.
00374  *
00375  * This may seem like a fairly useless activity, but it turns out to be
00376  * applicable to many machine-generated queries, and there are also queries
00377  * in some of the TPC benchmarks that need it.  This was in fact almost the
00378  * sole useful side-effect of the old prepqual code that tried to force
00379  * the query into canonical AND-of-ORs form: the canonical equivalent of
00380  *      ((A AND B) OR (A AND C))
00381  * is
00382  *      ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
00383  * which the code was able to simplify to
00384  *      (A AND (A OR C) AND (B OR A) AND (B OR C))
00385  * thus successfully extracting the common condition A --- but at the cost
00386  * of cluttering the qual with many redundant clauses.
00387  *--------------------
00388  */
00389 
00390 /*
00391  * find_duplicate_ors
00392  *    Given a qualification tree with the NOTs pushed down, search for
00393  *    OR clauses to which the inverse OR distributive law might apply.
00394  *    Only the top-level AND/OR structure is searched.
00395  *
00396  * Returns the modified qualification.  AND/OR flatness is preserved.
00397  */
00398 static Expr *
00399 find_duplicate_ors(Expr *qual)
00400 {
00401     if (or_clause((Node *) qual))
00402     {
00403         List       *orlist = NIL;
00404         ListCell   *temp;
00405 
00406         /* Recurse */
00407         foreach(temp, ((BoolExpr *) qual)->args)
00408             orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
00409 
00410         /*
00411          * Don't need pull_ors() since this routine will never introduce an OR
00412          * where there wasn't one before.
00413          */
00414         return process_duplicate_ors(orlist);
00415     }
00416     else if (and_clause((Node *) qual))
00417     {
00418         List       *andlist = NIL;
00419         ListCell   *temp;
00420 
00421         /* Recurse */
00422         foreach(temp, ((BoolExpr *) qual)->args)
00423             andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
00424         /* Flatten any ANDs introduced just below here */
00425         andlist = pull_ands(andlist);
00426         /* The AND list can't get shorter, so result is always an AND */
00427         return make_andclause(andlist);
00428     }
00429     else
00430         return qual;
00431 }
00432 
00433 /*
00434  * process_duplicate_ors
00435  *    Given a list of exprs which are ORed together, try to apply
00436  *    the inverse OR distributive law.
00437  *
00438  * Returns the resulting expression (could be an AND clause, an OR
00439  * clause, or maybe even a single subexpression).
00440  */
00441 static Expr *
00442 process_duplicate_ors(List *orlist)
00443 {
00444     List       *reference = NIL;
00445     int         num_subclauses = 0;
00446     List       *winners;
00447     List       *neworlist;
00448     ListCell   *temp;
00449 
00450     if (orlist == NIL)
00451         return NULL;            /* probably can't happen */
00452     if (list_length(orlist) == 1)       /* single-expression OR (can this
00453                                          * happen?) */
00454         return linitial(orlist);
00455 
00456     /*
00457      * Choose the shortest AND clause as the reference list --- obviously, any
00458      * subclause not in this clause isn't in all the clauses. If we find a
00459      * clause that's not an AND, we can treat it as a one-element AND clause,
00460      * which necessarily wins as shortest.
00461      */
00462     foreach(temp, orlist)
00463     {
00464         Expr       *clause = (Expr *) lfirst(temp);
00465 
00466         if (and_clause((Node *) clause))
00467         {
00468             List       *subclauses = ((BoolExpr *) clause)->args;
00469             int         nclauses = list_length(subclauses);
00470 
00471             if (reference == NIL || nclauses < num_subclauses)
00472             {
00473                 reference = subclauses;
00474                 num_subclauses = nclauses;
00475             }
00476         }
00477         else
00478         {
00479             reference = list_make1(clause);
00480             break;
00481         }
00482     }
00483 
00484     /*
00485      * Just in case, eliminate any duplicates in the reference list.
00486      */
00487     reference = list_union(NIL, reference);
00488 
00489     /*
00490      * Check each element of the reference list to see if it's in all the OR
00491      * clauses.  Build a new list of winning clauses.
00492      */
00493     winners = NIL;
00494     foreach(temp, reference)
00495     {
00496         Expr       *refclause = (Expr *) lfirst(temp);
00497         bool        win = true;
00498         ListCell   *temp2;
00499 
00500         foreach(temp2, orlist)
00501         {
00502             Expr       *clause = (Expr *) lfirst(temp2);
00503 
00504             if (and_clause((Node *) clause))
00505             {
00506                 if (!list_member(((BoolExpr *) clause)->args, refclause))
00507                 {
00508                     win = false;
00509                     break;
00510                 }
00511             }
00512             else
00513             {
00514                 if (!equal(refclause, clause))
00515                 {
00516                     win = false;
00517                     break;
00518                 }
00519             }
00520         }
00521 
00522         if (win)
00523             winners = lappend(winners, refclause);
00524     }
00525 
00526     /*
00527      * If no winners, we can't transform the OR
00528      */
00529     if (winners == NIL)
00530         return make_orclause(orlist);
00531 
00532     /*
00533      * Generate new OR list consisting of the remaining sub-clauses.
00534      *
00535      * If any clause degenerates to empty, then we have a situation like (A
00536      * AND B) OR (A), which can be reduced to just A --- that is, the
00537      * additional conditions in other arms of the OR are irrelevant.
00538      *
00539      * Note that because we use list_difference, any multiple occurrences of a
00540      * winning clause in an AND sub-clause will be removed automatically.
00541      */
00542     neworlist = NIL;
00543     foreach(temp, orlist)
00544     {
00545         Expr       *clause = (Expr *) lfirst(temp);
00546 
00547         if (and_clause((Node *) clause))
00548         {
00549             List       *subclauses = ((BoolExpr *) clause)->args;
00550 
00551             subclauses = list_difference(subclauses, winners);
00552             if (subclauses != NIL)
00553             {
00554                 if (list_length(subclauses) == 1)
00555                     neworlist = lappend(neworlist, linitial(subclauses));
00556                 else
00557                     neworlist = lappend(neworlist, make_andclause(subclauses));
00558             }
00559             else
00560             {
00561                 neworlist = NIL;    /* degenerate case, see above */
00562                 break;
00563             }
00564         }
00565         else
00566         {
00567             if (!list_member(winners, clause))
00568                 neworlist = lappend(neworlist, clause);
00569             else
00570             {
00571                 neworlist = NIL;    /* degenerate case, see above */
00572                 break;
00573             }
00574         }
00575     }
00576 
00577     /*
00578      * Append reduced OR to the winners list, if it's not degenerate, handling
00579      * the special case of one element correctly (can that really happen?).
00580      * Also be careful to maintain AND/OR flatness in case we pulled up a
00581      * sub-sub-OR-clause.
00582      */
00583     if (neworlist != NIL)
00584     {
00585         if (list_length(neworlist) == 1)
00586             winners = lappend(winners, linitial(neworlist));
00587         else
00588             winners = lappend(winners, make_orclause(pull_ors(neworlist)));
00589     }
00590 
00591     /*
00592      * And return the constructed AND clause, again being wary of a single
00593      * element and AND/OR flatness.
00594      */
00595     if (list_length(winners) == 1)
00596         return (Expr *) linitial(winners);
00597     else
00598         return make_andclause(pull_ands(winners));
00599 }