PostgreSQL Source Code: src/backend/optimizer/path/equivclass.c Source File

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * equivclass.c
00004  *    Routines for managing EquivalenceClasses
00005  *
00006  * See src/backend/optimizer/README for discussion of EquivalenceClasses.
00007  *
00008  *
00009  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00010  * Portions Copyright (c) 1994, Regents of the University of California
00011  *
00012  * IDENTIFICATION
00013  *    src/backend/optimizer/path/equivclass.c
00014  *
00015  *-------------------------------------------------------------------------
00016  */
00017 #include "postgres.h"
00018 
00019 #include "access/skey.h"
00020 #include "catalog/pg_type.h"
00021 #include "nodes/makefuncs.h"
00022 #include "nodes/nodeFuncs.h"
00023 #include "optimizer/clauses.h"
00024 #include "optimizer/pathnode.h"
00025 #include "optimizer/paths.h"
00026 #include "optimizer/planmain.h"
00027 #include "optimizer/prep.h"
00028 #include "optimizer/var.h"
00029 #include "utils/lsyscache.h"
00030 
00031 
00032 static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
00033               Expr *expr, Relids relids, Relids nullable_relids,
00034               bool is_child, Oid datatype);
00035 static void generate_base_implied_equalities_const(PlannerInfo *root,
00036                                        EquivalenceClass *ec);
00037 static void generate_base_implied_equalities_no_const(PlannerInfo *root,
00038                                           EquivalenceClass *ec);
00039 static void generate_base_implied_equalities_broken(PlannerInfo *root,
00040                                         EquivalenceClass *ec);
00041 static List *generate_join_implied_equalities_normal(PlannerInfo *root,
00042                                         EquivalenceClass *ec,
00043                                         Relids join_relids,
00044                                         Relids outer_relids,
00045                                         Relids inner_relids);
00046 static List *generate_join_implied_equalities_broken(PlannerInfo *root,
00047                                         EquivalenceClass *ec,
00048                                         Relids nominal_join_relids,
00049                                         Relids outer_relids,
00050                                         Relids nominal_inner_relids,
00051                                         AppendRelInfo *inner_appinfo);
00052 static Oid select_equality_operator(EquivalenceClass *ec,
00053                          Oid lefttype, Oid righttype);
00054 static RestrictInfo *create_join_clause(PlannerInfo *root,
00055                    EquivalenceClass *ec, Oid opno,
00056                    EquivalenceMember *leftem,
00057                    EquivalenceMember *rightem,
00058                    EquivalenceClass *parent_ec);
00059 static bool reconsider_outer_join_clause(PlannerInfo *root,
00060                              RestrictInfo *rinfo,
00061                              bool outer_on_left);
00062 static bool reconsider_full_join_clause(PlannerInfo *root,
00063                             RestrictInfo *rinfo);
00064 
00065 
00066 /*
00067  * process_equivalence
00068  *    The given clause has a mergejoinable operator and can be applied without
00069  *    any delay by an outer join, so its two sides can be considered equal
00070  *    anywhere they are both computable; moreover that equality can be
00071  *    extended transitively.  Record this knowledge in the EquivalenceClass
00072  *    data structure.  Returns TRUE if successful, FALSE if not (in which
00073  *    case caller should treat the clause as ordinary, not an equivalence).
00074  *
00075  * If below_outer_join is true, then the clause was found below the nullable
00076  * side of an outer join, so its sides might validly be both NULL rather than
00077  * strictly equal.  We can still deduce equalities in such cases, but we take
00078  * care to mark an EquivalenceClass if it came from any such clauses.  Also,
00079  * we have to check that both sides are either pseudo-constants or strict
00080  * functions of Vars, else they might not both go to NULL above the outer
00081  * join.  (This is the reason why we need a failure return.  It's more
00082  * convenient to check this case here than at the call sites...)
00083  *
00084  * On success return, we have also initialized the clause's left_ec/right_ec
00085  * fields to point to the EquivalenceClass representing it.  This saves lookup
00086  * effort later.
00087  *
00088  * Note: constructing merged EquivalenceClasses is a standard UNION-FIND
00089  * problem, for which there exist better data structures than simple lists.
00090  * If this code ever proves to be a bottleneck then it could be sped up ---
00091  * but for now, simple is beautiful.
00092  *
00093  * Note: this is only called during planner startup, not during GEQO
00094  * exploration, so we need not worry about whether we're in the right
00095  * memory context.
00096  */
00097 bool
00098 process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
00099                     bool below_outer_join)
00100 {
00101     Expr       *clause = restrictinfo->clause;
00102     Oid         opno,
00103                 collation,
00104                 item1_type,
00105                 item2_type;
00106     Expr       *item1;
00107     Expr       *item2;
00108     Relids      item1_relids,
00109                 item2_relids,
00110                 item1_nullable_relids,
00111                 item2_nullable_relids;
00112     List       *opfamilies;
00113     EquivalenceClass *ec1,
00114                *ec2;
00115     EquivalenceMember *em1,
00116                *em2;
00117     ListCell   *lc1;
00118 
00119     /* Should not already be marked as having generated an eclass */
00120     Assert(restrictinfo->left_ec == NULL);
00121     Assert(restrictinfo->right_ec == NULL);
00122 
00123     /* Extract info from given clause */
00124     Assert(is_opclause(clause));
00125     opno = ((OpExpr *) clause)->opno;
00126     collation = ((OpExpr *) clause)->inputcollid;
00127     item1 = (Expr *) get_leftop(clause);
00128     item2 = (Expr *) get_rightop(clause);
00129     item1_relids = restrictinfo->left_relids;
00130     item2_relids = restrictinfo->right_relids;
00131 
00132     /*
00133      * Ensure both input expressions expose the desired collation (their types
00134      * should be OK already); see comments for canonicalize_ec_expression.
00135      */
00136     item1 = canonicalize_ec_expression(item1,
00137                                        exprType((Node *) item1),
00138                                        collation);
00139     item2 = canonicalize_ec_expression(item2,
00140                                        exprType((Node *) item2),
00141                                        collation);
00142 
00143     /*
00144      * Reject clauses of the form X=X.  These are not as redundant as they
00145      * might seem at first glance: assuming the operator is strict, this is
00146      * really an expensive way to write X IS NOT NULL.  So we must not risk
00147      * just losing the clause, which would be possible if there is already a
00148      * single-element EquivalenceClass containing X.  The case is not common
00149      * enough to be worth contorting the EC machinery for, so just reject the
00150      * clause and let it be processed as a normal restriction clause.
00151      */
00152     if (equal(item1, item2))
00153         return false;           /* X=X is not a useful equivalence */
00154 
00155     /*
00156      * If below outer join, check for strictness, else reject.
00157      */
00158     if (below_outer_join)
00159     {
00160         if (!bms_is_empty(item1_relids) &&
00161             contain_nonstrict_functions((Node *) item1))
00162             return false;       /* LHS is non-strict but not constant */
00163         if (!bms_is_empty(item2_relids) &&
00164             contain_nonstrict_functions((Node *) item2))
00165             return false;       /* RHS is non-strict but not constant */
00166     }
00167 
00168     /* Calculate nullable-relid sets for each side of the clause */
00169     item1_nullable_relids = bms_intersect(item1_relids,
00170                                           restrictinfo->nullable_relids);
00171     item2_nullable_relids = bms_intersect(item2_relids,
00172                                           restrictinfo->nullable_relids);
00173 
00174     /*
00175      * We use the declared input types of the operator, not exprType() of the
00176      * inputs, as the nominal datatypes for opfamily lookup.  This presumes
00177      * that btree operators are always registered with amoplefttype and
00178      * amoprighttype equal to their declared input types.  We will need this
00179      * info anyway to build EquivalenceMember nodes, and by extracting it now
00180      * we can use type comparisons to short-circuit some equal() tests.
00181      */
00182     op_input_types(opno, &item1_type, &item2_type);
00183 
00184     opfamilies = restrictinfo->mergeopfamilies;
00185 
00186     /*
00187      * Sweep through the existing EquivalenceClasses looking for matches to
00188      * item1 and item2.  These are the possible outcomes:
00189      *
00190      * 1. We find both in the same EC.  The equivalence is already known, so
00191      * there's nothing to do.
00192      *
00193      * 2. We find both in different ECs.  Merge the two ECs together.
00194      *
00195      * 3. We find just one.  Add the other to its EC.
00196      *
00197      * 4. We find neither.  Make a new, two-entry EC.
00198      *
00199      * Note: since all ECs are built through this process or the similar
00200      * search in get_eclass_for_sort_expr(), it's impossible that we'd match
00201      * an item in more than one existing nonvolatile EC.  So it's okay to stop
00202      * at the first match.
00203      */
00204     ec1 = ec2 = NULL;
00205     em1 = em2 = NULL;
00206     foreach(lc1, root->eq_classes)
00207     {
00208         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
00209         ListCell   *lc2;
00210 
00211         /* Never match to a volatile EC */
00212         if (cur_ec->ec_has_volatile)
00213             continue;
00214 
00215         /*
00216          * The collation has to match; check this first since it's cheaper
00217          * than the opfamily comparison.
00218          */
00219         if (collation != cur_ec->ec_collation)
00220             continue;
00221 
00222         /*
00223          * A "match" requires matching sets of btree opfamilies.  Use of
00224          * equal() for this test has implications discussed in the comments
00225          * for get_mergejoin_opfamilies().
00226          */
00227         if (!equal(opfamilies, cur_ec->ec_opfamilies))
00228             continue;
00229 
00230         foreach(lc2, cur_ec->ec_members)
00231         {
00232             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
00233 
00234             Assert(!cur_em->em_is_child);       /* no children yet */
00235 
00236             /*
00237              * If below an outer join, don't match constants: they're not as
00238              * constant as they look.
00239              */
00240             if ((below_outer_join || cur_ec->ec_below_outer_join) &&
00241                 cur_em->em_is_const)
00242                 continue;
00243 
00244             if (!ec1 &&
00245                 item1_type == cur_em->em_datatype &&
00246                 equal(item1, cur_em->em_expr))
00247             {
00248                 ec1 = cur_ec;
00249                 em1 = cur_em;
00250                 if (ec2)
00251                     break;
00252             }
00253 
00254             if (!ec2 &&
00255                 item2_type == cur_em->em_datatype &&
00256                 equal(item2, cur_em->em_expr))
00257             {
00258                 ec2 = cur_ec;
00259                 em2 = cur_em;
00260                 if (ec1)
00261                     break;
00262             }
00263         }
00264 
00265         if (ec1 && ec2)
00266             break;
00267     }
00268 
00269     /* Sweep finished, what did we find? */
00270 
00271     if (ec1 && ec2)
00272     {
00273         /* If case 1, nothing to do, except add to sources */
00274         if (ec1 == ec2)
00275         {
00276             ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
00277             ec1->ec_below_outer_join |= below_outer_join;
00278             /* mark the RI as associated with this eclass */
00279             restrictinfo->left_ec = ec1;
00280             restrictinfo->right_ec = ec1;
00281             /* mark the RI as usable with this pair of EMs */
00282             restrictinfo->left_em = em1;
00283             restrictinfo->right_em = em2;
00284             return true;
00285         }
00286 
00287         /*
00288          * Case 2: need to merge ec1 and ec2.  This should never happen after
00289          * we've built any canonical pathkeys; if it did, those pathkeys might
00290          * be rendered non-canonical by the merge.
00291          */
00292         if (root->canon_pathkeys != NIL)
00293             elog(ERROR, "too late to merge equivalence classes");
00294 
00295         /*
00296          * We add ec2's items to ec1, then set ec2's ec_merged link to point
00297          * to ec1 and remove ec2 from the eq_classes list.  We cannot simply
00298          * delete ec2 because that could leave dangling pointers in existing
00299          * PathKeys.  We leave it behind with a link so that the merged EC can
00300          * be found.
00301          */
00302         ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
00303         ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
00304         ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives);
00305         ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
00306         ec1->ec_has_const |= ec2->ec_has_const;
00307         /* can't need to set has_volatile */
00308         ec1->ec_below_outer_join |= ec2->ec_below_outer_join;
00309         ec2->ec_merged = ec1;
00310         root->eq_classes = list_delete_ptr(root->eq_classes, ec2);
00311         /* just to avoid debugging confusion w/ dangling pointers: */
00312         ec2->ec_members = NIL;
00313         ec2->ec_sources = NIL;
00314         ec2->ec_derives = NIL;
00315         ec2->ec_relids = NULL;
00316         ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
00317         ec1->ec_below_outer_join |= below_outer_join;
00318         /* mark the RI as associated with this eclass */
00319         restrictinfo->left_ec = ec1;
00320         restrictinfo->right_ec = ec1;
00321         /* mark the RI as usable with this pair of EMs */
00322         restrictinfo->left_em = em1;
00323         restrictinfo->right_em = em2;
00324     }
00325     else if (ec1)
00326     {
00327         /* Case 3: add item2 to ec1 */
00328         em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids,
00329                             false, item2_type);
00330         ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
00331         ec1->ec_below_outer_join |= below_outer_join;
00332         /* mark the RI as associated with this eclass */
00333         restrictinfo->left_ec = ec1;
00334         restrictinfo->right_ec = ec1;
00335         /* mark the RI as usable with this pair of EMs */
00336         restrictinfo->left_em = em1;
00337         restrictinfo->right_em = em2;
00338     }
00339     else if (ec2)
00340     {
00341         /* Case 3: add item1 to ec2 */
00342         em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids,
00343                             false, item1_type);
00344         ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
00345         ec2->ec_below_outer_join |= below_outer_join;
00346         /* mark the RI as associated with this eclass */
00347         restrictinfo->left_ec = ec2;
00348         restrictinfo->right_ec = ec2;
00349         /* mark the RI as usable with this pair of EMs */
00350         restrictinfo->left_em = em1;
00351         restrictinfo->right_em = em2;
00352     }
00353     else
00354     {
00355         /* Case 4: make a new, two-entry EC */
00356         EquivalenceClass *ec = makeNode(EquivalenceClass);
00357 
00358         ec->ec_opfamilies = opfamilies;
00359         ec->ec_collation = collation;
00360         ec->ec_members = NIL;
00361         ec->ec_sources = list_make1(restrictinfo);
00362         ec->ec_derives = NIL;
00363         ec->ec_relids = NULL;
00364         ec->ec_has_const = false;
00365         ec->ec_has_volatile = false;
00366         ec->ec_below_outer_join = below_outer_join;
00367         ec->ec_broken = false;
00368         ec->ec_sortref = 0;
00369         ec->ec_merged = NULL;
00370         em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
00371                             false, item1_type);
00372         em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
00373                             false, item2_type);
00374 
00375         root->eq_classes = lappend(root->eq_classes, ec);
00376 
00377         /* mark the RI as associated with this eclass */
00378         restrictinfo->left_ec = ec;
00379         restrictinfo->right_ec = ec;
00380         /* mark the RI as usable with this pair of EMs */
00381         restrictinfo->left_em = em1;
00382         restrictinfo->right_em = em2;
00383     }
00384 
00385     return true;
00386 }
00387 
00388 /*
00389  * canonicalize_ec_expression
00390  *
00391  * This function ensures that the expression exposes the expected type and
00392  * collation, so that it will be equal() to other equivalence-class expressions
00393  * that it ought to be equal() to.
00394  *
00395  * The rule for datatypes is that the exposed type should match what it would
00396  * be for an input to an operator of the EC's opfamilies; which is usually
00397  * the declared input type of the operator, but in the case of polymorphic
00398  * operators no relabeling is wanted (compare the behavior of parse_coerce.c).
00399  * Expressions coming in from quals will generally have the right type
00400  * already, but expressions coming from indexkeys may not (because they are
00401  * represented without any explicit relabel in pg_index), and the same problem
00402  * occurs for sort expressions (because the parser is likewise cavalier about
00403  * putting relabels on them).  Such cases will be binary-compatible with the
00404  * real operators, so adding a RelabelType is sufficient.
00405  *
00406  * Also, the expression's exposed collation must match the EC's collation.
00407  * This is important because in comparisons like "foo < bar COLLATE baz",
00408  * only one of the expressions has the correct exposed collation as we receive
00409  * it from the parser.  Forcing both of them to have it ensures that all
00410  * variant spellings of such a construct behave the same.  Again, we can
00411  * stick on a RelabelType to force the right exposed collation.  (It might
00412  * work to not label the collation at all in EC members, but this is risky
00413  * since some parts of the system expect exprCollation() to deliver the
00414  * right answer for a sort key.)
00415  *
00416  * Note this code assumes that the expression has already been through
00417  * eval_const_expressions, so there are no CollateExprs and no redundant
00418  * RelabelTypes.
00419  */
00420 Expr *
00421 canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
00422 {
00423     Oid         expr_type = exprType((Node *) expr);
00424 
00425     /*
00426      * For a polymorphic-input-type opclass, just keep the same exposed type.
00427      */
00428     if (IsPolymorphicType(req_type))
00429         req_type = expr_type;
00430 
00431     /*
00432      * No work if the expression exposes the right type/collation already.
00433      */
00434     if (expr_type != req_type ||
00435         exprCollation((Node *) expr) != req_collation)
00436     {
00437         /*
00438          * Strip any existing RelabelType, then add a new one if needed. This
00439          * is to preserve the invariant of no redundant RelabelTypes.
00440          *
00441          * If we have to change the exposed type of the stripped expression,
00442          * set typmod to -1 (since the new type may not have the same typmod
00443          * interpretation).  If we only have to change collation, preserve the
00444          * exposed typmod.
00445          */
00446         while (expr && IsA(expr, RelabelType))
00447             expr = (Expr *) ((RelabelType *) expr)->arg;
00448 
00449         if (exprType((Node *) expr) != req_type)
00450             expr = (Expr *) makeRelabelType(expr,
00451                                             req_type,
00452                                             -1,
00453                                             req_collation,
00454                                             COERCE_IMPLICIT_CAST);
00455         else if (exprCollation((Node *) expr) != req_collation)
00456             expr = (Expr *) makeRelabelType(expr,
00457                                             req_type,
00458                                             exprTypmod((Node *) expr),
00459                                             req_collation,
00460                                             COERCE_IMPLICIT_CAST);
00461     }
00462 
00463     return expr;
00464 }
00465 
00466 /*
00467  * add_eq_member - build a new EquivalenceMember and add it to an EC
00468  */
00469 static EquivalenceMember *
00470 add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
00471               Relids nullable_relids, bool is_child, Oid datatype)
00472 {
00473     EquivalenceMember *em = makeNode(EquivalenceMember);
00474 
00475     em->em_expr = expr;
00476     em->em_relids = relids;
00477     em->em_nullable_relids = nullable_relids;
00478     em->em_is_const = false;
00479     em->em_is_child = is_child;
00480     em->em_datatype = datatype;
00481 
00482     if (bms_is_empty(relids))
00483     {
00484         /*
00485          * No Vars, assume it's a pseudoconstant.  This is correct for entries
00486          * generated from process_equivalence(), because a WHERE clause can't
00487          * contain aggregates or SRFs, and non-volatility was checked before
00488          * process_equivalence() ever got called.  But
00489          * get_eclass_for_sort_expr() has to work harder.  We put the tests
00490          * there not here to save cycles in the equivalence case.
00491          */
00492         Assert(!is_child);
00493         em->em_is_const = true;
00494         ec->ec_has_const = true;
00495         /* it can't affect ec_relids */
00496     }
00497     else if (!is_child)         /* child members don't add to ec_relids */
00498     {
00499         ec->ec_relids = bms_add_members(ec->ec_relids, relids);
00500     }
00501     ec->ec_members = lappend(ec->ec_members, em);
00502 
00503     return em;
00504 }
00505 
00506 
00507 /*
00508  * get_eclass_for_sort_expr
00509  *    Given an expression and opfamily/collation info, find an existing
00510  *    equivalence class it is a member of; if none, optionally build a new
00511  *    single-member EquivalenceClass for it.
00512  *
00513  * sortref is the SortGroupRef of the originating SortGroupClause, if any,
00514  * or zero if not.  (It should never be zero if the expression is volatile!)
00515  *
00516  * If rel is not NULL, it identifies a specific relation we're considering
00517  * a path for, and indicates that child EC members for that relation can be
00518  * considered.  Otherwise child members are ignored.  (Note: since child EC
00519  * members aren't guaranteed unique, a non-NULL value means that there could
00520  * be more than one EC that matches the expression; if so it's order-dependent
00521  * which one you get.  This is annoying but it only happens in corner cases,
00522  * so for now we live with just reporting the first match.  See also
00523  * generate_implied_equalities_for_column and match_pathkeys_to_index.)
00524  *
00525  * If create_it is TRUE, we'll build a new EquivalenceClass when there is no
00526  * match.  If create_it is FALSE, we just return NULL when no match.
00527  *
00528  * This can be used safely both before and after EquivalenceClass merging;
00529  * since it never causes merging it does not invalidate any existing ECs
00530  * or PathKeys.  However, ECs added after path generation has begun are
00531  * of limited usefulness, so usually it's best to create them beforehand.
00532  *
00533  * Note: opfamilies must be chosen consistently with the way
00534  * process_equivalence() would do; that is, generated from a mergejoinable
00535  * equality operator.  Else we might fail to detect valid equivalences,
00536  * generating poor (but not incorrect) plans.
00537  */
00538 EquivalenceClass *
00539 get_eclass_for_sort_expr(PlannerInfo *root,
00540                          Expr *expr,
00541                          List *opfamilies,
00542                          Oid opcintype,
00543                          Oid collation,
00544                          Index sortref,
00545                          Relids rel,
00546                          bool create_it)
00547 {
00548     EquivalenceClass *newec;
00549     EquivalenceMember *newem;
00550     ListCell   *lc1;
00551     MemoryContext oldcontext;
00552 
00553     /*
00554      * Ensure the expression exposes the correct type and collation.
00555      */
00556     expr = canonicalize_ec_expression(expr, opcintype, collation);
00557 
00558     /*
00559      * Scan through the existing EquivalenceClasses for a match
00560      */
00561     foreach(lc1, root->eq_classes)
00562     {
00563         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
00564         ListCell   *lc2;
00565 
00566         /*
00567          * Never match to a volatile EC, except when we are looking at another
00568          * reference to the same volatile SortGroupClause.
00569          */
00570         if (cur_ec->ec_has_volatile &&
00571             (sortref == 0 || sortref != cur_ec->ec_sortref))
00572             continue;
00573 
00574         if (collation != cur_ec->ec_collation)
00575             continue;
00576         if (!equal(opfamilies, cur_ec->ec_opfamilies))
00577             continue;
00578 
00579         foreach(lc2, cur_ec->ec_members)
00580         {
00581             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
00582 
00583             /*
00584              * Ignore child members unless they match the request.
00585              */
00586             if (cur_em->em_is_child &&
00587                 !bms_equal(cur_em->em_relids, rel))
00588                 continue;
00589 
00590             /*
00591              * If below an outer join, don't match constants: they're not as
00592              * constant as they look.
00593              */
00594             if (cur_ec->ec_below_outer_join &&
00595                 cur_em->em_is_const)
00596                 continue;
00597 
00598             if (opcintype == cur_em->em_datatype &&
00599                 equal(expr, cur_em->em_expr))
00600                 return cur_ec;  /* Match! */
00601         }
00602     }
00603 
00604     /* No match; does caller want a NULL result? */
00605     if (!create_it)
00606         return NULL;
00607 
00608     /*
00609      * OK, build a new single-member EC
00610      *
00611      * Here, we must be sure that we construct the EC in the right context.
00612      */
00613     oldcontext = MemoryContextSwitchTo(root->planner_cxt);
00614 
00615     newec = makeNode(EquivalenceClass);
00616     newec->ec_opfamilies = list_copy(opfamilies);
00617     newec->ec_collation = collation;
00618     newec->ec_members = NIL;
00619     newec->ec_sources = NIL;
00620     newec->ec_derives = NIL;
00621     newec->ec_relids = NULL;
00622     newec->ec_has_const = false;
00623     newec->ec_has_volatile = contain_volatile_functions((Node *) expr);
00624     newec->ec_below_outer_join = false;
00625     newec->ec_broken = false;
00626     newec->ec_sortref = sortref;
00627     newec->ec_merged = NULL;
00628 
00629     if (newec->ec_has_volatile && sortref == 0) /* should not happen */
00630         elog(ERROR, "volatile EquivalenceClass has no sortref");
00631 
00632     newem = add_eq_member(newec, copyObject(expr), pull_varnos((Node *) expr),
00633                           NULL, false, opcintype);
00634 
00635     /*
00636      * add_eq_member doesn't check for volatile functions, set-returning
00637      * functions, aggregates, or window functions, but such could appear in
00638      * sort expressions; so we have to check whether its const-marking was
00639      * correct.
00640      */
00641     if (newec->ec_has_const)
00642     {
00643         if (newec->ec_has_volatile ||
00644             expression_returns_set((Node *) expr) ||
00645             contain_agg_clause((Node *) expr) ||
00646             contain_window_function((Node *) expr))
00647         {
00648             newec->ec_has_const = false;
00649             newem->em_is_const = false;
00650         }
00651     }
00652 
00653     root->eq_classes = lappend(root->eq_classes, newec);
00654 
00655     MemoryContextSwitchTo(oldcontext);
00656 
00657     return newec;
00658 }
00659 
00660 
00661 /*
00662  * generate_base_implied_equalities
00663  *    Generate any restriction clauses that we can deduce from equivalence
00664  *    classes.
00665  *
00666  * When an EC contains pseudoconstants, our strategy is to generate
00667  * "member = const1" clauses where const1 is the first constant member, for
00668  * every other member (including other constants).  If we are able to do this
00669  * then we don't need any "var = var" comparisons because we've successfully
00670  * constrained all the vars at their points of creation.  If we fail to
00671  * generate any of these clauses due to lack of cross-type operators, we fall
00672  * back to the "ec_broken" strategy described below.  (XXX if there are
00673  * multiple constants of different types, it's possible that we might succeed
00674  * in forming all the required clauses if we started from a different const
00675  * member; but this seems a sufficiently hokey corner case to not be worth
00676  * spending lots of cycles on.)
00677  *
00678  * For ECs that contain no pseudoconstants, we generate derived clauses
00679  * "member1 = member2" for each pair of members belonging to the same base
00680  * relation (actually, if there are more than two for the same base relation,
00681  * we only need enough clauses to link each to each other).  This provides
00682  * the base case for the recursion: each row emitted by a base relation scan
00683  * will constrain all computable members of the EC to be equal.  As each
00684  * join path is formed, we'll add additional derived clauses on-the-fly
00685  * to maintain this invariant (see generate_join_implied_equalities).
00686  *
00687  * If the opfamilies used by the EC do not provide complete sets of cross-type
00688  * equality operators, it is possible that we will fail to generate a clause
00689  * that must be generated to maintain the invariant.  (An example: given
00690  * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot
00691  * generate "a.x = a.z" as a restriction clause for A.)  In this case we mark
00692  * the EC "ec_broken" and fall back to regurgitating its original source
00693  * RestrictInfos at appropriate times.  We do not try to retract any derived
00694  * clauses already generated from the broken EC, so the resulting plan could
00695  * be poor due to bad selectivity estimates caused by redundant clauses.  But
00696  * the correct solution to that is to fix the opfamilies ...
00697  *
00698  * Equality clauses derived by this function are passed off to
00699  * process_implied_equality (in plan/initsplan.c) to be inserted into the
00700  * restrictinfo datastructures.  Note that this must be called after initial
00701  * scanning of the quals and before Path construction begins.
00702  *
00703  * We make no attempt to avoid generating duplicate RestrictInfos here: we
00704  * don't search ec_sources for matches, nor put the created RestrictInfos
00705  * into ec_derives.  Doing so would require some slightly ugly changes in
00706  * initsplan.c's API, and there's no real advantage, because the clauses
00707  * generated here can't duplicate anything we will generate for joins anyway.
00708  */
00709 void
00710 generate_base_implied_equalities(PlannerInfo *root)
00711 {
00712     ListCell   *lc;
00713     Index       rti;
00714 
00715     foreach(lc, root->eq_classes)
00716     {
00717         EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
00718 
00719         Assert(ec->ec_merged == NULL);  /* else shouldn't be in list */
00720         Assert(!ec->ec_broken); /* not yet anyway... */
00721 
00722         /* Single-member ECs won't generate any deductions */
00723         if (list_length(ec->ec_members) <= 1)
00724             continue;
00725 
00726         if (ec->ec_has_const)
00727             generate_base_implied_equalities_const(root, ec);
00728         else
00729             generate_base_implied_equalities_no_const(root, ec);
00730 
00731         /* Recover if we failed to generate required derived clauses */
00732         if (ec->ec_broken)
00733             generate_base_implied_equalities_broken(root, ec);
00734     }
00735 
00736     /*
00737      * This is also a handy place to mark base rels (which should all exist by
00738      * now) with flags showing whether they have pending eclass joins.
00739      */
00740     for (rti = 1; rti < root->simple_rel_array_size; rti++)
00741     {
00742         RelOptInfo *brel = root->simple_rel_array[rti];
00743 
00744         if (brel == NULL)
00745             continue;
00746 
00747         brel->has_eclass_joins = has_relevant_eclass_joinclause(root, brel);
00748     }
00749 }
00750 
00751 /*
00752  * generate_base_implied_equalities when EC contains pseudoconstant(s)
00753  */
00754 static void
00755 generate_base_implied_equalities_const(PlannerInfo *root,
00756                                        EquivalenceClass *ec)
00757 {
00758     EquivalenceMember *const_em = NULL;
00759     ListCell   *lc;
00760 
00761     /*
00762      * In the trivial case where we just had one "var = const" clause, push
00763      * the original clause back into the main planner machinery.  There is
00764      * nothing to be gained by doing it differently, and we save the effort to
00765      * re-build and re-analyze an equality clause that will be exactly
00766      * equivalent to the old one.
00767      */
00768     if (list_length(ec->ec_members) == 2 &&
00769         list_length(ec->ec_sources) == 1)
00770     {
00771         RestrictInfo *restrictinfo = (RestrictInfo *) linitial(ec->ec_sources);
00772 
00773         if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
00774         {
00775             distribute_restrictinfo_to_rels(root, restrictinfo);
00776             return;
00777         }
00778     }
00779 
00780     /*
00781      * Find the constant member to use.  We prefer an actual constant to
00782      * pseudo-constants (such as Params), because the constraint exclusion
00783      * machinery might be able to exclude relations on the basis of generated
00784      * "var = const" equalities, but "var = param" won't work for that.
00785      */
00786     foreach(lc, ec->ec_members)
00787     {
00788         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
00789 
00790         if (cur_em->em_is_const)
00791         {
00792             const_em = cur_em;
00793             if (IsA(cur_em->em_expr, Const))
00794                 break;
00795         }
00796     }
00797     Assert(const_em != NULL);
00798 
00799     /* Generate a derived equality against each other member */
00800     foreach(lc, ec->ec_members)
00801     {
00802         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
00803         Oid         eq_op;
00804 
00805         Assert(!cur_em->em_is_child);   /* no children yet */
00806         if (cur_em == const_em)
00807             continue;
00808         eq_op = select_equality_operator(ec,
00809                                          cur_em->em_datatype,
00810                                          const_em->em_datatype);
00811         if (!OidIsValid(eq_op))
00812         {
00813             /* failed... */
00814             ec->ec_broken = true;
00815             break;
00816         }
00817         process_implied_equality(root, eq_op, ec->ec_collation,
00818                                  cur_em->em_expr, const_em->em_expr,
00819                                  bms_copy(ec->ec_relids),
00820                                  bms_union(cur_em->em_nullable_relids,
00821                                            const_em->em_nullable_relids),
00822                                  ec->ec_below_outer_join,
00823                                  cur_em->em_is_const);
00824     }
00825 }
00826 
00827 /*
00828  * generate_base_implied_equalities when EC contains no pseudoconstants
00829  */
00830 static void
00831 generate_base_implied_equalities_no_const(PlannerInfo *root,
00832                                           EquivalenceClass *ec)
00833 {
00834     EquivalenceMember **prev_ems;
00835     ListCell   *lc;
00836 
00837     /*
00838      * We scan the EC members once and track the last-seen member for each
00839      * base relation.  When we see another member of the same base relation,
00840      * we generate "prev_mem = cur_mem".  This results in the minimum number
00841      * of derived clauses, but it's possible that it will fail when a
00842      * different ordering would succeed.  XXX FIXME: use a UNION-FIND
00843      * algorithm similar to the way we build merged ECs.  (Use a list-of-lists
00844      * for each rel.)
00845      */
00846     prev_ems = (EquivalenceMember **)
00847         palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *));
00848 
00849     foreach(lc, ec->ec_members)
00850     {
00851         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
00852         int         relid;
00853 
00854         Assert(!cur_em->em_is_child);   /* no children yet */
00855         if (bms_membership(cur_em->em_relids) != BMS_SINGLETON)
00856             continue;
00857         relid = bms_singleton_member(cur_em->em_relids);
00858         Assert(relid < root->simple_rel_array_size);
00859 
00860         if (prev_ems[relid] != NULL)
00861         {
00862             EquivalenceMember *prev_em = prev_ems[relid];
00863             Oid         eq_op;
00864 
00865             eq_op = select_equality_operator(ec,
00866                                              prev_em->em_datatype,
00867                                              cur_em->em_datatype);
00868             if (!OidIsValid(eq_op))
00869             {
00870                 /* failed... */
00871                 ec->ec_broken = true;
00872                 break;
00873             }
00874             process_implied_equality(root, eq_op, ec->ec_collation,
00875                                      prev_em->em_expr, cur_em->em_expr,
00876                                      bms_copy(ec->ec_relids),
00877                                      bms_union(prev_em->em_nullable_relids,
00878                                                cur_em->em_nullable_relids),
00879                                      ec->ec_below_outer_join,
00880                                      false);
00881         }
00882         prev_ems[relid] = cur_em;
00883     }
00884 
00885     pfree(prev_ems);
00886 
00887     /*
00888      * We also have to make sure that all the Vars used in the member clauses
00889      * will be available at any join node we might try to reference them at.
00890      * For the moment we force all the Vars to be available at all join nodes
00891      * for this eclass.  Perhaps this could be improved by doing some
00892      * pre-analysis of which members we prefer to join, but it's no worse than
00893      * what happened in the pre-8.3 code.
00894      */
00895     foreach(lc, ec->ec_members)
00896     {
00897         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
00898         List       *vars = pull_var_clause((Node *) cur_em->em_expr,
00899                                            PVC_RECURSE_AGGREGATES,
00900                                            PVC_INCLUDE_PLACEHOLDERS);
00901 
00902         add_vars_to_targetlist(root, vars, ec->ec_relids, false);
00903         list_free(vars);
00904     }
00905 }
00906 
00907 /*
00908  * generate_base_implied_equalities cleanup after failure
00909  *
00910  * What we must do here is push any zero- or one-relation source RestrictInfos
00911  * of the EC back into the main restrictinfo datastructures.  Multi-relation
00912  * clauses will be regurgitated later by generate_join_implied_equalities().
00913  * (We do it this way to maintain continuity with the case that ec_broken
00914  * becomes set only after we've gone up a join level or two.)  However, for
00915  * an EC that contains constants, we can adopt a simpler strategy and just
00916  * throw back all the source RestrictInfos immediately; that works because
00917  * we know that such an EC can't become broken later.  (This rule justifies
00918  * ignoring ec_has_const ECs in generate_join_implied_equalities, even when
00919  * they are broken.)
00920  */
00921 static void
00922 generate_base_implied_equalities_broken(PlannerInfo *root,
00923                                         EquivalenceClass *ec)
00924 {
00925     ListCell   *lc;
00926 
00927     foreach(lc, ec->ec_sources)
00928     {
00929         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
00930 
00931         if (ec->ec_has_const ||
00932             bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
00933             distribute_restrictinfo_to_rels(root, restrictinfo);
00934     }
00935 }
00936 
00937 
00938 /*
00939  * generate_join_implied_equalities
00940  *    Generate any join clauses that we can deduce from equivalence classes.
00941  *
00942  * At a join node, we must enforce restriction clauses sufficient to ensure
00943  * that all equivalence-class members computable at that node are equal.
00944  * Since the set of clauses to enforce can vary depending on which subset
00945  * relations are the inputs, we have to compute this afresh for each join
00946  * relation pair.  Hence a fresh List of RestrictInfo nodes is built and
00947  * passed back on each call.
00948  *
00949  * In addition to its use at join nodes, this can be applied to generate
00950  * eclass-based join clauses for use in a parameterized scan of a base rel.
00951  * The reason for the asymmetry of specifying the inner rel as a RelOptInfo
00952  * and the outer rel by Relids is that this usage occurs before we have
00953  * built any join RelOptInfos.
00954  *
00955  * An annoying special case for parameterized scans is that the inner rel can
00956  * be an appendrel child (an "other rel").  In this case we must generate
00957  * appropriate clauses using child EC members.  add_child_rel_equivalences
00958  * must already have been done for the child rel.
00959  *
00960  * The results are sufficient for use in merge, hash, and plain nestloop join
00961  * methods.  We do not worry here about selecting clauses that are optimal
00962  * for use in a parameterized indexscan.  indxpath.c makes its own selections
00963  * of clauses to use, and if the ones we pick here are redundant with those,
00964  * the extras will be eliminated at createplan time, using the parent_ec
00965  * markers that we provide (see is_redundant_derived_clause()).
00966  *
00967  * Because the same join clauses are likely to be needed multiple times as
00968  * we consider different join paths, we avoid generating multiple copies:
00969  * whenever we select a particular pair of EquivalenceMembers to join,
00970  * we check to see if the pair matches any original clause (in ec_sources)
00971  * or previously-built clause (in ec_derives).  This saves memory and allows
00972  * re-use of information cached in RestrictInfos.
00973  *
00974  * join_relids should always equal bms_union(outer_relids, inner_rel->relids).
00975  * We could simplify this function's API by computing it internally, but in
00976  * all current uses, the caller has the value at hand anyway.
00977  */
00978 List *
00979 generate_join_implied_equalities(PlannerInfo *root,
00980                                  Relids join_relids,
00981                                  Relids outer_relids,
00982                                  RelOptInfo *inner_rel)
00983 {
00984     List       *result = NIL;
00985     Relids      inner_relids = inner_rel->relids;
00986     Relids      nominal_inner_relids;
00987     Relids      nominal_join_relids;
00988     AppendRelInfo *inner_appinfo;
00989     ListCell   *lc;
00990 
00991     /* If inner rel is a child, extra setup work is needed */
00992     if (inner_rel->reloptkind == RELOPT_OTHER_MEMBER_REL)
00993     {
00994         /* Lookup parent->child translation data */
00995         inner_appinfo = find_childrel_appendrelinfo(root, inner_rel);
00996         /* Construct relids for the parent rel */
00997         nominal_inner_relids = bms_make_singleton(inner_appinfo->parent_relid);
00998         /* ECs will be marked with the parent's relid, not the child's */
00999         nominal_join_relids = bms_union(outer_relids, nominal_inner_relids);
01000     }
01001     else
01002     {
01003         inner_appinfo = NULL;
01004         nominal_inner_relids = inner_relids;
01005         nominal_join_relids = join_relids;
01006     }
01007 
01008     foreach(lc, root->eq_classes)
01009     {
01010         EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
01011         List       *sublist = NIL;
01012 
01013         /* ECs containing consts do not need any further enforcement */
01014         if (ec->ec_has_const)
01015             continue;
01016 
01017         /* Single-member ECs won't generate any deductions */
01018         if (list_length(ec->ec_members) <= 1)
01019             continue;
01020 
01021         /* We can quickly ignore any that don't overlap the join, too */
01022         if (!bms_overlap(ec->ec_relids, nominal_join_relids))
01023             continue;
01024 
01025         if (!ec->ec_broken)
01026             sublist = generate_join_implied_equalities_normal(root,
01027                                                               ec,
01028                                                               join_relids,
01029                                                               outer_relids,
01030                                                               inner_relids);
01031 
01032         /* Recover if we failed to generate required derived clauses */
01033         if (ec->ec_broken)
01034             sublist = generate_join_implied_equalities_broken(root,
01035                                                               ec,
01036                                                          nominal_join_relids,
01037                                                               outer_relids,
01038                                                         nominal_inner_relids,
01039                                                               inner_appinfo);
01040 
01041         result = list_concat(result, sublist);
01042     }
01043 
01044     return result;
01045 }
01046 
01047 /*
01048  * generate_join_implied_equalities for a still-valid EC
01049  */
01050 static List *
01051 generate_join_implied_equalities_normal(PlannerInfo *root,
01052                                         EquivalenceClass *ec,
01053                                         Relids join_relids,
01054                                         Relids outer_relids,
01055                                         Relids inner_relids)
01056 {
01057     List       *result = NIL;
01058     List       *new_members = NIL;
01059     List       *outer_members = NIL;
01060     List       *inner_members = NIL;
01061     ListCell   *lc1;
01062 
01063     /*
01064      * First, scan the EC to identify member values that are computable at the
01065      * outer rel, at the inner rel, or at this relation but not in either
01066      * input rel.  The outer-rel members should already be enforced equal,
01067      * likewise for the inner-rel members.  We'll need to create clauses to
01068      * enforce that any newly computable members are all equal to each other
01069      * as well as to at least one input member, plus enforce at least one
01070      * outer-rel member equal to at least one inner-rel member.
01071      */
01072     foreach(lc1, ec->ec_members)
01073     {
01074         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);
01075 
01076         /*
01077          * We don't need to check explicitly for child EC members.  This test
01078          * against join_relids will cause them to be ignored except when
01079          * considering a child inner rel, which is what we want.
01080          */
01081         if (!bms_is_subset(cur_em->em_relids, join_relids))
01082             continue;           /* not computable yet, or wrong child */
01083 
01084         if (bms_is_subset(cur_em->em_relids, outer_relids))
01085             outer_members = lappend(outer_members, cur_em);
01086         else if (bms_is_subset(cur_em->em_relids, inner_relids))
01087             inner_members = lappend(inner_members, cur_em);
01088         else
01089             new_members = lappend(new_members, cur_em);
01090     }
01091 
01092     /*
01093      * First, select the joinclause if needed.  We can equate any one outer
01094      * member to any one inner member, but we have to find a datatype
01095      * combination for which an opfamily member operator exists.  If we have
01096      * choices, we prefer simple Var members (possibly with RelabelType) since
01097      * these are (a) cheapest to compute at runtime and (b) most likely to
01098      * have useful statistics. Also, prefer operators that are also
01099      * hashjoinable.
01100      */
01101     if (outer_members && inner_members)
01102     {
01103         EquivalenceMember *best_outer_em = NULL;
01104         EquivalenceMember *best_inner_em = NULL;
01105         Oid         best_eq_op = InvalidOid;
01106         int         best_score = -1;
01107         RestrictInfo *rinfo;
01108 
01109         foreach(lc1, outer_members)
01110         {
01111             EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1);
01112             ListCell   *lc2;
01113 
01114             foreach(lc2, inner_members)
01115             {
01116                 EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2);
01117                 Oid         eq_op;
01118                 int         score;
01119 
01120                 eq_op = select_equality_operator(ec,
01121                                                  outer_em->em_datatype,
01122                                                  inner_em->em_datatype);
01123                 if (!OidIsValid(eq_op))
01124                     continue;
01125                 score = 0;
01126                 if (IsA(outer_em->em_expr, Var) ||
01127                     (IsA(outer_em->em_expr, RelabelType) &&
01128                      IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
01129                     score++;
01130                 if (IsA(inner_em->em_expr, Var) ||
01131                     (IsA(inner_em->em_expr, RelabelType) &&
01132                      IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
01133                     score++;
01134                 if (op_hashjoinable(eq_op,
01135                                     exprType((Node *) outer_em->em_expr)))
01136                     score++;
01137                 if (score > best_score)
01138                 {
01139                     best_outer_em = outer_em;
01140                     best_inner_em = inner_em;
01141                     best_eq_op = eq_op;
01142                     best_score = score;
01143                     if (best_score == 3)
01144                         break;  /* no need to look further */
01145                 }
01146             }
01147             if (best_score == 3)
01148                 break;          /* no need to look further */
01149         }
01150         if (best_score < 0)
01151         {
01152             /* failed... */
01153             ec->ec_broken = true;
01154             return NIL;
01155         }
01156 
01157         /*
01158          * Create clause, setting parent_ec to mark it as redundant with other
01159          * joinclauses
01160          */
01161         rinfo = create_join_clause(root, ec, best_eq_op,
01162                                    best_outer_em, best_inner_em,
01163                                    ec);
01164 
01165         result = lappend(result, rinfo);
01166     }
01167 
01168     /*
01169      * Now deal with building restrictions for any expressions that involve
01170      * Vars from both sides of the join.  We have to equate all of these to
01171      * each other as well as to at least one old member (if any).
01172      *
01173      * XXX as in generate_base_implied_equalities_no_const, we could be a lot
01174      * smarter here to avoid unnecessary failures in cross-type situations.
01175      * For now, use the same left-to-right method used there.
01176      */
01177     if (new_members)
01178     {
01179         List       *old_members = list_concat(outer_members, inner_members);
01180         EquivalenceMember *prev_em = NULL;
01181         RestrictInfo *rinfo;
01182 
01183         /* For now, arbitrarily take the first old_member as the one to use */
01184         if (old_members)
01185             new_members = lappend(new_members, linitial(old_members));
01186 
01187         foreach(lc1, new_members)
01188         {
01189             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);
01190 
01191             if (prev_em != NULL)
01192             {
01193                 Oid         eq_op;
01194 
01195                 eq_op = select_equality_operator(ec,
01196                                                  prev_em->em_datatype,
01197                                                  cur_em->em_datatype);
01198                 if (!OidIsValid(eq_op))
01199                 {
01200                     /* failed... */
01201                     ec->ec_broken = true;
01202                     return NIL;
01203                 }
01204                 /* do NOT set parent_ec, this qual is not redundant! */
01205                 rinfo = create_join_clause(root, ec, eq_op,
01206                                            prev_em, cur_em,
01207                                            NULL);
01208 
01209                 result = lappend(result, rinfo);
01210             }
01211             prev_em = cur_em;
01212         }
01213     }
01214 
01215     return result;
01216 }
01217 
01218 /*
01219  * generate_join_implied_equalities cleanup after failure
01220  *
01221  * Return any original RestrictInfos that are enforceable at this join.
01222  *
01223  * In the case of a child inner relation, we have to translate the
01224  * original RestrictInfos from parent to child Vars.
01225  */
01226 static List *
01227 generate_join_implied_equalities_broken(PlannerInfo *root,
01228                                         EquivalenceClass *ec,
01229                                         Relids nominal_join_relids,
01230                                         Relids outer_relids,
01231                                         Relids nominal_inner_relids,
01232                                         AppendRelInfo *inner_appinfo)
01233 {
01234     List       *result = NIL;
01235     ListCell   *lc;
01236 
01237     foreach(lc, ec->ec_sources)
01238     {
01239         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
01240         Relids      clause_relids = restrictinfo->required_relids;
01241 
01242         if (bms_is_subset(clause_relids, nominal_join_relids) &&
01243             !bms_is_subset(clause_relids, outer_relids) &&
01244             !bms_is_subset(clause_relids, nominal_inner_relids))
01245             result = lappend(result, restrictinfo);
01246     }
01247 
01248     /*
01249      * If we have to translate, just brute-force apply adjust_appendrel_attrs
01250      * to all the RestrictInfos at once.  This will result in returning
01251      * RestrictInfos that are not listed in ec_derives, but there shouldn't be
01252      * any duplication, and it's a sufficiently narrow corner case that we
01253      * shouldn't sweat too much over it anyway.
01254      */
01255     if (inner_appinfo)
01256         result = (List *) adjust_appendrel_attrs(root, (Node *) result,
01257                                                  inner_appinfo);
01258 
01259     return result;
01260 }
01261 
01262 
01263 /*
01264  * select_equality_operator
01265  *    Select a suitable equality operator for comparing two EC members
01266  *
01267  * Returns InvalidOid if no operator can be found for this datatype combination
01268  */
01269 static Oid
01270 select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype)
01271 {
01272     ListCell   *lc;
01273 
01274     foreach(lc, ec->ec_opfamilies)
01275     {
01276         Oid         opfamily = lfirst_oid(lc);
01277         Oid         opno;
01278 
01279         opno = get_opfamily_member(opfamily, lefttype, righttype,
01280                                    BTEqualStrategyNumber);
01281         if (OidIsValid(opno))
01282             return opno;
01283     }
01284     return InvalidOid;
01285 }
01286 
01287 
01288 /*
01289  * create_join_clause
01290  *    Find or make a RestrictInfo comparing the two given EC members
01291  *    with the given operator.
01292  *
01293  * parent_ec is either equal to ec (if the clause is a potentially-redundant
01294  * join clause) or NULL (if not).  We have to treat this as part of the
01295  * match requirements --- it's possible that a clause comparing the same two
01296  * EMs is a join clause in one join path and a restriction clause in another.
01297  */
01298 static RestrictInfo *
01299 create_join_clause(PlannerInfo *root,
01300                    EquivalenceClass *ec, Oid opno,
01301                    EquivalenceMember *leftem,
01302                    EquivalenceMember *rightem,
01303                    EquivalenceClass *parent_ec)
01304 {
01305     RestrictInfo *rinfo;
01306     ListCell   *lc;
01307     MemoryContext oldcontext;
01308 
01309     /*
01310      * Search to see if we already built a RestrictInfo for this pair of
01311      * EquivalenceMembers.  We can use either original source clauses or
01312      * previously-derived clauses.  The check on opno is probably redundant,
01313      * but be safe ...
01314      */
01315     foreach(lc, ec->ec_sources)
01316     {
01317         rinfo = (RestrictInfo *) lfirst(lc);
01318         if (rinfo->left_em == leftem &&
01319             rinfo->right_em == rightem &&
01320             rinfo->parent_ec == parent_ec &&
01321             opno == ((OpExpr *) rinfo->clause)->opno)
01322             return rinfo;
01323     }
01324 
01325     foreach(lc, ec->ec_derives)
01326     {
01327         rinfo = (RestrictInfo *) lfirst(lc);
01328         if (rinfo->left_em == leftem &&
01329             rinfo->right_em == rightem &&
01330             rinfo->parent_ec == parent_ec &&
01331             opno == ((OpExpr *) rinfo->clause)->opno)
01332             return rinfo;
01333     }
01334 
01335     /*
01336      * Not there, so build it, in planner context so we can re-use it. (Not
01337      * important in normal planning, but definitely so in GEQO.)
01338      */
01339     oldcontext = MemoryContextSwitchTo(root->planner_cxt);
01340 
01341     rinfo = build_implied_join_equality(opno,
01342                                         ec->ec_collation,
01343                                         leftem->em_expr,
01344                                         rightem->em_expr,
01345                                         bms_union(leftem->em_relids,
01346                                                   rightem->em_relids),
01347                                         bms_union(leftem->em_nullable_relids,
01348                                                rightem->em_nullable_relids));
01349 
01350     /* Mark the clause as redundant, or not */
01351     rinfo->parent_ec = parent_ec;
01352 
01353     /*
01354      * We know the correct values for left_ec/right_ec, ie this particular EC,
01355      * so we can just set them directly instead of forcing another lookup.
01356      */
01357     rinfo->left_ec = ec;
01358     rinfo->right_ec = ec;
01359 
01360     /* Mark it as usable with these EMs */
01361     rinfo->left_em = leftem;
01362     rinfo->right_em = rightem;
01363     /* and save it for possible re-use */
01364     ec->ec_derives = lappend(ec->ec_derives, rinfo);
01365 
01366     MemoryContextSwitchTo(oldcontext);
01367 
01368     return rinfo;
01369 }
01370 
01371 
01372 /*
01373  * reconsider_outer_join_clauses
01374  *    Re-examine any outer-join clauses that were set aside by
01375  *    distribute_qual_to_rels(), and see if we can derive any
01376  *    EquivalenceClasses from them.  Then, if they were not made
01377  *    redundant, push them out into the regular join-clause lists.
01378  *
01379  * When we have mergejoinable clauses A = B that are outer-join clauses,
01380  * we can't blindly combine them with other clauses A = C to deduce B = C,
01381  * since in fact the "equality" A = B won't necessarily hold above the
01382  * outer join (one of the variables might be NULL instead).  Nonetheless
01383  * there are cases where we can add qual clauses using transitivity.
01384  *
01385  * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR
01386  * for which there is also an equivalence clause OUTERVAR = CONSTANT.
01387  * It is safe and useful to push a clause INNERVAR = CONSTANT into the
01388  * evaluation of the inner (nullable) relation, because any inner rows not
01389  * meeting this condition will not contribute to the outer-join result anyway.
01390  * (Any outer rows they could join to will be eliminated by the pushed-down
01391  * equivalence clause.)
01392  *
01393  * Note that the above rule does not work for full outer joins; nor is it
01394  * very interesting to consider cases where the generated equivalence clause
01395  * would involve relations outside the outer join, since such clauses couldn't
01396  * be pushed into the inner side's scan anyway.  So the restriction to
01397  * outervar = pseudoconstant is not really giving up anything.
01398  *
01399  * For full-join cases, we can only do something useful if it's a FULL JOIN
01400  * USING and a merged column has an equivalence MERGEDVAR = CONSTANT.
01401  * By the time it gets here, the merged column will look like
01402  *      COALESCE(LEFTVAR, RIGHTVAR)
01403  * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match
01404  * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT
01405  * and RIGHTVAR = CONSTANT into the input relations, since any rows not
01406  * meeting these conditions cannot contribute to the join result.
01407  *
01408  * Again, there isn't any traction to be gained by trying to deal with
01409  * clauses comparing a mergedvar to a non-pseudoconstant.  So we can make
01410  * use of the EquivalenceClasses to search for matching variables that were
01411  * equivalenced to constants.  The interesting outer-join clauses were
01412  * accumulated for us by distribute_qual_to_rels.
01413  *
01414  * When we find one of these cases, we implement the changes we want by
01415  * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc)
01416  * and pushing it into the EquivalenceClass structures.  This is because we
01417  * may already know that INNERVAR is equivalenced to some other var(s), and
01418  * we'd like the constant to propagate to them too.  Note that it would be
01419  * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC ---
01420  * that could result in propagating constant restrictions from
01421  * INNERVAR to OUTERVAR, which would be very wrong.
01422  *
01423  * It's possible that the INNERVAR is also an OUTERVAR for some other
01424  * outer-join clause, in which case the process can be repeated.  So we repeat
01425  * looping over the lists of clauses until no further deductions can be made.
01426  * Whenever we do make a deduction, we remove the generating clause from the
01427  * lists, since we don't want to make the same deduction twice.
01428  *
01429  * If we don't find any match for a set-aside outer join clause, we must
01430  * throw it back into the regular joinclause processing by passing it to
01431  * distribute_restrictinfo_to_rels().  If we do generate a derived clause,
01432  * however, the outer-join clause is redundant.  We still throw it back,
01433  * because otherwise the join will be seen as a clauseless join and avoided
01434  * during join order searching; but we mark it as redundant to keep from
01435  * messing up the joinrel's size estimate.  (This behavior means that the
01436  * API for this routine is uselessly complex: we could have just put all
01437  * the clauses into the regular processing initially.  We keep it because
01438  * someday we might want to do something else, such as inserting "dummy"
01439  * joinclauses instead of real ones.)
01440  *
01441  * Outer join clauses that are marked outerjoin_delayed are special: this
01442  * condition means that one or both VARs might go to null due to a lower
01443  * outer join.  We can still push a constant through the clause, but only
01444  * if its operator is strict; and we *have to* throw the clause back into
01445  * regular joinclause processing.  By keeping the strict join clause,
01446  * we ensure that any null-extended rows that are mistakenly generated due
01447  * to suppressing rows not matching the constant will be rejected at the
01448  * upper outer join.  (This doesn't work for full-join clauses.)
01449  */
01450 void
01451 reconsider_outer_join_clauses(PlannerInfo *root)
01452 {
01453     bool        found;
01454     ListCell   *cell;
01455     ListCell   *prev;
01456     ListCell   *next;
01457 
01458     /* Outer loop repeats until we find no more deductions */
01459     do
01460     {
01461         found = false;
01462 
01463         /* Process the LEFT JOIN clauses */
01464         prev = NULL;
01465         for (cell = list_head(root->left_join_clauses); cell; cell = next)
01466         {
01467             RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
01468 
01469             next = lnext(cell);
01470             if (reconsider_outer_join_clause(root, rinfo, true))
01471             {
01472                 found = true;
01473                 /* remove it from the list */
01474                 root->left_join_clauses =
01475                     list_delete_cell(root->left_join_clauses, cell, prev);
01476                 /* we throw it back anyway (see notes above) */
01477                 /* but the thrown-back clause has no extra selectivity */
01478                 rinfo->norm_selec = 2.0;
01479                 rinfo->outer_selec = 1.0;
01480                 distribute_restrictinfo_to_rels(root, rinfo);
01481             }
01482             else
01483                 prev = cell;
01484         }
01485 
01486         /* Process the RIGHT JOIN clauses */
01487         prev = NULL;
01488         for (cell = list_head(root->right_join_clauses); cell; cell = next)
01489         {
01490             RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
01491 
01492             next = lnext(cell);
01493             if (reconsider_outer_join_clause(root, rinfo, false))
01494             {
01495                 found = true;
01496                 /* remove it from the list */
01497                 root->right_join_clauses =
01498                     list_delete_cell(root->right_join_clauses, cell, prev);
01499                 /* we throw it back anyway (see notes above) */
01500                 /* but the thrown-back clause has no extra selectivity */
01501                 rinfo->norm_selec = 2.0;
01502                 rinfo->outer_selec = 1.0;
01503                 distribute_restrictinfo_to_rels(root, rinfo);
01504             }
01505             else
01506                 prev = cell;
01507         }
01508 
01509         /* Process the FULL JOIN clauses */
01510         prev = NULL;
01511         for (cell = list_head(root->full_join_clauses); cell; cell = next)
01512         {
01513             RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
01514 
01515             next = lnext(cell);
01516             if (reconsider_full_join_clause(root, rinfo))
01517             {
01518                 found = true;
01519                 /* remove it from the list */
01520                 root->full_join_clauses =
01521                     list_delete_cell(root->full_join_clauses, cell, prev);
01522                 /* we throw it back anyway (see notes above) */
01523                 /* but the thrown-back clause has no extra selectivity */
01524                 rinfo->norm_selec = 2.0;
01525                 rinfo->outer_selec = 1.0;
01526                 distribute_restrictinfo_to_rels(root, rinfo);
01527             }
01528             else
01529                 prev = cell;
01530         }
01531     } while (found);
01532 
01533     /* Now, any remaining clauses have to be thrown back */
01534     foreach(cell, root->left_join_clauses)
01535     {
01536         RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
01537 
01538         distribute_restrictinfo_to_rels(root, rinfo);
01539     }
01540     foreach(cell, root->right_join_clauses)
01541     {
01542         RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
01543 
01544         distribute_restrictinfo_to_rels(root, rinfo);
01545     }
01546     foreach(cell, root->full_join_clauses)
01547     {
01548         RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
01549 
01550         distribute_restrictinfo_to_rels(root, rinfo);
01551     }
01552 }
01553 
01554 /*
01555  * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause
01556  *
01557  * Returns TRUE if we were able to propagate a constant through the clause.
01558  */
01559 static bool
01560 reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
01561                              bool outer_on_left)
01562 {
01563     Expr       *outervar,
01564                *innervar;
01565     Oid         opno,
01566                 collation,
01567                 left_type,
01568                 right_type,
01569                 inner_datatype;
01570     Relids      inner_relids,
01571                 inner_nullable_relids;
01572     ListCell   *lc1;
01573 
01574     Assert(is_opclause(rinfo->clause));
01575     opno = ((OpExpr *) rinfo->clause)->opno;
01576     collation = ((OpExpr *) rinfo->clause)->inputcollid;
01577 
01578     /* If clause is outerjoin_delayed, operator must be strict */
01579     if (rinfo->outerjoin_delayed && !op_strict(opno))
01580         return false;
01581 
01582     /* Extract needed info from the clause */
01583     op_input_types(opno, &left_type, &right_type);
01584     if (outer_on_left)
01585     {
01586         outervar = (Expr *) get_leftop(rinfo->clause);
01587         innervar = (Expr *) get_rightop(rinfo->clause);
01588         inner_datatype = right_type;
01589         inner_relids = rinfo->right_relids;
01590     }
01591     else
01592     {
01593         outervar = (Expr *) get_rightop(rinfo->clause);
01594         innervar = (Expr *) get_leftop(rinfo->clause);
01595         inner_datatype = left_type;
01596         inner_relids = rinfo->left_relids;
01597     }
01598     inner_nullable_relids = bms_intersect(inner_relids,
01599                                           rinfo->nullable_relids);
01600 
01601     /* Scan EquivalenceClasses for a match to outervar */
01602     foreach(lc1, root->eq_classes)
01603     {
01604         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
01605         bool        match;
01606         ListCell   *lc2;
01607 
01608         /* Ignore EC unless it contains pseudoconstants */
01609         if (!cur_ec->ec_has_const)
01610             continue;
01611         /* Never match to a volatile EC */
01612         if (cur_ec->ec_has_volatile)
01613             continue;
01614         /* It has to match the outer-join clause as to semantics, too */
01615         if (collation != cur_ec->ec_collation)
01616             continue;
01617         if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
01618             continue;
01619         /* Does it contain a match to outervar? */
01620         match = false;
01621         foreach(lc2, cur_ec->ec_members)
01622         {
01623             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
01624 
01625             Assert(!cur_em->em_is_child);       /* no children yet */
01626             if (equal(outervar, cur_em->em_expr))
01627             {
01628                 match = true;
01629                 break;
01630             }
01631         }
01632         if (!match)
01633             continue;           /* no match, so ignore this EC */
01634 
01635         /*
01636          * Yes it does!  Try to generate a clause INNERVAR = CONSTANT for each
01637          * CONSTANT in the EC.  Note that we must succeed with at least one
01638          * constant before we can decide to throw away the outer-join clause.
01639          */
01640         match = false;
01641         foreach(lc2, cur_ec->ec_members)
01642         {
01643             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
01644             Oid         eq_op;
01645             RestrictInfo *newrinfo;
01646 
01647             if (!cur_em->em_is_const)
01648                 continue;       /* ignore non-const members */
01649             eq_op = select_equality_operator(cur_ec,
01650                                              inner_datatype,
01651                                              cur_em->em_datatype);
01652             if (!OidIsValid(eq_op))
01653                 continue;       /* can't generate equality */
01654             newrinfo = build_implied_join_equality(eq_op,
01655                                                    cur_ec->ec_collation,
01656                                                    innervar,
01657                                                    cur_em->em_expr,
01658                                                    bms_copy(inner_relids),
01659                                             bms_copy(inner_nullable_relids));
01660             if (process_equivalence(root, newrinfo, true))
01661                 match = true;
01662         }
01663 
01664         /*
01665          * If we were able to equate INNERVAR to any constant, report success.
01666          * Otherwise, fall out of the search loop, since we know the OUTERVAR
01667          * appears in at most one EC.
01668          */
01669         if (match)
01670             return true;
01671         else
01672             break;
01673     }
01674 
01675     return false;               /* failed to make any deduction */
01676 }
01677 
01678 /*
01679  * reconsider_outer_join_clauses for a single FULL JOIN clause
01680  *
01681  * Returns TRUE if we were able to propagate a constant through the clause.
01682  */
01683 static bool
01684 reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
01685 {
01686     Expr       *leftvar;
01687     Expr       *rightvar;
01688     Oid         opno,
01689                 collation,
01690                 left_type,
01691                 right_type;
01692     Relids      left_relids,
01693                 right_relids,
01694                 left_nullable_relids,
01695                 right_nullable_relids;
01696     ListCell   *lc1;
01697 
01698     /* Can't use an outerjoin_delayed clause here */
01699     if (rinfo->outerjoin_delayed)
01700         return false;
01701 
01702     /* Extract needed info from the clause */
01703     Assert(is_opclause(rinfo->clause));
01704     opno = ((OpExpr *) rinfo->clause)->opno;
01705     collation = ((OpExpr *) rinfo->clause)->inputcollid;
01706     op_input_types(opno, &left_type, &right_type);
01707     leftvar = (Expr *) get_leftop(rinfo->clause);
01708     rightvar = (Expr *) get_rightop(rinfo->clause);
01709     left_relids = rinfo->left_relids;
01710     right_relids = rinfo->right_relids;
01711     left_nullable_relids = bms_intersect(left_relids,
01712                                          rinfo->nullable_relids);
01713     right_nullable_relids = bms_intersect(right_relids,
01714                                           rinfo->nullable_relids);
01715 
01716     foreach(lc1, root->eq_classes)
01717     {
01718         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
01719         EquivalenceMember *coal_em = NULL;
01720         bool        match;
01721         bool        matchleft;
01722         bool        matchright;
01723         ListCell   *lc2;
01724 
01725         /* Ignore EC unless it contains pseudoconstants */
01726         if (!cur_ec->ec_has_const)
01727             continue;
01728         /* Never match to a volatile EC */
01729         if (cur_ec->ec_has_volatile)
01730             continue;
01731         /* It has to match the outer-join clause as to semantics, too */
01732         if (collation != cur_ec->ec_collation)
01733             continue;
01734         if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
01735             continue;
01736 
01737         /*
01738          * Does it contain a COALESCE(leftvar, rightvar) construct?
01739          *
01740          * We can assume the COALESCE() inputs are in the same order as the
01741          * join clause, since both were automatically generated in the cases
01742          * we care about.
01743          *
01744          * XXX currently this may fail to match in cross-type cases because
01745          * the COALESCE will contain typecast operations while the join clause
01746          * may not (if there is a cross-type mergejoin operator available for
01747          * the two column types). Is it OK to strip implicit coercions from
01748          * the COALESCE arguments?
01749          */
01750         match = false;
01751         foreach(lc2, cur_ec->ec_members)
01752         {
01753             coal_em = (EquivalenceMember *) lfirst(lc2);
01754             Assert(!coal_em->em_is_child);      /* no children yet */
01755             if (IsA(coal_em->em_expr, CoalesceExpr))
01756             {
01757                 CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr;
01758                 Node       *cfirst;
01759                 Node       *csecond;
01760 
01761                 if (list_length(cexpr->args) != 2)
01762                     continue;
01763                 cfirst = (Node *) linitial(cexpr->args);
01764                 csecond = (Node *) lsecond(cexpr->args);
01765 
01766                 if (equal(leftvar, cfirst) && equal(rightvar, csecond))
01767                 {
01768                     match = true;
01769                     break;
01770                 }
01771             }
01772         }
01773         if (!match)
01774             continue;           /* no match, so ignore this EC */
01775 
01776         /*
01777          * Yes it does!  Try to generate clauses LEFTVAR = CONSTANT and
01778          * RIGHTVAR = CONSTANT for each CONSTANT in the EC.  Note that we must
01779          * succeed with at least one constant for each var before we can
01780          * decide to throw away the outer-join clause.
01781          */
01782         matchleft = matchright = false;
01783         foreach(lc2, cur_ec->ec_members)
01784         {
01785             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
01786             Oid         eq_op;
01787             RestrictInfo *newrinfo;
01788 
01789             if (!cur_em->em_is_const)
01790                 continue;       /* ignore non-const members */
01791             eq_op = select_equality_operator(cur_ec,
01792                                              left_type,
01793                                              cur_em->em_datatype);
01794             if (OidIsValid(eq_op))
01795             {
01796                 newrinfo = build_implied_join_equality(eq_op,
01797                                                        cur_ec->ec_collation,
01798                                                        leftvar,
01799                                                        cur_em->em_expr,
01800                                                        bms_copy(left_relids),
01801                                              bms_copy(left_nullable_relids));
01802                 if (process_equivalence(root, newrinfo, true))
01803                     matchleft = true;
01804             }
01805             eq_op = select_equality_operator(cur_ec,
01806                                              right_type,
01807                                              cur_em->em_datatype);
01808             if (OidIsValid(eq_op))
01809             {
01810                 newrinfo = build_implied_join_equality(eq_op,
01811                                                        cur_ec->ec_collation,
01812                                                        rightvar,
01813                                                        cur_em->em_expr,
01814                                                        bms_copy(right_relids),
01815                                             bms_copy(right_nullable_relids));
01816                 if (process_equivalence(root, newrinfo, true))
01817                     matchright = true;
01818             }
01819         }
01820 
01821         /*
01822          * If we were able to equate both vars to constants, we're done, and
01823          * we can throw away the full-join clause as redundant.  Moreover, we
01824          * can remove the COALESCE entry from the EC, since the added
01825          * restrictions ensure it will always have the expected value. (We
01826          * don't bother trying to update ec_relids or ec_sources.)
01827          */
01828         if (matchleft && matchright)
01829         {
01830             cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em);
01831             return true;
01832         }
01833 
01834         /*
01835          * Otherwise, fall out of the search loop, since we know the COALESCE
01836          * appears in at most one EC (XXX might stop being true if we allow
01837          * stripping of coercions above?)
01838          */
01839         break;
01840     }
01841 
01842     return false;               /* failed to make any deduction */
01843 }
01844 
01845 
01846 /*
01847  * exprs_known_equal
01848  *    Detect whether two expressions are known equal due to equivalence
01849  *    relationships.
01850  *
01851  * Actually, this only shows that the expressions are equal according
01852  * to some opfamily's notion of equality --- but we only use it for
01853  * selectivity estimation, so a fuzzy idea of equality is OK.
01854  *
01855  * Note: does not bother to check for "equal(item1, item2)"; caller must
01856  * check that case if it's possible to pass identical items.
01857  */
01858 bool
01859 exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
01860 {
01861     ListCell   *lc1;
01862 
01863     foreach(lc1, root->eq_classes)
01864     {
01865         EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
01866         bool        item1member = false;
01867         bool        item2member = false;
01868         ListCell   *lc2;
01869 
01870         /* Never match to a volatile EC */
01871         if (ec->ec_has_volatile)
01872             continue;
01873 
01874         foreach(lc2, ec->ec_members)
01875         {
01876             EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
01877 
01878             if (em->em_is_child)
01879                 continue;       /* ignore children here */
01880             if (equal(item1, em->em_expr))
01881                 item1member = true;
01882             else if (equal(item2, em->em_expr))
01883                 item2member = true;
01884             /* Exit as soon as equality is proven */
01885             if (item1member && item2member)
01886                 return true;
01887         }
01888     }
01889     return false;
01890 }
01891 
01892 
01893 /*
01894  * add_child_rel_equivalences
01895  *    Search for EC members that reference the parent_rel, and
01896  *    add transformed members referencing the child_rel.
01897  *
01898  * Note that this function won't be called at all unless we have at least some
01899  * reason to believe that the EC members it generates will be useful.
01900  *
01901  * parent_rel and child_rel could be derived from appinfo, but since the
01902  * caller has already computed them, we might as well just pass them in.
01903  */
01904 void
01905 add_child_rel_equivalences(PlannerInfo *root,
01906                            AppendRelInfo *appinfo,
01907                            RelOptInfo *parent_rel,
01908                            RelOptInfo *child_rel)
01909 {
01910     ListCell   *lc1;
01911 
01912     foreach(lc1, root->eq_classes)
01913     {
01914         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
01915         ListCell   *lc2;
01916 
01917         /*
01918          * If this EC contains a volatile expression, then generating child
01919          * EMs would be downright dangerous, so skip it.  We rely on a
01920          * volatile EC having only one EM.
01921          */
01922         if (cur_ec->ec_has_volatile)
01923             continue;
01924 
01925         /* No point in searching if parent rel not mentioned in eclass */
01926         if (!bms_is_subset(parent_rel->relids, cur_ec->ec_relids))
01927             continue;
01928 
01929         foreach(lc2, cur_ec->ec_members)
01930         {
01931             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
01932 
01933             if (cur_em->em_is_const || cur_em->em_is_child)
01934                 continue;       /* ignore consts and children here */
01935 
01936             /* Does it reference parent_rel? */
01937             if (bms_overlap(cur_em->em_relids, parent_rel->relids))
01938             {
01939                 /* Yes, generate transformed child version */
01940                 Expr       *child_expr;
01941                 Relids      new_relids;
01942                 Relids      new_nullable_relids;
01943 
01944                 child_expr = (Expr *)
01945                     adjust_appendrel_attrs(root,
01946                                            (Node *) cur_em->em_expr,
01947                                            appinfo);
01948 
01949                 /*
01950                  * Transform em_relids to match.  Note we do *not* do
01951                  * pull_varnos(child_expr) here, as for example the
01952                  * transformation might have substituted a constant, but we
01953                  * don't want the child member to be marked as constant.
01954                  */
01955                 new_relids = bms_difference(cur_em->em_relids,
01956                                             parent_rel->relids);
01957                 new_relids = bms_add_members(new_relids, child_rel->relids);
01958 
01959                 /*
01960                  * And likewise for nullable_relids.  Note this code assumes
01961                  * parent and child relids are singletons.
01962                  */
01963                 new_nullable_relids = cur_em->em_nullable_relids;
01964                 if (bms_overlap(new_nullable_relids, parent_rel->relids))
01965                 {
01966                     new_nullable_relids = bms_difference(new_nullable_relids,
01967                                                          parent_rel->relids);
01968                     new_nullable_relids = bms_add_members(new_nullable_relids,
01969                                                           child_rel->relids);
01970                 }
01971 
01972                 (void) add_eq_member(cur_ec, child_expr,
01973                                      new_relids, new_nullable_relids,
01974                                      true, cur_em->em_datatype);
01975             }
01976         }
01977     }
01978 }
01979 
01980 
01981 /*
01982  * mutate_eclass_expressions
01983  *    Apply an expression tree mutator to all expressions stored in
01984  *    equivalence classes (but ignore child exprs unless include_child_exprs).
01985  *
01986  * This is a bit of a hack ... it's currently needed only by planagg.c,
01987  * which needs to do a global search-and-replace of MIN/MAX Aggrefs
01988  * after eclasses are already set up.  Without changing the eclasses too,
01989  * subsequent matching of ORDER BY and DISTINCT clauses would fail.
01990  *
01991  * Note that we assume the mutation won't affect relation membership or any
01992  * other properties we keep track of (which is a bit bogus, but by the time
01993  * planagg.c runs, it no longer matters).  Also we must be called in the
01994  * main planner memory context.
01995  */
01996 void
01997 mutate_eclass_expressions(PlannerInfo *root,
01998                           Node *(*mutator) (),
01999                           void *context,
02000                           bool include_child_exprs)
02001 {
02002     ListCell   *lc1;
02003 
02004     foreach(lc1, root->eq_classes)
02005     {
02006         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
02007         ListCell   *lc2;
02008 
02009         foreach(lc2, cur_ec->ec_members)
02010         {
02011             EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
02012 
02013             if (cur_em->em_is_child && !include_child_exprs)
02014                 continue;       /* ignore children unless requested */
02015 
02016             cur_em->em_expr = (Expr *)
02017                 mutator((Node *) cur_em->em_expr, context);
02018         }
02019     }
02020 }
02021 
02022 
02023 /*
02024  * generate_implied_equalities_for_column
02025  *    Create EC-derived joinclauses usable with a specific column.
02026  *
02027  * This is used by indxpath.c to extract potentially indexable joinclauses
02028  * from ECs, and can be used by foreign data wrappers for similar purposes.
02029  * We assume that only expressions in Vars of a single table are of interest,
02030  * but the caller provides a callback function to identify exactly which
02031  * such expressions it would like to know about.
02032  *
02033  * We assume that any given table/index column could appear in only one EC.
02034  * (This should be true in all but the most pathological cases, and if it
02035  * isn't, we stop on the first match anyway.)  Therefore, what we return
02036  * is a redundant list of clauses equating the table/index column to each of
02037  * the other-relation values it is known to be equal to.  Any one of
02038  * these clauses can be used to create a parameterized path, and there
02039  * is no value in using more than one.  (But it *is* worthwhile to create
02040  * a separate parameterized path for each one, since that leads to different
02041  * join orders.)
02042  *
02043  * The caller can pass a Relids set of rels we aren't interested in joining
02044  * to, so as to save the work of creating useless clauses.
02045  */
02046 List *
02047 generate_implied_equalities_for_column(PlannerInfo *root,
02048                                        RelOptInfo *rel,
02049                                        ec_matches_callback_type callback,
02050                                        void *callback_arg,
02051                                        Relids prohibited_rels)
02052 {
02053     List       *result = NIL;
02054     bool        is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
02055     Index       parent_relid;
02056     ListCell   *lc1;
02057 
02058     /* If it's a child rel, we'll need to know what its parent is */
02059     if (is_child_rel)
02060         parent_relid = find_childrel_appendrelinfo(root, rel)->parent_relid;
02061     else
02062         parent_relid = 0;       /* not used, but keep compiler quiet */
02063 
02064     foreach(lc1, root->eq_classes)
02065     {
02066         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
02067         EquivalenceMember *cur_em;
02068         ListCell   *lc2;
02069 
02070         /*
02071          * Won't generate joinclauses if const or single-member (the latter
02072          * test covers the volatile case too)
02073          */
02074         if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
02075             continue;
02076 
02077         /*
02078          * No point in searching if rel not mentioned in eclass (but we can't
02079          * tell that for a child rel).
02080          */
02081         if (!is_child_rel &&
02082             !bms_is_subset(rel->relids, cur_ec->ec_relids))
02083             continue;
02084 
02085         /*
02086          * Scan members, looking for a match to the target column.  Note
02087          * that child EC members are considered, but only when they belong to
02088          * the target relation.  (Unlike regular members, the same expression
02089          * could be a child member of more than one EC.  Therefore, it's
02090          * potentially order-dependent which EC a child relation's target
02091          * column gets matched to.  This is annoying but it only happens in
02092          * corner cases, so for now we live with just reporting the first
02093          * match.  See also get_eclass_for_sort_expr.)
02094          */
02095         cur_em = NULL;
02096         foreach(lc2, cur_ec->ec_members)
02097         {
02098             cur_em = (EquivalenceMember *) lfirst(lc2);
02099             if (bms_equal(cur_em->em_relids, rel->relids) &&
02100                 callback(root, rel, cur_ec, cur_em, callback_arg))
02101                 break;
02102             cur_em = NULL;
02103         }
02104 
02105         if (!cur_em)
02106             continue;
02107 
02108         /*
02109          * Found our match.  Scan the other EC members and attempt to generate
02110          * joinclauses.
02111          */
02112         foreach(lc2, cur_ec->ec_members)
02113         {
02114             EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
02115             Oid         eq_op;
02116             RestrictInfo *rinfo;
02117 
02118             if (other_em->em_is_child)
02119                 continue;       /* ignore children here */
02120 
02121             /* Make sure it'll be a join to a different rel */
02122             if (other_em == cur_em ||
02123                 bms_overlap(other_em->em_relids, rel->relids))
02124                 continue;
02125 
02126             /* Forget it if caller doesn't want joins to this rel */
02127             if (bms_overlap(other_em->em_relids, prohibited_rels))
02128                 continue;
02129 
02130             /*
02131              * Also, if this is a child rel, avoid generating a useless join
02132              * to its parent rel.
02133              */
02134             if (is_child_rel &&
02135                 bms_is_member(parent_relid, other_em->em_relids))
02136                 continue;
02137 
02138             eq_op = select_equality_operator(cur_ec,
02139                                              cur_em->em_datatype,
02140                                              other_em->em_datatype);
02141             if (!OidIsValid(eq_op))
02142                 continue;
02143 
02144             /* set parent_ec to mark as redundant with other joinclauses */
02145             rinfo = create_join_clause(root, cur_ec, eq_op,
02146                                        cur_em, other_em,
02147                                        cur_ec);
02148 
02149             result = lappend(result, rinfo);
02150         }
02151 
02152         /*
02153          * If somehow we failed to create any join clauses, we might as well
02154          * keep scanning the ECs for another match.  But if we did make any,
02155          * we're done, because we don't want to return non-redundant clauses.
02156          */
02157         if (result)
02158             break;
02159     }
02160 
02161     return result;
02162 }
02163 
02164 /*
02165  * have_relevant_eclass_joinclause
02166  *      Detect whether there is an EquivalenceClass that could produce
02167  *      a joinclause involving the two given relations.
02168  *
02169  * This is essentially a very cut-down version of
02170  * generate_join_implied_equalities().  Note it's OK to occasionally say "yes"
02171  * incorrectly.  Hence we don't bother with details like whether the lack of a
02172  * cross-type operator might prevent the clause from actually being generated.
02173  */
02174 bool
02175 have_relevant_eclass_joinclause(PlannerInfo *root,
02176                                 RelOptInfo *rel1, RelOptInfo *rel2)
02177 {
02178     ListCell   *lc1;
02179 
02180     foreach(lc1, root->eq_classes)
02181     {
02182         EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
02183 
02184         /*
02185          * Won't generate joinclauses if single-member (this test covers the
02186          * volatile case too)
02187          */
02188         if (list_length(ec->ec_members) <= 1)
02189             continue;
02190 
02191         /*
02192          * We do not need to examine the individual members of the EC, because
02193          * all that we care about is whether each rel overlaps the relids of
02194          * at least one member, and a test on ec_relids is sufficient to prove
02195          * that.  (As with have_relevant_joinclause(), it is not necessary
02196          * that the EC be able to form a joinclause relating exactly the two
02197          * given rels, only that it be able to form a joinclause mentioning
02198          * both, and this will surely be true if both of them overlap
02199          * ec_relids.)
02200          *
02201          * Note we don't test ec_broken; if we did, we'd need a separate code
02202          * path to look through ec_sources.  Checking the membership anyway is
02203          * OK as a possibly-overoptimistic heuristic.
02204          *
02205          * We don't test ec_has_const either, even though a const eclass won't
02206          * generate real join clauses.  This is because if we had "WHERE a.x =
02207          * b.y and a.x = 42", it is worth considering a join between a and b,
02208          * since the join result is likely to be small even though it'll end
02209          * up being an unqualified nestloop.
02210          */
02211         if (bms_overlap(rel1->relids, ec->ec_relids) &&
02212             bms_overlap(rel2->relids, ec->ec_relids))
02213             return true;
02214     }
02215 
02216     return false;
02217 }
02218 
02219 
02220 /*
02221  * has_relevant_eclass_joinclause
02222  *      Detect whether there is an EquivalenceClass that could produce
02223  *      a joinclause involving the given relation and anything else.
02224  *
02225  * This is the same as have_relevant_eclass_joinclause with the other rel
02226  * implicitly defined as "everything else in the query".
02227  */
02228 bool
02229 has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
02230 {
02231     ListCell   *lc1;
02232 
02233     foreach(lc1, root->eq_classes)
02234     {
02235         EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
02236 
02237         /*
02238          * Won't generate joinclauses if single-member (this test covers the
02239          * volatile case too)
02240          */
02241         if (list_length(ec->ec_members) <= 1)
02242             continue;
02243 
02244         /*
02245          * Per the comment in have_relevant_eclass_joinclause, it's sufficient
02246          * to find an EC that mentions both this rel and some other rel.
02247          */
02248         if (bms_overlap(rel1->relids, ec->ec_relids) &&
02249             !bms_is_subset(ec->ec_relids, rel1->relids))
02250             return true;
02251     }
02252 
02253     return false;
02254 }
02255 
02256 
02257 /*
02258  * eclass_useful_for_merging
02259  *    Detect whether the EC could produce any mergejoinable join clauses
02260  *    against the specified relation.
02261  *
02262  * This is just a heuristic test and doesn't have to be exact; it's better
02263  * to say "yes" incorrectly than "no".  Hence we don't bother with details
02264  * like whether the lack of a cross-type operator might prevent the clause
02265  * from actually being generated.
02266  */
02267 bool
02268 eclass_useful_for_merging(EquivalenceClass *eclass,
02269                           RelOptInfo *rel)
02270 {
02271     ListCell   *lc;
02272 
02273     Assert(!eclass->ec_merged);
02274 
02275     /*
02276      * Won't generate joinclauses if const or single-member (the latter test
02277      * covers the volatile case too)
02278      */
02279     if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1)
02280         return false;
02281 
02282     /*
02283      * Note we don't test ec_broken; if we did, we'd need a separate code path
02284      * to look through ec_sources.  Checking the members anyway is OK as a
02285      * possibly-overoptimistic heuristic.
02286      */
02287 
02288     /* If rel already includes all members of eclass, no point in searching */
02289     if (bms_is_subset(eclass->ec_relids, rel->relids))
02290         return false;
02291 
02292     /* To join, we need a member not in the given rel */
02293     foreach(lc, eclass->ec_members)
02294     {
02295         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
02296 
02297         if (cur_em->em_is_child)
02298             continue;           /* ignore children here */
02299 
02300         if (!bms_overlap(cur_em->em_relids, rel->relids))
02301             return true;
02302     }
02303 
02304     return false;
02305 }
02306 
02307 
02308 /*
02309  * is_redundant_derived_clause
02310  *      Test whether rinfo is derived from same EC as any clause in clauselist;
02311  *      if so, it can be presumed to represent a condition that's redundant
02312  *      with that member of the list.
02313  */
02314 bool
02315 is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist)
02316 {
02317     EquivalenceClass *parent_ec = rinfo->parent_ec;
02318     ListCell   *lc;
02319 
02320     /* Fail if it's not a potentially-redundant clause from some EC */
02321     if (parent_ec == NULL)
02322         return false;
02323 
02324     foreach(lc, clauselist)
02325     {
02326         RestrictInfo *otherrinfo = (RestrictInfo *) lfirst(lc);
02327 
02328         if (otherrinfo->parent_ec == parent_ec)
02329             return true;
02330     }
02331 
02332     return false;
02333 }
Header And Logo

equivclass.c