Header And Logo

PostgreSQL
| The world's most advanced open source database.

analyzejoins.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * analyzejoins.c
00004  *    Routines for simplifying joins after initial query analysis
00005  *
00006  * While we do a great deal of join simplification in prep/prepjointree.c,
00007  * certain optimizations cannot be performed at that stage for lack of
00008  * detailed information about the query.  The routines here are invoked
00009  * after initsplan.c has done its work, and can do additional join removal
00010  * and simplification steps based on the information extracted.  The penalty
00011  * is that we have to work harder to clean up after ourselves when we modify
00012  * the query, since the derived data structures have to be updated too.
00013  *
00014  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00015  * Portions Copyright (c) 1994, Regents of the University of California
00016  *
00017  *
00018  * IDENTIFICATION
00019  *    src/backend/optimizer/plan/analyzejoins.c
00020  *
00021  *-------------------------------------------------------------------------
00022  */
00023 #include "postgres.h"
00024 
00025 #include "optimizer/joininfo.h"
00026 #include "optimizer/pathnode.h"
00027 #include "optimizer/paths.h"
00028 #include "optimizer/planmain.h"
00029 #include "optimizer/var.h"
00030 
00031 /* local functions */
00032 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
00033 static void remove_rel_from_query(PlannerInfo *root, int relid,
00034                       Relids joinrelids);
00035 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
00036 
00037 
00038 /*
00039  * remove_useless_joins
00040  *      Check for relations that don't actually need to be joined at all,
00041  *      and remove them from the query.
00042  *
00043  * We are passed the current joinlist and return the updated list.  Other
00044  * data structures that have to be updated are accessible via "root".
00045  */
00046 List *
00047 remove_useless_joins(PlannerInfo *root, List *joinlist)
00048 {
00049     ListCell   *lc;
00050 
00051     /*
00052      * We are only interested in relations that are left-joined to, so we can
00053      * scan the join_info_list to find them easily.
00054      */
00055 restart:
00056     foreach(lc, root->join_info_list)
00057     {
00058         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
00059         int         innerrelid;
00060         int         nremoved;
00061 
00062         /* Skip if not removable */
00063         if (!join_is_removable(root, sjinfo))
00064             continue;
00065 
00066         /*
00067          * Currently, join_is_removable can only succeed when the sjinfo's
00068          * righthand is a single baserel.  Remove that rel from the query and
00069          * joinlist.
00070          */
00071         innerrelid = bms_singleton_member(sjinfo->min_righthand);
00072 
00073         remove_rel_from_query(root, innerrelid,
00074                               bms_union(sjinfo->min_lefthand,
00075                                         sjinfo->min_righthand));
00076 
00077         /* We verify that exactly one reference gets removed from joinlist */
00078         nremoved = 0;
00079         joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
00080         if (nremoved != 1)
00081             elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
00082 
00083         /*
00084          * We can delete this SpecialJoinInfo from the list too, since it's no
00085          * longer of interest.
00086          */
00087         root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
00088 
00089         /*
00090          * Restart the scan.  This is necessary to ensure we find all
00091          * removable joins independently of ordering of the join_info_list
00092          * (note that removal of attr_needed bits may make a join appear
00093          * removable that did not before).  Also, since we just deleted the
00094          * current list cell, we'd have to have some kluge to continue the
00095          * list scan anyway.
00096          */
00097         goto restart;
00098     }
00099 
00100     return joinlist;
00101 }
00102 
00103 /*
00104  * clause_sides_match_join
00105  *    Determine whether a join clause is of the right form to use in this join.
00106  *
00107  * We already know that the clause is a binary opclause referencing only the
00108  * rels in the current join.  The point here is to check whether it has the
00109  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
00110  * rather than mixing outer and inner vars on either side.  If it matches,
00111  * we set the transient flag outer_is_left to identify which side is which.
00112  */
00113 static inline bool
00114 clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
00115                         Relids innerrelids)
00116 {
00117     if (bms_is_subset(rinfo->left_relids, outerrelids) &&
00118         bms_is_subset(rinfo->right_relids, innerrelids))
00119     {
00120         /* lefthand side is outer */
00121         rinfo->outer_is_left = true;
00122         return true;
00123     }
00124     else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
00125              bms_is_subset(rinfo->right_relids, outerrelids))
00126     {
00127         /* righthand side is outer */
00128         rinfo->outer_is_left = false;
00129         return true;
00130     }
00131     return false;               /* no good for these input relations */
00132 }
00133 
00134 /*
00135  * join_is_removable
00136  *    Check whether we need not perform this special join at all, because
00137  *    it will just duplicate its left input.
00138  *
00139  * This is true for a left join for which the join condition cannot match
00140  * more than one inner-side row.  (There are other possibly interesting
00141  * cases, but we don't have the infrastructure to prove them.)  We also
00142  * have to check that the inner side doesn't generate any variables needed
00143  * above the join.
00144  */
00145 static bool
00146 join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
00147 {
00148     int         innerrelid;
00149     RelOptInfo *innerrel;
00150     Relids      joinrelids;
00151     List       *clause_list = NIL;
00152     ListCell   *l;
00153     int         attroff;
00154 
00155     /*
00156      * Currently, we only know how to remove left joins to a baserel with
00157      * unique indexes.  We can check most of these criteria pretty trivially
00158      * to avoid doing useless extra work.  But checking whether any of the
00159      * indexes are unique would require iterating over the indexlist, so for
00160      * now we just make sure there are indexes of some sort or other.  If none
00161      * of them are unique, join removal will still fail, just slightly later.
00162      */
00163     if (sjinfo->jointype != JOIN_LEFT ||
00164         sjinfo->delay_upper_joins ||
00165         bms_membership(sjinfo->min_righthand) != BMS_SINGLETON)
00166         return false;
00167 
00168     innerrelid = bms_singleton_member(sjinfo->min_righthand);
00169     innerrel = find_base_rel(root, innerrelid);
00170 
00171     if (innerrel->reloptkind != RELOPT_BASEREL ||
00172         innerrel->rtekind != RTE_RELATION ||
00173         innerrel->indexlist == NIL)
00174         return false;
00175 
00176     /* Compute the relid set for the join we are considering */
00177     joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
00178 
00179     /*
00180      * We can't remove the join if any inner-rel attributes are used above the
00181      * join.
00182      *
00183      * Note that this test only detects use of inner-rel attributes in higher
00184      * join conditions and the target list.  There might be such attributes in
00185      * pushed-down conditions at this join, too.  We check that case below.
00186      *
00187      * As a micro-optimization, it seems better to start with max_attr and
00188      * count down rather than starting with min_attr and counting up, on the
00189      * theory that the system attributes are somewhat less likely to be wanted
00190      * and should be tested last.
00191      */
00192     for (attroff = innerrel->max_attr - innerrel->min_attr;
00193          attroff >= 0;
00194          attroff--)
00195     {
00196         if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
00197             return false;
00198     }
00199 
00200     /*
00201      * Similarly check that the inner rel isn't needed by any PlaceHolderVars
00202      * that will be used above the join.  We only need to fail if such a PHV
00203      * actually references some inner-rel attributes; but the correct check
00204      * for that is relatively expensive, so we first check against ph_eval_at,
00205      * which must mention the inner rel if the PHV uses any inner-rel attrs.
00206      */
00207     foreach(l, root->placeholder_list)
00208     {
00209         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
00210 
00211         if (bms_is_subset(phinfo->ph_needed, joinrelids))
00212             continue;           /* PHV is not used above the join */
00213         if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
00214             continue;           /* it definitely doesn't reference innerrel */
00215         if (bms_overlap(pull_varnos((Node *) phinfo->ph_var),
00216                         innerrel->relids))
00217             return false;       /* it does reference innerrel */
00218     }
00219 
00220     /*
00221      * Search for mergejoinable clauses that constrain the inner rel against
00222      * either the outer rel or a pseudoconstant.  If an operator is
00223      * mergejoinable then it behaves like equality for some btree opclass, so
00224      * it's what we want.  The mergejoinability test also eliminates clauses
00225      * containing volatile functions, which we couldn't depend on.
00226      */
00227     foreach(l, innerrel->joininfo)
00228     {
00229         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
00230 
00231         /*
00232          * If it's not a join clause for this outer join, we can't use it.
00233          * Note that if the clause is pushed-down, then it is logically from
00234          * above the outer join, even if it references no other rels (it might
00235          * be from WHERE, for example).
00236          */
00237         if (restrictinfo->is_pushed_down ||
00238             !bms_equal(restrictinfo->required_relids, joinrelids))
00239         {
00240             /*
00241              * If such a clause actually references the inner rel then join
00242              * removal has to be disallowed.  We have to check this despite
00243              * the previous attr_needed checks because of the possibility of
00244              * pushed-down clauses referencing the rel.
00245              */
00246             if (bms_is_member(innerrelid, restrictinfo->clause_relids))
00247                 return false;
00248             continue;           /* else, ignore; not useful here */
00249         }
00250 
00251         /* Ignore if it's not a mergejoinable clause */
00252         if (!restrictinfo->can_join ||
00253             restrictinfo->mergeopfamilies == NIL)
00254             continue;           /* not mergejoinable */
00255 
00256         /*
00257          * Check if clause has the form "outer op inner" or "inner op outer".
00258          */
00259         if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
00260                                      innerrel->relids))
00261             continue;           /* no good for these input relations */
00262 
00263         /* OK, add to list */
00264         clause_list = lappend(clause_list, restrictinfo);
00265     }
00266 
00267     /*
00268      * relation_has_unique_index_for automatically adds any usable restriction
00269      * clauses for the innerrel, so we needn't do that here.
00270      */
00271 
00272     /* Now examine the indexes to see if we have a matching unique index */
00273     if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
00274         return true;
00275 
00276     /*
00277      * Some day it would be nice to check for other methods of establishing
00278      * distinctness.
00279      */
00280     return false;
00281 }
00282 
00283 
00284 /*
00285  * Remove the target relid from the planner's data structures, having
00286  * determined that there is no need to include it in the query.
00287  *
00288  * We are not terribly thorough here.  We must make sure that the rel is
00289  * no longer treated as a baserel, and that attributes of other baserels
00290  * are no longer marked as being needed at joins involving this rel.
00291  * Also, join quals involving the rel have to be removed from the joininfo
00292  * lists, but only if they belong to the outer join identified by joinrelids.
00293  */
00294 static void
00295 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
00296 {
00297     RelOptInfo *rel = find_base_rel(root, relid);
00298     List       *joininfos;
00299     Index       rti;
00300     ListCell   *l;
00301     ListCell   *nextl;
00302 
00303     /*
00304      * Mark the rel as "dead" to show it is no longer part of the join tree.
00305      * (Removing it from the baserel array altogether seems too risky.)
00306      */
00307     rel->reloptkind = RELOPT_DEADREL;
00308 
00309     /*
00310      * Remove references to the rel from other baserels' attr_needed arrays.
00311      */
00312     for (rti = 1; rti < root->simple_rel_array_size; rti++)
00313     {
00314         RelOptInfo *otherrel = root->simple_rel_array[rti];
00315         int         attroff;
00316 
00317         /* there may be empty slots corresponding to non-baserel RTEs */
00318         if (otherrel == NULL)
00319             continue;
00320 
00321         Assert(otherrel->relid == rti); /* sanity check on array */
00322 
00323         /* no point in processing target rel itself */
00324         if (otherrel == rel)
00325             continue;
00326 
00327         for (attroff = otherrel->max_attr - otherrel->min_attr;
00328              attroff >= 0;
00329              attroff--)
00330         {
00331             otherrel->attr_needed[attroff] =
00332                 bms_del_member(otherrel->attr_needed[attroff], relid);
00333         }
00334     }
00335 
00336     /*
00337      * Likewise remove references from SpecialJoinInfo data structures.
00338      *
00339      * This is relevant in case the outer join we're deleting is nested inside
00340      * other outer joins: the upper joins' relid sets have to be adjusted. The
00341      * RHS of the target outer join will be made empty here, but that's OK
00342      * since caller will delete that SpecialJoinInfo entirely.
00343      */
00344     foreach(l, root->join_info_list)
00345     {
00346         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
00347 
00348         sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
00349         sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
00350         sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
00351         sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
00352     }
00353 
00354     /*
00355      * Likewise remove references from LateralJoinInfo data structures.
00356      *
00357      * If we are deleting a LATERAL subquery, we can forget its
00358      * LateralJoinInfo altogether.  Otherwise, make sure the target is not
00359      * included in any lateral_lhs set.  (It probably can't be, since that
00360      * should have precluded deciding to remove it; but let's cope anyway.)
00361      */
00362     for (l = list_head(root->lateral_info_list); l != NULL; l = nextl)
00363     {
00364         LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
00365 
00366         nextl = lnext(l);
00367         if (ljinfo->lateral_rhs == relid)
00368             root->lateral_info_list = list_delete_ptr(root->lateral_info_list,
00369                                                       ljinfo);
00370         else
00371             ljinfo->lateral_lhs = bms_del_member(ljinfo->lateral_lhs, relid);
00372     }
00373 
00374     /*
00375      * Likewise remove references from PlaceHolderVar data structures.
00376      *
00377      * Here we have a special case: if a PHV's eval_at set is just the target
00378      * relid, we want to leave it that way instead of reducing it to the empty
00379      * set.  An empty eval_at set would confuse later processing since it
00380      * would match every possible eval placement.
00381      */
00382     foreach(l, root->placeholder_list)
00383     {
00384         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
00385 
00386         phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
00387         if (bms_is_empty(phinfo->ph_eval_at))   /* oops, belay that */
00388             phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid);
00389 
00390         phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
00391         /* ph_may_need probably isn't used after this, but fix it anyway */
00392         phinfo->ph_may_need = bms_del_member(phinfo->ph_may_need, relid);
00393     }
00394 
00395     /*
00396      * Remove any joinquals referencing the rel from the joininfo lists.
00397      *
00398      * In some cases, a joinqual has to be put back after deleting its
00399      * reference to the target rel.  This can occur for pseudoconstant and
00400      * outerjoin-delayed quals, which can get marked as requiring the rel in
00401      * order to force them to be evaluated at or above the join.  We can't
00402      * just discard them, though.  Only quals that logically belonged to the
00403      * outer join being discarded should be removed from the query.
00404      *
00405      * We must make a copy of the rel's old joininfo list before starting the
00406      * loop, because otherwise remove_join_clause_from_rels would destroy the
00407      * list while we're scanning it.
00408      */
00409     joininfos = list_copy(rel->joininfo);
00410     foreach(l, joininfos)
00411     {
00412         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
00413 
00414         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
00415 
00416         if (rinfo->is_pushed_down ||
00417             !bms_equal(rinfo->required_relids, joinrelids))
00418         {
00419             /* Recheck that qual doesn't actually reference the target rel */
00420             Assert(!bms_is_member(relid, rinfo->clause_relids));
00421 
00422             /*
00423              * The required_relids probably aren't shared with anything else,
00424              * but let's copy them just to be sure.
00425              */
00426             rinfo->required_relids = bms_copy(rinfo->required_relids);
00427             rinfo->required_relids = bms_del_member(rinfo->required_relids,
00428                                                     relid);
00429             distribute_restrictinfo_to_rels(root, rinfo);
00430         }
00431     }
00432 }
00433 
00434 /*
00435  * Remove any occurrences of the target relid from a joinlist structure.
00436  *
00437  * It's easiest to build a whole new list structure, so we handle it that
00438  * way.  Efficiency is not a big deal here.
00439  *
00440  * *nremoved is incremented by the number of occurrences removed (there
00441  * should be exactly one, but the caller checks that).
00442  */
00443 static List *
00444 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
00445 {
00446     List       *result = NIL;
00447     ListCell   *jl;
00448 
00449     foreach(jl, joinlist)
00450     {
00451         Node       *jlnode = (Node *) lfirst(jl);
00452 
00453         if (IsA(jlnode, RangeTblRef))
00454         {
00455             int         varno = ((RangeTblRef *) jlnode)->rtindex;
00456 
00457             if (varno == relid)
00458                 (*nremoved)++;
00459             else
00460                 result = lappend(result, jlnode);
00461         }
00462         else if (IsA(jlnode, List))
00463         {
00464             /* Recurse to handle subproblem */
00465             List       *sublist;
00466 
00467             sublist = remove_rel_from_joinlist((List *) jlnode,
00468                                                relid, nremoved);
00469             /* Avoid including empty sub-lists in the result */
00470             if (sublist)
00471                 result = lappend(result, sublist);
00472         }
00473         else
00474         {
00475             elog(ERROR, "unrecognized joinlist node type: %d",
00476                  (int) nodeTag(jlnode));
00477         }
00478     }
00479 
00480     return result;
00481 }