Header And Logo

PostgreSQL
| The world's most advanced open source database.

joinpath.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * joinpath.c
00004  *    Routines to find all possible paths for processing a set of joins
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/optimizer/path/joinpath.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include <math.h>
00018 
00019 #include "executor/executor.h"
00020 #include "optimizer/cost.h"
00021 #include "optimizer/pathnode.h"
00022 #include "optimizer/paths.h"
00023 
00024 
00025 #define PATH_PARAM_BY_REL(path, rel)  \
00026     ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
00027 
00028 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
00029                      RelOptInfo *outerrel, RelOptInfo *innerrel,
00030                      List *restrictlist, List *mergeclause_list,
00031                      JoinType jointype, SpecialJoinInfo *sjinfo,
00032                      Relids param_source_rels);
00033 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
00034                      RelOptInfo *outerrel, RelOptInfo *innerrel,
00035                      List *restrictlist, List *mergeclause_list,
00036                      JoinType jointype, SpecialJoinInfo *sjinfo,
00037                      SemiAntiJoinFactors *semifactors,
00038                      Relids param_source_rels);
00039 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
00040                      RelOptInfo *outerrel, RelOptInfo *innerrel,
00041                      List *restrictlist,
00042                      JoinType jointype, SpecialJoinInfo *sjinfo,
00043                      SemiAntiJoinFactors *semifactors,
00044                      Relids param_source_rels);
00045 static List *select_mergejoin_clauses(PlannerInfo *root,
00046                          RelOptInfo *joinrel,
00047                          RelOptInfo *outerrel,
00048                          RelOptInfo *innerrel,
00049                          List *restrictlist,
00050                          JoinType jointype,
00051                          bool *mergejoin_allowed);
00052 
00053 
00054 /*
00055  * add_paths_to_joinrel
00056  *    Given a join relation and two component rels from which it can be made,
00057  *    consider all possible paths that use the two component rels as outer
00058  *    and inner rel respectively.  Add these paths to the join rel's pathlist
00059  *    if they survive comparison with other paths (and remove any existing
00060  *    paths that are dominated by these paths).
00061  *
00062  * Modifies the pathlist field of the joinrel node to contain the best
00063  * paths found so far.
00064  *
00065  * jointype is not necessarily the same as sjinfo->jointype; it might be
00066  * "flipped around" if we are considering joining the rels in the opposite
00067  * direction from what's indicated in sjinfo.
00068  *
00069  * Also, this routine and others in this module accept the special JoinTypes
00070  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
00071  * unique-ify the outer or inner relation and then apply a regular inner
00072  * join.  These values are not allowed to propagate outside this module,
00073  * however.  Path cost estimation code may need to recognize that it's
00074  * dealing with such a case --- the combination of nominal jointype INNER
00075  * with sjinfo->jointype == JOIN_SEMI indicates that.
00076  */
00077 void
00078 add_paths_to_joinrel(PlannerInfo *root,
00079                      RelOptInfo *joinrel,
00080                      RelOptInfo *outerrel,
00081                      RelOptInfo *innerrel,
00082                      JoinType jointype,
00083                      SpecialJoinInfo *sjinfo,
00084                      List *restrictlist)
00085 {
00086     List       *mergeclause_list = NIL;
00087     bool        mergejoin_allowed = true;
00088     SemiAntiJoinFactors semifactors;
00089     Relids      param_source_rels = NULL;
00090     ListCell   *lc;
00091 
00092     /*
00093      * Find potential mergejoin clauses.  We can skip this if we are not
00094      * interested in doing a mergejoin.  However, mergejoin may be our only
00095      * way of implementing a full outer join, so override enable_mergejoin if
00096      * it's a full join.
00097      */
00098     if (enable_mergejoin || jointype == JOIN_FULL)
00099         mergeclause_list = select_mergejoin_clauses(root,
00100                                                     joinrel,
00101                                                     outerrel,
00102                                                     innerrel,
00103                                                     restrictlist,
00104                                                     jointype,
00105                                                     &mergejoin_allowed);
00106 
00107     /*
00108      * If it's SEMI or ANTI join, compute correction factors for cost
00109      * estimation.  These will be the same for all paths.
00110      */
00111     if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
00112         compute_semi_anti_join_factors(root, outerrel, innerrel,
00113                                        jointype, sjinfo, restrictlist,
00114                                        &semifactors);
00115 
00116     /*
00117      * Decide whether it's sensible to generate parameterized paths for this
00118      * joinrel, and if so, which relations such paths should require.  There
00119      * is no need to create a parameterized result path unless there is a join
00120      * order restriction that prevents joining one of our input rels directly
00121      * to the parameter source rel instead of joining to the other input rel.
00122      * This restriction reduces the number of parameterized paths we have to
00123      * deal with at higher join levels, without compromising the quality of
00124      * the resulting plan.  We express the restriction as a Relids set that
00125      * must overlap the parameterization of any proposed join path.
00126      */
00127     foreach(lc, root->join_info_list)
00128     {
00129         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
00130 
00131         /*
00132          * SJ is relevant to this join if we have some part of its RHS
00133          * (possibly not all of it), and haven't yet joined to its LHS.  (This
00134          * test is pretty simplistic, but should be sufficient considering the
00135          * join has already been proven legal.)  If the SJ is relevant, it
00136          * presents constraints for joining to anything not in its RHS.
00137          */
00138         if (bms_overlap(joinrel->relids, sjinfo->min_righthand) &&
00139             !bms_overlap(joinrel->relids, sjinfo->min_lefthand))
00140             param_source_rels = bms_join(param_source_rels,
00141                                          bms_difference(root->all_baserels,
00142                                                      sjinfo->min_righthand));
00143 
00144         /* full joins constrain both sides symmetrically */
00145         if (sjinfo->jointype == JOIN_FULL &&
00146             bms_overlap(joinrel->relids, sjinfo->min_lefthand) &&
00147             !bms_overlap(joinrel->relids, sjinfo->min_righthand))
00148             param_source_rels = bms_join(param_source_rels,
00149                                          bms_difference(root->all_baserels,
00150                                                       sjinfo->min_lefthand));
00151     }
00152 
00153     /*
00154      * However, when a LATERAL subquery is involved, we have to be a bit
00155      * laxer, because there will simply not be any paths for the joinrel that
00156      * aren't parameterized by whatever the subquery is parameterized by,
00157      * unless its parameterization is resolved within the joinrel.  Hence, add
00158      * to param_source_rels anything that is laterally referenced in either
00159      * input and is not in the join already.
00160      */
00161     foreach(lc, root->lateral_info_list)
00162     {
00163         LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
00164 
00165         if (bms_is_member(ljinfo->lateral_rhs, joinrel->relids))
00166             param_source_rels = bms_join(param_source_rels,
00167                                          bms_difference(ljinfo->lateral_lhs,
00168                                                         joinrel->relids));
00169     }
00170 
00171     /*
00172      * 1. Consider mergejoin paths where both relations must be explicitly
00173      * sorted.  Skip this if we can't mergejoin.
00174      */
00175     if (mergejoin_allowed)
00176         sort_inner_and_outer(root, joinrel, outerrel, innerrel,
00177                              restrictlist, mergeclause_list, jointype,
00178                              sjinfo, param_source_rels);
00179 
00180     /*
00181      * 2. Consider paths where the outer relation need not be explicitly
00182      * sorted. This includes both nestloops and mergejoins where the outer
00183      * path is already ordered.  Again, skip this if we can't mergejoin.
00184      * (That's okay because we know that nestloop can't handle right/full
00185      * joins at all, so it wouldn't work in the prohibited cases either.)
00186      */
00187     if (mergejoin_allowed)
00188         match_unsorted_outer(root, joinrel, outerrel, innerrel,
00189                              restrictlist, mergeclause_list, jointype,
00190                              sjinfo, &semifactors, param_source_rels);
00191 
00192 #ifdef NOT_USED
00193 
00194     /*
00195      * 3. Consider paths where the inner relation need not be explicitly
00196      * sorted.  This includes mergejoins only (nestloops were already built in
00197      * match_unsorted_outer).
00198      *
00199      * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
00200      * significant difference between the inner and outer side of a mergejoin,
00201      * so match_unsorted_inner creates no paths that aren't equivalent to
00202      * those made by match_unsorted_outer when add_paths_to_joinrel() is
00203      * invoked with the two rels given in the other order.
00204      */
00205     if (mergejoin_allowed)
00206         match_unsorted_inner(root, joinrel, outerrel, innerrel,
00207                              restrictlist, mergeclause_list, jointype,
00208                              sjinfo, &semifactors, param_source_rels);
00209 #endif
00210 
00211     /*
00212      * 4. Consider paths where both outer and inner relations must be hashed
00213      * before being joined.  As above, disregard enable_hashjoin for full
00214      * joins, because there may be no other alternative.
00215      */
00216     if (enable_hashjoin || jointype == JOIN_FULL)
00217         hash_inner_and_outer(root, joinrel, outerrel, innerrel,
00218                              restrictlist, jointype,
00219                              sjinfo, &semifactors, param_source_rels);
00220 }
00221 
00222 /*
00223  * try_nestloop_path
00224  *    Consider a nestloop join path; if it appears useful, push it into
00225  *    the joinrel's pathlist via add_path().
00226  */
00227 static void
00228 try_nestloop_path(PlannerInfo *root,
00229                   RelOptInfo *joinrel,
00230                   JoinType jointype,
00231                   SpecialJoinInfo *sjinfo,
00232                   SemiAntiJoinFactors *semifactors,
00233                   Relids param_source_rels,
00234                   Path *outer_path,
00235                   Path *inner_path,
00236                   List *restrict_clauses,
00237                   List *pathkeys)
00238 {
00239     Relids      required_outer;
00240     JoinCostWorkspace workspace;
00241 
00242     /*
00243      * Check to see if proposed path is still parameterized, and reject if the
00244      * parameterization wouldn't be sensible.
00245      */
00246     required_outer = calc_nestloop_required_outer(outer_path,
00247                                                   inner_path);
00248     if (required_outer &&
00249         !bms_overlap(required_outer, param_source_rels))
00250     {
00251         /* Waste no memory when we reject a path here */
00252         bms_free(required_outer);
00253         return;
00254     }
00255 
00256     /*
00257      * Do a precheck to quickly eliminate obviously-inferior paths.  We
00258      * calculate a cheap lower bound on the path's cost and then use
00259      * add_path_precheck() to see if the path is clearly going to be dominated
00260      * by some existing path for the joinrel.  If not, do the full pushup with
00261      * creating a fully valid path structure and submitting it to add_path().
00262      * The latter two steps are expensive enough to make this two-phase
00263      * methodology worthwhile.
00264      */
00265     initial_cost_nestloop(root, &workspace, jointype,
00266                           outer_path, inner_path,
00267                           sjinfo, semifactors);
00268 
00269     if (add_path_precheck(joinrel,
00270                           workspace.startup_cost, workspace.total_cost,
00271                           pathkeys, required_outer))
00272     {
00273         add_path(joinrel, (Path *)
00274                  create_nestloop_path(root,
00275                                       joinrel,
00276                                       jointype,
00277                                       &workspace,
00278                                       sjinfo,
00279                                       semifactors,
00280                                       outer_path,
00281                                       inner_path,
00282                                       restrict_clauses,
00283                                       pathkeys,
00284                                       required_outer));
00285     }
00286     else
00287     {
00288         /* Waste no memory when we reject a path here */
00289         bms_free(required_outer);
00290     }
00291 }
00292 
00293 /*
00294  * try_mergejoin_path
00295  *    Consider a merge join path; if it appears useful, push it into
00296  *    the joinrel's pathlist via add_path().
00297  */
00298 static void
00299 try_mergejoin_path(PlannerInfo *root,
00300                    RelOptInfo *joinrel,
00301                    JoinType jointype,
00302                    SpecialJoinInfo *sjinfo,
00303                    Relids param_source_rels,
00304                    Path *outer_path,
00305                    Path *inner_path,
00306                    List *restrict_clauses,
00307                    List *pathkeys,
00308                    List *mergeclauses,
00309                    List *outersortkeys,
00310                    List *innersortkeys)
00311 {
00312     Relids      required_outer;
00313     JoinCostWorkspace workspace;
00314 
00315     /*
00316      * Check to see if proposed path is still parameterized, and reject if the
00317      * parameterization wouldn't be sensible.
00318      */
00319     required_outer = calc_non_nestloop_required_outer(outer_path,
00320                                                       inner_path);
00321     if (required_outer &&
00322         !bms_overlap(required_outer, param_source_rels))
00323     {
00324         /* Waste no memory when we reject a path here */
00325         bms_free(required_outer);
00326         return;
00327     }
00328 
00329     /*
00330      * If the given paths are already well enough ordered, we can skip doing
00331      * an explicit sort.
00332      */
00333     if (outersortkeys &&
00334         pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
00335         outersortkeys = NIL;
00336     if (innersortkeys &&
00337         pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
00338         innersortkeys = NIL;
00339 
00340     /*
00341      * See comments in try_nestloop_path().
00342      */
00343     initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
00344                            outer_path, inner_path,
00345                            outersortkeys, innersortkeys,
00346                            sjinfo);
00347 
00348     if (add_path_precheck(joinrel,
00349                           workspace.startup_cost, workspace.total_cost,
00350                           pathkeys, required_outer))
00351     {
00352         add_path(joinrel, (Path *)
00353                  create_mergejoin_path(root,
00354                                        joinrel,
00355                                        jointype,
00356                                        &workspace,
00357                                        sjinfo,
00358                                        outer_path,
00359                                        inner_path,
00360                                        restrict_clauses,
00361                                        pathkeys,
00362                                        required_outer,
00363                                        mergeclauses,
00364                                        outersortkeys,
00365                                        innersortkeys));
00366     }
00367     else
00368     {
00369         /* Waste no memory when we reject a path here */
00370         bms_free(required_outer);
00371     }
00372 }
00373 
00374 /*
00375  * try_hashjoin_path
00376  *    Consider a hash join path; if it appears useful, push it into
00377  *    the joinrel's pathlist via add_path().
00378  */
00379 static void
00380 try_hashjoin_path(PlannerInfo *root,
00381                   RelOptInfo *joinrel,
00382                   JoinType jointype,
00383                   SpecialJoinInfo *sjinfo,
00384                   SemiAntiJoinFactors *semifactors,
00385                   Relids param_source_rels,
00386                   Path *outer_path,
00387                   Path *inner_path,
00388                   List *restrict_clauses,
00389                   List *hashclauses)
00390 {
00391     Relids      required_outer;
00392     JoinCostWorkspace workspace;
00393 
00394     /*
00395      * Check to see if proposed path is still parameterized, and reject if the
00396      * parameterization wouldn't be sensible.
00397      */
00398     required_outer = calc_non_nestloop_required_outer(outer_path,
00399                                                       inner_path);
00400     if (required_outer &&
00401         !bms_overlap(required_outer, param_source_rels))
00402     {
00403         /* Waste no memory when we reject a path here */
00404         bms_free(required_outer);
00405         return;
00406     }
00407 
00408     /*
00409      * See comments in try_nestloop_path().  Also note that hashjoin paths
00410      * never have any output pathkeys, per comments in create_hashjoin_path.
00411      */
00412     initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
00413                           outer_path, inner_path,
00414                           sjinfo, semifactors);
00415 
00416     if (add_path_precheck(joinrel,
00417                           workspace.startup_cost, workspace.total_cost,
00418                           NIL, required_outer))
00419     {
00420         add_path(joinrel, (Path *)
00421                  create_hashjoin_path(root,
00422                                       joinrel,
00423                                       jointype,
00424                                       &workspace,
00425                                       sjinfo,
00426                                       semifactors,
00427                                       outer_path,
00428                                       inner_path,
00429                                       restrict_clauses,
00430                                       required_outer,
00431                                       hashclauses));
00432     }
00433     else
00434     {
00435         /* Waste no memory when we reject a path here */
00436         bms_free(required_outer);
00437     }
00438 }
00439 
00440 /*
00441  * clause_sides_match_join
00442  *    Determine whether a join clause is of the right form to use in this join.
00443  *
00444  * We already know that the clause is a binary opclause referencing only the
00445  * rels in the current join.  The point here is to check whether it has the
00446  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
00447  * rather than mixing outer and inner vars on either side.  If it matches,
00448  * we set the transient flag outer_is_left to identify which side is which.
00449  */
00450 static inline bool
00451 clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
00452                         RelOptInfo *innerrel)
00453 {
00454     if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
00455         bms_is_subset(rinfo->right_relids, innerrel->relids))
00456     {
00457         /* lefthand side is outer */
00458         rinfo->outer_is_left = true;
00459         return true;
00460     }
00461     else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
00462              bms_is_subset(rinfo->right_relids, outerrel->relids))
00463     {
00464         /* righthand side is outer */
00465         rinfo->outer_is_left = false;
00466         return true;
00467     }
00468     return false;               /* no good for these input relations */
00469 }
00470 
00471 /*
00472  * sort_inner_and_outer
00473  *    Create mergejoin join paths by explicitly sorting both the outer and
00474  *    inner join relations on each available merge ordering.
00475  *
00476  * 'joinrel' is the join relation
00477  * 'outerrel' is the outer join relation
00478  * 'innerrel' is the inner join relation
00479  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
00480  *      clauses that apply to this join
00481  * 'mergeclause_list' is a list of RestrictInfo nodes for available
00482  *      mergejoin clauses in this join
00483  * 'jointype' is the type of join to do
00484  * 'sjinfo' is extra info about the join for selectivity estimation
00485  * 'param_source_rels' are OK targets for parameterization of result paths
00486  */
00487 static void
00488 sort_inner_and_outer(PlannerInfo *root,
00489                      RelOptInfo *joinrel,
00490                      RelOptInfo *outerrel,
00491                      RelOptInfo *innerrel,
00492                      List *restrictlist,
00493                      List *mergeclause_list,
00494                      JoinType jointype,
00495                      SpecialJoinInfo *sjinfo,
00496                      Relids param_source_rels)
00497 {
00498     Path       *outer_path;
00499     Path       *inner_path;
00500     List       *all_pathkeys;
00501     ListCell   *l;
00502 
00503     /*
00504      * We only consider the cheapest-total-cost input paths, since we are
00505      * assuming here that a sort is required.  We will consider
00506      * cheapest-startup-cost input paths later, and only if they don't need a
00507      * sort.
00508      *
00509      * This function intentionally does not consider parameterized input
00510      * paths, except when the cheapest-total is parameterized.  If we did so,
00511      * we'd have a combinatorial explosion of mergejoin paths of dubious
00512      * value.  This interacts with decisions elsewhere that also discriminate
00513      * against mergejoins with parameterized inputs; see comments in
00514      * src/backend/optimizer/README.
00515      */
00516     outer_path = outerrel->cheapest_total_path;
00517     inner_path = innerrel->cheapest_total_path;
00518 
00519     /*
00520      * If either cheapest-total path is parameterized by the other rel, we
00521      * can't use a mergejoin.  (There's no use looking for alternative input
00522      * paths, since these should already be the least-parameterized available
00523      * paths.)
00524      */
00525     if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
00526         PATH_PARAM_BY_REL(inner_path, outerrel))
00527         return;
00528 
00529     /*
00530      * If unique-ification is requested, do it and then handle as a plain
00531      * inner join.
00532      */
00533     if (jointype == JOIN_UNIQUE_OUTER)
00534     {
00535         outer_path = (Path *) create_unique_path(root, outerrel,
00536                                                  outer_path, sjinfo);
00537         Assert(outer_path);
00538         jointype = JOIN_INNER;
00539     }
00540     else if (jointype == JOIN_UNIQUE_INNER)
00541     {
00542         inner_path = (Path *) create_unique_path(root, innerrel,
00543                                                  inner_path, sjinfo);
00544         Assert(inner_path);
00545         jointype = JOIN_INNER;
00546     }
00547 
00548     /*
00549      * Each possible ordering of the available mergejoin clauses will generate
00550      * a differently-sorted result path at essentially the same cost.  We have
00551      * no basis for choosing one over another at this level of joining, but
00552      * some sort orders may be more useful than others for higher-level
00553      * mergejoins, so it's worth considering multiple orderings.
00554      *
00555      * Actually, it's not quite true that every mergeclause ordering will
00556      * generate a different path order, because some of the clauses may be
00557      * partially redundant (refer to the same EquivalenceClasses).  Therefore,
00558      * what we do is convert the mergeclause list to a list of canonical
00559      * pathkeys, and then consider different orderings of the pathkeys.
00560      *
00561      * Generating a path for *every* permutation of the pathkeys doesn't seem
00562      * like a winning strategy; the cost in planning time is too high. For
00563      * now, we generate one path for each pathkey, listing that pathkey first
00564      * and the rest in random order.  This should allow at least a one-clause
00565      * mergejoin without re-sorting against any other possible mergejoin
00566      * partner path.  But if we've not guessed the right ordering of secondary
00567      * keys, we may end up evaluating clauses as qpquals when they could have
00568      * been done as mergeclauses.  (In practice, it's rare that there's more
00569      * than two or three mergeclauses, so expending a huge amount of thought
00570      * on that is probably not worth it.)
00571      *
00572      * The pathkey order returned by select_outer_pathkeys_for_merge() has
00573      * some heuristics behind it (see that function), so be sure to try it
00574      * exactly as-is as well as making variants.
00575      */
00576     all_pathkeys = select_outer_pathkeys_for_merge(root,
00577                                                    mergeclause_list,
00578                                                    joinrel);
00579 
00580     foreach(l, all_pathkeys)
00581     {
00582         List       *front_pathkey = (List *) lfirst(l);
00583         List       *cur_mergeclauses;
00584         List       *outerkeys;
00585         List       *innerkeys;
00586         List       *merge_pathkeys;
00587 
00588         /* Make a pathkey list with this guy first */
00589         if (l != list_head(all_pathkeys))
00590             outerkeys = lcons(front_pathkey,
00591                               list_delete_ptr(list_copy(all_pathkeys),
00592                                               front_pathkey));
00593         else
00594             outerkeys = all_pathkeys;   /* no work at first one... */
00595 
00596         /* Sort the mergeclauses into the corresponding ordering */
00597         cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
00598                                                           outerkeys,
00599                                                           true,
00600                                                           mergeclause_list);
00601 
00602         /* Should have used them all... */
00603         Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
00604 
00605         /* Build sort pathkeys for the inner side */
00606         innerkeys = make_inner_pathkeys_for_merge(root,
00607                                                   cur_mergeclauses,
00608                                                   outerkeys);
00609 
00610         /* Build pathkeys representing output sort order */
00611         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
00612                                              outerkeys);
00613 
00614         /*
00615          * And now we can make the path.
00616          *
00617          * Note: it's possible that the cheapest paths will already be sorted
00618          * properly.  try_mergejoin_path will detect that case and suppress an
00619          * explicit sort step, so we needn't do so here.
00620          */
00621         try_mergejoin_path(root,
00622                            joinrel,
00623                            jointype,
00624                            sjinfo,
00625                            param_source_rels,
00626                            outer_path,
00627                            inner_path,
00628                            restrictlist,
00629                            merge_pathkeys,
00630                            cur_mergeclauses,
00631                            outerkeys,
00632                            innerkeys);
00633     }
00634 }
00635 
00636 /*
00637  * match_unsorted_outer
00638  *    Creates possible join paths for processing a single join relation
00639  *    'joinrel' by employing either iterative substitution or
00640  *    mergejoining on each of its possible outer paths (considering
00641  *    only outer paths that are already ordered well enough for merging).
00642  *
00643  * We always generate a nestloop path for each available outer path.
00644  * In fact we may generate as many as five: one on the cheapest-total-cost
00645  * inner path, one on the same with materialization, one on the
00646  * cheapest-startup-cost inner path (if different), one on the
00647  * cheapest-total inner-indexscan path (if any), and one on the
00648  * cheapest-startup inner-indexscan path (if different).
00649  *
00650  * We also consider mergejoins if mergejoin clauses are available.  We have
00651  * two ways to generate the inner path for a mergejoin: sort the cheapest
00652  * inner path, or use an inner path that is already suitably ordered for the
00653  * merge.  If we have several mergeclauses, it could be that there is no inner
00654  * path (or only a very expensive one) for the full list of mergeclauses, but
00655  * better paths exist if we truncate the mergeclause list (thereby discarding
00656  * some sort key requirements).  So, we consider truncations of the
00657  * mergeclause list as well as the full list.  (Ideally we'd consider all
00658  * subsets of the mergeclause list, but that seems way too expensive.)
00659  *
00660  * 'joinrel' is the join relation
00661  * 'outerrel' is the outer join relation
00662  * 'innerrel' is the inner join relation
00663  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
00664  *      clauses that apply to this join
00665  * 'mergeclause_list' is a list of RestrictInfo nodes for available
00666  *      mergejoin clauses in this join
00667  * 'jointype' is the type of join to do
00668  * 'sjinfo' is extra info about the join for selectivity estimation
00669  * 'semifactors' contains valid data if jointype is SEMI or ANTI
00670  * 'param_source_rels' are OK targets for parameterization of result paths
00671  */
00672 static void
00673 match_unsorted_outer(PlannerInfo *root,
00674                      RelOptInfo *joinrel,
00675                      RelOptInfo *outerrel,
00676                      RelOptInfo *innerrel,
00677                      List *restrictlist,
00678                      List *mergeclause_list,
00679                      JoinType jointype,
00680                      SpecialJoinInfo *sjinfo,
00681                      SemiAntiJoinFactors *semifactors,
00682                      Relids param_source_rels)
00683 {
00684     JoinType    save_jointype = jointype;
00685     bool        nestjoinOK;
00686     bool        useallclauses;
00687     Path       *inner_cheapest_total = innerrel->cheapest_total_path;
00688     Path       *matpath = NULL;
00689     ListCell   *lc1;
00690 
00691     /*
00692      * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
00693      * are doing a right or full mergejoin, we must use *all* the mergeclauses
00694      * as join clauses, else we will not have a valid plan.  (Although these
00695      * two flags are currently inverses, keep them separate for clarity and
00696      * possible future changes.)
00697      */
00698     switch (jointype)
00699     {
00700         case JOIN_INNER:
00701         case JOIN_LEFT:
00702         case JOIN_SEMI:
00703         case JOIN_ANTI:
00704             nestjoinOK = true;
00705             useallclauses = false;
00706             break;
00707         case JOIN_RIGHT:
00708         case JOIN_FULL:
00709             nestjoinOK = false;
00710             useallclauses = true;
00711             break;
00712         case JOIN_UNIQUE_OUTER:
00713         case JOIN_UNIQUE_INNER:
00714             jointype = JOIN_INNER;
00715             nestjoinOK = true;
00716             useallclauses = false;
00717             break;
00718         default:
00719             elog(ERROR, "unrecognized join type: %d",
00720                  (int) jointype);
00721             nestjoinOK = false; /* keep compiler quiet */
00722             useallclauses = false;
00723             break;
00724     }
00725 
00726     /*
00727      * If inner_cheapest_total is parameterized by the outer rel, ignore it;
00728      * we will consider it below as a member of cheapest_parameterized_paths,
00729      * but the other possibilities considered in this routine aren't usable.
00730      */
00731     if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
00732         inner_cheapest_total = NULL;
00733 
00734     /*
00735      * If we need to unique-ify the inner path, we will consider only the
00736      * cheapest-total inner.
00737      */
00738     if (save_jointype == JOIN_UNIQUE_INNER)
00739     {
00740         /* No way to do this with an inner path parameterized by outer rel */
00741         if (inner_cheapest_total == NULL)
00742             return;
00743         inner_cheapest_total = (Path *)
00744             create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
00745         Assert(inner_cheapest_total);
00746     }
00747     else if (nestjoinOK)
00748     {
00749         /*
00750          * Consider materializing the cheapest inner path, unless
00751          * enable_material is off or the path in question materializes its
00752          * output anyway.
00753          */
00754         if (enable_material && inner_cheapest_total != NULL &&
00755             !ExecMaterializesOutput(inner_cheapest_total->pathtype))
00756             matpath = (Path *)
00757                 create_material_path(innerrel, inner_cheapest_total);
00758     }
00759 
00760     foreach(lc1, outerrel->pathlist)
00761     {
00762         Path       *outerpath = (Path *) lfirst(lc1);
00763         List       *merge_pathkeys;
00764         List       *mergeclauses;
00765         List       *innersortkeys;
00766         List       *trialsortkeys;
00767         Path       *cheapest_startup_inner;
00768         Path       *cheapest_total_inner;
00769         int         num_sortkeys;
00770         int         sortkeycnt;
00771 
00772         /*
00773          * We cannot use an outer path that is parameterized by the inner rel.
00774          */
00775         if (PATH_PARAM_BY_REL(outerpath, innerrel))
00776             continue;
00777 
00778         /*
00779          * If we need to unique-ify the outer path, it's pointless to consider
00780          * any but the cheapest outer.  (XXX we don't consider parameterized
00781          * outers, nor inners, for unique-ified cases.  Should we?)
00782          */
00783         if (save_jointype == JOIN_UNIQUE_OUTER)
00784         {
00785             if (outerpath != outerrel->cheapest_total_path)
00786                 continue;
00787             outerpath = (Path *) create_unique_path(root, outerrel,
00788                                                     outerpath, sjinfo);
00789             Assert(outerpath);
00790         }
00791 
00792         /*
00793          * The result will have this sort order (even if it is implemented as
00794          * a nestloop, and even if some of the mergeclauses are implemented by
00795          * qpquals rather than as true mergeclauses):
00796          */
00797         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
00798                                              outerpath->pathkeys);
00799 
00800         if (save_jointype == JOIN_UNIQUE_INNER)
00801         {
00802             /*
00803              * Consider nestloop join, but only with the unique-ified cheapest
00804              * inner path
00805              */
00806             try_nestloop_path(root,
00807                               joinrel,
00808                               jointype,
00809                               sjinfo,
00810                               semifactors,
00811                               param_source_rels,
00812                               outerpath,
00813                               inner_cheapest_total,
00814                               restrictlist,
00815                               merge_pathkeys);
00816         }
00817         else if (nestjoinOK)
00818         {
00819             /*
00820              * Consider nestloop joins using this outer path and various
00821              * available paths for the inner relation.  We consider the
00822              * cheapest-total paths for each available parameterization of the
00823              * inner relation, including the unparameterized case.
00824              */
00825             ListCell   *lc2;
00826 
00827             foreach(lc2, innerrel->cheapest_parameterized_paths)
00828             {
00829                 Path       *innerpath = (Path *) lfirst(lc2);
00830 
00831                 try_nestloop_path(root,
00832                                   joinrel,
00833                                   jointype,
00834                                   sjinfo,
00835                                   semifactors,
00836                                   param_source_rels,
00837                                   outerpath,
00838                                   innerpath,
00839                                   restrictlist,
00840                                   merge_pathkeys);
00841             }
00842 
00843             /* Also consider materialized form of the cheapest inner path */
00844             if (matpath != NULL)
00845                 try_nestloop_path(root,
00846                                   joinrel,
00847                                   jointype,
00848                                   sjinfo,
00849                                   semifactors,
00850                                   param_source_rels,
00851                                   outerpath,
00852                                   matpath,
00853                                   restrictlist,
00854                                   merge_pathkeys);
00855         }
00856 
00857         /* Can't do anything else if outer path needs to be unique'd */
00858         if (save_jointype == JOIN_UNIQUE_OUTER)
00859             continue;
00860 
00861         /* Can't do anything else if inner rel is parameterized by outer */
00862         if (inner_cheapest_total == NULL)
00863             continue;
00864 
00865         /* Look for useful mergeclauses (if any) */
00866         mergeclauses = find_mergeclauses_for_pathkeys(root,
00867                                                       outerpath->pathkeys,
00868                                                       true,
00869                                                       mergeclause_list);
00870 
00871         /*
00872          * Done with this outer path if no chance for a mergejoin.
00873          *
00874          * Special corner case: for "x FULL JOIN y ON true", there will be no
00875          * join clauses at all.  Ordinarily we'd generate a clauseless
00876          * nestloop path, but since mergejoin is our only join type that
00877          * supports FULL JOIN without any join clauses, it's necessary to
00878          * generate a clauseless mergejoin path instead.
00879          */
00880         if (mergeclauses == NIL)
00881         {
00882             if (jointype == JOIN_FULL)
00883                  /* okay to try for mergejoin */ ;
00884             else
00885                 continue;
00886         }
00887         if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
00888             continue;
00889 
00890         /* Compute the required ordering of the inner path */
00891         innersortkeys = make_inner_pathkeys_for_merge(root,
00892                                                       mergeclauses,
00893                                                       outerpath->pathkeys);
00894 
00895         /*
00896          * Generate a mergejoin on the basis of sorting the cheapest inner.
00897          * Since a sort will be needed, only cheapest total cost matters. (But
00898          * try_mergejoin_path will do the right thing if inner_cheapest_total
00899          * is already correctly sorted.)
00900          */
00901         try_mergejoin_path(root,
00902                            joinrel,
00903                            jointype,
00904                            sjinfo,
00905                            param_source_rels,
00906                            outerpath,
00907                            inner_cheapest_total,
00908                            restrictlist,
00909                            merge_pathkeys,
00910                            mergeclauses,
00911                            NIL,
00912                            innersortkeys);
00913 
00914         /* Can't do anything else if inner path needs to be unique'd */
00915         if (save_jointype == JOIN_UNIQUE_INNER)
00916             continue;
00917 
00918         /*
00919          * Look for presorted inner paths that satisfy the innersortkey list
00920          * --- or any truncation thereof, if we are allowed to build a
00921          * mergejoin using a subset of the merge clauses.  Here, we consider
00922          * both cheap startup cost and cheap total cost.
00923          *
00924          * Currently we do not consider parameterized inner paths here. This
00925          * interacts with decisions elsewhere that also discriminate against
00926          * mergejoins with parameterized inputs; see comments in
00927          * src/backend/optimizer/README.
00928          *
00929          * As we shorten the sortkey list, we should consider only paths that
00930          * are strictly cheaper than (in particular, not the same as) any path
00931          * found in an earlier iteration.  Otherwise we'd be intentionally
00932          * using fewer merge keys than a given path allows (treating the rest
00933          * as plain joinquals), which is unlikely to be a good idea.  Also,
00934          * eliminating paths here on the basis of compare_path_costs is a lot
00935          * cheaper than building the mergejoin path only to throw it away.
00936          *
00937          * If inner_cheapest_total is well enough sorted to have not required
00938          * a sort in the path made above, we shouldn't make a duplicate path
00939          * with it, either.  We handle that case with the same logic that
00940          * handles the previous consideration, by initializing the variables
00941          * that track cheapest-so-far properly.  Note that we do NOT reject
00942          * inner_cheapest_total if we find it matches some shorter set of
00943          * pathkeys.  That case corresponds to using fewer mergekeys to avoid
00944          * sorting inner_cheapest_total, whereas we did sort it above, so the
00945          * plans being considered are different.
00946          */
00947         if (pathkeys_contained_in(innersortkeys,
00948                                   inner_cheapest_total->pathkeys))
00949         {
00950             /* inner_cheapest_total didn't require a sort */
00951             cheapest_startup_inner = inner_cheapest_total;
00952             cheapest_total_inner = inner_cheapest_total;
00953         }
00954         else
00955         {
00956             /* it did require a sort, at least for the full set of keys */
00957             cheapest_startup_inner = NULL;
00958             cheapest_total_inner = NULL;
00959         }
00960         num_sortkeys = list_length(innersortkeys);
00961         if (num_sortkeys > 1 && !useallclauses)
00962             trialsortkeys = list_copy(innersortkeys);   /* need modifiable copy */
00963         else
00964             trialsortkeys = innersortkeys;      /* won't really truncate */
00965 
00966         for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
00967         {
00968             Path       *innerpath;
00969             List       *newclauses = NIL;
00970 
00971             /*
00972              * Look for an inner path ordered well enough for the first
00973              * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
00974              * destructively, which is why we made a copy...
00975              */
00976             trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
00977             innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
00978                                                        trialsortkeys,
00979                                                        NULL,
00980                                                        TOTAL_COST);
00981             if (innerpath != NULL &&
00982                 (cheapest_total_inner == NULL ||
00983                  compare_path_costs(innerpath, cheapest_total_inner,
00984                                     TOTAL_COST) < 0))
00985             {
00986                 /* Found a cheap (or even-cheaper) sorted path */
00987                 /* Select the right mergeclauses, if we didn't already */
00988                 if (sortkeycnt < num_sortkeys)
00989                 {
00990                     newclauses =
00991                         find_mergeclauses_for_pathkeys(root,
00992                                                        trialsortkeys,
00993                                                        false,
00994                                                        mergeclauses);
00995                     Assert(newclauses != NIL);
00996                 }
00997                 else
00998                     newclauses = mergeclauses;
00999                 try_mergejoin_path(root,
01000                                    joinrel,
01001                                    jointype,
01002                                    sjinfo,
01003                                    param_source_rels,
01004                                    outerpath,
01005                                    innerpath,
01006                                    restrictlist,
01007                                    merge_pathkeys,
01008                                    newclauses,
01009                                    NIL,
01010                                    NIL);
01011                 cheapest_total_inner = innerpath;
01012             }
01013             /* Same on the basis of cheapest startup cost ... */
01014             innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
01015                                                        trialsortkeys,
01016                                                        NULL,
01017                                                        STARTUP_COST);
01018             if (innerpath != NULL &&
01019                 (cheapest_startup_inner == NULL ||
01020                  compare_path_costs(innerpath, cheapest_startup_inner,
01021                                     STARTUP_COST) < 0))
01022             {
01023                 /* Found a cheap (or even-cheaper) sorted path */
01024                 if (innerpath != cheapest_total_inner)
01025                 {
01026                     /*
01027                      * Avoid rebuilding clause list if we already made one;
01028                      * saves memory in big join trees...
01029                      */
01030                     if (newclauses == NIL)
01031                     {
01032                         if (sortkeycnt < num_sortkeys)
01033                         {
01034                             newclauses =
01035                                 find_mergeclauses_for_pathkeys(root,
01036                                                                trialsortkeys,
01037                                                                false,
01038                                                                mergeclauses);
01039                             Assert(newclauses != NIL);
01040                         }
01041                         else
01042                             newclauses = mergeclauses;
01043                     }
01044                     try_mergejoin_path(root,
01045                                        joinrel,
01046                                        jointype,
01047                                        sjinfo,
01048                                        param_source_rels,
01049                                        outerpath,
01050                                        innerpath,
01051                                        restrictlist,
01052                                        merge_pathkeys,
01053                                        newclauses,
01054                                        NIL,
01055                                        NIL);
01056                 }
01057                 cheapest_startup_inner = innerpath;
01058             }
01059 
01060             /*
01061              * Don't consider truncated sortkeys if we need all clauses.
01062              */
01063             if (useallclauses)
01064                 break;
01065         }
01066     }
01067 }
01068 
01069 /*
01070  * hash_inner_and_outer
01071  *    Create hashjoin join paths by explicitly hashing both the outer and
01072  *    inner keys of each available hash clause.
01073  *
01074  * 'joinrel' is the join relation
01075  * 'outerrel' is the outer join relation
01076  * 'innerrel' is the inner join relation
01077  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
01078  *      clauses that apply to this join
01079  * 'jointype' is the type of join to do
01080  * 'sjinfo' is extra info about the join for selectivity estimation
01081  * 'semifactors' contains valid data if jointype is SEMI or ANTI
01082  * 'param_source_rels' are OK targets for parameterization of result paths
01083  */
01084 static void
01085 hash_inner_and_outer(PlannerInfo *root,
01086                      RelOptInfo *joinrel,
01087                      RelOptInfo *outerrel,
01088                      RelOptInfo *innerrel,
01089                      List *restrictlist,
01090                      JoinType jointype,
01091                      SpecialJoinInfo *sjinfo,
01092                      SemiAntiJoinFactors *semifactors,
01093                      Relids param_source_rels)
01094 {
01095     bool        isouterjoin = IS_OUTER_JOIN(jointype);
01096     List       *hashclauses;
01097     ListCell   *l;
01098 
01099     /*
01100      * We need to build only one hashclauses list for any given pair of outer
01101      * and inner relations; all of the hashable clauses will be used as keys.
01102      *
01103      * Scan the join's restrictinfo list to find hashjoinable clauses that are
01104      * usable with this pair of sub-relations.
01105      */
01106     hashclauses = NIL;
01107     foreach(l, restrictlist)
01108     {
01109         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
01110 
01111         /*
01112          * If processing an outer join, only use its own join clauses for
01113          * hashing.  For inner joins we need not be so picky.
01114          */
01115         if (isouterjoin && restrictinfo->is_pushed_down)
01116             continue;
01117 
01118         if (!restrictinfo->can_join ||
01119             restrictinfo->hashjoinoperator == InvalidOid)
01120             continue;           /* not hashjoinable */
01121 
01122         /*
01123          * Check if clause has the form "outer op inner" or "inner op outer".
01124          */
01125         if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
01126             continue;           /* no good for these input relations */
01127 
01128         hashclauses = lappend(hashclauses, restrictinfo);
01129     }
01130 
01131     /* If we found any usable hashclauses, make paths */
01132     if (hashclauses)
01133     {
01134         /*
01135          * We consider both the cheapest-total-cost and cheapest-startup-cost
01136          * outer paths.  There's no need to consider any but the
01137          * cheapest-total-cost inner path, however.
01138          */
01139         Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
01140         Path       *cheapest_total_outer = outerrel->cheapest_total_path;
01141         Path       *cheapest_total_inner = innerrel->cheapest_total_path;
01142 
01143         /*
01144          * If either cheapest-total path is parameterized by the other rel, we
01145          * can't use a hashjoin.  (There's no use looking for alternative
01146          * input paths, since these should already be the least-parameterized
01147          * available paths.)
01148          */
01149         if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
01150             PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
01151             return;
01152 
01153         /* Unique-ify if need be; we ignore parameterized possibilities */
01154         if (jointype == JOIN_UNIQUE_OUTER)
01155         {
01156             cheapest_total_outer = (Path *)
01157                 create_unique_path(root, outerrel,
01158                                    cheapest_total_outer, sjinfo);
01159             Assert(cheapest_total_outer);
01160             jointype = JOIN_INNER;
01161             try_hashjoin_path(root,
01162                               joinrel,
01163                               jointype,
01164                               sjinfo,
01165                               semifactors,
01166                               param_source_rels,
01167                               cheapest_total_outer,
01168                               cheapest_total_inner,
01169                               restrictlist,
01170                               hashclauses);
01171             /* no possibility of cheap startup here */
01172         }
01173         else if (jointype == JOIN_UNIQUE_INNER)
01174         {
01175             cheapest_total_inner = (Path *)
01176                 create_unique_path(root, innerrel,
01177                                    cheapest_total_inner, sjinfo);
01178             Assert(cheapest_total_inner);
01179             jointype = JOIN_INNER;
01180             try_hashjoin_path(root,
01181                               joinrel,
01182                               jointype,
01183                               sjinfo,
01184                               semifactors,
01185                               param_source_rels,
01186                               cheapest_total_outer,
01187                               cheapest_total_inner,
01188                               restrictlist,
01189                               hashclauses);
01190             if (cheapest_startup_outer != NULL &&
01191                 cheapest_startup_outer != cheapest_total_outer)
01192                 try_hashjoin_path(root,
01193                                   joinrel,
01194                                   jointype,
01195                                   sjinfo,
01196                                   semifactors,
01197                                   param_source_rels,
01198                                   cheapest_startup_outer,
01199                                   cheapest_total_inner,
01200                                   restrictlist,
01201                                   hashclauses);
01202         }
01203         else
01204         {
01205             /*
01206              * For other jointypes, we consider the cheapest startup outer
01207              * together with the cheapest total inner, and then consider
01208              * pairings of cheapest-total paths including parameterized ones.
01209              * There is no use in generating parameterized paths on the basis
01210              * of possibly cheap startup cost, so this is sufficient.
01211              */
01212             ListCell   *lc1;
01213             ListCell   *lc2;
01214 
01215             if (cheapest_startup_outer != NULL)
01216                 try_hashjoin_path(root,
01217                                   joinrel,
01218                                   jointype,
01219                                   sjinfo,
01220                                   semifactors,
01221                                   param_source_rels,
01222                                   cheapest_startup_outer,
01223                                   cheapest_total_inner,
01224                                   restrictlist,
01225                                   hashclauses);
01226 
01227             foreach(lc1, outerrel->cheapest_parameterized_paths)
01228             {
01229                 Path       *outerpath = (Path *) lfirst(lc1);
01230 
01231                 /*
01232                  * We cannot use an outer path that is parameterized by the
01233                  * inner rel.
01234                  */
01235                 if (PATH_PARAM_BY_REL(outerpath, innerrel))
01236                     continue;
01237 
01238                 foreach(lc2, innerrel->cheapest_parameterized_paths)
01239                 {
01240                     Path       *innerpath = (Path *) lfirst(lc2);
01241 
01242                     /*
01243                      * We cannot use an inner path that is parameterized by
01244                      * the outer rel, either.
01245                      */
01246                     if (PATH_PARAM_BY_REL(innerpath, outerrel))
01247                         continue;
01248 
01249                     if (outerpath == cheapest_startup_outer &&
01250                         innerpath == cheapest_total_inner)
01251                         continue;       /* already tried it */
01252 
01253                     try_hashjoin_path(root,
01254                                       joinrel,
01255                                       jointype,
01256                                       sjinfo,
01257                                       semifactors,
01258                                       param_source_rels,
01259                                       outerpath,
01260                                       innerpath,
01261                                       restrictlist,
01262                                       hashclauses);
01263                 }
01264             }
01265         }
01266     }
01267 }
01268 
01269 /*
01270  * select_mergejoin_clauses
01271  *    Select mergejoin clauses that are usable for a particular join.
01272  *    Returns a list of RestrictInfo nodes for those clauses.
01273  *
01274  * *mergejoin_allowed is normally set to TRUE, but it is set to FALSE if
01275  * this is a right/full join and there are nonmergejoinable join clauses.
01276  * The executor's mergejoin machinery cannot handle such cases, so we have
01277  * to avoid generating a mergejoin plan.  (Note that this flag does NOT
01278  * consider whether there are actually any mergejoinable clauses.  This is
01279  * correct because in some cases we need to build a clauseless mergejoin.
01280  * Simply returning NIL is therefore not enough to distinguish safe from
01281  * unsafe cases.)
01282  *
01283  * We also mark each selected RestrictInfo to show which side is currently
01284  * being considered as outer.  These are transient markings that are only
01285  * good for the duration of the current add_paths_to_joinrel() call!
01286  *
01287  * We examine each restrictinfo clause known for the join to see
01288  * if it is mergejoinable and involves vars from the two sub-relations
01289  * currently of interest.
01290  */
01291 static List *
01292 select_mergejoin_clauses(PlannerInfo *root,
01293                          RelOptInfo *joinrel,
01294                          RelOptInfo *outerrel,
01295                          RelOptInfo *innerrel,
01296                          List *restrictlist,
01297                          JoinType jointype,
01298                          bool *mergejoin_allowed)
01299 {
01300     List       *result_list = NIL;
01301     bool        isouterjoin = IS_OUTER_JOIN(jointype);
01302     bool        have_nonmergeable_joinclause = false;
01303     ListCell   *l;
01304 
01305     foreach(l, restrictlist)
01306     {
01307         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
01308 
01309         /*
01310          * If processing an outer join, only use its own join clauses in the
01311          * merge.  For inner joins we can use pushed-down clauses too. (Note:
01312          * we don't set have_nonmergeable_joinclause here because pushed-down
01313          * clauses will become otherquals not joinquals.)
01314          */
01315         if (isouterjoin && restrictinfo->is_pushed_down)
01316             continue;
01317 
01318         /* Check that clause is a mergeable operator clause */
01319         if (!restrictinfo->can_join ||
01320             restrictinfo->mergeopfamilies == NIL)
01321         {
01322             /*
01323              * The executor can handle extra joinquals that are constants, but
01324              * not anything else, when doing right/full merge join.  (The
01325              * reason to support constants is so we can do FULL JOIN ON
01326              * FALSE.)
01327              */
01328             if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
01329                 have_nonmergeable_joinclause = true;
01330             continue;           /* not mergejoinable */
01331         }
01332 
01333         /*
01334          * Check if clause has the form "outer op inner" or "inner op outer".
01335          */
01336         if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
01337         {
01338             have_nonmergeable_joinclause = true;
01339             continue;           /* no good for these input relations */
01340         }
01341 
01342         /*
01343          * Insist that each side have a non-redundant eclass.  This
01344          * restriction is needed because various bits of the planner expect
01345          * that each clause in a merge be associatable with some pathkey in a
01346          * canonical pathkey list, but redundant eclasses can't appear in
01347          * canonical sort orderings.  (XXX it might be worth relaxing this,
01348          * but not enough time to address it for 8.3.)
01349          *
01350          * Note: it would be bad if this condition failed for an otherwise
01351          * mergejoinable FULL JOIN clause, since that would result in
01352          * undesirable planner failure.  I believe that is not possible
01353          * however; a variable involved in a full join could only appear in
01354          * below_outer_join eclasses, which aren't considered redundant.
01355          *
01356          * This case *can* happen for left/right join clauses: the outer-side
01357          * variable could be equated to a constant.  Because we will propagate
01358          * that constant across the join clause, the loss of ability to do a
01359          * mergejoin is not really all that big a deal, and so it's not clear
01360          * that improving this is important.
01361          */
01362         update_mergeclause_eclasses(root, restrictinfo);
01363 
01364         if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
01365             EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
01366         {
01367             have_nonmergeable_joinclause = true;
01368             continue;           /* can't handle redundant eclasses */
01369         }
01370 
01371         result_list = lappend(result_list, restrictinfo);
01372     }
01373 
01374     /*
01375      * Report whether mergejoin is allowed (see comment at top of function).
01376      */
01377     switch (jointype)
01378     {
01379         case JOIN_RIGHT:
01380         case JOIN_FULL:
01381             *mergejoin_allowed = !have_nonmergeable_joinclause;
01382             break;
01383         default:
01384             *mergejoin_allowed = true;
01385             break;
01386     }
01387 
01388     return result_list;
01389 }