PostgreSQL Source Code: src/backend/optimizer/util/pathnode.c Source File

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * pathnode.c
00004  *    Routines to manipulate pathlists and create path nodes
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/optimizer/util/pathnode.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include <math.h>
00018 
00019 #include "miscadmin.h"
00020 #include "nodes/nodeFuncs.h"
00021 #include "optimizer/clauses.h"
00022 #include "optimizer/cost.h"
00023 #include "optimizer/pathnode.h"
00024 #include "optimizer/paths.h"
00025 #include "optimizer/restrictinfo.h"
00026 #include "optimizer/tlist.h"
00027 #include "parser/parsetree.h"
00028 #include "utils/lsyscache.h"
00029 #include "utils/selfuncs.h"
00030 
00031 
00032 typedef enum
00033 {
00034     COSTS_EQUAL,                /* path costs are fuzzily equal */
00035     COSTS_BETTER1,              /* first path is cheaper than second */
00036     COSTS_BETTER2,              /* second path is cheaper than first */
00037     COSTS_DIFFERENT             /* neither path dominates the other on cost */
00038 } PathCostComparison;
00039 
00040 static List *translate_sub_tlist(List *tlist, int relid);
00041 static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
00042 static Oid  distinct_col_search(int colno, List *colnos, List *opids);
00043 
00044 
00045 /*****************************************************************************
00046  *      MISC. PATH UTILITIES
00047  *****************************************************************************/
00048 
00049 /*
00050  * compare_path_costs
00051  *    Return -1, 0, or +1 according as path1 is cheaper, the same cost,
00052  *    or more expensive than path2 for the specified criterion.
00053  */
00054 int
00055 compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
00056 {
00057     if (criterion == STARTUP_COST)
00058     {
00059         if (path1->startup_cost < path2->startup_cost)
00060             return -1;
00061         if (path1->startup_cost > path2->startup_cost)
00062             return +1;
00063 
00064         /*
00065          * If paths have the same startup cost (not at all unlikely), order
00066          * them by total cost.
00067          */
00068         if (path1->total_cost < path2->total_cost)
00069             return -1;
00070         if (path1->total_cost > path2->total_cost)
00071             return +1;
00072     }
00073     else
00074     {
00075         if (path1->total_cost < path2->total_cost)
00076             return -1;
00077         if (path1->total_cost > path2->total_cost)
00078             return +1;
00079 
00080         /*
00081          * If paths have the same total cost, order them by startup cost.
00082          */
00083         if (path1->startup_cost < path2->startup_cost)
00084             return -1;
00085         if (path1->startup_cost > path2->startup_cost)
00086             return +1;
00087     }
00088     return 0;
00089 }
00090 
00091 /*
00092  * compare_path_fractional_costs
00093  *    Return -1, 0, or +1 according as path1 is cheaper, the same cost,
00094  *    or more expensive than path2 for fetching the specified fraction
00095  *    of the total tuples.
00096  *
00097  * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
00098  * path with the cheaper total_cost.
00099  */
00100 int
00101 compare_fractional_path_costs(Path *path1, Path *path2,
00102                               double fraction)
00103 {
00104     Cost        cost1,
00105                 cost2;
00106 
00107     if (fraction <= 0.0 || fraction >= 1.0)
00108         return compare_path_costs(path1, path2, TOTAL_COST);
00109     cost1 = path1->startup_cost +
00110         fraction * (path1->total_cost - path1->startup_cost);
00111     cost2 = path2->startup_cost +
00112         fraction * (path2->total_cost - path2->startup_cost);
00113     if (cost1 < cost2)
00114         return -1;
00115     if (cost1 > cost2)
00116         return +1;
00117     return 0;
00118 }
00119 
00120 /*
00121  * compare_path_costs_fuzzily
00122  *    Compare the costs of two paths to see if either can be said to
00123  *    dominate the other.
00124  *
00125  * We use fuzzy comparisons so that add_path() can avoid keeping both of
00126  * a pair of paths that really have insignificantly different cost.
00127  *
00128  * The fuzz_factor argument must be 1.0 plus delta, where delta is the
00129  * fraction of the smaller cost that is considered to be a significant
00130  * difference.  For example, fuzz_factor = 1.01 makes the fuzziness limit
00131  * be 1% of the smaller cost.
00132  *
00133  * The two paths are said to have "equal" costs if both startup and total
00134  * costs are fuzzily the same.  Path1 is said to be better than path2 if
00135  * it has fuzzily better startup cost and fuzzily no worse total cost,
00136  * or if it has fuzzily better total cost and fuzzily no worse startup cost.
00137  * Path2 is better than path1 if the reverse holds.  Finally, if one path
00138  * is fuzzily better than the other on startup cost and fuzzily worse on
00139  * total cost, we just say that their costs are "different", since neither
00140  * dominates the other across the whole performance spectrum.
00141  *
00142  * If consider_startup is false, then we don't care about keeping paths with
00143  * good startup cost, so we'll never return COSTS_DIFFERENT.
00144  *
00145  * This function also includes special hacks to support a policy enforced
00146  * by its sole caller, add_path(): paths that have any parameterization
00147  * cannot win comparisons on the grounds of having cheaper startup cost,
00148  * since we deem only total cost to be of interest for a parameterized path.
00149  * (Unparameterized paths are more common, so we check for this case last.)
00150  */
00151 static PathCostComparison
00152 compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
00153                            bool consider_startup)
00154 {
00155     /*
00156      * Check total cost first since it's more likely to be different; many
00157      * paths have zero startup cost.
00158      */
00159     if (path1->total_cost > path2->total_cost * fuzz_factor)
00160     {
00161         /* path1 fuzzily worse on total cost */
00162         if (consider_startup &&
00163             path2->startup_cost > path1->startup_cost * fuzz_factor &&
00164             path1->param_info == NULL)
00165         {
00166             /* ... but path2 fuzzily worse on startup, so DIFFERENT */
00167             return COSTS_DIFFERENT;
00168         }
00169         /* else path2 dominates */
00170         return COSTS_BETTER2;
00171     }
00172     if (path2->total_cost > path1->total_cost * fuzz_factor)
00173     {
00174         /* path2 fuzzily worse on total cost */
00175         if (consider_startup &&
00176             path1->startup_cost > path2->startup_cost * fuzz_factor &&
00177             path2->param_info == NULL)
00178         {
00179             /* ... but path1 fuzzily worse on startup, so DIFFERENT */
00180             return COSTS_DIFFERENT;
00181         }
00182         /* else path1 dominates */
00183         return COSTS_BETTER1;
00184     }
00185     /* fuzzily the same on total cost */
00186     /* (so we may as well compare startup cost, even if !consider_startup) */
00187     if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
00188         path2->param_info == NULL)
00189     {
00190         /* ... but path1 fuzzily worse on startup, so path2 wins */
00191         return COSTS_BETTER2;
00192     }
00193     if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
00194         path1->param_info == NULL)
00195     {
00196         /* ... but path2 fuzzily worse on startup, so path1 wins */
00197         return COSTS_BETTER1;
00198     }
00199     /* fuzzily the same on both costs */
00200     return COSTS_EQUAL;
00201 }
00202 
00203 /*
00204  * set_cheapest
00205  *    Find the minimum-cost paths from among a relation's paths,
00206  *    and save them in the rel's cheapest-path fields.
00207  *
00208  * cheapest_total_path is normally the cheapest-total-cost unparameterized
00209  * path; but if there are no unparameterized paths, we assign it to be the
00210  * best (cheapest least-parameterized) parameterized path.  However, only
00211  * unparameterized paths are considered candidates for cheapest_startup_path,
00212  * so that will be NULL if there are no unparameterized paths.
00213  *
00214  * The cheapest_parameterized_paths list collects all parameterized paths
00215  * that have survived the add_path() tournament for this relation.  (Since
00216  * add_path ignores pathkeys and startup cost for a parameterized path,
00217  * these will be paths that have best total cost or best row count for their
00218  * parameterization.)  cheapest_parameterized_paths always includes the
00219  * cheapest-total unparameterized path, too, if there is one; the users of
00220  * that list find it more convenient if that's included.
00221  *
00222  * This is normally called only after we've finished constructing the path
00223  * list for the rel node.
00224  */
00225 void
00226 set_cheapest(RelOptInfo *parent_rel)
00227 {
00228     Path       *cheapest_startup_path;
00229     Path       *cheapest_total_path;
00230     Path       *best_param_path;
00231     List       *parameterized_paths;
00232     ListCell   *p;
00233 
00234     Assert(IsA(parent_rel, RelOptInfo));
00235 
00236     if (parent_rel->pathlist == NIL)
00237         elog(ERROR, "could not devise a query plan for the given query");
00238 
00239     cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
00240     parameterized_paths = NIL;
00241 
00242     foreach(p, parent_rel->pathlist)
00243     {
00244         Path       *path = (Path *) lfirst(p);
00245         int         cmp;
00246 
00247         if (path->param_info)
00248         {
00249             /* Parameterized path, so add it to parameterized_paths */
00250             parameterized_paths = lappend(parameterized_paths, path);
00251 
00252             /*
00253              * If we have an unparameterized cheapest-total, we no longer care
00254              * about finding the best parameterized path, so move on.
00255              */
00256             if (cheapest_total_path)
00257                 continue;
00258 
00259             /*
00260              * Otherwise, track the best parameterized path, which is the one
00261              * with least total cost among those of the minimum
00262              * parameterization.
00263              */
00264             if (best_param_path == NULL)
00265                 best_param_path = path;
00266             else
00267             {
00268                 switch (bms_subset_compare(PATH_REQ_OUTER(path),
00269                                            PATH_REQ_OUTER(best_param_path)))
00270                 {
00271                     case BMS_EQUAL:
00272                         /* keep the cheaper one */
00273                         if (compare_path_costs(path, best_param_path,
00274                                                TOTAL_COST) < 0)
00275                             best_param_path = path;
00276                         break;
00277                     case BMS_SUBSET1:
00278                         /* new path is less-parameterized */
00279                         best_param_path = path;
00280                         break;
00281                     case BMS_SUBSET2:
00282                         /* old path is less-parameterized, keep it */
00283                         break;
00284                     case BMS_DIFFERENT:
00285                         /*
00286                          * This means that neither path has the least possible
00287                          * parameterization for the rel.  We'll sit on the old
00288                          * path until something better comes along.
00289                          */
00290                         break;
00291                 }
00292             }
00293         }
00294         else
00295         {
00296             /* Unparameterized path, so consider it for cheapest slots */
00297             if (cheapest_total_path == NULL)
00298             {
00299                 cheapest_startup_path = cheapest_total_path = path;
00300                 continue;
00301             }
00302 
00303             /*
00304              * If we find two paths of identical costs, try to keep the
00305              * better-sorted one.  The paths might have unrelated sort
00306              * orderings, in which case we can only guess which might be
00307              * better to keep, but if one is superior then we definitely
00308              * should keep that one.
00309              */
00310             cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
00311             if (cmp > 0 ||
00312                 (cmp == 0 &&
00313                  compare_pathkeys(cheapest_startup_path->pathkeys,
00314                                   path->pathkeys) == PATHKEYS_BETTER2))
00315                 cheapest_startup_path = path;
00316 
00317             cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
00318             if (cmp > 0 ||
00319                 (cmp == 0 &&
00320                  compare_pathkeys(cheapest_total_path->pathkeys,
00321                                   path->pathkeys) == PATHKEYS_BETTER2))
00322                 cheapest_total_path = path;
00323         }
00324     }
00325 
00326     /* Add cheapest unparameterized path, if any, to parameterized_paths */
00327     if (cheapest_total_path)
00328         parameterized_paths = lcons(cheapest_total_path, parameterized_paths);
00329 
00330     /*
00331      * If there is no unparameterized path, use the best parameterized path
00332      * as cheapest_total_path (but not as cheapest_startup_path).
00333      */
00334     if (cheapest_total_path == NULL)
00335         cheapest_total_path = best_param_path;
00336     Assert(cheapest_total_path != NULL);
00337 
00338     parent_rel->cheapest_startup_path = cheapest_startup_path;
00339     parent_rel->cheapest_total_path = cheapest_total_path;
00340     parent_rel->cheapest_unique_path = NULL;    /* computed only if needed */
00341     parent_rel->cheapest_parameterized_paths = parameterized_paths;
00342 }
00343 
00344 /*
00345  * add_path
00346  *    Consider a potential implementation path for the specified parent rel,
00347  *    and add it to the rel's pathlist if it is worthy of consideration.
00348  *    A path is worthy if it has a better sort order (better pathkeys) or
00349  *    cheaper cost (on either dimension), or generates fewer rows, than any
00350  *    existing path that has the same or superset parameterization rels.
00351  *
00352  *    We also remove from the rel's pathlist any old paths that are dominated
00353  *    by new_path --- that is, new_path is cheaper, at least as well ordered,
00354  *    generates no more rows, and requires no outer rels not required by the
00355  *    old path.
00356  *
00357  *    In most cases, a path with a superset parameterization will generate
00358  *    fewer rows (since it has more join clauses to apply), so that those two
00359  *    figures of merit move in opposite directions; this means that a path of
00360  *    one parameterization can seldom dominate a path of another.  But such
00361  *    cases do arise, so we make the full set of checks anyway.
00362  *
00363  *    There are two policy decisions embedded in this function, along with
00364  *    its sibling add_path_precheck: we treat all parameterized paths as
00365  *    having NIL pathkeys, and we ignore their startup costs, so that they
00366  *    compete only on parameterization, total cost and rowcount.  This is to
00367  *    reduce the number of parameterized paths that are kept.  See discussion
00368  *    in src/backend/optimizer/README.
00369  *
00370  *    Another policy that is enforced here is that we only consider cheap
00371  *    startup cost to be interesting if parent_rel->consider_startup is true.
00372  *
00373  *    The pathlist is kept sorted by total_cost, with cheaper paths
00374  *    at the front.  Within this routine, that's simply a speed hack:
00375  *    doing it that way makes it more likely that we will reject an inferior
00376  *    path after a few comparisons, rather than many comparisons.
00377  *    However, add_path_precheck relies on this ordering to exit early
00378  *    when possible.
00379  *
00380  *    NOTE: discarded Path objects are immediately pfree'd to reduce planner
00381  *    memory consumption.  We dare not try to free the substructure of a Path,
00382  *    since much of it may be shared with other Paths or the query tree itself;
00383  *    but just recycling discarded Path nodes is a very useful savings in
00384  *    a large join tree.  We can recycle the List nodes of pathlist, too.
00385  *
00386  *    BUT: we do not pfree IndexPath objects, since they may be referenced as
00387  *    children of BitmapHeapPaths as well as being paths in their own right.
00388  *
00389  * 'parent_rel' is the relation entry to which the path corresponds.
00390  * 'new_path' is a potential path for parent_rel.
00391  *
00392  * Returns nothing, but modifies parent_rel->pathlist.
00393  */
00394 void
00395 add_path(RelOptInfo *parent_rel, Path *new_path)
00396 {
00397     bool        accept_new = true;      /* unless we find a superior old path */
00398     ListCell   *insert_after = NULL;    /* where to insert new item */
00399     List       *new_path_pathkeys;
00400     ListCell   *p1;
00401     ListCell   *p1_prev;
00402     ListCell   *p1_next;
00403 
00404     /*
00405      * This is a convenient place to check for query cancel --- no part of the
00406      * planner goes very long without calling add_path().
00407      */
00408     CHECK_FOR_INTERRUPTS();
00409 
00410     /* Pretend parameterized paths have no pathkeys, per comment above */
00411     new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys;
00412 
00413     /*
00414      * Loop to check proposed new path against old paths.  Note it is possible
00415      * for more than one old path to be tossed out because new_path dominates
00416      * it.
00417      *
00418      * We can't use foreach here because the loop body may delete the current
00419      * list cell.
00420      */
00421     p1_prev = NULL;
00422     for (p1 = list_head(parent_rel->pathlist); p1 != NULL; p1 = p1_next)
00423     {
00424         Path       *old_path = (Path *) lfirst(p1);
00425         bool        remove_old = false; /* unless new proves superior */
00426         PathCostComparison costcmp;
00427         PathKeysComparison keyscmp;
00428         BMS_Comparison outercmp;
00429 
00430         p1_next = lnext(p1);
00431 
00432         /*
00433          * Do a fuzzy cost comparison with 1% fuzziness limit.  (XXX does this
00434          * percentage need to be user-configurable?)
00435          */
00436         costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
00437                                              parent_rel->consider_startup);
00438 
00439         /*
00440          * If the two paths compare differently for startup and total cost,
00441          * then we want to keep both, and we can skip comparing pathkeys and
00442          * required_outer rels.  If they compare the same, proceed with the
00443          * other comparisons.  Row count is checked last.  (We make the tests
00444          * in this order because the cost comparison is most likely to turn
00445          * out "different", and the pathkeys comparison next most likely.  As
00446          * explained above, row count very seldom makes a difference, so even
00447          * though it's cheap to compare there's not much point in checking it
00448          * earlier.)
00449          */
00450         if (costcmp != COSTS_DIFFERENT)
00451         {
00452             /* Similarly check to see if either dominates on pathkeys */
00453             List       *old_path_pathkeys;
00454 
00455             old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
00456             keyscmp = compare_pathkeys(new_path_pathkeys,
00457                                        old_path_pathkeys);
00458             if (keyscmp != PATHKEYS_DIFFERENT)
00459             {
00460                 switch (costcmp)
00461                 {
00462                     case COSTS_EQUAL:
00463                         outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
00464                                                    PATH_REQ_OUTER(old_path));
00465                         if (keyscmp == PATHKEYS_BETTER1)
00466                         {
00467                             if ((outercmp == BMS_EQUAL ||
00468                                  outercmp == BMS_SUBSET1) &&
00469                                 new_path->rows <= old_path->rows)
00470                                 remove_old = true;      /* new dominates old */
00471                         }
00472                         else if (keyscmp == PATHKEYS_BETTER2)
00473                         {
00474                             if ((outercmp == BMS_EQUAL ||
00475                                  outercmp == BMS_SUBSET2) &&
00476                                 new_path->rows >= old_path->rows)
00477                                 accept_new = false;     /* old dominates new */
00478                         }
00479                         else    /* keyscmp == PATHKEYS_EQUAL */
00480                         {
00481                             if (outercmp == BMS_EQUAL)
00482                             {
00483                                 /*
00484                                  * Same pathkeys and outer rels, and fuzzily
00485                                  * the same cost, so keep just one; to decide
00486                                  * which, first check rows and then do a fuzzy
00487                                  * cost comparison with very small fuzz limit.
00488                                  * (We used to do an exact cost comparison,
00489                                  * but that results in annoying
00490                                  * platform-specific plan variations due to
00491                                  * roundoff in the cost estimates.)  If things
00492                                  * are still tied, arbitrarily keep only the
00493                                  * old path.  Notice that we will keep only
00494                                  * the old path even if the less-fuzzy
00495                                  * comparison decides the startup and total
00496                                  * costs compare differently.
00497                                  */
00498                                 if (new_path->rows < old_path->rows)
00499                                     remove_old = true;  /* new dominates old */
00500                                 else if (new_path->rows > old_path->rows)
00501                                     accept_new = false; /* old dominates new */
00502                                 else if (compare_path_costs_fuzzily(new_path,
00503                                                                     old_path,
00504                                                                     1.0000000001,
00505                                                                     parent_rel->consider_startup) == COSTS_BETTER1)
00506                                     remove_old = true;  /* new dominates old */
00507                                 else
00508                                     accept_new = false; /* old equals or
00509                                                          * dominates new */
00510                             }
00511                             else if (outercmp == BMS_SUBSET1 &&
00512                                      new_path->rows <= old_path->rows)
00513                                 remove_old = true;      /* new dominates old */
00514                             else if (outercmp == BMS_SUBSET2 &&
00515                                      new_path->rows >= old_path->rows)
00516                                 accept_new = false;     /* old dominates new */
00517                             /* else different parameterizations, keep both */
00518                         }
00519                         break;
00520                     case COSTS_BETTER1:
00521                         if (keyscmp != PATHKEYS_BETTER2)
00522                         {
00523                             outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
00524                                                    PATH_REQ_OUTER(old_path));
00525                             if ((outercmp == BMS_EQUAL ||
00526                                  outercmp == BMS_SUBSET1) &&
00527                                 new_path->rows <= old_path->rows)
00528                                 remove_old = true;      /* new dominates old */
00529                         }
00530                         break;
00531                     case COSTS_BETTER2:
00532                         if (keyscmp != PATHKEYS_BETTER1)
00533                         {
00534                             outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
00535                                                    PATH_REQ_OUTER(old_path));
00536                             if ((outercmp == BMS_EQUAL ||
00537                                  outercmp == BMS_SUBSET2) &&
00538                                 new_path->rows >= old_path->rows)
00539                                 accept_new = false;     /* old dominates new */
00540                         }
00541                         break;
00542                     case COSTS_DIFFERENT:
00543 
00544                         /*
00545                          * can't get here, but keep this case to keep compiler
00546                          * quiet
00547                          */
00548                         break;
00549                 }
00550             }
00551         }
00552 
00553         /*
00554          * Remove current element from pathlist if dominated by new.
00555          */
00556         if (remove_old)
00557         {
00558             parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
00559                                                     p1, p1_prev);
00560 
00561             /*
00562              * Delete the data pointed-to by the deleted cell, if possible
00563              */
00564             if (!IsA(old_path, IndexPath))
00565                 pfree(old_path);
00566             /* p1_prev does not advance */
00567         }
00568         else
00569         {
00570             /* new belongs after this old path if it has cost >= old's */
00571             if (new_path->total_cost >= old_path->total_cost)
00572                 insert_after = p1;
00573             /* p1_prev advances */
00574             p1_prev = p1;
00575         }
00576 
00577         /*
00578          * If we found an old path that dominates new_path, we can quit
00579          * scanning the pathlist; we will not add new_path, and we assume
00580          * new_path cannot dominate any other elements of the pathlist.
00581          */
00582         if (!accept_new)
00583             break;
00584     }
00585 
00586     if (accept_new)
00587     {
00588         /* Accept the new path: insert it at proper place in pathlist */
00589         if (insert_after)
00590             lappend_cell(parent_rel->pathlist, insert_after, new_path);
00591         else
00592             parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
00593     }
00594     else
00595     {
00596         /* Reject and recycle the new path */
00597         if (!IsA(new_path, IndexPath))
00598             pfree(new_path);
00599     }
00600 }
00601 
00602 /*
00603  * add_path_precheck
00604  *    Check whether a proposed new path could possibly get accepted.
00605  *    We assume we know the path's pathkeys and parameterization accurately,
00606  *    and have lower bounds for its costs.
00607  *
00608  * Note that we do not know the path's rowcount, since getting an estimate for
00609  * that is too expensive to do before prechecking.  We assume here that paths
00610  * of a superset parameterization will generate fewer rows; if that holds,
00611  * then paths with different parameterizations cannot dominate each other
00612  * and so we can simply ignore existing paths of another parameterization.
00613  * (In the infrequent cases where that rule of thumb fails, add_path will
00614  * get rid of the inferior path.)
00615  *
00616  * At the time this is called, we haven't actually built a Path structure,
00617  * so the required information has to be passed piecemeal.
00618  */
00619 bool
00620 add_path_precheck(RelOptInfo *parent_rel,
00621                   Cost startup_cost, Cost total_cost,
00622                   List *pathkeys, Relids required_outer)
00623 {
00624     List       *new_path_pathkeys;
00625     ListCell   *p1;
00626 
00627     /* Pretend parameterized paths have no pathkeys, per add_path policy */
00628     new_path_pathkeys = required_outer ? NIL : pathkeys;
00629 
00630     foreach(p1, parent_rel->pathlist)
00631     {
00632         Path       *old_path = (Path *) lfirst(p1);
00633         PathKeysComparison keyscmp;
00634 
00635         /*
00636          * We are looking for an old_path with the same parameterization (and
00637          * by assumption the same rowcount) that dominates the new path on
00638          * pathkeys as well as both cost metrics.  If we find one, we can
00639          * reject the new path.
00640          *
00641          * For speed, we make exact rather than fuzzy cost comparisons. If an
00642          * old path dominates the new path exactly on both costs, it will
00643          * surely do so fuzzily.
00644          */
00645         if (total_cost >= old_path->total_cost)
00646         {
00647             /* can win on startup cost only if unparameterized */
00648             if (startup_cost >= old_path->startup_cost || required_outer)
00649             {
00650                 /* new path does not win on cost, so check pathkeys... */
00651                 List       *old_path_pathkeys;
00652 
00653                 old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
00654                 keyscmp = compare_pathkeys(new_path_pathkeys,
00655                                            old_path_pathkeys);
00656                 if (keyscmp == PATHKEYS_EQUAL ||
00657                     keyscmp == PATHKEYS_BETTER2)
00658                 {
00659                     /* new path does not win on pathkeys... */
00660                     if (bms_equal(required_outer, PATH_REQ_OUTER(old_path)))
00661                     {
00662                         /* Found an old path that dominates the new one */
00663                         return false;
00664                     }
00665                 }
00666             }
00667         }
00668         else
00669         {
00670             /*
00671              * Since the pathlist is sorted by total_cost, we can stop looking
00672              * once we reach a path with a total_cost larger than the new
00673              * path's.
00674              */
00675             break;
00676         }
00677     }
00678 
00679     return true;
00680 }
00681 
00682 
00683 /*****************************************************************************
00684  *      PATH NODE CREATION ROUTINES
00685  *****************************************************************************/
00686 
00687 /*
00688  * create_seqscan_path
00689  *    Creates a path corresponding to a sequential scan, returning the
00690  *    pathnode.
00691  */
00692 Path *
00693 create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
00694 {
00695     Path       *pathnode = makeNode(Path);
00696 
00697     pathnode->pathtype = T_SeqScan;
00698     pathnode->parent = rel;
00699     pathnode->param_info = get_baserel_parampathinfo(root, rel,
00700                                                      required_outer);
00701     pathnode->pathkeys = NIL;   /* seqscan has unordered result */
00702 
00703     cost_seqscan(pathnode, root, rel, pathnode->param_info);
00704 
00705     return pathnode;
00706 }
00707 
00708 /*
00709  * create_index_path
00710  *    Creates a path node for an index scan.
00711  *
00712  * 'index' is a usable index.
00713  * 'indexclauses' is a list of RestrictInfo nodes representing clauses
00714  *          to be used as index qual conditions in the scan.
00715  * 'indexclausecols' is an integer list of index column numbers (zero based)
00716  *          the indexclauses can be used with.
00717  * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
00718  *          to be used as index ordering operators in the scan.
00719  * 'indexorderbycols' is an integer list of index column numbers (zero based)
00720  *          the ordering operators can be used with.
00721  * 'pathkeys' describes the ordering of the path.
00722  * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
00723  *          for an ordered index, or NoMovementScanDirection for
00724  *          an unordered index.
00725  * 'indexonly' is true if an index-only scan is wanted.
00726  * 'required_outer' is the set of outer relids for a parameterized path.
00727  * 'loop_count' is the number of repetitions of the indexscan to factor into
00728  *      estimates of caching behavior.
00729  *
00730  * Returns the new path node.
00731  */
00732 IndexPath *
00733 create_index_path(PlannerInfo *root,
00734                   IndexOptInfo *index,
00735                   List *indexclauses,
00736                   List *indexclausecols,
00737                   List *indexorderbys,
00738                   List *indexorderbycols,
00739                   List *pathkeys,
00740                   ScanDirection indexscandir,
00741                   bool indexonly,
00742                   Relids required_outer,
00743                   double loop_count)
00744 {
00745     IndexPath  *pathnode = makeNode(IndexPath);
00746     RelOptInfo *rel = index->rel;
00747     List       *indexquals,
00748                *indexqualcols;
00749 
00750     pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
00751     pathnode->path.parent = rel;
00752     pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
00753                                                           required_outer);
00754     pathnode->path.pathkeys = pathkeys;
00755 
00756     /* Convert clauses to indexquals the executor can handle */
00757     expand_indexqual_conditions(index, indexclauses, indexclausecols,
00758                                 &indexquals, &indexqualcols);
00759 
00760     /* Fill in the pathnode */
00761     pathnode->indexinfo = index;
00762     pathnode->indexclauses = indexclauses;
00763     pathnode->indexquals = indexquals;
00764     pathnode->indexqualcols = indexqualcols;
00765     pathnode->indexorderbys = indexorderbys;
00766     pathnode->indexorderbycols = indexorderbycols;
00767     pathnode->indexscandir = indexscandir;
00768 
00769     cost_index(pathnode, root, loop_count);
00770 
00771     return pathnode;
00772 }
00773 
00774 /*
00775  * create_bitmap_heap_path
00776  *    Creates a path node for a bitmap scan.
00777  *
00778  * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
00779  * 'required_outer' is the set of outer relids for a parameterized path.
00780  * 'loop_count' is the number of repetitions of the indexscan to factor into
00781  *      estimates of caching behavior.
00782  *
00783  * loop_count should match the value used when creating the component
00784  * IndexPaths.
00785  */
00786 BitmapHeapPath *
00787 create_bitmap_heap_path(PlannerInfo *root,
00788                         RelOptInfo *rel,
00789                         Path *bitmapqual,
00790                         Relids required_outer,
00791                         double loop_count)
00792 {
00793     BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
00794 
00795     pathnode->path.pathtype = T_BitmapHeapScan;
00796     pathnode->path.parent = rel;
00797     pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
00798                                                           required_outer);
00799     pathnode->path.pathkeys = NIL;      /* always unordered */
00800 
00801     pathnode->bitmapqual = bitmapqual;
00802 
00803     cost_bitmap_heap_scan(&pathnode->path, root, rel,
00804                           pathnode->path.param_info,
00805                           bitmapqual, loop_count);
00806 
00807     return pathnode;
00808 }
00809 
00810 /*
00811  * create_bitmap_and_path
00812  *    Creates a path node representing a BitmapAnd.
00813  */
00814 BitmapAndPath *
00815 create_bitmap_and_path(PlannerInfo *root,
00816                        RelOptInfo *rel,
00817                        List *bitmapquals)
00818 {
00819     BitmapAndPath *pathnode = makeNode(BitmapAndPath);
00820 
00821     pathnode->path.pathtype = T_BitmapAnd;
00822     pathnode->path.parent = rel;
00823     pathnode->path.param_info = NULL;   /* not used in bitmap trees */
00824     pathnode->path.pathkeys = NIL;      /* always unordered */
00825 
00826     pathnode->bitmapquals = bitmapquals;
00827 
00828     /* this sets bitmapselectivity as well as the regular cost fields: */
00829     cost_bitmap_and_node(pathnode, root);
00830 
00831     return pathnode;
00832 }
00833 
00834 /*
00835  * create_bitmap_or_path
00836  *    Creates a path node representing a BitmapOr.
00837  */
00838 BitmapOrPath *
00839 create_bitmap_or_path(PlannerInfo *root,
00840                       RelOptInfo *rel,
00841                       List *bitmapquals)
00842 {
00843     BitmapOrPath *pathnode = makeNode(BitmapOrPath);
00844 
00845     pathnode->path.pathtype = T_BitmapOr;
00846     pathnode->path.parent = rel;
00847     pathnode->path.param_info = NULL;   /* not used in bitmap trees */
00848     pathnode->path.pathkeys = NIL;      /* always unordered */
00849 
00850     pathnode->bitmapquals = bitmapquals;
00851 
00852     /* this sets bitmapselectivity as well as the regular cost fields: */
00853     cost_bitmap_or_node(pathnode, root);
00854 
00855     return pathnode;
00856 }
00857 
00858 /*
00859  * create_tidscan_path
00860  *    Creates a path corresponding to a scan by TID, returning the pathnode.
00861  */
00862 TidPath *
00863 create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
00864                     Relids required_outer)
00865 {
00866     TidPath    *pathnode = makeNode(TidPath);
00867 
00868     pathnode->path.pathtype = T_TidScan;
00869     pathnode->path.parent = rel;
00870     pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
00871                                                           required_outer);
00872     pathnode->path.pathkeys = NIL;      /* always unordered */
00873 
00874     pathnode->tidquals = tidquals;
00875 
00876     cost_tidscan(&pathnode->path, root, rel, tidquals,
00877                  pathnode->path.param_info);
00878 
00879     return pathnode;
00880 }
00881 
00882 /*
00883  * create_append_path
00884  *    Creates a path corresponding to an Append plan, returning the
00885  *    pathnode.
00886  *
00887  * Note that we must handle subpaths = NIL, representing a dummy access path.
00888  */
00889 AppendPath *
00890 create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer)
00891 {
00892     AppendPath *pathnode = makeNode(AppendPath);
00893     ListCell   *l;
00894 
00895     pathnode->path.pathtype = T_Append;
00896     pathnode->path.parent = rel;
00897     pathnode->path.param_info = get_appendrel_parampathinfo(rel,
00898                                                             required_outer);
00899     pathnode->path.pathkeys = NIL;      /* result is always considered
00900                                          * unsorted */
00901     pathnode->subpaths = subpaths;
00902 
00903     /*
00904      * We don't bother with inventing a cost_append(), but just do it here.
00905      *
00906      * Compute rows and costs as sums of subplan rows and costs.  We charge
00907      * nothing extra for the Append itself, which perhaps is too optimistic,
00908      * but since it doesn't do any selection or projection, it is a pretty
00909      * cheap node.  If you change this, see also make_append().
00910      */
00911     pathnode->path.rows = 0;
00912     pathnode->path.startup_cost = 0;
00913     pathnode->path.total_cost = 0;
00914     foreach(l, subpaths)
00915     {
00916         Path       *subpath = (Path *) lfirst(l);
00917 
00918         pathnode->path.rows += subpath->rows;
00919 
00920         if (l == list_head(subpaths))   /* first node? */
00921             pathnode->path.startup_cost = subpath->startup_cost;
00922         pathnode->path.total_cost += subpath->total_cost;
00923 
00924         /* All child paths must have same parameterization */
00925         Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
00926     }
00927 
00928     return pathnode;
00929 }
00930 
00931 /*
00932  * create_merge_append_path
00933  *    Creates a path corresponding to a MergeAppend plan, returning the
00934  *    pathnode.
00935  */
00936 MergeAppendPath *
00937 create_merge_append_path(PlannerInfo *root,
00938                          RelOptInfo *rel,
00939                          List *subpaths,
00940                          List *pathkeys,
00941                          Relids required_outer)
00942 {
00943     MergeAppendPath *pathnode = makeNode(MergeAppendPath);
00944     Cost        input_startup_cost;
00945     Cost        input_total_cost;
00946     ListCell   *l;
00947 
00948     pathnode->path.pathtype = T_MergeAppend;
00949     pathnode->path.parent = rel;
00950     pathnode->path.param_info = get_appendrel_parampathinfo(rel,
00951                                                             required_outer);
00952     pathnode->path.pathkeys = pathkeys;
00953     pathnode->subpaths = subpaths;
00954 
00955     /*
00956      * Apply query-wide LIMIT if known and path is for sole base relation.
00957      * (Handling this at this low level is a bit klugy.)
00958      */
00959     if (bms_equal(rel->relids, root->all_baserels))
00960         pathnode->limit_tuples = root->limit_tuples;
00961     else
00962         pathnode->limit_tuples = -1.0;
00963 
00964     /*
00965      * Add up the sizes and costs of the input paths.
00966      */
00967     pathnode->path.rows = 0;
00968     input_startup_cost = 0;
00969     input_total_cost = 0;
00970     foreach(l, subpaths)
00971     {
00972         Path       *subpath = (Path *) lfirst(l);
00973 
00974         pathnode->path.rows += subpath->rows;
00975 
00976         if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
00977         {
00978             /* Subpath is adequately ordered, we won't need to sort it */
00979             input_startup_cost += subpath->startup_cost;
00980             input_total_cost += subpath->total_cost;
00981         }
00982         else
00983         {
00984             /* We'll need to insert a Sort node, so include cost for that */
00985             Path        sort_path;      /* dummy for result of cost_sort */
00986 
00987             cost_sort(&sort_path,
00988                       root,
00989                       pathkeys,
00990                       subpath->total_cost,
00991                       subpath->parent->tuples,
00992                       subpath->parent->width,
00993                       0.0,
00994                       work_mem,
00995                       pathnode->limit_tuples);
00996             input_startup_cost += sort_path.startup_cost;
00997             input_total_cost += sort_path.total_cost;
00998         }
00999 
01000         /* All child paths must have same parameterization */
01001         Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
01002     }
01003 
01004     /* Now we can compute total costs of the MergeAppend */
01005     cost_merge_append(&pathnode->path, root,
01006                       pathkeys, list_length(subpaths),
01007                       input_startup_cost, input_total_cost,
01008                       rel->tuples);
01009 
01010     return pathnode;
01011 }
01012 
01013 /*
01014  * create_result_path
01015  *    Creates a path representing a Result-and-nothing-else plan.
01016  *    This is only used for the case of a query with an empty jointree.
01017  */
01018 ResultPath *
01019 create_result_path(List *quals)
01020 {
01021     ResultPath *pathnode = makeNode(ResultPath);
01022 
01023     pathnode->path.pathtype = T_Result;
01024     pathnode->path.parent = NULL;
01025     pathnode->path.param_info = NULL;       /* there are no other rels... */
01026     pathnode->path.pathkeys = NIL;
01027     pathnode->quals = quals;
01028 
01029     /* Hardly worth defining a cost_result() function ... just do it */
01030     pathnode->path.rows = 1;
01031     pathnode->path.startup_cost = 0;
01032     pathnode->path.total_cost = cpu_tuple_cost;
01033 
01034     /*
01035      * In theory we should include the qual eval cost as well, but at present
01036      * that doesn't accomplish much except duplicate work that will be done
01037      * again in make_result; since this is only used for degenerate cases,
01038      * nothing interesting will be done with the path cost values...
01039      */
01040 
01041     return pathnode;
01042 }
01043 
01044 /*
01045  * create_material_path
01046  *    Creates a path corresponding to a Material plan, returning the
01047  *    pathnode.
01048  */
01049 MaterialPath *
01050 create_material_path(RelOptInfo *rel, Path *subpath)
01051 {
01052     MaterialPath *pathnode = makeNode(MaterialPath);
01053 
01054     Assert(subpath->parent == rel);
01055 
01056     pathnode->path.pathtype = T_Material;
01057     pathnode->path.parent = rel;
01058     pathnode->path.param_info = subpath->param_info;
01059     pathnode->path.pathkeys = subpath->pathkeys;
01060 
01061     pathnode->subpath = subpath;
01062 
01063     cost_material(&pathnode->path,
01064                   subpath->startup_cost,
01065                   subpath->total_cost,
01066                   subpath->rows,
01067                   rel->width);
01068 
01069     return pathnode;
01070 }
01071 
01072 /*
01073  * create_unique_path
01074  *    Creates a path representing elimination of distinct rows from the
01075  *    input data.  Distinct-ness is defined according to the needs of the
01076  *    semijoin represented by sjinfo.  If it is not possible to identify
01077  *    how to make the data unique, NULL is returned.
01078  *
01079  * If used at all, this is likely to be called repeatedly on the same rel;
01080  * and the input subpath should always be the same (the cheapest_total path
01081  * for the rel).  So we cache the result.
01082  */
01083 UniquePath *
01084 create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
01085                    SpecialJoinInfo *sjinfo)
01086 {
01087     UniquePath *pathnode;
01088     Path        sort_path;      /* dummy for result of cost_sort */
01089     Path        agg_path;       /* dummy for result of cost_agg */
01090     MemoryContext oldcontext;
01091     List       *in_operators;
01092     List       *uniq_exprs;
01093     bool        all_btree;
01094     bool        all_hash;
01095     int         numCols;
01096     ListCell   *lc;
01097 
01098     /* Caller made a mistake if subpath isn't cheapest_total ... */
01099     Assert(subpath == rel->cheapest_total_path);
01100     Assert(subpath->parent == rel);
01101     /* ... or if SpecialJoinInfo is the wrong one */
01102     Assert(sjinfo->jointype == JOIN_SEMI);
01103     Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
01104 
01105     /* If result already cached, return it */
01106     if (rel->cheapest_unique_path)
01107         return (UniquePath *) rel->cheapest_unique_path;
01108 
01109     /* If we previously failed, return NULL quickly */
01110     if (sjinfo->join_quals == NIL)
01111         return NULL;
01112 
01113     /*
01114      * We must ensure path struct and subsidiary data are allocated in main
01115      * planning context; otherwise GEQO memory management causes trouble.
01116      */
01117     oldcontext = MemoryContextSwitchTo(root->planner_cxt);
01118 
01119     /*----------
01120      * Look to see whether the semijoin's join quals consist of AND'ed
01121      * equality operators, with (only) RHS variables on only one side of
01122      * each one.  If so, we can figure out how to enforce uniqueness for
01123      * the RHS.
01124      *
01125      * Note that the input join_quals list is the list of quals that are
01126      * *syntactically* associated with the semijoin, which in practice means
01127      * the synthesized comparison list for an IN or the WHERE of an EXISTS.
01128      * Particularly in the latter case, it might contain clauses that aren't
01129      * *semantically* associated with the join, but refer to just one side or
01130      * the other.  We can ignore such clauses here, as they will just drop
01131      * down to be processed within one side or the other.  (It is okay to
01132      * consider only the syntactically-associated clauses here because for a
01133      * semijoin, no higher-level quals could refer to the RHS, and so there
01134      * can be no other quals that are semantically associated with this join.
01135      * We do things this way because it is useful to be able to run this test
01136      * before we have extracted the list of quals that are actually
01137      * semantically associated with the particular join.)
01138      *
01139      * Note that the in_operators list consists of the joinqual operators
01140      * themselves (but commuted if needed to put the RHS value on the right).
01141      * These could be cross-type operators, in which case the operator
01142      * actually needed for uniqueness is a related single-type operator.
01143      * We assume here that that operator will be available from the btree
01144      * or hash opclass when the time comes ... if not, create_unique_plan()
01145      * will fail.
01146      *----------
01147      */
01148     in_operators = NIL;
01149     uniq_exprs = NIL;
01150     all_btree = true;
01151     all_hash = enable_hashagg;  /* don't consider hash if not enabled */
01152     foreach(lc, sjinfo->join_quals)
01153     {
01154         OpExpr     *op = (OpExpr *) lfirst(lc);
01155         Oid         opno;
01156         Node       *left_expr;
01157         Node       *right_expr;
01158         Relids      left_varnos;
01159         Relids      right_varnos;
01160         Relids      all_varnos;
01161         Oid         opinputtype;
01162 
01163         /* Is it a binary opclause? */
01164         if (!IsA(op, OpExpr) ||
01165             list_length(op->args) != 2)
01166         {
01167             /* No, but does it reference both sides? */
01168             all_varnos = pull_varnos((Node *) op);
01169             if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
01170                 bms_is_subset(all_varnos, sjinfo->syn_righthand))
01171             {
01172                 /*
01173                  * Clause refers to only one rel, so ignore it --- unless it
01174                  * contains volatile functions, in which case we'd better
01175                  * punt.
01176                  */
01177                 if (contain_volatile_functions((Node *) op))
01178                     goto no_unique_path;
01179                 continue;
01180             }
01181             /* Non-operator clause referencing both sides, must punt */
01182             goto no_unique_path;
01183         }
01184 
01185         /* Extract data from binary opclause */
01186         opno = op->opno;
01187         left_expr = linitial(op->args);
01188         right_expr = lsecond(op->args);
01189         left_varnos = pull_varnos(left_expr);
01190         right_varnos = pull_varnos(right_expr);
01191         all_varnos = bms_union(left_varnos, right_varnos);
01192         opinputtype = exprType(left_expr);
01193 
01194         /* Does it reference both sides? */
01195         if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
01196             bms_is_subset(all_varnos, sjinfo->syn_righthand))
01197         {
01198             /*
01199              * Clause refers to only one rel, so ignore it --- unless it
01200              * contains volatile functions, in which case we'd better punt.
01201              */
01202             if (contain_volatile_functions((Node *) op))
01203                 goto no_unique_path;
01204             continue;
01205         }
01206 
01207         /* check rel membership of arguments */
01208         if (!bms_is_empty(right_varnos) &&
01209             bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
01210             !bms_overlap(left_varnos, sjinfo->syn_righthand))
01211         {
01212             /* typical case, right_expr is RHS variable */
01213         }
01214         else if (!bms_is_empty(left_varnos) &&
01215                  bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
01216                  !bms_overlap(right_varnos, sjinfo->syn_righthand))
01217         {
01218             /* flipped case, left_expr is RHS variable */
01219             opno = get_commutator(opno);
01220             if (!OidIsValid(opno))
01221                 goto no_unique_path;
01222             right_expr = left_expr;
01223         }
01224         else
01225             goto no_unique_path;
01226 
01227         /* all operators must be btree equality or hash equality */
01228         if (all_btree)
01229         {
01230             /* oprcanmerge is considered a hint... */
01231             if (!op_mergejoinable(opno, opinputtype) ||
01232                 get_mergejoin_opfamilies(opno) == NIL)
01233                 all_btree = false;
01234         }
01235         if (all_hash)
01236         {
01237             /* ... but oprcanhash had better be correct */
01238             if (!op_hashjoinable(opno, opinputtype))
01239                 all_hash = false;
01240         }
01241         if (!(all_btree || all_hash))
01242             goto no_unique_path;
01243 
01244         /* so far so good, keep building lists */
01245         in_operators = lappend_oid(in_operators, opno);
01246         uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
01247     }
01248 
01249     /* Punt if we didn't find at least one column to unique-ify */
01250     if (uniq_exprs == NIL)
01251         goto no_unique_path;
01252 
01253     /*
01254      * The expressions we'd need to unique-ify mustn't be volatile.
01255      */
01256     if (contain_volatile_functions((Node *) uniq_exprs))
01257         goto no_unique_path;
01258 
01259     /*
01260      * If we get here, we can unique-ify using at least one of sorting and
01261      * hashing.  Start building the result Path object.
01262      */
01263     pathnode = makeNode(UniquePath);
01264 
01265     pathnode->path.pathtype = T_Unique;
01266     pathnode->path.parent = rel;
01267     pathnode->path.param_info = subpath->param_info;
01268 
01269     /*
01270      * Assume the output is unsorted, since we don't necessarily have pathkeys
01271      * to represent it.  (This might get overridden below.)
01272      */
01273     pathnode->path.pathkeys = NIL;
01274 
01275     pathnode->subpath = subpath;
01276     pathnode->in_operators = in_operators;
01277     pathnode->uniq_exprs = uniq_exprs;
01278 
01279     /*
01280      * If the input is a relation and it has a unique index that proves the
01281      * uniq_exprs are unique, then we don't need to do anything.  Note that
01282      * relation_has_unique_index_for automatically considers restriction
01283      * clauses for the rel, as well.
01284      */
01285     if (rel->rtekind == RTE_RELATION && all_btree &&
01286         relation_has_unique_index_for(root, rel, NIL,
01287                                       uniq_exprs, in_operators))
01288     {
01289         pathnode->umethod = UNIQUE_PATH_NOOP;
01290         pathnode->path.rows = rel->rows;
01291         pathnode->path.startup_cost = subpath->startup_cost;
01292         pathnode->path.total_cost = subpath->total_cost;
01293         pathnode->path.pathkeys = subpath->pathkeys;
01294 
01295         rel->cheapest_unique_path = (Path *) pathnode;
01296 
01297         MemoryContextSwitchTo(oldcontext);
01298 
01299         return pathnode;
01300     }
01301 
01302     /*
01303      * If the input is a subquery whose output must be unique already, then we
01304      * don't need to do anything.  The test for uniqueness has to consider
01305      * exactly which columns we are extracting; for example "SELECT DISTINCT
01306      * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
01307      * this optimization unless uniq_exprs consists only of simple Vars
01308      * referencing subquery outputs.  (Possibly we could do something with
01309      * expressions in the subquery outputs, too, but for now keep it simple.)
01310      */
01311     if (rel->rtekind == RTE_SUBQUERY)
01312     {
01313         RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
01314         List       *sub_tlist_colnos;
01315 
01316         sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
01317 
01318         if (sub_tlist_colnos &&
01319             query_is_distinct_for(rte->subquery,
01320                                   sub_tlist_colnos, in_operators))
01321         {
01322             pathnode->umethod = UNIQUE_PATH_NOOP;
01323             pathnode->path.rows = rel->rows;
01324             pathnode->path.startup_cost = subpath->startup_cost;
01325             pathnode->path.total_cost = subpath->total_cost;
01326             pathnode->path.pathkeys = subpath->pathkeys;
01327 
01328             rel->cheapest_unique_path = (Path *) pathnode;
01329 
01330             MemoryContextSwitchTo(oldcontext);
01331 
01332             return pathnode;
01333         }
01334     }
01335 
01336     /* Estimate number of output rows */
01337     pathnode->path.rows = estimate_num_groups(root, uniq_exprs, rel->rows);
01338     numCols = list_length(uniq_exprs);
01339 
01340     if (all_btree)
01341     {
01342         /*
01343          * Estimate cost for sort+unique implementation
01344          */
01345         cost_sort(&sort_path, root, NIL,
01346                   subpath->total_cost,
01347                   rel->rows,
01348                   rel->width,
01349                   0.0,
01350                   work_mem,
01351                   -1.0);
01352 
01353         /*
01354          * Charge one cpu_operator_cost per comparison per input tuple. We
01355          * assume all columns get compared at most of the tuples. (XXX
01356          * probably this is an overestimate.)  This should agree with
01357          * make_unique.
01358          */
01359         sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
01360     }
01361 
01362     if (all_hash)
01363     {
01364         /*
01365          * Estimate the overhead per hashtable entry at 64 bytes (same as in
01366          * planner.c).
01367          */
01368         int         hashentrysize = rel->width + 64;
01369 
01370         if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
01371             all_hash = false;   /* don't try to hash */
01372         else
01373             cost_agg(&agg_path, root,
01374                      AGG_HASHED, NULL,
01375                      numCols, pathnode->path.rows,
01376                      subpath->startup_cost,
01377                      subpath->total_cost,
01378                      rel->rows);
01379     }
01380 
01381     if (all_btree && all_hash)
01382     {
01383         if (agg_path.total_cost < sort_path.total_cost)
01384             pathnode->umethod = UNIQUE_PATH_HASH;
01385         else
01386             pathnode->umethod = UNIQUE_PATH_SORT;
01387     }
01388     else if (all_btree)
01389         pathnode->umethod = UNIQUE_PATH_SORT;
01390     else if (all_hash)
01391         pathnode->umethod = UNIQUE_PATH_HASH;
01392     else
01393         goto no_unique_path;
01394 
01395     if (pathnode->umethod == UNIQUE_PATH_HASH)
01396     {
01397         pathnode->path.startup_cost = agg_path.startup_cost;
01398         pathnode->path.total_cost = agg_path.total_cost;
01399     }
01400     else
01401     {
01402         pathnode->path.startup_cost = sort_path.startup_cost;
01403         pathnode->path.total_cost = sort_path.total_cost;
01404     }
01405 
01406     rel->cheapest_unique_path = (Path *) pathnode;
01407 
01408     MemoryContextSwitchTo(oldcontext);
01409 
01410     return pathnode;
01411 
01412 no_unique_path:         /* failure exit */
01413 
01414     /* Mark the SpecialJoinInfo as not unique-able */
01415     sjinfo->join_quals = NIL;
01416 
01417     MemoryContextSwitchTo(oldcontext);
01418 
01419     return NULL;
01420 }
01421 
01422 /*
01423  * translate_sub_tlist - get subquery column numbers represented by tlist
01424  *
01425  * The given targetlist usually contains only Vars referencing the given relid.
01426  * Extract their varattnos (ie, the column numbers of the subquery) and return
01427  * as an integer List.
01428  *
01429  * If any of the tlist items is not a simple Var, we cannot determine whether
01430  * the subquery's uniqueness condition (if any) matches ours, so punt and
01431  * return NIL.
01432  */
01433 static List *
01434 translate_sub_tlist(List *tlist, int relid)
01435 {
01436     List       *result = NIL;
01437     ListCell   *l;
01438 
01439     foreach(l, tlist)
01440     {
01441         Var        *var = (Var *) lfirst(l);
01442 
01443         if (!var || !IsA(var, Var) ||
01444             var->varno != relid)
01445             return NIL;         /* punt */
01446 
01447         result = lappend_int(result, var->varattno);
01448     }
01449     return result;
01450 }
01451 
01452 /*
01453  * query_is_distinct_for - does query never return duplicates of the
01454  *      specified columns?
01455  *
01456  * colnos is an integer list of output column numbers (resno's).  We are
01457  * interested in whether rows consisting of just these columns are certain
01458  * to be distinct.  "Distinctness" is defined according to whether the
01459  * corresponding upper-level equality operators listed in opids would think
01460  * the values are distinct.  (Note: the opids entries could be cross-type
01461  * operators, and thus not exactly the equality operators that the subquery
01462  * would use itself.  We use equality_ops_are_compatible() to check
01463  * compatibility.  That looks at btree or hash opfamily membership, and so
01464  * should give trustworthy answers for all operators that we might need
01465  * to deal with here.)
01466  */
01467 static bool
01468 query_is_distinct_for(Query *query, List *colnos, List *opids)
01469 {
01470     ListCell   *l;
01471     Oid         opid;
01472 
01473     Assert(list_length(colnos) == list_length(opids));
01474 
01475     /*
01476      * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
01477      * columns in the DISTINCT clause appear in colnos and operator semantics
01478      * match.
01479      */
01480     if (query->distinctClause)
01481     {
01482         foreach(l, query->distinctClause)
01483         {
01484             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
01485             TargetEntry *tle = get_sortgroupclause_tle(sgc,
01486                                                        query->targetList);
01487 
01488             opid = distinct_col_search(tle->resno, colnos, opids);
01489             if (!OidIsValid(opid) ||
01490                 !equality_ops_are_compatible(opid, sgc->eqop))
01491                 break;          /* exit early if no match */
01492         }
01493         if (l == NULL)          /* had matches for all? */
01494             return true;
01495     }
01496 
01497     /*
01498      * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
01499      * appear in colnos and operator semantics match.
01500      */
01501     if (query->groupClause)
01502     {
01503         foreach(l, query->groupClause)
01504         {
01505             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
01506             TargetEntry *tle = get_sortgroupclause_tle(sgc,
01507                                                        query->targetList);
01508 
01509             opid = distinct_col_search(tle->resno, colnos, opids);
01510             if (!OidIsValid(opid) ||
01511                 !equality_ops_are_compatible(opid, sgc->eqop))
01512                 break;          /* exit early if no match */
01513         }
01514         if (l == NULL)          /* had matches for all? */
01515             return true;
01516     }
01517     else
01518     {
01519         /*
01520          * If we have no GROUP BY, but do have aggregates or HAVING, then the
01521          * result is at most one row so it's surely unique, for any operators.
01522          */
01523         if (query->hasAggs || query->havingQual)
01524             return true;
01525     }
01526 
01527     /*
01528      * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
01529      * except with ALL.
01530      */
01531     if (query->setOperations)
01532     {
01533         SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
01534 
01535         Assert(IsA(topop, SetOperationStmt));
01536         Assert(topop->op != SETOP_NONE);
01537 
01538         if (!topop->all)
01539         {
01540             ListCell   *lg;
01541 
01542             /* We're good if all the nonjunk output columns are in colnos */
01543             lg = list_head(topop->groupClauses);
01544             foreach(l, query->targetList)
01545             {
01546                 TargetEntry *tle = (TargetEntry *) lfirst(l);
01547                 SortGroupClause *sgc;
01548 
01549                 if (tle->resjunk)
01550                     continue;   /* ignore resjunk columns */
01551 
01552                 /* non-resjunk columns should have grouping clauses */
01553                 Assert(lg != NULL);
01554                 sgc = (SortGroupClause *) lfirst(lg);
01555                 lg = lnext(lg);
01556 
01557                 opid = distinct_col_search(tle->resno, colnos, opids);
01558                 if (!OidIsValid(opid) ||
01559                     !equality_ops_are_compatible(opid, sgc->eqop))
01560                     break;      /* exit early if no match */
01561             }
01562             if (l == NULL)      /* had matches for all? */
01563                 return true;
01564         }
01565     }
01566 
01567     /*
01568      * XXX Are there any other cases in which we can easily see the result
01569      * must be distinct?
01570      */
01571 
01572     return false;
01573 }
01574 
01575 /*
01576  * distinct_col_search - subroutine for query_is_distinct_for
01577  *
01578  * If colno is in colnos, return the corresponding element of opids,
01579  * else return InvalidOid.  (We expect colnos does not contain duplicates,
01580  * so the result is well-defined.)
01581  */
01582 static Oid
01583 distinct_col_search(int colno, List *colnos, List *opids)
01584 {
01585     ListCell   *lc1,
01586                *lc2;
01587 
01588     forboth(lc1, colnos, lc2, opids)
01589     {
01590         if (colno == lfirst_int(lc1))
01591             return lfirst_oid(lc2);
01592     }
01593     return InvalidOid;
01594 }
01595 
01596 /*
01597  * create_subqueryscan_path
01598  *    Creates a path corresponding to a sequential scan of a subquery,
01599  *    returning the pathnode.
01600  */
01601 Path *
01602 create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
01603                          List *pathkeys, Relids required_outer)
01604 {
01605     Path       *pathnode = makeNode(Path);
01606 
01607     pathnode->pathtype = T_SubqueryScan;
01608     pathnode->parent = rel;
01609     pathnode->param_info = get_baserel_parampathinfo(root, rel,
01610                                                      required_outer);
01611     pathnode->pathkeys = pathkeys;
01612 
01613     cost_subqueryscan(pathnode, root, rel, pathnode->param_info);
01614 
01615     return pathnode;
01616 }
01617 
01618 /*
01619  * create_functionscan_path
01620  *    Creates a path corresponding to a sequential scan of a function,
01621  *    returning the pathnode.
01622  */
01623 Path *
01624 create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
01625                          Relids required_outer)
01626 {
01627     Path       *pathnode = makeNode(Path);
01628 
01629     pathnode->pathtype = T_FunctionScan;
01630     pathnode->parent = rel;
01631     pathnode->param_info = get_baserel_parampathinfo(root, rel,
01632                                                      required_outer);
01633     pathnode->pathkeys = NIL;   /* for now, assume unordered result */
01634 
01635     cost_functionscan(pathnode, root, rel, pathnode->param_info);
01636 
01637     return pathnode;
01638 }
01639 
01640 /*
01641  * create_valuesscan_path
01642  *    Creates a path corresponding to a scan of a VALUES list,
01643  *    returning the pathnode.
01644  */
01645 Path *
01646 create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
01647                        Relids required_outer)
01648 {
01649     Path       *pathnode = makeNode(Path);
01650 
01651     pathnode->pathtype = T_ValuesScan;
01652     pathnode->parent = rel;
01653     pathnode->param_info = get_baserel_parampathinfo(root, rel,
01654                                                      required_outer);
01655     pathnode->pathkeys = NIL;   /* result is always unordered */
01656 
01657     cost_valuesscan(pathnode, root, rel, pathnode->param_info);
01658 
01659     return pathnode;
01660 }
01661 
01662 /*
01663  * create_ctescan_path
01664  *    Creates a path corresponding to a scan of a non-self-reference CTE,
01665  *    returning the pathnode.
01666  */
01667 Path *
01668 create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
01669 {
01670     Path       *pathnode = makeNode(Path);
01671 
01672     pathnode->pathtype = T_CteScan;
01673     pathnode->parent = rel;
01674     pathnode->param_info = get_baserel_parampathinfo(root, rel,
01675                                                      required_outer);
01676     pathnode->pathkeys = NIL;   /* XXX for now, result is always unordered */
01677 
01678     cost_ctescan(pathnode, root, rel, pathnode->param_info);
01679 
01680     return pathnode;
01681 }
01682 
01683 /*
01684  * create_worktablescan_path
01685  *    Creates a path corresponding to a scan of a self-reference CTE,
01686  *    returning the pathnode.
01687  */
01688 Path *
01689 create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
01690                           Relids required_outer)
01691 {
01692     Path       *pathnode = makeNode(Path);
01693 
01694     pathnode->pathtype = T_WorkTableScan;
01695     pathnode->parent = rel;
01696     pathnode->param_info = get_baserel_parampathinfo(root, rel,
01697                                                      required_outer);
01698     pathnode->pathkeys = NIL;   /* result is always unordered */
01699 
01700     /* Cost is the same as for a regular CTE scan */
01701     cost_ctescan(pathnode, root, rel, pathnode->param_info);
01702 
01703     return pathnode;
01704 }
01705 
01706 /*
01707  * create_foreignscan_path
01708  *    Creates a path corresponding to a scan of a foreign table,
01709  *    returning the pathnode.
01710  *
01711  * This function is never called from core Postgres; rather, it's expected
01712  * to be called by the GetForeignPaths function of a foreign data wrapper.
01713  * We make the FDW supply all fields of the path, since we do not have any
01714  * way to calculate them in core.
01715  */
01716 ForeignPath *
01717 create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
01718                         double rows, Cost startup_cost, Cost total_cost,
01719                         List *pathkeys,
01720                         Relids required_outer,
01721                         List *fdw_private)
01722 {
01723     ForeignPath *pathnode = makeNode(ForeignPath);
01724 
01725     pathnode->path.pathtype = T_ForeignScan;
01726     pathnode->path.parent = rel;
01727     pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
01728                                                           required_outer);
01729     pathnode->path.rows = rows;
01730     pathnode->path.startup_cost = startup_cost;
01731     pathnode->path.total_cost = total_cost;
01732     pathnode->path.pathkeys = pathkeys;
01733 
01734     pathnode->fdw_private = fdw_private;
01735 
01736     return pathnode;
01737 }
01738 
01739 /*
01740  * calc_nestloop_required_outer
01741  *    Compute the required_outer set for a nestloop join path
01742  *
01743  * Note: result must not share storage with either input
01744  */
01745 Relids
01746 calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
01747 {
01748     Relids      outer_paramrels = PATH_REQ_OUTER(outer_path);
01749     Relids      inner_paramrels = PATH_REQ_OUTER(inner_path);
01750     Relids      required_outer;
01751 
01752     /* inner_path can require rels from outer path, but not vice versa */
01753     Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids));
01754     /* easy case if inner path is not parameterized */
01755     if (!inner_paramrels)
01756         return bms_copy(outer_paramrels);
01757     /* else, form the union ... */
01758     required_outer = bms_union(outer_paramrels, inner_paramrels);
01759     /* ... and remove any mention of now-satisfied outer rels */
01760     required_outer = bms_del_members(required_outer,
01761                                      outer_path->parent->relids);
01762     /* maintain invariant that required_outer is exactly NULL if empty */
01763     if (bms_is_empty(required_outer))
01764     {
01765         bms_free(required_outer);
01766         required_outer = NULL;
01767     }
01768     return required_outer;
01769 }
01770 
01771 /*
01772  * calc_non_nestloop_required_outer
01773  *    Compute the required_outer set for a merge or hash join path
01774  *
01775  * Note: result must not share storage with either input
01776  */
01777 Relids
01778 calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
01779 {
01780     Relids      outer_paramrels = PATH_REQ_OUTER(outer_path);
01781     Relids      inner_paramrels = PATH_REQ_OUTER(inner_path);
01782     Relids      required_outer;
01783 
01784     /* neither path can require rels from the other */
01785     Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids));
01786     Assert(!bms_overlap(inner_paramrels, outer_path->parent->relids));
01787     /* form the union ... */
01788     required_outer = bms_union(outer_paramrels, inner_paramrels);
01789     /* we do not need an explicit test for empty; bms_union gets it right */
01790     return required_outer;
01791 }
01792 
01793 /*
01794  * create_nestloop_path
01795  *    Creates a pathnode corresponding to a nestloop join between two
01796  *    relations.
01797  *
01798  * 'joinrel' is the join relation.
01799  * 'jointype' is the type of join required
01800  * 'workspace' is the result from initial_cost_nestloop
01801  * 'sjinfo' is extra info about the join for selectivity estimation
01802  * 'semifactors' contains valid data if jointype is SEMI or ANTI
01803  * 'outer_path' is the outer path
01804  * 'inner_path' is the inner path
01805  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
01806  * 'pathkeys' are the path keys of the new join path
01807  * 'required_outer' is the set of required outer rels
01808  *
01809  * Returns the resulting path node.
01810  */
01811 NestPath *
01812 create_nestloop_path(PlannerInfo *root,
01813                      RelOptInfo *joinrel,
01814                      JoinType jointype,
01815                      JoinCostWorkspace *workspace,
01816                      SpecialJoinInfo *sjinfo,
01817                      SemiAntiJoinFactors *semifactors,
01818                      Path *outer_path,
01819                      Path *inner_path,
01820                      List *restrict_clauses,
01821                      List *pathkeys,
01822                      Relids required_outer)
01823 {
01824     NestPath   *pathnode = makeNode(NestPath);
01825     Relids      inner_req_outer = PATH_REQ_OUTER(inner_path);
01826 
01827     /*
01828      * If the inner path is parameterized by the outer, we must drop any
01829      * restrict_clauses that are due to be moved into the inner path.  We have
01830      * to do this now, rather than postpone the work till createplan time,
01831      * because the restrict_clauses list can affect the size and cost
01832      * estimates for this path.
01833      */
01834     if (bms_overlap(inner_req_outer, outer_path->parent->relids))
01835     {
01836         Relids      inner_and_outer = bms_union(inner_path->parent->relids,
01837                                                 inner_req_outer);
01838         List       *jclauses = NIL;
01839         ListCell   *lc;
01840 
01841         foreach(lc, restrict_clauses)
01842         {
01843             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
01844 
01845             if (!join_clause_is_movable_into(rinfo,
01846                                              inner_path->parent->relids,
01847                                              inner_and_outer))
01848                 jclauses = lappend(jclauses, rinfo);
01849         }
01850         restrict_clauses = jclauses;
01851     }
01852 
01853     pathnode->path.pathtype = T_NestLoop;
01854     pathnode->path.parent = joinrel;
01855     pathnode->path.param_info =
01856         get_joinrel_parampathinfo(root,
01857                                   joinrel,
01858                                   outer_path,
01859                                   inner_path,
01860                                   sjinfo,
01861                                   required_outer,
01862                                   &restrict_clauses);
01863     pathnode->path.pathkeys = pathkeys;
01864     pathnode->jointype = jointype;
01865     pathnode->outerjoinpath = outer_path;
01866     pathnode->innerjoinpath = inner_path;
01867     pathnode->joinrestrictinfo = restrict_clauses;
01868 
01869     final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
01870 
01871     return pathnode;
01872 }
01873 
01874 /*
01875  * create_mergejoin_path
01876  *    Creates a pathnode corresponding to a mergejoin join between
01877  *    two relations
01878  *
01879  * 'joinrel' is the join relation
01880  * 'jointype' is the type of join required
01881  * 'workspace' is the result from initial_cost_mergejoin
01882  * 'sjinfo' is extra info about the join for selectivity estimation
01883  * 'outer_path' is the outer path
01884  * 'inner_path' is the inner path
01885  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
01886  * 'pathkeys' are the path keys of the new join path
01887  * 'required_outer' is the set of required outer rels
01888  * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
01889  *      (this should be a subset of the restrict_clauses list)
01890  * 'outersortkeys' are the sort varkeys for the outer relation
01891  * 'innersortkeys' are the sort varkeys for the inner relation
01892  */
01893 MergePath *
01894 create_mergejoin_path(PlannerInfo *root,
01895                       RelOptInfo *joinrel,
01896                       JoinType jointype,
01897                       JoinCostWorkspace *workspace,
01898                       SpecialJoinInfo *sjinfo,
01899                       Path *outer_path,
01900                       Path *inner_path,
01901                       List *restrict_clauses,
01902                       List *pathkeys,
01903                       Relids required_outer,
01904                       List *mergeclauses,
01905                       List *outersortkeys,
01906                       List *innersortkeys)
01907 {
01908     MergePath  *pathnode = makeNode(MergePath);
01909 
01910     pathnode->jpath.path.pathtype = T_MergeJoin;
01911     pathnode->jpath.path.parent = joinrel;
01912     pathnode->jpath.path.param_info =
01913         get_joinrel_parampathinfo(root,
01914                                   joinrel,
01915                                   outer_path,
01916                                   inner_path,
01917                                   sjinfo,
01918                                   required_outer,
01919                                   &restrict_clauses);
01920     pathnode->jpath.path.pathkeys = pathkeys;
01921     pathnode->jpath.jointype = jointype;
01922     pathnode->jpath.outerjoinpath = outer_path;
01923     pathnode->jpath.innerjoinpath = inner_path;
01924     pathnode->jpath.joinrestrictinfo = restrict_clauses;
01925     pathnode->path_mergeclauses = mergeclauses;
01926     pathnode->outersortkeys = outersortkeys;
01927     pathnode->innersortkeys = innersortkeys;
01928     /* pathnode->materialize_inner will be set by final_cost_mergejoin */
01929 
01930     final_cost_mergejoin(root, pathnode, workspace, sjinfo);
01931 
01932     return pathnode;
01933 }
01934 
01935 /*
01936  * create_hashjoin_path
01937  *    Creates a pathnode corresponding to a hash join between two relations.
01938  *
01939  * 'joinrel' is the join relation
01940  * 'jointype' is the type of join required
01941  * 'workspace' is the result from initial_cost_hashjoin
01942  * 'sjinfo' is extra info about the join for selectivity estimation
01943  * 'semifactors' contains valid data if jointype is SEMI or ANTI
01944  * 'outer_path' is the cheapest outer path
01945  * 'inner_path' is the cheapest inner path
01946  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
01947  * 'required_outer' is the set of required outer rels
01948  * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
01949  *      (this should be a subset of the restrict_clauses list)
01950  */
01951 HashPath *
01952 create_hashjoin_path(PlannerInfo *root,
01953                      RelOptInfo *joinrel,
01954                      JoinType jointype,
01955                      JoinCostWorkspace *workspace,
01956                      SpecialJoinInfo *sjinfo,
01957                      SemiAntiJoinFactors *semifactors,
01958                      Path *outer_path,
01959                      Path *inner_path,
01960                      List *restrict_clauses,
01961                      Relids required_outer,
01962                      List *hashclauses)
01963 {
01964     HashPath   *pathnode = makeNode(HashPath);
01965 
01966     pathnode->jpath.path.pathtype = T_HashJoin;
01967     pathnode->jpath.path.parent = joinrel;
01968     pathnode->jpath.path.param_info =
01969         get_joinrel_parampathinfo(root,
01970                                   joinrel,
01971                                   outer_path,
01972                                   inner_path,
01973                                   sjinfo,
01974                                   required_outer,
01975                                   &restrict_clauses);
01976 
01977     /*
01978      * A hashjoin never has pathkeys, since its output ordering is
01979      * unpredictable due to possible batching.  XXX If the inner relation is
01980      * small enough, we could instruct the executor that it must not batch,
01981      * and then we could assume that the output inherits the outer relation's
01982      * ordering, which might save a sort step.  However there is considerable
01983      * downside if our estimate of the inner relation size is badly off. For
01984      * the moment we don't risk it.  (Note also that if we wanted to take this
01985      * seriously, joinpath.c would have to consider many more paths for the
01986      * outer rel than it does now.)
01987      */
01988     pathnode->jpath.path.pathkeys = NIL;
01989     pathnode->jpath.jointype = jointype;
01990     pathnode->jpath.outerjoinpath = outer_path;
01991     pathnode->jpath.innerjoinpath = inner_path;
01992     pathnode->jpath.joinrestrictinfo = restrict_clauses;
01993     pathnode->path_hashclauses = hashclauses;
01994     /* final_cost_hashjoin will fill in pathnode->num_batches */
01995 
01996     final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
01997 
01998     return pathnode;
01999 }
02000 
02001 /*
02002  * reparameterize_path
02003  *      Attempt to modify a Path to have greater parameterization
02004  *
02005  * We use this to attempt to bring all child paths of an appendrel to the
02006  * same parameterization level, ensuring that they all enforce the same set
02007  * of join quals (and thus that that parameterization can be attributed to
02008  * an append path built from such paths).  Currently, only a few path types
02009  * are supported here, though more could be added at need.  We return NULL
02010  * if we can't reparameterize the given path.
02011  *
02012  * Note: we intentionally do not pass created paths to add_path(); it would
02013  * possibly try to delete them on the grounds of being cost-inferior to the
02014  * paths they were made from, and we don't want that.  Paths made here are
02015  * not necessarily of general-purpose usefulness, but they can be useful
02016  * as members of an append path.
02017  */
02018 Path *
02019 reparameterize_path(PlannerInfo *root, Path *path,
02020                     Relids required_outer,
02021                     double loop_count)
02022 {
02023     RelOptInfo *rel = path->parent;
02024 
02025     /* Can only increase, not decrease, path's parameterization */
02026     if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
02027         return NULL;
02028     switch (path->pathtype)
02029     {
02030         case T_SeqScan:
02031             return create_seqscan_path(root, rel, required_outer);
02032         case T_IndexScan:
02033         case T_IndexOnlyScan:
02034             {
02035                 IndexPath  *ipath = (IndexPath *) path;
02036                 IndexPath  *newpath = makeNode(IndexPath);
02037 
02038                 /*
02039                  * We can't use create_index_path directly, and would not want
02040                  * to because it would re-compute the indexqual conditions
02041                  * which is wasted effort.  Instead we hack things a bit:
02042                  * flat-copy the path node, revise its param_info, and redo
02043                  * the cost estimate.
02044                  */
02045                 memcpy(newpath, ipath, sizeof(IndexPath));
02046                 newpath->path.param_info =
02047                     get_baserel_parampathinfo(root, rel, required_outer);
02048                 cost_index(newpath, root, loop_count);
02049                 return (Path *) newpath;
02050             }
02051         case T_BitmapHeapScan:
02052             {
02053                 BitmapHeapPath *bpath = (BitmapHeapPath *) path;
02054 
02055                 return (Path *) create_bitmap_heap_path(root,
02056                                                         rel,
02057                                                         bpath->bitmapqual,
02058                                                         required_outer,
02059                                                         loop_count);
02060             }
02061         case T_SubqueryScan:
02062             return create_subqueryscan_path(root, rel, path->pathkeys,
02063                                             required_outer);
02064         default:
02065             break;
02066     }
02067     return NULL;
02068 }
Header And Logo

pathnode.c