Header And Logo

PostgreSQL
| The world's most advanced open source database.

planmain.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * planmain.c
00004  *    Routines to plan a single query
00005  *
00006  * What's in a name, anyway?  The top-level entry point of the planner/
00007  * optimizer is over in planner.c, not here as you might think from the
00008  * file name.  But this is the main code for planning a basic join operation,
00009  * shorn of features like subselects, inheritance, aggregates, grouping,
00010  * and so on.  (Those are the things planner.c deals with.)
00011  *
00012  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00013  * Portions Copyright (c) 1994, Regents of the University of California
00014  *
00015  *
00016  * IDENTIFICATION
00017  *    src/backend/optimizer/plan/planmain.c
00018  *
00019  *-------------------------------------------------------------------------
00020  */
00021 #include "postgres.h"
00022 
00023 #include "miscadmin.h"
00024 #include "optimizer/cost.h"
00025 #include "optimizer/pathnode.h"
00026 #include "optimizer/paths.h"
00027 #include "optimizer/placeholder.h"
00028 #include "optimizer/planmain.h"
00029 #include "optimizer/tlist.h"
00030 #include "utils/selfuncs.h"
00031 
00032 
00033 /*
00034  * query_planner
00035  *    Generate a path (that is, a simplified plan) for a basic query,
00036  *    which may involve joins but not any fancier features.
00037  *
00038  * Since query_planner does not handle the toplevel processing (grouping,
00039  * sorting, etc) it cannot select the best path by itself.  It selects
00040  * two paths: the cheapest path that produces all the required tuples,
00041  * independent of any ordering considerations, and the cheapest path that
00042  * produces the expected fraction of the required tuples in the required
00043  * ordering, if there is a path that is cheaper for this than just sorting
00044  * the output of the cheapest overall path.  The caller (grouping_planner)
00045  * will make the final decision about which to use.
00046  *
00047  * Input parameters:
00048  * root describes the query to plan
00049  * tlist is the target list the query should produce
00050  *      (this is NOT necessarily root->parse->targetList!)
00051  * tuple_fraction is the fraction of tuples we expect will be retrieved
00052  * limit_tuples is a hard limit on number of tuples to retrieve,
00053  *      or -1 if no limit
00054  * qp_callback is a function to compute query_pathkeys once it's safe to do so
00055  * qp_extra is optional extra data to pass to qp_callback
00056  *
00057  * Output parameters:
00058  * *cheapest_path receives the overall-cheapest path for the query
00059  * *sorted_path receives the cheapest presorted path for the query,
00060  *              if any (NULL if there is no useful presorted path)
00061  * *num_groups receives the estimated number of groups, or 1 if query
00062  *              does not use grouping
00063  *
00064  * Note: the PlannerInfo node also includes a query_pathkeys field, which
00065  * tells query_planner the sort order that is desired in the final output
00066  * plan.  This value is *not* available at call time, but is computed by
00067  * qp_callback once we have completed merging the query's equivalence classes.
00068  * (We cannot construct canonical pathkeys until that's done.)
00069  *
00070  * tuple_fraction is interpreted as follows:
00071  *    0: expect all tuples to be retrieved (normal case)
00072  *    0 < tuple_fraction < 1: expect the given fraction of tuples available
00073  *      from the plan to be retrieved
00074  *    tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
00075  *      expected to be retrieved (ie, a LIMIT specification)
00076  * Note that a nonzero tuple_fraction could come from outer context; it is
00077  * therefore not redundant with limit_tuples.  We use limit_tuples to determine
00078  * whether a bounded sort can be used at runtime.
00079  */
00080 void
00081 query_planner(PlannerInfo *root, List *tlist,
00082               double tuple_fraction, double limit_tuples,
00083               query_pathkeys_callback qp_callback, void *qp_extra,
00084               Path **cheapest_path, Path **sorted_path,
00085               double *num_groups)
00086 {
00087     Query      *parse = root->parse;
00088     List       *joinlist;
00089     RelOptInfo *final_rel;
00090     Path       *cheapestpath;
00091     Path       *sortedpath;
00092     Index       rti;
00093     double      total_pages;
00094 
00095     /* Make tuple_fraction, limit_tuples accessible to lower-level routines */
00096     root->tuple_fraction = tuple_fraction;
00097     root->limit_tuples = limit_tuples;
00098 
00099     *num_groups = 1;            /* default result */
00100 
00101     /*
00102      * If the query has an empty join tree, then it's something easy like
00103      * "SELECT 2+2;" or "INSERT ... VALUES()".  Fall through quickly.
00104      */
00105     if (parse->jointree->fromlist == NIL)
00106     {
00107         /* We need a trivial path result */
00108         *cheapest_path = (Path *)
00109             create_result_path((List *) parse->jointree->quals);
00110         *sorted_path = NULL;
00111 
00112         /*
00113          * We still are required to call qp_callback, in case it's something
00114          * like "SELECT 2+2 ORDER BY 1".
00115          */
00116         root->canon_pathkeys = NIL;
00117         (*qp_callback) (root, qp_extra);
00118         return;
00119     }
00120 
00121     /*
00122      * Init planner lists to empty.
00123      *
00124      * NOTE: append_rel_list was set up by subquery_planner, so do not touch
00125      * here; eq_classes and minmax_aggs may contain data already, too.
00126      */
00127     root->join_rel_list = NIL;
00128     root->join_rel_hash = NULL;
00129     root->join_rel_level = NULL;
00130     root->join_cur_level = 0;
00131     root->canon_pathkeys = NIL;
00132     root->left_join_clauses = NIL;
00133     root->right_join_clauses = NIL;
00134     root->full_join_clauses = NIL;
00135     root->join_info_list = NIL;
00136     root->lateral_info_list = NIL;
00137     root->placeholder_list = NIL;
00138     root->initial_rels = NIL;
00139 
00140     /*
00141      * Make a flattened version of the rangetable for faster access (this is
00142      * OK because the rangetable won't change any more), and set up an empty
00143      * array for indexing base relations.
00144      */
00145     setup_simple_rel_arrays(root);
00146 
00147     /*
00148      * Construct RelOptInfo nodes for all base relations in query, and
00149      * indirectly for all appendrel member relations ("other rels").  This
00150      * will give us a RelOptInfo for every "simple" (non-join) rel involved in
00151      * the query.
00152      *
00153      * Note: the reason we find the rels by searching the jointree and
00154      * appendrel list, rather than just scanning the rangetable, is that the
00155      * rangetable may contain RTEs for rels not actively part of the query,
00156      * for example views.  We don't want to make RelOptInfos for them.
00157      */
00158     add_base_rels_to_query(root, (Node *) parse->jointree);
00159 
00160     /*
00161      * Examine the targetlist and join tree, adding entries to baserel
00162      * targetlists for all referenced Vars, and generating PlaceHolderInfo
00163      * entries for all referenced PlaceHolderVars.  Restrict and join clauses
00164      * are added to appropriate lists belonging to the mentioned relations. We
00165      * also build EquivalenceClasses for provably equivalent expressions. The
00166      * SpecialJoinInfo list is also built to hold information about join order
00167      * restrictions.  Finally, we form a target joinlist for make_one_rel() to
00168      * work from.
00169      */
00170     build_base_rel_tlists(root, tlist);
00171 
00172     find_placeholders_in_jointree(root);
00173 
00174     find_lateral_references(root);
00175 
00176     joinlist = deconstruct_jointree(root);
00177 
00178     /*
00179      * Create the LateralJoinInfo list now that we have finalized
00180      * PlaceHolderVar eval levels.
00181      */
00182     create_lateral_join_info(root);
00183 
00184     /*
00185      * Reconsider any postponed outer-join quals now that we have built up
00186      * equivalence classes.  (This could result in further additions or
00187      * mergings of classes.)
00188      */
00189     reconsider_outer_join_clauses(root);
00190 
00191     /*
00192      * If we formed any equivalence classes, generate additional restriction
00193      * clauses as appropriate.  (Implied join clauses are formed on-the-fly
00194      * later.)
00195      */
00196     generate_base_implied_equalities(root);
00197 
00198     /*
00199      * We have completed merging equivalence sets, so it's now possible to
00200      * generate pathkeys in canonical form; so compute query_pathkeys and
00201      * other pathkeys fields in PlannerInfo.
00202      */
00203     (*qp_callback) (root, qp_extra);
00204 
00205     /*
00206      * Examine any "placeholder" expressions generated during subquery pullup.
00207      * Make sure that the Vars they need are marked as needed at the relevant
00208      * join level.  This must be done before join removal because it might
00209      * cause Vars or placeholders to be needed above a join when they weren't
00210      * so marked before.
00211      */
00212     fix_placeholder_input_needed_levels(root);
00213 
00214     /*
00215      * Remove any useless outer joins.  Ideally this would be done during
00216      * jointree preprocessing, but the necessary information isn't available
00217      * until we've built baserel data structures and classified qual clauses.
00218      */
00219     joinlist = remove_useless_joins(root, joinlist);
00220 
00221     /*
00222      * Now distribute "placeholders" to base rels as needed.  This has to be
00223      * done after join removal because removal could change whether a
00224      * placeholder is evaluatable at a base rel.
00225      */
00226     add_placeholders_to_base_rels(root);
00227 
00228     /*
00229      * We should now have size estimates for every actual table involved in
00230      * the query, and we also know which if any have been deleted from the
00231      * query by join removal; so we can compute total_table_pages.
00232      *
00233      * Note that appendrels are not double-counted here, even though we don't
00234      * bother to distinguish RelOptInfos for appendrel parents, because the
00235      * parents will still have size zero.
00236      *
00237      * XXX if a table is self-joined, we will count it once per appearance,
00238      * which perhaps is the wrong thing ... but that's not completely clear,
00239      * and detecting self-joins here is difficult, so ignore it for now.
00240      */
00241     total_pages = 0;
00242     for (rti = 1; rti < root->simple_rel_array_size; rti++)
00243     {
00244         RelOptInfo *brel = root->simple_rel_array[rti];
00245 
00246         if (brel == NULL)
00247             continue;
00248 
00249         Assert(brel->relid == rti);     /* sanity check on array */
00250 
00251         if (brel->reloptkind == RELOPT_BASEREL ||
00252             brel->reloptkind == RELOPT_OTHER_MEMBER_REL)
00253             total_pages += (double) brel->pages;
00254     }
00255     root->total_table_pages = total_pages;
00256 
00257     /*
00258      * Ready to do the primary planning.
00259      */
00260     final_rel = make_one_rel(root, joinlist);
00261 
00262     if (!final_rel || !final_rel->cheapest_total_path ||
00263         final_rel->cheapest_total_path->param_info != NULL)
00264         elog(ERROR, "failed to construct the join relation");
00265 
00266     /*
00267      * If there's grouping going on, estimate the number of result groups. We
00268      * couldn't do this any earlier because it depends on relation size
00269      * estimates that were set up above.
00270      *
00271      * Then convert tuple_fraction to fractional form if it is absolute, and
00272      * adjust it based on the knowledge that grouping_planner will be doing
00273      * grouping or aggregation work with our result.
00274      *
00275      * This introduces some undesirable coupling between this code and
00276      * grouping_planner, but the alternatives seem even uglier; we couldn't
00277      * pass back completed paths without making these decisions here.
00278      */
00279     if (parse->groupClause)
00280     {
00281         List       *groupExprs;
00282 
00283         groupExprs = get_sortgrouplist_exprs(parse->groupClause,
00284                                              parse->targetList);
00285         *num_groups = estimate_num_groups(root,
00286                                           groupExprs,
00287                                           final_rel->rows);
00288 
00289         /*
00290          * In GROUP BY mode, an absolute LIMIT is relative to the number of
00291          * groups not the number of tuples.  If the caller gave us a fraction,
00292          * keep it as-is.  (In both cases, we are effectively assuming that
00293          * all the groups are about the same size.)
00294          */
00295         if (tuple_fraction >= 1.0)
00296             tuple_fraction /= *num_groups;
00297 
00298         /*
00299          * If both GROUP BY and ORDER BY are specified, we will need two
00300          * levels of sort --- and, therefore, certainly need to read all the
00301          * tuples --- unless ORDER BY is a subset of GROUP BY.  Likewise if we
00302          * have both DISTINCT and GROUP BY, or if we have a window
00303          * specification not compatible with the GROUP BY.
00304          */
00305         if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
00306             !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
00307          !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
00308             tuple_fraction = 0.0;
00309 
00310         /* In any case, limit_tuples shouldn't be specified here */
00311         Assert(limit_tuples < 0);
00312     }
00313     else if (parse->hasAggs || root->hasHavingQual)
00314     {
00315         /*
00316          * Ungrouped aggregate will certainly want to read all the tuples, and
00317          * it will deliver a single result row (so leave *num_groups 1).
00318          */
00319         tuple_fraction = 0.0;
00320 
00321         /* limit_tuples shouldn't be specified here */
00322         Assert(limit_tuples < 0);
00323     }
00324     else if (parse->distinctClause)
00325     {
00326         /*
00327          * Since there was no grouping or aggregation, it's reasonable to
00328          * assume the UNIQUE filter has effects comparable to GROUP BY. Return
00329          * the estimated number of output rows for use by caller. (If DISTINCT
00330          * is used with grouping, we ignore its effects for rowcount
00331          * estimation purposes; this amounts to assuming the grouped rows are
00332          * distinct already.)
00333          */
00334         List       *distinctExprs;
00335 
00336         distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
00337                                                 parse->targetList);
00338         *num_groups = estimate_num_groups(root,
00339                                           distinctExprs,
00340                                           final_rel->rows);
00341 
00342         /*
00343          * Adjust tuple_fraction the same way as for GROUP BY, too.
00344          */
00345         if (tuple_fraction >= 1.0)
00346             tuple_fraction /= *num_groups;
00347 
00348         /* limit_tuples shouldn't be specified here */
00349         Assert(limit_tuples < 0);
00350     }
00351     else
00352     {
00353         /*
00354          * Plain non-grouped, non-aggregated query: an absolute tuple fraction
00355          * can be divided by the number of tuples.
00356          */
00357         if (tuple_fraction >= 1.0)
00358             tuple_fraction /= final_rel->rows;
00359     }
00360 
00361     /*
00362      * Pick out the cheapest-total path and the cheapest presorted path for
00363      * the requested pathkeys (if there is one).  We should take the tuple
00364      * fraction into account when selecting the cheapest presorted path, but
00365      * not when selecting the cheapest-total path, since if we have to sort
00366      * then we'll have to fetch all the tuples.  (But there's a special case:
00367      * if query_pathkeys is NIL, meaning order doesn't matter, then the
00368      * "cheapest presorted" path will be the cheapest overall for the tuple
00369      * fraction.)
00370      *
00371      * The cheapest-total path is also the one to use if grouping_planner
00372      * decides to use hashed aggregation, so we return it separately even if
00373      * this routine thinks the presorted path is the winner.
00374      */
00375     cheapestpath = final_rel->cheapest_total_path;
00376 
00377     sortedpath =
00378         get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
00379                                                   root->query_pathkeys,
00380                                                   NULL,
00381                                                   tuple_fraction);
00382 
00383     /* Don't return same path in both guises; just wastes effort */
00384     if (sortedpath == cheapestpath)
00385         sortedpath = NULL;
00386 
00387     /*
00388      * Forget about the presorted path if it would be cheaper to sort the
00389      * cheapest-total path.  Here we need consider only the behavior at the
00390      * tuple fraction point.
00391      */
00392     if (sortedpath)
00393     {
00394         Path        sort_path;  /* dummy for result of cost_sort */
00395 
00396         if (root->query_pathkeys == NIL ||
00397             pathkeys_contained_in(root->query_pathkeys,
00398                                   cheapestpath->pathkeys))
00399         {
00400             /* No sort needed for cheapest path */
00401             sort_path.startup_cost = cheapestpath->startup_cost;
00402             sort_path.total_cost = cheapestpath->total_cost;
00403         }
00404         else
00405         {
00406             /* Figure cost for sorting */
00407             cost_sort(&sort_path, root, root->query_pathkeys,
00408                       cheapestpath->total_cost,
00409                       final_rel->rows, final_rel->width,
00410                       0.0, work_mem, limit_tuples);
00411         }
00412 
00413         if (compare_fractional_path_costs(sortedpath, &sort_path,
00414                                           tuple_fraction) > 0)
00415         {
00416             /* Presorted path is a loser */
00417             sortedpath = NULL;
00418         }
00419     }
00420 
00421     *cheapest_path = cheapestpath;
00422     *sorted_path = sortedpath;
00423 }