00001 /*------------------------------------------------------------------------- 00002 * 00003 * planmain.c 00004 * Routines to plan a single query 00005 * 00006 * What's in a name, anyway? The top-level entry point of the planner/ 00007 * optimizer is over in planner.c, not here as you might think from the 00008 * file name. But this is the main code for planning a basic join operation, 00009 * shorn of features like subselects, inheritance, aggregates, grouping, 00010 * and so on. (Those are the things planner.c deals with.) 00011 * 00012 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00013 * Portions Copyright (c) 1994, Regents of the University of California 00014 * 00015 * 00016 * IDENTIFICATION 00017 * src/backend/optimizer/plan/planmain.c 00018 * 00019 *------------------------------------------------------------------------- 00020 */ 00021 #include "postgres.h" 00022 00023 #include "miscadmin.h" 00024 #include "optimizer/cost.h" 00025 #include "optimizer/pathnode.h" 00026 #include "optimizer/paths.h" 00027 #include "optimizer/placeholder.h" 00028 #include "optimizer/planmain.h" 00029 #include "optimizer/tlist.h" 00030 #include "utils/selfuncs.h" 00031 00032 00033 /* 00034 * query_planner 00035 * Generate a path (that is, a simplified plan) for a basic query, 00036 * which may involve joins but not any fancier features. 00037 * 00038 * Since query_planner does not handle the toplevel processing (grouping, 00039 * sorting, etc) it cannot select the best path by itself. It selects 00040 * two paths: the cheapest path that produces all the required tuples, 00041 * independent of any ordering considerations, and the cheapest path that 00042 * produces the expected fraction of the required tuples in the required 00043 * ordering, if there is a path that is cheaper for this than just sorting 00044 * the output of the cheapest overall path. The caller (grouping_planner) 00045 * will make the final decision about which to use. 00046 * 00047 * Input parameters: 00048 * root describes the query to plan 00049 * tlist is the target list the query should produce 00050 * (this is NOT necessarily root->parse->targetList!) 00051 * tuple_fraction is the fraction of tuples we expect will be retrieved 00052 * limit_tuples is a hard limit on number of tuples to retrieve, 00053 * or -1 if no limit 00054 * qp_callback is a function to compute query_pathkeys once it's safe to do so 00055 * qp_extra is optional extra data to pass to qp_callback 00056 * 00057 * Output parameters: 00058 * *cheapest_path receives the overall-cheapest path for the query 00059 * *sorted_path receives the cheapest presorted path for the query, 00060 * if any (NULL if there is no useful presorted path) 00061 * *num_groups receives the estimated number of groups, or 1 if query 00062 * does not use grouping 00063 * 00064 * Note: the PlannerInfo node also includes a query_pathkeys field, which 00065 * tells query_planner the sort order that is desired in the final output 00066 * plan. This value is *not* available at call time, but is computed by 00067 * qp_callback once we have completed merging the query's equivalence classes. 00068 * (We cannot construct canonical pathkeys until that's done.) 00069 * 00070 * tuple_fraction is interpreted as follows: 00071 * 0: expect all tuples to be retrieved (normal case) 00072 * 0 < tuple_fraction < 1: expect the given fraction of tuples available 00073 * from the plan to be retrieved 00074 * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples 00075 * expected to be retrieved (ie, a LIMIT specification) 00076 * Note that a nonzero tuple_fraction could come from outer context; it is 00077 * therefore not redundant with limit_tuples. We use limit_tuples to determine 00078 * whether a bounded sort can be used at runtime. 00079 */ 00080 void 00081 query_planner(PlannerInfo *root, List *tlist, 00082 double tuple_fraction, double limit_tuples, 00083 query_pathkeys_callback qp_callback, void *qp_extra, 00084 Path **cheapest_path, Path **sorted_path, 00085 double *num_groups) 00086 { 00087 Query *parse = root->parse; 00088 List *joinlist; 00089 RelOptInfo *final_rel; 00090 Path *cheapestpath; 00091 Path *sortedpath; 00092 Index rti; 00093 double total_pages; 00094 00095 /* Make tuple_fraction, limit_tuples accessible to lower-level routines */ 00096 root->tuple_fraction = tuple_fraction; 00097 root->limit_tuples = limit_tuples; 00098 00099 *num_groups = 1; /* default result */ 00100 00101 /* 00102 * If the query has an empty join tree, then it's something easy like 00103 * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly. 00104 */ 00105 if (parse->jointree->fromlist == NIL) 00106 { 00107 /* We need a trivial path result */ 00108 *cheapest_path = (Path *) 00109 create_result_path((List *) parse->jointree->quals); 00110 *sorted_path = NULL; 00111 00112 /* 00113 * We still are required to call qp_callback, in case it's something 00114 * like "SELECT 2+2 ORDER BY 1". 00115 */ 00116 root->canon_pathkeys = NIL; 00117 (*qp_callback) (root, qp_extra); 00118 return; 00119 } 00120 00121 /* 00122 * Init planner lists to empty. 00123 * 00124 * NOTE: append_rel_list was set up by subquery_planner, so do not touch 00125 * here; eq_classes and minmax_aggs may contain data already, too. 00126 */ 00127 root->join_rel_list = NIL; 00128 root->join_rel_hash = NULL; 00129 root->join_rel_level = NULL; 00130 root->join_cur_level = 0; 00131 root->canon_pathkeys = NIL; 00132 root->left_join_clauses = NIL; 00133 root->right_join_clauses = NIL; 00134 root->full_join_clauses = NIL; 00135 root->join_info_list = NIL; 00136 root->lateral_info_list = NIL; 00137 root->placeholder_list = NIL; 00138 root->initial_rels = NIL; 00139 00140 /* 00141 * Make a flattened version of the rangetable for faster access (this is 00142 * OK because the rangetable won't change any more), and set up an empty 00143 * array for indexing base relations. 00144 */ 00145 setup_simple_rel_arrays(root); 00146 00147 /* 00148 * Construct RelOptInfo nodes for all base relations in query, and 00149 * indirectly for all appendrel member relations ("other rels"). This 00150 * will give us a RelOptInfo for every "simple" (non-join) rel involved in 00151 * the query. 00152 * 00153 * Note: the reason we find the rels by searching the jointree and 00154 * appendrel list, rather than just scanning the rangetable, is that the 00155 * rangetable may contain RTEs for rels not actively part of the query, 00156 * for example views. We don't want to make RelOptInfos for them. 00157 */ 00158 add_base_rels_to_query(root, (Node *) parse->jointree); 00159 00160 /* 00161 * Examine the targetlist and join tree, adding entries to baserel 00162 * targetlists for all referenced Vars, and generating PlaceHolderInfo 00163 * entries for all referenced PlaceHolderVars. Restrict and join clauses 00164 * are added to appropriate lists belonging to the mentioned relations. We 00165 * also build EquivalenceClasses for provably equivalent expressions. The 00166 * SpecialJoinInfo list is also built to hold information about join order 00167 * restrictions. Finally, we form a target joinlist for make_one_rel() to 00168 * work from. 00169 */ 00170 build_base_rel_tlists(root, tlist); 00171 00172 find_placeholders_in_jointree(root); 00173 00174 find_lateral_references(root); 00175 00176 joinlist = deconstruct_jointree(root); 00177 00178 /* 00179 * Create the LateralJoinInfo list now that we have finalized 00180 * PlaceHolderVar eval levels. 00181 */ 00182 create_lateral_join_info(root); 00183 00184 /* 00185 * Reconsider any postponed outer-join quals now that we have built up 00186 * equivalence classes. (This could result in further additions or 00187 * mergings of classes.) 00188 */ 00189 reconsider_outer_join_clauses(root); 00190 00191 /* 00192 * If we formed any equivalence classes, generate additional restriction 00193 * clauses as appropriate. (Implied join clauses are formed on-the-fly 00194 * later.) 00195 */ 00196 generate_base_implied_equalities(root); 00197 00198 /* 00199 * We have completed merging equivalence sets, so it's now possible to 00200 * generate pathkeys in canonical form; so compute query_pathkeys and 00201 * other pathkeys fields in PlannerInfo. 00202 */ 00203 (*qp_callback) (root, qp_extra); 00204 00205 /* 00206 * Examine any "placeholder" expressions generated during subquery pullup. 00207 * Make sure that the Vars they need are marked as needed at the relevant 00208 * join level. This must be done before join removal because it might 00209 * cause Vars or placeholders to be needed above a join when they weren't 00210 * so marked before. 00211 */ 00212 fix_placeholder_input_needed_levels(root); 00213 00214 /* 00215 * Remove any useless outer joins. Ideally this would be done during 00216 * jointree preprocessing, but the necessary information isn't available 00217 * until we've built baserel data structures and classified qual clauses. 00218 */ 00219 joinlist = remove_useless_joins(root, joinlist); 00220 00221 /* 00222 * Now distribute "placeholders" to base rels as needed. This has to be 00223 * done after join removal because removal could change whether a 00224 * placeholder is evaluatable at a base rel. 00225 */ 00226 add_placeholders_to_base_rels(root); 00227 00228 /* 00229 * We should now have size estimates for every actual table involved in 00230 * the query, and we also know which if any have been deleted from the 00231 * query by join removal; so we can compute total_table_pages. 00232 * 00233 * Note that appendrels are not double-counted here, even though we don't 00234 * bother to distinguish RelOptInfos for appendrel parents, because the 00235 * parents will still have size zero. 00236 * 00237 * XXX if a table is self-joined, we will count it once per appearance, 00238 * which perhaps is the wrong thing ... but that's not completely clear, 00239 * and detecting self-joins here is difficult, so ignore it for now. 00240 */ 00241 total_pages = 0; 00242 for (rti = 1; rti < root->simple_rel_array_size; rti++) 00243 { 00244 RelOptInfo *brel = root->simple_rel_array[rti]; 00245 00246 if (brel == NULL) 00247 continue; 00248 00249 Assert(brel->relid == rti); /* sanity check on array */ 00250 00251 if (brel->reloptkind == RELOPT_BASEREL || 00252 brel->reloptkind == RELOPT_OTHER_MEMBER_REL) 00253 total_pages += (double) brel->pages; 00254 } 00255 root->total_table_pages = total_pages; 00256 00257 /* 00258 * Ready to do the primary planning. 00259 */ 00260 final_rel = make_one_rel(root, joinlist); 00261 00262 if (!final_rel || !final_rel->cheapest_total_path || 00263 final_rel->cheapest_total_path->param_info != NULL) 00264 elog(ERROR, "failed to construct the join relation"); 00265 00266 /* 00267 * If there's grouping going on, estimate the number of result groups. We 00268 * couldn't do this any earlier because it depends on relation size 00269 * estimates that were set up above. 00270 * 00271 * Then convert tuple_fraction to fractional form if it is absolute, and 00272 * adjust it based on the knowledge that grouping_planner will be doing 00273 * grouping or aggregation work with our result. 00274 * 00275 * This introduces some undesirable coupling between this code and 00276 * grouping_planner, but the alternatives seem even uglier; we couldn't 00277 * pass back completed paths without making these decisions here. 00278 */ 00279 if (parse->groupClause) 00280 { 00281 List *groupExprs; 00282 00283 groupExprs = get_sortgrouplist_exprs(parse->groupClause, 00284 parse->targetList); 00285 *num_groups = estimate_num_groups(root, 00286 groupExprs, 00287 final_rel->rows); 00288 00289 /* 00290 * In GROUP BY mode, an absolute LIMIT is relative to the number of 00291 * groups not the number of tuples. If the caller gave us a fraction, 00292 * keep it as-is. (In both cases, we are effectively assuming that 00293 * all the groups are about the same size.) 00294 */ 00295 if (tuple_fraction >= 1.0) 00296 tuple_fraction /= *num_groups; 00297 00298 /* 00299 * If both GROUP BY and ORDER BY are specified, we will need two 00300 * levels of sort --- and, therefore, certainly need to read all the 00301 * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if we 00302 * have both DISTINCT and GROUP BY, or if we have a window 00303 * specification not compatible with the GROUP BY. 00304 */ 00305 if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) || 00306 !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) || 00307 !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys)) 00308 tuple_fraction = 0.0; 00309 00310 /* In any case, limit_tuples shouldn't be specified here */ 00311 Assert(limit_tuples < 0); 00312 } 00313 else if (parse->hasAggs || root->hasHavingQual) 00314 { 00315 /* 00316 * Ungrouped aggregate will certainly want to read all the tuples, and 00317 * it will deliver a single result row (so leave *num_groups 1). 00318 */ 00319 tuple_fraction = 0.0; 00320 00321 /* limit_tuples shouldn't be specified here */ 00322 Assert(limit_tuples < 0); 00323 } 00324 else if (parse->distinctClause) 00325 { 00326 /* 00327 * Since there was no grouping or aggregation, it's reasonable to 00328 * assume the UNIQUE filter has effects comparable to GROUP BY. Return 00329 * the estimated number of output rows for use by caller. (If DISTINCT 00330 * is used with grouping, we ignore its effects for rowcount 00331 * estimation purposes; this amounts to assuming the grouped rows are 00332 * distinct already.) 00333 */ 00334 List *distinctExprs; 00335 00336 distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, 00337 parse->targetList); 00338 *num_groups = estimate_num_groups(root, 00339 distinctExprs, 00340 final_rel->rows); 00341 00342 /* 00343 * Adjust tuple_fraction the same way as for GROUP BY, too. 00344 */ 00345 if (tuple_fraction >= 1.0) 00346 tuple_fraction /= *num_groups; 00347 00348 /* limit_tuples shouldn't be specified here */ 00349 Assert(limit_tuples < 0); 00350 } 00351 else 00352 { 00353 /* 00354 * Plain non-grouped, non-aggregated query: an absolute tuple fraction 00355 * can be divided by the number of tuples. 00356 */ 00357 if (tuple_fraction >= 1.0) 00358 tuple_fraction /= final_rel->rows; 00359 } 00360 00361 /* 00362 * Pick out the cheapest-total path and the cheapest presorted path for 00363 * the requested pathkeys (if there is one). We should take the tuple 00364 * fraction into account when selecting the cheapest presorted path, but 00365 * not when selecting the cheapest-total path, since if we have to sort 00366 * then we'll have to fetch all the tuples. (But there's a special case: 00367 * if query_pathkeys is NIL, meaning order doesn't matter, then the 00368 * "cheapest presorted" path will be the cheapest overall for the tuple 00369 * fraction.) 00370 * 00371 * The cheapest-total path is also the one to use if grouping_planner 00372 * decides to use hashed aggregation, so we return it separately even if 00373 * this routine thinks the presorted path is the winner. 00374 */ 00375 cheapestpath = final_rel->cheapest_total_path; 00376 00377 sortedpath = 00378 get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, 00379 root->query_pathkeys, 00380 NULL, 00381 tuple_fraction); 00382 00383 /* Don't return same path in both guises; just wastes effort */ 00384 if (sortedpath == cheapestpath) 00385 sortedpath = NULL; 00386 00387 /* 00388 * Forget about the presorted path if it would be cheaper to sort the 00389 * cheapest-total path. Here we need consider only the behavior at the 00390 * tuple fraction point. 00391 */ 00392 if (sortedpath) 00393 { 00394 Path sort_path; /* dummy for result of cost_sort */ 00395 00396 if (root->query_pathkeys == NIL || 00397 pathkeys_contained_in(root->query_pathkeys, 00398 cheapestpath->pathkeys)) 00399 { 00400 /* No sort needed for cheapest path */ 00401 sort_path.startup_cost = cheapestpath->startup_cost; 00402 sort_path.total_cost = cheapestpath->total_cost; 00403 } 00404 else 00405 { 00406 /* Figure cost for sorting */ 00407 cost_sort(&sort_path, root, root->query_pathkeys, 00408 cheapestpath->total_cost, 00409 final_rel->rows, final_rel->width, 00410 0.0, work_mem, limit_tuples); 00411 } 00412 00413 if (compare_fractional_path_costs(sortedpath, &sort_path, 00414 tuple_fraction) > 0) 00415 { 00416 /* Presorted path is a loser */ 00417 sortedpath = NULL; 00418 } 00419 } 00420 00421 *cheapest_path = cheapestpath; 00422 *sorted_path = sortedpath; 00423 }