Header And Logo

PostgreSQL
| The world's most advanced open source database.

planagg.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * planagg.c
00004  *    Special planning for aggregate queries.
00005  *
00006  * This module tries to replace MIN/MAX aggregate functions by subqueries
00007  * of the form
00008  *      (SELECT col FROM tab
00009  *       WHERE col IS NOT NULL AND existing-quals
00010  *       ORDER BY col ASC/DESC
00011  *       LIMIT 1)
00012  * Given a suitable index on tab.col, this can be much faster than the
00013  * generic scan-all-the-rows aggregation plan.  We can handle multiple
00014  * MIN/MAX aggregates by generating multiple subqueries, and their
00015  * orderings can be different.  However, if the query contains any
00016  * non-optimizable aggregates, there's no point since we'll have to
00017  * scan all the rows anyway.
00018  *
00019  *
00020  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00021  * Portions Copyright (c) 1994, Regents of the University of California
00022  *
00023  *
00024  * IDENTIFICATION
00025  *    src/backend/optimizer/plan/planagg.c
00026  *
00027  *-------------------------------------------------------------------------
00028  */
00029 #include "postgres.h"
00030 
00031 #include "access/htup_details.h"
00032 #include "catalog/pg_aggregate.h"
00033 #include "catalog/pg_type.h"
00034 #include "nodes/makefuncs.h"
00035 #include "nodes/nodeFuncs.h"
00036 #include "optimizer/clauses.h"
00037 #include "optimizer/cost.h"
00038 #include "optimizer/paths.h"
00039 #include "optimizer/planmain.h"
00040 #include "optimizer/planner.h"
00041 #include "optimizer/subselect.h"
00042 #include "parser/parsetree.h"
00043 #include "parser/parse_clause.h"
00044 #include "utils/lsyscache.h"
00045 #include "utils/syscache.h"
00046 
00047 
00048 static bool find_minmax_aggs_walker(Node *node, List **context);
00049 static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
00050                   Oid eqop, Oid sortop, bool nulls_first);
00051 static void minmax_qp_callback(PlannerInfo *root, void *extra);
00052 static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo);
00053 static Node *replace_aggs_with_params_mutator(Node *node, PlannerInfo *root);
00054 static Oid  fetch_agg_sort_op(Oid aggfnoid);
00055 
00056 
00057 /*
00058  * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
00059  *
00060  * Check to see whether the query contains MIN/MAX aggregate functions that
00061  * might be optimizable via indexscans.  If it does, and all the aggregates
00062  * are potentially optimizable, then set up root->minmax_aggs with a list of
00063  * these aggregates.
00064  *
00065  * Note: we are passed the preprocessed targetlist separately, because it's
00066  * not necessarily equal to root->parse->targetList.
00067  */
00068 void
00069 preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
00070 {
00071     Query      *parse = root->parse;
00072     FromExpr   *jtnode;
00073     RangeTblRef *rtr;
00074     RangeTblEntry *rte;
00075     List       *aggs_list;
00076     ListCell   *lc;
00077 
00078     /* minmax_aggs list should be empty at this point */
00079     Assert(root->minmax_aggs == NIL);
00080 
00081     /* Nothing to do if query has no aggregates */
00082     if (!parse->hasAggs)
00083         return;
00084 
00085     Assert(!parse->setOperations);      /* shouldn't get here if a setop */
00086     Assert(parse->rowMarks == NIL);     /* nor if FOR UPDATE */
00087 
00088     /*
00089      * Reject unoptimizable cases.
00090      *
00091      * We don't handle GROUP BY or windowing, because our current
00092      * implementations of grouping require looking at all the rows anyway, and
00093      * so there's not much point in optimizing MIN/MAX.  (Note: relaxing this
00094      * would likely require some restructuring in grouping_planner(), since it
00095      * performs assorted processing related to these features between calling
00096      * preprocess_minmax_aggregates and optimize_minmax_aggregates.)
00097      */
00098     if (parse->groupClause || parse->hasWindowFuncs)
00099         return;
00100 
00101     /*
00102      * We also restrict the query to reference exactly one table, since join
00103      * conditions can't be handled reasonably.  (We could perhaps handle a
00104      * query containing cartesian-product joins, but it hardly seems worth the
00105      * trouble.)  However, the single table could be buried in several levels
00106      * of FromExpr due to subqueries.  Note the "single" table could be an
00107      * inheritance parent, too, including the case of a UNION ALL subquery
00108      * that's been flattened to an appendrel.
00109      */
00110     jtnode = parse->jointree;
00111     while (IsA(jtnode, FromExpr))
00112     {
00113         if (list_length(jtnode->fromlist) != 1)
00114             return;
00115         jtnode = linitial(jtnode->fromlist);
00116     }
00117     if (!IsA(jtnode, RangeTblRef))
00118         return;
00119     rtr = (RangeTblRef *) jtnode;
00120     rte = planner_rt_fetch(rtr->rtindex, root);
00121     if (rte->rtekind == RTE_RELATION)
00122          /* ordinary relation, ok */ ;
00123     else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
00124          /* flattened UNION ALL subquery, ok */ ;
00125     else
00126         return;
00127 
00128     /*
00129      * Scan the tlist and HAVING qual to find all the aggregates and verify
00130      * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
00131      */
00132     aggs_list = NIL;
00133     if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
00134         return;
00135     if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
00136         return;
00137 
00138     /*
00139      * OK, there is at least the possibility of performing the optimization.
00140      * Build an access path for each aggregate.  (We must do this now because
00141      * we need to call query_planner with a pristine copy of the current query
00142      * tree; it'll be too late when optimize_minmax_aggregates gets called.)
00143      * If any of the aggregates prove to be non-indexable, give up; there is
00144      * no point in optimizing just some of them.
00145      */
00146     foreach(lc, aggs_list)
00147     {
00148         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
00149         Oid         eqop;
00150         bool        reverse;
00151 
00152         /*
00153          * We'll need the equality operator that goes with the aggregate's
00154          * ordering operator.
00155          */
00156         eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
00157         if (!OidIsValid(eqop))  /* shouldn't happen */
00158             elog(ERROR, "could not find equality operator for ordering operator %u",
00159                  mminfo->aggsortop);
00160 
00161         /*
00162          * We can use either an ordering that gives NULLS FIRST or one that
00163          * gives NULLS LAST; furthermore there's unlikely to be much
00164          * performance difference between them, so it doesn't seem worth
00165          * costing out both ways if we get a hit on the first one.  NULLS
00166          * FIRST is more likely to be available if the operator is a
00167          * reverse-sort operator, so try that first if reverse.
00168          */
00169         if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
00170             continue;
00171         if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
00172             continue;
00173 
00174         /* No indexable path for this aggregate, so fail */
00175         return;
00176     }
00177 
00178     /*
00179      * We're done until path generation is complete.  Save info for later.
00180      * (Setting root->minmax_aggs non-NIL signals we succeeded in making index
00181      * access paths for all the aggregates.)
00182      */
00183     root->minmax_aggs = aggs_list;
00184 }
00185 
00186 /*
00187  * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
00188  *
00189  * Check to see whether using the aggregate indexscans is cheaper than the
00190  * generic aggregate method.  If so, generate and return a Plan that does it
00191  * that way.  Otherwise, return NULL.
00192  *
00193  * Note: it seems likely that the generic method will never be cheaper
00194  * in practice, except maybe for tiny tables where it'd hardly matter.
00195  * Should we skip even trying to build the standard plan, if
00196  * preprocess_minmax_aggregates succeeds?
00197  *
00198  * We are passed the preprocessed tlist, as well as the estimated costs for
00199  * doing the aggregates the regular way, and the best path devised for
00200  * computing the input of a standard Agg node.
00201  */
00202 Plan *
00203 optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
00204                            const AggClauseCosts *aggcosts, Path *best_path)
00205 {
00206     Query      *parse = root->parse;
00207     Cost        total_cost;
00208     Path        agg_p;
00209     Plan       *plan;
00210     Node       *hqual;
00211     ListCell   *lc;
00212 
00213     /* Nothing to do if preprocess_minmax_aggs rejected the query */
00214     if (root->minmax_aggs == NIL)
00215         return NULL;
00216 
00217     /*
00218      * Now we have enough info to compare costs against the generic aggregate
00219      * implementation.
00220      *
00221      * Note that we don't include evaluation cost of the tlist here; this is
00222      * OK since it isn't included in best_path's cost either, and should be
00223      * the same in either case.
00224      */
00225     total_cost = 0;
00226     foreach(lc, root->minmax_aggs)
00227     {
00228         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
00229 
00230         total_cost += mminfo->pathcost;
00231     }
00232 
00233     cost_agg(&agg_p, root, AGG_PLAIN, aggcosts,
00234              0, 0,
00235              best_path->startup_cost, best_path->total_cost,
00236              best_path->parent->rows);
00237 
00238     if (total_cost > agg_p.total_cost)
00239         return NULL;            /* too expensive */
00240 
00241     /*
00242      * OK, we are going to generate an optimized plan.
00243      *
00244      * First, generate a subplan and output Param node for each agg.
00245      */
00246     foreach(lc, root->minmax_aggs)
00247     {
00248         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
00249 
00250         make_agg_subplan(root, mminfo);
00251     }
00252 
00253     /*
00254      * Modify the targetlist and HAVING qual to reference subquery outputs
00255      */
00256     tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist, root);
00257     hqual = replace_aggs_with_params_mutator(parse->havingQual, root);
00258 
00259     /*
00260      * We have to replace Aggrefs with Params in equivalence classes too, else
00261      * ORDER BY or DISTINCT on an optimized aggregate will fail.  We don't
00262      * need to process child eclass members though, since they aren't of
00263      * interest anymore --- and replace_aggs_with_params_mutator isn't able
00264      * to handle Aggrefs containing translated child Vars, anyway.
00265      *
00266      * Note: at some point it might become necessary to mutate other data
00267      * structures too, such as the query's sortClause or distinctClause. Right
00268      * now, those won't be examined after this point.
00269      */
00270     mutate_eclass_expressions(root,
00271                               replace_aggs_with_params_mutator,
00272                               (void *) root,
00273                               false);
00274 
00275     /*
00276      * Generate the output plan --- basically just a Result
00277      */
00278     plan = (Plan *) make_result(root, tlist, hqual, NULL);
00279 
00280     /* Account for evaluation cost of the tlist (make_result did the rest) */
00281     add_tlist_costs_to_plan(root, plan, tlist);
00282 
00283     return plan;
00284 }
00285 
00286 /*
00287  * find_minmax_aggs_walker
00288  *      Recursively scan the Aggref nodes in an expression tree, and check
00289  *      that each one is a MIN/MAX aggregate.  If so, build a list of the
00290  *      distinct aggregate calls in the tree.
00291  *
00292  * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
00293  * (This seemingly-backward definition is used because expression_tree_walker
00294  * aborts the scan on TRUE return, which is what we want.)
00295  *
00296  * Found aggregates are added to the list at *context; it's up to the caller
00297  * to initialize the list to NIL.
00298  *
00299  * This does not descend into subqueries, and so should be used only after
00300  * reduction of sublinks to subplans.  There mustn't be outer-aggregate
00301  * references either.
00302  */
00303 static bool
00304 find_minmax_aggs_walker(Node *node, List **context)
00305 {
00306     if (node == NULL)
00307         return false;
00308     if (IsA(node, Aggref))
00309     {
00310         Aggref     *aggref = (Aggref *) node;
00311         Oid         aggsortop;
00312         TargetEntry *curTarget;
00313         MinMaxAggInfo *mminfo;
00314         ListCell   *l;
00315 
00316         Assert(aggref->agglevelsup == 0);
00317         if (list_length(aggref->args) != 1 || aggref->aggorder != NIL)
00318             return true;        /* it couldn't be MIN/MAX */
00319         /* note: we do not care if DISTINCT is mentioned ... */
00320         curTarget = (TargetEntry *) linitial(aggref->args);
00321 
00322         aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
00323         if (!OidIsValid(aggsortop))
00324             return true;        /* not a MIN/MAX aggregate */
00325 
00326         if (contain_mutable_functions((Node *) curTarget->expr))
00327             return true;        /* not potentially indexable */
00328 
00329         if (type_is_rowtype(exprType((Node *) curTarget->expr)))
00330             return true;        /* IS NOT NULL would have weird semantics */
00331 
00332         /*
00333          * Check whether it's already in the list, and add it if not.
00334          */
00335         foreach(l, *context)
00336         {
00337             mminfo = (MinMaxAggInfo *) lfirst(l);
00338             if (mminfo->aggfnoid == aggref->aggfnoid &&
00339                 equal(mminfo->target, curTarget->expr))
00340                 return false;
00341         }
00342 
00343         mminfo = makeNode(MinMaxAggInfo);
00344         mminfo->aggfnoid = aggref->aggfnoid;
00345         mminfo->aggsortop = aggsortop;
00346         mminfo->target = curTarget->expr;
00347         mminfo->subroot = NULL; /* don't compute path yet */
00348         mminfo->path = NULL;
00349         mminfo->pathcost = 0;
00350         mminfo->param = NULL;
00351 
00352         *context = lappend(*context, mminfo);
00353 
00354         /*
00355          * We need not recurse into the argument, since it can't contain any
00356          * aggregates.
00357          */
00358         return false;
00359     }
00360     Assert(!IsA(node, SubLink));
00361     return expression_tree_walker(node, find_minmax_aggs_walker,
00362                                   (void *) context);
00363 }
00364 
00365 /*
00366  * build_minmax_path
00367  *      Given a MIN/MAX aggregate, try to build an indexscan Path it can be
00368  *      optimized with.
00369  *
00370  * If successful, stash the best path in *mminfo and return TRUE.
00371  * Otherwise, return FALSE.
00372  */
00373 static bool
00374 build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
00375                   Oid eqop, Oid sortop, bool nulls_first)
00376 {
00377     PlannerInfo *subroot;
00378     Query      *parse;
00379     TargetEntry *tle;
00380     NullTest   *ntest;
00381     SortGroupClause *sortcl;
00382     Path       *cheapest_path;
00383     Path       *sorted_path;
00384     double      dNumGroups;
00385     Cost        path_cost;
00386     double      path_fraction;
00387 
00388     /*----------
00389      * Generate modified query of the form
00390      *      (SELECT col FROM tab
00391      *       WHERE col IS NOT NULL AND existing-quals
00392      *       ORDER BY col ASC/DESC
00393      *       LIMIT 1)
00394      *----------
00395      */
00396     subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
00397     memcpy(subroot, root, sizeof(PlannerInfo));
00398     subroot->parse = parse = (Query *) copyObject(root->parse);
00399     /* make sure subroot planning won't change root->init_plans contents */
00400     subroot->init_plans = list_copy(root->init_plans);
00401     /* There shouldn't be any OJ or LATERAL info to translate, as yet */
00402     Assert(subroot->join_info_list == NIL);
00403     Assert(subroot->lateral_info_list == NIL);
00404     /* and we haven't created PlaceHolderInfos, either */
00405     Assert(subroot->placeholder_list == NIL);
00406 
00407     /* single tlist entry that is the aggregate target */
00408     tle = makeTargetEntry(copyObject(mminfo->target),
00409                           (AttrNumber) 1,
00410                           pstrdup("agg_target"),
00411                           false);
00412     parse->targetList = list_make1(tle);
00413 
00414     /* No HAVING, no DISTINCT, no aggregates anymore */
00415     parse->havingQual = NULL;
00416     subroot->hasHavingQual = false;
00417     parse->distinctClause = NIL;
00418     parse->hasDistinctOn = false;
00419     parse->hasAggs = false;
00420 
00421     /* Build "target IS NOT NULL" expression */
00422     ntest = makeNode(NullTest);
00423     ntest->nulltesttype = IS_NOT_NULL;
00424     ntest->arg = copyObject(mminfo->target);
00425     /* we checked it wasn't a rowtype in find_minmax_aggs_walker */
00426     ntest->argisrow = false;
00427 
00428     /* User might have had that in WHERE already */
00429     if (!list_member((List *) parse->jointree->quals, ntest))
00430         parse->jointree->quals = (Node *)
00431             lcons(ntest, (List *) parse->jointree->quals);
00432 
00433     /* Build suitable ORDER BY clause */
00434     sortcl = makeNode(SortGroupClause);
00435     sortcl->tleSortGroupRef = assignSortGroupRef(tle, parse->targetList);
00436     sortcl->eqop = eqop;
00437     sortcl->sortop = sortop;
00438     sortcl->nulls_first = nulls_first;
00439     sortcl->hashable = false;   /* no need to make this accurate */
00440     parse->sortClause = list_make1(sortcl);
00441 
00442     /* set up expressions for LIMIT 1 */
00443     parse->limitOffset = NULL;
00444     parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
00445                                            sizeof(int64),
00446                                            Int64GetDatum(1), false,
00447                                            FLOAT8PASSBYVAL);
00448 
00449     /*
00450      * Generate the best paths for this query, telling query_planner that we
00451      * have LIMIT 1.
00452      */
00453     query_planner(subroot, parse->targetList, 1.0, 1.0,
00454                   minmax_qp_callback, NULL,
00455                   &cheapest_path, &sorted_path, &dNumGroups);
00456 
00457     /*
00458      * Fail if no presorted path.  However, if query_planner determines that
00459      * the presorted path is also the cheapest, it will set sorted_path to
00460      * NULL ... don't be fooled.  (This is kind of a pain here, but it
00461      * simplifies life for grouping_planner, so leave it be.)
00462      */
00463     if (!sorted_path)
00464     {
00465         if (cheapest_path &&
00466             pathkeys_contained_in(subroot->sort_pathkeys,
00467                                   cheapest_path->pathkeys))
00468             sorted_path = cheapest_path;
00469         else
00470             return false;
00471     }
00472 
00473     /*
00474      * Determine cost to get just the first row of the presorted path.
00475      *
00476      * Note: cost calculation here should match
00477      * compare_fractional_path_costs().
00478      */
00479     if (sorted_path->parent->rows > 1.0)
00480         path_fraction = 1.0 / sorted_path->parent->rows;
00481     else
00482         path_fraction = 1.0;
00483 
00484     path_cost = sorted_path->startup_cost +
00485         path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
00486 
00487     /* Save state for further processing */
00488     mminfo->subroot = subroot;
00489     mminfo->path = sorted_path;
00490     mminfo->pathcost = path_cost;
00491 
00492     return true;
00493 }
00494 
00495 /*
00496  * Compute query_pathkeys and other pathkeys during plan generation
00497  */
00498 static void
00499 minmax_qp_callback(PlannerInfo *root, void *extra)
00500 {
00501     root->group_pathkeys = NIL;
00502     root->window_pathkeys = NIL;
00503     root->distinct_pathkeys = NIL;
00504 
00505     root->sort_pathkeys =
00506         make_pathkeys_for_sortclauses(root,
00507                                       root->parse->sortClause,
00508                                       root->parse->targetList);
00509 
00510     root->query_pathkeys = root->sort_pathkeys;
00511 }
00512 
00513 /*
00514  * Construct a suitable plan for a converted aggregate query
00515  */
00516 static void
00517 make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo)
00518 {
00519     PlannerInfo *subroot = mminfo->subroot;
00520     Query      *subparse = subroot->parse;
00521     Plan       *plan;
00522 
00523     /*
00524      * Generate the plan for the subquery. We already have a Path, but we have
00525      * to convert it to a Plan and attach a LIMIT node above it.
00526      */
00527     plan = create_plan(subroot, mminfo->path);
00528 
00529     plan->targetlist = subparse->targetList;
00530 
00531     plan = (Plan *) make_limit(plan,
00532                                subparse->limitOffset,
00533                                subparse->limitCount,
00534                                0, 1);
00535 
00536     /*
00537      * Convert the plan into an InitPlan, and make a Param for its result.
00538      */
00539     mminfo->param =
00540         SS_make_initplan_from_plan(subroot, plan,
00541                                    exprType((Node *) mminfo->target),
00542                                    -1,
00543                                    exprCollation((Node *) mminfo->target));
00544 
00545     /*
00546      * Make sure the initplan gets into the outer PlannerInfo, along with any
00547      * other initplans generated by the sub-planning run.  We had to include
00548      * the outer PlannerInfo's pre-existing initplans into the inner one's
00549      * init_plans list earlier, so make sure we don't put back any duplicate
00550      * entries.
00551      */
00552     root->init_plans = list_concat_unique_ptr(root->init_plans,
00553                                               subroot->init_plans);
00554 }
00555 
00556 /*
00557  * Replace original aggregate calls with subplan output Params
00558  */
00559 static Node *
00560 replace_aggs_with_params_mutator(Node *node, PlannerInfo *root)
00561 {
00562     if (node == NULL)
00563         return NULL;
00564     if (IsA(node, Aggref))
00565     {
00566         Aggref     *aggref = (Aggref *) node;
00567         TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
00568         ListCell   *lc;
00569 
00570         foreach(lc, root->minmax_aggs)
00571         {
00572             MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
00573 
00574             if (mminfo->aggfnoid == aggref->aggfnoid &&
00575                 equal(mminfo->target, curTarget->expr))
00576                 return (Node *) mminfo->param;
00577         }
00578         elog(ERROR, "failed to re-find MinMaxAggInfo record");
00579     }
00580     Assert(!IsA(node, SubLink));
00581     return expression_tree_mutator(node, replace_aggs_with_params_mutator,
00582                                    (void *) root);
00583 }
00584 
00585 /*
00586  * Get the OID of the sort operator, if any, associated with an aggregate.
00587  * Returns InvalidOid if there is no such operator.
00588  */
00589 static Oid
00590 fetch_agg_sort_op(Oid aggfnoid)
00591 {
00592     HeapTuple   aggTuple;
00593     Form_pg_aggregate aggform;
00594     Oid         aggsortop;
00595 
00596     /* fetch aggregate entry from pg_aggregate */
00597     aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
00598     if (!HeapTupleIsValid(aggTuple))
00599         return InvalidOid;
00600     aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
00601     aggsortop = aggform->aggsortop;
00602     ReleaseSysCache(aggTuple);
00603 
00604     return aggsortop;
00605 }