Header And Logo

PostgreSQL
| The world's most advanced open source database.

geqo_eval.c

Go to the documentation of this file.
00001 /*------------------------------------------------------------------------
00002  *
00003  * geqo_eval.c
00004  *    Routines to evaluate query trees
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  * src/backend/optimizer/geqo/geqo_eval.c
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 
00014 /* contributed by:
00015    =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
00016    *  Martin Utesch              * Institute of Automatic Control      *
00017    =                             = University of Mining and Technology =
00018    *  [email protected]  * Freiberg, Germany                   *
00019    =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
00020  */
00021 
00022 #include "postgres.h"
00023 
00024 #include <float.h>
00025 #include <limits.h>
00026 #include <math.h>
00027 
00028 #include "optimizer/geqo.h"
00029 #include "optimizer/joininfo.h"
00030 #include "optimizer/pathnode.h"
00031 #include "optimizer/paths.h"
00032 #include "utils/memutils.h"
00033 
00034 
00035 /* A "clump" of already-joined relations within gimme_tree */
00036 typedef struct
00037 {
00038     RelOptInfo *joinrel;        /* joinrel for the set of relations */
00039     int         size;           /* number of input relations in clump */
00040 } Clump;
00041 
00042 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
00043             bool force);
00044 static bool desirable_join(PlannerInfo *root,
00045                RelOptInfo *outer_rel, RelOptInfo *inner_rel);
00046 
00047 
00048 /*
00049  * geqo_eval
00050  *
00051  * Returns cost of a query tree as an individual of the population.
00052  */
00053 Cost
00054 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
00055 {
00056     MemoryContext mycontext;
00057     MemoryContext oldcxt;
00058     RelOptInfo *joinrel;
00059     Path       *best_path;
00060     Cost        fitness;
00061     int         savelength;
00062     struct HTAB *savehash;
00063 
00064     /*
00065      * Create a private memory context that will hold all temp storage
00066      * allocated inside gimme_tree().
00067      *
00068      * Since geqo_eval() will be called many times, we can't afford to let all
00069      * that memory go unreclaimed until end of statement.  Note we make the
00070      * temp context a child of the planner's normal context, so that it will
00071      * be freed even if we abort via ereport(ERROR).
00072      */
00073     mycontext = AllocSetContextCreate(CurrentMemoryContext,
00074                                       "GEQO",
00075                                       ALLOCSET_DEFAULT_MINSIZE,
00076                                       ALLOCSET_DEFAULT_INITSIZE,
00077                                       ALLOCSET_DEFAULT_MAXSIZE);
00078     oldcxt = MemoryContextSwitchTo(mycontext);
00079 
00080     /*
00081      * gimme_tree will add entries to root->join_rel_list, which may or may
00082      * not already contain some entries.  The newly added entries will be
00083      * recycled by the MemoryContextDelete below, so we must ensure that the
00084      * list is restored to its former state before exiting.  We can do this by
00085      * truncating the list to its original length.  NOTE this assumes that any
00086      * added entries are appended at the end!
00087      *
00088      * We also must take care not to mess up the outer join_rel_hash, if there
00089      * is one.  We can do this by just temporarily setting the link to NULL.
00090      * (If we are dealing with enough join rels, which we very likely are, a
00091      * new hash table will get built and used locally.)
00092      *
00093      * join_rel_level[] shouldn't be in use, so just Assert it isn't.
00094      */
00095     savelength = list_length(root->join_rel_list);
00096     savehash = root->join_rel_hash;
00097     Assert(root->join_rel_level == NULL);
00098 
00099     root->join_rel_hash = NULL;
00100 
00101     /* construct the best path for the given combination of relations */
00102     joinrel = gimme_tree(root, tour, num_gene);
00103     best_path = joinrel->cheapest_total_path;
00104 
00105     /*
00106      * compute fitness
00107      *
00108      * XXX geqo does not currently support optimization for partial result
00109      * retrieval, nor do we take any cognizance of possible use of
00110      * parameterized paths --- how to fix?
00111      */
00112     fitness = best_path->total_cost;
00113 
00114     /*
00115      * Restore join_rel_list to its former state, and put back original
00116      * hashtable if any.
00117      */
00118     root->join_rel_list = list_truncate(root->join_rel_list,
00119                                         savelength);
00120     root->join_rel_hash = savehash;
00121 
00122     /* release all the memory acquired within gimme_tree */
00123     MemoryContextSwitchTo(oldcxt);
00124     MemoryContextDelete(mycontext);
00125 
00126     return fitness;
00127 }
00128 
00129 /*
00130  * gimme_tree
00131  *    Form planner estimates for a join tree constructed in the specified
00132  *    order.
00133  *
00134  *   'tour' is the proposed join order, of length 'num_gene'
00135  *
00136  * Returns a new join relation whose cheapest path is the best plan for
00137  * this join order.
00138  *
00139  * The original implementation of this routine always joined in the specified
00140  * order, and so could only build left-sided plans (and right-sided and
00141  * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
00142  * It could never produce a "bushy" plan.  This had a couple of big problems,
00143  * of which the worst was that there are situations involving join order
00144  * restrictions where the only valid plans are bushy.
00145  *
00146  * The present implementation takes the given tour as a guideline, but
00147  * postpones joins that are illegal or seem unsuitable according to some
00148  * heuristic rules.  This allows correct bushy plans to be generated at need,
00149  * and as a nice side-effect it seems to materially improve the quality of the
00150  * generated plans.
00151  */
00152 RelOptInfo *
00153 gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
00154 {
00155     GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
00156     List       *clumps;
00157     int         rel_count;
00158 
00159     /*
00160      * Sometimes, a relation can't yet be joined to others due to heuristics
00161      * or actual semantic restrictions.  We maintain a list of "clumps" of
00162      * successfully joined relations, with larger clumps at the front. Each
00163      * new relation from the tour is added to the first clump it can be joined
00164      * to; if there is none then it becomes a new clump of its own. When we
00165      * enlarge an existing clump we check to see if it can now be merged with
00166      * any other clumps.  After the tour is all scanned, we forget about the
00167      * heuristics and try to forcibly join any remaining clumps.  Some forced
00168      * joins might still fail due to semantics, but we should always be able
00169      * to find some join order that works.
00170      */
00171     clumps = NIL;
00172 
00173     for (rel_count = 0; rel_count < num_gene; rel_count++)
00174     {
00175         int         cur_rel_index;
00176         RelOptInfo *cur_rel;
00177         Clump      *cur_clump;
00178 
00179         /* Get the next input relation */
00180         cur_rel_index = (int) tour[rel_count];
00181         cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
00182                                           cur_rel_index - 1);
00183 
00184         /* Make it into a single-rel clump */
00185         cur_clump = (Clump *) palloc(sizeof(Clump));
00186         cur_clump->joinrel = cur_rel;
00187         cur_clump->size = 1;
00188 
00189         /* Merge it into the clumps list, using only desirable joins */
00190         clumps = merge_clump(root, clumps, cur_clump, false);
00191     }
00192 
00193     if (list_length(clumps) > 1)
00194     {
00195         /* Force-join the remaining clumps in some legal order */
00196         List       *fclumps;
00197         ListCell   *lc;
00198 
00199         fclumps = NIL;
00200         foreach(lc, clumps)
00201         {
00202             Clump      *clump = (Clump *) lfirst(lc);
00203 
00204             fclumps = merge_clump(root, fclumps, clump, true);
00205         }
00206         clumps = fclumps;
00207     }
00208 
00209     /* Did we succeed in forming a single join relation? */
00210     if (list_length(clumps) != 1)
00211         elog(ERROR, "failed to join all relations together");
00212 
00213     return ((Clump *) linitial(clumps))->joinrel;
00214 }
00215 
00216 /*
00217  * Merge a "clump" into the list of existing clumps for gimme_tree.
00218  *
00219  * We try to merge the clump into some existing clump, and repeat if
00220  * successful.  When no more merging is possible, insert the clump
00221  * into the list, preserving the list ordering rule (namely, that
00222  * clumps of larger size appear earlier).
00223  *
00224  * If force is true, merge anywhere a join is legal, even if it causes
00225  * a cartesian join to be performed.  When force is false, do only
00226  * "desirable" joins.
00227  */
00228 static List *
00229 merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
00230 {
00231     ListCell   *prev;
00232     ListCell   *lc;
00233 
00234     /* Look for a clump that new_clump can join to */
00235     prev = NULL;
00236     foreach(lc, clumps)
00237     {
00238         Clump      *old_clump = (Clump *) lfirst(lc);
00239 
00240         if (force ||
00241             desirable_join(root, old_clump->joinrel, new_clump->joinrel))
00242         {
00243             RelOptInfo *joinrel;
00244 
00245             /*
00246              * Construct a RelOptInfo representing the join of these two input
00247              * relations.  Note that we expect the joinrel not to exist in
00248              * root->join_rel_list yet, and so the paths constructed for it
00249              * will only include the ones we want.
00250              */
00251             joinrel = make_join_rel(root,
00252                                     old_clump->joinrel,
00253                                     new_clump->joinrel);
00254 
00255             /* Keep searching if join order is not valid */
00256             if (joinrel)
00257             {
00258                 /* Find and save the cheapest paths for this joinrel */
00259                 set_cheapest(joinrel);
00260 
00261                 /* Absorb new clump into old */
00262                 old_clump->joinrel = joinrel;
00263                 old_clump->size += new_clump->size;
00264                 pfree(new_clump);
00265 
00266                 /* Remove old_clump from list */
00267                 clumps = list_delete_cell(clumps, lc, prev);
00268 
00269                 /*
00270                  * Recursively try to merge the enlarged old_clump with
00271                  * others.  When no further merge is possible, we'll reinsert
00272                  * it into the list.
00273                  */
00274                 return merge_clump(root, clumps, old_clump, force);
00275             }
00276         }
00277         prev = lc;
00278     }
00279 
00280     /*
00281      * No merging is possible, so add new_clump as an independent clump, in
00282      * proper order according to size.  We can be fast for the common case
00283      * where it has size 1 --- it should always go at the end.
00284      */
00285     if (clumps == NIL || new_clump->size == 1)
00286         return lappend(clumps, new_clump);
00287 
00288     /* Check if it belongs at the front */
00289     lc = list_head(clumps);
00290     if (new_clump->size > ((Clump *) lfirst(lc))->size)
00291         return lcons(new_clump, clumps);
00292 
00293     /* Else search for the place to insert it */
00294     for (;;)
00295     {
00296         ListCell   *nxt = lnext(lc);
00297 
00298         if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
00299             break;              /* it belongs after 'lc', before 'nxt' */
00300         lc = nxt;
00301     }
00302     lappend_cell(clumps, lc, new_clump);
00303 
00304     return clumps;
00305 }
00306 
00307 /*
00308  * Heuristics for gimme_tree: do we want to join these two relations?
00309  */
00310 static bool
00311 desirable_join(PlannerInfo *root,
00312                RelOptInfo *outer_rel, RelOptInfo *inner_rel)
00313 {
00314     /*
00315      * Join if there is an applicable join clause, or if there is a join order
00316      * restriction forcing these rels to be joined.
00317      */
00318     if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
00319         have_join_order_restriction(root, outer_rel, inner_rel))
00320         return true;
00321 
00322     /* Otherwise postpone the join till later. */
00323     return false;
00324 }