00001 /*------------------------------------------------------------------------ 00002 * 00003 * geqo_eval.c 00004 * Routines to evaluate query trees 00005 * 00006 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00007 * Portions Copyright (c) 1994, Regents of the University of California 00008 * 00009 * src/backend/optimizer/geqo/geqo_eval.c 00010 * 00011 *------------------------------------------------------------------------- 00012 */ 00013 00014 /* contributed by: 00015 =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= 00016 * Martin Utesch * Institute of Automatic Control * 00017 = = University of Mining and Technology = 00018 * [email protected] * Freiberg, Germany * 00019 =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= 00020 */ 00021 00022 #include "postgres.h" 00023 00024 #include <float.h> 00025 #include <limits.h> 00026 #include <math.h> 00027 00028 #include "optimizer/geqo.h" 00029 #include "optimizer/joininfo.h" 00030 #include "optimizer/pathnode.h" 00031 #include "optimizer/paths.h" 00032 #include "utils/memutils.h" 00033 00034 00035 /* A "clump" of already-joined relations within gimme_tree */ 00036 typedef struct 00037 { 00038 RelOptInfo *joinrel; /* joinrel for the set of relations */ 00039 int size; /* number of input relations in clump */ 00040 } Clump; 00041 00042 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, 00043 bool force); 00044 static bool desirable_join(PlannerInfo *root, 00045 RelOptInfo *outer_rel, RelOptInfo *inner_rel); 00046 00047 00048 /* 00049 * geqo_eval 00050 * 00051 * Returns cost of a query tree as an individual of the population. 00052 */ 00053 Cost 00054 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) 00055 { 00056 MemoryContext mycontext; 00057 MemoryContext oldcxt; 00058 RelOptInfo *joinrel; 00059 Path *best_path; 00060 Cost fitness; 00061 int savelength; 00062 struct HTAB *savehash; 00063 00064 /* 00065 * Create a private memory context that will hold all temp storage 00066 * allocated inside gimme_tree(). 00067 * 00068 * Since geqo_eval() will be called many times, we can't afford to let all 00069 * that memory go unreclaimed until end of statement. Note we make the 00070 * temp context a child of the planner's normal context, so that it will 00071 * be freed even if we abort via ereport(ERROR). 00072 */ 00073 mycontext = AllocSetContextCreate(CurrentMemoryContext, 00074 "GEQO", 00075 ALLOCSET_DEFAULT_MINSIZE, 00076 ALLOCSET_DEFAULT_INITSIZE, 00077 ALLOCSET_DEFAULT_MAXSIZE); 00078 oldcxt = MemoryContextSwitchTo(mycontext); 00079 00080 /* 00081 * gimme_tree will add entries to root->join_rel_list, which may or may 00082 * not already contain some entries. The newly added entries will be 00083 * recycled by the MemoryContextDelete below, so we must ensure that the 00084 * list is restored to its former state before exiting. We can do this by 00085 * truncating the list to its original length. NOTE this assumes that any 00086 * added entries are appended at the end! 00087 * 00088 * We also must take care not to mess up the outer join_rel_hash, if there 00089 * is one. We can do this by just temporarily setting the link to NULL. 00090 * (If we are dealing with enough join rels, which we very likely are, a 00091 * new hash table will get built and used locally.) 00092 * 00093 * join_rel_level[] shouldn't be in use, so just Assert it isn't. 00094 */ 00095 savelength = list_length(root->join_rel_list); 00096 savehash = root->join_rel_hash; 00097 Assert(root->join_rel_level == NULL); 00098 00099 root->join_rel_hash = NULL; 00100 00101 /* construct the best path for the given combination of relations */ 00102 joinrel = gimme_tree(root, tour, num_gene); 00103 best_path = joinrel->cheapest_total_path; 00104 00105 /* 00106 * compute fitness 00107 * 00108 * XXX geqo does not currently support optimization for partial result 00109 * retrieval, nor do we take any cognizance of possible use of 00110 * parameterized paths --- how to fix? 00111 */ 00112 fitness = best_path->total_cost; 00113 00114 /* 00115 * Restore join_rel_list to its former state, and put back original 00116 * hashtable if any. 00117 */ 00118 root->join_rel_list = list_truncate(root->join_rel_list, 00119 savelength); 00120 root->join_rel_hash = savehash; 00121 00122 /* release all the memory acquired within gimme_tree */ 00123 MemoryContextSwitchTo(oldcxt); 00124 MemoryContextDelete(mycontext); 00125 00126 return fitness; 00127 } 00128 00129 /* 00130 * gimme_tree 00131 * Form planner estimates for a join tree constructed in the specified 00132 * order. 00133 * 00134 * 'tour' is the proposed join order, of length 'num_gene' 00135 * 00136 * Returns a new join relation whose cheapest path is the best plan for 00137 * this join order. 00138 * 00139 * The original implementation of this routine always joined in the specified 00140 * order, and so could only build left-sided plans (and right-sided and 00141 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). 00142 * It could never produce a "bushy" plan. This had a couple of big problems, 00143 * of which the worst was that there are situations involving join order 00144 * restrictions where the only valid plans are bushy. 00145 * 00146 * The present implementation takes the given tour as a guideline, but 00147 * postpones joins that are illegal or seem unsuitable according to some 00148 * heuristic rules. This allows correct bushy plans to be generated at need, 00149 * and as a nice side-effect it seems to materially improve the quality of the 00150 * generated plans. 00151 */ 00152 RelOptInfo * 00153 gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) 00154 { 00155 GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; 00156 List *clumps; 00157 int rel_count; 00158 00159 /* 00160 * Sometimes, a relation can't yet be joined to others due to heuristics 00161 * or actual semantic restrictions. We maintain a list of "clumps" of 00162 * successfully joined relations, with larger clumps at the front. Each 00163 * new relation from the tour is added to the first clump it can be joined 00164 * to; if there is none then it becomes a new clump of its own. When we 00165 * enlarge an existing clump we check to see if it can now be merged with 00166 * any other clumps. After the tour is all scanned, we forget about the 00167 * heuristics and try to forcibly join any remaining clumps. Some forced 00168 * joins might still fail due to semantics, but we should always be able 00169 * to find some join order that works. 00170 */ 00171 clumps = NIL; 00172 00173 for (rel_count = 0; rel_count < num_gene; rel_count++) 00174 { 00175 int cur_rel_index; 00176 RelOptInfo *cur_rel; 00177 Clump *cur_clump; 00178 00179 /* Get the next input relation */ 00180 cur_rel_index = (int) tour[rel_count]; 00181 cur_rel = (RelOptInfo *) list_nth(private->initial_rels, 00182 cur_rel_index - 1); 00183 00184 /* Make it into a single-rel clump */ 00185 cur_clump = (Clump *) palloc(sizeof(Clump)); 00186 cur_clump->joinrel = cur_rel; 00187 cur_clump->size = 1; 00188 00189 /* Merge it into the clumps list, using only desirable joins */ 00190 clumps = merge_clump(root, clumps, cur_clump, false); 00191 } 00192 00193 if (list_length(clumps) > 1) 00194 { 00195 /* Force-join the remaining clumps in some legal order */ 00196 List *fclumps; 00197 ListCell *lc; 00198 00199 fclumps = NIL; 00200 foreach(lc, clumps) 00201 { 00202 Clump *clump = (Clump *) lfirst(lc); 00203 00204 fclumps = merge_clump(root, fclumps, clump, true); 00205 } 00206 clumps = fclumps; 00207 } 00208 00209 /* Did we succeed in forming a single join relation? */ 00210 if (list_length(clumps) != 1) 00211 elog(ERROR, "failed to join all relations together"); 00212 00213 return ((Clump *) linitial(clumps))->joinrel; 00214 } 00215 00216 /* 00217 * Merge a "clump" into the list of existing clumps for gimme_tree. 00218 * 00219 * We try to merge the clump into some existing clump, and repeat if 00220 * successful. When no more merging is possible, insert the clump 00221 * into the list, preserving the list ordering rule (namely, that 00222 * clumps of larger size appear earlier). 00223 * 00224 * If force is true, merge anywhere a join is legal, even if it causes 00225 * a cartesian join to be performed. When force is false, do only 00226 * "desirable" joins. 00227 */ 00228 static List * 00229 merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force) 00230 { 00231 ListCell *prev; 00232 ListCell *lc; 00233 00234 /* Look for a clump that new_clump can join to */ 00235 prev = NULL; 00236 foreach(lc, clumps) 00237 { 00238 Clump *old_clump = (Clump *) lfirst(lc); 00239 00240 if (force || 00241 desirable_join(root, old_clump->joinrel, new_clump->joinrel)) 00242 { 00243 RelOptInfo *joinrel; 00244 00245 /* 00246 * Construct a RelOptInfo representing the join of these two input 00247 * relations. Note that we expect the joinrel not to exist in 00248 * root->join_rel_list yet, and so the paths constructed for it 00249 * will only include the ones we want. 00250 */ 00251 joinrel = make_join_rel(root, 00252 old_clump->joinrel, 00253 new_clump->joinrel); 00254 00255 /* Keep searching if join order is not valid */ 00256 if (joinrel) 00257 { 00258 /* Find and save the cheapest paths for this joinrel */ 00259 set_cheapest(joinrel); 00260 00261 /* Absorb new clump into old */ 00262 old_clump->joinrel = joinrel; 00263 old_clump->size += new_clump->size; 00264 pfree(new_clump); 00265 00266 /* Remove old_clump from list */ 00267 clumps = list_delete_cell(clumps, lc, prev); 00268 00269 /* 00270 * Recursively try to merge the enlarged old_clump with 00271 * others. When no further merge is possible, we'll reinsert 00272 * it into the list. 00273 */ 00274 return merge_clump(root, clumps, old_clump, force); 00275 } 00276 } 00277 prev = lc; 00278 } 00279 00280 /* 00281 * No merging is possible, so add new_clump as an independent clump, in 00282 * proper order according to size. We can be fast for the common case 00283 * where it has size 1 --- it should always go at the end. 00284 */ 00285 if (clumps == NIL || new_clump->size == 1) 00286 return lappend(clumps, new_clump); 00287 00288 /* Check if it belongs at the front */ 00289 lc = list_head(clumps); 00290 if (new_clump->size > ((Clump *) lfirst(lc))->size) 00291 return lcons(new_clump, clumps); 00292 00293 /* Else search for the place to insert it */ 00294 for (;;) 00295 { 00296 ListCell *nxt = lnext(lc); 00297 00298 if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size) 00299 break; /* it belongs after 'lc', before 'nxt' */ 00300 lc = nxt; 00301 } 00302 lappend_cell(clumps, lc, new_clump); 00303 00304 return clumps; 00305 } 00306 00307 /* 00308 * Heuristics for gimme_tree: do we want to join these two relations? 00309 */ 00310 static bool 00311 desirable_join(PlannerInfo *root, 00312 RelOptInfo *outer_rel, RelOptInfo *inner_rel) 00313 { 00314 /* 00315 * Join if there is an applicable join clause, or if there is a join order 00316 * restriction forcing these rels to be joined. 00317 */ 00318 if (have_relevant_joinclause(root, outer_rel, inner_rel) || 00319 have_join_order_restriction(root, outer_rel, inner_rel)) 00320 return true; 00321 00322 /* Otherwise postpone the join till later. */ 00323 return false; 00324 }