00001 /*------------------------------------------------------------------------- 00002 * 00003 * joinrels.c 00004 * Routines to determine which relations should be joined 00005 * 00006 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00007 * Portions Copyright (c) 1994, Regents of the University of California 00008 * 00009 * 00010 * IDENTIFICATION 00011 * src/backend/optimizer/path/joinrels.c 00012 * 00013 *------------------------------------------------------------------------- 00014 */ 00015 #include "postgres.h" 00016 00017 #include "optimizer/joininfo.h" 00018 #include "optimizer/pathnode.h" 00019 #include "optimizer/paths.h" 00020 #include "utils/memutils.h" 00021 00022 00023 static void make_rels_by_clause_joins(PlannerInfo *root, 00024 RelOptInfo *old_rel, 00025 ListCell *other_rels); 00026 static void make_rels_by_clauseless_joins(PlannerInfo *root, 00027 RelOptInfo *old_rel, 00028 ListCell *other_rels); 00029 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); 00030 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); 00031 static bool is_dummy_rel(RelOptInfo *rel); 00032 static void mark_dummy_rel(RelOptInfo *rel); 00033 static bool restriction_is_constant_false(List *restrictlist, 00034 bool only_pushed_down); 00035 00036 00037 /* 00038 * join_search_one_level 00039 * Consider ways to produce join relations containing exactly 'level' 00040 * jointree items. (This is one step of the dynamic-programming method 00041 * embodied in standard_join_search.) Join rel nodes for each feasible 00042 * combination of lower-level rels are created and returned in a list. 00043 * Implementation paths are created for each such joinrel, too. 00044 * 00045 * level: level of rels we want to make this time 00046 * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items 00047 * 00048 * The result is returned in root->join_rel_level[level]. 00049 */ 00050 void 00051 join_search_one_level(PlannerInfo *root, int level) 00052 { 00053 List **joinrels = root->join_rel_level; 00054 ListCell *r; 00055 int k; 00056 00057 Assert(joinrels[level] == NIL); 00058 00059 /* Set join_cur_level so that new joinrels are added to proper list */ 00060 root->join_cur_level = level; 00061 00062 /* 00063 * First, consider left-sided and right-sided plans, in which rels of 00064 * exactly level-1 member relations are joined against initial relations. 00065 * We prefer to join using join clauses, but if we find a rel of level-1 00066 * members that has no join clauses, we will generate Cartesian-product 00067 * joins against all initial rels not already contained in it. 00068 */ 00069 foreach(r, joinrels[level - 1]) 00070 { 00071 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); 00072 00073 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins || 00074 has_join_restriction(root, old_rel)) 00075 { 00076 /* 00077 * There are join clauses or join order restrictions relevant to 00078 * this rel, so consider joins between this rel and (only) those 00079 * initial rels it is linked to by a clause or restriction. 00080 * 00081 * At level 2 this condition is symmetric, so there is no need to 00082 * look at initial rels before this one in the list; we already 00083 * considered such joins when we were at the earlier rel. (The 00084 * mirror-image joins are handled automatically by make_join_rel.) 00085 * In later passes (level > 2), we join rels of the previous level 00086 * to each initial rel they don't already include but have a join 00087 * clause or restriction with. 00088 */ 00089 ListCell *other_rels; 00090 00091 if (level == 2) /* consider remaining initial rels */ 00092 other_rels = lnext(r); 00093 else /* consider all initial rels */ 00094 other_rels = list_head(joinrels[1]); 00095 00096 make_rels_by_clause_joins(root, 00097 old_rel, 00098 other_rels); 00099 } 00100 else 00101 { 00102 /* 00103 * Oops, we have a relation that is not joined to any other 00104 * relation, either directly or by join-order restrictions. 00105 * Cartesian product time. 00106 * 00107 * We consider a cartesian product with each not-already-included 00108 * initial rel, whether it has other join clauses or not. At 00109 * level 2, if there are two or more clauseless initial rels, we 00110 * will redundantly consider joining them in both directions; but 00111 * such cases aren't common enough to justify adding complexity to 00112 * avoid the duplicated effort. 00113 */ 00114 make_rels_by_clauseless_joins(root, 00115 old_rel, 00116 list_head(joinrels[1])); 00117 } 00118 } 00119 00120 /* 00121 * Now, consider "bushy plans" in which relations of k initial rels are 00122 * joined to relations of level-k initial rels, for 2 <= k <= level-2. 00123 * 00124 * We only consider bushy-plan joins for pairs of rels where there is a 00125 * suitable join clause (or join order restriction), in order to avoid 00126 * unreasonable growth of planning time. 00127 */ 00128 for (k = 2;; k++) 00129 { 00130 int other_level = level - k; 00131 00132 /* 00133 * Since make_join_rel(x, y) handles both x,y and y,x cases, we only 00134 * need to go as far as the halfway point. 00135 */ 00136 if (k > other_level) 00137 break; 00138 00139 foreach(r, joinrels[k]) 00140 { 00141 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); 00142 ListCell *other_rels; 00143 ListCell *r2; 00144 00145 /* 00146 * We can ignore relations without join clauses here, unless they 00147 * participate in join-order restrictions --- then we might have 00148 * to force a bushy join plan. 00149 */ 00150 if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins && 00151 !has_join_restriction(root, old_rel)) 00152 continue; 00153 00154 if (k == other_level) 00155 other_rels = lnext(r); /* only consider remaining rels */ 00156 else 00157 other_rels = list_head(joinrels[other_level]); 00158 00159 for_each_cell(r2, other_rels) 00160 { 00161 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2); 00162 00163 if (!bms_overlap(old_rel->relids, new_rel->relids)) 00164 { 00165 /* 00166 * OK, we can build a rel of the right level from this 00167 * pair of rels. Do so if there is at least one relevant 00168 * join clause or join order restriction. 00169 */ 00170 if (have_relevant_joinclause(root, old_rel, new_rel) || 00171 have_join_order_restriction(root, old_rel, new_rel)) 00172 { 00173 (void) make_join_rel(root, old_rel, new_rel); 00174 } 00175 } 00176 } 00177 } 00178 } 00179 00180 /*---------- 00181 * Last-ditch effort: if we failed to find any usable joins so far, force 00182 * a set of cartesian-product joins to be generated. This handles the 00183 * special case where all the available rels have join clauses but we 00184 * cannot use any of those clauses yet. This can only happen when we are 00185 * considering a join sub-problem (a sub-joinlist) and all the rels in the 00186 * sub-problem have only join clauses with rels outside the sub-problem. 00187 * An example is 00188 * 00189 * SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ... 00190 * WHERE a.w = c.x and b.y = d.z; 00191 * 00192 * If the "a INNER JOIN b" sub-problem does not get flattened into the 00193 * upper level, we must be willing to make a cartesian join of a and b; 00194 * but the code above will not have done so, because it thought that both 00195 * a and b have joinclauses. We consider only left-sided and right-sided 00196 * cartesian joins in this case (no bushy). 00197 *---------- 00198 */ 00199 if (joinrels[level] == NIL) 00200 { 00201 /* 00202 * This loop is just like the first one, except we always call 00203 * make_rels_by_clauseless_joins(). 00204 */ 00205 foreach(r, joinrels[level - 1]) 00206 { 00207 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); 00208 00209 make_rels_by_clauseless_joins(root, 00210 old_rel, 00211 list_head(joinrels[1])); 00212 } 00213 00214 /*---------- 00215 * When special joins are involved, there may be no legal way 00216 * to make an N-way join for some values of N. For example consider 00217 * 00218 * SELECT ... FROM t1 WHERE 00219 * x IN (SELECT ... FROM t2,t3 WHERE ...) AND 00220 * y IN (SELECT ... FROM t4,t5 WHERE ...) 00221 * 00222 * We will flatten this query to a 5-way join problem, but there are 00223 * no 4-way joins that join_is_legal() will consider legal. We have 00224 * to accept failure at level 4 and go on to discover a workable 00225 * bushy plan at level 5. 00226 * 00227 * However, if there are no special joins and no lateral references 00228 * then join_is_legal() should never fail, and so the following sanity 00229 * check is useful. 00230 *---------- 00231 */ 00232 if (joinrels[level] == NIL && 00233 root->join_info_list == NIL && 00234 root->lateral_info_list == NIL) 00235 elog(ERROR, "failed to build any %d-way joins", level); 00236 } 00237 } 00238 00239 /* 00240 * make_rels_by_clause_joins 00241 * Build joins between the given relation 'old_rel' and other relations 00242 * that participate in join clauses that 'old_rel' also participates in 00243 * (or participate in join-order restrictions with it). 00244 * The join rels are returned in root->join_rel_level[join_cur_level]. 00245 * 00246 * Note: at levels above 2 we will generate the same joined relation in 00247 * multiple ways --- for example (a join b) join c is the same RelOptInfo as 00248 * (b join c) join a, though the second case will add a different set of Paths 00249 * to it. This is the reason for using the join_rel_level mechanism, which 00250 * automatically ensures that each new joinrel is only added to the list once. 00251 * 00252 * 'old_rel' is the relation entry for the relation to be joined 00253 * 'other_rels': the first cell in a linked list containing the other 00254 * rels to be considered for joining 00255 * 00256 * Currently, this is only used with initial rels in other_rels, but it 00257 * will work for joining to joinrels too. 00258 */ 00259 static void 00260 make_rels_by_clause_joins(PlannerInfo *root, 00261 RelOptInfo *old_rel, 00262 ListCell *other_rels) 00263 { 00264 ListCell *l; 00265 00266 for_each_cell(l, other_rels) 00267 { 00268 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l); 00269 00270 if (!bms_overlap(old_rel->relids, other_rel->relids) && 00271 (have_relevant_joinclause(root, old_rel, other_rel) || 00272 have_join_order_restriction(root, old_rel, other_rel))) 00273 { 00274 (void) make_join_rel(root, old_rel, other_rel); 00275 } 00276 } 00277 } 00278 00279 /* 00280 * make_rels_by_clauseless_joins 00281 * Given a relation 'old_rel' and a list of other relations 00282 * 'other_rels', create a join relation between 'old_rel' and each 00283 * member of 'other_rels' that isn't already included in 'old_rel'. 00284 * The join rels are returned in root->join_rel_level[join_cur_level]. 00285 * 00286 * 'old_rel' is the relation entry for the relation to be joined 00287 * 'other_rels': the first cell of a linked list containing the 00288 * other rels to be considered for joining 00289 * 00290 * Currently, this is only used with initial rels in other_rels, but it would 00291 * work for joining to joinrels too. 00292 */ 00293 static void 00294 make_rels_by_clauseless_joins(PlannerInfo *root, 00295 RelOptInfo *old_rel, 00296 ListCell *other_rels) 00297 { 00298 ListCell *l; 00299 00300 for_each_cell(l, other_rels) 00301 { 00302 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l); 00303 00304 if (!bms_overlap(other_rel->relids, old_rel->relids)) 00305 { 00306 (void) make_join_rel(root, old_rel, other_rel); 00307 } 00308 } 00309 } 00310 00311 00312 /* 00313 * join_is_legal 00314 * Determine whether a proposed join is legal given the query's 00315 * join order constraints; and if it is, determine the join type. 00316 * 00317 * Caller must supply not only the two rels, but the union of their relids. 00318 * (We could simplify the API by computing joinrelids locally, but this 00319 * would be redundant work in the normal path through make_join_rel.) 00320 * 00321 * On success, *sjinfo_p is set to NULL if this is to be a plain inner join, 00322 * else it's set to point to the associated SpecialJoinInfo node. Also, 00323 * *reversed_p is set TRUE if the given relations need to be swapped to 00324 * match the SpecialJoinInfo node. 00325 */ 00326 static bool 00327 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, 00328 Relids joinrelids, 00329 SpecialJoinInfo **sjinfo_p, bool *reversed_p) 00330 { 00331 SpecialJoinInfo *match_sjinfo; 00332 bool reversed; 00333 bool unique_ified; 00334 bool is_valid_inner; 00335 bool lateral_fwd; 00336 bool lateral_rev; 00337 ListCell *l; 00338 00339 /* 00340 * Ensure output params are set on failure return. This is just to 00341 * suppress uninitialized-variable warnings from overly anal compilers. 00342 */ 00343 *sjinfo_p = NULL; 00344 *reversed_p = false; 00345 00346 /* 00347 * If we have any special joins, the proposed join might be illegal; and 00348 * in any case we have to determine its join type. Scan the join info 00349 * list for conflicts. 00350 */ 00351 match_sjinfo = NULL; 00352 reversed = false; 00353 unique_ified = false; 00354 is_valid_inner = true; 00355 00356 foreach(l, root->join_info_list) 00357 { 00358 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); 00359 00360 /* 00361 * This special join is not relevant unless its RHS overlaps the 00362 * proposed join. (Check this first as a fast path for dismissing 00363 * most irrelevant SJs quickly.) 00364 */ 00365 if (!bms_overlap(sjinfo->min_righthand, joinrelids)) 00366 continue; 00367 00368 /* 00369 * Also, not relevant if proposed join is fully contained within RHS 00370 * (ie, we're still building up the RHS). 00371 */ 00372 if (bms_is_subset(joinrelids, sjinfo->min_righthand)) 00373 continue; 00374 00375 /* 00376 * Also, not relevant if SJ is already done within either input. 00377 */ 00378 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && 00379 bms_is_subset(sjinfo->min_righthand, rel1->relids)) 00380 continue; 00381 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) && 00382 bms_is_subset(sjinfo->min_righthand, rel2->relids)) 00383 continue; 00384 00385 /* 00386 * If it's a semijoin and we already joined the RHS to any other rels 00387 * within either input, then we must have unique-ified the RHS at that 00388 * point (see below). Therefore the semijoin is no longer relevant in 00389 * this join path. 00390 */ 00391 if (sjinfo->jointype == JOIN_SEMI) 00392 { 00393 if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) && 00394 !bms_equal(sjinfo->syn_righthand, rel1->relids)) 00395 continue; 00396 if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) && 00397 !bms_equal(sjinfo->syn_righthand, rel2->relids)) 00398 continue; 00399 } 00400 00401 /* 00402 * If one input contains min_lefthand and the other contains 00403 * min_righthand, then we can perform the SJ at this join. 00404 * 00405 * Barf if we get matches to more than one SJ (is that possible?) 00406 */ 00407 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && 00408 bms_is_subset(sjinfo->min_righthand, rel2->relids)) 00409 { 00410 if (match_sjinfo) 00411 return false; /* invalid join path */ 00412 match_sjinfo = sjinfo; 00413 reversed = false; 00414 } 00415 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) && 00416 bms_is_subset(sjinfo->min_righthand, rel1->relids)) 00417 { 00418 if (match_sjinfo) 00419 return false; /* invalid join path */ 00420 match_sjinfo = sjinfo; 00421 reversed = true; 00422 } 00423 else if (sjinfo->jointype == JOIN_SEMI && 00424 bms_equal(sjinfo->syn_righthand, rel2->relids) && 00425 create_unique_path(root, rel2, rel2->cheapest_total_path, 00426 sjinfo) != NULL) 00427 { 00428 /*---------- 00429 * For a semijoin, we can join the RHS to anything else by 00430 * unique-ifying the RHS (if the RHS can be unique-ified). 00431 * We will only get here if we have the full RHS but less 00432 * than min_lefthand on the LHS. 00433 * 00434 * The reason to consider such a join path is exemplified by 00435 * SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c) 00436 * If we insist on doing this as a semijoin we will first have 00437 * to form the cartesian product of A*B. But if we unique-ify 00438 * C then the semijoin becomes a plain innerjoin and we can join 00439 * in any order, eg C to A and then to B. When C is much smaller 00440 * than A and B this can be a huge win. So we allow C to be 00441 * joined to just A or just B here, and then make_join_rel has 00442 * to handle the case properly. 00443 * 00444 * Note that actually we'll allow unique-ified C to be joined to 00445 * some other relation D here, too. That is legal, if usually not 00446 * very sane, and this routine is only concerned with legality not 00447 * with whether the join is good strategy. 00448 *---------- 00449 */ 00450 if (match_sjinfo) 00451 return false; /* invalid join path */ 00452 match_sjinfo = sjinfo; 00453 reversed = false; 00454 unique_ified = true; 00455 } 00456 else if (sjinfo->jointype == JOIN_SEMI && 00457 bms_equal(sjinfo->syn_righthand, rel1->relids) && 00458 create_unique_path(root, rel1, rel1->cheapest_total_path, 00459 sjinfo) != NULL) 00460 { 00461 /* Reversed semijoin case */ 00462 if (match_sjinfo) 00463 return false; /* invalid join path */ 00464 match_sjinfo = sjinfo; 00465 reversed = true; 00466 unique_ified = true; 00467 } 00468 else 00469 { 00470 /*---------- 00471 * Otherwise, the proposed join overlaps the RHS but isn't 00472 * a valid implementation of this SJ. It might still be 00473 * a legal join, however. If both inputs overlap the RHS, 00474 * assume that it's OK. Since the inputs presumably got past 00475 * this function's checks previously, they can't overlap the 00476 * LHS and their violations of the RHS boundary must represent 00477 * SJs that have been determined to commute with this one. 00478 * We have to allow this to work correctly in cases like 00479 * (a LEFT JOIN (b JOIN (c LEFT JOIN d))) 00480 * when the c/d join has been determined to commute with the join 00481 * to a, and hence d is not part of min_righthand for the upper 00482 * join. It should be legal to join b to c/d but this will appear 00483 * as a violation of the upper join's RHS. 00484 * Furthermore, if one input overlaps the RHS and the other does 00485 * not, we should still allow the join if it is a valid 00486 * implementation of some other SJ. We have to allow this to 00487 * support the associative identity 00488 * (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab 00489 * since joining B directly to C violates the lower SJ's RHS. 00490 * We assume that make_outerjoininfo() set things up correctly 00491 * so that we'll only match to some SJ if the join is valid. 00492 * Set flag here to check at bottom of loop. 00493 *---------- 00494 */ 00495 if (sjinfo->jointype != JOIN_SEMI && 00496 bms_overlap(rel1->relids, sjinfo->min_righthand) && 00497 bms_overlap(rel2->relids, sjinfo->min_righthand)) 00498 { 00499 /* seems OK */ 00500 Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand)); 00501 } 00502 else 00503 is_valid_inner = false; 00504 } 00505 } 00506 00507 /* 00508 * Fail if violated some SJ's RHS and didn't match to another SJ. However, 00509 * "matching" to a semijoin we are implementing by unique-ification 00510 * doesn't count (think: it's really an inner join). 00511 */ 00512 if (!is_valid_inner && 00513 (match_sjinfo == NULL || unique_ified)) 00514 return false; /* invalid join path */ 00515 00516 /* 00517 * We also have to check for constraints imposed by LATERAL references. 00518 * The proposed rels could each contain lateral references to the other, 00519 * in which case the join is impossible. If there are lateral references 00520 * in just one direction, then the join has to be done with a nestloop 00521 * with the lateral referencer on the inside. If the join matches an SJ 00522 * that cannot be implemented by such a nestloop, the join is impossible. 00523 */ 00524 lateral_fwd = lateral_rev = false; 00525 foreach(l, root->lateral_info_list) 00526 { 00527 LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); 00528 00529 if (bms_is_member(ljinfo->lateral_rhs, rel2->relids) && 00530 bms_overlap(ljinfo->lateral_lhs, rel1->relids)) 00531 { 00532 /* has to be implemented as nestloop with rel1 on left */ 00533 if (lateral_rev) 00534 return false; /* have lateral refs in both directions */ 00535 lateral_fwd = true; 00536 if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids)) 00537 return false; /* rel1 can't compute the required parameter */ 00538 if (match_sjinfo && 00539 (reversed || match_sjinfo->jointype == JOIN_FULL)) 00540 return false; /* not implementable as nestloop */ 00541 } 00542 if (bms_is_member(ljinfo->lateral_rhs, rel1->relids) && 00543 bms_overlap(ljinfo->lateral_lhs, rel2->relids)) 00544 { 00545 /* has to be implemented as nestloop with rel2 on left */ 00546 if (lateral_fwd) 00547 return false; /* have lateral refs in both directions */ 00548 lateral_rev = true; 00549 if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids)) 00550 return false; /* rel2 can't compute the required parameter */ 00551 if (match_sjinfo && 00552 (!reversed || match_sjinfo->jointype == JOIN_FULL)) 00553 return false; /* not implementable as nestloop */ 00554 } 00555 } 00556 00557 /* Otherwise, it's a valid join */ 00558 *sjinfo_p = match_sjinfo; 00559 *reversed_p = reversed; 00560 return true; 00561 } 00562 00563 00564 /* 00565 * make_join_rel 00566 * Find or create a join RelOptInfo that represents the join of 00567 * the two given rels, and add to it path information for paths 00568 * created with the two rels as outer and inner rel. 00569 * (The join rel may already contain paths generated from other 00570 * pairs of rels that add up to the same set of base rels.) 00571 * 00572 * NB: will return NULL if attempted join is not valid. This can happen 00573 * when working with outer joins, or with IN or EXISTS clauses that have been 00574 * turned into joins. 00575 */ 00576 RelOptInfo * 00577 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) 00578 { 00579 Relids joinrelids; 00580 SpecialJoinInfo *sjinfo; 00581 bool reversed; 00582 SpecialJoinInfo sjinfo_data; 00583 RelOptInfo *joinrel; 00584 List *restrictlist; 00585 00586 /* We should never try to join two overlapping sets of rels. */ 00587 Assert(!bms_overlap(rel1->relids, rel2->relids)); 00588 00589 /* Construct Relids set that identifies the joinrel. */ 00590 joinrelids = bms_union(rel1->relids, rel2->relids); 00591 00592 /* Check validity and determine join type. */ 00593 if (!join_is_legal(root, rel1, rel2, joinrelids, 00594 &sjinfo, &reversed)) 00595 { 00596 /* invalid join path */ 00597 bms_free(joinrelids); 00598 return NULL; 00599 } 00600 00601 /* Swap rels if needed to match the join info. */ 00602 if (reversed) 00603 { 00604 RelOptInfo *trel = rel1; 00605 00606 rel1 = rel2; 00607 rel2 = trel; 00608 } 00609 00610 /* 00611 * If it's a plain inner join, then we won't have found anything in 00612 * join_info_list. Make up a SpecialJoinInfo so that selectivity 00613 * estimation functions will know what's being joined. 00614 */ 00615 if (sjinfo == NULL) 00616 { 00617 sjinfo = &sjinfo_data; 00618 sjinfo->type = T_SpecialJoinInfo; 00619 sjinfo->min_lefthand = rel1->relids; 00620 sjinfo->min_righthand = rel2->relids; 00621 sjinfo->syn_lefthand = rel1->relids; 00622 sjinfo->syn_righthand = rel2->relids; 00623 sjinfo->jointype = JOIN_INNER; 00624 /* we don't bother trying to make the remaining fields valid */ 00625 sjinfo->lhs_strict = false; 00626 sjinfo->delay_upper_joins = false; 00627 sjinfo->join_quals = NIL; 00628 } 00629 00630 /* 00631 * Find or build the join RelOptInfo, and compute the restrictlist that 00632 * goes with this particular joining. 00633 */ 00634 joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo, 00635 &restrictlist); 00636 00637 /* 00638 * If we've already proven this join is empty, we needn't consider any 00639 * more paths for it. 00640 */ 00641 if (is_dummy_rel(joinrel)) 00642 { 00643 bms_free(joinrelids); 00644 return joinrel; 00645 } 00646 00647 /* 00648 * Consider paths using each rel as both outer and inner. Depending on 00649 * the join type, a provably empty outer or inner rel might mean the join 00650 * is provably empty too; in which case throw away any previously computed 00651 * paths and mark the join as dummy. (We do it this way since it's 00652 * conceivable that dummy-ness of a multi-element join might only be 00653 * noticeable for certain construction paths.) 00654 * 00655 * Also, a provably constant-false join restriction typically means that 00656 * we can skip evaluating one or both sides of the join. We do this by 00657 * marking the appropriate rel as dummy. For outer joins, a 00658 * constant-false restriction that is pushed down still means the whole 00659 * join is dummy, while a non-pushed-down one means that no inner rows 00660 * will join so we can treat the inner rel as dummy. 00661 * 00662 * We need only consider the jointypes that appear in join_info_list, plus 00663 * JOIN_INNER. 00664 */ 00665 switch (sjinfo->jointype) 00666 { 00667 case JOIN_INNER: 00668 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || 00669 restriction_is_constant_false(restrictlist, false)) 00670 { 00671 mark_dummy_rel(joinrel); 00672 break; 00673 } 00674 add_paths_to_joinrel(root, joinrel, rel1, rel2, 00675 JOIN_INNER, sjinfo, 00676 restrictlist); 00677 add_paths_to_joinrel(root, joinrel, rel2, rel1, 00678 JOIN_INNER, sjinfo, 00679 restrictlist); 00680 break; 00681 case JOIN_LEFT: 00682 if (is_dummy_rel(rel1) || 00683 restriction_is_constant_false(restrictlist, true)) 00684 { 00685 mark_dummy_rel(joinrel); 00686 break; 00687 } 00688 if (restriction_is_constant_false(restrictlist, false) && 00689 bms_is_subset(rel2->relids, sjinfo->syn_righthand)) 00690 mark_dummy_rel(rel2); 00691 add_paths_to_joinrel(root, joinrel, rel1, rel2, 00692 JOIN_LEFT, sjinfo, 00693 restrictlist); 00694 add_paths_to_joinrel(root, joinrel, rel2, rel1, 00695 JOIN_RIGHT, sjinfo, 00696 restrictlist); 00697 break; 00698 case JOIN_FULL: 00699 if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) || 00700 restriction_is_constant_false(restrictlist, true)) 00701 { 00702 mark_dummy_rel(joinrel); 00703 break; 00704 } 00705 add_paths_to_joinrel(root, joinrel, rel1, rel2, 00706 JOIN_FULL, sjinfo, 00707 restrictlist); 00708 add_paths_to_joinrel(root, joinrel, rel2, rel1, 00709 JOIN_FULL, sjinfo, 00710 restrictlist); 00711 00712 /* 00713 * If there are join quals that aren't mergeable or hashable, we 00714 * may not be able to build any valid plan. Complain here so that 00715 * we can give a somewhat-useful error message. (Since we have no 00716 * flexibility of planning for a full join, there's no chance of 00717 * succeeding later with another pair of input rels.) 00718 */ 00719 if (joinrel->pathlist == NIL) 00720 ereport(ERROR, 00721 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 00722 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions"))); 00723 break; 00724 case JOIN_SEMI: 00725 00726 /* 00727 * We might have a normal semijoin, or a case where we don't have 00728 * enough rels to do the semijoin but can unique-ify the RHS and 00729 * then do an innerjoin (see comments in join_is_legal). In the 00730 * latter case we can't apply JOIN_SEMI joining. 00731 */ 00732 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && 00733 bms_is_subset(sjinfo->min_righthand, rel2->relids)) 00734 { 00735 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || 00736 restriction_is_constant_false(restrictlist, false)) 00737 { 00738 mark_dummy_rel(joinrel); 00739 break; 00740 } 00741 add_paths_to_joinrel(root, joinrel, rel1, rel2, 00742 JOIN_SEMI, sjinfo, 00743 restrictlist); 00744 } 00745 00746 /* 00747 * If we know how to unique-ify the RHS and one input rel is 00748 * exactly the RHS (not a superset) we can consider unique-ifying 00749 * it and then doing a regular join. (The create_unique_path 00750 * check here is probably redundant with what join_is_legal did, 00751 * but if so the check is cheap because it's cached. So test 00752 * anyway to be sure.) 00753 */ 00754 if (bms_equal(sjinfo->syn_righthand, rel2->relids) && 00755 create_unique_path(root, rel2, rel2->cheapest_total_path, 00756 sjinfo) != NULL) 00757 { 00758 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || 00759 restriction_is_constant_false(restrictlist, false)) 00760 { 00761 mark_dummy_rel(joinrel); 00762 break; 00763 } 00764 add_paths_to_joinrel(root, joinrel, rel1, rel2, 00765 JOIN_UNIQUE_INNER, sjinfo, 00766 restrictlist); 00767 add_paths_to_joinrel(root, joinrel, rel2, rel1, 00768 JOIN_UNIQUE_OUTER, sjinfo, 00769 restrictlist); 00770 } 00771 break; 00772 case JOIN_ANTI: 00773 if (is_dummy_rel(rel1) || 00774 restriction_is_constant_false(restrictlist, true)) 00775 { 00776 mark_dummy_rel(joinrel); 00777 break; 00778 } 00779 if (restriction_is_constant_false(restrictlist, false) && 00780 bms_is_subset(rel2->relids, sjinfo->syn_righthand)) 00781 mark_dummy_rel(rel2); 00782 add_paths_to_joinrel(root, joinrel, rel1, rel2, 00783 JOIN_ANTI, sjinfo, 00784 restrictlist); 00785 break; 00786 default: 00787 /* other values not expected here */ 00788 elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype); 00789 break; 00790 } 00791 00792 bms_free(joinrelids); 00793 00794 return joinrel; 00795 } 00796 00797 00798 /* 00799 * have_join_order_restriction 00800 * Detect whether the two relations should be joined to satisfy 00801 * a join-order restriction arising from special or lateral joins. 00802 * 00803 * In practice this is always used with have_relevant_joinclause(), and so 00804 * could be merged with that function, but it seems clearer to separate the 00805 * two concerns. We need this test because there are degenerate cases where 00806 * a clauseless join must be performed to satisfy join-order restrictions. 00807 * Also, if one rel has a lateral reference to the other, we should consider 00808 * joining them even if the join would be clauseless. 00809 * 00810 * Note: this is only a problem if one side of a degenerate outer join 00811 * contains multiple rels, or a clauseless join is required within an 00812 * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in 00813 * join_search_one_level(). We could dispense with this test if we were 00814 * willing to try bushy plans in the "last ditch" case, but that seems much 00815 * less efficient. 00816 */ 00817 bool 00818 have_join_order_restriction(PlannerInfo *root, 00819 RelOptInfo *rel1, RelOptInfo *rel2) 00820 { 00821 bool result = false; 00822 ListCell *l; 00823 00824 /* 00825 * If either side has a lateral reference to the other, attempt the join 00826 * regardless of outer-join considerations. 00827 */ 00828 foreach(l, root->lateral_info_list) 00829 { 00830 LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); 00831 00832 if (bms_is_member(ljinfo->lateral_rhs, rel2->relids) && 00833 bms_overlap(ljinfo->lateral_lhs, rel1->relids)) 00834 return true; 00835 if (bms_is_member(ljinfo->lateral_rhs, rel1->relids) && 00836 bms_overlap(ljinfo->lateral_lhs, rel2->relids)) 00837 return true; 00838 } 00839 00840 /* 00841 * It's possible that the rels correspond to the left and right sides of a 00842 * degenerate outer join, that is, one with no joinclause mentioning the 00843 * non-nullable side; in which case we should force the join to occur. 00844 * 00845 * Also, the two rels could represent a clauseless join that has to be 00846 * completed to build up the LHS or RHS of an outer join. 00847 */ 00848 foreach(l, root->join_info_list) 00849 { 00850 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); 00851 00852 /* ignore full joins --- other mechanisms handle them */ 00853 if (sjinfo->jointype == JOIN_FULL) 00854 continue; 00855 00856 /* Can we perform the SJ with these rels? */ 00857 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && 00858 bms_is_subset(sjinfo->min_righthand, rel2->relids)) 00859 { 00860 result = true; 00861 break; 00862 } 00863 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) && 00864 bms_is_subset(sjinfo->min_righthand, rel1->relids)) 00865 { 00866 result = true; 00867 break; 00868 } 00869 00870 /* 00871 * Might we need to join these rels to complete the RHS? We have to 00872 * use "overlap" tests since either rel might include a lower SJ that 00873 * has been proven to commute with this one. 00874 */ 00875 if (bms_overlap(sjinfo->min_righthand, rel1->relids) && 00876 bms_overlap(sjinfo->min_righthand, rel2->relids)) 00877 { 00878 result = true; 00879 break; 00880 } 00881 00882 /* Likewise for the LHS. */ 00883 if (bms_overlap(sjinfo->min_lefthand, rel1->relids) && 00884 bms_overlap(sjinfo->min_lefthand, rel2->relids)) 00885 { 00886 result = true; 00887 break; 00888 } 00889 } 00890 00891 /* 00892 * We do not force the join to occur if either input rel can legally be 00893 * joined to anything else using joinclauses. This essentially means that 00894 * clauseless bushy joins are put off as long as possible. The reason is 00895 * that when there is a join order restriction high up in the join tree 00896 * (that is, with many rels inside the LHS or RHS), we would otherwise 00897 * expend lots of effort considering very stupid join combinations within 00898 * its LHS or RHS. 00899 */ 00900 if (result) 00901 { 00902 if (has_legal_joinclause(root, rel1) || 00903 has_legal_joinclause(root, rel2)) 00904 result = false; 00905 } 00906 00907 return result; 00908 } 00909 00910 00911 /* 00912 * has_join_restriction 00913 * Detect whether the specified relation has join-order restrictions, 00914 * due to being inside an outer join or an IN (sub-SELECT), 00915 * or participating in any LATERAL references. 00916 * 00917 * Essentially, this tests whether have_join_order_restriction() could 00918 * succeed with this rel and some other one. It's OK if we sometimes 00919 * say "true" incorrectly. (Therefore, we don't bother with the relatively 00920 * expensive has_legal_joinclause test.) 00921 */ 00922 static bool 00923 has_join_restriction(PlannerInfo *root, RelOptInfo *rel) 00924 { 00925 ListCell *l; 00926 00927 foreach(l, root->lateral_info_list) 00928 { 00929 LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); 00930 00931 if (bms_is_member(ljinfo->lateral_rhs, rel->relids) || 00932 bms_overlap(ljinfo->lateral_lhs, rel->relids)) 00933 return true; 00934 } 00935 00936 foreach(l, root->join_info_list) 00937 { 00938 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); 00939 00940 /* ignore full joins --- other mechanisms preserve their ordering */ 00941 if (sjinfo->jointype == JOIN_FULL) 00942 continue; 00943 00944 /* ignore if SJ is already contained in rel */ 00945 if (bms_is_subset(sjinfo->min_lefthand, rel->relids) && 00946 bms_is_subset(sjinfo->min_righthand, rel->relids)) 00947 continue; 00948 00949 /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */ 00950 if (bms_overlap(sjinfo->min_lefthand, rel->relids) || 00951 bms_overlap(sjinfo->min_righthand, rel->relids)) 00952 return true; 00953 } 00954 00955 return false; 00956 } 00957 00958 00959 /* 00960 * has_legal_joinclause 00961 * Detect whether the specified relation can legally be joined 00962 * to any other rels using join clauses. 00963 * 00964 * We consider only joins to single other relations in the current 00965 * initial_rels list. This is sufficient to get a "true" result in most real 00966 * queries, and an occasional erroneous "false" will only cost a bit more 00967 * planning time. The reason for this limitation is that considering joins to 00968 * other joins would require proving that the other join rel can legally be 00969 * formed, which seems like too much trouble for something that's only a 00970 * heuristic to save planning time. (Note: we must look at initial_rels 00971 * and not all of the query, since when we are planning a sub-joinlist we 00972 * may be forced to make clauseless joins within initial_rels even though 00973 * there are join clauses linking to other parts of the query.) 00974 */ 00975 static bool 00976 has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel) 00977 { 00978 ListCell *lc; 00979 00980 foreach(lc, root->initial_rels) 00981 { 00982 RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc); 00983 00984 /* ignore rels that are already in "rel" */ 00985 if (bms_overlap(rel->relids, rel2->relids)) 00986 continue; 00987 00988 if (have_relevant_joinclause(root, rel, rel2)) 00989 { 00990 Relids joinrelids; 00991 SpecialJoinInfo *sjinfo; 00992 bool reversed; 00993 00994 /* join_is_legal needs relids of the union */ 00995 joinrelids = bms_union(rel->relids, rel2->relids); 00996 00997 if (join_is_legal(root, rel, rel2, joinrelids, 00998 &sjinfo, &reversed)) 00999 { 01000 /* Yes, this will work */ 01001 bms_free(joinrelids); 01002 return true; 01003 } 01004 01005 bms_free(joinrelids); 01006 } 01007 } 01008 01009 return false; 01010 } 01011 01012 01013 /* 01014 * is_dummy_rel --- has relation been proven empty? 01015 */ 01016 static bool 01017 is_dummy_rel(RelOptInfo *rel) 01018 { 01019 return IS_DUMMY_REL(rel); 01020 } 01021 01022 /* 01023 * Mark a relation as proven empty. 01024 * 01025 * During GEQO planning, this can get invoked more than once on the same 01026 * baserel struct, so it's worth checking to see if the rel is already marked 01027 * dummy. 01028 * 01029 * Also, when called during GEQO join planning, we are in a short-lived 01030 * memory context. We must make sure that the dummy path attached to a 01031 * baserel survives the GEQO cycle, else the baserel is trashed for future 01032 * GEQO cycles. On the other hand, when we are marking a joinrel during GEQO, 01033 * we don't want the dummy path to clutter the main planning context. Upshot 01034 * is that the best solution is to explicitly make the dummy path in the same 01035 * context the given RelOptInfo is in. 01036 */ 01037 static void 01038 mark_dummy_rel(RelOptInfo *rel) 01039 { 01040 MemoryContext oldcontext; 01041 01042 /* Already marked? */ 01043 if (is_dummy_rel(rel)) 01044 return; 01045 01046 /* No, so choose correct context to make the dummy path in */ 01047 oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); 01048 01049 /* Set dummy size estimate */ 01050 rel->rows = 0; 01051 01052 /* Evict any previously chosen paths */ 01053 rel->pathlist = NIL; 01054 01055 /* Set up the dummy path */ 01056 add_path(rel, (Path *) create_append_path(rel, NIL, NULL)); 01057 01058 /* Set or update cheapest_total_path and related fields */ 01059 set_cheapest(rel); 01060 01061 MemoryContextSwitchTo(oldcontext); 01062 } 01063 01064 01065 /* 01066 * restriction_is_constant_false --- is a restrictlist just FALSE? 01067 * 01068 * In cases where a qual is provably constant FALSE, eval_const_expressions 01069 * will generally have thrown away anything that's ANDed with it. In outer 01070 * join situations this will leave us computing cartesian products only to 01071 * decide there's no match for an outer row, which is pretty stupid. So, 01072 * we need to detect the case. 01073 * 01074 * If only_pushed_down is TRUE, then consider only pushed-down quals. 01075 */ 01076 static bool 01077 restriction_is_constant_false(List *restrictlist, bool only_pushed_down) 01078 { 01079 ListCell *lc; 01080 01081 /* 01082 * Despite the above comment, the restriction list we see here might 01083 * possibly have other members besides the FALSE constant, since other 01084 * quals could get "pushed down" to the outer join level. So we check 01085 * each member of the list. 01086 */ 01087 foreach(lc, restrictlist) 01088 { 01089 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); 01090 01091 Assert(IsA(rinfo, RestrictInfo)); 01092 if (only_pushed_down && !rinfo->is_pushed_down) 01093 continue; 01094 01095 if (rinfo->clause && IsA(rinfo->clause, Const)) 01096 { 01097 Const *con = (Const *) rinfo->clause; 01098 01099 /* constant NULL is as good as constant FALSE for our purposes */ 01100 if (con->constisnull) 01101 return true; 01102 if (!DatumGetBool(con->constvalue)) 01103 return true; 01104 } 01105 } 01106 return false; 01107 }