00001 /*------------------------------------------------------------------------- 00002 * 00003 * analyzejoins.c 00004 * Routines for simplifying joins after initial query analysis 00005 * 00006 * While we do a great deal of join simplification in prep/prepjointree.c, 00007 * certain optimizations cannot be performed at that stage for lack of 00008 * detailed information about the query. The routines here are invoked 00009 * after initsplan.c has done its work, and can do additional join removal 00010 * and simplification steps based on the information extracted. The penalty 00011 * is that we have to work harder to clean up after ourselves when we modify 00012 * the query, since the derived data structures have to be updated too. 00013 * 00014 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00015 * Portions Copyright (c) 1994, Regents of the University of California 00016 * 00017 * 00018 * IDENTIFICATION 00019 * src/backend/optimizer/plan/analyzejoins.c 00020 * 00021 *------------------------------------------------------------------------- 00022 */ 00023 #include "postgres.h" 00024 00025 #include "optimizer/joininfo.h" 00026 #include "optimizer/pathnode.h" 00027 #include "optimizer/paths.h" 00028 #include "optimizer/planmain.h" 00029 #include "optimizer/var.h" 00030 00031 /* local functions */ 00032 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); 00033 static void remove_rel_from_query(PlannerInfo *root, int relid, 00034 Relids joinrelids); 00035 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); 00036 00037 00038 /* 00039 * remove_useless_joins 00040 * Check for relations that don't actually need to be joined at all, 00041 * and remove them from the query. 00042 * 00043 * We are passed the current joinlist and return the updated list. Other 00044 * data structures that have to be updated are accessible via "root". 00045 */ 00046 List * 00047 remove_useless_joins(PlannerInfo *root, List *joinlist) 00048 { 00049 ListCell *lc; 00050 00051 /* 00052 * We are only interested in relations that are left-joined to, so we can 00053 * scan the join_info_list to find them easily. 00054 */ 00055 restart: 00056 foreach(lc, root->join_info_list) 00057 { 00058 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); 00059 int innerrelid; 00060 int nremoved; 00061 00062 /* Skip if not removable */ 00063 if (!join_is_removable(root, sjinfo)) 00064 continue; 00065 00066 /* 00067 * Currently, join_is_removable can only succeed when the sjinfo's 00068 * righthand is a single baserel. Remove that rel from the query and 00069 * joinlist. 00070 */ 00071 innerrelid = bms_singleton_member(sjinfo->min_righthand); 00072 00073 remove_rel_from_query(root, innerrelid, 00074 bms_union(sjinfo->min_lefthand, 00075 sjinfo->min_righthand)); 00076 00077 /* We verify that exactly one reference gets removed from joinlist */ 00078 nremoved = 0; 00079 joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved); 00080 if (nremoved != 1) 00081 elog(ERROR, "failed to find relation %d in joinlist", innerrelid); 00082 00083 /* 00084 * We can delete this SpecialJoinInfo from the list too, since it's no 00085 * longer of interest. 00086 */ 00087 root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); 00088 00089 /* 00090 * Restart the scan. This is necessary to ensure we find all 00091 * removable joins independently of ordering of the join_info_list 00092 * (note that removal of attr_needed bits may make a join appear 00093 * removable that did not before). Also, since we just deleted the 00094 * current list cell, we'd have to have some kluge to continue the 00095 * list scan anyway. 00096 */ 00097 goto restart; 00098 } 00099 00100 return joinlist; 00101 } 00102 00103 /* 00104 * clause_sides_match_join 00105 * Determine whether a join clause is of the right form to use in this join. 00106 * 00107 * We already know that the clause is a binary opclause referencing only the 00108 * rels in the current join. The point here is to check whether it has the 00109 * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", 00110 * rather than mixing outer and inner vars on either side. If it matches, 00111 * we set the transient flag outer_is_left to identify which side is which. 00112 */ 00113 static inline bool 00114 clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, 00115 Relids innerrelids) 00116 { 00117 if (bms_is_subset(rinfo->left_relids, outerrelids) && 00118 bms_is_subset(rinfo->right_relids, innerrelids)) 00119 { 00120 /* lefthand side is outer */ 00121 rinfo->outer_is_left = true; 00122 return true; 00123 } 00124 else if (bms_is_subset(rinfo->left_relids, innerrelids) && 00125 bms_is_subset(rinfo->right_relids, outerrelids)) 00126 { 00127 /* righthand side is outer */ 00128 rinfo->outer_is_left = false; 00129 return true; 00130 } 00131 return false; /* no good for these input relations */ 00132 } 00133 00134 /* 00135 * join_is_removable 00136 * Check whether we need not perform this special join at all, because 00137 * it will just duplicate its left input. 00138 * 00139 * This is true for a left join for which the join condition cannot match 00140 * more than one inner-side row. (There are other possibly interesting 00141 * cases, but we don't have the infrastructure to prove them.) We also 00142 * have to check that the inner side doesn't generate any variables needed 00143 * above the join. 00144 */ 00145 static bool 00146 join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) 00147 { 00148 int innerrelid; 00149 RelOptInfo *innerrel; 00150 Relids joinrelids; 00151 List *clause_list = NIL; 00152 ListCell *l; 00153 int attroff; 00154 00155 /* 00156 * Currently, we only know how to remove left joins to a baserel with 00157 * unique indexes. We can check most of these criteria pretty trivially 00158 * to avoid doing useless extra work. But checking whether any of the 00159 * indexes are unique would require iterating over the indexlist, so for 00160 * now we just make sure there are indexes of some sort or other. If none 00161 * of them are unique, join removal will still fail, just slightly later. 00162 */ 00163 if (sjinfo->jointype != JOIN_LEFT || 00164 sjinfo->delay_upper_joins || 00165 bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) 00166 return false; 00167 00168 innerrelid = bms_singleton_member(sjinfo->min_righthand); 00169 innerrel = find_base_rel(root, innerrelid); 00170 00171 if (innerrel->reloptkind != RELOPT_BASEREL || 00172 innerrel->rtekind != RTE_RELATION || 00173 innerrel->indexlist == NIL) 00174 return false; 00175 00176 /* Compute the relid set for the join we are considering */ 00177 joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); 00178 00179 /* 00180 * We can't remove the join if any inner-rel attributes are used above the 00181 * join. 00182 * 00183 * Note that this test only detects use of inner-rel attributes in higher 00184 * join conditions and the target list. There might be such attributes in 00185 * pushed-down conditions at this join, too. We check that case below. 00186 * 00187 * As a micro-optimization, it seems better to start with max_attr and 00188 * count down rather than starting with min_attr and counting up, on the 00189 * theory that the system attributes are somewhat less likely to be wanted 00190 * and should be tested last. 00191 */ 00192 for (attroff = innerrel->max_attr - innerrel->min_attr; 00193 attroff >= 0; 00194 attroff--) 00195 { 00196 if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) 00197 return false; 00198 } 00199 00200 /* 00201 * Similarly check that the inner rel isn't needed by any PlaceHolderVars 00202 * that will be used above the join. We only need to fail if such a PHV 00203 * actually references some inner-rel attributes; but the correct check 00204 * for that is relatively expensive, so we first check against ph_eval_at, 00205 * which must mention the inner rel if the PHV uses any inner-rel attrs. 00206 */ 00207 foreach(l, root->placeholder_list) 00208 { 00209 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); 00210 00211 if (bms_is_subset(phinfo->ph_needed, joinrelids)) 00212 continue; /* PHV is not used above the join */ 00213 if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) 00214 continue; /* it definitely doesn't reference innerrel */ 00215 if (bms_overlap(pull_varnos((Node *) phinfo->ph_var), 00216 innerrel->relids)) 00217 return false; /* it does reference innerrel */ 00218 } 00219 00220 /* 00221 * Search for mergejoinable clauses that constrain the inner rel against 00222 * either the outer rel or a pseudoconstant. If an operator is 00223 * mergejoinable then it behaves like equality for some btree opclass, so 00224 * it's what we want. The mergejoinability test also eliminates clauses 00225 * containing volatile functions, which we couldn't depend on. 00226 */ 00227 foreach(l, innerrel->joininfo) 00228 { 00229 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); 00230 00231 /* 00232 * If it's not a join clause for this outer join, we can't use it. 00233 * Note that if the clause is pushed-down, then it is logically from 00234 * above the outer join, even if it references no other rels (it might 00235 * be from WHERE, for example). 00236 */ 00237 if (restrictinfo->is_pushed_down || 00238 !bms_equal(restrictinfo->required_relids, joinrelids)) 00239 { 00240 /* 00241 * If such a clause actually references the inner rel then join 00242 * removal has to be disallowed. We have to check this despite 00243 * the previous attr_needed checks because of the possibility of 00244 * pushed-down clauses referencing the rel. 00245 */ 00246 if (bms_is_member(innerrelid, restrictinfo->clause_relids)) 00247 return false; 00248 continue; /* else, ignore; not useful here */ 00249 } 00250 00251 /* Ignore if it's not a mergejoinable clause */ 00252 if (!restrictinfo->can_join || 00253 restrictinfo->mergeopfamilies == NIL) 00254 continue; /* not mergejoinable */ 00255 00256 /* 00257 * Check if clause has the form "outer op inner" or "inner op outer". 00258 */ 00259 if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, 00260 innerrel->relids)) 00261 continue; /* no good for these input relations */ 00262 00263 /* OK, add to list */ 00264 clause_list = lappend(clause_list, restrictinfo); 00265 } 00266 00267 /* 00268 * relation_has_unique_index_for automatically adds any usable restriction 00269 * clauses for the innerrel, so we needn't do that here. 00270 */ 00271 00272 /* Now examine the indexes to see if we have a matching unique index */ 00273 if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) 00274 return true; 00275 00276 /* 00277 * Some day it would be nice to check for other methods of establishing 00278 * distinctness. 00279 */ 00280 return false; 00281 } 00282 00283 00284 /* 00285 * Remove the target relid from the planner's data structures, having 00286 * determined that there is no need to include it in the query. 00287 * 00288 * We are not terribly thorough here. We must make sure that the rel is 00289 * no longer treated as a baserel, and that attributes of other baserels 00290 * are no longer marked as being needed at joins involving this rel. 00291 * Also, join quals involving the rel have to be removed from the joininfo 00292 * lists, but only if they belong to the outer join identified by joinrelids. 00293 */ 00294 static void 00295 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) 00296 { 00297 RelOptInfo *rel = find_base_rel(root, relid); 00298 List *joininfos; 00299 Index rti; 00300 ListCell *l; 00301 ListCell *nextl; 00302 00303 /* 00304 * Mark the rel as "dead" to show it is no longer part of the join tree. 00305 * (Removing it from the baserel array altogether seems too risky.) 00306 */ 00307 rel->reloptkind = RELOPT_DEADREL; 00308 00309 /* 00310 * Remove references to the rel from other baserels' attr_needed arrays. 00311 */ 00312 for (rti = 1; rti < root->simple_rel_array_size; rti++) 00313 { 00314 RelOptInfo *otherrel = root->simple_rel_array[rti]; 00315 int attroff; 00316 00317 /* there may be empty slots corresponding to non-baserel RTEs */ 00318 if (otherrel == NULL) 00319 continue; 00320 00321 Assert(otherrel->relid == rti); /* sanity check on array */ 00322 00323 /* no point in processing target rel itself */ 00324 if (otherrel == rel) 00325 continue; 00326 00327 for (attroff = otherrel->max_attr - otherrel->min_attr; 00328 attroff >= 0; 00329 attroff--) 00330 { 00331 otherrel->attr_needed[attroff] = 00332 bms_del_member(otherrel->attr_needed[attroff], relid); 00333 } 00334 } 00335 00336 /* 00337 * Likewise remove references from SpecialJoinInfo data structures. 00338 * 00339 * This is relevant in case the outer join we're deleting is nested inside 00340 * other outer joins: the upper joins' relid sets have to be adjusted. The 00341 * RHS of the target outer join will be made empty here, but that's OK 00342 * since caller will delete that SpecialJoinInfo entirely. 00343 */ 00344 foreach(l, root->join_info_list) 00345 { 00346 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); 00347 00348 sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid); 00349 sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid); 00350 sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid); 00351 sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid); 00352 } 00353 00354 /* 00355 * Likewise remove references from LateralJoinInfo data structures. 00356 * 00357 * If we are deleting a LATERAL subquery, we can forget its 00358 * LateralJoinInfo altogether. Otherwise, make sure the target is not 00359 * included in any lateral_lhs set. (It probably can't be, since that 00360 * should have precluded deciding to remove it; but let's cope anyway.) 00361 */ 00362 for (l = list_head(root->lateral_info_list); l != NULL; l = nextl) 00363 { 00364 LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l); 00365 00366 nextl = lnext(l); 00367 if (ljinfo->lateral_rhs == relid) 00368 root->lateral_info_list = list_delete_ptr(root->lateral_info_list, 00369 ljinfo); 00370 else 00371 ljinfo->lateral_lhs = bms_del_member(ljinfo->lateral_lhs, relid); 00372 } 00373 00374 /* 00375 * Likewise remove references from PlaceHolderVar data structures. 00376 * 00377 * Here we have a special case: if a PHV's eval_at set is just the target 00378 * relid, we want to leave it that way instead of reducing it to the empty 00379 * set. An empty eval_at set would confuse later processing since it 00380 * would match every possible eval placement. 00381 */ 00382 foreach(l, root->placeholder_list) 00383 { 00384 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); 00385 00386 phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); 00387 if (bms_is_empty(phinfo->ph_eval_at)) /* oops, belay that */ 00388 phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid); 00389 00390 phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); 00391 /* ph_may_need probably isn't used after this, but fix it anyway */ 00392 phinfo->ph_may_need = bms_del_member(phinfo->ph_may_need, relid); 00393 } 00394 00395 /* 00396 * Remove any joinquals referencing the rel from the joininfo lists. 00397 * 00398 * In some cases, a joinqual has to be put back after deleting its 00399 * reference to the target rel. This can occur for pseudoconstant and 00400 * outerjoin-delayed quals, which can get marked as requiring the rel in 00401 * order to force them to be evaluated at or above the join. We can't 00402 * just discard them, though. Only quals that logically belonged to the 00403 * outer join being discarded should be removed from the query. 00404 * 00405 * We must make a copy of the rel's old joininfo list before starting the 00406 * loop, because otherwise remove_join_clause_from_rels would destroy the 00407 * list while we're scanning it. 00408 */ 00409 joininfos = list_copy(rel->joininfo); 00410 foreach(l, joininfos) 00411 { 00412 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); 00413 00414 remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); 00415 00416 if (rinfo->is_pushed_down || 00417 !bms_equal(rinfo->required_relids, joinrelids)) 00418 { 00419 /* Recheck that qual doesn't actually reference the target rel */ 00420 Assert(!bms_is_member(relid, rinfo->clause_relids)); 00421 00422 /* 00423 * The required_relids probably aren't shared with anything else, 00424 * but let's copy them just to be sure. 00425 */ 00426 rinfo->required_relids = bms_copy(rinfo->required_relids); 00427 rinfo->required_relids = bms_del_member(rinfo->required_relids, 00428 relid); 00429 distribute_restrictinfo_to_rels(root, rinfo); 00430 } 00431 } 00432 } 00433 00434 /* 00435 * Remove any occurrences of the target relid from a joinlist structure. 00436 * 00437 * It's easiest to build a whole new list structure, so we handle it that 00438 * way. Efficiency is not a big deal here. 00439 * 00440 * *nremoved is incremented by the number of occurrences removed (there 00441 * should be exactly one, but the caller checks that). 00442 */ 00443 static List * 00444 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) 00445 { 00446 List *result = NIL; 00447 ListCell *jl; 00448 00449 foreach(jl, joinlist) 00450 { 00451 Node *jlnode = (Node *) lfirst(jl); 00452 00453 if (IsA(jlnode, RangeTblRef)) 00454 { 00455 int varno = ((RangeTblRef *) jlnode)->rtindex; 00456 00457 if (varno == relid) 00458 (*nremoved)++; 00459 else 00460 result = lappend(result, jlnode); 00461 } 00462 else if (IsA(jlnode, List)) 00463 { 00464 /* Recurse to handle subproblem */ 00465 List *sublist; 00466 00467 sublist = remove_rel_from_joinlist((List *) jlnode, 00468 relid, nremoved); 00469 /* Avoid including empty sub-lists in the result */ 00470 if (sublist) 00471 result = lappend(result, sublist); 00472 } 00473 else 00474 { 00475 elog(ERROR, "unrecognized joinlist node type: %d", 00476 (int) nodeTag(jlnode)); 00477 } 00478 } 00479 00480 return result; 00481 }