00001 /*------------------------------------------------------------------------- 00002 * 00003 * orindxpath.c 00004 * Routines to find index paths that match a set of OR clauses 00005 * 00006 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00007 * Portions Copyright (c) 1994, Regents of the University of California 00008 * 00009 * 00010 * IDENTIFICATION 00011 * src/backend/optimizer/path/orindxpath.c 00012 * 00013 *------------------------------------------------------------------------- 00014 */ 00015 00016 #include "postgres.h" 00017 00018 #include "optimizer/cost.h" 00019 #include "optimizer/paths.h" 00020 #include "optimizer/restrictinfo.h" 00021 00022 00023 /*---------- 00024 * create_or_index_quals 00025 * Examine join OR-of-AND quals to see if any useful restriction OR 00026 * clauses can be extracted. If so, add them to the query. 00027 * 00028 * Although a join clause must reference other relations overall, 00029 * an OR of ANDs clause might contain sub-clauses that reference just this 00030 * relation and can be used to build a restriction clause. 00031 * For example consider 00032 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); 00033 * We can transform this into 00034 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) 00035 * AND (a.x = 42 OR a.x = 44) 00036 * AND (b.y = 43 OR b.z = 45); 00037 * which opens the potential to build OR indexscans on a and b. In essence 00038 * this is a partial transformation to CNF (AND of ORs format). It is not 00039 * complete, however, because we do not unravel the original OR --- doing so 00040 * would usually bloat the qualification expression to little gain. 00041 * 00042 * The added quals are partially redundant with the original OR, and therefore 00043 * will cause the size of the joinrel to be underestimated when it is finally 00044 * formed. (This would be true of a full transformation to CNF as well; the 00045 * fault is not really in the transformation, but in clauselist_selectivity's 00046 * inability to recognize redundant conditions.) To minimize the collateral 00047 * damage, we want to minimize the number of quals added. Therefore we do 00048 * not add every possible extracted restriction condition to the query. 00049 * Instead, we search for the single restriction condition that generates 00050 * the most useful (cheapest) OR indexscan, and add only that condition. 00051 * This is a pretty ad-hoc heuristic, but quite useful. 00052 * 00053 * We can then compensate for the redundancy of the added qual by poking 00054 * the recorded selectivity of the original OR clause, thereby ensuring 00055 * the added qual doesn't change the estimated size of the joinrel when 00056 * it is finally formed. This is a MAJOR HACK: it depends on the fact 00057 * that clause selectivities are cached and on the fact that the same 00058 * RestrictInfo node will appear in every joininfo list that might be used 00059 * when the joinrel is formed. And it probably isn't right in cases where 00060 * the size estimation is nonlinear (i.e., outer and IN joins). But it 00061 * beats not doing anything. 00062 * 00063 * NOTE: one might think this messiness could be worked around by generating 00064 * the indexscan path with a small path->rows value, and not touching the 00065 * rel's baserestrictinfo or rel->rows. However, that does not work. 00066 * The optimizer's fundamental design assumes that every general-purpose 00067 * Path for a given relation generates the same number of rows. Without 00068 * this assumption we'd not be able to optimize solely on the cost of Paths, 00069 * but would have to take number of output rows into account as well. 00070 * (The parameterized-paths stuff almost fixes this, but not quite...) 00071 * 00072 * 'rel' is the relation entry for which quals are to be created 00073 * 00074 * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE. 00075 * If no quals available, returns FALSE and doesn't change rel. 00076 * 00077 * Note: check_partial_indexes() must have been run previously. 00078 *---------- 00079 */ 00080 bool 00081 create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) 00082 { 00083 BitmapOrPath *bestpath = NULL; 00084 RestrictInfo *bestrinfo = NULL; 00085 List *newrinfos; 00086 RestrictInfo *or_rinfo; 00087 Selectivity or_selec, 00088 orig_selec; 00089 ListCell *i; 00090 00091 /* Skip the whole mess if no indexes */ 00092 if (rel->indexlist == NIL) 00093 return false; 00094 00095 /* 00096 * Find potentially interesting OR joinclauses. We can use any joinclause 00097 * that is considered safe to move to this rel by the parameterized-path 00098 * machinery, even though what we are going to do with it is not exactly a 00099 * parameterized path. 00100 */ 00101 foreach(i, rel->joininfo) 00102 { 00103 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); 00104 00105 if (restriction_is_or_clause(rinfo) && 00106 join_clause_is_movable_to(rinfo, rel->relid)) 00107 { 00108 /* 00109 * Use the generate_bitmap_or_paths() machinery to estimate the 00110 * value of each OR clause. We can use regular restriction 00111 * clauses along with the OR clause contents to generate 00112 * indexquals. We pass restriction_only = true so that any 00113 * sub-clauses that are actually joins will be ignored. 00114 */ 00115 List *orpaths; 00116 ListCell *k; 00117 00118 orpaths = generate_bitmap_or_paths(root, rel, 00119 list_make1(rinfo), 00120 rel->baserestrictinfo, 00121 true); 00122 00123 /* Locate the cheapest OR path */ 00124 foreach(k, orpaths) 00125 { 00126 BitmapOrPath *path = (BitmapOrPath *) lfirst(k); 00127 00128 Assert(IsA(path, BitmapOrPath)); 00129 if (bestpath == NULL || 00130 path->path.total_cost < bestpath->path.total_cost) 00131 { 00132 bestpath = path; 00133 bestrinfo = rinfo; 00134 } 00135 } 00136 } 00137 } 00138 00139 /* Fail if no suitable clauses found */ 00140 if (bestpath == NULL) 00141 return false; 00142 00143 /* 00144 * Convert the path's indexclauses structure to a RestrictInfo tree. We 00145 * include any partial-index predicates so as to get a reasonable 00146 * representation of what the path is actually scanning. 00147 */ 00148 newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath, 00149 true, true); 00150 00151 /* It's possible we get back something other than a single OR clause */ 00152 if (list_length(newrinfos) != 1) 00153 return false; 00154 or_rinfo = (RestrictInfo *) linitial(newrinfos); 00155 Assert(IsA(or_rinfo, RestrictInfo)); 00156 if (!restriction_is_or_clause(or_rinfo)) 00157 return false; 00158 00159 /* 00160 * OK, add it to the rel's restriction list. 00161 */ 00162 rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos); 00163 00164 /* 00165 * Adjust the original OR clause's cached selectivity to compensate for 00166 * the selectivity of the added (but redundant) lower-level qual. This 00167 * should result in the join rel getting approximately the same rows 00168 * estimate as it would have gotten without all these shenanigans. (XXX 00169 * major hack alert ... this depends on the assumption that the 00170 * selectivity will stay cached ...) 00171 */ 00172 or_selec = clause_selectivity(root, (Node *) or_rinfo, 00173 0, JOIN_INNER, NULL); 00174 if (or_selec > 0 && or_selec < 1) 00175 { 00176 orig_selec = clause_selectivity(root, (Node *) bestrinfo, 00177 0, JOIN_INNER, NULL); 00178 bestrinfo->norm_selec = orig_selec / or_selec; 00179 /* clamp result to sane range */ 00180 if (bestrinfo->norm_selec > 1) 00181 bestrinfo->norm_selec = 1; 00182 /* It isn't an outer join clause, so no need to adjust outer_selec */ 00183 } 00184 00185 /* Tell caller to recompute partial index status and rowcount estimate */ 00186 return true; 00187 }