Header And Logo

PostgreSQL
| The world's most advanced open source database.

orindxpath.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * orindxpath.c
00004  *    Routines to find index paths that match a set of OR clauses
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/optimizer/path/orindxpath.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 
00016 #include "postgres.h"
00017 
00018 #include "optimizer/cost.h"
00019 #include "optimizer/paths.h"
00020 #include "optimizer/restrictinfo.h"
00021 
00022 
00023 /*----------
00024  * create_or_index_quals
00025  *    Examine join OR-of-AND quals to see if any useful restriction OR
00026  *    clauses can be extracted.  If so, add them to the query.
00027  *
00028  * Although a join clause must reference other relations overall,
00029  * an OR of ANDs clause might contain sub-clauses that reference just this
00030  * relation and can be used to build a restriction clause.
00031  * For example consider
00032  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
00033  * We can transform this into
00034  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
00035  *          AND (a.x = 42 OR a.x = 44)
00036  *          AND (b.y = 43 OR b.z = 45);
00037  * which opens the potential to build OR indexscans on a and b.  In essence
00038  * this is a partial transformation to CNF (AND of ORs format).  It is not
00039  * complete, however, because we do not unravel the original OR --- doing so
00040  * would usually bloat the qualification expression to little gain.
00041  *
00042  * The added quals are partially redundant with the original OR, and therefore
00043  * will cause the size of the joinrel to be underestimated when it is finally
00044  * formed.  (This would be true of a full transformation to CNF as well; the
00045  * fault is not really in the transformation, but in clauselist_selectivity's
00046  * inability to recognize redundant conditions.)  To minimize the collateral
00047  * damage, we want to minimize the number of quals added.  Therefore we do
00048  * not add every possible extracted restriction condition to the query.
00049  * Instead, we search for the single restriction condition that generates
00050  * the most useful (cheapest) OR indexscan, and add only that condition.
00051  * This is a pretty ad-hoc heuristic, but quite useful.
00052  *
00053  * We can then compensate for the redundancy of the added qual by poking
00054  * the recorded selectivity of the original OR clause, thereby ensuring
00055  * the added qual doesn't change the estimated size of the joinrel when
00056  * it is finally formed.  This is a MAJOR HACK: it depends on the fact
00057  * that clause selectivities are cached and on the fact that the same
00058  * RestrictInfo node will appear in every joininfo list that might be used
00059  * when the joinrel is formed.  And it probably isn't right in cases where
00060  * the size estimation is nonlinear (i.e., outer and IN joins).  But it
00061  * beats not doing anything.
00062  *
00063  * NOTE: one might think this messiness could be worked around by generating
00064  * the indexscan path with a small path->rows value, and not touching the
00065  * rel's baserestrictinfo or rel->rows.  However, that does not work.
00066  * The optimizer's fundamental design assumes that every general-purpose
00067  * Path for a given relation generates the same number of rows.  Without
00068  * this assumption we'd not be able to optimize solely on the cost of Paths,
00069  * but would have to take number of output rows into account as well.
00070  * (The parameterized-paths stuff almost fixes this, but not quite...)
00071  *
00072  * 'rel' is the relation entry for which quals are to be created
00073  *
00074  * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
00075  * If no quals available, returns FALSE and doesn't change rel.
00076  *
00077  * Note: check_partial_indexes() must have been run previously.
00078  *----------
00079  */
00080 bool
00081 create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
00082 {
00083     BitmapOrPath *bestpath = NULL;
00084     RestrictInfo *bestrinfo = NULL;
00085     List       *newrinfos;
00086     RestrictInfo *or_rinfo;
00087     Selectivity or_selec,
00088                 orig_selec;
00089     ListCell   *i;
00090 
00091     /* Skip the whole mess if no indexes */
00092     if (rel->indexlist == NIL)
00093         return false;
00094 
00095     /*
00096      * Find potentially interesting OR joinclauses.  We can use any joinclause
00097      * that is considered safe to move to this rel by the parameterized-path
00098      * machinery, even though what we are going to do with it is not exactly a
00099      * parameterized path.
00100      */
00101     foreach(i, rel->joininfo)
00102     {
00103         RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
00104 
00105         if (restriction_is_or_clause(rinfo) &&
00106             join_clause_is_movable_to(rinfo, rel->relid))
00107         {
00108             /*
00109              * Use the generate_bitmap_or_paths() machinery to estimate the
00110              * value of each OR clause.  We can use regular restriction
00111              * clauses along with the OR clause contents to generate
00112              * indexquals.  We pass restriction_only = true so that any
00113              * sub-clauses that are actually joins will be ignored.
00114              */
00115             List       *orpaths;
00116             ListCell   *k;
00117 
00118             orpaths = generate_bitmap_or_paths(root, rel,
00119                                                list_make1(rinfo),
00120                                                rel->baserestrictinfo,
00121                                                true);
00122 
00123             /* Locate the cheapest OR path */
00124             foreach(k, orpaths)
00125             {
00126                 BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
00127 
00128                 Assert(IsA(path, BitmapOrPath));
00129                 if (bestpath == NULL ||
00130                     path->path.total_cost < bestpath->path.total_cost)
00131                 {
00132                     bestpath = path;
00133                     bestrinfo = rinfo;
00134                 }
00135             }
00136         }
00137     }
00138 
00139     /* Fail if no suitable clauses found */
00140     if (bestpath == NULL)
00141         return false;
00142 
00143     /*
00144      * Convert the path's indexclauses structure to a RestrictInfo tree. We
00145      * include any partial-index predicates so as to get a reasonable
00146      * representation of what the path is actually scanning.
00147      */
00148     newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
00149                                                   true, true);
00150 
00151     /* It's possible we get back something other than a single OR clause */
00152     if (list_length(newrinfos) != 1)
00153         return false;
00154     or_rinfo = (RestrictInfo *) linitial(newrinfos);
00155     Assert(IsA(or_rinfo, RestrictInfo));
00156     if (!restriction_is_or_clause(or_rinfo))
00157         return false;
00158 
00159     /*
00160      * OK, add it to the rel's restriction list.
00161      */
00162     rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);
00163 
00164     /*
00165      * Adjust the original OR clause's cached selectivity to compensate for
00166      * the selectivity of the added (but redundant) lower-level qual. This
00167      * should result in the join rel getting approximately the same rows
00168      * estimate as it would have gotten without all these shenanigans. (XXX
00169      * major hack alert ... this depends on the assumption that the
00170      * selectivity will stay cached ...)
00171      */
00172     or_selec = clause_selectivity(root, (Node *) or_rinfo,
00173                                   0, JOIN_INNER, NULL);
00174     if (or_selec > 0 && or_selec < 1)
00175     {
00176         orig_selec = clause_selectivity(root, (Node *) bestrinfo,
00177                                         0, JOIN_INNER, NULL);
00178         bestrinfo->norm_selec = orig_selec / or_selec;
00179         /* clamp result to sane range */
00180         if (bestrinfo->norm_selec > 1)
00181             bestrinfo->norm_selec = 1;
00182         /* It isn't an outer join clause, so no need to adjust outer_selec */
00183     }
00184 
00185     /* Tell caller to recompute partial index status and rowcount estimate */
00186     return true;
00187 }