#include "postgres.h"
#include "catalog/pg_operator.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
Go to the source code of this file.
Data Structures | |
struct | RangeQueryClause |
Typedefs | |
typedef struct RangeQueryClause | RangeQueryClause |
Functions | |
static void | addRangeClause (RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2) |
Selectivity | clauselist_selectivity (PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo) |
static bool | bms_is_subset_singleton (const Bitmapset *s, int x) |
static bool | treat_as_join_clause (Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo) |
Selectivity | clause_selectivity (PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo) |
typedef struct RangeQueryClause RangeQueryClause |
static void addRangeClause | ( | RangeQueryClause ** | rqlist, | |
Node * | clause, | |||
bool | varonleft, | |||
bool | isLTsel, | |||
Selectivity | s2 | |||
) | [static] |
Definition at line 287 of file clausesel.c.
References equal(), get_leftop(), get_rightop(), RangeQueryClause::have_hibound, RangeQueryClause::have_lobound, RangeQueryClause::hibound, RangeQueryClause::lobound, RangeQueryClause::next, palloc(), and RangeQueryClause::var.
Referenced by clauselist_selectivity().
{ RangeQueryClause *rqelem; Node *var; bool is_lobound; if (varonleft) { var = get_leftop((Expr *) clause); is_lobound = !isLTsel; /* x < something is high bound */ } else { var = get_rightop((Expr *) clause); is_lobound = isLTsel; /* something < x is low bound */ } for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) { /* * We use full equal() here because the "var" might be a function of * one or more attributes of the same relation... */ if (!equal(var, rqelem->var)) continue; /* Found the right group to put this clause in */ if (is_lobound) { if (!rqelem->have_lobound) { rqelem->have_lobound = true; rqelem->lobound = s2; } else { /*------ * We have found two similar clauses, such as * x < y AND x < z. * Keep only the more restrictive one. *------ */ if (rqelem->lobound > s2) rqelem->lobound = s2; } } else { if (!rqelem->have_hibound) { rqelem->have_hibound = true; rqelem->hibound = s2; } else { /*------ * We have found two similar clauses, such as * x > y AND x > z. * Keep only the more restrictive one. *------ */ if (rqelem->hibound > s2) rqelem->hibound = s2; } } return; } /* No matching var found, so make a new clause-pair data structure */ rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); rqelem->var = var; if (is_lobound) { rqelem->have_lobound = true; rqelem->have_hibound = false; rqelem->lobound = s2; } else { rqelem->have_lobound = false; rqelem->have_hibound = true; rqelem->hibound = s2; } rqelem->next = *rqlist; *rqlist = rqelem; }
Definition at line 385 of file clausesel.c.
References BMS_EMPTY_SET, bms_is_member(), bms_membership(), BMS_MULTIPLE, and BMS_SINGLETON.
Referenced by clause_selectivity().
{ switch (bms_membership(s)) { case BMS_EMPTY_SET: return true; case BMS_SINGLETON: return bms_is_member(x, s); case BMS_MULTIPLE: return false; } /* can't get here... */ return false; }
Selectivity clause_selectivity | ( | PlannerInfo * | root, | |
Node * | clause, | |||
int | varRelid, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo | |||
) |
Definition at line 484 of file clausesel.c.
References and_clause(), arg, OpExpr::args, bms_is_subset_singleton(), BooleanEqualOperator, booltestsel(), RestrictInfo::clause, RestrictInfo::clause_relids, clause_selectivity(), clauselist_selectivity(), Const::constisnull, Const::constvalue, CurrentOfExpr::cvarno, DatumGetBool, DEBUG4, elog, estimate_expression_value(), find_base_rel(), get_notclausearg(), OpExpr::inputcollid, InvalidOid, is_funcclause, is_opclause, IsA, JOIN_INNER, join_selectivity(), lfirst, list_make2, makeBoolConst(), RestrictInfo::norm_selec, not_clause(), NULL, nulltestsel(), OpExpr::opno, or_clause(), RestrictInfo::orclause, RestrictInfo::outer_selec, RestrictInfo::pseudoconstant, restriction_selectivity(), rowcomparesel(), s1, s2, scalararraysel(), treat_as_join_clause(), RelOptInfo::tuples, RangeQueryClause::var, Var::varlevelsup, and Var::varno.
Referenced by approx_tuple_count(), booltestsel(), clause_selectivity(), clauselist_selectivity(), and create_or_index_quals().
{ Selectivity s1 = 0.5; /* default for any unhandled clause type */ RestrictInfo *rinfo = NULL; bool cacheable = false; if (clause == NULL) /* can this still happen? */ return s1; if (IsA(clause, RestrictInfo)) { rinfo = (RestrictInfo *) clause; /* * If the clause is marked pseudoconstant, then it will be used as a * gating qual and should not affect selectivity estimates; hence * return 1.0. The only exception is that a constant FALSE may be * taken as having selectivity 0.0, since it will surely mean no rows * out of the plan. This case is simple enough that we need not * bother caching the result. */ if (rinfo->pseudoconstant) { if (!IsA(rinfo->clause, Const)) return (Selectivity) 1.0; } /* * If the clause is marked redundant, always return 1.0. */ if (rinfo->norm_selec > 1) return (Selectivity) 1.0; /* * If possible, cache the result of the selectivity calculation for * the clause. We can cache if varRelid is zero or the clause * contains only vars of that relid --- otherwise varRelid will affect * the result, so mustn't cache. Outer join quals might be examined * with either their join's actual jointype or JOIN_INNER, so we need * two cache variables to remember both cases. Note: we assume the * result won't change if we are switching the input relations or * considering a unique-ified case, so we only need one cache variable * for all non-JOIN_INNER cases. */ if (varRelid == 0 || bms_is_subset_singleton(rinfo->clause_relids, varRelid)) { /* Cacheable --- do we already have the result? */ if (jointype == JOIN_INNER) { if (rinfo->norm_selec >= 0) return rinfo->norm_selec; } else { if (rinfo->outer_selec >= 0) return rinfo->outer_selec; } cacheable = true; } /* * Proceed with examination of contained clause. If the clause is an * OR-clause, we want to look at the variant with sub-RestrictInfos, * so that per-subclause selectivities can be cached. */ if (rinfo->orclause) clause = (Node *) rinfo->orclause; else clause = (Node *) rinfo->clause; } if (IsA(clause, Var)) { Var *var = (Var *) clause; /* * We probably shouldn't ever see an uplevel Var here, but if we do, * return the default selectivity... */ if (var->varlevelsup == 0 && (varRelid == 0 || varRelid == (int) var->varno)) { /* * A Var at the top of a clause must be a bool Var. This is * equivalent to the clause reln.attribute = 't', so we compute * the selectivity as if that is what we have. */ s1 = restriction_selectivity(root, BooleanEqualOperator, list_make2(var, makeBoolConst(true, false)), InvalidOid, varRelid); } } else if (IsA(clause, Const)) { /* bool constant is pretty easy... */ Const *con = (Const *) clause; s1 = con->constisnull ? 0.0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0; } else if (IsA(clause, Param)) { /* see if we can replace the Param */ Node *subst = estimate_expression_value(root, clause); if (IsA(subst, Const)) { /* bool constant is pretty easy... */ Const *con = (Const *) subst; s1 = con->constisnull ? 0.0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0; } else { /* XXX any way to do better than default? */ } } else if (not_clause(clause)) { /* inverse of the selectivity of the underlying clause */ s1 = 1.0 - clause_selectivity(root, (Node *) get_notclausearg((Expr *) clause), varRelid, jointype, sjinfo); } else if (and_clause(clause)) { /* share code with clauselist_selectivity() */ s1 = clauselist_selectivity(root, ((BoolExpr *) clause)->args, varRelid, jointype, sjinfo); } else if (or_clause(clause)) { /* * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to * account for the probable overlap of selected tuple sets. * * XXX is this too conservative? */ ListCell *arg; s1 = 0.0; foreach(arg, ((BoolExpr *) clause)->args) { Selectivity s2 = clause_selectivity(root, (Node *) lfirst(arg), varRelid, jointype, sjinfo); s1 = s1 + s2 - s1 * s2; } } else if (is_opclause(clause) || IsA(clause, DistinctExpr)) { OpExpr *opclause = (OpExpr *) clause; Oid opno = opclause->opno; if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, opclause->args, opclause->inputcollid, jointype, sjinfo); } else { /* Estimate selectivity for a restriction clause. */ s1 = restriction_selectivity(root, opno, opclause->args, opclause->inputcollid, varRelid); } /* * DistinctExpr has the same representation as OpExpr, but the * contained operator is "=" not "<>", so we must negate the result. * This estimation method doesn't give the right behavior for nulls, * but it's better than doing nothing. */ if (IsA(clause, DistinctExpr)) s1 = 1.0 - s1; } else if (is_funcclause(clause)) { /* * This is not an operator, so we guess at the selectivity. THIS IS A * HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE * SELECTIVITIES THEMSELVES. -- JMH 7/9/92 */ s1 = (Selectivity) 0.3333333; } #ifdef NOT_USED else if (IsA(clause, SubPlan) || IsA(clause, AlternativeSubPlan)) { /* * Just for the moment! FIX ME! - vadim 02/04/98 */ s1 = (Selectivity) 0.5; } #endif else if (IsA(clause, ScalarArrayOpExpr)) { /* Use node specific selectivity calculation function */ s1 = scalararraysel(root, (ScalarArrayOpExpr *) clause, treat_as_join_clause(clause, rinfo, varRelid, sjinfo), varRelid, jointype, sjinfo); } else if (IsA(clause, RowCompareExpr)) { /* Use node specific selectivity calculation function */ s1 = rowcomparesel(root, (RowCompareExpr *) clause, varRelid, jointype, sjinfo); } else if (IsA(clause, NullTest)) { /* Use node specific selectivity calculation function */ s1 = nulltestsel(root, ((NullTest *) clause)->nulltesttype, (Node *) ((NullTest *) clause)->arg, varRelid, jointype, sjinfo); } else if (IsA(clause, BooleanTest)) { /* Use node specific selectivity calculation function */ s1 = booltestsel(root, ((BooleanTest *) clause)->booltesttype, (Node *) ((BooleanTest *) clause)->arg, varRelid, jointype, sjinfo); } else if (IsA(clause, CurrentOfExpr)) { /* CURRENT OF selects at most one row of its table */ CurrentOfExpr *cexpr = (CurrentOfExpr *) clause; RelOptInfo *crel = find_base_rel(root, cexpr->cvarno); if (crel->tuples > 0) s1 = 1.0 / crel->tuples; } else if (IsA(clause, RelabelType)) { /* Not sure this case is needed, but it can't hurt */ s1 = clause_selectivity(root, (Node *) ((RelabelType *) clause)->arg, varRelid, jointype, sjinfo); } else if (IsA(clause, CoerceToDomain)) { /* Not sure this case is needed, but it can't hurt */ s1 = clause_selectivity(root, (Node *) ((CoerceToDomain *) clause)->arg, varRelid, jointype, sjinfo); } /* Cache the result if possible */ if (cacheable) { if (jointype == JOIN_INNER) rinfo->norm_selec = s1; else rinfo->outer_selec = s1; } #ifdef SELECTIVITY_DEBUG elog(DEBUG4, "clause_selectivity: s1 %f", s1); #endif /* SELECTIVITY_DEBUG */ return s1; }
Selectivity clauselist_selectivity | ( | PlannerInfo * | root, | |
List * | clauses, | |||
int | varRelid, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo | |||
) |
Definition at line 93 of file clausesel.c.
References addRangeClause(), OpExpr::args, bms_membership(), RestrictInfo::clause, RestrictInfo::clause_relids, clause_selectivity(), DEFAULT_INEQ_SEL, get_oprrest(), RangeQueryClause::have_hibound, RangeQueryClause::have_lobound, RangeQueryClause::hibound, IS_NULL, is_opclause, is_pseudo_constant_clause(), is_pseudo_constant_clause_relids(), IsA, RestrictInfo::left_relids, lfirst, linitial, list_length(), RangeQueryClause::lobound, lsecond, RangeQueryClause::next, NULL, nulltestsel(), NumRelids(), OpExpr::opno, pfree(), RestrictInfo::pseudoconstant, RestrictInfo::right_relids, s1, s2, and RangeQueryClause::var.
Referenced by btcostestimate(), calc_joinrel_size_estimate(), clause_selectivity(), compute_semi_anti_join_factors(), estimate_size(), genericcostestimate(), get_parameterized_baserel_size(), gincostestimate(), postgresGetForeignRelSize(), and set_baserel_size_estimates().
{ Selectivity s1 = 1.0; RangeQueryClause *rqlist = NULL; ListCell *l; /* * If there's exactly one clause, then no use in trying to match up pairs, * so just go directly to clause_selectivity(). */ if (list_length(clauses) == 1) return clause_selectivity(root, (Node *) linitial(clauses), varRelid, jointype, sjinfo); /* * Initial scan over clauses. Anything that doesn't look like a potential * rangequery clause gets multiplied into s1 and forgotten. Anything that * does gets inserted into an rqlist entry. */ foreach(l, clauses) { Node *clause = (Node *) lfirst(l); RestrictInfo *rinfo; Selectivity s2; /* Always compute the selectivity using clause_selectivity */ s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo); /* * Check for being passed a RestrictInfo. * * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or * 0.0; just use that rather than looking for range pairs. */ if (IsA(clause, RestrictInfo)) { rinfo = (RestrictInfo *) clause; if (rinfo->pseudoconstant) { s1 = s1 * s2; continue; } clause = (Node *) rinfo->clause; } else rinfo = NULL; /* * See if it looks like a restriction clause with a pseudoconstant on * one side. (Anything more complicated than that might not behave in * the simple way we are expecting.) Most of the tests here can be * done more efficiently with rinfo than without. */ if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) { OpExpr *expr = (OpExpr *) clause; bool varonleft = true; bool ok; if (rinfo) { ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && (is_pseudo_constant_clause_relids(lsecond(expr->args), rinfo->right_relids) || (varonleft = false, is_pseudo_constant_clause_relids(linitial(expr->args), rinfo->left_relids))); } else { ok = (NumRelids(clause) == 1) && (is_pseudo_constant_clause(lsecond(expr->args)) || (varonleft = false, is_pseudo_constant_clause(linitial(expr->args)))); } if (ok) { /* * If it's not a "<" or ">" operator, just merge the * selectivity in generically. But if it's the right oprrest, * add the clause to rqlist for later processing. */ switch (get_oprrest(expr->opno)) { case F_SCALARLTSEL: addRangeClause(&rqlist, clause, varonleft, true, s2); break; case F_SCALARGTSEL: addRangeClause(&rqlist, clause, varonleft, false, s2); break; default: /* Just merge the selectivity in generically */ s1 = s1 * s2; break; } continue; /* drop to loop bottom */ } } /* Not the right form, so treat it generically. */ s1 = s1 * s2; } /* * Now scan the rangequery pair list. */ while (rqlist != NULL) { RangeQueryClause *rqnext; if (rqlist->have_lobound && rqlist->have_hibound) { /* Successfully matched a pair of range clauses */ Selectivity s2; /* * Exact equality to the default value probably means the * selectivity function punted. This is not airtight but should * be good enough. */ if (rqlist->hibound == DEFAULT_INEQ_SEL || rqlist->lobound == DEFAULT_INEQ_SEL) { s2 = DEFAULT_RANGE_INEQ_SEL; } else { s2 = rqlist->hibound + rqlist->lobound - 1.0; /* Adjust for double-exclusion of NULLs */ s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid, jointype, sjinfo); /* * A zero or slightly negative s2 should be converted into a * small positive value; we probably are dealing with a very * tight range and got a bogus result due to roundoff errors. * However, if s2 is very negative, then we probably have * default selectivity estimates on one or both sides of the * range that we failed to recognize above for some reason. */ if (s2 <= 0.0) { if (s2 < -0.01) { /* * No data available --- use a default estimate that * is small, but not real small. */ s2 = DEFAULT_RANGE_INEQ_SEL; } else { /* * It's just roundoff error; use a small positive * value */ s2 = 1.0e-10; } } } /* Merge in the selectivity of the pair of clauses */ s1 *= s2; } else { /* Only found one of a pair, merge it in generically */ if (rqlist->have_lobound) s1 *= rqlist->lobound; else s1 *= rqlist->hibound; } /* release storage and advance */ rqnext = rqlist->next; pfree(rqlist); rqlist = rqnext; } return s1; }
static bool treat_as_join_clause | ( | Node * | clause, | |
RestrictInfo * | rinfo, | |||
int | varRelid, | |||
SpecialJoinInfo * | sjinfo | |||
) | [inline, static] |
Definition at line 406 of file clausesel.c.
References bms_membership(), BMS_MULTIPLE, RestrictInfo::clause_relids, NULL, and NumRelids().
Referenced by clause_selectivity().
{ if (varRelid != 0) { /* * Caller is forcing restriction mode (eg, because we are examining an * inner indexscan qual). */ return false; } else if (sjinfo == NULL) { /* * It must be a restriction clause, since it's being evaluated at a * scan node. */ return false; } else { /* * Otherwise, it's a join if there's more than one relation used. We * can optimize this calculation if an rinfo was passed. * * XXX Since we know the clause is being evaluated at a join, the * only way it could be single-relation is if it was delayed by outer * joins. Although we can make use of the restriction qual estimators * anyway, it seems likely that we ought to account for the * probability of injected nulls somehow. */ if (rinfo) return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); else return (NumRelids(clause) > 1); } }