#include "fmgr.h"
#include "access/htup.h"
#include "nodes/relation.h"
Go to the source code of this file.
#define CLAMP_PROBABILITY | ( | p | ) |
do { \ if (p < 0.0) \ p = 0.0; \ else if (p > 1.0) \ p = 1.0; \ } while (0)
Definition at line 57 of file selfuncs.h.
Referenced by arraycontsel(), booltestsel(), calc_rangesel(), eqjoinsel(), eqjoinsel_inner(), eqjoinsel_semi(), ineq_histogram_selectivity(), ltreeparentsel(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), mergejoinscansel(), nulltestsel(), patternsel(), rangesel(), regex_selectivity(), scalararraysel(), scalararraysel_containment(), scalarineqsel(), tsmatchsel(), tsquery_opr_selec(), var_eq_const(), and var_eq_non_const().
#define DEFAULT_EQ_SEL 0.005 |
Definition at line 34 of file selfuncs.h.
Referenced by eqsel().
#define DEFAULT_INEQ_SEL 0.3333333333333333 |
Definition at line 37 of file selfuncs.h.
Referenced by clauselist_selectivity(), mergejoinscansel(), scalargtjoinsel(), scalargtsel(), scalarltjoinsel(), and scalarltsel().
#define DEFAULT_MATCH_SEL 0.005 |
Definition at line 43 of file selfuncs.h.
Referenced by patternjoinsel().
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL) |
Definition at line 50 of file selfuncs.h.
#define DEFAULT_NUM_DISTINCT 200 |
Definition at line 46 of file selfuncs.h.
Referenced by get_variable_numdistinct().
#define DEFAULT_RANGE_INEQ_SEL 0.005 |
Definition at line 40 of file selfuncs.h.
#define DEFAULT_UNK_SEL 0.005 |
Definition at line 49 of file selfuncs.h.
#define ReleaseVariableStats | ( | vardata | ) |
do { \ if (HeapTupleIsValid((vardata).statsTuple)) \ (* (vardata).freefunc) ((vardata).statsTuple); \ } while(0)
Definition at line 80 of file selfuncs.h.
Referenced by arraycontsel(), booltestsel(), btcostestimate(), eqjoinsel(), eqsel(), estimate_hash_bucketsize(), estimate_num_groups(), get_restriction_variable(), ltreeparentsel(), mergejoinscansel(), nulltestsel(), patternsel(), rangesel(), scalararraysel_containment(), scalargtsel(), scalarltsel(), and tsmatchsel().
typedef bool(* get_index_stats_hook_type)(PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata) |
Definition at line 104 of file selfuncs.h.
typedef bool(* get_relation_stats_hook_type)(PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata) |
Definition at line 99 of file selfuncs.h.
typedef struct VariableStatData VariableStatData |
Definition at line 93 of file selfuncs.h.
{ Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact } Pattern_Prefix_Status;
enum Pattern_Type |
Definition at line 87 of file selfuncs.h.
{ Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Regex, Pattern_Type_Regex_IC } Pattern_Type;
Datum arraycontjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 329 of file array_selfuncs.c.
References DEFAULT_SEL, and PG_RETURN_FLOAT8.
{ /* For the moment this is just a stub */ Oid operator = PG_GETARG_OID(1); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); }
Datum arraycontsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 249 of file array_selfuncs.c.
References calc_arraycontsel(), CLAMP_PROBABILITY, DEFAULT_SEL, get_base_element_type(), get_restriction_variable(), InvalidOid, IsA, OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and VariableStatData::vartype.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec; Oid element_typeid; /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } /* * The "&&", "@>" and "<@" operators are strict, so we can cope with a * NULL constant right away. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } /* * If var is on the right, commute the operator, so that we can assume the * var is on the left in what follows. */ if (!varonleft) { if (operator == OID_ARRAY_CONTAINS_OP) operator = OID_ARRAY_CONTAINED_OP; else if (operator == OID_ARRAY_CONTAINED_OP) operator = OID_ARRAY_CONTAINS_OP; } /* * OK, there's a Var and a Const we're dealing with here. We need the * Const to be a array with same element type as column, else we can't do * anything useful. (Such cases will likely fail at runtime, but here * we'd rather just return a default estimate.) */ element_typeid = get_base_element_type(((Const *) other)->consttype); if (element_typeid != InvalidOid && element_typeid == get_base_element_type(vardata.vartype)) { selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue, element_typeid, operator); } else { selec = DEFAULT_SEL(operator); } ReleaseVariableStats(vardata); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
Selectivity booltestsel | ( | PlannerInfo * | root, | |
BoolTestType | booltesttype, | |||
Node * | arg, | |||
int | varRelid, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo | |||
) |
Definition at line 1446 of file selfuncs.c.
References VariableStatData::atttype, VariableStatData::atttypmod, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, NULL, ReleaseVariableStats, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and values.
Referenced by clause_selectivity().
{ VariableStatData vardata; double selec; examine_variable(root, arg, varRelid, &vardata); if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; double freq_null; Datum *values; int nvalues; float4 *numbers; int nnumbers; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, &values, &nvalues, &numbers, &nnumbers) && nnumbers > 0) { double freq_true; double freq_false; /* * Get first MCV frequency and derive frequency for true. */ if (DatumGetBool(values[0])) freq_true = numbers[0]; else freq_true = 1.0 - numbers[0] - freq_null; /* * Next derive frequency for false. Then use these as appropriate * to derive frequency for each case. */ freq_false = 1.0 - freq_true - freq_null; switch (booltesttype) { case IS_UNKNOWN: /* select only NULL values */ selec = freq_null; break; case IS_NOT_UNKNOWN: /* select non-NULL values */ selec = 1.0 - freq_null; break; case IS_TRUE: /* select only TRUE values */ selec = freq_true; break; case IS_NOT_TRUE: /* select non-TRUE values */ selec = 1.0 - freq_true; break; case IS_FALSE: /* select only FALSE values */ selec = freq_false; break; case IS_NOT_FALSE: /* select non-FALSE values */ selec = 1.0 - freq_false; break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } free_attstatsslot(vardata.atttype, values, nvalues, numbers, nnumbers); } else { /* * No most-common-value info available. Still have null fraction * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust * for null fraction and assume an even split for boolean tests. */ switch (booltesttype) { case IS_UNKNOWN: /* * Use freq_null directly. */ selec = freq_null; break; case IS_NOT_UNKNOWN: /* * Select not unknown (not null) values. Calculate from * freq_null. */ selec = 1.0 - freq_null; break; case IS_TRUE: case IS_NOT_TRUE: case IS_FALSE: case IS_NOT_FALSE: selec = (1.0 - freq_null) / 2.0; break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } } } else { /* * If we can't get variable statistics for the argument, perhaps * clause_selectivity can do something with it. We ignore the * possibility of a NULL value when using clause_selectivity, and just * assume the value is either TRUE or FALSE. */ switch (booltesttype) { case IS_UNKNOWN: selec = DEFAULT_UNK_SEL; break; case IS_NOT_UNKNOWN: selec = DEFAULT_NOT_UNK_SEL; break; case IS_TRUE: case IS_NOT_FALSE: selec = (double) clause_selectivity(root, arg, varRelid, jointype, sjinfo); break; case IS_FALSE: case IS_NOT_TRUE: selec = 1.0 - (double) clause_selectivity(root, arg, varRelid, jointype, sjinfo); break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } } ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); return (Selectivity) selec; }
Datum btcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6194 of file selfuncs.c.
References add_predicate_to_quals(), NullTest::arg, ScalarArrayOpExpr::args, Assert, BoolGetDatum, BTEqualStrategyNumber, BTLessStrategyNumber, RestrictInfo::clause, clauselist_selectivity(), cpu_operator_cost, elog, ERROR, estimate_array_length(), forboth, free_attstatsslot(), VariableStatData::freefunc, genericcostestimate(), get_attstatsslot(), get_commutator(), get_index_stats_hook, get_leftop(), get_op_opfamily_strategy(), get_opfamily_member(), get_relation_stats_hook, get_rightop(), HeapTupleIsValid, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexOptInfo::indexkeys, IndexOptInfo::indexoid, IndexPath::indexqualcols, IndexPath::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, RangeTblEntry::inh, Int16GetDatum, InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), RowCompareExpr::largs, lfirst, lfirst_int, linitial, linitial_oid, lsecond, match_index_to_operand(), MemSet, IndexOptInfo::ncolumns, nodeTag, NULL, NullTest::nulltesttype, GenericCosts::num_sa_scans, GenericCosts::numIndexTuples, ObjectIdGetDatum, OidIsValid, IndexOptInfo::opcintype, IndexOptInfo::opfamily, ScalarArrayOpExpr::opno, RowCompareExpr::opnos, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, planner_rt_fetch, RowCompareExpr::rargs, IndexOptInfo::rel, ReleaseVariableStats, RangeTblEntry::relid, RelOptInfo::relid, IndexOptInfo::reverse_sort, rint(), RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3, STATISTIC_KIND_CORRELATION, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::tree_height, IndexOptInfo::tuples, RelOptInfo::tuples, and IndexOptInfo::unique.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1); double loop_count = PG_GETARG_FLOAT8(2); Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); double *indexCorrelation = (double *) PG_GETARG_POINTER(6); IndexOptInfo *index = path->indexinfo; GenericCosts costs; Oid relid; AttrNumber colnum; VariableStatData vardata; double numIndexTuples; Cost descentCost; List *indexBoundQuals; int indexcol; bool eqQualHere; bool found_saop; bool found_is_null_op; double num_sa_scans; ListCell *lcc, *lci; /* * For a btree scan, only leading '=' quals plus inequality quals for the * immediately next attribute contribute to index selectivity (these are * the "boundary quals" that determine the starting and stopping points of * the index scan). Additional quals can suppress visits to the heap, so * it's OK to count them in indexSelectivity, but they should not count * for estimating numIndexTuples. So we must examine the given indexquals * to find out which ones count as boundary quals. We rely on the * knowledge that they are given in index column order. * * For a RowCompareExpr, we consider only the first column, just as * rowcomparesel() does. * * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N * index scans not one, but the ScalarArrayOpExpr's operator can be * considered to act the same as it normally does. */ indexBoundQuals = NIL; indexcol = 0; eqQualHere = false; found_saop = false; found_is_null_op = false; num_sa_scans = 1; forboth(lcc, path->indexquals, lci, path->indexqualcols) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc); Expr *clause; Node *leftop, *rightop PG_USED_FOR_ASSERTS_ONLY; Oid clause_op; int op_strategy; bool is_null_op = false; if (indexcol != lfirst_int(lci)) { /* Beginning of a new column's quals */ if (!eqQualHere) break; /* done if no '=' qual for indexcol */ eqQualHere = false; indexcol++; if (indexcol != lfirst_int(lci)) break; /* no quals at all for indexcol */ } Assert(IsA(rinfo, RestrictInfo)); clause = rinfo->clause; if (IsA(clause, OpExpr)) { leftop = get_leftop(clause); rightop = get_rightop(clause); clause_op = ((OpExpr *) clause)->opno; } else if (IsA(clause, RowCompareExpr)) { RowCompareExpr *rc = (RowCompareExpr *) clause; leftop = (Node *) linitial(rc->largs); rightop = (Node *) linitial(rc->rargs); clause_op = linitial_oid(rc->opnos); } else if (IsA(clause, ScalarArrayOpExpr)) { ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; leftop = (Node *) linitial(saop->args); rightop = (Node *) lsecond(saop->args); clause_op = saop->opno; found_saop = true; } else if (IsA(clause, NullTest)) { NullTest *nt = (NullTest *) clause; leftop = (Node *) nt->arg; rightop = NULL; clause_op = InvalidOid; if (nt->nulltesttype == IS_NULL) { found_is_null_op = true; is_null_op = true; } } else { elog(ERROR, "unsupported indexqual type: %d", (int) nodeTag(clause)); continue; /* keep compiler quiet */ } if (match_index_to_operand(leftop, indexcol, index)) { /* clause_op is correct */ } else { Assert(match_index_to_operand(rightop, indexcol, index)); /* Must flip operator to get the opfamily member */ clause_op = get_commutator(clause_op); } /* check for equality operator */ if (OidIsValid(clause_op)) { op_strategy = get_op_opfamily_strategy(clause_op, index->opfamily[indexcol]); Assert(op_strategy != 0); /* not a member of opfamily?? */ if (op_strategy == BTEqualStrategyNumber) eqQualHere = true; } else if (is_null_op) { /* IS NULL is like = for purposes of selectivity determination */ eqQualHere = true; } /* count up number of SA scans induced by indexBoundQuals only */ if (IsA(clause, ScalarArrayOpExpr)) { ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; int alength = estimate_array_length(lsecond(saop->args)); if (alength > 1) num_sa_scans *= alength; } indexBoundQuals = lappend(indexBoundQuals, rinfo); } /* * If index is unique and we found an '=' clause for each column, we can * just assume numIndexTuples = 1 and skip the expensive * clauselist_selectivity calculations. However, a ScalarArrayOp or * NullTest invalidates that theory, even though it sets eqQualHere. */ if (index->unique && indexcol == index->ncolumns - 1 && eqQualHere && !found_saop && !found_is_null_op) numIndexTuples = 1.0; else { List *selectivityQuals; Selectivity btreeSelectivity; /* * If the index is partial, AND the index predicate with the * index-bound quals to produce a more accurate idea of the number of * rows covered by the bound conditions. */ selectivityQuals = add_predicate_to_quals(index, indexBoundQuals); btreeSelectivity = clauselist_selectivity(root, selectivityQuals, index->rel->relid, JOIN_INNER, NULL); numIndexTuples = btreeSelectivity * index->rel->tuples; /* * As in genericcostestimate(), we have to adjust for any * ScalarArrayOpExpr quals included in indexBoundQuals, and then round * to integer. */ numIndexTuples = rint(numIndexTuples / num_sa_scans); } /* * Now do generic index cost estimation. */ MemSet(&costs, 0, sizeof(costs)); costs.numIndexTuples = numIndexTuples; genericcostestimate(root, path, loop_count, &costs); /* * Add a CPU-cost component to represent the costs of initial btree * descent. We don't charge any I/O cost for touching upper btree levels, * since they tend to stay in cache, but we still have to do about log2(N) * comparisons to descend a btree of N leaf tuples. We charge one * cpu_operator_cost per comparison. * * If there are ScalarArrayOpExprs, charge this once per SA scan. The * ones after the first one are not startup cost so far as the overall * plan is concerned, so add them only to "total" cost. */ if (index->tuples > 1) /* avoid computing log(0) */ { descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; } /* * Even though we're not charging I/O cost for touching upper btree pages, * it's still reasonable to charge some CPU cost per page descended * through. Moreover, if we had no such charge at all, bloated indexes * would appear to have the same search cost as unbloated ones, at least * in cases where only a single leaf page is expected to be visited. This * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page * touched. The number of such pages is btree tree height plus one (ie, * we charge for the leaf page too). As above, charge once per SA scan. */ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; /* * If we can get an estimate of the first column's ordering correlation C * from pg_statistic, estimate the index correlation as C for a * single-column index, or C * 0.75 for multiple columns. (The idea here * is that multiple columns dilute the importance of the first column's * ordering, but don't negate it entirely. Before 8.0 we divided the * correlation by the number of columns, but that seems too strong.) */ MemSet(&vardata, 0, sizeof(vardata)); if (index->indexkeys[0] != 0) { /* Simple variable --- look to stats for the underlying table */ RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); Assert(rte->rtekind == RTE_RELATION); relid = rte->relid; Assert(relid != InvalidOid); colnum = index->indexkeys[0]; if (get_relation_stats_hook && (*get_relation_stats_hook) (root, rte, colnum, &vardata)) { /* * The hook took control of acquiring a stats tuple. If it did * supply a tuple, it'd better have supplied a freefunc. */ if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc) elog(ERROR, "no function provided to release variable stats with"); } else { vardata.statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(colnum), BoolGetDatum(rte->inh)); vardata.freefunc = ReleaseSysCache; } } else { /* Expression --- maybe there are stats for the index itself */ relid = index->indexoid; colnum = 1; if (get_index_stats_hook && (*get_index_stats_hook) (root, relid, colnum, &vardata)) { /* * The hook took control of acquiring a stats tuple. If it did * supply a tuple, it'd better have supplied a freefunc. */ if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc) elog(ERROR, "no function provided to release variable stats with"); } else { vardata.statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(colnum), BoolGetDatum(false)); vardata.freefunc = ReleaseSysCache; } } if (HeapTupleIsValid(vardata.statsTuple)) { Oid sortop; float4 *numbers; int nnumbers; sortop = get_opfamily_member(index->opfamily[0], index->opcintype[0], index->opcintype[0], BTLessStrategyNumber); if (OidIsValid(sortop) && get_attstatsslot(vardata.statsTuple, InvalidOid, 0, STATISTIC_KIND_CORRELATION, sortop, NULL, NULL, NULL, &numbers, &nnumbers)) { double varCorrelation; Assert(nnumbers == 1); varCorrelation = numbers[0]; if (index->reverse_sort[0]) varCorrelation = -varCorrelation; if (index->ncolumns > 1) costs.indexCorrelation = varCorrelation * 0.75; else costs.indexCorrelation = varCorrelation; free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers); } } ReleaseVariableStats(vardata); *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; PG_RETURN_VOID(); }
Datum eqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2133 of file selfuncs.c.
References CLAMP_PROBABILITY, elog, eqjoinsel_inner(), eqjoinsel_semi(), ERROR, find_join_input_rel(), get_commutator(), get_join_variables(), JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_SEMI, SpecialJoinInfo::jointype, SpecialJoinInfo::min_righthand, PG_GETARG_INT16, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and ReleaseVariableStats.
Referenced by neqjoinsel().
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); #ifdef NOT_USED JoinType jointype = (JoinType) PG_GETARG_INT16(3); #endif SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); double selec; VariableStatData vardata1; VariableStatData vardata2; bool join_is_reversed; RelOptInfo *inner_rel; get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); switch (sjinfo->jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_FULL: selec = eqjoinsel_inner(operator, &vardata1, &vardata2); break; case JOIN_SEMI: case JOIN_ANTI: /* * Look up the join's inner relation. min_righthand is sufficient * information because neither SEMI nor ANTI joins permit any * reassociation into or out of their RHS, so the righthand will * always be exactly that set of rels. */ inner_rel = find_join_input_rel(root, sjinfo->min_righthand); if (!join_is_reversed) selec = eqjoinsel_semi(operator, &vardata1, &vardata2, inner_rel); else selec = eqjoinsel_semi(get_commutator(operator), &vardata2, &vardata1, inner_rel); break; default: /* other values not expected here */ elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype); selec = 0; /* keep compiler quiet */ break; } ReleaseVariableStats(vardata1); ReleaseVariableStats(vardata2); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
Datum eqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 216 of file selfuncs.c.
References DEFAULT_EQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, var_eq_const(), and var_eq_non_const().
Referenced by neqsel().
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; double selec; /* * If expression is not variable = something or something = variable, then * punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); /* * We can do a lot better if the something is a constant. (Note: the * Const might result from estimation rather than being a simple constant * in the query.) */ if (IsA(other, Const)) selec = var_eq_const(&vardata, operator, ((Const *) other)->constvalue, ((Const *) other)->constisnull, varonleft); else selec = var_eq_non_const(&vardata, operator, other, varonleft); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8((float8) selec); }
int estimate_array_length | ( | Node * | arrayexpr | ) |
Definition at line 2028 of file selfuncs.c.
References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().
Referenced by btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().
{ /* look through any binary-compatible relabeling of arrayexpr */ arrayexpr = strip_array_coercion(arrayexpr); if (arrayexpr && IsA(arrayexpr, Const)) { Datum arraydatum = ((Const *) arrayexpr)->constvalue; bool arrayisnull = ((Const *) arrayexpr)->constisnull; ArrayType *arrayval; if (arrayisnull) return 0; arrayval = DatumGetArrayTypeP(arraydatum); return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval)); } else if (arrayexpr && IsA(arrayexpr, ArrayExpr) && !((ArrayExpr *) arrayexpr)->multidims) { return list_length(((ArrayExpr *) arrayexpr)->elements); } else { /* default guess --- see also scalararraysel */ return 10; } }
Selectivity estimate_hash_bucketsize | ( | PlannerInfo * | root, | |
Node * | hashkey, | |||
double | nbuckets | |||
) |
Definition at line 3430 of file selfuncs.c.
References VariableStatData::atttype, VariableStatData::atttypmod, examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, NULL, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and RelOptInfo::tuples.
Referenced by final_cost_hashjoin().
{ VariableStatData vardata; double estfract, ndistinct, stanullfrac, mcvfreq, avgfreq; bool isdefault; float4 *numbers; int nnumbers; examine_variable(root, hashkey, 0, &vardata); /* Get number of distinct values */ ndistinct = get_variable_numdistinct(&vardata, &isdefault); /* If ndistinct isn't real, punt and return 0.1, per comments above */ if (isdefault) { ReleaseVariableStats(vardata); return (Selectivity) 0.1; } /* Get fraction that are null */ if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); stanullfrac = stats->stanullfrac; } else stanullfrac = 0.0; /* Compute avg freq of all distinct data values in raw relation */ avgfreq = (1.0 - stanullfrac) / ndistinct; /* * Adjust ndistinct to account for restriction clauses. Observe we are * assuming that the data distribution is affected uniformly by the * restriction clauses! * * XXX Possibly better way, but much more expensive: multiply by * selectivity of rel's restriction clauses that mention the target Var. */ if (vardata.rel) ndistinct *= vardata.rel->rows / vardata.rel->tuples; /* * Initial estimate of bucketsize fraction is 1/nbuckets as long as the * number of buckets is less than the expected number of distinct values; * otherwise it is 1/ndistinct. */ if (ndistinct > nbuckets) estfract = 1.0 / nbuckets; else estfract = 1.0 / ndistinct; /* * Look up the frequency of the most common value, if available. */ mcvfreq = 0.0; if (HeapTupleIsValid(vardata.statsTuple)) { if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, NULL, NULL, &numbers, &nnumbers)) { /* * The first MCV stat is for the most common value. */ if (nnumbers > 0) mcvfreq = numbers[0]; free_attstatsslot(vardata.atttype, NULL, 0, numbers, nnumbers); } } /* * Adjust estimated bucketsize upward to account for skewed distribution. */ if (avgfreq > 0.0 && mcvfreq > avgfreq) estfract *= mcvfreq / avgfreq; /* * Clamp bucketsize to sane range (the above adjustment could easily * produce an out-of-range result). We set the lower bound a little above * zero, since zero isn't a very sane result. */ if (estfract < 1.0e-6) estfract = 1.0e-6; else if (estfract > 1.0) estfract = 1.0; ReleaseVariableStats(vardata); return (Selectivity) estfract; }
double estimate_num_groups | ( | PlannerInfo * | root, | |
List * | groupExprs, | |||
double | input_rows | |||
) |
Definition at line 3206 of file selfuncs.c.
References add_unique_group_var(), Assert, contain_volatile_functions(), examine_variable(), exprType(), for_each_cell, HeapTupleIsValid, VariableStatData::isunique, lcons(), lfirst, linitial, list_head(), lnext, GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, GroupVarInfo::rel, ReleaseVariableStats, RELOPT_BASEREL, RelOptInfo::reloptkind, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.
Referenced by create_unique_path(), query_planner(), and recurse_set_operations().
{ List *varinfos = NIL; double numdistinct; ListCell *l; /* * If no grouping columns, there's exactly one group. (This can't happen * for normal cases with GROUP BY or DISTINCT, but it is possible for * corner cases with set operations.) */ if (groupExprs == NIL) return 1.0; /* * Count groups derived from boolean grouping expressions. For other * expressions, find the unique Vars used, treating an expression as a Var * if we can find stats for it. For each one, record the statistical * estimate of number of distinct values (total in its table, without * regard for filtering). */ numdistinct = 1.0; foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); VariableStatData vardata; List *varshere; ListCell *l2; /* Short-circuit for expressions returning boolean */ if (exprType(groupexpr) == BOOLOID) { numdistinct *= 2.0; continue; } /* * If examine_variable is able to deduce anything about the GROUP BY * expression, treat it as a single variable even if it's really more * complicated. */ examine_variable(root, groupexpr, 0, &vardata); if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique) { varinfos = add_unique_group_var(root, varinfos, groupexpr, &vardata); ReleaseVariableStats(vardata); continue; } ReleaseVariableStats(vardata); /* * Else pull out the component Vars. Handle PlaceHolderVars by * recursing into their arguments (effectively assuming that the * PlaceHolderVar doesn't change the number of groups, which boils * down to ignoring the possible addition of nulls to the result set). */ varshere = pull_var_clause(groupexpr, PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS); /* * If we find any variable-free GROUP BY item, then either it is a * constant (and we can ignore it) or it contains a volatile function; * in the latter case we punt and assume that each input row will * yield a distinct group. */ if (varshere == NIL) { if (contain_volatile_functions(groupexpr)) return input_rows; continue; } /* * Else add variables to varinfos list */ foreach(l2, varshere) { Node *var = (Node *) lfirst(l2); examine_variable(root, var, 0, &vardata); varinfos = add_unique_group_var(root, varinfos, var, &vardata); ReleaseVariableStats(vardata); } } /* * If now no Vars, we must have an all-constant or all-boolean GROUP BY * list. */ if (varinfos == NIL) { /* Guard against out-of-range answers */ if (numdistinct > input_rows) numdistinct = input_rows; return numdistinct; } /* * Group Vars by relation and estimate total numdistinct. * * For each iteration of the outer loop, we process the frontmost Var in * varinfos, plus all other Vars in the same relation. We remove these * Vars from the newvarinfos list for the next iteration. This is the * easiest way to group Vars of same rel together. */ do { GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos); RelOptInfo *rel = varinfo1->rel; double reldistinct = varinfo1->ndistinct; double relmaxndistinct = reldistinct; int relvarcount = 1; List *newvarinfos = NIL; /* * Get the product of numdistinct estimates of the Vars for this rel. * Also, construct new varinfos list of remaining Vars. */ for_each_cell(l, lnext(list_head(varinfos))) { GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); if (varinfo2->rel == varinfo1->rel) { reldistinct *= varinfo2->ndistinct; if (relmaxndistinct < varinfo2->ndistinct) relmaxndistinct = varinfo2->ndistinct; relvarcount++; } else { /* not time to process varinfo2 yet */ newvarinfos = lcons(varinfo2, newvarinfos); } } /* * Sanity check --- don't divide by zero if empty relation. */ Assert(rel->reloptkind == RELOPT_BASEREL); if (rel->tuples > 0) { /* * Clamp to size of rel, or size of rel / 10 if multiple Vars. The * fudge factor is because the Vars are probably correlated but we * don't know by how much. We should never clamp to less than the * largest ndistinct value for any of the Vars, though, since * there will surely be at least that many groups. */ double clamp = rel->tuples; if (relvarcount > 1) { clamp *= 0.1; if (clamp < relmaxndistinct) { clamp = relmaxndistinct; /* for sanity in case some ndistinct is too large: */ if (clamp > rel->tuples) clamp = rel->tuples; } } if (reldistinct > clamp) reldistinct = clamp; /* * Multiply by restriction selectivity. */ reldistinct *= rel->rows / rel->tuples; /* * Update estimate of total distinct groups. */ numdistinct *= reldistinct; } varinfos = newvarinfos; } while (varinfos != NIL); numdistinct = ceil(numdistinct); /* Guard against out-of-range answers */ if (numdistinct > input_rows) numdistinct = input_rows; if (numdistinct < 1.0) numdistinct = 1.0; return numdistinct; }
void examine_variable | ( | PlannerInfo * | root, | |
Node * | node, | |||
int | varRelid, | |||
VariableStatData * | vardata | |||
) |
Definition at line 4275 of file selfuncs.c.
References arg, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext, MemSet, IndexOptInfo::ncolumns, NIL, NULL, ObjectIdGetDatum, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, SearchSysCache3, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, Var::vartype, VariableStatData::vartype, and Var::vartypmod.
Referenced by booltestsel(), estimate_hash_bucketsize(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().
{ Node *basenode; Relids varnos; RelOptInfo *onerel; /* Make sure we don't return dangling pointers in vardata */ MemSet(vardata, 0, sizeof(VariableStatData)); /* Save the exposed type of the expression */ vardata->vartype = exprType(node); /* Look inside any binary-compatible relabeling */ if (IsA(node, RelabelType)) basenode = (Node *) ((RelabelType *) node)->arg; else basenode = node; /* Fast path for a simple Var */ if (IsA(basenode, Var) && (varRelid == 0 || varRelid == ((Var *) basenode)->varno)) { Var *var = (Var *) basenode; /* Set up result fields other than the stats tuple */ vardata->var = basenode; /* return Var without relabeling */ vardata->rel = find_base_rel(root, var->varno); vardata->atttype = var->vartype; vardata->atttypmod = var->vartypmod; vardata->isunique = has_unique_index(vardata->rel, var->varattno); /* Try to locate some stats */ examine_simple_variable(root, var, vardata); return; } /* * Okay, it's a more complicated expression. Determine variable * membership. Note that when varRelid isn't zero, only vars of that * relation are considered "real" vars. */ varnos = pull_varnos(basenode); onerel = NULL; switch (bms_membership(varnos)) { case BMS_EMPTY_SET: /* No Vars at all ... must be pseudo-constant clause */ break; case BMS_SINGLETON: if (varRelid == 0 || bms_is_member(varRelid, varnos)) { onerel = find_base_rel(root, (varRelid ? varRelid : bms_singleton_member(varnos))); vardata->rel = onerel; node = basenode; /* strip any relabeling */ } /* else treat it as a constant */ break; case BMS_MULTIPLE: if (varRelid == 0) { /* treat it as a variable of a join relation */ vardata->rel = find_join_rel(root, varnos); node = basenode; /* strip any relabeling */ } else if (bms_is_member(varRelid, varnos)) { /* ignore the vars belonging to other relations */ vardata->rel = find_base_rel(root, varRelid); node = basenode; /* strip any relabeling */ /* note: no point in expressional-index search here */ } /* else treat it as a constant */ break; } bms_free(varnos); vardata->var = node; vardata->atttype = exprType(node); vardata->atttypmod = exprTypmod(node); if (onerel) { /* * We have an expression in vars of a single relation. Try to match * it to expressional index columns, in hopes of finding some * statistics. * * XXX it's conceivable that there are multiple matches with different * index opfamilies; if so, we need to pick one that matches the * operator we are estimating for. FIXME later. */ ListCell *ilist; foreach(ilist, onerel->indexlist) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); ListCell *indexpr_item; int pos; indexpr_item = list_head(index->indexprs); if (indexpr_item == NULL) continue; /* no expressions here... */ for (pos = 0; pos < index->ncolumns; pos++) { if (index->indexkeys[pos] == 0) { Node *indexkey; if (indexpr_item == NULL) elog(ERROR, "too few entries in indexprs list"); indexkey = (Node *) lfirst(indexpr_item); if (indexkey && IsA(indexkey, RelabelType)) indexkey = (Node *) ((RelabelType *) indexkey)->arg; if (equal(node, indexkey)) { /* * Found a match ... is it a unique index? Tests here * should match has_unique_index(). */ if (index->unique && index->ncolumns == 1 && (index->indpred == NIL || index->predOK)) vardata->isunique = true; /* * Has it got stats? We only consider stats for * non-partial indexes, since partial indexes probably * don't reflect whole-relation statistics; the above * check for uniqueness is the only info we take from * a partial index. * * An index stats hook, however, must make its own * decisions about what to do with partial indexes. */ if (get_index_stats_hook && (*get_index_stats_hook) (root, index->indexoid, pos + 1, vardata)) { /* * The hook took control of acquiring a stats * tuple. If it did supply a tuple, it'd better * have supplied a freefunc. */ if (HeapTupleIsValid(vardata->statsTuple) && !vardata->freefunc) elog(ERROR, "no function provided to release variable stats with"); } else if (index->indpred == NIL) { vardata->statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(index->indexoid), Int16GetDatum(pos + 1), BoolGetDatum(false)); vardata->freefunc = ReleaseSysCache; } if (vardata->statsTuple) break; } indexpr_item = lnext(indexpr_item); } } if (vardata->statsTuple) break; } } }
void get_join_variables | ( | PlannerInfo * | root, | |
List * | args, | |||
SpecialJoinInfo * | sjinfo, | |||
VariableStatData * | vardata1, | |||
VariableStatData * | vardata2, | |||
bool * | join_is_reversed | |||
) |
Definition at line 4216 of file selfuncs.c.
References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.
Referenced by eqjoinsel().
{ Node *left, *right; if (list_length(args) != 2) elog(ERROR, "join operator should take two arguments"); left = (Node *) linitial(args); right = (Node *) lsecond(args); examine_variable(root, left, 0, vardata1); examine_variable(root, right, 0, vardata2); if (vardata1->rel && bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand)) *join_is_reversed = true; /* var1 is on RHS */ else if (vardata2->rel && bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand)) *join_is_reversed = true; /* var2 is on LHS */ else *join_is_reversed = false; }
bool get_restriction_variable | ( | PlannerInfo * | root, | |
List * | args, | |||
int | varRelid, | |||
VariableStatData * | vardata, | |||
Node ** | other, | |||
bool * | varonleft | |||
) |
Definition at line 4156 of file selfuncs.c.
References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, NULL, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.
Referenced by arraycontsel(), eqsel(), ltreeparentsel(), patternsel(), rangesel(), scalargtsel(), scalarltsel(), and tsmatchsel().
{ Node *left, *right; VariableStatData rdata; /* Fail if not a binary opclause (probably shouldn't happen) */ if (list_length(args) != 2) return false; left = (Node *) linitial(args); right = (Node *) lsecond(args); /* * Examine both sides. Note that when varRelid is nonzero, Vars of other * relations will be treated as pseudoconstants. */ examine_variable(root, left, varRelid, vardata); examine_variable(root, right, varRelid, &rdata); /* * If one side is a variable and the other not, we win. */ if (vardata->rel && rdata.rel == NULL) { *varonleft = true; *other = estimate_expression_value(root, rdata.var); /* Assume we need no ReleaseVariableStats(rdata) here */ return true; } if (vardata->rel == NULL && rdata.rel) { *varonleft = false; *other = estimate_expression_value(root, vardata->var); /* Assume we need no ReleaseVariableStats(*vardata) here */ *vardata = rdata; return true; } /* Ooops, clause has wrong structure (probably var op var) */ ReleaseVariableStats(*vardata); ReleaseVariableStats(rdata); return false; }
double get_variable_numdistinct | ( | VariableStatData * | vardata, | |
bool * | isdefault | |||
) |
Definition at line 4613 of file selfuncs.c.
References BOOLOID, DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, NULL, ObjectIdAttributeNumber, VariableStatData::rel, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.
Referenced by add_unique_group_var(), eqjoinsel_inner(), eqjoinsel_semi(), estimate_hash_bucketsize(), var_eq_const(), and var_eq_non_const().
{ double stadistinct; double ntuples; *isdefault = false; /* * Determine the stadistinct value to use. There are cases where we can * get an estimate even without a pg_statistic entry, or can get a better * value than is in pg_statistic. */ if (HeapTupleIsValid(vardata->statsTuple)) { /* Use the pg_statistic entry */ Form_pg_statistic stats; stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); stadistinct = stats->stadistinct; } else if (vardata->vartype == BOOLOID) { /* * Special-case boolean columns: presumably, two distinct values. * * Are there any other datatypes we should wire in special estimates * for? */ stadistinct = 2.0; } else { /* * We don't keep statistics for system columns, but in some cases we * can infer distinctness anyway. */ if (vardata->var && IsA(vardata->var, Var)) { switch (((Var *) vardata->var)->varattno) { case ObjectIdAttributeNumber: case SelfItemPointerAttributeNumber: stadistinct = -1.0; /* unique */ break; case TableOidAttributeNumber: stadistinct = 1.0; /* only 1 value */ break; default: stadistinct = 0.0; /* means "unknown" */ break; } } else stadistinct = 0.0; /* means "unknown" */ /* * XXX consider using estimate_num_groups on expressions? */ } /* * If there is a unique index or DISTINCT clause for the variable, assume * it is unique no matter what pg_statistic says; the statistics could be * out of date, or we might have found a partial unique index that proves * the var is unique for this query. */ if (vardata->isunique) stadistinct = -1.0; /* * If we had an absolute estimate, use that. */ if (stadistinct > 0.0) return stadistinct; /* * Otherwise we need to get the relation size; punt if not available. */ if (vardata->rel == NULL) { *isdefault = true; return DEFAULT_NUM_DISTINCT; } ntuples = vardata->rel->tuples; if (ntuples <= 0.0) { *isdefault = true; return DEFAULT_NUM_DISTINCT; } /* * If we had a relative estimate, use that. */ if (stadistinct < 0.0) return floor((-stadistinct * ntuples) + 0.5); /* * With no data, estimate ndistinct = ntuples if the table is small, else * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so * that the behavior isn't discontinuous. */ if (ntuples < DEFAULT_NUM_DISTINCT) return ntuples; *isdefault = true; return DEFAULT_NUM_DISTINCT; }
Datum gincostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 7023 of file selfuncs.c.
References AccessShareLock, GinQualCounts::arrayScans, Assert, RestrictInfo::clause, clauselist_selectivity(), cost_qual_eval(), cpu_index_tuple_cost, cpu_operator_cost, elog, ERROR, GinQualCounts::exactEntries, get_tablespace_page_costs(), gincost_opexpr(), gincost_scalararrayopexpr(), ginGetStats(), GinQualCounts::haveFullScan, index_close(), index_open(), index_pages_fetched(), IndexPath::indexinfo, IndexOptInfo::indexoid, IndexPath::indexorderbys, IndexPath::indexquals, IndexOptInfo::indpred, IsA, JOIN_INNER, lfirst, list_concat(), list_length(), list_make1, Min, GinStatsData::nDataPages, GinStatsData::nEntries, GinStatsData::nEntryPages, NIL, nodeTag, GinStatsData::nPendingPages, GinStatsData::nTotalPages, NULL, IndexOptInfo::pages, GinQualCounts::partialEntries, QualCost::per_tuple, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, predicate_implied_by(), IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, rint(), scale, GinQualCounts::searchEntries, SizeOfIptrData, QualCost::startup, and IndexOptInfo::tuples.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1); double loop_count = PG_GETARG_FLOAT8(2); Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); double *indexCorrelation = (double *) PG_GETARG_POINTER(6); IndexOptInfo *index = path->indexinfo; List *indexQuals = path->indexquals; List *indexOrderBys = path->indexorderbys; ListCell *l; List *selectivityQuals; double numPages = index->pages, numTuples = index->tuples; double numEntryPages, numDataPages, numPendingPages, numEntries; GinQualCounts counts; bool matchPossible; double entryPagesFetched, dataPagesFetched, dataPagesFetchedBySel; double qual_op_cost, qual_arg_cost, spc_random_page_cost, outer_scans; QualCost index_qual_cost; Relation indexRel; GinStatsData ginStats; /* * Obtain statistic information from the meta page */ indexRel = index_open(index->indexoid, AccessShareLock); ginGetStats(indexRel, &ginStats); index_close(indexRel, AccessShareLock); numEntryPages = ginStats.nEntryPages; numDataPages = ginStats.nDataPages; numPendingPages = ginStats.nPendingPages; numEntries = ginStats.nEntries; /* * nPendingPages can be trusted, but the other fields are as of the last * VACUUM. Scale them by the ratio numPages / nTotalPages to account for * growth since then. If the fields are zero (implying no VACUUM at all, * and an index created pre-9.1), assume all pages are entry pages. */ if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0) { numEntryPages = numPages; numDataPages = 0; numEntries = numTuples; /* bogus, but no other info available */ } else { double scale = numPages / ginStats.nTotalPages; numEntryPages = ceil(numEntryPages * scale); numDataPages = ceil(numDataPages * scale); numEntries = ceil(numEntries * scale); /* ensure we didn't round up too much */ numEntryPages = Min(numEntryPages, numPages); numDataPages = Min(numDataPages, numPages - numEntryPages); } /* In an empty index, numEntries could be zero. Avoid divide-by-zero */ if (numEntries < 1) numEntries = 1; /* * Include predicate in selectivityQuals (should match * genericcostestimate) */ if (index->indpred != NIL) { List *predExtraQuals = NIL; foreach(l, index->indpred) { Node *predQual = (Node *) lfirst(l); List *oneQual = list_make1(predQual); if (!predicate_implied_by(oneQual, indexQuals)) predExtraQuals = list_concat(predExtraQuals, oneQual); } /* list_concat avoids modifying the passed-in indexQuals list */ selectivityQuals = list_concat(predExtraQuals, indexQuals); } else selectivityQuals = indexQuals; /* Estimate the fraction of main-table tuples that will be visited */ *indexSelectivity = clauselist_selectivity(root, selectivityQuals, index->rel->relid, JOIN_INNER, NULL); /* fetch estimated page cost for schema containing index */ get_tablespace_page_costs(index->reltablespace, &spc_random_page_cost, NULL); /* * Generic assumption about index correlation: there isn't any. */ *indexCorrelation = 0.0; /* * Examine quals to estimate number of search entries & partial matches */ memset(&counts, 0, sizeof(counts)); counts.arrayScans = 1; matchPossible = true; foreach(l, indexQuals) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); Expr *clause; Assert(IsA(rinfo, RestrictInfo)); clause = rinfo->clause; if (IsA(clause, OpExpr)) { matchPossible = gincost_opexpr(index, (OpExpr *) clause, &counts); if (!matchPossible) break; } else if (IsA(clause, ScalarArrayOpExpr)) { matchPossible = gincost_scalararrayopexpr(index, (ScalarArrayOpExpr *) clause, numEntries, &counts); if (!matchPossible) break; } else { /* shouldn't be anything else for a GIN index */ elog(ERROR, "unsupported GIN indexqual type: %d", (int) nodeTag(clause)); } } /* Fall out if there were any provably-unsatisfiable quals */ if (!matchPossible) { *indexStartupCost = 0; *indexTotalCost = 0; *indexSelectivity = 0; PG_RETURN_VOID(); } if (counts.haveFullScan || indexQuals == NIL) { /* * Full index scan will be required. We treat this as if every key in * the index had been listed in the query; is that reasonable? */ counts.partialEntries = 0; counts.exactEntries = numEntries; counts.searchEntries = numEntries; } /* Will we have more than one iteration of a nestloop scan? */ outer_scans = loop_count; /* * Compute cost to begin scan, first of all, pay attention to pending * list. */ entryPagesFetched = numPendingPages; /* * Estimate number of entry pages read. We need to do * counts.searchEntries searches. Use a power function as it should be, * but tuples on leaf pages usually is much greater. Here we include all * searches in entry tree, including search of first entry in partial * match algorithm */ entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15))); /* * Add an estimate of entry pages read by partial match algorithm. It's a * scan over leaf pages in entry tree. We haven't any useful stats here, * so estimate it as proportion. */ entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries); /* * Partial match algorithm reads all data pages before doing actual scan, * so it's a startup cost. Again, we haven't any useful stats here, so, * estimate it as proportion */ dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries); /* * Calculate cache effects if more than one scan due to nestloops or array * quals. The result is pro-rated per nestloop scan, but the array qual * factor shouldn't be pro-rated (compare genericcostestimate). */ if (outer_scans > 1 || counts.arrayScans > 1) { entryPagesFetched *= outer_scans * counts.arrayScans; entryPagesFetched = index_pages_fetched(entryPagesFetched, (BlockNumber) numEntryPages, numEntryPages, root); entryPagesFetched /= outer_scans; dataPagesFetched *= outer_scans * counts.arrayScans; dataPagesFetched = index_pages_fetched(dataPagesFetched, (BlockNumber) numDataPages, numDataPages, root); dataPagesFetched /= outer_scans; } /* * Here we use random page cost because logically-close pages could be far * apart on disk. */ *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost; /* * Now we compute the number of data pages fetched while the scan * proceeds. */ /* data pages scanned for each exact (non-partial) matched entry */ dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries); /* * Estimate number of data pages read, using selectivity estimation and * capacity of data page. */ dataPagesFetchedBySel = ceil(*indexSelectivity * (numTuples / (BLCKSZ / SizeOfIptrData))); if (dataPagesFetchedBySel > dataPagesFetched) { /* * At least one of entries is very frequent and, unfortunately, we * couldn't get statistic about entries (only tsvector has such * statistics). So, we obviously have too small estimation of pages * fetched from data tree. Re-estimate it from known capacity of data * pages */ dataPagesFetched = dataPagesFetchedBySel; } /* Account for cache effects, the same as above */ if (outer_scans > 1 || counts.arrayScans > 1) { dataPagesFetched *= outer_scans * counts.arrayScans; dataPagesFetched = index_pages_fetched(dataPagesFetched, (BlockNumber) numDataPages, numDataPages, root); dataPagesFetched /= outer_scans; } /* And apply random_page_cost as the cost per page */ *indexTotalCost = *indexStartupCost + dataPagesFetched * spc_random_page_cost; /* * Add on index qual eval costs, much as in genericcostestimate */ cost_qual_eval(&index_qual_cost, indexQuals, root); qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple; cost_qual_eval(&index_qual_cost, indexOrderBys, root); qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple; qual_op_cost = cpu_operator_cost * (list_length(indexQuals) + list_length(indexOrderBys)); qual_arg_cost -= qual_op_cost; if (qual_arg_cost < 0) /* just in case... */ qual_arg_cost = 0; *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); PG_RETURN_VOID(); }
Datum gistcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6586 of file selfuncs.c.
References cpu_operator_cost, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, MemSet, GenericCosts::num_sa_scans, IndexOptInfo::pages, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, IndexOptInfo::tree_height, and IndexOptInfo::tuples.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1); double loop_count = PG_GETARG_FLOAT8(2); Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); double *indexCorrelation = (double *) PG_GETARG_POINTER(6); IndexOptInfo *index = path->indexinfo; GenericCosts costs; Cost descentCost; MemSet(&costs, 0, sizeof(costs)); genericcostestimate(root, path, loop_count, &costs); /* * We model index descent costs similarly to those for btree, but to do * that we first need an idea of the tree height. We somewhat arbitrarily * assume that the fanout is 100, meaning the tree height is at most * log100(index->pages). * * Although this computation isn't really expensive enough to require * caching, we might as well use index->tree_height to cache it. */ if (index->tree_height < 0) /* unknown? */ { if (index->pages > 1) /* avoid computing log(0) */ index->tree_height = (int) (log(index->pages) / log(100.0)); else index->tree_height = 0; } /* * Add a CPU-cost component to represent the costs of initial descent. * We just use log(N) here not log2(N) since the branching factor isn't * necessarily two anyway. As for btree, charge once per SA scan. */ if (index->tuples > 1) /* avoid computing log(0) */ { descentCost = ceil(log(index->tuples)) * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; } /* * Likewise add a per-page charge, calculated the same as for btrees. */ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; PG_RETURN_VOID(); }
Datum hashcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6537 of file selfuncs.c.
References genericcostestimate(), GenericCosts::indexCorrelation, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, MemSet, PG_GETARG_FLOAT8, PG_GETARG_POINTER, and PG_RETURN_VOID.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1); double loop_count = PG_GETARG_FLOAT8(2); Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); double *indexCorrelation = (double *) PG_GETARG_POINTER(6); GenericCosts costs; MemSet(&costs, 0, sizeof(costs)); genericcostestimate(root, path, loop_count, &costs); /* * A hash index has no descent costs as such, since the index AM can go * directly to the target bucket after computing the hash value. There * are a couple of other hash-specific costs that we could conceivably add * here, though: * * Ideally we'd charge spc_random_page_cost for each page in the target * bucket, not just the numIndexPages pages that genericcostestimate * thought we'd visit. However in most cases we don't know which bucket * that will be. There's no point in considering the average bucket size * because the hash AM makes sure that's always one page. * * Likewise, we could consider charging some CPU for each index tuple in * the bucket, if we knew how many there were. But the per-tuple cost is * just a hash value comparison, not a general datatype-dependent * comparison, so any such charge ought to be quite a bit less than * cpu_operator_cost; which makes it probably not worth worrying about. * * A bigger issue is that chance hash-value collisions will result in * wasted probes into the heap. We don't currently attempt to model this * cost on the grounds that it's rare, but maybe it's not rare enough. * (Any fix for this ought to consider the generic lossy-operator problem, * though; it's not entirely hash-specific.) */ *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; PG_RETURN_VOID(); }
double histogram_selectivity | ( | VariableStatData * | vardata, | |
FmgrInfo * | opproc, | |||
Datum | constval, | |||
bool | varonleft, | |||
int | min_hist_size, | |||
int | n_skip, | |||
int * | hist_size | |||
) |
Definition at line 680 of file selfuncs.c.
References Assert, VariableStatData::atttype, VariableStatData::atttypmod, DatumGetBool, DEFAULT_COLLATION_OID, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, NULL, STATISTIC_KIND_HISTOGRAM, VariableStatData::statsTuple, and values.
Referenced by ltreeparentsel(), and patternsel().
{ double result; Datum *values; int nvalues; /* check sanity of parameters */ Assert(n_skip >= 0); Assert(min_hist_size > 2 * n_skip); if (HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, &values, &nvalues, NULL, NULL)) { *hist_size = nvalues; if (nvalues >= min_hist_size) { int nmatch = 0; int i; for (i = n_skip; i < nvalues - n_skip; i++) { if (varonleft ? DatumGetBool(FunctionCall2Coll(opproc, DEFAULT_COLLATION_OID, values[i], constval)) : DatumGetBool(FunctionCall2Coll(opproc, DEFAULT_COLLATION_OID, constval, values[i]))) nmatch++; } result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip)); } else result = -1; free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } else { *hist_size = 0; result = -1; } return result; }
Datum iclikejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2721 of file selfuncs.c.
References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false)); }
Datum iclikesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1401 of file selfuncs.c.
References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false)); }
Datum icnlikejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2757 of file selfuncs.c.
References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true)); }
Datum icnlikesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1437 of file selfuncs.c.
References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true)); }
Datum icregexeqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2703 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false)); }
Datum icregexeqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1383 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false)); }
Datum icregexnejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2739 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true)); }
Datum icregexnesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1419 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true)); }
Datum likejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2712 of file selfuncs.c.
References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false)); }
Datum likesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1392 of file selfuncs.c.
References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false)); }
Definition at line 5693 of file selfuncs.c.
References BYTEAOID, Const::consttype, Const::constvalue, DatumGetBool, DatumGetByteaP, DatumGetCString, DatumGetPointer, DirectFunctionCall1, FunctionCall2Coll(), lc_collate_is_c(), NAMEOID, nameout(), palloc(), pfree(), pg_database_encoding_character_incrementer(), pg_mbcliplen(), PointerGetDatum, SET_VARSIZE, string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA, VARHDRSZ, VARSIZE, and varstr_cmp().
Referenced by prefix_quals(), and prefix_selectivity().
{ Oid datatype = str_const->consttype; char *workstr; int len; Datum cmpstr; text *cmptxt = NULL; mbcharacter_incrementer charinc; /* * Get a modifiable copy of the prefix string in C-string format, and set * up the string we will compare to as a Datum. In C locale this can just * be the given prefix string, otherwise we need to add a suffix. Types * NAME and BYTEA sort bytewise so they don't need a suffix either. */ if (datatype == NAMEOID) { workstr = DatumGetCString(DirectFunctionCall1(nameout, str_const->constvalue)); len = strlen(workstr); cmpstr = str_const->constvalue; } else if (datatype == BYTEAOID) { bytea *bstr = DatumGetByteaP(str_const->constvalue); len = VARSIZE(bstr) - VARHDRSZ; workstr = (char *) palloc(len); memcpy(workstr, VARDATA(bstr), len); if ((Pointer) bstr != DatumGetPointer(str_const->constvalue)) pfree(bstr); cmpstr = str_const->constvalue; } else { workstr = TextDatumGetCString(str_const->constvalue); len = strlen(workstr); if (lc_collate_is_c(collation) || len == 0) cmpstr = str_const->constvalue; else { /* If first time through, determine the suffix to use */ static char suffixchar = 0; static Oid suffixcollation = 0; if (!suffixchar || suffixcollation != collation) { char *best; best = "Z"; if (varstr_cmp(best, 1, "z", 1, collation) < 0) best = "z"; if (varstr_cmp(best, 1, "y", 1, collation) < 0) best = "y"; if (varstr_cmp(best, 1, "9", 1, collation) < 0) best = "9"; suffixchar = *best; suffixcollation = collation; } /* And build the string to compare to */ cmptxt = (text *) palloc(VARHDRSZ + len + 1); SET_VARSIZE(cmptxt, VARHDRSZ + len + 1); memcpy(VARDATA(cmptxt), workstr, len); *(VARDATA(cmptxt) + len) = suffixchar; cmpstr = PointerGetDatum(cmptxt); } } /* Select appropriate character-incrementer function */ if (datatype == BYTEAOID) charinc = byte_increment; else charinc = pg_database_encoding_character_incrementer(); /* And search ... */ while (len > 0) { int charlen; unsigned char *lastchar; /* Identify the last character --- for bytea, just the last byte */ if (datatype == BYTEAOID) charlen = 1; else charlen = len - pg_mbcliplen(workstr, len, len - 1); lastchar = (unsigned char *) (workstr + len - charlen); /* * Try to generate a larger string by incrementing the last character * (for BYTEA, we treat each byte as a character). * * Note: the incrementer function is expected to return true if it's * generated a valid-per-the-encoding new character, otherwise false. * The contents of the character on false return are unspecified. */ while (charinc(lastchar, charlen)) { Const *workstr_const; if (datatype == BYTEAOID) workstr_const = string_to_bytea_const(workstr, len); else workstr_const = string_to_const(workstr, datatype); if (DatumGetBool(FunctionCall2Coll(ltproc, collation, cmpstr, workstr_const->constvalue))) { /* Successfully made a string larger than cmpstr */ if (cmptxt) pfree(cmptxt); pfree(workstr); return workstr_const; } /* No good, release unusable value and try again */ pfree(DatumGetPointer(workstr_const->constvalue)); pfree(workstr_const); } /* * No luck here, so truncate off the last character and try to * increment the next one. */ len -= charlen; workstr[len] = '\0'; } /* Failed... */ if (cmptxt) pfree(cmptxt); pfree(workstr); return NULL; }
double mcv_selectivity | ( | VariableStatData * | vardata, | |
FmgrInfo * | opproc, | |||
Datum | constval, | |||
bool | varonleft, | |||
double * | sumcommonp | |||
) |
Definition at line 602 of file selfuncs.c.
References VariableStatData::atttype, VariableStatData::atttypmod, DatumGetBool, DEFAULT_COLLATION_OID, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, NULL, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and values.
Referenced by ltreeparentsel(), patternsel(), and scalarineqsel().
{ double mcv_selec, sumcommon; Datum *values; int nvalues; float4 *numbers; int nnumbers; int i; mcv_selec = 0.0; sumcommon = 0.0; if (HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, &values, &nvalues, &numbers, &nnumbers)) { for (i = 0; i < nvalues; i++) { if (varonleft ? DatumGetBool(FunctionCall2Coll(opproc, DEFAULT_COLLATION_OID, values[i], constval)) : DatumGetBool(FunctionCall2Coll(opproc, DEFAULT_COLLATION_OID, constval, values[i]))) mcv_selec += numbers[i]; sumcommon += numbers[i]; } free_attstatsslot(vardata->atttype, values, nvalues, numbers, nnumbers); } *sumcommonp = sumcommon; return mcv_selec; }
void mergejoinscansel | ( | PlannerInfo * | root, | |
Node * | clause, | |||
Oid | opfamily, | |||
int | strategy, | |||
bool | nulls_first, | |||
Selectivity * | leftstart, | |||
Selectivity * | leftend, | |||
Selectivity * | rightstart, | |||
Selectivity * | rightend | |||
) |
Definition at line 2784 of file selfuncs.c.
References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause, OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.
Referenced by cached_scansel().
{ Node *left, *right; VariableStatData leftvar, rightvar; int op_strategy; Oid op_lefttype; Oid op_righttype; Oid opno, lsortop, rsortop, lstatop, rstatop, ltop, leop, revltop, revleop; bool isgt; Datum leftmin, leftmax, rightmin, rightmax; double selec; /* Set default results if we can't figure anything out. */ /* XXX should default "start" fraction be a bit more than 0? */ *leftstart = *rightstart = 0.0; *leftend = *rightend = 1.0; /* Deconstruct the merge clause */ if (!is_opclause(clause)) return; /* shouldn't happen */ opno = ((OpExpr *) clause)->opno; left = get_leftop((Expr *) clause); right = get_rightop((Expr *) clause); if (!right) return; /* shouldn't happen */ /* Look for stats for the inputs */ examine_variable(root, left, 0, &leftvar); examine_variable(root, right, 0, &rightvar); /* Extract the operator's declared left/right datatypes */ get_op_opfamily_properties(opno, opfamily, false, &op_strategy, &op_lefttype, &op_righttype); Assert(op_strategy == BTEqualStrategyNumber); /* * Look up the various operators we need. If we don't find them all, it * probably means the opfamily is broken, but we just fail silently. * * Note: we expect that pg_statistic histograms will be sorted by the '<' * operator, regardless of which sort direction we are considering. */ switch (strategy) { case BTLessStrategyNumber: isgt = false; if (op_lefttype == op_righttype) { /* easy case */ ltop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTLessStrategyNumber); leop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTLessEqualStrategyNumber); lsortop = ltop; rsortop = ltop; lstatop = lsortop; rstatop = rsortop; revltop = ltop; revleop = leop; } else { ltop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTLessStrategyNumber); leop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTLessEqualStrategyNumber); lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype, BTLessStrategyNumber); rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype, BTLessStrategyNumber); lstatop = lsortop; rstatop = rsortop; revltop = get_opfamily_member(opfamily, op_righttype, op_lefttype, BTLessStrategyNumber); revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype, BTLessEqualStrategyNumber); } break; case BTGreaterStrategyNumber: /* descending-order case */ isgt = true; if (op_lefttype == op_righttype) { /* easy case */ ltop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTGreaterStrategyNumber); leop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTGreaterEqualStrategyNumber); lsortop = ltop; rsortop = ltop; lstatop = get_opfamily_member(opfamily, op_lefttype, op_lefttype, BTLessStrategyNumber); rstatop = lstatop; revltop = ltop; revleop = leop; } else { ltop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTGreaterStrategyNumber); leop = get_opfamily_member(opfamily, op_lefttype, op_righttype, BTGreaterEqualStrategyNumber); lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype, BTGreaterStrategyNumber); rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype, BTGreaterStrategyNumber); lstatop = get_opfamily_member(opfamily, op_lefttype, op_lefttype, BTLessStrategyNumber); rstatop = get_opfamily_member(opfamily, op_righttype, op_righttype, BTLessStrategyNumber); revltop = get_opfamily_member(opfamily, op_righttype, op_lefttype, BTGreaterStrategyNumber); revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype, BTGreaterEqualStrategyNumber); } break; default: goto fail; /* shouldn't get here */ } if (!OidIsValid(lsortop) || !OidIsValid(rsortop) || !OidIsValid(lstatop) || !OidIsValid(rstatop) || !OidIsValid(ltop) || !OidIsValid(leop) || !OidIsValid(revltop) || !OidIsValid(revleop)) goto fail; /* insufficient info in catalogs */ /* Try to get ranges of both inputs */ if (!isgt) { if (!get_variable_range(root, &leftvar, lstatop, &leftmin, &leftmax)) goto fail; /* no range available from stats */ if (!get_variable_range(root, &rightvar, rstatop, &rightmin, &rightmax)) goto fail; /* no range available from stats */ } else { /* need to swap the max and min */ if (!get_variable_range(root, &leftvar, lstatop, &leftmax, &leftmin)) goto fail; /* no range available from stats */ if (!get_variable_range(root, &rightvar, rstatop, &rightmax, &rightmin)) goto fail; /* no range available from stats */ } /* * Now, the fraction of the left variable that will be scanned is the * fraction that's <= the right-side maximum value. But only believe * non-default estimates, else stick with our 1.0. */ selec = scalarineqsel(root, leop, isgt, &leftvar, rightmax, op_righttype); if (selec != DEFAULT_INEQ_SEL) *leftend = selec; /* And similarly for the right variable. */ selec = scalarineqsel(root, revleop, isgt, &rightvar, leftmax, op_lefttype); if (selec != DEFAULT_INEQ_SEL) *rightend = selec; /* * Only one of the two "end" fractions can really be less than 1.0; * believe the smaller estimate and reset the other one to exactly 1.0. If * we get exactly equal estimates (as can easily happen with self-joins), * believe neither. */ if (*leftend > *rightend) *leftend = 1.0; else if (*leftend < *rightend) *rightend = 1.0; else *leftend = *rightend = 1.0; /* * Also, the fraction of the left variable that will be scanned before the * first join pair is found is the fraction that's < the right-side * minimum value. But only believe non-default estimates, else stick with * our own default. */ selec = scalarineqsel(root, ltop, isgt, &leftvar, rightmin, op_righttype); if (selec != DEFAULT_INEQ_SEL) *leftstart = selec; /* And similarly for the right variable. */ selec = scalarineqsel(root, revltop, isgt, &rightvar, leftmin, op_lefttype); if (selec != DEFAULT_INEQ_SEL) *rightstart = selec; /* * Only one of the two "start" fractions can really be more than zero; * believe the larger estimate and reset the other one to exactly 0.0. If * we get exactly equal estimates (as can easily happen with self-joins), * believe neither. */ if (*leftstart < *rightstart) *leftstart = 0.0; else if (*leftstart > *rightstart) *rightstart = 0.0; else *leftstart = *rightstart = 0.0; /* * If the sort order is nulls-first, we're going to have to skip over any * nulls too. These would not have been counted by scalarineqsel, and we * can safely add in this fraction regardless of whether we believe * scalarineqsel's results or not. But be sure to clamp the sum to 1.0! */ if (nulls_first) { Form_pg_statistic stats; if (HeapTupleIsValid(leftvar.statsTuple)) { stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple); *leftstart += stats->stanullfrac; CLAMP_PROBABILITY(*leftstart); *leftend += stats->stanullfrac; CLAMP_PROBABILITY(*leftend); } if (HeapTupleIsValid(rightvar.statsTuple)) { stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple); *rightstart += stats->stanullfrac; CLAMP_PROBABILITY(*rightstart); *rightend += stats->stanullfrac; CLAMP_PROBABILITY(*rightend); } } /* Disbelieve start >= end, just in case that can happen */ if (*leftstart >= *leftend) { *leftstart = 0.0; *leftend = 1.0; } if (*rightstart >= *rightend) { *rightstart = 0.0; *rightend = 1.0; } fail: ReleaseVariableStats(leftvar); ReleaseVariableStats(rightvar); }
Datum neqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2629 of file selfuncs.c.
References DatumGetFloat8, DirectFunctionCall5, eqjoinsel(), get_negator(), Int16GetDatum, ObjectIdGetDatum, PG_GETARG_INT16, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and PointerGetDatum.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); Oid eqop; float8 result; /* * We want 1 - eqjoinsel() where the equality operator is the one * associated with this != operator, that is, its negator. */ eqop = get_negator(operator); if (eqop) { result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), Int16GetDatum(jointype), PointerGetDatum(sjinfo))); } else { /* Use default selectivity (should we raise an error instead?) */ result = DEFAULT_EQ_SEL; } result = 1.0 - result; PG_RETURN_FLOAT8(result); }
Datum neqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 483 of file selfuncs.c.
References DatumGetFloat8, DirectFunctionCall4, eqsel(), get_negator(), Int32GetDatum, ObjectIdGetDatum, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and PointerGetDatum.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); Oid eqop; float8 result; /* * We want 1 - eqsel() where the equality operator is the one associated * with this != operator, that is, its negator. */ eqop = get_negator(operator); if (eqop) { result = DatumGetFloat8(DirectFunctionCall4(eqsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), Int32GetDatum(varRelid))); } else { /* Use default selectivity (should we raise an error instead?) */ result = DEFAULT_EQ_SEL; } result = 1.0 - result; PG_RETURN_FLOAT8(result); }
Datum nlikejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2748 of file selfuncs.c.
References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true)); }
Datum nlikesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1428 of file selfuncs.c.
References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true)); }
Selectivity nulltestsel | ( | PlannerInfo * | root, | |
NullTestType | nulltesttype, | |||
Node * | arg, | |||
int | varRelid, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo | |||
) |
Definition at line 1613 of file selfuncs.c.
References CLAMP_PROBABILITY, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, ReleaseVariableStats, and VariableStatData::statsTuple.
Referenced by clause_selectivity(), and clauselist_selectivity().
{ VariableStatData vardata; double selec; examine_variable(root, arg, varRelid, &vardata); if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; double freq_null; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; switch (nulltesttype) { case IS_NULL: /* * Use freq_null directly. */ selec = freq_null; break; case IS_NOT_NULL: /* * Select not unknown (not null) values. Calculate from * freq_null. */ selec = 1.0 - freq_null; break; default: elog(ERROR, "unrecognized nulltesttype: %d", (int) nulltesttype); return (Selectivity) 0; /* keep compiler quiet */ } } else { /* * No ANALYZE stats available, so make a guess */ switch (nulltesttype) { case IS_NULL: selec = DEFAULT_UNK_SEL; break; case IS_NOT_NULL: selec = DEFAULT_NOT_UNK_SEL; break; default: elog(ERROR, "unrecognized nulltesttype: %d", (int) nulltesttype); return (Selectivity) 0; /* keep compiler quiet */ } } ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); return (Selectivity) selec; }
Pattern_Prefix_Status pattern_fixed_prefix | ( | Const * | patt, | |
Pattern_Type | ptype, | |||
Oid | collation, | |||
Const ** | prefix, | |||
Selectivity * | rest_selec | |||
) |
Definition at line 5328 of file selfuncs.c.
References elog, ERROR, like_fixed_prefix(), Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Regex, Pattern_Type_Regex_IC, and regex_fixed_prefix().
Referenced by expand_indexqual_opclause(), match_special_index_operator(), and patternsel().
{ Pattern_Prefix_Status result; switch (ptype) { case Pattern_Type_Like: result = like_fixed_prefix(patt, false, collation, prefix, rest_selec); break; case Pattern_Type_Like_IC: result = like_fixed_prefix(patt, true, collation, prefix, rest_selec); break; case Pattern_Type_Regex: result = regex_fixed_prefix(patt, false, collation, prefix, rest_selec); break; case Pattern_Type_Regex_IC: result = regex_fixed_prefix(patt, true, collation, prefix, rest_selec); break; default: elog(ERROR, "unrecognized ptype: %d", (int) ptype); result = Pattern_Prefix_None; /* keep compiler quiet */ break; } return result; }
Datum regexeqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2694 of file selfuncs.c.
References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false)); }
Datum regexeqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1374 of file selfuncs.c.
References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false)); }
Datum regexnejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2730 of file selfuncs.c.
References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true)); }
Datum regexnesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1410 of file selfuncs.c.
References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.
{ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true)); }
Selectivity rowcomparesel | ( | PlannerInfo * | root, | |
RowCompareExpr * | clause, | |||
int | varRelid, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo | |||
) |
Definition at line 2066 of file selfuncs.c.
References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NULL, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.
Referenced by clause_selectivity().
{ Selectivity s1; Oid opno = linitial_oid(clause->opnos); Oid inputcollid = linitial_oid(clause->inputcollids); List *opargs; bool is_join_clause; /* Build equivalent arg list for single operator */ opargs = list_make2(linitial(clause->largs), linitial(clause->rargs)); /* * Decide if it's a join clause. This should match clausesel.c's * treat_as_join_clause(), except that we intentionally consider only the * leading columns and not the rest of the clause. */ if (varRelid != 0) { /* * Caller is forcing restriction mode (eg, because we are examining an * inner indexscan qual). */ is_join_clause = false; } else if (sjinfo == NULL) { /* * It must be a restriction clause, since it's being evaluated at a * scan node. */ is_join_clause = false; } else { /* * Otherwise, it's a join if there's more than one relation used. */ is_join_clause = (NumRelids((Node *) opargs) > 1); } if (is_join_clause) { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, opargs, inputcollid, jointype, sjinfo); } else { /* Estimate selectivity for a restriction clause. */ s1 = restriction_selectivity(root, opno, opargs, inputcollid, varRelid); } return s1; }
Selectivity scalararraysel | ( | PlannerInfo * | root, | |
ScalarArrayOpExpr * | clause, | |||
bool | is_join_clause, | |||
int | varRelid, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo | |||
) |
Definition at line 1713 of file selfuncs.c.
References ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.
Referenced by clause_selectivity().
{ Oid operator = clause->opno; bool useOr = clause->useOr; bool isEquality = false; bool isInequality = false; Node *leftop; Node *rightop; Oid nominal_element_type; Oid nominal_element_collation; TypeCacheEntry *typentry; RegProcedure oprsel; FmgrInfo oprselproc; Selectivity s1; Selectivity s1disjoint; /* First, deconstruct the expression */ Assert(list_length(clause->args) == 2); leftop = (Node *) linitial(clause->args); rightop = (Node *) lsecond(clause->args); /* get nominal (after relabeling) element type of rightop */ nominal_element_type = get_base_element_type(exprType(rightop)); if (!OidIsValid(nominal_element_type)) return (Selectivity) 0.5; /* probably shouldn't happen */ /* get nominal collation, too, for generating constants */ nominal_element_collation = exprCollation(rightop); /* look through any binary-compatible relabeling of rightop */ rightop = strip_array_coercion(rightop); /* * Detect whether the operator is the default equality or inequality * operator of the array element type. */ typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR); if (OidIsValid(typentry->eq_opr)) { if (operator == typentry->eq_opr) isEquality = true; else if (get_negator(operator) == typentry->eq_opr) isInequality = true; } /* * If it is equality or inequality, we might be able to estimate this as a * form of array containment; for instance "const = ANY(column)" can be * treated as "ARRAY[const] <@ column". scalararraysel_containment tries * that, and returns the selectivity estimate if successful, or -1 if not. */ if ((isEquality || isInequality) && !is_join_clause) { s1 = scalararraysel_containment(root, leftop, rightop, nominal_element_type, isEquality, useOr, varRelid); if (s1 >= 0.0) return s1; } /* * Look up the underlying operator's selectivity estimator. Punt if it * hasn't got one. */ if (is_join_clause) oprsel = get_oprjoin(operator); else oprsel = get_oprrest(operator); if (!oprsel) return (Selectivity) 0.5; fmgr_info(oprsel, &oprselproc); /* * In the array-containment check above, we must only believe that an * operator is equality or inequality if it is the default btree equality * operator (or its negator) for the element type, since those are the * operators that array containment will use. But in what follows, we can * be a little laxer, and also believe that any operators using eqsel() or * neqsel() as selectivity estimator act like equality or inequality. */ if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL) isEquality = true; else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL) isInequality = true; /* * We consider three cases: * * 1. rightop is an Array constant: deconstruct the array, apply the * operator's selectivity function for each array element, and merge the * results in the same way that clausesel.c does for AND/OR combinations. * * 2. rightop is an ARRAY[] construct: apply the operator's selectivity * function for each element of the ARRAY[] construct, and merge. * * 3. otherwise, make a guess ... */ if (rightop && IsA(rightop, Const)) { Datum arraydatum = ((Const *) rightop)->constvalue; bool arrayisnull = ((Const *) rightop)->constisnull; ArrayType *arrayval; int16 elmlen; bool elmbyval; char elmalign; int num_elems; Datum *elem_values; bool *elem_nulls; int i; if (arrayisnull) /* qual can't succeed if null array */ return (Selectivity) 0.0; arrayval = DatumGetArrayTypeP(arraydatum); get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), &elmlen, &elmbyval, &elmalign); deconstruct_array(arrayval, ARR_ELEMTYPE(arrayval), elmlen, elmbyval, elmalign, &elem_values, &elem_nulls, &num_elems); /* * For generic operators, we assume the probability of success is * independent for each array element. But for "= ANY" or "<> ALL", * if the array elements are distinct (which'd typically be the case) * then the probabilities are disjoint, and we should just sum them. * * If we were being really tense we would try to confirm that the * elements are all distinct, but that would be expensive and it * doesn't seem to be worth the cycles; it would amount to penalizing * well-written queries in favor of poorly-written ones. However, we * do protect ourselves a little bit by checking whether the * disjointness assumption leads to an impossible (out of range) * probability; if so, we fall back to the normal calculation. */ s1 = s1disjoint = (useOr ? 0.0 : 1.0); for (i = 0; i < num_elems; i++) { List *args; Selectivity s2; args = list_make2(leftop, makeConst(nominal_element_type, -1, nominal_element_collation, elmlen, elem_values[i], elem_nulls[i], elmbyval)); if (is_join_clause) s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, clause->inputcollid, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int16GetDatum(jointype), PointerGetDatum(sjinfo))); else s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, clause->inputcollid, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int32GetDatum(varRelid))); if (useOr) { s1 = s1 + s2 - s1 * s2; if (isEquality) s1disjoint += s2; } else { s1 = s1 * s2; if (isInequality) s1disjoint += s2 - 1.0; } } /* accept disjoint-probability estimate if in range */ if ((useOr ? isEquality : isInequality) && s1disjoint >= 0.0 && s1disjoint <= 1.0) s1 = s1disjoint; } else if (rightop && IsA(rightop, ArrayExpr) && !((ArrayExpr *) rightop)->multidims) { ArrayExpr *arrayexpr = (ArrayExpr *) rightop; int16 elmlen; bool elmbyval; ListCell *l; get_typlenbyval(arrayexpr->element_typeid, &elmlen, &elmbyval); /* * We use the assumption of disjoint probabilities here too, although * the odds of equal array elements are rather higher if the elements * are not all constants (which they won't be, else constant folding * would have reduced the ArrayExpr to a Const). In this path it's * critical to have the sanity check on the s1disjoint estimate. */ s1 = s1disjoint = (useOr ? 0.0 : 1.0); foreach(l, arrayexpr->elements) { Node *elem = (Node *) lfirst(l); List *args; Selectivity s2; /* * Theoretically, if elem isn't of nominal_element_type we should * insert a RelabelType, but it seems unlikely that any operator * estimation function would really care ... */ args = list_make2(leftop, elem); if (is_join_clause) s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, clause->inputcollid, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int16GetDatum(jointype), PointerGetDatum(sjinfo))); else s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, clause->inputcollid, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int32GetDatum(varRelid))); if (useOr) { s1 = s1 + s2 - s1 * s2; if (isEquality) s1disjoint += s2; } else { s1 = s1 * s2; if (isInequality) s1disjoint += s2 - 1.0; } } /* accept disjoint-probability estimate if in range */ if ((useOr ? isEquality : isInequality) && s1disjoint >= 0.0 && s1disjoint <= 1.0) s1 = s1disjoint; } else { CaseTestExpr *dummyexpr; List *args; Selectivity s2; int i; /* * We need a dummy rightop to pass to the operator selectivity * routine. It can be pretty much anything that doesn't look like a * constant; CaseTestExpr is a convenient choice. */ dummyexpr = makeNode(CaseTestExpr); dummyexpr->typeId = nominal_element_type; dummyexpr->typeMod = -1; dummyexpr->collation = clause->inputcollid; args = list_make2(leftop, dummyexpr); if (is_join_clause) s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, clause->inputcollid, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int16GetDatum(jointype), PointerGetDatum(sjinfo))); else s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, clause->inputcollid, PointerGetDatum(root), ObjectIdGetDatum(operator), PointerGetDatum(args), Int32GetDatum(varRelid))); s1 = useOr ? 0.0 : 1.0; /* * Arbitrarily assume 10 elements in the eventual array value (see * also estimate_array_length). We don't risk an assumption of * disjoint probabilities here. */ for (i = 0; i < 10; i++) { if (useOr) s1 = s1 + s2 - s1 * s2; else s1 = s1 * s2; } } /* result should be in range, but make sure... */ CLAMP_PROBABILITY(s1); return s1; }
Selectivity scalararraysel_containment | ( | PlannerInfo * | root, | |
Node * | leftop, | |||
Node * | rightop, | |||
Oid | elemtype, | |||
bool | isEquality, | |||
bool | useOr, | |||
int | varRelid | |||
) |
Definition at line 80 of file array_selfuncs.c.
References VariableStatData::atttypmod, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, estimate_expression_value(), examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), NULL, OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, OidIsValid, VariableStatData::rel, ReleaseVariableStats, STATISTIC_KIND_DECHIST, STATISTIC_KIND_MCELEM, VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and values.
Referenced by scalararraysel().
{ Selectivity selec; VariableStatData vardata; Datum constval; TypeCacheEntry *typentry; FmgrInfo *cmpfunc; /* * rightop must be a variable, else punt. */ examine_variable(root, rightop, varRelid, &vardata); if (!vardata.rel) { ReleaseVariableStats(vardata); return -1.0; } /* * Aggressively reduce leftop to a constant, if possible. */ leftop = estimate_expression_value(root, leftop); if (!IsA(leftop, Const)) { ReleaseVariableStats(vardata); return -1.0; } if (((Const *) leftop)->constisnull) { /* qual can't succeed if null on left */ ReleaseVariableStats(vardata); return (Selectivity) 0.0; } constval = ((Const *) leftop)->constvalue; /* Get element type's default comparison function */ typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) { ReleaseVariableStats(vardata); return -1.0; } cmpfunc = &typentry->cmp_proc_finfo; /* * If the operator is <>, swap ANY/ALL, then invert the result later. */ if (!isEquality) useOr = !useOr; /* Get array element stats for var, if available */ if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; Datum *values; int nvalues; float4 *numbers; int nnumbers; float4 *hist; int nhist; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); /* MCELEM will be an array of same type as element */ if (get_attstatsslot(vardata.statsTuple, elemtype, vardata.atttypmod, STATISTIC_KIND_MCELEM, InvalidOid, NULL, &values, &nvalues, &numbers, &nnumbers)) { /* For ALL case, also get histogram of distinct-element counts */ if (useOr || !get_attstatsslot(vardata.statsTuple, elemtype, vardata.atttypmod, STATISTIC_KIND_DECHIST, InvalidOid, NULL, NULL, NULL, &hist, &nhist)) { hist = NULL; nhist = 0; } /* * For = ANY, estimate as var @> ARRAY[const]. * * For = ALL, estimate as var <@ ARRAY[const]. */ if (useOr) selec = mcelem_array_contain_overlap_selec(values, nvalues, numbers, nnumbers, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else selec = mcelem_array_contained_selec(values, nvalues, numbers, nnumbers, &constval, 1, hist, nhist, OID_ARRAY_CONTAINED_OP, cmpfunc); if (hist) free_attstatsslot(elemtype, NULL, 0, hist, nhist); free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers); } else { /* No most-common-elements info, so do without */ if (useOr) selec = mcelem_array_contain_overlap_selec(NULL, 0, NULL, 0, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else selec = mcelem_array_contained_selec(NULL, 0, NULL, 0, &constval, 1, NULL, 0, OID_ARRAY_CONTAINED_OP, cmpfunc); } /* * MCE stats count only non-null rows, so adjust for null rows. */ selec *= (1.0 - stats->stanullfrac); } else { /* No stats at all, so do without */ if (useOr) selec = mcelem_array_contain_overlap_selec(NULL, 0, NULL, 0, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else selec = mcelem_array_contained_selec(NULL, 0, NULL, 0, &constval, 1, NULL, 0, OID_ARRAY_CONTAINED_OP, cmpfunc); /* we assume no nulls here, so no stanullfrac correction */ } ReleaseVariableStats(vardata); /* * If the operator is <>, invert the results. */ if (!isEquality) selec = 1.0 - selec; CLAMP_PROBABILITY(selec); return selec; }
Datum scalargtjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2675 of file selfuncs.c.
References DEFAULT_INEQ_SEL, and PG_RETURN_FLOAT8.
Datum scalargtsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1034 of file selfuncs.c.
References DEFAULT_INEQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and scalarineqsel().
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Datum constval; Oid consttype; bool isgt; double selec; /* * If expression is not variable op something or something op variable, * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } /* * If the constant is NULL, assume operator is strict and return zero, ie, * operator will never return TRUE. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } constval = ((Const *) other)->constvalue; consttype = ((Const *) other)->consttype; /* * Force the var to be on the left to simplify logic in scalarineqsel. */ if (varonleft) { /* we have var > other */ isgt = true; } else { /* we have other > var, commute to make var < other */ operator = get_commutator(operator); if (!operator) { /* Use default selectivity (should we raise an error instead?) */ ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } isgt = false; } selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8((float8) selec); }
Datum scalarltjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2666 of file selfuncs.c.
References DEFAULT_INEQ_SEL, and PG_RETURN_FLOAT8.
Datum scalarltsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 959 of file selfuncs.c.
References DEFAULT_INEQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and scalarineqsel().
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Datum constval; Oid consttype; bool isgt; double selec; /* * If expression is not variable op something or something op variable, * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } /* * If the constant is NULL, assume operator is strict and return zero, ie, * operator will never return TRUE. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } constval = ((Const *) other)->constvalue; consttype = ((Const *) other)->consttype; /* * Force the var to be on the left to simplify logic in scalarineqsel. */ if (varonleft) { /* we have var < other */ isgt = false; } else { /* we have other < var, commute to make var > other */ operator = get_commutator(operator); if (!operator) { /* Use default selectivity (should we raise an error instead?) */ ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } isgt = true; } selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8((float8) selec); }
Datum spgcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6648 of file selfuncs.c.
References cpu_operator_cost, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, MemSet, GenericCosts::num_sa_scans, IndexOptInfo::pages, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, IndexOptInfo::tree_height, and IndexOptInfo::tuples.
{ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1); double loop_count = PG_GETARG_FLOAT8(2); Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); double *indexCorrelation = (double *) PG_GETARG_POINTER(6); IndexOptInfo *index = path->indexinfo; GenericCosts costs; Cost descentCost; MemSet(&costs, 0, sizeof(costs)); genericcostestimate(root, path, loop_count, &costs); /* * We model index descent costs similarly to those for btree, but to do * that we first need an idea of the tree height. We somewhat arbitrarily * assume that the fanout is 100, meaning the tree height is at most * log100(index->pages). * * Although this computation isn't really expensive enough to require * caching, we might as well use index->tree_height to cache it. */ if (index->tree_height < 0) /* unknown? */ { if (index->pages > 1) /* avoid computing log(0) */ index->tree_height = (int) (log(index->pages) / log(100.0)); else index->tree_height = 0; } /* * Add a CPU-cost component to represent the costs of initial descent. * We just use log(N) here not log2(N) since the branching factor isn't * necessarily two anyway. As for btree, charge once per SA scan. */ if (index->tuples > 1) /* avoid computing log(0) */ { descentCost = ceil(log(index->tuples)) * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; } /* * Likewise add a per-page charge, calculated the same as for btrees. */ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; PG_RETURN_VOID(); }
PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook |
Definition at line 145 of file selfuncs.c.
Referenced by btcostestimate(), and examine_variable().
PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook |
Definition at line 144 of file selfuncs.c.
Referenced by btcostestimate(), and examine_simple_variable().