#include "fmgr.h"#include "access/htup.h"#include "nodes/relation.h"

Go to the source code of this file.
| #define CLAMP_PROBABILITY | ( | p | ) |
do { \ if (p < 0.0) \ p = 0.0; \ else if (p > 1.0) \ p = 1.0; \ } while (0)
Definition at line 57 of file selfuncs.h.
Referenced by arraycontsel(), booltestsel(), calc_rangesel(), eqjoinsel(), eqjoinsel_inner(), eqjoinsel_semi(), ineq_histogram_selectivity(), ltreeparentsel(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), mergejoinscansel(), nulltestsel(), patternsel(), rangesel(), regex_selectivity(), scalararraysel(), scalararraysel_containment(), scalarineqsel(), tsmatchsel(), tsquery_opr_selec(), var_eq_const(), and var_eq_non_const().
| #define DEFAULT_EQ_SEL 0.005 |
Definition at line 34 of file selfuncs.h.
Referenced by eqsel().
| #define DEFAULT_INEQ_SEL 0.3333333333333333 |
Definition at line 37 of file selfuncs.h.
Referenced by clauselist_selectivity(), mergejoinscansel(), scalargtjoinsel(), scalargtsel(), scalarltjoinsel(), and scalarltsel().
| #define DEFAULT_MATCH_SEL 0.005 |
Definition at line 43 of file selfuncs.h.
Referenced by patternjoinsel().
| #define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL) |
Definition at line 50 of file selfuncs.h.
| #define DEFAULT_NUM_DISTINCT 200 |
Definition at line 46 of file selfuncs.h.
Referenced by get_variable_numdistinct().
| #define DEFAULT_RANGE_INEQ_SEL 0.005 |
Definition at line 40 of file selfuncs.h.
| #define DEFAULT_UNK_SEL 0.005 |
Definition at line 49 of file selfuncs.h.
| #define ReleaseVariableStats | ( | vardata | ) |
do { \ if (HeapTupleIsValid((vardata).statsTuple)) \ (* (vardata).freefunc) ((vardata).statsTuple); \ } while(0)
Definition at line 80 of file selfuncs.h.
Referenced by arraycontsel(), booltestsel(), btcostestimate(), eqjoinsel(), eqsel(), estimate_hash_bucketsize(), estimate_num_groups(), get_restriction_variable(), ltreeparentsel(), mergejoinscansel(), nulltestsel(), patternsel(), rangesel(), scalararraysel_containment(), scalargtsel(), scalarltsel(), and tsmatchsel().
| typedef bool(* get_index_stats_hook_type)(PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata) |
Definition at line 104 of file selfuncs.h.
| typedef bool(* get_relation_stats_hook_type)(PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata) |
Definition at line 99 of file selfuncs.h.
| typedef struct VariableStatData VariableStatData |
Definition at line 93 of file selfuncs.h.
{
Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
} Pattern_Prefix_Status;
| enum Pattern_Type |
Definition at line 87 of file selfuncs.h.
{
Pattern_Type_Like, Pattern_Type_Like_IC,
Pattern_Type_Regex, Pattern_Type_Regex_IC
} Pattern_Type;
| Datum arraycontjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 329 of file array_selfuncs.c.
References DEFAULT_SEL, and PG_RETURN_FLOAT8.
{
/* For the moment this is just a stub */
Oid operator = PG_GETARG_OID(1);
PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
}
| Datum arraycontsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 249 of file array_selfuncs.c.
References calc_arraycontsel(), CLAMP_PROBABILITY, DEFAULT_SEL, get_base_element_type(), get_restriction_variable(), InvalidOid, IsA, OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and VariableStatData::vartype.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
VariableStatData vardata;
Node *other;
bool varonleft;
Selectivity selec;
Oid element_typeid;
/*
* If expression is not (variable op something) or (something op
* variable), then punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
/*
* Can't do anything useful if the something is not a constant, either.
*/
if (!IsA(other, Const))
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
}
/*
* The "&&", "@>" and "<@" operators are strict, so we can cope with a
* NULL constant right away.
*/
if (((Const *) other)->constisnull)
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(0.0);
}
/*
* If var is on the right, commute the operator, so that we can assume the
* var is on the left in what follows.
*/
if (!varonleft)
{
if (operator == OID_ARRAY_CONTAINS_OP)
operator = OID_ARRAY_CONTAINED_OP;
else if (operator == OID_ARRAY_CONTAINED_OP)
operator = OID_ARRAY_CONTAINS_OP;
}
/*
* OK, there's a Var and a Const we're dealing with here. We need the
* Const to be a array with same element type as column, else we can't do
* anything useful. (Such cases will likely fail at runtime, but here
* we'd rather just return a default estimate.)
*/
element_typeid = get_base_element_type(((Const *) other)->consttype);
if (element_typeid != InvalidOid &&
element_typeid == get_base_element_type(vardata.vartype))
{
selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
element_typeid, operator);
}
else
{
selec = DEFAULT_SEL(operator);
}
ReleaseVariableStats(vardata);
CLAMP_PROBABILITY(selec);
PG_RETURN_FLOAT8((float8) selec);
}
| Selectivity booltestsel | ( | PlannerInfo * | root, | |
| BoolTestType | booltesttype, | |||
| Node * | arg, | |||
| int | varRelid, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo | |||
| ) |
Definition at line 1446 of file selfuncs.c.
References VariableStatData::atttype, VariableStatData::atttypmod, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, NULL, ReleaseVariableStats, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and values.
Referenced by clause_selectivity().
{
VariableStatData vardata;
double selec;
examine_variable(root, arg, varRelid, &vardata);
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
double freq_null;
Datum *values;
int nvalues;
float4 *numbers;
int nnumbers;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
freq_null = stats->stanullfrac;
if (get_attstatsslot(vardata.statsTuple,
vardata.atttype, vardata.atttypmod,
STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues,
&numbers, &nnumbers)
&& nnumbers > 0)
{
double freq_true;
double freq_false;
/*
* Get first MCV frequency and derive frequency for true.
*/
if (DatumGetBool(values[0]))
freq_true = numbers[0];
else
freq_true = 1.0 - numbers[0] - freq_null;
/*
* Next derive frequency for false. Then use these as appropriate
* to derive frequency for each case.
*/
freq_false = 1.0 - freq_true - freq_null;
switch (booltesttype)
{
case IS_UNKNOWN:
/* select only NULL values */
selec = freq_null;
break;
case IS_NOT_UNKNOWN:
/* select non-NULL values */
selec = 1.0 - freq_null;
break;
case IS_TRUE:
/* select only TRUE values */
selec = freq_true;
break;
case IS_NOT_TRUE:
/* select non-TRUE values */
selec = 1.0 - freq_true;
break;
case IS_FALSE:
/* select only FALSE values */
selec = freq_false;
break;
case IS_NOT_FALSE:
/* select non-FALSE values */
selec = 1.0 - freq_false;
break;
default:
elog(ERROR, "unrecognized booltesttype: %d",
(int) booltesttype);
selec = 0.0; /* Keep compiler quiet */
break;
}
free_attstatsslot(vardata.atttype, values, nvalues,
numbers, nnumbers);
}
else
{
/*
* No most-common-value info available. Still have null fraction
* information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
* for null fraction and assume an even split for boolean tests.
*/
switch (booltesttype)
{
case IS_UNKNOWN:
/*
* Use freq_null directly.
*/
selec = freq_null;
break;
case IS_NOT_UNKNOWN:
/*
* Select not unknown (not null) values. Calculate from
* freq_null.
*/
selec = 1.0 - freq_null;
break;
case IS_TRUE:
case IS_NOT_TRUE:
case IS_FALSE:
case IS_NOT_FALSE:
selec = (1.0 - freq_null) / 2.0;
break;
default:
elog(ERROR, "unrecognized booltesttype: %d",
(int) booltesttype);
selec = 0.0; /* Keep compiler quiet */
break;
}
}
}
else
{
/*
* If we can't get variable statistics for the argument, perhaps
* clause_selectivity can do something with it. We ignore the
* possibility of a NULL value when using clause_selectivity, and just
* assume the value is either TRUE or FALSE.
*/
switch (booltesttype)
{
case IS_UNKNOWN:
selec = DEFAULT_UNK_SEL;
break;
case IS_NOT_UNKNOWN:
selec = DEFAULT_NOT_UNK_SEL;
break;
case IS_TRUE:
case IS_NOT_FALSE:
selec = (double) clause_selectivity(root, arg,
varRelid,
jointype, sjinfo);
break;
case IS_FALSE:
case IS_NOT_TRUE:
selec = 1.0 - (double) clause_selectivity(root, arg,
varRelid,
jointype, sjinfo);
break;
default:
elog(ERROR, "unrecognized booltesttype: %d",
(int) booltesttype);
selec = 0.0; /* Keep compiler quiet */
break;
}
}
ReleaseVariableStats(vardata);
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
return (Selectivity) selec;
}
| Datum btcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6194 of file selfuncs.c.
References add_predicate_to_quals(), NullTest::arg, ScalarArrayOpExpr::args, Assert, BoolGetDatum, BTEqualStrategyNumber, BTLessStrategyNumber, RestrictInfo::clause, clauselist_selectivity(), cpu_operator_cost, elog, ERROR, estimate_array_length(), forboth, free_attstatsslot(), VariableStatData::freefunc, genericcostestimate(), get_attstatsslot(), get_commutator(), get_index_stats_hook, get_leftop(), get_op_opfamily_strategy(), get_opfamily_member(), get_relation_stats_hook, get_rightop(), HeapTupleIsValid, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexOptInfo::indexkeys, IndexOptInfo::indexoid, IndexPath::indexqualcols, IndexPath::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, RangeTblEntry::inh, Int16GetDatum, InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), RowCompareExpr::largs, lfirst, lfirst_int, linitial, linitial_oid, lsecond, match_index_to_operand(), MemSet, IndexOptInfo::ncolumns, nodeTag, NULL, NullTest::nulltesttype, GenericCosts::num_sa_scans, GenericCosts::numIndexTuples, ObjectIdGetDatum, OidIsValid, IndexOptInfo::opcintype, IndexOptInfo::opfamily, ScalarArrayOpExpr::opno, RowCompareExpr::opnos, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, planner_rt_fetch, RowCompareExpr::rargs, IndexOptInfo::rel, ReleaseVariableStats, RangeTblEntry::relid, RelOptInfo::relid, IndexOptInfo::reverse_sort, rint(), RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3, STATISTIC_KIND_CORRELATION, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::tree_height, IndexOptInfo::tuples, RelOptInfo::tuples, and IndexOptInfo::unique.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
GenericCosts costs;
Oid relid;
AttrNumber colnum;
VariableStatData vardata;
double numIndexTuples;
Cost descentCost;
List *indexBoundQuals;
int indexcol;
bool eqQualHere;
bool found_saop;
bool found_is_null_op;
double num_sa_scans;
ListCell *lcc,
*lci;
/*
* For a btree scan, only leading '=' quals plus inequality quals for the
* immediately next attribute contribute to index selectivity (these are
* the "boundary quals" that determine the starting and stopping points of
* the index scan). Additional quals can suppress visits to the heap, so
* it's OK to count them in indexSelectivity, but they should not count
* for estimating numIndexTuples. So we must examine the given indexquals
* to find out which ones count as boundary quals. We rely on the
* knowledge that they are given in index column order.
*
* For a RowCompareExpr, we consider only the first column, just as
* rowcomparesel() does.
*
* If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
* index scans not one, but the ScalarArrayOpExpr's operator can be
* considered to act the same as it normally does.
*/
indexBoundQuals = NIL;
indexcol = 0;
eqQualHere = false;
found_saop = false;
found_is_null_op = false;
num_sa_scans = 1;
forboth(lcc, path->indexquals, lci, path->indexqualcols)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
Expr *clause;
Node *leftop,
*rightop PG_USED_FOR_ASSERTS_ONLY;
Oid clause_op;
int op_strategy;
bool is_null_op = false;
if (indexcol != lfirst_int(lci))
{
/* Beginning of a new column's quals */
if (!eqQualHere)
break; /* done if no '=' qual for indexcol */
eqQualHere = false;
indexcol++;
if (indexcol != lfirst_int(lci))
break; /* no quals at all for indexcol */
}
Assert(IsA(rinfo, RestrictInfo));
clause = rinfo->clause;
if (IsA(clause, OpExpr))
{
leftop = get_leftop(clause);
rightop = get_rightop(clause);
clause_op = ((OpExpr *) clause)->opno;
}
else if (IsA(clause, RowCompareExpr))
{
RowCompareExpr *rc = (RowCompareExpr *) clause;
leftop = (Node *) linitial(rc->largs);
rightop = (Node *) linitial(rc->rargs);
clause_op = linitial_oid(rc->opnos);
}
else if (IsA(clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
leftop = (Node *) linitial(saop->args);
rightop = (Node *) lsecond(saop->args);
clause_op = saop->opno;
found_saop = true;
}
else if (IsA(clause, NullTest))
{
NullTest *nt = (NullTest *) clause;
leftop = (Node *) nt->arg;
rightop = NULL;
clause_op = InvalidOid;
if (nt->nulltesttype == IS_NULL)
{
found_is_null_op = true;
is_null_op = true;
}
}
else
{
elog(ERROR, "unsupported indexqual type: %d",
(int) nodeTag(clause));
continue; /* keep compiler quiet */
}
if (match_index_to_operand(leftop, indexcol, index))
{
/* clause_op is correct */
}
else
{
Assert(match_index_to_operand(rightop, indexcol, index));
/* Must flip operator to get the opfamily member */
clause_op = get_commutator(clause_op);
}
/* check for equality operator */
if (OidIsValid(clause_op))
{
op_strategy = get_op_opfamily_strategy(clause_op,
index->opfamily[indexcol]);
Assert(op_strategy != 0); /* not a member of opfamily?? */
if (op_strategy == BTEqualStrategyNumber)
eqQualHere = true;
}
else if (is_null_op)
{
/* IS NULL is like = for purposes of selectivity determination */
eqQualHere = true;
}
/* count up number of SA scans induced by indexBoundQuals only */
if (IsA(clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
int alength = estimate_array_length(lsecond(saop->args));
if (alength > 1)
num_sa_scans *= alength;
}
indexBoundQuals = lappend(indexBoundQuals, rinfo);
}
/*
* If index is unique and we found an '=' clause for each column, we can
* just assume numIndexTuples = 1 and skip the expensive
* clauselist_selectivity calculations. However, a ScalarArrayOp or
* NullTest invalidates that theory, even though it sets eqQualHere.
*/
if (index->unique &&
indexcol == index->ncolumns - 1 &&
eqQualHere &&
!found_saop &&
!found_is_null_op)
numIndexTuples = 1.0;
else
{
List *selectivityQuals;
Selectivity btreeSelectivity;
/*
* If the index is partial, AND the index predicate with the
* index-bound quals to produce a more accurate idea of the number of
* rows covered by the bound conditions.
*/
selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
index->rel->relid,
JOIN_INNER,
NULL);
numIndexTuples = btreeSelectivity * index->rel->tuples;
/*
* As in genericcostestimate(), we have to adjust for any
* ScalarArrayOpExpr quals included in indexBoundQuals, and then round
* to integer.
*/
numIndexTuples = rint(numIndexTuples / num_sa_scans);
}
/*
* Now do generic index cost estimation.
*/
MemSet(&costs, 0, sizeof(costs));
costs.numIndexTuples = numIndexTuples;
genericcostestimate(root, path, loop_count, &costs);
/*
* Add a CPU-cost component to represent the costs of initial btree
* descent. We don't charge any I/O cost for touching upper btree levels,
* since they tend to stay in cache, but we still have to do about log2(N)
* comparisons to descend a btree of N leaf tuples. We charge one
* cpu_operator_cost per comparison.
*
* If there are ScalarArrayOpExprs, charge this once per SA scan. The
* ones after the first one are not startup cost so far as the overall
* plan is concerned, so add them only to "total" cost.
*/
if (index->tuples > 1) /* avoid computing log(0) */
{
descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
costs.indexStartupCost += descentCost;
costs.indexTotalCost += costs.num_sa_scans * descentCost;
}
/*
* Even though we're not charging I/O cost for touching upper btree pages,
* it's still reasonable to charge some CPU cost per page descended
* through. Moreover, if we had no such charge at all, bloated indexes
* would appear to have the same search cost as unbloated ones, at least
* in cases where only a single leaf page is expected to be visited. This
* cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
* touched. The number of such pages is btree tree height plus one (ie,
* we charge for the leaf page too). As above, charge once per SA scan.
*/
descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
costs.indexStartupCost += descentCost;
costs.indexTotalCost += costs.num_sa_scans * descentCost;
/*
* If we can get an estimate of the first column's ordering correlation C
* from pg_statistic, estimate the index correlation as C for a
* single-column index, or C * 0.75 for multiple columns. (The idea here
* is that multiple columns dilute the importance of the first column's
* ordering, but don't negate it entirely. Before 8.0 we divided the
* correlation by the number of columns, but that seems too strong.)
*/
MemSet(&vardata, 0, sizeof(vardata));
if (index->indexkeys[0] != 0)
{
/* Simple variable --- look to stats for the underlying table */
RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
Assert(rte->rtekind == RTE_RELATION);
relid = rte->relid;
Assert(relid != InvalidOid);
colnum = index->indexkeys[0];
if (get_relation_stats_hook &&
(*get_relation_stats_hook) (root, rte, colnum, &vardata))
{
/*
* The hook took control of acquiring a stats tuple. If it did
* supply a tuple, it'd better have supplied a freefunc.
*/
if (HeapTupleIsValid(vardata.statsTuple) &&
!vardata.freefunc)
elog(ERROR, "no function provided to release variable stats with");
}
else
{
vardata.statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(relid),
Int16GetDatum(colnum),
BoolGetDatum(rte->inh));
vardata.freefunc = ReleaseSysCache;
}
}
else
{
/* Expression --- maybe there are stats for the index itself */
relid = index->indexoid;
colnum = 1;
if (get_index_stats_hook &&
(*get_index_stats_hook) (root, relid, colnum, &vardata))
{
/*
* The hook took control of acquiring a stats tuple. If it did
* supply a tuple, it'd better have supplied a freefunc.
*/
if (HeapTupleIsValid(vardata.statsTuple) &&
!vardata.freefunc)
elog(ERROR, "no function provided to release variable stats with");
}
else
{
vardata.statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(relid),
Int16GetDatum(colnum),
BoolGetDatum(false));
vardata.freefunc = ReleaseSysCache;
}
}
if (HeapTupleIsValid(vardata.statsTuple))
{
Oid sortop;
float4 *numbers;
int nnumbers;
sortop = get_opfamily_member(index->opfamily[0],
index->opcintype[0],
index->opcintype[0],
BTLessStrategyNumber);
if (OidIsValid(sortop) &&
get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
STATISTIC_KIND_CORRELATION,
sortop,
NULL,
NULL, NULL,
&numbers, &nnumbers))
{
double varCorrelation;
Assert(nnumbers == 1);
varCorrelation = numbers[0];
if (index->reverse_sort[0])
varCorrelation = -varCorrelation;
if (index->ncolumns > 1)
costs.indexCorrelation = varCorrelation * 0.75;
else
costs.indexCorrelation = varCorrelation;
free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
}
}
ReleaseVariableStats(vardata);
*indexStartupCost = costs.indexStartupCost;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}
| Datum eqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2133 of file selfuncs.c.
References CLAMP_PROBABILITY, elog, eqjoinsel_inner(), eqjoinsel_semi(), ERROR, find_join_input_rel(), get_commutator(), get_join_variables(), JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_SEMI, SpecialJoinInfo::jointype, SpecialJoinInfo::min_righthand, PG_GETARG_INT16, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and ReleaseVariableStats.
Referenced by neqjoinsel().
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
#ifdef NOT_USED
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
double selec;
VariableStatData vardata1;
VariableStatData vardata2;
bool join_is_reversed;
RelOptInfo *inner_rel;
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
switch (sjinfo->jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_FULL:
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
break;
case JOIN_SEMI:
case JOIN_ANTI:
/*
* Look up the join's inner relation. min_righthand is sufficient
* information because neither SEMI nor ANTI joins permit any
* reassociation into or out of their RHS, so the righthand will
* always be exactly that set of rels.
*/
inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
if (!join_is_reversed)
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
inner_rel);
else
selec = eqjoinsel_semi(get_commutator(operator),
&vardata2, &vardata1,
inner_rel);
break;
default:
/* other values not expected here */
elog(ERROR, "unrecognized join type: %d",
(int) sjinfo->jointype);
selec = 0; /* keep compiler quiet */
break;
}
ReleaseVariableStats(vardata1);
ReleaseVariableStats(vardata2);
CLAMP_PROBABILITY(selec);
PG_RETURN_FLOAT8((float8) selec);
}
| Datum eqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 216 of file selfuncs.c.
References DEFAULT_EQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, var_eq_const(), and var_eq_non_const().
Referenced by neqsel().
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
VariableStatData vardata;
Node *other;
bool varonleft;
double selec;
/*
* If expression is not variable = something or something = variable, then
* punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
/*
* We can do a lot better if the something is a constant. (Note: the
* Const might result from estimation rather than being a simple constant
* in the query.)
*/
if (IsA(other, Const))
selec = var_eq_const(&vardata, operator,
((Const *) other)->constvalue,
((Const *) other)->constisnull,
varonleft);
else
selec = var_eq_non_const(&vardata, operator, other,
varonleft);
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8((float8) selec);
}
| int estimate_array_length | ( | Node * | arrayexpr | ) |
Definition at line 2028 of file selfuncs.c.
References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().
Referenced by btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().
{
/* look through any binary-compatible relabeling of arrayexpr */
arrayexpr = strip_array_coercion(arrayexpr);
if (arrayexpr && IsA(arrayexpr, Const))
{
Datum arraydatum = ((Const *) arrayexpr)->constvalue;
bool arrayisnull = ((Const *) arrayexpr)->constisnull;
ArrayType *arrayval;
if (arrayisnull)
return 0;
arrayval = DatumGetArrayTypeP(arraydatum);
return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
}
else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
!((ArrayExpr *) arrayexpr)->multidims)
{
return list_length(((ArrayExpr *) arrayexpr)->elements);
}
else
{
/* default guess --- see also scalararraysel */
return 10;
}
}
| Selectivity estimate_hash_bucketsize | ( | PlannerInfo * | root, | |
| Node * | hashkey, | |||
| double | nbuckets | |||
| ) |
Definition at line 3430 of file selfuncs.c.
References VariableStatData::atttype, VariableStatData::atttypmod, examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, NULL, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and RelOptInfo::tuples.
Referenced by final_cost_hashjoin().
{
VariableStatData vardata;
double estfract,
ndistinct,
stanullfrac,
mcvfreq,
avgfreq;
bool isdefault;
float4 *numbers;
int nnumbers;
examine_variable(root, hashkey, 0, &vardata);
/* Get number of distinct values */
ndistinct = get_variable_numdistinct(&vardata, &isdefault);
/* If ndistinct isn't real, punt and return 0.1, per comments above */
if (isdefault)
{
ReleaseVariableStats(vardata);
return (Selectivity) 0.1;
}
/* Get fraction that are null */
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
stanullfrac = stats->stanullfrac;
}
else
stanullfrac = 0.0;
/* Compute avg freq of all distinct data values in raw relation */
avgfreq = (1.0 - stanullfrac) / ndistinct;
/*
* Adjust ndistinct to account for restriction clauses. Observe we are
* assuming that the data distribution is affected uniformly by the
* restriction clauses!
*
* XXX Possibly better way, but much more expensive: multiply by
* selectivity of rel's restriction clauses that mention the target Var.
*/
if (vardata.rel)
ndistinct *= vardata.rel->rows / vardata.rel->tuples;
/*
* Initial estimate of bucketsize fraction is 1/nbuckets as long as the
* number of buckets is less than the expected number of distinct values;
* otherwise it is 1/ndistinct.
*/
if (ndistinct > nbuckets)
estfract = 1.0 / nbuckets;
else
estfract = 1.0 / ndistinct;
/*
* Look up the frequency of the most common value, if available.
*/
mcvfreq = 0.0;
if (HeapTupleIsValid(vardata.statsTuple))
{
if (get_attstatsslot(vardata.statsTuple,
vardata.atttype, vardata.atttypmod,
STATISTIC_KIND_MCV, InvalidOid,
NULL,
NULL, NULL,
&numbers, &nnumbers))
{
/*
* The first MCV stat is for the most common value.
*/
if (nnumbers > 0)
mcvfreq = numbers[0];
free_attstatsslot(vardata.atttype, NULL, 0,
numbers, nnumbers);
}
}
/*
* Adjust estimated bucketsize upward to account for skewed distribution.
*/
if (avgfreq > 0.0 && mcvfreq > avgfreq)
estfract *= mcvfreq / avgfreq;
/*
* Clamp bucketsize to sane range (the above adjustment could easily
* produce an out-of-range result). We set the lower bound a little above
* zero, since zero isn't a very sane result.
*/
if (estfract < 1.0e-6)
estfract = 1.0e-6;
else if (estfract > 1.0)
estfract = 1.0;
ReleaseVariableStats(vardata);
return (Selectivity) estfract;
}
| double estimate_num_groups | ( | PlannerInfo * | root, | |
| List * | groupExprs, | |||
| double | input_rows | |||
| ) |
Definition at line 3206 of file selfuncs.c.
References add_unique_group_var(), Assert, contain_volatile_functions(), examine_variable(), exprType(), for_each_cell, HeapTupleIsValid, VariableStatData::isunique, lcons(), lfirst, linitial, list_head(), lnext, GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, GroupVarInfo::rel, ReleaseVariableStats, RELOPT_BASEREL, RelOptInfo::reloptkind, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.
Referenced by create_unique_path(), query_planner(), and recurse_set_operations().
{
List *varinfos = NIL;
double numdistinct;
ListCell *l;
/*
* If no grouping columns, there's exactly one group. (This can't happen
* for normal cases with GROUP BY or DISTINCT, but it is possible for
* corner cases with set operations.)
*/
if (groupExprs == NIL)
return 1.0;
/*
* Count groups derived from boolean grouping expressions. For other
* expressions, find the unique Vars used, treating an expression as a Var
* if we can find stats for it. For each one, record the statistical
* estimate of number of distinct values (total in its table, without
* regard for filtering).
*/
numdistinct = 1.0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
VariableStatData vardata;
List *varshere;
ListCell *l2;
/* Short-circuit for expressions returning boolean */
if (exprType(groupexpr) == BOOLOID)
{
numdistinct *= 2.0;
continue;
}
/*
* If examine_variable is able to deduce anything about the GROUP BY
* expression, treat it as a single variable even if it's really more
* complicated.
*/
examine_variable(root, groupexpr, 0, &vardata);
if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
{
varinfos = add_unique_group_var(root, varinfos,
groupexpr, &vardata);
ReleaseVariableStats(vardata);
continue;
}
ReleaseVariableStats(vardata);
/*
* Else pull out the component Vars. Handle PlaceHolderVars by
* recursing into their arguments (effectively assuming that the
* PlaceHolderVar doesn't change the number of groups, which boils
* down to ignoring the possible addition of nulls to the result set).
*/
varshere = pull_var_clause(groupexpr,
PVC_RECURSE_AGGREGATES,
PVC_RECURSE_PLACEHOLDERS);
/*
* If we find any variable-free GROUP BY item, then either it is a
* constant (and we can ignore it) or it contains a volatile function;
* in the latter case we punt and assume that each input row will
* yield a distinct group.
*/
if (varshere == NIL)
{
if (contain_volatile_functions(groupexpr))
return input_rows;
continue;
}
/*
* Else add variables to varinfos list
*/
foreach(l2, varshere)
{
Node *var = (Node *) lfirst(l2);
examine_variable(root, var, 0, &vardata);
varinfos = add_unique_group_var(root, varinfos, var, &vardata);
ReleaseVariableStats(vardata);
}
}
/*
* If now no Vars, we must have an all-constant or all-boolean GROUP BY
* list.
*/
if (varinfos == NIL)
{
/* Guard against out-of-range answers */
if (numdistinct > input_rows)
numdistinct = input_rows;
return numdistinct;
}
/*
* Group Vars by relation and estimate total numdistinct.
*
* For each iteration of the outer loop, we process the frontmost Var in
* varinfos, plus all other Vars in the same relation. We remove these
* Vars from the newvarinfos list for the next iteration. This is the
* easiest way to group Vars of same rel together.
*/
do
{
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
double reldistinct = varinfo1->ndistinct;
double relmaxndistinct = reldistinct;
int relvarcount = 1;
List *newvarinfos = NIL;
/*
* Get the product of numdistinct estimates of the Vars for this rel.
* Also, construct new varinfos list of remaining Vars.
*/
for_each_cell(l, lnext(list_head(varinfos)))
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
{
reldistinct *= varinfo2->ndistinct;
if (relmaxndistinct < varinfo2->ndistinct)
relmaxndistinct = varinfo2->ndistinct;
relvarcount++;
}
else
{
/* not time to process varinfo2 yet */
newvarinfos = lcons(varinfo2, newvarinfos);
}
}
/*
* Sanity check --- don't divide by zero if empty relation.
*/
Assert(rel->reloptkind == RELOPT_BASEREL);
if (rel->tuples > 0)
{
/*
* Clamp to size of rel, or size of rel / 10 if multiple Vars. The
* fudge factor is because the Vars are probably correlated but we
* don't know by how much. We should never clamp to less than the
* largest ndistinct value for any of the Vars, though, since
* there will surely be at least that many groups.
*/
double clamp = rel->tuples;
if (relvarcount > 1)
{
clamp *= 0.1;
if (clamp < relmaxndistinct)
{
clamp = relmaxndistinct;
/* for sanity in case some ndistinct is too large: */
if (clamp > rel->tuples)
clamp = rel->tuples;
}
}
if (reldistinct > clamp)
reldistinct = clamp;
/*
* Multiply by restriction selectivity.
*/
reldistinct *= rel->rows / rel->tuples;
/*
* Update estimate of total distinct groups.
*/
numdistinct *= reldistinct;
}
varinfos = newvarinfos;
} while (varinfos != NIL);
numdistinct = ceil(numdistinct);
/* Guard against out-of-range answers */
if (numdistinct > input_rows)
numdistinct = input_rows;
if (numdistinct < 1.0)
numdistinct = 1.0;
return numdistinct;
}
| void examine_variable | ( | PlannerInfo * | root, | |
| Node * | node, | |||
| int | varRelid, | |||
| VariableStatData * | vardata | |||
| ) |
Definition at line 4275 of file selfuncs.c.
References arg, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext, MemSet, IndexOptInfo::ncolumns, NIL, NULL, ObjectIdGetDatum, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, SearchSysCache3, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, Var::vartype, VariableStatData::vartype, and Var::vartypmod.
Referenced by booltestsel(), estimate_hash_bucketsize(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().
{
Node *basenode;
Relids varnos;
RelOptInfo *onerel;
/* Make sure we don't return dangling pointers in vardata */
MemSet(vardata, 0, sizeof(VariableStatData));
/* Save the exposed type of the expression */
vardata->vartype = exprType(node);
/* Look inside any binary-compatible relabeling */
if (IsA(node, RelabelType))
basenode = (Node *) ((RelabelType *) node)->arg;
else
basenode = node;
/* Fast path for a simple Var */
if (IsA(basenode, Var) &&
(varRelid == 0 || varRelid == ((Var *) basenode)->varno))
{
Var *var = (Var *) basenode;
/* Set up result fields other than the stats tuple */
vardata->var = basenode; /* return Var without relabeling */
vardata->rel = find_base_rel(root, var->varno);
vardata->atttype = var->vartype;
vardata->atttypmod = var->vartypmod;
vardata->isunique = has_unique_index(vardata->rel, var->varattno);
/* Try to locate some stats */
examine_simple_variable(root, var, vardata);
return;
}
/*
* Okay, it's a more complicated expression. Determine variable
* membership. Note that when varRelid isn't zero, only vars of that
* relation are considered "real" vars.
*/
varnos = pull_varnos(basenode);
onerel = NULL;
switch (bms_membership(varnos))
{
case BMS_EMPTY_SET:
/* No Vars at all ... must be pseudo-constant clause */
break;
case BMS_SINGLETON:
if (varRelid == 0 || bms_is_member(varRelid, varnos))
{
onerel = find_base_rel(root,
(varRelid ? varRelid : bms_singleton_member(varnos)));
vardata->rel = onerel;
node = basenode; /* strip any relabeling */
}
/* else treat it as a constant */
break;
case BMS_MULTIPLE:
if (varRelid == 0)
{
/* treat it as a variable of a join relation */
vardata->rel = find_join_rel(root, varnos);
node = basenode; /* strip any relabeling */
}
else if (bms_is_member(varRelid, varnos))
{
/* ignore the vars belonging to other relations */
vardata->rel = find_base_rel(root, varRelid);
node = basenode; /* strip any relabeling */
/* note: no point in expressional-index search here */
}
/* else treat it as a constant */
break;
}
bms_free(varnos);
vardata->var = node;
vardata->atttype = exprType(node);
vardata->atttypmod = exprTypmod(node);
if (onerel)
{
/*
* We have an expression in vars of a single relation. Try to match
* it to expressional index columns, in hopes of finding some
* statistics.
*
* XXX it's conceivable that there are multiple matches with different
* index opfamilies; if so, we need to pick one that matches the
* operator we are estimating for. FIXME later.
*/
ListCell *ilist;
foreach(ilist, onerel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
ListCell *indexpr_item;
int pos;
indexpr_item = list_head(index->indexprs);
if (indexpr_item == NULL)
continue; /* no expressions here... */
for (pos = 0; pos < index->ncolumns; pos++)
{
if (index->indexkeys[pos] == 0)
{
Node *indexkey;
if (indexpr_item == NULL)
elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item);
if (indexkey && IsA(indexkey, RelabelType))
indexkey = (Node *) ((RelabelType *) indexkey)->arg;
if (equal(node, indexkey))
{
/*
* Found a match ... is it a unique index? Tests here
* should match has_unique_index().
*/
if (index->unique &&
index->ncolumns == 1 &&
(index->indpred == NIL || index->predOK))
vardata->isunique = true;
/*
* Has it got stats? We only consider stats for
* non-partial indexes, since partial indexes probably
* don't reflect whole-relation statistics; the above
* check for uniqueness is the only info we take from
* a partial index.
*
* An index stats hook, however, must make its own
* decisions about what to do with partial indexes.
*/
if (get_index_stats_hook &&
(*get_index_stats_hook) (root, index->indexoid,
pos + 1, vardata))
{
/*
* The hook took control of acquiring a stats
* tuple. If it did supply a tuple, it'd better
* have supplied a freefunc.
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
!vardata->freefunc)
elog(ERROR, "no function provided to release variable stats with");
}
else if (index->indpred == NIL)
{
vardata->statsTuple =
SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(index->indexoid),
Int16GetDatum(pos + 1),
BoolGetDatum(false));
vardata->freefunc = ReleaseSysCache;
}
if (vardata->statsTuple)
break;
}
indexpr_item = lnext(indexpr_item);
}
}
if (vardata->statsTuple)
break;
}
}
}
| void get_join_variables | ( | PlannerInfo * | root, | |
| List * | args, | |||
| SpecialJoinInfo * | sjinfo, | |||
| VariableStatData * | vardata1, | |||
| VariableStatData * | vardata2, | |||
| bool * | join_is_reversed | |||
| ) |
Definition at line 4216 of file selfuncs.c.
References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.
Referenced by eqjoinsel().
{
Node *left,
*right;
if (list_length(args) != 2)
elog(ERROR, "join operator should take two arguments");
left = (Node *) linitial(args);
right = (Node *) lsecond(args);
examine_variable(root, left, 0, vardata1);
examine_variable(root, right, 0, vardata2);
if (vardata1->rel &&
bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
*join_is_reversed = true; /* var1 is on RHS */
else if (vardata2->rel &&
bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
*join_is_reversed = true; /* var2 is on LHS */
else
*join_is_reversed = false;
}
| bool get_restriction_variable | ( | PlannerInfo * | root, | |
| List * | args, | |||
| int | varRelid, | |||
| VariableStatData * | vardata, | |||
| Node ** | other, | |||
| bool * | varonleft | |||
| ) |
Definition at line 4156 of file selfuncs.c.
References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, NULL, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.
Referenced by arraycontsel(), eqsel(), ltreeparentsel(), patternsel(), rangesel(), scalargtsel(), scalarltsel(), and tsmatchsel().
{
Node *left,
*right;
VariableStatData rdata;
/* Fail if not a binary opclause (probably shouldn't happen) */
if (list_length(args) != 2)
return false;
left = (Node *) linitial(args);
right = (Node *) lsecond(args);
/*
* Examine both sides. Note that when varRelid is nonzero, Vars of other
* relations will be treated as pseudoconstants.
*/
examine_variable(root, left, varRelid, vardata);
examine_variable(root, right, varRelid, &rdata);
/*
* If one side is a variable and the other not, we win.
*/
if (vardata->rel && rdata.rel == NULL)
{
*varonleft = true;
*other = estimate_expression_value(root, rdata.var);
/* Assume we need no ReleaseVariableStats(rdata) here */
return true;
}
if (vardata->rel == NULL && rdata.rel)
{
*varonleft = false;
*other = estimate_expression_value(root, vardata->var);
/* Assume we need no ReleaseVariableStats(*vardata) here */
*vardata = rdata;
return true;
}
/* Ooops, clause has wrong structure (probably var op var) */
ReleaseVariableStats(*vardata);
ReleaseVariableStats(rdata);
return false;
}
| double get_variable_numdistinct | ( | VariableStatData * | vardata, | |
| bool * | isdefault | |||
| ) |
Definition at line 4613 of file selfuncs.c.
References BOOLOID, DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, NULL, ObjectIdAttributeNumber, VariableStatData::rel, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.
Referenced by add_unique_group_var(), eqjoinsel_inner(), eqjoinsel_semi(), estimate_hash_bucketsize(), var_eq_const(), and var_eq_non_const().
{
double stadistinct;
double ntuples;
*isdefault = false;
/*
* Determine the stadistinct value to use. There are cases where we can
* get an estimate even without a pg_statistic entry, or can get a better
* value than is in pg_statistic.
*/
if (HeapTupleIsValid(vardata->statsTuple))
{
/* Use the pg_statistic entry */
Form_pg_statistic stats;
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
stadistinct = stats->stadistinct;
}
else if (vardata->vartype == BOOLOID)
{
/*
* Special-case boolean columns: presumably, two distinct values.
*
* Are there any other datatypes we should wire in special estimates
* for?
*/
stadistinct = 2.0;
}
else
{
/*
* We don't keep statistics for system columns, but in some cases we
* can infer distinctness anyway.
*/
if (vardata->var && IsA(vardata->var, Var))
{
switch (((Var *) vardata->var)->varattno)
{
case ObjectIdAttributeNumber:
case SelfItemPointerAttributeNumber:
stadistinct = -1.0; /* unique */
break;
case TableOidAttributeNumber:
stadistinct = 1.0; /* only 1 value */
break;
default:
stadistinct = 0.0; /* means "unknown" */
break;
}
}
else
stadistinct = 0.0; /* means "unknown" */
/*
* XXX consider using estimate_num_groups on expressions?
*/
}
/*
* If there is a unique index or DISTINCT clause for the variable, assume
* it is unique no matter what pg_statistic says; the statistics could be
* out of date, or we might have found a partial unique index that proves
* the var is unique for this query.
*/
if (vardata->isunique)
stadistinct = -1.0;
/*
* If we had an absolute estimate, use that.
*/
if (stadistinct > 0.0)
return stadistinct;
/*
* Otherwise we need to get the relation size; punt if not available.
*/
if (vardata->rel == NULL)
{
*isdefault = true;
return DEFAULT_NUM_DISTINCT;
}
ntuples = vardata->rel->tuples;
if (ntuples <= 0.0)
{
*isdefault = true;
return DEFAULT_NUM_DISTINCT;
}
/*
* If we had a relative estimate, use that.
*/
if (stadistinct < 0.0)
return floor((-stadistinct * ntuples) + 0.5);
/*
* With no data, estimate ndistinct = ntuples if the table is small, else
* use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
* that the behavior isn't discontinuous.
*/
if (ntuples < DEFAULT_NUM_DISTINCT)
return ntuples;
*isdefault = true;
return DEFAULT_NUM_DISTINCT;
}
| Datum gincostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 7023 of file selfuncs.c.
References AccessShareLock, GinQualCounts::arrayScans, Assert, RestrictInfo::clause, clauselist_selectivity(), cost_qual_eval(), cpu_index_tuple_cost, cpu_operator_cost, elog, ERROR, GinQualCounts::exactEntries, get_tablespace_page_costs(), gincost_opexpr(), gincost_scalararrayopexpr(), ginGetStats(), GinQualCounts::haveFullScan, index_close(), index_open(), index_pages_fetched(), IndexPath::indexinfo, IndexOptInfo::indexoid, IndexPath::indexorderbys, IndexPath::indexquals, IndexOptInfo::indpred, IsA, JOIN_INNER, lfirst, list_concat(), list_length(), list_make1, Min, GinStatsData::nDataPages, GinStatsData::nEntries, GinStatsData::nEntryPages, NIL, nodeTag, GinStatsData::nPendingPages, GinStatsData::nTotalPages, NULL, IndexOptInfo::pages, GinQualCounts::partialEntries, QualCost::per_tuple, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, predicate_implied_by(), IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, rint(), scale, GinQualCounts::searchEntries, SizeOfIptrData, QualCost::startup, and IndexOptInfo::tuples.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
List *indexOrderBys = path->indexorderbys;
ListCell *l;
List *selectivityQuals;
double numPages = index->pages,
numTuples = index->tuples;
double numEntryPages,
numDataPages,
numPendingPages,
numEntries;
GinQualCounts counts;
bool matchPossible;
double entryPagesFetched,
dataPagesFetched,
dataPagesFetchedBySel;
double qual_op_cost,
qual_arg_cost,
spc_random_page_cost,
outer_scans;
QualCost index_qual_cost;
Relation indexRel;
GinStatsData ginStats;
/*
* Obtain statistic information from the meta page
*/
indexRel = index_open(index->indexoid, AccessShareLock);
ginGetStats(indexRel, &ginStats);
index_close(indexRel, AccessShareLock);
numEntryPages = ginStats.nEntryPages;
numDataPages = ginStats.nDataPages;
numPendingPages = ginStats.nPendingPages;
numEntries = ginStats.nEntries;
/*
* nPendingPages can be trusted, but the other fields are as of the last
* VACUUM. Scale them by the ratio numPages / nTotalPages to account for
* growth since then. If the fields are zero (implying no VACUUM at all,
* and an index created pre-9.1), assume all pages are entry pages.
*/
if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
{
numEntryPages = numPages;
numDataPages = 0;
numEntries = numTuples; /* bogus, but no other info available */
}
else
{
double scale = numPages / ginStats.nTotalPages;
numEntryPages = ceil(numEntryPages * scale);
numDataPages = ceil(numDataPages * scale);
numEntries = ceil(numEntries * scale);
/* ensure we didn't round up too much */
numEntryPages = Min(numEntryPages, numPages);
numDataPages = Min(numDataPages, numPages - numEntryPages);
}
/* In an empty index, numEntries could be zero. Avoid divide-by-zero */
if (numEntries < 1)
numEntries = 1;
/*
* Include predicate in selectivityQuals (should match
* genericcostestimate)
*/
if (index->indpred != NIL)
{
List *predExtraQuals = NIL;
foreach(l, index->indpred)
{
Node *predQual = (Node *) lfirst(l);
List *oneQual = list_make1(predQual);
if (!predicate_implied_by(oneQual, indexQuals))
predExtraQuals = list_concat(predExtraQuals, oneQual);
}
/* list_concat avoids modifying the passed-in indexQuals list */
selectivityQuals = list_concat(predExtraQuals, indexQuals);
}
else
selectivityQuals = indexQuals;
/* Estimate the fraction of main-table tuples that will be visited */
*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
index->rel->relid,
JOIN_INNER,
NULL);
/* fetch estimated page cost for schema containing index */
get_tablespace_page_costs(index->reltablespace,
&spc_random_page_cost,
NULL);
/*
* Generic assumption about index correlation: there isn't any.
*/
*indexCorrelation = 0.0;
/*
* Examine quals to estimate number of search entries & partial matches
*/
memset(&counts, 0, sizeof(counts));
counts.arrayScans = 1;
matchPossible = true;
foreach(l, indexQuals)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
Expr *clause;
Assert(IsA(rinfo, RestrictInfo));
clause = rinfo->clause;
if (IsA(clause, OpExpr))
{
matchPossible = gincost_opexpr(index,
(OpExpr *) clause,
&counts);
if (!matchPossible)
break;
}
else if (IsA(clause, ScalarArrayOpExpr))
{
matchPossible = gincost_scalararrayopexpr(index,
(ScalarArrayOpExpr *) clause,
numEntries,
&counts);
if (!matchPossible)
break;
}
else
{
/* shouldn't be anything else for a GIN index */
elog(ERROR, "unsupported GIN indexqual type: %d",
(int) nodeTag(clause));
}
}
/* Fall out if there were any provably-unsatisfiable quals */
if (!matchPossible)
{
*indexStartupCost = 0;
*indexTotalCost = 0;
*indexSelectivity = 0;
PG_RETURN_VOID();
}
if (counts.haveFullScan || indexQuals == NIL)
{
/*
* Full index scan will be required. We treat this as if every key in
* the index had been listed in the query; is that reasonable?
*/
counts.partialEntries = 0;
counts.exactEntries = numEntries;
counts.searchEntries = numEntries;
}
/* Will we have more than one iteration of a nestloop scan? */
outer_scans = loop_count;
/*
* Compute cost to begin scan, first of all, pay attention to pending
* list.
*/
entryPagesFetched = numPendingPages;
/*
* Estimate number of entry pages read. We need to do
* counts.searchEntries searches. Use a power function as it should be,
* but tuples on leaf pages usually is much greater. Here we include all
* searches in entry tree, including search of first entry in partial
* match algorithm
*/
entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
/*
* Add an estimate of entry pages read by partial match algorithm. It's a
* scan over leaf pages in entry tree. We haven't any useful stats here,
* so estimate it as proportion.
*/
entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
/*
* Partial match algorithm reads all data pages before doing actual scan,
* so it's a startup cost. Again, we haven't any useful stats here, so,
* estimate it as proportion
*/
dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
/*
* Calculate cache effects if more than one scan due to nestloops or array
* quals. The result is pro-rated per nestloop scan, but the array qual
* factor shouldn't be pro-rated (compare genericcostestimate).
*/
if (outer_scans > 1 || counts.arrayScans > 1)
{
entryPagesFetched *= outer_scans * counts.arrayScans;
entryPagesFetched = index_pages_fetched(entryPagesFetched,
(BlockNumber) numEntryPages,
numEntryPages, root);
entryPagesFetched /= outer_scans;
dataPagesFetched *= outer_scans * counts.arrayScans;
dataPagesFetched = index_pages_fetched(dataPagesFetched,
(BlockNumber) numDataPages,
numDataPages, root);
dataPagesFetched /= outer_scans;
}
/*
* Here we use random page cost because logically-close pages could be far
* apart on disk.
*/
*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
/*
* Now we compute the number of data pages fetched while the scan
* proceeds.
*/
/* data pages scanned for each exact (non-partial) matched entry */
dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
/*
* Estimate number of data pages read, using selectivity estimation and
* capacity of data page.
*/
dataPagesFetchedBySel = ceil(*indexSelectivity *
(numTuples / (BLCKSZ / SizeOfIptrData)));
if (dataPagesFetchedBySel > dataPagesFetched)
{
/*
* At least one of entries is very frequent and, unfortunately, we
* couldn't get statistic about entries (only tsvector has such
* statistics). So, we obviously have too small estimation of pages
* fetched from data tree. Re-estimate it from known capacity of data
* pages
*/
dataPagesFetched = dataPagesFetchedBySel;
}
/* Account for cache effects, the same as above */
if (outer_scans > 1 || counts.arrayScans > 1)
{
dataPagesFetched *= outer_scans * counts.arrayScans;
dataPagesFetched = index_pages_fetched(dataPagesFetched,
(BlockNumber) numDataPages,
numDataPages, root);
dataPagesFetched /= outer_scans;
}
/* And apply random_page_cost as the cost per page */
*indexTotalCost = *indexStartupCost +
dataPagesFetched * spc_random_page_cost;
/*
* Add on index qual eval costs, much as in genericcostestimate
*/
cost_qual_eval(&index_qual_cost, indexQuals, root);
qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
cost_qual_eval(&index_qual_cost, indexOrderBys, root);
qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
qual_op_cost = cpu_operator_cost *
(list_length(indexQuals) + list_length(indexOrderBys));
qual_arg_cost -= qual_op_cost;
if (qual_arg_cost < 0) /* just in case... */
qual_arg_cost = 0;
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
PG_RETURN_VOID();
}
| Datum gistcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6586 of file selfuncs.c.
References cpu_operator_cost, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, MemSet, GenericCosts::num_sa_scans, IndexOptInfo::pages, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, IndexOptInfo::tree_height, and IndexOptInfo::tuples.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
GenericCosts costs;
Cost descentCost;
MemSet(&costs, 0, sizeof(costs));
genericcostestimate(root, path, loop_count, &costs);
/*
* We model index descent costs similarly to those for btree, but to do
* that we first need an idea of the tree height. We somewhat arbitrarily
* assume that the fanout is 100, meaning the tree height is at most
* log100(index->pages).
*
* Although this computation isn't really expensive enough to require
* caching, we might as well use index->tree_height to cache it.
*/
if (index->tree_height < 0) /* unknown? */
{
if (index->pages > 1) /* avoid computing log(0) */
index->tree_height = (int) (log(index->pages) / log(100.0));
else
index->tree_height = 0;
}
/*
* Add a CPU-cost component to represent the costs of initial descent.
* We just use log(N) here not log2(N) since the branching factor isn't
* necessarily two anyway. As for btree, charge once per SA scan.
*/
if (index->tuples > 1) /* avoid computing log(0) */
{
descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
costs.indexStartupCost += descentCost;
costs.indexTotalCost += costs.num_sa_scans * descentCost;
}
/*
* Likewise add a per-page charge, calculated the same as for btrees.
*/
descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
costs.indexStartupCost += descentCost;
costs.indexTotalCost += costs.num_sa_scans * descentCost;
*indexStartupCost = costs.indexStartupCost;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}
| Datum hashcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6537 of file selfuncs.c.
References genericcostestimate(), GenericCosts::indexCorrelation, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, MemSet, PG_GETARG_FLOAT8, PG_GETARG_POINTER, and PG_RETURN_VOID.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
GenericCosts costs;
MemSet(&costs, 0, sizeof(costs));
genericcostestimate(root, path, loop_count, &costs);
/*
* A hash index has no descent costs as such, since the index AM can go
* directly to the target bucket after computing the hash value. There
* are a couple of other hash-specific costs that we could conceivably add
* here, though:
*
* Ideally we'd charge spc_random_page_cost for each page in the target
* bucket, not just the numIndexPages pages that genericcostestimate
* thought we'd visit. However in most cases we don't know which bucket
* that will be. There's no point in considering the average bucket size
* because the hash AM makes sure that's always one page.
*
* Likewise, we could consider charging some CPU for each index tuple in
* the bucket, if we knew how many there were. But the per-tuple cost is
* just a hash value comparison, not a general datatype-dependent
* comparison, so any such charge ought to be quite a bit less than
* cpu_operator_cost; which makes it probably not worth worrying about.
*
* A bigger issue is that chance hash-value collisions will result in
* wasted probes into the heap. We don't currently attempt to model this
* cost on the grounds that it's rare, but maybe it's not rare enough.
* (Any fix for this ought to consider the generic lossy-operator problem,
* though; it's not entirely hash-specific.)
*/
*indexStartupCost = costs.indexStartupCost;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}
| double histogram_selectivity | ( | VariableStatData * | vardata, | |
| FmgrInfo * | opproc, | |||
| Datum | constval, | |||
| bool | varonleft, | |||
| int | min_hist_size, | |||
| int | n_skip, | |||
| int * | hist_size | |||
| ) |
Definition at line 680 of file selfuncs.c.
References Assert, VariableStatData::atttype, VariableStatData::atttypmod, DatumGetBool, DEFAULT_COLLATION_OID, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, NULL, STATISTIC_KIND_HISTOGRAM, VariableStatData::statsTuple, and values.
Referenced by ltreeparentsel(), and patternsel().
{
double result;
Datum *values;
int nvalues;
/* check sanity of parameters */
Assert(n_skip >= 0);
Assert(min_hist_size > 2 * n_skip);
if (HeapTupleIsValid(vardata->statsTuple) &&
get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, InvalidOid,
NULL,
&values, &nvalues,
NULL, NULL))
{
*hist_size = nvalues;
if (nvalues >= min_hist_size)
{
int nmatch = 0;
int i;
for (i = n_skip; i < nvalues - n_skip; i++)
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
constval,
values[i])))
nmatch++;
}
result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
}
else
result = -1;
free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
}
else
{
*hist_size = 0;
result = -1;
}
return result;
}
| Datum iclikejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2721 of file selfuncs.c.
References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
}
| Datum iclikesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1401 of file selfuncs.c.
References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
}
| Datum icnlikejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2757 of file selfuncs.c.
References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
}
| Datum icnlikesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1437 of file selfuncs.c.
References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
}
| Datum icregexeqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2703 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
}
| Datum icregexeqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1383 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
}
| Datum icregexnejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2739 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
}
| Datum icregexnesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1419 of file selfuncs.c.
References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
}
| Datum likejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2712 of file selfuncs.c.
References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
| Datum likesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1392 of file selfuncs.c.
References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
}
Definition at line 5693 of file selfuncs.c.
References BYTEAOID, Const::consttype, Const::constvalue, DatumGetBool, DatumGetByteaP, DatumGetCString, DatumGetPointer, DirectFunctionCall1, FunctionCall2Coll(), lc_collate_is_c(), NAMEOID, nameout(), palloc(), pfree(), pg_database_encoding_character_incrementer(), pg_mbcliplen(), PointerGetDatum, SET_VARSIZE, string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA, VARHDRSZ, VARSIZE, and varstr_cmp().
Referenced by prefix_quals(), and prefix_selectivity().
{
Oid datatype = str_const->consttype;
char *workstr;
int len;
Datum cmpstr;
text *cmptxt = NULL;
mbcharacter_incrementer charinc;
/*
* Get a modifiable copy of the prefix string in C-string format, and set
* up the string we will compare to as a Datum. In C locale this can just
* be the given prefix string, otherwise we need to add a suffix. Types
* NAME and BYTEA sort bytewise so they don't need a suffix either.
*/
if (datatype == NAMEOID)
{
workstr = DatumGetCString(DirectFunctionCall1(nameout,
str_const->constvalue));
len = strlen(workstr);
cmpstr = str_const->constvalue;
}
else if (datatype == BYTEAOID)
{
bytea *bstr = DatumGetByteaP(str_const->constvalue);
len = VARSIZE(bstr) - VARHDRSZ;
workstr = (char *) palloc(len);
memcpy(workstr, VARDATA(bstr), len);
if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
pfree(bstr);
cmpstr = str_const->constvalue;
}
else
{
workstr = TextDatumGetCString(str_const->constvalue);
len = strlen(workstr);
if (lc_collate_is_c(collation) || len == 0)
cmpstr = str_const->constvalue;
else
{
/* If first time through, determine the suffix to use */
static char suffixchar = 0;
static Oid suffixcollation = 0;
if (!suffixchar || suffixcollation != collation)
{
char *best;
best = "Z";
if (varstr_cmp(best, 1, "z", 1, collation) < 0)
best = "z";
if (varstr_cmp(best, 1, "y", 1, collation) < 0)
best = "y";
if (varstr_cmp(best, 1, "9", 1, collation) < 0)
best = "9";
suffixchar = *best;
suffixcollation = collation;
}
/* And build the string to compare to */
cmptxt = (text *) palloc(VARHDRSZ + len + 1);
SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
memcpy(VARDATA(cmptxt), workstr, len);
*(VARDATA(cmptxt) + len) = suffixchar;
cmpstr = PointerGetDatum(cmptxt);
}
}
/* Select appropriate character-incrementer function */
if (datatype == BYTEAOID)
charinc = byte_increment;
else
charinc = pg_database_encoding_character_incrementer();
/* And search ... */
while (len > 0)
{
int charlen;
unsigned char *lastchar;
/* Identify the last character --- for bytea, just the last byte */
if (datatype == BYTEAOID)
charlen = 1;
else
charlen = len - pg_mbcliplen(workstr, len, len - 1);
lastchar = (unsigned char *) (workstr + len - charlen);
/*
* Try to generate a larger string by incrementing the last character
* (for BYTEA, we treat each byte as a character).
*
* Note: the incrementer function is expected to return true if it's
* generated a valid-per-the-encoding new character, otherwise false.
* The contents of the character on false return are unspecified.
*/
while (charinc(lastchar, charlen))
{
Const *workstr_const;
if (datatype == BYTEAOID)
workstr_const = string_to_bytea_const(workstr, len);
else
workstr_const = string_to_const(workstr, datatype);
if (DatumGetBool(FunctionCall2Coll(ltproc,
collation,
cmpstr,
workstr_const->constvalue)))
{
/* Successfully made a string larger than cmpstr */
if (cmptxt)
pfree(cmptxt);
pfree(workstr);
return workstr_const;
}
/* No good, release unusable value and try again */
pfree(DatumGetPointer(workstr_const->constvalue));
pfree(workstr_const);
}
/*
* No luck here, so truncate off the last character and try to
* increment the next one.
*/
len -= charlen;
workstr[len] = '\0';
}
/* Failed... */
if (cmptxt)
pfree(cmptxt);
pfree(workstr);
return NULL;
}
| double mcv_selectivity | ( | VariableStatData * | vardata, | |
| FmgrInfo * | opproc, | |||
| Datum | constval, | |||
| bool | varonleft, | |||
| double * | sumcommonp | |||
| ) |
Definition at line 602 of file selfuncs.c.
References VariableStatData::atttype, VariableStatData::atttypmod, DatumGetBool, DEFAULT_COLLATION_OID, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, NULL, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and values.
Referenced by ltreeparentsel(), patternsel(), and scalarineqsel().
{
double mcv_selec,
sumcommon;
Datum *values;
int nvalues;
float4 *numbers;
int nnumbers;
int i;
mcv_selec = 0.0;
sumcommon = 0.0;
if (HeapTupleIsValid(vardata->statsTuple) &&
get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues,
&numbers, &nnumbers))
{
for (i = 0; i < nvalues; i++)
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
constval,
values[i])))
mcv_selec += numbers[i];
sumcommon += numbers[i];
}
free_attstatsslot(vardata->atttype, values, nvalues,
numbers, nnumbers);
}
*sumcommonp = sumcommon;
return mcv_selec;
}
| void mergejoinscansel | ( | PlannerInfo * | root, | |
| Node * | clause, | |||
| Oid | opfamily, | |||
| int | strategy, | |||
| bool | nulls_first, | |||
| Selectivity * | leftstart, | |||
| Selectivity * | leftend, | |||
| Selectivity * | rightstart, | |||
| Selectivity * | rightend | |||
| ) |
Definition at line 2784 of file selfuncs.c.
References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause, OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.
Referenced by cached_scansel().
{
Node *left,
*right;
VariableStatData leftvar,
rightvar;
int op_strategy;
Oid op_lefttype;
Oid op_righttype;
Oid opno,
lsortop,
rsortop,
lstatop,
rstatop,
ltop,
leop,
revltop,
revleop;
bool isgt;
Datum leftmin,
leftmax,
rightmin,
rightmax;
double selec;
/* Set default results if we can't figure anything out. */
/* XXX should default "start" fraction be a bit more than 0? */
*leftstart = *rightstart = 0.0;
*leftend = *rightend = 1.0;
/* Deconstruct the merge clause */
if (!is_opclause(clause))
return; /* shouldn't happen */
opno = ((OpExpr *) clause)->opno;
left = get_leftop((Expr *) clause);
right = get_rightop((Expr *) clause);
if (!right)
return; /* shouldn't happen */
/* Look for stats for the inputs */
examine_variable(root, left, 0, &leftvar);
examine_variable(root, right, 0, &rightvar);
/* Extract the operator's declared left/right datatypes */
get_op_opfamily_properties(opno, opfamily, false,
&op_strategy,
&op_lefttype,
&op_righttype);
Assert(op_strategy == BTEqualStrategyNumber);
/*
* Look up the various operators we need. If we don't find them all, it
* probably means the opfamily is broken, but we just fail silently.
*
* Note: we expect that pg_statistic histograms will be sorted by the '<'
* operator, regardless of which sort direction we are considering.
*/
switch (strategy)
{
case BTLessStrategyNumber:
isgt = false;
if (op_lefttype == op_righttype)
{
/* easy case */
ltop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTLessStrategyNumber);
leop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTLessEqualStrategyNumber);
lsortop = ltop;
rsortop = ltop;
lstatop = lsortop;
rstatop = rsortop;
revltop = ltop;
revleop = leop;
}
else
{
ltop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTLessStrategyNumber);
leop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTLessEqualStrategyNumber);
lsortop = get_opfamily_member(opfamily,
op_lefttype, op_lefttype,
BTLessStrategyNumber);
rsortop = get_opfamily_member(opfamily,
op_righttype, op_righttype,
BTLessStrategyNumber);
lstatop = lsortop;
rstatop = rsortop;
revltop = get_opfamily_member(opfamily,
op_righttype, op_lefttype,
BTLessStrategyNumber);
revleop = get_opfamily_member(opfamily,
op_righttype, op_lefttype,
BTLessEqualStrategyNumber);
}
break;
case BTGreaterStrategyNumber:
/* descending-order case */
isgt = true;
if (op_lefttype == op_righttype)
{
/* easy case */
ltop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTGreaterStrategyNumber);
leop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTGreaterEqualStrategyNumber);
lsortop = ltop;
rsortop = ltop;
lstatop = get_opfamily_member(opfamily,
op_lefttype, op_lefttype,
BTLessStrategyNumber);
rstatop = lstatop;
revltop = ltop;
revleop = leop;
}
else
{
ltop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTGreaterStrategyNumber);
leop = get_opfamily_member(opfamily,
op_lefttype, op_righttype,
BTGreaterEqualStrategyNumber);
lsortop = get_opfamily_member(opfamily,
op_lefttype, op_lefttype,
BTGreaterStrategyNumber);
rsortop = get_opfamily_member(opfamily,
op_righttype, op_righttype,
BTGreaterStrategyNumber);
lstatop = get_opfamily_member(opfamily,
op_lefttype, op_lefttype,
BTLessStrategyNumber);
rstatop = get_opfamily_member(opfamily,
op_righttype, op_righttype,
BTLessStrategyNumber);
revltop = get_opfamily_member(opfamily,
op_righttype, op_lefttype,
BTGreaterStrategyNumber);
revleop = get_opfamily_member(opfamily,
op_righttype, op_lefttype,
BTGreaterEqualStrategyNumber);
}
break;
default:
goto fail; /* shouldn't get here */
}
if (!OidIsValid(lsortop) ||
!OidIsValid(rsortop) ||
!OidIsValid(lstatop) ||
!OidIsValid(rstatop) ||
!OidIsValid(ltop) ||
!OidIsValid(leop) ||
!OidIsValid(revltop) ||
!OidIsValid(revleop))
goto fail; /* insufficient info in catalogs */
/* Try to get ranges of both inputs */
if (!isgt)
{
if (!get_variable_range(root, &leftvar, lstatop,
&leftmin, &leftmax))
goto fail; /* no range available from stats */
if (!get_variable_range(root, &rightvar, rstatop,
&rightmin, &rightmax))
goto fail; /* no range available from stats */
}
else
{
/* need to swap the max and min */
if (!get_variable_range(root, &leftvar, lstatop,
&leftmax, &leftmin))
goto fail; /* no range available from stats */
if (!get_variable_range(root, &rightvar, rstatop,
&rightmax, &rightmin))
goto fail; /* no range available from stats */
}
/*
* Now, the fraction of the left variable that will be scanned is the
* fraction that's <= the right-side maximum value. But only believe
* non-default estimates, else stick with our 1.0.
*/
selec = scalarineqsel(root, leop, isgt, &leftvar,
rightmax, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftend = selec;
/* And similarly for the right variable. */
selec = scalarineqsel(root, revleop, isgt, &rightvar,
leftmax, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightend = selec;
/*
* Only one of the two "end" fractions can really be less than 1.0;
* believe the smaller estimate and reset the other one to exactly 1.0. If
* we get exactly equal estimates (as can easily happen with self-joins),
* believe neither.
*/
if (*leftend > *rightend)
*leftend = 1.0;
else if (*leftend < *rightend)
*rightend = 1.0;
else
*leftend = *rightend = 1.0;
/*
* Also, the fraction of the left variable that will be scanned before the
* first join pair is found is the fraction that's < the right-side
* minimum value. But only believe non-default estimates, else stick with
* our own default.
*/
selec = scalarineqsel(root, ltop, isgt, &leftvar,
rightmin, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftstart = selec;
/* And similarly for the right variable. */
selec = scalarineqsel(root, revltop, isgt, &rightvar,
leftmin, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightstart = selec;
/*
* Only one of the two "start" fractions can really be more than zero;
* believe the larger estimate and reset the other one to exactly 0.0. If
* we get exactly equal estimates (as can easily happen with self-joins),
* believe neither.
*/
if (*leftstart < *rightstart)
*leftstart = 0.0;
else if (*leftstart > *rightstart)
*rightstart = 0.0;
else
*leftstart = *rightstart = 0.0;
/*
* If the sort order is nulls-first, we're going to have to skip over any
* nulls too. These would not have been counted by scalarineqsel, and we
* can safely add in this fraction regardless of whether we believe
* scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
*/
if (nulls_first)
{
Form_pg_statistic stats;
if (HeapTupleIsValid(leftvar.statsTuple))
{
stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
*leftstart += stats->stanullfrac;
CLAMP_PROBABILITY(*leftstart);
*leftend += stats->stanullfrac;
CLAMP_PROBABILITY(*leftend);
}
if (HeapTupleIsValid(rightvar.statsTuple))
{
stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
*rightstart += stats->stanullfrac;
CLAMP_PROBABILITY(*rightstart);
*rightend += stats->stanullfrac;
CLAMP_PROBABILITY(*rightend);
}
}
/* Disbelieve start >= end, just in case that can happen */
if (*leftstart >= *leftend)
{
*leftstart = 0.0;
*leftend = 1.0;
}
if (*rightstart >= *rightend)
{
*rightstart = 0.0;
*rightend = 1.0;
}
fail:
ReleaseVariableStats(leftvar);
ReleaseVariableStats(rightvar);
}
| Datum neqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2629 of file selfuncs.c.
References DatumGetFloat8, DirectFunctionCall5, eqjoinsel(), get_negator(), Int16GetDatum, ObjectIdGetDatum, PG_GETARG_INT16, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and PointerGetDatum.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
Oid eqop;
float8 result;
/*
* We want 1 - eqjoinsel() where the equality operator is the one
* associated with this != operator, that is, its negator.
*/
eqop = get_negator(operator);
if (eqop)
{
result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
PointerGetDatum(root),
ObjectIdGetDatum(eqop),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
}
else
{
/* Use default selectivity (should we raise an error instead?) */
result = DEFAULT_EQ_SEL;
}
result = 1.0 - result;
PG_RETURN_FLOAT8(result);
}
| Datum neqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 483 of file selfuncs.c.
References DatumGetFloat8, DirectFunctionCall4, eqsel(), get_negator(), Int32GetDatum, ObjectIdGetDatum, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and PointerGetDatum.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
Oid eqop;
float8 result;
/*
* We want 1 - eqsel() where the equality operator is the one associated
* with this != operator, that is, its negator.
*/
eqop = get_negator(operator);
if (eqop)
{
result = DatumGetFloat8(DirectFunctionCall4(eqsel,
PointerGetDatum(root),
ObjectIdGetDatum(eqop),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
}
else
{
/* Use default selectivity (should we raise an error instead?) */
result = DEFAULT_EQ_SEL;
}
result = 1.0 - result;
PG_RETURN_FLOAT8(result);
}
| Datum nlikejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2748 of file selfuncs.c.
References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
}
| Datum nlikesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1428 of file selfuncs.c.
References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
}
| Selectivity nulltestsel | ( | PlannerInfo * | root, | |
| NullTestType | nulltesttype, | |||
| Node * | arg, | |||
| int | varRelid, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo | |||
| ) |
Definition at line 1613 of file selfuncs.c.
References CLAMP_PROBABILITY, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, ReleaseVariableStats, and VariableStatData::statsTuple.
Referenced by clause_selectivity(), and clauselist_selectivity().
{
VariableStatData vardata;
double selec;
examine_variable(root, arg, varRelid, &vardata);
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
double freq_null;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
freq_null = stats->stanullfrac;
switch (nulltesttype)
{
case IS_NULL:
/*
* Use freq_null directly.
*/
selec = freq_null;
break;
case IS_NOT_NULL:
/*
* Select not unknown (not null) values. Calculate from
* freq_null.
*/
selec = 1.0 - freq_null;
break;
default:
elog(ERROR, "unrecognized nulltesttype: %d",
(int) nulltesttype);
return (Selectivity) 0; /* keep compiler quiet */
}
}
else
{
/*
* No ANALYZE stats available, so make a guess
*/
switch (nulltesttype)
{
case IS_NULL:
selec = DEFAULT_UNK_SEL;
break;
case IS_NOT_NULL:
selec = DEFAULT_NOT_UNK_SEL;
break;
default:
elog(ERROR, "unrecognized nulltesttype: %d",
(int) nulltesttype);
return (Selectivity) 0; /* keep compiler quiet */
}
}
ReleaseVariableStats(vardata);
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
return (Selectivity) selec;
}
| Pattern_Prefix_Status pattern_fixed_prefix | ( | Const * | patt, | |
| Pattern_Type | ptype, | |||
| Oid | collation, | |||
| Const ** | prefix, | |||
| Selectivity * | rest_selec | |||
| ) |
Definition at line 5328 of file selfuncs.c.
References elog, ERROR, like_fixed_prefix(), Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Regex, Pattern_Type_Regex_IC, and regex_fixed_prefix().
Referenced by expand_indexqual_opclause(), match_special_index_operator(), and patternsel().
{
Pattern_Prefix_Status result;
switch (ptype)
{
case Pattern_Type_Like:
result = like_fixed_prefix(patt, false, collation,
prefix, rest_selec);
break;
case Pattern_Type_Like_IC:
result = like_fixed_prefix(patt, true, collation,
prefix, rest_selec);
break;
case Pattern_Type_Regex:
result = regex_fixed_prefix(patt, false, collation,
prefix, rest_selec);
break;
case Pattern_Type_Regex_IC:
result = regex_fixed_prefix(patt, true, collation,
prefix, rest_selec);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
result = Pattern_Prefix_None; /* keep compiler quiet */
break;
}
return result;
}
| Datum regexeqjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2694 of file selfuncs.c.
References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
}
| Datum regexeqsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1374 of file selfuncs.c.
References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
}
| Datum regexnejoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2730 of file selfuncs.c.
References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
}
| Datum regexnesel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1410 of file selfuncs.c.
References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.
{
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
}
| Selectivity rowcomparesel | ( | PlannerInfo * | root, | |
| RowCompareExpr * | clause, | |||
| int | varRelid, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo | |||
| ) |
Definition at line 2066 of file selfuncs.c.
References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NULL, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.
Referenced by clause_selectivity().
{
Selectivity s1;
Oid opno = linitial_oid(clause->opnos);
Oid inputcollid = linitial_oid(clause->inputcollids);
List *opargs;
bool is_join_clause;
/* Build equivalent arg list for single operator */
opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
/*
* Decide if it's a join clause. This should match clausesel.c's
* treat_as_join_clause(), except that we intentionally consider only the
* leading columns and not the rest of the clause.
*/
if (varRelid != 0)
{
/*
* Caller is forcing restriction mode (eg, because we are examining an
* inner indexscan qual).
*/
is_join_clause = false;
}
else if (sjinfo == NULL)
{
/*
* It must be a restriction clause, since it's being evaluated at a
* scan node.
*/
is_join_clause = false;
}
else
{
/*
* Otherwise, it's a join if there's more than one relation used.
*/
is_join_clause = (NumRelids((Node *) opargs) > 1);
}
if (is_join_clause)
{
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
opargs,
inputcollid,
jointype,
sjinfo);
}
else
{
/* Estimate selectivity for a restriction clause. */
s1 = restriction_selectivity(root, opno,
opargs,
inputcollid,
varRelid);
}
return s1;
}
| Selectivity scalararraysel | ( | PlannerInfo * | root, | |
| ScalarArrayOpExpr * | clause, | |||
| bool | is_join_clause, | |||
| int | varRelid, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo | |||
| ) |
Definition at line 1713 of file selfuncs.c.
References ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.
Referenced by clause_selectivity().
{
Oid operator = clause->opno;
bool useOr = clause->useOr;
bool isEquality = false;
bool isInequality = false;
Node *leftop;
Node *rightop;
Oid nominal_element_type;
Oid nominal_element_collation;
TypeCacheEntry *typentry;
RegProcedure oprsel;
FmgrInfo oprselproc;
Selectivity s1;
Selectivity s1disjoint;
/* First, deconstruct the expression */
Assert(list_length(clause->args) == 2);
leftop = (Node *) linitial(clause->args);
rightop = (Node *) lsecond(clause->args);
/* get nominal (after relabeling) element type of rightop */
nominal_element_type = get_base_element_type(exprType(rightop));
if (!OidIsValid(nominal_element_type))
return (Selectivity) 0.5; /* probably shouldn't happen */
/* get nominal collation, too, for generating constants */
nominal_element_collation = exprCollation(rightop);
/* look through any binary-compatible relabeling of rightop */
rightop = strip_array_coercion(rightop);
/*
* Detect whether the operator is the default equality or inequality
* operator of the array element type.
*/
typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
if (OidIsValid(typentry->eq_opr))
{
if (operator == typentry->eq_opr)
isEquality = true;
else if (get_negator(operator) == typentry->eq_opr)
isInequality = true;
}
/*
* If it is equality or inequality, we might be able to estimate this as a
* form of array containment; for instance "const = ANY(column)" can be
* treated as "ARRAY[const] <@ column". scalararraysel_containment tries
* that, and returns the selectivity estimate if successful, or -1 if not.
*/
if ((isEquality || isInequality) && !is_join_clause)
{
s1 = scalararraysel_containment(root, leftop, rightop,
nominal_element_type,
isEquality, useOr, varRelid);
if (s1 >= 0.0)
return s1;
}
/*
* Look up the underlying operator's selectivity estimator. Punt if it
* hasn't got one.
*/
if (is_join_clause)
oprsel = get_oprjoin(operator);
else
oprsel = get_oprrest(operator);
if (!oprsel)
return (Selectivity) 0.5;
fmgr_info(oprsel, &oprselproc);
/*
* In the array-containment check above, we must only believe that an
* operator is equality or inequality if it is the default btree equality
* operator (or its negator) for the element type, since those are the
* operators that array containment will use. But in what follows, we can
* be a little laxer, and also believe that any operators using eqsel() or
* neqsel() as selectivity estimator act like equality or inequality.
*/
if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
isEquality = true;
else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
isInequality = true;
/*
* We consider three cases:
*
* 1. rightop is an Array constant: deconstruct the array, apply the
* operator's selectivity function for each array element, and merge the
* results in the same way that clausesel.c does for AND/OR combinations.
*
* 2. rightop is an ARRAY[] construct: apply the operator's selectivity
* function for each element of the ARRAY[] construct, and merge.
*
* 3. otherwise, make a guess ...
*/
if (rightop && IsA(rightop, Const))
{
Datum arraydatum = ((Const *) rightop)->constvalue;
bool arrayisnull = ((Const *) rightop)->constisnull;
ArrayType *arrayval;
int16 elmlen;
bool elmbyval;
char elmalign;
int num_elems;
Datum *elem_values;
bool *elem_nulls;
int i;
if (arrayisnull) /* qual can't succeed if null array */
return (Selectivity) 0.0;
arrayval = DatumGetArrayTypeP(arraydatum);
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
&elmlen, &elmbyval, &elmalign);
deconstruct_array(arrayval,
ARR_ELEMTYPE(arrayval),
elmlen, elmbyval, elmalign,
&elem_values, &elem_nulls, &num_elems);
/*
* For generic operators, we assume the probability of success is
* independent for each array element. But for "= ANY" or "<> ALL",
* if the array elements are distinct (which'd typically be the case)
* then the probabilities are disjoint, and we should just sum them.
*
* If we were being really tense we would try to confirm that the
* elements are all distinct, but that would be expensive and it
* doesn't seem to be worth the cycles; it would amount to penalizing
* well-written queries in favor of poorly-written ones. However, we
* do protect ourselves a little bit by checking whether the
* disjointness assumption leads to an impossible (out of range)
* probability; if so, we fall back to the normal calculation.
*/
s1 = s1disjoint = (useOr ? 0.0 : 1.0);
for (i = 0; i < num_elems; i++)
{
List *args;
Selectivity s2;
args = list_make2(leftop,
makeConst(nominal_element_type,
-1,
nominal_element_collation,
elmlen,
elem_values[i],
elem_nulls[i],
elmbyval));
if (is_join_clause)
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
else
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
if (useOr)
{
s1 = s1 + s2 - s1 * s2;
if (isEquality)
s1disjoint += s2;
}
else
{
s1 = s1 * s2;
if (isInequality)
s1disjoint += s2 - 1.0;
}
}
/* accept disjoint-probability estimate if in range */
if ((useOr ? isEquality : isInequality) &&
s1disjoint >= 0.0 && s1disjoint <= 1.0)
s1 = s1disjoint;
}
else if (rightop && IsA(rightop, ArrayExpr) &&
!((ArrayExpr *) rightop)->multidims)
{
ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
int16 elmlen;
bool elmbyval;
ListCell *l;
get_typlenbyval(arrayexpr->element_typeid,
&elmlen, &elmbyval);
/*
* We use the assumption of disjoint probabilities here too, although
* the odds of equal array elements are rather higher if the elements
* are not all constants (which they won't be, else constant folding
* would have reduced the ArrayExpr to a Const). In this path it's
* critical to have the sanity check on the s1disjoint estimate.
*/
s1 = s1disjoint = (useOr ? 0.0 : 1.0);
foreach(l, arrayexpr->elements)
{
Node *elem = (Node *) lfirst(l);
List *args;
Selectivity s2;
/*
* Theoretically, if elem isn't of nominal_element_type we should
* insert a RelabelType, but it seems unlikely that any operator
* estimation function would really care ...
*/
args = list_make2(leftop, elem);
if (is_join_clause)
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
else
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
if (useOr)
{
s1 = s1 + s2 - s1 * s2;
if (isEquality)
s1disjoint += s2;
}
else
{
s1 = s1 * s2;
if (isInequality)
s1disjoint += s2 - 1.0;
}
}
/* accept disjoint-probability estimate if in range */
if ((useOr ? isEquality : isInequality) &&
s1disjoint >= 0.0 && s1disjoint <= 1.0)
s1 = s1disjoint;
}
else
{
CaseTestExpr *dummyexpr;
List *args;
Selectivity s2;
int i;
/*
* We need a dummy rightop to pass to the operator selectivity
* routine. It can be pretty much anything that doesn't look like a
* constant; CaseTestExpr is a convenient choice.
*/
dummyexpr = makeNode(CaseTestExpr);
dummyexpr->typeId = nominal_element_type;
dummyexpr->typeMod = -1;
dummyexpr->collation = clause->inputcollid;
args = list_make2(leftop, dummyexpr);
if (is_join_clause)
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
else
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
s1 = useOr ? 0.0 : 1.0;
/*
* Arbitrarily assume 10 elements in the eventual array value (see
* also estimate_array_length). We don't risk an assumption of
* disjoint probabilities here.
*/
for (i = 0; i < 10; i++)
{
if (useOr)
s1 = s1 + s2 - s1 * s2;
else
s1 = s1 * s2;
}
}
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(s1);
return s1;
}
| Selectivity scalararraysel_containment | ( | PlannerInfo * | root, | |
| Node * | leftop, | |||
| Node * | rightop, | |||
| Oid | elemtype, | |||
| bool | isEquality, | |||
| bool | useOr, | |||
| int | varRelid | |||
| ) |
Definition at line 80 of file array_selfuncs.c.
References VariableStatData::atttypmod, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, estimate_expression_value(), examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), NULL, OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, OidIsValid, VariableStatData::rel, ReleaseVariableStats, STATISTIC_KIND_DECHIST, STATISTIC_KIND_MCELEM, VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and values.
Referenced by scalararraysel().
{
Selectivity selec;
VariableStatData vardata;
Datum constval;
TypeCacheEntry *typentry;
FmgrInfo *cmpfunc;
/*
* rightop must be a variable, else punt.
*/
examine_variable(root, rightop, varRelid, &vardata);
if (!vardata.rel)
{
ReleaseVariableStats(vardata);
return -1.0;
}
/*
* Aggressively reduce leftop to a constant, if possible.
*/
leftop = estimate_expression_value(root, leftop);
if (!IsA(leftop, Const))
{
ReleaseVariableStats(vardata);
return -1.0;
}
if (((Const *) leftop)->constisnull)
{
/* qual can't succeed if null on left */
ReleaseVariableStats(vardata);
return (Selectivity) 0.0;
}
constval = ((Const *) leftop)->constvalue;
/* Get element type's default comparison function */
typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
{
ReleaseVariableStats(vardata);
return -1.0;
}
cmpfunc = &typentry->cmp_proc_finfo;
/*
* If the operator is <>, swap ANY/ALL, then invert the result later.
*/
if (!isEquality)
useOr = !useOr;
/* Get array element stats for var, if available */
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
Datum *values;
int nvalues;
float4 *numbers;
int nnumbers;
float4 *hist;
int nhist;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
/* MCELEM will be an array of same type as element */
if (get_attstatsslot(vardata.statsTuple,
elemtype, vardata.atttypmod,
STATISTIC_KIND_MCELEM, InvalidOid,
NULL,
&values, &nvalues,
&numbers, &nnumbers))
{
/* For ALL case, also get histogram of distinct-element counts */
if (useOr ||
!get_attstatsslot(vardata.statsTuple,
elemtype, vardata.atttypmod,
STATISTIC_KIND_DECHIST, InvalidOid,
NULL,
NULL, NULL,
&hist, &nhist))
{
hist = NULL;
nhist = 0;
}
/*
* For = ANY, estimate as var @> ARRAY[const].
*
* For = ALL, estimate as var <@ ARRAY[const].
*/
if (useOr)
selec = mcelem_array_contain_overlap_selec(values, nvalues,
numbers, nnumbers,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
cmpfunc);
else
selec = mcelem_array_contained_selec(values, nvalues,
numbers, nnumbers,
&constval, 1,
hist, nhist,
OID_ARRAY_CONTAINED_OP,
cmpfunc);
if (hist)
free_attstatsslot(elemtype, NULL, 0, hist, nhist);
free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers);
}
else
{
/* No most-common-elements info, so do without */
if (useOr)
selec = mcelem_array_contain_overlap_selec(NULL, 0,
NULL, 0,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
cmpfunc);
else
selec = mcelem_array_contained_selec(NULL, 0,
NULL, 0,
&constval, 1,
NULL, 0,
OID_ARRAY_CONTAINED_OP,
cmpfunc);
}
/*
* MCE stats count only non-null rows, so adjust for null rows.
*/
selec *= (1.0 - stats->stanullfrac);
}
else
{
/* No stats at all, so do without */
if (useOr)
selec = mcelem_array_contain_overlap_selec(NULL, 0,
NULL, 0,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
cmpfunc);
else
selec = mcelem_array_contained_selec(NULL, 0,
NULL, 0,
&constval, 1,
NULL, 0,
OID_ARRAY_CONTAINED_OP,
cmpfunc);
/* we assume no nulls here, so no stanullfrac correction */
}
ReleaseVariableStats(vardata);
/*
* If the operator is <>, invert the results.
*/
if (!isEquality)
selec = 1.0 - selec;
CLAMP_PROBABILITY(selec);
return selec;
}
| Datum scalargtjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2675 of file selfuncs.c.
References DEFAULT_INEQ_SEL, and PG_RETURN_FLOAT8.
| Datum scalargtsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 1034 of file selfuncs.c.
References DEFAULT_INEQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and scalarineqsel().
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
VariableStatData vardata;
Node *other;
bool varonleft;
Datum constval;
Oid consttype;
bool isgt;
double selec;
/*
* If expression is not variable op something or something op variable,
* then punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
/*
* Can't do anything useful if the something is not a constant, either.
*/
if (!IsA(other, Const))
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
* operator will never return TRUE.
*/
if (((Const *) other)->constisnull)
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(0.0);
}
constval = ((Const *) other)->constvalue;
consttype = ((Const *) other)->consttype;
/*
* Force the var to be on the left to simplify logic in scalarineqsel.
*/
if (varonleft)
{
/* we have var > other */
isgt = true;
}
else
{
/* we have other > var, commute to make var < other */
operator = get_commutator(operator);
if (!operator)
{
/* Use default selectivity (should we raise an error instead?) */
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}
isgt = false;
}
selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8((float8) selec);
}
| Datum scalarltjoinsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 2666 of file selfuncs.c.
References DEFAULT_INEQ_SEL, and PG_RETURN_FLOAT8.
| Datum scalarltsel | ( | PG_FUNCTION_ARGS | ) |
Definition at line 959 of file selfuncs.c.
References DEFAULT_INEQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and scalarineqsel().
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
VariableStatData vardata;
Node *other;
bool varonleft;
Datum constval;
Oid consttype;
bool isgt;
double selec;
/*
* If expression is not variable op something or something op variable,
* then punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
/*
* Can't do anything useful if the something is not a constant, either.
*/
if (!IsA(other, Const))
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
* operator will never return TRUE.
*/
if (((Const *) other)->constisnull)
{
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(0.0);
}
constval = ((Const *) other)->constvalue;
consttype = ((Const *) other)->consttype;
/*
* Force the var to be on the left to simplify logic in scalarineqsel.
*/
if (varonleft)
{
/* we have var < other */
isgt = false;
}
else
{
/* we have other < var, commute to make var > other */
operator = get_commutator(operator);
if (!operator)
{
/* Use default selectivity (should we raise an error instead?) */
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}
isgt = true;
}
selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
ReleaseVariableStats(vardata);
PG_RETURN_FLOAT8((float8) selec);
}
| Datum spgcostestimate | ( | PG_FUNCTION_ARGS | ) |
Definition at line 6648 of file selfuncs.c.
References cpu_operator_cost, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, MemSet, GenericCosts::num_sa_scans, IndexOptInfo::pages, PG_GETARG_FLOAT8, PG_GETARG_POINTER, PG_RETURN_VOID, IndexOptInfo::tree_height, and IndexOptInfo::tuples.
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
GenericCosts costs;
Cost descentCost;
MemSet(&costs, 0, sizeof(costs));
genericcostestimate(root, path, loop_count, &costs);
/*
* We model index descent costs similarly to those for btree, but to do
* that we first need an idea of the tree height. We somewhat arbitrarily
* assume that the fanout is 100, meaning the tree height is at most
* log100(index->pages).
*
* Although this computation isn't really expensive enough to require
* caching, we might as well use index->tree_height to cache it.
*/
if (index->tree_height < 0) /* unknown? */
{
if (index->pages > 1) /* avoid computing log(0) */
index->tree_height = (int) (log(index->pages) / log(100.0));
else
index->tree_height = 0;
}
/*
* Add a CPU-cost component to represent the costs of initial descent.
* We just use log(N) here not log2(N) since the branching factor isn't
* necessarily two anyway. As for btree, charge once per SA scan.
*/
if (index->tuples > 1) /* avoid computing log(0) */
{
descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
costs.indexStartupCost += descentCost;
costs.indexTotalCost += costs.num_sa_scans * descentCost;
}
/*
* Likewise add a per-page charge, calculated the same as for btrees.
*/
descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
costs.indexStartupCost += descentCost;
costs.indexTotalCost += costs.num_sa_scans * descentCost;
*indexStartupCost = costs.indexStartupCost;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}
| PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook |
Definition at line 145 of file selfuncs.c.
Referenced by btcostestimate(), and examine_variable().
| PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook |
Definition at line 144 of file selfuncs.c.
Referenced by btcostestimate(), and examine_simple_variable().
1.7.1