Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Typedefs | Functions

clausesel.c File Reference

#include "postgres.h"
#include "catalog/pg_operator.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
Include dependency graph for clausesel.c:

Go to the source code of this file.

Data Structures

struct  RangeQueryClause

Typedefs

typedef struct RangeQueryClause RangeQueryClause

Functions

static void addRangeClause (RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2)
Selectivity clauselist_selectivity (PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
static bool bms_is_subset_singleton (const Bitmapset *s, int x)
static bool treat_as_join_clause (Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo)
Selectivity clause_selectivity (PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)

Typedef Documentation


Function Documentation

static void addRangeClause ( RangeQueryClause **  rqlist,
Node clause,
bool  varonleft,
bool  isLTsel,
Selectivity  s2 
) [static]

Definition at line 287 of file clausesel.c.

References equal(), get_leftop(), get_rightop(), RangeQueryClause::have_hibound, RangeQueryClause::have_lobound, RangeQueryClause::hibound, RangeQueryClause::lobound, RangeQueryClause::next, palloc(), and RangeQueryClause::var.

Referenced by clauselist_selectivity().

{
    RangeQueryClause *rqelem;
    Node       *var;
    bool        is_lobound;

    if (varonleft)
    {
        var = get_leftop((Expr *) clause);
        is_lobound = !isLTsel;  /* x < something is high bound */
    }
    else
    {
        var = get_rightop((Expr *) clause);
        is_lobound = isLTsel;   /* something < x is low bound */
    }

    for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
    {
        /*
         * We use full equal() here because the "var" might be a function of
         * one or more attributes of the same relation...
         */
        if (!equal(var, rqelem->var))
            continue;
        /* Found the right group to put this clause in */
        if (is_lobound)
        {
            if (!rqelem->have_lobound)
            {
                rqelem->have_lobound = true;
                rqelem->lobound = s2;
            }
            else
            {

                /*------
                 * We have found two similar clauses, such as
                 * x < y AND x < z.
                 * Keep only the more restrictive one.
                 *------
                 */
                if (rqelem->lobound > s2)
                    rqelem->lobound = s2;
            }
        }
        else
        {
            if (!rqelem->have_hibound)
            {
                rqelem->have_hibound = true;
                rqelem->hibound = s2;
            }
            else
            {

                /*------
                 * We have found two similar clauses, such as
                 * x > y AND x > z.
                 * Keep only the more restrictive one.
                 *------
                 */
                if (rqelem->hibound > s2)
                    rqelem->hibound = s2;
            }
        }
        return;
    }

    /* No matching var found, so make a new clause-pair data structure */
    rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
    rqelem->var = var;
    if (is_lobound)
    {
        rqelem->have_lobound = true;
        rqelem->have_hibound = false;
        rqelem->lobound = s2;
    }
    else
    {
        rqelem->have_lobound = false;
        rqelem->have_hibound = true;
        rqelem->hibound = s2;
    }
    rqelem->next = *rqlist;
    *rqlist = rqelem;
}

static bool bms_is_subset_singleton ( const Bitmapset s,
int  x 
) [static]

Definition at line 385 of file clausesel.c.

References BMS_EMPTY_SET, bms_is_member(), bms_membership(), BMS_MULTIPLE, and BMS_SINGLETON.

Referenced by clause_selectivity().

{
    switch (bms_membership(s))
    {
        case BMS_EMPTY_SET:
            return true;
        case BMS_SINGLETON:
            return bms_is_member(x, s);
        case BMS_MULTIPLE:
            return false;
    }
    /* can't get here... */
    return false;
}

Selectivity clause_selectivity ( PlannerInfo root,
Node clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 484 of file clausesel.c.

References and_clause(), arg, OpExpr::args, bms_is_subset_singleton(), BooleanEqualOperator, booltestsel(), RestrictInfo::clause, RestrictInfo::clause_relids, clause_selectivity(), clauselist_selectivity(), Const::constisnull, Const::constvalue, CurrentOfExpr::cvarno, DatumGetBool, DEBUG4, elog, estimate_expression_value(), find_base_rel(), get_notclausearg(), OpExpr::inputcollid, InvalidOid, is_funcclause, is_opclause, IsA, JOIN_INNER, join_selectivity(), lfirst, list_make2, makeBoolConst(), RestrictInfo::norm_selec, not_clause(), NULL, nulltestsel(), OpExpr::opno, or_clause(), RestrictInfo::orclause, RestrictInfo::outer_selec, RestrictInfo::pseudoconstant, restriction_selectivity(), rowcomparesel(), s1, s2, scalararraysel(), treat_as_join_clause(), RelOptInfo::tuples, RangeQueryClause::var, Var::varlevelsup, and Var::varno.

Referenced by approx_tuple_count(), booltestsel(), clause_selectivity(), clauselist_selectivity(), and create_or_index_quals().

{
    Selectivity s1 = 0.5;       /* default for any unhandled clause type */
    RestrictInfo *rinfo = NULL;
    bool        cacheable = false;

    if (clause == NULL)         /* can this still happen? */
        return s1;

    if (IsA(clause, RestrictInfo))
    {
        rinfo = (RestrictInfo *) clause;

        /*
         * If the clause is marked pseudoconstant, then it will be used as a
         * gating qual and should not affect selectivity estimates; hence
         * return 1.0.  The only exception is that a constant FALSE may be
         * taken as having selectivity 0.0, since it will surely mean no rows
         * out of the plan.  This case is simple enough that we need not
         * bother caching the result.
         */
        if (rinfo->pseudoconstant)
        {
            if (!IsA(rinfo->clause, Const))
                return (Selectivity) 1.0;
        }

        /*
         * If the clause is marked redundant, always return 1.0.
         */
        if (rinfo->norm_selec > 1)
            return (Selectivity) 1.0;

        /*
         * If possible, cache the result of the selectivity calculation for
         * the clause.  We can cache if varRelid is zero or the clause
         * contains only vars of that relid --- otherwise varRelid will affect
         * the result, so mustn't cache.  Outer join quals might be examined
         * with either their join's actual jointype or JOIN_INNER, so we need
         * two cache variables to remember both cases.  Note: we assume the
         * result won't change if we are switching the input relations or
         * considering a unique-ified case, so we only need one cache variable
         * for all non-JOIN_INNER cases.
         */
        if (varRelid == 0 ||
            bms_is_subset_singleton(rinfo->clause_relids, varRelid))
        {
            /* Cacheable --- do we already have the result? */
            if (jointype == JOIN_INNER)
            {
                if (rinfo->norm_selec >= 0)
                    return rinfo->norm_selec;
            }
            else
            {
                if (rinfo->outer_selec >= 0)
                    return rinfo->outer_selec;
            }
            cacheable = true;
        }

        /*
         * Proceed with examination of contained clause.  If the clause is an
         * OR-clause, we want to look at the variant with sub-RestrictInfos,
         * so that per-subclause selectivities can be cached.
         */
        if (rinfo->orclause)
            clause = (Node *) rinfo->orclause;
        else
            clause = (Node *) rinfo->clause;
    }

    if (IsA(clause, Var))
    {
        Var        *var = (Var *) clause;

        /*
         * We probably shouldn't ever see an uplevel Var here, but if we do,
         * return the default selectivity...
         */
        if (var->varlevelsup == 0 &&
            (varRelid == 0 || varRelid == (int) var->varno))
        {
            /*
             * A Var at the top of a clause must be a bool Var. This is
             * equivalent to the clause reln.attribute = 't', so we compute
             * the selectivity as if that is what we have.
             */
            s1 = restriction_selectivity(root,
                                         BooleanEqualOperator,
                                         list_make2(var,
                                                    makeBoolConst(true,
                                                                  false)),
                                         InvalidOid,
                                         varRelid);
        }
    }
    else if (IsA(clause, Const))
    {
        /* bool constant is pretty easy... */
        Const      *con = (Const *) clause;

        s1 = con->constisnull ? 0.0 :
            DatumGetBool(con->constvalue) ? 1.0 : 0.0;
    }
    else if (IsA(clause, Param))
    {
        /* see if we can replace the Param */
        Node       *subst = estimate_expression_value(root, clause);

        if (IsA(subst, Const))
        {
            /* bool constant is pretty easy... */
            Const      *con = (Const *) subst;

            s1 = con->constisnull ? 0.0 :
                DatumGetBool(con->constvalue) ? 1.0 : 0.0;
        }
        else
        {
            /* XXX any way to do better than default? */
        }
    }
    else if (not_clause(clause))
    {
        /* inverse of the selectivity of the underlying clause */
        s1 = 1.0 - clause_selectivity(root,
                                  (Node *) get_notclausearg((Expr *) clause),
                                      varRelid,
                                      jointype,
                                      sjinfo);
    }
    else if (and_clause(clause))
    {
        /* share code with clauselist_selectivity() */
        s1 = clauselist_selectivity(root,
                                    ((BoolExpr *) clause)->args,
                                    varRelid,
                                    jointype,
                                    sjinfo);
    }
    else if (or_clause(clause))
    {
        /*
         * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
         * account for the probable overlap of selected tuple sets.
         *
         * XXX is this too conservative?
         */
        ListCell   *arg;

        s1 = 0.0;
        foreach(arg, ((BoolExpr *) clause)->args)
        {
            Selectivity s2 = clause_selectivity(root,
                                                (Node *) lfirst(arg),
                                                varRelid,
                                                jointype,
                                                sjinfo);

            s1 = s1 + s2 - s1 * s2;
        }
    }
    else if (is_opclause(clause) || IsA(clause, DistinctExpr))
    {
        OpExpr     *opclause = (OpExpr *) clause;
        Oid         opno = opclause->opno;

        if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
        {
            /* Estimate selectivity for a join clause. */
            s1 = join_selectivity(root, opno,
                                  opclause->args,
                                  opclause->inputcollid,
                                  jointype,
                                  sjinfo);
        }
        else
        {
            /* Estimate selectivity for a restriction clause. */
            s1 = restriction_selectivity(root, opno,
                                         opclause->args,
                                         opclause->inputcollid,
                                         varRelid);
        }

        /*
         * DistinctExpr has the same representation as OpExpr, but the
         * contained operator is "=" not "<>", so we must negate the result.
         * This estimation method doesn't give the right behavior for nulls,
         * but it's better than doing nothing.
         */
        if (IsA(clause, DistinctExpr))
            s1 = 1.0 - s1;
    }
    else if (is_funcclause(clause))
    {
        /*
         * This is not an operator, so we guess at the selectivity. THIS IS A
         * HACK TO GET V4 OUT THE DOOR.  FUNCS SHOULD BE ABLE TO HAVE
         * SELECTIVITIES THEMSELVES.       -- JMH 7/9/92
         */
        s1 = (Selectivity) 0.3333333;
    }
#ifdef NOT_USED
    else if (IsA(clause, SubPlan) ||
             IsA(clause, AlternativeSubPlan))
    {
        /*
         * Just for the moment! FIX ME! - vadim 02/04/98
         */
        s1 = (Selectivity) 0.5;
    }
#endif
    else if (IsA(clause, ScalarArrayOpExpr))
    {
        /* Use node specific selectivity calculation function */
        s1 = scalararraysel(root,
                            (ScalarArrayOpExpr *) clause,
                            treat_as_join_clause(clause, rinfo,
                                                 varRelid, sjinfo),
                            varRelid,
                            jointype,
                            sjinfo);
    }
    else if (IsA(clause, RowCompareExpr))
    {
        /* Use node specific selectivity calculation function */
        s1 = rowcomparesel(root,
                           (RowCompareExpr *) clause,
                           varRelid,
                           jointype,
                           sjinfo);
    }
    else if (IsA(clause, NullTest))
    {
        /* Use node specific selectivity calculation function */
        s1 = nulltestsel(root,
                         ((NullTest *) clause)->nulltesttype,
                         (Node *) ((NullTest *) clause)->arg,
                         varRelid,
                         jointype,
                         sjinfo);
    }
    else if (IsA(clause, BooleanTest))
    {
        /* Use node specific selectivity calculation function */
        s1 = booltestsel(root,
                         ((BooleanTest *) clause)->booltesttype,
                         (Node *) ((BooleanTest *) clause)->arg,
                         varRelid,
                         jointype,
                         sjinfo);
    }
    else if (IsA(clause, CurrentOfExpr))
    {
        /* CURRENT OF selects at most one row of its table */
        CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
        RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);

        if (crel->tuples > 0)
            s1 = 1.0 / crel->tuples;
    }
    else if (IsA(clause, RelabelType))
    {
        /* Not sure this case is needed, but it can't hurt */
        s1 = clause_selectivity(root,
                                (Node *) ((RelabelType *) clause)->arg,
                                varRelid,
                                jointype,
                                sjinfo);
    }
    else if (IsA(clause, CoerceToDomain))
    {
        /* Not sure this case is needed, but it can't hurt */
        s1 = clause_selectivity(root,
                                (Node *) ((CoerceToDomain *) clause)->arg,
                                varRelid,
                                jointype,
                                sjinfo);
    }

    /* Cache the result if possible */
    if (cacheable)
    {
        if (jointype == JOIN_INNER)
            rinfo->norm_selec = s1;
        else
            rinfo->outer_selec = s1;
    }

#ifdef SELECTIVITY_DEBUG
    elog(DEBUG4, "clause_selectivity: s1 %f", s1);
#endif   /* SELECTIVITY_DEBUG */

    return s1;
}

Selectivity clauselist_selectivity ( PlannerInfo root,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 93 of file clausesel.c.

References addRangeClause(), OpExpr::args, bms_membership(), RestrictInfo::clause, RestrictInfo::clause_relids, clause_selectivity(), DEFAULT_INEQ_SEL, get_oprrest(), RangeQueryClause::have_hibound, RangeQueryClause::have_lobound, RangeQueryClause::hibound, IS_NULL, is_opclause, is_pseudo_constant_clause(), is_pseudo_constant_clause_relids(), IsA, RestrictInfo::left_relids, lfirst, linitial, list_length(), RangeQueryClause::lobound, lsecond, RangeQueryClause::next, NULL, nulltestsel(), NumRelids(), OpExpr::opno, pfree(), RestrictInfo::pseudoconstant, RestrictInfo::right_relids, s1, s2, and RangeQueryClause::var.

Referenced by btcostestimate(), calc_joinrel_size_estimate(), clause_selectivity(), compute_semi_anti_join_factors(), estimate_size(), genericcostestimate(), get_parameterized_baserel_size(), gincostestimate(), postgresGetForeignRelSize(), and set_baserel_size_estimates().

{
    Selectivity s1 = 1.0;
    RangeQueryClause *rqlist = NULL;
    ListCell   *l;

    /*
     * If there's exactly one clause, then no use in trying to match up pairs,
     * so just go directly to clause_selectivity().
     */
    if (list_length(clauses) == 1)
        return clause_selectivity(root, (Node *) linitial(clauses),
                                  varRelid, jointype, sjinfo);

    /*
     * Initial scan over clauses.  Anything that doesn't look like a potential
     * rangequery clause gets multiplied into s1 and forgotten. Anything that
     * does gets inserted into an rqlist entry.
     */
    foreach(l, clauses)
    {
        Node       *clause = (Node *) lfirst(l);
        RestrictInfo *rinfo;
        Selectivity s2;

        /* Always compute the selectivity using clause_selectivity */
        s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);

        /*
         * Check for being passed a RestrictInfo.
         *
         * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
         * 0.0; just use that rather than looking for range pairs.
         */
        if (IsA(clause, RestrictInfo))
        {
            rinfo = (RestrictInfo *) clause;
            if (rinfo->pseudoconstant)
            {
                s1 = s1 * s2;
                continue;
            }
            clause = (Node *) rinfo->clause;
        }
        else
            rinfo = NULL;

        /*
         * See if it looks like a restriction clause with a pseudoconstant on
         * one side.  (Anything more complicated than that might not behave in
         * the simple way we are expecting.)  Most of the tests here can be
         * done more efficiently with rinfo than without.
         */
        if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
        {
            OpExpr     *expr = (OpExpr *) clause;
            bool        varonleft = true;
            bool        ok;

            if (rinfo)
            {
                ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
                    (is_pseudo_constant_clause_relids(lsecond(expr->args),
                                                      rinfo->right_relids) ||
                     (varonleft = false,
                      is_pseudo_constant_clause_relids(linitial(expr->args),
                                                       rinfo->left_relids)));
            }
            else
            {
                ok = (NumRelids(clause) == 1) &&
                    (is_pseudo_constant_clause(lsecond(expr->args)) ||
                     (varonleft = false,
                      is_pseudo_constant_clause(linitial(expr->args))));
            }

            if (ok)
            {
                /*
                 * If it's not a "<" or ">" operator, just merge the
                 * selectivity in generically.  But if it's the right oprrest,
                 * add the clause to rqlist for later processing.
                 */
                switch (get_oprrest(expr->opno))
                {
                    case F_SCALARLTSEL:
                        addRangeClause(&rqlist, clause,
                                       varonleft, true, s2);
                        break;
                    case F_SCALARGTSEL:
                        addRangeClause(&rqlist, clause,
                                       varonleft, false, s2);
                        break;
                    default:
                        /* Just merge the selectivity in generically */
                        s1 = s1 * s2;
                        break;
                }
                continue;       /* drop to loop bottom */
            }
        }

        /* Not the right form, so treat it generically. */
        s1 = s1 * s2;
    }

    /*
     * Now scan the rangequery pair list.
     */
    while (rqlist != NULL)
    {
        RangeQueryClause *rqnext;

        if (rqlist->have_lobound && rqlist->have_hibound)
        {
            /* Successfully matched a pair of range clauses */
            Selectivity s2;

            /*
             * Exact equality to the default value probably means the
             * selectivity function punted.  This is not airtight but should
             * be good enough.
             */
            if (rqlist->hibound == DEFAULT_INEQ_SEL ||
                rqlist->lobound == DEFAULT_INEQ_SEL)
            {
                s2 = DEFAULT_RANGE_INEQ_SEL;
            }
            else
            {
                s2 = rqlist->hibound + rqlist->lobound - 1.0;

                /* Adjust for double-exclusion of NULLs */
                s2 += nulltestsel(root, IS_NULL, rqlist->var,
                                  varRelid, jointype, sjinfo);

                /*
                 * A zero or slightly negative s2 should be converted into a
                 * small positive value; we probably are dealing with a very
                 * tight range and got a bogus result due to roundoff errors.
                 * However, if s2 is very negative, then we probably have
                 * default selectivity estimates on one or both sides of the
                 * range that we failed to recognize above for some reason.
                 */
                if (s2 <= 0.0)
                {
                    if (s2 < -0.01)
                    {
                        /*
                         * No data available --- use a default estimate that
                         * is small, but not real small.
                         */
                        s2 = DEFAULT_RANGE_INEQ_SEL;
                    }
                    else
                    {
                        /*
                         * It's just roundoff error; use a small positive
                         * value
                         */
                        s2 = 1.0e-10;
                    }
                }
            }
            /* Merge in the selectivity of the pair of clauses */
            s1 *= s2;
        }
        else
        {
            /* Only found one of a pair, merge it in generically */
            if (rqlist->have_lobound)
                s1 *= rqlist->lobound;
            else
                s1 *= rqlist->hibound;
        }
        /* release storage and advance */
        rqnext = rqlist->next;
        pfree(rqlist);
        rqlist = rqnext;
    }

    return s1;
}

static bool treat_as_join_clause ( Node clause,
RestrictInfo rinfo,
int  varRelid,
SpecialJoinInfo sjinfo 
) [inline, static]

Definition at line 406 of file clausesel.c.

References bms_membership(), BMS_MULTIPLE, RestrictInfo::clause_relids, NULL, and NumRelids().

Referenced by clause_selectivity().

{
    if (varRelid != 0)
    {
        /*
         * Caller is forcing restriction mode (eg, because we are examining an
         * inner indexscan qual).
         */
        return false;
    }
    else if (sjinfo == NULL)
    {
        /*
         * It must be a restriction clause, since it's being evaluated at a
         * scan node.
         */
        return false;
    }
    else
    {
        /*
         * Otherwise, it's a join if there's more than one relation used. We
         * can optimize this calculation if an rinfo was passed.
         *
         * XXX  Since we know the clause is being evaluated at a join, the
         * only way it could be single-relation is if it was delayed by outer
         * joins.  Although we can make use of the restriction qual estimators
         * anyway, it seems likely that we ought to account for the
         * probability of injected nulls somehow.
         */
        if (rinfo)
            return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
        else
            return (NumRelids(clause) > 1);
    }
}