Header And Logo

PostgreSQL
| The world's most advanced open source database.

Functions

rangetypes_typanalyze.c File Reference

#include "postgres.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "utils/builtins.h"
#include "utils/rangetypes.h"
Include dependency graph for rangetypes_typanalyze.c:

Go to the source code of this file.

Functions

static int float8_qsort_cmp (const void *a1, const void *a2)
static int range_bound_qsort_cmp (const void *a1, const void *a2, void *arg)
static void compute_range_stats (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
Datum range_typanalyze (PG_FUNCTION_ARGS)

Function Documentation

static void compute_range_stats ( VacAttrStats *  stats,
AnalyzeAttrFetchFunc  fetchfunc,
int  samplerows,
double  totalrows 
) [static]

Definition at line 95 of file rangetypes_typanalyze.c.

References VacAttrStats::anl_context, VacAttrStats::attr, DatumGetFloat8, DatumGetPointer, DatumGetRangeType, VacAttrStats::extra_data, float8_qsort_cmp(), Float8GetDatum(), FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), i, RangeBound::infinite, MemoryContextSwitchTo(), VacAttrStats::numnumbers, VacAttrStats::numvalues, OidIsValid, palloc(), PointerGetDatum, qsort, qsort_arg(), range_bound_qsort_cmp(), range_deserialize(), range_serialize(), TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, VacAttrStats::stadistinct, VacAttrStats::stakind, VacAttrStats::stanullfrac, VacAttrStats::stanumbers, VacAttrStats::staop, VacAttrStats::stats_valid, VacAttrStats::statypalign, VacAttrStats::statypbyval, VacAttrStats::statypid, VacAttrStats::statyplen, VacAttrStats::stavalues, VacAttrStats::stawidth, vacuum_delay_point(), RangeBound::val, value, and VARSIZE_ANY.

{
    /*
     * Compute statistics for a range-type column from a sample of rows.
     *
     * Every sample value is obtained through fetchfunc.  Nulls and empty
     * ranges are only counted; for each non-empty range the lower bound, the
     * upper bound and the range length are collected.  When at least one
     * non-null value was seen, the null fraction, average width and
     * stadistinct are stored and up to two statistics slots are filled:
     *
     *  - a bounds histogram (only if there are at least two non-empty ranges)
     *  - a range-length histogram slot, which is always written and whose
     *    stanumbers entry holds the fraction of empty ranges among the
     *    non-null values.
     *
     * stats->extra_data must point to the range type's TypeCacheEntry.
     * The totalrows argument is not used by this function.
     */
    TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
    /* Without a subdiff function, every finite range gets length 1.0 below */
    bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
    int         null_cnt = 0;
    int         non_null_cnt = 0;
    int         non_empty_cnt = 0;
    int         empty_cnt = 0;
    int         range_no;
    int         slot_idx;
    /* The column's statistics target limits the histogram sizes */
    int         num_bins = stats->attr->attstattarget;
    int         num_hist;
    float8     *lengths;
    RangeBound *lowers, *uppers;
    double      total_width = 0;

    /*
     * Allocate memory to hold range bounds and lengths of the sample ranges.
     * The arrays are sized for samplerows entries, although only the first
     * non_empty_cnt entries of each end up being filled.
     */
    lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
    uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
    lengths = (float8 *) palloc(sizeof(float8) * samplerows);

    /* Loop over the sample ranges. */
    for (range_no = 0; range_no < samplerows; range_no++)
    {
        Datum       value;
        bool        isnull,
                    empty;
        RangeType  *range;
        RangeBound  lower,
                    upper;
        float8      length;

        vacuum_delay_point();

        value = fetchfunc(stats, range_no, &isnull);
        if (isnull)
        {
            /* range is null, just count that */
            null_cnt++;
            continue;
        }

        /*
         * XXX: should we ignore wide values, like std_typanalyze does, to
         * avoid bloating the statistics table?
         */
        total_width += VARSIZE_ANY(DatumGetPointer(value));

        /* Get range and deserialize it for further analysis. */
        range = DatumGetRangeType(value);
        range_deserialize(typcache, range, &lower, &upper, &empty);

        if (!empty)
        {
            /* Remember bounds and length for further usage in histograms */
            lowers[non_empty_cnt] = lower;
            uppers[non_empty_cnt] = upper;

            if (lower.infinite || upper.infinite)
            {
                /* Length of any kind of an infinite range is infinite */
                length = get_float8_infinity();
            }
            else if (has_subdiff)
            {
                /*
                 * For an ordinary range, use subdiff function between upper
                 * and lower bound values.
                 */
                length = DatumGetFloat8(FunctionCall2Coll(
                                            &typcache->rng_subdiff_finfo,
                                            typcache->rng_collation,
                                            upper.val, lower.val));
            }
            else
            {
                /* Use default value of 1.0 if no subdiff is available. */
                length = 1.0;
            }
            lengths[non_empty_cnt] = length;

            non_empty_cnt++;
        }
        else
            empty_cnt++;

        /* Empty and non-empty ranges both count as non-null values */
        non_null_cnt++;
    }

    /* Index of the next statistics slot to fill */
    slot_idx = 0;

    /* We can only compute real stats if we found some non-null values. */
    if (non_null_cnt > 0)
    {
        Datum      *bound_hist_values;
        Datum      *length_hist_values;
        int         pos,
                    posfrac,
                    delta,
                    deltafrac,
                    i;
        MemoryContext old_cxt;
        float4     *emptyfrac;

        stats->stats_valid = true;
        /* Do the simple null-frac and width stats */
        stats->stanullfrac = (double) null_cnt / (double) samplerows;
        stats->stawidth = total_width / (double) non_null_cnt;
        stats->stadistinct = -1.0;

        /*
         * Must copy the target values into anl_context.  Everything palloc'd
         * from here until the context is switched back (histogram arrays,
         * serialized ranges, emptyfrac) is allocated in stats->anl_context.
         */
        old_cxt = MemoryContextSwitchTo(stats->anl_context);

        /*
         * Generate a bounds histogram slot entry if there are at least two
         * values.
         */
        if (non_empty_cnt >= 2)
        {
            /*
             * Sort bound values.  Lower and upper bounds are sorted
             * independently of each other, so after this point lowers[k] and
             * uppers[k] need not come from the same sample range.
             */
            qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
                      range_bound_qsort_cmp, typcache);
            qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
                      range_bound_qsort_cmp, typcache);

            /*
             * Use every collected value if there are at most num_bins of
             * them; otherwise limit the histogram to num_bins + 1 entries.
             */
            num_hist = non_empty_cnt;
            if (num_hist > num_bins)
                num_hist = num_bins + 1;

            bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));

            /*
             * The object of this loop is to construct ranges from first and
             * last entries in lowers[] and uppers[] along with evenly-spaced
             * values in between. So the i'th value is a range of
             * lowers[(i * (nvals - 1)) / (num_hist - 1)] and
             * uppers[(i * (nvals - 1)) / (num_hist - 1)]. But computing that
             * subscript directly risks integer overflow when the stats target
             * is more than a couple thousand.  Instead we add
             * (nvals - 1) / (num_hist - 1) to pos at each step, tracking the
             * integral and fractional parts of the sum separately.
             */
            delta = (non_empty_cnt - 1) / (num_hist - 1);
            deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
            pos = posfrac = 0;

            for (i = 0; i < num_hist; i++)
            {
                /*
                 * Build a range from the pos'th sorted lower bound and the
                 * pos'th sorted upper bound.  NOTE(review): the final "false"
                 * argument presumably marks the range as non-empty -- confirm
                 * against range_serialize().
                 */
                bound_hist_values[i] = PointerGetDatum(range_serialize(
                                typcache, &lowers[pos], &uppers[pos], false));
                pos += delta;
                posfrac += deltafrac;
                if (posfrac >= (num_hist - 1))
                {
                    /* fractional part exceeds 1, carry to integer part */
                    pos++;
                    posfrac -= (num_hist - 1);
                }
            }

            /*
             * Only stakind, stavalues and numvalues are set for this slot;
             * staop and the statyp* fields are left as they were on entry.
             */
            stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
            stats->stavalues[slot_idx] = bound_hist_values;
            stats->numvalues[slot_idx] = num_hist;
            slot_idx++;
        }

        /*
         * Generate a length histogram slot entry if there are at least two
         * values.
         */
        if (non_empty_cnt >= 2)
        {
            /*
             * Ascending sort of range lengths for further filling of
             * histogram
             */
            qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp);

            /* Same size limit as for the bounds histogram */
            num_hist = non_empty_cnt;
            if (num_hist > num_bins)
                num_hist = num_bins + 1;

            length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));

            /*
             * The object of this loop is to copy the first and last lengths[]
             * entries along with evenly-spaced values in between. So the i'th
             * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
             * computing that subscript directly risks integer overflow when the
             * stats target is more than a couple thousand.  Instead we add
             * (nvals - 1) / (num_hist - 1) to pos at each step, tracking the
             * integral and fractional parts of the sum separately.
             */
            delta = (non_empty_cnt - 1) / (num_hist - 1);
            deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
            pos = posfrac = 0;

            for (i = 0; i < num_hist; i++)
            {
                length_hist_values[i] = Float8GetDatum(lengths[pos]);
                pos += delta;
                posfrac += deltafrac;
                if (posfrac >= (num_hist - 1))
                {
                    /* fractional part exceeds 1, carry to integer part */
                    pos++;
                    posfrac -= (num_hist - 1);
                }
            }
        }
        else
        {
            /*
             * Even when we don't create the histogram, store an empty array
             * to mean "no histogram". We can't just leave stavalues NULL,
             * because get_attstatsslot() errors if you ask for stavalues, and
             * it's NULL. We'll still store the empty fraction in stanumbers.
             */
            length_hist_values = palloc(0);
            num_hist = 0;
        }

        /*
         * The length-histogram slot is written in both cases above.  Its
         * values are float8, so the slot's element type information is
         * overridden accordingly.
         */
        stats->staop[slot_idx] = Float8LessOperator;
        stats->stavalues[slot_idx] = length_hist_values;
        stats->numvalues[slot_idx] = num_hist;
        stats->statypid[slot_idx] = FLOAT8OID;
        stats->statyplen[slot_idx] = sizeof(float8);
        /* Whether float8 is pass-by-value is a build-time setting */
#ifdef USE_FLOAT8_BYVAL
        stats->statypbyval[slot_idx] = true;
#else
        stats->statypbyval[slot_idx] = false;
#endif
        stats->statypalign[slot_idx] = 'd';

        /* Store the fraction of empty ranges (relative to non-null values) */
        emptyfrac = (float4 *) palloc(sizeof(float4));
        *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
        stats->stanumbers[slot_idx] = emptyfrac;
        stats->numnumbers[slot_idx] = 1;

        stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
        slot_idx++;

        MemoryContextSwitchTo(old_cxt);
    }
    else if (null_cnt > 0)
    {
        /* We found only nulls; assume the column is entirely null */
        stats->stats_valid = true;
        stats->stanullfrac = 1.0;
        stats->stawidth = 0;        /* "unknown" */
        stats->stadistinct = 0.0;   /* "unknown" */
    }
    /*
     * If neither branch was taken (no non-null and no null values were seen),
     * stats->stats_valid is left untouched.
     *
     * We don't need to bother cleaning up any of our temporary palloc's.
     */
}

static int float8_qsort_cmp ( const void *  a1,
const void *  a2 
) [static]

Definition at line 65 of file rangetypes_typanalyze.c.

Referenced by compute_range_stats().

{
    /*
     * qsort() comparator for float8 values: yields -1 when the first value
     * is smaller, 0 when the two compare equal, and 1 in every other case.
     */
    const float8 lhs = *(const float8 *) a1;
    const float8 rhs = *(const float8 *) a2;

    if (lhs < rhs)
        return -1;
    if (lhs == rhs)
        return 0;
    return 1;
}

static int range_bound_qsort_cmp ( const void *  a1,
const void *  a2,
void *  arg 
) [static]

Definition at line 82 of file rangetypes_typanalyze.c.

References range_cmp_bounds().

Referenced by compute_range_stats().

{
    /*
     * qsort_arg() comparator for RangeBound elements.  The extra argument
     * is the range type's TypeCacheEntry, which is handed through to
     * range_cmp_bounds() together with the two bounds being compared.
     */
    TypeCacheEntry *cache = (TypeCacheEntry *) arg;
    RangeBound *left = (RangeBound *) a1;
    RangeBound *right = (RangeBound *) a2;

    return range_cmp_bounds(cache, left, right);
}

Datum range_typanalyze ( PG_FUNCTION_ARGS   ) 

Definition at line 41 of file rangetypes_typanalyze.c.

References VacAttrStats::attr, VacAttrStats::attrtypid, VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, VacAttrStats::minrows, PG_GETARG_POINTER, PG_RETURN_BOOL, and range_get_typcache().

{
    /*
     * Set up the VacAttrStats struct passed as argument 0 so that range
     * columns are analyzed by compute_range_stats.  Always returns true.
     */
    VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
    Form_pg_attribute att = stats->attr;

    /*
     * Look up the range type's cache entry and stash it in extra_data,
     * where compute_range_stats picks it up again.
     */
    stats->extra_data = range_get_typcache(fcinfo, stats->attrtypid);
    stats->compute_stats = compute_range_stats;

    /* A negative target is replaced by the default statistics target */
    if (att->attstattarget < 0)
        att->attstattarget = default_statistics_target;

    /* same as in std_typanalyze */
    stats->minrows = 300 * att->attstattarget;

    PG_RETURN_BOOL(true);
}