#include "postgres.h"#include "catalog/pg_operator.h"#include "commands/vacuum.h"#include "utils/builtins.h"#include "utils/rangetypes.h"
Go to the source code of this file.
Functions
static int float8_qsort_cmp (const void *a1, const void *a2)
static int range_bound_qsort_cmp (const void *a1, const void *a2, void *arg)
static void compute_range_stats (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
Datum range_typanalyze (PG_FUNCTION_ARGS)
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows) [static]
Definition at line 95 of file rangetypes_typanalyze.c.
References VacAttrStats::anl_context, VacAttrStats::attr, DatumGetFloat8, DatumGetPointer, DatumGetRangeType, VacAttrStats::extra_data, float8_qsort_cmp(), Float8GetDatum(), FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), i, RangeBound::infinite, MemoryContextSwitchTo(), VacAttrStats::numnumbers, VacAttrStats::numvalues, OidIsValid, palloc(), PointerGetDatum, qsort, qsort_arg(), range_bound_qsort_cmp(), range_deserialize(), range_serialize(), TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, VacAttrStats::stadistinct, VacAttrStats::stakind, VacAttrStats::stanullfrac, VacAttrStats::stanumbers, VacAttrStats::staop, VacAttrStats::stats_valid, VacAttrStats::statypalign, VacAttrStats::statypbyval, VacAttrStats::statypid, VacAttrStats::statyplen, VacAttrStats::stavalues, VacAttrStats::stawidth, vacuum_delay_point(), RangeBound::val, value, and VARSIZE_ANY.
{
/*
 * Compute range-specific statistics for one column from the sampled rows:
 * null fraction, average width, a histogram of range bounds, a histogram
 * of range lengths, and the fraction of empty ranges.
 *
 * stats->extra_data is read as the TypeCacheEntry of the range type.
 * fetchfunc(stats, i, &isnull) is called for each i in [0, samplerows) to
 * obtain the i'th sample value. totalrows is not referenced in this body.
 *
 * Results are written into the stats struct (stats_valid, stanullfrac,
 * stawidth, stadistinct and the per-slot sta* arrays); nothing is returned.
 */
TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
/* true when the type cache holds a valid subdiff function OID */
bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
int null_cnt = 0;
int non_null_cnt = 0;
int non_empty_cnt = 0;
int empty_cnt = 0;
int range_no;
int slot_idx;
/* the column's statistics target is used as the number of histogram bins */
int num_bins = stats->attr->attstattarget;
int num_hist;
float8 *lengths;
RangeBound *lowers, *uppers;
double total_width = 0;
/*
 * Allocate memory to hold range bounds and lengths of the sample ranges.
 * The arrays are sized for the worst case (every sample row non-empty);
 * only the first non_empty_cnt entries end up being filled.
 */
lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
lengths = (float8 *) palloc(sizeof(float8) * samplerows);
/* Loop over the sample ranges. */
for (range_no = 0; range_no < samplerows; range_no++)
{
Datum value;
bool isnull,
empty;
RangeType *range;
RangeBound lower,
upper;
float8 length;
vacuum_delay_point();
value = fetchfunc(stats, range_no, &isnull);
if (isnull)
{
/* range is null, just count that */
null_cnt++;
continue;
}
/*
 * XXX: should we ignore wide values, like std_typanalyze does, to
 * avoid bloating the statistics table?
 */
total_width += VARSIZE_ANY(DatumGetPointer(value));
/* Get range and deserialize it for further analysis. */
range = DatumGetRangeType(value);
range_deserialize(typcache, range, &lower, &upper, &empty);
if (!empty)
{
/*
 * Remember bounds and length for further usage in histograms. The
 * three arrays are packed densely: they are indexed by the running
 * count of non-empty ranges, not by the sample row number.
 */
lowers[non_empty_cnt] = lower;
uppers[non_empty_cnt] = upper;
if (lower.infinite || upper.infinite)
{
/* Length of any kind of an infinite range is infinite */
length = get_float8_infinity();
}
else if (has_subdiff)
{
/*
 * For an ordinary range, use subdiff function between upper
 * and lower bound values.
 */
length = DatumGetFloat8(FunctionCall2Coll(
&typcache->rng_subdiff_finfo,
typcache->rng_collation,
upper.val, lower.val));
}
else
{
/* Use default value of 1.0 if no subdiff is available. */
length = 1.0;
}
lengths[non_empty_cnt] = length;
non_empty_cnt++;
}
else
empty_cnt++;
/* counts every non-null sample, empty or not */
non_null_cnt++;
}
/* index of the next statistics slot to fill in the stats->sta* arrays */
slot_idx = 0;
/* We can only compute real stats if we found some non-null values. */
if (non_null_cnt > 0)
{
Datum *bound_hist_values;
Datum *length_hist_values;
int pos,
posfrac,
delta,
deltafrac,
i;
MemoryContext old_cxt;
float4 *emptyfrac;
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) samplerows;
stats->stawidth = total_width / (double) non_null_cnt;
/* NOTE(review): -1.0 presumably marks the column as all-distinct -- confirm */
stats->stadistinct = -1.0;
/*
 * Must copy the target values into anl_context. Everything palloc'd
 * from here until the matching switch back is allocated there.
 */
old_cxt = MemoryContextSwitchTo(stats->anl_context);
/*
 * Generate a bounds histogram slot entry if there are at least two
 * values.
 */
if (non_empty_cnt >= 2)
{
/*
 * Sort bound values. Lower and upper bounds are sorted independently
 * of each other, so lowers[k] and uppers[k] no longer necessarily
 * belong to the same input range after this point.
 */
qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
range_bound_qsort_cmp, typcache);
qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
range_bound_qsort_cmp, typcache);
/* use every value, unless that exceeds num_bins + 1 histogram entries */
num_hist = non_empty_cnt;
if (num_hist > num_bins)
num_hist = num_bins + 1;
bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
/*
 * The object of this loop is to construct ranges from first and
 * last entries in lowers[] and uppers[] along with evenly-spaced
 * values in between. So the i'th value is a range of
 * lowers[(i * (nvals - 1)) / (num_hist - 1)] and
 * uppers[(i * (nvals - 1)) / (num_hist - 1)]. But computing that
 * subscript directly risks integer overflow when the stats target
 * is more than a couple thousand. Instead we add
 * (nvals - 1) / (num_hist - 1) to pos at each step, tracking the
 * integral and fractional parts of the sum separately.
 */
delta = (non_empty_cnt - 1) / (num_hist - 1);
deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
pos = posfrac = 0;
for (i = 0; i < num_hist; i++)
{
/* pair the pos'th smallest lower bound with the pos'th smallest upper bound */
bound_hist_values[i] = PointerGetDatum(range_serialize(
typcache, &lowers[pos], &uppers[pos], false));
pos += delta;
posfrac += deltafrac;
if (posfrac >= (num_hist - 1))
{
/* fractional part exceeds 1, carry to integer part */
pos++;
posfrac -= (num_hist - 1);
}
}
/*
 * Only the kind, values and value count are set for this slot; the
 * operator and element-type fields are left as the caller provided.
 */
stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
stats->stavalues[slot_idx] = bound_hist_values;
stats->numvalues[slot_idx] = num_hist;
slot_idx++;
}
/*
 * Generate a length histogram slot entry if there are at least two
 * values.
 */
if (non_empty_cnt >= 2)
{
/*
 * Ascending sort of range lengths for further filling of
 * histogram
 */
qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp);
num_hist = non_empty_cnt;
if (num_hist > num_bins)
num_hist = num_bins + 1;
length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
/*
 * The object of this loop is to copy the first and last lengths[]
 * entries along with evenly-spaced values in between. So the i'th
 * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
 * computing that subscript directly risks integer overflow when the
 * stats target is more than a couple thousand. Instead we add
 * (nvals - 1) / (num_hist - 1) to pos at each step, tracking the
 * integral and fractional parts of the sum separately.
 */
delta = (non_empty_cnt - 1) / (num_hist - 1);
deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
pos = posfrac = 0;
for (i = 0; i < num_hist; i++)
{
length_hist_values[i] = Float8GetDatum(lengths[pos]);
pos += delta;
posfrac += deltafrac;
if (posfrac >= (num_hist - 1))
{
/* fractional part exceeds 1, carry to integer part */
pos++;
posfrac -= (num_hist - 1);
}
}
}
else
{
/*
 * Even when we don't create the histogram, store an empty array
 * to mean "no histogram". We can't just leave stavalues NULL,
 * because get_attstatsslot() errors if you ask for stavalues, and
 * it's NULL. We'll still store the empty fraction in stanumbers.
 */
length_hist_values = palloc(0);
num_hist = 0;
}
/*
 * The length histogram slot is filled in unconditionally (possibly with
 * zero values), since it also carries the empty-range fraction. Its
 * values are float8, so the slot's operator and element-type fields are
 * set to describe float8 here.
 */
stats->staop[slot_idx] = Float8LessOperator;
stats->stavalues[slot_idx] = length_hist_values;
stats->numvalues[slot_idx] = num_hist;
stats->statypid[slot_idx] = FLOAT8OID;
stats->statyplen[slot_idx] = sizeof(float8);
#ifdef USE_FLOAT8_BYVAL
stats->statypbyval[slot_idx] = true;
#else
stats->statypbyval[slot_idx] = false;
#endif
stats->statypalign[slot_idx] = 'd';
/*
 * Store the fraction of empty ranges among the non-null samples. The
 * ratio is computed in double and stored as a single float4.
 */
emptyfrac = (float4 *) palloc(sizeof(float4));
*emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
stats->stanumbers[slot_idx] = emptyfrac;
stats->numnumbers[slot_idx] = 1;
stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
slot_idx++;
/* back to the memory context that was current on entry to this block */
MemoryContextSwitchTo(old_cxt);
}
else if (null_cnt > 0)
{
/* We found only nulls; assume the column is entirely null */
stats->stats_valid = true;
stats->stanullfrac = 1.0;
stats->stawidth = 0; /* "unknown" */
stats->stadistinct = 0.0; /* "unknown" */
}
/*
 * We don't need to bother cleaning up any of our temporary palloc's
 * (lowers, uppers, lengths). NOTE(review): presumably the memory context
 * that was current on entry is reset by the caller -- confirm.
 */
}
static int float8_qsort_cmp(const void *a1, const void *a2) [static]
Definition at line 65 of file rangetypes_typanalyze.c.
Referenced by compute_range_stats().
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg) [static]
Definition at line 82 of file rangetypes_typanalyze.c.
References range_cmp_bounds().
Referenced by compute_range_stats().
{
    /*
     * Comparator with an extra argument, used with qsort_arg(): a1 and a2
     * point at RangeBound elements, and arg is the TypeCacheEntry that
     * range_cmp_bounds() needs. The result of range_cmp_bounds() is
     * returned unchanged.
     */
    return range_cmp_bounds((TypeCacheEntry *) arg,
                            (RangeBound *) a1,
                            (RangeBound *) a2);
}
Datum range_typanalyze(PG_FUNCTION_ARGS)
Definition at line 41 of file rangetypes_typanalyze.c.
References VacAttrStats::attr, VacAttrStats::attrtypid, VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, VacAttrStats::minrows, PG_GETARG_POINTER, PG_RETURN_BOOL, and range_get_typcache().
{
    /*
     * Set up the VacAttrStats passed as argument 0 for a range column:
     * install compute_range_stats as the compute_stats callback, stash the
     * range type's cache entry in extra_data, and set minrows. Always
     * returns true.
     */
    VacAttrStats *vacstats = (VacAttrStats *) PG_GETARG_POINTER(0);
    Form_pg_attribute att = vacstats->attr;

    /* Get information about range type */
    TypeCacheEntry *range_cache = range_get_typcache(fcinfo, vacstats->attrtypid);

    /* Fall back to the default statistics target when the column's is negative */
    if (att->attstattarget < 0)
    {
        att->attstattarget = default_statistics_target;
    }

    /* same as in std_typanalyze */
    vacstats->minrows = 300 * att->attstattarget;

    /* compute_range_stats reads the type cache entry back from extra_data */
    vacstats->extra_data = range_cache;
    vacstats->compute_stats = compute_range_stats;

    PG_RETURN_BOOL(true);
}
1.7.1