#include "postgres.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "utils/builtins.h"
#include "utils/rangetypes.h"
Go to the source code of this file.
Functions
static int float8_qsort_cmp(const void *a1, const void *a2)
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
Datum range_typanalyze(PG_FUNCTION_ARGS)
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows) [static]
Definition at line 95 of file rangetypes_typanalyze.c.
References VacAttrStats::anl_context, VacAttrStats::attr, DatumGetFloat8, DatumGetPointer, DatumGetRangeType, VacAttrStats::extra_data, float8_qsort_cmp(), Float8GetDatum(), FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), i, RangeBound::infinite, MemoryContextSwitchTo(), VacAttrStats::numnumbers, VacAttrStats::numvalues, OidIsValid, palloc(), PointerGetDatum, qsort, qsort_arg(), range_bound_qsort_cmp(), range_deserialize(), range_serialize(), TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, VacAttrStats::stadistinct, VacAttrStats::stakind, VacAttrStats::stanullfrac, VacAttrStats::stanumbers, VacAttrStats::staop, VacAttrStats::stats_valid, VacAttrStats::statypalign, VacAttrStats::statypbyval, VacAttrStats::statypid, VacAttrStats::statyplen, VacAttrStats::stavalues, VacAttrStats::stawidth, vacuum_delay_point(), RangeBound::val, value, and VARSIZE_ANY.
{
	/*
	 * Gather statistics for a range-typed column from the sampled rows.
	 *
	 * Each sample row is obtained through fetchfunc.  NULLs and empty ranges
	 * are only counted; for every non-empty range we remember its lower
	 * bound, its upper bound and its length.  From those we fill up to two
	 * statistics slots:
	 *
	 *	- a bounds histogram, produced only when at least two non-empty
	 *	  ranges were sampled;
	 *	- a range-length histogram (possibly an empty array), whose
	 *	  stanumbers entry holds the fraction of empty ranges among the
	 *	  non-null sample values.
	 *
	 * stats->extra_data is read as the range type's TypeCacheEntry.
	 * totalrows is not used here.
	 */
	TypeCacheEntry *rngcache = (TypeCacheEntry *) stats->extra_data;
	bool		can_subdiff = OidIsValid(rngcache->rng_subdiff_finfo.fn_oid);
	int			target_bins = stats->attr->attstattarget;
	int			n_null = 0;
	int			n_nonnull = 0;
	int			n_nonempty = 0;
	int			n_empty = 0;
	double		width_sum = 0;
	RangeBound *lo_bounds;
	RangeBound *hi_bounds;
	float8	   *len_vals;
	Datum	   *length_hist;
	float4	   *empty_frac;
	MemoryContext saved_cxt;
	int			hist_len;
	int			slot;
	int			row;

	/* Working arrays: one entry per sample row is always enough. */
	lo_bounds = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
	hi_bounds = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
	len_vals = (float8 *) palloc(sizeof(float8) * samplerows);

	/* Pass 1: classify every sample row and collect bounds and lengths. */
	for (row = 0; row < samplerows; row++)
	{
		Datum		datum;
		bool		is_null;
		bool		is_empty;
		RangeBound	lo;
		RangeBound	hi;
		float8		len;

		vacuum_delay_point();

		datum = fetchfunc(stats, row, &is_null);
		if (is_null)
		{
			/* Nothing to inspect; just count it. */
			n_null++;
			continue;
		}
		n_nonnull++;

		/*
		 * XXX: should wide values be ignored, as std_typanalyze does, to
		 * avoid bloating the statistics table?
		 */
		width_sum += VARSIZE_ANY(DatumGetPointer(datum));

		/* Break the range apart into its bounds. */
		range_deserialize(rngcache, DatumGetRangeType(datum),
						  &lo, &hi, &is_empty);
		if (is_empty)
		{
			n_empty++;
			continue;
		}

		/* Keep the bounds for the bounds histogram. */
		lo_bounds[n_nonempty] = lo;
		hi_bounds[n_nonempty] = hi;

		/*
		 * Work out the length: infinite if either bound is infinite,
		 * otherwise subdiff(upper, lower) when the range type provides a
		 * subdiff function, otherwise the fallback value 1.0.
		 */
		len = 1.0;
		if (lo.infinite || hi.infinite)
			len = get_float8_infinity();
		else if (can_subdiff)
			len = DatumGetFloat8(FunctionCall2Coll(&rngcache->rng_subdiff_finfo,
												   rngcache->rng_collation,
												   hi.val, lo.val));

		len_vals[n_nonempty] = len;
		n_nonempty++;
	}

	/* Without any non-null value there are no real statistics to compute. */
	if (n_nonnull <= 0)
	{
		if (n_null > 0)
		{
			/* Only nulls were sampled; treat the column as entirely null. */
			stats->stats_valid = true;
			stats->stanullfrac = 1.0;
			stats->stawidth = 0;	/* "unknown" */
			stats->stadistinct = 0.0;	/* "unknown" */
		}
		return;
	}

	/* Simple per-column numbers: null fraction and average width. */
	stats->stats_valid = true;
	stats->stanullfrac = (double) n_null / (double) samplerows;
	stats->stawidth = width_sum / (double) n_nonnull;
	stats->stadistinct = -1.0;

	/* Everything stored in the slots has to be allocated in anl_context. */
	saved_cxt = MemoryContextSwitchTo(stats->anl_context);

	slot = 0;

	if (n_nonempty >= 2)
	{
		Datum	   *bound_hist;
		int			denom;
		int			step;
		int			step_rem;
		int			src;
		int			carry;
		int			k;

		/*
		 * Both histograms pick the first and last sorted entries plus
		 * evenly spaced ones in between, i.e. entry k comes from index
		 * (k * (nvals - 1)) / (hist_len - 1).  Evaluating that product
		 * directly risks integer overflow when the stats target is more
		 * than a couple thousand, so instead the quotient and remainder of
		 * (nvals - 1) / (hist_len - 1) are added at every step, with the
		 * accumulated remainder carried into the index once it reaches the
		 * divisor.
		 */
		hist_len = (n_nonempty > target_bins) ? target_bins + 1 : n_nonempty;
		denom = hist_len - 1;
		step = (n_nonempty - 1) / denom;
		step_rem = (n_nonempty - 1) % denom;

		/* ---- Slot: bounds histogram ---- */
		qsort_arg(lo_bounds, n_nonempty, sizeof(RangeBound),
				  range_bound_qsort_cmp, rngcache);
		qsort_arg(hi_bounds, n_nonempty, sizeof(RangeBound),
				  range_bound_qsort_cmp, rngcache);

		bound_hist = (Datum *) palloc(hist_len * sizeof(Datum));

		src = 0;
		carry = 0;
		for (k = 0; k < hist_len; k++)
		{
			/* Pair the src'th sorted lower bound with the src'th upper. */
			bound_hist[k] = PointerGetDatum(range_serialize(rngcache,
															&lo_bounds[src],
															&hi_bounds[src],
															false));
			src += step;
			carry += step_rem;
			if (carry >= denom)
			{
				/* accumulated remainder reached one whole step */
				src++;
				carry -= denom;
			}
		}

		stats->stakind[slot] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
		stats->stavalues[slot] = bound_hist;
		stats->numvalues[slot] = hist_len;
		slot++;

		/* ---- Slot: length histogram values ---- */
		qsort(len_vals, n_nonempty, sizeof(float8), float8_qsort_cmp);

		length_hist = (Datum *) palloc(hist_len * sizeof(Datum));

		src = 0;
		carry = 0;
		for (k = 0; k < hist_len; k++)
		{
			length_hist[k] = Float8GetDatum(len_vals[src]);
			src += step;
			carry += step_rem;
			if (carry >= denom)
			{
				/* accumulated remainder reached one whole step */
				src++;
				carry -= denom;
			}
		}
	}
	else
	{
		/*
		 * Too few non-empty ranges for a histogram.  Store an empty array
		 * rather than leaving stavalues NULL, because get_attstatsslot()
		 * errors out when asked for stavalues that are NULL.  The empty
		 * fraction is still recorded in stanumbers below.
		 */
		length_hist = palloc(0);
		hist_len = 0;
	}

	/* Describe the length-histogram slot; its elements are float8. */
	stats->stakind[slot] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
	stats->staop[slot] = Float8LessOperator;
	stats->stavalues[slot] = length_hist;
	stats->numvalues[slot] = hist_len;
	stats->statypid[slot] = FLOAT8OID;
	stats->statyplen[slot] = sizeof(float8);
	stats->statypalign[slot] = 'd';
#ifdef USE_FLOAT8_BYVAL
	stats->statypbyval[slot] = true;
#else
	stats->statypbyval[slot] = false;
#endif

	/* Fraction of empty ranges among the non-null sample values. */
	empty_frac = (float4 *) palloc(sizeof(float4));
	*empty_frac = ((double) n_empty) / ((double) n_nonnull);
	stats->stanumbers[slot] = empty_frac;
	stats->numnumbers[slot] = 1;
	slot++;

	MemoryContextSwitchTo(saved_cxt);

	/*
	 * The temporary working arrays are deliberately not freed here.
	 */
}
static int float8_qsort_cmp(const void *a1, const void *a2) [static]
Definition at line 65 of file rangetypes_typanalyze.c.
Referenced by compute_range_stats().
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg) [static]
Definition at line 82 of file rangetypes_typanalyze.c.
References range_cmp_bounds().
Referenced by compute_range_stats().
{
	/*
	 * Comparator for sorting arrays of RangeBound.  a1 and a2 point at the
	 * two bounds being compared; arg is the TypeCacheEntry handed through
	 * by the sort call.  The ordering is whatever range_cmp_bounds() reports.
	 */
	return range_cmp_bounds((TypeCacheEntry *) arg,
							(RangeBound *) a1,
							(RangeBound *) a2);
}
Datum range_typanalyze(PG_FUNCTION_ARGS)
Definition at line 41 of file rangetypes_typanalyze.c.
References VacAttrStats::attr, VacAttrStats::attrtypid, VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, VacAttrStats::minrows, PG_GETARG_POINTER, PG_RETURN_BOOL, and range_get_typcache().
{
	/*
	 * Set up the VacAttrStats passed as argument 0 so that the column is
	 * analyzed by compute_range_stats().  Always returns true.
	 */
	VacAttrStats *vacstats = (VacAttrStats *) PG_GETARG_POINTER(0);

	/* Look up the range type's cache entry before touching anything else. */
	TypeCacheEntry *range_info = range_get_typcache(fcinfo,
													 vacstats->attrtypid);

	/* A negative target means "use the default statistics target". */
	if (vacstats->attr->attstattarget < 0)
		vacstats->attr->attstattarget = default_statistics_target;

	/* compute_range_stats() reads the cache entry back from extra_data. */
	vacstats->extra_data = range_info;
	vacstats->compute_stats = compute_range_stats;

	/* same as in std_typanalyze */
	vacstats->minrows = 300 * vacstats->attr->attstattarget;

	PG_RETURN_BOOL(true);
}