00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include <math.h>
00018
00019 #include "access/multixact.h"
00020 #include "access/transam.h"
00021 #include "access/tupconvert.h"
00022 #include "access/tuptoaster.h"
00023 #include "access/visibilitymap.h"
00024 #include "access/xact.h"
00025 #include "catalog/index.h"
00026 #include "catalog/indexing.h"
00027 #include "catalog/pg_collation.h"
00028 #include "catalog/pg_inherits_fn.h"
00029 #include "catalog/pg_namespace.h"
00030 #include "commands/dbcommands.h"
00031 #include "commands/tablecmds.h"
00032 #include "commands/vacuum.h"
00033 #include "executor/executor.h"
00034 #include "foreign/fdwapi.h"
00035 #include "miscadmin.h"
00036 #include "nodes/nodeFuncs.h"
00037 #include "parser/parse_oper.h"
00038 #include "parser/parse_relation.h"
00039 #include "pgstat.h"
00040 #include "postmaster/autovacuum.h"
00041 #include "storage/bufmgr.h"
00042 #include "storage/lmgr.h"
00043 #include "storage/proc.h"
00044 #include "storage/procarray.h"
00045 #include "utils/acl.h"
00046 #include "utils/attoptcache.h"
00047 #include "utils/datum.h"
00048 #include "utils/guc.h"
00049 #include "utils/lsyscache.h"
00050 #include "utils/memutils.h"
00051 #include "utils/pg_rusage.h"
00052 #include "utils/sortsupport.h"
00053 #include "utils/syscache.h"
00054 #include "utils/timestamp.h"
00055 #include "utils/tqual.h"
00056
00057
00058
00059 typedef struct
00060 {
00061 BlockNumber N;
00062 int n;
00063 BlockNumber t;
00064 int m;
00065 } BlockSamplerData;
00066
00067 typedef BlockSamplerData *BlockSampler;
00068
00069
00070 typedef struct AnlIndexData
00071 {
00072 IndexInfo *indexInfo;
00073 double tupleFract;
00074 VacAttrStats **vacattrstats;
00075 int attr_cnt;
00076 } AnlIndexData;
00077
00078
00079
00080 int default_statistics_target = 100;
00081
00082
00083 static MemoryContext anl_context = NULL;
00084 static BufferAccessStrategy vac_strategy;
00085
00086
00087 static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
00088 AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
00089 bool inh, int elevel);
00090 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
00091 int samplesize);
00092 static bool BlockSampler_HasMore(BlockSampler bs);
00093 static BlockNumber BlockSampler_Next(BlockSampler bs);
00094 static void compute_index_stats(Relation onerel, double totalrows,
00095 AnlIndexData *indexdata, int nindexes,
00096 HeapTuple *rows, int numrows,
00097 MemoryContext col_context);
00098 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
00099 Node *index_expr);
00100 static int acquire_sample_rows(Relation onerel, int elevel,
00101 HeapTuple *rows, int targrows,
00102 double *totalrows, double *totaldeadrows);
00103 static int compare_rows(const void *a, const void *b);
00104 static int acquire_inherited_sample_rows(Relation onerel, int elevel,
00105 HeapTuple *rows, int targrows,
00106 double *totalrows, double *totaldeadrows);
00107 static void update_attstats(Oid relid, bool inh,
00108 int natts, VacAttrStats **vacattrstats);
00109 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
00110 static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
00111
00112
00113
00114
00115
00116 void
00117 analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
00118 {
00119 Relation onerel;
00120 int elevel;
00121 AcquireSampleRowsFunc acquirefunc = NULL;
00122 BlockNumber relpages = 0;
00123
00124
00125 if (vacstmt->options & VACOPT_VERBOSE)
00126 elevel = INFO;
00127 else
00128 elevel = DEBUG2;
00129
00130
00131 vac_strategy = bstrategy;
00132
00133
00134
00135
00136 CHECK_FOR_INTERRUPTS();
00137
00138
00139
00140
00141
00142
00143
00144
00145 if (!(vacstmt->options & VACOPT_NOWAIT))
00146 onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
00147 else if (ConditionalLockRelationOid(relid, ShareUpdateExclusiveLock))
00148 onerel = try_relation_open(relid, NoLock);
00149 else
00150 {
00151 onerel = NULL;
00152 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
00153 ereport(LOG,
00154 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
00155 errmsg("skipping analyze of \"%s\" --- lock not available",
00156 vacstmt->relation->relname)));
00157 }
00158 if (!onerel)
00159 return;
00160
00161
00162
00163
00164 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
00165 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
00166 {
00167
00168 if (!(vacstmt->options & VACOPT_VACUUM))
00169 {
00170 if (onerel->rd_rel->relisshared)
00171 ereport(WARNING,
00172 (errmsg("skipping \"%s\" --- only superuser can analyze it",
00173 RelationGetRelationName(onerel))));
00174 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
00175 ereport(WARNING,
00176 (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
00177 RelationGetRelationName(onerel))));
00178 else
00179 ereport(WARNING,
00180 (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
00181 RelationGetRelationName(onerel))));
00182 }
00183 relation_close(onerel, ShareUpdateExclusiveLock);
00184 return;
00185 }
00186
00187
00188
00189
00190
00191
00192
00193 if (RELATION_IS_OTHER_TEMP(onerel))
00194 {
00195 relation_close(onerel, ShareUpdateExclusiveLock);
00196 return;
00197 }
00198
00199
00200
00201
00202 if (RelationGetRelid(onerel) == StatisticRelationId)
00203 {
00204 relation_close(onerel, ShareUpdateExclusiveLock);
00205 return;
00206 }
00207
00208
00209
00210
00211
00212
00213 if (onerel->rd_rel->relkind == RELKIND_RELATION ||
00214 onerel->rd_rel->relkind == RELKIND_MATVIEW)
00215 {
00216
00217 acquirefunc = acquire_sample_rows;
00218
00219 relpages = RelationGetNumberOfBlocks(onerel);
00220 }
00221 else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
00222 {
00223
00224
00225
00226
00227 FdwRoutine *fdwroutine;
00228 bool ok = false;
00229
00230 fdwroutine = GetFdwRoutineForRelation(onerel, false);
00231
00232 if (fdwroutine->AnalyzeForeignTable != NULL)
00233 ok = fdwroutine->AnalyzeForeignTable(onerel,
00234 &acquirefunc,
00235 &relpages);
00236
00237 if (!ok)
00238 {
00239 ereport(WARNING,
00240 (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
00241 RelationGetRelationName(onerel))));
00242 relation_close(onerel, ShareUpdateExclusiveLock);
00243 return;
00244 }
00245 }
00246 else
00247 {
00248
00249 if (!(vacstmt->options & VACOPT_VACUUM))
00250 ereport(WARNING,
00251 (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
00252 RelationGetRelationName(onerel))));
00253 relation_close(onerel, ShareUpdateExclusiveLock);
00254 return;
00255 }
00256
00257
00258
00259
00260 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00261 MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
00262 LWLockRelease(ProcArrayLock);
00263
00264
00265
00266
00267 do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, false, elevel);
00268
00269
00270
00271
00272 if (onerel->rd_rel->relhassubclass)
00273 do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, true, elevel);
00274
00275
00276
00277
00278
00279
00280
00281 relation_close(onerel, NoLock);
00282
00283
00284
00285
00286
00287 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00288 MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
00289 LWLockRelease(ProcArrayLock);
00290 }
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 static void
00301 do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
00302 AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
00303 bool inh, int elevel)
00304 {
00305 int attr_cnt,
00306 tcnt,
00307 i,
00308 ind;
00309 Relation *Irel;
00310 int nindexes;
00311 bool hasindex;
00312 VacAttrStats **vacattrstats;
00313 AnlIndexData *indexdata;
00314 int targrows,
00315 numrows;
00316 double totalrows,
00317 totaldeadrows;
00318 HeapTuple *rows;
00319 PGRUsage ru0;
00320 TimestampTz starttime = 0;
00321 MemoryContext caller_context;
00322 Oid save_userid;
00323 int save_sec_context;
00324 int save_nestlevel;
00325
00326 if (inh)
00327 ereport(elevel,
00328 (errmsg("analyzing \"%s.%s\" inheritance tree",
00329 get_namespace_name(RelationGetNamespace(onerel)),
00330 RelationGetRelationName(onerel))));
00331 else
00332 ereport(elevel,
00333 (errmsg("analyzing \"%s.%s\"",
00334 get_namespace_name(RelationGetNamespace(onerel)),
00335 RelationGetRelationName(onerel))));
00336
00337
00338
00339
00340
00341 anl_context = AllocSetContextCreate(CurrentMemoryContext,
00342 "Analyze",
00343 ALLOCSET_DEFAULT_MINSIZE,
00344 ALLOCSET_DEFAULT_INITSIZE,
00345 ALLOCSET_DEFAULT_MAXSIZE);
00346 caller_context = MemoryContextSwitchTo(anl_context);
00347
00348
00349
00350
00351
00352
00353 GetUserIdAndSecContext(&save_userid, &save_sec_context);
00354 SetUserIdAndSecContext(onerel->rd_rel->relowner,
00355 save_sec_context | SECURITY_RESTRICTED_OPERATION);
00356 save_nestlevel = NewGUCNestLevel();
00357
00358
00359 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
00360 {
00361 pg_rusage_init(&ru0);
00362 if (Log_autovacuum_min_duration > 0)
00363 starttime = GetCurrentTimestamp();
00364 }
00365
00366
00367
00368
00369
00370
00371 if (vacstmt->va_cols != NIL)
00372 {
00373 ListCell *le;
00374
00375 vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
00376 sizeof(VacAttrStats *));
00377 tcnt = 0;
00378 foreach(le, vacstmt->va_cols)
00379 {
00380 char *col = strVal(lfirst(le));
00381
00382 i = attnameAttNum(onerel, col, false);
00383 if (i == InvalidAttrNumber)
00384 ereport(ERROR,
00385 (errcode(ERRCODE_UNDEFINED_COLUMN),
00386 errmsg("column \"%s\" of relation \"%s\" does not exist",
00387 col, RelationGetRelationName(onerel))));
00388 vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
00389 if (vacattrstats[tcnt] != NULL)
00390 tcnt++;
00391 }
00392 attr_cnt = tcnt;
00393 }
00394 else
00395 {
00396 attr_cnt = onerel->rd_att->natts;
00397 vacattrstats = (VacAttrStats **)
00398 palloc(attr_cnt * sizeof(VacAttrStats *));
00399 tcnt = 0;
00400 for (i = 1; i <= attr_cnt; i++)
00401 {
00402 vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
00403 if (vacattrstats[tcnt] != NULL)
00404 tcnt++;
00405 }
00406 attr_cnt = tcnt;
00407 }
00408
00409
00410
00411
00412
00413
00414
00415
00416 if (!inh)
00417 vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
00418 else
00419 {
00420 Irel = NULL;
00421 nindexes = 0;
00422 }
00423 hasindex = (nindexes > 0);
00424 indexdata = NULL;
00425 if (hasindex)
00426 {
00427 indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
00428 for (ind = 0; ind < nindexes; ind++)
00429 {
00430 AnlIndexData *thisdata = &indexdata[ind];
00431 IndexInfo *indexInfo;
00432
00433 thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
00434 thisdata->tupleFract = 1.0;
00435 if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL)
00436 {
00437 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
00438
00439 thisdata->vacattrstats = (VacAttrStats **)
00440 palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
00441 tcnt = 0;
00442 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
00443 {
00444 int keycol = indexInfo->ii_KeyAttrNumbers[i];
00445
00446 if (keycol == 0)
00447 {
00448
00449 Node *indexkey;
00450
00451 if (indexpr_item == NULL)
00452 elog(ERROR, "too few entries in indexprs list");
00453 indexkey = (Node *) lfirst(indexpr_item);
00454 indexpr_item = lnext(indexpr_item);
00455 thisdata->vacattrstats[tcnt] =
00456 examine_attribute(Irel[ind], i + 1, indexkey);
00457 if (thisdata->vacattrstats[tcnt] != NULL)
00458 tcnt++;
00459 }
00460 }
00461 thisdata->attr_cnt = tcnt;
00462 }
00463 }
00464 }
00465
00466
00467
00468
00469
00470
00471
00472 targrows = 100;
00473 for (i = 0; i < attr_cnt; i++)
00474 {
00475 if (targrows < vacattrstats[i]->minrows)
00476 targrows = vacattrstats[i]->minrows;
00477 }
00478 for (ind = 0; ind < nindexes; ind++)
00479 {
00480 AnlIndexData *thisdata = &indexdata[ind];
00481
00482 for (i = 0; i < thisdata->attr_cnt; i++)
00483 {
00484 if (targrows < thisdata->vacattrstats[i]->minrows)
00485 targrows = thisdata->vacattrstats[i]->minrows;
00486 }
00487 }
00488
00489
00490
00491
00492 rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
00493 if (inh)
00494 numrows = acquire_inherited_sample_rows(onerel, elevel,
00495 rows, targrows,
00496 &totalrows, &totaldeadrows);
00497 else
00498 numrows = (*acquirefunc) (onerel, elevel,
00499 rows, targrows,
00500 &totalrows, &totaldeadrows);
00501
00502
00503
00504
00505
00506
00507
00508 if (numrows > 0)
00509 {
00510 MemoryContext col_context,
00511 old_context;
00512
00513 col_context = AllocSetContextCreate(anl_context,
00514 "Analyze Column",
00515 ALLOCSET_DEFAULT_MINSIZE,
00516 ALLOCSET_DEFAULT_INITSIZE,
00517 ALLOCSET_DEFAULT_MAXSIZE);
00518 old_context = MemoryContextSwitchTo(col_context);
00519
00520 for (i = 0; i < attr_cnt; i++)
00521 {
00522 VacAttrStats *stats = vacattrstats[i];
00523 AttributeOpts *aopt;
00524
00525 stats->rows = rows;
00526 stats->tupDesc = onerel->rd_att;
00527 (*stats->compute_stats) (stats,
00528 std_fetch_func,
00529 numrows,
00530 totalrows);
00531
00532
00533
00534
00535
00536 aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
00537 if (aopt != NULL)
00538 {
00539 float8 n_distinct;
00540
00541 n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
00542 if (n_distinct != 0.0)
00543 stats->stadistinct = n_distinct;
00544 }
00545
00546 MemoryContextResetAndDeleteChildren(col_context);
00547 }
00548
00549 if (hasindex)
00550 compute_index_stats(onerel, totalrows,
00551 indexdata, nindexes,
00552 rows, numrows,
00553 col_context);
00554
00555 MemoryContextSwitchTo(old_context);
00556 MemoryContextDelete(col_context);
00557
00558
00559
00560
00561
00562
00563 update_attstats(RelationGetRelid(onerel), inh,
00564 attr_cnt, vacattrstats);
00565
00566 for (ind = 0; ind < nindexes; ind++)
00567 {
00568 AnlIndexData *thisdata = &indexdata[ind];
00569
00570 update_attstats(RelationGetRelid(Irel[ind]), false,
00571 thisdata->attr_cnt, thisdata->vacattrstats);
00572 }
00573 }
00574
00575
00576
00577
00578
00579 if (!inh)
00580 vac_update_relstats(onerel,
00581 relpages,
00582 totalrows,
00583 visibilitymap_count(onerel),
00584 hasindex,
00585 InvalidTransactionId,
00586 InvalidMultiXactId);
00587
00588
00589
00590
00591
00592
00593 if (!inh && !(vacstmt->options & VACOPT_VACUUM))
00594 {
00595 for (ind = 0; ind < nindexes; ind++)
00596 {
00597 AnlIndexData *thisdata = &indexdata[ind];
00598 double totalindexrows;
00599
00600 totalindexrows = ceil(thisdata->tupleFract * totalrows);
00601 vac_update_relstats(Irel[ind],
00602 RelationGetNumberOfBlocks(Irel[ind]),
00603 totalindexrows,
00604 0,
00605 false,
00606 InvalidTransactionId,
00607 InvalidMultiXactId);
00608 }
00609 }
00610
00611
00612
00613
00614
00615
00616 if (!inh)
00617 pgstat_report_analyze(onerel, totalrows, totaldeadrows);
00618
00619
00620 if (!(vacstmt->options & VACOPT_VACUUM))
00621 {
00622 for (ind = 0; ind < nindexes; ind++)
00623 {
00624 IndexBulkDeleteResult *stats;
00625 IndexVacuumInfo ivinfo;
00626
00627 ivinfo.index = Irel[ind];
00628 ivinfo.analyze_only = true;
00629 ivinfo.estimated_count = true;
00630 ivinfo.message_level = elevel;
00631 ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
00632 ivinfo.strategy = vac_strategy;
00633
00634 stats = index_vacuum_cleanup(&ivinfo, NULL);
00635
00636 if (stats)
00637 pfree(stats);
00638 }
00639 }
00640
00641
00642 vac_close_indexes(nindexes, Irel, NoLock);
00643
00644
00645 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
00646 {
00647 if (Log_autovacuum_min_duration == 0 ||
00648 TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
00649 Log_autovacuum_min_duration))
00650 ereport(LOG,
00651 (errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
00652 get_database_name(MyDatabaseId),
00653 get_namespace_name(RelationGetNamespace(onerel)),
00654 RelationGetRelationName(onerel),
00655 pg_rusage_show(&ru0))));
00656 }
00657
00658
00659 AtEOXact_GUC(false, save_nestlevel);
00660
00661
00662 SetUserIdAndSecContext(save_userid, save_sec_context);
00663
00664
00665 MemoryContextSwitchTo(caller_context);
00666 MemoryContextDelete(anl_context);
00667 anl_context = NULL;
00668 }
00669
00670
00671
00672
00673 static void
00674 compute_index_stats(Relation onerel, double totalrows,
00675 AnlIndexData *indexdata, int nindexes,
00676 HeapTuple *rows, int numrows,
00677 MemoryContext col_context)
00678 {
00679 MemoryContext ind_context,
00680 old_context;
00681 Datum values[INDEX_MAX_KEYS];
00682 bool isnull[INDEX_MAX_KEYS];
00683 int ind,
00684 i;
00685
00686 ind_context = AllocSetContextCreate(anl_context,
00687 "Analyze Index",
00688 ALLOCSET_DEFAULT_MINSIZE,
00689 ALLOCSET_DEFAULT_INITSIZE,
00690 ALLOCSET_DEFAULT_MAXSIZE);
00691 old_context = MemoryContextSwitchTo(ind_context);
00692
00693 for (ind = 0; ind < nindexes; ind++)
00694 {
00695 AnlIndexData *thisdata = &indexdata[ind];
00696 IndexInfo *indexInfo = thisdata->indexInfo;
00697 int attr_cnt = thisdata->attr_cnt;
00698 TupleTableSlot *slot;
00699 EState *estate;
00700 ExprContext *econtext;
00701 List *predicate;
00702 Datum *exprvals;
00703 bool *exprnulls;
00704 int numindexrows,
00705 tcnt,
00706 rowno;
00707 double totalindexrows;
00708
00709
00710 if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
00711 continue;
00712
00713
00714
00715
00716
00717
00718 estate = CreateExecutorState();
00719 econtext = GetPerTupleExprContext(estate);
00720
00721 slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel));
00722
00723
00724 econtext->ecxt_scantuple = slot;
00725
00726
00727 predicate = (List *)
00728 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
00729 estate);
00730
00731
00732 exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
00733 exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
00734 numindexrows = 0;
00735 tcnt = 0;
00736 for (rowno = 0; rowno < numrows; rowno++)
00737 {
00738 HeapTuple heapTuple = rows[rowno];
00739
00740
00741
00742
00743
00744 ResetExprContext(econtext);
00745
00746
00747 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
00748
00749
00750 if (predicate != NIL)
00751 {
00752 if (!ExecQual(predicate, econtext, false))
00753 continue;
00754 }
00755 numindexrows++;
00756
00757 if (attr_cnt > 0)
00758 {
00759
00760
00761
00762
00763 FormIndexDatum(indexInfo,
00764 slot,
00765 estate,
00766 values,
00767 isnull);
00768
00769
00770
00771
00772
00773 for (i = 0; i < attr_cnt; i++)
00774 {
00775 VacAttrStats *stats = thisdata->vacattrstats[i];
00776 int attnum = stats->attr->attnum;
00777
00778 if (isnull[attnum - 1])
00779 {
00780 exprvals[tcnt] = (Datum) 0;
00781 exprnulls[tcnt] = true;
00782 }
00783 else
00784 {
00785 exprvals[tcnt] = datumCopy(values[attnum - 1],
00786 stats->attrtype->typbyval,
00787 stats->attrtype->typlen);
00788 exprnulls[tcnt] = false;
00789 }
00790 tcnt++;
00791 }
00792 }
00793 }
00794
00795
00796
00797
00798
00799 thisdata->tupleFract = (double) numindexrows / (double) numrows;
00800 totalindexrows = ceil(thisdata->tupleFract * totalrows);
00801
00802
00803
00804
00805 if (numindexrows > 0)
00806 {
00807 MemoryContextSwitchTo(col_context);
00808 for (i = 0; i < attr_cnt; i++)
00809 {
00810 VacAttrStats *stats = thisdata->vacattrstats[i];
00811 AttributeOpts *aopt =
00812 get_attribute_options(stats->attr->attrelid,
00813 stats->attr->attnum);
00814
00815 stats->exprvals = exprvals + i;
00816 stats->exprnulls = exprnulls + i;
00817 stats->rowstride = attr_cnt;
00818 (*stats->compute_stats) (stats,
00819 ind_fetch_func,
00820 numindexrows,
00821 totalindexrows);
00822
00823
00824
00825
00826
00827
00828 if (aopt != NULL && aopt->n_distinct != 0.0)
00829 stats->stadistinct = aopt->n_distinct;
00830
00831 MemoryContextResetAndDeleteChildren(col_context);
00832 }
00833 }
00834
00835
00836 MemoryContextSwitchTo(ind_context);
00837
00838 ExecDropSingleTupleTableSlot(slot);
00839 FreeExecutorState(estate);
00840 MemoryContextResetAndDeleteChildren(ind_context);
00841 }
00842
00843 MemoryContextSwitchTo(old_context);
00844 MemoryContextDelete(ind_context);
00845 }
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856 static VacAttrStats *
00857 examine_attribute(Relation onerel, int attnum, Node *index_expr)
00858 {
00859 Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
00860 HeapTuple typtuple;
00861 VacAttrStats *stats;
00862 int i;
00863 bool ok;
00864
00865
00866 if (attr->attisdropped)
00867 return NULL;
00868
00869
00870 if (attr->attstattarget == 0)
00871 return NULL;
00872
00873
00874
00875
00876
00877 stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
00878 stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
00879 memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890 if (index_expr)
00891 {
00892 stats->attrtypid = exprType(index_expr);
00893 stats->attrtypmod = exprTypmod(index_expr);
00894 }
00895 else
00896 {
00897 stats->attrtypid = attr->atttypid;
00898 stats->attrtypmod = attr->atttypmod;
00899 }
00900
00901 typtuple = SearchSysCacheCopy1(TYPEOID,
00902 ObjectIdGetDatum(stats->attrtypid));
00903 if (!HeapTupleIsValid(typtuple))
00904 elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
00905 stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
00906 stats->anl_context = anl_context;
00907 stats->tupattnum = attnum;
00908
00909
00910
00911
00912
00913
00914 for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
00915 {
00916 stats->statypid[i] = stats->attrtypid;
00917 stats->statyplen[i] = stats->attrtype->typlen;
00918 stats->statypbyval[i] = stats->attrtype->typbyval;
00919 stats->statypalign[i] = stats->attrtype->typalign;
00920 }
00921
00922
00923
00924
00925
00926 if (OidIsValid(stats->attrtype->typanalyze))
00927 ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
00928 PointerGetDatum(stats)));
00929 else
00930 ok = std_typanalyze(stats);
00931
00932 if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
00933 {
00934 heap_freetuple(typtuple);
00935 pfree(stats->attr);
00936 pfree(stats);
00937 return NULL;
00938 }
00939
00940 return stats;
00941 }
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956 static void
00957 BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize)
00958 {
00959 bs->N = nblocks;
00960
00961
00962
00963
00964
00965 bs->n = samplesize;
00966 bs->t = 0;
00967 bs->m = 0;
00968 }
00969
00970 static bool
00971 BlockSampler_HasMore(BlockSampler bs)
00972 {
00973 return (bs->t < bs->N) && (bs->m < bs->n);
00974 }
00975
00976 static BlockNumber
00977 BlockSampler_Next(BlockSampler bs)
00978 {
00979 BlockNumber K = bs->N - bs->t;
00980 int k = bs->n - bs->m;
00981 double p;
00982 double V;
00983
00984 Assert(BlockSampler_HasMore(bs));
00985
00986 if ((BlockNumber) k >= K)
00987 {
00988
00989 bs->m++;
00990 return bs->t++;
00991 }
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014 V = anl_random_fract();
01015 p = 1.0 - (double) k / (double) K;
01016 while (V < p)
01017 {
01018
01019 bs->t++;
01020 K--;
01021
01022
01023 p *= 1.0 - (double) k / (double) K;
01024 }
01025
01026
01027 bs->m++;
01028 return bs->t++;
01029 }
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064 static int
01065 acquire_sample_rows(Relation onerel, int elevel,
01066 HeapTuple *rows, int targrows,
01067 double *totalrows, double *totaldeadrows)
01068 {
01069 int numrows = 0;
01070 double samplerows = 0;
01071 double liverows = 0;
01072 double deadrows = 0;
01073 double rowstoskip = -1;
01074 BlockNumber totalblocks;
01075 TransactionId OldestXmin;
01076 BlockSamplerData bs;
01077 double rstate;
01078
01079 Assert(targrows > 0);
01080
01081 totalblocks = RelationGetNumberOfBlocks(onerel);
01082
01083
01084 OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
01085
01086
01087 BlockSampler_Init(&bs, totalblocks, targrows);
01088
01089 rstate = anl_init_selection_state(targrows);
01090
01091
01092 while (BlockSampler_HasMore(&bs))
01093 {
01094 BlockNumber targblock = BlockSampler_Next(&bs);
01095 Buffer targbuffer;
01096 Page targpage;
01097 OffsetNumber targoffset,
01098 maxoffset;
01099
01100 vacuum_delay_point();
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111 targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
01112 RBM_NORMAL, vac_strategy);
01113 LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
01114 targpage = BufferGetPage(targbuffer);
01115 maxoffset = PageGetMaxOffsetNumber(targpage);
01116
01117
01118 for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
01119 {
01120 ItemId itemid;
01121 HeapTupleData targtuple;
01122 bool sample_it = false;
01123
01124 itemid = PageGetItemId(targpage, targoffset);
01125
01126
01127
01128
01129
01130
01131
01132 if (!ItemIdIsNormal(itemid))
01133 {
01134 if (ItemIdIsDead(itemid))
01135 deadrows += 1;
01136 continue;
01137 }
01138
01139 ItemPointerSet(&targtuple.t_self, targblock, targoffset);
01140
01141 targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
01142 targtuple.t_len = ItemIdGetLength(itemid);
01143
01144 switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
01145 OldestXmin,
01146 targbuffer))
01147 {
01148 case HEAPTUPLE_LIVE:
01149 sample_it = true;
01150 liverows += 1;
01151 break;
01152
01153 case HEAPTUPLE_DEAD:
01154 case HEAPTUPLE_RECENTLY_DEAD:
01155
01156 deadrows += 1;
01157 break;
01158
01159 case HEAPTUPLE_INSERT_IN_PROGRESS:
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
01181 {
01182 sample_it = true;
01183 liverows += 1;
01184 }
01185 break;
01186
01187 case HEAPTUPLE_DELETE_IN_PROGRESS:
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
01201 deadrows += 1;
01202 else
01203 liverows += 1;
01204 break;
01205
01206 default:
01207 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
01208 break;
01209 }
01210
01211 if (sample_it)
01212 {
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225 if (numrows < targrows)
01226 rows[numrows++] = heap_copytuple(&targtuple);
01227 else
01228 {
01229
01230
01231
01232
01233
01234
01235 if (rowstoskip < 0)
01236 rowstoskip = anl_get_next_S(samplerows, targrows,
01237 &rstate);
01238
01239 if (rowstoskip <= 0)
01240 {
01241
01242
01243
01244
01245 int k = (int) (targrows * anl_random_fract());
01246
01247 Assert(k >= 0 && k < targrows);
01248 heap_freetuple(rows[k]);
01249 rows[k] = heap_copytuple(&targtuple);
01250 }
01251
01252 rowstoskip -= 1;
01253 }
01254
01255 samplerows += 1;
01256 }
01257 }
01258
01259
01260 UnlockReleaseBuffer(targbuffer);
01261 }
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271 if (numrows == targrows)
01272 qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
01273
01274
01275
01276
01277
01278
01279
01280 *totalrows = vac_estimate_reltuples(onerel, true,
01281 totalblocks,
01282 bs.m,
01283 liverows);
01284 if (bs.m > 0)
01285 *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
01286 else
01287 *totaldeadrows = 0.0;
01288
01289
01290
01291
01292 ereport(elevel,
01293 (errmsg("\"%s\": scanned %d of %u pages, "
01294 "containing %.0f live rows and %.0f dead rows; "
01295 "%d rows in sample, %.0f estimated total rows",
01296 RelationGetRelationName(onerel),
01297 bs.m, totalblocks,
01298 liverows, deadrows,
01299 numrows, *totalrows)));
01300
01301 return numrows;
01302 }
01303
01304
01305 double
01306 anl_random_fract(void)
01307 {
01308 return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
01309 }
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
/*
 * anl_init_selection_state -- produce the initial state value that
 * anl_get_next_S() carries between calls, for a reservoir of n rows.
 *
 * The value is exp(-log(U)/n) for one random fraction U.
 */
double
anl_init_selection_state(int n)
{
    double      u = anl_random_fract();

    return exp(-log(u) / n);
}
01331
/*
 * anl_get_next_S -- compute how many candidate rows to skip before the next
 * reservoir replacement
 *
 * t:        number of rows processed so far
 * n:        reservoir size
 * stateptr: state carried between calls (see anl_init_selection_state);
 *           read and updated only by the second method below
 *
 * Returns the skip count S as a double.
 *
 * Two methods are used: a simple sequential search while t is at most 22*n,
 * and a rejection-based method afterwards.
 * NOTE(review): these look like Vitter's reservoir-sampling Algorithms X and
 * Z respectively -- confirm against the paper before touching the arithmetic.
 * The floating-point expressions are intentionally left exactly as they were.
 */
double
anl_get_next_S(double t, int n, double *stateptr)
{
    double      S;
    double      W;
    double      term;

    /* The magic constant 22 is the switchover threshold between methods */
    if (t <= (22.0 * n))
    {
        /* Sequential search: advance S until the running quotient drops to V */
        double      V = anl_random_fract();
        double      quot;

        S = 0;
        t += 1;
        /* Note: "num" in the quotient is t - n */
        quot = (t - (double) n) / t;
        /* Find min S satisfying the stopping condition */
        while (quot > V)
        {
            S += 1;
            t += 1;
            quot *= (t - (double) n) / t;
        }

        return S;
    }

    /* Rejection method; the state is only touched on this path */
    W = *stateptr;
    term = t - (double) n + 1;

    for (;;)
    {
        double      numer,
                    numer_lim,
                    denom;
        double      U,
                    X,
                    lhs,
                    rhs,
                    y,
                    tmp;

        /* Generate U and X */
        U = anl_random_fract();
        X = t * (W - 1.0);
        S = floor(X);           /* S is tentatively set to floor(X) */

        /* Cheap acceptance test first */
        tmp = (t + 1) / term;
        lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);
        rhs = (((t + X) / (term + S)) * term) / t;
        if (lhs <= rhs)
        {
            W = rhs / lhs;
            break;
        }

        /* Full acceptance test */
        y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);
        if ((double) n < S)
        {
            denom = t;
            numer_lim = term + S;
        }
        else
        {
            denom = t - (double) n + S;
            numer_lim = t + 1;
        }
        numer = t + S;
        while (numer >= numer_lim)
        {
            y *= numer / denom;
            denom -= 1;
            numer -= 1;
        }

        /* Draw W in advance of the next attempt or call */
        W = exp(-log(anl_random_fract()) / n);
        if (exp(log(y) / n) <= (t + X) / t)
            break;
    }

    *stateptr = W;
    return S;
}
01413
01414
01415
01416
01417 static int
01418 compare_rows(const void *a, const void *b)
01419 {
01420 HeapTuple ha = *(const HeapTuple *) a;
01421 HeapTuple hb = *(const HeapTuple *) b;
01422 BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
01423 OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
01424 BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
01425 OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
01426
01427 if (ba < bb)
01428 return -1;
01429 if (ba > bb)
01430 return 1;
01431 if (oa < ob)
01432 return -1;
01433 if (oa > ob)
01434 return 1;
01435 return 0;
01436 }
01437
01438
01439
01440
01441
01442
01443
01444
01445
01446 static int
01447 acquire_inherited_sample_rows(Relation onerel, int elevel,
01448 HeapTuple *rows, int targrows,
01449 double *totalrows, double *totaldeadrows)
01450 {
01451 List *tableOIDs;
01452 Relation *rels;
01453 double *relblocks;
01454 double totalblocks;
01455 int numrows,
01456 nrels,
01457 i;
01458 ListCell *lc;
01459
01460
01461
01462
01463
01464 tableOIDs =
01465 find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);
01466
01467
01468
01469
01470
01471
01472
01473
01474 if (list_length(tableOIDs) < 2)
01475 {
01476
01477 CommandCounterIncrement();
01478 SetRelationHasSubclass(RelationGetRelid(onerel), false);
01479 return 0;
01480 }
01481
01482
01483
01484
01485
01486 rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
01487 relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
01488 totalblocks = 0;
01489 nrels = 0;
01490 foreach(lc, tableOIDs)
01491 {
01492 Oid childOID = lfirst_oid(lc);
01493 Relation childrel;
01494
01495
01496 childrel = heap_open(childOID, NoLock);
01497
01498
01499 if (RELATION_IS_OTHER_TEMP(childrel))
01500 {
01501
01502 Assert(childrel != onerel);
01503 heap_close(childrel, AccessShareLock);
01504 continue;
01505 }
01506
01507 rels[nrels] = childrel;
01508 relblocks[nrels] = (double) RelationGetNumberOfBlocks(childrel);
01509 totalblocks += relblocks[nrels];
01510 nrels++;
01511 }
01512
01513
01514
01515
01516
01517
01518
01519 numrows = 0;
01520 *totalrows = 0;
01521 *totaldeadrows = 0;
01522 for (i = 0; i < nrels; i++)
01523 {
01524 Relation childrel = rels[i];
01525 double childblocks = relblocks[i];
01526
01527 if (childblocks > 0)
01528 {
01529 int childtargrows;
01530
01531 childtargrows = (int) rint(targrows * childblocks / totalblocks);
01532
01533 childtargrows = Min(childtargrows, targrows - numrows);
01534 if (childtargrows > 0)
01535 {
01536 int childrows;
01537 double trows,
01538 tdrows;
01539
01540
01541 childrows = acquire_sample_rows(childrel,
01542 elevel,
01543 rows + numrows,
01544 childtargrows,
01545 &trows,
01546 &tdrows);
01547
01548
01549 if (childrows > 0 &&
01550 !equalTupleDescs(RelationGetDescr(childrel),
01551 RelationGetDescr(onerel)))
01552 {
01553 TupleConversionMap *map;
01554
01555 map = convert_tuples_by_name(RelationGetDescr(childrel),
01556 RelationGetDescr(onerel),
01557 gettext_noop("could not convert row type"));
01558 if (map != NULL)
01559 {
01560 int j;
01561
01562 for (j = 0; j < childrows; j++)
01563 {
01564 HeapTuple newtup;
01565
01566 newtup = do_convert_tuple(rows[numrows + j], map);
01567 heap_freetuple(rows[numrows + j]);
01568 rows[numrows + j] = newtup;
01569 }
01570 free_conversion_map(map);
01571 }
01572 }
01573
01574
01575 numrows += childrows;
01576 *totalrows += trows;
01577 *totaldeadrows += tdrows;
01578 }
01579 }
01580
01581
01582
01583
01584
01585 heap_close(childrel, NoLock);
01586 }
01587
01588 return numrows;
01589 }
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609
01610
01611
01612
01613
01614 static void
01615 update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
01616 {
01617 Relation sd;
01618 int attno;
01619
01620 if (natts <= 0)
01621 return;
01622
01623 sd = heap_open(StatisticRelationId, RowExclusiveLock);
01624
01625 for (attno = 0; attno < natts; attno++)
01626 {
01627 VacAttrStats *stats = vacattrstats[attno];
01628 HeapTuple stup,
01629 oldtup;
01630 int i,
01631 k,
01632 n;
01633 Datum values[Natts_pg_statistic];
01634 bool nulls[Natts_pg_statistic];
01635 bool replaces[Natts_pg_statistic];
01636
01637
01638 if (!stats->stats_valid)
01639 continue;
01640
01641
01642
01643
01644 for (i = 0; i < Natts_pg_statistic; ++i)
01645 {
01646 nulls[i] = false;
01647 replaces[i] = true;
01648 }
01649
01650 values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
01651 values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
01652 values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
01653 values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
01654 values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
01655 values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
01656 i = Anum_pg_statistic_stakind1 - 1;
01657 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
01658 {
01659 values[i++] = Int16GetDatum(stats->stakind[k]);
01660 }
01661 i = Anum_pg_statistic_staop1 - 1;
01662 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
01663 {
01664 values[i++] = ObjectIdGetDatum(stats->staop[k]);
01665 }
01666 i = Anum_pg_statistic_stanumbers1 - 1;
01667 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
01668 {
01669 int nnum = stats->numnumbers[k];
01670
01671 if (nnum > 0)
01672 {
01673 Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
01674 ArrayType *arry;
01675
01676 for (n = 0; n < nnum; n++)
01677 numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
01678
01679 arry = construct_array(numdatums, nnum,
01680 FLOAT4OID,
01681 sizeof(float4), FLOAT4PASSBYVAL, 'i');
01682 values[i++] = PointerGetDatum(arry);
01683 }
01684 else
01685 {
01686 nulls[i] = true;
01687 values[i++] = (Datum) 0;
01688 }
01689 }
01690 i = Anum_pg_statistic_stavalues1 - 1;
01691 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
01692 {
01693 if (stats->numvalues[k] > 0)
01694 {
01695 ArrayType *arry;
01696
01697 arry = construct_array(stats->stavalues[k],
01698 stats->numvalues[k],
01699 stats->statypid[k],
01700 stats->statyplen[k],
01701 stats->statypbyval[k],
01702 stats->statypalign[k]);
01703 values[i++] = PointerGetDatum(arry);
01704 }
01705 else
01706 {
01707 nulls[i] = true;
01708 values[i++] = (Datum) 0;
01709 }
01710 }
01711
01712
01713 oldtup = SearchSysCache3(STATRELATTINH,
01714 ObjectIdGetDatum(relid),
01715 Int16GetDatum(stats->attr->attnum),
01716 BoolGetDatum(inh));
01717
01718 if (HeapTupleIsValid(oldtup))
01719 {
01720
01721 stup = heap_modify_tuple(oldtup,
01722 RelationGetDescr(sd),
01723 values,
01724 nulls,
01725 replaces);
01726 ReleaseSysCache(oldtup);
01727 simple_heap_update(sd, &stup->t_self, stup);
01728 }
01729 else
01730 {
01731
01732 stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
01733 simple_heap_insert(sd, stup);
01734 }
01735
01736
01737 CatalogUpdateIndexes(sd, stup);
01738
01739 heap_freetuple(stup);
01740 }
01741
01742 heap_close(sd, RowExclusiveLock);
01743 }
01744
01745
01746
01747
01748
01749
01750
01751 static Datum
01752 std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
01753 {
01754 int attnum = stats->tupattnum;
01755 HeapTuple tuple = stats->rows[rownum];
01756 TupleDesc tupDesc = stats->tupDesc;
01757
01758 return heap_getattr(tuple, attnum, tupDesc, isNull);
01759 }
01760
01761
01762
01763
01764
01765
01766
01767 static Datum
01768 ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
01769 {
01770 int i;
01771
01772
01773 i = rownum * stats->rowstride;
01774 *isNull = stats->exprnulls[i];
01775 return stats->exprvals[i];
01776 }
01777
01778
01779
01780
01781
01782
01783
01784
01785
01786
01787
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797
/*
 * Detoasted size limit, in bytes, above which compute_minimal_stats and
 * compute_scalar_stats count a varlena value as "too wide" and leave it out
 * of their value comparisons.
 */
#define WIDTH_THRESHOLD  1024

/*
 * Exchange the contents of two lvalues of type int / Datum.  Each argument
 * is evaluated twice, so it must not have side effects.
 */
#define swapInt(a,b) \
    do { int swap_tmp_ = (a); (a) = (b); (b) = swap_tmp_; } while (0)
#define swapDatum(a,b) \
    do { Datum swap_tmp_ = (a); (a) = (b); (b) = swap_tmp_; } while (0)
01802
01803
01804
01805
01806 typedef struct
01807 {
01808 Oid eqopr;
01809 Oid eqfunc;
01810 Oid ltopr;
01811 } StdAnalyzeData;
01812
01813 typedef struct
01814 {
01815 Datum value;
01816 int tupno;
01817 } ScalarItem;
01818
01819 typedef struct
01820 {
01821 int count;
01822 int first;
01823 } ScalarMCVItem;
01824
01825 typedef struct
01826 {
01827 SortSupport ssup;
01828 int *tupnoLink;
01829 } CompareScalarsContext;
01830
01831
01832 static void compute_minimal_stats(VacAttrStatsP stats,
01833 AnalyzeAttrFetchFunc fetchfunc,
01834 int samplerows,
01835 double totalrows);
01836 static void compute_scalar_stats(VacAttrStatsP stats,
01837 AnalyzeAttrFetchFunc fetchfunc,
01838 int samplerows,
01839 double totalrows);
01840 static int compare_scalars(const void *a, const void *b, void *arg);
01841 static int compare_mcvs(const void *a, const void *b);
01842
01843
01844
01845
01846
01847 bool
01848 std_typanalyze(VacAttrStats *stats)
01849 {
01850 Form_pg_attribute attr = stats->attr;
01851 Oid ltopr;
01852 Oid eqopr;
01853 StdAnalyzeData *mystats;
01854
01855
01856
01857 if (attr->attstattarget < 0)
01858 attr->attstattarget = default_statistics_target;
01859
01860
01861 get_sort_group_operators(stats->attrtypid,
01862 false, false, false,
01863 <opr, &eqopr, NULL,
01864 NULL);
01865
01866
01867 if (!OidIsValid(eqopr))
01868 return false;
01869
01870
01871 mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
01872 mystats->eqopr = eqopr;
01873 mystats->eqfunc = get_opcode(eqopr);
01874 mystats->ltopr = ltopr;
01875 stats->extra_data = mystats;
01876
01877
01878
01879
01880 if (OidIsValid(ltopr))
01881 {
01882
01883 stats->compute_stats = compute_scalar_stats;
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903 stats->minrows = 300 * attr->attstattarget;
01904 }
01905 else
01906 {
01907
01908 stats->compute_stats = compute_minimal_stats;
01909
01910 stats->minrows = 300 * attr->attstattarget;
01911 }
01912
01913 return true;
01914 }
01915
01916
01917
01918
01919
01920
01921
01922
01923
01924
01925
01926
01927
01928
01929
01930
01931 static void
01932 compute_minimal_stats(VacAttrStatsP stats,
01933 AnalyzeAttrFetchFunc fetchfunc,
01934 int samplerows,
01935 double totalrows)
01936 {
01937 int i;
01938 int null_cnt = 0;
01939 int nonnull_cnt = 0;
01940 int toowide_cnt = 0;
01941 double total_width = 0;
01942 bool is_varlena = (!stats->attrtype->typbyval &&
01943 stats->attrtype->typlen == -1);
01944 bool is_varwidth = (!stats->attrtype->typbyval &&
01945 stats->attrtype->typlen < 0);
01946 FmgrInfo f_cmpeq;
01947 typedef struct
01948 {
01949 Datum value;
01950 int count;
01951 } TrackItem;
01952 TrackItem *track;
01953 int track_cnt,
01954 track_max;
01955 int num_mcv = stats->attr->attstattarget;
01956 StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
01957
01958
01959
01960
01961 track_max = 2 * num_mcv;
01962 if (track_max < 10)
01963 track_max = 10;
01964 track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
01965 track_cnt = 0;
01966
01967 fmgr_info(mystats->eqfunc, &f_cmpeq);
01968
01969 for (i = 0; i < samplerows; i++)
01970 {
01971 Datum value;
01972 bool isnull;
01973 bool match;
01974 int firstcount1,
01975 j;
01976
01977 vacuum_delay_point();
01978
01979 value = fetchfunc(stats, i, &isnull);
01980
01981
01982 if (isnull)
01983 {
01984 null_cnt++;
01985 continue;
01986 }
01987 nonnull_cnt++;
01988
01989
01990
01991
01992
01993
01994
01995 if (is_varlena)
01996 {
01997 total_width += VARSIZE_ANY(DatumGetPointer(value));
01998
01999
02000
02001
02002
02003
02004
02005
02006 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
02007 {
02008 toowide_cnt++;
02009 continue;
02010 }
02011 value = PointerGetDatum(PG_DETOAST_DATUM(value));
02012 }
02013 else if (is_varwidth)
02014 {
02015
02016 total_width += strlen(DatumGetCString(value)) + 1;
02017 }
02018
02019
02020
02021
02022 match = false;
02023 firstcount1 = track_cnt;
02024 for (j = 0; j < track_cnt; j++)
02025 {
02026
02027 if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
02028 DEFAULT_COLLATION_OID,
02029 value, track[j].value)))
02030 {
02031 match = true;
02032 break;
02033 }
02034 if (j < firstcount1 && track[j].count == 1)
02035 firstcount1 = j;
02036 }
02037
02038 if (match)
02039 {
02040
02041 track[j].count++;
02042
02043 while (j > 0 && track[j].count > track[j - 1].count)
02044 {
02045 swapDatum(track[j].value, track[j - 1].value);
02046 swapInt(track[j].count, track[j - 1].count);
02047 j--;
02048 }
02049 }
02050 else
02051 {
02052
02053 if (track_cnt < track_max)
02054 track_cnt++;
02055 for (j = track_cnt - 1; j > firstcount1; j--)
02056 {
02057 track[j].value = track[j - 1].value;
02058 track[j].count = track[j - 1].count;
02059 }
02060 if (firstcount1 < track_cnt)
02061 {
02062 track[firstcount1].value = value;
02063 track[firstcount1].count = 1;
02064 }
02065 }
02066 }
02067
02068
02069 if (nonnull_cnt > 0)
02070 {
02071 int nmultiple,
02072 summultiple;
02073
02074 stats->stats_valid = true;
02075
02076 stats->stanullfrac = (double) null_cnt / (double) samplerows;
02077 if (is_varwidth)
02078 stats->stawidth = total_width / (double) nonnull_cnt;
02079 else
02080 stats->stawidth = stats->attrtype->typlen;
02081
02082
02083 summultiple = 0;
02084 for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
02085 {
02086 if (track[nmultiple].count == 1)
02087 break;
02088 summultiple += track[nmultiple].count;
02089 }
02090
02091 if (nmultiple == 0)
02092 {
02093
02094 stats->stadistinct = -1.0;
02095 }
02096 else if (track_cnt < track_max && toowide_cnt == 0 &&
02097 nmultiple == track_cnt)
02098 {
02099
02100
02101
02102
02103
02104 stats->stadistinct = track_cnt;
02105 }
02106 else
02107 {
02108
02109
02110
02111
02112
02113
02114
02115
02116
02117
02118
02119
02120
02121
02122
02123
02124
02125
02126 int f1 = nonnull_cnt - summultiple;
02127 int d = f1 + nmultiple;
02128 double numer,
02129 denom,
02130 stadistinct;
02131
02132 numer = (double) samplerows *(double) d;
02133
02134 denom = (double) (samplerows - f1) +
02135 (double) f1 *(double) samplerows / totalrows;
02136
02137 stadistinct = numer / denom;
02138
02139 if (stadistinct < (double) d)
02140 stadistinct = (double) d;
02141 if (stadistinct > totalrows)
02142 stadistinct = totalrows;
02143 stats->stadistinct = floor(stadistinct + 0.5);
02144 }
02145
02146
02147
02148
02149
02150
02151
02152 if (stats->stadistinct > 0.1 * totalrows)
02153 stats->stadistinct = -(stats->stadistinct / totalrows);
02154
02155
02156
02157
02158
02159
02160
02161
02162
02163
02164 if (track_cnt < track_max && toowide_cnt == 0 &&
02165 stats->stadistinct > 0 &&
02166 track_cnt <= num_mcv)
02167 {
02168
02169 num_mcv = track_cnt;
02170 }
02171 else
02172 {
02173 double ndistinct = stats->stadistinct;
02174 double avgcount,
02175 mincount;
02176
02177 if (ndistinct < 0)
02178 ndistinct = -ndistinct * totalrows;
02179
02180 avgcount = (double) samplerows / ndistinct;
02181
02182 mincount = avgcount * 1.25;
02183 if (mincount < 2)
02184 mincount = 2;
02185 if (num_mcv > track_cnt)
02186 num_mcv = track_cnt;
02187 for (i = 0; i < num_mcv; i++)
02188 {
02189 if (track[i].count < mincount)
02190 {
02191 num_mcv = i;
02192 break;
02193 }
02194 }
02195 }
02196
02197
02198 if (num_mcv > 0)
02199 {
02200 MemoryContext old_context;
02201 Datum *mcv_values;
02202 float4 *mcv_freqs;
02203
02204
02205 old_context = MemoryContextSwitchTo(stats->anl_context);
02206 mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
02207 mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
02208 for (i = 0; i < num_mcv; i++)
02209 {
02210 mcv_values[i] = datumCopy(track[i].value,
02211 stats->attrtype->typbyval,
02212 stats->attrtype->typlen);
02213 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
02214 }
02215 MemoryContextSwitchTo(old_context);
02216
02217 stats->stakind[0] = STATISTIC_KIND_MCV;
02218 stats->staop[0] = mystats->eqopr;
02219 stats->stanumbers[0] = mcv_freqs;
02220 stats->numnumbers[0] = num_mcv;
02221 stats->stavalues[0] = mcv_values;
02222 stats->numvalues[0] = num_mcv;
02223
02224
02225
02226
02227
02228 }
02229 }
02230 else if (null_cnt > 0)
02231 {
02232
02233 stats->stats_valid = true;
02234 stats->stanullfrac = 1.0;
02235 if (is_varwidth)
02236 stats->stawidth = 0;
02237 else
02238 stats->stawidth = stats->attrtype->typlen;
02239 stats->stadistinct = 0.0;
02240 }
02241
02242
02243 }
02244
02245
02246
02247
02248
02249
02250
02251
02252
02253
02254
02255
02256
02257
02258 static void
02259 compute_scalar_stats(VacAttrStatsP stats,
02260 AnalyzeAttrFetchFunc fetchfunc,
02261 int samplerows,
02262 double totalrows)
02263 {
02264 int i;
02265 int null_cnt = 0;
02266 int nonnull_cnt = 0;
02267 int toowide_cnt = 0;
02268 double total_width = 0;
02269 bool is_varlena = (!stats->attrtype->typbyval &&
02270 stats->attrtype->typlen == -1);
02271 bool is_varwidth = (!stats->attrtype->typbyval &&
02272 stats->attrtype->typlen < 0);
02273 double corr_xysum;
02274 SortSupportData ssup;
02275 ScalarItem *values;
02276 int values_cnt = 0;
02277 int *tupnoLink;
02278 ScalarMCVItem *track;
02279 int track_cnt = 0;
02280 int num_mcv = stats->attr->attstattarget;
02281 int num_bins = stats->attr->attstattarget;
02282 StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
02283
02284 values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
02285 tupnoLink = (int *) palloc(samplerows * sizeof(int));
02286 track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
02287
02288 memset(&ssup, 0, sizeof(ssup));
02289 ssup.ssup_cxt = CurrentMemoryContext;
02290
02291 ssup.ssup_collation = DEFAULT_COLLATION_OID;
02292 ssup.ssup_nulls_first = false;
02293
02294 PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
02295
02296
02297 for (i = 0; i < samplerows; i++)
02298 {
02299 Datum value;
02300 bool isnull;
02301
02302 vacuum_delay_point();
02303
02304 value = fetchfunc(stats, i, &isnull);
02305
02306
02307 if (isnull)
02308 {
02309 null_cnt++;
02310 continue;
02311 }
02312 nonnull_cnt++;
02313
02314
02315
02316
02317
02318
02319
02320 if (is_varlena)
02321 {
02322 total_width += VARSIZE_ANY(DatumGetPointer(value));
02323
02324
02325
02326
02327
02328
02329
02330
02331 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
02332 {
02333 toowide_cnt++;
02334 continue;
02335 }
02336 value = PointerGetDatum(PG_DETOAST_DATUM(value));
02337 }
02338 else if (is_varwidth)
02339 {
02340
02341 total_width += strlen(DatumGetCString(value)) + 1;
02342 }
02343
02344
02345 values[values_cnt].value = value;
02346 values[values_cnt].tupno = values_cnt;
02347 tupnoLink[values_cnt] = values_cnt;
02348 values_cnt++;
02349 }
02350
02351
02352 if (values_cnt > 0)
02353 {
02354 int ndistinct,
02355 nmultiple,
02356 num_hist,
02357 dups_cnt;
02358 int slot_idx = 0;
02359 CompareScalarsContext cxt;
02360
02361
02362 cxt.ssup = &ssup;
02363 cxt.tupnoLink = tupnoLink;
02364 qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
02365 compare_scalars, (void *) &cxt);
02366
02367
02368
02369
02370
02371
02372
02373
02374
02375
02376
02377
02378
02379
02380
02381
02382
02383
02384
02385
02386 corr_xysum = 0;
02387 ndistinct = 0;
02388 nmultiple = 0;
02389 dups_cnt = 0;
02390 for (i = 0; i < values_cnt; i++)
02391 {
02392 int tupno = values[i].tupno;
02393
02394 corr_xysum += ((double) i) * ((double) tupno);
02395 dups_cnt++;
02396 if (tupnoLink[tupno] == tupno)
02397 {
02398
02399 ndistinct++;
02400 if (dups_cnt > 1)
02401 {
02402 nmultiple++;
02403 if (track_cnt < num_mcv ||
02404 dups_cnt > track[track_cnt - 1].count)
02405 {
02406
02407
02408
02409
02410
02411
02412 int j;
02413
02414 if (track_cnt < num_mcv)
02415 track_cnt++;
02416 for (j = track_cnt - 1; j > 0; j--)
02417 {
02418 if (dups_cnt <= track[j - 1].count)
02419 break;
02420 track[j].count = track[j - 1].count;
02421 track[j].first = track[j - 1].first;
02422 }
02423 track[j].count = dups_cnt;
02424 track[j].first = i + 1 - dups_cnt;
02425 }
02426 }
02427 dups_cnt = 0;
02428 }
02429 }
02430
02431 stats->stats_valid = true;
02432
02433 stats->stanullfrac = (double) null_cnt / (double) samplerows;
02434 if (is_varwidth)
02435 stats->stawidth = total_width / (double) nonnull_cnt;
02436 else
02437 stats->stawidth = stats->attrtype->typlen;
02438
02439 if (nmultiple == 0)
02440 {
02441
02442 stats->stadistinct = -1.0;
02443 }
02444 else if (toowide_cnt == 0 && nmultiple == ndistinct)
02445 {
02446
02447
02448
02449
02450 stats->stadistinct = ndistinct;
02451 }
02452 else
02453 {
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468 int f1 = ndistinct - nmultiple + toowide_cnt;
02469 int d = f1 + nmultiple;
02470 double numer,
02471 denom,
02472 stadistinct;
02473
02474 numer = (double) samplerows *(double) d;
02475
02476 denom = (double) (samplerows - f1) +
02477 (double) f1 *(double) samplerows / totalrows;
02478
02479 stadistinct = numer / denom;
02480
02481 if (stadistinct < (double) d)
02482 stadistinct = (double) d;
02483 if (stadistinct > totalrows)
02484 stadistinct = totalrows;
02485 stats->stadistinct = floor(stadistinct + 0.5);
02486 }
02487
02488
02489
02490
02491
02492
02493
02494 if (stats->stadistinct > 0.1 * totalrows)
02495 stats->stadistinct = -(stats->stadistinct / totalrows);
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508
02509
02510
02511 if (track_cnt == ndistinct && toowide_cnt == 0 &&
02512 stats->stadistinct > 0 &&
02513 track_cnt <= num_mcv)
02514 {
02515
02516 num_mcv = track_cnt;
02517 }
02518 else
02519 {
02520 double ndistinct = stats->stadistinct;
02521 double avgcount,
02522 mincount,
02523 maxmincount;
02524
02525 if (ndistinct < 0)
02526 ndistinct = -ndistinct * totalrows;
02527
02528 avgcount = (double) samplerows / ndistinct;
02529
02530 mincount = avgcount * 1.25;
02531 if (mincount < 2)
02532 mincount = 2;
02533
02534 maxmincount = (double) samplerows / (double) num_bins;
02535 if (mincount > maxmincount)
02536 mincount = maxmincount;
02537 if (num_mcv > track_cnt)
02538 num_mcv = track_cnt;
02539 for (i = 0; i < num_mcv; i++)
02540 {
02541 if (track[i].count < mincount)
02542 {
02543 num_mcv = i;
02544 break;
02545 }
02546 }
02547 }
02548
02549
02550 if (num_mcv > 0)
02551 {
02552 MemoryContext old_context;
02553 Datum *mcv_values;
02554 float4 *mcv_freqs;
02555
02556
02557 old_context = MemoryContextSwitchTo(stats->anl_context);
02558 mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
02559 mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
02560 for (i = 0; i < num_mcv; i++)
02561 {
02562 mcv_values[i] = datumCopy(values[track[i].first].value,
02563 stats->attrtype->typbyval,
02564 stats->attrtype->typlen);
02565 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
02566 }
02567 MemoryContextSwitchTo(old_context);
02568
02569 stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
02570 stats->staop[slot_idx] = mystats->eqopr;
02571 stats->stanumbers[slot_idx] = mcv_freqs;
02572 stats->numnumbers[slot_idx] = num_mcv;
02573 stats->stavalues[slot_idx] = mcv_values;
02574 stats->numvalues[slot_idx] = num_mcv;
02575
02576
02577
02578
02579
02580 slot_idx++;
02581 }
02582
02583
02584
02585
02586
02587
02588 num_hist = ndistinct - num_mcv;
02589 if (num_hist > num_bins)
02590 num_hist = num_bins + 1;
02591 if (num_hist >= 2)
02592 {
02593 MemoryContext old_context;
02594 Datum *hist_values;
02595 int nvals;
02596 int pos,
02597 posfrac,
02598 delta,
02599 deltafrac;
02600
02601
02602 qsort((void *) track, num_mcv,
02603 sizeof(ScalarMCVItem), compare_mcvs);
02604
02605
02606
02607
02608
02609
02610
02611
02612 if (num_mcv > 0)
02613 {
02614 int src,
02615 dest;
02616 int j;
02617
02618 src = dest = 0;
02619 j = 0;
02620 while (src < values_cnt)
02621 {
02622 int ncopy;
02623
02624 if (j < num_mcv)
02625 {
02626 int first = track[j].first;
02627
02628 if (src >= first)
02629 {
02630
02631 src = first + track[j].count;
02632 j++;
02633 continue;
02634 }
02635 ncopy = first - src;
02636 }
02637 else
02638 ncopy = values_cnt - src;
02639 memmove(&values[dest], &values[src],
02640 ncopy * sizeof(ScalarItem));
02641 src += ncopy;
02642 dest += ncopy;
02643 }
02644 nvals = dest;
02645 }
02646 else
02647 nvals = values_cnt;
02648 Assert(nvals >= num_hist);
02649
02650
02651 old_context = MemoryContextSwitchTo(stats->anl_context);
02652 hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
02653
02654
02655
02656
02657
02658
02659
02660
02661
02662
02663 delta = (nvals - 1) / (num_hist - 1);
02664 deltafrac = (nvals - 1) % (num_hist - 1);
02665 pos = posfrac = 0;
02666
02667 for (i = 0; i < num_hist; i++)
02668 {
02669 hist_values[i] = datumCopy(values[pos].value,
02670 stats->attrtype->typbyval,
02671 stats->attrtype->typlen);
02672 pos += delta;
02673 posfrac += deltafrac;
02674 if (posfrac >= (num_hist - 1))
02675 {
02676
02677 pos++;
02678 posfrac -= (num_hist - 1);
02679 }
02680 }
02681
02682 MemoryContextSwitchTo(old_context);
02683
02684 stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
02685 stats->staop[slot_idx] = mystats->ltopr;
02686 stats->stavalues[slot_idx] = hist_values;
02687 stats->numvalues[slot_idx] = num_hist;
02688
02689
02690
02691
02692
02693 slot_idx++;
02694 }
02695
02696
02697 if (values_cnt > 1)
02698 {
02699 MemoryContext old_context;
02700 float4 *corrs;
02701 double corr_xsum,
02702 corr_x2sum;
02703
02704
02705 old_context = MemoryContextSwitchTo(stats->anl_context);
02706 corrs = (float4 *) palloc(sizeof(float4));
02707 MemoryContextSwitchTo(old_context);
02708
02709
02710
02711
02712
02713
02714
02715
02716
02717
02718 corr_xsum = ((double) (values_cnt - 1)) *
02719 ((double) values_cnt) / 2.0;
02720 corr_x2sum = ((double) (values_cnt - 1)) *
02721 ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
02722
02723
02724 corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
02725 (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
02726
02727 stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
02728 stats->staop[slot_idx] = mystats->ltopr;
02729 stats->stanumbers[slot_idx] = corrs;
02730 stats->numnumbers[slot_idx] = 1;
02731 slot_idx++;
02732 }
02733 }
02734 else if (nonnull_cnt == 0 && null_cnt > 0)
02735 {
02736
02737 stats->stats_valid = true;
02738 stats->stanullfrac = 1.0;
02739 if (is_varwidth)
02740 stats->stawidth = 0;
02741 else
02742 stats->stawidth = stats->attrtype->typlen;
02743 stats->stadistinct = 0.0;
02744 }
02745
02746
02747 }
02748
02749
02750
02751
02752
02753
02754
02755
02756
02757
02758 static int
02759 compare_scalars(const void *a, const void *b, void *arg)
02760 {
02761 Datum da = ((const ScalarItem *) a)->value;
02762 int ta = ((const ScalarItem *) a)->tupno;
02763 Datum db = ((const ScalarItem *) b)->value;
02764 int tb = ((const ScalarItem *) b)->tupno;
02765 CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
02766 int compare;
02767
02768 compare = ApplySortComparator(da, false, db, false, cxt->ssup);
02769 if (compare != 0)
02770 return compare;
02771
02772
02773
02774
02775 if (cxt->tupnoLink[ta] < tb)
02776 cxt->tupnoLink[ta] = tb;
02777 if (cxt->tupnoLink[tb] < ta)
02778 cxt->tupnoLink[tb] = ta;
02779
02780
02781
02782
02783 return ta - tb;
02784 }
02785
02786
02787
02788
02789 static int
02790 compare_mcvs(const void *a, const void *b)
02791 {
02792 int da = ((const ScalarMCVItem *) a)->first;
02793 int db = ((const ScalarMCVItem *) b)->first;
02794
02795 return da - db;
02796 }