#include "postgres.h"
#include <math.h>
#include "access/genam.h"
#include "access/gist_private.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Go to the source code of this file.
#define BUFFERING_MODE_SWITCH_CHECK_STEP 256 |
Definition at line 30 of file gistbuild.c.
Referenced by gistBuildCallback().
#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096 |
Definition at line 38 of file gistbuild.c.
Referenced by gistBuildCallback().
enum GistBufferingMode |
Definition at line 40 of file gistbuild.c.
{ GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to * switch */ GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to * buffering build mode if the index grows too * big */ GIST_BUFFERING_STATS, /* gathering statistics of index tuple size * before switching to the buffering build * mode */ GIST_BUFFERING_ACTIVE /* in buffering build mode */ } GistBufferingMode;
static int calculatePagesPerBuffer | ( | GISTBuildState * | buildstate, | |
int | levelStep | |||
) | [static] |
Definition at line 429 of file gistbuild.c.
References GISTBuildState::freespace, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, rint(), and SizeOfPageHeaderData.
Referenced by gistBuildCallback(), and gistInitBuffering().
{ double pagesPerBuffer; double avgIndexTuplesPerPage; double itupAvgSize; Size pageFreeSpace; /* Calc space of index page which is available for index tuples */ pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData) - sizeof(ItemIdData) - buildstate->freespace; /* * Calculate average size of already inserted index tuples using gathered * statistics. */ itupAvgSize = (double) buildstate->indtuplesSize / (double) buildstate->indtuples; avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize; /* * Recalculate required size of buffers. */ pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep); return (int) rint(pagesPerBuffer); }
static void gistBufferingBuildInsert | ( | GISTBuildState * | buildstate, | |
IndexTuple | itup | |||
) | [static] |
Definition at line 534 of file gistbuild.c.
References GISTBuildState::gfbb, gistProcessEmptyingQueue(), gistProcessItup(), and GISTBuildBuffers::rootlevel.
Referenced by gistBuildCallback().
{ /* Insert the tuple to buffers. */ gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel); /* If we filled up (half of a) buffer, process buffer emptying. */ gistProcessEmptyingQueue(buildstate); }
static Buffer gistBufferingFindCorrectParent | ( | GISTBuildState * | buildstate, | |
BlockNumber | childblkno, | |||
int | level, | |||
BlockNumber * | parentblk, | |||
OffsetNumber * | downlinkoffnum | |||
) | [static] |
Definition at line 844 of file gistbuild.c.
References BufferGetPage, elog, ERROR, FirstOffsetNumber, GIST_EXCLUSIVE, gistcheckpage(), gistGetParent(), GISTBuildState::indexrel, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LockBuffer(), OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, ReadBuffer(), and IndexTupleData::t_tid.
Referenced by gistbufferinginserttuples().
{ BlockNumber parent; Buffer buffer; Page page; OffsetNumber maxoff; OffsetNumber off; if (level > 0) parent = gistGetParent(buildstate, childblkno); else { /* * For a leaf page, the caller must supply a correct parent block * number. */ if (*parentblkno == InvalidBlockNumber) elog(ERROR, "no parent buffer provided of child %d", childblkno); parent = *parentblkno; } buffer = ReadBuffer(buildstate->indexrel, parent); page = BufferGetPage(buffer); LockBuffer(buffer, GIST_EXCLUSIVE); gistcheckpage(buildstate->indexrel, buffer); maxoff = PageGetMaxOffsetNumber(page); /* Check if it was not moved */ if (parent == *parentblkno && *parentblkno != InvalidBlockNumber && *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff) { ItemId iid = PageGetItemId(page, *downlinkoffnum); IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno) { /* Still there */ return buffer; } } /* * Downlink was not at the offset where it used to be. Scan the page to * find it. During normal gist insertions, it might've moved to another * page, to the right, but during a buffering build, we keep track of the * parent of each page in the lookup table so we should always know what * page it's on. */ for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off)) { ItemId iid = PageGetItemId(page, off); IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno) { /* yes!!, found it */ *downlinkoffnum = off; return buffer; } } elog(ERROR, "failed to re-find parent for block %u", childblkno); return InvalidBuffer; /* keep compiler quiet */ }
static BlockNumber gistbufferinginserttuples | ( | GISTBuildState * | buildstate, | |
Buffer | buffer, | |||
int | level, | |||
IndexTuple * | itup, | |||
int | ntup, | |||
OffsetNumber | oldoffnum, | |||
BlockNumber | parentblk, | |||
OffsetNumber | downlinkoffnum | |||
) | [static] |
Definition at line 676 of file gistbuild.c.
References Assert, GISTPageSplitInfo::buf, BufferGetBlockNumber(), BufferGetPage, DEBUG2, GISTPageSplitInfo::downlink, elog, FirstOffsetNumber, GISTBuildState::freespace, GISTBuildState::gfbb, GIST_ROOT_BLKNO, GIST_SHARE, gistBufferingFindCorrectParent(), gistMemorizeAllDownlinks(), gistMemorizeParent(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), GISTBuildState::giststate, i, GISTBuildState::indexrel, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, ItemPointerGetBlockNumber, lfirst, list_free_deep(), list_length(), LockBuffer(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, palloc(), ReadBuffer(), GISTBuildBuffers::rootlevel, IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistProcessItup().
{ GISTBuildBuffers *gfbb = buildstate->gfbb; List *splitinfo; bool is_split; BlockNumber placed_to_blk = InvalidBlockNumber; is_split = gistplacetopage(buildstate->indexrel, buildstate->freespace, buildstate->giststate, buffer, itup, ntup, oldoffnum, &placed_to_blk, InvalidBuffer, &splitinfo, false); /* * If this is a root split, update the root path item kept in memory. This * ensures that all path stacks are always complete, including all parent * nodes up to the root. That simplifies the algorithm to re-find correct * parent. */ if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO) { Page page = BufferGetPage(buffer); OffsetNumber off; OffsetNumber maxoff; Assert(level == gfbb->rootlevel); gfbb->rootlevel++; elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel); /* * All the downlinks on the old root page are now on one of the child * pages. Visit all the new child pages to memorize the parents of the * grandchildren. */ if (gfbb->rootlevel > 1) { maxoff = PageGetMaxOffsetNumber(page); for (off = FirstOffsetNumber; off <= maxoff; off++) { ItemId iid = PageGetItemId(page, off); IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno); LockBuffer(childbuf, GIST_SHARE); gistMemorizeAllDownlinks(buildstate, childbuf); UnlockReleaseBuffer(childbuf); /* * Also remember that the parent of the new child page is the * root block. */ gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO); } } } if (splitinfo) { /* * Insert the downlinks to the parent. This is analogous with * gistfinishsplit() in the regular insertion code, but the locking is * simpler, and we have to maintain the buffers on internal nodes and * the parent map. */ IndexTuple *downlinks; int ndownlinks, i; Buffer parentBuffer; ListCell *lc; /* Parent may have changed since we memorized this path. */ parentBuffer = gistBufferingFindCorrectParent(buildstate, BufferGetBlockNumber(buffer), level, &parentblk, &downlinkoffnum); /* * If there's a buffer associated with this page, that needs to be * split too. gistRelocateBuildBuffersOnSplit() will also adjust the * downlinks in 'splitinfo', to make sure they're consistent not only * with the tuples already on the pages, but also the tuples in the * buffers that will eventually be inserted to them. */ gistRelocateBuildBuffersOnSplit(gfbb, buildstate->giststate, buildstate->indexrel, level, buffer, splitinfo); /* Create an array of all the downlink tuples */ ndownlinks = list_length(splitinfo); downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks); i = 0; foreach(lc, splitinfo) { GISTPageSplitInfo *splitinfo = lfirst(lc); /* * Remember the parent of each new child page in our parent map. * This assumes that the downlinks fit on the parent page. If the * parent page is split, too, when we recurse up to insert the * downlinks, the recursive gistbufferinginserttuples() call will * update the map again. */ if (level > 0) gistMemorizeParent(buildstate, BufferGetBlockNumber(splitinfo->buf), BufferGetBlockNumber(parentBuffer)); /* * Also update the parent map for all the downlinks that got moved * to a different page. (actually this also loops through the * downlinks that stayed on the original page, but it does no * harm). */ if (level > 1) gistMemorizeAllDownlinks(buildstate, splitinfo->buf); /* * Since there's no concurrent access, we can release the lower * level buffers immediately. This includes the original page. */ UnlockReleaseBuffer(splitinfo->buf); downlinks[i++] = splitinfo->downlink; } /* Insert them into parent. */ gistbufferinginserttuples(buildstate, parentBuffer, level + 1, downlinks, ndownlinks, downlinkoffnum, InvalidBlockNumber, InvalidOffsetNumber); list_free_deep(splitinfo); /* we don't need this anymore */ } else UnlockReleaseBuffer(buffer); return placed_to_blk; }
Datum gistbuild | ( | PG_FUNCTION_ARGS | ) |
Definition at line 112 of file gistbuild.c.
References Assert, XLogRecData::buffer, BufferGetBlockNumber(), BufferGetPage, GISTBuildState::bufferingMode, GiSTOptions::bufferingModeOffset, createTempGistContext(), CurrentMemoryContext, XLogRecData::data, DEBUG1, elog, END_CRIT_SECTION, ERROR, F_LEAF, GiSTOptions::fillfactor, fillfactor, freeGISTstate(), GISTBuildState::freespace, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_DISABLED, GIST_BUFFERING_STATS, GIST_ROOT_BLKNO, gistBuildCallback(), gistEmptyAllBuffers(), gistFreeBuildBuffers(), gistGetFakeLSN(), GISTInitBuffer(), gistNewBuffer(), GISTBuildState::giststate, IndexBuildResult::heap_tuples, IndexBuildResult::index_tuples, IndexBuildHeapScan(), GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, initGISTstate(), XLogRecData::len, MarkBufferDirty(), MemoryContextDelete(), MemoryContextSwitchTo(), XLogRecData::next, PageSetLSN, palloc(), PG_GETARG_POINTER, PG_RETURN_POINTER, RelationData::rd_node, RelationData::rd_options, RelationGetNumberOfBlocks, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, GISTSTATE::tempCxt, UnlockReleaseBuffer(), XLOG_GIST_CREATE_INDEX, and XLogInsert().
{ Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); IndexBuildResult *result; double reltuples; GISTBuildState buildstate; Buffer buffer; Page page; MemoryContext oldcxt = CurrentMemoryContext; int fillfactor; buildstate.indexrel = index; if (index->rd_options) { /* Get buffering mode from the options string */ GiSTOptions *options = (GiSTOptions *) index->rd_options; char *bufferingMode = (char *) options + options->bufferingModeOffset; if (strcmp(bufferingMode, "on") == 0) buildstate.bufferingMode = GIST_BUFFERING_STATS; else if (strcmp(bufferingMode, "off") == 0) buildstate.bufferingMode = GIST_BUFFERING_DISABLED; else buildstate.bufferingMode = GIST_BUFFERING_AUTO; fillfactor = options->fillfactor; } else { /* * By default, switch to buffering mode when the index grows too large * to fit in cache. */ buildstate.bufferingMode = GIST_BUFFERING_AUTO; fillfactor = GIST_DEFAULT_FILLFACTOR; } /* Calculate target amount of free space to leave on pages */ buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100; /* * We expect to be called exactly once for any index relation. If that's * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* no locking is needed */ buildstate.giststate = initGISTstate(index); /* * Create a temporary memory context that is reset once for each tuple * processed. (Note: we don't bother to make this a child of the * giststate's scanCxt, so we have to delete it separately at the end.) */ buildstate.giststate->tempCxt = createTempGistContext(); /* initialize the root page */ buffer = gistNewBuffer(index); Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); page = BufferGetPage(buffer); START_CRIT_SECTION(); GISTInitBuffer(buffer, F_LEAF); MarkBufferDirty(buffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; XLogRecData rdata; rdata.data = (char *) &(index->rd_node); rdata.len = sizeof(RelFileNode); rdata.buffer = InvalidBuffer; rdata.next = NULL; recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata); PageSetLSN(page, recptr); } else PageSetLSN(page, gistGetFakeLSN(heap)); UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); /* build the index */ buildstate.indtuples = 0; buildstate.indtuplesSize = 0; /* * Do the heap scan. */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, gistBuildCallback, (void *) &buildstate); /* * If buffering was used, flush out all the tuples that are still in the * buffers. */ if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE) { elog(DEBUG1, "all tuples processed, emptying buffers"); gistEmptyAllBuffers(&buildstate); gistFreeBuildBuffers(buildstate.gfbb); } /* okay, all heap tuples are indexed */ MemoryContextSwitchTo(oldcxt); MemoryContextDelete(buildstate.giststate->tempCxt); freeGISTstate(buildstate.giststate); /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = (double) buildstate.indtuples; PG_RETURN_POINTER(result); }
static void gistBuildCallback | ( | Relation | index, | |
HeapTuple | htup, | |||
Datum * | values, | |||
bool * | isnull, | |||
bool | tupleIsAlive, | |||
void * | state | |||
) | [static] |
Definition at line 462 of file gistbuild.c.
References BUFFERING_MODE_SWITCH_CHECK_STEP, BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET, GISTBuildState::bufferingMode, calculatePagesPerBuffer(), effective_cache_size, GISTBuildState::freespace, GISTBuildState::gfbb, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_STATS, gistBufferingBuildInsert(), gistdoinsert(), gistFormTuple(), gistInitBuffering(), GISTBuildState::giststate, IndexTupleSize, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, GISTBuildBuffers::levelStep, MAIN_FORKNUM, MemoryContextReset(), MemoryContextSwitchTo(), GISTBuildBuffers::pagesPerBuffer, RelationData::rd_smgr, smgrnblocks(), HeapTupleData::t_self, IndexTupleData::t_tid, and GISTSTATE::tempCxt.
Referenced by gistbuild().
{ GISTBuildState *buildstate = (GISTBuildState *) state; IndexTuple itup; MemoryContext oldCtx; oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); /* form an index tuple and point it at the heap tuple */ itup = gistFormTuple(buildstate->giststate, index, values, isnull, true); itup->t_tid = htup->t_self; if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) { /* We have buffers, so use them. */ gistBufferingBuildInsert(buildstate, itup); } else { /* * There's no buffers (yet). Since we already have the index relation * locked, we call gistdoinsert directly. */ gistdoinsert(index, itup, buildstate->freespace, buildstate->giststate); } /* Update tuple count and total size. */ buildstate->indtuples += 1; buildstate->indtuplesSize += IndexTupleSize(itup); MemoryContextSwitchTo(oldCtx); MemoryContextReset(buildstate->giststate->tempCxt); if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE && buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0) { /* Adjust the target buffer size now */ buildstate->gfbb->pagesPerBuffer = calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep); } /* * In 'auto' mode, check if the index has grown too large to fit in cache, * and switch to buffering mode if it has. * * To avoid excessive calls to smgrnblocks(), only check this every * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples */ if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO && buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 && effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) || (buildstate->bufferingMode == GIST_BUFFERING_STATS && buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET)) { /* * Index doesn't fit in effective cache anymore. Try to switch to * buffering build mode. */ gistInitBuffering(buildstate); } }
static void gistEmptyAllBuffers | ( | GISTBuildState * | buildstate | ) | [static] |
Definition at line 991 of file gistbuild.c.
References GISTNodeBuffer::blocksCount, GISTBuildBuffers::bufferEmptyingQueue, GISTBuildBuffers::buffersOnLevels, GISTBuildBuffers::buffersOnLevelsLen, GISTBuildBuffers::context, DEBUG2, elog, GISTBuildState::gfbb, gistProcessEmptyingQueue(), GISTBuildState::giststate, i, lcons(), linitial, list_delete_first(), MemoryContextSwitchTo(), NIL, GISTNodeBuffer::queuedForEmptying, and GISTSTATE::tempCxt.
Referenced by gistbuild().
{ GISTBuildBuffers *gfbb = buildstate->gfbb; MemoryContext oldCtx; int i; oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); /* * Iterate through the levels from top to bottom. */ for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--) { /* * Empty all buffers on this level. Note that new buffers can pop up * in the list during the processing, as a result of page splits, so a * simple walk through the list won't work. We remove buffers from the * list when we see them empty; a buffer can't become non-empty once * it's been fully emptied. */ while (gfbb->buffersOnLevels[i] != NIL) { GISTNodeBuffer *nodeBuffer; nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]); if (nodeBuffer->blocksCount != 0) { /* * Add this buffer to the emptying queue, and proceed to empty * the queue. */ if (!nodeBuffer->queuedForEmptying) { MemoryContextSwitchTo(gfbb->context); nodeBuffer->queuedForEmptying = true; gfbb->bufferEmptyingQueue = lcons(nodeBuffer, gfbb->bufferEmptyingQueue); MemoryContextSwitchTo(buildstate->giststate->tempCxt); } gistProcessEmptyingQueue(buildstate); } else gfbb->buffersOnLevels[i] = list_delete_first(gfbb->buffersOnLevels[i]); } elog(DEBUG2, "emptied all buffers at level %d", i); } MemoryContextSwitchTo(oldCtx); }
static int gistGetMaxLevel | ( | Relation | index | ) | [static] |
Definition at line 1046 of file gistbuild.c.
References BufferGetPage, FirstOffsetNumber, GIST_SHARE, GistPageIsLeaf, ItemPointerGetBlockNumber, LockBuffer(), PageGetItem, PageGetItemId, ReadBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistInitBuffering().
{ int maxLevel; BlockNumber blkno; /* * Traverse down the tree, starting from the root, until we hit the leaf * level. */ maxLevel = 0; blkno = GIST_ROOT_BLKNO; while (true) { Buffer buffer; Page page; IndexTuple itup; buffer = ReadBuffer(index, blkno); /* * There's no concurrent access during index build, so locking is just * pro forma. */ LockBuffer(buffer, GIST_SHARE); page = (Page) BufferGetPage(buffer); if (GistPageIsLeaf(page)) { /* We hit the bottom, so we're done. */ UnlockReleaseBuffer(buffer); break; } /* * Pick the first downlink on the page, and follow it. It doesn't * matter which downlink we choose, the tree has the same depth * everywhere, so we just pick the first one. */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); UnlockReleaseBuffer(buffer); /* * We're going down on the tree. It means that there is yet one more * level in the tree. */ maxLevel++; } return maxLevel; }
static BlockNumber gistGetParent | ( | GISTBuildState * | buildstate, | |
BlockNumber | child | |||
) | [static] |
Definition at line 1188 of file gistbuild.c.
References elog, ERROR, hash_search(), ParentMapEntry::parentblkno, and GISTBuildState::parentMap.
Referenced by gistBufferingFindCorrectParent().
{ ParentMapEntry *entry; bool found; /* Find node buffer in hash table */ entry = (ParentMapEntry *) hash_search(buildstate->parentMap, (const void *) &child, HASH_FIND, &found); if (!found) elog(ERROR, "could not find parent of block %d in lookup table", child); return entry->parentblkno; }
static void gistInitBuffering | ( | GISTBuildState * | buildstate | ) | [static] |
Definition at line 268 of file gistbuild.c.
References tupleDesc::attrs, GISTBuildState::bufferingMode, calculatePagesPerBuffer(), DEBUG1, effective_cache_size, elog, GISTBuildState::freespace, GISTBuildState::gfbb, gistGetMaxLevel(), gistInitBuildBuffers(), gistInitParentMap(), i, GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, maintenance_work_mem, MAXALIGN, tupleDesc::natts, RelationData::rd_att, and SizeOfPageHeaderData.
Referenced by gistBuildCallback().
{ Relation index = buildstate->indexrel; int pagesPerBuffer; Size pageFreeSpace; Size itupAvgSize, itupMinSize; double avgIndexTuplesPerPage, maxIndexTuplesPerPage; int i; int levelStep; /* Calc space of index page which is available for index tuples */ pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData) - sizeof(ItemIdData) - buildstate->freespace; /* * Calculate average size of already inserted index tuples using gathered * statistics. */ itupAvgSize = (double) buildstate->indtuplesSize / (double) buildstate->indtuples; /* * Calculate minimal possible size of index tuple by index metadata. * Minimal possible size of varlena is VARHDRSZ. * * XXX: that's not actually true, as a short varlen can be just 2 bytes. * And we should take padding into account here. */ itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData)); for (i = 0; i < index->rd_att->natts; i++) { if (index->rd_att->attrs[i]->attlen < 0) itupMinSize += VARHDRSZ; else itupMinSize += index->rd_att->attrs[i]->attlen; } /* Calculate average and maximal number of index tuples which fit to page */ avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize; maxIndexTuplesPerPage = pageFreeSpace / itupMinSize; /* * We need to calculate two parameters for the buffering algorithm: * levelStep and pagesPerBuffer. * * levelStep determines the size of subtree that we operate on, while * emptying a buffer. A higher value is better, as you need fewer buffer * emptying steps to build the index. However, if you set it too high, the * subtree doesn't fit in cache anymore, and you quickly lose the benefit * of the buffers. * * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is * the number of tuples on page (ie. fanout), and M is the amount of * internal memory available. Curiously, they doesn't explain *why* that * setting is optimal. We calculate it by taking the highest levelStep so * that a subtree still fits in cache. For a small B, our way of * calculating levelStep is very close to Arge et al's formula. For a * large B, our formula gives a value that is 2x higher. * * The average size (in pages) of a subtree of depth n can be calculated * as a geometric series: * * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B) * * where B is the average number of index tuples on page. The subtree is * cached in the shared buffer cache and the OS cache, so we choose * levelStep so that the subtree size is comfortably smaller than * effective_cache_size, with a safety factor of 4. * * The estimate on the average number of index tuples on page is based on * average tuple sizes observed before switching to buffered build, so the * real subtree size can be somewhat larger. Also, it would selfish to * gobble the whole cache for our index build. The safety factor of 4 * should account for those effects. * * The other limiting factor for setting levelStep is that while * processing a subtree, we need to hold one page for each buffer at the * next lower buffered level. The max. number of buffers needed for that * is maxIndexTuplesPerPage^levelStep. This is very conservative, but * hopefully maintenance_work_mem is set high enough that you're * constrained by effective_cache_size rather than maintenance_work_mem. * * XXX: the buffer hash table consumes a fair amount of memory too per * buffer, but that is not currently taken into account. That scales on * the total number of buffers used, ie. the index size and on levelStep. * Note that a higher levelStep *reduces* the amount of memory needed for * the hash table. */ levelStep = 1; for (;;) { double subtreesize; double maxlowestlevelpages; /* size of an average subtree at this levelStep (in pages). */ subtreesize = (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) / (1 - avgIndexTuplesPerPage); /* max number of pages at the lowest level of a subtree */ maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep); /* subtree must fit in cache (with safety factor of 4) */ if (subtreesize > effective_cache_size / 4) break; /* each node in the lowest level of a subtree has one page in memory */ if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ) break; /* Good, we can handle this levelStep. See if we can go one higher. */ levelStep++; } /* * We just reached an unacceptable value of levelStep in previous loop. * So, decrease levelStep to get last acceptable value. */ levelStep--; /* * If there's not enough cache or maintenance_work_mem, fall back to plain * inserts. */ if (levelStep <= 0) { elog(DEBUG1, "failed to switch to buffered GiST build"); buildstate->bufferingMode = GIST_BUFFERING_DISABLED; return; } /* * The second parameter to set is pagesPerBuffer, which determines the * size of each buffer. We adjust pagesPerBuffer also during the build, * which is why this calculation is in a separate function. */ pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep); /* Initialize GISTBuildBuffers with these parameters */ buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep, gistGetMaxLevel(index)); gistInitParentMap(buildstate); buildstate->bufferingMode = GIST_BUFFERING_ACTIVE; elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d", levelStep, pagesPerBuffer); }
static void gistInitParentMap | ( | GISTBuildState * | buildstate | ) | [static] |
Definition at line 1135 of file gistbuild.c.
References CurrentMemoryContext, HASHCTL::entrysize, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, HASHCTL::hcxt, HASHCTL::keysize, and GISTBuildState::parentMap.
Referenced by gistInitBuffering().
{ HASHCTL hashCtl; hashCtl.keysize = sizeof(BlockNumber); hashCtl.entrysize = sizeof(ParentMapEntry); hashCtl.hcxt = CurrentMemoryContext; hashCtl.hash = oid_hash; buildstate->parentMap = hash_create("gistbuild parent map", 1024, &hashCtl, HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION); }
static void gistMemorizeAllDownlinks | ( | GISTBuildState * | buildstate, | |
Buffer | parent | |||
) | [static] |
Definition at line 1167 of file gistbuild.c.
References Assert, BufferGetBlockNumber(), BufferGetPage, FirstOffsetNumber, gistMemorizeParent(), GistPageIsLeaf, ItemPointerGetBlockNumber, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and IndexTupleData::t_tid.
Referenced by gistbufferinginserttuples().
{ OffsetNumber maxoff; OffsetNumber off; BlockNumber parentblkno = BufferGetBlockNumber(parentbuf); Page page = BufferGetPage(parentbuf); Assert(!GistPageIsLeaf(page)); maxoff = PageGetMaxOffsetNumber(page); for (off = FirstOffsetNumber; off <= maxoff; off++) { ItemId iid = PageGetItemId(page, off); IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); gistMemorizeParent(buildstate, childblkno, parentblkno); } }
static void gistMemorizeParent | ( | GISTBuildState * | buildstate, | |
BlockNumber | child, | |||
BlockNumber | parent | |||
) | [static] |
Definition at line 1151 of file gistbuild.c.
References hash_search(), ParentMapEntry::parentblkno, and GISTBuildState::parentMap.
Referenced by gistbufferinginserttuples(), gistMemorizeAllDownlinks(), and gistProcessItup().
{ ParentMapEntry *entry; bool found; entry = (ParentMapEntry *) hash_search(buildstate->parentMap, (const void *) &child, HASH_ENTER, &found); entry->parentblkno = parent; }
static void gistProcessEmptyingQueue | ( | GISTBuildState * | buildstate | ) | [static] |
Definition at line 918 of file gistbuild.c.
References GISTBuildBuffers::bufferEmptyingQueue, GISTBuildState::gfbb, gistPopItupFromNodeBuffer(), gistProcessItup(), GISTBuildState::giststate, gistUnloadNodeBuffers(), GISTNodeBuffer::level, linitial, list_delete_first(), MemoryContextReset(), NIL, GISTNodeBuffer::nodeBlocknum, GISTNodeBuffer::queuedForEmptying, and GISTSTATE::tempCxt.
Referenced by gistBufferingBuildInsert(), and gistEmptyAllBuffers().
{ GISTBuildBuffers *gfbb = buildstate->gfbb; /* Iterate while we have elements in buffers emptying stack. */ while (gfbb->bufferEmptyingQueue != NIL) { GISTNodeBuffer *emptyingNodeBuffer; /* Get node buffer from emptying stack. */ emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue); gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue); emptyingNodeBuffer->queuedForEmptying = false; /* * We are going to load last pages of buffers where emptying will be * to. So let's unload any previously loaded buffers. */ gistUnloadNodeBuffers(gfbb); /* * Pop tuples from the buffer and run them down to the buffers at * lower level, or leaf pages. We continue until one of the lower * level buffers fills up, or this buffer runs empty. * * In Arge et al's paper, the buffer emptying is stopped after * processing 1/2 node buffer worth of tuples, to avoid overfilling * any of the lower level buffers. However, it's more efficient to * keep going until one of the lower level buffers actually fills up, * so that's what we do. This doesn't need to be exact, if a buffer * overfills by a few tuples, there's no harm done. */ while (true) { IndexTuple itup; /* Get next index tuple from the buffer */ if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup)) break; /* * Run it down to the underlying node buffer or leaf page. * * Note: it's possible that the buffer we're emptying splits as a * result of this call. If that happens, our emptyingNodeBuffer * points to the left half of the split. After split, it's very * likely that the new left buffer is no longer over the half-full * threshold, but we might as well keep flushing tuples from it * until we fill a lower-level buffer. */ if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level)) { /* * A lower level buffer filled up. Stop emptying this buffer, * to avoid overflowing the lower level buffer. */ break; } /* Free all the memory allocated during index tuple processing */ MemoryContextReset(buildstate->giststate->tempCxt); } } }
static bool gistProcessItup | ( | GISTBuildState * | buildstate, | |
IndexTuple | itup, | |||
BlockNumber | startblkno, | |||
int | startlevel | |||
) | [static] |
Definition at line 550 of file gistbuild.c.
References Assert, BUFFER_OVERFLOWED, BufferGetPage, CHECK_FOR_INTERRUPTS, GISTBuildState::gfbb, GIST_EXCLUSIVE, gistbufferinginserttuples(), gistchoose(), gistgetadjusted(), gistGetNodeBuffer(), gistMemorizeParent(), gistPushItupToNodeBuffer(), GISTBuildState::giststate, GISTBuildState::indexrel, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LEVEL_HAS_BUFFERS, LockBuffer(), PageGetItem, PageGetItemId, ReadBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistBufferingBuildInsert(), and gistProcessEmptyingQueue().
{ GISTSTATE *giststate = buildstate->giststate; GISTBuildBuffers *gfbb = buildstate->gfbb; Relation indexrel = buildstate->indexrel; BlockNumber childblkno; Buffer buffer; bool result = false; BlockNumber blkno; int level; OffsetNumber downlinkoffnum = InvalidOffsetNumber; BlockNumber parentblkno = InvalidBlockNumber; CHECK_FOR_INTERRUPTS(); /* * Loop until we reach a leaf page (level == 0) or a level with buffers * (not including the level we start at, because we would otherwise make * no progress). */ blkno = startblkno; level = startlevel; for (;;) { ItemId iid; IndexTuple idxtuple, newtup; Page page; OffsetNumber childoffnum; /* Have we reached a level with buffers? */ if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel) break; /* Have we reached a leaf page? */ if (level == 0) break; /* * Nope. Descend down to the next level then. Choose a child to * descend down to. */ buffer = ReadBuffer(indexrel, blkno); LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); childoffnum = gistchoose(indexrel, page, itup, giststate); iid = PageGetItemId(page, childoffnum); idxtuple = (IndexTuple) PageGetItem(page, iid); childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); if (level > 1) gistMemorizeParent(buildstate, childblkno, blkno); /* * Check that the key representing the target child node is consistent * with the key we're inserting. Update it if it's not. */ newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate); if (newtup) { blkno = gistbufferinginserttuples(buildstate, buffer, level, &newtup, 1, childoffnum, InvalidBlockNumber, InvalidOffsetNumber); /* gistbufferinginserttuples() released the buffer */ } else UnlockReleaseBuffer(buffer); /* Descend to the child */ parentblkno = blkno; blkno = childblkno; downlinkoffnum = childoffnum; Assert(level > 0); level--; } if (LEVEL_HAS_BUFFERS(level, gfbb)) { /* * We've reached level with buffers. Place the index tuple to the * buffer, and add the buffer to the emptying queue if it overflows. */ GISTNodeBuffer *childNodeBuffer; /* Find the buffer or create a new one */ childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level); /* Add index tuple to it */ gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup); if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb)) result = true; } else { /* * We've reached a leaf page. Place the tuple here. */ Assert(level == 0); buffer = ReadBuffer(indexrel, blkno); LockBuffer(buffer, GIST_EXCLUSIVE); gistbufferinginserttuples(buildstate, buffer, level, &itup, 1, InvalidOffsetNumber, parentblkno, downlinkoffnum); /* gistbufferinginserttuples() released the buffer */ } return result; }
void gistValidateBufferingOption | ( | char * | value | ) |
Definition at line 245 of file gistbuild.c.
References ereport, errcode(), errdetail(), errmsg(), ERROR, and NULL.