#include "postgres.h"#include <math.h>#include "access/genam.h"#include "access/gist_private.h"#include "catalog/index.h"#include "miscadmin.h"#include "optimizer/cost.h"#include "storage/bufmgr.h"#include "storage/smgr.h"#include "utils/memutils.h"#include "utils/rel.h"
#define BUFFERING_MODE_SWITCH_CHECK_STEP 256
Definition at line 30 of file gistbuild.c.
Referenced by gistBuildCallback().
#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
Definition at line 38 of file gistbuild.c.
Referenced by gistBuildCallback().
enum GistBufferingMode
Definition at line 40 of file gistbuild.c.
{
GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to
* switch */
GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to
* buffering build mode if the index grows too
* big */
GIST_BUFFERING_STATS, /* gathering statistics of index tuple size
* before switching to the buffering build
* mode */
GIST_BUFFERING_ACTIVE /* in buffering build mode */
} GistBufferingMode;
static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
Definition at line 429 of file gistbuild.c.
References GISTBuildState::freespace, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, rint(), and SizeOfPageHeaderData.
Referenced by gistBuildCallback(), and gistInitBuffering().
{
double pagesPerBuffer;
double avgIndexTuplesPerPage;
double itupAvgSize;
Size pageFreeSpace;
/* Calculate the space on an index page that is available for index tuples */
pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
- sizeof(ItemIdData)
- buildstate->freespace;
/*
* Calculate average size of already inserted index tuples using gathered
* statistics.
*/
itupAvgSize = (double) buildstate->indtuplesSize /
(double) buildstate->indtuples;
avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
/*
* Recalculate required size of buffers.
*/
pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);
return (int) rint(pagesPerBuffer);
}
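For illustration, here is a minimal, self-contained sketch of the same calculation with assumed example numbers (8 kB pages, roughly 40-byte index tuples, a fillfactor-derived reserve of 819 bytes, and levelStep = 1). None of these figures come from gistbuild.c itself; the real code derives them from the statistics gathered during the build.
#include <math.h>
#include <stdio.h>
int
main(void)
{
    /* Assumed example values; the real build computes these from statistics. */
    double pageFreeSpace = 8192 - 24 - 16 - 4 - 819;   /* page minus overheads and fillfactor reserve */
    double itupAvgSize = 40.0;                          /* assumed average index tuple size */
    int    levelStep = 1;                               /* assumed */
    double avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;    /* roughly 183 */
    double pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);

    /* prints roughly 366: each buffer holds about two pages worth of fanout */
    printf("pagesPerBuffer = %d\n", (int) rint(pagesPerBuffer));
    return 0;
}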
static void gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup)
Definition at line 534 of file gistbuild.c.
References GISTBuildState::gfbb, gistProcessEmptyingQueue(), gistProcessItup(), and GISTBuildBuffers::rootlevel.
Referenced by gistBuildCallback().
{
/* Insert the tuple to buffers. */
gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);
/* If we filled up (half of a) buffer, process buffer emptying. */
gistProcessEmptyingQueue(buildstate);
}
static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate,
                                             BlockNumber childblkno, int level,
                                             BlockNumber *parentblkno,
                                             OffsetNumber *downlinkoffnum)
Definition at line 844 of file gistbuild.c.
References BufferGetPage, elog, ERROR, FirstOffsetNumber, GIST_EXCLUSIVE, gistcheckpage(), gistGetParent(), GISTBuildState::indexrel, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LockBuffer(), OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, ReadBuffer(), and IndexTupleData::t_tid.
Referenced by gistbufferinginserttuples().
{
BlockNumber parent;
Buffer buffer;
Page page;
OffsetNumber maxoff;
OffsetNumber off;
if (level > 0)
parent = gistGetParent(buildstate, childblkno);
else
{
/*
* For a leaf page, the caller must supply a correct parent block
* number.
*/
if (*parentblkno == InvalidBlockNumber)
elog(ERROR, "no parent buffer provided of child %d", childblkno);
parent = *parentblkno;
}
buffer = ReadBuffer(buildstate->indexrel, parent);
page = BufferGetPage(buffer);
LockBuffer(buffer, GIST_EXCLUSIVE);
gistcheckpage(buildstate->indexrel, buffer);
maxoff = PageGetMaxOffsetNumber(page);
/* Check if it was not moved */
if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
*downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
{
ItemId iid = PageGetItemId(page, *downlinkoffnum);
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
{
/* Still there */
return buffer;
}
}
/*
* Downlink was not at the offset where it used to be. Scan the page to
* find it. During normal gist insertions, it might've moved to another
* page, to the right, but during a buffering build, we keep track of the
* parent of each page in the lookup table so we should always know what
* page it's on.
*/
for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
{
ItemId iid = PageGetItemId(page, off);
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
{
/* yes!!, found it */
*downlinkoffnum = off;
return buffer;
}
}
elog(ERROR, "failed to re-find parent for block %u", childblkno);
return InvalidBuffer; /* keep compiler quiet */
}
static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer,
                                             int level, IndexTuple *itup, int ntup,
                                             OffsetNumber oldoffnum, BlockNumber parentblk,
                                             OffsetNumber downlinkoffnum)
Definition at line 676 of file gistbuild.c.
References Assert, GISTPageSplitInfo::buf, BufferGetBlockNumber(), BufferGetPage, DEBUG2, GISTPageSplitInfo::downlink, elog, FirstOffsetNumber, GISTBuildState::freespace, GISTBuildState::gfbb, GIST_ROOT_BLKNO, GIST_SHARE, gistBufferingFindCorrectParent(), gistMemorizeAllDownlinks(), gistMemorizeParent(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), GISTBuildState::giststate, i, GISTBuildState::indexrel, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, ItemPointerGetBlockNumber, lfirst, list_free_deep(), list_length(), LockBuffer(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, palloc(), ReadBuffer(), GISTBuildBuffers::rootlevel, IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistProcessItup().
{
GISTBuildBuffers *gfbb = buildstate->gfbb;
List *splitinfo;
bool is_split;
BlockNumber placed_to_blk = InvalidBlockNumber;
is_split = gistplacetopage(buildstate->indexrel,
buildstate->freespace,
buildstate->giststate,
buffer,
itup, ntup, oldoffnum, &placed_to_blk,
InvalidBuffer,
&splitinfo,
false);
/*
* If this is a root split, update the root path item kept in memory. This
* ensures that all path stacks are always complete, including all parent
* nodes up to the root. That simplifies the algorithm to re-find correct
* parent.
*/
if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
{
Page page = BufferGetPage(buffer);
OffsetNumber off;
OffsetNumber maxoff;
Assert(level == gfbb->rootlevel);
gfbb->rootlevel++;
elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel);
/*
* All the downlinks on the old root page are now on one of the child
* pages. Visit all the new child pages to memorize the parents of the
* grandchildren.
*/
if (gfbb->rootlevel > 1)
{
maxoff = PageGetMaxOffsetNumber(page);
for (off = FirstOffsetNumber; off <= maxoff; off++)
{
ItemId iid = PageGetItemId(page, off);
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno);
LockBuffer(childbuf, GIST_SHARE);
gistMemorizeAllDownlinks(buildstate, childbuf);
UnlockReleaseBuffer(childbuf);
/*
* Also remember that the parent of the new child page is the
* root block.
*/
gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
}
}
}
if (splitinfo)
{
/*
* Insert the downlinks to the parent. This is analogous with
* gistfinishsplit() in the regular insertion code, but the locking is
* simpler, and we have to maintain the buffers on internal nodes and
* the parent map.
*/
IndexTuple *downlinks;
int ndownlinks,
i;
Buffer parentBuffer;
ListCell *lc;
/* Parent may have changed since we memorized this path. */
parentBuffer =
gistBufferingFindCorrectParent(buildstate,
BufferGetBlockNumber(buffer),
level,
&parentblk,
&downlinkoffnum);
/*
* If there's a buffer associated with this page, that needs to be
* split too. gistRelocateBuildBuffersOnSplit() will also adjust the
* downlinks in 'splitinfo', to make sure they're consistent not only
* with the tuples already on the pages, but also the tuples in the
* buffers that will eventually be inserted to them.
*/
gistRelocateBuildBuffersOnSplit(gfbb,
buildstate->giststate,
buildstate->indexrel,
level,
buffer, splitinfo);
/* Create an array of all the downlink tuples */
ndownlinks = list_length(splitinfo);
downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks);
i = 0;
foreach(lc, splitinfo)
{
GISTPageSplitInfo *splitinfo = lfirst(lc);
/*
* Remember the parent of each new child page in our parent map.
* This assumes that the downlinks fit on the parent page. If the
* parent page is split, too, when we recurse up to insert the
* downlinks, the recursive gistbufferinginserttuples() call will
* update the map again.
*/
if (level > 0)
gistMemorizeParent(buildstate,
BufferGetBlockNumber(splitinfo->buf),
BufferGetBlockNumber(parentBuffer));
/*
* Also update the parent map for all the downlinks that got moved
* to a different page. (actually this also loops through the
* downlinks that stayed on the original page, but it does no
* harm).
*/
if (level > 1)
gistMemorizeAllDownlinks(buildstate, splitinfo->buf);
/*
* Since there's no concurrent access, we can release the lower
* level buffers immediately. This includes the original page.
*/
UnlockReleaseBuffer(splitinfo->buf);
downlinks[i++] = splitinfo->downlink;
}
/* Insert them into parent. */
gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
downlinks, ndownlinks, downlinkoffnum,
InvalidBlockNumber, InvalidOffsetNumber);
list_free_deep(splitinfo); /* we don't need this anymore */
}
else
UnlockReleaseBuffer(buffer);
return placed_to_blk;
}
Datum gistbuild(PG_FUNCTION_ARGS)
Definition at line 112 of file gistbuild.c.
References Assert, XLogRecData::buffer, BufferGetBlockNumber(), BufferGetPage, GISTBuildState::bufferingMode, GiSTOptions::bufferingModeOffset, createTempGistContext(), CurrentMemoryContext, XLogRecData::data, DEBUG1, elog, END_CRIT_SECTION, ERROR, F_LEAF, GiSTOptions::fillfactor, fillfactor, freeGISTstate(), GISTBuildState::freespace, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_DISABLED, GIST_BUFFERING_STATS, GIST_ROOT_BLKNO, gistBuildCallback(), gistEmptyAllBuffers(), gistFreeBuildBuffers(), gistGetFakeLSN(), GISTInitBuffer(), gistNewBuffer(), GISTBuildState::giststate, IndexBuildResult::heap_tuples, IndexBuildResult::index_tuples, IndexBuildHeapScan(), GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, initGISTstate(), XLogRecData::len, MarkBufferDirty(), MemoryContextDelete(), MemoryContextSwitchTo(), XLogRecData::next, PageSetLSN, palloc(), PG_GETARG_POINTER, PG_RETURN_POINTER, RelationData::rd_node, RelationData::rd_options, RelationGetNumberOfBlocks, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, GISTSTATE::tempCxt, UnlockReleaseBuffer(), XLOG_GIST_CREATE_INDEX, and XLogInsert().
{
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
IndexBuildResult *result;
double reltuples;
GISTBuildState buildstate;
Buffer buffer;
Page page;
MemoryContext oldcxt = CurrentMemoryContext;
int fillfactor;
buildstate.indexrel = index;
if (index->rd_options)
{
/* Get buffering mode from the options string */
GiSTOptions *options = (GiSTOptions *) index->rd_options;
char *bufferingMode = (char *) options + options->bufferingModeOffset;
if (strcmp(bufferingMode, "on") == 0)
buildstate.bufferingMode = GIST_BUFFERING_STATS;
else if (strcmp(bufferingMode, "off") == 0)
buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
else
buildstate.bufferingMode = GIST_BUFFERING_AUTO;
fillfactor = options->fillfactor;
}
else
{
/*
* By default, switch to buffering mode when the index grows too large
* to fit in cache.
*/
buildstate.bufferingMode = GIST_BUFFERING_AUTO;
fillfactor = GIST_DEFAULT_FILLFACTOR;
}
/* Calculate target amount of free space to leave on pages */
buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
/*
* We expect to be called exactly once for any index relation. If that's
* not the case, big trouble's what we have.
*/
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
/* no locking is needed */
buildstate.giststate = initGISTstate(index);
/*
* Create a temporary memory context that is reset once for each tuple
* processed. (Note: we don't bother to make this a child of the
* giststate's scanCxt, so we have to delete it separately at the end.)
*/
buildstate.giststate->tempCxt = createTempGistContext();
/* initialize the root page */
buffer = gistNewBuffer(index);
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
page = BufferGetPage(buffer);
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
MarkBufferDirty(buffer);
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
XLogRecData rdata;
rdata.data = (char *) &(index->rd_node);
rdata.len = sizeof(RelFileNode);
rdata.buffer = InvalidBuffer;
rdata.next = NULL;
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
PageSetLSN(page, recptr);
}
else
PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
/* build the index */
buildstate.indtuples = 0;
buildstate.indtuplesSize = 0;
/*
* Do the heap scan.
*/
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
gistBuildCallback, (void *) &buildstate);
/*
* If buffering was used, flush out all the tuples that are still in the
* buffers.
*/
if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
{
elog(DEBUG1, "all tuples processed, emptying buffers");
gistEmptyAllBuffers(&buildstate);
gistFreeBuildBuffers(buildstate.gfbb);
}
/* okay, all heap tuples are indexed */
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(buildstate.giststate->tempCxt);
freeGISTstate(buildstate.giststate);
/*
* Return statistics
*/
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = (double) buildstate.indtuples;
PG_RETURN_POINTER(result);
}
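As a worked example of the freespace calculation above: with the default 8 kB block size and a fillfactor of 90 (a value one might set through the index's reloptions, e.g. WITH (fillfactor = 90) at index creation), 10% of each page is reserved. The standalone sketch below simply replays that arithmetic; the concrete numbers are assumptions, not values taken from gistbuild.c.
#include <stdio.h>
int
main(void)
{
    int blcksz = 8192;      /* assumed BLCKSZ */
    int fillfactor = 90;    /* assumed reloption value */
    int freespace = blcksz * (100 - fillfactor) / 100;

    /* prints 819: bytes left unused on each page during the build */
    printf("freespace = %d bytes\n", freespace);
    return 0;
}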
static void gistBuildCallback(Relation index, HeapTuple htup, Datum *values,
                              bool *isnull, bool tupleIsAlive, void *state)
Definition at line 462 of file gistbuild.c.
References BUFFERING_MODE_SWITCH_CHECK_STEP, BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET, GISTBuildState::bufferingMode, calculatePagesPerBuffer(), effective_cache_size, GISTBuildState::freespace, GISTBuildState::gfbb, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_STATS, gistBufferingBuildInsert(), gistdoinsert(), gistFormTuple(), gistInitBuffering(), GISTBuildState::giststate, IndexTupleSize, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, GISTBuildBuffers::levelStep, MAIN_FORKNUM, MemoryContextReset(), MemoryContextSwitchTo(), GISTBuildBuffers::pagesPerBuffer, RelationData::rd_smgr, smgrnblocks(), HeapTupleData::t_self, IndexTupleData::t_tid, and GISTSTATE::tempCxt.
Referenced by gistbuild().
{
GISTBuildState *buildstate = (GISTBuildState *) state;
IndexTuple itup;
MemoryContext oldCtx;
oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
/* form an index tuple and point it at the heap tuple */
itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
itup->t_tid = htup->t_self;
if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
{
/* We have buffers, so use them. */
gistBufferingBuildInsert(buildstate, itup);
}
else
{
/*
* There are no buffers (yet). Since we already have the index relation
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
buildstate->giststate);
}
/* Update tuple count and total size. */
buildstate->indtuples += 1;
buildstate->indtuplesSize += IndexTupleSize(itup);
MemoryContextSwitchTo(oldCtx);
MemoryContextReset(buildstate->giststate->tempCxt);
if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
{
/* Adjust the target buffer size now */
buildstate->gfbb->pagesPerBuffer =
calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep);
}
/*
* In 'auto' mode, check if the index has grown too large to fit in cache,
* and switch to buffering mode if it has.
*
* To avoid excessive calls to smgrnblocks(), only check this every
* BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
*/
if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
(buildstate->bufferingMode == GIST_BUFFERING_STATS &&
buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
{
/*
* Index doesn't fit in effective cache anymore. Try to switch to
* buffering build mode.
*/
gistInitBuffering(buildstate);
}
}
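To make the switching condition concrete, the sketch below plugs assumed numbers into the same test: an effective_cache_size of 524288 pages (4 GB of 8 kB pages) and an index that has grown to 600000 pages. In 'auto' mode the check only runs on every 256th tuple; in 'stats' mode the switch is attempted after 4096 tuples regardless of index size. All figures here are illustrative assumptions.
#include <stdbool.h>
#include <stdio.h>
int
main(void)
{
    long effective_cache_size = 524288; /* assumed, in pages */
    long index_pages = 600000;          /* assumed current index size, in pages */
    long indtuples = 1024000;           /* assumed tuples inserted so far */

    /* 'auto' mode: check every 256th tuple whether the index outgrew the cache */
    bool switch_now = (indtuples % 256 == 0) &&
                      (effective_cache_size < index_pages);

    printf("switch to buffering: %s\n", switch_now ? "yes" : "no");    /* yes */
    return 0;
}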
static void gistEmptyAllBuffers(GISTBuildState *buildstate)
Definition at line 991 of file gistbuild.c.
References GISTNodeBuffer::blocksCount, GISTBuildBuffers::bufferEmptyingQueue, GISTBuildBuffers::buffersOnLevels, GISTBuildBuffers::buffersOnLevelsLen, GISTBuildBuffers::context, DEBUG2, elog, GISTBuildState::gfbb, gistProcessEmptyingQueue(), GISTBuildState::giststate, i, lcons(), linitial, list_delete_first(), MemoryContextSwitchTo(), NIL, GISTNodeBuffer::queuedForEmptying, and GISTSTATE::tempCxt.
Referenced by gistbuild().
{
GISTBuildBuffers *gfbb = buildstate->gfbb;
MemoryContext oldCtx;
int i;
oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
/*
* Iterate through the levels from top to bottom.
*/
for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--)
{
/*
* Empty all buffers on this level. Note that new buffers can pop up
* in the list during the processing, as a result of page splits, so a
* simple walk through the list won't work. We remove buffers from the
* list when we see them empty; a buffer can't become non-empty once
* it's been fully emptied.
*/
while (gfbb->buffersOnLevels[i] != NIL)
{
GISTNodeBuffer *nodeBuffer;
nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]);
if (nodeBuffer->blocksCount != 0)
{
/*
* Add this buffer to the emptying queue, and proceed to empty
* the queue.
*/
if (!nodeBuffer->queuedForEmptying)
{
MemoryContextSwitchTo(gfbb->context);
nodeBuffer->queuedForEmptying = true;
gfbb->bufferEmptyingQueue =
lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
MemoryContextSwitchTo(buildstate->giststate->tempCxt);
}
gistProcessEmptyingQueue(buildstate);
}
else
gfbb->buffersOnLevels[i] =
list_delete_first(gfbb->buffersOnLevels[i]);
}
elog(DEBUG2, "emptied all buffers at level %d", i);
}
MemoryContextSwitchTo(oldCtx);
}
static int gistGetMaxLevel(Relation index)
Definition at line 1046 of file gistbuild.c.
References BufferGetPage, FirstOffsetNumber, GIST_SHARE, GistPageIsLeaf, ItemPointerGetBlockNumber, LockBuffer(), PageGetItem, PageGetItemId, ReadBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistInitBuffering().
{
int maxLevel;
BlockNumber blkno;
/*
* Traverse down the tree, starting from the root, until we hit the leaf
* level.
*/
maxLevel = 0;
blkno = GIST_ROOT_BLKNO;
while (true)
{
Buffer buffer;
Page page;
IndexTuple itup;
buffer = ReadBuffer(index, blkno);
/*
* There's no concurrent access during index build, so locking is just
* pro forma.
*/
LockBuffer(buffer, GIST_SHARE);
page = (Page) BufferGetPage(buffer);
if (GistPageIsLeaf(page))
{
/* We hit the bottom, so we're done. */
UnlockReleaseBuffer(buffer);
break;
}
/*
* Pick the first downlink on the page, and follow it. It doesn't
* matter which downlink we choose, the tree has the same depth
* everywhere, so we just pick the first one.
*/
itup = (IndexTuple) PageGetItem(page,
PageGetItemId(page, FirstOffsetNumber));
blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
UnlockReleaseBuffer(buffer);
/*
* We're going down the tree, which means there is at least one more
* level below this one.
*/
maxLevel++;
}
return maxLevel;
}
static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child)
Definition at line 1188 of file gistbuild.c.
References elog, ERROR, hash_search(), ParentMapEntry::parentblkno, and GISTBuildState::parentMap.
Referenced by gistBufferingFindCorrectParent().
{
ParentMapEntry *entry;
bool found;
/* Find node buffer in hash table */
entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
(const void *) &child,
HASH_FIND,
&found);
if (!found)
elog(ERROR, "could not find parent of block %d in lookup table", child);
return entry->parentblkno;
}
static void gistInitBuffering(GISTBuildState *buildstate)
Definition at line 268 of file gistbuild.c.
References tupleDesc::attrs, GISTBuildState::bufferingMode, calculatePagesPerBuffer(), DEBUG1, effective_cache_size, elog, GISTBuildState::freespace, GISTBuildState::gfbb, gistGetMaxLevel(), gistInitBuildBuffers(), gistInitParentMap(), i, GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, maintenance_work_mem, MAXALIGN, tupleDesc::natts, RelationData::rd_att, and SizeOfPageHeaderData.
Referenced by gistBuildCallback().
{
Relation index = buildstate->indexrel;
int pagesPerBuffer;
Size pageFreeSpace;
Size itupAvgSize,
itupMinSize;
double avgIndexTuplesPerPage,
maxIndexTuplesPerPage;
int i;
int levelStep;
/* Calc space of index page which is available for index tuples */
pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
- sizeof(ItemIdData)
- buildstate->freespace;
/*
* Calculate average size of already inserted index tuples using gathered
* statistics.
*/
itupAvgSize = (double) buildstate->indtuplesSize /
(double) buildstate->indtuples;
/*
* Calculate minimal possible size of index tuple by index metadata.
* Minimal possible size of varlena is VARHDRSZ.
*
* XXX: that's not actually true, as a short varlen can be just 2 bytes.
* And we should take padding into account here.
*/
itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
for (i = 0; i < index->rd_att->natts; i++)
{
if (index->rd_att->attrs[i]->attlen < 0)
itupMinSize += VARHDRSZ;
else
itupMinSize += index->rd_att->attrs[i]->attlen;
}
/* Calculate the average and maximum number of index tuples that fit on a page */
avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;
/*
* We need to calculate two parameters for the buffering algorithm:
* levelStep and pagesPerBuffer.
*
* levelStep determines the size of subtree that we operate on, while
* emptying a buffer. A higher value is better, as you need fewer buffer
* emptying steps to build the index. However, if you set it too high, the
* subtree doesn't fit in cache anymore, and you quickly lose the benefit
* of the buffers.
*
* In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
* the number of tuples on page (ie. fanout), and M is the amount of
* internal memory available. Curiously, they don't explain *why* that
* setting is optimal. We calculate it by taking the highest levelStep so
* that a subtree still fits in cache. For a small B, our way of
* calculating levelStep is very close to Arge et al's formula. For a
* large B, our formula gives a value that is 2x higher.
*
* The average size (in pages) of a subtree of depth n can be calculated
* as a geometric series:
*
* B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
*
* where B is the average number of index tuples on page. The subtree is
* cached in the shared buffer cache and the OS cache, so we choose
* levelStep so that the subtree size is comfortably smaller than
* effective_cache_size, with a safety factor of 4.
*
* The estimate on the average number of index tuples on page is based on
* average tuple sizes observed before switching to buffered build, so the
* real subtree size can be somewhat larger. Also, it would be selfish to
* gobble the whole cache for our index build. The safety factor of 4
* should account for those effects.
*
* The other limiting factor for setting levelStep is that while
* processing a subtree, we need to hold one page for each buffer at the
* next lower buffered level. The max. number of buffers needed for that
* is maxIndexTuplesPerPage^levelStep. This is very conservative, but
* hopefully maintenance_work_mem is set high enough that you're
* constrained by effective_cache_size rather than maintenance_work_mem.
*
* XXX: the buffer hash table consumes a fair amount of memory too per
* buffer, but that is not currently taken into account. That scales on
* the total number of buffers used, ie. the index size and on levelStep.
* Note that a higher levelStep *reduces* the amount of memory needed for
* the hash table.
*/
levelStep = 1;
for (;;)
{
double subtreesize;
double maxlowestlevelpages;
/* size of an average subtree at this levelStep (in pages). */
subtreesize =
(1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
(1 - avgIndexTuplesPerPage);
/* max number of pages at the lowest level of a subtree */
maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);
/* subtree must fit in cache (with safety factor of 4) */
if (subtreesize > effective_cache_size / 4)
break;
/* each node in the lowest level of a subtree has one page in memory */
if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
break;
/* Good, we can handle this levelStep. See if we can go one higher. */
levelStep++;
}
/*
* The loop above stopped at the first unacceptable value of levelStep,
* so back off to the last acceptable one.
*/
levelStep--;
/*
* If there's not enough cache or maintenance_work_mem, fall back to plain
* inserts.
*/
if (levelStep <= 0)
{
elog(DEBUG1, "failed to switch to buffered GiST build");
buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
return;
}
/*
* The second parameter to set is pagesPerBuffer, which determines the
* size of each buffer. We adjust pagesPerBuffer also during the build,
* which is why this calculation is in a separate function.
*/
pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);
/* Initialize GISTBuildBuffers with these parameters */
buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
gistGetMaxLevel(index));
gistInitParentMap(buildstate);
buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
levelStep, pagesPerBuffer);
}
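The levelStep search above is easiest to follow with concrete numbers. The sketch below assumes about 200 index tuples per page on average, a maximum fanout of 400 tuples per page, an effective_cache_size of 524288 pages (4 GB of 8 kB pages), and maintenance_work_mem of 65536 kB. Under those assumptions the subtree size stays well under a quarter of the cache at levelStep = 1, but the in-memory page requirement (400^2 = 160000 pages) exceeds maintenance_work_mem at levelStep = 2, so the loop settles on levelStep = 1. The numbers are illustrative only.
#include <math.h>
#include <stdio.h>
int
main(void)
{
    double avgIndexTuplesPerPage = 200.0;   /* assumed average fanout */
    double maxIndexTuplesPerPage = 400.0;   /* assumed max fanout (minimal-size tuples) */
    double effective_cache_size = 524288;   /* assumed, in pages */
    double maintenance_work_mem = 65536;    /* assumed, in kB */
    int    blcksz = 8192;
    int    levelStep = 1;

    for (;;)
    {
        /* pages in an average subtree of depth levelStep: geometric series */
        double subtreesize =
            (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
            (1 - avgIndexTuplesPerPage);
        /* one page held in memory per buffer at the subtree's lowest level */
        double maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);

        if (subtreesize > effective_cache_size / 4)
            break;
        if (maxlowestlevelpages > maintenance_work_mem * 1024 / blcksz)
            break;
        levelStep++;
    }
    levelStep--;    /* back off to the last acceptable value */

    printf("levelStep = %d\n", levelStep);  /* prints 1 under these assumptions */
    return 0;
}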
static void gistInitParentMap(GISTBuildState *buildstate)
Definition at line 1135 of file gistbuild.c.
References CurrentMemoryContext, HASHCTL::entrysize, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, HASHCTL::hcxt, HASHCTL::keysize, and GISTBuildState::parentMap.
Referenced by gistInitBuffering().
{
HASHCTL hashCtl;
hashCtl.keysize = sizeof(BlockNumber);
hashCtl.entrysize = sizeof(ParentMapEntry);
hashCtl.hcxt = CurrentMemoryContext;
hashCtl.hash = oid_hash;
buildstate->parentMap = hash_create("gistbuild parent map",
1024,
&hashCtl,
HASH_ELEM | HASH_CONTEXT
| HASH_FUNCTION);
}
static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf)
Definition at line 1167 of file gistbuild.c.
References Assert, BufferGetBlockNumber(), BufferGetPage, FirstOffsetNumber, gistMemorizeParent(), GistPageIsLeaf, ItemPointerGetBlockNumber, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and IndexTupleData::t_tid.
Referenced by gistbufferinginserttuples().
{
OffsetNumber maxoff;
OffsetNumber off;
BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
Page page = BufferGetPage(parentbuf);
Assert(!GistPageIsLeaf(page));
maxoff = PageGetMaxOffsetNumber(page);
for (off = FirstOffsetNumber; off <= maxoff; off++)
{
ItemId iid = PageGetItemId(page, off);
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
gistMemorizeParent(buildstate, childblkno, parentblkno);
}
}
static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent)
Definition at line 1151 of file gistbuild.c.
References hash_search(), ParentMapEntry::parentblkno, and GISTBuildState::parentMap.
Referenced by gistbufferinginserttuples(), gistMemorizeAllDownlinks(), and gistProcessItup().
{
ParentMapEntry *entry;
bool found;
entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
(const void *) &child,
HASH_ENTER,
&found);
entry->parentblkno = parent;
}
static void gistProcessEmptyingQueue(GISTBuildState *buildstate)
Definition at line 918 of file gistbuild.c.
References GISTBuildBuffers::bufferEmptyingQueue, GISTBuildState::gfbb, gistPopItupFromNodeBuffer(), gistProcessItup(), GISTBuildState::giststate, gistUnloadNodeBuffers(), GISTNodeBuffer::level, linitial, list_delete_first(), MemoryContextReset(), NIL, GISTNodeBuffer::nodeBlocknum, GISTNodeBuffer::queuedForEmptying, and GISTSTATE::tempCxt.
Referenced by gistBufferingBuildInsert(), and gistEmptyAllBuffers().
{
GISTBuildBuffers *gfbb = buildstate->gfbb;
/* Iterate while we have elements in buffers emptying stack. */
while (gfbb->bufferEmptyingQueue != NIL)
{
GISTNodeBuffer *emptyingNodeBuffer;
/* Get node buffer from emptying stack. */
emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue);
gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue);
emptyingNodeBuffer->queuedForEmptying = false;
/*
* We are about to load the last pages of the buffers that this emptying
* will write into, so unload any previously loaded buffers first.
*/
gistUnloadNodeBuffers(gfbb);
/*
* Pop tuples from the buffer and run them down to the buffers at
* lower level, or leaf pages. We continue until one of the lower
* level buffers fills up, or this buffer runs empty.
*
* In Arge et al's paper, the buffer emptying is stopped after
* processing 1/2 node buffer worth of tuples, to avoid overfilling
* any of the lower level buffers. However, it's more efficient to
* keep going until one of the lower level buffers actually fills up,
* so that's what we do. This doesn't need to be exact, if a buffer
* overfills by a few tuples, there's no harm done.
*/
while (true)
{
IndexTuple itup;
/* Get next index tuple from the buffer */
if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
break;
/*
* Run it down to the underlying node buffer or leaf page.
*
* Note: it's possible that the buffer we're emptying splits as a
* result of this call. If that happens, our emptyingNodeBuffer
* points to the left half of the split. After split, it's very
* likely that the new left buffer is no longer over the half-full
* threshold, but we might as well keep flushing tuples from it
* until we fill a lower-level buffer.
*/
if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level))
{
/*
* A lower level buffer filled up. Stop emptying this buffer,
* to avoid overflowing the lower level buffer.
*/
break;
}
/* Free all the memory allocated during index tuple processing */
MemoryContextReset(buildstate->giststate->tempCxt);
}
}
}
static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
                            BlockNumber startblkno, int startlevel)
Definition at line 550 of file gistbuild.c.
References Assert, BUFFER_OVERFLOWED, BufferGetPage, CHECK_FOR_INTERRUPTS, GISTBuildState::gfbb, GIST_EXCLUSIVE, gistbufferinginserttuples(), gistchoose(), gistgetadjusted(), gistGetNodeBuffer(), gistMemorizeParent(), gistPushItupToNodeBuffer(), GISTBuildState::giststate, GISTBuildState::indexrel, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LEVEL_HAS_BUFFERS, LockBuffer(), PageGetItem, PageGetItemId, ReadBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistBufferingBuildInsert(), and gistProcessEmptyingQueue().
{
GISTSTATE *giststate = buildstate->giststate;
GISTBuildBuffers *gfbb = buildstate->gfbb;
Relation indexrel = buildstate->indexrel;
BlockNumber childblkno;
Buffer buffer;
bool result = false;
BlockNumber blkno;
int level;
OffsetNumber downlinkoffnum = InvalidOffsetNumber;
BlockNumber parentblkno = InvalidBlockNumber;
CHECK_FOR_INTERRUPTS();
/*
* Loop until we reach a leaf page (level == 0) or a level with buffers
* (not including the level we start at, because we would otherwise make
* no progress).
*/
blkno = startblkno;
level = startlevel;
for (;;)
{
ItemId iid;
IndexTuple idxtuple,
newtup;
Page page;
OffsetNumber childoffnum;
/* Have we reached a level with buffers? */
if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel)
break;
/* Have we reached a leaf page? */
if (level == 0)
break;
/*
* Nope. Descend down to the next level then. Choose a child to
* descend down to.
*/
buffer = ReadBuffer(indexrel, blkno);
LockBuffer(buffer, GIST_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
childoffnum = gistchoose(indexrel, page, itup, giststate);
iid = PageGetItemId(page, childoffnum);
idxtuple = (IndexTuple) PageGetItem(page, iid);
childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
if (level > 1)
gistMemorizeParent(buildstate, childblkno, blkno);
/*
* Check that the key representing the target child node is consistent
* with the key we're inserting. Update it if it's not.
*/
newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate);
if (newtup)
{
blkno = gistbufferinginserttuples(buildstate, buffer, level,
&newtup, 1, childoffnum,
InvalidBlockNumber, InvalidOffsetNumber);
/* gistbufferinginserttuples() released the buffer */
}
else
UnlockReleaseBuffer(buffer);
/* Descend to the child */
parentblkno = blkno;
blkno = childblkno;
downlinkoffnum = childoffnum;
Assert(level > 0);
level--;
}
if (LEVEL_HAS_BUFFERS(level, gfbb))
{
/*
* We've reached a level with buffers. Place the index tuple into the
* buffer, and add the buffer to the emptying queue if it overflows.
*/
GISTNodeBuffer *childNodeBuffer;
/* Find the buffer or create a new one */
childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level);
/* Add index tuple to it */
gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb))
result = true;
}
else
{
/*
* We've reached a leaf page. Place the tuple here.
*/
Assert(level == 0);
buffer = ReadBuffer(indexrel, blkno);
LockBuffer(buffer, GIST_EXCLUSIVE);
gistbufferinginserttuples(buildstate, buffer, level,
&itup, 1, InvalidOffsetNumber,
parentblkno, downlinkoffnum);
/* gistbufferinginserttuples() released the buffer */
}
return result;
}
void gistValidateBufferingOption(char *value)
Definition at line 245 of file gistbuild.c.
References ereport, errcode(), errdetail(), errmsg(), ERROR, and NULL.