Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Enumerations | Functions

gistbuild.c File Reference

#include "postgres.h"
#include <math.h>
#include "access/genam.h"
#include "access/gist_private.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Include dependency graph for gistbuild.c:

Go to the source code of this file.

Data Structures

struct  GISTBuildState
struct  ParentMapEntry

Defines

#define BUFFERING_MODE_SWITCH_CHECK_STEP   256
#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET   4096

Enumerations

enum  GistBufferingMode { GIST_BUFFERING_DISABLED, GIST_BUFFERING_AUTO, GIST_BUFFERING_STATS, GIST_BUFFERING_ACTIVE }

Functions

static void gistInitBuffering (GISTBuildState *buildstate)
static int calculatePagesPerBuffer (GISTBuildState *buildstate, int levelStep)
static void gistBuildCallback (Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
static void gistBufferingBuildInsert (GISTBuildState *buildstate, IndexTuple itup)
static bool gistProcessItup (GISTBuildState *buildstate, IndexTuple itup, BlockNumber startblkno, int startlevel)
static BlockNumber gistbufferinginserttuples (GISTBuildState *buildstate, Buffer buffer, int level, IndexTuple *itup, int ntup, OffsetNumber oldoffnum, BlockNumber parentblk, OffsetNumber downlinkoffnum)
static Buffer gistBufferingFindCorrectParent (GISTBuildState *buildstate, BlockNumber childblkno, int level, BlockNumber *parentblk, OffsetNumber *downlinkoffnum)
static void gistProcessEmptyingQueue (GISTBuildState *buildstate)
static void gistEmptyAllBuffers (GISTBuildState *buildstate)
static int gistGetMaxLevel (Relation index)
static void gistInitParentMap (GISTBuildState *buildstate)
static void gistMemorizeParent (GISTBuildState *buildstate, BlockNumber child, BlockNumber parent)
static void gistMemorizeAllDownlinks (GISTBuildState *buildstate, Buffer parent)
static BlockNumber gistGetParent (GISTBuildState *buildstate, BlockNumber child)
Datum gistbuild (PG_FUNCTION_ARGS)
void gistValidateBufferingOption (char *value)

Define Documentation

#define BUFFERING_MODE_SWITCH_CHECK_STEP   256

Definition at line 30 of file gistbuild.c.

Referenced by gistBuildCallback().

#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET   4096

Definition at line 38 of file gistbuild.c.

Referenced by gistBuildCallback().


Enumeration Type Documentation

Enumerator:
GIST_BUFFERING_DISABLED 
GIST_BUFFERING_AUTO 
GIST_BUFFERING_STATS 
GIST_BUFFERING_ACTIVE 

Definition at line 40 of file gistbuild.c.

{
    GIST_BUFFERING_DISABLED,    /* in regular build mode and aren't going to
                                 * switch */
    GIST_BUFFERING_AUTO,        /* in regular build mode, but will switch to
                                 * buffering build mode if the index grows too
                                 * big */
    GIST_BUFFERING_STATS,       /* gathering statistics of index tuple size
                                 * before switching to the buffering build
                                 * mode */
    GIST_BUFFERING_ACTIVE       /* in buffering build mode */
} GistBufferingMode;


Function Documentation

static int calculatePagesPerBuffer ( GISTBuildState buildstate,
int  levelStep 
) [static]

Definition at line 429 of file gistbuild.c.

References GISTBuildState::freespace, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, rint(), and SizeOfPageHeaderData.

Referenced by gistBuildCallback(), and gistInitBuffering().

{
    double      pagesPerBuffer;
    double      avgIndexTuplesPerPage;
    double      itupAvgSize;
    Size        pageFreeSpace;

    /* Calc space of index page which is available for index tuples */
    pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
        - sizeof(ItemIdData)
        - buildstate->freespace;

    /*
     * Calculate average size of already inserted index tuples using gathered
     * statistics.
     */
    itupAvgSize = (double) buildstate->indtuplesSize /
        (double) buildstate->indtuples;

    avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;

    /*
     * Recalculate required size of buffers.
     */
    pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);

    return (int) rint(pagesPerBuffer);
}

static void gistBufferingBuildInsert ( GISTBuildState buildstate,
IndexTuple  itup 
) [static]

Definition at line 534 of file gistbuild.c.

References GISTBuildState::gfbb, gistProcessEmptyingQueue(), gistProcessItup(), and GISTBuildBuffers::rootlevel.

Referenced by gistBuildCallback().

{
    /* Insert the tuple to buffers. */
    gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);

    /* If we filled up (half of a) buffer, process buffer emptying. */
    gistProcessEmptyingQueue(buildstate);
}

static Buffer gistBufferingFindCorrectParent ( GISTBuildState buildstate,
BlockNumber  childblkno,
int  level,
BlockNumber parentblk,
OffsetNumber downlinkoffnum 
) [static]

Definition at line 844 of file gistbuild.c.

References BufferGetPage, elog, ERROR, FirstOffsetNumber, GIST_EXCLUSIVE, gistcheckpage(), gistGetParent(), GISTBuildState::indexrel, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LockBuffer(), OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, ReadBuffer(), and IndexTupleData::t_tid.

Referenced by gistbufferinginserttuples().

{
    BlockNumber parent;
    Buffer      buffer;
    Page        page;
    OffsetNumber maxoff;
    OffsetNumber off;

    if (level > 0)
        parent = gistGetParent(buildstate, childblkno);
    else
    {
        /*
         * For a leaf page, the caller must supply a correct parent block
         * number.
         */
        if (*parentblkno == InvalidBlockNumber)
            elog(ERROR, "no parent buffer provided of child %d", childblkno);
        parent = *parentblkno;
    }

    buffer = ReadBuffer(buildstate->indexrel, parent);
    page = BufferGetPage(buffer);
    LockBuffer(buffer, GIST_EXCLUSIVE);
    gistcheckpage(buildstate->indexrel, buffer);
    maxoff = PageGetMaxOffsetNumber(page);

    /* Check if it was not moved */
    if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
        *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
    {
        ItemId      iid = PageGetItemId(page, *downlinkoffnum);
        IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

        if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
        {
            /* Still there */
            return buffer;
        }
    }

    /*
     * Downlink was not at the offset where it used to be. Scan the page to
     * find it. During normal gist insertions, it might've moved to another
     * page, to the right, but during a buffering build, we keep track of the
     * parent of each page in the lookup table so we should always know what
     * page it's on.
     */
    for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    {
        ItemId      iid = PageGetItemId(page, off);
        IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

        if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
        {
            /* yes!!, found it */
            *downlinkoffnum = off;
            return buffer;
        }
    }

    elog(ERROR, "failed to re-find parent for block %u", childblkno);
    return InvalidBuffer;       /* keep compiler quiet */
}

static BlockNumber gistbufferinginserttuples ( GISTBuildState buildstate,
Buffer  buffer,
int  level,
IndexTuple itup,
int  ntup,
OffsetNumber  oldoffnum,
BlockNumber  parentblk,
OffsetNumber  downlinkoffnum 
) [static]

Definition at line 676 of file gistbuild.c.

References Assert, GISTPageSplitInfo::buf, BufferGetBlockNumber(), BufferGetPage, DEBUG2, GISTPageSplitInfo::downlink, elog, FirstOffsetNumber, GISTBuildState::freespace, GISTBuildState::gfbb, GIST_ROOT_BLKNO, GIST_SHARE, gistBufferingFindCorrectParent(), gistMemorizeAllDownlinks(), gistMemorizeParent(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), GISTBuildState::giststate, i, GISTBuildState::indexrel, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, ItemPointerGetBlockNumber, lfirst, list_free_deep(), list_length(), LockBuffer(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, palloc(), ReadBuffer(), GISTBuildBuffers::rootlevel, IndexTupleData::t_tid, and UnlockReleaseBuffer().

Referenced by gistProcessItup().

{
    GISTBuildBuffers *gfbb = buildstate->gfbb;
    List       *splitinfo;
    bool        is_split;
    BlockNumber placed_to_blk = InvalidBlockNumber;

    is_split = gistplacetopage(buildstate->indexrel,
                               buildstate->freespace,
                               buildstate->giststate,
                               buffer,
                               itup, ntup, oldoffnum, &placed_to_blk,
                               InvalidBuffer,
                               &splitinfo,
                               false);

    /*
     * If this is a root split, update the root path item kept in memory. This
     * ensures that all path stacks are always complete, including all parent
     * nodes up to the root. That simplifies the algorithm to re-find correct
     * parent.
     */
    if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
    {
        Page        page = BufferGetPage(buffer);
        OffsetNumber off;
        OffsetNumber maxoff;

        Assert(level == gfbb->rootlevel);
        gfbb->rootlevel++;

        elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel);

        /*
         * All the downlinks on the old root page are now on one of the child
         * pages. Visit all the new child pages to memorize the parents of the
         * grandchildren.
         */
        if (gfbb->rootlevel > 1)
        {
            maxoff = PageGetMaxOffsetNumber(page);
            for (off = FirstOffsetNumber; off <= maxoff; off++)
            {
                ItemId      iid = PageGetItemId(page, off);
                IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
                BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
                Buffer      childbuf = ReadBuffer(buildstate->indexrel, childblkno);

                LockBuffer(childbuf, GIST_SHARE);
                gistMemorizeAllDownlinks(buildstate, childbuf);
                UnlockReleaseBuffer(childbuf);

                /*
                 * Also remember that the parent of the new child page is the
                 * root block.
                 */
                gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
            }
        }
    }

    if (splitinfo)
    {
        /*
         * Insert the downlinks to the parent. This is analogous with
         * gistfinishsplit() in the regular insertion code, but the locking is
         * simpler, and we have to maintain the buffers on internal nodes and
         * the parent map.
         */
        IndexTuple *downlinks;
        int         ndownlinks,
                    i;
        Buffer      parentBuffer;
        ListCell   *lc;

        /* Parent may have changed since we memorized this path. */
        parentBuffer =
            gistBufferingFindCorrectParent(buildstate,
                                           BufferGetBlockNumber(buffer),
                                           level,
                                           &parentblk,
                                           &downlinkoffnum);

        /*
         * If there's a buffer associated with this page, that needs to be
         * split too. gistRelocateBuildBuffersOnSplit() will also adjust the
         * downlinks in 'splitinfo', to make sure they're consistent not only
         * with the tuples already on the pages, but also the tuples in the
         * buffers that will eventually be inserted to them.
         */
        gistRelocateBuildBuffersOnSplit(gfbb,
                                        buildstate->giststate,
                                        buildstate->indexrel,
                                        level,
                                        buffer, splitinfo);

        /* Create an array of all the downlink tuples */
        ndownlinks = list_length(splitinfo);
        downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks);
        i = 0;
        foreach(lc, splitinfo)
        {
            GISTPageSplitInfo *splitinfo = lfirst(lc);

            /*
             * Remember the parent of each new child page in our parent map.
             * This assumes that the downlinks fit on the parent page. If the
             * parent page is split, too, when we recurse up to insert the
             * downlinks, the recursive gistbufferinginserttuples() call will
             * update the map again.
             */
            if (level > 0)
                gistMemorizeParent(buildstate,
                                   BufferGetBlockNumber(splitinfo->buf),
                                   BufferGetBlockNumber(parentBuffer));

            /*
             * Also update the parent map for all the downlinks that got moved
             * to a different page. (actually this also loops through the
             * downlinks that stayed on the original page, but it does no
             * harm).
             */
            if (level > 1)
                gistMemorizeAllDownlinks(buildstate, splitinfo->buf);

            /*
             * Since there's no concurrent access, we can release the lower
             * level buffers immediately. This includes the original page.
             */
            UnlockReleaseBuffer(splitinfo->buf);
            downlinks[i++] = splitinfo->downlink;
        }

        /* Insert them into parent. */
        gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
                                  downlinks, ndownlinks, downlinkoffnum,
                                  InvalidBlockNumber, InvalidOffsetNumber);

        list_free_deep(splitinfo);      /* we don't need this anymore */
    }
    else
        UnlockReleaseBuffer(buffer);

    return placed_to_blk;
}

Datum gistbuild ( PG_FUNCTION_ARGS   ) 

Definition at line 112 of file gistbuild.c.

References Assert, XLogRecData::buffer, BufferGetBlockNumber(), BufferGetPage, GISTBuildState::bufferingMode, GiSTOptions::bufferingModeOffset, createTempGistContext(), CurrentMemoryContext, XLogRecData::data, DEBUG1, elog, END_CRIT_SECTION, ERROR, F_LEAF, GiSTOptions::fillfactor, fillfactor, freeGISTstate(), GISTBuildState::freespace, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_DISABLED, GIST_BUFFERING_STATS, GIST_ROOT_BLKNO, gistBuildCallback(), gistEmptyAllBuffers(), gistFreeBuildBuffers(), gistGetFakeLSN(), GISTInitBuffer(), gistNewBuffer(), GISTBuildState::giststate, IndexBuildResult::heap_tuples, IndexBuildResult::index_tuples, IndexBuildHeapScan(), GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, initGISTstate(), XLogRecData::len, MarkBufferDirty(), MemoryContextDelete(), MemoryContextSwitchTo(), XLogRecData::next, PageSetLSN, palloc(), PG_GETARG_POINTER, PG_RETURN_POINTER, RelationData::rd_node, RelationData::rd_options, RelationGetNumberOfBlocks, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, GISTSTATE::tempCxt, UnlockReleaseBuffer(), XLOG_GIST_CREATE_INDEX, and XLogInsert().

{
    Relation    heap = (Relation) PG_GETARG_POINTER(0);
    Relation    index = (Relation) PG_GETARG_POINTER(1);
    IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
    IndexBuildResult *result;
    double      reltuples;
    GISTBuildState buildstate;
    Buffer      buffer;
    Page        page;
    MemoryContext oldcxt = CurrentMemoryContext;
    int         fillfactor;

    buildstate.indexrel = index;
    if (index->rd_options)
    {
        /* Get buffering mode from the options string */
        GiSTOptions *options = (GiSTOptions *) index->rd_options;
        char       *bufferingMode = (char *) options + options->bufferingModeOffset;

        if (strcmp(bufferingMode, "on") == 0)
            buildstate.bufferingMode = GIST_BUFFERING_STATS;
        else if (strcmp(bufferingMode, "off") == 0)
            buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
        else
            buildstate.bufferingMode = GIST_BUFFERING_AUTO;

        fillfactor = options->fillfactor;
    }
    else
    {
        /*
         * By default, switch to buffering mode when the index grows too large
         * to fit in cache.
         */
        buildstate.bufferingMode = GIST_BUFFERING_AUTO;
        fillfactor = GIST_DEFAULT_FILLFACTOR;
    }
    /* Calculate target amount of free space to leave on pages */
    buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;

    /*
     * We expect to be called exactly once for any index relation. If that's
     * not the case, big trouble's what we have.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    /* no locking is needed */
    buildstate.giststate = initGISTstate(index);

    /*
     * Create a temporary memory context that is reset once for each tuple
     * processed.  (Note: we don't bother to make this a child of the
     * giststate's scanCxt, so we have to delete it separately at the end.)
     */
    buildstate.giststate->tempCxt = createTempGistContext();

    /* initialize the root page */
    buffer = gistNewBuffer(index);
    Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
    page = BufferGetPage(buffer);

    START_CRIT_SECTION();

    GISTInitBuffer(buffer, F_LEAF);

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;
        XLogRecData rdata;

        rdata.data = (char *) &(index->rd_node);
        rdata.len = sizeof(RelFileNode);
        rdata.buffer = InvalidBuffer;
        rdata.next = NULL;

        recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
        PageSetLSN(page, recptr);
    }
    else
        PageSetLSN(page, gistGetFakeLSN(heap));

    UnlockReleaseBuffer(buffer);

    END_CRIT_SECTION();

    /* build the index */
    buildstate.indtuples = 0;
    buildstate.indtuplesSize = 0;

    /*
     * Do the heap scan.
     */
    reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
                                   gistBuildCallback, (void *) &buildstate);

    /*
     * If buffering was used, flush out all the tuples that are still in the
     * buffers.
     */
    if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
    {
        elog(DEBUG1, "all tuples processed, emptying buffers");
        gistEmptyAllBuffers(&buildstate);
        gistFreeBuildBuffers(buildstate.gfbb);
    }

    /* okay, all heap tuples are indexed */
    MemoryContextSwitchTo(oldcxt);
    MemoryContextDelete(buildstate.giststate->tempCxt);

    freeGISTstate(buildstate.giststate);

    /*
     * Return statistics
     */
    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

    result->heap_tuples = reltuples;
    result->index_tuples = (double) buildstate.indtuples;

    PG_RETURN_POINTER(result);
}

static void gistBuildCallback ( Relation  index,
HeapTuple  htup,
Datum values,
bool isnull,
bool  tupleIsAlive,
void *  state 
) [static]

Definition at line 462 of file gistbuild.c.

References BUFFERING_MODE_SWITCH_CHECK_STEP, BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET, GISTBuildState::bufferingMode, calculatePagesPerBuffer(), effective_cache_size, GISTBuildState::freespace, GISTBuildState::gfbb, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_STATS, gistBufferingBuildInsert(), gistdoinsert(), gistFormTuple(), gistInitBuffering(), GISTBuildState::giststate, IndexTupleSize, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, GISTBuildBuffers::levelStep, MAIN_FORKNUM, MemoryContextReset(), MemoryContextSwitchTo(), GISTBuildBuffers::pagesPerBuffer, RelationData::rd_smgr, smgrnblocks(), HeapTupleData::t_self, IndexTupleData::t_tid, and GISTSTATE::tempCxt.

Referenced by gistbuild().

{
    GISTBuildState *buildstate = (GISTBuildState *) state;
    IndexTuple  itup;
    MemoryContext oldCtx;

    oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);

    /* form an index tuple and point it at the heap tuple */
    itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
    itup->t_tid = htup->t_self;

    if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
    {
        /* We have buffers, so use them. */
        gistBufferingBuildInsert(buildstate, itup);
    }
    else
    {
        /*
         * There's no buffers (yet). Since we already have the index relation
         * locked, we call gistdoinsert directly.
         */
        gistdoinsert(index, itup, buildstate->freespace,
                     buildstate->giststate);
    }

    /* Update tuple count and total size. */
    buildstate->indtuples += 1;
    buildstate->indtuplesSize += IndexTupleSize(itup);

    MemoryContextSwitchTo(oldCtx);
    MemoryContextReset(buildstate->giststate->tempCxt);

    if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
        buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
    {
        /* Adjust the target buffer size now */
        buildstate->gfbb->pagesPerBuffer =
            calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep);
    }

    /*
     * In 'auto' mode, check if the index has grown too large to fit in cache,
     * and switch to buffering mode if it has.
     *
     * To avoid excessive calls to smgrnblocks(), only check this every
     * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
     */
    if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
         buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
         effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
        (buildstate->bufferingMode == GIST_BUFFERING_STATS &&
         buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
    {
        /*
         * Index doesn't fit in effective cache anymore. Try to switch to
         * buffering build mode.
         */
        gistInitBuffering(buildstate);
    }
}

static void gistEmptyAllBuffers ( GISTBuildState buildstate  )  [static]

Definition at line 991 of file gistbuild.c.

References GISTNodeBuffer::blocksCount, GISTBuildBuffers::bufferEmptyingQueue, GISTBuildBuffers::buffersOnLevels, GISTBuildBuffers::buffersOnLevelsLen, GISTBuildBuffers::context, DEBUG2, elog, GISTBuildState::gfbb, gistProcessEmptyingQueue(), GISTBuildState::giststate, i, lcons(), linitial, list_delete_first(), MemoryContextSwitchTo(), NIL, GISTNodeBuffer::queuedForEmptying, and GISTSTATE::tempCxt.

Referenced by gistbuild().

{
    GISTBuildBuffers *gfbb = buildstate->gfbb;
    MemoryContext oldCtx;
    int         i;

    oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);

    /*
     * Iterate through the levels from top to bottom.
     */
    for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--)
    {
        /*
         * Empty all buffers on this level. Note that new buffers can pop up
         * in the list during the processing, as a result of page splits, so a
         * simple walk through the list won't work. We remove buffers from the
         * list when we see them empty; a buffer can't become non-empty once
         * it's been fully emptied.
         */
        while (gfbb->buffersOnLevels[i] != NIL)
        {
            GISTNodeBuffer *nodeBuffer;

            nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]);

            if (nodeBuffer->blocksCount != 0)
            {
                /*
                 * Add this buffer to the emptying queue, and proceed to empty
                 * the queue.
                 */
                if (!nodeBuffer->queuedForEmptying)
                {
                    MemoryContextSwitchTo(gfbb->context);
                    nodeBuffer->queuedForEmptying = true;
                    gfbb->bufferEmptyingQueue =
                        lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
                    MemoryContextSwitchTo(buildstate->giststate->tempCxt);
                }
                gistProcessEmptyingQueue(buildstate);
            }
            else
                gfbb->buffersOnLevels[i] =
                    list_delete_first(gfbb->buffersOnLevels[i]);
        }
        elog(DEBUG2, "emptied all buffers at level %d", i);
    }
    MemoryContextSwitchTo(oldCtx);
}

static int gistGetMaxLevel ( Relation  index  )  [static]

Definition at line 1046 of file gistbuild.c.

References BufferGetPage, FirstOffsetNumber, GIST_SHARE, GistPageIsLeaf, ItemPointerGetBlockNumber, LockBuffer(), PageGetItem, PageGetItemId, ReadBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().

Referenced by gistInitBuffering().

{
    int         maxLevel;
    BlockNumber blkno;

    /*
     * Traverse down the tree, starting from the root, until we hit the leaf
     * level.
     */
    maxLevel = 0;
    blkno = GIST_ROOT_BLKNO;
    while (true)
    {
        Buffer      buffer;
        Page        page;
        IndexTuple  itup;

        buffer = ReadBuffer(index, blkno);

        /*
         * There's no concurrent access during index build, so locking is just
         * pro forma.
         */
        LockBuffer(buffer, GIST_SHARE);
        page = (Page) BufferGetPage(buffer);

        if (GistPageIsLeaf(page))
        {
            /* We hit the bottom, so we're done. */
            UnlockReleaseBuffer(buffer);
            break;
        }

        /*
         * Pick the first downlink on the page, and follow it. It doesn't
         * matter which downlink we choose, the tree has the same depth
         * everywhere, so we just pick the first one.
         */
        itup = (IndexTuple) PageGetItem(page,
                                     PageGetItemId(page, FirstOffsetNumber));
        blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
        UnlockReleaseBuffer(buffer);

        /*
         * We're going down on the tree. It means that there is yet one more
         * level in the tree.
         */
        maxLevel++;
    }
    return maxLevel;
}

static BlockNumber gistGetParent ( GISTBuildState buildstate,
BlockNumber  child 
) [static]

Definition at line 1188 of file gistbuild.c.

References elog, ERROR, hash_search(), ParentMapEntry::parentblkno, and GISTBuildState::parentMap.

Referenced by gistBufferingFindCorrectParent().

{
    ParentMapEntry *entry;
    bool        found;

    /* Find node buffer in hash table */
    entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
                                           (const void *) &child,
                                           HASH_FIND,
                                           &found);
    if (!found)
        elog(ERROR, "could not find parent of block %d in lookup table", child);

    return entry->parentblkno;
}

static void gistInitBuffering ( GISTBuildState buildstate  )  [static]

Definition at line 268 of file gistbuild.c.

References tupleDesc::attrs, GISTBuildState::bufferingMode, calculatePagesPerBuffer(), DEBUG1, effective_cache_size, elog, GISTBuildState::freespace, GISTBuildState::gfbb, gistGetMaxLevel(), gistInitBuildBuffers(), gistInitParentMap(), i, GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, maintenance_work_mem, MAXALIGN, tupleDesc::natts, RelationData::rd_att, and SizeOfPageHeaderData.

Referenced by gistBuildCallback().

{
    Relation    index = buildstate->indexrel;
    int         pagesPerBuffer;
    Size        pageFreeSpace;
    Size        itupAvgSize,
                itupMinSize;
    double      avgIndexTuplesPerPage,
                maxIndexTuplesPerPage;
    int         i;
    int         levelStep;

    /* Calc space of index page which is available for index tuples */
    pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
        - sizeof(ItemIdData)
        - buildstate->freespace;

    /*
     * Calculate average size of already inserted index tuples using gathered
     * statistics.
     */
    itupAvgSize = (double) buildstate->indtuplesSize /
        (double) buildstate->indtuples;

    /*
     * Calculate minimal possible size of index tuple by index metadata.
     * Minimal possible size of varlena is VARHDRSZ.
     *
     * XXX: that's not actually true, as a short varlen can be just 2 bytes.
     * And we should take padding into account here.
     */
    itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
    for (i = 0; i < index->rd_att->natts; i++)
    {
        if (index->rd_att->attrs[i]->attlen < 0)
            itupMinSize += VARHDRSZ;
        else
            itupMinSize += index->rd_att->attrs[i]->attlen;
    }

    /* Calculate average and maximal number of index tuples which fit to page */
    avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
    maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;

    /*
     * We need to calculate two parameters for the buffering algorithm:
     * levelStep and pagesPerBuffer.
     *
     * levelStep determines the size of subtree that we operate on, while
     * emptying a buffer. A higher value is better, as you need fewer buffer
     * emptying steps to build the index. However, if you set it too high, the
     * subtree doesn't fit in cache anymore, and you quickly lose the benefit
     * of the buffers.
     *
     * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
     * the number of tuples on page (ie. fanout), and M is the amount of
     * internal memory available. Curiously, they doesn't explain *why* that
     * setting is optimal. We calculate it by taking the highest levelStep so
     * that a subtree still fits in cache. For a small B, our way of
     * calculating levelStep is very close to Arge et al's formula. For a
     * large B, our formula gives a value that is 2x higher.
     *
     * The average size (in pages) of a subtree of depth n can be calculated
     * as a geometric series:
     *
     * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
     *
     * where B is the average number of index tuples on page. The subtree is
     * cached in the shared buffer cache and the OS cache, so we choose
     * levelStep so that the subtree size is comfortably smaller than
     * effective_cache_size, with a safety factor of 4.
     *
     * The estimate on the average number of index tuples on page is based on
     * average tuple sizes observed before switching to buffered build, so the
     * real subtree size can be somewhat larger. Also, it would selfish to
     * gobble the whole cache for our index build. The safety factor of 4
     * should account for those effects.
     *
     * The other limiting factor for setting levelStep is that while
     * processing a subtree, we need to hold one page for each buffer at the
     * next lower buffered level. The max. number of buffers needed for that
     * is maxIndexTuplesPerPage^levelStep. This is very conservative, but
     * hopefully maintenance_work_mem is set high enough that you're
     * constrained by effective_cache_size rather than maintenance_work_mem.
     *
     * XXX: the buffer hash table consumes a fair amount of memory too per
     * buffer, but that is not currently taken into account. That scales on
     * the total number of buffers used, ie. the index size and on levelStep.
     * Note that a higher levelStep *reduces* the amount of memory needed for
     * the hash table.
     */
    levelStep = 1;
    for (;;)
    {
        double      subtreesize;
        double      maxlowestlevelpages;

        /* size of an average subtree at this levelStep (in pages). */
        subtreesize =
            (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
            (1 - avgIndexTuplesPerPage);

        /* max number of pages at the lowest level of a subtree */
        maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);

        /* subtree must fit in cache (with safety factor of 4) */
        if (subtreesize > effective_cache_size / 4)
            break;

        /* each node in the lowest level of a subtree has one page in memory */
        if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
            break;

        /* Good, we can handle this levelStep. See if we can go one higher. */
        levelStep++;
    }

    /*
     * We just reached an unacceptable value of levelStep in previous loop.
     * So, decrease levelStep to get last acceptable value.
     */
    levelStep--;

    /*
     * If there's not enough cache or maintenance_work_mem, fall back to plain
     * inserts.
     */
    if (levelStep <= 0)
    {
        elog(DEBUG1, "failed to switch to buffered GiST build");
        buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
        return;
    }

    /*
     * The second parameter to set is pagesPerBuffer, which determines the
     * size of each buffer. We adjust pagesPerBuffer also during the build,
     * which is why this calculation is in a separate function.
     */
    pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);

    /* Initialize GISTBuildBuffers with these parameters */
    buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
                                            gistGetMaxLevel(index));

    gistInitParentMap(buildstate);

    buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;

    elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
         levelStep, pagesPerBuffer);
}

static void gistInitParentMap ( GISTBuildState buildstate  )  [static]

Definition at line 1135 of file gistbuild.c.

References CurrentMemoryContext, HASHCTL::entrysize, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, HASHCTL::hcxt, HASHCTL::keysize, and GISTBuildState::parentMap.

Referenced by gistInitBuffering().

{
    HASHCTL     hashCtl;

    hashCtl.keysize = sizeof(BlockNumber);
    hashCtl.entrysize = sizeof(ParentMapEntry);
    hashCtl.hcxt = CurrentMemoryContext;
    hashCtl.hash = oid_hash;
    buildstate->parentMap = hash_create("gistbuild parent map",
                                        1024,
                                        &hashCtl,
                                        HASH_ELEM | HASH_CONTEXT
                                        | HASH_FUNCTION);
}

static void gistMemorizeAllDownlinks ( GISTBuildState buildstate,
Buffer  parent 
) [static]

Definition at line 1167 of file gistbuild.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, FirstOffsetNumber, gistMemorizeParent(), GistPageIsLeaf, ItemPointerGetBlockNumber, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and IndexTupleData::t_tid.

Referenced by gistbufferinginserttuples().

{
    OffsetNumber maxoff;
    OffsetNumber off;
    BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
    Page        page = BufferGetPage(parentbuf);

    Assert(!GistPageIsLeaf(page));

    maxoff = PageGetMaxOffsetNumber(page);
    for (off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId      iid = PageGetItemId(page, off);
        IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
        BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

        gistMemorizeParent(buildstate, childblkno, parentblkno);
    }
}

static void gistMemorizeParent ( GISTBuildState buildstate,
BlockNumber  child,
BlockNumber  parent 
) [static]

Definition at line 1151 of file gistbuild.c.

References hash_search(), ParentMapEntry::parentblkno, and GISTBuildState::parentMap.

Referenced by gistbufferinginserttuples(), gistMemorizeAllDownlinks(), and gistProcessItup().

{
    ParentMapEntry *entry;
    bool        found;

    entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
                                           (const void *) &child,
                                           HASH_ENTER,
                                           &found);
    entry->parentblkno = parent;
}

static void gistProcessEmptyingQueue ( GISTBuildState buildstate  )  [static]

Definition at line 918 of file gistbuild.c.

References GISTBuildBuffers::bufferEmptyingQueue, GISTBuildState::gfbb, gistPopItupFromNodeBuffer(), gistProcessItup(), GISTBuildState::giststate, gistUnloadNodeBuffers(), GISTNodeBuffer::level, linitial, list_delete_first(), MemoryContextReset(), NIL, GISTNodeBuffer::nodeBlocknum, GISTNodeBuffer::queuedForEmptying, and GISTSTATE::tempCxt.

Referenced by gistBufferingBuildInsert(), and gistEmptyAllBuffers().

{
    GISTBuildBuffers *gfbb = buildstate->gfbb;

    /* Iterate while we have elements in buffers emptying stack. */
    while (gfbb->bufferEmptyingQueue != NIL)
    {
        GISTNodeBuffer *emptyingNodeBuffer;

        /* Get node buffer from emptying stack. */
        emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue);
        gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue);
        emptyingNodeBuffer->queuedForEmptying = false;

        /*
         * We are going to load last pages of buffers where emptying will be
         * to. So let's unload any previously loaded buffers.
         */
        gistUnloadNodeBuffers(gfbb);

        /*
         * Pop tuples from the buffer and run them down to the buffers at
         * lower level, or leaf pages. We continue until one of the lower
         * level buffers fills up, or this buffer runs empty.
         *
         * In Arge et al's paper, the buffer emptying is stopped after
         * processing 1/2 node buffer worth of tuples, to avoid overfilling
         * any of the lower level buffers. However, it's more efficient to
         * keep going until one of the lower level buffers actually fills up,
         * so that's what we do. This doesn't need to be exact, if a buffer
         * overfills by a few tuples, there's no harm done.
         */
        while (true)
        {
            IndexTuple  itup;

            /* Get next index tuple from the buffer */
            if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
                break;

            /*
             * Run it down to the underlying node buffer or leaf page.
             *
             * Note: it's possible that the buffer we're emptying splits as a
             * result of this call. If that happens, our emptyingNodeBuffer
             * points to the left half of the split. After split, it's very
             * likely that the new left buffer is no longer over the half-full
             * threshold, but we might as well keep flushing tuples from it
             * until we fill a lower-level buffer.
             */
            if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level))
            {
                /*
                 * A lower level buffer filled up. Stop emptying this buffer,
                 * to avoid overflowing the lower level buffer.
                 */
                break;
            }

            /* Free all the memory allocated during index tuple processing */
            MemoryContextReset(buildstate->giststate->tempCxt);
        }
    }
}

static bool gistProcessItup ( GISTBuildState buildstate,
IndexTuple  itup,
BlockNumber  startblkno,
int  startlevel 
) [static]

Definition at line 550 of file gistbuild.c.

References Assert, BUFFER_OVERFLOWED, BufferGetPage, CHECK_FOR_INTERRUPTS, GISTBuildState::gfbb, GIST_EXCLUSIVE, gistbufferinginserttuples(), gistchoose(), gistgetadjusted(), gistGetNodeBuffer(), gistMemorizeParent(), gistPushItupToNodeBuffer(), GISTBuildState::giststate, GISTBuildState::indexrel, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LEVEL_HAS_BUFFERS, LockBuffer(), PageGetItem, PageGetItemId, ReadBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().

Referenced by gistBufferingBuildInsert(), and gistProcessEmptyingQueue().

{
    GISTSTATE  *giststate = buildstate->giststate;
    GISTBuildBuffers *gfbb = buildstate->gfbb;
    Relation    indexrel = buildstate->indexrel;
    BlockNumber childblkno;
    Buffer      buffer;
    bool        result = false;
    BlockNumber blkno;
    int         level;
    OffsetNumber downlinkoffnum = InvalidOffsetNumber;
    BlockNumber parentblkno = InvalidBlockNumber;

    CHECK_FOR_INTERRUPTS();

    /*
     * Loop until we reach a leaf page (level == 0) or a level with buffers
     * (not including the level we start at, because we would otherwise make
     * no progress).
     */
    blkno = startblkno;
    level = startlevel;
    for (;;)
    {
        ItemId      iid;
        IndexTuple  idxtuple,
                    newtup;
        Page        page;
        OffsetNumber childoffnum;

        /* Have we reached a level with buffers? */
        if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel)
            break;

        /* Have we reached a leaf page? */
        if (level == 0)
            break;

        /*
         * Nope. Descend down to the next level then. Choose a child to
         * descend down to.
         */

        buffer = ReadBuffer(indexrel, blkno);
        LockBuffer(buffer, GIST_EXCLUSIVE);

        page = (Page) BufferGetPage(buffer);
        childoffnum = gistchoose(indexrel, page, itup, giststate);
        iid = PageGetItemId(page, childoffnum);
        idxtuple = (IndexTuple) PageGetItem(page, iid);
        childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

        if (level > 1)
            gistMemorizeParent(buildstate, childblkno, blkno);

        /*
         * Check that the key representing the target child node is consistent
         * with the key we're inserting. Update it if it's not.
         */
        newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate);
        if (newtup)
        {
            blkno  = gistbufferinginserttuples(buildstate, buffer, level,
                                               &newtup, 1, childoffnum,
                                      InvalidBlockNumber, InvalidOffsetNumber);
            /* gistbufferinginserttuples() released the buffer */
        }
        else
            UnlockReleaseBuffer(buffer);

        /* Descend to the child */
        parentblkno = blkno;
        blkno = childblkno;
        downlinkoffnum = childoffnum;
        Assert(level > 0);
        level--;
    }

    if (LEVEL_HAS_BUFFERS(level, gfbb))
    {
        /*
         * We've reached level with buffers. Place the index tuple to the
         * buffer, and add the buffer to the emptying queue if it overflows.
         */
        GISTNodeBuffer *childNodeBuffer;

        /* Find the buffer or create a new one */
        childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level);

        /* Add index tuple to it */
        gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup);

        if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb))
            result = true;
    }
    else
    {
        /*
         * We've reached a leaf page. Place the tuple here.
         */
        Assert(level == 0);
        buffer = ReadBuffer(indexrel, blkno);
        LockBuffer(buffer, GIST_EXCLUSIVE);
        gistbufferinginserttuples(buildstate, buffer, level,
                                  &itup, 1, InvalidOffsetNumber,
                                  parentblkno, downlinkoffnum);
        /* gistbufferinginserttuples() released the buffer */
    }

    return result;
}

void gistValidateBufferingOption ( char *  value  ) 

Definition at line 245 of file gistbuild.c.

References ereport, errcode(), errdetail(), errmsg(), ERROR, and NULL.

{
    if (value == NULL ||
        (strcmp(value, "on") != 0 &&
         strcmp(value, "off") != 0 &&
         strcmp(value, "auto") != 0))
    {
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("invalid value for \"buffering\" option"),
              errdetail("Valid values are \"on\", \"off\", and \"auto\".")));
    }
}