Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Typedefs | Functions

spgvacuum.c File Reference

#include "postgres.h"
#include "access/genam.h"
#include "access/spgist_private.h"
#include "access/transam.h"
#include "catalog/storage_xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/snapmgr.h"
Include dependency graph for spgvacuum.c:

Go to the source code of this file.

Data Structures

struct  spgVacPendingItem
struct  spgBulkDeleteState

Typedefs

typedef struct spgVacPendingItem spgVacPendingItem
typedef struct spgBulkDeleteState spgBulkDeleteState

Functions

static void spgAddPendingTID (spgBulkDeleteState *bds, ItemPointer tid)
static void spgClearPendingList (spgBulkDeleteState *bds)
static void vacuumLeafPage (spgBulkDeleteState *bds, Relation index, Buffer buffer, bool forPending)
static void vacuumLeafRoot (spgBulkDeleteState *bds, Relation index, Buffer buffer)
static void vacuumRedirectAndPlaceholder (Relation index, Buffer buffer)
static void spgvacuumpage (spgBulkDeleteState *bds, BlockNumber blkno)
static void spgprocesspending (spgBulkDeleteState *bds)
static void spgvacuumscan (spgBulkDeleteState *bds)
Datum spgbulkdelete (PG_FUNCTION_ARGS)
static bool dummy_callback (ItemPointer itemptr, void *state)
Datum spgvacuumcleanup (PG_FUNCTION_ARGS)

Typedef Documentation


Function Documentation

static bool dummy_callback ( ItemPointer  itemptr,
void *  state 
) [static]

Definition at line 917 of file spgvacuum.c.

{
    /*
     * Callback that unconditionally answers "false" and examines neither
     * argument.  spgvacuumcleanup installs it as bds.callback, so a scan
     * driven by it never flags a live tuple as deletable.
     */
    return false;
}

static void spgAddPendingTID ( spgBulkDeleteState *  bds,
ItemPointer  tid 
) [static]

Definition at line 62 of file spgvacuum.c.

References spgVacPendingItem::done, ItemPointerEquals(), spgVacPendingItem::next, NULL, palloc(), spgBulkDeleteState::pendingList, and spgVacPendingItem::tid.

Referenced by spgprocesspending(), and vacuumLeafPage().

{
    spgVacPendingItem **tailp;
    spgVacPendingItem *entry;

    /*
     * Add "tid" to bds->pendingList unless an entry with an equal TID is
     * already present.
     *
     * The list is walked through a pointer to each link field, so that when
     * the walk runs off the end "tailp" addresses the NULL link where a new
     * entry has to be attached.
     */
    for (tailp = &bds->pendingList; *tailp != NULL; tailp = &(*tailp)->next)
    {
        if (ItemPointerEquals(tid, &(*tailp)->tid))
            return;             /* TID is already pending; nothing to add */
    }

    /* No match found: build a not-yet-done entry and hook it onto the tail */
    entry = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
    entry->tid = *tid;
    entry->done = false;
    entry->next = NULL;
    *tailp = entry;
}

Datum spgbulkdelete ( PG_FUNCTION_ARGS   ) 

Definition at line 894 of file spgvacuum.c.

References spgBulkDeleteState::callback, callback(), spgBulkDeleteState::callback_state, spgBulkDeleteState::info, NULL, palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, spgvacuumscan(), and spgBulkDeleteState::stats.

{
    spgBulkDeleteState bds;
    IndexBulkDeleteResult *result;

    /*
     * Bulk-delete entry point: run one vacuum scan using the caller's
     * callback and hand back the statistics struct.
     *
     * Argument 1 is the stats struct to re-use; when it is NULL this is the
     * first time through and a zeroed one is allocated here.
     */
    result = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    if (result == NULL)
        result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /* Load the caller-supplied pieces straight into the scan state */
    bds.info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    bds.stats = result;
    bds.callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
    bds.callback_state = (void *) PG_GETARG_POINTER(3);

    /* spgvacuumscan fills in the remaining fields of bds itself */
    spgvacuumscan(&bds);

    PG_RETURN_POINTER(result);
}

static void spgClearPendingList ( spgBulkDeleteState *  bds  )  [static]

Definition at line 88 of file spgvacuum.c.

References Assert, spgVacPendingItem::done, spgVacPendingItem::next, NULL, spgBulkDeleteState::pendingList, and pfree().

Referenced by spgprocesspending().

{
    spgVacPendingItem *cur = bds->pendingList;

    /*
     * Free every entry of the pending list and leave the list empty.  The
     * successor is fetched before the current entry is released.
     */
    while (cur != NULL)
    {
        spgVacPendingItem *following = cur->next;

        /* By now every pending item is expected to have been processed */
        Assert(cur->done);
        pfree(cur);
        cur = following;
    }

    bds->pendingList = NULL;
}

static void spgprocesspending ( spgBulkDeleteState *  bds  )  [static]

Definition at line 686 of file spgvacuum.c.

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage, spgVacPendingItem::done, elog, ERROR, i, IndexVacuumInfo::index, spgBulkDeleteState::info, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, ItemPointerIsValid, LockBuffer(), MAIN_FORKNUM, spgVacPendingItem::next, NULL, PageGetItem, PageGetItemId, PageIsNew, spgBulkDeleteState::pendingList, RBM_NORMAL, ReadBufferExtended(), RelationGetRelationName, SGITITERATE, spgAddPendingTID(), spgClearPendingList(), SPGIST_LIVE, SPGIST_REDIRECT, SpGistBlockIsRoot, SpGistPageIsDeleted, SpGistPageIsLeaf, SpGistSetLastUsedPage(), IndexVacuumInfo::strategy, IndexTupleData::t_tid, spgVacPendingItem::tid, SpGistInnerTupleData::tupstate, UnlockReleaseBuffer(), vacuum_delay_point(), vacuumLeafPage(), and vacuumRedirectAndPlaceholder().

Referenced by spgvacuumscan().

{
    /*
     * Work through bds->pendingList: for each not-yet-done TID, read and
     * exclusively lock the page it points at, then
     *   - new/deleted page: nothing to vacuum, just mark the item done;
     *   - leaf page: vacuum the whole page and mark done every item that
     *     points into it;
     *   - inner page: queue the downlinks (or redirect target) of each
     *     referenced inner tuple, which may append to the very list being
     *     walked.
     * When the walk completes the list is freed via spgClearPendingList,
     * which Asserts that every item was marked done.
     */
    Relation    index = bds->info->index;
    spgVacPendingItem *pitem;
    spgVacPendingItem *nitem;
    BlockNumber blkno;
    Buffer      buffer;
    Page        page;

    for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
    {
        if (pitem->done)
            continue;           /* ignore already-done items */

        /* call vacuum_delay_point while not holding any buffer lock */
        vacuum_delay_point();

        /* examine the referenced page */
        blkno = ItemPointerGetBlockNumber(&pitem->tid);
        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, bds->info->strategy);
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        page = (Page) BufferGetPage(buffer);

        if (PageIsNew(page) || SpGistPageIsDeleted(page))
        {
            /*
             * Probably shouldn't happen, but ignore it.  The item still has
             * to be flagged as dealt with, because spgClearPendingList
             * asserts that every list entry is done.
             */
            pitem->done = true;
        }
        else if (SpGistPageIsLeaf(page))
        {
            if (SpGistBlockIsRoot(blkno))
            {
                /* this should definitely not happen */
                elog(ERROR, "redirection leads to root page of index \"%s\"",
                     RelationGetRelationName(index));
            }

            /* deal with any deletable tuples */
            vacuumLeafPage(bds, index, buffer, true);
            /* might as well do this while we are here */
            vacuumRedirectAndPlaceholder(index, buffer);

            SpGistSetLastUsedPage(index, buffer);

            /*
             * We can mark as done not only this item, but any later ones
             * pointing at the same page, since we vacuumed the whole page.
             */
            pitem->done = true;
            for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
            {
                if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
                    nitem->done = true;
            }
        }
        else
        {
            /*
             * On an inner page, visit the referenced inner tuple and add all
             * its downlinks to the pending list.  We might have pending items
             * for more than one inner tuple on the same page (in fact this is
             * pretty likely given the way space allocation works), so get
             * them all while we are here.
             */
            for (nitem = pitem; nitem != NULL; nitem = nitem->next)
            {
                if (nitem->done)
                    continue;
                if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
                {
                    OffsetNumber offset;
                    SpGistInnerTuple innerTuple;

                    offset = ItemPointerGetOffsetNumber(&nitem->tid);
                    innerTuple = (SpGistInnerTuple) PageGetItem(page,
                                                PageGetItemId(page, offset));
                    if (innerTuple->tupstate == SPGIST_LIVE)
                    {
                        SpGistNodeTuple node;
                        int         i;

                        SGITITERATE(innerTuple, i, node)
                        {
                            if (ItemPointerIsValid(&node->t_tid))
                                spgAddPendingTID(bds, &node->t_tid);
                        }
                    }
                    else if (innerTuple->tupstate == SPGIST_REDIRECT)
                    {
                        /* transfer attention to redirect point */
                        spgAddPendingTID(bds,
                                   &((SpGistDeadTuple) innerTuple)->pointer);
                    }
                    else
                        elog(ERROR, "unexpected SPGiST tuple state: %d",
                             innerTuple->tupstate);

                    nitem->done = true;
                }
            }
        }

        UnlockReleaseBuffer(buffer);
    }

    spgClearPendingList(bds);
}

Datum spgvacuumcleanup ( PG_FUNCTION_ARGS   ) 

Definition at line 928 of file spgvacuum.c.

References IndexVacuumInfo::analyze_only, spgBulkDeleteState::callback, spgBulkDeleteState::callback_state, IndexVacuumInfo::estimated_count, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), spgBulkDeleteState::info, NULL, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, spgvacuumscan(), and spgBulkDeleteState::stats.

{
    IndexVacuumInfo *vacinfo = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *result = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);

    /* ANALYZE-only calls get back whatever stats pointer was passed in */
    if (vacinfo->analyze_only)
        PG_RETURN_POINTER(result);

    /*
     * A non-NULL stats pointer means a bulkdelete pass already scanned the
     * index, so there is no need to scan again.  Otherwise run one scan
     * with a callback that never deletes a live tuple: it can still do
     * useful redirect/placeholder cleanup, and it produces the stats.
     */
    if (result == NULL)
    {
        spgBulkDeleteState bds;

        result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        bds.info = vacinfo;
        bds.stats = result;
        bds.callback = dummy_callback;
        bds.callback_state = NULL;

        spgvacuumscan(&bds);
    }

    /* Last step on the index itself: vacuum its free space map */
    IndexFreeSpaceMapVacuum(vacinfo->index);

    /*
     * Concurrent tuple moves can make the scan count some index tuples
     * twice.  When the heap's tuple count is exact rather than estimated,
     * cap the index total at it; with only an estimate, capping might just
     * make matters worse, so leave the total alone.
     */
    if (!vacinfo->estimated_count &&
        result->num_index_tuples > vacinfo->num_heap_tuples)
        result->num_index_tuples = vacinfo->num_heap_tuples;

    PG_RETURN_POINTER(result);
}

static void spgvacuumpage ( spgBulkDeleteState *  bds,
BlockNumber  blkno 
) [static]

Definition at line 604 of file spgvacuum.c.

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage, IndexVacuumInfo::index, spgBulkDeleteState::info, spgBulkDeleteState::lastFilledBlock, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), PageIsEmpty, PageIsNew, IndexBulkDeleteResult::pages_deleted, RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), SpGistBlockIsRoot, SpGistInitBuffer(), SpGistPageIsDeleted, SpGistPageIsLeaf, SpGistPageSetDeleted, SpGistSetLastUsedPage(), spgBulkDeleteState::stats, IndexVacuumInfo::strategy, UnlockReleaseBuffer(), vacuum_delay_point(), vacuumLeafPage(), vacuumLeafRoot(), and vacuumRedirectAndPlaceholder().

Referenced by spgvacuumscan().

{
    /*
     * Vacuum a single index block.  The page is read and exclusively
     * locked, handled according to its kind (all-zero, already deleted,
     * leaf, or inner), and then, for non-root blocks only, either recorded
     * as free (bumping stats->pages_deleted) or remembered as
     * bds->lastFilledBlock.  The buffer is unlocked and released on exit.
     */
    Relation    index = bds->info->index;
    Buffer      buffer;
    Page        page;

    /* call vacuum_delay_point while not holding any buffer lock */
    vacuum_delay_point();

    buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                RBM_NORMAL, bds->info->strategy);
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    page = (Page) BufferGetPage(buffer);

    if (PageIsNew(page))
    {
        /*
         * We found an all-zero page, which could happen if the database
         * crashed just after extending the file.  Initialize and recycle it.
         */
        SpGistInitBuffer(buffer, 0);
        SpGistPageSetDeleted(page);
        /* We don't bother to WAL-log this action; easy to redo */
        MarkBufferDirty(buffer);
    }
    else if (SpGistPageIsDeleted(page))
    {
        /* nothing to do */
    }
    else if (SpGistPageIsLeaf(page))
    {
        /* a leaf page that is a root block gets its own vacuum routine */
        if (SpGistBlockIsRoot(blkno))
        {
            vacuumLeafRoot(bds, index, buffer);
            /* no need for vacuumRedirectAndPlaceholder */
        }
        else
        {
            /* forPending = false: this is the regular physical-order scan */
            vacuumLeafPage(bds, index, buffer, false);
            vacuumRedirectAndPlaceholder(index, buffer);
        }
    }
    else
    {
        /* inner page */
        vacuumRedirectAndPlaceholder(index, buffer);
    }

    /*
     * The root pages must never be deleted, nor marked as available in FSM,
     * because we don't want them ever returned by a search for a place to put
     * a new tuple.  Otherwise, check for empty/deletable page, and make sure
     * FSM knows about it.
     */
    if (!SpGistBlockIsRoot(blkno))
    {
        /* If page is now empty, mark it deleted */
        if (PageIsEmpty(page) && !SpGistPageIsDeleted(page))
        {
            SpGistPageSetDeleted(page);
            /* We don't bother to WAL-log this action; easy to redo */
            MarkBufferDirty(buffer);
        }

        /* deleted pages (old or just marked) are reported to the FSM */
        if (SpGistPageIsDeleted(page))
        {
            RecordFreeIndexPage(index, blkno);
            bds->stats->pages_deleted++;
        }
        else
            bds->lastFilledBlock = blkno;
    }

    SpGistSetLastUsedPage(index, buffer);

    UnlockReleaseBuffer(buffer);
}

static void spgvacuumscan ( spgBulkDeleteState *  bds  )  [static]

Definition at line 798 of file spgvacuum.c.

References IndexBulkDeleteResult::estimated_count, ExclusiveLock, GetActiveSnapshot(), IndexVacuumInfo::index, spgBulkDeleteState::info, initSpGistState(), spgBulkDeleteState::lastFilledBlock, LockRelationForExtension(), spgBulkDeleteState::myXmin, NULL, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_removed, spgBulkDeleteState::pendingList, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, RelationTruncate(), SPGIST_METAPAGE_BLKNO, SpGistUpdateMetaPage(), spgprocesspending(), spgBulkDeleteState::spgstate, spgvacuumpage(), spgBulkDeleteState::stats, UnlockRelationForExtension(), and SnapshotData::xmin.

Referenced by spgbulkdelete(), and spgvacuumcleanup().

{
    /*
     * Drive one complete vacuum pass over the index.  The caller must have
     * set bds->info, bds->stats, bds->callback and bds->callback_state; the
     * remaining bds fields are initialized here.  Every block after the
     * metapage is passed to spgvacuumpage, and any pending-list entries
     * that page produced are processed before moving to the next block.
     * On return stats->num_pages and stats->pages_free are filled in.
     */
    Relation    index = bds->info->index;
    bool        needLock;
    BlockNumber num_pages,
                blkno;

    /* Finish setting up spgBulkDeleteState */
    initSpGistState(&bds->spgstate, index);
    bds->pendingList = NULL;
    bds->myXmin = GetActiveSnapshot()->xmin;
    bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;

    /*
     * Reset counts that will be incremented during the scan; needed in case
     * of multiple scans during a single VACUUM command
     */
    bds->stats->estimated_count = false;
    bds->stats->num_index_tuples = 0;
    bds->stats->pages_deleted = 0;

    /* We can skip locking for new or temp relations */
    needLock = !RELATION_IS_LOCAL(index);

    /*
     * The outer loop iterates over all index pages except the metapage, in
     * physical order (we hope the kernel will cooperate in providing
     * read-ahead for speed).  It is critical that we visit all leaf pages,
     * including ones added after we start the scan, else we might fail to
     * delete some deletable tuples.  See more extensive comments about this
     * in btvacuumscan().
     */
    blkno = SPGIST_METAPAGE_BLKNO + 1;
    for (;;)
    {
        /* Get the current relation length */
        if (needLock)
            LockRelationForExtension(index, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(index);
        if (needLock)
            UnlockRelationForExtension(index, ExclusiveLock);

        /* Quit if we've scanned the whole relation */
        if (blkno >= num_pages)
            break;
        /* Iterate over pages, then loop back to recheck length */
        for (; blkno < num_pages; blkno++)
        {
            spgvacuumpage(bds, blkno);
            /* empty the pending-list after each page */
            if (bds->pendingList != NULL)
                spgprocesspending(bds);
        }
    }

    /* Propagate local lastUsedPage cache to metablock */
    SpGistUpdateMetaPage(index);

    /*
     * Truncate index if possible
     *
     * XXX disabled because it's unsafe due to possible concurrent inserts.
     * We'd have to rescan the pages to make sure they're still empty, and it
     * doesn't seem worth it.  Note that btree doesn't do this either.
     *
     * Another reason not to truncate is that it could invalidate the cached
     * pages-with-freespace pointers in the metapage and other backends'
     * relation caches, that is leave them pointing to nonexistent pages.
     * Adding RelationGetNumberOfBlocks calls to protect the places that use
     * those pointers would be unduly expensive.
     */
#ifdef NOT_USED
    if (num_pages > bds->lastFilledBlock + 1)
    {
        BlockNumber lastBlock = num_pages - 1;

        num_pages = bds->lastFilledBlock + 1;
        RelationTruncate(index, num_pages);
        bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
        bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
    }
#endif

    /* Report final stats */
    /* num_pages is the length obtained by the last loop iteration above */
    bds->stats->num_pages = num_pages;
    bds->stats->pages_free = bds->stats->pages_deleted;
}

static void vacuumLeafPage ( spgBulkDeleteState *  bds,
Relation  index,
Buffer  buffer,
bool  forPending 
) [static]

Definition at line 124 of file spgvacuum.c.

References ACCEPT_RDATA_BUFFER, ACCEPT_RDATA_DATA, Assert, spgxlogVacuumLeaf::blkno, BufferGetBlockNumber(), BufferGetPage, spgBulkDeleteState::callback, spgBulkDeleteState::callback_state, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, SpGistLeafTupleData::heapPtr, i, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerIsValid, MarkBufferDirty(), MaxIndexTuplesPerPage, spgBulkDeleteState::myXmin, spgxlogVacuumLeaf::nChain, spgxlogVacuumLeaf::nDead, SpGistDeadTupleData::nextOffset, SpGistLeafTupleData::nextOffset, spgxlogVacuumLeaf::nMove, spgxlogVacuumLeaf::node, spgxlogVacuumLeaf::nPlaceholder, IndexBulkDeleteResult::num_index_tuples, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageSetLSN, SpGistDeadTupleData::pointer, RelationData::rd_node, RelationGetRelationName, RelationNeedsWAL, spgAddPendingTID(), SPGIST_DEAD, SPGIST_LIVE, SPGIST_PLACEHOLDER, SPGIST_REDIRECT, spgPageIndexMultiDelete(), spgBulkDeleteState::spgstate, START_CRIT_SECTION, spgxlogVacuumLeaf::stateSrc, spgBulkDeleteState::stats, STORE_STATE, TransactionIdFollowsOrEquals(), IndexBulkDeleteResult::tuples_removed, SpGistLeafTupleData::tupstate, SpGistDeadTupleData::xid, XLOG_SPGIST_VACUUM_LEAF, and XLogInsert().

Referenced by spgprocesspending(), and spgvacuumpage().

{
    /*
     * Vacuum one leaf page held in "buffer".
     *
     * Pass 1 asks bds->callback about every SPGIST_LIVE tuple, flags the
     * deletable ones, builds a map from each tuple to its chain predecessor,
     * and queues the targets of recent-enough REDIRECT tuples through
     * spgAddPendingTID.  If nothing is deletable the function returns there.
     * Pass 2 walks each tuple chain from its head and turns the deletions
     * into the toDead/toPlaceholder/move/chain work lists, which are then
     * applied to the page inside a critical section and, when the relation
     * needs WAL, logged as an XLOG_SPGIST_VACUUM_LEAF record.
     *
     * When forPending is true, surviving live tuples are not added to
     * stats->num_index_tuples.
     */
    Page        page = BufferGetPage(buffer);
    spgxlogVacuumLeaf xlrec;
    XLogRecData rdata[8];
    OffsetNumber toDead[MaxIndexTuplesPerPage];
    OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
    OffsetNumber moveSrc[MaxIndexTuplesPerPage];
    OffsetNumber moveDest[MaxIndexTuplesPerPage];
    OffsetNumber chainSrc[MaxIndexTuplesPerPage];
    OffsetNumber chainDest[MaxIndexTuplesPerPage];
    /*
     * These two arrays are indexed directly by offset number (presumably
     * 1-based, hence the extra slot -- confirm against FirstOffsetNumber).
     */
    OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
    bool        deletable[MaxIndexTuplesPerPage + 1];
    int         nDeletable;
    OffsetNumber i,
                max = PageGetMaxOffsetNumber(page);

    memset(predecessor, 0, sizeof(predecessor));
    memset(deletable, 0, sizeof(deletable));
    nDeletable = 0;

    /* Scan page, identify tuples to delete, accumulate stats */
    for (i = FirstOffsetNumber; i <= max; i++)
    {
        SpGistLeafTuple lt;

        lt = (SpGistLeafTuple) PageGetItem(page,
                                           PageGetItemId(page, i));
        if (lt->tupstate == SPGIST_LIVE)
        {
            Assert(ItemPointerIsValid(&lt->heapPtr));

            if (bds->callback(&lt->heapPtr, bds->callback_state))
            {
                bds->stats->tuples_removed += 1;
                deletable[i] = true;
                nDeletable++;
            }
            else
            {
                if (!forPending)
                    bds->stats->num_index_tuples += 1;
            }

            /* Form predecessor map, too */
            if (lt->nextOffset != InvalidOffsetNumber)
            {
                /* paranoia about corrupted chain links */
                if (lt->nextOffset < FirstOffsetNumber ||
                    lt->nextOffset > max ||
                    predecessor[lt->nextOffset] != InvalidOffsetNumber)
                    elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
                         BufferGetBlockNumber(buffer),
                         RelationGetRelationName(index));
                predecessor[lt->nextOffset] = i;
            }
        }
        else if (lt->tupstate == SPGIST_REDIRECT)
        {
            SpGistDeadTuple dt = (SpGistDeadTuple) lt;

            Assert(dt->nextOffset == InvalidOffsetNumber);
            Assert(ItemPointerIsValid(&dt->pointer));

            /*
             * Add target TID to pending list if the redirection could have
             * happened since VACUUM started.
             *
             * Note: we could make a tighter test by seeing if the xid is
             * "running" according to the active snapshot; but tqual.c doesn't
             * currently export a suitable API, and it's not entirely clear
             * that a tighter test is worth the cycles anyway.
             */
            if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
                spgAddPendingTID(bds, &dt->pointer);
        }
        else
        {
            /* any other tuple state is expected to carry no chain link */
            Assert(lt->nextOffset == InvalidOffsetNumber);
        }
    }

    if (nDeletable == 0)
        return;                 /* nothing more to do */

    /*----------
     * Figure out exactly what we have to do.  We do this separately from
     * actually modifying the page, mainly so that we have a representation
     * that can be dumped into WAL and then the replay code can do exactly
     * the same thing.  The output of this step consists of six arrays
     * describing four kinds of operations, to be performed in this order:
     *
     * toDead[]: tuple numbers to be replaced with DEAD tuples
     * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
     * moveSrc[]: tuple numbers that need to be relocated to another offset
     * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
     * moveDest[]: new locations for moveSrc tuples
     * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
     * chainDest[]: new values of nextOffset for chainSrc members
     *
     * It's easiest to figure out what we have to do by processing tuple
     * chains, so we iterate over all the tuples (not just the deletable
     * ones!) to identify chain heads, then chase down each chain and make
     * work item entries for deletable tuples within the chain.
     *----------
     */
    xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;

    for (i = FirstOffsetNumber; i <= max; i++)
    {
        SpGistLeafTuple head;
        bool        interveningDeletable;
        OffsetNumber prevLive;
        OffsetNumber j;

        head = (SpGistLeafTuple) PageGetItem(page,
                                             PageGetItemId(page, i));
        if (head->tupstate != SPGIST_LIVE)
            continue;           /* can't be a chain member */
        if (predecessor[i] != 0)
            continue;           /* not a chain head */

        /* initialize ... */
        /* prevLive stays Invalid until a surviving tuple has been seen */
        interveningDeletable = false;
        prevLive = deletable[i] ? InvalidOffsetNumber : i;

        /* scan down the chain ... */
        j = head->nextOffset;
        while (j != InvalidOffsetNumber)
        {
            SpGistLeafTuple lt;

            lt = (SpGistLeafTuple) PageGetItem(page,
                                               PageGetItemId(page, j));
            if (lt->tupstate != SPGIST_LIVE)
            {
                /* all tuples in chain should be live */
                elog(ERROR, "unexpected SPGiST tuple state: %d",
                     lt->tupstate);
            }

            if (deletable[j])
            {
                /* This tuple should be replaced by a placeholder */
                toPlaceholder[xlrec.nPlaceholder] = j;
                xlrec.nPlaceholder++;
                /* previous live tuple's chain link will need an update */
                interveningDeletable = true;
            }
            else if (prevLive == InvalidOffsetNumber)
            {
                /*
                 * This is the first live tuple in the chain.  It has to move
                 * to the head position.
                 */
                moveSrc[xlrec.nMove] = j;
                moveDest[xlrec.nMove] = i;
                xlrec.nMove++;
                /* Chain updates will be applied after the move */
                prevLive = i;
                interveningDeletable = false;
            }
            else
            {
                /*
                 * Second or later live tuple.  Arrange to re-chain it to the
                 * previous live one, if there was a gap.
                 */
                if (interveningDeletable)
                {
                    chainSrc[xlrec.nChain] = prevLive;
                    chainDest[xlrec.nChain] = j;
                    xlrec.nChain++;
                }
                prevLive = j;
                interveningDeletable = false;
            }

            j = lt->nextOffset;
        }

        if (prevLive == InvalidOffsetNumber)
        {
            /* The chain is entirely removable, so we need a DEAD tuple */
            toDead[xlrec.nDead] = i;
            xlrec.nDead++;
        }
        else if (interveningDeletable)
        {
            /* One or more deletions at end of chain, so close it off */
            chainSrc[xlrec.nChain] = prevLive;
            chainDest[xlrec.nChain] = InvalidOffsetNumber;
            xlrec.nChain++;
        }
    }

    /* sanity check ... */
    /* each deletable tuple must appear in exactly one of the three lists */
    if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
        elog(ERROR, "inconsistent counts of deletable tuples");

    /* Prepare WAL record */
    xlrec.node = index->rd_node;
    xlrec.blkno = BufferGetBlockNumber(buffer);
    STORE_STATE(&bds->spgstate, xlrec.stateSrc);

    ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
    /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
    ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
    ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
    ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
    ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
    ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
    ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
    ACCEPT_RDATA_BUFFER(buffer, 7);

    /* Do the updates */
    START_CRIT_SECTION();

    spgPageIndexMultiDelete(&bds->spgstate, page,
                            toDead, xlrec.nDead,
                            SPGIST_DEAD, SPGIST_DEAD,
                            InvalidBlockNumber, InvalidOffsetNumber);

    spgPageIndexMultiDelete(&bds->spgstate, page,
                            toPlaceholder, xlrec.nPlaceholder,
                            SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
                            InvalidBlockNumber, InvalidOffsetNumber);

    /*
     * We implement the move step by swapping the item pointers of the source
     * and target tuples, then replacing the newly-source tuples with
     * placeholders.  This is perhaps unduly friendly with the page data
     * representation, but it's fast and doesn't risk page overflow when a
     * tuple to be relocated is large.
     */
    for (i = 0; i < xlrec.nMove; i++)
    {
        ItemId      idSrc = PageGetItemId(page, moveSrc[i]);
        ItemId      idDest = PageGetItemId(page, moveDest[i]);
        ItemIdData  tmp;

        tmp = *idSrc;
        *idSrc = *idDest;
        *idDest = tmp;
    }

    spgPageIndexMultiDelete(&bds->spgstate, page,
                            moveSrc, xlrec.nMove,
                            SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
                            InvalidBlockNumber, InvalidOffsetNumber);

    /* finally rewrite the nextOffset links recorded in chainSrc/chainDest */
    for (i = 0; i < xlrec.nChain; i++)
    {
        SpGistLeafTuple lt;

        lt = (SpGistLeafTuple) PageGetItem(page,
                                           PageGetItemId(page, chainSrc[i]));
        Assert(lt->tupstate == SPGIST_LIVE);
        lt->nextOffset = chainDest[i];
    }

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;

        recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata);

        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();
}

static void vacuumLeafRoot ( spgBulkDeleteState *  bds,
Relation  index,
Buffer  buffer 
) [static]

Definition at line 406 of file spgvacuum.c.

References ACCEPT_RDATA_BUFFER, ACCEPT_RDATA_DATA, Assert, spgxlogVacuumRoot::blkno, BufferGetBlockNumber(), BufferGetPage, spgBulkDeleteState::callback, spgBulkDeleteState::callback_state, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, SpGistLeafTupleData::heapPtr, i, ItemPointerIsValid, MarkBufferDirty(), spgxlogVacuumRoot::nDelete, spgxlogVacuumRoot::node, IndexBulkDeleteResult::num_index_tuples, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageIndexMultiDelete(), PageSetLSN, RelationData::rd_node, RelationNeedsWAL, SPGIST_LIVE, spgBulkDeleteState::spgstate, START_CRIT_SECTION, spgxlogVacuumRoot::stateSrc, spgBulkDeleteState::stats, STORE_STATE, IndexBulkDeleteResult::tuples_removed, SpGistLeafTupleData::tupstate, XLOG_SPGIST_VACUUM_ROOT, and XLogInsert().

Referenced by spgvacuumpage().

{
    /*
     * Vacuum a root page that is still a leaf.  Every tuple on it must be
     * SPGIST_LIVE (anything else raises an ERROR).  Tuples that
     * bds->callback reports as deletable are collected in toDelete[] and
     * removed with PageIndexMultiDelete inside a critical section; the
     * change is logged as XLOG_SPGIST_VACUUM_ROOT when the relation needs
     * WAL.  Surviving tuples are counted in stats->num_index_tuples.
     */
    Page        page = BufferGetPage(buffer);
    spgxlogVacuumRoot xlrec;
    XLogRecData rdata[3];
    OffsetNumber toDelete[MaxIndexTuplesPerPage];
    OffsetNumber i,
                max = PageGetMaxOffsetNumber(page);

    xlrec.blkno = BufferGetBlockNumber(buffer);
    xlrec.nDelete = 0;

    /* Scan page, identify tuples to delete, accumulate stats */
    for (i = FirstOffsetNumber; i <= max; i++)
    {
        SpGistLeafTuple lt;

        lt = (SpGistLeafTuple) PageGetItem(page,
                                           PageGetItemId(page, i));
        if (lt->tupstate == SPGIST_LIVE)
        {
            Assert(ItemPointerIsValid(&lt->heapPtr));

            if (bds->callback(&lt->heapPtr, bds->callback_state))
            {
                bds->stats->tuples_removed += 1;
                toDelete[xlrec.nDelete] = i;
                xlrec.nDelete++;
            }
            else
            {
                bds->stats->num_index_tuples += 1;
            }
        }
        else
        {
            /* all tuples on root should be live */
            elog(ERROR, "unexpected SPGiST tuple state: %d",
                 lt->tupstate);
        }
    }

    if (xlrec.nDelete == 0)
        return;                 /* nothing more to do */

    /* Prepare WAL record */
    xlrec.node = index->rd_node;
    STORE_STATE(&bds->spgstate, xlrec.stateSrc);

    ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
    /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
    ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
    ACCEPT_RDATA_BUFFER(buffer, 2);

    /* Do the update */
    START_CRIT_SECTION();

    /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
    PageIndexMultiDelete(page, toDelete, xlrec.nDelete);

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;

        recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata);

        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();
}

static void vacuumRedirectAndPlaceholder ( Relation  index,
Buffer  buffer 
) [static]

Definition at line 490 of file spgvacuum.c.

References ACCEPT_RDATA_BUFFER, ACCEPT_RDATA_DATA, Assert, spgxlogVacuumRedirect::blkno, BufferGetBlockNumber(), BufferGetPage, END_CRIT_SECTION, FirstOffsetNumber, spgxlogVacuumRedirect::firstPlaceholder, i, InvalidOffsetNumber, ItemPointerSetInvalid, MarkBufferDirty(), spgxlogVacuumRedirect::newestRedirectXid, spgxlogVacuumRedirect::node, SpGistPageOpaqueData::nPlaceholder, SpGistPageOpaqueData::nRedirection, spgxlogVacuumRedirect::nToPlaceholder, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageIndexMultiDelete(), PageSetLSN, SpGistDeadTupleData::pointer, RelationData::rd_node, RecentGlobalXmin, RelationNeedsWAL, SPGIST_PLACEHOLDER, SPGIST_REDIRECT, SpGistPageGetOpaque, START_CRIT_SECTION, TransactionIdIsValid, TransactionIdPrecedes(), SpGistDeadTupleData::tupstate, SpGistDeadTupleData::xid, XLOG_SPGIST_VACUUM_REDIRECT, and XLogInsert().

Referenced by spgprocesspending(), and spgvacuumpage().

{
    /*
     * Clean up dead-tuple bookkeeping on one SP-GiST page:
     *
     * 1. Every redirection tuple whose xid precedes RecentGlobalXmin is
     *    turned into a placeholder tuple.
     * 2. The run of placeholder tuples at the very end of the page (if any)
     *    is physically deleted.
     *
     * If anything changed, the buffer is marked dirty and, when the relation
     * needs WAL, an XLOG_SPGIST_VACUUM_REDIRECT record is emitted.  All page
     * modifications happen inside a single critical section.
     *
     * NOTE(review): the ACCEPT_RDATA_* macros are not passed the rdata array
     * explicitly, so they presumably refer to it by name -- keep it "rdata".
     */
    Page        page = BufferGetPage(buffer);
    SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
    OffsetNumber trailingStart = InvalidOffsetNumber;
    OffsetNumber off;
    bool        sawNonPlaceholder = false;
    bool        pageChanged = false;
    OffsetNumber convertedItems[MaxIndexTuplesPerPage];
    OffsetNumber trailingItems[MaxIndexTuplesPerPage];
    spgxlogVacuumRedirect xlrec;
    XLogRecData rdata[3];

    xlrec.node = index->rd_node;
    xlrec.blkno = BufferGetBlockNumber(buffer);
    xlrec.nToPlaceholder = 0;
    xlrec.newestRedirectXid = InvalidTransactionId;

    START_CRIT_SECTION();

    /*
     * Walk the line pointers from the end of the page towards the front.
     * Along the way, old redirects become placeholders, and we track where
     * the trailing run of placeholders begins.
     */
    for (off = maxoff; off >= FirstOffsetNumber; off--)
    {
        SpGistDeadTuple dt;

        /*
         * Nothing left to look for once no redirects remain on the page and
         * the start of the trailing placeholder run has been pinned down.
         */
        if (opaque->nRedirection <= 0 && sawNonPlaceholder)
            break;

        dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, off));

        if (dt->tupstate == SPGIST_REDIRECT &&
            TransactionIdPrecedes(dt->xid, RecentGlobalXmin))
        {
            /* flip the tuple state and fix the page's counters to match */
            dt->tupstate = SPGIST_PLACEHOLDER;
            Assert(opaque->nRedirection > 0);
            opaque->nRedirection--;
            opaque->nPlaceholder++;

            /* track the newest XID among the redirects being removed */
            if (!TransactionIdIsValid(xlrec.newestRedirectXid) ||
                TransactionIdPrecedes(xlrec.newestRedirectXid, dt->xid))
                xlrec.newestRedirectXid = dt->xid;

            ItemPointerSetInvalid(&dt->pointer);

            /* this offset goes into the WAL record's conversion list */
            convertedItems[xlrec.nToPlaceholder] = off;
            xlrec.nToPlaceholder++;

            pageChanged = true;
        }

        /*
         * Tested after the conversion above on purpose: a redirect that was
         * just converted counts as a placeholder here.
         */
        if (dt->tupstate != SPGIST_PLACEHOLDER)
            sawNonPlaceholder = true;
        else if (!sawNonPlaceholder)
            trailingStart = off;
    }

    /*
     * Placeholders forming the tail of the page can be dropped outright.
     * Earlier ones must stay put, since deleting them would shift the offset
     * numbers of the non-placeholder tuples that follow.
     */
    if (trailingStart != InvalidOffsetNumber)
    {
        OffsetNumber nTrailing = maxoff - trailingStart + 1;
        OffsetNumber k;

        /*
         * This list is not put into rdata; the WAL record carries only
         * firstPlaceholder, from which it is easy to recreate.
         */
        for (k = 0; k < nTrailing; k++)
            trailingItems[k] = trailingStart + k;

        Assert(opaque->nPlaceholder >= nTrailing);
        opaque->nPlaceholder -= nTrailing;

        /* offsets are ascending, as PageIndexMultiDelete requires */
        PageIndexMultiDelete(page, trailingItems, nTrailing);

        pageChanged = true;
    }

    xlrec.firstPlaceholder = trailingStart;

    if (pageChanged)
    {
        MarkBufferDirty(buffer);

        if (RelationNeedsWAL(index))
        {
            XLogRecPtr  recptr;

            /* fixed-size header, then the converted offsets, then the buffer */
            ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
            ACCEPT_RDATA_DATA(convertedItems,
                              sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
            ACCEPT_RDATA_BUFFER(buffer, 2);

            recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata);

            PageSetLSN(page, recptr);
        }
    }

    END_CRIT_SECTION();
}