#include "postgres.h"#include "access/genam.h"#include "access/gist_private.h"#include "access/heapam_xlog.h"#include "catalog/index.h"#include "catalog/pg_collation.h"#include "miscadmin.h"#include "storage/bufmgr.h"#include "storage/indexfsm.h"#include "utils/memutils.h"#include "utils/rel.h"
Go to the source code of this file.
| #define ROTATEDIST | ( | d | ) |
do { \ SplitedPageLayout *tmp=(SplitedPageLayout*)palloc(sizeof(SplitedPageLayout)); \ memset(tmp,0,sizeof(SplitedPageLayout)); \ tmp->block.blkno = InvalidBlockNumber; \ tmp->buffer = InvalidBuffer; \ tmp->next = (d); \ (d)=tmp; \ } while(0)
Definition at line 41 of file gist.c.
Referenced by gistSplit().
| MemoryContext createTempGistContext | ( | void | ) |
Definition at line 60 of file gist.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), and CurrentMemoryContext.
Referenced by gist_xlog_startup(), gistbeginscan(), and gistbuild().
{
return AllocSetContextCreate(CurrentMemoryContext,
"GiST temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
}
| void freeGISTstate | ( | GISTSTATE * | giststate | ) |
Definition at line 1391 of file gist.c.
References MemoryContextDelete(), and GISTSTATE::scanCxt.
Referenced by gistbuild(), gistendscan(), and gistinsert().
{
/* It's sufficient to delete the scanCxt */
MemoryContextDelete(giststate->scanCxt);
}
| Datum gistbuildempty | ( | PG_FUNCTION_ARGS | ) |
Definition at line 73 of file gist.c.
References BUFFER_LOCK_EXCLUSIVE, END_CRIT_SECTION, F_LEAF, GISTInitBuffer(), INIT_FORKNUM, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), NULL, P_NEW, PG_GETARG_POINTER, PG_RETURN_VOID, RBM_NORMAL, ReadBufferExtended(), START_CRIT_SECTION, and UnlockReleaseBuffer().
{
Relation index = (Relation) PG_GETARG_POINTER(0);
Buffer buffer;
/* Initialize the root page */
buffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* Initialize and xlog buffer */
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
MarkBufferDirty(buffer);
log_newpage_buffer(buffer);
END_CRIT_SECTION();
/* Unlock and release the buffer */
UnlockReleaseBuffer(buffer);
PG_RETURN_VOID();
}
| void gistdoinsert | ( | Relation | r, | |
| IndexTuple | itup, | |||
| Size | freespace, | |||
| GISTSTATE * | giststate | |||
| ) |
Definition at line 505 of file gist.c.
References Assert, GISTInsertStack::blkno, GISTInsertStack::buffer, BufferGetPage, GISTInsertStack::downlinkoffnum, ereport, errdetail(), errhint(), errmsg(), ERROR, GISTInsertState::freespace, GIST_EXCLUSIVE, GIST_ROOT_BLKNO, GIST_SHARE, GIST_UNLOCK, gistcheckpage(), gistchoose(), gistfixsplit(), GistFollowRight, gistgetadjusted(), gistinserttuple(), GistPageGetNSN, GistPageIsLeaf, GistTupleIsInvalid, InvalidOffsetNumber, ItemPointerGetBlockNumber, LockBuffer(), GISTInsertStack::lsn, GISTInsertStack::page, PageGetItem, PageGetItemId, PageGetLSN, palloc0(), GISTInsertStack::parent, GISTInsertState::r, ReadBuffer(), RelationGetRelationName, RelationNeedsWAL, ReleaseBuffer(), GISTInsertState::stack, IndexTupleData::t_tid, UnlockReleaseBuffer(), and XLogRecPtrIsInvalid.
Referenced by gistBuildCallback(), and gistinsert().
{
ItemId iid;
IndexTuple idxtuple;
GISTInsertStack firststack;
GISTInsertStack *stack;
GISTInsertState state;
bool xlocked = false;
memset(&state, 0, sizeof(GISTInsertState));
state.freespace = freespace;
state.r = r;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
firststack.lsn = 0;
firststack.parent = NULL;
firststack.downlinkoffnum = InvalidOffsetNumber;
state.stack = stack = &firststack;
/*
* Walk down along the path of smallest penalty, updating the parent
* pointers with the key we're inserting as we go. If we crash in the
* middle, the tree is consistent, although the possible parent updates
* were a waste.
*/
for (;;)
{
if (XLogRecPtrIsInvalid(stack->lsn))
stack->buffer = ReadBuffer(state.r, stack->blkno);
/*
* Be optimistic and grab shared lock first. Swap it for an exclusive
* lock later if we need to update the page.
*/
if (!xlocked)
{
LockBuffer(stack->buffer, GIST_SHARE);
gistcheckpage(state.r, stack->buffer);
}
stack->page = (Page) BufferGetPage(stack->buffer);
stack->lsn = PageGetLSN(stack->page);
Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));
/*
* If this page was split but the downlink was never inserted to the
* parent because the inserting backend crashed before doing that, fix
* that now.
*/
if (GistFollowRight(stack->page))
{
if (!xlocked)
{
LockBuffer(stack->buffer, GIST_UNLOCK);
LockBuffer(stack->buffer, GIST_EXCLUSIVE);
xlocked = true;
/* someone might've completed the split when we unlocked */
if (!GistFollowRight(stack->page))
continue;
}
gistfixsplit(&state, giststate);
UnlockReleaseBuffer(stack->buffer);
xlocked = false;
state.stack = stack = stack->parent;
continue;
}
if (stack->blkno != GIST_ROOT_BLKNO &&
stack->parent->lsn < GistPageGetNSN(stack->page))
{
/*
* Concurrent split detected. There's no guarantee that the
* downlink for this page is consistent with the tuple we're
* inserting anymore, so go back to parent and rechoose the best
* child.
*/
UnlockReleaseBuffer(stack->buffer);
xlocked = false;
state.stack = stack = stack->parent;
continue;
}
if (!GistPageIsLeaf(stack->page))
{
/*
* This is an internal page so continue to walk down the tree.
* Find the child node that has the minimum insertion penalty.
*/
BlockNumber childblkno;
IndexTuple newtup;
GISTInsertStack *item;
OffsetNumber downlinkoffnum;
downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
iid = PageGetItemId(stack->page, downlinkoffnum);
idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
/*
* Check that it's not a leftover invalid tuple from pre-9.1
*/
if (GistTupleIsInvalid(idxtuple))
ereport(ERROR,
(errmsg("index \"%s\" contains an inner tuple marked as invalid",
RelationGetRelationName(r)),
errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
errhint("Please REINDEX it.")));
/*
* Check that the key representing the target child node is
* consistent with the key we're inserting. Update it if it's not.
*/
newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
if (newtup)
{
/*
* Swap shared lock for an exclusive one. Beware, the page may
* change while we unlock/lock the page...
*/
if (!xlocked)
{
LockBuffer(stack->buffer, GIST_UNLOCK);
LockBuffer(stack->buffer, GIST_EXCLUSIVE);
xlocked = true;
stack->page = (Page) BufferGetPage(stack->buffer);
if (PageGetLSN(stack->page) != stack->lsn)
{
/* the page was changed while we unlocked it, retry */
continue;
}
}
/*
* Update the tuple.
*
* We still hold the lock after gistinserttuple(), but it
* might have to split the page to make the updated tuple fit.
* In that case the updated tuple might migrate to the other
* half of the split, so we have to go back to the parent and
* descend back to the half that's a better fit for the new
* tuple.
*/
if (gistinserttuple(&state, stack, giststate, newtup,
downlinkoffnum))
{
/*
* If this was a root split, the root page continues to be
* the parent and the updated tuple went to one of the
* child pages, so we just need to retry from the root
* page.
*/
if (stack->blkno != GIST_ROOT_BLKNO)
{
UnlockReleaseBuffer(stack->buffer);
xlocked = false;
state.stack = stack = stack->parent;
}
continue;
}
}
LockBuffer(stack->buffer, GIST_UNLOCK);
xlocked = false;
/* descend to the chosen child */
item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
item->blkno = childblkno;
item->parent = stack;
item->downlinkoffnum = downlinkoffnum;
state.stack = stack = item;
}
else
{
/*
* Leaf page. Insert the new key. We've already updated all the
* parents on the way down, but we might have to split the page if
* it doesn't fit. gistinserthere() will take care of that.
*/
/*
* Swap shared lock for an exclusive one. Be careful, the page may
* change while we unlock/lock the page...
*/
if (!xlocked)
{
LockBuffer(stack->buffer, GIST_UNLOCK);
LockBuffer(stack->buffer, GIST_EXCLUSIVE);
xlocked = true;
stack->page = (Page) BufferGetPage(stack->buffer);
stack->lsn = PageGetLSN(stack->page);
if (stack->blkno == GIST_ROOT_BLKNO)
{
/*
* the only page that can become inner instead of leaf is
* the root page, so for root we should recheck it
*/
if (!GistPageIsLeaf(stack->page))
{
/*
* very rare situation: during unlock/lock index with
* number of pages = 1 was increased
*/
LockBuffer(stack->buffer, GIST_UNLOCK);
xlocked = false;
continue;
}
/*
* we don't need to check root split, because checking
* leaf/inner is enough to recognize split for root
*/
}
else if (GistFollowRight(stack->page) ||
stack->parent->lsn < GistPageGetNSN(stack->page))
{
/*
* The page was split while we momentarily unlocked the
* page. Go back to parent.
*/
UnlockReleaseBuffer(stack->buffer);
xlocked = false;
state.stack = stack = stack->parent;
continue;
}
}
/* now state.stack->(page, buffer and blkno) points to leaf page */
gistinserttuple(&state, stack, giststate, itup,
InvalidOffsetNumber);
LockBuffer(stack->buffer, GIST_UNLOCK);
/* Release any pins we might still hold before exiting */
for (; stack; stack = stack->parent)
ReleaseBuffer(stack->buffer);
break;
}
}
}
| static void gistFindCorrectParent | ( | Relation | r, | |
| GISTInsertStack * | child | |||
| ) | [static] |
Definition at line 868 of file gist.c.
References GISTInsertStack::blkno, GISTInsertStack::buffer, BufferGetPage, GISTInsertStack::downlinkoffnum, FirstOffsetNumber, GIST_EXCLUSIVE, gistcheckpage(), gistFindPath(), GistPageGetOpaque, i, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, LockBuffer(), GISTInsertStack::lsn, OffsetNumberNext, GISTInsertStack::page, PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, GISTInsertStack::parent, ReadBuffer(), ReleaseBuffer(), IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistfinishsplit(), and gistformdownlink().
{
GISTInsertStack *parent = child->parent;
gistcheckpage(r, parent->buffer);
parent->page = (Page) BufferGetPage(parent->buffer);
/* here we don't need to distinguish between split and page update */
if (child->downlinkoffnum == InvalidOffsetNumber ||
parent->lsn != PageGetLSN(parent->page))
{
/* parent is changed, look child in right links until found */
OffsetNumber i,
maxoff;
ItemId iid;
IndexTuple idxtuple;
GISTInsertStack *ptr;
while (true)
{
maxoff = PageGetMaxOffsetNumber(parent->page);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(parent->page, i);
idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
{
/* yes!!, found */
child->downlinkoffnum = i;
return;
}
}
parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
UnlockReleaseBuffer(parent->buffer);
if (parent->blkno == InvalidBlockNumber)
{
/*
* End of chain and still didn't find parent. It's a very-very
* rare situation when root splited.
*/
break;
}
parent->buffer = ReadBuffer(r, parent->blkno);
LockBuffer(parent->buffer, GIST_EXCLUSIVE);
gistcheckpage(r, parent->buffer);
parent->page = (Page) BufferGetPage(parent->buffer);
}
/*
* awful!!, we need search tree to find parent ... , but before we
* should release all old parent
*/
ptr = child->parent->parent; /* child->parent already released
* above */
while (ptr)
{
ReleaseBuffer(ptr->buffer);
ptr = ptr->parent;
}
/* ok, find new path */
ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
/* read all buffers as expected by caller */
/* note we don't lock them or gistcheckpage them here! */
while (ptr)
{
ptr->buffer = ReadBuffer(r, ptr->blkno);
ptr->page = (Page) BufferGetPage(ptr->buffer);
ptr = ptr->parent;
}
/* install new chain of parents to stack */
child->parent = parent;
/* make recursive call to normal processing */
LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
gistFindCorrectParent(r, child);
}
return;
}
| static GISTInsertStack* gistFindPath | ( | Relation | r, | |
| BlockNumber | child, | |||
| OffsetNumber * | downlinkoffnum | |||
| ) | [static] |
Definition at line 758 of file gist.c.
References GISTInsertStack::blkno, BufferGetPage, GISTInsertStack::downlinkoffnum, elog, ERROR, FirstOffsetNumber, GIST_SHARE, gistcheckpage(), GistFollowRight, GistPageGetNSN, GistPageGetOpaque, GistPageIsLeaf, i, InvalidBlockNumber, ItemPointerGetBlockNumber, lappend(), lcons(), linitial, list_delete_first(), list_make1, LockBuffer(), GISTInsertStack::lsn, NIL, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, palloc0(), GISTInsertStack::parent, ReadBuffer(), RelationGetRelationName, IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistFindCorrectParent().
{
Page page;
Buffer buffer;
OffsetNumber i,
maxoff;
ItemId iid;
IndexTuple idxtuple;
List *fifo;
GISTInsertStack *top,
*ptr;
BlockNumber blkno;
top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
top->blkno = GIST_ROOT_BLKNO;
top->downlinkoffnum = InvalidOffsetNumber;
fifo = list_make1(top);
while (fifo != NIL)
{
/* Get next page to visit */
top = linitial(fifo);
fifo = list_delete_first(fifo);
buffer = ReadBuffer(r, top->blkno);
LockBuffer(buffer, GIST_SHARE);
gistcheckpage(r, buffer);
page = (Page) BufferGetPage(buffer);
if (GistPageIsLeaf(page))
{
/*
* Because we scan the index top-down, all the rest of the pages
* in the queue must be leaf pages as well.
*/
UnlockReleaseBuffer(buffer);
break;
}
top->lsn = PageGetLSN(page);
/*
* If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
* downlink. This should not normally happen..
*/
if (GistFollowRight(page))
elog(ERROR, "concurrent GiST page split was incomplete");
if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
{
/*
* Page was split while we looked elsewhere. We didn't see the
* downlink to the right page when we scanned the parent, so add
* it to the queue now.
*
* Put the right page ahead of the queue, so that we visit it
* next. That's important, because if this is the lowest internal
* level, just above leaves, we might already have queued up some
* leaf pages, and we assume that there can't be any non-leaf
* pages behind leaf pages.
*/
ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
ptr->blkno = GistPageGetOpaque(page)->rightlink;
ptr->downlinkoffnum = InvalidOffsetNumber;
ptr->parent = top->parent;
fifo = lcons(ptr, fifo);
}
maxoff = PageGetMaxOffsetNumber(page);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid);
blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
if (blkno == child)
{
/* Found it! */
UnlockReleaseBuffer(buffer);
*downlinkoffnum = i;
return top;
}
else
{
/* Append this child to the list of pages to visit later */
ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
ptr->blkno = blkno;
ptr->downlinkoffnum = i;
ptr->parent = top;
fifo = lappend(fifo, ptr);
}
}
UnlockReleaseBuffer(buffer);
}
elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
RelationGetRelationName(r), child);
return NULL; /* keep compiler quiet */
}
| static void gistfinishsplit | ( | GISTInsertState * | state, | |
| GISTInsertStack * | stack, | |||
| GISTSTATE * | giststate, | |||
| List * | splitinfo, | |||
| bool | releasebuf | |||
| ) | [static] |
Definition at line 1162 of file gist.c.
References Assert, GISTPageSplitInfo::buf, GISTInsertStack::buffer, GISTPageSplitInfo::downlink, GISTInsertStack::downlinkoffnum, GIST_EXCLUSIVE, gistFindCorrectParent(), gistinserttuples(), InvalidOffsetNumber, lcons(), lfirst, linitial, list_delete_first(), list_length(), LockBuffer(), lsecond, GISTInsertStack::parent, and GISTInsertState::r.
Referenced by gistfixsplit(), and gistinserttuples().
{
ListCell *lc;
List *reversed;
GISTPageSplitInfo *right;
GISTPageSplitInfo *left;
IndexTuple tuples[2];
/* A split always contains at least two halves */
Assert(list_length(splitinfo) >= 2);
/*
* We need to insert downlinks for each new page, and update the downlink
* for the original (leftmost) page in the split. Begin at the rightmost
* page, inserting one downlink at a time until there's only two pages
* left. Finally insert the downlink for the last new page and update the
* downlink for the original page as one operation.
*/
/* for convenience, create a copy of the list in reverse order */
reversed = NIL;
foreach(lc, splitinfo)
{
reversed = lcons(lfirst(lc), reversed);
}
LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
gistFindCorrectParent(state->r, stack);
/*
* insert downlinks for the siblings from right to left, until there are
* only two siblings left.
*/
while (list_length(reversed) > 2)
{
right = (GISTPageSplitInfo *) linitial(reversed);
left = (GISTPageSplitInfo *) lsecond(reversed);
if (gistinserttuples(state, stack->parent, giststate,
&right->downlink, 1,
InvalidOffsetNumber,
left->buf, right->buf, false, false))
{
/*
* If the parent page was split, need to relocate the original
* parent pointer.
*/
gistFindCorrectParent(state->r, stack);
}
/* gistinserttuples() released the lock on right->buf. */
reversed = list_delete_first(reversed);
}
right = (GISTPageSplitInfo *) linitial(reversed);
left = (GISTPageSplitInfo *) lsecond(reversed);
/*
* Finally insert downlink for the remaining right page and update the
* downlink for the original page to not contain the tuples that were
* moved to the new pages.
*/
tuples[0] = left->downlink;
tuples[1] = right->downlink;
gistinserttuples(state, stack->parent, giststate,
tuples, 2,
stack->downlinkoffnum,
left->buf, right->buf,
true, /* Unlock parent */
unlockbuf /* Unlock stack->buffer if caller wants that */
);
Assert(left->buf == stack->buffer);
}
| static void gistfixsplit | ( | GISTInsertState * | state, | |
| GISTSTATE * | giststate | |||
| ) | [static] |
Definition at line 1017 of file gist.c.
References Assert, GISTInsertStack::blkno, GISTPageSplitInfo::buf, buf, GISTInsertStack::buffer, BufferGetPage, GISTPageSplitInfo::downlink, GISTInsertStack::downlinkoffnum, elog, GIST_EXCLUSIVE, gistfinishsplit(), GistFollowRight, gistformdownlink(), GistPageGetOpaque, lappend(), LockBuffer(), LOG, OffsetNumberIsValid, GISTInsertStack::page, palloc(), GISTInsertState::r, ReadBuffer(), RelationGetRelationName, and GISTInsertState::stack.
Referenced by gistdoinsert().
{
GISTInsertStack *stack = state->stack;
Buffer buf;
Page page;
List *splitinfo = NIL;
elog(LOG, "fixing incomplete split in index \"%s\", block %u",
RelationGetRelationName(state->r), stack->blkno);
Assert(GistFollowRight(stack->page));
Assert(OffsetNumberIsValid(stack->downlinkoffnum));
buf = stack->buffer;
/*
* Read the chain of split pages, following the rightlinks. Construct a
* downlink tuple for each page.
*/
for (;;)
{
GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
IndexTuple downlink;
page = BufferGetPage(buf);
/* Form the new downlink tuples to insert to parent */
downlink = gistformdownlink(state->r, buf, giststate, stack);
si->buf = buf;
si->downlink = downlink;
splitinfo = lappend(splitinfo, si);
if (GistFollowRight(page))
{
/* lock next page */
buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
LockBuffer(buf, GIST_EXCLUSIVE);
}
else
break;
}
/* Insert the downlinks */
gistfinishsplit(state, stack, giststate, splitinfo, false);
}
| static IndexTuple gistformdownlink | ( | Relation | rel, | |
| Buffer | buf, | |||
| GISTSTATE * | giststate, | |||
| GISTInsertStack * | stack | |||
| ) | [static] |
Definition at line 957 of file gist.c.
References GISTInsertStack::buffer, BufferGetBlockNumber(), BufferGetPage, CopyIndexTuple(), GISTInsertStack::downlinkoffnum, FirstOffsetNumber, GIST_EXCLUSIVE, GIST_UNLOCK, gistFindCorrectParent(), gistgetadjusted(), GistTupleSetValid, ItemPointerSetBlockNumber, LockBuffer(), NULL, OffsetNumberNext, GISTInsertStack::page, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, GISTInsertStack::parent, and IndexTupleData::t_tid.
Referenced by gistfixsplit().
{
Page page = BufferGetPage(buf);
OffsetNumber maxoff;
OffsetNumber offset;
IndexTuple downlink = NULL;
maxoff = PageGetMaxOffsetNumber(page);
for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
{
IndexTuple ituple = (IndexTuple)
PageGetItem(page, PageGetItemId(page, offset));
if (downlink == NULL)
downlink = CopyIndexTuple(ituple);
else
{
IndexTuple newdownlink;
newdownlink = gistgetadjusted(rel, downlink, ituple,
giststate);
if (newdownlink)
downlink = newdownlink;
}
}
/*
* If the page is completely empty, we can't form a meaningful downlink
* for it. But we have to insert a downlink for the page. Any key will do,
* as long as its consistent with the downlink of parent page, so that we
* can legally insert it to the parent. A minimal one that matches as few
* scans as possible would be best, to keep scans from doing useless work,
* but we don't know how to construct that. So we just use the downlink of
* the original page that was split - that's as far from optimal as it can
* get but will do..
*/
if (!downlink)
{
ItemId iid;
LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
gistFindCorrectParent(rel, stack);
iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
downlink = CopyIndexTuple(downlink);
LockBuffer(stack->parent->buffer, GIST_UNLOCK);
}
ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
GistTupleSetValid(downlink);
return downlink;
}
| Datum gistinsert | ( | PG_FUNCTION_ARGS | ) |
Definition at line 102 of file gist.c.
References freeGISTstate(), gistdoinsert(), gistFormTuple(), initGISTstate(), MemoryContextSwitchTo(), PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_BOOL, IndexTupleData::t_tid, GISTSTATE::tempCxt, and values.
{
Relation r = (Relation) PG_GETARG_POINTER(0);
Datum *values = (Datum *) PG_GETARG_POINTER(1);
bool *isnull = (bool *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
#endif
IndexTuple itup;
GISTSTATE *giststate;
MemoryContext oldCxt;
giststate = initGISTstate(r);
/*
* We use the giststate's scan context as temp context too. This means
* that any memory leaked by the support functions is not reclaimed until
* end of insert. In most cases, we aren't going to call the support
* functions very many times before finishing the insert, so this seems
* cheaper than resetting a temp context for each function call.
*/
oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
itup = gistFormTuple(giststate, r,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
gistdoinsert(r, itup, 0, giststate);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
freeGISTstate(giststate);
PG_RETURN_BOOL(false);
}
| static bool gistinserttuple | ( | GISTInsertState * | state, | |
| GISTInsertStack * | stack, | |||
| GISTSTATE * | giststate, | |||
| IndexTuple | tuple, | |||
| OffsetNumber | oldoffnum | |||
| ) | [static] |
Definition at line 1076 of file gist.c.
References gistinserttuples(), and InvalidBuffer.
Referenced by gistdoinsert().
{
return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
InvalidBuffer, InvalidBuffer, false, false);
}
| static bool gistinserttuples | ( | GISTInsertState * | state, | |
| GISTInsertStack * | stack, | |||
| GISTSTATE * | giststate, | |||
| IndexTuple * | tuples, | |||
| int | ntup, | |||
| OffsetNumber | oldoffnum, | |||
| Buffer | leftchild, | |||
| Buffer | rightchild, | |||
| bool | unlockbuf, | |||
| bool | unlockleftchild | |||
| ) | [static] |
Definition at line 1110 of file gist.c.
References GISTInsertStack::buffer, BufferIsValid, GISTInsertState::freespace, GIST_UNLOCK, gistfinishsplit(), gistplacetopage(), LockBuffer(), NULL, GISTInsertState::r, and UnlockReleaseBuffer().
Referenced by gistfinishsplit(), and gistinserttuple().
{
List *splitinfo;
bool is_split;
/* Insert the tuple(s) to the page, splitting the page if necessary */
is_split = gistplacetopage(state->r, state->freespace, giststate,
stack->buffer,
tuples, ntup,
oldoffnum, NULL,
leftchild,
&splitinfo,
true);
/*
* Before recursing up in case the page was split, release locks on the
* child pages. We don't need to keep them locked when updating the
* parent.
*/
if (BufferIsValid(rightchild))
UnlockReleaseBuffer(rightchild);
if (BufferIsValid(leftchild) && unlockleftchild)
LockBuffer(leftchild, GIST_UNLOCK);
/*
* If we had to split, insert/update the downlinks in the parent. If the
* caller requested us to release the lock on stack->buffer, tell
* gistfinishsplit() to do that as soon as it's safe to do so. If we
* didn't have to split, release it ourselves.
*/
if (splitinfo)
gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
else if (unlockbuf)
LockBuffer(stack->buffer, GIST_UNLOCK);
return is_split;
}
| bool gistplacetopage | ( | Relation | rel, | |
| Size | freespace, | |||
| GISTSTATE * | giststate, | |||
| Buffer | buffer, | |||
| IndexTuple * | itup, | |||
| int | ntup, | |||
| OffsetNumber | oldoffnum, | |||
| BlockNumber * | newblkno, | |||
| Buffer | leftchildbuf, | |||
| List ** | splitinfo, | |||
| bool | markfollowright | |||
| ) |
Definition at line 172 of file gist.c.
References gistxlogPage::blkno, SplitedPageLayout::block, GISTPageSplitInfo::buf, SplitedPageLayout::buffer, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, GISTPageSplitInfo::downlink, elog, END_CRIT_SECTION, ERROR, F_LEAF, GIST_ROOT_BLKNO, GistClearFollowRight, gistextractpage(), gistfillbuffer(), gistfillitupvec(), GistFollowRight, gistGetFakeLSN(), GISTInitBuffer(), gistjoinvector(), GistMarkFollowRight, gistNewBuffer(), gistnospace(), GistPageGetNSN, GistPageGetOpaque, GistPageIsLeaf, GistPageSetNSN, gistSplit(), GistTupleSetValid, gistXLogSplit(), gistXLogUpdate(), i, IndexTupleSize, InvalidOffsetNumber, ItemPointerEquals(), ItemPointerSetBlockNumber, SplitedPageLayout::itup, lappend(), SplitedPageLayout::lenlist, SplitedPageLayout::list, MarkBufferDirty(), memmove, SplitedPageLayout::next, gistxlogPage::num, OffsetNumberIsValid, SplitedPageLayout::page, PageAddItem(), PageGetTempPageCopySpecial(), PageIndexTupleDelete(), PageRestoreTempPage(), PageSetLSN, palloc(), RelationData::rd_node, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistbufferinginserttuples(), and gistinserttuples().
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
bool is_leaf = (GistPageIsLeaf(page)) ? true : false;
XLogRecPtr recptr;
int i;
bool is_split;
/*
* Refuse to modify a page that's incompletely split. This should not
* happen because we finish any incomplete splits while we walk down the
* tree. However, it's remotely possible that another concurrent inserter
* splits a parent page, and errors out before completing the split. We
* will just throw an error in that case, and leave any split we had in
* progress unfinished too. The next insert that comes along will clean up
* the mess.
*/
if (GistFollowRight(page))
elog(ERROR, "concurrent GiST page split was incomplete");
*splitinfo = NIL;
/*
* if isupdate, remove old key: This node's key has been modified, either
* because a child split occurred or because we needed to adjust our key
* for an insert in a child node. Therefore, remove the old version of
* this node's key.
*
* for WAL replay, in the non-split case we handle this by setting up a
* one-element todelete array; in the split case, it's handled implicitly
* because the tuple vector passed to gistSplit won't include this tuple.
*/
is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
if (is_split)
{
/* no space for insertion */
IndexTuple *itvec;
int tlen;
SplitedPageLayout *dist = NULL,
*ptr;
BlockNumber oldrlink = InvalidBlockNumber;
GistNSN oldnsn = 0;
SplitedPageLayout rootpg;
bool is_rootsplit;
is_rootsplit = (blkno == GIST_ROOT_BLKNO);
/*
* Form index tuples vector to split. If we're replacing an old tuple,
* remove the old version from the vector.
*/
itvec = gistextractpage(page, &tlen);
if (OffsetNumberIsValid(oldoffnum))
{
/* on inner page we should remove old tuple */
int pos = oldoffnum - FirstOffsetNumber;
tlen--;
if (pos != tlen)
memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
}
itvec = gistjoinvector(itvec, &tlen, itup, ntup);
dist = gistSplit(rel, page, itvec, tlen, giststate);
/*
* Set up pages to work with. Allocate new buffers for all but the
* leftmost page. The original page becomes the new leftmost page, and
* is just replaced with the new contents.
*
* For a root-split, allocate new buffers for all child pages, the
* original page is overwritten with new root page containing
* downlinks to the new child pages.
*/
ptr = dist;
if (!is_rootsplit)
{
/* save old rightlink and NSN */
oldrlink = GistPageGetOpaque(page)->rightlink;
oldnsn = GistPageGetNSN(page);
dist->buffer = buffer;
dist->block.blkno = BufferGetBlockNumber(buffer);
dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
/* clean all flags except F_LEAF */
GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
ptr = ptr->next;
}
for (; ptr; ptr = ptr->next)
{
/* Allocate new page */
ptr->buffer = gistNewBuffer(rel);
GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
ptr->page = BufferGetPage(ptr->buffer);
ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
}
/*
* Now that we know which blocks the new pages go to, set up downlink
* tuples to point to them.
*/
for (ptr = dist; ptr; ptr = ptr->next)
{
ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
GistTupleSetValid(ptr->itup);
}
/*
* If this is a root split, we construct the new root page with the
* downlinks here directly, instead of requiring the caller to insert
* them. Add the new root page to the list along with the child pages.
*/
if (is_rootsplit)
{
IndexTuple *downlinks;
int ndownlinks = 0;
int i;
rootpg.buffer = buffer;
rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
GistPageGetOpaque(rootpg.page)->flags = 0;
/* Prepare a vector of all the downlinks */
for (ptr = dist; ptr; ptr = ptr->next)
ndownlinks++;
downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
for (i = 0, ptr = dist; ptr; ptr = ptr->next)
downlinks[i++] = ptr->itup;
rootpg.block.blkno = GIST_ROOT_BLKNO;
rootpg.block.num = ndownlinks;
rootpg.list = gistfillitupvec(downlinks, ndownlinks,
&(rootpg.lenlist));
rootpg.itup = NULL;
rootpg.next = dist;
dist = &rootpg;
}
else
{
/* Prepare split-info to be returned to caller */
for (ptr = dist; ptr; ptr = ptr->next)
{
GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
si->buf = ptr->buffer;
si->downlink = ptr->itup;
*splitinfo = lappend(*splitinfo, si);
}
}
/*
* Fill all pages. All the pages are new, ie. freshly allocated empty
* pages, or a temporary copy of the old page.
*/
for (ptr = dist; ptr; ptr = ptr->next)
{
char *data = (char *) (ptr->list);
for (i = 0; i < ptr->block.num; i++)
{
IndexTuple thistup = (IndexTuple) data;
if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
/*
* If this is the first inserted/updated tuple, let the caller
* know which page it landed on.
*/
if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
*newblkno = ptr->block.blkno;
data += IndexTupleSize(thistup);
}
/* Set up rightlinks */
if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
GistPageGetOpaque(ptr->page)->rightlink =
ptr->next->block.blkno;
else
GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
/*
* Mark the all but the right-most page with the follow-right
* flag. It will be cleared as soon as the downlink is inserted
* into the parent, but this ensures that if we error out before
* that, the index is still consistent. (in buffering build mode,
* any error will abort the index build anyway, so this is not
* needed.)
*/
if (ptr->next && !is_rootsplit && markfollowright)
GistMarkFollowRight(ptr->page);
else
GistClearFollowRight(ptr->page);
/*
* Copy the NSN of the original page to all pages. The
* F_FOLLOW_RIGHT flags ensure that scans will follow the
* rightlinks until the downlinks are inserted.
*/
GistPageSetNSN(ptr->page, oldnsn);
}
START_CRIT_SECTION();
/*
* Must mark buffers dirty before XLogInsert, even though we'll still
* be changing their opaque fields below.
*/
for (ptr = dist; ptr; ptr = ptr->next)
MarkBufferDirty(ptr->buffer);
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
/*
* The first page in the chain was a temporary working copy meant to
* replace the old page. Copy it over the old page.
*/
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
/* Write the WAL record */
if (RelationNeedsWAL(rel))
recptr = gistXLogSplit(rel->rd_node, blkno, is_leaf,
dist, oldrlink, oldnsn, leftchildbuf,
markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
{
PageSetLSN(ptr->page, recptr);
}
/*
* Return the new child buffers to the caller.
*
* If this was a root split, we've already inserted the downlink
* pointers, in the form of a new root page. Therefore we can release
* all the new buffers, and keep just the root page locked.
*/
if (is_rootsplit)
{
for (ptr = dist->next; ptr; ptr = ptr->next)
UnlockReleaseBuffer(ptr->buffer);
}
}
else
{
/*
* Enough space. We also get here if ntuples==0.
*/
START_CRIT_SECTION();
if (OffsetNumberIsValid(oldoffnum))
PageIndexTupleDelete(page, oldoffnum);
gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
MarkBufferDirty(buffer);
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
if (RelationNeedsWAL(rel))
{
OffsetNumber ndeloffs = 0,
deloffs[1];
if (OffsetNumberIsValid(oldoffnum))
{
deloffs[0] = oldoffnum;
ndeloffs = 1;
}
recptr = gistXLogUpdate(rel->rd_node, buffer,
deloffs, ndeloffs, itup, ntup,
leftchildbuf);
PageSetLSN(page, recptr);
}
else
{
recptr = gistGetFakeLSN(rel);
PageSetLSN(page, recptr);
}
if (newblkno)
*newblkno = blkno;
}
/*
* If we inserted the downlink for a child page, set NSN and clear
* F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
* follow the rightlink if and only if they looked at the parent page
* before we inserted the downlink.
*
* Note that we do this *after* writing the WAL record. That means that
* the possible full page image in the WAL record does not include these
* changes, and they must be replayed even if the page is restored from
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
*/
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
GistPageSetNSN(leftpg, recptr);
GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
END_CRIT_SECTION();
return is_split;
}
| SplitedPageLayout* gistSplit | ( | Relation | r, | |
| Page | page, | |||
| IndexTuple * | itup, | |||
| int | len, | |||
| GISTSTATE * | giststate | |||
| ) |
Definition at line 1242 of file gist.c.
References SplitedPageLayout::block, gistfillitupvec(), gistfitpage(), gistFormTuple(), gistSplit(), gistSplitByKey(), i, SplitedPageLayout::itup, SplitedPageLayout::lenlist, SplitedPageLayout::list, tupleDesc::natts, SplitedPageLayout::next, gistxlogPage::num, palloc(), ROTATEDIST, GistSplitVector::spl_lattr, GIST_SPLITVEC::spl_left, GistSplitVector::spl_lisnull, GIST_SPLITVEC::spl_nleft, GIST_SPLITVEC::spl_nright, GistSplitVector::spl_rattr, GIST_SPLITVEC::spl_right, GistSplitVector::spl_risnull, GistSplitVector::splitVector, TRUE, and GISTSTATE::tupdesc.
Referenced by gistplacetopage(), and gistSplit().
{
IndexTuple *lvectup,
*rvectup;
GistSplitVector v;
int i;
SplitedPageLayout *res = NULL;
memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
gistSplitByKey(r, page, itup, len, giststate, &v, 0);
/* form left and right vector */
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
for (i = 0; i < v.splitVector.spl_nleft; i++)
lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
for (i = 0; i < v.splitVector.spl_nright; i++)
rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
/* finalize splitting (may need another split) */
if (!gistfitpage(rvectup, v.splitVector.spl_nright))
{
res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
}
else
{
ROTATEDIST(res);
res->block.num = v.splitVector.spl_nright;
res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
}
if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
{
SplitedPageLayout *resptr,
*subres;
resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
/* install on list's tail */
while (resptr->next)
resptr = resptr->next;
resptr->next = res;
res = subres;
}
else
{
ROTATEDIST(res);
res->block.num = v.splitVector.spl_nleft;
res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
}
return res;
}
Definition at line 1310 of file gist.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), GISTSTATE::compressFn, GISTSTATE::consistentFn, CurrentMemoryContext, GISTSTATE::decompressFn, GISTSTATE::distanceFn, elog, GISTSTATE::equalFn, ERROR, fmgr_info_copy(), FmgrInfo::fn_oid, GIST_COMPRESS_PROC, GIST_CONSISTENT_PROC, GIST_DECOMPRESS_PROC, GIST_DISTANCE_PROC, GIST_EQUAL_PROC, GIST_PENALTY_PROC, GIST_PICKSPLIT_PROC, GIST_UNION_PROC, i, index_getprocid(), index_getprocinfo(), INDEX_MAX_KEYS, MemoryContextSwitchTo(), tupleDesc::natts, OidIsValid, palloc(), GISTSTATE::penaltyFn, GISTSTATE::picksplitFn, RelationData::rd_att, RelationData::rd_indcollation, GISTSTATE::scanCxt, GISTSTATE::supportCollation, GISTSTATE::tempCxt, GISTSTATE::tupdesc, and GISTSTATE::unionFn.
Referenced by gistbeginscan(), gistbuild(), and gistinsert().
{
GISTSTATE *giststate;
MemoryContext scanCxt;
MemoryContext oldCxt;
int i;
/* safety check to protect fixed-size arrays in GISTSTATE */
if (index->rd_att->natts > INDEX_MAX_KEYS)
elog(ERROR, "numberOfAttributes %d > %d",
index->rd_att->natts, INDEX_MAX_KEYS);
/* Create the memory context that will hold the GISTSTATE */
scanCxt = AllocSetContextCreate(CurrentMemoryContext,
"GiST scan context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldCxt = MemoryContextSwitchTo(scanCxt);
/* Create and fill in the GISTSTATE */
giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
giststate->scanCxt = scanCxt;
giststate->tempCxt = scanCxt; /* caller must change this if needed */
giststate->tupdesc = index->rd_att;
for (i = 0; i < index->rd_att->natts; i++)
{
fmgr_info_copy(&(giststate->consistentFn[i]),
index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
scanCxt);
fmgr_info_copy(&(giststate->unionFn[i]),
index_getprocinfo(index, i + 1, GIST_UNION_PROC),
scanCxt);
fmgr_info_copy(&(giststate->compressFn[i]),
index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
scanCxt);
fmgr_info_copy(&(giststate->decompressFn[i]),
index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
scanCxt);
fmgr_info_copy(&(giststate->penaltyFn[i]),
index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
scanCxt);
fmgr_info_copy(&(giststate->picksplitFn[i]),
index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
scanCxt);
fmgr_info_copy(&(giststate->equalFn[i]),
index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
scanCxt);
/* opclasses are not required to provide a Distance method */
if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
fmgr_info_copy(&(giststate->distanceFn[i]),
index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
scanCxt);
else
giststate->distanceFn[i].fn_oid = InvalidOid;
/*
* If the index column has a specified collation, we should honor that
* while doing comparisons. However, we may have a collatable storage
* type for a noncollatable indexed data type. If there's no index
* collation then specify default collation in case the support
* functions need collation. This is harmless if the support
* functions don't care about collation, so we just do it
* unconditionally. (We could alternatively call get_typcollation,
* but that seems like expensive overkill --- there aren't going to be
* any cases where a GiST storage type has a nondefault collation.)
*/
if (OidIsValid(index->rd_indcollation[i]))
giststate->supportCollation[i] = index->rd_indcollation[i];
else
giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
}
MemoryContextSwitchTo(oldCxt);
return giststate;
}
1.7.1