#include "access/gist.h"
#include "access/itup.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "storage/buffile.h"
#include "utils/rbtree.h"
#include "utils/hsearch.h"
Go to the source code of this file.
#define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \
	((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
Definition at line 339 of file gist_private.h.
Referenced by gistPushItupToNodeBuffer().
#define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \
	((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer)
Definition at line 347 of file gist_private.h.
Referenced by gistProcessItup().
#define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata)) |
Definition at line 37 of file gist_private.h.
Referenced by gistGetItupFromPage(), and gistPlaceItupToPage().
#define GIST_DEFAULT_FILLFACTOR 90 |
Definition at line 464 of file gist_private.h.
#define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE |
Definition at line 27 of file gist_private.h.
Referenced by gistBufferingFindCorrectParent(), gistbulkdelete(), gistdoinsert(), gistFindCorrectParent(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistNewBuffer(), and gistProcessItup().
#define GIST_MIN_FILLFACTOR 10 |
Definition at line 463 of file gist_private.h.
#define GIST_ROOT_BLKNO 0 |
Definition at line 277 of file gist_private.h.
Referenced by gistbufferinginserttuples(), gistbuild(), gistbulkdelete(), gistdoinsert(), gistplacetopage(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistRelocateBuildBuffersOnSplit(), gistvacuumcleanup(), pgstat_relation(), and pushStackIfSplited().
#define GIST_SHARE BUFFER_LOCK_SHARE |
Definition at line 26 of file gist_private.h.
Referenced by gistbufferinginserttuples(), gistbulkdelete(), gistdoinsert(), gistFindPath(), gistGetMaxLevel(), gistScanPage(), gistvacuumcleanup(), and pgstat_gist_page().
#define GIST_UNLOCK BUFFER_LOCK_UNLOCK |
Definition at line 28 of file gist_private.h.
Referenced by gistbulkdelete(), gistdoinsert(), gistformdownlink(), gistinserttuples(), and gistNewBuffer().
#define GiSTPageSize ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) ) |
Definition at line 460 of file gist_private.h.
Referenced by gistfitpage().
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
Definition at line 120 of file gist_private.h.
Referenced by getNextNearest(), gistScanPage(), and GISTSearchTreeItemCombiner().
#define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID )
Definition at line 303 of file gist_private.h.
Referenced by gistbulkdelete(), gistdoinsert(), and gistindex_keytest().
#define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID )
Definition at line 304 of file gist_private.h.
Referenced by gistformdownlink(), and gistplacetopage().
#define GSTIHDRSZ offsetof(GISTSearchTreeItem, distances) |
Definition at line 135 of file gist_private.h.
Referenced by gistbeginscan(), gistrescan(), and GISTSearchTreeItemAllocator().
#define LEVEL_HAS_BUFFERS | ( | nlevel, | ||
gfbb | ||||
) |
Definition at line 334 of file gist_private.h.
Referenced by gistProcessItup(), and gistRelocateBuildBuffersOnSplit().
#define PAGE_FREE_SPACE | ( | nbp | ) | (nbp->freespace) |
Definition at line 39 of file gist_private.h.
Referenced by gistAllocateNewPageBuffer(), gistGetItupFromPage(), gistPlaceItupToPage(), and gistPushItupToNodeBuffer().
#define PAGE_IS_EMPTY | ( | nbp | ) | (nbp->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET) |
Definition at line 41 of file gist_private.h.
Referenced by gistGetItupFromPage(), and gistPopItupFromNodeBuffer().
#define PAGE_NO_SPACE(nbp, itup) \
	(PAGE_FREE_SPACE(nbp) < MAXALIGN(IndexTupleSize(itup)))
Definition at line 43 of file gist_private.h.
Referenced by gistPushItupToNodeBuffer().
#define TUPLE_IS_INVALID 0xfffe |
Definition at line 301 of file gist_private.h.
#define TUPLE_IS_VALID 0xffff |
Definition at line 300 of file gist_private.h.
#define XLOG_GIST_CREATE_INDEX 0x50 |
Definition at line 169 of file gist_private.h.
Referenced by gist_desc(), gist_redo(), and gistbuild().
#define XLOG_GIST_PAGE_SPLIT 0x30 |
Definition at line 167 of file gist_private.h.
Referenced by gist_desc(), gist_redo(), and gistXLogSplit().
#define XLOG_GIST_PAGE_UPDATE 0x00 |
Definition at line 165 of file gist_private.h.
Referenced by gist_desc(), gist_redo(), and gistXLogUpdate().
typedef struct GISTBuildBuffers GISTBuildBuffers |
typedef struct GISTInsertStack GISTInsertStack |
typedef struct GiSTOptions GiSTOptions |
typedef GISTScanOpaqueData* GISTScanOpaque |
Definition at line 160 of file gist_private.h.
typedef struct GISTScanOpaqueData GISTScanOpaqueData |
typedef struct GISTSearchHeapItem GISTSearchHeapItem |
typedef struct GISTSearchItem GISTSearchItem |
typedef struct GISTSearchTreeItem GISTSearchTreeItem |
typedef struct GistSplitVector GistSplitVector |
typedef struct gistxlogPage gistxlogPage |
typedef struct gistxlogPageSplit gistxlogPageSplit |
typedef struct gistxlogPageUpdate gistxlogPageUpdate |
typedef struct SplitedPageLayout SplitedPageLayout |
MemoryContext createTempGistContext | ( | void | ) |
Definition at line 60 of file gist.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), and CurrentMemoryContext.
Referenced by gist_xlog_startup(), gistbeginscan(), and gistbuild().
{ return AllocSetContextCreate(CurrentMemoryContext, "GiST temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); }
void freeGISTstate | ( | GISTSTATE * | giststate | ) |
Definition at line 1391 of file gist.c.
References MemoryContextDelete(), and GISTSTATE::scanCxt.
Referenced by gistbuild(), gistendscan(), and gistinsert().
{ /* It's sufficient to delete the scanCxt */ MemoryContextDelete(giststate->scanCxt); }
void gist_desc | ( | StringInfo | buf, | |
uint8 | xl_info, | |||
char * | rec | |||
) |
Definition at line 45 of file gistdesc.c.
References appendStringInfo(), out_gistxlogPageSplit(), out_gistxlogPageUpdate(), XLOG_GIST_CREATE_INDEX, XLOG_GIST_PAGE_SPLIT, and XLOG_GIST_PAGE_UPDATE.
{ uint8 info = xl_info & ~XLR_INFO_MASK; switch (info) { case XLOG_GIST_PAGE_UPDATE: appendStringInfo(buf, "page_update: "); out_gistxlogPageUpdate(buf, (gistxlogPageUpdate *) rec); break; case XLOG_GIST_PAGE_SPLIT: out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec); break; case XLOG_GIST_CREATE_INDEX: appendStringInfo(buf, "create_index: rel %u/%u/%u", ((RelFileNode *) rec)->spcNode, ((RelFileNode *) rec)->dbNode, ((RelFileNode *) rec)->relNode); break; default: appendStringInfo(buf, "unknown gist op code %u", info); break; } }
void gist_redo | ( | XLogRecPtr | lsn, | |
XLogRecord * | record | |||
) |
Definition at line 330 of file gistxlog.c.
References elog, gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), MemoryContextReset(), MemoryContextSwitchTo(), PANIC, XLogRecord::xl_info, XLOG_GIST_CREATE_INDEX, XLOG_GIST_PAGE_SPLIT, and XLOG_GIST_PAGE_UPDATE.
{ uint8 info = record->xl_info & ~XLR_INFO_MASK; MemoryContext oldCxt; /* * GiST indexes do not require any conflict processing. NB: If we ever * implement a similar optimization we have in b-tree, and remove killed * tuples outside VACUUM, we'll need to handle that here. */ oldCxt = MemoryContextSwitchTo(opCtx); switch (info) { case XLOG_GIST_PAGE_UPDATE: gistRedoPageUpdateRecord(lsn, record); break; case XLOG_GIST_PAGE_SPLIT: gistRedoPageSplitRecord(lsn, record); break; case XLOG_GIST_CREATE_INDEX: gistRedoCreateIndex(lsn, record); break; default: elog(PANIC, "gist_redo: unknown op code %u", info); } MemoryContextSwitchTo(oldCxt); MemoryContextReset(opCtx); }
void gist_xlog_cleanup | ( | void | ) |
Definition at line 368 of file gistxlog.c.
References MemoryContextDelete().
{ MemoryContextDelete(opCtx); }
void gist_xlog_startup | ( | void | ) |
Definition at line 362 of file gistxlog.c.
References createTempGistContext().
{ opCtx = createTempGistContext(); }
Datum gistbuild | ( | PG_FUNCTION_ARGS | ) |
Definition at line 112 of file gistbuild.c.
References Assert, XLogRecData::buffer, BufferGetBlockNumber(), BufferGetPage, GISTBuildState::bufferingMode, GiSTOptions::bufferingModeOffset, createTempGistContext(), CurrentMemoryContext, XLogRecData::data, DEBUG1, elog, END_CRIT_SECTION, ERROR, F_LEAF, GiSTOptions::fillfactor, fillfactor, freeGISTstate(), GISTBuildState::freespace, GIST_BUFFERING_ACTIVE, GIST_BUFFERING_AUTO, GIST_BUFFERING_DISABLED, GIST_BUFFERING_STATS, GIST_ROOT_BLKNO, gistBuildCallback(), gistEmptyAllBuffers(), gistFreeBuildBuffers(), gistGetFakeLSN(), GISTInitBuffer(), gistNewBuffer(), GISTBuildState::giststate, IndexBuildResult::heap_tuples, IndexBuildResult::index_tuples, IndexBuildHeapScan(), GISTBuildState::indexrel, GISTBuildState::indtuples, GISTBuildState::indtuplesSize, initGISTstate(), XLogRecData::len, MarkBufferDirty(), MemoryContextDelete(), MemoryContextSwitchTo(), XLogRecData::next, PageSetLSN, palloc(), PG_GETARG_POINTER, PG_RETURN_POINTER, RelationData::rd_node, RelationData::rd_options, RelationGetNumberOfBlocks, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, GISTSTATE::tempCxt, UnlockReleaseBuffer(), XLOG_GIST_CREATE_INDEX, and XLogInsert().
{ Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); IndexBuildResult *result; double reltuples; GISTBuildState buildstate; Buffer buffer; Page page; MemoryContext oldcxt = CurrentMemoryContext; int fillfactor; buildstate.indexrel = index; if (index->rd_options) { /* Get buffering mode from the options string */ GiSTOptions *options = (GiSTOptions *) index->rd_options; char *bufferingMode = (char *) options + options->bufferingModeOffset; if (strcmp(bufferingMode, "on") == 0) buildstate.bufferingMode = GIST_BUFFERING_STATS; else if (strcmp(bufferingMode, "off") == 0) buildstate.bufferingMode = GIST_BUFFERING_DISABLED; else buildstate.bufferingMode = GIST_BUFFERING_AUTO; fillfactor = options->fillfactor; } else { /* * By default, switch to buffering mode when the index grows too large * to fit in cache. */ buildstate.bufferingMode = GIST_BUFFERING_AUTO; fillfactor = GIST_DEFAULT_FILLFACTOR; } /* Calculate target amount of free space to leave on pages */ buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100; /* * We expect to be called exactly once for any index relation. If that's * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* no locking is needed */ buildstate.giststate = initGISTstate(index); /* * Create a temporary memory context that is reset once for each tuple * processed. (Note: we don't bother to make this a child of the * giststate's scanCxt, so we have to delete it separately at the end.) 
*/ buildstate.giststate->tempCxt = createTempGistContext(); /* initialize the root page */ buffer = gistNewBuffer(index); Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); page = BufferGetPage(buffer); START_CRIT_SECTION(); GISTInitBuffer(buffer, F_LEAF); MarkBufferDirty(buffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; XLogRecData rdata; rdata.data = (char *) &(index->rd_node); rdata.len = sizeof(RelFileNode); rdata.buffer = InvalidBuffer; rdata.next = NULL; recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata); PageSetLSN(page, recptr); } else PageSetLSN(page, gistGetFakeLSN(heap)); UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); /* build the index */ buildstate.indtuples = 0; buildstate.indtuplesSize = 0; /* * Do the heap scan. */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, gistBuildCallback, (void *) &buildstate); /* * If buffering was used, flush out all the tuples that are still in the * buffers. */ if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE) { elog(DEBUG1, "all tuples processed, emptying buffers"); gistEmptyAllBuffers(&buildstate); gistFreeBuildBuffers(buildstate.gfbb); } /* okay, all heap tuples are indexed */ MemoryContextSwitchTo(oldcxt); MemoryContextDelete(buildstate.giststate->tempCxt); freeGISTstate(buildstate.giststate); /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = (double) buildstate.indtuples; PG_RETURN_POINTER(result); }
Datum gistbuildempty | ( | PG_FUNCTION_ARGS | ) |
Definition at line 73 of file gist.c.
References BUFFER_LOCK_EXCLUSIVE, END_CRIT_SECTION, F_LEAF, GISTInitBuffer(), INIT_FORKNUM, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), NULL, P_NEW, PG_GETARG_POINTER, PG_RETURN_VOID, RBM_NORMAL, ReadBufferExtended(), START_CRIT_SECTION, and UnlockReleaseBuffer().
{ Relation index = (Relation) PG_GETARG_POINTER(0); Buffer buffer; /* Initialize the root page */ buffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* Initialize and xlog buffer */ START_CRIT_SECTION(); GISTInitBuffer(buffer, F_LEAF); MarkBufferDirty(buffer); log_newpage_buffer(buffer); END_CRIT_SECTION(); /* Unlock and release the buffer */ UnlockReleaseBuffer(buffer); PG_RETURN_VOID(); }
Datum gistbulkdelete | ( | PG_FUNCTION_ARGS | ) |
Definition at line 141 of file gistvacuum.c.
References GistBDItem::blkno, BufferGetPage, callback(), END_CRIT_SECTION, ereport, errdetail(), errhint(), errmsg(), IndexBulkDeleteResult::estimated_count, FirstOffsetNumber, GIST_EXCLUSIVE, GIST_ROOT_BLKNO, GIST_SHARE, GIST_UNLOCK, gistcheckpage(), gistGetFakeLSN(), GistMarkTuplesDeleted, GistPageIsLeaf, GistTupleIsInvalid, gistXLogUpdate(), i, IndexVacuumInfo::index, InvalidBuffer, ItemPointerGetBlockNumber, LockBuffer(), LOG, MAIN_FORKNUM, MarkBufferDirty(), GistBDItem::next, NULL, IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, PageIndexTupleDelete(), PageSetLSN, palloc(), palloc0(), GistBDItem::parentlsn, pfree(), PG_GETARG_POINTER, PG_RETURN_POINTER, pushStackIfSplited(), RBM_NORMAL, RelationData::rd_node, ReadBufferExtended(), RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, UnlockReleaseBuffer(), and vacuum_delay_point().
{ IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2); void *callback_state = (void *) PG_GETARG_POINTER(3); Relation rel = info->index; GistBDItem *stack, *ptr; /* first time through? */ if (stats == NULL) stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* we'll re-count the tuples each time */ stats->estimated_count = false; stats->num_index_tuples = 0; stack = (GistBDItem *) palloc0(sizeof(GistBDItem)); stack->blkno = GIST_ROOT_BLKNO; while (stack) { Buffer buffer; Page page; OffsetNumber i, maxoff; IndexTuple idxtuple; ItemId iid; buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIST_SHARE); gistcheckpage(rel, buffer); page = (Page) BufferGetPage(buffer); if (GistPageIsLeaf(page)) { OffsetNumber todelete[MaxOffsetNumber]; int ntodelete = 0; LockBuffer(buffer, GIST_UNLOCK); LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page)) { /* only the root can become non-leaf during relock */ UnlockReleaseBuffer(buffer); /* one more check */ continue; } /* * check for split proceeded after look at parent, we should check * it after relock */ pushStackIfSplited(page, stack); /* * Remove deletable tuples from page */ maxoff = PageGetMaxOffsetNumber(page); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { iid = PageGetItemId(page, i); idxtuple = (IndexTuple) PageGetItem(page, iid); if (callback(&(idxtuple->t_tid), callback_state)) { todelete[ntodelete] = i - ntodelete; ntodelete++; stats->tuples_removed += 1; } else stats->num_index_tuples += 1; } if (ntodelete) { START_CRIT_SECTION(); MarkBufferDirty(buffer); for (i = 0; i < ntodelete; i++) PageIndexTupleDelete(page, todelete[i]); GistMarkTuplesDeleted(page); if 
(RelationNeedsWAL(rel)) { XLogRecPtr recptr; recptr = gistXLogUpdate(rel->rd_node, buffer, todelete, ntodelete, NULL, 0, InvalidBuffer); PageSetLSN(page, recptr); } else PageSetLSN(page, gistGetFakeLSN(rel)); END_CRIT_SECTION(); } } else { /* check for split proceeded after look at parent */ pushStackIfSplited(page, stack); maxoff = PageGetMaxOffsetNumber(page); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { iid = PageGetItemId(page, i); idxtuple = (IndexTuple) PageGetItem(page, iid); ptr = (GistBDItem *) palloc(sizeof(GistBDItem)); ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); ptr->parentlsn = PageGetLSN(page); ptr->next = stack->next; stack->next = ptr; if (GistTupleIsInvalid(idxtuple)) ereport(LOG, (errmsg("index \"%s\" contains an inner tuple marked as invalid", RelationGetRelationName(rel)), errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), errhint("Please REINDEX it."))); } } UnlockReleaseBuffer(buffer); ptr = stack->next; pfree(stack); stack = ptr; vacuum_delay_point(); } PG_RETURN_POINTER(stats); }
void gistcentryinit | ( | GISTSTATE * | giststate, | |
int | nkey, | |||
GISTENTRY * | e, | |||
Datum | k, | |||
Relation | r, | |||
Page | pg, | |||
OffsetNumber | o, | |||
bool | l, | |||
bool | isNull | |||
) |
Definition at line 566 of file gistutil.c.
References GISTSTATE::compressFn, DatumGetPointer, FunctionCall1Coll(), gistentryinit, GISTENTRY::key, GISTENTRY::leafkey, GISTENTRY::offset, GISTENTRY::page, PointerGetDatum, GISTENTRY::rel, and GISTSTATE::supportCollation.
Referenced by gistFormTuple().
{ if (!isNull) { GISTENTRY *cep; gistentryinit(*e, k, r, pg, o, l); cep = (GISTENTRY *) DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[nkey], giststate->supportCollation[nkey], PointerGetDatum(e))); /* compressFn may just return the given pointer */ if (cep != e) gistentryinit(*e, cep->key, cep->rel, cep->page, cep->offset, cep->leafkey); } else gistentryinit(*e, (Datum) 0, r, pg, o, l); }
Definition at line 677 of file gistutil.c.
References BufferGetBlockNumber(), BufferGetPage, ereport, errcode(), errhint(), errmsg(), ERROR, MAXALIGN, PageGetSpecialSize, PageIsNew, and RelationGetRelationName.
Referenced by gistBufferingFindCorrectParent(), gistbulkdelete(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistNewBuffer(), gistScanPage(), and pgstat_gist_page().
{ Page page = BufferGetPage(buf); /* * ReadBuffer verifies that every newly-read page passes * PageHeaderIsValid, which means it either contains a reasonably sane * page header or is all-zero. We have to defend against the all-zero * case, however. */ if (PageIsNew(page)) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("index \"%s\" contains unexpected zero page at block %u", RelationGetRelationName(rel), BufferGetBlockNumber(buf)), errhint("Please REINDEX it."))); /* * Additionally check that the special area looks sane. */ if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData))) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("index \"%s\" contains corrupted page at block %u", RelationGetRelationName(rel), BufferGetBlockNumber(buf)), errhint("Please REINDEX it."))); }
OffsetNumber gistchoose | ( | Relation | r, | |
Page | p, | |||
IndexTuple | it, | |||
GISTSTATE * | giststate | |||
) |
Definition at line 367 of file gistutil.c.
References Assert, FALSE, FirstOffsetNumber, gistDeCompressAtt(), gistdentryinit(), GistPageIsLeaf, gistpenalty(), i, index_getattr, MAX_RANDOM_VALUE, tupleDesc::natts, NULL, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, random(), RelationData::rd_att, and GISTSTATE::tupdesc.
Referenced by gistdoinsert(), and gistProcessItup().
{ OffsetNumber result; OffsetNumber maxoff; OffsetNumber i; float best_penalty[INDEX_MAX_KEYS]; GISTENTRY entry, identry[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; int keep_current_best; Assert(!GistPageIsLeaf(p)); gistDeCompressAtt(giststate, r, it, NULL, (OffsetNumber) 0, identry, isnull); /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */ result = FirstOffsetNumber; /* * The index may have multiple columns, and there's a penalty value for * each column. The penalty associated with a column that appears earlier * in the index definition is strictly more important than the penalty of * a column that appears later in the index definition. * * best_penalty[j] is the best penalty we have seen so far for column j, * or -1 when we haven't yet examined column j. Array entries to the * right of the first -1 are undefined. */ best_penalty[0] = -1; /* * If we find a tuple that's exactly as good as the currently best one, we * could use either one. When inserting a lot of tuples with the same or * similar keys, it's preferable to descend down the same path when * possible, as that's more cache-friendly. On the other hand, if all * inserts land on the same leaf page after a split, we're never going to * insert anything to the other half of the split, and will end up using * only 50% of the available space. Distributing the inserts evenly would * lead to better space usage, but that hurts cache-locality during * insertion. To get the best of both worlds, when we find a tuple that's * exactly as good as the previous best, choose randomly whether to stick * to the old best, or use the new one. Once we decide to stick to the * old best, we keep sticking to it for any subsequent equally good tuples * we might find. This favors tuples with low offsets, but still allows * some inserts to go to other equally-good subtrees. * * keep_current_best is -1 if we haven't yet had to make a random choice * whether to keep the current best tuple. 
If we have done so, and * decided to keep it, keep_current_best is 1; if we've decided to * replace, keep_current_best is 0. (This state will be reset to -1 as * soon as we've made the replacement, but sometimes we make the choice in * advance of actually finding a replacement best tuple.) */ keep_current_best = -1; /* * Loop over tuples on page. */ maxoff = PageGetMaxOffsetNumber(p); Assert(maxoff >= FirstOffsetNumber); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); bool zero_penalty; int j; zero_penalty = true; /* Loop over index attributes. */ for (j = 0; j < r->rd_att->natts; j++) { Datum datum; float usize; bool IsNull; /* Compute penalty for this column. */ datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull); gistdentryinit(giststate, j, &entry, datum, r, p, i, FALSE, IsNull); usize = gistpenalty(giststate, j, &entry, IsNull, &identry[j], isnull[j]); if (usize > 0) zero_penalty = false; if (best_penalty[j] < 0 || usize < best_penalty[j]) { /* * New best penalty for column. Tentatively select this tuple * as the target, and record the best penalty. Then reset the * next column's penalty to "unknown" (and indirectly, the * same for all the ones to its right). This will force us to * adopt this tuple's penalty values as the best for all the * remaining columns during subsequent loop iterations. */ result = i; best_penalty[j] = usize; if (j < r->rd_att->natts - 1) best_penalty[j + 1] = -1; /* we have new best, so reset keep-it decision */ keep_current_best = -1; } else if (best_penalty[j] == usize) { /* * The current tuple is exactly as good for this column as the * best tuple seen so far. The next iteration of this loop * will compare the next column. */ } else { /* * The current tuple is worse for this column than the best * tuple seen so far. Skip the remaining columns and move on * to the next tuple, if any. 
*/ zero_penalty = false; /* so outer loop won't exit */ break; } } /* * If we looped past the last column, and did not update "result", * then this tuple is exactly as good as the prior best tuple. */ if (j == r->rd_att->natts && result != i) { if (keep_current_best == -1) { /* we didn't make the random choice yet for this old best */ keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0; } if (keep_current_best == 0) { /* we choose to use the new tuple */ result = i; /* choose again if there are even more exactly-as-good ones */ keep_current_best = -1; } } /* * If we find a tuple with zero penalty for all columns, and we've * decided we don't want to search for another tuple with equal * penalty, there's no need to examine remaining tuples; just break * out of the loop and return it. */ if (zero_penalty) { if (keep_current_best == -1) { /* we didn't make the random choice yet for this old best */ keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0; } if (keep_current_best == 1) break; } } return result; }
void gistDeCompressAtt | ( | GISTSTATE * | giststate, | |
Relation | r, | |||
IndexTuple | tuple, | |||
Page | p, | |||
OffsetNumber | o, | |||
GISTENTRY * | attdata, | |||
bool * | isnull | |||
) |
Definition at line 290 of file gistutil.c.
References FALSE, gistdentryinit(), i, index_getattr, tupleDesc::natts, RelationData::rd_att, and GISTSTATE::tupdesc.
Referenced by gistchoose(), gistgetadjusted(), gistRelocateBuildBuffersOnSplit(), and placeOne().
{ int i; for (i = 0; i < r->rd_att->natts; i++) { Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); gistdentryinit(giststate, i, &attdata[i], datum, r, p, o, FALSE, isnull[i]); } }
void gistdentryinit | ( | GISTSTATE * | giststate, | |
int | nkey, | |||
GISTENTRY * | e, | |||
Datum | k, | |||
Relation | r, | |||
Page | pg, | |||
OffsetNumber | o, | |||
bool | l, | |||
bool | isNull | |||
) |
Definition at line 539 of file gistutil.c.
References DatumGetPointer, GISTSTATE::decompressFn, FunctionCall1Coll(), gistentryinit, GISTENTRY::key, GISTENTRY::leafkey, GISTENTRY::offset, GISTENTRY::page, PointerGetDatum, GISTENTRY::rel, and GISTSTATE::supportCollation.
Referenced by gistchoose(), gistDeCompressAtt(), gistindex_keytest(), gistMakeUnionItVec(), and gistSplitByKey().
{ if (!isNull) { GISTENTRY *dep; gistentryinit(*e, k, r, pg, o, l); dep = (GISTENTRY *) DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey], giststate->supportCollation[nkey], PointerGetDatum(e))); /* decompressFn may just return the given pointer */ if (dep != e) gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset, dep->leafkey); } else gistentryinit(*e, (Datum) 0, r, pg, o, l); }
void gistdoinsert | ( | Relation | r, | |
IndexTuple | itup, | |||
Size | freespace, | |||
GISTSTATE * | GISTstate | |||
) |
Definition at line 505 of file gist.c.
References Assert, GISTInsertStack::blkno, GISTInsertStack::buffer, BufferGetPage, GISTInsertStack::downlinkoffnum, ereport, errdetail(), errhint(), errmsg(), ERROR, GISTInsertState::freespace, GIST_EXCLUSIVE, GIST_ROOT_BLKNO, GIST_SHARE, GIST_UNLOCK, gistcheckpage(), gistchoose(), gistfixsplit(), GistFollowRight, gistgetadjusted(), gistinserttuple(), GistPageGetNSN, GistPageIsLeaf, GistTupleIsInvalid, InvalidOffsetNumber, ItemPointerGetBlockNumber, LockBuffer(), GISTInsertStack::lsn, GISTInsertStack::page, PageGetItem, PageGetItemId, PageGetLSN, palloc0(), GISTInsertStack::parent, GISTInsertState::r, ReadBuffer(), RelationGetRelationName, RelationNeedsWAL, ReleaseBuffer(), GISTInsertState::stack, IndexTupleData::t_tid, UnlockReleaseBuffer(), and XLogRecPtrIsInvalid.
Referenced by gistBuildCallback(), and gistinsert().
{ ItemId iid; IndexTuple idxtuple; GISTInsertStack firststack; GISTInsertStack *stack; GISTInsertState state; bool xlocked = false; memset(&state, 0, sizeof(GISTInsertState)); state.freespace = freespace; state.r = r; /* Start from the root */ firststack.blkno = GIST_ROOT_BLKNO; firststack.lsn = 0; firststack.parent = NULL; firststack.downlinkoffnum = InvalidOffsetNumber; state.stack = stack = &firststack; /* * Walk down along the path of smallest penalty, updating the parent * pointers with the key we're inserting as we go. If we crash in the * middle, the tree is consistent, although the possible parent updates * were a waste. */ for (;;) { if (XLogRecPtrIsInvalid(stack->lsn)) stack->buffer = ReadBuffer(state.r, stack->blkno); /* * Be optimistic and grab shared lock first. Swap it for an exclusive * lock later if we need to update the page. */ if (!xlocked) { LockBuffer(stack->buffer, GIST_SHARE); gistcheckpage(state.r, stack->buffer); } stack->page = (Page) BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn)); /* * If this page was split but the downlink was never inserted to the * parent because the inserting backend crashed before doing that, fix * that now. */ if (GistFollowRight(stack->page)) { if (!xlocked) { LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; /* someone might've completed the split when we unlocked */ if (!GistFollowRight(stack->page)) continue; } gistfixsplit(&state, giststate); UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; continue; } if (stack->blkno != GIST_ROOT_BLKNO && stack->parent->lsn < GistPageGetNSN(stack->page)) { /* * Concurrent split detected. There's no guarantee that the * downlink for this page is consistent with the tuple we're * inserting anymore, so go back to parent and rechoose the best * child. 
*/ UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; continue; } if (!GistPageIsLeaf(stack->page)) { /* * This is an internal page so continue to walk down the tree. * Find the child node that has the minimum insertion penalty. */ BlockNumber childblkno; IndexTuple newtup; GISTInsertStack *item; OffsetNumber downlinkoffnum; downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate); iid = PageGetItemId(stack->page, downlinkoffnum); idxtuple = (IndexTuple) PageGetItem(stack->page, iid); childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); /* * Check that it's not a leftover invalid tuple from pre-9.1 */ if (GistTupleIsInvalid(idxtuple)) ereport(ERROR, (errmsg("index \"%s\" contains an inner tuple marked as invalid", RelationGetRelationName(r)), errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), errhint("Please REINDEX it."))); /* * Check that the key representing the target child node is * consistent with the key we're inserting. Update it if it's not. */ newtup = gistgetadjusted(state.r, idxtuple, itup, giststate); if (newtup) { /* * Swap shared lock for an exclusive one. Beware, the page may * change while we unlock/lock the page... */ if (!xlocked) { LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; stack->page = (Page) BufferGetPage(stack->buffer); if (PageGetLSN(stack->page) != stack->lsn) { /* the page was changed while we unlocked it, retry */ continue; } } /* * Update the tuple. * * We still hold the lock after gistinserttuple(), but it * might have to split the page to make the updated tuple fit. * In that case the updated tuple might migrate to the other * half of the split, so we have to go back to the parent and * descend back to the half that's a better fit for the new * tuple. 
*/ if (gistinserttuple(&state, stack, giststate, newtup, downlinkoffnum)) { /* * If this was a root split, the root page continues to be * the parent and the updated tuple went to one of the * child pages, so we just need to retry from the root * page. */ if (stack->blkno != GIST_ROOT_BLKNO) { UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; } continue; } } LockBuffer(stack->buffer, GIST_UNLOCK); xlocked = false; /* descend to the chosen child */ item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack)); item->blkno = childblkno; item->parent = stack; item->downlinkoffnum = downlinkoffnum; state.stack = stack = item; } else { /* * Leaf page. Insert the new key. We've already updated all the * parents on the way down, but we might have to split the page if * it doesn't fit. gistinserthere() will take care of that. */ /* * Swap shared lock for an exclusive one. Be careful, the page may * change while we unlock/lock the page... */ if (!xlocked) { LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; stack->page = (Page) BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); if (stack->blkno == GIST_ROOT_BLKNO) { /* * the only page that can become inner instead of leaf is * the root page, so for root we should recheck it */ if (!GistPageIsLeaf(stack->page)) { /* * very rare situation: during unlock/lock index with * number of pages = 1 was increased */ LockBuffer(stack->buffer, GIST_UNLOCK); xlocked = false; continue; } /* * we don't need to check root split, because checking * leaf/inner is enough to recognize split for root */ } else if (GistFollowRight(stack->page) || stack->parent->lsn < GistPageGetNSN(stack->page)) { /* * The page was split while we momentarily unlocked the * page. Go back to parent. 
*/ UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; continue; } } /* now state.stack->(page, buffer and blkno) points to leaf page */ gistinserttuple(&state, stack, giststate, itup, InvalidOffsetNumber); LockBuffer(stack->buffer, GIST_UNLOCK); /* Release any pins we might still hold before exiting */ for (; stack; stack = stack->parent) ReleaseBuffer(stack->buffer); break; } } }
IndexTuple* gistextractpage | ( | Page | page, | |
int * | len | |||
) |
Definition at line 90 of file gistutil.c.
References FirstOffsetNumber, i, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and palloc().
Referenced by gistplacetopage().
/*
 * Extract every index tuple stored on "page" into a freshly palloc'd
 * array of pointers; the tuple count is returned through *len.
 * The tuples themselves are not copied -- the pointers reference the
 * page's contents directly.
 */
{
	IndexTuple *vec;
	OffsetNumber off;
	OffsetNumber max = PageGetMaxOffsetNumber(page);

	*len = max;
	vec = palloc(sizeof(IndexTuple) * max);
	for (off = FirstOffsetNumber; off <= max; off = OffsetNumberNext(off))
		vec[off - FirstOffsetNumber] =
			(IndexTuple) PageGetItem(page, PageGetItemId(page, off));

	return vec;
}
void gistfillbuffer | ( | Page | page, | |
IndexTuple * | itup, | |||
int | len, | |||
OffsetNumber | off | |||
) |
Definition at line 29 of file gistutil.c.
References elog, ERROR, FirstOffsetNumber, i, IndexTupleSize, InvalidOffsetNumber, OffsetNumberNext, PageAddItem(), PageGetMaxOffsetNumber, and PageIsEmpty.
Referenced by gistplacetopage(), and gistRedoPageSplitRecord().
/*
 * Append "len" index tuples from "itup" to "page", starting at offset
 * "off".  If off is InvalidOffsetNumber, the tuples are added at the end
 * of the page.  Errors out if any tuple fails to fit.
 */
{
	int			i;
	OffsetNumber target = off;

	/* InvalidOffsetNumber means "append after the current last item" */
	if (target == InvalidOffsetNumber)
		target = PageIsEmpty(page) ? FirstOffsetNumber :
			OffsetNumberNext(PageGetMaxOffsetNumber(page));

	for (i = 0; i < len; i++)
	{
		Size		itupsz = IndexTupleSize(itup[i]);

		if (PageAddItem(page, (Item) itup[i], itupsz, target,
						false, false) == InvalidOffsetNumber)
			elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
				 i, len, (int) itupsz);
		target++;
	}
}
IndexTupleData* gistfillitupvec | ( | IndexTuple * | vec, | |
int | veclen, | |||
int * | memlen | |||
) |
Definition at line 122 of file gistutil.c.
References i, IndexTupleSize, and palloc().
Referenced by gistplacetopage(), and gistSplit().
/*
 * Pack the "veclen" index tuples in "vec" into one contiguous palloc'd
 * chunk, returning a pointer to it.  The total byte size of the chunk is
 * reported through *memlen.
 */
{
	int			i;
	char	   *result;
	char	   *dst;

	/* First pass: compute the total space needed. */
	*memlen = 0;
	for (i = 0; i < veclen; i++)
		*memlen += IndexTupleSize(vec[i]);

	result = palloc(*memlen);

	/* Second pass: copy each tuple into place, back to back. */
	dst = result;
	for (i = 0; i < veclen; i++)
	{
		Size		sz = IndexTupleSize(vec[i]);

		memcpy(dst, vec[i], sz);
		dst += sz;
	}

	return (IndexTupleData *) result;
}
bool gistfitpage | ( | IndexTuple * | itvec, | |
int | len | |||
) |
Definition at line 74 of file gistutil.c.
References GiSTPageSize, i, and IndexTupleSize.
Referenced by gistSplit().
/*
 * Would all "len" tuples of "itvec" fit on one empty GiST page?
 * Accounts for each tuple's line-pointer overhead.
 */
{
	Size		need = 0;
	int			i;

	for (i = 0; i < len; i++)
		need += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);

	/* TODO: Consider fillfactor */
	return need <= GiSTPageSize;
}
IndexTuple gistFormTuple | ( | GISTSTATE * | giststate, | |
Relation | r, | |||
Datum * | attdata, | |||
bool * | isnull, | |||
bool | newValues | |||
) |
void gistFreeBuildBuffers | ( | GISTBuildBuffers * | gfbb | ) |
Definition at line 513 of file gistbuildbuffers.c.
References BufFileClose(), and GISTBuildBuffers::pfile.
Referenced by gistbuild().
/*
 * Release the resources held by a GiST buffering-build state: close the
 * temporary file that backed the node buffers.  All in-memory structures
 * live in the build's memory context and are freed when it is released.
 */
{
	/* Close buffers file. */
	BufFileClose(gfbb->pfile);

	/* All other things will be freed on memory context release */
}
IndexTuple gistgetadjusted | ( | Relation | r, | |
IndexTuple | oldtup, | |||
IndexTuple | addtup, | |||
GISTSTATE * | giststate | |||
) |
Definition at line 309 of file gistutil.c.
References gistDeCompressAtt(), gistFormTuple(), gistKeyIsEQ(), gistMakeUnionKey(), i, tupleDesc::natts, NULL, RelationData::rd_att, and IndexTupleData::t_tid.
Referenced by gistdoinsert(), gistformdownlink(), gistProcessItup(), and gistRelocateBuildBuffersOnSplit().
/*
 * Form the union of an existing downlink tuple "oldtup" with a tuple
 * "addtup" being pushed down through it.  Returns a palloc'd replacement
 * tuple (carrying oldtup's t_tid) if any column's union key differs from
 * the old key; returns NULL if the existing downlink already covers
 * addtup, meaning no parent update is needed.  A column whose union is
 * NULL implies both inputs were NULL, so it never forces a new tuple.
 */
{ bool neednew = FALSE; GISTENTRY oldentries[INDEX_MAX_KEYS], addentries[INDEX_MAX_KEYS]; bool oldisnull[INDEX_MAX_KEYS], addisnull[INDEX_MAX_KEYS]; Datum attr[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; IndexTuple newtup = NULL; int i; gistDeCompressAtt(giststate, r, oldtup, NULL, (OffsetNumber) 0, oldentries, oldisnull); gistDeCompressAtt(giststate, r, addtup, NULL, (OffsetNumber) 0, addentries, addisnull); for (i = 0; i < r->rd_att->natts; i++) { gistMakeUnionKey(giststate, i, oldentries + i, oldisnull[i], addentries + i, addisnull[i], attr + i, isnull + i); if (neednew) /* we already need new key, so we can skip check */ continue; if (isnull[i]) /* union of key may be NULL if and only if both keys are NULL */ continue; if (!addisnull[i]) { if (oldisnull[i] || !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i])) neednew = true; } } if (neednew) { /* need to update key */ newtup = gistFormTuple(giststate, r, attr, isnull, false); newtup->t_tid = oldtup->t_tid; } return newtup; }
Datum gistgetbitmap | ( | PG_FUNCTION_ARGS | ) |
Definition at line 548 of file gistget.c.
References CHECK_FOR_INTERRUPTS, GISTScanOpaqueData::curPageData, GISTScanOpaqueData::curTreeItem, GISTSearchTreeItem::distances, getNextGISTSearchItem(), gistScanPage(), IndexScanDescData::indexRelation, GISTScanOpaqueData::nPageData, NULL, IndexScanDescData::opaque, pfree(), PG_GETARG_POINTER, PG_RETURN_INT64, pgstat_count_index_scan, and GISTScanOpaqueData::qual_ok.
/*
 * amgetbitmap implementation for GiST: collect the TIDs of all heap
 * tuples satisfying the scan keys into the caller-supplied TIDBitmap.
 * Starts from a fake root-page search item, then drains the search
 * queue; gistScanPage() stores matching leaf TIDs directly into "tbm".
 * Returns the number of TIDs added.
 */
{ IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); GISTScanOpaque so = (GISTScanOpaque) scan->opaque; int64 ntids = 0; GISTSearchItem fakeItem; if (!so->qual_ok) PG_RETURN_INT64(0); pgstat_count_index_scan(scan->indexRelation); /* Begin the scan by processing the root page */ so->curTreeItem = NULL; so->curPageData = so->nPageData = 0; fakeItem.blkno = GIST_ROOT_BLKNO; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); gistScanPage(scan, &fakeItem, NULL, tbm, &ntids); /* * While scanning a leaf page, ItemPointers of matching heap tuples will * be stored directly into tbm, so we don't need to deal with them here. */ for (;;) { GISTSearchItem *item = getNextGISTSearchItem(so); if (!item) break; CHECK_FOR_INTERRUPTS(); gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids); pfree(item); } PG_RETURN_INT64(ntids); }
XLogRecPtr gistGetFakeLSN | ( | Relation | rel | ) |
Definition at line 806 of file gistutil.c.
References Assert, GetFakeLSNForUnloggedRel(), RelationData::rd_rel, RELPERSISTENCE_TEMP, and RELPERSISTENCE_UNLOGGED.
Referenced by gistbuild(), gistbulkdelete(), and gistplacetopage().
/*
 * Produce a fake LSN for a relation that is not WAL-logged.  Temporary
 * relations are private to this backend, so a simple local counter
 * suffices; unlogged relations are shared across backends and must use
 * GetFakeLSNForUnloggedRel().
 */
{
	static XLogRecPtr counter = 1;

	if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
	{
		/*
		 * Unlogged relations are accessible from other backends, and survive
		 * (clean) restarts.  GetFakeLSNForUnloggedRel() handles that for us.
		 */
		Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
		return GetFakeLSNForUnloggedRel();
	}

	/* Temporary relation: a backend-local counter will do. */
	return counter++;
}
GISTNodeBuffer* gistGetNodeBuffer | ( | GISTBuildBuffers * | gfbb, | |
GISTSTATE * | giststate, | |||
BlockNumber | nodeBlocknum, | |||
int | level | |||
) |
Definition at line 120 of file gistbuildbuffers.c.
References GISTNodeBuffer::blocksCount, GISTBuildBuffers::buffersOnLevels, GISTBuildBuffers::buffersOnLevelsLen, GISTBuildBuffers::context, hash_search(), i, lcons(), GISTNodeBuffer::level, MemoryContextSwitchTo(), GISTBuildBuffers::nodeBuffersTab, GISTNodeBuffer::pageBlocknum, GISTNodeBuffer::pageBuffer, GISTNodeBuffer::queuedForEmptying, and repalloc().
Referenced by gistProcessItup(), and gistRelocateBuildBuffersOnSplit().
/*
 * Return the node buffer belonging to index block "nodeBlocknum",
 * creating it on first access.  A new buffer is initialized as empty,
 * allocated in the buffering-build's memory context, and prepended to
 * the per-level buffer list (prepending lets freshly-split pages, which
 * are likely still cached, be flushed first during final emptying).
 * NOTE(review): the Doxygen synopsis shows this parameter as "blkno",
 * but the body uses "nodeBlocknum" -- the synopsis appears stale.
 */
{ GISTNodeBuffer *nodeBuffer; bool found; /* Find node buffer in hash table */ nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab, (const void *) &nodeBlocknum, HASH_ENTER, &found); if (!found) { /* * Node buffer wasn't found. Initialize the new buffer as empty. */ MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context); /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */ nodeBuffer->blocksCount = 0; nodeBuffer->pageBlocknum = InvalidBlockNumber; nodeBuffer->pageBuffer = NULL; nodeBuffer->queuedForEmptying = false; nodeBuffer->level = level; /* * Add this buffer to the list of buffers on this level. Enlarge * buffersOnLevels array if needed. */ if (level >= gfbb->buffersOnLevelsLen) { int i; gfbb->buffersOnLevels = (List **) repalloc(gfbb->buffersOnLevels, (level + 1) * sizeof(List *)); /* initialize the enlarged portion */ for (i = gfbb->buffersOnLevelsLen; i <= level; i++) gfbb->buffersOnLevels[i] = NIL; gfbb->buffersOnLevelsLen = level + 1; } /* * Prepend the new buffer to the list of buffers on this level. It's * not arbitrary that the new buffer is put to the beginning of the * list: in the final emptying phase we loop through all buffers at * each level, and flush them. If a page is split during the emptying, * it's more efficient to flush the new splitted pages first, before * moving on to pre-existing pages on the level. The buffers just * created during the page split are likely still in cache, so * flushing them immediately is more efficient than putting them to * the end of the queue. */ gfbb->buffersOnLevels[level] = lcons(nodeBuffer, gfbb->buffersOnLevels[level]); MemoryContextSwitchTo(oldcxt); } return nodeBuffer; }
Datum gistgettuple | ( | PG_FUNCTION_ARGS | ) |
Definition at line 473 of file gistget.c.
References GISTSearchItem::blkno, CHECK_FOR_INTERRUPTS, GISTScanOpaqueData::curPageData, GISTScanOpaqueData::curTreeItem, GISTSearchItem::data, GISTSearchTreeItem::distances, elog, ERROR, GISTScanOpaqueData::firstCall, ForwardScanDirection, getNextGISTSearchItem(), getNextNearest(), gistScanPage(), GISTSearchHeapItem::heapPtr, IndexScanDescData::indexRelation, GISTScanOpaqueData::nPageData, NULL, IndexScanDescData::numberOfOrderBys, IndexScanDescData::opaque, GISTScanOpaqueData::pageData, GISTSearchItem::parentlsn, pfree(), PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_BOOL, pgstat_count_index_scan, GISTScanOpaqueData::qual_ok, GISTSearchHeapItem::recheck, HeapTupleData::t_self, IndexScanDescData::xs_ctup, and IndexScanDescData::xs_recheck.
/*
 * amgettuple implementation for GiST: return the next matching heap TID
 * in scan->xs_ctup.t_self.  Only forward scans are supported.  On the
 * first call the scan is primed with a fake root-page item.  Ordered
 * scans (numberOfOrderBys > 0) delegate to getNextNearest() for strict
 * distance order; otherwise leaf matches are returned a page at a time
 * from so->pageData, refilling via gistScanPage() as pages are drained.
 */
{ IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); GISTScanOpaque so = (GISTScanOpaque) scan->opaque; if (dir != ForwardScanDirection) elog(ERROR, "GiST only supports forward scan direction"); if (!so->qual_ok) PG_RETURN_BOOL(false); if (so->firstCall) { /* Begin the scan by processing the root page */ GISTSearchItem fakeItem; pgstat_count_index_scan(scan->indexRelation); so->firstCall = false; so->curTreeItem = NULL; so->curPageData = so->nPageData = 0; fakeItem.blkno = GIST_ROOT_BLKNO; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); gistScanPage(scan, &fakeItem, NULL, NULL, NULL); } if (scan->numberOfOrderBys > 0) { /* Must fetch tuples in strict distance order */ PG_RETURN_BOOL(getNextNearest(scan)); } else { /* Fetch tuples index-page-at-a-time */ for (;;) { if (so->curPageData < so->nPageData) { /* continuing to return tuples from a leaf page */ scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; scan->xs_recheck = so->pageData[so->curPageData].recheck; so->curPageData++; PG_RETURN_BOOL(true); } /* find and process the next index page */ do { GISTSearchItem *item = getNextGISTSearchItem(so); if (!item) PG_RETURN_BOOL(false); CHECK_FOR_INTERRUPTS(); /* * While scanning a leaf page, ItemPointers of matching heap * tuples are stored in so->pageData. If there are any on * this page, we fall out of the inner "do" and loop around to * return them. */ gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); pfree(item); } while (so->nPageData == 0); } } }
Definition at line 655 of file gistutil.c.
References BufferGetPage, BufferGetPageSize, GISTPageOpaqueData::flags, GISTPageOpaqueData::gist_page_id, GistPageGetOpaque, PageInit(), and GISTPageOpaqueData::rightlink.
Referenced by gistbuild(), gistbuildempty(), gistplacetopage(), gistRedoCreateIndex(), and gistRedoPageSplitRecord().
/*
 * Initialize the page held in buffer "b" as an empty GiST page: run
 * PageInit() with room for the GiST opaque special space, then set the
 * flags to "f", clear the rightlink, and stamp the GiST page id.
 */
{
	GISTPageOpaque opaque;
	Page		page;
	Size		pageSize;

	pageSize = BufferGetPageSize(b);
	page = BufferGetPage(b);
	PageInit(page, pageSize, sizeof(GISTPageOpaqueData));

	opaque = GistPageGetOpaque(page);
	/* page was already zeroed by PageInit, so this is not needed: */
	/* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */
	opaque->rightlink = InvalidBlockNumber;
	opaque->flags = f;
	opaque->gist_page_id = GIST_PAGE_ID;
}
GISTBuildBuffers* gistInitBuildBuffers | ( | int | pagesPerBuffer, | |
int | levelStep, | |||
int | maxLevel | |||
) |
Definition at line 48 of file gistbuildbuffers.c.
References GISTBuildBuffers::bufferEmptyingQueue, GISTBuildBuffers::buffersOnLevels, GISTBuildBuffers::buffersOnLevelsLen, BufFileCreateTemp(), GISTBuildBuffers::context, CurrentMemoryContext, HASHCTL::entrysize, GISTBuildBuffers::freeBlocks, GISTBuildBuffers::freeBlocksLen, HASHCTL::hash, HASH_COMPARE, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, HASHCTL::hcxt, HASHCTL::keysize, GISTBuildBuffers::levelStep, GISTBuildBuffers::loadedBuffers, GISTBuildBuffers::loadedBuffersCount, GISTBuildBuffers::loadedBuffersLen, HASHCTL::match, GISTBuildBuffers::nFileBlocks, GISTBuildBuffers::nFreeBlocks, GISTBuildBuffers::nodeBuffersTab, GISTBuildBuffers::pagesPerBuffer, palloc(), GISTBuildBuffers::pfile, and GISTBuildBuffers::rootlevel.
Referenced by gistInitBuffering().
/*
 * Allocate and initialize the shared state for a buffering GiST index
 * build: a temp file for swapped-out buffer pages, a free-block list, a
 * hash table mapping index blocks to node buffers, per-level buffer
 * lists, and the array tracking which buffers' last pages are loaded in
 * memory.  All in-memory structures are allocated in (and keyed to) the
 * caller's current memory context.
 */
{ GISTBuildBuffers *gfbb; HASHCTL hashCtl; gfbb = palloc(sizeof(GISTBuildBuffers)); gfbb->pagesPerBuffer = pagesPerBuffer; gfbb->levelStep = levelStep; /* * Create a temporary file to hold buffer pages that are swapped out of * memory. */ gfbb->pfile = BufFileCreateTemp(false); gfbb->nFileBlocks = 0; /* Initialize free page management. */ gfbb->nFreeBlocks = 0; gfbb->freeBlocksLen = 32; gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long)); /* * Current memory context will be used for all in-memory data structures * of buffers which are persistent during buffering build. */ gfbb->context = CurrentMemoryContext; /* * nodeBuffersTab hash is association between index blocks and it's * buffers. */ hashCtl.keysize = sizeof(BlockNumber); hashCtl.entrysize = sizeof(GISTNodeBuffer); hashCtl.hcxt = CurrentMemoryContext; hashCtl.hash = tag_hash; hashCtl.match = memcmp; gfbb->nodeBuffersTab = hash_create("gistbuildbuffers", 1024, &hashCtl, HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE); gfbb->bufferEmptyingQueue = NIL; /* * Per-level node buffers lists for final buffers emptying process. Node * buffers are inserted here when they are created. */ gfbb->buffersOnLevelsLen = 1; gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) * gfbb->buffersOnLevelsLen); gfbb->buffersOnLevels[0] = NIL; /* * Block numbers of node buffers which last pages are currently loaded * into main memory. */ gfbb->loadedBuffersLen = 32; gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *)); gfbb->loadedBuffersCount = 0; gfbb->rootlevel = maxLevel; return gfbb; }
Datum gistinsert | ( | PG_FUNCTION_ARGS | ) |
Definition at line 102 of file gist.c.
References freeGISTstate(), gistdoinsert(), gistFormTuple(), initGISTstate(), MemoryContextSwitchTo(), PG_GETARG_INT32, PG_GETARG_POINTER, PG_RETURN_BOOL, IndexTupleData::t_tid, GISTSTATE::tempCxt, and values.
/*
 * aminsert implementation for GiST: form an index tuple from the given
 * values/isnull arrays and insert it via gistdoinsert().  The giststate's
 * scan context doubles as the temp context for the whole insertion, so
 * support-function leakage is reclaimed only once at the end.  Always
 * returns false (GiST does not enforce uniqueness).
 */
{ Relation r = (Relation) PG_GETARG_POINTER(0); Datum *values = (Datum *) PG_GETARG_POINTER(1); bool *isnull = (bool *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5); #endif IndexTuple itup; GISTSTATE *giststate; MemoryContext oldCxt; giststate = initGISTstate(r); /* * We use the giststate's scan context as temp context too. This means * that any memory leaked by the support functions is not reclaimed until * end of insert. In most cases, we aren't going to call the support * functions very many times before finishing the insert, so this seems * cheaper than resetting a temp context for each function call. */ oldCxt = MemoryContextSwitchTo(giststate->tempCxt); itup = gistFormTuple(giststate, r, values, isnull, true /* size is currently bogus */ ); itup->t_tid = *ht_ctid; gistdoinsert(r, itup, 0, giststate); /* cleanup */ MemoryContextSwitchTo(oldCxt); freeGISTstate(giststate); PG_RETURN_BOOL(false); }
IndexTuple* gistjoinvector | ( | IndexTuple * | itvec, | |
int * | len, | |||
IndexTuple * | additvec, | |||
int | addlen | |||
) |
Definition at line 109 of file gistutil.c.
References memmove, and repalloc().
Referenced by gistplacetopage().
/*
 * Append the "addlen" tuples of "additvec" to the tuple-pointer vector
 * "itvec" (repalloc'ing it to fit), updating *len to the new count.
 * Returns the possibly-relocated vector.
 */
{
	Size		newbytes = sizeof(IndexTuple) * (*len + addlen);

	itvec = (IndexTuple *) repalloc((void *) itvec, newbytes);
	memmove(itvec + *len, additvec, sizeof(IndexTuple) * addlen);
	*len += addlen;
	return itvec;
}
Definition at line 275 of file gistutil.c.
References GISTSTATE::equalFn, FunctionCall3Coll(), PointerGetDatum, and GISTSTATE::supportCollation.
Referenced by gistgetadjusted(), and gistUserPicksplit().
/*
 * Invoke the opclass "equal" support function for column "attno" to test
 * whether keys "a" and "b" are equal.  The support function reports its
 * verdict through the by-reference bool passed as the third argument.
 */
{
	bool		result;

	FunctionCall3Coll(&giststate->equalFn[attno],
					  giststate->supportCollation[attno],
					  a, b,
					  PointerGetDatum(&result));
	return result;
}
void gistMakeUnionItVec | ( | GISTSTATE * | giststate, | |
IndexTuple * | itvec, | |||
int | len, | |||
Datum * | attr, | |||
bool * | isnull | |||
) |
Definition at line 150 of file gistutil.c.
References FALSE, FunctionCall2Coll(), GEVHDRSZ, gistdentryinit(), i, index_getattr, GistEntryVector::n, tupleDesc::natts, NULL, palloc(), PointerGetDatum, GISTSTATE::supportCollation, GISTSTATE::tupdesc, GISTSTATE::unionFn, and GistEntryVector::vector.
Referenced by gistunion(), and gistunionsubkeyvec().
/*
 * Compute, for each index column, the union of the keys of all "len"
 * tuples in "itvec", storing the result datums in attr[] and null flags
 * in isnull[].  NULL inputs are skipped; a column that is all-NULL
 * unions to NULL.  If only one non-null key remains it is duplicated,
 * since opclass union functions may expect at least two inputs.
 */
{
	int			i;
	GistEntryVector *evec;
	int			attrsize;

	evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);

	for (i = 0; i < giststate->tupdesc->natts; i++)
	{
		int			j;

		/* Collect non-null datums for this column */
		evec->n = 0;
		for (j = 0; j < len; j++)
		{
			Datum		datum;
			bool		IsNull;

			datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
			if (IsNull)
				continue;

			gistdentryinit(giststate, i, evec->vector + evec->n,
						   datum,
						   NULL, NULL, (OffsetNumber) 0,
						   FALSE, IsNull);
			evec->n++;
		}

		/* If this column was all NULLs, the union is NULL */
		if (evec->n == 0)
		{
			attr[i] = (Datum) 0;
			isnull[i] = TRUE;
		}
		else
		{
			if (evec->n == 1)
			{
				/* unionFn may expect at least two inputs */
				evec->n = 2;
				evec->vector[1] = evec->vector[0];
			}

			/* Make union and store in attr array */
			attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
										giststate->supportCollation[i],
										PointerGetDatum(evec),
										PointerGetDatum(&attrsize));
			isnull[i] = FALSE;
		}
	}
}
void gistMakeUnionKey | ( | GISTSTATE * | giststate, | |
int | attno, | |||
GISTENTRY * | entry1, | |||
bool | isnull1, | |||
GISTENTRY * | entry2, | |||
bool | isnull2, | |||
Datum * | dst, | |||
bool * | dstisnull | |||
) |
Definition at line 227 of file gistutil.c.
References FALSE, FunctionCall2Coll(), GEVHDRSZ, GistEntryVector::n, PointerGetDatum, GISTSTATE::supportCollation, GISTSTATE::unionFn, and GistEntryVector::vector.
Referenced by gistgetadjusted(), and supportSecondarySplit().
/*
 * Compute the union of two keys for column "attno", returning the result
 * through *dst / *dstisnull.  If both inputs are NULL the union is NULL;
 * if exactly one is NULL, the non-null key is duplicated so the opclass
 * union function (which expects two entries) sees it twice, yielding
 * that key unchanged.
 */
{
	/* we need a GistEntryVector with room for exactly 2 elements */
	union
	{
		GistEntryVector gev;
		char		padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
	}			storage;
	GistEntryVector *evec = &storage.gev;
	int			dstsize;

	evec->n = 2;

	if (isnull1 && isnull2)
	{
		*dstisnull = TRUE;
		*dst = (Datum) 0;
	}
	else
	{
		if (isnull1 == FALSE && isnull2 == FALSE)
		{
			evec->vector[0] = *entry1;
			evec->vector[1] = *entry2;
		}
		else if (isnull1 == FALSE)
		{
			/* only entry1 is non-null: union it with itself */
			evec->vector[0] = *entry1;
			evec->vector[1] = *entry1;
		}
		else
		{
			/* only entry2 is non-null: union it with itself */
			evec->vector[0] = *entry2;
			evec->vector[1] = *entry2;
		}

		*dstisnull = FALSE;
		*dst = FunctionCall2Coll(&giststate->unionFn[attno],
								 giststate->supportCollation[attno],
								 PointerGetDatum(evec),
								 PointerGetDatum(&dstsize));
	}
}
Definition at line 716 of file gistutil.c.
References BufferGetPage, ConditionalLockBuffer(), ExclusiveLock, GetFreeIndexPage(), GIST_EXCLUSIVE, GIST_UNLOCK, gistcheckpage(), GistPageIsDeleted, InvalidBlockNumber, LockBuffer(), LockRelationForExtension(), P_NEW, PageIsNew, ReadBuffer(), RELATION_IS_LOCAL, ReleaseBuffer(), and UnlockRelationForExtension().
Referenced by gistbuild(), and gistplacetopage().
/*
 * Allocate a new page for the GiST index "r", returning an exclusively
 * locked buffer.  First tries pages recorded in the FSM, taking each
 * only with a conditional lock (another backend may have recycled it
 * already) and accepting it only if it is brand-new or marked deleted.
 * If the FSM yields nothing usable, extends the relation, holding the
 * relation-extension lock unless the relation is backend-local.
 */
{ Buffer buffer; bool needLock; /* First, try to get a page from FSM */ for (;;) { BlockNumber blkno = GetFreeIndexPage(r); if (blkno == InvalidBlockNumber) break; /* nothing left in FSM */ buffer = ReadBuffer(r, blkno); /* * We have to guard against the possibility that someone else already * recycled this page; the buffer may be locked if so. */ if (ConditionalLockBuffer(buffer)) { Page page = BufferGetPage(buffer); if (PageIsNew(page)) return buffer; /* OK to use, if never initialized */ gistcheckpage(r, buffer); if (GistPageIsDeleted(page)) return buffer; /* OK to use */ LockBuffer(buffer, GIST_UNLOCK); } /* Can't use it, so release buffer and try again */ ReleaseBuffer(buffer); } /* Must extend the file */ needLock = !RELATION_IS_LOCAL(r); if (needLock) LockRelationForExtension(r, ExclusiveLock); buffer = ReadBuffer(r, P_NEW); LockBuffer(buffer, GIST_EXCLUSIVE); if (needLock) UnlockRelationForExtension(r, ExclusiveLock); return buffer; }
bool gistnospace | ( | Page | page, | |
IndexTuple * | itvec, | |||
int | len, | |||
OffsetNumber | todelete, | |||
Size | freespace | |||
) |
Definition at line 54 of file gistutil.c.
References i, IndexTupleSize, InvalidOffsetNumber, PageGetFreeSpace(), PageGetItem, and PageGetItemId.
Referenced by gistplacetopage().
/*
 * Report whether "page" lacks room for the "len" tuples of "itvec"
 * (plus "freespace" reserved bytes).  If "todelete" is a valid offset,
 * that existing tuple's space is counted as reclaimable first.
 * Returns true when the insertion would NOT fit.
 */
{
	int			i;
	unsigned int needed = freespace;
	unsigned int reclaimed = 0;

	for (i = 0; i < len; i++)
		needed += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);

	if (todelete != InvalidOffsetNumber)
	{
		IndexTuple	old = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));

		reclaimed = IndexTupleSize(old) + sizeof(ItemIdData);
	}

	return PageGetFreeSpace(page) + reclaimed < needed;
}
Datum gistoptions | ( | PG_FUNCTION_ARGS | ) |
Definition at line 770 of file gistutil.c.
References allocateReloptStruct(), fillfactor, fillRelOptions(), lengthof, offsetof, parseRelOptions(), pfree(), PG_GETARG_BOOL, PG_GETARG_DATUM, PG_RETURN_BYTEA_P, PG_RETURN_NULL, and RELOPT_KIND_GIST.
/*
 * amoptions implementation for GiST: parse the reloptions datum,
 * recognizing "fillfactor" and "buffering", and return a filled
 * GiSTOptions bytea (or NULL if no options were set).  When "validate"
 * is true, unknown or malformed options raise an error.
 */
{ Datum reloptions = PG_GETARG_DATUM(0); bool validate = PG_GETARG_BOOL(1); relopt_value *options; GiSTOptions *rdopts; int numoptions; static const relopt_parse_elt tab[] = { {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)}, {"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)} }; options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST, &numoptions); /* if none set, we're done */ if (numoptions == 0) PG_RETURN_NULL(); rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions); fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions, validate, tab, lengthof(tab)); pfree(options); PG_RETURN_BYTEA_P(rdopts); }
float gistpenalty | ( | GISTSTATE * | giststate, | |
int | attno, | |||
GISTENTRY * | orig, | |||
bool | isNullOrig, | |||
GISTENTRY * | add, | |||
bool | isNullAdd | |||
) |
Definition at line 622 of file gistutil.c.
References FALSE, FmgrInfo::fn_strict, FunctionCall3Coll(), get_float4_infinity(), GISTSTATE::penaltyFn, PointerGetDatum, and GISTSTATE::supportCollation.
Referenced by findDontCares(), gistchoose(), gistRelocateBuildBuffersOnSplit(), placeOne(), and supportSecondarySplit().
/*
 * Compute the opclass penalty for merging key "add" into existing key
 * "orig" on attribute "attno".  NULL handling: two NULLs cost 0; a mix
 * of NULL and non-NULL is maximally penalized (+infinity) to discourage
 * mixing, unless the penalty function is non-strict and so wants to see
 * NULLs itself.  Negative or NaN results are clamped to 0.
 * NOTE(review): the Doxygen synopsis names these parameters
 * key1/isNull1/key2/isNull2, but the body uses orig/isNullOrig/
 * add/isNullAdd -- the synopsis appears stale.
 */
{
	float		penalty = 0.0;

	if (giststate->penaltyFn[attno].fn_strict == FALSE ||
		(isNullOrig == FALSE && isNullAdd == FALSE))
	{
		FunctionCall3Coll(&giststate->penaltyFn[attno],
						  giststate->supportCollation[attno],
						  PointerGetDatum(orig),
						  PointerGetDatum(add),
						  PointerGetDatum(&penalty));
		/* disallow negative or NaN penalty */
		if (isnan(penalty) || penalty < 0.0)
			penalty = 0.0;
	}
	else if (isNullOrig && isNullAdd)
		penalty = 0.0;
	else
	{
		/* try to prevent mixing null and non-null values */
		penalty = get_float4_infinity();
	}

	return penalty;
}
bool gistplacetopage | ( | Relation | rel, | |
Size | freespace, | |||
GISTSTATE * | giststate, | |||
Buffer | buffer, | |||
IndexTuple * | itup, | |||
int | ntup, | |||
OffsetNumber | oldoffnum, | |||
BlockNumber * | newblkno, | |||
Buffer | leftchildbuf, | |||
List ** | splitinfo, | |||
bool | markfollowright | |||
) |
Definition at line 172 of file gist.c.
References gistxlogPage::blkno, SplitedPageLayout::block, GISTPageSplitInfo::buf, SplitedPageLayout::buffer, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, GISTPageSplitInfo::downlink, elog, END_CRIT_SECTION, ERROR, F_LEAF, GIST_ROOT_BLKNO, GistClearFollowRight, gistextractpage(), gistfillbuffer(), gistfillitupvec(), GistFollowRight, gistGetFakeLSN(), GISTInitBuffer(), gistjoinvector(), GistMarkFollowRight, gistNewBuffer(), gistnospace(), GistPageGetNSN, GistPageGetOpaque, GistPageIsLeaf, GistPageSetNSN, gistSplit(), GistTupleSetValid, gistXLogSplit(), gistXLogUpdate(), i, IndexTupleSize, InvalidOffsetNumber, ItemPointerEquals(), ItemPointerSetBlockNumber, SplitedPageLayout::itup, lappend(), SplitedPageLayout::lenlist, SplitedPageLayout::list, MarkBufferDirty(), memmove, SplitedPageLayout::next, gistxlogPage::num, OffsetNumberIsValid, SplitedPageLayout::page, PageAddItem(), PageGetTempPageCopySpecial(), PageIndexTupleDelete(), PageRestoreTempPage(), PageSetLSN, palloc(), RelationData::rd_node, RelationGetRelationName, RelationNeedsWAL, START_CRIT_SECTION, IndexTupleData::t_tid, and UnlockReleaseBuffer().
Referenced by gistbufferinginserttuples(), and gistinserttuples().
/*
 * Insert the tuples itup[0..ntup-1] onto the page in "buffer",
 * optionally replacing the tuple at "oldoffnum".  If everything fits,
 * the page is updated in place and false is returned.  Otherwise the
 * page is split via gistSplit(): new right-sibling pages are allocated,
 * downlinks are returned to the caller through *splitinfo (except for a
 * root split, where the new root is built here directly), follow-right
 * flags and NSNs are set so concurrent scans stay correct, the whole
 * change is WAL-logged (or given a fake LSN), and true is returned.
 * If "leftchildbuf" is valid, this call also completes a child split by
 * clearing its follow-right flag and setting its NSN -- done after WAL
 * so a full-page image cannot mask those changes.  This function is the
 * workhorse beneath gistinserttuples()/gistbufferinginserttuples(); its
 * ordering of dirtying, WAL insertion, and flag updates is deliberate
 * and must not be rearranged.
 */
{ BlockNumber blkno = BufferGetBlockNumber(buffer); Page page = BufferGetPage(buffer); bool is_leaf = (GistPageIsLeaf(page)) ? true : false; XLogRecPtr recptr; int i; bool is_split; /* * Refuse to modify a page that's incompletely split. This should not * happen because we finish any incomplete splits while we walk down the * tree. However, it's remotely possible that another concurrent inserter * splits a parent page, and errors out before completing the split. We * will just throw an error in that case, and leave any split we had in * progress unfinished too. The next insert that comes along will clean up * the mess. */ if (GistFollowRight(page)) elog(ERROR, "concurrent GiST page split was incomplete"); *splitinfo = NIL; /* * if isupdate, remove old key: This node's key has been modified, either * because a child split occurred or because we needed to adjust our key * for an insert in a child node. Therefore, remove the old version of * this node's key. * * for WAL replay, in the non-split case we handle this by setting up a * one-element todelete array; in the split case, it's handled implicitly * because the tuple vector passed to gistSplit won't include this tuple. */ is_split = gistnospace(page, itup, ntup, oldoffnum, freespace); if (is_split) { /* no space for insertion */ IndexTuple *itvec; int tlen; SplitedPageLayout *dist = NULL, *ptr; BlockNumber oldrlink = InvalidBlockNumber; GistNSN oldnsn = 0; SplitedPageLayout rootpg; bool is_rootsplit; is_rootsplit = (blkno == GIST_ROOT_BLKNO); /* * Form index tuples vector to split. If we're replacing an old tuple, * remove the old version from the vector. 
*/ itvec = gistextractpage(page, &tlen); if (OffsetNumberIsValid(oldoffnum)) { /* on inner page we should remove old tuple */ int pos = oldoffnum - FirstOffsetNumber; tlen--; if (pos != tlen) memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos)); } itvec = gistjoinvector(itvec, &tlen, itup, ntup); dist = gistSplit(rel, page, itvec, tlen, giststate); /* * Set up pages to work with. Allocate new buffers for all but the * leftmost page. The original page becomes the new leftmost page, and * is just replaced with the new contents. * * For a root-split, allocate new buffers for all child pages, the * original page is overwritten with new root page containing * downlinks to the new child pages. */ ptr = dist; if (!is_rootsplit) { /* save old rightlink and NSN */ oldrlink = GistPageGetOpaque(page)->rightlink; oldnsn = GistPageGetNSN(page); dist->buffer = buffer; dist->block.blkno = BufferGetBlockNumber(buffer); dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer)); /* clean all flags except F_LEAF */ GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0; ptr = ptr->next; } for (; ptr; ptr = ptr->next) { /* Allocate new page */ ptr->buffer = gistNewBuffer(rel); GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0); ptr->page = BufferGetPage(ptr->buffer); ptr->block.blkno = BufferGetBlockNumber(ptr->buffer); } /* * Now that we know which blocks the new pages go to, set up downlink * tuples to point to them. */ for (ptr = dist; ptr; ptr = ptr->next) { ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno); GistTupleSetValid(ptr->itup); } /* * If this is a root split, we construct the new root page with the * downlinks here directly, instead of requiring the caller to insert * them. Add the new root page to the list along with the child pages. 
*/ if (is_rootsplit) { IndexTuple *downlinks; int ndownlinks = 0; int i; rootpg.buffer = buffer; rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer)); GistPageGetOpaque(rootpg.page)->flags = 0; /* Prepare a vector of all the downlinks */ for (ptr = dist; ptr; ptr = ptr->next) ndownlinks++; downlinks = palloc(sizeof(IndexTuple) * ndownlinks); for (i = 0, ptr = dist; ptr; ptr = ptr->next) downlinks[i++] = ptr->itup; rootpg.block.blkno = GIST_ROOT_BLKNO; rootpg.block.num = ndownlinks; rootpg.list = gistfillitupvec(downlinks, ndownlinks, &(rootpg.lenlist)); rootpg.itup = NULL; rootpg.next = dist; dist = &rootpg; } else { /* Prepare split-info to be returned to caller */ for (ptr = dist; ptr; ptr = ptr->next) { GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo)); si->buf = ptr->buffer; si->downlink = ptr->itup; *splitinfo = lappend(*splitinfo, si); } } /* * Fill all pages. All the pages are new, ie. freshly allocated empty * pages, or a temporary copy of the old page. */ for (ptr = dist; ptr; ptr = ptr->next) { char *data = (char *) (ptr->list); for (i = 0; i < ptr->block.num; i++) { IndexTuple thistup = (IndexTuple) data; if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel)); /* * If this is the first inserted/updated tuple, let the caller * know which page it landed on. */ if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid)) *newblkno = ptr->block.blkno; data += IndexTupleSize(thistup); } /* Set up rightlinks */ if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO) GistPageGetOpaque(ptr->page)->rightlink = ptr->next->block.blkno; else GistPageGetOpaque(ptr->page)->rightlink = oldrlink; /* * Mark the all but the right-most page with the follow-right * flag. 
It will be cleared as soon as the downlink is inserted * into the parent, but this ensures that if we error out before * that, the index is still consistent. (in buffering build mode, * any error will abort the index build anyway, so this is not * needed.) */ if (ptr->next && !is_rootsplit && markfollowright) GistMarkFollowRight(ptr->page); else GistClearFollowRight(ptr->page); /* * Copy the NSN of the original page to all pages. The * F_FOLLOW_RIGHT flags ensure that scans will follow the * rightlinks until the downlinks are inserted. */ GistPageSetNSN(ptr->page, oldnsn); } START_CRIT_SECTION(); /* * Must mark buffers dirty before XLogInsert, even though we'll still * be changing their opaque fields below. */ for (ptr = dist; ptr; ptr = ptr->next) MarkBufferDirty(ptr->buffer); if (BufferIsValid(leftchildbuf)) MarkBufferDirty(leftchildbuf); /* * The first page in the chain was a temporary working copy meant to * replace the old page. Copy it over the old page. */ PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer)); dist->page = BufferGetPage(dist->buffer); /* Write the WAL record */ if (RelationNeedsWAL(rel)) recptr = gistXLogSplit(rel->rd_node, blkno, is_leaf, dist, oldrlink, oldnsn, leftchildbuf, markfollowright); else recptr = gistGetFakeLSN(rel); for (ptr = dist; ptr; ptr = ptr->next) { PageSetLSN(ptr->page, recptr); } /* * Return the new child buffers to the caller. * * If this was a root split, we've already inserted the downlink * pointers, in the form of a new root page. Therefore we can release * all the new buffers, and keep just the root page locked. */ if (is_rootsplit) { for (ptr = dist->next; ptr; ptr = ptr->next) UnlockReleaseBuffer(ptr->buffer); } } else { /* * Enough space. We also get here if ntuples==0. 
*/ START_CRIT_SECTION(); if (OffsetNumberIsValid(oldoffnum)) PageIndexTupleDelete(page, oldoffnum); gistfillbuffer(page, itup, ntup, InvalidOffsetNumber); MarkBufferDirty(buffer); if (BufferIsValid(leftchildbuf)) MarkBufferDirty(leftchildbuf); if (RelationNeedsWAL(rel)) { OffsetNumber ndeloffs = 0, deloffs[1]; if (OffsetNumberIsValid(oldoffnum)) { deloffs[0] = oldoffnum; ndeloffs = 1; } recptr = gistXLogUpdate(rel->rd_node, buffer, deloffs, ndeloffs, itup, ntup, leftchildbuf); PageSetLSN(page, recptr); } else { recptr = gistGetFakeLSN(rel); PageSetLSN(page, recptr); } if (newblkno) *newblkno = blkno; } /* * If we inserted the downlink for a child page, set NSN and clear * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to * follow the rightlink if and only if they looked at the parent page * before we inserted the downlink. * * Note that we do this *after* writing the WAL record. That means that * the possible full page image in the WAL record does not include these * changes, and they must be replayed even if the page is restored from * the full page image. There's a chicken-and-egg problem: if we updated * the child pages first, we wouldn't know the recptr of the WAL record * we're about to write. */ if (BufferIsValid(leftchildbuf)) { Page leftpg = BufferGetPage(leftchildbuf); GistPageSetNSN(leftpg, recptr); GistClearFollowRight(leftpg); PageSetLSN(leftpg, recptr); } END_CRIT_SECTION(); return is_split; }
bool gistPopItupFromNodeBuffer | ( | GISTBuildBuffers * | gfbb, | |
GISTNodeBuffer * | nodeBuffer, | |||
IndexTuple * | item | |||
) |
Definition at line 412 of file gistbuildbuffers.c.
References Assert, GISTNodeBuffer::blocksCount, gistBuffersReleaseBlock(), gistGetItupFromPage(), gistLoadNodeBuffer(), InvalidBlockNumber, PAGE_IS_EMPTY, GISTNodeBuffer::pageBuffer, GISTBuildBuffers::pfile, pfree(), GISTNodeBufferPage::prev, and ReadTempFileBlock().
Referenced by gistProcessEmptyingQueue(), and gistRelocateBuildBuffersOnSplit().
{
    /*
     * If the node buffer holds no blocks at all, there is nothing to pop.
     */
    if (nodeBuffer->blocksCount <= 0)
        return false;

    /*
     * Make sure the buffer's last page is in memory; if it was unloaded,
     * read it back from the temporary file.
     */
    if (!nodeBuffer->pageBuffer)
        gistLoadNodeBuffer(gfbb, nodeBuffer);

    /*
     * Pop one index tuple from the last (non-empty) in-memory page.
     */
    gistGetItupFromPage(nodeBuffer->pageBuffer, itup);

    /*
     * If we just removed the last tuple from the page, fetch previous page on
     * this node buffer (if any).
     */
    if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
    {
        BlockNumber prevblkno;

        /*
         * blocksCount includes the page in pageBuffer, so decrease it now.
         */
        nodeBuffer->blocksCount--;

        /*
         * Follow the prev-link of the now-empty page to find the preceding
         * block of this buffer, if there is one.
         */
        prevblkno = nodeBuffer->pageBuffer->prev;
        if (prevblkno != InvalidBlockNumber)
        {
            /* There is a previous page. Fetch it. */
            Assert(nodeBuffer->blocksCount > 0);
            ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);

            /*
             * Now that we've read the block in memory, we can release its
             * on-disk block for reuse.
             */
            gistBuffersReleaseBlock(gfbb, prevblkno);
        }
        else
        {
            /* No more pages. Free the in-memory page and mark buffer empty. */
            Assert(nodeBuffer->blocksCount == 0);
            pfree(nodeBuffer->pageBuffer);
            nodeBuffer->pageBuffer = NULL;
        }
    }
    return true;
}
void gistPushItupToNodeBuffer | ( | GISTBuildBuffers * | gfbb, | |
GISTNodeBuffer * | nodeBuffer, | |||
IndexTuple | item | |||
) |
Definition at line 342 of file gistbuildbuffers.c.
References GISTNodeBuffer::blocksCount, BUFFER_HALF_FILLED, GISTBuildBuffers::bufferEmptyingQueue, GISTBuildBuffers::context, gistAddLoadedBuffer(), gistAllocateNewPageBuffer(), gistBuffersGetFreeBlock(), gistLoadNodeBuffer(), gistPlaceItupToPage(), lcons(), MAXALIGN, MemoryContextSwitchTo(), offsetof, PAGE_FREE_SPACE, PAGE_NO_SPACE, GISTNodeBuffer::pageBuffer, GISTBuildBuffers::pfile, GISTNodeBufferPage::prev, GISTNodeBuffer::queuedForEmptying, and WriteTempFileBlock().
Referenced by gistProcessItup(), and gistRelocateBuildBuffersOnSplit().
{
    /*
     * Most part of memory operations will be in buffering build persistent
     * context. So, let's switch to it.
     */
    MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);

    /*
     * If the buffer is currently empty, create the first page.
     */
    if (nodeBuffer->blocksCount == 0)
    {
        nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
        nodeBuffer->blocksCount = 1;
        gistAddLoadedBuffer(gfbb, nodeBuffer);
    }

    /* Load last page of node buffer if it wasn't in memory already */
    if (!nodeBuffer->pageBuffer)
        gistLoadNodeBuffer(gfbb, nodeBuffer);

    /*
     * Check if there is enough space on the last page for the tuple.
     */
    if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
    {
        /*
         * Nope. Swap the full page out to the temporary file and start a
         * fresh in-memory page linked to it.
         */
        BlockNumber blkno;

        /* Write filled page to the disk */
        blkno = gistBuffersGetFreeBlock(gfbb);
        WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);

        /*
         * Reset the in-memory page as empty, and link the previous block to
         * the new page by storing its block number in the prev-link.
         */
        PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
            BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
        nodeBuffer->pageBuffer->prev = blkno;

        /* We've just added one more page */
        nodeBuffer->blocksCount++;
    }

    gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);

    /*
     * If the buffer is now more than half filled (BUFFER_HALF_FILLED tests
     * blocksCount > pagesPerBuffer / 2), add it to the emptying queue,
     * unless it is queued already.  (The original comment said "just
     * overflowed", which matched BUFFER_OVERFLOWED, not the condition
     * actually used here.)
     */
    if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
    {
        gfbb->bufferEmptyingQueue = lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
        nodeBuffer->queuedForEmptying = true;
    }

    /* Restore memory context */
    MemoryContextSwitchTo(oldcxt);
}
void gistRelocateBuildBuffersOnSplit | ( | GISTBuildBuffers * | gfbb, | |
GISTSTATE * | giststate, | |||
Relation | r, | |||
int | level, | |||
Buffer | buffer, | |||
List * | splitinfo | |||
) |
Definition at line 539 of file gistbuildbuffers.c.
References Assert, GISTNodeBuffer::blocksCount, GISTPageSplitInfo::buf, BufferGetBlockNumber(), GISTPageSplitInfo::downlink, RelocationBufferInfo::entry, GIST_ROOT_BLKNO, gistDeCompressAtt(), gistgetadjusted(), gistGetNodeBuffer(), gistpenalty(), gistPopItupFromNodeBuffer(), gistPushItupToNodeBuffer(), HASH_FIND, hash_search(), i, RelocationBufferInfo::isnull, GISTNodeBuffer::isTemp, LEVEL_HAS_BUFFERS, lfirst, list_length(), tupleDesc::natts, RelocationBufferInfo::nodeBuffer, GISTBuildBuffers::nodeBuffersTab, NULL, GISTNodeBuffer::pageBlocknum, GISTNodeBuffer::pageBuffer, palloc(), pfree(), RelationData::rd_att, and RelocationBufferInfo::splitinfo.
Referenced by gistbufferinginserttuples().
{
    RelocationBufferInfo *relocationBuffersInfos;
    bool        found;
    GISTNodeBuffer *nodeBuffer;
    BlockNumber blocknum;
    IndexTuple  itup;
    int         splitPagesCount = 0,
                i;
    GISTENTRY   entry[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    GISTNodeBuffer oldBuf;
    ListCell   *lc;

    /* If the split page's level doesn't have buffers, we have nothing to do. */
    if (!LEVEL_HAS_BUFFERS(level, gfbb))
        return;

    /*
     * Get the node buffer of the page being split.
     */
    blocknum = BufferGetBlockNumber(buffer);
    nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum, HASH_FIND,
                             &found);
    if (!found)
    {
        /* The page has no buffer, so we have nothing to do. */
        return;
    }

    /*
     * Make a copy of the old buffer, as we're going reuse it as the buffer
     * for the new left page, which is on the same block as the old page.
     * That's not true for the root page, but that's fine because we never
     * have a buffer on the root page anyway. The original algorithm as
     * described by Arge et al did, but it's of no use, as you might as well
     * read the tuples straight from the heap instead of the root buffer.
     */
    Assert(blocknum != GIST_ROOT_BLKNO);
    memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
    oldBuf.isTemp = true;

    /* Reset the old buffer, used for the new left page from now on */
    nodeBuffer->blocksCount = 0;
    nodeBuffer->pageBuffer = NULL;
    nodeBuffer->pageBlocknum = InvalidBlockNumber;

    /*
     * Allocate one RelocationBufferInfo per page produced by the split.
     */
    splitPagesCount = list_length(splitinfo);
    relocationBuffersInfos =
        (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
                                        splitPagesCount);

    /*
     * Fill relocation buffers information for node buffers of pages produced
     * by split.
     */
    i = 0;
    foreach(lc, splitinfo)
    {
        GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
        GISTNodeBuffer *newNodeBuffer;

        /* Decompress parent index tuple of node buffer page. */
        gistDeCompressAtt(giststate, r,
                          si->downlink, NULL, (OffsetNumber) 0,
                          relocationBuffersInfos[i].entry,
                          relocationBuffersInfos[i].isnull);

        /*
         * Create a node buffer for the page. The leftmost half is on the same
         * block as the old page before split, so for the leftmost half this
         * will return the original buffer. The tuples on the original buffer
         * were relinked to the temporary buffer, so the original one is now
         * empty.
         */
        newNodeBuffer = gistGetNodeBuffer(gfbb, giststate,
                                          BufferGetBlockNumber(si->buf),
                                          level);

        relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
        relocationBuffersInfos[i].splitinfo = si;

        i++;
    }

    /*
     * Loop through all index tuples in the buffer of the page being split,
     * moving them to buffers for the new pages. We try to move each tuple to
     * the page that will result in the lowest penalty for the leading column
     * or, in the case of a tie, the lowest penalty for the earliest column
     * that is not tied.
     *
     * The page searching logic is very similar to gistchoose().
     */
    while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
    {
        float       best_penalty[INDEX_MAX_KEYS];
        int         i,
                    which;
        IndexTuple  newtup;
        RelocationBufferInfo *targetBufferInfo;

        gistDeCompressAtt(giststate, r,
                          itup, NULL, (OffsetNumber) 0, entry, isnull);

        /* default to using first page (shouldn't matter) */
        which = 0;

        /*
         * best_penalty[j] is the best penalty we have seen so far for column
         * j, or -1 when we haven't yet examined column j. Array entries to
         * the right of the first -1 are undefined.
         */
        best_penalty[0] = -1;

        /*
         * Loop over possible target pages, looking for one to move this tuple
         * to.
         */
        for (i = 0; i < splitPagesCount; i++)
        {
            RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
            bool        zero_penalty;
            int         j;

            zero_penalty = true;

            /* Loop over index attributes. */
            for (j = 0; j < r->rd_att->natts; j++)
            {
                float       usize;

                /* Compute penalty for this column. */
                usize = gistpenalty(giststate, j,
                                    &splitPageInfo->entry[j],
                                    splitPageInfo->isnull[j],
                                    &entry[j], isnull[j]);
                if (usize > 0)
                    zero_penalty = false;

                if (best_penalty[j] < 0 || usize < best_penalty[j])
                {
                    /*
                     * New best penalty for column. Tentatively select this
                     * page as the target, and record the best penalty. Then
                     * reset the next column's penalty to "unknown" (and
                     * indirectly, the same for all the ones to its right).
                     * This will force us to adopt this page's penalty values
                     * as the best for all the remaining columns during
                     * subsequent loop iterations.
                     */
                    which = i;
                    best_penalty[j] = usize;

                    if (j < r->rd_att->natts - 1)
                        best_penalty[j + 1] = -1;
                }
                else if (best_penalty[j] == usize)
                {
                    /*
                     * The current page is exactly as good for this column as
                     * the best page seen so far. The next iteration of this
                     * loop will compare the next column.
                     */
                }
                else
                {
                    /*
                     * The current page is worse for this column than the best
                     * page seen so far. Skip the remaining columns and move
                     * on to the next page, if any.
                     */
                    zero_penalty = false;   /* so outer loop won't exit */
                    break;
                }
            }

            /*
             * If we find a page with zero penalty for all columns, there's no
             * need to examine remaining pages; just break out of the loop and
             * return it.
             */
            if (zero_penalty)
                break;
        }

        /* OK, "which" is the page index to push the tuple to */
        targetBufferInfo = &relocationBuffersInfos[which];

        /* Push item to selected node buffer */
        gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);

        /* Adjust the downlink for this page, if needed. */
        newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
                                 itup, giststate);
        if (newtup)
        {
            gistDeCompressAtt(giststate, r,
                              newtup, NULL, (OffsetNumber) 0,
                              targetBufferInfo->entry,
                              targetBufferInfo->isnull);

            targetBufferInfo->splitinfo->downlink = newtup;
        }
    }

    pfree(relocationBuffersInfos);
}
SplitedPageLayout* gistSplit | ( | Relation | r, | |
Page | page, | |||
IndexTuple * | itup, | |||
int | len, | |||
GISTSTATE * | giststate | |||
) |
Definition at line 1242 of file gist.c.
References SplitedPageLayout::block, gistfillitupvec(), gistfitpage(), gistFormTuple(), gistSplit(), gistSplitByKey(), i, SplitedPageLayout::itup, SplitedPageLayout::lenlist, SplitedPageLayout::list, tupleDesc::natts, SplitedPageLayout::next, gistxlogPage::num, palloc(), ROTATEDIST, GistSplitVector::spl_lattr, GIST_SPLITVEC::spl_left, GistSplitVector::spl_lisnull, GIST_SPLITVEC::spl_nleft, GIST_SPLITVEC::spl_nright, GistSplitVector::spl_rattr, GIST_SPLITVEC::spl_right, GistSplitVector::spl_risnull, GistSplitVector::splitVector, TRUE, and GISTSTATE::tupdesc.
Referenced by gistplacetopage(), and gistSplit().
{
    IndexTuple *lvectup,
               *rvectup;
    GistSplitVector v;
    int         i;
    SplitedPageLayout *res = NULL;

    /* Start with all key columns marked null on both sides. */
    memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
    memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
    gistSplitByKey(r, page, itup, len, giststate, &v, 0);

    /* form left and right vector */
    lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
    rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));

    /* spl_left/spl_right contain 1-based positions into itup[] */
    for (i = 0; i < v.splitVector.spl_nleft; i++)
        lvectup[i] = itup[v.splitVector.spl_left[i] - 1];

    for (i = 0; i < v.splitVector.spl_nright; i++)
        rvectup[i] = itup[v.splitVector.spl_right[i] - 1];

    /* finalize splitting (may need another split) */
    if (!gistfitpage(rvectup, v.splitVector.spl_nright))
    {
        /* Right half still doesn't fit on a page; split it recursively. */
        res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
    }
    else
    {
        /* Right half fits; emit one page layout for it. */
        ROTATEDIST(res);
        res->block.num = v.splitVector.spl_nright;
        res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright,
                                    &(res->lenlist));
        res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull,
                                  false);
    }

    if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
    {
        /* Left half needs further splitting too. */
        SplitedPageLayout *resptr,
                   *subres;

        resptr = subres = gistSplit(r, page, lvectup,
                                    v.splitVector.spl_nleft, giststate);

        /* install on list's tail */
        while (resptr->next)
            resptr = resptr->next;

        resptr->next = res;
        res = subres;
    }
    else
    {
        ROTATEDIST(res);
        res->block.num = v.splitVector.spl_nleft;
        res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft,
                                    &(res->lenlist));
        res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull,
                                  false);
    }

    return res;
}
void gistSplitByKey | ( | Relation | r, | |
Page | page, | |||
IndexTuple * | itup, | |||
int | len, | |||
GISTSTATE * | giststate, | |||
GistSplitVector * | v, | |||
int | attno | |||
) |
Definition at line 623 of file gistsplit.c.
References Assert, FALSE, GEVHDRSZ, gistdentryinit(), gistSplitByKey(), gistSplitHalf(), gistunionsubkey(), gistUserPicksplit(), i, index_getattr, GistEntryVector::n, tupleDesc::natts, NULL, palloc(), GistSplitVector::spl_dontcare, GIST_SPLITVEC::spl_left, GistSplitVector::spl_lisnull, GIST_SPLITVEC::spl_nleft, GIST_SPLITVEC::spl_nright, GIST_SPLITVEC::spl_right, GistSplitVector::spl_risnull, GistSplitVector::splitVector, GISTSTATE::tupdesc, and GistEntryVector::vector.
Referenced by gistSplit(), and gistSplitByKey().
{
    GistEntryVector *entryvec;
    OffsetNumber *offNullTuples;
    int         nOffNullTuples = 0;
    int         i;

    /* generate the item array, and identify tuples with null keys */
    /* note that entryvec->vector[0] goes unused in this code */
    entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
    entryvec->n = len + 1;
    offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));

    for (i = 1; i <= len; i++)
    {
        Datum       datum;
        bool        IsNull;

        datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
                              &IsNull);
        gistdentryinit(giststate, attno, &(entryvec->vector[i]),
                       datum, r, page, i,
                       FALSE, IsNull);
        if (IsNull)
            offNullTuples[nOffNullTuples++] = i;
    }

    if (nOffNullTuples == len)
    {
        /*
         * Corner case: All keys in attno column are null, so just transfer
         * our attention to the next column. If there's no next column, just
         * split page in half.
         */
        v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;

        if (attno + 1 < giststate->tupdesc->natts)
            gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
        else
            gistSplitHalf(&v->splitVector, len);
    }
    else if (nOffNullTuples > 0)
    {
        int         j = 0;

        /*
         * We don't want to mix NULL and not-NULL keys on one page, so split
         * nulls to right page and not-nulls to left.
         */
        v->splitVector.spl_right = offNullTuples;
        v->splitVector.spl_nright = nOffNullTuples;
        v->spl_risnull[attno] = TRUE;

        v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
        v->splitVector.spl_nleft = 0;
        for (i = 1; i <= len; i++)
            if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
                j++;
            else
                v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;

        /* Compute union keys, unless outer recursion level will handle it */
        if (attno == 0 && giststate->tupdesc->natts == 1)
        {
            v->spl_dontcare = NULL;
            gistunionsubkey(giststate, itup, v);
        }
    }
    else
    {
        /*
         * All keys are not-null, so apply user-defined PickSplit method
         */
        if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
        {
            /*
             * Splitting on attno column is not optimal, so consider
             * redistributing don't-care tuples according to the next column
             */
            Assert(attno + 1 < giststate->tupdesc->natts);

            if (v->spl_dontcare == NULL)
            {
                /*
                 * This split was actually degenerate, so ignore it altogether
                 * and just split according to the next column.
                 */
                gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
            }
            else
            {
                /*
                 * Form an array of just the don't-care tuples to pass to a
                 * recursive invocation of this function for the next column.
                 */
                IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
                OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
                int         newlen = 0;
                GIST_SPLITVEC backupSplit;

                for (i = 0; i < len; i++)
                {
                    if (v->spl_dontcare[i + 1])
                    {
                        newitup[newlen] = itup[i];
                        map[newlen] = i + 1;
                        newlen++;
                    }
                }
                Assert(newlen > 0);

                /*
                 * Make a backup copy of v->splitVector, since the recursive
                 * call will overwrite that with its own result.  The backup
                 * arrays are sized to len so the subsplit can be merged in.
                 */
                backupSplit = v->splitVector;
                backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
                memcpy(backupSplit.spl_left, v->splitVector.spl_left,
                       sizeof(OffsetNumber) * v->splitVector.spl_nleft);
                backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
                memcpy(backupSplit.spl_right, v->splitVector.spl_right,
                       sizeof(OffsetNumber) * v->splitVector.spl_nright);

                /* Recursively decide how to split the don't-care tuples */
                gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);

                /*
                 * Merge result of subsplit with non-don't-care tuples,
                 * translating the subsplit's positions back through map[].
                 */
                for (i = 0; i < v->splitVector.spl_nleft; i++)
                    backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
                for (i = 0; i < v->splitVector.spl_nright; i++)
                    backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];

                v->splitVector = backupSplit;
            }
        }
    }

    /*
     * If we're handling a multicolumn index, at the end of the recursion
     * recompute the left and right union datums for all index columns. This
     * makes sure we hand back correct union datums in all corner cases,
     * including when we haven't processed all columns to start with, or when
     * a secondary split moved "don't care" tuples from one side to the other
     * (we really shouldn't assume that that didn't change the union datums).
     *
     * Note: when we're in an internal recursion (attno > 0), we do not worry
     * about whether the union datums we return with are sensible, since
     * calling levels won't care. Also, in a single-column index, we expect
     * that PickSplit (or the special cases above) produced correct union
     * datums.
     */
    if (attno == 0 && giststate->tupdesc->natts > 1)
    {
        v->spl_dontcare = NULL;
        gistunionsubkey(giststate, itup, v);
    }
}
IndexTuple gistunion | ( | Relation | r, | |
IndexTuple * | itvec, | |||
int | len, | |||
GISTSTATE * | giststate | |||
) |
Definition at line 213 of file gistutil.c.
References gistFormTuple(), and gistMakeUnionItVec().
{
    /*
     * Compute the union of all keys in itvec, then package the resulting
     * attribute values into a new index tuple.
     */
    Datum       unionAttrs[INDEX_MAX_KEYS];
    bool        unionIsnull[INDEX_MAX_KEYS];
    IndexTuple  result;

    gistMakeUnionItVec(giststate, itvec, len, unionAttrs, unionIsnull);
    result = gistFormTuple(giststate, r, unionAttrs, unionIsnull, false);
    return result;
}
void gistUnloadNodeBuffers | ( | GISTBuildBuffers * | gfbb | ) |
Definition at line 278 of file gistbuildbuffers.c.
References gistUnloadNodeBuffer(), i, GISTBuildBuffers::loadedBuffers, and GISTBuildBuffers::loadedBuffersCount.
Referenced by gistProcessEmptyingQueue().
{
    int         bufno;

    /* Write out every node buffer whose last page is currently in memory. */
    for (bufno = 0; bufno < gfbb->loadedBuffersCount; bufno++)
        gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[bufno]);

    /* Every last page has been unloaded, so the loaded list is empty now. */
    gfbb->loadedBuffersCount = 0;
}
Datum gistvacuumcleanup | ( | PG_FUNCTION_ARGS | ) |
Definition at line 29 of file gistvacuum.c.
References IndexVacuumInfo::analyze_only, BufferGetPage, IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, ExclusiveLock, GIST_ROOT_BLKNO, GIST_SHARE, GistPageIsDeleted, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), LockBuffer(), LockRelationForExtension(), MAIN_FORKNUM, NULL, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, PageIsNew, IndexBulkDeleteResult::pages_free, palloc0(), PG_GETARG_POINTER, PG_RETURN_POINTER, RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::strategy, UnlockRelationForExtension(), UnlockReleaseBuffer(), and vacuum_delay_point().
{
    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    Relation    rel = info->index;
    BlockNumber npages,
                blkno;
    BlockNumber totFreePages;
    bool        needLock;

    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        PG_RETURN_POINTER(stats);

    /* Set up all-zero stats if gistbulkdelete wasn't called */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        /* use heap's tuple count */
        stats->num_index_tuples = info->num_heap_tuples;
        stats->estimated_count = info->estimated_count;

        /*
         * XXX the above is wrong if index is partial. Would it be OK to just
         * return NULL, or is there work we must do below?
         */
    }

    /*
     * Need lock unless it's local to this backend.
     */
    needLock = !RELATION_IS_LOCAL(rel);

    /* Snapshot the relation size under the extension lock. */
    if (needLock)
        LockRelationForExtension(rel, ExclusiveLock);
    npages = RelationGetNumberOfBlocks(rel);
    if (needLock)
        UnlockRelationForExtension(rel, ExclusiveLock);

    /*
     * Scan every block after the root, recording new or deleted pages in the
     * free space map.
     */
    totFreePages = 0;
    for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
    {
        Buffer      buffer;
        Page        page;

        /* Allow cancellation / cost-based vacuum delay between pages. */
        vacuum_delay_point();

        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                    info->strategy);
        LockBuffer(buffer, GIST_SHARE);
        page = (Page) BufferGetPage(buffer);

        if (PageIsNew(page) || GistPageIsDeleted(page))
        {
            totFreePages++;
            RecordFreeIndexPage(rel, blkno);
        }
        UnlockReleaseBuffer(buffer);
    }

    /* Finally, vacuum the FSM */
    IndexFreeSpaceMapVacuum(info->index);

    /* return statistics */
    stats->pages_free = totFreePages;
    /* Re-read the relation size; it may have grown during the scan. */
    if (needLock)
        LockRelationForExtension(rel, ExclusiveLock);
    stats->num_pages = RelationGetNumberOfBlocks(rel);
    if (needLock)
        UnlockRelationForExtension(rel, ExclusiveLock);

    PG_RETURN_POINTER(stats);
}
void gistValidateBufferingOption | ( | char * | value | ) |
Definition at line 245 of file gistbuild.c.
References ereport, errcode(), errdetail(), errmsg(), ERROR, and NULL.
XLogRecPtr gistXLogSplit | ( | RelFileNode | node, | |
BlockNumber | blkno, | |||
bool | page_is_leaf, | |||
SplitedPageLayout * | dist, | |||
BlockNumber | origrlink, | |||
GistNSN | oldnsn, | |||
Buffer | leftchild, | |||
bool | markfollowright | |||
) |
Definition at line 377 of file gistxlog.c.
References SplitedPageLayout::block, XLogRecData::buffer, XLogRecData::buffer_std, BufferGetBlockNumber(), BufferIsValid, cur, XLogRecData::data, gistxlogPageSplit::leftchild, XLogRecData::len, SplitedPageLayout::lenlist, SplitedPageLayout::list, gistxlogPageSplit::markfollowright, XLogRecData::next, SplitedPageLayout::next, gistxlogPageSplit::node, gistxlogPageSplit::npage, gistxlogPageSplit::origblkno, gistxlogPageSplit::origleaf, gistxlogPageSplit::orignsn, gistxlogPageSplit::origrlink, palloc(), pfree(), XLOG_GIST_PAGE_SPLIT, and XLogInsert().
Referenced by gistplacetopage().
{
    /*
     * Build and insert the WAL record describing a page split: a fixed
     * header, an optional reference to the left child buffer, then a
     * (page-header, tuple-list) pair for each page produced by the split.
     *
     * NOTE(review): this body uses parameter names 'orignsn' and
     * 'leftchildbuf', while the signature shown in the header documentation
     * above names them 'oldnsn' and 'leftchild' — presumably the header
     * declaration and the definition use different names; verify against
     * the actual sources.
     */
    XLogRecData *rdata;
    gistxlogPageSplit xlrec;
    SplitedPageLayout *ptr;
    int         npage = 0,
                cur;
    XLogRecPtr  recptr;

    for (ptr = dist; ptr; ptr = ptr->next)
        npage++;

    /*
     * One header entry + one optional child-buffer entry + two entries per
     * page (block header, tuple data) = at most npage * 2 + 2 entries.
     */
    rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (npage * 2 + 2));

    xlrec.node = node;
    xlrec.origblkno = blkno;
    xlrec.origrlink = origrlink;
    xlrec.orignsn = orignsn;
    xlrec.origleaf = page_is_leaf;
    xlrec.npage = (uint16) npage;
    xlrec.leftchild =
        BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
    xlrec.markfollowright = markfollowright;

    rdata[0].data = (char *) &xlrec;
    rdata[0].len = sizeof(gistxlogPageSplit);
    rdata[0].buffer = InvalidBuffer;

    cur = 1;

    /*
     * Include a full page image of the child buf. (only necessary if a
     * checkpoint happened since the child page was split)
     */
    if (BufferIsValid(leftchildbuf))
    {
        rdata[cur - 1].next = &(rdata[cur]);
        rdata[cur].data = NULL;
        rdata[cur].len = 0;
        rdata[cur].buffer = leftchildbuf;
        rdata[cur].buffer_std = true;
        cur++;
    }

    /* Chain on the block header and tuple list of each split page. */
    for (ptr = dist; ptr; ptr = ptr->next)
    {
        rdata[cur - 1].next = &(rdata[cur]);
        rdata[cur].buffer = InvalidBuffer;
        rdata[cur].data = (char *) &(ptr->block);
        rdata[cur].len = sizeof(gistxlogPage);
        cur++;

        rdata[cur - 1].next = &(rdata[cur]);
        rdata[cur].buffer = InvalidBuffer;
        rdata[cur].data = (char *) (ptr->list);
        rdata[cur].len = ptr->lenlist;
        cur++;
    }
    rdata[cur - 1].next = NULL;

    recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);

    pfree(rdata);
    return recptr;
}
XLogRecPtr gistXLogUpdate | ( | RelFileNode | node, | |
Buffer | buffer, | |||
OffsetNumber * | todelete, | |||
int | ntodelete, | |||
IndexTuple * | itup, | |||
int | ntup, | |||
Buffer | leftchild | |||
) |
Definition at line 460 of file gistxlog.c.
References gistxlogPageUpdate::blkno, XLogRecData::buffer, XLogRecData::buffer_std, BufferGetBlockNumber(), BufferIsValid, cur, XLogRecData::data, i, IndexTupleSize, gistxlogPageUpdate::leftchild, XLogRecData::len, XLogRecData::next, gistxlogPageUpdate::node, gistxlogPageUpdate::ntodelete, palloc(), pfree(), XLOG_GIST_PAGE_UPDATE, and XLogInsert().
Referenced by gistbulkdelete(), and gistplacetopage().
{
    /*
     * Build and insert the WAL record for an in-place page update: a fixed
     * header, the array of deleted offsets, each new tuple, and optionally a
     * reference to the left child buffer.
     *
     * NOTE(review): this body uses 'ituplen' and 'leftchildbuf', while the
     * signature shown in the header documentation above names those
     * parameters 'ntup' and 'leftchild' — presumably declaration vs.
     * definition naming; verify against the actual sources.
     */
    XLogRecData *rdata;
    gistxlogPageUpdate xlrec;
    int         cur,
                i;
    XLogRecPtr  recptr;

    /* 2 fixed entries + one per new tuple + 1 optional child entry. */
    rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (3 + ituplen));

    xlrec.node = node;
    xlrec.blkno = BufferGetBlockNumber(buffer);
    xlrec.ntodelete = ntodelete;
    xlrec.leftchild =
        BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;

    rdata[0].data = (char *) &xlrec;
    rdata[0].len = sizeof(gistxlogPageUpdate);
    rdata[0].buffer = InvalidBuffer;
    rdata[0].next = &(rdata[1]);

    rdata[1].data = (char *) todelete;
    rdata[1].len = sizeof(OffsetNumber) * ntodelete;
    rdata[1].buffer = buffer;
    rdata[1].buffer_std = true;

    cur = 2;

    /* new tuples */
    for (i = 0; i < ituplen; i++)
    {
        rdata[cur - 1].next = &(rdata[cur]);
        rdata[cur].data = (char *) (itup[i]);
        rdata[cur].len = IndexTupleSize(itup[i]);
        rdata[cur].buffer = buffer;
        rdata[cur].buffer_std = true;
        cur++;
    }

    /*
     * Include a full page image of the child buf. (only necessary if a
     * checkpoint happened since the child page was split)
     */
    if (BufferIsValid(leftchildbuf))
    {
        rdata[cur - 1].next = &(rdata[cur]);
        rdata[cur].data = NULL;
        rdata[cur].len = 0;
        rdata[cur].buffer = leftchildbuf;
        rdata[cur].buffer_std = true;
        cur++;
    }
    rdata[cur - 1].next = NULL;

    recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);

    pfree(rdata);
    return recptr;
}
Definition at line 1310 of file gist.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), GISTSTATE::compressFn, GISTSTATE::consistentFn, CurrentMemoryContext, GISTSTATE::decompressFn, GISTSTATE::distanceFn, elog, GISTSTATE::equalFn, ERROR, fmgr_info_copy(), FmgrInfo::fn_oid, GIST_COMPRESS_PROC, GIST_CONSISTENT_PROC, GIST_DECOMPRESS_PROC, GIST_DISTANCE_PROC, GIST_EQUAL_PROC, GIST_PENALTY_PROC, GIST_PICKSPLIT_PROC, GIST_UNION_PROC, i, index_getprocid(), index_getprocinfo(), INDEX_MAX_KEYS, MemoryContextSwitchTo(), tupleDesc::natts, OidIsValid, palloc(), GISTSTATE::penaltyFn, GISTSTATE::picksplitFn, RelationData::rd_att, RelationData::rd_indcollation, GISTSTATE::scanCxt, GISTSTATE::supportCollation, GISTSTATE::tempCxt, GISTSTATE::tupdesc, and GISTSTATE::unionFn.
Referenced by gistbeginscan(), gistbuild(), and gistinsert().
{
    /*
     * Allocate and populate a GISTSTATE for this index: a dedicated memory
     * context plus copies of every per-column support-function FmgrInfo.
     * (Function name not visible in this chunk; referenced by
     * gistbeginscan(), gistbuild(), and gistinsert().)
     */
    GISTSTATE  *giststate;
    MemoryContext scanCxt;
    MemoryContext oldCxt;
    int         i;

    /* safety check to protect fixed-size arrays in GISTSTATE */
    if (index->rd_att->natts > INDEX_MAX_KEYS)
        elog(ERROR, "numberOfAttributes %d > %d",
             index->rd_att->natts, INDEX_MAX_KEYS);

    /* Create the memory context that will hold the GISTSTATE */
    scanCxt = AllocSetContextCreate(CurrentMemoryContext,
                                    "GiST scan context",
                                    ALLOCSET_DEFAULT_MINSIZE,
                                    ALLOCSET_DEFAULT_INITSIZE,
                                    ALLOCSET_DEFAULT_MAXSIZE);
    oldCxt = MemoryContextSwitchTo(scanCxt);

    /* Create and fill in the GISTSTATE */
    giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));

    giststate->scanCxt = scanCxt;
    giststate->tempCxt = scanCxt;   /* caller must change this if needed */
    giststate->tupdesc = index->rd_att;

    for (i = 0; i < index->rd_att->natts; i++)
    {
        /* Copy each required support function into scanCxt. */
        fmgr_info_copy(&(giststate->consistentFn[i]),
                       index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
                       scanCxt);
        fmgr_info_copy(&(giststate->unionFn[i]),
                       index_getprocinfo(index, i + 1, GIST_UNION_PROC),
                       scanCxt);
        fmgr_info_copy(&(giststate->compressFn[i]),
                       index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
                       scanCxt);
        fmgr_info_copy(&(giststate->decompressFn[i]),
                       index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
                       scanCxt);
        fmgr_info_copy(&(giststate->penaltyFn[i]),
                       index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
                       scanCxt);
        fmgr_info_copy(&(giststate->picksplitFn[i]),
                       index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
                       scanCxt);
        fmgr_info_copy(&(giststate->equalFn[i]),
                       index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
                       scanCxt);

        /* opclasses are not required to provide a Distance method */
        if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
            fmgr_info_copy(&(giststate->distanceFn[i]),
                           index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
                           scanCxt);
        else
            giststate->distanceFn[i].fn_oid = InvalidOid;

        /*
         * If the index column has a specified collation, we should honor that
         * while doing comparisons. However, we may have a collatable storage
         * type for a noncollatable indexed data type. If there's no index
         * collation then specify default collation in case the support
         * functions need collation. This is harmless if the support
         * functions don't care about collation, so we just do it
         * unconditionally. (We could alternatively call get_typcollation,
         * but that seems like expensive overkill --- there aren't going to be
         * any cases where a GiST storage type has a nondefault collation.)
         */
        if (OidIsValid(index->rd_indcollation[i]))
            giststate->supportCollation[i] = index->rd_indcollation[i];
        else
            giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
    }

    MemoryContextSwitchTo(oldCxt);

    return giststate;
}