Header And Logo

PostgreSQL
| The world's most advanced open source database.

gininsert.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * gininsert.c
00004  *    insert routines for the postgres inverted index access method.
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *          src/backend/access/gin/gininsert.c
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "access/gin_private.h"
00018 #include "access/heapam_xlog.h"
00019 #include "catalog/index.h"
00020 #include "miscadmin.h"
00021 #include "storage/bufmgr.h"
00022 #include "storage/smgr.h"
00023 #include "storage/indexfsm.h"
00024 #include "utils/memutils.h"
00025 #include "utils/rel.h"
00026 
00027 
00028 typedef struct
00029 {
00030     GinState    ginstate;
00031     double      indtuples;
00032     GinStatsData buildStats;
00033     MemoryContext tmpCtx;
00034     MemoryContext funcCtx;
00035     BuildAccumulator accum;
00036 } GinBuildState;
00037 
00038 /*
00039  * Creates new posting tree with one page, containing the given TIDs.
00040  * Returns the page number (which will be the root of this posting tree).
00041  *
00042  * items[] must be in sorted order with no duplicates.
00043  */
00044 static BlockNumber
00045 createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
00046 {
00047     BlockNumber blkno;
00048     Buffer      buffer = GinNewBuffer(index);
00049     Page        page;
00050 
00051     /* Assert that the items[] array will fit on one page */
00052     Assert(nitems <= GinMaxLeafDataItems);
00053 
00054     START_CRIT_SECTION();
00055 
00056     GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
00057     page = BufferGetPage(buffer);
00058     blkno = BufferGetBlockNumber(buffer);
00059 
00060     memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
00061     GinPageGetOpaque(page)->maxoff = nitems;
00062 
00063     MarkBufferDirty(buffer);
00064 
00065     if (RelationNeedsWAL(index))
00066     {
00067         XLogRecPtr  recptr;
00068         XLogRecData rdata[2];
00069         ginxlogCreatePostingTree data;
00070 
00071         data.node = index->rd_node;
00072         data.blkno = blkno;
00073         data.nitem = nitems;
00074 
00075         rdata[0].buffer = InvalidBuffer;
00076         rdata[0].data = (char *) &data;
00077         rdata[0].len = sizeof(ginxlogCreatePostingTree);
00078         rdata[0].next = &rdata[1];
00079 
00080         rdata[1].buffer = InvalidBuffer;
00081         rdata[1].data = (char *) items;
00082         rdata[1].len = sizeof(ItemPointerData) * nitems;
00083         rdata[1].next = NULL;
00084 
00085         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
00086         PageSetLSN(page, recptr);
00087     }
00088 
00089     UnlockReleaseBuffer(buffer);
00090 
00091     END_CRIT_SECTION();
00092 
00093     return blkno;
00094 }
00095 
00096 
00097 /*
00098  * Adds array of item pointers to tuple's posting list, or
00099  * creates posting tree and tuple pointing to tree in case
00100  * of not enough space.  Max size of tuple is defined in
00101  * GinFormTuple().  Returns a new, modified index tuple.
00102  * items[] must be in sorted order with no duplicates.
00103  */
00104 static IndexTuple
00105 addItemPointersToLeafTuple(GinState *ginstate,
00106                            IndexTuple old,
00107                            ItemPointerData *items, uint32 nitem,
00108                            GinStatsData *buildStats)
00109 {
00110     OffsetNumber attnum;
00111     Datum       key;
00112     GinNullCategory category;
00113     IndexTuple  res;
00114 
00115     Assert(!GinIsPostingTree(old));
00116 
00117     attnum = gintuple_get_attrnum(ginstate, old);
00118     key = gintuple_get_key(ginstate, old, &category);
00119 
00120     /* try to build tuple with room for all the items */
00121     res = GinFormTuple(ginstate, attnum, key, category,
00122                        NULL, nitem + GinGetNPosting(old),
00123                        false);
00124 
00125     if (res)
00126     {
00127         /* good, small enough */
00128         uint32      newnitem;
00129 
00130         /* fill in the posting list with union of old and new TIDs */
00131         newnitem = ginMergeItemPointers(GinGetPosting(res),
00132                                         GinGetPosting(old),
00133                                         GinGetNPosting(old),
00134                                         items, nitem);
00135         /* merge might have eliminated some duplicate items */
00136         GinShortenTuple(res, newnitem);
00137     }
00138     else
00139     {
00140         /* posting list would be too big, convert to posting tree */
00141         BlockNumber postingRoot;
00142         GinPostingTreeScan *gdi;
00143 
00144         /*
00145          * Initialize posting tree with the old tuple's posting list.  It's
00146          * surely small enough to fit on one posting-tree page, and should
00147          * already be in order with no duplicates.
00148          */
00149         postingRoot = createPostingTree(ginstate->index,
00150                                         GinGetPosting(old),
00151                                         GinGetNPosting(old));
00152 
00153         /* During index build, count the newly-added data page */
00154         if (buildStats)
00155             buildStats->nDataPages++;
00156 
00157         /* Now insert the TIDs-to-be-added into the posting tree */
00158         gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
00159         gdi->btree.isBuild = (buildStats != NULL);
00160 
00161         ginInsertItemPointers(gdi, items, nitem, buildStats);
00162 
00163         pfree(gdi);
00164 
00165         /* And build a new posting-tree-only result tuple */
00166         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true);
00167         GinSetPostingTree(res, postingRoot);
00168     }
00169 
00170     return res;
00171 }
00172 
00173 /*
00174  * Build a fresh leaf tuple, either posting-list or posting-tree format
00175  * depending on whether the given items list will fit.
00176  * items[] must be in sorted order with no duplicates.
00177  *
00178  * This is basically the same logic as in addItemPointersToLeafTuple,
00179  * but working from slightly different input.
00180  */
00181 static IndexTuple
00182 buildFreshLeafTuple(GinState *ginstate,
00183                     OffsetNumber attnum, Datum key, GinNullCategory category,
00184                     ItemPointerData *items, uint32 nitem,
00185                     GinStatsData *buildStats)
00186 {
00187     IndexTuple  res;
00188 
00189     /* try to build tuple with room for all the items */
00190     res = GinFormTuple(ginstate, attnum, key, category,
00191                        items, nitem, false);
00192 
00193     if (!res)
00194     {
00195         /* posting list would be too big, build posting tree */
00196         BlockNumber postingRoot;
00197 
00198         /*
00199          * Build posting-tree-only result tuple.  We do this first so as to
00200          * fail quickly if the key is too big.
00201          */
00202         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true);
00203 
00204         /*
00205          * Initialize posting tree with as many TIDs as will fit on the first
00206          * page.
00207          */
00208         postingRoot = createPostingTree(ginstate->index,
00209                                         items,
00210                                         Min(nitem, GinMaxLeafDataItems));
00211 
00212         /* During index build, count the newly-added data page */
00213         if (buildStats)
00214             buildStats->nDataPages++;
00215 
00216         /* Add any remaining TIDs to the posting tree */
00217         if (nitem > GinMaxLeafDataItems)
00218         {
00219             GinPostingTreeScan *gdi;
00220 
00221             gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
00222             gdi->btree.isBuild = (buildStats != NULL);
00223 
00224             ginInsertItemPointers(gdi,
00225                                   items + GinMaxLeafDataItems,
00226                                   nitem - GinMaxLeafDataItems,
00227                                   buildStats);
00228 
00229             pfree(gdi);
00230         }
00231 
00232         /* And save the root link in the result tuple */
00233         GinSetPostingTree(res, postingRoot);
00234     }
00235 
00236     return res;
00237 }
00238 
00239 /*
00240  * Insert one or more heap TIDs associated with the given key value.
00241  * This will either add a single key entry, or enlarge a pre-existing entry.
00242  *
00243  * During an index build, buildStats is non-null and the counters
00244  * it contains should be incremented as needed.
00245  */
00246 void
00247 ginEntryInsert(GinState *ginstate,
00248                OffsetNumber attnum, Datum key, GinNullCategory category,
00249                ItemPointerData *items, uint32 nitem,
00250                GinStatsData *buildStats)
00251 {
00252     GinBtreeData btree;
00253     GinBtreeStack *stack;
00254     IndexTuple  itup;
00255     Page        page;
00256 
00257     /* During index build, count the to-be-inserted entry */
00258     if (buildStats)
00259         buildStats->nEntries++;
00260 
00261     ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
00262 
00263     stack = ginFindLeafPage(&btree, NULL);
00264     page = BufferGetPage(stack->buffer);
00265 
00266     if (btree.findItem(&btree, stack))
00267     {
00268         /* found pre-existing entry */
00269         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
00270 
00271         if (GinIsPostingTree(itup))
00272         {
00273             /* add entries to existing posting tree */
00274             BlockNumber rootPostingTree = GinGetPostingTree(itup);
00275             GinPostingTreeScan *gdi;
00276 
00277             /* release all stack */
00278             LockBuffer(stack->buffer, GIN_UNLOCK);
00279             freeGinBtreeStack(stack);
00280 
00281             /* insert into posting tree */
00282             gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, FALSE);
00283             gdi->btree.isBuild = (buildStats != NULL);
00284             ginInsertItemPointers(gdi, items, nitem, buildStats);
00285             pfree(gdi);
00286 
00287             return;
00288         }
00289 
00290         /* modify an existing leaf entry */
00291         itup = addItemPointersToLeafTuple(ginstate, itup,
00292                                           items, nitem, buildStats);
00293 
00294         btree.isDelete = TRUE;
00295     }
00296     else
00297     {
00298         /* no match, so construct a new leaf entry */
00299         itup = buildFreshLeafTuple(ginstate, attnum, key, category,
00300                                    items, nitem, buildStats);
00301     }
00302 
00303     /* Insert the new or modified leaf tuple */
00304     btree.entry = itup;
00305     ginInsertValue(&btree, stack, buildStats);
00306     pfree(itup);
00307 }
00308 
00309 /*
00310  * Extract index entries for a single indexable item, and add them to the
00311  * BuildAccumulator's state.
00312  *
00313  * This function is used only during initial index creation.
00314  */
00315 static void
00316 ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
00317                        Datum value, bool isNull,
00318                        ItemPointer heapptr)
00319 {
00320     Datum      *entries;
00321     GinNullCategory *categories;
00322     int32       nentries;
00323     MemoryContext oldCtx;
00324 
00325     oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
00326     entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
00327                                 value, isNull,
00328                                 &nentries, &categories);
00329     MemoryContextSwitchTo(oldCtx);
00330 
00331     ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
00332                        entries, categories, nentries);
00333 
00334     buildstate->indtuples += nentries;
00335 
00336     MemoryContextReset(buildstate->funcCtx);
00337 }
00338 
00339 static void
00340 ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
00341                  bool *isnull, bool tupleIsAlive, void *state)
00342 {
00343     GinBuildState *buildstate = (GinBuildState *) state;
00344     MemoryContext oldCtx;
00345     int         i;
00346 
00347     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
00348 
00349     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
00350         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
00351                                values[i], isnull[i],
00352                                &htup->t_self);
00353 
00354     /* If we've maxed out our available memory, dump everything to the index */
00355     if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L)
00356     {
00357         ItemPointerData *list;
00358         Datum       key;
00359         GinNullCategory category;
00360         uint32      nlist;
00361         OffsetNumber attnum;
00362 
00363         ginBeginBAScan(&buildstate->accum);
00364         while ((list = ginGetBAEntry(&buildstate->accum,
00365                                   &attnum, &key, &category, &nlist)) != NULL)
00366         {
00367             /* there could be many entries, so be willing to abort here */
00368             CHECK_FOR_INTERRUPTS();
00369             ginEntryInsert(&buildstate->ginstate, attnum, key, category,
00370                            list, nlist, &buildstate->buildStats);
00371         }
00372 
00373         MemoryContextReset(buildstate->tmpCtx);
00374         ginInitBA(&buildstate->accum);
00375     }
00376 
00377     MemoryContextSwitchTo(oldCtx);
00378 }
00379 
00380 Datum
00381 ginbuild(PG_FUNCTION_ARGS)
00382 {
00383     Relation    heap = (Relation) PG_GETARG_POINTER(0);
00384     Relation    index = (Relation) PG_GETARG_POINTER(1);
00385     IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
00386     IndexBuildResult *result;
00387     double      reltuples;
00388     GinBuildState buildstate;
00389     Buffer      RootBuffer,
00390                 MetaBuffer;
00391     ItemPointerData *list;
00392     Datum       key;
00393     GinNullCategory category;
00394     uint32      nlist;
00395     MemoryContext oldCtx;
00396     OffsetNumber attnum;
00397 
00398     if (RelationGetNumberOfBlocks(index) != 0)
00399         elog(ERROR, "index \"%s\" already contains data",
00400              RelationGetRelationName(index));
00401 
00402     initGinState(&buildstate.ginstate, index);
00403     buildstate.indtuples = 0;
00404     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
00405 
00406     /* initialize the meta page */
00407     MetaBuffer = GinNewBuffer(index);
00408 
00409     /* initialize the root page */
00410     RootBuffer = GinNewBuffer(index);
00411 
00412     START_CRIT_SECTION();
00413     GinInitMetabuffer(MetaBuffer);
00414     MarkBufferDirty(MetaBuffer);
00415     GinInitBuffer(RootBuffer, GIN_LEAF);
00416     MarkBufferDirty(RootBuffer);
00417 
00418     if (RelationNeedsWAL(index))
00419     {
00420         XLogRecPtr  recptr;
00421         XLogRecData rdata;
00422         Page        page;
00423 
00424         rdata.buffer = InvalidBuffer;
00425         rdata.data = (char *) &(index->rd_node);
00426         rdata.len = sizeof(RelFileNode);
00427         rdata.next = NULL;
00428 
00429         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
00430 
00431         page = BufferGetPage(RootBuffer);
00432         PageSetLSN(page, recptr);
00433 
00434         page = BufferGetPage(MetaBuffer);
00435         PageSetLSN(page, recptr);
00436     }
00437 
00438     UnlockReleaseBuffer(MetaBuffer);
00439     UnlockReleaseBuffer(RootBuffer);
00440     END_CRIT_SECTION();
00441 
00442     /* count the root as first entry page */
00443     buildstate.buildStats.nEntryPages++;
00444 
00445     /*
00446      * create a temporary memory context that is reset once for each tuple
00447      * inserted into the index
00448      */
00449     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
00450                                               "Gin build temporary context",
00451                                               ALLOCSET_DEFAULT_MINSIZE,
00452                                               ALLOCSET_DEFAULT_INITSIZE,
00453                                               ALLOCSET_DEFAULT_MAXSIZE);
00454 
00455     buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
00456                      "Gin build temporary context for user-defined function",
00457                                                ALLOCSET_DEFAULT_MINSIZE,
00458                                                ALLOCSET_DEFAULT_INITSIZE,
00459                                                ALLOCSET_DEFAULT_MAXSIZE);
00460 
00461     buildstate.accum.ginstate = &buildstate.ginstate;
00462     ginInitBA(&buildstate.accum);
00463 
00464     /*
00465      * Do the heap scan.  We disallow sync scan here because dataPlaceToPage
00466      * prefers to receive tuples in TID order.
00467      */
00468     reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
00469                                    ginBuildCallback, (void *) &buildstate);
00470 
00471     /* dump remaining entries to the index */
00472     oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
00473     ginBeginBAScan(&buildstate.accum);
00474     while ((list = ginGetBAEntry(&buildstate.accum,
00475                                  &attnum, &key, &category, &nlist)) != NULL)
00476     {
00477         /* there could be many entries, so be willing to abort here */
00478         CHECK_FOR_INTERRUPTS();
00479         ginEntryInsert(&buildstate.ginstate, attnum, key, category,
00480                        list, nlist, &buildstate.buildStats);
00481     }
00482     MemoryContextSwitchTo(oldCtx);
00483 
00484     MemoryContextDelete(buildstate.tmpCtx);
00485 
00486     /*
00487      * Update metapage stats
00488      */
00489     buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
00490     ginUpdateStats(index, &buildstate.buildStats);
00491 
00492     /*
00493      * Return statistics
00494      */
00495     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
00496 
00497     result->heap_tuples = reltuples;
00498     result->index_tuples = buildstate.indtuples;
00499 
00500     PG_RETURN_POINTER(result);
00501 }
00502 
00503 /*
00504  *  ginbuildempty() -- build an empty gin index in the initialization fork
00505  */
00506 Datum
00507 ginbuildempty(PG_FUNCTION_ARGS)
00508 {
00509     Relation    index = (Relation) PG_GETARG_POINTER(0);
00510     Buffer      RootBuffer,
00511                 MetaBuffer;
00512 
00513     /* An empty GIN index has two pages. */
00514     MetaBuffer =
00515         ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
00516     LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
00517     RootBuffer =
00518         ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
00519     LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
00520 
00521     /* Initialize and xlog metabuffer and root buffer. */
00522     START_CRIT_SECTION();
00523     GinInitMetabuffer(MetaBuffer);
00524     MarkBufferDirty(MetaBuffer);
00525     log_newpage_buffer(MetaBuffer);
00526     GinInitBuffer(RootBuffer, GIN_LEAF);
00527     MarkBufferDirty(RootBuffer);
00528     log_newpage_buffer(RootBuffer);
00529     END_CRIT_SECTION();
00530 
00531     /* Unlock and release the buffers. */
00532     UnlockReleaseBuffer(MetaBuffer);
00533     UnlockReleaseBuffer(RootBuffer);
00534 
00535     PG_RETURN_VOID();
00536 }
00537 
00538 /*
00539  * Insert index entries for a single indexable item during "normal"
00540  * (non-fast-update) insertion
00541  */
00542 static void
00543 ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
00544                    Datum value, bool isNull,
00545                    ItemPointer item)
00546 {
00547     Datum      *entries;
00548     GinNullCategory *categories;
00549     int32       i,
00550                 nentries;
00551 
00552     entries = ginExtractEntries(ginstate, attnum, value, isNull,
00553                                 &nentries, &categories);
00554 
00555     for (i = 0; i < nentries; i++)
00556         ginEntryInsert(ginstate, attnum, entries[i], categories[i],
00557                        item, 1, NULL);
00558 }
00559 
00560 Datum
00561 gininsert(PG_FUNCTION_ARGS)
00562 {
00563     Relation    index = (Relation) PG_GETARG_POINTER(0);
00564     Datum      *values = (Datum *) PG_GETARG_POINTER(1);
00565     bool       *isnull = (bool *) PG_GETARG_POINTER(2);
00566     ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
00567 
00568 #ifdef NOT_USED
00569     Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
00570     IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
00571 #endif
00572     GinState    ginstate;
00573     MemoryContext oldCtx;
00574     MemoryContext insertCtx;
00575     int         i;
00576 
00577     insertCtx = AllocSetContextCreate(CurrentMemoryContext,
00578                                       "Gin insert temporary context",
00579                                       ALLOCSET_DEFAULT_MINSIZE,
00580                                       ALLOCSET_DEFAULT_INITSIZE,
00581                                       ALLOCSET_DEFAULT_MAXSIZE);
00582 
00583     oldCtx = MemoryContextSwitchTo(insertCtx);
00584 
00585     initGinState(&ginstate, index);
00586 
00587     if (GinGetUseFastUpdate(index))
00588     {
00589         GinTupleCollector collector;
00590 
00591         memset(&collector, 0, sizeof(GinTupleCollector));
00592 
00593         for (i = 0; i < ginstate.origTupdesc->natts; i++)
00594             ginHeapTupleFastCollect(&ginstate, &collector,
00595                                     (OffsetNumber) (i + 1),
00596                                     values[i], isnull[i],
00597                                     ht_ctid);
00598 
00599         ginHeapTupleFastInsert(&ginstate, &collector);
00600     }
00601     else
00602     {
00603         for (i = 0; i < ginstate.origTupdesc->natts; i++)
00604             ginHeapTupleInsert(&ginstate, (OffsetNumber) (i + 1),
00605                                values[i], isnull[i],
00606                                ht_ctid);
00607     }
00608 
00609     MemoryContextSwitchTo(oldCtx);
00610     MemoryContextDelete(insertCtx);
00611 
00612     PG_RETURN_BOOL(false);
00613 }