Header And Logo

PostgreSQL
| The world's most advanced open source database.

ginget.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * ginget.c
00004  *    fetch tuples from a GIN scan.
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *          src/backend/access/gin/ginget.c
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "access/gin_private.h"
00018 #include "access/relscan.h"
00019 #include "miscadmin.h"
00020 #include "utils/datum.h"
00021 #include "utils/memutils.h"
00022 
00023 
00024 typedef struct pendingPosition
00025 {
00026     Buffer      pendingBuffer;
00027     OffsetNumber firstOffset;
00028     OffsetNumber lastOffset;
00029     ItemPointerData item;
00030     bool       *hasMatchKey;
00031 } pendingPosition;
00032 
00033 
00034 /*
00035  * Convenience function for invoking a key's consistentFn
00036  */
00037 static bool
00038 callConsistentFn(GinState *ginstate, GinScanKey key)
00039 {
00040     /*
00041      * If we're dealing with a dummy EVERYTHING key, we don't want to call the
00042      * consistentFn; just claim it matches.
00043      */
00044     if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
00045     {
00046         key->recheckCurItem = false;
00047         return true;
00048     }
00049 
00050     /*
00051      * Initialize recheckCurItem in case the consistentFn doesn't know it
00052      * should set it.  The safe assumption in that case is to force recheck.
00053      */
00054     key->recheckCurItem = true;
00055 
00056     return DatumGetBool(FunctionCall8Coll(&ginstate->consistentFn[key->attnum - 1],
00057                                  ginstate->supportCollation[key->attnum - 1],
00058                                           PointerGetDatum(key->entryRes),
00059                                           UInt16GetDatum(key->strategy),
00060                                           key->query,
00061                                           UInt32GetDatum(key->nuserentries),
00062                                           PointerGetDatum(key->extra_data),
00063                                        PointerGetDatum(&key->recheckCurItem),
00064                                           PointerGetDatum(key->queryValues),
00065                                      PointerGetDatum(key->queryCategories)));
00066 }
00067 
00068 /*
00069  * Tries to refind previously taken ItemPointer on a posting page.
00070  */
00071 static bool
00072 findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off)
00073 {
00074     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
00075     int         res;
00076 
00077     if (GinPageGetOpaque(page)->flags & GIN_DELETED)
00078         /* page was deleted by concurrent vacuum */
00079         return false;
00080 
00081     /*
00082      * scan page to find equal or first greater value
00083      */
00084     for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
00085     {
00086         res = ginCompareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
00087 
00088         if (res <= 0)
00089             return true;
00090     }
00091 
00092     return false;
00093 }
00094 
00095 /*
00096  * Goes to the next page if current offset is outside of bounds
00097  */
00098 static bool
00099 moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
00100 {
00101     Page        page = BufferGetPage(stack->buffer);
00102 
00103     if (stack->off > PageGetMaxOffsetNumber(page))
00104     {
00105         /*
00106          * We scanned the whole page, so we should take right page
00107          */
00108         stack->blkno = GinPageGetOpaque(page)->rightlink;
00109 
00110         if (GinPageRightMost(page))
00111             return false;       /* no more pages */
00112 
00113         LockBuffer(stack->buffer, GIN_UNLOCK);
00114         stack->buffer = ReleaseAndReadBuffer(stack->buffer,
00115                                              btree->index,
00116                                              stack->blkno);
00117         LockBuffer(stack->buffer, GIN_SHARE);
00118         stack->off = FirstOffsetNumber;
00119     }
00120 
00121     return true;
00122 }
00123 
00124 /*
00125  * Scan all pages of a posting tree and save all its heap ItemPointers
00126  * in scanEntry->matchBitmap
00127  */
00128 static void
00129 scanPostingTree(Relation index, GinScanEntry scanEntry,
00130                 BlockNumber rootPostingTree)
00131 {
00132     GinPostingTreeScan *gdi;
00133     Buffer      buffer;
00134     Page        page;
00135     BlockNumber blkno;
00136 
00137     /* Descend to the leftmost leaf page */
00138     gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE);
00139 
00140     buffer = ginScanBeginPostingTree(gdi);
00141     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
00142 
00143     freeGinBtreeStack(gdi->stack);
00144     pfree(gdi);
00145 
00146     /*
00147      * Loop iterates through all leaf pages of posting tree
00148      */
00149     for (;;)
00150     {
00151         page = BufferGetPage(buffer);
00152 
00153         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 &&
00154             GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
00155         {
00156             tbm_add_tuples(scanEntry->matchBitmap,
00157                    (ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber),
00158                            GinPageGetOpaque(page)->maxoff, false);
00159             scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
00160         }
00161 
00162         if (GinPageRightMost(page))
00163             break;              /* no more pages */
00164 
00165         blkno = GinPageGetOpaque(page)->rightlink;
00166         LockBuffer(buffer, GIN_UNLOCK);
00167         buffer = ReleaseAndReadBuffer(buffer, index, blkno);
00168         LockBuffer(buffer, GIN_SHARE);
00169     }
00170 
00171     UnlockReleaseBuffer(buffer);
00172 }
00173 
00174 /*
00175  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
00176  * match the search entry.  This supports three different match modes:
00177  *
00178  * 1. Partial-match support: scan from current point until the
00179  *    comparePartialFn says we're done.
00180  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
00181  *    key for the current attnum) until we hit null items or end of attnum
00182  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
00183  *    key for the current attnum) until we hit end of attnum
00184  *
00185  * Returns true if done, false if it's necessary to restart scan from scratch
00186  */
00187 static bool
00188 collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
00189                    GinScanEntry scanEntry)
00190 {
00191     OffsetNumber attnum;
00192     Form_pg_attribute attr;
00193 
00194     /* Initialize empty bitmap result */
00195     scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
00196 
00197     /* Null query cannot partial-match anything */
00198     if (scanEntry->isPartialMatch &&
00199         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
00200         return true;
00201 
00202     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
00203     attnum = scanEntry->attnum;
00204     attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
00205 
00206     for (;;)
00207     {
00208         Page        page;
00209         IndexTuple  itup;
00210         Datum       idatum;
00211         GinNullCategory icategory;
00212 
00213         /*
00214          * stack->off points to the interested entry, buffer is already locked
00215          */
00216         if (moveRightIfItNeeded(btree, stack) == false)
00217             return true;
00218 
00219         page = BufferGetPage(stack->buffer);
00220         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
00221 
00222         /*
00223          * If tuple stores another attribute then stop scan
00224          */
00225         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
00226             return true;
00227 
00228         /* Safe to fetch attribute value */
00229         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
00230 
00231         /*
00232          * Check for appropriate scan stop conditions
00233          */
00234         if (scanEntry->isPartialMatch)
00235         {
00236             int32       cmp;
00237 
00238             /*
00239              * In partial match, stop scan at any null (including
00240              * placeholders); partial matches never match nulls
00241              */
00242             if (icategory != GIN_CAT_NORM_KEY)
00243                 return true;
00244 
00245             /*----------
00246              * Check of partial match.
00247              * case cmp == 0 => match
00248              * case cmp > 0 => not match and finish scan
00249              * case cmp < 0 => not match and continue scan
00250              *----------
00251              */
00252             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
00253                                btree->ginstate->supportCollation[attnum - 1],
00254                                                   scanEntry->queryKey,
00255                                                   idatum,
00256                                          UInt16GetDatum(scanEntry->strategy),
00257                                     PointerGetDatum(scanEntry->extra_data)));
00258 
00259             if (cmp > 0)
00260                 return true;
00261             else if (cmp < 0)
00262             {
00263                 stack->off++;
00264                 continue;
00265             }
00266         }
00267         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
00268         {
00269             /*
00270              * In ALL mode, we are not interested in null items, so we can
00271              * stop if we get to a null-item placeholder (which will be the
00272              * last entry for a given attnum).  We do want to include NULL_KEY
00273              * and EMPTY_ITEM entries, though.
00274              */
00275             if (icategory == GIN_CAT_NULL_ITEM)
00276                 return true;
00277         }
00278 
00279         /*
00280          * OK, we want to return the TIDs listed in this entry.
00281          */
00282         if (GinIsPostingTree(itup))
00283         {
00284             BlockNumber rootPostingTree = GinGetPostingTree(itup);
00285 
00286             /*
00287              * We should unlock current page (but not unpin) during tree scan
00288              * to prevent deadlock with vacuum processes.
00289              *
00290              * We save current entry value (idatum) to be able to re-find our
00291              * tuple after re-locking
00292              */
00293             if (icategory == GIN_CAT_NORM_KEY)
00294                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
00295 
00296             LockBuffer(stack->buffer, GIN_UNLOCK);
00297 
00298             /* Collect all the TIDs in this entry's posting tree */
00299             scanPostingTree(btree->index, scanEntry, rootPostingTree);
00300 
00301             /*
00302              * We lock again the entry page and while it was unlocked insert
00303              * might have occurred, so we need to re-find our position.
00304              */
00305             LockBuffer(stack->buffer, GIN_SHARE);
00306             page = BufferGetPage(stack->buffer);
00307             if (!GinPageIsLeaf(page))
00308             {
00309                 /*
00310                  * Root page becomes non-leaf while we unlock it. We will
00311                  * start again, this situation doesn't occur often - root can
00312                  * became a non-leaf only once per life of index.
00313                  */
00314                 return false;
00315             }
00316 
00317             /* Search forward to re-find idatum */
00318             for (;;)
00319             {
00320                 Datum       newDatum;
00321                 GinNullCategory newCategory;
00322 
00323                 if (moveRightIfItNeeded(btree, stack) == false)
00324                     elog(ERROR, "lost saved point in index");   /* must not happen !!! */
00325 
00326                 page = BufferGetPage(stack->buffer);
00327                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
00328 
00329                 if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
00330                     elog(ERROR, "lost saved point in index");   /* must not happen !!! */
00331                 newDatum = gintuple_get_key(btree->ginstate, itup,
00332                                             &newCategory);
00333 
00334                 if (ginCompareEntries(btree->ginstate, attnum,
00335                                       newDatum, newCategory,
00336                                       idatum, icategory) == 0)
00337                     break;      /* Found! */
00338 
00339                 stack->off++;
00340             }
00341 
00342             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
00343                 pfree(DatumGetPointer(idatum));
00344         }
00345         else
00346         {
00347             tbm_add_tuples(scanEntry->matchBitmap,
00348                            GinGetPosting(itup), GinGetNPosting(itup), false);
00349             scanEntry->predictNumberResult += GinGetNPosting(itup);
00350         }
00351 
00352         /*
00353          * Done with this entry, go to the next
00354          */
00355         stack->off++;
00356     }
00357 }
00358 
00359 /*
00360  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
00361  */
00362 static void
00363 startScanEntry(GinState *ginstate, GinScanEntry entry)
00364 {
00365     GinBtreeData btreeEntry;
00366     GinBtreeStack *stackEntry;
00367     Page        page;
00368     bool        needUnlock;
00369 
00370 restartScanEntry:
00371     entry->buffer = InvalidBuffer;
00372     ItemPointerSetMin(&entry->curItem);
00373     entry->offset = InvalidOffsetNumber;
00374     entry->list = NULL;
00375     entry->nlist = 0;
00376     entry->matchBitmap = NULL;
00377     entry->matchResult = NULL;
00378     entry->reduceResult = FALSE;
00379     entry->predictNumberResult = 0;
00380 
00381     /*
00382      * we should find entry, and begin scan of posting tree or just store
00383      * posting list in memory
00384      */
00385     ginPrepareEntryScan(&btreeEntry, entry->attnum,
00386                         entry->queryKey, entry->queryCategory,
00387                         ginstate);
00388     btreeEntry.searchMode = TRUE;
00389     stackEntry = ginFindLeafPage(&btreeEntry, NULL);
00390     page = BufferGetPage(stackEntry->buffer);
00391     needUnlock = TRUE;
00392 
00393     entry->isFinished = TRUE;
00394 
00395     if (entry->isPartialMatch ||
00396         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
00397     {
00398         /*
00399          * btreeEntry.findItem locates the first item >= given search key.
00400          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
00401          * because of the way the GIN_CAT_EMPTY_QUERY category code is
00402          * assigned.)  We scan forward from there and collect all TIDs needed
00403          * for the entry type.
00404          */
00405         btreeEntry.findItem(&btreeEntry, stackEntry);
00406         if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false)
00407         {
00408             /*
00409              * GIN tree was seriously restructured, so we will cleanup all
00410              * found data and rescan. See comments near 'return false' in
00411              * collectMatchBitmap()
00412              */
00413             if (entry->matchBitmap)
00414             {
00415                 if (entry->matchIterator)
00416                     tbm_end_iterate(entry->matchIterator);
00417                 entry->matchIterator = NULL;
00418                 tbm_free(entry->matchBitmap);
00419                 entry->matchBitmap = NULL;
00420             }
00421             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
00422             freeGinBtreeStack(stackEntry);
00423             goto restartScanEntry;
00424         }
00425 
00426         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
00427         {
00428             entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
00429             entry->isFinished = FALSE;
00430         }
00431     }
00432     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
00433     {
00434         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
00435 
00436         if (GinIsPostingTree(itup))
00437         {
00438             BlockNumber rootPostingTree = GinGetPostingTree(itup);
00439             GinPostingTreeScan *gdi;
00440             Page        page;
00441 
00442             /*
00443              * We should unlock entry page before touching posting tree to
00444              * prevent deadlocks with vacuum processes. Because entry is never
00445              * deleted from page and posting tree is never reduced to the
00446              * posting list, we can unlock page after getting BlockNumber of
00447              * root of posting tree.
00448              */
00449             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
00450             needUnlock = FALSE;
00451             gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, TRUE);
00452 
00453             entry->buffer = ginScanBeginPostingTree(gdi);
00454 
00455             /*
00456              * We keep buffer pinned because we need to prevent deletion of
00457              * page during scan. See GIN's vacuum implementation. RefCount is
00458              * increased to keep buffer pinned after freeGinBtreeStack() call.
00459              */
00460             IncrBufferRefCount(entry->buffer);
00461 
00462             page = BufferGetPage(entry->buffer);
00463             entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
00464 
00465             /*
00466              * Keep page content in memory to prevent durable page locking
00467              */
00468             entry->list = (ItemPointerData *) palloc(BLCKSZ);
00469             entry->nlist = GinPageGetOpaque(page)->maxoff;
00470             memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
00471                    GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
00472 
00473             LockBuffer(entry->buffer, GIN_UNLOCK);
00474             freeGinBtreeStack(gdi->stack);
00475             pfree(gdi);
00476             entry->isFinished = FALSE;
00477         }
00478         else if (GinGetNPosting(itup) > 0)
00479         {
00480             entry->nlist = GinGetNPosting(itup);
00481             entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
00482             memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
00483             entry->isFinished = FALSE;
00484         }
00485     }
00486 
00487     if (needUnlock)
00488         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
00489     freeGinBtreeStack(stackEntry);
00490 }
00491 
00492 static void
00493 startScanKey(GinState *ginstate, GinScanKey key)
00494 {
00495     ItemPointerSetMin(&key->curItem);
00496     key->curItemMatches = false;
00497     key->recheckCurItem = false;
00498     key->isFinished = false;
00499 }
00500 
00501 static void
00502 startScan(IndexScanDesc scan)
00503 {
00504     GinScanOpaque so = (GinScanOpaque) scan->opaque;
00505     GinState   *ginstate = &so->ginstate;
00506     uint32      i;
00507 
00508     for (i = 0; i < so->totalentries; i++)
00509         startScanEntry(ginstate, so->entries[i]);
00510 
00511     if (GinFuzzySearchLimit > 0)
00512     {
00513         /*
00514          * If all of keys more than threshold we will try to reduce result, we
00515          * hope (and only hope, for intersection operation of array our
00516          * supposition isn't true), that total result will not more than
00517          * minimal predictNumberResult.
00518          */
00519 
00520         for (i = 0; i < so->totalentries; i++)
00521             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
00522                 return;
00523 
00524         for (i = 0; i < so->totalentries; i++)
00525             if (so->entries[i]->predictNumberResult > so->totalentries * GinFuzzySearchLimit)
00526             {
00527                 so->entries[i]->predictNumberResult /= so->totalentries;
00528                 so->entries[i]->reduceResult = TRUE;
00529             }
00530     }
00531 
00532     for (i = 0; i < so->nkeys; i++)
00533         startScanKey(ginstate, so->keys + i);
00534 }
00535 
00536 /*
00537  * Gets next ItemPointer from PostingTree. Note, that we copy
00538  * page into GinScanEntry->list array and unlock page, but keep it pinned
00539  * to prevent interference with vacuum
00540  */
00541 static void
00542 entryGetNextItem(GinState *ginstate, GinScanEntry entry)
00543 {
00544     Page        page;
00545     BlockNumber blkno;
00546 
00547     for (;;)
00548     {
00549         if (entry->offset < entry->nlist)
00550         {
00551             entry->curItem = entry->list[entry->offset++];
00552             return;
00553         }
00554 
00555         LockBuffer(entry->buffer, GIN_SHARE);
00556         page = BufferGetPage(entry->buffer);
00557         for (;;)
00558         {
00559             /*
00560              * It's needed to go by right link. During that we should refind
00561              * first ItemPointer greater that stored
00562              */
00563 
00564             blkno = GinPageGetOpaque(page)->rightlink;
00565 
00566             LockBuffer(entry->buffer, GIN_UNLOCK);
00567             if (blkno == InvalidBlockNumber)
00568             {
00569                 ReleaseBuffer(entry->buffer);
00570                 ItemPointerSetInvalid(&entry->curItem);
00571                 entry->buffer = InvalidBuffer;
00572                 entry->isFinished = TRUE;
00573                 return;
00574             }
00575 
00576             entry->buffer = ReleaseAndReadBuffer(entry->buffer,
00577                                                  ginstate->index,
00578                                                  blkno);
00579             LockBuffer(entry->buffer, GIN_SHARE);
00580             page = BufferGetPage(entry->buffer);
00581 
00582             entry->offset = InvalidOffsetNumber;
00583             if (!ItemPointerIsValid(&entry->curItem) ||
00584                 findItemInPostingPage(page, &entry->curItem, &entry->offset))
00585             {
00586                 /*
00587                  * Found position equal to or greater than stored
00588                  */
00589                 entry->nlist = GinPageGetOpaque(page)->maxoff;
00590                 memcpy(entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
00591                    GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
00592 
00593                 LockBuffer(entry->buffer, GIN_UNLOCK);
00594 
00595                 if (!ItemPointerIsValid(&entry->curItem) ||
00596                     ginCompareItemPointers(&entry->curItem,
00597                                        entry->list + entry->offset - 1) == 0)
00598                 {
00599                     /*
00600                      * First pages are deleted or empty, or we found exact
00601                      * position, so break inner loop and continue outer one.
00602                      */
00603                     break;
00604                 }
00605 
00606                 /*
00607                  * Find greater than entry->curItem position, store it.
00608                  */
00609                 entry->curItem = entry->list[entry->offset - 1];
00610 
00611                 return;
00612             }
00613         }
00614     }
00615 }
00616 
00617 #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
00618 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
00619 
00620 /*
00621  * Sets entry->curItem to next heap item pointer for one entry of one scan key,
00622  * or sets entry->isFinished to TRUE if there are no more.
00623  *
00624  * Item pointers must be returned in ascending order.
00625  *
00626  * Note: this can return a "lossy page" item pointer, indicating that the
00627  * entry potentially matches all items on that heap page.  However, it is
00628  * not allowed to return both a lossy page pointer and exact (regular)
00629  * item pointers for the same page.  (Doing so would break the key-combination
00630  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
00631  * current implementation this is guaranteed by the behavior of tidbitmaps.
00632  */
00633 static void
00634 entryGetItem(GinState *ginstate, GinScanEntry entry)
00635 {
00636     Assert(!entry->isFinished);
00637 
00638     if (entry->matchBitmap)
00639     {
00640         do
00641         {
00642             if (entry->matchResult == NULL ||
00643                 entry->offset >= entry->matchResult->ntuples)
00644             {
00645                 entry->matchResult = tbm_iterate(entry->matchIterator);
00646 
00647                 if (entry->matchResult == NULL)
00648                 {
00649                     ItemPointerSetInvalid(&entry->curItem);
00650                     tbm_end_iterate(entry->matchIterator);
00651                     entry->matchIterator = NULL;
00652                     entry->isFinished = TRUE;
00653                     break;
00654                 }
00655 
00656                 /*
00657                  * Reset counter to the beginning of entry->matchResult. Note:
00658                  * entry->offset is still greater than matchResult->ntuples if
00659                  * matchResult is lossy.  So, on next call we will get next
00660                  * result from TIDBitmap.
00661                  */
00662                 entry->offset = 0;
00663             }
00664 
00665             if (entry->matchResult->ntuples < 0)
00666             {
00667                 /*
00668                  * lossy result, so we need to check the whole page
00669                  */
00670                 ItemPointerSetLossyPage(&entry->curItem,
00671                                         entry->matchResult->blockno);
00672 
00673                 /*
00674                  * We might as well fall out of the loop; we could not
00675                  * estimate number of results on this page to support correct
00676                  * reducing of result even if it's enabled
00677                  */
00678                 break;
00679             }
00680 
00681             ItemPointerSet(&entry->curItem,
00682                            entry->matchResult->blockno,
00683                            entry->matchResult->offsets[entry->offset]);
00684             entry->offset++;
00685         } while (entry->reduceResult == TRUE && dropItem(entry));
00686     }
00687     else if (!BufferIsValid(entry->buffer))
00688     {
00689         entry->offset++;
00690         if (entry->offset <= entry->nlist)
00691             entry->curItem = entry->list[entry->offset - 1];
00692         else
00693         {
00694             ItemPointerSetInvalid(&entry->curItem);
00695             entry->isFinished = TRUE;
00696         }
00697     }
00698     else
00699     {
00700         do
00701         {
00702             entryGetNextItem(ginstate, entry);
00703         } while (entry->isFinished == FALSE &&
00704                  entry->reduceResult == TRUE &&
00705                  dropItem(entry));
00706     }
00707 }
00708 
00709 /*
00710  * Identify the "current" item among the input entry streams for this scan key,
00711  * and test whether it passes the scan key qual condition.
00712  *
00713  * The current item is the smallest curItem among the inputs.  key->curItem
00714  * is set to that value.  key->curItemMatches is set to indicate whether that
00715  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
00716  * iff recheck is needed for this item pointer (including the case where the
00717  * item pointer is a lossy page pointer).
00718  *
00719  * If all entry streams are exhausted, sets key->isFinished to TRUE.
00720  *
00721  * Item pointers must be returned in ascending order.
00722  *
00723  * Note: this can return a "lossy page" item pointer, indicating that the
00724  * key potentially matches all items on that heap page.  However, it is
00725  * not allowed to return both a lossy page pointer and exact (regular)
00726  * item pointers for the same page.  (Doing so would break the key-combination
00727  * logic in scanGetItem.)
00728  */
00729 static void
00730 keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
00731 {
00732     ItemPointerData minItem;
00733     ItemPointerData curPageLossy;
00734     uint32      i;
00735     uint32      lossyEntry;
00736     bool        haveLossyEntry;
00737     GinScanEntry entry;
00738     bool        res;
00739     MemoryContext oldCtx;
00740 
00741     Assert(!key->isFinished);
00742 
00743     /*
00744      * Find the minimum of the active entry curItems.
00745      *
00746      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
00747      * offset (0xffff), so that it will sort after any exact entries for the
00748      * same page.  So we'll prefer to return exact pointers not lossy
00749      * pointers, which is good.
00750      */
00751     ItemPointerSetMax(&minItem);
00752 
00753     for (i = 0; i < key->nentries; i++)
00754     {
00755         entry = key->scanEntry[i];
00756         if (entry->isFinished == FALSE &&
00757             ginCompareItemPointers(&entry->curItem, &minItem) < 0)
00758             minItem = entry->curItem;
00759     }
00760 
00761     if (ItemPointerIsMax(&minItem))
00762     {
00763         /* all entries are finished */
00764         key->isFinished = TRUE;
00765         return;
00766     }
00767 
00768     /*
00769      * We might have already tested this item; if so, no need to repeat work.
00770      * (Note: the ">" case can happen, if minItem is exact but we previously
00771      * had to set curItem to a lossy-page pointer.)
00772      */
00773     if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
00774         return;
00775 
00776     /*
00777      * OK, advance key->curItem and perform consistentFn test.
00778      */
00779     key->curItem = minItem;
00780 
00781     /*
00782      * Lossy-page entries pose a problem, since we don't know the correct
00783      * entryRes state to pass to the consistentFn, and we also don't know what
00784      * its combining logic will be (could be AND, OR, or even NOT). If the
00785      * logic is OR then the consistentFn might succeed for all items in the
00786      * lossy page even when none of the other entries match.
00787      *
00788      * If we have a single lossy-page entry then we check to see if the
00789      * consistentFn will succeed with only that entry TRUE.  If so, we return
00790      * a lossy-page pointer to indicate that the whole heap page must be
00791      * checked.  (On subsequent calls, we'll do nothing until minItem is past
00792      * the page altogether, thus ensuring that we never return both regular
00793      * and lossy pointers for the same page.)
00794      *
00795      * This idea could be generalized to more than one lossy-page entry, but
00796      * ideally lossy-page entries should be infrequent so it would seldom be
00797      * the case that we have more than one at once.  So it doesn't seem worth
00798      * the extra complexity to optimize that case. If we do find more than
00799      * one, we just punt and return a lossy-page pointer always.
00800      *
00801      * Note that only lossy-page entries pointing to the current item's page
00802      * should trigger this processing; we might have future lossy pages in the
00803      * entry array, but they aren't relevant yet.
00804      */
00805     ItemPointerSetLossyPage(&curPageLossy,
00806                             GinItemPointerGetBlockNumber(&key->curItem));
00807 
00808     lossyEntry = 0;
00809     haveLossyEntry = false;
00810     for (i = 0; i < key->nentries; i++)
00811     {
00812         entry = key->scanEntry[i];
00813         if (entry->isFinished == FALSE &&
00814             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
00815         {
00816             if (haveLossyEntry)
00817             {
00818                 /* Multiple lossy entries, punt */
00819                 key->curItem = curPageLossy;
00820                 key->curItemMatches = true;
00821                 key->recheckCurItem = true;
00822                 return;
00823             }
00824             lossyEntry = i;
00825             haveLossyEntry = true;
00826         }
00827     }
00828 
00829     /* prepare for calling consistentFn in temp context */
00830     oldCtx = MemoryContextSwitchTo(tempCtx);
00831 
00832     if (haveLossyEntry)
00833     {
00834         /* Single lossy-page entry, so see if whole page matches */
00835         memset(key->entryRes, FALSE, key->nentries);
00836         key->entryRes[lossyEntry] = TRUE;
00837 
00838         if (callConsistentFn(ginstate, key))
00839         {
00840             /* Yes, so clean up ... */
00841             MemoryContextSwitchTo(oldCtx);
00842             MemoryContextReset(tempCtx);
00843 
00844             /* and return lossy pointer for whole page */
00845             key->curItem = curPageLossy;
00846             key->curItemMatches = true;
00847             key->recheckCurItem = true;
00848             return;
00849         }
00850     }
00851 
00852     /*
00853      * At this point we know that we don't need to return a lossy whole-page
00854      * pointer, but we might have matches for individual exact item pointers,
00855      * possibly in combination with a lossy pointer.  Our strategy if there's
00856      * a lossy pointer is to try the consistentFn both ways and return a hit
00857      * if it accepts either one (forcing the hit to be marked lossy so it will
00858      * be rechecked).  An exception is that we don't need to try it both ways
00859      * if the lossy pointer is in a "hidden" entry, because the consistentFn's
00860      * result can't depend on that.
00861      *
00862      * Prepare entryRes array to be passed to consistentFn.
00863      */
00864     for (i = 0; i < key->nentries; i++)
00865     {
00866         entry = key->scanEntry[i];
00867         if (entry->isFinished == FALSE &&
00868             ginCompareItemPointers(&entry->curItem, &key->curItem) == 0)
00869             key->entryRes[i] = TRUE;
00870         else
00871             key->entryRes[i] = FALSE;
00872     }
00873     if (haveLossyEntry)
00874         key->entryRes[lossyEntry] = TRUE;
00875 
00876     res = callConsistentFn(ginstate, key);
00877 
00878     if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
00879     {
00880         /* try the other way for the lossy item */
00881         key->entryRes[lossyEntry] = FALSE;
00882 
00883         res = callConsistentFn(ginstate, key);
00884     }
00885 
00886     key->curItemMatches = res;
00887     /* If we matched a lossy entry, force recheckCurItem = true */
00888     if (haveLossyEntry)
00889         key->recheckCurItem = true;
00890 
00891     /* clean up after consistentFn calls */
00892     MemoryContextSwitchTo(oldCtx);
00893     MemoryContextReset(tempCtx);
00894 }
00895 
00896 /*
00897  * Get next heap item pointer (after advancePast) from scan.
00898  * Returns true if anything found.
00899  * On success, *item and *recheck are set.
00900  *
00901  * Note: this is very nearly the same logic as in keyGetItem(), except
00902  * that we know the keys are to be combined with AND logic, whereas in
00903  * keyGetItem() the combination logic is known only to the consistentFn.
00904  */
00905 static bool
00906 scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
00907             ItemPointerData *item, bool *recheck)
00908 {
00909     GinScanOpaque so = (GinScanOpaque) scan->opaque;
00910     GinState   *ginstate = &so->ginstate;
00911     ItemPointerData myAdvancePast = *advancePast;
00912     uint32      i;
00913     bool        allFinished;
00914     bool        match;
00915 
00916     for (;;)
00917     {
00918         /*
00919          * Advance any entries that are <= myAdvancePast.  In particular,
00920          * since entry->curItem was initialized with ItemPointerSetMin, this
00921          * ensures we fetch the first item for each entry on the first call.
00922          */
00923         allFinished = TRUE;
00924 
00925         for (i = 0; i < so->totalentries; i++)
00926         {
00927             GinScanEntry entry = so->entries[i];
00928 
00929             while (entry->isFinished == FALSE &&
00930                    ginCompareItemPointers(&entry->curItem,
00931                                           &myAdvancePast) <= 0)
00932                 entryGetItem(ginstate, entry);
00933 
00934             if (entry->isFinished == FALSE)
00935                 allFinished = FALSE;
00936         }
00937 
00938         if (allFinished)
00939         {
00940             /* all entries exhausted, so we're done */
00941             return false;
00942         }
00943 
00944         /*
00945          * Perform the consistentFn test for each scan key.  If any key
00946          * reports isFinished, meaning its subset of the entries is exhausted,
00947          * we can stop.  Otherwise, set *item to the minimum of the key
00948          * curItems.
00949          */
00950         ItemPointerSetMax(item);
00951 
00952         for (i = 0; i < so->nkeys; i++)
00953         {
00954             GinScanKey  key = so->keys + i;
00955 
00956             keyGetItem(&so->ginstate, so->tempCtx, key);
00957 
00958             if (key->isFinished)
00959                 return false;   /* finished one of keys */
00960 
00961             if (ginCompareItemPointers(&key->curItem, item) < 0)
00962                 *item = key->curItem;
00963         }
00964 
00965         Assert(!ItemPointerIsMax(item));
00966 
00967         /*----------
00968          * Now *item contains first ItemPointer after previous result.
00969          *
00970          * The item is a valid hit only if all the keys succeeded for either
00971          * that exact TID, or a lossy reference to the same page.
00972          *
00973          * This logic works only if a keyGetItem stream can never contain both
00974          * exact and lossy pointers for the same page.  Else we could have a
00975          * case like
00976          *
00977          *      stream 1        stream 2
00978          *      ...             ...
00979          *      42/6            42/7
00980          *      50/1            42/0xffff
00981          *      ...             ...
00982          *
00983          * We would conclude that 42/6 is not a match and advance stream 1,
00984          * thus never detecting the match to the lossy pointer in stream 2.
00985          * (keyGetItem has a similar problem versus entryGetItem.)
00986          *----------
00987          */
00988         match = true;
00989         for (i = 0; i < so->nkeys; i++)
00990         {
00991             GinScanKey  key = so->keys + i;
00992 
00993             if (key->curItemMatches)
00994             {
00995                 if (ginCompareItemPointers(item, &key->curItem) == 0)
00996                     continue;
00997                 if (ItemPointerIsLossyPage(&key->curItem) &&
00998                     GinItemPointerGetBlockNumber(&key->curItem) ==
00999                     GinItemPointerGetBlockNumber(item))
01000                     continue;
01001             }
01002             match = false;
01003             break;
01004         }
01005 
01006         if (match)
01007             break;
01008 
01009         /*
01010          * No hit.  Update myAdvancePast to this TID, so that on the next pass
01011          * we'll move to the next possible entry.
01012          */
01013         myAdvancePast = *item;
01014     }
01015 
01016     /*
01017      * We must return recheck = true if any of the keys are marked recheck.
01018      */
01019     *recheck = false;
01020     for (i = 0; i < so->nkeys; i++)
01021     {
01022         GinScanKey  key = so->keys + i;
01023 
01024         if (key->recheckCurItem)
01025         {
01026             *recheck = true;
01027             break;
01028         }
01029     }
01030 
01031     return TRUE;
01032 }
01033 
01034 
01035 /*
01036  * Functions for scanning the pending list
01037  */
01038 
01039 
01040 /*
01041  * Get ItemPointer of next heap row to be checked from pending list.
01042  * Returns false if there are no more. On pages with several heap rows
01043  * it returns each row separately, on page with part of heap row returns
01044  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
01045  * the range of pending-list tuples belonging to this heap row.
01046  *
01047  * The pendingBuffer is presumed pinned and share-locked on entry, and is
01048  * pinned and share-locked on success exit.  On failure exit it's released.
01049  */
01050 static bool
01051 scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
01052 {
01053     OffsetNumber maxoff;
01054     Page        page;
01055     IndexTuple  itup;
01056 
01057     ItemPointerSetInvalid(&pos->item);
01058     for (;;)
01059     {
01060         page = BufferGetPage(pos->pendingBuffer);
01061 
01062         maxoff = PageGetMaxOffsetNumber(page);
01063         if (pos->firstOffset > maxoff)
01064         {
01065             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
01066 
01067             if (blkno == InvalidBlockNumber)
01068             {
01069                 UnlockReleaseBuffer(pos->pendingBuffer);
01070                 pos->pendingBuffer = InvalidBuffer;
01071 
01072                 return false;
01073             }
01074             else
01075             {
01076                 /*
01077                  * Here we must prevent deletion of next page by insertcleanup
01078                  * process, which may be trying to obtain exclusive lock on
01079                  * current page.  So, we lock next page before releasing the
01080                  * current one
01081                  */
01082                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
01083 
01084                 LockBuffer(tmpbuf, GIN_SHARE);
01085                 UnlockReleaseBuffer(pos->pendingBuffer);
01086 
01087                 pos->pendingBuffer = tmpbuf;
01088                 pos->firstOffset = FirstOffsetNumber;
01089             }
01090         }
01091         else
01092         {
01093             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
01094             pos->item = itup->t_tid;
01095             if (GinPageHasFullRow(page))
01096             {
01097                 /*
01098                  * find itempointer to the next row
01099                  */
01100                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
01101                 {
01102                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
01103                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
01104                         break;
01105                 }
01106             }
01107             else
01108             {
01109                 /*
01110                  * All itempointers are the same on this page
01111                  */
01112                 pos->lastOffset = maxoff + 1;
01113             }
01114 
01115             /*
01116              * Now pos->firstOffset points to the first tuple of current heap
01117              * row, pos->lastOffset points to the first tuple of next heap row
01118              * (or to the end of page)
01119              */
01120             break;
01121         }
01122     }
01123 
01124     return true;
01125 }
01126 
01127 /*
01128  * Scan pending-list page from current tuple (off) up till the first of:
01129  * - match is found (then returns true)
01130  * - no later match is possible
01131  * - tuple's attribute number is not equal to entry's attrnum
01132  * - reach end of page
01133  *
01134  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
01135  * of gintuple_get_key() on the current page.
01136  */
01137 static bool
01138 matchPartialInPendingList(GinState *ginstate, Page page,
01139                           OffsetNumber off, OffsetNumber maxoff,
01140                           GinScanEntry entry,
01141                           Datum *datum, GinNullCategory *category,
01142                           bool *datumExtracted)
01143 {
01144     IndexTuple  itup;
01145     int32       cmp;
01146 
01147     /* Partial match to a null is not possible */
01148     if (entry->queryCategory != GIN_CAT_NORM_KEY)
01149         return false;
01150 
01151     while (off < maxoff)
01152     {
01153         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
01154 
01155         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
01156             return false;
01157 
01158         if (datumExtracted[off - 1] == false)
01159         {
01160             datum[off - 1] = gintuple_get_key(ginstate, itup,
01161                                               &category[off - 1]);
01162             datumExtracted[off - 1] = true;
01163         }
01164 
01165         /* Once we hit nulls, no further match is possible */
01166         if (category[off - 1] != GIN_CAT_NORM_KEY)
01167             return false;
01168 
01169         /*----------
01170          * Check partial match.
01171          * case cmp == 0 => match
01172          * case cmp > 0 => not match and end scan (no later match possible)
01173          * case cmp < 0 => not match and continue scan
01174          *----------
01175          */
01176         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
01177                                ginstate->supportCollation[entry->attnum - 1],
01178                                               entry->queryKey,
01179                                               datum[off - 1],
01180                                               UInt16GetDatum(entry->strategy),
01181                                         PointerGetDatum(entry->extra_data)));
01182         if (cmp == 0)
01183             return true;
01184         else if (cmp > 0)
01185             return false;
01186 
01187         off++;
01188     }
01189 
01190     return false;
01191 }
01192 
01193 /*
01194  * Set up the entryRes array for each key by looking at
01195  * every entry for current heap row in pending list.
01196  *
01197  * Returns true if each scan key has at least one entryRes match.
01198  * This corresponds to the situations where the normal index search will
01199  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
01200  * cannot be returned by the normal search since no entry stream will
01201  * source its TID.)
01202  *
01203  * The pendingBuffer is presumed pinned and share-locked on entry.
01204  */
01205 static bool
01206 collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
01207 {
01208     GinScanOpaque so = (GinScanOpaque) scan->opaque;
01209     OffsetNumber attrnum;
01210     Page        page;
01211     IndexTuple  itup;
01212     int         i,
01213                 j;
01214 
01215     /*
01216      * Reset all entryRes and hasMatchKey flags
01217      */
01218     for (i = 0; i < so->nkeys; i++)
01219     {
01220         GinScanKey  key = so->keys + i;
01221 
01222         memset(key->entryRes, FALSE, key->nentries);
01223     }
01224     memset(pos->hasMatchKey, FALSE, so->nkeys);
01225 
01226     /*
01227      * Outer loop iterates over multiple pending-list pages when a single heap
01228      * row has entries spanning those pages.
01229      */
01230     for (;;)
01231     {
01232         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
01233         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
01234         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
01235 
01236         Assert(pos->lastOffset > pos->firstOffset);
01237         memset(datumExtracted + pos->firstOffset - 1, 0,
01238                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
01239 
01240         page = BufferGetPage(pos->pendingBuffer);
01241 
01242         for (i = 0; i < so->nkeys; i++)
01243         {
01244             GinScanKey  key = so->keys + i;
01245 
01246             for (j = 0; j < key->nentries; j++)
01247             {
01248                 GinScanEntry entry = key->scanEntry[j];
01249                 OffsetNumber StopLow = pos->firstOffset,
01250                             StopHigh = pos->lastOffset,
01251                             StopMiddle;
01252 
01253                 /* If already matched on earlier page, do no extra work */
01254                 if (key->entryRes[j])
01255                     continue;
01256 
01257                 /*
01258                  * Interesting tuples are from pos->firstOffset to
01259                  * pos->lastOffset and they are ordered by (attnum, Datum) as
01260                  * it's done in entry tree.  So we can use binary search to
01261                  * avoid linear scanning.
01262                  */
01263                 while (StopLow < StopHigh)
01264                 {
01265                     int         res;
01266 
01267                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
01268 
01269                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
01270 
01271                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
01272 
01273                     if (key->attnum < attrnum)
01274                     {
01275                         StopHigh = StopMiddle;
01276                         continue;
01277                     }
01278                     if (key->attnum > attrnum)
01279                     {
01280                         StopLow = StopMiddle + 1;
01281                         continue;
01282                     }
01283 
01284                     if (datumExtracted[StopMiddle - 1] == false)
01285                     {
01286                         datum[StopMiddle - 1] =
01287                             gintuple_get_key(&so->ginstate, itup,
01288                                              &category[StopMiddle - 1]);
01289                         datumExtracted[StopMiddle - 1] = true;
01290                     }
01291 
01292                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
01293                     {
01294                         /* special behavior depending on searchMode */
01295                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
01296                         {
01297                             /* match anything except NULL_ITEM */
01298                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
01299                                 res = -1;
01300                             else
01301                                 res = 0;
01302                         }
01303                         else
01304                         {
01305                             /* match everything */
01306                             res = 0;
01307                         }
01308                     }
01309                     else
01310                     {
01311                         res = ginCompareEntries(&so->ginstate,
01312                                                 entry->attnum,
01313                                                 entry->queryKey,
01314                                                 entry->queryCategory,
01315                                                 datum[StopMiddle - 1],
01316                                                 category[StopMiddle - 1]);
01317                     }
01318 
01319                     if (res == 0)
01320                     {
01321                         /*
01322                          * Found exact match (there can be only one, except in
01323                          * EMPTY_QUERY mode).
01324                          *
01325                          * If doing partial match, scan forward from here to
01326                          * end of page to check for matches.
01327                          *
01328                          * See comment above about tuple's ordering.
01329                          */
01330                         if (entry->isPartialMatch)
01331                             key->entryRes[j] =
01332                                 matchPartialInPendingList(&so->ginstate,
01333                                                           page,
01334                                                           StopMiddle,
01335                                                           pos->lastOffset,
01336                                                           entry,
01337                                                           datum,
01338                                                           category,
01339                                                           datumExtracted);
01340                         else
01341                             key->entryRes[j] = true;
01342 
01343                         /* done with binary search */
01344                         break;
01345                     }
01346                     else if (res < 0)
01347                         StopHigh = StopMiddle;
01348                     else
01349                         StopLow = StopMiddle + 1;
01350                 }
01351 
01352                 if (StopLow >= StopHigh && entry->isPartialMatch)
01353                 {
01354                     /*
01355                      * No exact match on this page.  If doing partial match,
01356                      * scan from the first tuple greater than target value to
01357                      * end of page.  Note that since we don't remember whether
01358                      * the comparePartialFn told us to stop early on a
01359                      * previous page, we will uselessly apply comparePartialFn
01360                      * to the first tuple on each subsequent page.
01361                      */
01362                     key->entryRes[j] =
01363                         matchPartialInPendingList(&so->ginstate,
01364                                                   page,
01365                                                   StopHigh,
01366                                                   pos->lastOffset,
01367                                                   entry,
01368                                                   datum,
01369                                                   category,
01370                                                   datumExtracted);
01371                 }
01372 
01373                 pos->hasMatchKey[i] |= key->entryRes[j];
01374             }
01375         }
01376 
01377         /* Advance firstOffset over the scanned tuples */
01378         pos->firstOffset = pos->lastOffset;
01379 
01380         if (GinPageHasFullRow(page))
01381         {
01382             /*
01383              * We have examined all pending entries for the current heap row.
01384              * Break out of loop over pages.
01385              */
01386             break;
01387         }
01388         else
01389         {
01390             /*
01391              * Advance to next page of pending entries for the current heap
01392              * row.  Complain if there isn't one.
01393              */
01394             ItemPointerData item = pos->item;
01395 
01396             if (scanGetCandidate(scan, pos) == false ||
01397                 !ItemPointerEquals(&pos->item, &item))
01398                 elog(ERROR, "could not find additional pending pages for same heap tuple");
01399         }
01400     }
01401 
01402     /*
01403      * Now return "true" if all scan keys have at least one matching datum
01404      */
01405     for (i = 0; i < so->nkeys; i++)
01406     {
01407         if (pos->hasMatchKey[i] == false)
01408             return false;
01409     }
01410 
01411     return true;
01412 }
01413 
01414 /*
01415  * Collect all matched rows from pending list into bitmap
01416  */
01417 static void
01418 scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
01419 {
01420     GinScanOpaque so = (GinScanOpaque) scan->opaque;
01421     MemoryContext oldCtx;
01422     bool        recheck,
01423                 match;
01424     int         i;
01425     pendingPosition pos;
01426     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
01427     BlockNumber blkno;
01428 
01429     *ntids = 0;
01430 
01431     LockBuffer(metabuffer, GIN_SHARE);
01432     blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
01433 
01434     /*
01435      * fetch head of list before unlocking metapage. head page must be pinned
01436      * to prevent deletion by vacuum process
01437      */
01438     if (blkno == InvalidBlockNumber)
01439     {
01440         /* No pending list, so proceed with normal scan */
01441         UnlockReleaseBuffer(metabuffer);
01442         return;
01443     }
01444 
01445     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
01446     LockBuffer(pos.pendingBuffer, GIN_SHARE);
01447     pos.firstOffset = FirstOffsetNumber;
01448     UnlockReleaseBuffer(metabuffer);
01449     pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
01450 
01451     /*
01452      * loop for each heap row. scanGetCandidate returns full row or row's
01453      * tuples from first page.
01454      */
01455     while (scanGetCandidate(scan, &pos))
01456     {
01457         /*
01458          * Check entries in tuple and set up entryRes array.
01459          *
01460          * If pending tuples belonging to the current heap row are spread
01461          * across several pages, collectMatchesForHeapRow will read all of
01462          * those pages.
01463          */
01464         if (!collectMatchesForHeapRow(scan, &pos))
01465             continue;
01466 
01467         /*
01468          * Matching of entries of one row is finished, so check row using
01469          * consistent functions.
01470          */
01471         oldCtx = MemoryContextSwitchTo(so->tempCtx);
01472         recheck = false;
01473         match = true;
01474 
01475         for (i = 0; i < so->nkeys; i++)
01476         {
01477             GinScanKey  key = so->keys + i;
01478 
01479             if (!callConsistentFn(&so->ginstate, key))
01480             {
01481                 match = false;
01482                 break;
01483             }
01484             recheck |= key->recheckCurItem;
01485         }
01486 
01487         MemoryContextSwitchTo(oldCtx);
01488         MemoryContextReset(so->tempCtx);
01489 
01490         if (match)
01491         {
01492             tbm_add_tuples(tbm, &pos.item, 1, recheck);
01493             (*ntids)++;
01494         }
01495     }
01496 
01497     pfree(pos.hasMatchKey);
01498 }
01499 
01500 
01501 #define GinIsNewKey(s)      ( ((GinScanOpaque) scan->opaque)->keys == NULL )
01502 #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
01503 
01504 Datum
01505 gingetbitmap(PG_FUNCTION_ARGS)
01506 {
01507     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
01508     TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
01509     int64       ntids;
01510     ItemPointerData iptr;
01511     bool        recheck;
01512 
01513     /*
01514      * Set up the scan keys, and check for unsatisfiable query.
01515      */
01516     if (GinIsNewKey(scan))
01517         ginNewScanKey(scan);
01518 
01519     if (GinIsVoidRes(scan))
01520         PG_RETURN_INT64(0);
01521 
01522     ntids = 0;
01523 
01524     /*
01525      * First, scan the pending list and collect any matching entries into the
01526      * bitmap.  After we scan a pending item, some other backend could post it
01527      * into the main index, and so we might visit it a second time during the
01528      * main scan.  This is okay because we'll just re-set the same bit in the
01529      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
01530      * can't support the amgettuple API, however.) Note that it would not do
01531      * to scan the main index before the pending list, since concurrent
01532      * cleanup could then make us miss entries entirely.
01533      */
01534     scanPendingInsert(scan, tbm, &ntids);
01535 
01536     /*
01537      * Now scan the main index.
01538      */
01539     startScan(scan);
01540 
01541     ItemPointerSetMin(&iptr);
01542 
01543     for (;;)
01544     {
01545         CHECK_FOR_INTERRUPTS();
01546 
01547         if (!scanGetItem(scan, &iptr, &iptr, &recheck))
01548             break;
01549 
01550         if (ItemPointerIsLossyPage(&iptr))
01551             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
01552         else
01553             tbm_add_tuples(tbm, &iptr, 1, recheck);
01554         ntids++;
01555     }
01556 
01557     PG_RETURN_INT64(ntids);
01558 }