Header And Logo

PostgreSQL
| The world's most advanced open source database.

gistget.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * gistget.c
00004  *    fetch tuples from a GiST scan.
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/access/gist/gistget.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include "access/gist_private.h"
00018 #include "access/relscan.h"
00019 #include "miscadmin.h"
00020 #include "pgstat.h"
00021 #include "utils/builtins.h"
00022 #include "utils/memutils.h"
00023 #include "utils/rel.h"
00024 
00025 
00026 /*
00027  * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
00028  *
00029  * The index tuple might represent either a heap tuple or a lower index page,
00030  * depending on whether the containing page is a leaf page or not.
00031  *
00032  * On success return for a heap tuple, *recheck_p is set to indicate
00033  * whether recheck is needed.  We recheck if any of the consistent() functions
00034  * request it.  recheck is not interesting when examining a non-leaf entry,
00035  * since we must visit the lower index page if there's any doubt.
00036  *
00037  * If we are doing an ordered scan, so->distances[] is filled with distance
00038  * data from the distance() functions before returning success.
00039  *
00040  * We must decompress the key in the IndexTuple before passing it to the
00041  * sk_funcs (which actually are the opclass Consistent or Distance methods).
00042  *
00043  * Note that this function is always invoked in a short-lived memory context,
00044  * so we don't need to worry about cleaning up allocated memory, either here
00045  * or in the implementation of any Consistent or Distance methods.
00046  */
00047 static bool
00048 gistindex_keytest(IndexScanDesc scan,
00049                   IndexTuple tuple,
00050                   Page page,
00051                   OffsetNumber offset,
00052                   bool *recheck_p)
00053 {
00054     GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
00055     GISTSTATE  *giststate = so->giststate;
00056     ScanKey     key = scan->keyData;
00057     int         keySize = scan->numberOfKeys;
00058     double     *distance_p;
00059     Relation    r = scan->indexRelation;
00060 
00061     *recheck_p = false;
00062 
00063     /*
00064      * If it's a leftover invalid tuple from pre-9.1, treat it as a match with
00065      * minimum possible distances.  This means we'll always follow it to the
00066      * referenced page.
00067      */
00068     if (GistTupleIsInvalid(tuple))
00069     {
00070         int         i;
00071 
00072         if (GistPageIsLeaf(page))       /* shouldn't happen */
00073             elog(ERROR, "invalid GiST tuple found on leaf page");
00074         for (i = 0; i < scan->numberOfOrderBys; i++)
00075             so->distances[i] = -get_float8_infinity();
00076         return true;
00077     }
00078 
00079     /* Check whether it matches according to the Consistent functions */
00080     while (keySize > 0)
00081     {
00082         Datum       datum;
00083         bool        isNull;
00084 
00085         datum = index_getattr(tuple,
00086                               key->sk_attno,
00087                               giststate->tupdesc,
00088                               &isNull);
00089 
00090         if (key->sk_flags & SK_ISNULL)
00091         {
00092             /*
00093              * On non-leaf page we can't conclude that child hasn't NULL
00094              * values because of assumption in GiST: union (VAL, NULL) is VAL.
00095              * But if on non-leaf page key IS NULL, then all children are
00096              * NULL.
00097              */
00098             if (key->sk_flags & SK_SEARCHNULL)
00099             {
00100                 if (GistPageIsLeaf(page) && !isNull)
00101                     return false;
00102             }
00103             else
00104             {
00105                 Assert(key->sk_flags & SK_SEARCHNOTNULL);
00106                 if (isNull)
00107                     return false;
00108             }
00109         }
00110         else if (isNull)
00111         {
00112             return false;
00113         }
00114         else
00115         {
00116             Datum       test;
00117             bool        recheck;
00118             GISTENTRY   de;
00119 
00120             gistdentryinit(giststate, key->sk_attno - 1, &de,
00121                            datum, r, page, offset,
00122                            FALSE, isNull);
00123 
00124             /*
00125              * Call the Consistent function to evaluate the test.  The
00126              * arguments are the index datum (as a GISTENTRY*), the comparison
00127              * datum, the comparison operator's strategy number and subtype
00128              * from pg_amop, and the recheck flag.
00129              *
00130              * (Presently there's no need to pass the subtype since it'll
00131              * always be zero, but might as well pass it for possible future
00132              * use.)
00133              *
00134              * We initialize the recheck flag to true (the safest assumption)
00135              * in case the Consistent function forgets to set it.
00136              */
00137             recheck = true;
00138 
00139             test = FunctionCall5Coll(&key->sk_func,
00140                                      key->sk_collation,
00141                                      PointerGetDatum(&de),
00142                                      key->sk_argument,
00143                                      Int32GetDatum(key->sk_strategy),
00144                                      ObjectIdGetDatum(key->sk_subtype),
00145                                      PointerGetDatum(&recheck));
00146 
00147             if (!DatumGetBool(test))
00148                 return false;
00149             *recheck_p |= recheck;
00150         }
00151 
00152         key++;
00153         keySize--;
00154     }
00155 
00156     /* OK, it passes --- now let's compute the distances */
00157     key = scan->orderByData;
00158     distance_p = so->distances;
00159     keySize = scan->numberOfOrderBys;
00160     while (keySize > 0)
00161     {
00162         Datum       datum;
00163         bool        isNull;
00164 
00165         datum = index_getattr(tuple,
00166                               key->sk_attno,
00167                               giststate->tupdesc,
00168                               &isNull);
00169 
00170         if ((key->sk_flags & SK_ISNULL) || isNull)
00171         {
00172             /* Assume distance computes as null and sorts to the end */
00173             *distance_p = get_float8_infinity();
00174         }
00175         else
00176         {
00177             Datum       dist;
00178             GISTENTRY   de;
00179 
00180             gistdentryinit(giststate, key->sk_attno - 1, &de,
00181                            datum, r, page, offset,
00182                            FALSE, isNull);
00183 
00184             /*
00185              * Call the Distance function to evaluate the distance.  The
00186              * arguments are the index datum (as a GISTENTRY*), the comparison
00187              * datum, and the ordering operator's strategy number and subtype
00188              * from pg_amop.
00189              *
00190              * (Presently there's no need to pass the subtype since it'll
00191              * always be zero, but might as well pass it for possible future
00192              * use.)
00193              *
00194              * Note that Distance functions don't get a recheck argument. We
00195              * can't tolerate lossy distance calculations on leaf tuples;
00196              * there is no opportunity to re-sort the tuples afterwards.
00197              */
00198             dist = FunctionCall4Coll(&key->sk_func,
00199                                      key->sk_collation,
00200                                      PointerGetDatum(&de),
00201                                      key->sk_argument,
00202                                      Int32GetDatum(key->sk_strategy),
00203                                      ObjectIdGetDatum(key->sk_subtype));
00204 
00205             *distance_p = DatumGetFloat8(dist);
00206         }
00207 
00208         key++;
00209         distance_p++;
00210         keySize--;
00211     }
00212 
00213     return true;
00214 }
00215 
00216 /*
00217  * Scan all items on the GiST index page identified by *pageItem, and insert
00218  * them into the queue (or directly to output areas)
00219  *
00220  * scan: index scan we are executing
00221  * pageItem: search queue item identifying an index page to scan
00222  * myDistances: distances array associated with pageItem, or NULL at the root
00223  * tbm: if not NULL, gistgetbitmap's output bitmap
00224  * ntids: if not NULL, gistgetbitmap's output tuple counter
00225  *
00226  * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
00227  * tuples should be reported directly into the bitmap.  If they are NULL,
00228  * we're doing a plain or ordered indexscan.  For a plain indexscan, heap
00229  * tuple TIDs are returned into so->pageData[].  For an ordered indexscan,
00230  * heap tuple TIDs are pushed into individual search queue items.
00231  *
00232  * If we detect that the index page has split since we saw its downlink
00233  * in the parent, we push its new right sibling onto the queue so the
00234  * sibling will be processed next.
00235  */
00236 static void
00237 gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
00238              TIDBitmap *tbm, int64 *ntids)
00239 {
00240     GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
00241     Buffer      buffer;
00242     Page        page;
00243     GISTPageOpaque opaque;
00244     OffsetNumber maxoff;
00245     OffsetNumber i;
00246     GISTSearchTreeItem *tmpItem = so->tmpTreeItem;
00247     bool        isNew;
00248     MemoryContext oldcxt;
00249 
00250     Assert(!GISTSearchItemIsHeap(*pageItem));
00251 
00252     buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
00253     LockBuffer(buffer, GIST_SHARE);
00254     gistcheckpage(scan->indexRelation, buffer);
00255     page = BufferGetPage(buffer);
00256     opaque = GistPageGetOpaque(page);
00257 
00258     /*
00259      * Check if we need to follow the rightlink. We need to follow it if the
00260      * page was concurrently split since we visited the parent (in which case
00261      * parentlsn < nsn), or if the system crashed after a page split but
00262      * before the downlink was inserted into the parent.
00263      */
00264     if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
00265         (GistFollowRight(page) ||
00266          pageItem->data.parentlsn < GistPageGetNSN(page)) &&
00267         opaque->rightlink != InvalidBlockNumber /* sanity check */ )
00268     {
00269         /* There was a page split, follow right link to add pages */
00270         GISTSearchItem *item;
00271 
00272         /* This can't happen when starting at the root */
00273         Assert(myDistances != NULL);
00274 
00275         oldcxt = MemoryContextSwitchTo(so->queueCxt);
00276 
00277         /* Create new GISTSearchItem for the right sibling index page */
00278         item = palloc(sizeof(GISTSearchItem));
00279         item->next = NULL;
00280         item->blkno = opaque->rightlink;
00281         item->data.parentlsn = pageItem->data.parentlsn;
00282 
00283         /* Insert it into the queue using same distances as for this page */
00284         tmpItem->head = item;
00285         tmpItem->lastHeap = NULL;
00286         memcpy(tmpItem->distances, myDistances,
00287                sizeof(double) * scan->numberOfOrderBys);
00288 
00289         (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
00290 
00291         MemoryContextSwitchTo(oldcxt);
00292     }
00293 
00294     so->nPageData = so->curPageData = 0;
00295 
00296     /*
00297      * check all tuples on page
00298      */
00299     maxoff = PageGetMaxOffsetNumber(page);
00300     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
00301     {
00302         IndexTuple  it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
00303         bool        match;
00304         bool        recheck;
00305 
00306         /*
00307          * Must call gistindex_keytest in tempCxt, and clean up any leftover
00308          * junk afterward.
00309          */
00310         oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt);
00311 
00312         match = gistindex_keytest(scan, it, page, i, &recheck);
00313 
00314         MemoryContextSwitchTo(oldcxt);
00315         MemoryContextReset(so->giststate->tempCxt);
00316 
00317         /* Ignore tuple if it doesn't match */
00318         if (!match)
00319             continue;
00320 
00321         if (tbm && GistPageIsLeaf(page))
00322         {
00323             /*
00324              * getbitmap scan, so just push heap tuple TIDs into the bitmap
00325              * without worrying about ordering
00326              */
00327             tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
00328             (*ntids)++;
00329         }
00330         else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
00331         {
00332             /*
00333              * Non-ordered scan, so report heap tuples in so->pageData[]
00334              */
00335             so->pageData[so->nPageData].heapPtr = it->t_tid;
00336             so->pageData[so->nPageData].recheck = recheck;
00337             so->nPageData++;
00338         }
00339         else
00340         {
00341             /*
00342              * Must push item into search queue.  We get here for any lower
00343              * index page, and also for heap tuples if doing an ordered
00344              * search.
00345              */
00346             GISTSearchItem *item;
00347 
00348             oldcxt = MemoryContextSwitchTo(so->queueCxt);
00349 
00350             /* Create new GISTSearchItem for this item */
00351             item = palloc(sizeof(GISTSearchItem));
00352             item->next = NULL;
00353 
00354             if (GistPageIsLeaf(page))
00355             {
00356                 /* Creating heap-tuple GISTSearchItem */
00357                 item->blkno = InvalidBlockNumber;
00358                 item->data.heap.heapPtr = it->t_tid;
00359                 item->data.heap.recheck = recheck;
00360             }
00361             else
00362             {
00363                 /* Creating index-page GISTSearchItem */
00364                 item->blkno = ItemPointerGetBlockNumber(&it->t_tid);
00365 
00366                 /*
00367                  * LSN of current page is lsn of parent page for child. We only
00368                  * have a shared lock, so we need to get the LSN atomically.
00369                  */
00370                 item->data.parentlsn = BufferGetLSNAtomic(buffer);
00371             }
00372 
00373             /* Insert it into the queue using new distance data */
00374             tmpItem->head = item;
00375             tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL;
00376             memcpy(tmpItem->distances, so->distances,
00377                    sizeof(double) * scan->numberOfOrderBys);
00378 
00379             (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
00380 
00381             MemoryContextSwitchTo(oldcxt);
00382         }
00383     }
00384 
00385     UnlockReleaseBuffer(buffer);
00386 }
00387 
00388 /*
00389  * Extract next item (in order) from search queue
00390  *
00391  * Returns a GISTSearchItem or NULL.  Caller must pfree item when done with it.
00392  *
00393  * NOTE: on successful return, so->curTreeItem is the GISTSearchTreeItem that
00394  * contained the result item.  Callers can use so->curTreeItem->distances as
00395  * the distances value for the item.
00396  */
00397 static GISTSearchItem *
00398 getNextGISTSearchItem(GISTScanOpaque so)
00399 {
00400     for (;;)
00401     {
00402         GISTSearchItem *item;
00403 
00404         /* Update curTreeItem if we don't have one */
00405         if (so->curTreeItem == NULL)
00406         {
00407             so->curTreeItem = (GISTSearchTreeItem *) rb_leftmost(so->queue);
00408             /* Done when tree is empty */
00409             if (so->curTreeItem == NULL)
00410                 break;
00411         }
00412 
00413         item = so->curTreeItem->head;
00414         if (item != NULL)
00415         {
00416             /* Delink item from chain */
00417             so->curTreeItem->head = item->next;
00418             if (item == so->curTreeItem->lastHeap)
00419                 so->curTreeItem->lastHeap = NULL;
00420             /* Return item; caller is responsible to pfree it */
00421             return item;
00422         }
00423 
00424         /* curTreeItem is exhausted, so remove it from rbtree */
00425         rb_delete(so->queue, (RBNode *) so->curTreeItem);
00426         so->curTreeItem = NULL;
00427     }
00428 
00429     return NULL;
00430 }
00431 
00432 /*
00433  * Fetch next heap tuple in an ordered search
00434  */
00435 static bool
00436 getNextNearest(IndexScanDesc scan)
00437 {
00438     GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
00439     bool        res = false;
00440 
00441     do
00442     {
00443         GISTSearchItem *item = getNextGISTSearchItem(so);
00444 
00445         if (!item)
00446             break;
00447 
00448         if (GISTSearchItemIsHeap(*item))
00449         {
00450             /* found a heap item at currently minimal distance */
00451             scan->xs_ctup.t_self = item->data.heap.heapPtr;
00452             scan->xs_recheck = item->data.heap.recheck;
00453             res = true;
00454         }
00455         else
00456         {
00457             /* visit an index page, extract its items into queue */
00458             CHECK_FOR_INTERRUPTS();
00459 
00460             gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
00461         }
00462 
00463         pfree(item);
00464     } while (!res);
00465 
00466     return res;
00467 }
00468 
00469 /*
00470  * gistgettuple() -- Get the next tuple in the scan
00471  */
00472 Datum
00473 gistgettuple(PG_FUNCTION_ARGS)
00474 {
00475     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
00476     ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
00477     GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
00478 
00479     if (dir != ForwardScanDirection)
00480         elog(ERROR, "GiST only supports forward scan direction");
00481 
00482     if (!so->qual_ok)
00483         PG_RETURN_BOOL(false);
00484 
00485     if (so->firstCall)
00486     {
00487         /* Begin the scan by processing the root page */
00488         GISTSearchItem fakeItem;
00489 
00490         pgstat_count_index_scan(scan->indexRelation);
00491 
00492         so->firstCall = false;
00493         so->curTreeItem = NULL;
00494         so->curPageData = so->nPageData = 0;
00495 
00496         fakeItem.blkno = GIST_ROOT_BLKNO;
00497         memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
00498         gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
00499     }
00500 
00501     if (scan->numberOfOrderBys > 0)
00502     {
00503         /* Must fetch tuples in strict distance order */
00504         PG_RETURN_BOOL(getNextNearest(scan));
00505     }
00506     else
00507     {
00508         /* Fetch tuples index-page-at-a-time */
00509         for (;;)
00510         {
00511             if (so->curPageData < so->nPageData)
00512             {
00513                 /* continuing to return tuples from a leaf page */
00514                 scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
00515                 scan->xs_recheck = so->pageData[so->curPageData].recheck;
00516                 so->curPageData++;
00517                 PG_RETURN_BOOL(true);
00518             }
00519 
00520             /* find and process the next index page */
00521             do
00522             {
00523                 GISTSearchItem *item = getNextGISTSearchItem(so);
00524 
00525                 if (!item)
00526                     PG_RETURN_BOOL(false);
00527 
00528                 CHECK_FOR_INTERRUPTS();
00529 
00530                 /*
00531                  * While scanning a leaf page, ItemPointers of matching heap
00532                  * tuples are stored in so->pageData.  If there are any on
00533                  * this page, we fall out of the inner "do" and loop around to
00534                  * return them.
00535                  */
00536                 gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
00537 
00538                 pfree(item);
00539             } while (so->nPageData == 0);
00540         }
00541     }
00542 }
00543 
00544 /*
00545  * gistgetbitmap() -- Get a bitmap of all heap tuple locations
00546  */
00547 Datum
00548 gistgetbitmap(PG_FUNCTION_ARGS)
00549 {
00550     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
00551     TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
00552     GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
00553     int64       ntids = 0;
00554     GISTSearchItem fakeItem;
00555 
00556     if (!so->qual_ok)
00557         PG_RETURN_INT64(0);
00558 
00559     pgstat_count_index_scan(scan->indexRelation);
00560 
00561     /* Begin the scan by processing the root page */
00562     so->curTreeItem = NULL;
00563     so->curPageData = so->nPageData = 0;
00564 
00565     fakeItem.blkno = GIST_ROOT_BLKNO;
00566     memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
00567     gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
00568 
00569     /*
00570      * While scanning a leaf page, ItemPointers of matching heap tuples will
00571      * be stored directly into tbm, so we don't need to deal with them here.
00572      */
00573     for (;;)
00574     {
00575         GISTSearchItem *item = getNextGISTSearchItem(so);
00576 
00577         if (!item)
00578             break;
00579 
00580         CHECK_FOR_INTERRUPTS();
00581 
00582         gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids);
00583 
00584         pfree(item);
00585     }
00586 
00587     PG_RETURN_INT64(ntids);
00588 }