Header And Logo

PostgreSQL
| The world's most advanced open source database.

hashsearch.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * hashsearch.c
00004  *    search code for postgres hash tables
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/access/hash/hashsearch.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include "access/hash.h"
00018 #include "access/relscan.h"
00019 #include "miscadmin.h"
00020 #include "pgstat.h"
00021 #include "utils/rel.h"
00022 
00023 
00024 /*
00025  *  _hash_next() -- Get the next item in a scan.
00026  *
00027  *      On entry, we have a valid hashso_curpos in the scan, and a
00028  *      pin and read lock on the page that contains that item.
00029  *      We find the next item in the scan, if any.
00030  *      On success exit, we have the page containing the next item
00031  *      pinned and locked.
00032  */
00033 bool
00034 _hash_next(IndexScanDesc scan, ScanDirection dir)
00035 {
00036     Relation    rel = scan->indexRelation;
00037     HashScanOpaque so = (HashScanOpaque) scan->opaque;
00038     Buffer      buf;
00039     Page        page;
00040     OffsetNumber offnum;
00041     ItemPointer current;
00042     IndexTuple  itup;
00043 
00044     /* we still have the buffer pinned and read-locked */
00045     buf = so->hashso_curbuf;
00046     Assert(BufferIsValid(buf));
00047 
00048     /*
00049      * step to next valid tuple.
00050      */
00051     if (!_hash_step(scan, &buf, dir))
00052         return false;
00053 
00054     /* if we're here, _hash_step found a valid tuple */
00055     current = &(so->hashso_curpos);
00056     offnum = ItemPointerGetOffsetNumber(current);
00057     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
00058     page = BufferGetPage(buf);
00059     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
00060     so->hashso_heappos = itup->t_tid;
00061 
00062     return true;
00063 }
00064 
00065 /*
00066  * Advance to next page in a bucket, if any.
00067  */
00068 static void
00069 _hash_readnext(Relation rel,
00070                Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
00071 {
00072     BlockNumber blkno;
00073 
00074     blkno = (*opaquep)->hasho_nextblkno;
00075     _hash_relbuf(rel, *bufp);
00076     *bufp = InvalidBuffer;
00077     /* check for interrupts while we're not holding any buffer lock */
00078     CHECK_FOR_INTERRUPTS();
00079     if (BlockNumberIsValid(blkno))
00080     {
00081         *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
00082         *pagep = BufferGetPage(*bufp);
00083         *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
00084     }
00085 }
00086 
00087 /*
00088  * Advance to previous page in a bucket, if any.
00089  */
00090 static void
00091 _hash_readprev(Relation rel,
00092                Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
00093 {
00094     BlockNumber blkno;
00095 
00096     blkno = (*opaquep)->hasho_prevblkno;
00097     _hash_relbuf(rel, *bufp);
00098     *bufp = InvalidBuffer;
00099     /* check for interrupts while we're not holding any buffer lock */
00100     CHECK_FOR_INTERRUPTS();
00101     if (BlockNumberIsValid(blkno))
00102     {
00103         *bufp = _hash_getbuf(rel, blkno, HASH_READ,
00104                              LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
00105         *pagep = BufferGetPage(*bufp);
00106         *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
00107     }
00108 }
00109 
00110 /*
00111  *  _hash_first() -- Find the first item in a scan.
00112  *
00113  *      Find the first item in the index that
00114  *      satisfies the qualification associated with the scan descriptor. On
00115  *      success, the page containing the current index tuple is read locked
00116  *      and pinned, and the scan's opaque data entry is updated to
00117  *      include the buffer.
00118  */
00119 bool
00120 _hash_first(IndexScanDesc scan, ScanDirection dir)
00121 {
00122     Relation    rel = scan->indexRelation;
00123     HashScanOpaque so = (HashScanOpaque) scan->opaque;
00124     ScanKey     cur;
00125     uint32      hashkey;
00126     Bucket      bucket;
00127     BlockNumber blkno;
00128     BlockNumber oldblkno = InvalidBuffer;
00129     bool        retry = false;
00130     Buffer      buf;
00131     Buffer      metabuf;
00132     Page        page;
00133     HashPageOpaque opaque;
00134     HashMetaPage metap;
00135     IndexTuple  itup;
00136     ItemPointer current;
00137     OffsetNumber offnum;
00138 
00139     pgstat_count_index_scan(rel);
00140 
00141     current = &(so->hashso_curpos);
00142     ItemPointerSetInvalid(current);
00143 
00144     /*
00145      * We do not support hash scans with no index qualification, because we
00146      * would have to read the whole index rather than just one bucket. That
00147      * creates a whole raft of problems, since we haven't got a practical way
00148      * to lock all the buckets against splits or compactions.
00149      */
00150     if (scan->numberOfKeys < 1)
00151         ereport(ERROR,
00152                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00153                  errmsg("hash indexes do not support whole-index scans")));
00154 
00155     /* There may be more than one index qual, but we hash only the first */
00156     cur = &scan->keyData[0];
00157 
00158     /* We support only single-column hash indexes */
00159     Assert(cur->sk_attno == 1);
00160     /* And there's only one operator strategy, too */
00161     Assert(cur->sk_strategy == HTEqualStrategyNumber);
00162 
00163     /*
00164      * If the constant in the index qual is NULL, assume it cannot match any
00165      * items in the index.
00166      */
00167     if (cur->sk_flags & SK_ISNULL)
00168         return false;
00169 
00170     /*
00171      * Okay to compute the hash key.  We want to do this before acquiring any
00172      * locks, in case a user-defined hash function happens to be slow.
00173      *
00174      * If scankey operator is not a cross-type comparison, we can use the
00175      * cached hash function; otherwise gotta look it up in the catalogs.
00176      *
00177      * We support the convention that sk_subtype == InvalidOid means the
00178      * opclass input type; this is a hack to simplify life for ScanKeyInit().
00179      */
00180     if (cur->sk_subtype == rel->rd_opcintype[0] ||
00181         cur->sk_subtype == InvalidOid)
00182         hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
00183     else
00184         hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
00185                                            cur->sk_subtype);
00186 
00187     so->hashso_sk_hash = hashkey;
00188 
00189     /* Read the metapage */
00190     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
00191     metap = HashPageGetMeta(BufferGetPage(metabuf));
00192 
00193     /*
00194      * Loop until we get a lock on the correct target bucket.
00195      */
00196     for (;;)
00197     {
00198         /*
00199          * Compute the target bucket number, and convert to block number.
00200          */
00201         bucket = _hash_hashkey2bucket(hashkey,
00202                                       metap->hashm_maxbucket,
00203                                       metap->hashm_highmask,
00204                                       metap->hashm_lowmask);
00205 
00206         blkno = BUCKET_TO_BLKNO(metap, bucket);
00207 
00208         /* Release metapage lock, but keep pin. */
00209         _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
00210 
00211         /*
00212          * If the previous iteration of this loop locked what is still the
00213          * correct target bucket, we are done.  Otherwise, drop any old lock
00214          * and lock what now appears to be the correct bucket.
00215          */
00216         if (retry)
00217         {
00218             if (oldblkno == blkno)
00219                 break;
00220             _hash_droplock(rel, oldblkno, HASH_SHARE);
00221         }
00222         _hash_getlock(rel, blkno, HASH_SHARE);
00223 
00224         /*
00225          * Reacquire metapage lock and check that no bucket split has taken
00226          * place while we were awaiting the bucket lock.
00227          */
00228         _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
00229         oldblkno = blkno;
00230         retry = true;
00231     }
00232 
00233     /* done with the metapage */
00234     _hash_dropbuf(rel, metabuf);
00235 
00236     /* Update scan opaque state to show we have lock on the bucket */
00237     so->hashso_bucket = bucket;
00238     so->hashso_bucket_valid = true;
00239     so->hashso_bucket_blkno = blkno;
00240 
00241     /* Fetch the primary bucket page for the bucket */
00242     buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
00243     page = BufferGetPage(buf);
00244     opaque = (HashPageOpaque) PageGetSpecialPointer(page);
00245     Assert(opaque->hasho_bucket == bucket);
00246 
00247     /* If a backwards scan is requested, move to the end of the chain */
00248     if (ScanDirectionIsBackward(dir))
00249     {
00250         while (BlockNumberIsValid(opaque->hasho_nextblkno))
00251             _hash_readnext(rel, &buf, &page, &opaque);
00252     }
00253 
00254     /* Now find the first tuple satisfying the qualification */
00255     if (!_hash_step(scan, &buf, dir))
00256         return false;
00257 
00258     /* if we're here, _hash_step found a valid tuple */
00259     offnum = ItemPointerGetOffsetNumber(current);
00260     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
00261     page = BufferGetPage(buf);
00262     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
00263     so->hashso_heappos = itup->t_tid;
00264 
00265     return true;
00266 }
00267 
00268 /*
00269  *  _hash_step() -- step to the next valid item in a scan in the bucket.
00270  *
00271  *      If no valid record exists in the requested direction, return
00272  *      false.  Else, return true and set the hashso_curpos for the
00273  *      scan to the right thing.
00274  *
00275  *      'bufP' points to the current buffer, which is pinned and read-locked.
00276  *      On success exit, we have pin and read-lock on whichever page
00277  *      contains the right item; on failure, we have released all buffers.
00278  */
00279 bool
00280 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
00281 {
00282     Relation    rel = scan->indexRelation;
00283     HashScanOpaque so = (HashScanOpaque) scan->opaque;
00284     ItemPointer current;
00285     Buffer      buf;
00286     Page        page;
00287     HashPageOpaque opaque;
00288     OffsetNumber maxoff;
00289     OffsetNumber offnum;
00290     BlockNumber blkno;
00291     IndexTuple  itup;
00292 
00293     current = &(so->hashso_curpos);
00294 
00295     buf = *bufP;
00296     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
00297     page = BufferGetPage(buf);
00298     opaque = (HashPageOpaque) PageGetSpecialPointer(page);
00299 
00300     /*
00301      * If _hash_step is called from _hash_first, current will not be valid, so
00302      * we can't dereference it.  However, in that case, we presumably want to
00303      * start at the beginning/end of the page...
00304      */
00305     maxoff = PageGetMaxOffsetNumber(page);
00306     if (ItemPointerIsValid(current))
00307         offnum = ItemPointerGetOffsetNumber(current);
00308     else
00309         offnum = InvalidOffsetNumber;
00310 
00311     /*
00312      * 'offnum' now points to the last tuple we examined (if any).
00313      *
00314      * continue to step through tuples until: 1) we get to the end of the
00315      * bucket chain or 2) we find a valid tuple.
00316      */
00317     do
00318     {
00319         switch (dir)
00320         {
00321             case ForwardScanDirection:
00322                 if (offnum != InvalidOffsetNumber)
00323                     offnum = OffsetNumberNext(offnum);  /* move forward */
00324                 else
00325                 {
00326                     /* new page, locate starting position by binary search */
00327                     offnum = _hash_binsearch(page, so->hashso_sk_hash);
00328                 }
00329 
00330                 for (;;)
00331                 {
00332                     /*
00333                      * check if we're still in the range of items with the
00334                      * target hash key
00335                      */
00336                     if (offnum <= maxoff)
00337                     {
00338                         Assert(offnum >= FirstOffsetNumber);
00339                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
00340                         if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
00341                             break;      /* yes, so exit for-loop */
00342                     }
00343 
00344                     /*
00345                      * ran off the end of this page, try the next
00346                      */
00347                     _hash_readnext(rel, &buf, &page, &opaque);
00348                     if (BufferIsValid(buf))
00349                     {
00350                         maxoff = PageGetMaxOffsetNumber(page);
00351                         offnum = _hash_binsearch(page, so->hashso_sk_hash);
00352                     }
00353                     else
00354                     {
00355                         /* end of bucket */
00356                         itup = NULL;
00357                         break;  /* exit for-loop */
00358                     }
00359                 }
00360                 break;
00361 
00362             case BackwardScanDirection:
00363                 if (offnum != InvalidOffsetNumber)
00364                     offnum = OffsetNumberPrev(offnum);  /* move back */
00365                 else
00366                 {
00367                     /* new page, locate starting position by binary search */
00368                     offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
00369                 }
00370 
00371                 for (;;)
00372                 {
00373                     /*
00374                      * check if we're still in the range of items with the
00375                      * target hash key
00376                      */
00377                     if (offnum >= FirstOffsetNumber)
00378                     {
00379                         Assert(offnum <= maxoff);
00380                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
00381                         if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
00382                             break;      /* yes, so exit for-loop */
00383                     }
00384 
00385                     /*
00386                      * ran off the end of this page, try the next
00387                      */
00388                     _hash_readprev(rel, &buf, &page, &opaque);
00389                     if (BufferIsValid(buf))
00390                     {
00391                         maxoff = PageGetMaxOffsetNumber(page);
00392                         offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
00393                     }
00394                     else
00395                     {
00396                         /* end of bucket */
00397                         itup = NULL;
00398                         break;  /* exit for-loop */
00399                     }
00400                 }
00401                 break;
00402 
00403             default:
00404                 /* NoMovementScanDirection */
00405                 /* this should not be reached */
00406                 itup = NULL;
00407                 break;
00408         }
00409 
00410         if (itup == NULL)
00411         {
00412             /* we ran off the end of the bucket without finding a match */
00413             *bufP = so->hashso_curbuf = InvalidBuffer;
00414             ItemPointerSetInvalid(current);
00415             return false;
00416         }
00417 
00418         /* check the tuple quals, loop around if not met */
00419     } while (!_hash_checkqual(scan, itup));
00420 
00421     /* if we made it to here, we've found a valid tuple */
00422     blkno = BufferGetBlockNumber(buf);
00423     *bufP = so->hashso_curbuf = buf;
00424     ItemPointerSet(current, blkno, offnum);
00425     return true;
00426 }