hashovfl.c

/*-------------------------------------------------------------------------
 *
 * hashovfl.c
 *    Overflow page management code for the Postgres hash access method
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/hash/hashovfl.c
 *
 * NOTES
 *    Overflow pages look like ordinary relation pages.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "utils/rel.h"


static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
static uint32 _hash_firstfreebit(uint32 map);


/*
 * Convert overflow page bit number (its index in the free-page bitmaps)
 * to block number within the index.
 */
static BlockNumber
bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
{
    uint32      splitnum = metap->hashm_ovflpoint;
    uint32      i;

    /* Convert zero-based bitnumber to 1-based page number */
    ovflbitnum += 1;

    /* Determine the split number for this page (must be >= 1) */
    for (i = 1;
         i < splitnum && ovflbitnum > metap->hashm_spares[i];
         i++)
         /* loop */ ;

    /*
     * Convert to absolute page number by adding the number of bucket pages
     * that exist before this split point.
     */
    return (BlockNumber) ((1 << i) + ovflbitnum);
}
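
/*
 * Worked example (hypothetical values): suppose hashm_ovflpoint = 2 and
 * hashm_spares[] = {0, 0, 3}, i.e. three overflow pages, all allocated at
 * split point 2.  For zero-based bit number 1:
 *
 *    ovflbitnum becomes 1 + 1 = 2
 *    the loop advances i to 2, since 2 > hashm_spares[1] = 0
 *    result = (1 << 2) + 2 = block 6
 *
 * With the metapage at block 0 and buckets 0..3 at blocks 1..4, overflow
 * bits 0 and 1 map to blocks 5 and 6 respectively.
 */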

/*
 * Convert overflow page block number to bit number for free-page bitmap.
 */
static uint32
blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
{
    uint32      splitnum = metap->hashm_ovflpoint;
    uint32      i;
    uint32      bitnum;

    /* Determine the split number containing this page */
    for (i = 1; i <= splitnum; i++)
    {
        if (ovflblkno <= (BlockNumber) (1 << i))
            break;              /* oops */
        bitnum = ovflblkno - (1 << i);
        if (bitnum <= metap->hashm_spares[i])
            return bitnum - 1;  /* -1 to convert 1-based to 0-based */
    }

    elog(ERROR, "invalid overflow block number %u", ovflblkno);
    return 0;                   /* keep compiler quiet */
}
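
/*
 * Continuing the hypothetical example above (hashm_ovflpoint = 2,
 * hashm_spares[] = {0, 0, 3}): for block 6, split point i = 2 gives
 * bitnum = 6 - (1 << 2) = 2, which is <= hashm_spares[2] = 3, so the
 * function returns 2 - 1 = 1, recovering the bit number we started from.
 */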

/*
 *  _hash_addovflpage
 *
 *  Add an overflow page to the bucket whose last page is pointed to by 'buf'.
 *
 *  On entry, the caller must hold a pin but no lock on 'buf'.  The pin is
 *  dropped before exiting (we assume the caller is not interested in 'buf'
 *  anymore).  The returned overflow page will be pinned and write-locked;
 *  it is guaranteed to be empty.
 *
 *  The caller must hold a pin, but no lock, on the metapage buffer.
 *  That buffer is returned in the same state.
 *
 *  The caller must hold at least share lock on the bucket, to ensure that
 *  no one else tries to compact the bucket meanwhile.  This guarantees that
 *  'buf' won't stop being part of the bucket while it's unlocked.
 *
 * NB: since this could be executed concurrently by multiple processes,
 * one should not assume that the returned overflow page will be the
 * immediate successor of the originally passed 'buf'.  Additional overflow
 * pages might have been added to the bucket chain in between.
 */
Buffer
_hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
{
    Buffer      ovflbuf;
    Page        page;
    Page        ovflpage;
    HashPageOpaque pageopaque;
    HashPageOpaque ovflopaque;

    /* allocate and lock an empty overflow page */
    ovflbuf = _hash_getovflpage(rel, metabuf);

    /*
     * Write-lock the tail page.  It is okay to hold two buffer locks here
     * since there cannot be anyone else contending for access to ovflbuf.
     */
    _hash_chgbufaccess(rel, buf, HASH_NOLOCK, HASH_WRITE);

    /* probably redundant... */
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

    /* loop to find current tail page, in case someone else inserted too */
    for (;;)
    {
        BlockNumber nextblkno;

        page = BufferGetPage(buf);
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
        nextblkno = pageopaque->hasho_nextblkno;

        if (!BlockNumberIsValid(nextblkno))
            break;

        /* we assume we do not need to write the unmodified page */
        _hash_relbuf(rel, buf);

        buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
    }

    /* now that we have correct backlink, initialize new overflow page */
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
    ovflopaque->hasho_page_id = HASHO_PAGE_ID;

    MarkBufferDirty(ovflbuf);

    /* logically chain overflow page to previous page */
    pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
    _hash_wrtbuf(rel, buf);

    return ovflbuf;
}
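
/*
 * Sketch of a possible caller (hypothetical, based only on the contract
 * described above): an insertion path that finds no room on the current
 * tail page, while holding a pin but no lock on 'buf' and a pin but no
 * lock on the metapage buffer, could do roughly:
 *
 *    ovflbuf = _hash_addovflpage(rel, metabuf, buf);
 *    // ovflbuf is pinned, write-locked, and empty
 *    (void) _hash_pgaddtup(rel, ovflbuf, itemsz, itup);
 *    _hash_wrtbuf(rel, ovflbuf);
 */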

/*
 *  _hash_getovflpage()
 *
 *  Find an available overflow page and return it.  The returned buffer
 *  is pinned and write-locked, and has had _hash_pageinit() applied,
 *  but it is caller's responsibility to fill the special space.
 *
 * The caller must hold a pin, but no lock, on the metapage buffer.
 * That buffer is left in the same state at exit.
 */
static Buffer
_hash_getovflpage(Relation rel, Buffer metabuf)
{
    HashMetaPage metap;
    Buffer      mapbuf = 0;
    Buffer      newbuf;
    BlockNumber blkno;
    uint32      orig_firstfree;
    uint32      splitnum;
    uint32     *freep = NULL;
    uint32      max_ovflpg;
    uint32      bit;
    uint32      first_page;
    uint32      last_bit;
    uint32      last_page;
    uint32      i,
                j;

    /* Get exclusive lock on the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    /* start search at hashm_firstfree */
    orig_firstfree = metap->hashm_firstfree;
    first_page = orig_firstfree >> BMPG_SHIFT(metap);
    bit = orig_firstfree & BMPG_MASK(metap);
    i = first_page;
    j = bit / BITS_PER_MAP;
    bit &= ~(BITS_PER_MAP - 1);
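
    /*
     * Worked example (assuming 8 kB pages, so that BMPGSZ_BIT(metap) is
     * 32768, BMPG_SHIFT(metap) is 15, and BITS_PER_MAP is 32): if
     * hashm_firstfree were 70000, then first_page = 70000 >> 15 = 2,
     * bit = 70000 & 32767 = 4464, j = 4464 / 32 = 139, and bit is rounded
     * down to 4448, so the word-at-a-time scan below starts at word 139 of
     * bitmap page 2.
     */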

    /* outer loop iterates once per bitmap page */
    for (;;)
    {
        BlockNumber mapblkno;
        Page        mappage;
        uint32      last_inpage;

        /* want to end search with the last existing overflow page */
        splitnum = metap->hashm_ovflpoint;
        max_ovflpg = metap->hashm_spares[splitnum] - 1;
        last_page = max_ovflpg >> BMPG_SHIFT(metap);
        last_bit = max_ovflpg & BMPG_MASK(metap);

        if (i > last_page)
            break;

        Assert(i < metap->hashm_nmaps);
        mapblkno = metap->hashm_mapp[i];

        if (i == last_page)
            last_inpage = last_bit;
        else
            last_inpage = BMPGSZ_BIT(metap) - 1;

        /* Release exclusive lock on metapage while reading bitmap page */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

        mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
        mappage = BufferGetPage(mapbuf);
        freep = HashPageGetBitmap(mappage);

        for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
        {
            if (freep[j] != ALL_SET)
                goto found;
        }

        /* No free space here, try to advance to next map page */
        _hash_relbuf(rel, mapbuf);
        i++;
        j = 0;                  /* scan from start of next map page */
        bit = 0;

        /* Reacquire exclusive lock on the meta page */
        _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
    }

    /*
     * No free pages --- have to extend the relation to add an overflow page.
     * First, check to see if we have to add a new bitmap page too.
     */
    if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
    {
        /*
         * We create the new bitmap page with all pages marked "in use".
         * Actually two pages in the new bitmap's range will exist
         * immediately: the bitmap page itself, and the following page which
         * is the one we return to the caller.  Both of these are correctly
         * marked "in use".  Subsequent pages do not exist yet, but it is
         * convenient to pre-mark them as "in use" too.
         */
        bit = metap->hashm_spares[splitnum];
        _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
        metap->hashm_spares[splitnum]++;
    }
    else
    {
        /*
         * Nothing to do here; since the page will be past the last used page,
         * we know its bitmap bit was preinitialized to "in use".
         */
    }

    /* Calculate address of the new overflow page */
    bit = metap->hashm_spares[splitnum];
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     * relation length stays in sync with ours.  XXX It's annoying to do this
     * with metapage write lock held; would be better to use a lock that
     * doesn't block incoming searches.
     */
    newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);

    metap->hashm_spares[splitnum]++;

    /*
     * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
        metap->hashm_firstfree = bit + 1;

    /* Write updated metapage and release lock, but not pin */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    return newbuf;

found:
    /* convert bit to bit number within page */
    bit += _hash_firstfreebit(freep[j]);

    /* mark page "in use" in the bitmap */
    SETBIT(freep, bit);
    _hash_wrtbuf(rel, mapbuf);

    /* Reacquire exclusive lock on the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /* convert bit to absolute bit number */
    bit += (i << BMPG_SHIFT(metap));

    /* Calculate address of the recycled overflow page */
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
    {
        metap->hashm_firstfree = bit + 1;

        /* Write updated metapage and release lock, but not pin */
        _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
    }
    else
    {
        /* We didn't change the metapage, so no need to write */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
    }

    /* Fetch, init, and return the recycled page */
    return _hash_getinitbuf(rel, blkno);
}

/*
 *  _hash_firstfreebit()
 *
 *  Return the number of the first bit that is not set in the word 'map'.
 */
static uint32
_hash_firstfreebit(uint32 map)
{
    uint32      i,
                mask;

    mask = 0x1;
    for (i = 0; i < BITS_PER_MAP; i++)
    {
        if (!(mask & map))
            return i;
        mask <<= 1;
    }

    elog(ERROR, "firstfreebit found no free bit");

    return 0;                   /* keep compiler quiet */
}
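
/*
 * Worked example: for map = 0xFFFFFF7F the lowest clear bit is bit 7, so
 * _hash_firstfreebit() returns 7; for map = ALL_SET the loop falls through
 * to the elog(ERROR) above.
 */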

/*
 *  _hash_freeovflpage() -
 *
 *  Remove this overflow page from its bucket's chain, and mark the page as
 *  free.  On entry, ovflbuf is write-locked; it is released before exiting.
 *
 *  Since this function is invoked in VACUUM, we provide an access strategy
 *  parameter that controls fetches of the bucket pages.
 *
 *  Returns the block number of the page that followed the given page
 *  in the bucket, or InvalidBlockNumber if no following page.
 *
 *  NB: caller must not hold lock on metapage, nor on either page that's
 *  adjacent in the bucket chain.  The caller had better hold exclusive lock
 *  on the bucket, too.
 */
BlockNumber
_hash_freeovflpage(Relation rel, Buffer ovflbuf,
                   BufferAccessStrategy bstrategy)
{
    HashMetaPage metap;
    Buffer      metabuf;
    Buffer      mapbuf;
    BlockNumber ovflblkno;
    BlockNumber prevblkno;
    BlockNumber blkno;
    BlockNumber nextblkno;
    HashPageOpaque ovflopaque;
    Page        ovflpage;
    Page        mappage;
    uint32     *freep;
    uint32      ovflbitno;
    int32       bitmappage,
                bitmapbit;
    Bucket bucket PG_USED_FOR_ASSERTS_ONLY;

    /* Get information from the doomed page */
    _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
    ovflblkno = BufferGetBlockNumber(ovflbuf);
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    nextblkno = ovflopaque->hasho_nextblkno;
    prevblkno = ovflopaque->hasho_prevblkno;
    bucket = ovflopaque->hasho_bucket;

    /*
     * Zero the page for debugging's sake; then write and release it. (Note:
     * if we failed to zero the page here, we'd have problems with the Assert
     * in _hash_pageinit() when the page is reused.)
     */
    MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
    _hash_wrtbuf(rel, ovflbuf);

    /*
     * Fix up the bucket chain.  This is a doubly-linked list, so we must fix
     * up the bucket chain members behind and ahead of the overflow page being
     * deleted.  No concurrency issues since we hold exclusive lock on the
     * entire bucket.
     */
    if (BlockNumberIsValid(prevblkno))
    {
        Buffer      prevbuf = _hash_getbuf_with_strategy(rel,
                                                         prevblkno,
                                                         HASH_WRITE,
                                           LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
                                                         bstrategy);
        Page        prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);

        Assert(prevopaque->hasho_bucket == bucket);
        prevopaque->hasho_nextblkno = nextblkno;
        _hash_wrtbuf(rel, prevbuf);
    }
    if (BlockNumberIsValid(nextblkno))
    {
        Buffer      nextbuf = _hash_getbuf_with_strategy(rel,
                                                         nextblkno,
                                                         HASH_WRITE,
                                                         LH_OVERFLOW_PAGE,
                                                         bstrategy);
        Page        nextpage = BufferGetPage(nextbuf);
        HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);

        Assert(nextopaque->hasho_bucket == bucket);
        nextopaque->hasho_prevblkno = prevblkno;
        _hash_wrtbuf(rel, nextbuf);
    }

    /* Note: bstrategy is intentionally not used for metapage and bitmap */

    /* Read the metapage so we can determine which bitmap page to use */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    /* Identify which bit to clear */
    ovflbitno = blkno_to_bitno(metap, ovflblkno);

    bitmappage = ovflbitno >> BMPG_SHIFT(metap);
    bitmapbit = ovflbitno & BMPG_MASK(metap);

    if (bitmappage >= metap->hashm_nmaps)
        elog(ERROR, "invalid overflow bit number %u", ovflbitno);
    blkno = metap->hashm_mapp[bitmappage];

    /* Release metapage lock while we access the bitmap page */
    _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

    /* Clear the bitmap bit to indicate that this overflow page is free */
    mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
    mappage = BufferGetPage(mapbuf);
    freep = HashPageGetBitmap(mappage);
    Assert(ISSET(freep, bitmapbit));
    CLRBIT(freep, bitmapbit);
    _hash_wrtbuf(rel, mapbuf);

    /* Get write-lock on metapage to update firstfree */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /* if this is now the first free page, update hashm_firstfree */
    if (ovflbitno < metap->hashm_firstfree)
    {
        metap->hashm_firstfree = ovflbitno;
        _hash_wrtbuf(rel, metabuf);
    }
    else
    {
        /* no need to change metapage */
        _hash_relbuf(rel, metabuf);
    }

    return nextblkno;
}
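
/*
 * The in-file caller of _hash_freeovflpage() is _hash_squeezebucket() below.
 * The bitmap addressing mirrors the earlier worked example: assuming 8 kB
 * pages, a hypothetical ovflbitno of 33000 gives
 * bitmappage = 33000 >> 15 = 1 and bitmapbit = 33000 & 32767 = 232.
 */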


/*
 *  _hash_initbitmap()
 *
 *   Initialize a new bitmap page.  The metapage has a write-lock upon
 *   entering the function, and must be written by caller after return.
 *
 * 'blkno' is the block number of the new bitmap page.
 *
 * All bits in the new bitmap page are set to "1", indicating "in use".
 */
void
_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
                 ForkNumber forkNum)
{
    Buffer      buf;
    Page        pg;
    HashPageOpaque op;
    uint32     *freep;

    /*
     * It is okay to write-lock the new bitmap page while holding metapage
     * write lock, because no one else could be contending for the new page.
     * Also, the metapage lock makes it safe to extend the index using
     * _hash_getnewbuf.
     *
     * There is some loss of concurrency in possibly doing I/O for the new
     * page while holding the metapage lock, but this path is taken so seldom
     * that it's not worth worrying about.
     */
    buf = _hash_getnewbuf(rel, blkno, forkNum);
    pg = BufferGetPage(buf);

    /* initialize the page's special space */
    op = (HashPageOpaque) PageGetSpecialPointer(pg);
    op->hasho_prevblkno = InvalidBlockNumber;
    op->hasho_nextblkno = InvalidBlockNumber;
    op->hasho_bucket = -1;
    op->hasho_flag = LH_BITMAP_PAGE;
    op->hasho_page_id = HASHO_PAGE_ID;

    /* set all of the bits to 1 */
    freep = HashPageGetBitmap(pg);
    MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));

    /* write out the new bitmap page (releasing write lock and pin) */
    _hash_wrtbuf(rel, buf);

    /* add the new bitmap page to the metapage's list of bitmaps */
    /* metapage already has a write lock */
    if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("out of overflow pages in hash index \"%s\"",
                        RelationGetRelationName(rel))));

    metap->hashm_mapp[metap->hashm_nmaps] = blkno;

    metap->hashm_nmaps++;
}
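
/*
 * Note on capacity: with every bit preset to 1, each bitmap page tracks
 * BMPGSZ_BIT(metap) overflow pages -- 32768 under the 8 kB page-size
 * assumption used in the examples above -- and at most HASH_MAX_BITMAPS
 * bitmap pages can be registered in the metapage.
 */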


/*
 *  _hash_squeezebucket(rel, bucket)
 *
 *  Try to squeeze the tuples onto pages occurring earlier in the
 *  bucket chain in an attempt to free overflow pages. When we start
 *  the "squeezing", the page from which we start taking tuples (the
 *  "read" page) is the last page in the bucket chain and the page
 *  onto which we start squeezing tuples (the "write" page) is the
 *  first page in the bucket chain.  The read page works backward and
 *  the write page works forward; the procedure terminates when the
 *  read page and write page are the same page.
 *
 *  At completion of this procedure, it is guaranteed that all pages in
 *  the bucket are nonempty, unless the bucket is totally empty (in
 *  which case all overflow pages will be freed).  The original implementation
 *  required that to be true on entry as well, but it's a lot easier for
 *  callers to leave empty overflow pages and let this function clean them up.
 *
 *  Caller must hold exclusive lock on the target bucket.  This allows
 *  us to safely lock multiple pages in the bucket.
 *
 *  Since this function is invoked in VACUUM, we provide an access strategy
 *  parameter that controls fetches of the bucket pages.
 */
void
_hash_squeezebucket(Relation rel,
                    Bucket bucket,
                    BlockNumber bucket_blkno,
                    BufferAccessStrategy bstrategy)
{
    BlockNumber wblkno;
    BlockNumber rblkno;
    Buffer      wbuf;
    Buffer      rbuf;
    Page        wpage;
    Page        rpage;
    HashPageOpaque wopaque;
    HashPageOpaque ropaque;
    bool        wbuf_dirty;

    /*
     * start squeezing into the base bucket page.
     */
    wblkno = bucket_blkno;
    wbuf = _hash_getbuf_with_strategy(rel,
                                      wblkno,
                                      HASH_WRITE,
                                      LH_BUCKET_PAGE,
                                      bstrategy);
    wpage = BufferGetPage(wbuf);
    wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

    /*
     * if there aren't any overflow pages, there's nothing to squeeze.
     */
    if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
    {
        _hash_relbuf(rel, wbuf);
        return;
    }

    /*
     * Find the last page in the bucket chain by starting at the base bucket
     * page and working forward.  Note: we assume that a hash bucket chain is
     * usually smaller than the buffer ring being used by VACUUM, else using
     * the access strategy here would be counterproductive.
     */
    rbuf = InvalidBuffer;
    ropaque = wopaque;
    do
    {
        rblkno = ropaque->hasho_nextblkno;
        if (rbuf != InvalidBuffer)
            _hash_relbuf(rel, rbuf);
        rbuf = _hash_getbuf_with_strategy(rel,
                                          rblkno,
                                          HASH_WRITE,
                                          LH_OVERFLOW_PAGE,
                                          bstrategy);
        rpage = BufferGetPage(rbuf);
        ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    } while (BlockNumberIsValid(ropaque->hasho_nextblkno));

    /*
     * squeeze the tuples.
     */
    wbuf_dirty = false;
    for (;;)
    {
        OffsetNumber roffnum;
        OffsetNumber maxroffnum;
        OffsetNumber deletable[MaxOffsetNumber];
        int         ndeletable = 0;

        /* Scan each tuple in "read" page */
        maxroffnum = PageGetMaxOffsetNumber(rpage);
        for (roffnum = FirstOffsetNumber;
             roffnum <= maxroffnum;
             roffnum = OffsetNumberNext(roffnum))
        {
            IndexTuple  itup;
            Size        itemsz;

            itup = (IndexTuple) PageGetItem(rpage,
                                            PageGetItemId(rpage, roffnum));
            itemsz = IndexTupleDSize(*itup);
            itemsz = MAXALIGN(itemsz);

            /*
             * Walk up the bucket chain, looking for a page big enough for
             * this item.  Exit if we reach the read page.
             */
            while (PageGetFreeSpace(wpage) < itemsz)
            {
                Assert(!PageIsEmpty(wpage));

                wblkno = wopaque->hasho_nextblkno;
                Assert(BlockNumberIsValid(wblkno));

                if (wbuf_dirty)
                    _hash_wrtbuf(rel, wbuf);
                else
                    _hash_relbuf(rel, wbuf);

                /* nothing more to do if we reached the read page */
                if (rblkno == wblkno)
                {
                    if (ndeletable > 0)
                    {
                        /* Delete tuples we already moved off read page */
                        PageIndexMultiDelete(rpage, deletable, ndeletable);
                        _hash_wrtbuf(rel, rbuf);
                    }
                    else
                        _hash_relbuf(rel, rbuf);
                    return;
                }

                wbuf = _hash_getbuf_with_strategy(rel,
                                                  wblkno,
                                                  HASH_WRITE,
                                                  LH_OVERFLOW_PAGE,
                                                  bstrategy);
                wpage = BufferGetPage(wbuf);
                wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
                Assert(wopaque->hasho_bucket == bucket);
                wbuf_dirty = false;
            }

            /*
             * we have found room so insert on the "write" page, being careful
             * to preserve hashkey ordering.  (If we insert many tuples into
             * the same "write" page it would be worth qsort'ing instead of
             * doing repeated _hash_pgaddtup.)
             */
            (void) _hash_pgaddtup(rel, wbuf, itemsz, itup);
            wbuf_dirty = true;

            /* remember tuple for deletion from "read" page */
            deletable[ndeletable++] = roffnum;
        }

        /*
         * If we reach here, there are no live tuples on the "read" page ---
         * it was empty when we got to it, or we moved them all.  So we can
         * just free the page without bothering with deleting tuples
         * individually.  Then advance to the previous "read" page.
         *
         * Tricky point here: if our read and write pages are adjacent in the
         * bucket chain, our write lock on wbuf will conflict with
         * _hash_freeovflpage's attempt to update the sibling links of the
         * removed page.  However, in that case we are done anyway, so we can
         * simply drop the write lock before calling _hash_freeovflpage.
         */
        rblkno = ropaque->hasho_prevblkno;
        Assert(BlockNumberIsValid(rblkno));

        /* are we freeing the page adjacent to wbuf? */
        if (rblkno == wblkno)
        {
            /* yes, so release wbuf lock first */
            if (wbuf_dirty)
                _hash_wrtbuf(rel, wbuf);
            else
                _hash_relbuf(rel, wbuf);
            /* free this overflow page (releases rbuf) */
            _hash_freeovflpage(rel, rbuf, bstrategy);
            /* done */
            return;
        }

        /* free this overflow page, then get the previous one */
        _hash_freeovflpage(rel, rbuf, bstrategy);

        rbuf = _hash_getbuf_with_strategy(rel,
                                          rblkno,
                                          HASH_WRITE,
                                          LH_OVERFLOW_PAGE,
                                          bstrategy);
        rpage = BufferGetPage(rbuf);
        ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    }

    /* NOTREACHED */
}
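
/*
 * Illustration of one squeeze pass over a hypothetical chain:
 *
 *    [bucket page] -> [ovfl 1] -> [ovfl 2] -> [ovfl 3]
 *     write page                               read page
 *
 * Tuples are pulled from ovfl 3 into the bucket page (then into ovfl 1,
 * ovfl 2, ... as each write page fills), and every read page that becomes
 * empty is unlinked and freed via _hash_freeovflpage(), until the read and
 * write pages meet.
 */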