Header And Logo

PostgreSQL
The world's most advanced open source database.

Defines | Functions

hashpage.c File Reference

#include "postgres.h"
#include "access/hash.h"
#include "miscadmin.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
Include dependency graph for hashpage.c:

Go to the source code of this file.

Defines

#define USELOCKING(rel)   (!RELATION_IS_LOCAL(rel))

Functions

static bool _hash_alloc_buckets (Relation rel, BlockNumber firstblock, uint32 nblocks)
static void _hash_splitbucket (Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, BlockNumber start_oblkno, BlockNumber start_nblkno, uint32 maxbucket, uint32 highmask, uint32 lowmask)
void _hash_getlock (Relation rel, BlockNumber whichlock, int access)
bool _hash_try_getlock (Relation rel, BlockNumber whichlock, int access)
void _hash_droplock (Relation rel, BlockNumber whichlock, int access)
Buffer _hash_getbuf (Relation rel, BlockNumber blkno, int access, int flags)
Buffer _hash_getinitbuf (Relation rel, BlockNumber blkno)
Buffer _hash_getnewbuf (Relation rel, BlockNumber blkno, ForkNumber forkNum)
Buffer _hash_getbuf_with_strategy (Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
void _hash_relbuf (Relation rel, Buffer buf)
void _hash_dropbuf (Relation rel, Buffer buf)
void _hash_wrtbuf (Relation rel, Buffer buf)
void _hash_chgbufaccess (Relation rel, Buffer buf, int from_access, int to_access)
uint32 _hash_metapinit (Relation rel, double num_tuples, ForkNumber forkNum)
void _hash_pageinit (Page page, Size size)
void _hash_expandtable (Relation rel, Buffer metabuf)

Define Documentation

#define USELOCKING (   rel  )     (!RELATION_IS_LOCAL(rel))

Definition at line 54 of file hashpage.c.

Referenced by _hash_droplock(), _hash_getlock(), and _hash_try_getlock().


Function Documentation

static bool _hash_alloc_buckets ( Relation  rel,
BlockNumber  firstblock,
uint32  nblocks 
) [static]

Definition at line 698 of file hashpage.c.

References InvalidBlockNumber, MAIN_FORKNUM, MemSet, RelationData::rd_smgr, RelationOpenSmgr, and smgrextend().

Referenced by _hash_expandtable().

/*
 * Extend the main fork of "rel" so that it covers the block range
 * [firstblock, firstblock + nblocks - 1].
 *
 * Returns false, without touching the relation, when the end of that range
 * cannot be represented as a valid block number; returns true otherwise.
 */
{
    char        zeropage[BLCKSZ];
    BlockNumber lastblock = firstblock + nblocks - 1;

    /*
     * A sum that wrapped around, or one that landed exactly on
     * InvalidBlockNumber, means the index cannot grow any further.
     */
    if (lastblock == InvalidBlockNumber || lastblock < firstblock)
        return false;

    /* Hand smgr a page of zeroes to place at the last block of the range */
    MemSet(zeropage, 0, sizeof(zeropage));

    RelationOpenSmgr(rel);
    smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zeropage, false);

    return true;
}

void _hash_chgbufaccess ( Relation  rel,
Buffer  buf,
int  from_access,
int  to_access 
)
void _hash_dropbuf ( Relation  rel,
Buffer  buf 
)

Definition at line 260 of file hashpage.c.

References ReleaseBuffer().

Referenced by _hash_doinsert(), _hash_first(), hashendscan(), and hashrescan().

void _hash_droplock ( Relation  rel,
BlockNumber  whichlock,
int  access 
)

Definition at line 91 of file hashpage.c.

References UnlockPage(), and USELOCKING.

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_first(), hashbulkdelete(), hashendscan(), hashrescan(), and pgstat_hash_page().

/*
 * Release the page-level lock identified by "whichlock" with mode "access".
 *
 * Nothing is done when USELOCKING(rel) is false, i.e. when the relation is
 * local.
 */
{
    if (!USELOCKING(rel))
        return;

    UnlockPage(rel, whichlock, access);
}

void _hash_expandtable ( Relation  rel,
Buffer  metabuf 
)

Definition at line 497 of file hashpage.c.

References _hash_alloc_buckets(), _hash_checkpage(), _hash_chgbufaccess(), _hash_droplock(), _hash_has_active_scan(), _hash_log2(), _hash_splitbucket(), _hash_try_getlock(), Assert, BUCKET_TO_BLKNO, BufferGetPage, elog, END_CRIT_SECTION, ERROR, HASH_EXCLUSIVE, HASH_NOLOCK, HASH_READ, HASH_WRITE, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageGetMeta, LH_META_PAGE, and START_CRIT_SECTION.

Referenced by _hash_doinsert().

/*
 * Try to grow the hash table by one bucket, splitting an existing bucket's
 * tuples between it and the newly created bucket.
 *
 * rel      - the hash index relation
 * metabuf  - buffer holding the metapage; it is expected to be unlocked on
 *            entry, since the first step upgrades it from HASH_NOLOCK to
 *            HASH_WRITE
 *
 * The function gives up quietly (via the "fail" label) when a split is no
 * longer needed, when maxbucket has hit its ceiling, when the old bucket is
 * being scanned by this backend or cannot be locked immediately, or when the
 * new bucket pages cannot be allocated.  It raises an ERROR if the
 * supposedly new bucket is already being scanned or cannot be locked.
 *
 * On both the success path and the give-up path the metapage buffer is
 * returned to the HASH_NOLOCK state before returning.
 */
{
    HashMetaPage metap;
    Bucket      old_bucket;
    Bucket      new_bucket;
    uint32      spare_ndx;
    BlockNumber start_oblkno;
    BlockNumber start_nblkno;
    uint32      maxbucket;
    uint32      highmask;
    uint32      lowmask;

    /*
     * Write-lock the meta page.  It used to be necessary to acquire a
     * heavyweight lock to begin a split, but that is no longer required.
     */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    /*
     * Check to see if split is still needed; someone else might have already
     * done one while we waited for the lock.
     *
     * Make sure this stays in sync with _hash_doinsert()
     */
    if (metap->hashm_ntuples <=
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
        goto fail;

    /*
     * Can't split anymore if maxbucket has reached its maximum possible
     * value.
     *
     * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
     * the calculation maxbucket+1 mustn't overflow).  Currently we restrict
     * to half that because of overflow looping in _hash_log2() and
     * insufficient space in hashm_spares[].  It's moot anyway because an
     * index with 2^32 buckets would certainly overflow BlockNumber and hence
     * _hash_alloc_buckets() would fail, but if we supported buckets smaller
     * than a disk block then this would be an independent constraint.
     *
     * If you change this, see also the maximum initial number of buckets in
     * _hash_metapinit().
     */
    if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
        goto fail;

    /*
     * Determine which bucket is to be split, and attempt to lock the old
     * bucket.  If we can't get the lock, give up.
     *
     * The lock protects us against other backends, but not against our own
     * backend.  Must check for active scans separately.
     */
    new_bucket = metap->hashm_maxbucket + 1;

    /* the bucket to split is the new bucket number masked by lowmask */
    old_bucket = (new_bucket & metap->hashm_lowmask);

    start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);

    if (_hash_has_active_scan(rel, old_bucket))
        goto fail;

    if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
        goto fail;

    /*
     * Likewise lock the new bucket (should never fail).
     *
     * Note: it is safe to compute the new bucket's blkno here, even though we
     * may still need to update the BUCKET_TO_BLKNO mapping.  This is because
     * the current value of hashm_spares[hashm_ovflpoint] correctly shows
     * where we are going to put a new splitpoint's worth of buckets.
     */
    start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);

    if (_hash_has_active_scan(rel, new_bucket))
        elog(ERROR, "scan in progress on supposedly new bucket");

    if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE))
        elog(ERROR, "could not get lock on supposedly new bucket");

    /*
     * If the split point is increasing (hashm_maxbucket's log base 2
     * increases), we need to allocate a new batch of bucket pages.
     */
    spare_ndx = _hash_log2(new_bucket + 1);
    if (spare_ndx > metap->hashm_ovflpoint)
    {
        Assert(spare_ndx == metap->hashm_ovflpoint + 1);

        /*
         * The number of buckets in the new splitpoint is equal to the total
         * number already in existence, i.e. new_bucket.  Currently this maps
         * one-to-one to blocks required, but someday we may need a more
         * complicated calculation here.
         */
        if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
        {
            /* can't split due to BlockNumber overflow */
            _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
            _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
            goto fail;
        }
    }

    /*
     * Okay to proceed with split.  Update the metapage bucket mapping info.
     *
     * Since we are scribbling on the metapage data right in the shared
     * buffer, any failure in this next little bit leaves us with a big
     * problem: the metapage is effectively corrupt but could get written back
     * to disk.  We don't really expect any failure, but just to be sure,
     * establish a critical section.
     */
    START_CRIT_SECTION();

    metap->hashm_maxbucket = new_bucket;

    if (new_bucket > metap->hashm_highmask)
    {
        /* Starting a new doubling */
        metap->hashm_lowmask = metap->hashm_highmask;
        metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
    }

    /*
     * If the split point is increasing (hashm_maxbucket's log base 2
     * increases), we need to adjust the hashm_spares[] array and
     * hashm_ovflpoint so that future overflow pages will be created beyond
     * this new batch of bucket pages.
     */
    if (spare_ndx > metap->hashm_ovflpoint)
    {
        metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
        metap->hashm_ovflpoint = spare_ndx;
    }

    /* Done mucking with metapage */
    END_CRIT_SECTION();

    /*
     * Copy bucket mapping info now; this saves re-accessing the meta page
     * inside _hash_splitbucket's inner loop.  Note that once we drop the
     * split lock, other splits could begin, so these values might be out of
     * date before _hash_splitbucket finishes.  That's okay, since all it
     * needs is to tell which of these two buckets to map hashkeys into.
     */
    maxbucket = metap->hashm_maxbucket;
    highmask = metap->hashm_highmask;
    lowmask = metap->hashm_lowmask;

    /* Write out the metapage and drop lock, but keep pin */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    /* Relocate records to the new bucket */
    _hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
                      start_oblkno, start_nblkno,
                      maxbucket, highmask, lowmask);

    /* Release bucket locks, allowing others to access them */
    _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
    _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);

    return;

    /* Here if decide not to split or fail to acquire old bucket lock */
fail:

    /*
     * We didn't write the metapage, so just drop lock.
     *
     * NOTE(review): the lock was taken as HASH_WRITE above, yet HASH_READ is
     * passed as from_access here; presumably this keeps _hash_chgbufaccess
     * from treating the page as modified -- confirm against that function.
     */
    _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
}

Buffer _hash_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access,
int  flags 
)

Definition at line 115 of file hashpage.c.

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), P_NEW, and ReadBuffer().

Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getovflpage(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), and hashbulkdelete().

/*
 * Read block "blkno" of "rel" into a buffer, optionally lock it, and check
 * the page against "flags".
 *
 * "access" is the buffer lock mode to take, or HASH_NOLOCK for none.
 * P_NEW is rejected with an ERROR.  Returns the buffer.
 */
{
    Buffer      result;

    /* callers must always name an explicit block */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    result = ReadBuffer(rel, blkno);

    /* take the requested lock unless the caller asked for none */
    if (access != HASH_NOLOCK)
        LockBuffer(result, access);

    /* check the page against the caller's flags before handing it back */
    _hash_checkpage(rel, result, flags);

    return result;
}

Buffer _hash_getbuf_with_strategy ( Relation  rel,
BlockNumber  blkno,
int  access,
int  flags,
BufferAccessStrategy  bstrategy 
)

Definition at line 222 of file hashpage.c.

References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), MAIN_FORKNUM, P_NEW, RBM_NORMAL, and ReadBufferExtended().

Referenced by _hash_freeovflpage(), _hash_squeezebucket(), hashbulkdelete(), and pgstat_hash_page().

/*
 * Same contract as _hash_getbuf(), except that the block is read from the
 * main fork through ReadBufferExtended() so that the caller-supplied buffer
 * access strategy "bstrategy" is passed along.
 *
 * P_NEW is rejected with an ERROR.  Returns the buffer.
 */
{
    Buffer      result;

    /* callers must always name an explicit block */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    result = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                bstrategy);

    /* take the requested lock unless the caller asked for none */
    if (access != HASH_NOLOCK)
        LockBuffer(result, access);

    /* check the page against the caller's flags before handing it back */
    _hash_checkpage(rel, result, flags);

    return result;
}

Buffer _hash_getinitbuf ( Relation  rel,
BlockNumber  blkno 
)

Definition at line 151 of file hashpage.c.

References _hash_pageinit(), buf, BufferGetPage, BufferGetPageSize, elog, ERROR, HASH_WRITE, LockBuffer(), MAIN_FORKNUM, NULL, P_NEW, RBM_ZERO, and ReadBufferExtended().

Referenced by _hash_getovflpage().

/*
 * Fetch block "blkno" of the main fork in RBM_ZERO mode, write-lock the
 * buffer, and initialize the page as a hash page via _hash_pageinit().
 *
 * P_NEW is rejected with an ERROR.  Returns the buffer.
 */
{
    Buffer      initbuf;

    /* callers must always name an explicit block */
    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    initbuf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
    LockBuffer(initbuf, HASH_WRITE);

    /* set up the page before handing it back */
    _hash_pageinit(BufferGetPage(initbuf), BufferGetPageSize(initbuf));

    return initbuf;
}

void _hash_getlock ( Relation  rel,
BlockNumber  whichlock,
int  access 
)

Definition at line 67 of file hashpage.c.

References LockPage(), and USELOCKING.

Referenced by _hash_doinsert(), _hash_first(), hashbulkdelete(), and pgstat_hash_page().

/*
 * Acquire the page-level lock identified by "whichlock" with mode "access".
 *
 * Nothing is done when USELOCKING(rel) is false, i.e. when the relation is
 * local.
 */
{
    if (!USELOCKING(rel))
        return;

    LockPage(rel, whichlock, access);
}

Buffer _hash_getnewbuf ( Relation  rel,
BlockNumber  blkno,
ForkNumber  forkNum 
)

Definition at line 183 of file hashpage.c.

References _hash_pageinit(), buf, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, elog, ERROR, HASH_WRITE, LockBuffer(), NULL, P_NEW, RBM_NORMAL, RBM_ZERO, ReadBufferExtended(), RelationGetNumberOfBlocksInFork(), and RelationGetRelationName.

Referenced by _hash_getovflpage(), _hash_initbitmap(), _hash_metapinit(), and _hash_splitbucket().

/*
 * Obtain a write-locked, freshly initialized buffer for block "blkno" of
 * fork "forkNum".
 *
 * The block must either already lie within the fork or be the block
 * immediately past its current end; anything further out, and P_NEW itself,
 * is rejected with an ERROR.  Returns the buffer.
 */
{
    BlockNumber relsize = RelationGetNumberOfBlocksInFork(rel, forkNum);
    Buffer      newbuf;

    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");
    if (blkno > relsize)
        elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
             RelationGetRelationName(rel));

    if (blkno != relsize)
    {
        /* the block already exists within the fork */
        newbuf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
    }
    else
    {
        /* smgr insists we use P_NEW to extend the relation */
        newbuf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
        if (BufferGetBlockNumber(newbuf) != blkno)
            elog(ERROR, "unexpected hash relation size: %u, should be %u",
                 BufferGetBlockNumber(newbuf), blkno);
    }

    LockBuffer(newbuf, HASH_WRITE);

    /* set up the page before handing it back */
    _hash_pageinit(BufferGetPage(newbuf), BufferGetPageSize(newbuf));

    return newbuf;
}

uint32 _hash_metapinit ( Relation  rel,
double  num_tuples,
ForkNumber  forkNum 
)

Definition at line 324 of file hashpage.c.

References _hash_chgbufaccess(), _hash_getnewbuf(), _hash_initbitmap(), _hash_log2(), _hash_wrtbuf(), Assert, BMPG_MASK, BMPG_SHIFT, BUCKET_TO_BLKNO, buf, BufferGetPage, CHECK_FOR_INTERRUPTS, elog, ERROR, HASH_DEFAULT_FILLFACTOR, HASH_MAX_SPLITPOINTS, HASH_METAPAGE, HASH_NOLOCK, HASH_WRITE, HashGetMaxBitmapSize, HashMetaPageData::hashm_bmshift, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_bsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_procid, HashMetaPageData::hashm_spares, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HASHPROC, i, index_getprocid(), MAXALIGN, MemSet, PageGetSpecialPointer, RelationGetNumberOfBlocksInFork(), RelationGetRelationName, and RelationGetTargetPageUsage.

Referenced by hashbuild(), and hashbuildempty().

/*
 * Initialize the metapage, the initial set of bucket pages, and the first
 * bitmap page of a hash index.
 *
 * rel        - the hash index relation
 * num_tuples - estimated number of tuples, used to choose the initial
 *              number of buckets
 * forkNum    - the fork to initialize
 *
 * Returns the number of buckets created.  Raises an ERROR if the fork
 * already contains any blocks.
 */
{
    HashMetaPage metap;
    HashPageOpaque pageopaque;
    Buffer      metabuf;
    Buffer      buf;
    Page        pg;
    int32       data_width;
    int32       item_width;
    int32       ffactor;
    double      dnumbuckets;
    uint32      num_buckets;
    uint32      log2_num_buckets;
    uint32      i;

    /* safety check */
    if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
        elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
             RelationGetRelationName(rel));

    /*
     * Determine the target fill factor (in tuples per bucket) for this index.
     * The idea is to make the fill factor correspond to pages about as full
     * as the user-settable fillfactor parameter says.  We can compute it
     * exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
     */
    data_width = sizeof(uint32);
    item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
        sizeof(ItemIdData);     /* include the line pointer */
    ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
    /* keep to a sane range */
    if (ffactor < 10)
        ffactor = 10;

    /*
     * Choose the number of initial bucket pages to match the fill factor
     * given the estimated number of tuples.  We round up the result to the
     * next power of 2, however, and always force at least 2 bucket pages. The
     * upper limit is determined by considerations explained in
     * _hash_expandtable().
     */
    dnumbuckets = num_tuples / ffactor;
    if (dnumbuckets <= 2.0)
        num_buckets = 2;
    else if (dnumbuckets >= (double) 0x40000000)
        num_buckets = 0x40000000;
    else
        num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets);

    /* every branch above is expected to yield an exact power of 2 */
    log2_num_buckets = _hash_log2(num_buckets);
    Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
    Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);

    /*
     * We initialize the metapage, the first N bucket pages, and the first
     * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
     * calls to occur.  This ensures that the smgr level has the right idea of
     * the physical index length.
     */
    metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
    pg = BufferGetPage(metabuf);

    pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
    pageopaque->hasho_prevblkno = InvalidBlockNumber;
    pageopaque->hasho_nextblkno = InvalidBlockNumber;
    pageopaque->hasho_bucket = -1;
    pageopaque->hasho_flag = LH_META_PAGE;
    pageopaque->hasho_page_id = HASHO_PAGE_ID;

    metap = HashPageGetMeta(pg);

    metap->hashm_magic = HASH_MAGIC;
    metap->hashm_version = HASH_VERSION;
    metap->hashm_ntuples = 0;
    metap->hashm_nmaps = 0;
    metap->hashm_ffactor = ffactor;
    metap->hashm_bsize = HashGetMaxBitmapSize(pg);
    /* find largest bitmap array size that will fit in page size */
    for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
    {
        if ((1 << i) <= metap->hashm_bsize)
            break;
    }
    Assert(i > 0);
    metap->hashm_bmsize = 1 << i;
    metap->hashm_bmshift = i + BYTE_TO_BIT;
    Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));

    /*
     * Label the index with its primary hash support function's OID.  This is
     * pretty useless for normal operation (in fact, hashm_procid is not used
     * anywhere), but it might be handy for forensic purposes so we keep it.
     */
    metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

    /*
     * We initialize the index with N buckets, 0 .. N-1, occupying physical
     * blocks 1 to N.  The first freespace bitmap page is in block N+1. Since
     * N is a power of 2, we can set the masks this way:
     */
    metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
    metap->hashm_highmask = (num_buckets << 1) - 1;

    MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
    MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

    /* Set up mapping for one spare page after the initial splitpoints */
    metap->hashm_spares[log2_num_buckets] = 1;
    metap->hashm_ovflpoint = log2_num_buckets;
    metap->hashm_firstfree = 0;

    /*
     * Release buffer lock on the metapage while we initialize buckets.
     * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
     * won't accomplish anything.  It's a bad idea to hold buffer locks for
     * long intervals in any case, since that can block the bgwriter.
     */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    /*
     * Initialize the first N buckets
     */
    for (i = 0; i < num_buckets; i++)
    {
        /* Allow interrupts, in case N is huge */
        CHECK_FOR_INTERRUPTS();

        /* each bucket gets a single empty primary page with no chain links */
        buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
        pg = BufferGetPage(buf);
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
        pageopaque->hasho_prevblkno = InvalidBlockNumber;
        pageopaque->hasho_nextblkno = InvalidBlockNumber;
        pageopaque->hasho_bucket = i;
        pageopaque->hasho_flag = LH_BUCKET_PAGE;
        pageopaque->hasho_page_id = HASHO_PAGE_ID;
        _hash_wrtbuf(rel, buf);
    }

    /* Now reacquire buffer lock on metapage */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /*
     * Initialize first bitmap page
     */
    _hash_initbitmap(rel, metap, num_buckets + 1, forkNum);

    /* all done */
    _hash_wrtbuf(rel, metabuf);

    return num_buckets;
}

void _hash_pageinit ( Page  page,
Size  size 
)

Definition at line 480 of file hashpage.c.

References Assert, PageInit(), and PageIsNew.

Referenced by _hash_getinitbuf(), and _hash_getnewbuf().

/*
 * Initialize "page", of "size" bytes, as an empty hash page whose special
 * space is sized for a HashPageOpaqueData.
 *
 * The page is asserted to be new on entry.
 */
{
    Size        opaque_size = sizeof(HashPageOpaqueData);

    Assert(PageIsNew(page));
    PageInit(page, size, opaque_size);
}

void _hash_relbuf ( Relation  rel,
Buffer  buf 
)
static void _hash_splitbucket ( Relation  rel,
Buffer  metabuf,
Bucket  obucket,
Bucket  nbucket,
BlockNumber  start_oblkno,
BlockNumber  start_nblkno,
uint32  maxbucket,
uint32  highmask,
uint32  lowmask 
) [static]

Definition at line 737 of file hashpage.c.

References _hash_addovflpage(), _hash_chgbufaccess(), _hash_get_indextuple_hashkey(), _hash_getbuf(), _hash_getnewbuf(), _hash_hashkey2bucket(), _hash_pgaddtup(), _hash_relbuf(), _hash_squeezebucket(), _hash_wrtbuf(), Assert, BlockNumberIsValid, BufferGetPage, FirstOffsetNumber, HASH_NOLOCK, HASH_WRITE, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HashPageOpaqueData::hasho_prevblkno, IndexTupleDSize, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, MAIN_FORKNUM, MAXALIGN, NULL, OffsetNumberNext, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, and PageIndexMultiDelete().

Referenced by _hash_expandtable().

/*
 * Move the tuples of bucket "obucket" that now hash to "nbucket" into the
 * new bucket, then squeeze the old bucket.
 *
 * rel           - the hash index relation
 * metabuf       - metapage buffer, passed through to _hash_addovflpage()
 * obucket       - bucket being split
 * nbucket       - newly created bucket
 * start_oblkno  - block number of the old bucket's primary page
 * start_nblkno  - block number of the new bucket's primary page
 * maxbucket, highmask, lowmask
 *               - bucket mapping values handed to _hash_hashkey2bucket();
 *                 the caller in this file copies them from the metapage
 *
 * Every tuple on the old bucket's page chain is expected to map to either
 * obucket or nbucket (asserted below).
 */
{
    BlockNumber oblkno;
    BlockNumber nblkno;
    Buffer      obuf;
    Buffer      nbuf;
    Page        opage;
    Page        npage;
    HashPageOpaque oopaque;
    HashPageOpaque nopaque;

    /*
     * It should be okay to simultaneously write-lock pages from each bucket,
     * since no one else can be trying to acquire buffer lock on pages of
     * either bucket.
     */
    oblkno = start_oblkno;
    obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_BUCKET_PAGE);
    opage = BufferGetPage(obuf);
    oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);

    nblkno = start_nblkno;
    nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
    npage = BufferGetPage(nbuf);

    /* initialize the new bucket's primary page */
    nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
    nopaque->hasho_prevblkno = InvalidBlockNumber;
    nopaque->hasho_nextblkno = InvalidBlockNumber;
    nopaque->hasho_bucket = nbucket;
    nopaque->hasho_flag = LH_BUCKET_PAGE;
    nopaque->hasho_page_id = HASHO_PAGE_ID;

    /*
     * Partition the tuples in the old bucket between the old bucket and the
     * new bucket, advancing along the old bucket's overflow bucket chain and
     * adding overflow pages to the new bucket as needed.  Outer loop iterates
     * once per page in old bucket.
     */
    for (;;)
    {
        OffsetNumber ooffnum;
        OffsetNumber omaxoffnum;
        OffsetNumber deletable[MaxOffsetNumber];
        int         ndeletable = 0;

        /* Scan each tuple in old page */
        omaxoffnum = PageGetMaxOffsetNumber(opage);
        for (ooffnum = FirstOffsetNumber;
             ooffnum <= omaxoffnum;
             ooffnum = OffsetNumberNext(ooffnum))
        {
            IndexTuple  itup;
            Size        itemsz;
            Bucket      bucket;

            /*
             * Fetch the item's hash key (conveniently stored in the item) and
             * determine which bucket it now belongs in.
             */
            itup = (IndexTuple) PageGetItem(opage,
                                            PageGetItemId(opage, ooffnum));
            bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
                                          maxbucket, highmask, lowmask);

            if (bucket == nbucket)
            {
                /*
                 * insert the tuple into the new bucket.  if it doesn't fit on
                 * the current page in the new bucket, we must allocate a new
                 * overflow page and place the tuple on that page instead.
                 */
                itemsz = IndexTupleDSize(*itup);
                itemsz = MAXALIGN(itemsz);

                if (PageGetFreeSpace(npage) < itemsz)
                {
                    /* write out nbuf and drop lock, but keep pin */
                    _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
                    /* chain to a new overflow page */
                    nbuf = _hash_addovflpage(rel, metabuf, nbuf);
                    npage = BufferGetPage(nbuf);
                    /* we don't need nblkno or nopaque within the loop */
                }

                /*
                 * Insert tuple on new page, using _hash_pgaddtup to ensure
                 * correct ordering by hashkey.  This is a tad inefficient
                 * since we may have to shuffle itempointers repeatedly.
                 * Possible future improvement: accumulate all the items for
                 * the new page and qsort them before insertion.
                 */
                (void) _hash_pgaddtup(rel, nbuf, itemsz, itup);

                /*
                 * Mark tuple for deletion from old page.
                 */
                deletable[ndeletable++] = ooffnum;
            }
            else
            {
                /*
                 * the tuple stays on this page, so nothing to do.
                 */
                Assert(bucket == obucket);
            }
        }

        /* remember the next page in the chain before obuf is given up below */
        oblkno = oopaque->hasho_nextblkno;

        /*
         * Done scanning this old page.  If we moved any tuples, delete them
         * from the old page.
         */
        if (ndeletable > 0)
        {
            PageIndexMultiDelete(opage, deletable, ndeletable);
            _hash_wrtbuf(rel, obuf);
        }
        else
            _hash_relbuf(rel, obuf);

        /* Exit loop if no more overflow pages in old bucket */
        if (!BlockNumberIsValid(oblkno))
            break;

        /* Else, advance to next old page */
        obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
        opage = BufferGetPage(obuf);
        oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
    }

    /*
     * We're at the end of the old bucket chain, so we're done partitioning
     * the tuples.  Before quitting, call _hash_squeezebucket to ensure the
     * tuples remaining in the old bucket (including the overflow pages) are
     * packed as tightly as possible.  The new bucket is already tight.
     */
    _hash_wrtbuf(rel, nbuf);

    _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
}

bool _hash_try_getlock ( Relation  rel,
BlockNumber  whichlock,
int  access 
)

Definition at line 79 of file hashpage.c.

References ConditionalLockPage(), and USELOCKING.

Referenced by _hash_expandtable().

/*
 * Try to acquire the page-level lock identified by "whichlock" with mode
 * "access", returning the result of ConditionalLockPage().
 *
 * When USELOCKING(rel) is false (the relation is local) no lock is taken
 * and true is returned.
 */
{
    if (!USELOCKING(rel))
        return true;

    return ConditionalLockPage(rel, whichlock, access);
}

void _hash_wrtbuf ( Relation  rel,
Buffer  buf 
)