#include "postgres.h"
#include "access/hash.h"
#include "miscadmin.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
Go to the source code of this file.
#define USELOCKING(rel) (!RELATION_IS_LOCAL(rel))
Definition at line 54 of file hashpage.c.
Referenced by _hash_droplock(), _hash_getlock(), and _hash_try_getlock().
static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
Definition at line 698 of file hashpage.c.
References InvalidBlockNumber, MAIN_FORKNUM, MemSet, RelationData::rd_smgr, RelationOpenSmgr, and smgrextend().
Referenced by _hash_expandtable().
{
	char		zeropage[BLCKSZ];
	BlockNumber	final_blkno = firstblock + nblocks - 1;

	/*
	 * The requested range is firstblock .. final_blkno.  If the arithmetic
	 * wrapped around, or landed exactly on InvalidBlockNumber, the index
	 * cannot be extended any further; report that to the caller instead of
	 * touching storage.
	 */
	if (final_blkno == InvalidBlockNumber || final_blkno < firstblock)
		return false;

	/*
	 * Hand a zero-filled block to smgrextend() at the last block number of
	 * the range, in the main fork.
	 */
	MemSet(zeropage, 0, sizeof(zeropage));

	RelationOpenSmgr(rel);
	smgrextend(rel->rd_smgr, MAIN_FORKNUM, final_blkno, zeropage, false);

	return true;
}
Definition at line 297 of file hashpage.c.
References BUFFER_LOCK_UNLOCK, HASH_NOLOCK, HASH_WRITE, LockBuffer(), and MarkBufferDirty().
Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_first(), _hash_freeovflpage(), _hash_getovflpage(), _hash_metapinit(), _hash_splitbucket(), and hashgettuple().
{
	/* Leaving write access: flag the buffer dirty before the lock goes away */
	if (from_access == HASH_WRITE)
	{
		MarkBufferDirty(buf);
	}

	/* Give up the lock currently held, unless the caller held none */
	if (from_access != HASH_NOLOCK)
	{
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	}

	/* Take the requested lock, unless the caller wants to end up unlocked */
	if (to_access != HASH_NOLOCK)
	{
		LockBuffer(buf, to_access);
	}
}
Definition at line 260 of file hashpage.c.
References ReleaseBuffer().
Referenced by _hash_doinsert(), _hash_first(), hashendscan(), and hashrescan().
{
	/*
	 * Thin wrapper: hand the buffer straight to ReleaseBuffer().  No
	 * LockBuffer() call is made here.
	 */
	ReleaseBuffer(buf);
}
void _hash_droplock(Relation rel, BlockNumber whichlock, int access)
Definition at line 91 of file hashpage.c.
References UnlockPage(), and USELOCKING.
Referenced by _hash_doinsert(), _hash_expandtable(), _hash_first(), hashbulkdelete(), hashendscan(), hashrescan(), and pgstat_hash_page().
{
	/* Relations for which USELOCKING() is false never took a page lock */
	if (!USELOCKING(rel))
		return;

	/* Release the page-level lock identified by whichlock in mode access */
	UnlockPage(rel, whichlock, access);
}
Definition at line 497 of file hashpage.c.
References _hash_alloc_buckets(), _hash_checkpage(), _hash_chgbufaccess(), _hash_droplock(), _hash_has_active_scan(), _hash_log2(), _hash_splitbucket(), _hash_try_getlock(), Assert, BUCKET_TO_BLKNO, BufferGetPage, elog, END_CRIT_SECTION, ERROR, HASH_EXCLUSIVE, HASH_NOLOCK, HASH_READ, HASH_WRITE, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_spares, HashPageGetMeta, LH_META_PAGE, and START_CRIT_SECTION.
Referenced by _hash_doinsert().
{
/*
 * Attempt to grow the hash table by one bucket, splitting an existing
 * bucket into itself and a newly numbered one.
 *
 * The metapage buffer (metabuf) is write-locked on entry to this body and
 * unlocked again on every exit path.  The split is silently abandoned (via
 * the "fail" label) when it is no longer needed, when the bucket-number
 * limit has been reached, when the old bucket has an active scan or cannot
 * be locked, or when the new bucket pages cannot be allocated.  Trouble
 * with the supposedly new bucket is reported with elog(ERROR) instead.
 */
HashMetaPage metap;
Bucket old_bucket;
Bucket new_bucket;
uint32 spare_ndx;
BlockNumber start_oblkno;
BlockNumber start_nblkno;
uint32 maxbucket;
uint32 highmask;
uint32 lowmask;
/*
* Write-lock the meta page. It used to be necessary to acquire a
* heavyweight lock to begin a split, but that is no longer required.
*/
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
_hash_checkpage(rel, metabuf, LH_META_PAGE);
metap = HashPageGetMeta(BufferGetPage(metabuf));
/*
* Check to see if split is still needed; someone else might have already
* done one while we waited for the lock.
*
* Make sure this stays in sync with _hash_doinsert()
*/
if (metap->hashm_ntuples <=
(double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
goto fail;
/*
* Can't split anymore if maxbucket has reached its maximum possible
* value.
*
* Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
* the calculation maxbucket+1 mustn't overflow). Currently we restrict
* to half that because of overflow looping in _hash_log2() and
* insufficient space in hashm_spares[]. It's moot anyway because an
* index with 2^32 buckets would certainly overflow BlockNumber and hence
* _hash_alloc_buckets() would fail, but if we supported buckets smaller
* than a disk block then this would be an independent constraint.
*
* If you change this, see also the maximum initial number of buckets in
* _hash_metapinit().
*/
if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
goto fail;
/*
* Determine which bucket is to be split, and attempt to lock the old
* bucket. If we can't get the lock, give up.
*
* The lock protects us against other backends, but not against our own
* backend. Must check for active scans separately.
*/
new_bucket = metap->hashm_maxbucket + 1;
/* masking the new bucket number with lowmask yields the bucket to split */
old_bucket = (new_bucket & metap->hashm_lowmask);
start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
if (_hash_has_active_scan(rel, old_bucket))
goto fail;
if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
goto fail;
/*
* Likewise lock the new bucket (should never fail).
*
* Note: it is safe to compute the new bucket's blkno here, even though we
* may still need to update the BUCKET_TO_BLKNO mapping. This is because
* the current value of hashm_spares[hashm_ovflpoint] correctly shows
* where we are going to put a new splitpoint's worth of buckets.
*/
start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);
if (_hash_has_active_scan(rel, new_bucket))
elog(ERROR, "scan in progress on supposedly new bucket");
if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE))
elog(ERROR, "could not get lock on supposedly new bucket");
/*
* If the split point is increasing (hashm_maxbucket's log base 2
* increases), we need to allocate a new batch of bucket pages.
*/
spare_ndx = _hash_log2(new_bucket + 1);
if (spare_ndx > metap->hashm_ovflpoint)
{
Assert(spare_ndx == metap->hashm_ovflpoint + 1);
/*
* The number of buckets in the new splitpoint is equal to the total
* number already in existence, i.e. new_bucket. Currently this maps
* one-to-one to blocks required, but someday we may need a more
* complicated calculation here.
*/
if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
{
/* can't split due to BlockNumber overflow */
/* both bucket locks were taken above, so both must be dropped here */
_hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
_hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
goto fail;
}
}
/*
* Okay to proceed with split. Update the metapage bucket mapping info.
*
* Since we are scribbling on the metapage data right in the shared
* buffer, any failure in this next little bit leaves us with a big
* problem: the metapage is effectively corrupt but could get written back
* to disk. We don't really expect any failure, but just to be sure,
* establish a critical section.
*/
START_CRIT_SECTION();
metap->hashm_maxbucket = new_bucket;
if (new_bucket > metap->hashm_highmask)
{
/* Starting a new doubling */
metap->hashm_lowmask = metap->hashm_highmask;
metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
}
/*
* If the split point is increasing (hashm_maxbucket's log base 2
* increases), we need to adjust the hashm_spares[] array and
* hashm_ovflpoint so that future overflow pages will be created beyond
* this new batch of bucket pages.
*/
if (spare_ndx > metap->hashm_ovflpoint)
{
metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
metap->hashm_ovflpoint = spare_ndx;
}
/* Done mucking with metapage */
END_CRIT_SECTION();
/*
* Copy bucket mapping info now; this saves re-accessing the meta page
* inside _hash_splitbucket's inner loop. Note that once we drop the
* split lock, other splits could begin, so these values might be out of
* date before _hash_splitbucket finishes. That's okay, since all it
* needs is to tell which of these two buckets to map hashkeys into.
*/
maxbucket = metap->hashm_maxbucket;
highmask = metap->hashm_highmask;
lowmask = metap->hashm_lowmask;
/* Write out the metapage and drop lock, but keep pin */
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
/* Relocate records to the new bucket */
_hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
start_oblkno, start_nblkno,
maxbucket, highmask, lowmask);
/* Release bucket locks, allowing others to access them */
_hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
_hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
return;
/* Here if decide not to split or fail to acquire old bucket lock */
fail:
/* We didn't write the metapage, so just drop lock */
/*
 * NOTE(review): HASH_READ (not HASH_WRITE) is passed as the "from" mode
 * even though the lock was taken with HASH_WRITE; presumably this keeps
 * the unmodified metapage from being marked dirty -- confirm against
 * _hash_chgbufaccess().
 */
_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
}
Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
Definition at line 115 of file hashpage.c.
References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), P_NEW, and ReadBuffer().
Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getovflpage(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), and hashbulkdelete().
{
	Buffer		pinned;

	/* Callers must name a concrete block; P_NEW is rejected outright */
	if (blkno == P_NEW)
		elog(ERROR, "hash AM does not use P_NEW");

	pinned = ReadBuffer(rel, blkno);

	/* Lock in the requested mode, unless the caller asked for no lock */
	if (access != HASH_NOLOCK)
		LockBuffer(pinned, access);

	/* Validate the page against the expected page-type flags */
	_hash_checkpage(rel, pinned, flags);

	return pinned;
}
Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy)
Definition at line 222 of file hashpage.c.
References _hash_checkpage(), buf, elog, ERROR, HASH_NOLOCK, LockBuffer(), MAIN_FORKNUM, P_NEW, RBM_NORMAL, and ReadBufferExtended().
Referenced by _hash_freeovflpage(), _hash_squeezebucket(), hashbulkdelete(), and pgstat_hash_page().
{
	Buffer		pinned;

	/* Callers must name a concrete block; P_NEW is rejected outright */
	if (blkno == P_NEW)
		elog(ERROR, "hash AM does not use P_NEW");

	/* Read from the main fork, passing along the caller's access strategy */
	pinned = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
								bstrategy);

	/* Lock in the requested mode, unless the caller asked for no lock */
	if (access != HASH_NOLOCK)
		LockBuffer(pinned, access);

	/* Validate the page against the expected page-type flags */
	_hash_checkpage(rel, pinned, flags);

	return pinned;
}
Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno)
Definition at line 151 of file hashpage.c.
References _hash_pageinit(), buf, BufferGetPage, BufferGetPageSize, elog, ERROR, HASH_WRITE, LockBuffer(), MAIN_FORKNUM, NULL, P_NEW, RBM_ZERO, and ReadBufferExtended().
Referenced by _hash_getovflpage().
{
	Buffer		fresh;

	/* Callers must name a concrete block; P_NEW is rejected outright */
	if (blkno == P_NEW)
		elog(ERROR, "hash AM does not use P_NEW");

	/*
	 * Fetch the block from the main fork in RBM_ZERO mode and take the
	 * write lock.  NOTE(review): RBM_ZERO presumably yields a zeroed page
	 * rather than the on-disk contents -- confirm against bufmgr.
	 */
	fresh = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
	LockBuffer(fresh, HASH_WRITE);

	/* Lay out the page as an empty hash page before handing it back */
	_hash_pageinit(BufferGetPage(fresh), BufferGetPageSize(fresh));

	return fresh;
}
void _hash_getlock(Relation rel, BlockNumber whichlock, int access)
Definition at line 67 of file hashpage.c.
References LockPage(), and USELOCKING.
Referenced by _hash_doinsert(), _hash_first(), hashbulkdelete(), and pgstat_hash_page().
{
	/* Nothing to acquire for relations where USELOCKING() is false */
	if (!USELOCKING(rel))
		return;

	/* Take the page-level lock identified by whichlock in mode access */
	LockPage(rel, whichlock, access);
}
Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
Definition at line 183 of file hashpage.c.
References _hash_pageinit(), buf, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, elog, ERROR, HASH_WRITE, LockBuffer(), NULL, P_NEW, RBM_NORMAL, RBM_ZERO, ReadBufferExtended(), RelationGetNumberOfBlocksInFork(), and RelationGetRelationName.
Referenced by _hash_getovflpage(), _hash_initbitmap(), _hash_metapinit(), and _hash_splitbucket().
{
	BlockNumber	relsize = RelationGetNumberOfBlocksInFork(rel, forkNum);
	Buffer		newbuf;

	/* Callers must name a concrete block; P_NEW is rejected outright */
	if (blkno == P_NEW)
		elog(ERROR, "hash AM does not use P_NEW");

	/* The requested block may be at most one past the current end of fork */
	if (blkno > relsize)
		elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
			 RelationGetRelationName(rel));

	if (blkno != relsize)
	{
		/* Block already exists in the fork: fetch it in RBM_ZERO mode */
		newbuf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
	}
	else
	{
		/*
		 * Block lies exactly at the end of the fork.  smgr insists we use
		 * P_NEW to extend the relation, so ask for that and then verify we
		 * were given the block number we expected.
		 */
		newbuf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
		if (BufferGetBlockNumber(newbuf) != blkno)
			elog(ERROR, "unexpected hash relation size: %u, should be %u",
				 BufferGetBlockNumber(newbuf), blkno);
	}

	/* Take the write lock, then lay the page out as an empty hash page */
	LockBuffer(newbuf, HASH_WRITE);
	_hash_pageinit(BufferGetPage(newbuf), BufferGetPageSize(newbuf));

	return newbuf;
}
uint32 _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
Definition at line 324 of file hashpage.c.
References _hash_chgbufaccess(), _hash_getnewbuf(), _hash_initbitmap(), _hash_log2(), _hash_wrtbuf(), Assert, BMPG_MASK, BMPG_SHIFT, BUCKET_TO_BLKNO, buf, BufferGetPage, CHECK_FOR_INTERRUPTS, elog, ERROR, HASH_DEFAULT_FILLFACTOR, HASH_MAX_SPLITPOINTS, HASH_METAPAGE, HASH_NOLOCK, HASH_WRITE, HashGetMaxBitmapSize, HashMetaPageData::hashm_bmshift, HashMetaPageData::hashm_bmsize, HashMetaPageData::hashm_bsize, HashMetaPageData::hashm_ffactor, HashMetaPageData::hashm_firstfree, HashMetaPageData::hashm_highmask, HashMetaPageData::hashm_lowmask, HashMetaPageData::hashm_magic, HashMetaPageData::hashm_mapp, HashMetaPageData::hashm_maxbucket, HashMetaPageData::hashm_nmaps, HashMetaPageData::hashm_ntuples, HashMetaPageData::hashm_ovflpoint, HashMetaPageData::hashm_procid, HashMetaPageData::hashm_spares, HashMetaPageData::hashm_version, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HashPageOpaqueData::hasho_prevblkno, HashPageGetMeta, HASHPROC, i, index_getprocid(), MAXALIGN, MemSet, PageGetSpecialPointer, RelationGetNumberOfBlocksInFork(), RelationGetRelationName, and RelationGetTargetPageUsage.
Referenced by hashbuild(), and hashbuildempty().
{
/*
 * Initialize a brand-new hash index in fork forkNum of rel: the metapage,
 * the initial bucket pages, and the first bitmap page.
 *
 * num_tuples is the estimated tuple count used to size the initial bucket
 * array.  The return value is the number of buckets created.  An ERROR is
 * raised if the fork already contains any blocks.
 */
HashMetaPage metap;
HashPageOpaque pageopaque;
Buffer metabuf;
Buffer buf;
Page pg;
int32 data_width;
int32 item_width;
int32 ffactor;
double dnumbuckets;
uint32 num_buckets;
uint32 log2_num_buckets;
uint32 i;
/* safety check */
if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
RelationGetRelationName(rel));
/*
* Determine the target fill factor (in tuples per bucket) for this index.
* The idea is to make the fill factor correspond to pages about as full
* as the user-settable fillfactor parameter says. We can compute it
* exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
*/
data_width = sizeof(uint32);
item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
sizeof(ItemIdData); /* include the line pointer */
ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
/* keep to a sane range */
if (ffactor < 10)
ffactor = 10;
/*
* Choose the number of initial bucket pages to match the fill factor
* given the estimated number of tuples. We round up the result to the
* next power of 2, however, and always force at least 2 bucket pages. The
* upper limit is determined by considerations explained in
* _hash_expandtable().
*/
dnumbuckets = num_tuples / ffactor;
if (dnumbuckets <= 2.0)
num_buckets = 2;
/* 0x40000000 is 2^30: the largest initial bucket count allowed here */
else if (dnumbuckets >= (double) 0x40000000)
num_buckets = 0x40000000;
else
num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets);
log2_num_buckets = _hash_log2(num_buckets);
/* num_buckets must be an exact power of 2 at this point */
Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
/*
* We initialize the metapage, the first N bucket pages, and the first
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
*/
metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
pg = BufferGetPage(metabuf);
/* fill in the special-space fields that mark this page as the metapage */
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_prevblkno = InvalidBlockNumber;
pageopaque->hasho_nextblkno = InvalidBlockNumber;
pageopaque->hasho_bucket = -1;
pageopaque->hasho_flag = LH_META_PAGE;
pageopaque->hasho_page_id = HASHO_PAGE_ID;
metap = HashPageGetMeta(pg);
metap->hashm_magic = HASH_MAGIC;
metap->hashm_version = HASH_VERSION;
metap->hashm_ntuples = 0;
metap->hashm_nmaps = 0;
metap->hashm_ffactor = ffactor;
metap->hashm_bsize = HashGetMaxBitmapSize(pg);
/* find largest bitmap array size that will fit in page size */
for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
{
if ((1 << i) <= metap->hashm_bsize)
break;
}
Assert(i > 0);
/* record the chosen power-of-2 bitmap size and its shift (plus BYTE_TO_BIT) */
metap->hashm_bmsize = 1 << i;
metap->hashm_bmshift = i + BYTE_TO_BIT;
Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));
/*
* Label the index with its primary hash support function's OID. This is
* pretty useless for normal operation (in fact, hashm_procid is not used
* anywhere), but it might be handy for forensic purposes so we keep it.
*/
metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
/*
* We initialize the index with N buckets, 0 .. N-1, occupying physical
* blocks 1 to N. The first freespace bitmap page is in block N+1. Since
* N is a power of 2, we can set the masks this way:
*/
metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
metap->hashm_highmask = (num_buckets << 1) - 1;
MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
/* Set up mapping for one spare page after the initial splitpoints */
metap->hashm_spares[log2_num_buckets] = 1;
metap->hashm_ovflpoint = log2_num_buckets;
metap->hashm_firstfree = 0;
/*
* Release buffer lock on the metapage while we initialize buckets.
* Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
* won't accomplish anything. It's a bad idea to hold buffer locks for
* long intervals in any case, since that can block the bgwriter.
*/
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
/*
* Initialize the first N buckets
*/
for (i = 0; i < num_buckets; i++)
{
/* Allow interrupts, in case N is huge */
CHECK_FOR_INTERRUPTS();
buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
pg = BufferGetPage(buf);
/* mark the page as the primary page of bucket i, with no chain links */
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_prevblkno = InvalidBlockNumber;
pageopaque->hasho_nextblkno = InvalidBlockNumber;
pageopaque->hasho_bucket = i;
pageopaque->hasho_flag = LH_BUCKET_PAGE;
pageopaque->hasho_page_id = HASHO_PAGE_ID;
_hash_wrtbuf(rel, buf);
}
/* Now reacquire buffer lock on metapage */
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
/*
* Initialize first bitmap page
*/
_hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
/* all done */
_hash_wrtbuf(rel, metabuf);
return num_buckets;
}
Definition at line 480 of file hashpage.c.
References Assert, PageInit(), and PageIsNew.
Referenced by _hash_getinitbuf(), and _hash_getnewbuf().
{
	/* Every hash page reserves room for one HashPageOpaqueData at its end */
	Size		special_size = sizeof(HashPageOpaqueData);

	/* Only a page that PageIsNew() accepts may be (re)initialized here */
	Assert(PageIsNew(page));

	PageInit(page, size, special_size);
}
Definition at line 249 of file hashpage.c.
References UnlockReleaseBuffer().
Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_freeovflpage(), _hash_getovflpage(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), hashbulkdelete(), and pgstat_hash_page().
{
	/*
	 * Thin wrapper: unlock and release the buffer in a single call.  The
	 * buffer is not marked dirty here.
	 */
	UnlockReleaseBuffer(buf);
}
static void _hash_splitbucket(Relation rel, Buffer metabuf,
                              Bucket obucket, Bucket nbucket,
                              BlockNumber start_oblkno, BlockNumber start_nblkno,
                              uint32 maxbucket, uint32 highmask, uint32 lowmask)
Definition at line 737 of file hashpage.c.
References _hash_addovflpage(), _hash_chgbufaccess(), _hash_get_indextuple_hashkey(), _hash_getbuf(), _hash_getnewbuf(), _hash_hashkey2bucket(), _hash_pgaddtup(), _hash_relbuf(), _hash_squeezebucket(), _hash_wrtbuf(), Assert, BlockNumberIsValid, BufferGetPage, FirstOffsetNumber, HASH_NOLOCK, HASH_WRITE, HashPageOpaqueData::hasho_bucket, HashPageOpaqueData::hasho_flag, HashPageOpaqueData::hasho_nextblkno, HashPageOpaqueData::hasho_page_id, HashPageOpaqueData::hasho_prevblkno, IndexTupleDSize, LH_BUCKET_PAGE, LH_OVERFLOW_PAGE, MAIN_FORKNUM, MAXALIGN, NULL, OffsetNumberNext, PageGetFreeSpace(), PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, and PageIndexMultiDelete().
Referenced by _hash_expandtable().
{
/*
 * Redistribute the tuples of bucket obucket between obucket and the new
 * bucket nbucket.
 *
 * Walks the old bucket's page chain starting at start_oblkno.  Each tuple
 * whose hash key now maps to nbucket (per maxbucket/highmask/lowmask) is
 * added to the new bucket, whose primary page is start_nblkno, and then
 * deleted from its old page.  Overflow pages are chained onto the new
 * bucket as needed; metabuf is passed through to _hash_addovflpage() for
 * that purpose.  Finishes by calling _hash_squeezebucket() on the old
 * bucket.
 */
BlockNumber oblkno;
BlockNumber nblkno;
Buffer obuf;
Buffer nbuf;
Page opage;
Page npage;
HashPageOpaque oopaque;
HashPageOpaque nopaque;
/*
* It should be okay to simultaneously write-lock pages from each bucket,
* since no one else can be trying to acquire buffer lock on pages of
* either bucket.
*/
oblkno = start_oblkno;
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_BUCKET_PAGE);
opage = BufferGetPage(obuf);
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
nblkno = start_nblkno;
nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
npage = BufferGetPage(nbuf);
/* initialize the new bucket's primary page */
nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
nopaque->hasho_prevblkno = InvalidBlockNumber;
nopaque->hasho_nextblkno = InvalidBlockNumber;
nopaque->hasho_bucket = nbucket;
nopaque->hasho_flag = LH_BUCKET_PAGE;
nopaque->hasho_page_id = HASHO_PAGE_ID;
/*
* Partition the tuples in the old bucket between the old bucket and the
* new bucket, advancing along the old bucket's overflow bucket chain and
* adding overflow pages to the new bucket as needed. Outer loop iterates
* once per page in old bucket.
*/
for (;;)
{
OffsetNumber ooffnum;
OffsetNumber omaxoffnum;
/* offsets of tuples moved off the current old page; starts empty per page */
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
/* Scan each tuple in old page */
omaxoffnum = PageGetMaxOffsetNumber(opage);
for (ooffnum = FirstOffsetNumber;
ooffnum <= omaxoffnum;
ooffnum = OffsetNumberNext(ooffnum))
{
IndexTuple itup;
Size itemsz;
Bucket bucket;
/*
* Fetch the item's hash key (conveniently stored in the item) and
* determine which bucket it now belongs in.
*/
itup = (IndexTuple) PageGetItem(opage,
PageGetItemId(opage, ooffnum));
bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
maxbucket, highmask, lowmask);
if (bucket == nbucket)
{
/*
* insert the tuple into the new bucket. if it doesn't fit on
* the current page in the new bucket, we must allocate a new
* overflow page and place the tuple on that page instead.
*/
itemsz = IndexTupleDSize(*itup);
itemsz = MAXALIGN(itemsz);
if (PageGetFreeSpace(npage) < itemsz)
{
/* write out nbuf and drop lock, but keep pin */
_hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
/* chain to a new overflow page */
nbuf = _hash_addovflpage(rel, metabuf, nbuf);
npage = BufferGetPage(nbuf);
/* we don't need nblkno or nopaque within the loop */
}
/*
* Insert tuple on new page, using _hash_pgaddtup to ensure
* correct ordering by hashkey. This is a tad inefficient
* since we may have to shuffle itempointers repeatedly.
* Possible future improvement: accumulate all the items for
* the new page and qsort them before insertion.
*/
(void) _hash_pgaddtup(rel, nbuf, itemsz, itup);
/*
* Mark tuple for deletion from old page.
*/
deletable[ndeletable++] = ooffnum;
}
else
{
/*
* the tuple stays on this page, so nothing to do.
*/
Assert(bucket == obucket);
}
}
/* fetch the next-page link now, before obuf is written or released below */
oblkno = oopaque->hasho_nextblkno;
/*
* Done scanning this old page. If we moved any tuples, delete them
* from the old page.
*/
if (ndeletable > 0)
{
PageIndexMultiDelete(opage, deletable, ndeletable);
_hash_wrtbuf(rel, obuf);
}
else
_hash_relbuf(rel, obuf);
/* Exit loop if no more overflow pages in old bucket */
if (!BlockNumberIsValid(oblkno))
break;
/* Else, advance to next old page */
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
opage = BufferGetPage(obuf);
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
}
/*
* We're at the end of the old bucket chain, so we're done partitioning
* the tuples. Before quitting, call _hash_squeezebucket to ensure the
* tuples remaining in the old bucket (including the overflow pages) are
* packed as tightly as possible. The new bucket is already tight.
*/
_hash_wrtbuf(rel, nbuf);
_hash_squeezebucket(rel, obucket, start_oblkno, NULL);
}
bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int access)
Definition at line 79 of file hashpage.c.
References ConditionalLockPage(), and USELOCKING.
Referenced by _hash_expandtable().
{
	/* With locking disabled for this relation, report success immediately */
	if (!USELOCKING(rel))
		return true;

	/* Otherwise pass along whatever the conditional page-lock attempt says */
	return ConditionalLockPage(rel, whichlock, access);
}
Definition at line 278 of file hashpage.c.
References MarkBufferDirty(), and UnlockReleaseBuffer().
Referenced by _hash_addovflpage(), _hash_doinsert(), _hash_freeovflpage(), _hash_getovflpage(), _hash_initbitmap(), _hash_metapinit(), _hash_splitbucket(), _hash_squeezebucket(), and hashbulkdelete().
{
	/* Flag the buffer as modified first ... */
	MarkBufferDirty(buf);

	/* ... then unlock and release it in a single call */
	UnlockReleaseBuffer(buf);
}
1.7.1