00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "postgres.h"
00030
00031 #include "access/hash.h"
00032 #include "miscadmin.h"
00033 #include "storage/lmgr.h"
00034 #include "storage/smgr.h"
00035
00036
00037 static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock,
00038 uint32 nblocks);
00039 static void _hash_splitbucket(Relation rel, Buffer metabuf,
00040 Bucket obucket, Bucket nbucket,
00041 BlockNumber start_oblkno,
00042 BlockNumber start_nblkno,
00043 uint32 maxbucket,
00044 uint32 highmask, uint32 lowmask);
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 #define USELOCKING(rel) (!RELATION_IS_LOCAL(rel))
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066 void
00067 _hash_getlock(Relation rel, BlockNumber whichlock, int access)
00068 {
00069 if (USELOCKING(rel))
00070 LockPage(rel, whichlock, access);
00071 }
00072
00073
00074
00075
00076
00077
00078 bool
00079 _hash_try_getlock(Relation rel, BlockNumber whichlock, int access)
00080 {
00081 if (USELOCKING(rel))
00082 return ConditionalLockPage(rel, whichlock, access);
00083 else
00084 return true;
00085 }
00086
00087
00088
00089
00090 void
00091 _hash_droplock(Relation rel, BlockNumber whichlock, int access)
00092 {
00093 if (USELOCKING(rel))
00094 UnlockPage(rel, whichlock, access);
00095 }
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114 Buffer
00115 _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags)
00116 {
00117 Buffer buf;
00118
00119 if (blkno == P_NEW)
00120 elog(ERROR, "hash AM does not use P_NEW");
00121
00122 buf = ReadBuffer(rel, blkno);
00123
00124 if (access != HASH_NOLOCK)
00125 LockBuffer(buf, access);
00126
00127
00128
00129 _hash_checkpage(rel, buf, flags);
00130
00131 return buf;
00132 }
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150 Buffer
00151 _hash_getinitbuf(Relation rel, BlockNumber blkno)
00152 {
00153 Buffer buf;
00154
00155 if (blkno == P_NEW)
00156 elog(ERROR, "hash AM does not use P_NEW");
00157
00158 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
00159
00160 LockBuffer(buf, HASH_WRITE);
00161
00162
00163
00164
00165 _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf));
00166
00167 return buf;
00168 }
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182 Buffer
00183 _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
00184 {
00185 BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum);
00186 Buffer buf;
00187
00188 if (blkno == P_NEW)
00189 elog(ERROR, "hash AM does not use P_NEW");
00190 if (blkno > nblocks)
00191 elog(ERROR, "access to noncontiguous page in hash index \"%s\"",
00192 RelationGetRelationName(rel));
00193
00194
00195 if (blkno == nblocks)
00196 {
00197 buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
00198 if (BufferGetBlockNumber(buf) != blkno)
00199 elog(ERROR, "unexpected hash relation size: %u, should be %u",
00200 BufferGetBlockNumber(buf), blkno);
00201 }
00202 else
00203 buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
00204
00205 LockBuffer(buf, HASH_WRITE);
00206
00207
00208
00209
00210 _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf));
00211
00212 return buf;
00213 }
00214
00215
00216
00217
00218
00219
00220
00221 Buffer
00222 _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
00223 int access, int flags,
00224 BufferAccessStrategy bstrategy)
00225 {
00226 Buffer buf;
00227
00228 if (blkno == P_NEW)
00229 elog(ERROR, "hash AM does not use P_NEW");
00230
00231 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
00232
00233 if (access != HASH_NOLOCK)
00234 LockBuffer(buf, access);
00235
00236
00237
00238 _hash_checkpage(rel, buf, flags);
00239
00240 return buf;
00241 }
00242
00243
00244
00245
00246
00247
00248 void
00249 _hash_relbuf(Relation rel, Buffer buf)
00250 {
00251 UnlockReleaseBuffer(buf);
00252 }
00253
00254
00255
00256
00257
00258
00259 void
00260 _hash_dropbuf(Relation rel, Buffer buf)
00261 {
00262 ReleaseBuffer(buf);
00263 }
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277 void
00278 _hash_wrtbuf(Relation rel, Buffer buf)
00279 {
00280 MarkBufferDirty(buf);
00281 UnlockReleaseBuffer(buf);
00282 }
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296 void
00297 _hash_chgbufaccess(Relation rel,
00298 Buffer buf,
00299 int from_access,
00300 int to_access)
00301 {
00302 if (from_access == HASH_WRITE)
00303 MarkBufferDirty(buf);
00304 if (from_access != HASH_NOLOCK)
00305 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
00306 if (to_access != HASH_NOLOCK)
00307 LockBuffer(buf, to_access);
00308 }
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323 uint32
00324 _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
00325 {
00326 HashMetaPage metap;
00327 HashPageOpaque pageopaque;
00328 Buffer metabuf;
00329 Buffer buf;
00330 Page pg;
00331 int32 data_width;
00332 int32 item_width;
00333 int32 ffactor;
00334 double dnumbuckets;
00335 uint32 num_buckets;
00336 uint32 log2_num_buckets;
00337 uint32 i;
00338
00339
00340 if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
00341 elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
00342 RelationGetRelationName(rel));
00343
00344
00345
00346
00347
00348
00349
00350 data_width = sizeof(uint32);
00351 item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
00352 sizeof(ItemIdData);
00353 ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
00354
00355 if (ffactor < 10)
00356 ffactor = 10;
00357
00358
00359
00360
00361
00362
00363
00364
00365 dnumbuckets = num_tuples / ffactor;
00366 if (dnumbuckets <= 2.0)
00367 num_buckets = 2;
00368 else if (dnumbuckets >= (double) 0x40000000)
00369 num_buckets = 0x40000000;
00370 else
00371 num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets);
00372
00373 log2_num_buckets = _hash_log2(num_buckets);
00374 Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
00375 Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
00376
00377
00378
00379
00380
00381
00382
00383 metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
00384 pg = BufferGetPage(metabuf);
00385
00386 pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
00387 pageopaque->hasho_prevblkno = InvalidBlockNumber;
00388 pageopaque->hasho_nextblkno = InvalidBlockNumber;
00389 pageopaque->hasho_bucket = -1;
00390 pageopaque->hasho_flag = LH_META_PAGE;
00391 pageopaque->hasho_page_id = HASHO_PAGE_ID;
00392
00393 metap = HashPageGetMeta(pg);
00394
00395 metap->hashm_magic = HASH_MAGIC;
00396 metap->hashm_version = HASH_VERSION;
00397 metap->hashm_ntuples = 0;
00398 metap->hashm_nmaps = 0;
00399 metap->hashm_ffactor = ffactor;
00400 metap->hashm_bsize = HashGetMaxBitmapSize(pg);
00401
00402 for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
00403 {
00404 if ((1 << i) <= metap->hashm_bsize)
00405 break;
00406 }
00407 Assert(i > 0);
00408 metap->hashm_bmsize = 1 << i;
00409 metap->hashm_bmshift = i + BYTE_TO_BIT;
00410 Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));
00411
00412
00413
00414
00415
00416
00417 metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
00418
00419
00420
00421
00422
00423
00424 metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
00425 metap->hashm_highmask = (num_buckets << 1) - 1;
00426
00427 MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
00428 MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
00429
00430
00431 metap->hashm_spares[log2_num_buckets] = 1;
00432 metap->hashm_ovflpoint = log2_num_buckets;
00433 metap->hashm_firstfree = 0;
00434
00435
00436
00437
00438
00439
00440
00441 _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
00442
00443
00444
00445
00446 for (i = 0; i < num_buckets; i++)
00447 {
00448
00449 CHECK_FOR_INTERRUPTS();
00450
00451 buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
00452 pg = BufferGetPage(buf);
00453 pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
00454 pageopaque->hasho_prevblkno = InvalidBlockNumber;
00455 pageopaque->hasho_nextblkno = InvalidBlockNumber;
00456 pageopaque->hasho_bucket = i;
00457 pageopaque->hasho_flag = LH_BUCKET_PAGE;
00458 pageopaque->hasho_page_id = HASHO_PAGE_ID;
00459 _hash_wrtbuf(rel, buf);
00460 }
00461
00462
00463 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
00464
00465
00466
00467
00468 _hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
00469
00470
00471 _hash_wrtbuf(rel, metabuf);
00472
00473 return num_buckets;
00474 }
00475
00476
00477
00478
00479 void
00480 _hash_pageinit(Page page, Size size)
00481 {
00482 Assert(PageIsNew(page));
00483 PageInit(page, size, sizeof(HashPageOpaqueData));
00484 }
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496 void
00497 _hash_expandtable(Relation rel, Buffer metabuf)
00498 {
00499 HashMetaPage metap;
00500 Bucket old_bucket;
00501 Bucket new_bucket;
00502 uint32 spare_ndx;
00503 BlockNumber start_oblkno;
00504 BlockNumber start_nblkno;
00505 uint32 maxbucket;
00506 uint32 highmask;
00507 uint32 lowmask;
00508
00509
00510
00511
00512
00513 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
00514
00515 _hash_checkpage(rel, metabuf, LH_META_PAGE);
00516 metap = HashPageGetMeta(BufferGetPage(metabuf));
00517
00518
00519
00520
00521
00522
00523
00524 if (metap->hashm_ntuples <=
00525 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
00526 goto fail;
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543 if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
00544 goto fail;
00545
00546
00547
00548
00549
00550
00551
00552
00553 new_bucket = metap->hashm_maxbucket + 1;
00554
00555 old_bucket = (new_bucket & metap->hashm_lowmask);
00556
00557 start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
00558
00559 if (_hash_has_active_scan(rel, old_bucket))
00560 goto fail;
00561
00562 if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
00563 goto fail;
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573 start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);
00574
00575 if (_hash_has_active_scan(rel, new_bucket))
00576 elog(ERROR, "scan in progress on supposedly new bucket");
00577
00578 if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE))
00579 elog(ERROR, "could not get lock on supposedly new bucket");
00580
00581
00582
00583
00584
00585 spare_ndx = _hash_log2(new_bucket + 1);
00586 if (spare_ndx > metap->hashm_ovflpoint)
00587 {
00588 Assert(spare_ndx == metap->hashm_ovflpoint + 1);
00589
00590
00591
00592
00593
00594
00595
00596 if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
00597 {
00598
00599 _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
00600 _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
00601 goto fail;
00602 }
00603 }
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614 START_CRIT_SECTION();
00615
00616 metap->hashm_maxbucket = new_bucket;
00617
00618 if (new_bucket > metap->hashm_highmask)
00619 {
00620
00621 metap->hashm_lowmask = metap->hashm_highmask;
00622 metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
00623 }
00624
00625
00626
00627
00628
00629
00630
00631 if (spare_ndx > metap->hashm_ovflpoint)
00632 {
00633 metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
00634 metap->hashm_ovflpoint = spare_ndx;
00635 }
00636
00637
00638 END_CRIT_SECTION();
00639
00640
00641
00642
00643
00644
00645
00646
00647 maxbucket = metap->hashm_maxbucket;
00648 highmask = metap->hashm_highmask;
00649 lowmask = metap->hashm_lowmask;
00650
00651
00652 _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
00653
00654
00655 _hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
00656 start_oblkno, start_nblkno,
00657 maxbucket, highmask, lowmask);
00658
00659
00660 _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
00661 _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
00662
00663 return;
00664
00665
00666 fail:
00667
00668
00669 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
00670 }
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697 static bool
00698 _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
00699 {
00700 BlockNumber lastblock;
00701 char zerobuf[BLCKSZ];
00702
00703 lastblock = firstblock + nblocks - 1;
00704
00705
00706
00707
00708
00709 if (lastblock < firstblock || lastblock == InvalidBlockNumber)
00710 return false;
00711
00712 MemSet(zerobuf, 0, sizeof(zerobuf));
00713
00714 RelationOpenSmgr(rel);
00715 smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false);
00716
00717 return true;
00718 }
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736 static void
00737 _hash_splitbucket(Relation rel,
00738 Buffer metabuf,
00739 Bucket obucket,
00740 Bucket nbucket,
00741 BlockNumber start_oblkno,
00742 BlockNumber start_nblkno,
00743 uint32 maxbucket,
00744 uint32 highmask,
00745 uint32 lowmask)
00746 {
00747 BlockNumber oblkno;
00748 BlockNumber nblkno;
00749 Buffer obuf;
00750 Buffer nbuf;
00751 Page opage;
00752 Page npage;
00753 HashPageOpaque oopaque;
00754 HashPageOpaque nopaque;
00755
00756
00757
00758
00759
00760
00761 oblkno = start_oblkno;
00762 obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_BUCKET_PAGE);
00763 opage = BufferGetPage(obuf);
00764 oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
00765
00766 nblkno = start_nblkno;
00767 nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
00768 npage = BufferGetPage(nbuf);
00769
00770
00771 nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
00772 nopaque->hasho_prevblkno = InvalidBlockNumber;
00773 nopaque->hasho_nextblkno = InvalidBlockNumber;
00774 nopaque->hasho_bucket = nbucket;
00775 nopaque->hasho_flag = LH_BUCKET_PAGE;
00776 nopaque->hasho_page_id = HASHO_PAGE_ID;
00777
00778
00779
00780
00781
00782
00783
00784 for (;;)
00785 {
00786 OffsetNumber ooffnum;
00787 OffsetNumber omaxoffnum;
00788 OffsetNumber deletable[MaxOffsetNumber];
00789 int ndeletable = 0;
00790
00791
00792 omaxoffnum = PageGetMaxOffsetNumber(opage);
00793 for (ooffnum = FirstOffsetNumber;
00794 ooffnum <= omaxoffnum;
00795 ooffnum = OffsetNumberNext(ooffnum))
00796 {
00797 IndexTuple itup;
00798 Size itemsz;
00799 Bucket bucket;
00800
00801
00802
00803
00804
00805 itup = (IndexTuple) PageGetItem(opage,
00806 PageGetItemId(opage, ooffnum));
00807 bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
00808 maxbucket, highmask, lowmask);
00809
00810 if (bucket == nbucket)
00811 {
00812
00813
00814
00815
00816
00817 itemsz = IndexTupleDSize(*itup);
00818 itemsz = MAXALIGN(itemsz);
00819
00820 if (PageGetFreeSpace(npage) < itemsz)
00821 {
00822
00823 _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
00824
00825 nbuf = _hash_addovflpage(rel, metabuf, nbuf);
00826 npage = BufferGetPage(nbuf);
00827
00828 }
00829
00830
00831
00832
00833
00834
00835
00836
00837 (void) _hash_pgaddtup(rel, nbuf, itemsz, itup);
00838
00839
00840
00841
00842 deletable[ndeletable++] = ooffnum;
00843 }
00844 else
00845 {
00846
00847
00848
00849 Assert(bucket == obucket);
00850 }
00851 }
00852
00853 oblkno = oopaque->hasho_nextblkno;
00854
00855
00856
00857
00858
00859 if (ndeletable > 0)
00860 {
00861 PageIndexMultiDelete(opage, deletable, ndeletable);
00862 _hash_wrtbuf(rel, obuf);
00863 }
00864 else
00865 _hash_relbuf(rel, obuf);
00866
00867
00868 if (!BlockNumberIsValid(oblkno))
00869 break;
00870
00871
00872 obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
00873 opage = BufferGetPage(obuf);
00874 oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
00875 }
00876
00877
00878
00879
00880
00881
00882
00883 _hash_wrtbuf(rel, nbuf);
00884
00885 _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
00886 }