00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "postgres.h"
00019
00020 #include "access/hash.h"
00021 #include "utils/rel.h"
00022
00023
00024 static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
00025 static uint32 _hash_firstfreebit(uint32 map);
00026
00027
00028
00029
00030
00031
00032 static BlockNumber
00033 bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
00034 {
00035 uint32 splitnum = metap->hashm_ovflpoint;
00036 uint32 i;
00037
00038
00039 ovflbitnum += 1;
00040
00041
00042 for (i = 1;
00043 i < splitnum && ovflbitnum > metap->hashm_spares[i];
00044 i++)
00045 ;
00046
00047
00048
00049
00050
00051 return (BlockNumber) ((1 << i) + ovflbitnum);
00052 }
00053
00054
00055
00056
00057 static uint32
00058 blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
00059 {
00060 uint32 splitnum = metap->hashm_ovflpoint;
00061 uint32 i;
00062 uint32 bitnum;
00063
00064
00065 for (i = 1; i <= splitnum; i++)
00066 {
00067 if (ovflblkno <= (BlockNumber) (1 << i))
00068 break;
00069 bitnum = ovflblkno - (1 << i);
00070 if (bitnum <= metap->hashm_spares[i])
00071 return bitnum - 1;
00072 }
00073
00074 elog(ERROR, "invalid overflow block number %u", ovflblkno);
00075 return 0;
00076 }
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 Buffer
00101 _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
00102 {
00103 Buffer ovflbuf;
00104 Page page;
00105 Page ovflpage;
00106 HashPageOpaque pageopaque;
00107 HashPageOpaque ovflopaque;
00108
00109
00110 ovflbuf = _hash_getovflpage(rel, metabuf);
00111
00112
00113
00114
00115
00116 _hash_chgbufaccess(rel, buf, HASH_NOLOCK, HASH_WRITE);
00117
00118
00119 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
00120
00121
00122 for (;;)
00123 {
00124 BlockNumber nextblkno;
00125
00126 page = BufferGetPage(buf);
00127 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
00128 nextblkno = pageopaque->hasho_nextblkno;
00129
00130 if (!BlockNumberIsValid(nextblkno))
00131 break;
00132
00133
00134 _hash_relbuf(rel, buf);
00135
00136 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
00137 }
00138
00139
00140 ovflpage = BufferGetPage(ovflbuf);
00141 ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
00142 ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
00143 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
00144 ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
00145 ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
00146 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
00147
00148 MarkBufferDirty(ovflbuf);
00149
00150
00151 pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
00152 _hash_wrtbuf(rel, buf);
00153
00154 return ovflbuf;
00155 }
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167 static Buffer
00168 _hash_getovflpage(Relation rel, Buffer metabuf)
00169 {
00170 HashMetaPage metap;
00171 Buffer mapbuf = 0;
00172 Buffer newbuf;
00173 BlockNumber blkno;
00174 uint32 orig_firstfree;
00175 uint32 splitnum;
00176 uint32 *freep = NULL;
00177 uint32 max_ovflpg;
00178 uint32 bit;
00179 uint32 first_page;
00180 uint32 last_bit;
00181 uint32 last_page;
00182 uint32 i,
00183 j;
00184
00185
00186 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
00187
00188 _hash_checkpage(rel, metabuf, LH_META_PAGE);
00189 metap = HashPageGetMeta(BufferGetPage(metabuf));
00190
00191
00192 orig_firstfree = metap->hashm_firstfree;
00193 first_page = orig_firstfree >> BMPG_SHIFT(metap);
00194 bit = orig_firstfree & BMPG_MASK(metap);
00195 i = first_page;
00196 j = bit / BITS_PER_MAP;
00197 bit &= ~(BITS_PER_MAP - 1);
00198
00199
00200 for (;;)
00201 {
00202 BlockNumber mapblkno;
00203 Page mappage;
00204 uint32 last_inpage;
00205
00206
00207 splitnum = metap->hashm_ovflpoint;
00208 max_ovflpg = metap->hashm_spares[splitnum] - 1;
00209 last_page = max_ovflpg >> BMPG_SHIFT(metap);
00210 last_bit = max_ovflpg & BMPG_MASK(metap);
00211
00212 if (i > last_page)
00213 break;
00214
00215 Assert(i < metap->hashm_nmaps);
00216 mapblkno = metap->hashm_mapp[i];
00217
00218 if (i == last_page)
00219 last_inpage = last_bit;
00220 else
00221 last_inpage = BMPGSZ_BIT(metap) - 1;
00222
00223
00224 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
00225
00226 mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
00227 mappage = BufferGetPage(mapbuf);
00228 freep = HashPageGetBitmap(mappage);
00229
00230 for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
00231 {
00232 if (freep[j] != ALL_SET)
00233 goto found;
00234 }
00235
00236
00237 _hash_relbuf(rel, mapbuf);
00238 i++;
00239 j = 0;
00240 bit = 0;
00241
00242
00243 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
00244 }
00245
00246
00247
00248
00249
00250 if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
00251 {
00252
00253
00254
00255
00256
00257
00258
00259
00260 bit = metap->hashm_spares[splitnum];
00261 _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
00262 metap->hashm_spares[splitnum]++;
00263 }
00264 else
00265 {
00266
00267
00268
00269
00270 }
00271
00272
00273 bit = metap->hashm_spares[splitnum];
00274 blkno = bitno_to_blkno(metap, bit);
00275
00276
00277
00278
00279
00280
00281
00282 newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
00283
00284 metap->hashm_spares[splitnum]++;
00285
00286
00287
00288
00289
00290 if (metap->hashm_firstfree == orig_firstfree)
00291 metap->hashm_firstfree = bit + 1;
00292
00293
00294 _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
00295
00296 return newbuf;
00297
00298 found:
00299
00300 bit += _hash_firstfreebit(freep[j]);
00301
00302
00303 SETBIT(freep, bit);
00304 _hash_wrtbuf(rel, mapbuf);
00305
00306
00307 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
00308
00309
00310 bit += (i << BMPG_SHIFT(metap));
00311
00312
00313 blkno = bitno_to_blkno(metap, bit);
00314
00315
00316
00317
00318
00319 if (metap->hashm_firstfree == orig_firstfree)
00320 {
00321 metap->hashm_firstfree = bit + 1;
00322
00323
00324 _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
00325 }
00326 else
00327 {
00328
00329 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
00330 }
00331
00332
00333 return _hash_getinitbuf(rel, blkno);
00334 }
00335
00336
00337
00338
00339
00340
00341 static uint32
00342 _hash_firstfreebit(uint32 map)
00343 {
00344 uint32 i,
00345 mask;
00346
00347 mask = 0x1;
00348 for (i = 0; i < BITS_PER_MAP; i++)
00349 {
00350 if (!(mask & map))
00351 return i;
00352 mask <<= 1;
00353 }
00354
00355 elog(ERROR, "firstfreebit found no free bit");
00356
00357 return 0;
00358 }
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376 BlockNumber
00377 _hash_freeovflpage(Relation rel, Buffer ovflbuf,
00378 BufferAccessStrategy bstrategy)
00379 {
00380 HashMetaPage metap;
00381 Buffer metabuf;
00382 Buffer mapbuf;
00383 BlockNumber ovflblkno;
00384 BlockNumber prevblkno;
00385 BlockNumber blkno;
00386 BlockNumber nextblkno;
00387 HashPageOpaque ovflopaque;
00388 Page ovflpage;
00389 Page mappage;
00390 uint32 *freep;
00391 uint32 ovflbitno;
00392 int32 bitmappage,
00393 bitmapbit;
00394 Bucket bucket PG_USED_FOR_ASSERTS_ONLY;
00395
00396
00397 _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
00398 ovflblkno = BufferGetBlockNumber(ovflbuf);
00399 ovflpage = BufferGetPage(ovflbuf);
00400 ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
00401 nextblkno = ovflopaque->hasho_nextblkno;
00402 prevblkno = ovflopaque->hasho_prevblkno;
00403 bucket = ovflopaque->hasho_bucket;
00404
00405
00406
00407
00408
00409
00410 MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
00411 _hash_wrtbuf(rel, ovflbuf);
00412
00413
00414
00415
00416
00417
00418
00419 if (BlockNumberIsValid(prevblkno))
00420 {
00421 Buffer prevbuf = _hash_getbuf_with_strategy(rel,
00422 prevblkno,
00423 HASH_WRITE,
00424 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
00425 bstrategy);
00426 Page prevpage = BufferGetPage(prevbuf);
00427 HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
00428
00429 Assert(prevopaque->hasho_bucket == bucket);
00430 prevopaque->hasho_nextblkno = nextblkno;
00431 _hash_wrtbuf(rel, prevbuf);
00432 }
00433 if (BlockNumberIsValid(nextblkno))
00434 {
00435 Buffer nextbuf = _hash_getbuf_with_strategy(rel,
00436 nextblkno,
00437 HASH_WRITE,
00438 LH_OVERFLOW_PAGE,
00439 bstrategy);
00440 Page nextpage = BufferGetPage(nextbuf);
00441 HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
00442
00443 Assert(nextopaque->hasho_bucket == bucket);
00444 nextopaque->hasho_prevblkno = prevblkno;
00445 _hash_wrtbuf(rel, nextbuf);
00446 }
00447
00448
00449
00450
00451 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
00452 metap = HashPageGetMeta(BufferGetPage(metabuf));
00453
00454
00455 ovflbitno = blkno_to_bitno(metap, ovflblkno);
00456
00457 bitmappage = ovflbitno >> BMPG_SHIFT(metap);
00458 bitmapbit = ovflbitno & BMPG_MASK(metap);
00459
00460 if (bitmappage >= metap->hashm_nmaps)
00461 elog(ERROR, "invalid overflow bit number %u", ovflbitno);
00462 blkno = metap->hashm_mapp[bitmappage];
00463
00464
00465 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
00466
00467
00468 mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
00469 mappage = BufferGetPage(mapbuf);
00470 freep = HashPageGetBitmap(mappage);
00471 Assert(ISSET(freep, bitmapbit));
00472 CLRBIT(freep, bitmapbit);
00473 _hash_wrtbuf(rel, mapbuf);
00474
00475
00476 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
00477
00478
00479 if (ovflbitno < metap->hashm_firstfree)
00480 {
00481 metap->hashm_firstfree = ovflbitno;
00482 _hash_wrtbuf(rel, metabuf);
00483 }
00484 else
00485 {
00486
00487 _hash_relbuf(rel, metabuf);
00488 }
00489
00490 return nextblkno;
00491 }
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504 void
00505 _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
00506 ForkNumber forkNum)
00507 {
00508 Buffer buf;
00509 Page pg;
00510 HashPageOpaque op;
00511 uint32 *freep;
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523 buf = _hash_getnewbuf(rel, blkno, forkNum);
00524 pg = BufferGetPage(buf);
00525
00526
00527 op = (HashPageOpaque) PageGetSpecialPointer(pg);
00528 op->hasho_prevblkno = InvalidBlockNumber;
00529 op->hasho_nextblkno = InvalidBlockNumber;
00530 op->hasho_bucket = -1;
00531 op->hasho_flag = LH_BITMAP_PAGE;
00532 op->hasho_page_id = HASHO_PAGE_ID;
00533
00534
00535 freep = HashPageGetBitmap(pg);
00536 MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));
00537
00538
00539 _hash_wrtbuf(rel, buf);
00540
00541
00542
00543 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
00544 ereport(ERROR,
00545 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00546 errmsg("out of overflow pages in hash index \"%s\"",
00547 RelationGetRelationName(rel))));
00548
00549 metap->hashm_mapp[metap->hashm_nmaps] = blkno;
00550
00551 metap->hashm_nmaps++;
00552 }
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579 void
00580 _hash_squeezebucket(Relation rel,
00581 Bucket bucket,
00582 BlockNumber bucket_blkno,
00583 BufferAccessStrategy bstrategy)
00584 {
00585 BlockNumber wblkno;
00586 BlockNumber rblkno;
00587 Buffer wbuf;
00588 Buffer rbuf;
00589 Page wpage;
00590 Page rpage;
00591 HashPageOpaque wopaque;
00592 HashPageOpaque ropaque;
00593 bool wbuf_dirty;
00594
00595
00596
00597
00598 wblkno = bucket_blkno;
00599 wbuf = _hash_getbuf_with_strategy(rel,
00600 wblkno,
00601 HASH_WRITE,
00602 LH_BUCKET_PAGE,
00603 bstrategy);
00604 wpage = BufferGetPage(wbuf);
00605 wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
00606
00607
00608
00609
00610 if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
00611 {
00612 _hash_relbuf(rel, wbuf);
00613 return;
00614 }
00615
00616
00617
00618
00619
00620
00621
00622 rbuf = InvalidBuffer;
00623 ropaque = wopaque;
00624 do
00625 {
00626 rblkno = ropaque->hasho_nextblkno;
00627 if (rbuf != InvalidBuffer)
00628 _hash_relbuf(rel, rbuf);
00629 rbuf = _hash_getbuf_with_strategy(rel,
00630 rblkno,
00631 HASH_WRITE,
00632 LH_OVERFLOW_PAGE,
00633 bstrategy);
00634 rpage = BufferGetPage(rbuf);
00635 ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
00636 Assert(ropaque->hasho_bucket == bucket);
00637 } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
00638
00639
00640
00641
00642 wbuf_dirty = false;
00643 for (;;)
00644 {
00645 OffsetNumber roffnum;
00646 OffsetNumber maxroffnum;
00647 OffsetNumber deletable[MaxOffsetNumber];
00648 int ndeletable = 0;
00649
00650
00651 maxroffnum = PageGetMaxOffsetNumber(rpage);
00652 for (roffnum = FirstOffsetNumber;
00653 roffnum <= maxroffnum;
00654 roffnum = OffsetNumberNext(roffnum))
00655 {
00656 IndexTuple itup;
00657 Size itemsz;
00658
00659 itup = (IndexTuple) PageGetItem(rpage,
00660 PageGetItemId(rpage, roffnum));
00661 itemsz = IndexTupleDSize(*itup);
00662 itemsz = MAXALIGN(itemsz);
00663
00664
00665
00666
00667
00668 while (PageGetFreeSpace(wpage) < itemsz)
00669 {
00670 Assert(!PageIsEmpty(wpage));
00671
00672 wblkno = wopaque->hasho_nextblkno;
00673 Assert(BlockNumberIsValid(wblkno));
00674
00675 if (wbuf_dirty)
00676 _hash_wrtbuf(rel, wbuf);
00677 else
00678 _hash_relbuf(rel, wbuf);
00679
00680
00681 if (rblkno == wblkno)
00682 {
00683 if (ndeletable > 0)
00684 {
00685
00686 PageIndexMultiDelete(rpage, deletable, ndeletable);
00687 _hash_wrtbuf(rel, rbuf);
00688 }
00689 else
00690 _hash_relbuf(rel, rbuf);
00691 return;
00692 }
00693
00694 wbuf = _hash_getbuf_with_strategy(rel,
00695 wblkno,
00696 HASH_WRITE,
00697 LH_OVERFLOW_PAGE,
00698 bstrategy);
00699 wpage = BufferGetPage(wbuf);
00700 wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
00701 Assert(wopaque->hasho_bucket == bucket);
00702 wbuf_dirty = false;
00703 }
00704
00705
00706
00707
00708
00709
00710
00711 (void) _hash_pgaddtup(rel, wbuf, itemsz, itup);
00712 wbuf_dirty = true;
00713
00714
00715 deletable[ndeletable++] = roffnum;
00716 }
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730 rblkno = ropaque->hasho_prevblkno;
00731 Assert(BlockNumberIsValid(rblkno));
00732
00733
00734 if (rblkno == wblkno)
00735 {
00736
00737 if (wbuf_dirty)
00738 _hash_wrtbuf(rel, wbuf);
00739 else
00740 _hash_relbuf(rel, wbuf);
00741
00742 _hash_freeovflpage(rel, rbuf, bstrategy);
00743
00744 return;
00745 }
00746
00747
00748 _hash_freeovflpage(rel, rbuf, bstrategy);
00749
00750 rbuf = _hash_getbuf_with_strategy(rel,
00751 rblkno,
00752 HASH_WRITE,
00753 LH_OVERFLOW_PAGE,
00754 bstrategy);
00755 rpage = BufferGetPage(rbuf);
00756 ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
00757 Assert(ropaque->hasho_bucket == bucket);
00758 }
00759
00760
00761 }