00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "postgres.h"
00017
00018 #include "access/heapam.h"
00019 #include "access/nbtree.h"
00020 #include "access/transam.h"
00021 #include "miscadmin.h"
00022 #include "storage/lmgr.h"
00023 #include "storage/predicate.h"
00024 #include "utils/inval.h"
00025 #include "utils/tqual.h"
00026
00027 /* Working state for choosing a split point: set up by _bt_findsplitloc, updated by _bt_checksplitloc. */
00028 typedef struct
00029 {
00030 /* context data, filled in once by _bt_findsplitloc before any candidate is examined */
00031 Size newitemsz; /* size of the new item, including its line pointer */
00032 int fillfactor; /* fill percentage used to weight the split of a rightmost page */
00033 bool is_leaf; /* T if the page being split is a leaf page */
00034 bool is_rightmost; /* T if the page being split is rightmost on its level */
00035 OffsetNumber newitemoff; /* offset at which the new item is to be inserted */
00036 int leftspace; /* space available for items on the left page */
00037 int rightspace; /* space available for items on the right page */
00038 int olddataitemstotal; /* space taken up by the existing data items */
00039
00040 bool have_split; /* T once any feasible split has been recorded */
00041
00042 /* the fields below describe the best split seen so far; trust them only if have_split */
00043 bool newitemonleft; /* does the new item go on the left page? */
00044 OffsetNumber firstright; /* first existing item that goes to the right page */
00045 int best_delta; /* imbalance score of that split (smaller is better) */
00046 } FindSplitData;
00047
00048 /* Prototypes for the static (file-local) routines used by the insertion code. */
00049 static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
00050 /* uniqueness checking and choice of the insertion location */
00051 static TransactionId _bt_check_unique(Relation rel, IndexTuple itup,
00052 Relation heapRel, Buffer buf, OffsetNumber offset,
00053 ScanKey itup_scankey,
00054 IndexUniqueCheck checkUnique, bool *is_unique);
00055 static void _bt_findinsertloc(Relation rel,
00056 Buffer *bufptr,
00057 OffsetNumber *offsetptr,
00058 int keysz,
00059 ScanKey scankey,
00060 IndexTuple newtup,
00061 Relation heapRel);
00062 static void _bt_insertonpg(Relation rel, Buffer buf,
00063 BTStack stack,
00064 IndexTuple itup,
00065 OffsetNumber newitemoff,
00066 bool split_only_page);
00067 static Buffer _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
00068 OffsetNumber newitemoff, Size newitemsz,
00069 IndexTuple newitem, bool newitemonleft);
00070 static OffsetNumber _bt_findsplitloc(Relation rel, Page page,
00071 OffsetNumber newitemoff,
00072 Size newitemsz,
00073 bool *newitemonleft);
00074 static void _bt_checksplitloc(FindSplitData *state,
00075 OffsetNumber firstoldonright, bool newitemonleft,
00076 int dataitemstoleft, Size firstoldonrightsz); /* NOTE(review): the definition names this parameter olddataitemstoleft */
00077 static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup,
00078 OffsetNumber itup_off);
00079 static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
00080 int keysz, ScanKey scankey);
00081 static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102 bool
00103 _bt_doinsert(Relation rel, IndexTuple itup,
00104 IndexUniqueCheck checkUnique, Relation heapRel)
00105 {
00106 bool is_unique = false;
00107 int natts = rel->rd_rel->relnatts;
00108 ScanKey itup_scankey;
00109 BTStack stack;
00110 Buffer buf;
00111 OffsetNumber offset;
00112
00113
00114 itup_scankey = _bt_mkscankey(rel, itup);
00115
00116 top:
00117
00118 stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE);
00119
00120 offset = InvalidOffsetNumber;
00121
00122
00123 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
00124 LockBuffer(buf, BT_WRITE);
00125
00126
00127
00128
00129
00130
00131
00132
00133 buf = _bt_moveright(rel, buf, natts, itup_scankey, false, BT_WRITE);
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156 if (checkUnique != UNIQUE_CHECK_NO)
00157 {
00158 TransactionId xwait;
00159
00160 offset = _bt_binsrch(rel, buf, natts, itup_scankey, false);
00161 xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey,
00162 checkUnique, &is_unique);
00163
00164 if (TransactionIdIsValid(xwait))
00165 {
00166
00167 _bt_relbuf(rel, buf);
00168 XactLockTableWait(xwait);
00169
00170 _bt_freestack(stack);
00171 goto top;
00172 }
00173 }
00174
00175 if (checkUnique != UNIQUE_CHECK_EXISTING)
00176 {
00177
00178
00179
00180
00181
00182
00183
00184 CheckForSerializableConflictIn(rel, NULL, buf);
00185
00186 _bt_findinsertloc(rel, &buf, &offset, natts, itup_scankey, itup, heapRel);
00187 _bt_insertonpg(rel, buf, stack, itup, offset, false);
00188 }
00189 else
00190 {
00191
00192 _bt_relbuf(rel, buf);
00193 }
00194
00195
00196 _bt_freestack(stack);
00197 _bt_freeskey(itup_scankey);
00198
00199 return is_unique;
00200 }
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210 /* _bt_check_unique() -- check whether inserting itup would violate uniqueness. */
00211 /* Scans index entries equal to itup_scankey, starting at "offset" on "buf" */
00212 /* and continuing onto right siblings while the page's high key still matches. */
00213 /* Returns InvalidTransactionId if there is no conflict to wait for, or the */
00214 /* XID taken from the dirty snapshot when the caller should wait and retry. */
00215 /* A confirmed conflict raises ERRCODE_UNIQUE_VIOLATION, except that under */
00216 /* UNIQUE_CHECK_PARTIAL it is reported by setting *is_unique to false. */
00217 /* "buf" is still held by the caller on every return path; only nbuf is released here. */
00218 static TransactionId
00219 _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
00220 Buffer buf, OffsetNumber offset, ScanKey itup_scankey,
00221 IndexUniqueCheck checkUnique, bool *is_unique)
00222 {
00223 TupleDesc itupdesc = RelationGetDescr(rel);
00224 int natts = rel->rd_rel->relnatts;
00225 SnapshotData SnapshotDirty;
00226 OffsetNumber maxoff;
00227 Page page;
00228 BTPageOpaque opaque;
00229 Buffer nbuf = InvalidBuffer;
00230 bool found = false;
00231
00232 /* Assume unique until shown otherwise */
00233 *is_unique = true;
00234
00235 InitDirtySnapshot(SnapshotDirty);
00236
00237 page = BufferGetPage(buf);
00238 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00239 maxoff = PageGetMaxOffsetNumber(page);
00240
00241
00242 /* Scan over all entries equal to the scan key, looking for conflicting heap tuples. */
00243
00244 for (;;)
00245 {
00246 ItemId curitemid;
00247 IndexTuple curitup;
00248 BlockNumber nblkno;
00249
00250
00251 /* Only examine an item if the offset actually points at one; the starting */
00252 /* offset can be one past the last item on the page. */
00253
00254 if (offset <= maxoff)
00255 {
00256 curitemid = PageGetItemId(page, offset);
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269 /* Entries whose line pointer is already marked dead are skipped outright: */
00270 /* no key comparison and no heap visit is made for them. */
00271
00272 if (!ItemIdIsDead(curitemid))
00273 {
00274 ItemPointerData htid;
00275 bool all_dead;
00276
00277
00278
00279
00280 /* The first non-dead entry whose key differs from the scan key ends the scan. */
00281
00282
00283 if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey))
00284 break;
00285
00286 /* Keys match: note which heap tuple this index entry points at. */
00287 curitup = (IndexTuple) PageGetItem(page, curitemid);
00288 htid = curitup->t_tid;
00289
00290
00291
00292 /* In UNIQUE_CHECK_EXISTING mode an entry carrying itup's own heap TID is */
00293 /* the entry being rechecked, not a conflict; just remember we saw it. */
00294
00295 if (checkUnique == UNIQUE_CHECK_EXISTING &&
00296 ItemPointerCompare(&htid, &itup->t_tid) == 0)
00297 {
00298 found = true;
00299 }
00300
00301
00302 /* Otherwise probe the heap using the dirty snapshot. */
00303 /* NOTE(review): presumably a true result means some tuple reached from htid */
00304 /* satisfies the snapshot, and all_dead reports that none can ever be visible -- confirm. */
00305
00306 else if (heap_hot_search(&htid, heapRel, &SnapshotDirty,
00307 &all_dead))
00308 {
00309 TransactionId xwait;
00310
00311
00312
00313
00314
00315 /* Partial check: report the potential conflict through *is_unique */
00316 /* instead of waiting or raising an error. */
00317
00318 if (checkUnique == UNIQUE_CHECK_PARTIAL)
00319 {
00320 if (nbuf != InvalidBuffer)
00321 _bt_relbuf(rel, nbuf);
00322 *is_unique = false;
00323 return InvalidTransactionId;
00324 }
00325
00326
00327 /* If the dirty snapshot came back with a valid xmin, or failing that a valid */
00328 /* xmax, return that XID so the caller can wait for it and retry. */
00329
00330 xwait = (TransactionIdIsValid(SnapshotDirty.xmin)) ?
00331 SnapshotDirty.xmin : SnapshotDirty.xmax;
00332
00333 if (TransactionIdIsValid(xwait))
00334 {
00335 if (nbuf != InvalidBuffer)
00336 _bt_relbuf(rel, nbuf);
00337 /* Tell the caller to wait for this transaction */
00338 return xwait;
00339 }
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354 /* Before reporting a violation, recheck that the heap tuple this index */
00355 /* entry is being made for (itup->t_tid) is still found under SnapshotSelf. */
00356 /* htid is reused as scratch space for that lookup. */
00357
00358 htid = itup->t_tid;
00359 if (heap_hot_search(&htid, heapRel, SnapshotSelf, NULL))
00360 {
00361 /* still found: fall through and report the violation */
00362 }
00363 else
00364 {
00365
00366 /* Our own tuple is no longer found: leave the scan loop without */
00367 /* raising a unique-violation error. */
00368
00369 break;
00370 }
00371
00372
00373
00374
00375
00376
00377 /* This is a definite conflict.  Release nbuf (if any) and buf before */
00378 /* raising the error. */
00379
00380 if (nbuf != InvalidBuffer)
00381 _bt_relbuf(rel, nbuf);
00382 _bt_relbuf(rel, buf);
00383
00384 {
00385 Datum values[INDEX_MAX_KEYS];
00386 bool isnull[INDEX_MAX_KEYS];
00387
00388 index_deform_tuple(itup, RelationGetDescr(rel),
00389 values, isnull);
00390 ereport(ERROR,
00391 (errcode(ERRCODE_UNIQUE_VIOLATION),
00392 errmsg("duplicate key value violates unique constraint \"%s\"",
00393 RelationGetRelationName(rel)),
00394 errdetail("Key %s already exists.",
00395 BuildIndexValueDescription(rel,
00396 values, isnull)),
00397 errtableconstraint(heapRel,
00398 RelationGetRelationName(rel))));
00399 }
00400 }
00401 else if (all_dead)
00402 {
00403
00404
00405 /* No conflict, and the heap probe set all_dead: mark this index entry */
00406 /* dead and flag the page as containing garbage. */
00407
00408 ItemIdMarkDead(curitemid);
00409 opaque->btpo_flags |= BTP_HAS_GARBAGE;
00410
00411
00412 /* Mark as hint-dirty whichever buffer holds the current page: nbuf once */
00413 /* the scan has stepped right, otherwise the caller's buf. */
00414
00415 if (nbuf != InvalidBuffer)
00416 MarkBufferDirtyHint(nbuf);
00417 else
00418 MarkBufferDirtyHint(buf);
00419 }
00420 }
00421 }
00422
00423
00424 /* Advance to the next item, moving to a right sibling when this page is exhausted. */
00425
00426 if (offset < maxoff)
00427 offset = OffsetNumberNext(offset);
00428 else
00429 {
00430 /* Continue on the next page only if this page's high key equals the scan key */
00431 if (P_RIGHTMOST(opaque))
00432 break;
00433 if (!_bt_isequal(itupdesc, page, P_HIKEY,
00434 natts, itup_scankey))
00435 break;
00436 /* Step right, skipping pages for which P_IGNORE is true; running off the end is an error */
00437 for (;;)
00438 {
00439 nblkno = opaque->btpo_next;
00440 nbuf = _bt_relandgetbuf(rel, nbuf, nblkno, BT_READ);
00441 page = BufferGetPage(nbuf);
00442 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00443 if (!P_IGNORE(opaque))
00444 break;
00445 if (P_RIGHTMOST(opaque))
00446 elog(ERROR, "fell off the end of index \"%s\"",
00447 RelationGetRelationName(rel));
00448 }
00449 maxoff = PageGetMaxOffsetNumber(page);
00450 offset = P_FIRSTDATAKEY(opaque);
00451 }
00452 }
00453
00454
00455
00456 /* In UNIQUE_CHECK_EXISTING mode the scan must have come across the entry */
00457 /* carrying itup's own heap TID; if it did not, report an internal error. */
00458
00459 if (checkUnique == UNIQUE_CHECK_EXISTING && !found)
00460 ereport(ERROR,
00461 (errcode(ERRCODE_INTERNAL_ERROR),
00462 errmsg("failed to re-find tuple within index \"%s\"",
00463 RelationGetRelationName(rel)),
00464 errhint("This may be because of a non-immutable index expression."),
00465 errtableconstraint(heapRel,
00466 RelationGetRelationName(rel))));
00467
00468 if (nbuf != InvalidBuffer)
00469 _bt_relbuf(rel, nbuf);
00470
00471 return InvalidTransactionId;
00472 }
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504 static void
00505 _bt_findinsertloc(Relation rel,
00506 Buffer *bufptr,
00507 OffsetNumber *offsetptr,
00508 int keysz,
00509 ScanKey scankey,
00510 IndexTuple newtup,
00511 Relation heapRel)
00512 {
00513 Buffer buf = *bufptr;
00514 Page page = BufferGetPage(buf);
00515 Size itemsz;
00516 BTPageOpaque lpageop;
00517 bool movedright,
00518 vacuumed;
00519 OffsetNumber newitemoff;
00520 OffsetNumber firstlegaloff = *offsetptr;
00521
00522 lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
00523
00524 itemsz = IndexTupleDSize(*newtup);
00525 itemsz = MAXALIGN(itemsz);
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537 if (itemsz > BTMaxItemSize(page))
00538 ereport(ERROR,
00539 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00540 errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
00541 (unsigned long) itemsz,
00542 (unsigned long) BTMaxItemSize(page),
00543 RelationGetRelationName(rel)),
00544 errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
00545 "Consider a function index of an MD5 hash of the value, "
00546 "or use full text indexing."),
00547 errtableconstraint(heapRel,
00548 RelationGetRelationName(rel))));
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568 movedright = false;
00569 vacuumed = false;
00570 while (PageGetFreeSpace(page) < itemsz)
00571 {
00572 Buffer rbuf;
00573
00574
00575
00576
00577
00578 if (P_ISLEAF(lpageop) && P_HAS_GARBAGE(lpageop))
00579 {
00580 _bt_vacuum_one_page(rel, buf, heapRel);
00581
00582
00583
00584
00585
00586 vacuumed = true;
00587
00588 if (PageGetFreeSpace(page) >= itemsz)
00589 break;
00590 }
00591
00592
00593
00594
00595 if (P_RIGHTMOST(lpageop) ||
00596 _bt_compare(rel, keysz, scankey, page, P_HIKEY) != 0 ||
00597 random() <= (MAX_RANDOM_VALUE / 100))
00598 break;
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608 rbuf = InvalidBuffer;
00609
00610 for (;;)
00611 {
00612 BlockNumber rblkno = lpageop->btpo_next;
00613
00614 rbuf = _bt_relandgetbuf(rel, rbuf, rblkno, BT_WRITE);
00615 page = BufferGetPage(rbuf);
00616 lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
00617 if (!P_IGNORE(lpageop))
00618 break;
00619 if (P_RIGHTMOST(lpageop))
00620 elog(ERROR, "fell off the end of index \"%s\"",
00621 RelationGetRelationName(rel));
00622 }
00623 _bt_relbuf(rel, buf);
00624 buf = rbuf;
00625 movedright = true;
00626 vacuumed = false;
00627 }
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637 if (movedright)
00638 newitemoff = P_FIRSTDATAKEY(lpageop);
00639 else if (firstlegaloff != InvalidOffsetNumber && !vacuumed)
00640 newitemoff = firstlegaloff;
00641 else
00642 newitemoff = _bt_binsrch(rel, buf, keysz, scankey, false);
00643
00644 *bufptr = buf;
00645 *offsetptr = newitemoff;
00646 }
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668 /* _bt_insertonpg() -- insert itup at newitemoff on the page held in buf. */
00669 /* If the page lacks room for the MAXALIGNed tuple it is split: _bt_findsplitloc */
00670 /* picks the split point, _bt_split performs the split (placing itup as it goes), */
00671 /* and _bt_insert_parent is then called with both halves and the descent stack. */
00672 /* Otherwise the tuple is added in place inside a critical section, WAL-logged */
00673 /* when the relation needs WAL, and buf is released before returning. */
00674 /* split_only_page: when true (page asserted non-leaf), the metapage's fast root */
00675 /* is moved to this page if this page's level is above the recorded fast level. */
00676
00677 static void
00678 _bt_insertonpg(Relation rel,
00679 Buffer buf,
00680 BTStack stack,
00681 IndexTuple itup,
00682 OffsetNumber newitemoff,
00683 bool split_only_page)
00684 {
00685 Page page;
00686 BTPageOpaque lpageop;
00687 OffsetNumber firstright = InvalidOffsetNumber;
00688 Size itemsz;
00689
00690 page = BufferGetPage(buf);
00691 lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
00692
00693 itemsz = IndexTupleDSize(*itup);
00694 itemsz = MAXALIGN(itemsz);
00695
00696
00697
00698
00699
00700
00701
00702 /* Does the page need to be split to make room for the (aligned) item? */
00703
00704 if (PageGetFreeSpace(page) < itemsz)
00705 {
00706 bool is_root = P_ISROOT(lpageop);
00707 bool is_only = P_LEFTMOST(lpageop) && P_RIGHTMOST(lpageop);
00708 bool newitemonleft;
00709 Buffer rbuf;
00710
00711 /* Choose the split point */
00712 firstright = _bt_findsplitloc(rel, page,
00713 newitemoff, itemsz,
00714 &newitemonleft);
00715
00716 /* Split the page into left and right halves; rbuf is the new right page */
00717 rbuf = _bt_split(rel, buf, firstright,
00718 newitemoff, itemsz, itup, newitemonleft);
00719 PredicateLockPageSplit(rel,
00720 BufferGetBlockNumber(buf),
00721 BufferGetBlockNumber(rbuf));
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736 /* Hand both halves to _bt_insert_parent.  is_root and is_only were captured */
00737 /* above, before _bt_split rewrote the page's flags and sibling links. */
00738
00739 _bt_insert_parent(rel, buf, rbuf, stack, is_root, is_only);
00740 }
00741 else
00742 {
00743 Buffer metabuf = InvalidBuffer;
00744 Page metapg = NULL;
00745 BTMetaPageData *metad = NULL;
00746 OffsetNumber itup_off;
00747 BlockNumber itup_blkno;
00748
00749 itup_off = newitemoff;
00750 itup_blkno = BufferGetBlockNumber(buf);
00751
00752
00753
00754
00755
00756 /* When split_only_page is set, lock the metapage so the fast root can be */
00757 /* pointed at this page; metabuf stays valid only if an update is wanted. */
00758
00759 if (split_only_page)
00760 {
00761 Assert(!P_ISLEAF(lpageop));
00762
00763 metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
00764 metapg = BufferGetPage(metabuf);
00765 metad = BTPageGetMeta(metapg);
00766
00767 if (metad->btm_fastlevel >= lpageop->btpo.level)
00768 {
00769 /* recorded fast level is already at or above this page: no update wanted */
00770 _bt_relbuf(rel, metabuf);
00771 metabuf = InvalidBuffer;
00772 }
00773 }
00774
00775 /* Apply the change inside a critical section; failure to add the item is a PANIC */
00776 START_CRIT_SECTION();
00777
00778 if (!_bt_pgaddtup(page, itemsz, itup, newitemoff))
00779 elog(PANIC, "failed to add new item to block %u in index \"%s\"",
00780 itup_blkno, RelationGetRelationName(rel));
00781
00782 MarkBufferDirty(buf);
00783
00784 if (BufferIsValid(metabuf))
00785 {
00786 metad->btm_fastroot = itup_blkno;
00787 metad->btm_fastlevel = lpageop->btpo.level;
00788 MarkBufferDirty(metabuf);
00789 }
00790
00791 /* WAL-log the insertion: rdata[] is chained as header, [downlink], [metadata], tuple */
00792 if (RelationNeedsWAL(rel))
00793 {
00794 xl_btree_insert xlrec;
00795 BlockNumber xldownlink;
00796 xl_btree_metadata xlmeta;
00797 uint8 xlinfo;
00798 XLogRecPtr recptr;
00799 XLogRecData rdata[4];
00800 XLogRecData *nextrdata;
00801 IndexTupleData trunctuple;
00802
00803 xlrec.target.node = rel->rd_node;
00804 ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
00805
00806 rdata[0].data = (char *) &xlrec;
00807 rdata[0].len = SizeOfBtreeInsert;
00808 rdata[0].buffer = InvalidBuffer;
00809 rdata[0].next = nextrdata = &(rdata[1]);
00810
00811 if (P_ISLEAF(lpageop))
00812 xlinfo = XLOG_BTREE_INSERT_LEAF;
00813 else
00814 {
00815 xldownlink = ItemPointerGetBlockNumber(&(itup->t_tid));
00816 Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
00817
00818 nextrdata->data = (char *) &xldownlink;
00819 nextrdata->len = sizeof(BlockNumber);
00820 nextrdata->buffer = InvalidBuffer;
00821 nextrdata->next = nextrdata + 1;
00822 nextrdata++;
00823
00824 xlinfo = XLOG_BTREE_INSERT_UPPER;
00825 }
00826
00827 if (BufferIsValid(metabuf))
00828 {
00829 xlmeta.root = metad->btm_root;
00830 xlmeta.level = metad->btm_level;
00831 xlmeta.fastroot = metad->btm_fastroot;
00832 xlmeta.fastlevel = metad->btm_fastlevel;
00833
00834 nextrdata->data = (char *) &xlmeta;
00835 nextrdata->len = sizeof(xl_btree_metadata);
00836 nextrdata->buffer = InvalidBuffer;
00837 nextrdata->next = nextrdata + 1;
00838 nextrdata++;
00839
00840 xlinfo = XLOG_BTREE_INSERT_META;
00841 }
00842
00843 /* First data item on a non-leaf page: log only a bare tuple header (NOTE(review): presumably mirrors _bt_pgaddtup -- confirm) */
00844 if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
00845 {
00846 trunctuple = *itup;
00847 trunctuple.t_info = sizeof(IndexTupleData);
00848 nextrdata->data = (char *) &trunctuple;
00849 nextrdata->len = sizeof(IndexTupleData);
00850 }
00851 else
00852 {
00853 nextrdata->data = (char *) itup;
00854 nextrdata->len = IndexTupleDSize(*itup);
00855 }
00856 nextrdata->buffer = buf;
00857 nextrdata->buffer_std = true;
00858 nextrdata->next = NULL;
00859
00860 recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
00861
00862 if (BufferIsValid(metabuf))
00863 {
00864 PageSetLSN(metapg, recptr);
00865 }
00866
00867 PageSetLSN(page, recptr);
00868 }
00869
00870 END_CRIT_SECTION();
00871
00872 /* Release buffers; if the metapage was changed, send a relcache invalidation (not during recovery) */
00873 if (BufferIsValid(metabuf))
00874 {
00875 if (!InRecovery)
00876 CacheInvalidateRelcache(rel);
00877 _bt_relbuf(rel, metabuf);
00878 }
00879
00880 _bt_relbuf(rel, buf);
00881 }
00882 }
00883
00884
00885
00886 /* _bt_split() -- split the page in buf in two, placing newitem as part of the split. */
00887 /* firstright is the first existing item that goes to the right page; newitem is */
00888 /* placed just before the existing item at newitemoff, on the side newitemonleft says. */
00889 /* The left half is assembled in a temporary page and copied back over the original */
00890 /* page; the right half is built in a newly obtained page, whose buffer is returned. */
00891 /* The old right sibling (if any) gets its left-link updated and is released here. */
00892 /* Neither buf nor the returned buffer is released by this function. */
00893 /* Any failure before the critical section zeroes the new right page and raises ERROR. */
00894
00895 static Buffer
00896 _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
00897 OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem,
00898 bool newitemonleft)
00899 {
00900 Buffer rbuf;
00901 Page origpage;
00902 Page leftpage,
00903 rightpage;
00904 BlockNumber origpagenumber,
00905 rightpagenumber;
00906 BTPageOpaque ropaque,
00907 lopaque,
00908 oopaque;
00909 Buffer sbuf = InvalidBuffer;
00910 Page spage = NULL;
00911 BTPageOpaque sopaque = NULL;
00912 Size itemsz;
00913 ItemId itemid;
00914 IndexTuple item;
00915 OffsetNumber leftoff,
00916 rightoff;
00917 OffsetNumber maxoff;
00918 OffsetNumber i;
00919 bool isroot;
00920
00921 /* Obtain a new page to hold the right half */
00922 rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932 /* origpage is the page being split; leftpage is a temporary page in which the */
00933 /* new left half is assembled; rightpage is the new buffer's page, filled in directly. */
00934
00935 origpage = BufferGetPage(buf);
00936 leftpage = PageGetTempPage(origpage);
00937 rightpage = BufferGetPage(rbuf);
00938
00939 origpagenumber = BufferGetBlockNumber(buf);
00940 rightpagenumber = BufferGetBlockNumber(rbuf);
00941
00942 _bt_pageinit(leftpage, BufferGetPageSize(buf));
00943
00944
00945
00946
00947
00948 /* Carry the original page's LSN over to the temporary left page */
00949
00950 PageSetLSN(leftpage, PageGetLSN(origpage));
00951
00952 /* Set up pointers to the three pages' btree special areas */
00953 oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage);
00954 lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage);
00955 ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage);
00956
00957 isroot = P_ISROOT(oopaque);
00958
00959 /* Both halves inherit the original flags minus ROOT, SPLIT_END and HAS_GARBAGE, */
00960 /* are linked to each other and to the old neighbours, and keep the original level. */
00961 lopaque->btpo_flags = oopaque->btpo_flags;
00962 lopaque->btpo_flags &= ~(BTP_ROOT | BTP_SPLIT_END | BTP_HAS_GARBAGE);
00963 ropaque->btpo_flags = lopaque->btpo_flags;
00964 lopaque->btpo_prev = oopaque->btpo_prev;
00965 lopaque->btpo_next = rightpagenumber;
00966 ropaque->btpo_prev = origpagenumber;
00967 ropaque->btpo_next = oopaque->btpo_next;
00968 lopaque->btpo.level = ropaque->btpo.level = oopaque->btpo.level;
00969 /* Both halves are stamped with the current vacuum cycle ID */
00970 lopaque->btpo_cycleid = _bt_vacuum_cycleid(rel);
00971 ropaque->btpo_cycleid = lopaque->btpo_cycleid;
00972
00973
00974
00975
00976
00977 /* If the original page was not rightmost, its high key becomes the right */
00978 /* page's high key; otherwise the right page gets no high key item. */
00979
00980 rightoff = P_HIKEY;
00981
00982 if (!P_RIGHTMOST(oopaque))
00983 {
00984 itemid = PageGetItemId(origpage, P_HIKEY);
00985 itemsz = ItemIdGetLength(itemid);
00986 item = (IndexTuple) PageGetItem(origpage, itemid);
00987 if (PageAddItem(rightpage, (Item) item, itemsz, rightoff,
00988 false, false) == InvalidOffsetNumber)
00989 {
00990 memset(rightpage, 0, BufferGetPageSize(rbuf));
00991 elog(ERROR, "failed to add hikey to the right sibling"
00992 " while splitting block %u of index \"%s\"",
00993 origpagenumber, RelationGetRelationName(rel));
00994 }
00995 rightoff = OffsetNumberNext(rightoff);
00996 }
00997
00998
00999
01000 /* The left page's high key is a copy of the first item that will land on */
01001 /* the right page: either the new item or the existing item at firstright. */
01002
01003 leftoff = P_HIKEY;
01004 if (!newitemonleft && newitemoff == firstright)
01005 {
01006 /* the incoming tuple will be the first item on the right page */
01007 itemsz = newitemsz;
01008 item = newitem;
01009 }
01010 else
01011 {
01012 /* an existing item will be the first item on the right page */
01013 itemid = PageGetItemId(origpage, firstright);
01014 itemsz = ItemIdGetLength(itemid);
01015 item = (IndexTuple) PageGetItem(origpage, itemid);
01016 }
01017 if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
01018 false, false) == InvalidOffsetNumber)
01019 {
01020 memset(rightpage, 0, BufferGetPageSize(rbuf));
01021 elog(ERROR, "failed to add hikey to the left sibling"
01022 " while splitting block %u of index \"%s\"",
01023 origpagenumber, RelationGetRelationName(rel));
01024 }
01025 leftoff = OffsetNumberNext(leftoff);
01026
01027
01028
01029
01030 /* Now distribute the existing data items: those before firstright go left, */
01031 /* the rest go right, with the new item slotted in just before item newitemoff. */
01032
01033 maxoff = PageGetMaxOffsetNumber(origpage);
01034
01035 for (i = P_FIRSTDATAKEY(oopaque); i <= maxoff; i = OffsetNumberNext(i))
01036 {
01037 itemid = PageGetItemId(origpage, i);
01038 itemsz = ItemIdGetLength(itemid);
01039 item = (IndexTuple) PageGetItem(origpage, itemid);
01040
01041 /* does the new item belong just before this one? */
01042 if (i == newitemoff)
01043 {
01044 if (newitemonleft)
01045 {
01046 if (!_bt_pgaddtup(leftpage, newitemsz, newitem, leftoff))
01047 {
01048 memset(rightpage, 0, BufferGetPageSize(rbuf));
01049 elog(ERROR, "failed to add new item to the left sibling"
01050 " while splitting block %u of index \"%s\"",
01051 origpagenumber, RelationGetRelationName(rel));
01052 }
01053 leftoff = OffsetNumberNext(leftoff);
01054 }
01055 else
01056 {
01057 if (!_bt_pgaddtup(rightpage, newitemsz, newitem, rightoff))
01058 {
01059 memset(rightpage, 0, BufferGetPageSize(rbuf));
01060 elog(ERROR, "failed to add new item to the right sibling"
01061 " while splitting block %u of index \"%s\"",
01062 origpagenumber, RelationGetRelationName(rel));
01063 }
01064 rightoff = OffsetNumberNext(rightoff);
01065 }
01066 }
01067
01068 /* decide which page the existing item goes on */
01069 if (i < firstright)
01070 {
01071 if (!_bt_pgaddtup(leftpage, itemsz, item, leftoff))
01072 {
01073 memset(rightpage, 0, BufferGetPageSize(rbuf));
01074 elog(ERROR, "failed to add old item to the left sibling"
01075 " while splitting block %u of index \"%s\"",
01076 origpagenumber, RelationGetRelationName(rel));
01077 }
01078 leftoff = OffsetNumberNext(leftoff);
01079 }
01080 else
01081 {
01082 if (!_bt_pgaddtup(rightpage, itemsz, item, rightoff))
01083 {
01084 memset(rightpage, 0, BufferGetPageSize(rbuf));
01085 elog(ERROR, "failed to add old item to the right sibling"
01086 " while splitting block %u of index \"%s\"",
01087 origpagenumber, RelationGetRelationName(rel));
01088 }
01089 rightoff = OffsetNumberNext(rightoff);
01090 }
01091 }
01092
01093 /* i is now maxoff + 1: handle a new item that goes after every existing item */
01094 if (i <= newitemoff)
01095 {
01096
01097
01098 /* Such an item was not placed by the loop above; it is asserted to belong */
01099 /* on the right page and is appended there. */
01100
01101 Assert(!newitemonleft);
01102 if (!_bt_pgaddtup(rightpage, newitemsz, newitem, rightoff))
01103 {
01104 memset(rightpage, 0, BufferGetPageSize(rbuf));
01105 elog(ERROR, "failed to add new item to the right sibling"
01106 " while splitting block %u of index \"%s\"",
01107 origpagenumber, RelationGetRelationName(rel));
01108 }
01109 rightoff = OffsetNumberNext(rightoff);
01110 }
01111
01112
01113
01114
01115
01116
01117 /* If there is an old right sibling, write-lock it now (still outside the */
01118 /* critical section) and verify that its left-link points back at the page being split. */
01119
01120 if (!P_RIGHTMOST(oopaque))
01121 {
01122 sbuf = _bt_getbuf(rel, oopaque->btpo_next, BT_WRITE);
01123 spage = BufferGetPage(sbuf);
01124 sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
01125 if (sopaque->btpo_prev != origpagenumber)
01126 {
01127 memset(rightpage, 0, BufferGetPageSize(rbuf));
01128 elog(ERROR, "right sibling's left-link doesn't match: "
01129 "block %u links to %u instead of expected %u in index \"%s\"",
01130 oopaque->btpo_next, sopaque->btpo_prev, origpagenumber,
01131 RelationGetRelationName(rel));
01132 }
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144 /* Set BTP_SPLIT_END on the new right page when the old right sibling */
01145 /* carries a different vacuum cycle ID than the one just stamped on it. */
01146
01147 if (sopaque->btpo_cycleid != ropaque->btpo_cycleid)
01148 ropaque->btpo_flags |= BTP_SPLIT_END;
01149 }
01150
01151
01152
01153
01154
01155
01156 /* Everything that can raise ERROR has been done; from here to */
01157 /* END_CRIT_SECTION the shared pages are actually modified. */
01158
01159 START_CRIT_SECTION();
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172 /* Copy the assembled left half over the original page.  After this, */
01173 /* origpage holds the left half; lopaque still refers to the temporary page. */
01174
01175 PageRestoreTempPage(leftpage, origpage);
01176
01177 /* NOTE(review): oopaque still points into origpage; presumably it now describes the left half -- confirm */
01178 MarkBufferDirty(buf);
01179 MarkBufferDirty(rbuf);
01180
01181 if (!P_RIGHTMOST(ropaque))
01182 {
01183 sopaque->btpo_prev = rightpagenumber;
01184 MarkBufferDirty(sbuf);
01185 }
01186
01187 /* WAL-log the split: rdata[] is chained in the order the pieces are added below */
01188 if (RelationNeedsWAL(rel))
01189 {
01190 xl_btree_split xlrec;
01191 uint8 xlinfo;
01192 XLogRecPtr recptr;
01193 XLogRecData rdata[7];
01194 XLogRecData *lastrdata;
01195
01196 xlrec.node = rel->rd_node;
01197 xlrec.leftsib = origpagenumber;
01198 xlrec.rightsib = rightpagenumber;
01199 xlrec.rnext = ropaque->btpo_next;
01200 xlrec.level = ropaque->btpo.level;
01201 xlrec.firstright = firstright;
01202
01203 rdata[0].data = (char *) &xlrec;
01204 rdata[0].len = SizeOfBtreeSplit;
01205 rdata[0].buffer = InvalidBuffer;
01206
01207 lastrdata = &rdata[0];
01208
01209 if (ropaque->btpo.level > 0)
01210 {
01211 /* Non-leaf split: log the block number the new item points to */
01212 lastrdata->next = lastrdata + 1;
01213 lastrdata++;
01214
01215 lastrdata->data = (char *) &newitem->t_tid.ip_blkid;
01216 lastrdata->len = sizeof(BlockIdData);
01217 lastrdata->buffer = InvalidBuffer;
01218
01219
01220
01221
01222
01223 /* ... followed by the left page's new high key, read back from origpage */
01224 /* and tied to buf in the record chain. */
01225
01226 lastrdata->next = lastrdata + 1;
01227 lastrdata++;
01228
01229 itemid = PageGetItemId(origpage, P_HIKEY);
01230 item = (IndexTuple) PageGetItem(origpage, itemid);
01231 lastrdata->data = (char *) item;
01232 lastrdata->len = MAXALIGN(IndexTupleSize(item));
01233 lastrdata->buffer = buf;
01234 lastrdata->buffer_std = true;
01235 }
01236
01237
01238
01239
01240
01241
01242
01243 /* If the new item went to the left page, log its offset and the item itself, */
01244 /* the latter tied to buf. */
01245
01246 if (newitemonleft)
01247 {
01248 lastrdata->next = lastrdata + 1;
01249 lastrdata++;
01250
01251 lastrdata->data = (char *) &newitemoff;
01252 lastrdata->len = sizeof(OffsetNumber);
01253 lastrdata->buffer = InvalidBuffer;
01254
01255 lastrdata->next = lastrdata + 1;
01256 lastrdata++;
01257
01258 lastrdata->data = (char *) newitem;
01259 lastrdata->len = MAXALIGN(newitemsz);
01260 lastrdata->buffer = buf;
01261 lastrdata->buffer_std = true;
01262 }
01263 else if (ropaque->btpo.level == 0)
01264 {
01265
01266
01267
01268 /* Leaf split with the new item on the right: nothing above referenced buf, */
01269 /* so add an empty entry that does.  NOTE(review): presumably so a backup block for buf is still considered -- confirm */
01270
01271 lastrdata->next = lastrdata + 1;
01272 lastrdata++;
01273
01274 lastrdata->data = NULL;
01275 lastrdata->len = 0;
01276 lastrdata->buffer = buf;
01277 lastrdata->buffer_std = true;
01278 }
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289 /* Log the right page's item data: the bytes between pd_upper and pd_special. */
01290 /* This entry is not tied to any buffer. */
01291
01292 lastrdata->next = lastrdata + 1;
01293 lastrdata++;
01294
01295 lastrdata->data = (char *) rightpage +
01296 ((PageHeader) rightpage)->pd_upper;
01297 lastrdata->len = ((PageHeader) rightpage)->pd_special -
01298 ((PageHeader) rightpage)->pd_upper;
01299 lastrdata->buffer = InvalidBuffer;
01300
01301 /* Add an empty entry referencing the old right sibling's buffer, if there is one */
01302 if (!P_RIGHTMOST(ropaque))
01303 {
01304 lastrdata->next = lastrdata + 1;
01305 lastrdata++;
01306
01307 lastrdata->data = NULL;
01308 lastrdata->len = 0;
01309 lastrdata->buffer = sbuf;
01310 lastrdata->buffer_std = true;
01311 }
01312
01313 lastrdata->next = NULL;
01314
01315 if (isroot)
01316 xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
01317 else
01318 xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
01319
01320 recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
01321
01322 PageSetLSN(origpage, recptr);
01323 PageSetLSN(rightpage, recptr);
01324 if (!P_RIGHTMOST(ropaque))
01325 {
01326 PageSetLSN(spage, recptr);
01327 }
01328 }
01329
01330 END_CRIT_SECTION();
01331
01332 /* Release the old right sibling, if there was one */
01333 if (!P_RIGHTMOST(ropaque))
01334 _bt_relbuf(rel, sbuf);
01335
01336 /* The split is complete; hand back the new right page's buffer */
01337 return rbuf;
01338 }
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365 static OffsetNumber
01366 _bt_findsplitloc(Relation rel,
01367 Page page,
01368 OffsetNumber newitemoff,
01369 Size newitemsz,
01370 bool *newitemonleft)
01371 {
01372 BTPageOpaque opaque;
01373 OffsetNumber offnum;
01374 OffsetNumber maxoff;
01375 ItemId itemid;
01376 FindSplitData state;
01377 int leftspace,
01378 rightspace,
01379 goodenough,
01380 olddataitemstotal,
01381 olddataitemstoleft;
01382 bool goodenoughfound;
01383
01384 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01385
01386
01387 newitemsz += sizeof(ItemIdData);
01388
01389
01390 leftspace = rightspace =
01391 PageGetPageSize(page) - SizeOfPageHeaderData -
01392 MAXALIGN(sizeof(BTPageOpaqueData));
01393
01394
01395 if (!P_RIGHTMOST(opaque))
01396 {
01397 itemid = PageGetItemId(page, P_HIKEY);
01398 rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
01399 sizeof(ItemIdData));
01400 }
01401
01402
01403 olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(page);
01404
01405 state.newitemsz = newitemsz;
01406 state.is_leaf = P_ISLEAF(opaque);
01407 state.is_rightmost = P_RIGHTMOST(opaque);
01408 state.have_split = false;
01409 if (state.is_leaf)
01410 state.fillfactor = RelationGetFillFactor(rel,
01411 BTREE_DEFAULT_FILLFACTOR);
01412 else
01413 state.fillfactor = BTREE_NONLEAF_FILLFACTOR;
01414 state.newitemonleft = false;
01415 state.firstright = 0;
01416 state.best_delta = 0;
01417 state.leftspace = leftspace;
01418 state.rightspace = rightspace;
01419 state.olddataitemstotal = olddataitemstotal;
01420 state.newitemoff = newitemoff;
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431 goodenough = leftspace / 16;
01432
01433
01434
01435
01436
01437 olddataitemstoleft = 0;
01438 goodenoughfound = false;
01439 maxoff = PageGetMaxOffsetNumber(page);
01440
01441 for (offnum = P_FIRSTDATAKEY(opaque);
01442 offnum <= maxoff;
01443 offnum = OffsetNumberNext(offnum))
01444 {
01445 Size itemsz;
01446
01447 itemid = PageGetItemId(page, offnum);
01448 itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
01449
01450
01451
01452
01453 if (offnum > newitemoff)
01454 _bt_checksplitloc(&state, offnum, true,
01455 olddataitemstoleft, itemsz);
01456
01457 else if (offnum < newitemoff)
01458 _bt_checksplitloc(&state, offnum, false,
01459 olddataitemstoleft, itemsz);
01460 else
01461 {
01462
01463 _bt_checksplitloc(&state, offnum, true,
01464 olddataitemstoleft, itemsz);
01465
01466 _bt_checksplitloc(&state, offnum, false,
01467 olddataitemstoleft, itemsz);
01468 }
01469
01470
01471 if (state.have_split && state.best_delta <= goodenough)
01472 {
01473 goodenoughfound = true;
01474 break;
01475 }
01476
01477 olddataitemstoleft += itemsz;
01478 }
01479
01480
01481
01482
01483
01484
01485 if (newitemoff > maxoff && !goodenoughfound)
01486 _bt_checksplitloc(&state, newitemoff, false, olddataitemstotal, 0);
01487
01488
01489
01490
01491
01492 if (!state.have_split)
01493 elog(ERROR, "could not find a feasible split point for index \"%s\"",
01494 RelationGetRelationName(rel));
01495
01496 *newitemonleft = state.newitemonleft;
01497 return state.firstright;
01498 }
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512
01513 static void
01514 _bt_checksplitloc(FindSplitData *state,
01515 OffsetNumber firstoldonright,
01516 bool newitemonleft,
01517 int olddataitemstoleft,
01518 Size firstoldonrightsz)
01519 {
01520 int leftfree,
01521 rightfree;
01522 Size firstrightitemsz;
01523 bool newitemisfirstonright;
01524
01525
01526 newitemisfirstonright = (firstoldonright == state->newitemoff
01527 && !newitemonleft);
01528
01529 if (newitemisfirstonright)
01530 firstrightitemsz = state->newitemsz;
01531 else
01532 firstrightitemsz = firstoldonrightsz;
01533
01534
01535 leftfree = state->leftspace - olddataitemstoleft;
01536 rightfree = state->rightspace -
01537 (state->olddataitemstotal - olddataitemstoleft);
01538
01539
01540
01541
01542
01543 leftfree -= firstrightitemsz;
01544
01545
01546 if (newitemonleft)
01547 leftfree -= (int) state->newitemsz;
01548 else
01549 rightfree -= (int) state->newitemsz;
01550
01551
01552
01553
01554
01555 if (!state->is_leaf)
01556 rightfree += (int) firstrightitemsz -
01557 (int) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
01558
01559
01560
01561
01562 if (leftfree >= 0 && rightfree >= 0)
01563 {
01564 int delta;
01565
01566 if (state->is_rightmost)
01567 {
01568
01569
01570
01571
01572 delta = (state->fillfactor * leftfree)
01573 - ((100 - state->fillfactor) * rightfree);
01574 }
01575 else
01576 {
01577
01578 delta = leftfree - rightfree;
01579 }
01580
01581 if (delta < 0)
01582 delta = -delta;
01583 if (!state->have_split || delta < state->best_delta)
01584 {
01585 state->have_split = true;
01586 state->newitemonleft = newitemonleft;
01587 state->firstright = firstoldonright;
01588 state->best_delta = delta;
01589 }
01590 }
01591 }
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609 void
01610 _bt_insert_parent(Relation rel,
01611 Buffer buf,
01612 Buffer rbuf,
01613 BTStack stack,
01614 bool is_root,
01615 bool is_only)
01616 {
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631 if (is_root)
01632 {
01633 Buffer rootbuf;
01634
01635 Assert(stack == NULL);
01636 Assert(is_only);
01637
01638 rootbuf = _bt_newroot(rel, buf, rbuf);
01639
01640 _bt_relbuf(rel, rootbuf);
01641 _bt_relbuf(rel, rbuf);
01642 _bt_relbuf(rel, buf);
01643 }
01644 else
01645 {
01646 BlockNumber bknum = BufferGetBlockNumber(buf);
01647 BlockNumber rbknum = BufferGetBlockNumber(rbuf);
01648 Page page = BufferGetPage(buf);
01649 IndexTuple new_item;
01650 BTStackData fakestack;
01651 IndexTuple ritem;
01652 Buffer pbuf;
01653
01654 if (stack == NULL)
01655 {
01656 BTPageOpaque lpageop;
01657
01658 if (!InRecovery)
01659 elog(DEBUG2, "concurrent ROOT page split");
01660 lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
01661
01662 pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false);
01663
01664 stack = &fakestack;
01665 stack->bts_blkno = BufferGetBlockNumber(pbuf);
01666 stack->bts_offset = InvalidOffsetNumber;
01667
01668 stack->bts_parent = NULL;
01669 _bt_relbuf(rel, pbuf);
01670 }
01671
01672
01673 ritem = (IndexTuple) PageGetItem(page,
01674 PageGetItemId(page, P_HIKEY));
01675
01676
01677 new_item = CopyIndexTuple(ritem);
01678 ItemPointerSet(&(new_item->t_tid), rbknum, P_HIKEY);
01679
01680
01681
01682
01683
01684
01685
01686
01687 ItemPointerSet(&(stack->bts_btentry.t_tid), bknum, P_HIKEY);
01688
01689 pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
01690
01691
01692 _bt_relbuf(rel, rbuf);
01693 _bt_relbuf(rel, buf);
01694
01695
01696 if (pbuf == InvalidBuffer)
01697 elog(ERROR, "failed to re-find parent key in index \"%s\" for split pages %u/%u",
01698 RelationGetRelationName(rel), bknum, rbknum);
01699
01700
01701 _bt_insertonpg(rel, pbuf, stack->bts_parent,
01702 new_item, stack->bts_offset + 1,
01703 is_only);
01704
01705
01706 pfree(new_item);
01707 }
01708 }
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723 Buffer
01724 _bt_getstackbuf(Relation rel, BTStack stack, int access)
01725 {
01726 BlockNumber blkno;
01727 OffsetNumber start;
01728
01729 blkno = stack->bts_blkno;
01730 start = stack->bts_offset;
01731
01732 for (;;)
01733 {
01734 Buffer buf;
01735 Page page;
01736 BTPageOpaque opaque;
01737
01738 buf = _bt_getbuf(rel, blkno, access);
01739 page = BufferGetPage(buf);
01740 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01741
01742 if (!P_IGNORE(opaque))
01743 {
01744 OffsetNumber offnum,
01745 minoff,
01746 maxoff;
01747 ItemId itemid;
01748 IndexTuple item;
01749
01750 minoff = P_FIRSTDATAKEY(opaque);
01751 maxoff = PageGetMaxOffsetNumber(page);
01752
01753
01754
01755
01756
01757
01758 if (start < minoff)
01759 start = minoff;
01760
01761
01762
01763
01764
01765 if (start > maxoff)
01766 start = OffsetNumberNext(maxoff);
01767
01768
01769
01770
01771
01772
01773 for (offnum = start;
01774 offnum <= maxoff;
01775 offnum = OffsetNumberNext(offnum))
01776 {
01777 itemid = PageGetItemId(page, offnum);
01778 item = (IndexTuple) PageGetItem(page, itemid);
01779 if (BTEntrySame(item, &stack->bts_btentry))
01780 {
01781
01782 stack->bts_blkno = blkno;
01783 stack->bts_offset = offnum;
01784 return buf;
01785 }
01786 }
01787
01788 for (offnum = OffsetNumberPrev(start);
01789 offnum >= minoff;
01790 offnum = OffsetNumberPrev(offnum))
01791 {
01792 itemid = PageGetItemId(page, offnum);
01793 item = (IndexTuple) PageGetItem(page, itemid);
01794 if (BTEntrySame(item, &stack->bts_btentry))
01795 {
01796
01797 stack->bts_blkno = blkno;
01798 stack->bts_offset = offnum;
01799 return buf;
01800 }
01801 }
01802 }
01803
01804
01805
01806
01807 if (P_RIGHTMOST(opaque))
01808 {
01809 _bt_relbuf(rel, buf);
01810 return InvalidBuffer;
01811 }
01812 blkno = opaque->btpo_next;
01813 start = InvalidOffsetNumber;
01814 _bt_relbuf(rel, buf);
01815 }
01816 }
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836 static Buffer
01837 _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
01838 {
01839 Buffer rootbuf;
01840 Page lpage,
01841 rootpage;
01842 BlockNumber lbkno,
01843 rbkno;
01844 BlockNumber rootblknum;
01845 BTPageOpaque rootopaque;
01846 ItemId itemid;
01847 IndexTuple item;
01848 Size itemsz;
01849 IndexTuple new_item;
01850 Buffer metabuf;
01851 Page metapg;
01852 BTMetaPageData *metad;
01853
01854 lbkno = BufferGetBlockNumber(lbuf);
01855 rbkno = BufferGetBlockNumber(rbuf);
01856 lpage = BufferGetPage(lbuf);
01857
01858
01859 rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
01860 rootpage = BufferGetPage(rootbuf);
01861 rootblknum = BufferGetBlockNumber(rootbuf);
01862
01863
01864 metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
01865 metapg = BufferGetPage(metabuf);
01866 metad = BTPageGetMeta(metapg);
01867
01868
01869 START_CRIT_SECTION();
01870
01871
01872 rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
01873 rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
01874 rootopaque->btpo_flags = BTP_ROOT;
01875 rootopaque->btpo.level =
01876 ((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo.level + 1;
01877 rootopaque->btpo_cycleid = 0;
01878
01879
01880 metad->btm_root = rootblknum;
01881 metad->btm_level = rootopaque->btpo.level;
01882 metad->btm_fastroot = rootblknum;
01883 metad->btm_fastlevel = rootopaque->btpo.level;
01884
01885
01886
01887
01888
01889
01890 itemsz = sizeof(IndexTupleData);
01891 new_item = (IndexTuple) palloc(itemsz);
01892 new_item->t_info = itemsz;
01893 ItemPointerSet(&(new_item->t_tid), lbkno, P_HIKEY);
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903 if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY,
01904 false, false) == InvalidOffsetNumber)
01905 elog(PANIC, "failed to add leftkey to new root page"
01906 " while splitting block %u of index \"%s\"",
01907 BufferGetBlockNumber(lbuf), RelationGetRelationName(rel));
01908 pfree(new_item);
01909
01910
01911
01912
01913
01914 itemid = PageGetItemId(lpage, P_HIKEY);
01915 itemsz = ItemIdGetLength(itemid);
01916 item = (IndexTuple) PageGetItem(lpage, itemid);
01917 new_item = CopyIndexTuple(item);
01918 ItemPointerSet(&(new_item->t_tid), rbkno, P_HIKEY);
01919
01920
01921
01922
01923 if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY,
01924 false, false) == InvalidOffsetNumber)
01925 elog(PANIC, "failed to add rightkey to new root page"
01926 " while splitting block %u of index \"%s\"",
01927 BufferGetBlockNumber(lbuf), RelationGetRelationName(rel));
01928 pfree(new_item);
01929
01930 MarkBufferDirty(rootbuf);
01931 MarkBufferDirty(metabuf);
01932
01933
01934 if (RelationNeedsWAL(rel))
01935 {
01936 xl_btree_newroot xlrec;
01937 XLogRecPtr recptr;
01938 XLogRecData rdata[2];
01939
01940 xlrec.node = rel->rd_node;
01941 xlrec.rootblk = rootblknum;
01942 xlrec.level = metad->btm_level;
01943
01944 rdata[0].data = (char *) &xlrec;
01945 rdata[0].len = SizeOfBtreeNewroot;
01946 rdata[0].buffer = InvalidBuffer;
01947 rdata[0].next = &(rdata[1]);
01948
01949
01950
01951
01952
01953 rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
01954 rdata[1].len = ((PageHeader) rootpage)->pd_special -
01955 ((PageHeader) rootpage)->pd_upper;
01956 rdata[1].buffer = InvalidBuffer;
01957 rdata[1].next = NULL;
01958
01959 recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
01960
01961 PageSetLSN(rootpage, recptr);
01962 PageSetLSN(metapg, recptr);
01963 }
01964
01965 END_CRIT_SECTION();
01966
01967
01968 if (!InRecovery)
01969 CacheInvalidateRelcache(rel);
01970
01971
01972 _bt_relbuf(rel, metabuf);
01973
01974 return rootbuf;
01975 }
01976
01977
01978
01979
01980
01981
01982
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992 static bool
01993 _bt_pgaddtup(Page page,
01994 Size itemsize,
01995 IndexTuple itup,
01996 OffsetNumber itup_off)
01997 {
01998 BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01999 IndexTupleData trunctuple;
02000
02001 if (!P_ISLEAF(opaque) && itup_off == P_FIRSTDATAKEY(opaque))
02002 {
02003 trunctuple = *itup;
02004 trunctuple.t_info = sizeof(IndexTupleData);
02005 itup = &trunctuple;
02006 itemsize = sizeof(IndexTupleData);
02007 }
02008
02009 if (PageAddItem(page, (Item) itup, itemsize, itup_off,
02010 false, false) == InvalidOffsetNumber)
02011 return false;
02012
02013 return true;
02014 }
02015
02016
02017
02018
02019
02020
02021
02022 static bool
02023 _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
02024 int keysz, ScanKey scankey)
02025 {
02026 IndexTuple itup;
02027 int i;
02028
02029
02030 Assert(P_ISLEAF((BTPageOpaque) PageGetSpecialPointer(page)));
02031
02032 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
02033
02034 for (i = 1; i <= keysz; i++)
02035 {
02036 AttrNumber attno;
02037 Datum datum;
02038 bool isNull;
02039 int32 result;
02040
02041 attno = scankey->sk_attno;
02042 Assert(attno == i);
02043 datum = index_getattr(itup, attno, itupdesc, &isNull);
02044
02045
02046 if (isNull || (scankey->sk_flags & SK_ISNULL))
02047 return false;
02048
02049 result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
02050 scankey->sk_collation,
02051 datum,
02052 scankey->sk_argument));
02053
02054 if (result != 0)
02055 return false;
02056
02057 scankey++;
02058 }
02059
02060
02061 return true;
02062 }
02063
02064
02065
02066
02067
02068
02069
02070
02071 static void
02072 _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel)
02073 {
02074 OffsetNumber deletable[MaxOffsetNumber];
02075 int ndeletable = 0;
02076 OffsetNumber offnum,
02077 minoff,
02078 maxoff;
02079 Page page = BufferGetPage(buffer);
02080 BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
02081
02082
02083
02084
02085
02086 minoff = P_FIRSTDATAKEY(opaque);
02087 maxoff = PageGetMaxOffsetNumber(page);
02088 for (offnum = minoff;
02089 offnum <= maxoff;
02090 offnum = OffsetNumberNext(offnum))
02091 {
02092 ItemId itemId = PageGetItemId(page, offnum);
02093
02094 if (ItemIdIsDead(itemId))
02095 deletable[ndeletable++] = offnum;
02096 }
02097
02098 if (ndeletable > 0)
02099 _bt_delitems_delete(rel, buffer, deletable, ndeletable, heapRel);
02100
02101
02102
02103
02104
02105
02106
02107 }