00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "postgres.h"
00017
00018 #include "access/nbtree.h"
00019 #include "access/relscan.h"
00020 #include "miscadmin.h"
00021 #include "pgstat.h"
00022 #include "storage/predicate.h"
00023 #include "utils/lsyscache.h"
00024 #include "utils/rel.h"
00025
00026
00027 static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
00028 OffsetNumber offnum);
00029 static void _bt_saveitem(BTScanOpaque so, int itemIndex,
00030 OffsetNumber offnum, IndexTuple itup);
00031 static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
00032 static Buffer _bt_walk_left(Relation rel, Buffer buf);
00033 static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 BTStack
00057 _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
00058 Buffer *bufP, int access)
00059 {
00060 BTStack stack_in = NULL;
00061
00062
00063 *bufP = _bt_getroot(rel, access);
00064
00065
00066 if (!BufferIsValid(*bufP))
00067 return (BTStack) NULL;
00068
00069
00070 for (;;)
00071 {
00072 Page page;
00073 BTPageOpaque opaque;
00074 OffsetNumber offnum;
00075 ItemId itemid;
00076 IndexTuple itup;
00077 BlockNumber blkno;
00078 BlockNumber par_blkno;
00079 BTStack new_stack;
00080
00081
00082
00083
00084
00085
00086 *bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey, BT_READ);
00087
00088
00089 page = BufferGetPage(*bufP);
00090 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00091 if (P_ISLEAF(opaque))
00092 break;
00093
00094
00095
00096
00097
00098 offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey);
00099 itemid = PageGetItemId(page, offnum);
00100 itup = (IndexTuple) PageGetItem(page, itemid);
00101 blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
00102 par_blkno = BufferGetBlockNumber(*bufP);
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114 new_stack = (BTStack) palloc(sizeof(BTStackData));
00115 new_stack->bts_blkno = par_blkno;
00116 new_stack->bts_offset = offnum;
00117 memcpy(&new_stack->bts_btentry, itup, sizeof(IndexTupleData));
00118 new_stack->bts_parent = stack_in;
00119
00120
00121 *bufP = _bt_relandgetbuf(rel, *bufP, blkno, BT_READ);
00122
00123
00124 stack_in = new_stack;
00125 }
00126
00127 return stack_in;
00128 }
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155 Buffer
00156 _bt_moveright(Relation rel,
00157 Buffer buf,
00158 int keysz,
00159 ScanKey scankey,
00160 bool nextkey,
00161 int access)
00162 {
00163 Page page;
00164 BTPageOpaque opaque;
00165 int32 cmpval;
00166
00167 page = BufferGetPage(buf);
00168 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185 cmpval = nextkey ? 0 : 1;
00186
00187 while (!P_RIGHTMOST(opaque) &&
00188 (P_IGNORE(opaque) ||
00189 _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval))
00190 {
00191
00192 BlockNumber rblkno = opaque->btpo_next;
00193
00194 buf = _bt_relandgetbuf(rel, buf, rblkno, access);
00195 page = BufferGetPage(buf);
00196 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00197 }
00198
00199 if (P_IGNORE(opaque))
00200 elog(ERROR, "fell off the end of index \"%s\"",
00201 RelationGetRelationName(rel));
00202
00203 return buf;
00204 }
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233 OffsetNumber
00234 _bt_binsrch(Relation rel,
00235 Buffer buf,
00236 int keysz,
00237 ScanKey scankey,
00238 bool nextkey)
00239 {
00240 Page page;
00241 BTPageOpaque opaque;
00242 OffsetNumber low,
00243 high;
00244 int32 result,
00245 cmpval;
00246
00247 page = BufferGetPage(buf);
00248 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00249
00250 low = P_FIRSTDATAKEY(opaque);
00251 high = PageGetMaxOffsetNumber(page);
00252
00253
00254
00255
00256
00257
00258
00259
00260 if (high < low)
00261 return low;
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 high++;
00276
00277 cmpval = nextkey ? 0 : 1;
00278
00279 while (high > low)
00280 {
00281 OffsetNumber mid = low + ((high - low) / 2);
00282
00283
00284
00285 result = _bt_compare(rel, keysz, scankey, page, mid);
00286
00287 if (result >= cmpval)
00288 low = mid + 1;
00289 else
00290 high = mid;
00291 }
00292
00293
00294
00295
00296
00297
00298
00299
00300 if (P_ISLEAF(opaque))
00301 return low;
00302
00303
00304
00305
00306
00307 Assert(low > P_FIRSTDATAKEY(opaque));
00308
00309 return OffsetNumberPrev(low);
00310 }
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338 int32
00339 _bt_compare(Relation rel,
00340 int keysz,
00341 ScanKey scankey,
00342 Page page,
00343 OffsetNumber offnum)
00344 {
00345 TupleDesc itupdesc = RelationGetDescr(rel);
00346 BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00347 IndexTuple itup;
00348 int i;
00349
00350
00351
00352
00353
00354 if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
00355 return 1;
00356
00357 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371 for (i = 1; i <= keysz; i++)
00372 {
00373 Datum datum;
00374 bool isNull;
00375 int32 result;
00376
00377 datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull);
00378
00379
00380 if (scankey->sk_flags & SK_ISNULL)
00381 {
00382 if (isNull)
00383 result = 0;
00384 else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
00385 result = -1;
00386 else
00387 result = 1;
00388 }
00389 else if (isNull)
00390 {
00391 if (scankey->sk_flags & SK_BT_NULLS_FIRST)
00392 result = 1;
00393 else
00394 result = -1;
00395 }
00396 else
00397 {
00398
00399
00400
00401
00402
00403
00404
00405
00406 result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
00407 scankey->sk_collation,
00408 datum,
00409 scankey->sk_argument));
00410
00411 if (!(scankey->sk_flags & SK_BT_DESC))
00412 result = -result;
00413 }
00414
00415
00416 if (result != 0)
00417 return result;
00418
00419 scankey++;
00420 }
00421
00422
00423 return 0;
00424 }
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446 bool
00447 _bt_first(IndexScanDesc scan, ScanDirection dir)
00448 {
00449 Relation rel = scan->indexRelation;
00450 BTScanOpaque so = (BTScanOpaque) scan->opaque;
00451 Buffer buf;
00452 BTStack stack;
00453 OffsetNumber offnum;
00454 StrategyNumber strat;
00455 bool nextkey;
00456 bool goback;
00457 ScanKey startKeys[INDEX_MAX_KEYS];
00458 ScanKeyData scankeys[INDEX_MAX_KEYS];
00459 ScanKeyData notnullkeys[INDEX_MAX_KEYS];
00460 int keysCount = 0;
00461 int i;
00462 StrategyNumber strat_total;
00463 BTScanPosItem *currItem;
00464
00465 pgstat_count_index_scan(rel);
00466
00467
00468
00469
00470
00471 _bt_preprocess_keys(scan);
00472
00473
00474
00475
00476
00477 if (!so->qual_ok)
00478 return false;
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526 strat_total = BTEqualStrategyNumber;
00527 if (so->numberOfKeys > 0)
00528 {
00529 AttrNumber curattr;
00530 ScanKey chosen;
00531 ScanKey impliesNN;
00532 ScanKey cur;
00533
00534
00535
00536
00537
00538
00539 curattr = 1;
00540 chosen = NULL;
00541
00542 impliesNN = NULL;
00543
00544
00545
00546
00547
00548
00549 for (cur = so->keyData, i = 0;; cur++, i++)
00550 {
00551 if (i >= so->numberOfKeys || cur->sk_attno != curattr)
00552 {
00553
00554
00555
00556
00557 if (chosen == NULL && impliesNN != NULL &&
00558 ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
00559 ScanDirectionIsForward(dir) :
00560 ScanDirectionIsBackward(dir)))
00561 {
00562
00563 chosen = ¬nullkeys[keysCount];
00564 ScanKeyEntryInitialize(chosen,
00565 (SK_SEARCHNOTNULL | SK_ISNULL |
00566 (impliesNN->sk_flags &
00567 (SK_BT_DESC | SK_BT_NULLS_FIRST))),
00568 curattr,
00569 ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
00570 BTGreaterStrategyNumber :
00571 BTLessStrategyNumber),
00572 InvalidOid,
00573 InvalidOid,
00574 InvalidOid,
00575 (Datum) 0);
00576 }
00577
00578
00579
00580
00581
00582 if (chosen == NULL)
00583 break;
00584 startKeys[keysCount++] = chosen;
00585
00586
00587
00588
00589
00590 strat = chosen->sk_strategy;
00591 if (strat != BTEqualStrategyNumber)
00592 {
00593 strat_total = strat;
00594 if (strat == BTGreaterStrategyNumber ||
00595 strat == BTLessStrategyNumber)
00596 break;
00597 }
00598
00599
00600
00601
00602
00603
00604 if (i >= so->numberOfKeys ||
00605 cur->sk_attno != curattr + 1)
00606 break;
00607
00608
00609
00610
00611 curattr = cur->sk_attno;
00612 chosen = NULL;
00613 impliesNN = NULL;
00614 }
00615
00616
00617
00618
00619
00620
00621
00622
00623 switch (cur->sk_strategy)
00624 {
00625 case BTLessStrategyNumber:
00626 case BTLessEqualStrategyNumber:
00627 if (chosen == NULL)
00628 {
00629 if (ScanDirectionIsBackward(dir))
00630 chosen = cur;
00631 else
00632 impliesNN = cur;
00633 }
00634 break;
00635 case BTEqualStrategyNumber:
00636
00637 chosen = cur;
00638 break;
00639 case BTGreaterEqualStrategyNumber:
00640 case BTGreaterStrategyNumber:
00641 if (chosen == NULL)
00642 {
00643 if (ScanDirectionIsForward(dir))
00644 chosen = cur;
00645 else
00646 impliesNN = cur;
00647 }
00648 break;
00649 }
00650 }
00651 }
00652
00653
00654
00655
00656
00657
00658 if (keysCount == 0)
00659 return _bt_endpoint(scan, dir);
00660
00661
00662
00663
00664
00665
00666
00667 Assert(keysCount <= INDEX_MAX_KEYS);
00668 for (i = 0; i < keysCount; i++)
00669 {
00670 ScanKey cur = startKeys[i];
00671
00672 Assert(cur->sk_attno == i + 1);
00673
00674 if (cur->sk_flags & SK_ROW_HEADER)
00675 {
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685 ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
00686
00687 Assert(subkey->sk_flags & SK_ROW_MEMBER);
00688 if (subkey->sk_flags & SK_ISNULL)
00689 return false;
00690 memcpy(scankeys + i, subkey, sizeof(ScanKeyData));
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706 if (i == keysCount - 1)
00707 {
00708 bool used_all_subkeys = false;
00709
00710 Assert(!(subkey->sk_flags & SK_ROW_END));
00711 for (;;)
00712 {
00713 subkey++;
00714 Assert(subkey->sk_flags & SK_ROW_MEMBER);
00715 if (subkey->sk_attno != keysCount + 1)
00716 break;
00717 if (subkey->sk_strategy != cur->sk_strategy)
00718 break;
00719 if (subkey->sk_flags & SK_ISNULL)
00720 break;
00721 Assert(keysCount < INDEX_MAX_KEYS);
00722 memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
00723 keysCount++;
00724 if (subkey->sk_flags & SK_ROW_END)
00725 {
00726 used_all_subkeys = true;
00727 break;
00728 }
00729 }
00730 if (!used_all_subkeys)
00731 {
00732 switch (strat_total)
00733 {
00734 case BTLessStrategyNumber:
00735 strat_total = BTLessEqualStrategyNumber;
00736 break;
00737 case BTGreaterStrategyNumber:
00738 strat_total = BTGreaterEqualStrategyNumber;
00739 break;
00740 }
00741 }
00742 break;
00743 }
00744 }
00745 else
00746 {
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762 if (cur->sk_subtype == rel->rd_opcintype[i] ||
00763 cur->sk_subtype == InvalidOid)
00764 {
00765 FmgrInfo *procinfo;
00766
00767 procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
00768 ScanKeyEntryInitializeWithInfo(scankeys + i,
00769 cur->sk_flags,
00770 cur->sk_attno,
00771 InvalidStrategy,
00772 cur->sk_subtype,
00773 cur->sk_collation,
00774 procinfo,
00775 cur->sk_argument);
00776 }
00777 else
00778 {
00779 RegProcedure cmp_proc;
00780
00781 cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
00782 rel->rd_opcintype[i],
00783 cur->sk_subtype,
00784 BTORDER_PROC);
00785 if (!RegProcedureIsValid(cmp_proc))
00786 elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
00787 BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
00788 cur->sk_attno, RelationGetRelationName(rel));
00789 ScanKeyEntryInitialize(scankeys + i,
00790 cur->sk_flags,
00791 cur->sk_attno,
00792 InvalidStrategy,
00793 cur->sk_subtype,
00794 cur->sk_collation,
00795 cmp_proc,
00796 cur->sk_argument);
00797 }
00798 }
00799 }
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814 switch (strat_total)
00815 {
00816 case BTLessStrategyNumber:
00817
00818
00819
00820
00821
00822
00823
00824 nextkey = false;
00825 goback = true;
00826 break;
00827
00828 case BTLessEqualStrategyNumber:
00829
00830
00831
00832
00833
00834
00835
00836 nextkey = true;
00837 goback = true;
00838 break;
00839
00840 case BTEqualStrategyNumber:
00841
00842
00843
00844
00845
00846 if (ScanDirectionIsBackward(dir))
00847 {
00848
00849
00850
00851
00852 nextkey = true;
00853 goback = true;
00854 }
00855 else
00856 {
00857
00858
00859
00860
00861 nextkey = false;
00862 goback = false;
00863 }
00864 break;
00865
00866 case BTGreaterEqualStrategyNumber:
00867
00868
00869
00870
00871
00872 nextkey = false;
00873 goback = false;
00874 break;
00875
00876 case BTGreaterStrategyNumber:
00877
00878
00879
00880
00881
00882 nextkey = true;
00883 goback = false;
00884 break;
00885
00886 default:
00887
00888 elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
00889 return false;
00890 }
00891
00892
00893
00894
00895
00896 stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ);
00897
00898
00899 _bt_freestack(stack);
00900
00901
00902 so->currPos.buf = buf;
00903
00904 if (!BufferIsValid(buf))
00905 {
00906
00907
00908
00909
00910 PredicateLockRelation(rel, scan->xs_snapshot);
00911 return false;
00912 }
00913 else
00914 PredicateLockPage(rel, BufferGetBlockNumber(buf),
00915 scan->xs_snapshot);
00916
00917
00918 if (ScanDirectionIsForward(dir))
00919 {
00920 so->currPos.moreLeft = false;
00921 so->currPos.moreRight = true;
00922 }
00923 else
00924 {
00925 so->currPos.moreLeft = true;
00926 so->currPos.moreRight = false;
00927 }
00928 so->numKilled = 0;
00929 so->markItemIndex = -1;
00930
00931
00932 offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952 if (goback)
00953 offnum = OffsetNumberPrev(offnum);
00954
00955
00956
00957
00958 if (!_bt_readpage(scan, dir, offnum))
00959 {
00960
00961
00962
00963
00964 if (!_bt_steppage(scan, dir))
00965 return false;
00966 }
00967
00968
00969 LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
00970
00971
00972 currItem = &so->currPos.items[so->currPos.itemIndex];
00973 scan->xs_ctup.t_self = currItem->heapTid;
00974 if (scan->xs_want_itup)
00975 scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
00976
00977 return true;
00978 }
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994 bool
00995 _bt_next(IndexScanDesc scan, ScanDirection dir)
00996 {
00997 BTScanOpaque so = (BTScanOpaque) scan->opaque;
00998 BTScanPosItem *currItem;
00999
01000
01001
01002
01003
01004 if (ScanDirectionIsForward(dir))
01005 {
01006 if (++so->currPos.itemIndex > so->currPos.lastItem)
01007 {
01008
01009 Assert(BufferIsValid(so->currPos.buf));
01010 LockBuffer(so->currPos.buf, BT_READ);
01011 if (!_bt_steppage(scan, dir))
01012 return false;
01013
01014 LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
01015 }
01016 }
01017 else
01018 {
01019 if (--so->currPos.itemIndex < so->currPos.firstItem)
01020 {
01021
01022 Assert(BufferIsValid(so->currPos.buf));
01023 LockBuffer(so->currPos.buf, BT_READ);
01024 if (!_bt_steppage(scan, dir))
01025 return false;
01026
01027 LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
01028 }
01029 }
01030
01031
01032 currItem = &so->currPos.items[so->currPos.itemIndex];
01033 scan->xs_ctup.t_self = currItem->heapTid;
01034 if (scan->xs_want_itup)
01035 scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
01036
01037 return true;
01038 }
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055 static bool
01056 _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
01057 {
01058 BTScanOpaque so = (BTScanOpaque) scan->opaque;
01059 Page page;
01060 BTPageOpaque opaque;
01061 OffsetNumber minoff;
01062 OffsetNumber maxoff;
01063 int itemIndex;
01064 IndexTuple itup;
01065 bool continuescan;
01066
01067
01068 Assert(BufferIsValid(so->currPos.buf));
01069
01070 page = BufferGetPage(so->currPos.buf);
01071 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01072 minoff = P_FIRSTDATAKEY(opaque);
01073 maxoff = PageGetMaxOffsetNumber(page);
01074
01075
01076
01077
01078
01079
01080 so->currPos.nextPage = opaque->btpo_next;
01081
01082
01083 so->currPos.nextTupleOffset = 0;
01084
01085 if (ScanDirectionIsForward(dir))
01086 {
01087
01088 itemIndex = 0;
01089
01090 offnum = Max(offnum, minoff);
01091
01092 while (offnum <= maxoff)
01093 {
01094 itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
01095 if (itup != NULL)
01096 {
01097
01098 _bt_saveitem(so, itemIndex, offnum, itup);
01099 itemIndex++;
01100 }
01101 if (!continuescan)
01102 {
01103
01104 so->currPos.moreRight = false;
01105 break;
01106 }
01107
01108 offnum = OffsetNumberNext(offnum);
01109 }
01110
01111 Assert(itemIndex <= MaxIndexTuplesPerPage);
01112 so->currPos.firstItem = 0;
01113 so->currPos.lastItem = itemIndex - 1;
01114 so->currPos.itemIndex = 0;
01115 }
01116 else
01117 {
01118
01119 itemIndex = MaxIndexTuplesPerPage;
01120
01121 offnum = Min(offnum, maxoff);
01122
01123 while (offnum >= minoff)
01124 {
01125 itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
01126 if (itup != NULL)
01127 {
01128
01129 itemIndex--;
01130 _bt_saveitem(so, itemIndex, offnum, itup);
01131 }
01132 if (!continuescan)
01133 {
01134
01135 so->currPos.moreLeft = false;
01136 break;
01137 }
01138
01139 offnum = OffsetNumberPrev(offnum);
01140 }
01141
01142 Assert(itemIndex >= 0);
01143 so->currPos.firstItem = itemIndex;
01144 so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
01145 so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
01146 }
01147
01148 return (so->currPos.firstItem <= so->currPos.lastItem);
01149 }
01150
01151
01152 static void
01153 _bt_saveitem(BTScanOpaque so, int itemIndex,
01154 OffsetNumber offnum, IndexTuple itup)
01155 {
01156 BTScanPosItem *currItem = &so->currPos.items[itemIndex];
01157
01158 currItem->heapTid = itup->t_tid;
01159 currItem->indexOffset = offnum;
01160 if (so->currTuples)
01161 {
01162 Size itupsz = IndexTupleSize(itup);
01163
01164 currItem->tupleOffset = so->currPos.nextTupleOffset;
01165 memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz);
01166 so->currPos.nextTupleOffset += MAXALIGN(itupsz);
01167 }
01168 }
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182 static bool
01183 _bt_steppage(IndexScanDesc scan, ScanDirection dir)
01184 {
01185 BTScanOpaque so = (BTScanOpaque) scan->opaque;
01186 Relation rel;
01187 Page page;
01188 BTPageOpaque opaque;
01189
01190
01191 Assert(BufferIsValid(so->currPos.buf));
01192
01193
01194 if (so->numKilled > 0)
01195 _bt_killitems(scan, true);
01196
01197
01198
01199
01200
01201 if (so->markItemIndex >= 0)
01202 {
01203
01204 IncrBufferRefCount(so->currPos.buf);
01205 memcpy(&so->markPos, &so->currPos,
01206 offsetof(BTScanPosData, items[1]) +
01207 so->currPos.lastItem * sizeof(BTScanPosItem));
01208 if (so->markTuples)
01209 memcpy(so->markTuples, so->currTuples,
01210 so->currPos.nextTupleOffset);
01211 so->markPos.itemIndex = so->markItemIndex;
01212 so->markItemIndex = -1;
01213 }
01214
01215 rel = scan->indexRelation;
01216
01217 if (ScanDirectionIsForward(dir))
01218 {
01219
01220
01221 BlockNumber blkno = so->currPos.nextPage;
01222
01223
01224 so->currPos.moreLeft = true;
01225
01226 for (;;)
01227 {
01228
01229 _bt_relbuf(rel, so->currPos.buf);
01230 so->currPos.buf = InvalidBuffer;
01231
01232 if (blkno == P_NONE || !so->currPos.moreRight)
01233 return false;
01234
01235 CHECK_FOR_INTERRUPTS();
01236
01237 so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
01238
01239 page = BufferGetPage(so->currPos.buf);
01240 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01241 if (!P_IGNORE(opaque))
01242 {
01243 PredicateLockPage(rel, blkno, scan->xs_snapshot);
01244
01245
01246 if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
01247 break;
01248 }
01249
01250 blkno = opaque->btpo_next;
01251 }
01252 }
01253 else
01254 {
01255
01256 so->currPos.moreRight = true;
01257
01258
01259
01260
01261
01262
01263
01264
01265 for (;;)
01266 {
01267
01268 if (!so->currPos.moreLeft)
01269 {
01270 _bt_relbuf(rel, so->currPos.buf);
01271 so->currPos.buf = InvalidBuffer;
01272 return false;
01273 }
01274
01275
01276 so->currPos.buf = _bt_walk_left(rel, so->currPos.buf);
01277
01278
01279 if (so->currPos.buf == InvalidBuffer)
01280 return false;
01281
01282
01283
01284
01285
01286
01287 page = BufferGetPage(so->currPos.buf);
01288 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01289 if (!P_IGNORE(opaque))
01290 {
01291 PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf), scan->xs_snapshot);
01292
01293
01294 if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
01295 break;
01296 }
01297 }
01298 }
01299
01300 return true;
01301 }
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317 static Buffer
01318 _bt_walk_left(Relation rel, Buffer buf)
01319 {
01320 Page page;
01321 BTPageOpaque opaque;
01322
01323 page = BufferGetPage(buf);
01324 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01325
01326 for (;;)
01327 {
01328 BlockNumber obknum;
01329 BlockNumber lblkno;
01330 BlockNumber blkno;
01331 int tries;
01332
01333
01334 if (P_LEFTMOST(opaque))
01335 {
01336 _bt_relbuf(rel, buf);
01337 break;
01338 }
01339
01340 obknum = BufferGetBlockNumber(buf);
01341
01342 blkno = lblkno = opaque->btpo_prev;
01343 _bt_relbuf(rel, buf);
01344
01345 CHECK_FOR_INTERRUPTS();
01346 buf = _bt_getbuf(rel, blkno, BT_READ);
01347 page = BufferGetPage(buf);
01348 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361 tries = 0;
01362 for (;;)
01363 {
01364 if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum)
01365 {
01366
01367 return buf;
01368 }
01369 if (P_RIGHTMOST(opaque) || ++tries > 4)
01370 break;
01371 blkno = opaque->btpo_next;
01372 buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
01373 page = BufferGetPage(buf);
01374 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01375 }
01376
01377
01378 buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
01379 page = BufferGetPage(buf);
01380 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01381 if (P_ISDELETED(opaque))
01382 {
01383
01384
01385
01386
01387
01388
01389 for (;;)
01390 {
01391 if (P_RIGHTMOST(opaque))
01392 elog(ERROR, "fell off the end of index \"%s\"",
01393 RelationGetRelationName(rel));
01394 blkno = opaque->btpo_next;
01395 buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
01396 page = BufferGetPage(buf);
01397 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01398 if (!P_ISDELETED(opaque))
01399 break;
01400 }
01401
01402
01403
01404
01405
01406 }
01407 else
01408 {
01409
01410
01411
01412
01413
01414 if (opaque->btpo_prev == lblkno)
01415 elog(ERROR, "could not find left sibling of block %u in index \"%s\"",
01416 obknum, RelationGetRelationName(rel));
01417
01418 }
01419 }
01420
01421 return InvalidBuffer;
01422 }
01423
01424
01425
01426
01427
01428
01429
01430
01431
01432 Buffer
01433 _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
01434 {
01435 Buffer buf;
01436 Page page;
01437 BTPageOpaque opaque;
01438 OffsetNumber offnum;
01439 BlockNumber blkno;
01440 IndexTuple itup;
01441
01442
01443
01444
01445
01446
01447 if (level == 0)
01448 buf = _bt_getroot(rel, BT_READ);
01449 else
01450 buf = _bt_gettrueroot(rel);
01451
01452 if (!BufferIsValid(buf))
01453 return InvalidBuffer;
01454
01455 page = BufferGetPage(buf);
01456 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01457
01458 for (;;)
01459 {
01460
01461
01462
01463
01464
01465
01466 while (P_IGNORE(opaque) ||
01467 (rightmost && !P_RIGHTMOST(opaque)))
01468 {
01469 blkno = opaque->btpo_next;
01470 if (blkno == P_NONE)
01471 elog(ERROR, "fell off the end of index \"%s\"",
01472 RelationGetRelationName(rel));
01473 buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
01474 page = BufferGetPage(buf);
01475 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01476 }
01477
01478
01479 if (opaque->btpo.level == level)
01480 break;
01481 if (opaque->btpo.level < level)
01482 elog(ERROR, "btree level %u not found in index \"%s\"",
01483 level, RelationGetRelationName(rel));
01484
01485
01486 if (rightmost)
01487 offnum = PageGetMaxOffsetNumber(page);
01488 else
01489 offnum = P_FIRSTDATAKEY(opaque);
01490
01491 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
01492 blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
01493
01494 buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
01495 page = BufferGetPage(buf);
01496 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01497 }
01498
01499 return buf;
01500 }
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511 static bool
01512 _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
01513 {
01514 Relation rel = scan->indexRelation;
01515 BTScanOpaque so = (BTScanOpaque) scan->opaque;
01516 Buffer buf;
01517 Page page;
01518 BTPageOpaque opaque;
01519 OffsetNumber start;
01520 BTScanPosItem *currItem;
01521
01522
01523
01524
01525
01526
01527 buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));
01528
01529 if (!BufferIsValid(buf))
01530 {
01531
01532
01533
01534
01535 PredicateLockRelation(rel, scan->xs_snapshot);
01536 so->currPos.buf = InvalidBuffer;
01537 return false;
01538 }
01539
01540 PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
01541 page = BufferGetPage(buf);
01542 opaque = (BTPageOpaque) PageGetSpecialPointer(page);
01543 Assert(P_ISLEAF(opaque));
01544
01545 if (ScanDirectionIsForward(dir))
01546 {
01547
01548
01549
01550 start = P_FIRSTDATAKEY(opaque);
01551 }
01552 else if (ScanDirectionIsBackward(dir))
01553 {
01554 Assert(P_RIGHTMOST(opaque));
01555
01556 start = PageGetMaxOffsetNumber(page);
01557 }
01558 else
01559 {
01560 elog(ERROR, "invalid scan direction: %d", (int) dir);
01561 start = 0;
01562 }
01563
01564
01565 so->currPos.buf = buf;
01566
01567
01568 if (ScanDirectionIsForward(dir))
01569 {
01570 so->currPos.moreLeft = false;
01571 so->currPos.moreRight = true;
01572 }
01573 else
01574 {
01575 so->currPos.moreLeft = true;
01576 so->currPos.moreRight = false;
01577 }
01578 so->numKilled = 0;
01579 so->markItemIndex = -1;
01580
01581
01582
01583
01584 if (!_bt_readpage(scan, dir, start))
01585 {
01586
01587
01588
01589
01590 if (!_bt_steppage(scan, dir))
01591 return false;
01592 }
01593
01594
01595 LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
01596
01597
01598 currItem = &so->currPos.items[so->currPos.itemIndex];
01599 scan->xs_ctup.t_self = currItem->heapTid;
01600 if (scan->xs_want_itup)
01601 scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
01602
01603 return true;
01604 }