00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #include "postgres.h"
00037
00038 #include "access/relscan.h"
00039 #include "access/transam.h"
00040 #include "executor/execdebug.h"
00041 #include "executor/nodeBitmapHeapscan.h"
00042 #include "pgstat.h"
00043 #include "storage/bufmgr.h"
00044 #include "storage/predicate.h"
00045 #include "utils/memutils.h"
00046 #include "utils/rel.h"
00047 #include "utils/snapmgr.h"
00048 #include "utils/tqual.h"
00049
00050
00051 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
00052 static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
00053
00054
00055
00056
00057
00058
00059
00060
00061 static TupleTableSlot *
00062 BitmapHeapNext(BitmapHeapScanState *node)
00063 {
00064 ExprContext *econtext;
00065 HeapScanDesc scan;
00066 TIDBitmap *tbm;
00067 TBMIterator *tbmiterator;
00068 TBMIterateResult *tbmres;
00069
00070 #ifdef USE_PREFETCH
00071 TBMIterator *prefetch_iterator;
00072 #endif
00073 OffsetNumber targoffset;
00074 TupleTableSlot *slot;
00075
00076
00077
00078
00079 econtext = node->ss.ps.ps_ExprContext;
00080 slot = node->ss.ss_ScanTupleSlot;
00081 scan = node->ss.ss_currentScanDesc;
00082 tbm = node->tbm;
00083 tbmiterator = node->tbmiterator;
00084 tbmres = node->tbmres;
00085 #ifdef USE_PREFETCH
00086 prefetch_iterator = node->prefetch_iterator;
00087 #endif
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102 if (tbm == NULL)
00103 {
00104 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
00105
00106 if (!tbm || !IsA(tbm, TIDBitmap))
00107 elog(ERROR, "unrecognized result from subplan");
00108
00109 node->tbm = tbm;
00110 node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
00111 node->tbmres = tbmres = NULL;
00112
00113 #ifdef USE_PREFETCH
00114 if (target_prefetch_pages > 0)
00115 {
00116 node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
00117 node->prefetch_pages = 0;
00118 node->prefetch_target = -1;
00119 }
00120 #endif
00121 }
00122
00123 for (;;)
00124 {
00125 Page dp;
00126 ItemId lp;
00127
00128
00129
00130
00131 if (tbmres == NULL)
00132 {
00133 node->tbmres = tbmres = tbm_iterate(tbmiterator);
00134 if (tbmres == NULL)
00135 {
00136
00137 break;
00138 }
00139
00140 #ifdef USE_PREFETCH
00141 if (node->prefetch_pages > 0)
00142 {
00143
00144 node->prefetch_pages--;
00145 }
00146 else if (prefetch_iterator)
00147 {
00148
00149 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
00150
00151 if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
00152 elog(ERROR, "prefetch and main iterators are out of sync");
00153 }
00154 #endif
00155
00156
00157
00158
00159
00160
00161
00162 if (tbmres->blockno >= scan->rs_nblocks)
00163 {
00164 node->tbmres = tbmres = NULL;
00165 continue;
00166 }
00167
00168
00169
00170
00171 bitgetpage(scan, tbmres);
00172
00173
00174
00175
00176 scan->rs_cindex = 0;
00177
00178 #ifdef USE_PREFETCH
00179
00180
00181
00182
00183
00184
00185
00186 if (node->prefetch_target >= target_prefetch_pages)
00187 ;
00188 else if (node->prefetch_target >= target_prefetch_pages / 2)
00189 node->prefetch_target = target_prefetch_pages;
00190 else if (node->prefetch_target > 0)
00191 node->prefetch_target *= 2;
00192 else
00193 node->prefetch_target++;
00194 #endif
00195 }
00196 else
00197 {
00198
00199
00200
00201 scan->rs_cindex++;
00202
00203 #ifdef USE_PREFETCH
00204
00205
00206
00207
00208
00209 if (node->prefetch_target < target_prefetch_pages)
00210 node->prefetch_target++;
00211 #endif
00212 }
00213
00214
00215
00216
00217 if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
00218 {
00219 node->tbmres = tbmres = NULL;
00220 continue;
00221 }
00222
00223 #ifdef USE_PREFETCH
00224
00225
00226
00227
00228
00229
00230
00231
00232 if (prefetch_iterator)
00233 {
00234 while (node->prefetch_pages < node->prefetch_target)
00235 {
00236 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
00237
00238 if (tbmpre == NULL)
00239 {
00240
00241 tbm_end_iterate(prefetch_iterator);
00242 node->prefetch_iterator = prefetch_iterator = NULL;
00243 break;
00244 }
00245 node->prefetch_pages++;
00246 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
00247 }
00248 }
00249 #endif
00250
00251
00252
00253
00254 targoffset = scan->rs_vistuples[scan->rs_cindex];
00255 dp = (Page) BufferGetPage(scan->rs_cbuf);
00256 lp = PageGetItemId(dp, targoffset);
00257 Assert(ItemIdIsNormal(lp));
00258
00259 scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
00260 scan->rs_ctup.t_len = ItemIdGetLength(lp);
00261 ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
00262
00263 pgstat_count_heap_fetch(scan->rs_rd);
00264
00265
00266
00267
00268
00269 ExecStoreTuple(&scan->rs_ctup,
00270 slot,
00271 scan->rs_cbuf,
00272 false);
00273
00274
00275
00276
00277
00278 if (tbmres->recheck)
00279 {
00280 econtext->ecxt_scantuple = slot;
00281 ResetExprContext(econtext);
00282
00283 if (!ExecQual(node->bitmapqualorig, econtext, false))
00284 {
00285
00286 InstrCountFiltered2(node, 1);
00287 ExecClearTuple(slot);
00288 continue;
00289 }
00290 }
00291
00292
00293 return slot;
00294 }
00295
00296
00297
00298
00299 return ExecClearTuple(slot);
00300 }
00301
00302
00303
00304
00305
00306
00307
00308
00309 static void
00310 bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
00311 {
00312 BlockNumber page = tbmres->blockno;
00313 Buffer buffer;
00314 Snapshot snapshot;
00315 int ntup;
00316
00317
00318
00319
00320 Assert(page < scan->rs_nblocks);
00321
00322 scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
00323 scan->rs_rd,
00324 page);
00325 buffer = scan->rs_cbuf;
00326 snapshot = scan->rs_snapshot;
00327
00328 ntup = 0;
00329
00330
00331
00332
00333 Assert(TransactionIdIsValid(RecentGlobalXmin));
00334 heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
00335
00336
00337
00338
00339
00340
00341 LockBuffer(buffer, BUFFER_LOCK_SHARE);
00342
00343
00344
00345
00346 if (tbmres->ntuples >= 0)
00347 {
00348
00349
00350
00351
00352
00353 int curslot;
00354
00355 for (curslot = 0; curslot < tbmres->ntuples; curslot++)
00356 {
00357 OffsetNumber offnum = tbmres->offsets[curslot];
00358 ItemPointerData tid;
00359 HeapTupleData heapTuple;
00360
00361 ItemPointerSet(&tid, page, offnum);
00362 if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
00363 &heapTuple, NULL, true))
00364 scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
00365 }
00366 }
00367 else
00368 {
00369
00370
00371
00372
00373 Page dp = (Page) BufferGetPage(buffer);
00374 OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
00375 OffsetNumber offnum;
00376
00377 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
00378 {
00379 ItemId lp;
00380 HeapTupleData loctup;
00381 bool valid;
00382
00383 lp = PageGetItemId(dp, offnum);
00384 if (!ItemIdIsNormal(lp))
00385 continue;
00386 loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
00387 loctup.t_len = ItemIdGetLength(lp);
00388 loctup.t_tableOid = scan->rs_rd->rd_id;
00389 ItemPointerSet(&loctup.t_self, page, offnum);
00390 valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
00391 if (valid)
00392 {
00393 scan->rs_vistuples[ntup++] = offnum;
00394 PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
00395 }
00396 CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
00397 buffer, snapshot);
00398 }
00399 }
00400
00401 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
00402
00403 Assert(ntup <= MaxHeapTuplesPerPage);
00404 scan->rs_ntuples = ntup;
00405 }
00406
00407
00408
00409
00410 static bool
00411 BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
00412 {
00413 ExprContext *econtext;
00414
00415
00416
00417
00418 econtext = node->ss.ps.ps_ExprContext;
00419
00420
00421 econtext->ecxt_scantuple = slot;
00422
00423 ResetExprContext(econtext);
00424
00425 return ExecQual(node->bitmapqualorig, econtext, false);
00426 }
00427
00428
00429
00430
00431
00432 TupleTableSlot *
00433 ExecBitmapHeapScan(BitmapHeapScanState *node)
00434 {
00435 return ExecScan(&node->ss,
00436 (ExecScanAccessMtd) BitmapHeapNext,
00437 (ExecScanRecheckMtd) BitmapHeapRecheck);
00438 }
00439
00440
00441
00442
00443
00444 void
00445 ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
00446 {
00447
00448 heap_rescan(node->ss.ss_currentScanDesc, NULL);
00449
00450 if (node->tbmiterator)
00451 tbm_end_iterate(node->tbmiterator);
00452 if (node->prefetch_iterator)
00453 tbm_end_iterate(node->prefetch_iterator);
00454 if (node->tbm)
00455 tbm_free(node->tbm);
00456 node->tbm = NULL;
00457 node->tbmiterator = NULL;
00458 node->tbmres = NULL;
00459 node->prefetch_iterator = NULL;
00460
00461 ExecScanReScan(&node->ss);
00462
00463
00464
00465
00466
00467 if (node->ss.ps.lefttree->chgParam == NULL)
00468 ExecReScan(node->ss.ps.lefttree);
00469 }
00470
00471
00472
00473
00474
00475 void
00476 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
00477 {
00478 Relation relation;
00479 HeapScanDesc scanDesc;
00480
00481
00482
00483
00484 relation = node->ss.ss_currentRelation;
00485 scanDesc = node->ss.ss_currentScanDesc;
00486
00487
00488
00489
00490 ExecFreeExprContext(&node->ss.ps);
00491
00492
00493
00494
00495 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
00496 ExecClearTuple(node->ss.ss_ScanTupleSlot);
00497
00498
00499
00500
00501 ExecEndNode(outerPlanState(node));
00502
00503
00504
00505
00506 if (node->tbmiterator)
00507 tbm_end_iterate(node->tbmiterator);
00508 if (node->prefetch_iterator)
00509 tbm_end_iterate(node->prefetch_iterator);
00510 if (node->tbm)
00511 tbm_free(node->tbm);
00512
00513
00514
00515
00516 heap_endscan(scanDesc);
00517
00518
00519
00520
00521 ExecCloseScanRelation(relation);
00522 }
00523
00524
00525
00526
00527
00528
00529
00530 BitmapHeapScanState *
00531 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
00532 {
00533 BitmapHeapScanState *scanstate;
00534 Relation currentRelation;
00535
00536
00537 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
00538
00539
00540
00541
00542
00543 Assert(IsMVCCSnapshot(estate->es_snapshot));
00544
00545
00546
00547
00548 scanstate = makeNode(BitmapHeapScanState);
00549 scanstate->ss.ps.plan = (Plan *) node;
00550 scanstate->ss.ps.state = estate;
00551
00552 scanstate->tbm = NULL;
00553 scanstate->tbmiterator = NULL;
00554 scanstate->tbmres = NULL;
00555 scanstate->prefetch_iterator = NULL;
00556 scanstate->prefetch_pages = 0;
00557 scanstate->prefetch_target = 0;
00558
00559
00560
00561
00562
00563
00564 ExecAssignExprContext(estate, &scanstate->ss.ps);
00565
00566 scanstate->ss.ps.ps_TupFromTlist = false;
00567
00568
00569
00570
00571 scanstate->ss.ps.targetlist = (List *)
00572 ExecInitExpr((Expr *) node->scan.plan.targetlist,
00573 (PlanState *) scanstate);
00574 scanstate->ss.ps.qual = (List *)
00575 ExecInitExpr((Expr *) node->scan.plan.qual,
00576 (PlanState *) scanstate);
00577 scanstate->bitmapqualorig = (List *)
00578 ExecInitExpr((Expr *) node->bitmapqualorig,
00579 (PlanState *) scanstate);
00580
00581
00582
00583
00584 ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
00585 ExecInitScanTupleSlot(estate, &scanstate->ss);
00586
00587
00588
00589
00590 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
00591
00592 scanstate->ss.ss_currentRelation = currentRelation;
00593
00594
00595
00596
00597
00598 scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
00599 estate->es_snapshot,
00600 0,
00601 NULL);
00602
00603
00604
00605
00606 ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
00607
00608
00609
00610
00611 ExecAssignResultTypeFromTL(&scanstate->ss.ps);
00612 ExecAssignScanProjectionInfo(&scanstate->ss);
00613
00614
00615
00616
00617
00618
00619
00620
00621 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
00622
00623
00624
00625
00626 return scanstate;
00627 }