00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include "access/heapam.h"
00018 #include "access/heapam_xlog.h"
00019 #include "access/transam.h"
00020 #include "access/htup_details.h"
00021 #include "miscadmin.h"
00022 #include "pgstat.h"
00023 #include "storage/bufmgr.h"
00024 #include "utils/rel.h"
00025 #include "utils/tqual.h"
00026
00027
00028
00029 typedef struct
00030 {
00031 TransactionId new_prune_xid;
00032 TransactionId latestRemovedXid;
00033
00034 int nredirected;
00035 int ndead;
00036 int nunused;
00037
00038 OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
00039 OffsetNumber nowdead[MaxHeapTuplesPerPage];
00040 OffsetNumber nowunused[MaxHeapTuplesPerPage];
00041
00042 bool marked[MaxHeapTuplesPerPage + 1];
00043 } PruneState;
00044
00045
00046 static int heap_prune_chain(Relation relation, Buffer buffer,
00047 OffsetNumber rootoffnum,
00048 TransactionId OldestXmin,
00049 PruneState *prstate);
00050 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
00051 static void heap_prune_record_redirect(PruneState *prstate,
00052 OffsetNumber offnum, OffsetNumber rdoffnum);
00053 static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
00054 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072 void
00073 heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
00074 {
00075 Page page = BufferGetPage(buffer);
00076 Size minfree;
00077
00078
00079
00080
00081
00082
00083
00084 if (!PageIsPrunable(page, OldestXmin))
00085 return;
00086
00087
00088
00089
00090
00091
00092 if (RecoveryInProgress())
00093 return;
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107 minfree = RelationGetTargetPageFreeSpace(relation,
00108 HEAP_DEFAULT_FILLFACTOR);
00109 minfree = Max(minfree, BLCKSZ / 10);
00110
00111 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
00112 {
00113
00114 if (!ConditionalLockBufferForCleanup(buffer))
00115 return;
00116
00117
00118
00119
00120
00121
00122
00123 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
00124 {
00125 TransactionId ignore = InvalidTransactionId;
00126
00127
00128
00129 (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
00130 }
00131
00132
00133 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
00134 }
00135 }
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154 int
00155 heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
00156 bool report_stats, TransactionId *latestRemovedXid)
00157 {
00158 int ndeleted = 0;
00159 Page page = BufferGetPage(buffer);
00160 OffsetNumber offnum,
00161 maxoff;
00162 PruneState prstate;
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175 prstate.new_prune_xid = InvalidTransactionId;
00176 prstate.latestRemovedXid = *latestRemovedXid;
00177 prstate.nredirected = prstate.ndead = prstate.nunused = 0;
00178 memset(prstate.marked, 0, sizeof(prstate.marked));
00179
00180
00181 maxoff = PageGetMaxOffsetNumber(page);
00182 for (offnum = FirstOffsetNumber;
00183 offnum <= maxoff;
00184 offnum = OffsetNumberNext(offnum))
00185 {
00186 ItemId itemid;
00187
00188
00189 if (prstate.marked[offnum])
00190 continue;
00191
00192
00193 itemid = PageGetItemId(page, offnum);
00194 if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
00195 continue;
00196
00197
00198 ndeleted += heap_prune_chain(relation, buffer, offnum,
00199 OldestXmin,
00200 &prstate);
00201 }
00202
00203
00204 START_CRIT_SECTION();
00205
00206
00207 if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
00208 {
00209
00210
00211
00212
00213 heap_page_prune_execute(buffer,
00214 prstate.redirected, prstate.nredirected,
00215 prstate.nowdead, prstate.ndead,
00216 prstate.nowunused, prstate.nunused);
00217
00218
00219
00220
00221
00222 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
00223
00224
00225
00226
00227
00228
00229 PageClearFull(page);
00230
00231 MarkBufferDirty(buffer);
00232
00233
00234
00235
00236 if (RelationNeedsWAL(relation))
00237 {
00238 XLogRecPtr recptr;
00239
00240 recptr = log_heap_clean(relation, buffer,
00241 prstate.redirected, prstate.nredirected,
00242 prstate.nowdead, prstate.ndead,
00243 prstate.nowunused, prstate.nunused,
00244 prstate.latestRemovedXid);
00245
00246 PageSetLSN(BufferGetPage(buffer), recptr);
00247 }
00248 }
00249 else
00250 {
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
00261 PageIsFull(page))
00262 {
00263 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
00264 PageClearFull(page);
00265 MarkBufferDirtyHint(buffer);
00266 }
00267 }
00268
00269 END_CRIT_SECTION();
00270
00271
00272
00273
00274
00275
00276 if (report_stats && ndeleted > prstate.ndead)
00277 pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
00278
00279 *latestRemovedXid = prstate.latestRemovedXid;
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297 return ndeleted;
00298 }
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326 static int
00327 heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
00328 TransactionId OldestXmin,
00329 PruneState *prstate)
00330 {
00331 int ndeleted = 0;
00332 Page dp = (Page) BufferGetPage(buffer);
00333 TransactionId priorXmax = InvalidTransactionId;
00334 ItemId rootlp;
00335 HeapTupleHeader htup;
00336 OffsetNumber latestdead = InvalidOffsetNumber,
00337 maxoff = PageGetMaxOffsetNumber(dp),
00338 offnum;
00339 OffsetNumber chainitems[MaxHeapTuplesPerPage];
00340 int nchain = 0,
00341 i;
00342
00343 rootlp = PageGetItemId(dp, rootoffnum);
00344
00345
00346
00347
00348 if (ItemIdIsNormal(rootlp))
00349 {
00350 htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
00351 if (HeapTupleHeaderIsHeapOnly(htup))
00352 {
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371 if (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer)
00372 == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
00373 {
00374 heap_prune_record_unused(prstate, rootoffnum);
00375 HeapTupleHeaderAdvanceLatestRemovedXid(htup,
00376 &prstate->latestRemovedXid);
00377 ndeleted++;
00378 }
00379
00380
00381 return ndeleted;
00382 }
00383 }
00384
00385
00386 offnum = rootoffnum;
00387
00388
00389 for (;;)
00390 {
00391 ItemId lp;
00392 bool tupdead,
00393 recent_dead;
00394
00395
00396 if (offnum < FirstOffsetNumber || offnum > maxoff)
00397 break;
00398
00399
00400 if (prstate->marked[offnum])
00401 break;
00402
00403 lp = PageGetItemId(dp, offnum);
00404
00405
00406 if (!ItemIdIsUsed(lp))
00407 break;
00408
00409
00410
00411
00412
00413
00414 if (ItemIdIsRedirected(lp))
00415 {
00416 if (nchain > 0)
00417 break;
00418 chainitems[nchain++] = offnum;
00419 offnum = ItemIdGetRedirect(rootlp);
00420 continue;
00421 }
00422
00423
00424
00425
00426
00427
00428 if (ItemIdIsDead(lp))
00429 break;
00430
00431 Assert(ItemIdIsNormal(lp));
00432 htup = (HeapTupleHeader) PageGetItem(dp, lp);
00433
00434
00435
00436
00437 if (TransactionIdIsValid(priorXmax) &&
00438 !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
00439 break;
00440
00441
00442
00443
00444 chainitems[nchain++] = offnum;
00445
00446
00447
00448
00449 tupdead = recent_dead = false;
00450
00451 switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
00452 {
00453 case HEAPTUPLE_DEAD:
00454 tupdead = true;
00455 break;
00456
00457 case HEAPTUPLE_RECENTLY_DEAD:
00458 recent_dead = true;
00459
00460
00461
00462
00463
00464 heap_prune_record_prunable(prstate,
00465 HeapTupleHeaderGetUpdateXid(htup));
00466 break;
00467
00468 case HEAPTUPLE_DELETE_IN_PROGRESS:
00469
00470
00471
00472
00473
00474 heap_prune_record_prunable(prstate,
00475 HeapTupleHeaderGetUpdateXid(htup));
00476 break;
00477
00478 case HEAPTUPLE_LIVE:
00479 case HEAPTUPLE_INSERT_IN_PROGRESS:
00480
00481
00482
00483
00484
00485
00486
00487 break;
00488
00489 default:
00490 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
00491 break;
00492 }
00493
00494
00495
00496
00497
00498
00499
00500
00501 if (tupdead)
00502 {
00503 latestdead = offnum;
00504 HeapTupleHeaderAdvanceLatestRemovedXid(htup,
00505 &prstate->latestRemovedXid);
00506 }
00507 else if (!recent_dead)
00508 break;
00509
00510
00511
00512
00513
00514 if (!HeapTupleHeaderIsHotUpdated(htup))
00515 break;
00516
00517
00518
00519
00520 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
00521 BufferGetBlockNumber(buffer));
00522 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
00523 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
00524 }
00525
00526
00527
00528
00529
00530
00531 if (OffsetNumberIsValid(latestdead))
00532 {
00533
00534
00535
00536
00537
00538
00539
00540 for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
00541 {
00542 heap_prune_record_unused(prstate, chainitems[i]);
00543 ndeleted++;
00544 }
00545
00546
00547
00548
00549
00550
00551 if (ItemIdIsNormal(rootlp))
00552 ndeleted++;
00553
00554
00555
00556
00557
00558
00559 if (i >= nchain)
00560 heap_prune_record_dead(prstate, rootoffnum);
00561 else
00562 heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
00563 }
00564 else if (nchain < 2 && ItemIdIsRedirected(rootlp))
00565 {
00566
00567
00568
00569
00570
00571
00572
00573 heap_prune_record_dead(prstate, rootoffnum);
00574 }
00575
00576 return ndeleted;
00577 }
00578
00579
00580 static void
00581 heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
00582 {
00583
00584
00585
00586
00587 Assert(TransactionIdIsNormal(xid));
00588 if (!TransactionIdIsValid(prstate->new_prune_xid) ||
00589 TransactionIdPrecedes(xid, prstate->new_prune_xid))
00590 prstate->new_prune_xid = xid;
00591 }
00592
00593
00594 static void
00595 heap_prune_record_redirect(PruneState *prstate,
00596 OffsetNumber offnum, OffsetNumber rdoffnum)
00597 {
00598 Assert(prstate->nredirected < MaxHeapTuplesPerPage);
00599 prstate->redirected[prstate->nredirected * 2] = offnum;
00600 prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
00601 prstate->nredirected++;
00602 Assert(!prstate->marked[offnum]);
00603 prstate->marked[offnum] = true;
00604 Assert(!prstate->marked[rdoffnum]);
00605 prstate->marked[rdoffnum] = true;
00606 }
00607
00608
00609 static void
00610 heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
00611 {
00612 Assert(prstate->ndead < MaxHeapTuplesPerPage);
00613 prstate->nowdead[prstate->ndead] = offnum;
00614 prstate->ndead++;
00615 Assert(!prstate->marked[offnum]);
00616 prstate->marked[offnum] = true;
00617 }
00618
00619
00620 static void
00621 heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
00622 {
00623 Assert(prstate->nunused < MaxHeapTuplesPerPage);
00624 prstate->nowunused[prstate->nunused] = offnum;
00625 prstate->nunused++;
00626 Assert(!prstate->marked[offnum]);
00627 prstate->marked[offnum] = true;
00628 }
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640 void
00641 heap_page_prune_execute(Buffer buffer,
00642 OffsetNumber *redirected, int nredirected,
00643 OffsetNumber *nowdead, int ndead,
00644 OffsetNumber *nowunused, int nunused)
00645 {
00646 Page page = (Page) BufferGetPage(buffer);
00647 OffsetNumber *offnum;
00648 int i;
00649
00650
00651 offnum = redirected;
00652 for (i = 0; i < nredirected; i++)
00653 {
00654 OffsetNumber fromoff = *offnum++;
00655 OffsetNumber tooff = *offnum++;
00656 ItemId fromlp = PageGetItemId(page, fromoff);
00657
00658 ItemIdSetRedirect(fromlp, tooff);
00659 }
00660
00661
00662 offnum = nowdead;
00663 for (i = 0; i < ndead; i++)
00664 {
00665 OffsetNumber off = *offnum++;
00666 ItemId lp = PageGetItemId(page, off);
00667
00668 ItemIdSetDead(lp);
00669 }
00670
00671
00672 offnum = nowunused;
00673 for (i = 0; i < nunused; i++)
00674 {
00675 OffsetNumber off = *offnum++;
00676 ItemId lp = PageGetItemId(page, off);
00677
00678 ItemIdSetUnused(lp);
00679 }
00680
00681
00682
00683
00684
00685 PageRepairFragmentation(page);
00686 }
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704 void
00705 heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
00706 {
00707 OffsetNumber offnum,
00708 maxoff;
00709
00710 MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
00711
00712 maxoff = PageGetMaxOffsetNumber(page);
00713 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
00714 {
00715 ItemId lp = PageGetItemId(page, offnum);
00716 HeapTupleHeader htup;
00717 OffsetNumber nextoffnum;
00718 TransactionId priorXmax;
00719
00720
00721 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
00722 continue;
00723
00724 if (ItemIdIsNormal(lp))
00725 {
00726 htup = (HeapTupleHeader) PageGetItem(page, lp);
00727
00728
00729
00730
00731
00732
00733 if (HeapTupleHeaderIsHeapOnly(htup))
00734 continue;
00735
00736
00737
00738
00739
00740 root_offsets[offnum - 1] = offnum;
00741
00742
00743 if (!HeapTupleHeaderIsHotUpdated(htup))
00744 continue;
00745
00746
00747 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
00748 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
00749 }
00750 else
00751 {
00752
00753 Assert(ItemIdIsRedirected(lp));
00754
00755 nextoffnum = ItemIdGetRedirect(lp);
00756 priorXmax = InvalidTransactionId;
00757 }
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767 for (;;)
00768 {
00769 lp = PageGetItemId(page, nextoffnum);
00770
00771
00772 if (!ItemIdIsNormal(lp))
00773 break;
00774
00775 htup = (HeapTupleHeader) PageGetItem(page, lp);
00776
00777 if (TransactionIdIsValid(priorXmax) &&
00778 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
00779 break;
00780
00781
00782 root_offsets[nextoffnum - 1] = offnum;
00783
00784
00785 if (!HeapTupleHeaderIsHotUpdated(htup))
00786 break;
00787
00788 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
00789 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
00790 }
00791 }
00792 }