#include "postgres.h"#include "access/heapam_xlog.h"#include "access/nbtree.h"#include "access/transam.h"#include "storage/procarray.h"#include "miscadmin.h"
typedef struct bt_incomplete_action bt_incomplete_action;
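The struct itself is not reproduced on this page. The sketch below reconstructs it from the fields the functions here reference (node, is_split, is_root, leftblk, rightblk, delblk); take the field order and comments as a best-effort reconstruction rather than the authoritative declaration.

typedef struct bt_incomplete_action
{
    RelFileNode node;           /* the index rel */
    bool        is_split;       /* T = pending split, F = pending deletion */
    /* these fields are for an incomplete split: */
    bool        is_root;        /* we split the root */
    BlockNumber leftblk;        /* left half of split */
    BlockNumber rightblk;       /* right half of split */
    /* this field is for an incomplete deletion: */
    BlockNumber delblk;         /* parent block to be deleted */
} bt_incomplete_action;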
static void _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
                             BlockNumber root, uint32 level,
                             BlockNumber fastroot, uint32 fastlevel)
Definition at line 155 of file nbtxlog.c.
References _bt_pageinit(), Assert, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_METAPAGE, BufferGetPage, BufferGetPageSize, BufferIsValid, MarkBufferDirty(), PageGetSpecialPointer, PageSetLSN, UnlockReleaseBuffer(), and XLogReadBuffer().
Referenced by btree_xlog_delete_page(), btree_xlog_insert(), and btree_xlog_newroot().
{
    Buffer metabuf;
    Page metapg;
    BTMetaPageData *md;
    BTPageOpaque pageop;

    metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
    Assert(BufferIsValid(metabuf));
    metapg = BufferGetPage(metabuf);

    _bt_pageinit(metapg, BufferGetPageSize(metabuf));

    md = BTPageGetMeta(metapg);
    md->btm_magic = BTREE_MAGIC;
    md->btm_version = BTREE_VERSION;
    md->btm_root = root;
    md->btm_level = level;
    md->btm_fastroot = fastroot;
    md->btm_fastlevel = fastlevel;

    pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
    pageop->btpo_flags = BTP_META;

    /*
     * Set pd_lower just past the end of the metadata.  This is not essential
     * but it makes the page look compressible to xlog.c.
     */
    ((PageHeader) metapg)->pd_lower =
        ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;

    PageSetLSN(metapg, lsn);
    MarkBufferDirty(metabuf);
    UnlockReleaseBuffer(metabuf);
}
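The four values restored here normally arrive as an xl_btree_metadata payload appended to insert, delete-page, and newroot records. A reference sketch of that record layout, reconstructed from the fields used by the callers on this page (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_metadata
{
    BlockNumber root;           /* new root block */
    uint32      level;          /* its tree level */
    BlockNumber fastroot;       /* current "fast root" block */
    uint32      fastlevel;      /* its tree level */
} xl_btree_metadata;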
static void _bt_restore_page(Page page, char *from, int len)
Definition at line 135 of file nbtxlog.c.
References elog, end, FirstOffsetNumber, IndexTupleDSize, InvalidOffsetNumber, MAXALIGN, PageAddItem(), and PANIC.
Referenced by btree_xlog_newroot(), and btree_xlog_split().
{
    IndexTupleData itupdata;
    Size itemsz;
    char *end = from + len;

    for (; from < end;)
    {
        /* Need to copy tuple header due to alignment considerations */
        memcpy(&itupdata, from, sizeof(IndexTupleData));
        itemsz = IndexTupleDSize(itupdata);
        itemsz = MAXALIGN(itemsz);
        if (PageAddItem(page, (Item) from, itemsz, FirstOffsetNumber,
                        false, false) == InvalidOffsetNumber)
            elog(PANIC, "_bt_restore_page: cannot add item to page");

        from += itemsz;
    }
}
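The loop consumes tuples packed end to end, each occupying MAXALIGN'd space; that matches the way the writer logs a page's tuple storage area, where items already sit at aligned boundaries. The helper below is a hypothetical illustration of that layout assumption, not PostgreSQL code:

/*
 * Hypothetical sketch only: pack index tuples in the layout that
 * _bt_restore_page() walks.  The real writer simply logs the page's
 * existing tuple storage (between pd_upper and pd_special).
 */
static Size
pack_index_tuples(char *dst, IndexTuple *itups, int nitups)
{
    Size total = 0;
    int i;

    for (i = 0; i < nitups; i++)
    {
        Size itemsz = MAXALIGN(IndexTupleSize(itups[i]));

        memcpy(dst + total, itups[i], IndexTupleSize(itups[i]));
        total += itemsz;        /* alignment padding is left as-is */
    }
    return total;
}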
void btree_redo(XLogRecPtr lsn, XLogRecord *record)
Definition at line 1023 of file nbtxlog.c.
References btree_xlog_delete(), btree_xlog_delete_page(), btree_xlog_insert(), btree_xlog_newroot(), btree_xlog_reuse_page(), btree_xlog_split(), btree_xlog_vacuum(), elog, PANIC, XLogRecord::xl_info, XLOG_BTREE_DELETE, XLOG_BTREE_DELETE_PAGE, XLOG_BTREE_DELETE_PAGE_HALF, XLOG_BTREE_DELETE_PAGE_META, XLOG_BTREE_INSERT_LEAF, XLOG_BTREE_INSERT_META, XLOG_BTREE_INSERT_UPPER, XLOG_BTREE_NEWROOT, XLOG_BTREE_REUSE_PAGE, XLOG_BTREE_SPLIT_L, XLOG_BTREE_SPLIT_L_ROOT, XLOG_BTREE_SPLIT_R, XLOG_BTREE_SPLIT_R_ROOT, and XLOG_BTREE_VACUUM.
{
    uint8 info = record->xl_info & ~XLR_INFO_MASK;

    switch (info)
    {
        case XLOG_BTREE_INSERT_LEAF:
            btree_xlog_insert(true, false, lsn, record);
            break;
        case XLOG_BTREE_INSERT_UPPER:
            btree_xlog_insert(false, false, lsn, record);
            break;
        case XLOG_BTREE_INSERT_META:
            btree_xlog_insert(false, true, lsn, record);
            break;
        case XLOG_BTREE_SPLIT_L:
            btree_xlog_split(true, false, lsn, record);
            break;
        case XLOG_BTREE_SPLIT_R:
            btree_xlog_split(false, false, lsn, record);
            break;
        case XLOG_BTREE_SPLIT_L_ROOT:
            btree_xlog_split(true, true, lsn, record);
            break;
        case XLOG_BTREE_SPLIT_R_ROOT:
            btree_xlog_split(false, true, lsn, record);
            break;
        case XLOG_BTREE_VACUUM:
            btree_xlog_vacuum(lsn, record);
            break;
        case XLOG_BTREE_DELETE:
            btree_xlog_delete(lsn, record);
            break;
        case XLOG_BTREE_DELETE_PAGE:
        case XLOG_BTREE_DELETE_PAGE_META:
        case XLOG_BTREE_DELETE_PAGE_HALF:
            btree_xlog_delete_page(info, lsn, record);
            break;
        case XLOG_BTREE_NEWROOT:
            btree_xlog_newroot(lsn, record);
            break;
        case XLOG_BTREE_REUSE_PAGE:
            btree_xlog_reuse_page(lsn, record);
            break;
        default:
            elog(PANIC, "btree_redo: unknown op code %u", info);
    }
}
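Recovery reaches btree_redo() through the resource-manager table rather than calling it directly. A simplified sketch of that dispatch step; redo_one_record() is a hypothetical wrapper, and the RmgrTable layout is assumed from the same-era access/xlog_internal.h:

/* Hypothetical wrapper illustrating rmgr dispatch; not PostgreSQL code. */
static void
redo_one_record(XLogRecPtr lsn, XLogRecord *record)
{
    /* For B-tree records, xl_rmid == RM_BTREE_ID and rm_redo == btree_redo */
    RmgrTable[record->xl_rmid].rm_redo(lsn, record);
}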
bool btree_safe_restartpoint(void)
Definition at line 1141 of file nbtxlog.c.
{
    if (incomplete_actions)
        return false;
    return true;
}
void btree_xlog_cleanup(void)
Definition at line 1079 of file nbtxlog.c.
References _bt_insert_parent(), _bt_pagedel(), buf, BufferGetPage, BufferIsValid, CreateFakeRelcacheEntry(), bt_incomplete_action::delblk, elog, FreeFakeRelcacheEntry(), bt_incomplete_action::is_root, bt_incomplete_action::is_split, bt_incomplete_action::leftblk, lfirst, bt_incomplete_action::node, NULL, P_LEFTMOST, P_RIGHTMOST, PageGetSpecialPointer, PANIC, bt_incomplete_action::rightblk, and XLogReadBuffer().
{
    ListCell *l;

    foreach(l, incomplete_actions)
    {
        bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);

        if (action->is_split)
        {
            /* finish an incomplete split */
            Buffer lbuf,
                   rbuf;
            Page lpage,
                 rpage;
            BTPageOpaque lpageop,
                         rpageop;
            bool is_only;
            Relation reln;

            lbuf = XLogReadBuffer(action->node, action->leftblk, false);
            /* failure is impossible because we wrote this page earlier */
            if (!BufferIsValid(lbuf))
                elog(PANIC, "btree_xlog_cleanup: left block unfound");
            lpage = (Page) BufferGetPage(lbuf);
            lpageop = (BTPageOpaque) PageGetSpecialPointer(lpage);
            rbuf = XLogReadBuffer(action->node, action->rightblk, false);
            /* failure is impossible because we wrote this page earlier */
            if (!BufferIsValid(rbuf))
                elog(PANIC, "btree_xlog_cleanup: right block unfound");
            rpage = (Page) BufferGetPage(rbuf);
            rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);

            /* if these are now the only pages on their level, the split was of the level's only page */
            is_only = P_LEFTMOST(lpageop) && P_RIGHTMOST(rpageop);

            reln = CreateFakeRelcacheEntry(action->node);
            _bt_insert_parent(reln, lbuf, rbuf, NULL,
                              action->is_root, is_only);
            FreeFakeRelcacheEntry(reln);
        }
        else
        {
            /* finish an incomplete deletion (of a half-dead page) */
            Buffer buf;

            buf = XLogReadBuffer(action->node, action->delblk, false);
            if (BufferIsValid(buf))
            {
                Relation reln;

                reln = CreateFakeRelcacheEntry(action->node);
                if (_bt_pagedel(reln, buf, NULL) == 0)
                    elog(PANIC, "btree_xlog_cleanup: _bt_pagedel failed");
                FreeFakeRelcacheEntry(reln);
            }
        }
    }
    incomplete_actions = NIL;
}
static void btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
Definition at line 713 of file nbtxlog.c.
References xl_btree_delete::block, BTPageOpaqueData::btpo_flags, btree_xlog_delete_get_latestRemovedXid(), BufferGetPage, BufferIsValid, InHotStandby, MarkBufferDirty(), xl_btree_delete::nitems, xl_btree_delete::node, PageGetLSN, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, ResolveRecoveryConflictWithSnapshot(), RestoreBackupBlock(), SizeOfBtreeDelete, UnlockReleaseBuffer(), XLogRecord::xl_info, XLogRecord::xl_len, XLogReadBuffer(), XLogRecGetData, and XLR_BKP_BLOCK.
Referenced by btree_redo().
{
    xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
    Buffer buffer;
    Page page;
    BTPageOpaque opaque;

    /*
     * If we have any conflict processing to do, it must happen before we
     * update the page.
     *
     * Btree delete records can conflict with standby queries.  You might
     * think that vacuum records would conflict as well, but we've handled
     * that already.  XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
     * cleaned by the vacuum of the heap and so we can resolve any conflicts
     * just once when that arrives.  After that we know that no conflicts
     * exist from individual btree vacuum records on that index.
     */
    if (InHotStandby)
    {
        TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec);

        ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
    }

    /* If we have a full-page image, restore it and we're done */
    if (record->xl_info & XLR_BKP_BLOCK(0))
    {
        (void) RestoreBackupBlock(lsn, record, 0, false, false);
        return;
    }

    /*
     * We don't need to take a cleanup lock to apply these changes.  See
     * nbtree/README for details.
     */
    buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
    if (!BufferIsValid(buffer))
        return;
    page = (Page) BufferGetPage(buffer);

    if (lsn <= PageGetLSN(page))
    {
        UnlockReleaseBuffer(buffer);
        return;
    }

    if (record->xl_len > SizeOfBtreeDelete)
    {
        OffsetNumber *unused;

        unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);

        PageIndexMultiDelete(page, unused, xlrec->nitems);
    }

    /*
     * Mark the page as not containing any LP_DEAD items --- see comments in
     * _bt_delitems_delete().
     */
    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
    opaque->btpo_flags &= ~BTP_HAS_GARBAGE;

    PageSetLSN(page, lsn);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
}
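The record parsed above carries both the index block and the heap relation node (hnode) needed for conflict resolution, with the deleted offset numbers appended after the fixed-size header. A reference sketch of the layout, reconstructed from the fields used here (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_delete
{
    RelFileNode node;           /* RelFileNode of the index */
    BlockNumber block;          /* index block with items to delete */
    RelFileNode hnode;          /* RelFileNode of the heap it points at */
    int         nitems;         /* number of offset numbers that follow */

    /* TARGET OFFSET NUMBERS FOLLOW AT THE END */
} xl_btree_delete;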
static TransactionId btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
Definition at line 577 of file nbtxlog.c.
References Assert, xl_btree_delete::block, BufferGetPage, BufferIsValid, CHECK_FOR_INTERRUPTS, CountDBBackends(), elog, HeapTupleHeaderAdvanceLatestRemovedXid(), xl_btree_delete::hnode, i, InvalidOid, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, xl_btree_delete::nitems, xl_btree_delete::node, PageGetItem, PageGetItemId, PANIC, reachedConsistency, IndexTupleData::t_tid, UnlockReleaseBuffer(), and XLogReadBuffer().
Referenced by btree_xlog_delete().
{
    OffsetNumber *unused;
    Buffer ibuffer,
           hbuffer;
    Page ipage,
         hpage;
    ItemId iitemid,
           hitemid;
    IndexTuple itup;
    HeapTupleHeader htuphdr;
    BlockNumber hblkno;
    OffsetNumber hoffnum;
    TransactionId latestRemovedXid = InvalidTransactionId;
    int i;

    /*
     * If there's nothing running on the standby we don't need to derive a
     * full latestRemovedXid value, so use a fast path out of here.  This
     * returns InvalidTransactionId, and so will conflict with all HS
     * transactions; but since we just worked out that that's zero people,
     * it's OK.
     *
     * XXX There is a race condition here, which is that a new backend might
     * start just after we look.  If so, it cannot need to conflict, but this
     * coding will result in throwing a conflict anyway.
     */
    if (CountDBBackends(InvalidOid) == 0)
        return latestRemovedXid;

    /*
     * In what follows, we have to examine the previous state of the index
     * page, as well as the heap page(s) it points to.  This is only valid if
     * WAL replay has reached a consistent database state; which means that
     * the preceding check is not just an optimization, but is *necessary*.
     * We won't have let in any user sessions before we reach consistency.
     */
    if (!reachedConsistency)
        elog(PANIC, "btree_xlog_delete_get_latestRemovedXid: cannot operate with inconsistent data");

    /*
     * Get index page.  If the DB is consistent, this should not fail, nor
     * should any of the heap page fetches below.  If one does, we return
     * InvalidTransactionId to cancel all HS transactions.  That's probably
     * overkill, but it's safe, and certainly better than panicking here.
     */
    ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
    if (!BufferIsValid(ibuffer))
        return InvalidTransactionId;
    ipage = (Page) BufferGetPage(ibuffer);

    /*
     * Loop through the deleted index items to obtain the TransactionId from
     * the heap items they point to.
     */
    unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);

    for (i = 0; i < xlrec->nitems; i++)
    {
        /*
         * Identify the index tuple about to be deleted
         */
        iitemid = PageGetItemId(ipage, unused[i]);
        itup = (IndexTuple) PageGetItem(ipage, iitemid);

        /*
         * Locate the heap page that the index tuple points at
         */
        hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
        hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
        if (!BufferIsValid(hbuffer))
        {
            UnlockReleaseBuffer(ibuffer);
            return InvalidTransactionId;
        }
        hpage = (Page) BufferGetPage(hbuffer);

        /*
         * Look up the heap tuple header that the index tuple points at by
         * using the heap node supplied with the xlrec.  We can't use
         * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
         * Note that we are not looking at tuple data here, just headers.
         */
        hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid));
        hitemid = PageGetItemId(hpage, hoffnum);

        /*
         * Follow any redirections until we find something useful.
         */
        while (ItemIdIsRedirected(hitemid))
        {
            hoffnum = ItemIdGetRedirect(hitemid);
            hitemid = PageGetItemId(hpage, hoffnum);
            CHECK_FOR_INTERRUPTS();
        }

        /*
         * If the heap item has storage, then read the header and use that to
         * set latestRemovedXid.
         *
         * Some LP_DEAD items may not be accessible, so we ignore them.
         */
        if (ItemIdHasStorage(hitemid))
        {
            htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);

            HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid);
        }
        else if (ItemIdIsDead(hitemid))
        {
            /*
             * Conjecture: if hitemid is dead then it had xids before the
             * xids marked on LP_NORMAL items.  So we just ignore this item
             * and move onto the next, for the purposes of calculating
             * latestRemovedXid.
             */
        }
        else
            Assert(!ItemIdIsUsed(hitemid));

        UnlockReleaseBuffer(hbuffer);
    }

    UnlockReleaseBuffer(ibuffer);

    /*
     * XXX If all heap tuples were LP_DEAD then we will be returning
     * InvalidTransactionId here, causing conflict for all HS transactions.
     * That should happen very rarely (reasoning please?).  Also note that
     * caller can't tell the difference between this case and the fast path
     * exit above.  May need to change that in future.
     */
    return latestRemovedXid;
}
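HeapTupleHeaderAdvanceLatestRemovedXid() lives in heapam.c; conceptually it just pushes latestRemovedXid forward to the newest xid seen so far. A simplified conceptual sketch of that idea (the real helper also examines the tuple's xmin/xmax and HEAP_MOVED bits, so treat this as an approximation):

/* Conceptual sketch only; not the real heapam.c helper. */
static void
advance_latest_removed_xid(TransactionId xid, TransactionId *latestRemovedXid)
{
    if (TransactionIdIsNormal(xid) &&
        (!TransactionIdIsValid(*latestRemovedXid) ||
         TransactionIdPrecedes(*latestRemovedXid, xid)))
        *latestRemovedXid = xid;
}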
static void btree_xlog_delete_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
Definition at line 782 of file nbtxlog.c.
References _bt_pageinit(), _bt_restore_meta(), Assert, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, xl_btree_delete_page::btpo_xact, BufferGetPage, BufferGetPageSize, BufferIsValid, xl_btree_delete_page::deadblk, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, forget_matching_deletion(), ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, ItemPointerSet, xl_btree_delete_page::leftblk, xl_btree_metadata::level, log_incomplete_deletion(), MarkBufferDirty(), xl_btreetid::node, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_NONE, PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIndexTupleDelete(), PageSetLSN, RestoreBackupBlock(), xl_btree_delete_page::rightblk, xl_btree_metadata::root, SizeOfBtreeDeletePage, IndexTupleData::t_tid, xl_btree_delete_page::target, xl_btreetid::tid, UnlockReleaseBuffer(), BTPageOpaqueData::xact, XLogRecord::xl_info, XLOG_BTREE_DELETE_PAGE_HALF, XLOG_BTREE_DELETE_PAGE_META, XLogReadBuffer(), XLogRecGetData, and XLR_BKP_BLOCK.
Referenced by btree_redo().
{
    xl_btree_delete_page *xlrec = (xl_btree_delete_page *) XLogRecGetData(record);
    BlockNumber parent;
    BlockNumber target;
    BlockNumber leftsib;
    BlockNumber rightsib;
    Buffer buffer;
    Page page;
    BTPageOpaque pageop;

    parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
    target = xlrec->deadblk;
    leftsib = xlrec->leftblk;
    rightsib = xlrec->rightblk;

    /*
     * In normal operation, we would lock all the pages this WAL record
     * touches before changing any of them.  In WAL replay, it should be okay
     * to lock just one page at a time, since no concurrent index updates can
     * be happening, and readers should not care whether they arrive at the
     * target page or not (since it's surely empty).
     */

    /* parent page */
    if (record->xl_info & XLR_BKP_BLOCK(0))
        (void) RestoreBackupBlock(lsn, record, 0, false, false);
    else
    {
        buffer = XLogReadBuffer(xlrec->target.node, parent, false);
        if (BufferIsValid(buffer))
        {
            page = (Page) BufferGetPage(buffer);
            pageop = (BTPageOpaque) PageGetSpecialPointer(page);
            if (lsn <= PageGetLSN(page))
            {
                UnlockReleaseBuffer(buffer);
            }
            else
            {
                OffsetNumber poffset;

                poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
                if (poffset >= PageGetMaxOffsetNumber(page))
                {
                    Assert(info == XLOG_BTREE_DELETE_PAGE_HALF);
                    Assert(poffset == P_FIRSTDATAKEY(pageop));
                    PageIndexTupleDelete(page, poffset);
                    pageop->btpo_flags |= BTP_HALF_DEAD;
                }
                else
                {
                    ItemId itemid;
                    IndexTuple itup;
                    OffsetNumber nextoffset;

                    Assert(info != XLOG_BTREE_DELETE_PAGE_HALF);
                    itemid = PageGetItemId(page, poffset);
                    itup = (IndexTuple) PageGetItem(page, itemid);
                    ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
                    nextoffset = OffsetNumberNext(poffset);
                    PageIndexTupleDelete(page, nextoffset);
                }

                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
                UnlockReleaseBuffer(buffer);
            }
        }
    }

    /* Fix left-link of right sibling */
    if (record->xl_info & XLR_BKP_BLOCK(1))
        (void) RestoreBackupBlock(lsn, record, 1, false, false);
    else
    {
        buffer = XLogReadBuffer(xlrec->target.node, rightsib, false);
        if (BufferIsValid(buffer))
        {
            page = (Page) BufferGetPage(buffer);
            if (lsn <= PageGetLSN(page))
            {
                UnlockReleaseBuffer(buffer);
            }
            else
            {
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
                pageop->btpo_prev = leftsib;

                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
                UnlockReleaseBuffer(buffer);
            }
        }
    }

    /* Fix right-link of left sibling, if any */
    if (record->xl_info & XLR_BKP_BLOCK(2))
        (void) RestoreBackupBlock(lsn, record, 2, false, false);
    else
    {
        if (leftsib != P_NONE)
        {
            buffer = XLogReadBuffer(xlrec->target.node, leftsib, false);
            if (BufferIsValid(buffer))
            {
                page = (Page) BufferGetPage(buffer);
                if (lsn <= PageGetLSN(page))
                {
                    UnlockReleaseBuffer(buffer);
                }
                else
                {
                    pageop = (BTPageOpaque) PageGetSpecialPointer(page);
                    pageop->btpo_next = rightsib;

                    PageSetLSN(page, lsn);
                    MarkBufferDirty(buffer);
                    UnlockReleaseBuffer(buffer);
                }
            }
        }
    }

    /* Rewrite target page as empty deleted page */
    buffer = XLogReadBuffer(xlrec->target.node, target, true);
    Assert(BufferIsValid(buffer));
    page = (Page) BufferGetPage(buffer);

    _bt_pageinit(page, BufferGetPageSize(buffer));
    pageop = (BTPageOpaque) PageGetSpecialPointer(page);

    pageop->btpo_prev = leftsib;
    pageop->btpo_next = rightsib;
    pageop->btpo.xact = xlrec->btpo_xact;
    pageop->btpo_flags = BTP_DELETED;
    pageop->btpo_cycleid = 0;

    PageSetLSN(page, lsn);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);

    /* Update metapage if needed */
    if (info == XLOG_BTREE_DELETE_PAGE_META)
    {
        xl_btree_metadata md;

        memcpy(&md, (char *) xlrec + SizeOfBtreeDeletePage,
               sizeof(xl_btree_metadata));
        _bt_restore_meta(xlrec->target.node, lsn,
                         md.root, md.level,
                         md.fastroot, md.fastlevel);
    }

    /* Forget any completed deletion */
    forget_matching_deletion(xlrec->target.node, target);

    /* If parent became half-dead, remember it for deletion */
    if (info == XLOG_BTREE_DELETE_PAGE_HALF)
        log_incomplete_deletion(xlrec->target.node, parent);
}
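The record header names the parent downlink's TID plus the three sibling blocks touched above; the metapage payload follows only for the _META variant. A reference sketch of the layout, reconstructed from the fields used here (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_delete_page
{
    xl_btreetid target;         /* deleted downlink's tid in parent page */
    BlockNumber deadblk;        /* child block being deleted */
    BlockNumber leftblk;        /* child block's left sibling, if any */
    BlockNumber rightblk;       /* child block's right sibling */
    TransactionId btpo_xact;    /* value of btpo.xact for use in recovery */
    /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_DELETE_PAGE_META */
} xl_btree_delete_page;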
static void btree_xlog_insert(bool isleaf, bool ismeta, XLogRecPtr lsn, XLogRecord *record)
Definition at line 194 of file nbtxlog.c.
References _bt_restore_meta(), BufferGetPage, BufferIsValid, elog, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, forget_matching_split(), InvalidOffsetNumber, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, xl_btree_metadata::level, MarkBufferDirty(), xl_btreetid::node, PageAddItem(), PageGetLSN, PageSetLSN, PANIC, RestoreBackupBlock(), xl_btree_metadata::root, SizeOfBtreeInsert, xl_btree_insert::target, xl_btreetid::tid, UnlockReleaseBuffer(), XLogRecord::xl_info, XLogRecord::xl_len, XLogReadBuffer(), XLogRecGetData, and XLR_BKP_BLOCK.
Referenced by btree_redo().
{
    xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
    Buffer buffer;
    Page page;
    char *datapos;
    int datalen;
    xl_btree_metadata md;
    BlockNumber downlink = 0;

    datapos = (char *) xlrec + SizeOfBtreeInsert;
    datalen = record->xl_len - SizeOfBtreeInsert;

    if (!isleaf)
    {
        memcpy(&downlink, datapos, sizeof(BlockNumber));
        datapos += sizeof(BlockNumber);
        datalen -= sizeof(BlockNumber);
    }
    if (ismeta)
    {
        memcpy(&md, datapos, sizeof(xl_btree_metadata));
        datapos += sizeof(xl_btree_metadata);
        datalen -= sizeof(xl_btree_metadata);
    }

    if (record->xl_info & XLR_BKP_BLOCK(0))
        (void) RestoreBackupBlock(lsn, record, 0, false, false);
    else
    {
        buffer = XLogReadBuffer(xlrec->target.node,
                                ItemPointerGetBlockNumber(&(xlrec->target.tid)),
                                false);
        if (BufferIsValid(buffer))
        {
            page = (Page) BufferGetPage(buffer);

            if (lsn <= PageGetLSN(page))
            {
                UnlockReleaseBuffer(buffer);
            }
            else
            {
                if (PageAddItem(page, (Item) datapos, datalen,
                                ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
                                false, false) == InvalidOffsetNumber)
                    elog(PANIC, "btree_insert_redo: failed to add item");

                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
                UnlockReleaseBuffer(buffer);
            }
        }
    }

    /*
     * Note: in normal operation, we'd update the metapage while still holding
     * lock on the page we inserted into.  But during replay it's not
     * necessary to hold that lock, since no other index updates can be
     * happening concurrently, and readers will cope fine with following an
     * obsolete link from the metapage.
     */
    if (ismeta)
        _bt_restore_meta(xlrec->target.node, lsn,
                         md.root, md.level,
                         md.fastroot, md.fastlevel);

    /* Forget any split this insertion completes */
    if (!isleaf)
        forget_matching_split(xlrec->target.node, downlink, false);
}
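The fixed part of the record is just the target TID; the optional downlink, optional metadata, and the tuple itself follow, which is why the code above peels fields off datapos in exactly that order. A reference sketch of the layout, reconstructed from the parsing above (not the authoritative access/nbtree.h declarations):

typedef struct xl_btreetid
{
    RelFileNode node;
    ItemPointerData tid;        /* changed tuple id */
} xl_btreetid;

typedef struct xl_btree_insert
{
    xl_btreetid target;         /* inserted tuple id */
    /* BlockNumber downlink FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
    /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
    /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
} xl_btree_insert;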
static void btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
Definition at line 945 of file nbtxlog.c.
References _bt_pageinit(), _bt_restore_meta(), _bt_restore_page(), Assert, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetPage, BufferGetPageSize, BufferIsValid, forget_matching_split(), ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, xl_btree_newroot::level, BTPageOpaqueData::level, MarkBufferDirty(), xl_btree_newroot::node, P_FIRSTKEY, P_HIKEY, PageGetItem, PageGetItemId, PageGetSpecialPointer, PageSetLSN, xl_btree_newroot::rootblk, SizeOfBtreeNewroot, IndexTupleData::t_tid, UnlockReleaseBuffer(), XLogRecord::xl_info, XLogRecord::xl_len, XLogReadBuffer(), XLogRecGetData, and XLR_BKP_BLOCK_MASK.
Referenced by btree_redo().
{
    xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
    Buffer buffer;
    Page page;
    BTPageOpaque pageop;
    BlockNumber downlink = 0;

    /* Backup blocks are not used in newroot records */
    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));

    buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
    Assert(BufferIsValid(buffer));
    page = (Page) BufferGetPage(buffer);

    _bt_pageinit(page, BufferGetPageSize(buffer));
    pageop = (BTPageOpaque) PageGetSpecialPointer(page);

    pageop->btpo_flags = BTP_ROOT;
    pageop->btpo_prev = pageop->btpo_next = P_NONE;
    pageop->btpo.level = xlrec->level;
    if (xlrec->level == 0)
        pageop->btpo_flags |= BTP_LEAF;
    pageop->btpo_cycleid = 0;

    if (record->xl_len > SizeOfBtreeNewroot)
    {
        IndexTuple itup;

        _bt_restore_page(page,
                         (char *) xlrec + SizeOfBtreeNewroot,
                         record->xl_len - SizeOfBtreeNewroot);
        /* extract downlink to the right-hand split page */
        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY));
        downlink = ItemPointerGetBlockNumber(&(itup->t_tid));
        Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
    }

    PageSetLSN(page, lsn);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);

    _bt_restore_meta(xlrec->node, lsn,
                     xlrec->rootblk, xlrec->level,
                     xlrec->rootblk, xlrec->level);

    /* Check to see if this satisfies any incomplete insertions */
    if (record->xl_len > SizeOfBtreeNewroot)
        forget_matching_split(xlrec->node, downlink, true);
}
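A newroot record carries only the root location and level, optionally followed by the two tuples of a root created by a root split (hence the xl_len check above). A reference sketch of the layout, reconstructed from the fields used here (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_newroot
{
    RelFileNode node;
    BlockNumber rootblk;        /* location of new root */
    uint32      level;          /* its tree level */
    /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
} xl_btree_newroot;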
static void btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
Definition at line 997 of file nbtxlog.c.
References Assert, InHotStandby, xl_btree_reuse_page::latestRemovedXid, xl_btree_reuse_page::node, ResolveRecoveryConflictWithSnapshot(), XLogRecord::xl_info, XLogRecGetData, and XLR_BKP_BLOCK_MASK.
Referenced by btree_redo().
{
    xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);

    /*
     * Btree reuse_page records exist to provide a conflict point when we
     * reuse pages in the index via the FSM.  That's all they do though.
     *
     * latestRemovedXid was the page's btpo.xact.  The btpo.xact <
     * RecentGlobalXmin test in _bt_page_recyclable() conceptually mirrors the
     * pgxact->xmin > limitXmin test in GetConflictingVirtualXIDs().
     * Consequently, one XID value achieves the same exclusion effect on
     * master and standby.
     */
    if (InHotStandby)
    {
        ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
                                            xlrec->node);
    }

    /* Backup blocks are not used in reuse_page records */
    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
}
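Since the record exists purely as a conflict point, its payload is minimal. A reference sketch, reconstructed from the fields used here (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_reuse_page
{
    RelFileNode node;
    BlockNumber block;          /* block being reused */
    TransactionId latestRemovedXid;     /* from the page's btpo.xact */
} xl_btree_reuse_page;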
static void btree_xlog_split(bool onleft, bool isroot, XLogRecPtr lsn, XLogRecord *record)
Definition at line 267 of file nbtxlog.c.
References _bt_pageinit(), _bt_restore_page(), Assert, BlockIdGetBlockNumber, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, xl_btree_split::firstright, forget_matching_split(), IndexTupleSize, InvalidOffsetNumber, ItemIdGetLength, xl_btree_split::leftsib, BTPageOpaqueData::level, xl_btree_split::level, log_incomplete_split(), MarkBufferDirty(), MAXALIGN, xl_btree_split::node, P_FIRSTDATAKEY, P_HIKEY, P_NONE, P_RIGHTMOST, PageAddItem(), PageGetItem, PageGetItemId, PageGetLSN, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, PANIC, RestoreBackupBlock(), xl_btree_split::rightsib, xl_btree_split::rnext, SizeOfBtreeSplit, UnlockReleaseBuffer(), XLogRecord::xl_info, XLogRecord::xl_len, XLogReadBuffer(), XLogRecGetData, and XLR_BKP_BLOCK.
Referenced by btree_redo().
{
    xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
    Buffer rbuf;
    Page rpage;
    BTPageOpaque ropaque;
    char *datapos;
    int datalen;
    OffsetNumber newitemoff = 0;
    Item newitem = NULL;
    Size newitemsz = 0;
    Item left_hikey = NULL;
    Size left_hikeysz = 0;

    datapos = (char *) xlrec + SizeOfBtreeSplit;
    datalen = record->xl_len - SizeOfBtreeSplit;

    /* Forget any split this insertion completes */
    if (xlrec->level > 0)
    {
        /* we assume SizeOfBtreeSplit is at least 16-bit aligned */
        BlockNumber downlink = BlockIdGetBlockNumber((BlockId) datapos);

        datapos += sizeof(BlockIdData);
        datalen -= sizeof(BlockIdData);

        forget_matching_split(xlrec->node, downlink, false);

        /* Extract left hikey and its size (still assuming 16-bit alignment) */
        if (!(record->xl_info & XLR_BKP_BLOCK(0)))
        {
            /* We assume 16-bit alignment is enough for IndexTupleSize */
            left_hikey = (Item) datapos;
            left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));

            datapos += left_hikeysz;
            datalen -= left_hikeysz;
        }
    }

    /* Extract newitem and newitemoff, if present */
    if (onleft)
    {
        /* Extract the offset (still assuming 16-bit alignment) */
        memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
        datapos += sizeof(OffsetNumber);
        datalen -= sizeof(OffsetNumber);
    }

    if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
    {
        /*
         * We assume that 16-bit alignment is enough to apply IndexTupleSize
         * (since it's fetching from a uint16 field) and also enough for
         * PageAddItem to insert the tuple.
         */
        newitem = (Item) datapos;
        newitemsz = MAXALIGN(IndexTupleSize(newitem));
        datapos += newitemsz;
        datalen -= newitemsz;
    }

    /* Reconstruct right (new) sibling page from scratch */
    rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
    Assert(BufferIsValid(rbuf));
    rpage = (Page) BufferGetPage(rbuf);

    _bt_pageinit(rpage, BufferGetPageSize(rbuf));
    ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);

    ropaque->btpo_prev = xlrec->leftsib;
    ropaque->btpo_next = xlrec->rnext;
    ropaque->btpo.level = xlrec->level;
    ropaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
    ropaque->btpo_cycleid = 0;

    _bt_restore_page(rpage, datapos, datalen);

    /*
     * On leaf level, the high key of the left page is equal to the first key
     * on the right page.
     */
    if (xlrec->level == 0)
    {
        ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));

        left_hikey = PageGetItem(rpage, hiItemId);
        left_hikeysz = ItemIdGetLength(hiItemId);
    }

    PageSetLSN(rpage, lsn);
    MarkBufferDirty(rbuf);

    /* don't release the buffer yet; we touch right page's first item below */

    /* Now reconstruct left (original) sibling page */
    if (record->xl_info & XLR_BKP_BLOCK(0))
        (void) RestoreBackupBlock(lsn, record, 0, false, false);
    else
    {
        Buffer lbuf = XLogReadBuffer(xlrec->node, xlrec->leftsib, false);

        if (BufferIsValid(lbuf))
        {
            /*
             * Note that this code ensures that the items remaining on the
             * left page are in the correct item number order, but it does not
             * reproduce the physical order they would have had.  Is this
             * worth changing?  See also _bt_restore_page().
             */
            Page lpage = (Page) BufferGetPage(lbuf);
            BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);

            if (lsn > PageGetLSN(lpage))
            {
                OffsetNumber off;
                OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage);
                OffsetNumber deletable[MaxOffsetNumber];
                int ndeletable = 0;

                /*
                 * Remove the items from the left page that were copied to the
                 * right page.  Also remove the old high key, if any.  (We
                 * must remove everything before trying to insert any items,
                 * else we risk not having enough space.)
                 */
                if (!P_RIGHTMOST(lopaque))
                {
                    deletable[ndeletable++] = P_HIKEY;

                    /*
                     * newitemoff is given to us relative to the original
                     * page's item numbering, so adjust it for this deletion.
                     */
                    newitemoff--;
                }
                for (off = xlrec->firstright; off <= maxoff; off++)
                    deletable[ndeletable++] = off;
                if (ndeletable > 0)
                    PageIndexMultiDelete(lpage, deletable, ndeletable);

                /*
                 * Add the new item if it was inserted on left page.
                 */
                if (onleft)
                {
                    if (PageAddItem(lpage, newitem, newitemsz, newitemoff,
                                    false, false) == InvalidOffsetNumber)
                        elog(PANIC, "failed to add new item to left page after split");
                }

                /* Set high key */
                if (PageAddItem(lpage, left_hikey, left_hikeysz,
                                P_HIKEY, false, false) == InvalidOffsetNumber)
                    elog(PANIC, "failed to add high key to left page after split");

                /* Fix opaque fields */
                lopaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
                lopaque->btpo_next = xlrec->rightsib;
                lopaque->btpo_cycleid = 0;

                PageSetLSN(lpage, lsn);
                MarkBufferDirty(lbuf);
            }

            UnlockReleaseBuffer(lbuf);
        }
    }

    /* We no longer need the right buffer */
    UnlockReleaseBuffer(rbuf);

    /*
     * Fix left-link of the page to the right of the new right sibling.
     *
     * Note: in normal operation, we do this while still holding lock on the
     * two split pages.  However, that's not necessary for correctness in WAL
     * replay, because no other index update can be in progress, and readers
     * will cope properly when following an obsolete left-link.
     */
    if (record->xl_info & XLR_BKP_BLOCK(1))
        (void) RestoreBackupBlock(lsn, record, 1, false, false);
    else if (xlrec->rnext != P_NONE)
    {
        Buffer buffer = XLogReadBuffer(xlrec->node, xlrec->rnext, false);

        if (BufferIsValid(buffer))
        {
            Page page = (Page) BufferGetPage(buffer);

            if (lsn > PageGetLSN(page))
            {
                BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);

                pageop->btpo_prev = xlrec->rightsib;

                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
            }
            UnlockReleaseBuffer(buffer);
        }
    }

    /* The job ain't done till the parent link is inserted... */
    log_incomplete_split(xlrec->node,
                         xlrec->leftsib, xlrec->rightsib, isroot);
}
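The fixed header names the three blocks and the split point; everything else is the variable payload the code above peels off datapos. A reference sketch of the layout, reconstructed from the fields used here (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_split
{
    RelFileNode node;
    BlockNumber leftsib;        /* orig page / new left page */
    BlockNumber rightsib;       /* new right page */
    BlockNumber rnext;          /* next block (orig page's rightlink) */
    uint32      level;          /* tree level of page being split */
    OffsetNumber firstright;    /* first item moved to right page */

    /*
     * Variable payload follows: on non-leaf levels, the downlink block id
     * and the left page's high key; for the _L variants, newitemoff and
     * (unless a backup block covers the left page) the new item; and
     * finally the right page's tuples in _bt_restore_page() format.
     */
} xl_btree_split;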
void btree_xlog_startup(void)
Definition at line 1073 of file nbtxlog.c.
{
    incomplete_actions = NIL;
}
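startup and cleanup bracket the redo phase: xlog.c invokes every resource manager's rm_startup before replaying records and rm_cleanup once replay ends, so incomplete_actions only lives inside that window. An illustrative outline of the ordering; the real driver is StartupXLOG() in xlog.c, and next_wal_record() is a hypothetical helper:

/* Illustrative outline only; not PostgreSQL code. */
extern XLogRecord *next_wal_record(XLogRecPtr *lsn);    /* hypothetical helper */

static void
replay_btree_wal(void)
{
    XLogRecPtr lsn;
    XLogRecord *record;

    btree_xlog_startup();       /* rm_startup: resets incomplete_actions */
    while ((record = next_wal_record(&lsn)) != NULL)
        btree_redo(lsn, record);    /* may log or forget incomplete actions */
    btree_xlog_cleanup();       /* rm_cleanup: finishes pending splits/deletions */
}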
static void btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
Definition at line 477 of file nbtxlog.c.
References xl_btree_vacuum::block, BTPageOpaqueData::btpo_flags, BufferGetPage, BufferIsValid, xl_btree_vacuum::lastBlockVacuumed, LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirty(), xl_btree_vacuum::node, PageGetLSN, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, RBM_NORMAL, RestoreBackupBlock(), SizeOfBtreeVacuum, STANDBY_SNAPSHOT_READY, standbyState, UnlockReleaseBuffer(), XLogRecord::xl_info, XLogRecord::xl_len, XLogReadBufferExtended(), XLogRecGetData, and XLR_BKP_BLOCK.
Referenced by btree_redo().
{
    xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
    Buffer buffer;
    Page page;
    BTPageOpaque opaque;

    /*
     * If queries might be active then we need to ensure every block is
     * unpinned between the lastBlockVacuumed and the current block, if there
     * are any.  This ensures that every block in the index is touched during
     * VACUUM as required to ensure scans work correctly.
     */
    if (standbyState == STANDBY_SNAPSHOT_READY &&
        (xlrec->lastBlockVacuumed + 1) != xlrec->block)
    {
        BlockNumber blkno = xlrec->lastBlockVacuumed + 1;

        for (; blkno < xlrec->block; blkno++)
        {
            /*
             * XXX we don't actually need to read the block, we just need to
             * confirm it is unpinned.  If we had a special call into the
             * buffer manager we could optimise this so that if the block is
             * not in shared_buffers we confirm it as unpinned.
             *
             * Another simple optimization would be to check if there are any
             * backends running; if not, we could just skip this.
             */
            buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL);
            if (BufferIsValid(buffer))
            {
                LockBufferForCleanup(buffer);
                UnlockReleaseBuffer(buffer);
            }
        }
    }

    /*
     * If we have a full-page image, restore it (using a cleanup lock) and
     * we're done.
     */
    if (record->xl_info & XLR_BKP_BLOCK(0))
    {
        (void) RestoreBackupBlock(lsn, record, 0, true, false);
        return;
    }

    /*
     * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
     * page.  See nbtree/README for details.
     */
    buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
    if (!BufferIsValid(buffer))
        return;
    LockBufferForCleanup(buffer);
    page = (Page) BufferGetPage(buffer);

    if (lsn <= PageGetLSN(page))
    {
        UnlockReleaseBuffer(buffer);
        return;
    }

    if (record->xl_len > SizeOfBtreeVacuum)
    {
        OffsetNumber *unused;
        OffsetNumber *unend;

        unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
        unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);

        if ((unend - unused) > 0)
            PageIndexMultiDelete(page, unused, unend - unused);
    }

    /*
     * Mark the page as not containing any LP_DEAD items --- see comments in
     * _bt_delitems_vacuum().
     */
    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
    opaque->btpo_flags &= ~BTP_HAS_GARBAGE;

    PageSetLSN(page, lsn);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
}
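The lastBlockVacuumed field is what drives the pin-sweep loop at the top of this function. A reference sketch of the record layout, reconstructed from the fields used here (not the authoritative access/nbtree.h declaration):

typedef struct xl_btree_vacuum
{
    RelFileNode node;
    BlockNumber block;          /* leaf block being vacuumed */
    BlockNumber lastBlockVacuumed;      /* highest block cleanup-locked so far */

    /* TARGET OFFSET NUMBERS FOLLOW */
} xl_btree_vacuum;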
static void forget_matching_deletion(RelFileNode node, BlockNumber delblk)
Definition at line 100 of file nbtxlog.c.
References bt_incomplete_action::delblk, bt_incomplete_action::is_split, lfirst, list_delete_ptr(), bt_incomplete_action::node, pfree(), and RelFileNodeEquals.
Referenced by btree_xlog_delete_page().
{
    ListCell *l;

    foreach(l, incomplete_actions)
    {
        bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);

        if (RelFileNodeEquals(node, action->node) &&
            !action->is_split &&
            delblk == action->delblk)
        {
            incomplete_actions = list_delete_ptr(incomplete_actions, action);
            pfree(action);
            break;              /* need not look further */
        }
    }
}
static void forget_matching_split(RelFileNode node, BlockNumber downlink, bool is_root)
Definition at line 66 of file nbtxlog.c.
References elog, bt_incomplete_action::is_root, bt_incomplete_action::is_split, lfirst, list_delete_ptr(), LOG, bt_incomplete_action::node, pfree(), RelFileNodeEquals, and bt_incomplete_action::rightblk.
Referenced by btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_split().
{
    ListCell *l;

    foreach(l, incomplete_actions)
    {
        bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);

        if (RelFileNodeEquals(node, action->node) &&
            action->is_split &&
            downlink == action->rightblk)
        {
            if (is_root != action->is_root)
                elog(LOG, "forget_matching_split: fishy is_root data (expected %d, got %d)",
                     action->is_root, is_root);
            incomplete_actions = list_delete_ptr(incomplete_actions, action);
            pfree(action);
            break;              /* need not look further */
        }
    }
}
static void log_incomplete_deletion(RelFileNode node, BlockNumber delblk)
Definition at line 89 of file nbtxlog.c.
References bt_incomplete_action::delblk, bt_incomplete_action::is_split, lappend(), bt_incomplete_action::node, and palloc().
Referenced by btree_xlog_delete_page().
{
    bt_incomplete_action *action = palloc(sizeof(bt_incomplete_action));

    action->node = node;
    action->is_split = false;
    action->delblk = delblk;
    incomplete_actions = lappend(incomplete_actions, action);
}
static void log_incomplete_split(RelFileNode node, BlockNumber leftblk,
                                 BlockNumber rightblk, bool is_root)
Definition at line 52 of file nbtxlog.c.
References bt_incomplete_action::is_root, bt_incomplete_action::is_split, lappend(), bt_incomplete_action::leftblk, bt_incomplete_action::node, palloc(), and bt_incomplete_action::rightblk.
Referenced by btree_xlog_split().
{
    bt_incomplete_action *action = palloc(sizeof(bt_incomplete_action));

    action->node = node;
    action->is_split = true;
    action->is_root = is_root;
    action->leftblk = leftblk;
    action->rightblk = rightblk;
    incomplete_actions = lappend(incomplete_actions, action);
}
static List *incomplete_actions