#include "postgres.h"#include "access/heapam.h"#include "access/heapam_xlog.h"#include "access/rewriteheap.h"#include "access/transam.h"#include "access/tuptoaster.h"#include "storage/bufmgr.h"#include "storage/smgr.h"#include "utils/memutils.h"#include "utils/rel.h"#include "utils/tqual.h"
Data Structures

    struct RewriteStateData
    struct TidHashKey
    struct UnresolvedTupData
    struct OldToNewMappingData
Typedefs

    typedef struct RewriteStateData RewriteStateData
    typedef UnresolvedTupData *UnresolvedTup
    typedef OldToNewMappingData *OldToNewMapping
Functions

    static void raw_heap_insert (RewriteState state, HeapTuple tup)
    RewriteState begin_heap_rewrite (Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId freeze_multi, bool use_wal)
    void end_heap_rewrite (RewriteState state)
    void rewrite_heap_tuple (RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
    bool rewrite_heap_dead_tuple (RewriteState state, HeapTuple old_tuple)
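Taken together, the entry points form a simple lifecycle: open a rewrite with begin_heap_rewrite(), feed it every tuple of the old heap, then close it with end_heap_rewrite(). A minimal caller sketch, modeled on cluster.c's copy_heap_data(); the variable names and scan details here are illustrative, not part of this API:

    RewriteState rwstate;

    rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid,
                                 MultiXactFrzLimit, use_wal);

    /* ... scan the old heap; for each tuple decide live vs. dead ... */
    /*     live:  rewrite_heap_tuple(rwstate, old_tuple, new_tuple); */
    /*     dead:  rewrite_heap_dead_tuple(rwstate, old_tuple);       */

    end_heap_rewrite(rwstate);      /* flush, fsync, free all state */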
Typedef Documentation

typedef OldToNewMappingData *OldToNewMapping

Definition at line 170 of file rewriteheap.c.

typedef struct RewriteStateData RewriteStateData

typedef UnresolvedTupData *UnresolvedTup

Definition at line 162 of file rewriteheap.c.
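The key and entry types above are private to rewriteheap.c. A sketch of their shape, reconstructed from the field references on this page (see the source file for the authoritative definitions):

    typedef struct
    {
        TransactionId xmin;          /* tuple xmin */
        ItemPointerData tid;         /* tuple location in old heap */
    } TidHashKey;

    typedef struct
    {
        TidHashKey  key;             /* expected xmin/location of successor */
        ItemPointerData old_tid;     /* tuple's own location in old heap */
        HeapTuple   tuple;           /* stashed tuple contents */
    } UnresolvedTupData;

    typedef UnresolvedTupData *UnresolvedTup;

    typedef struct
    {
        TidHashKey  key;             /* tuple's xmin/location in old heap */
        ItemPointerData new_tid;     /* where it landed in the new heap */
    } OldToNewMappingData;

    typedef OldToNewMappingData *OldToNewMapping;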
Function Documentation

RewriteState begin_heap_rewrite (Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId freeze_multi, bool use_wal)
Definition at line 190 of file rewriteheap.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), CurrentMemoryContext, HASHCTL::entrysize, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, HASHCTL::hcxt, HASHCTL::keysize, MemoryContextSwitchTo(), palloc(), palloc0(), RelationGetNumberOfBlocks, RewriteStateData::rs_blockno, RewriteStateData::rs_buffer, RewriteStateData::rs_buffer_valid, RewriteStateData::rs_cxt, RewriteStateData::rs_freeze_multi, RewriteStateData::rs_freeze_xid, RewriteStateData::rs_new_rel, RewriteStateData::rs_old_new_tid_map, RewriteStateData::rs_oldest_xmin, RewriteStateData::rs_unresolved_tups, and RewriteStateData::rs_use_wal.
Referenced by copy_heap_data().
{
    RewriteState state;
    MemoryContext rw_cxt;
    MemoryContext old_cxt;
    HASHCTL     hash_ctl;

    /*
     * To ease cleanup, make a separate context that will contain the
     * RewriteState struct itself plus all subsidiary data.
     */
    rw_cxt = AllocSetContextCreate(CurrentMemoryContext,
                                   "Table rewrite",
                                   ALLOCSET_DEFAULT_MINSIZE,
                                   ALLOCSET_DEFAULT_INITSIZE,
                                   ALLOCSET_DEFAULT_MAXSIZE);
    old_cxt = MemoryContextSwitchTo(rw_cxt);

    /* Create and fill in the state struct */
    state = palloc0(sizeof(RewriteStateData));

    state->rs_new_rel = new_heap;
    state->rs_buffer = (Page) palloc(BLCKSZ);
    /* new_heap needn't be empty, just locked */
    state->rs_blockno = RelationGetNumberOfBlocks(new_heap);
    state->rs_buffer_valid = false;
    state->rs_use_wal = use_wal;
    state->rs_oldest_xmin = oldest_xmin;
    state->rs_freeze_xid = freeze_xid;
    state->rs_freeze_multi = freeze_multi;
    state->rs_cxt = rw_cxt;

    /* Initialize hash tables used to track update chains */
    memset(&hash_ctl, 0, sizeof(hash_ctl));
    hash_ctl.keysize = sizeof(TidHashKey);
    hash_ctl.entrysize = sizeof(UnresolvedTupData);
    hash_ctl.hcxt = state->rs_cxt;
    hash_ctl.hash = tag_hash;

    state->rs_unresolved_tups =
        hash_create("Rewrite / Unresolved ctids",
                    128,        /* arbitrary initial size */
                    &hash_ctl,
                    HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

    hash_ctl.entrysize = sizeof(OldToNewMappingData);

    state->rs_old_new_tid_map =
        hash_create("Rewrite / Old to new tid map",
                    128,        /* arbitrary initial size */
                    &hash_ctl,
                    HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

    MemoryContextSwitchTo(old_cxt);

    return state;
}
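Everything allocated between the two MemoryContextSwitchTo() calls, including both dynahash tables (tied in via HASH_CONTEXT and hash_ctl.hcxt), lives in rw_cxt, which is what lets end_heap_rewrite() free it all with one call. The same ownership pattern in isolation, as a hedged sketch (MyState and the context name are hypothetical):

    MemoryContext my_cxt;
    MemoryContext old_cxt;
    MyState    *st;

    my_cxt = AllocSetContextCreate(CurrentMemoryContext,
                                   "My operation",
                                   ALLOCSET_DEFAULT_MINSIZE,
                                   ALLOCSET_DEFAULT_INITSIZE,
                                   ALLOCSET_DEFAULT_MAXSIZE);
    old_cxt = MemoryContextSwitchTo(my_cxt);
    st = palloc0(sizeof(MyState));      /* plus any subsidiary palloc()s */
    MemoryContextSwitchTo(old_cxt);     /* restore the caller's context */

    /* ... later, one call releases st and everything under it ... */
    MemoryContextDelete(my_cxt);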
void end_heap_rewrite (RewriteState state)
Definition at line 256 of file rewriteheap.c.
References hash_seq_init(), hash_seq_search(), heap_sync(), ItemPointerSetInvalid, log_newpage(), MAIN_FORKNUM, MemoryContextDelete(), NULL, PageSetChecksumInplace(), raw_heap_insert(), RelationData::rd_node, RelationData::rd_smgr, RelationNeedsWAL, RelationOpenSmgr, RewriteStateData::rs_blockno, RewriteStateData::rs_buffer, RewriteStateData::rs_buffer_valid, RewriteStateData::rs_cxt, RewriteStateData::rs_new_rel, RewriteStateData::rs_unresolved_tups, RewriteStateData::rs_use_wal, smgrextend(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, and UnresolvedTupData::tuple.
Referenced by copy_heap_data().
{
    HASH_SEQ_STATUS seq_status;
    UnresolvedTup unresolved;

    /*
     * Write any remaining tuples in the UnresolvedTups table. If we have any
     * left, they should in fact be dead, but let's err on the safe side.
     */
    hash_seq_init(&seq_status, state->rs_unresolved_tups);

    while ((unresolved = hash_seq_search(&seq_status)) != NULL)
    {
        ItemPointerSetInvalid(&unresolved->tuple->t_data->t_ctid);
        raw_heap_insert(state, unresolved->tuple);
    }

    /* Write the last page, if any */
    if (state->rs_buffer_valid)
    {
        if (state->rs_use_wal)
            log_newpage(&state->rs_new_rel->rd_node,
                        MAIN_FORKNUM,
                        state->rs_blockno,
                        state->rs_buffer);
        RelationOpenSmgr(state->rs_new_rel);

        PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);

        smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
                   (char *) state->rs_buffer, true);
    }

    /*
     * If the rel is WAL-logged, must fsync before commit. We use heap_sync
     * to ensure that the toast table gets fsync'd too.
     *
     * It's obvious that we must do this when not WAL-logging. It's less
     * obvious that we have to do it even if we did WAL-log the pages. The
     * reason is the same as in tablecmds.c's copy_relation_data(): we're
     * writing data that's not in shared buffers, and so a CHECKPOINT
     * occurring during the rewriteheap operation won't have fsync'd data we
     * wrote before the checkpoint.
     */
    if (RelationNeedsWAL(state->rs_new_rel))
        heap_sync(state->rs_new_rel);

    /* Deleting the context frees everything */
    MemoryContextDelete(state->rs_cxt);
}
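Note the interaction with the use_wal flag passed to begin_heap_rewrite(): the caller is expected to request WAL only when the new relation actually needs it. In cluster.c the decision looks roughly like this (a hedged sketch; see copy_heap_data() for the authoritative test):

    /* WAL-log the copy only if the new heap is a WAL-logged relation and
     * WAL is needed for archiving or streaming replication; otherwise the
     * heap_sync() in end_heap_rewrite() alone makes the data durable. */
    use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);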
static void raw_heap_insert (RewriteState state, HeapTuple tup) [static]
Definition at line 565 of file rewriteheap.c.
References Assert, elog, ereport, errcode(), errmsg(), ERROR, HEAP_DEFAULT_FILLFACTOR, heap_freetuple(), HEAP_INSERT_SKIP_FSM, HEAP_INSERT_SKIP_WAL, HeapTupleHasExternal, InvalidOffsetNumber, ItemPointerIsValid, ItemPointerSet, log_newpage(), MAIN_FORKNUM, MAXALIGN, MaxHeapTupleSize, NULL, PageAddItem(), PageGetHeapFreeSpace(), PageGetItem, PageGetItemId, PageInit(), PageSetChecksumInplace(), RelationData::rd_node, RelationData::rd_rel, RelationData::rd_smgr, RelationGetTargetPageFreeSpace, RelationOpenSmgr, RELKIND_TOASTVALUE, RewriteStateData::rs_blockno, RewriteStateData::rs_buffer, RewriteStateData::rs_buffer_valid, RewriteStateData::rs_new_rel, RewriteStateData::rs_use_wal, smgrextend(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, toast_insert_or_update(), and TOAST_TUPLE_THRESHOLD.
Referenced by end_heap_rewrite(), and rewrite_heap_tuple().
{
    Page        page = state->rs_buffer;
    Size        pageFreeSpace,
                saveFreeSpace;
    Size        len;
    OffsetNumber newoff;
    HeapTuple   heaptup;

    /*
     * If the new tuple is too big for storage or contains already toasted
     * out-of-line attributes from some other relation, invoke the toaster.
     *
     * Note: below this point, heaptup is the data we actually intend to store
     * into the relation; tup is the caller's original untoasted data.
     */
    if (state->rs_new_rel->rd_rel->relkind == RELKIND_TOASTVALUE)
    {
        /* toast table entries should never be recursively toasted */
        Assert(!HeapTupleHasExternal(tup));
        heaptup = tup;
    }
    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
        heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
                                         HEAP_INSERT_SKIP_FSM |
                                         (state->rs_use_wal ?
                                          0 : HEAP_INSERT_SKIP_WAL));
    else
        heaptup = tup;

    len = MAXALIGN(heaptup->t_len);     /* be conservative */

    /*
     * If we're gonna fail for oversize tuple, do it right away
     */
    if (len > MaxHeapTupleSize)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("row is too big: size %lu, maximum size %lu",
                        (unsigned long) len,
                        (unsigned long) MaxHeapTupleSize)));

    /* Compute desired extra freespace due to fillfactor option */
    saveFreeSpace = RelationGetTargetPageFreeSpace(state->rs_new_rel,
                                                   HEAP_DEFAULT_FILLFACTOR);

    /* Now we can check to see if there's enough free space already. */
    if (state->rs_buffer_valid)
    {
        pageFreeSpace = PageGetHeapFreeSpace(page);

        if (len + saveFreeSpace > pageFreeSpace)
        {
            /* Doesn't fit, so write out the existing page */

            /* XLOG stuff */
            if (state->rs_use_wal)
                log_newpage(&state->rs_new_rel->rd_node,
                            MAIN_FORKNUM,
                            state->rs_blockno,
                            page);

            /*
             * Now write the page. We say isTemp = true even if it's not a
             * temp table, because there's no need for smgr to schedule an
             * fsync for this write; we'll do it ourselves in
             * end_heap_rewrite.
             */
            RelationOpenSmgr(state->rs_new_rel);

            PageSetChecksumInplace(page, state->rs_blockno);

            smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
                       state->rs_blockno, (char *) page, true);

            state->rs_blockno++;
            state->rs_buffer_valid = false;
        }
    }

    if (!state->rs_buffer_valid)
    {
        /* Initialize a new empty page */
        PageInit(page, BLCKSZ, 0);
        state->rs_buffer_valid = true;
    }

    /* And now we can insert the tuple into the page */
    newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len,
                         InvalidOffsetNumber, false, true);
    if (newoff == InvalidOffsetNumber)
        elog(ERROR, "failed to add tuple");

    /* Update caller's t_self to the actual position where it was stored */
    ItemPointerSet(&(tup->t_self), state->rs_blockno, newoff);

    /*
     * Insert the correct position into CTID of the stored tuple, too, if the
     * caller didn't supply a valid CTID.
     */
    if (!ItemPointerIsValid(&tup->t_data->t_ctid))
    {
        ItemId      newitemid;
        HeapTupleHeader onpage_tup;

        newitemid = PageGetItemId(page, newoff);
        onpage_tup = (HeapTupleHeader) PageGetItem(page, newitemid);

        onpage_tup->t_ctid = tup->t_self;
    }

    /* If heaptup is a private copy, release it. */
    if (heaptup != tup)
        heap_freetuple(heaptup);
}
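The fillfactor reservation above reduces to simple arithmetic: RelationGetTargetPageFreeSpace() works out to BLCKSZ * (100 - fillfactor) / 100, with HEAP_DEFAULT_FILLFACTOR (100) used when the relation has no fillfactor option. A standalone illustration of the page-fit test (values are examples):

    /* With BLCKSZ = 8192 and fillfactor = 90, each page reserves ~819
     * bytes for future updates; a MAXALIGN'd tuple of len bytes forces a
     * new page once len + saveFreeSpace exceeds the page's free space. */
    Size saveFreeSpace = BLCKSZ * (100 - fillfactor) / 100;

    if (len + saveFreeSpace > pageFreeSpace)
        /* ... write out the current page and start a fresh one ... */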
bool rewrite_heap_dead_tuple (RewriteState state, HeapTuple old_tuple)
Definition at line 515 of file rewriteheap.c.
References Assert, HASH_FIND, HASH_REMOVE, hash_search(), heap_freetuple(), HeapTupleHeaderGetXmin, NULL, RewriteStateData::rs_unresolved_tups, HeapTupleData::t_data, HeapTupleData::t_self, TidHashKey::tid, UnresolvedTupData::tuple, and TidHashKey::xmin.
Referenced by copy_heap_data().
{
    /*
     * If we have already seen an earlier tuple in the update chain that
     * points to this tuple, let's forget about that earlier tuple. It's in
     * fact dead as well, our simple xmax < OldestXmin test in
     * HeapTupleSatisfiesVacuum just wasn't enough to detect it. It happens
     * when xmin of a tuple is greater than xmax, which sounds
     * counter-intuitive but is perfectly valid.
     *
     * We don't bother to try to detect the situation the other way round,
     * when we encounter the dead tuple first and then the recently dead one
     * that points to it. If that happens, we'll have some unmatched entries
     * in the UnresolvedTups hash table at the end. That can happen anyway,
     * because a vacuum might have removed the dead tuple in the chain before
     * us.
     */
    UnresolvedTup unresolved;
    TidHashKey  hashkey;
    bool        found;

    memset(&hashkey, 0, sizeof(hashkey));
    hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);
    hashkey.tid = old_tuple->t_self;

    unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                             HASH_FIND, NULL);

    if (unresolved != NULL)
    {
        /* Need to free the contained tuple as well as the hashtable entry */
        heap_freetuple(unresolved->tuple);
        hash_search(state->rs_unresolved_tups, &hashkey,
                    HASH_REMOVE, &found);
        Assert(found);
        return true;
    }

    return false;
}
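Callers classify each tuple before choosing between the two rewrite entry points. A hedged sketch of the dispatch, condensed from copy_heap_data() (the HeapTupleSatisfiesVacuum() case analysis there is more detailed):

    switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin, buf))
    {
        case HEAPTUPLE_DEAD:
            /* definitely dead: remember it only for chain bookkeeping */
            rewrite_heap_dead_tuple(rwstate, tuple);
            break;
        default:
            /* live or recently dead: copy it into the new heap */
            rewrite_heap_tuple(rwstate, tuple, copied_tuple);
            break;
    }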
void rewrite_heap_tuple (RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
Definition at line 319 of file rewriteheap.c.
References Assert, HASH_ENTER, HASH_FIND, HASH_REMOVE, hash_search(), heap_copytuple(), heap_freetuple(), heap_freeze_tuple(), HEAP_UPDATED, HEAP_XMAX_INVALID, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsOnlyLocked(), ItemPointerEquals(), ItemPointerSetInvalid, MemoryContextSwitchTo(), OldToNewMappingData::new_tid, NULL, UnresolvedTupData::old_tid, raw_heap_insert(), RewriteStateData::rs_cxt, RewriteStateData::rs_freeze_multi, RewriteStateData::rs_freeze_xid, RewriteStateData::rs_old_new_tid_map, RewriteStateData::rs_oldest_xmin, RewriteStateData::rs_unresolved_tups, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_self, TidHashKey::tid, TransactionIdPrecedes(), UnresolvedTupData::tuple, and TidHashKey::xmin.
Referenced by reform_and_rewrite_tuple().
{
    MemoryContext old_cxt;
    ItemPointerData old_tid;
    TidHashKey  hashkey;
    bool        found;
    bool        free_new;

    old_cxt = MemoryContextSwitchTo(state->rs_cxt);

    /*
     * Copy the original tuple's visibility information into new_tuple.
     *
     * XXX we might later need to copy some t_infomask2 bits, too? Right now,
     * we intentionally clear the HOT status bits.
     */
    memcpy(&new_tuple->t_data->t_choice.t_heap,
           &old_tuple->t_data->t_choice.t_heap,
           sizeof(HeapTupleFields));

    new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
    new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
    new_tuple->t_data->t_infomask |=
        old_tuple->t_data->t_infomask & HEAP_XACT_MASK;

    /*
     * While we have our hands on the tuple, we may as well freeze any
     * very-old xmin or xmax, so that future VACUUM effort can be saved.
     */
    heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid,
                      state->rs_freeze_multi);

    /*
     * Invalid ctid means that ctid should point to the tuple itself. We'll
     * override it later if the tuple is part of an update chain.
     */
    ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);

    /*
     * If the tuple has been updated, check the old-to-new mapping hash table.
     */
    if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
          HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
        !(ItemPointerEquals(&(old_tuple->t_self),
                            &(old_tuple->t_data->t_ctid))))
    {
        OldToNewMapping mapping;

        memset(&hashkey, 0, sizeof(hashkey));
        hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
        hashkey.tid = old_tuple->t_data->t_ctid;

        mapping = (OldToNewMapping)
            hash_search(state->rs_old_new_tid_map, &hashkey,
                        HASH_FIND, NULL);

        if (mapping != NULL)
        {
            /*
             * We've already copied the tuple that t_ctid points to, so we can
             * set the ctid of this tuple to point to the new location, and
             * insert it right away.
             */
            new_tuple->t_data->t_ctid = mapping->new_tid;

            /* We don't need the mapping entry anymore */
            hash_search(state->rs_old_new_tid_map, &hashkey,
                        HASH_REMOVE, &found);
            Assert(found);
        }
        else
        {
            /*
             * We haven't seen the tuple t_ctid points to yet. Stash this
             * tuple into unresolved_tups to be written later.
             */
            UnresolvedTup unresolved;

            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                                     HASH_ENTER, &found);
            Assert(!found);

            unresolved->old_tid = old_tuple->t_self;
            unresolved->tuple = heap_copytuple(new_tuple);

            /*
             * We can't do anything more now, since we don't know where the
             * tuple will be written.
             */
            MemoryContextSwitchTo(old_cxt);
            return;
        }
    }

    /*
     * Now we will write the tuple, and then check to see if it is the B tuple
     * in any new or known pair. When we resolve a known pair, we will be
     * able to write that pair's A tuple, and then we have to check if it
     * resolves some other pair. Hence, we need a loop here.
     */
    old_tid = old_tuple->t_self;
    free_new = false;

    for (;;)
    {
        ItemPointerData new_tid;

        /* Insert the tuple and find out where it's put in new_heap */
        raw_heap_insert(state, new_tuple);
        new_tid = new_tuple->t_self;

        /*
         * If the tuple is the updated version of a row, and the prior version
         * wouldn't be DEAD yet, then we need to either resolve the prior
         * version (if it's waiting in rs_unresolved_tups), or make an entry
         * in rs_old_new_tid_map (so we can resolve it when we do see it). The
         * previous tuple's xmax would equal this one's xmin, so it's
         * RECENTLY_DEAD if and only if the xmin is not before OldestXmin.
         */
        if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
            !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
                                   state->rs_oldest_xmin))
        {
            /*
             * Okay, this is B in an update pair. See if we've seen A.
             */
            UnresolvedTup unresolved;

            memset(&hashkey, 0, sizeof(hashkey));
            hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
            hashkey.tid = old_tid;

            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                                     HASH_FIND, NULL);

            if (unresolved != NULL)
            {
                /*
                 * We have seen and memorized the previous tuple already. Now
                 * that we know where we inserted the tuple its t_ctid points
                 * to, fix its t_ctid and insert it to the new heap.
                 */
                if (free_new)
                    heap_freetuple(new_tuple);
                new_tuple = unresolved->tuple;
                free_new = true;
                old_tid = unresolved->old_tid;
                new_tuple->t_data->t_ctid = new_tid;

                /*
                 * We don't need the hash entry anymore, but don't free its
                 * tuple just yet.
                 */
                hash_search(state->rs_unresolved_tups, &hashkey,
                            HASH_REMOVE, &found);
                Assert(found);

                /* loop back to insert the previous tuple in the chain */
                continue;
            }
            else
            {
                /*
                 * Remember the new tid of this tuple. We'll use it to set the
                 * ctid when we find the previous tuple in the chain.
                 */
                OldToNewMapping mapping;

                mapping = hash_search(state->rs_old_new_tid_map, &hashkey,
                                      HASH_ENTER, &found);
                Assert(!found);

                mapping->new_tid = new_tid;
            }
        }

        /* Done with this (chain of) tuples, for now */
        if (free_new)
            heap_freetuple(new_tuple);
        break;
    }

    MemoryContextSwitchTo(old_cxt);
}
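A concrete walkthrough of the pairing loop, with hypothetical xids and tids:

    /*
     * Old heap:  A at (1,5): xmax = 100, t_ctid -> (1,9)
     *            B at (1,9): xmin = 100          (A's successor)
     *
     * Scan meets A first. A cannot be written yet, because its ctid must
     * point at B's location in the *new* heap, so A is stashed in
     * rs_unresolved_tups under key {xmin = 100, tid = (1,9)}.
     *
     * Scan meets B. B is inserted and lands at, say, (0,3). Its key
     * {HeapTupleHeaderGetXmin(B) = 100, old tid = (1,9)} matches the
     * stashed A, so the loop sets A's t_ctid = (0,3) and inserts A too.
     *
     * In the opposite scan order, B's new tid would instead be recorded
     * in rs_old_new_tid_map, and A would resolve immediately on arrival.
     */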