Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Typedefs | Functions

rewriteheap.c File Reference

#include "postgres.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tqual.h"
Include dependency graph for rewriteheap.c:

Go to the source code of this file.

Data Structures

struct  RewriteStateData
struct  TidHashKey
struct  UnresolvedTupData
struct  OldToNewMappingData

Typedefs

typedef struct RewriteStateData RewriteStateData
typedef UnresolvedTupData * UnresolvedTup
typedef OldToNewMappingData * OldToNewMapping

Functions

static void raw_heap_insert (RewriteState state, HeapTuple tup)
RewriteState begin_heap_rewrite (Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId freeze_multi, bool use_wal)
void end_heap_rewrite (RewriteState state)
void rewrite_heap_tuple (RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
bool rewrite_heap_dead_tuple (RewriteState state, HeapTuple old_tuple)

Typedef Documentation

Definition at line 170 of file rewriteheap.c.

Definition at line 162 of file rewriteheap.c.


Function Documentation

RewriteState begin_heap_rewrite ( Relation  new_heap,
TransactionId  oldest_xmin,
TransactionId  freeze_xid,
MultiXactId  freeze_multi,
bool  use_wal 
)

Definition at line 190 of file rewriteheap.c.

References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), CurrentMemoryContext, HASHCTL::entrysize, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, HASHCTL::hcxt, HASHCTL::keysize, MemoryContextSwitchTo(), palloc(), palloc0(), RelationGetNumberOfBlocks, RewriteStateData::rs_blockno, RewriteStateData::rs_buffer, RewriteStateData::rs_buffer_valid, RewriteStateData::rs_cxt, RewriteStateData::rs_freeze_multi, RewriteStateData::rs_freeze_xid, RewriteStateData::rs_new_rel, RewriteStateData::rs_old_new_tid_map, RewriteStateData::rs_oldest_xmin, RewriteStateData::rs_unresolved_tups, and RewriteStateData::rs_use_wal.

Referenced by copy_heap_data().

{
    RewriteState rstate;
    MemoryContext rewrite_cxt;
    MemoryContext saved_cxt;
    HASHCTL     ctl;

    /*
     * Everything belonging to this rewrite — the RewriteState struct itself
     * and all subsidiary data — lives in a dedicated memory context, so one
     * MemoryContextDelete() in end_heap_rewrite releases the lot.
     */
    rewrite_cxt = AllocSetContextCreate(CurrentMemoryContext,
                                        "Table rewrite",
                                        ALLOCSET_DEFAULT_MINSIZE,
                                        ALLOCSET_DEFAULT_INITSIZE,
                                        ALLOCSET_DEFAULT_MAXSIZE);
    saved_cxt = MemoryContextSwitchTo(rewrite_cxt);

    /* Allocate and populate the state struct */
    rstate = palloc0(sizeof(RewriteStateData));

    rstate->rs_new_rel = new_heap;
    rstate->rs_buffer = (Page) palloc(BLCKSZ);
    rstate->rs_buffer_valid = false;
    /* new_heap needn't be empty, just locked; start after any existing blocks */
    rstate->rs_blockno = RelationGetNumberOfBlocks(new_heap);
    rstate->rs_use_wal = use_wal;
    rstate->rs_oldest_xmin = oldest_xmin;
    rstate->rs_freeze_xid = freeze_xid;
    rstate->rs_freeze_multi = freeze_multi;
    rstate->rs_cxt = rewrite_cxt;

    /* Build the two hash tables used to track update chains */
    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(TidHashKey);
    ctl.entrysize = sizeof(UnresolvedTupData);
    ctl.hcxt = rstate->rs_cxt;
    ctl.hash = tag_hash;

    rstate->rs_unresolved_tups =
        hash_create("Rewrite / Unresolved ctids",
                    128,        /* arbitrary initial size */
                    &ctl,
                    HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

    /* Same key type; only the entry payload differs */
    ctl.entrysize = sizeof(OldToNewMappingData);

    rstate->rs_old_new_tid_map =
        hash_create("Rewrite / Old to new tid map",
                    128,        /* arbitrary initial size */
                    &ctl,
                    HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

    MemoryContextSwitchTo(saved_cxt);

    return rstate;
}

void end_heap_rewrite ( RewriteState  state  ) 

Definition at line 256 of file rewriteheap.c.

References hash_seq_init(), hash_seq_search(), heap_sync(), ItemPointerSetInvalid, log_newpage(), MAIN_FORKNUM, MemoryContextDelete(), NULL, PageSetChecksumInplace(), raw_heap_insert(), RelationData::rd_node, RelationData::rd_smgr, RelationNeedsWAL, RelationOpenSmgr, RewriteStateData::rs_blockno, RewriteStateData::rs_buffer, RewriteStateData::rs_buffer_valid, RewriteStateData::rs_cxt, RewriteStateData::rs_new_rel, RewriteStateData::rs_unresolved_tups, RewriteStateData::rs_use_wal, smgrextend(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, and UnresolvedTupData::tuple.

Referenced by copy_heap_data().

{
    HASH_SEQ_STATUS seq_status;
    UnresolvedTup unresolved;

    /*
     * Write any remaining tuples in the UnresolvedTups table. If we have any
     * left, they should in fact be dead, but let's err on the safe side.
     */
    hash_seq_init(&seq_status, state->rs_unresolved_tups);

    while ((unresolved = hash_seq_search(&seq_status)) != NULL)
    {
        /* invalid ctid means "points to itself"; raw_heap_insert fixes it up */
        ItemPointerSetInvalid(&unresolved->tuple->t_data->t_ctid);
        raw_heap_insert(state, unresolved->tuple);
    }

    /* Write the last page, if any */
    if (state->rs_buffer_valid)
    {
        /* XLOG the full page image first, if this rewrite is WAL-logged */
        if (state->rs_use_wal)
            log_newpage(&state->rs_new_rel->rd_node,
                        MAIN_FORKNUM,
                        state->rs_blockno,
                        state->rs_buffer);
        RelationOpenSmgr(state->rs_new_rel);

        /* Set the checksum only after all changes to the page are done */
        PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);

        /* isTemp=true: no smgr-scheduled fsync; we sync ourselves below */
        smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
                   (char *) state->rs_buffer, true);
    }

    /*
     * If the rel is WAL-logged, must fsync before commit.  We use heap_sync
     * to ensure that the toast table gets fsync'd too.
     *
     * It's obvious that we must do this when not WAL-logging. It's less
     * obvious that we have to do it even if we did WAL-log the pages. The
     * reason is the same as in tablecmds.c's copy_relation_data(): we're
     * writing data that's not in shared buffers, and so a CHECKPOINT
     * occurring during the rewriteheap operation won't have fsync'd data we
     * wrote before the checkpoint.
     */
    if (RelationNeedsWAL(state->rs_new_rel))
        heap_sync(state->rs_new_rel);

    /* Deleting the context frees everything */
    MemoryContextDelete(state->rs_cxt);
}

static void raw_heap_insert ( RewriteState  state,
HeapTuple  tup 
) [static]

Definition at line 565 of file rewriteheap.c.

References Assert, elog, ereport, errcode(), errmsg(), ERROR, HEAP_DEFAULT_FILLFACTOR, heap_freetuple(), HEAP_INSERT_SKIP_FSM, HEAP_INSERT_SKIP_WAL, HeapTupleHasExternal, InvalidOffsetNumber, ItemPointerIsValid, ItemPointerSet, log_newpage(), MAIN_FORKNUM, MAXALIGN, MaxHeapTupleSize, NULL, PageAddItem(), PageGetHeapFreeSpace(), PageGetItem, PageGetItemId, PageInit(), PageSetChecksumInplace(), RelationData::rd_node, RelationData::rd_rel, RelationData::rd_smgr, RelationGetTargetPageFreeSpace, RelationOpenSmgr, RELKIND_TOASTVALUE, RewriteStateData::rs_blockno, RewriteStateData::rs_buffer, RewriteStateData::rs_buffer_valid, RewriteStateData::rs_new_rel, RewriteStateData::rs_use_wal, smgrextend(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, toast_insert_or_update(), and TOAST_TUPLE_THRESHOLD.

Referenced by end_heap_rewrite(), and rewrite_heap_tuple().

{
    Page        page = state->rs_buffer;
    Size        pageFreeSpace,
                saveFreeSpace;
    Size        len;
    OffsetNumber newoff;
    HeapTuple   heaptup;

    /*
     * If the new tuple is too big for storage or contains already toasted
     * out-of-line attributes from some other relation, invoke the toaster.
     *
     * Note: below this point, heaptup is the data we actually intend to store
     * into the relation; tup is the caller's original untoasted data.
     */
    if (state->rs_new_rel->rd_rel->relkind == RELKIND_TOASTVALUE)
    {
        /* toast table entries should never be recursively toasted */
        Assert(!HeapTupleHasExternal(tup));
        heaptup = tup;
    }
    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
        /* skip WAL for the toast insertions too, unless the rewrite is WAL-logged */
        heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
                                         HEAP_INSERT_SKIP_FSM |
                                         (state->rs_use_wal ?
                                          0 : HEAP_INSERT_SKIP_WAL));
    else
        heaptup = tup;

    len = MAXALIGN(heaptup->t_len);     /* be conservative */

    /*
     * If we're gonna fail for oversize tuple, do it right away
     */
    if (len > MaxHeapTupleSize)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("row is too big: size %lu, maximum size %lu",
                        (unsigned long) len,
                        (unsigned long) MaxHeapTupleSize)));

    /* Compute desired extra freespace due to fillfactor option */
    saveFreeSpace = RelationGetTargetPageFreeSpace(state->rs_new_rel,
                                                   HEAP_DEFAULT_FILLFACTOR);

    /* Now we can check to see if there's enough free space already. */
    if (state->rs_buffer_valid)
    {
        pageFreeSpace = PageGetHeapFreeSpace(page);

        if (len + saveFreeSpace > pageFreeSpace)
        {
            /* Doesn't fit, so write out the existing page */

            /* XLOG stuff */
            if (state->rs_use_wal)
                log_newpage(&state->rs_new_rel->rd_node,
                            MAIN_FORKNUM,
                            state->rs_blockno,
                            page);

            /*
             * Now write the page. We say isTemp = true even if it's not a
             * temp table, because there's no need for smgr to schedule an
             * fsync for this write; we'll do it ourselves in
             * end_heap_rewrite.
             */
            RelationOpenSmgr(state->rs_new_rel);

            /* checksum must be set after the page contents are final */
            PageSetChecksumInplace(page, state->rs_blockno);

            smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
                       state->rs_blockno, (char *) page, true);

            /* advance to a fresh block; page buffer will be re-inited below */
            state->rs_blockno++;
            state->rs_buffer_valid = false;
        }
    }

    if (!state->rs_buffer_valid)
    {
        /* Initialize a new empty page */
        PageInit(page, BLCKSZ, 0);
        state->rs_buffer_valid = true;
    }

    /* And now we can insert the tuple into the page */
    newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len,
                         InvalidOffsetNumber, false, true);
    if (newoff == InvalidOffsetNumber)
        elog(ERROR, "failed to add tuple");

    /* Update caller's t_self to the actual position where it was stored */
    ItemPointerSet(&(tup->t_self), state->rs_blockno, newoff);

    /*
     * Insert the correct position into CTID of the stored tuple, too, if the
     * caller didn't supply a valid CTID.
     */
    if (!ItemPointerIsValid(&tup->t_data->t_ctid))
    {
        ItemId      newitemid;
        HeapTupleHeader onpage_tup;

        /* locate the copy just placed on the page and make its ctid self-point */
        newitemid = PageGetItemId(page, newoff);
        onpage_tup = (HeapTupleHeader) PageGetItem(page, newitemid);

        onpage_tup->t_ctid = tup->t_self;
    }

    /* If heaptup is a private copy, release it. */
    if (heaptup != tup)
        heap_freetuple(heaptup);
}

bool rewrite_heap_dead_tuple ( RewriteState  state,
HeapTuple  old_tuple 
)

Definition at line 515 of file rewriteheap.c.

References Assert, HASH_FIND, HASH_REMOVE, hash_search(), heap_freetuple(), HeapTupleHeaderGetXmin, NULL, RewriteStateData::rs_unresolved_tups, HeapTupleData::t_data, HeapTupleData::t_self, TidHashKey::tid, UnresolvedTupData::tuple, and TidHashKey::xmin.

Referenced by copy_heap_data().

{
    /*
     * A dead tuple may be the target of an earlier, recently-dead tuple we
     * stashed in rs_unresolved_tups.  If so, that earlier tuple is in fact
     * dead as well — the simple xmax < OldestXmin test in
     * HeapTupleSatisfiesVacuum just couldn't see it (this happens when a
     * tuple's xmin exceeds its xmax, which sounds odd but is valid).  We
     * drop the stashed entry here.
     *
     * The reverse ordering — dead tuple seen before the recently-dead one
     * pointing at it — is not detected; such entries simply remain
     * unmatched in the hash table at the end, which can happen anyway
     * because a vacuum may have already removed the dead chain member.
     */
    UnresolvedTup entry;
    TidHashKey  key;
    bool        removed;

    memset(&key, 0, sizeof(key));
    key.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);
    key.tid = old_tuple->t_self;

    entry = hash_search(state->rs_unresolved_tups, &key,
                        HASH_FIND, NULL);

    /* No stashed predecessor: nothing to do */
    if (entry == NULL)
        return false;

    /* Free the stashed tuple copy, then remove the hashtable entry itself */
    heap_freetuple(entry->tuple);
    hash_search(state->rs_unresolved_tups, &key,
                HASH_REMOVE, &removed);
    Assert(removed);
    return true;
}

void rewrite_heap_tuple ( RewriteState  state,
HeapTuple  old_tuple,
HeapTuple  new_tuple 
)

Definition at line 319 of file rewriteheap.c.

References Assert, HASH_ENTER, HASH_FIND, HASH_REMOVE, hash_search(), heap_copytuple(), heap_freetuple(), heap_freeze_tuple(), HEAP_UPDATED, HEAP_XMAX_INVALID, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsOnlyLocked(), ItemPointerEquals(), ItemPointerSetInvalid, MemoryContextSwitchTo(), OldToNewMappingData::new_tid, NULL, UnresolvedTupData::old_tid, raw_heap_insert(), RewriteStateData::rs_cxt, RewriteStateData::rs_freeze_multi, RewriteStateData::rs_freeze_xid, RewriteStateData::rs_old_new_tid_map, RewriteStateData::rs_oldest_xmin, RewriteStateData::rs_unresolved_tups, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_self, TidHashKey::tid, TransactionIdPrecedes(), UnresolvedTupData::tuple, and TidHashKey::xmin.

Referenced by reform_and_rewrite_tuple().

{
    MemoryContext old_cxt;
    ItemPointerData old_tid;
    TidHashKey  hashkey;
    bool        found;
    bool        free_new;

    /* All allocations (including stashed tuple copies) go into the rewrite
     * context, so end_heap_rewrite can free everything at once. */
    old_cxt = MemoryContextSwitchTo(state->rs_cxt);

    /*
     * Copy the original tuple's visibility information into new_tuple.
     *
     * XXX we might later need to copy some t_infomask2 bits, too? Right now,
     * we intentionally clear the HOT status bits.
     */
    memcpy(&new_tuple->t_data->t_choice.t_heap,
           &old_tuple->t_data->t_choice.t_heap,
           sizeof(HeapTupleFields));

    new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
    new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
    new_tuple->t_data->t_infomask |=
        old_tuple->t_data->t_infomask & HEAP_XACT_MASK;

    /*
     * While we have our hands on the tuple, we may as well freeze any
     * very-old xmin or xmax, so that future VACUUM effort can be saved.
     */
    heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid,
                      state->rs_freeze_multi);

    /*
     * Invalid ctid means that ctid should point to the tuple itself. We'll
     * override it later if the tuple is part of an update chain.
     */
    ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);

    /*
     * If the tuple has been updated, check the old-to-new mapping hash table.
     */
    if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
          HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
        !(ItemPointerEquals(&(old_tuple->t_self),
                            &(old_tuple->t_data->t_ctid))))
    {
        OldToNewMapping mapping;

        /* key = (updater xid, old location of the successor tuple) */
        memset(&hashkey, 0, sizeof(hashkey));
        hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
        hashkey.tid = old_tuple->t_data->t_ctid;

        mapping = (OldToNewMapping)
            hash_search(state->rs_old_new_tid_map, &hashkey,
                        HASH_FIND, NULL);

        if (mapping != NULL)
        {
            /*
             * We've already copied the tuple that t_ctid points to, so we can
             * set the ctid of this tuple to point to the new location, and
             * insert it right away.
             */
            new_tuple->t_data->t_ctid = mapping->new_tid;

            /* We don't need the mapping entry anymore */
            hash_search(state->rs_old_new_tid_map, &hashkey,
                        HASH_REMOVE, &found);
            Assert(found);
        }
        else
        {
            /*
             * We haven't seen the tuple t_ctid points to yet. Stash this
             * tuple into unresolved_tups to be written later.
             */
            UnresolvedTup unresolved;

            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                                     HASH_ENTER, &found);
            Assert(!found);

            /* copy goes into rs_cxt; freed when the chain is resolved
             * (rewrite_heap_tuple / rewrite_heap_dead_tuple) or at end */
            unresolved->old_tid = old_tuple->t_self;
            unresolved->tuple = heap_copytuple(new_tuple);

            /*
             * We can't do anything more now, since we don't know where the
             * tuple will be written.
             */
            MemoryContextSwitchTo(old_cxt);
            return;
        }
    }

    /*
     * Now we will write the tuple, and then check to see if it is the B tuple
     * in any new or known pair.  When we resolve a known pair, we will be
     * able to write that pair's A tuple, and then we have to check if it
     * resolves some other pair.  Hence, we need a loop here.
     */
    old_tid = old_tuple->t_self;
    free_new = false;           /* true once new_tuple is a stashed copy we own */

    for (;;)
    {
        ItemPointerData new_tid;

        /* Insert the tuple and find out where it's put in new_heap */
        raw_heap_insert(state, new_tuple);
        new_tid = new_tuple->t_self;

        /*
         * If the tuple is the updated version of a row, and the prior version
         * wouldn't be DEAD yet, then we need to either resolve the prior
         * version (if it's waiting in rs_unresolved_tups), or make an entry
         * in rs_old_new_tid_map (so we can resolve it when we do see it). The
         * previous tuple's xmax would equal this one's xmin, so it's
         * RECENTLY_DEAD if and only if the xmin is not before OldestXmin.
         */
        if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
            !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
                                   state->rs_oldest_xmin))
        {
            /*
             * Okay, this is B in an update pair.  See if we've seen A.
             */
            UnresolvedTup unresolved;

            memset(&hashkey, 0, sizeof(hashkey));
            hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
            hashkey.tid = old_tid;

            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                                     HASH_FIND, NULL);

            if (unresolved != NULL)
            {
                /*
                 * We have seen and memorized the previous tuple already. Now
                 * that we know where we inserted the tuple its t_ctid points
                 * to, fix its t_ctid and insert it to the new heap.
                 */
                if (free_new)
                    heap_freetuple(new_tuple);
                new_tuple = unresolved->tuple;
                free_new = true;
                old_tid = unresolved->old_tid;
                new_tuple->t_data->t_ctid = new_tid;

                /*
                 * We don't need the hash entry anymore, but don't free its
                 * tuple just yet.
                 */
                hash_search(state->rs_unresolved_tups, &hashkey,
                            HASH_REMOVE, &found);
                Assert(found);

                /* loop back to insert the previous tuple in the chain */
                continue;
            }
            else
            {
                /*
                 * Remember the new tid of this tuple. We'll use it to set the
                 * ctid when we find the previous tuple in the chain.
                 */
                OldToNewMapping mapping;

                mapping = hash_search(state->rs_old_new_tid_map, &hashkey,
                                      HASH_ENTER, &found);
                Assert(!found);

                mapping->new_tid = new_tid;
            }
        }

        /* Done with this (chain of) tuples, for now */
        if (free_new)
            heap_freetuple(new_tuple);
        break;
    }

    MemoryContextSwitchTo(old_cxt);
}