
bufmgr.h File Reference

#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h"


Defines

#define P_NEW   InvalidBlockNumber
#define BUFFER_LOCK_UNLOCK   0
#define BUFFER_LOCK_SHARE   1
#define BUFFER_LOCK_EXCLUSIVE   2
#define BufferIsValid(bufnum)
#define BufferIsPinned(bufnum)
#define BufferGetBlock(buffer)
#define BufferGetPageSize(buffer)
#define BufferGetPage(buffer)   ((Page)BufferGetBlock(buffer))
#define RelationGetNumberOfBlocks(reln)   RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)

Typedefs

typedef void * Block
typedef enum BufferAccessStrategyType BufferAccessStrategyType

Enumerations

enum  BufferAccessStrategyType { BAS_NORMAL, BAS_BULKREAD, BAS_BULKWRITE, BAS_VACUUM }
enum  ReadBufferMode { RBM_NORMAL, RBM_ZERO, RBM_ZERO_ON_ERROR }

Functions

void PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
void ReleaseBuffer (Buffer buffer)
void UnlockReleaseBuffer (Buffer buffer)
void MarkBufferDirty (Buffer buffer)
void IncrBufferRefCount (Buffer buffer)
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
void InitBufferPool (void)
void InitBufferPoolAccess (void)
void InitBufferPoolBackend (void)
void AtEOXact_Buffers (bool isCommit)
void PrintBufferLeakWarning (Buffer buffer)
void CheckPointBuffers (int flags)
BlockNumber BufferGetBlockNumber (Buffer buffer)
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
void FlushRelationBuffers (Relation rel)
void FlushDatabaseBuffers (Oid dbid)
void DropRelFileNodeBuffers (RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock)
void DropRelFileNodesAllBuffers (RelFileNodeBackend *rnodes, int nnodes)
void DropDatabaseBuffers (Oid dbid)
bool BufferIsPermanent (Buffer buffer)
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
Size BufferShmemSize (void)
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
void MarkBufferDirtyHint (Buffer buffer)
void UnlockBuffers (void)
void LockBuffer (Buffer buffer, int mode)
bool ConditionalLockBuffer (Buffer buffer)
void LockBufferForCleanup (Buffer buffer)
bool ConditionalLockBufferForCleanup (Buffer buffer)
bool HoldingBufferPinThatDelaysRecovery (void)
void AbortBufferIO (void)
void BufmgrCommit (void)
bool BgBufferSync (void)
void AtProcExit_LocalBuffers (void)
BufferAccessStrategy GetAccessStrategy (BufferAccessStrategyType btype)
void FreeAccessStrategy (BufferAccessStrategy strategy)

Variables

PGDLLIMPORT int NBuffers
bool zero_damaged_pages
int bgwriter_lru_maxpages
double bgwriter_lru_multiplier
bool track_io_timing
int target_prefetch_pages
PGDLLIMPORT char * BufferBlocks
PGDLLIMPORT int32 * PrivateRefCount
PGDLLIMPORT int NLocBuffer
PGDLLIMPORT Block * LocalBufferBlockPointers
PGDLLIMPORT int32 * LocalRefCount

Define Documentation

#define BUFFER_LOCK_EXCLUSIVE   2
#define BUFFER_LOCK_SHARE   1
#define BUFFER_LOCK_UNLOCK   0
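
These three modes are the only values LockBuffer() accepts. A minimal sketch of the canonical read-only access pattern (not from bufmgr.c; "rel" and "blkno" are assumed to be supplied by the caller):

Buffer      buf = ReadBuffer(rel, blkno);   /* pin the page */
Page        page;

LockBuffer(buf, BUFFER_LOCK_SHARE);         /* take the content lock */
page = BufferGetPage(buf);
/* ... inspect tuples on "page" while the share lock is held ... */
UnlockReleaseBuffer(buf);                   /* drop the lock, then the pin */
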
#define BufferGetBlock(buffer)
Value:
( \
    AssertMacro(BufferIsValid(buffer)), \
    BufferIsLocal(buffer) ? \
        LocalBufferBlockPointers[-(buffer) - 1] \
    : \
        (Block) (BufferBlocks + ((Size) ((buffer) - 1)) * BLCKSZ) \
)

Definition at line 127 of file bufmgr.h.

Referenced by XLogInsert(), and XLogSaveBufferForHint().

#define BufferGetPage(buffer)   ((Page)BufferGetBlock(buffer))

Definition at line 157 of file bufmgr.h.

Referenced by _bt_binsrch(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_endpoint(), _bt_findinsertloc(), _bt_get_endpoint(), _bt_getbuf(), _bt_getroot(), _bt_getrootheight(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_killitems(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_parent_deletion_safe(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_steppage(), _bt_vacuum_one_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_expandtable(), _hash_first(), _hash_freeovflpage(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_getovflpage(), _hash_initbitmap(), _hash_metapinit(), _hash_next(), _hash_pgaddtup(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_step(), acquire_sample_rows(), allocNewBuffer(), AlterSequence(), bitgetpage(), BitmapHeapNext(), bt_metap(), bt_page_items(), btree_xlog_cleanup(), btree_xlog_delete(), btree_xlog_delete_get_latestRemovedXid(), btree_xlog_delete_page(), btree_xlog_insert(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_vacuum(), btvacuumpage(), BufferGetLSNAtomic(), collectMatchBitmap(), collectMatchesForHeapRow(), count_nondeletable_pages(), createPostingTree(), dataIsEnoughSpace(), dataLocateItem(), dataLocateLeafItem(), dataPlaceToPage(), dataSplitPage(), do_setval(), doPickSplit(), entryGetNextItem(), entryIsEnoughSpace(), entryLocateEntry(), entryLocateLeafEntry(), entryPlaceToPage(), entrySplitPage(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetBTPageStatistics(), GetRecordedFreeSpace(), GetTupleForTrigger(), GetVisibilityMapPins(), ginbuild(), ginbulkdelete(), ginContinueSplit(), ginDataFillRoot(), ginDeletePage(), ginEntryFillRoot(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), GinInitBuffer(), GinInitMetabuffer(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginPageGetLinkItup(), ginRedoCreateIndex(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumPage(), ginScanToDelete(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumEntryPage(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuild(), gistbulkdelete(), gistcheckpage(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), GISTInitBuffer(), gistMemorizeAllDownlinks(), gistNewBuffer(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuumcleanup(), hashbulkdelete(), hashgetbitmap(), hashgettuple(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_is_all_visible(), heap_page_prune(), heap_page_prune_execute(), heap_page_prune_opt(), heap_prune_chain(), heap_update(), heap_xlog_clean(), heap_xlog_delete(), heap_xlog_freeze(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_newpage(), heap_xlog_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), 
heapgettup_pagemode(), IndexBuildHeapScan(), lazy_check_needs_freeze(), lazy_scan_heap(), lazy_vacuum_heap(), lazy_vacuum_page(), log_heap_update(), log_newpage_buffer(), MarkBufferDirtyHint(), moveLeafs(), moveRightIfItNeeded(), nextval_internal(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstatginindex(), pgstatindex(), read_seq_tuple(), RelationGetBufferForTuple(), RelationPutHeapTuple(), RestoreBackupBlockContents(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistInitBuffer(), SpGistNewBuffer(), SpGistSetLastUsedPage(), SpGistUpdateMetaPage(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoCreateIndex(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), startScanEntry(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), validate_index_heapscan(), visibilitymap_clear(), visibilitymap_count(), visibilitymap_set(), visibilitymap_test(), visibilitymap_truncate(), vm_readbuf(), writeListPage(), XLogCheckBuffer(), XLogReadBufferExtended(), XLogRecordPageWithFreeSpace(), and xlogVacuumPage().

#define BufferGetPageSize(buffer)
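
Value (reconstructed; every buffer holds one standard BLCKSZ-byte page):
( \
    AssertMacro(BufferIsValid(buffer)), \
    (Size)BLCKSZ \
)
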
#define BufferIsPinned(bufnum)
Value:
( \
    !BufferIsValid(bufnum) ? \
        false \
    : \
        BufferIsLocal(bufnum) ? \
            (LocalRefCount[-(bufnum) - 1] > 0) \
        : \
            (PrivateRefCount[(bufnum) - 1] > 0) \
)

Definition at line 109 of file bufmgr.h.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), and IncrBufferRefCount().

#define BufferIsValid(bufnum)
Value:
( \
    AssertMacro((bufnum) <= NBuffers && (bufnum) >= -NLocBuffer), \
    (bufnum) != InvalidBuffer  \
)

Definition at line 96 of file bufmgr.h.

Referenced by _bt_endpoint(), _bt_first(), _bt_get_endpoint(), _bt_insertonpg(), _bt_killitems(), _bt_next(), _bt_pagedel(), _bt_readpage(), _bt_relandgetbuf(), _bt_restore_meta(), _bt_search(), _bt_steppage(), _hash_next(), _hash_step(), btree_xlog_cleanup(), btree_xlog_delete(), btree_xlog_delete_get_latestRemovedXid(), btree_xlog_delete_page(), btree_xlog_insert(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_vacuum(), BufferGetLSNAtomic(), BufferIsPermanent(), CheckForSerializableConflictIn(), ConditionalLockBuffer(), ConditionalLockBufferForCleanup(), entryGetItem(), ExecClearTuple(), ExecDelete(), ExecMaterializeSlot(), ExecStoreMinimalTuple(), ExecStoreTuple(), FreeSpaceMapTruncateRel(), fsm_search(), fsm_vacuum_page(), GetRecordedFreeSpace(), GetVisibilityMapPins(), ginContinueSplit(), ginRedoCreateIndex(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumPage(), gistinserttuples(), gistplacetopage(), gistRedoClearFollowRight(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistXLogSplit(), gistXLogUpdate(), hashendscan(), hashgettuple(), hashrescan(), heap_endscan(), heap_rescan(), heap_restrpos(), heap_update(), heap_xlog_clean(), heap_xlog_delete(), heap_xlog_freeze(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_newpage(), heap_xlog_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), heapgettup_pagemode(), index_endscan(), index_getnext(), index_getnext_tid(), index_rescan(), lazy_scan_heap(), lazy_vacuum_heap(), lazy_vacuum_page(), LockBuffer(), LockBufferForCleanup(), log_heap_visible(), MarkBufferDirty(), MarkBufferDirtyHint(), PrintBufferLeakWarning(), ReleaseAndReadBuffer(), ReleaseBuffer(), RestoreBackupBlockContents(), seq_redo(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoCreateIndex(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), systable_recheck_tuple(), visibilitymap_clear(), visibilitymap_count(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), visibilitymap_test(), visibilitymap_truncate(), and XLogReadBuffer().

#define P_NEW   InvalidBlockNumber
#define RelationGetNumberOfBlocks(reln)   RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)

Typedef Documentation

typedef void* Block

Definition at line 23 of file bufmgr.h.


Enumeration Type Documentation

enum BufferAccessStrategyType

Enumerator:
BAS_NORMAL 
BAS_BULKREAD 
BAS_BULKWRITE 
BAS_VACUUM 

Definition at line 26 of file bufmgr.h.

{
    BAS_NORMAL,                 /* Normal random access */
    BAS_BULKREAD,               /* Large read-only scan (hint bit updates are
                                 * ok) */
    BAS_BULKWRITE,              /* Large multi-block write (e.g. COPY IN) */
    BAS_VACUUM                  /* VACUUM */
} BufferAccessStrategyType;
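
A strategy object of the appropriate type is obtained from GetAccessStrategy() and passed to ReadBufferExtended(). A minimal sketch for a large read-only scan ("rel" and "blkno" assumed supplied by the caller):

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
Buffer      buf;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy);
/* ... process the page ... */
ReleaseBuffer(buf);
FreeAccessStrategy(strategy);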

enum ReadBufferMode

Enumerator:
RBM_NORMAL 
RBM_ZERO 
RBM_ZERO_ON_ERROR 

Definition at line 36 of file bufmgr.h.

{
    RBM_NORMAL,                 /* Normal read */
    RBM_ZERO,                   /* Don't read from disk, caller will
                                 * initialize */
    RBM_ZERO_ON_ERROR           /* Read, but return an all-zeros page on error */
} ReadBufferMode;
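
RBM_ZERO avoids a pointless disk read when the caller intends to overwrite the whole page anyway. A minimal sketch of initializing a page that way (WAL-logging, which real callers must add, is omitted; PageInit() comes from bufpage.h):

Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);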


Function Documentation

void AbortBufferIO ( void   ) 

Definition at line 3098 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, ereport, errcode(), errdetail(), errmsg(), sbufdesc::flags, buftag::forkNum, sbufdesc::io_in_progress_lock, IsForInput, LockBufHdr, LW_EXCLUSIVE, LWLockAcquire(), pfree(), relpathperm, buftag::rnode, sbufdesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

{
    volatile BufferDesc *buf = InProgressBuf;

    if (buf)
    {
        /*
         * Since LWLockReleaseAll has already been called, we're not holding
         * the buffer's io_in_progress_lock. We have to re-acquire it so that
         * we can use TerminateBufferIO. Anyone who's executing WaitIO on the
         * buffer will be in a busy spin until we succeed in doing this.
         */
        LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);

        LockBufHdr(buf);
        Assert(buf->flags & BM_IO_IN_PROGRESS);
        if (IsForInput)
        {
            Assert(!(buf->flags & BM_DIRTY));
            /* We'd better not think buffer is valid yet */
            Assert(!(buf->flags & BM_VALID));
            UnlockBufHdr(buf);
        }
        else
        {
            BufFlags    sv_flags;

            sv_flags = buf->flags;
            Assert(sv_flags & BM_DIRTY);
            UnlockBufHdr(buf);
            /* Issue notice if this is not the first failure... */
            if (sv_flags & BM_IO_ERROR)
            {
                /* Buffer is pinned, so we can read tag without spinlock */
                char       *path;

                path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
                ereport(WARNING,
                        (errcode(ERRCODE_IO_ERROR),
                         errmsg("could not write block %u of %s",
                                buf->tag.blockNum, path),
                         errdetail("Multiple failures --- write error might be permanent.")));
                pfree(path);
            }
        }
        TerminateBufferIO(buf, false, BM_IO_ERROR);
    }
}

void AtEOXact_Buffers ( bool  isCommit  ) 

Definition at line 1708 of file bufmgr.c.

References Assert, assert_enabled, AtEOXact_LocalBuffers(), NBuffers, PrintBufferLeakWarning(), and PrivateRefCount.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

{
#ifdef USE_ASSERT_CHECKING
    if (assert_enabled)
    {
        int         RefCountErrors = 0;
        Buffer      b;

        for (b = 1; b <= NBuffers; b++)
        {
            if (PrivateRefCount[b - 1] != 0)
            {
                PrintBufferLeakWarning(b);
                RefCountErrors++;
            }
        }
        Assert(RefCountErrors == 0);
    }
#endif

    AtEOXact_LocalBuffers(isCommit);
}

void AtProcExit_LocalBuffers ( void   ) 

Definition at line 532 of file localbuf.c.

References Assert, assert_enabled, i, LocalRefCount, NLocBuffer, and PrintBufferLeakWarning().

Referenced by AtProcExit_Buffers().

{
#ifdef USE_ASSERT_CHECKING
    if (assert_enabled && LocalRefCount)
    {
        int         RefCountErrors = 0;
        int         i;

        for (i = 0; i < NLocBuffer; i++)
        {
            if (LocalRefCount[i] != 0)
            {
                Buffer  b = -i - 1;

                PrintBufferLeakWarning(b);
                RefCountErrors++;
            }
        }
        Assert(RefCountErrors == 0);
    }
#endif
}

bool BgBufferSync ( void   ) 

Definition at line 1350 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BgWriterStats, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

{
    /* info obtained from freelist.c */
    int         strategy_buf_id;
    uint32      strategy_passes;
    uint32      recent_alloc;

    /*
     * Information saved between calls so we can determine the strategy
     * point's advance rate and avoid scanning already-cleaned buffers.
     */
    static bool saved_info_valid = false;
    static int  prev_strategy_buf_id;
    static uint32 prev_strategy_passes;
    static int  next_to_clean;
    static uint32 next_passes;

    /* Moving averages of allocation rate and clean-buffer density */
    static float smoothed_alloc = 0;
    static float smoothed_density = 10.0;

    /* Potentially these could be tunables, but for now, not */
    float       smoothing_samples = 16;
    float       scan_whole_pool_milliseconds = 120000.0;

    /* Used to compute how far we scan ahead */
    long        strategy_delta;
    int         bufs_to_lap;
    int         bufs_ahead;
    float       scans_per_alloc;
    int         reusable_buffers_est;
    int         upcoming_alloc_est;
    int         min_scan_buffers;

    /* Variables for the scanning loop proper */
    int         num_to_scan;
    int         num_written;
    int         reusable_buffers;

    /* Variables for final smoothed_density update */
    long        new_strategy_delta;
    uint32      new_recent_alloc;

    /*
     * Find out where the freelist clock sweep currently is, and how many
     * buffer allocations have happened since our last call.
     */
    strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);

    /* Report buffer alloc counts to pgstat */
    BgWriterStats.m_buf_alloc += recent_alloc;

    /*
     * If we're not running the LRU scan, just stop after doing the stats
     * stuff.  We mark the saved state invalid so that we can recover sanely
     * if LRU scan is turned back on later.
     */
    if (bgwriter_lru_maxpages <= 0)
    {
        saved_info_valid = false;
        return true;
    }

    /*
     * Compute strategy_delta = how many buffers have been scanned by the
     * clock sweep since last time.  If first time through, assume none. Then
     * see if we are still ahead of the clock sweep, and if so, how many
     * buffers we could scan before we'd catch up with it and "lap" it. Note:
     * weird-looking coding of xxx_passes comparisons is to avoid bogus
     * behavior when the passes counts wrap around.
     */
    if (saved_info_valid)
    {
        int32       passes_delta = strategy_passes - prev_strategy_passes;

        strategy_delta = strategy_buf_id - prev_strategy_buf_id;
        strategy_delta += (long) passes_delta *NBuffers;

        Assert(strategy_delta >= 0);

        if ((int32) (next_passes - strategy_passes) > 0)
        {
            /* we're one pass ahead of the strategy point */
            bufs_to_lap = strategy_buf_id - next_to_clean;
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta, bufs_to_lap);
#endif
        }
        else if (next_passes == strategy_passes &&
                 next_to_clean >= strategy_buf_id)
        {
            /* on same pass, but ahead or at least not behind */
            bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta, bufs_to_lap);
#endif
        }
        else
        {
            /*
             * We're behind, so skip forward to the strategy point and start
             * cleaning from there.
             */
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta);
#endif
            next_to_clean = strategy_buf_id;
            next_passes = strategy_passes;
            bufs_to_lap = NBuffers;
        }
    }
    else
    {
        /*
         * Initializing at startup or after LRU scanning had been off. Always
         * start at the strategy point.
         */
#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
             strategy_passes, strategy_buf_id);
#endif
        strategy_delta = 0;
        next_to_clean = strategy_buf_id;
        next_passes = strategy_passes;
        bufs_to_lap = NBuffers;
    }

    /* Update saved info for next time */
    prev_strategy_buf_id = strategy_buf_id;
    prev_strategy_passes = strategy_passes;
    saved_info_valid = true;

    /*
     * Compute how many buffers had to be scanned for each new allocation, ie,
     * 1/density of reusable buffers, and track a moving average of that.
     *
     * If the strategy point didn't move, we don't update the density estimate
     */
    if (strategy_delta > 0 && recent_alloc > 0)
    {
        scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
        smoothed_density += (scans_per_alloc - smoothed_density) /
            smoothing_samples;
    }

    /*
     * Estimate how many reusable buffers there are between the current
     * strategy point and where we've scanned ahead to, based on the smoothed
     * density estimate.
     */
    bufs_ahead = NBuffers - bufs_to_lap;
    reusable_buffers_est = (float) bufs_ahead / smoothed_density;

    /*
     * Track a moving average of recent buffer allocations.  Here, rather than
     * a true average we want a fast-attack, slow-decline behavior: we
     * immediately follow any increase.
     */
    if (smoothed_alloc <= (float) recent_alloc)
        smoothed_alloc = recent_alloc;
    else
        smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
            smoothing_samples;

    /* Scale the estimate by a GUC to allow more aggressive tuning. */
    upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);

    /*
     * If recent_alloc remains at zero for many cycles, smoothed_alloc will
     * eventually underflow to zero, and the underflows produce annoying
     * kernel warnings on some platforms.  Once upcoming_alloc_est has gone to
     * zero, there's no point in tracking smaller and smaller values of
     * smoothed_alloc, so just reset it to exactly zero to avoid this
     * syndrome.  It will pop back up as soon as recent_alloc increases.
     */
    if (upcoming_alloc_est == 0)
        smoothed_alloc = 0;

    /*
     * Even in cases where there's been little or no buffer allocation
     * activity, we want to make a small amount of progress through the buffer
     * cache so that as many reusable buffers as possible are clean after an
     * idle period.
     *
     * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
     * the BGW will be called during the scan_whole_pool time; slice the
     * buffer pool into that many sections.
     */
    min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));

    if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
    {
#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
             upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
#endif
        upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
    }

    /*
     * Now write out dirty reusable buffers, working forward from the
     * next_to_clean point, until we have lapped the strategy scan, or cleaned
     * enough buffers to match our estimate of the next cycle's allocation
     * requirements, or hit the bgwriter_lru_maxpages limit.
     */

    /* Make sure we can handle the pin inside SyncOneBuffer */
    ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

    num_to_scan = bufs_to_lap;
    num_written = 0;
    reusable_buffers = reusable_buffers_est;

    /* Execute the LRU scan */
    while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
    {
        int         buffer_state = SyncOneBuffer(next_to_clean, true);

        if (++next_to_clean >= NBuffers)
        {
            next_to_clean = 0;
            next_passes++;
        }
        num_to_scan--;

        if (buffer_state & BUF_WRITTEN)
        {
            reusable_buffers++;
            if (++num_written >= bgwriter_lru_maxpages)
            {
                BgWriterStats.m_maxwritten_clean++;
                break;
            }
        }
        else if (buffer_state & BUF_REUSABLE)
            reusable_buffers++;
    }

    BgWriterStats.m_buf_written_clean += num_written;

#ifdef BGW_DEBUG
    elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
         recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
         smoothed_density, reusable_buffers_est, upcoming_alloc_est,
         bufs_to_lap - num_to_scan,
         num_written,
         reusable_buffers - reusable_buffers_est);
#endif

    /*
     * Consider the above scan as being like a new allocation scan.
     * Characterize its density and update the smoothed one based on it. This
     * effectively halves the moving average period in cases where both the
     * strategy and the background writer are doing some useful scanning,
     * which is helpful because a long memory isn't as desirable on the
     * density estimates.
     */
    new_strategy_delta = bufs_to_lap - num_to_scan;
    new_recent_alloc = reusable_buffers - reusable_buffers_est;
    if (new_strategy_delta > 0 && new_recent_alloc > 0)
    {
        scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
        smoothed_density += (scans_per_alloc - smoothed_density) /
            smoothing_samples;

#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
             new_recent_alloc, new_strategy_delta,
             scans_per_alloc, smoothed_density);
#endif
    }

    /* Return true if OK to hibernate */
    return (bufs_to_lap == 0 && recent_alloc == 0);
}
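
As a worked example of the minimum-progress rule above: with a bgwriter_delay of 200 ms (the default) and 128 MB of shared_buffers (NBuffers = 16384), min_scan_buffers comes out to 16384 / (120000 / 200) ≈ 27, so each call scans at least on the order of 27 buffers even when no allocations are happening.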

BlockNumber BufferGetBlockNumber ( Buffer  buffer  ) 

Definition at line 1854 of file bufmgr.c.

References Assert, buftag::blockNum, BufferDescriptors, BufferIsLocal, BufferIsPinned, LocalBufferDescriptors, and sbufdesc::tag.

Referenced by _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_endpoint(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_newroot(), _bt_pagedel(), _bt_search(), _bt_split(), _bt_steppage(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_step(), allocNewBuffer(), btvacuumpage(), CheckForSerializableConflictIn(), createPostingTree(), dataPlaceToPage(), dataSplitPage(), doPickSplit(), entryPlaceToPage(), entrySplitPage(), fill_seq_with_data(), ginDataFillRoot(), ginFindParents(), ginInsertValue(), ginPageGetLinkItup(), ginRedoSplit(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistformdownlink(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistXLogSplit(), gistXLogUpdate(), heap_delete(), heap_hot_search_buffer(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), index_getnext(), log_heap_clean(), log_heap_freeze(), log_heap_visible(), makeSublist(), moveLeafs(), ReadBufferBI(), RelationGetBufferForTuple(), RelationPutHeapTuple(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), visibilitymap_test(), writeListPage(), XLogReadBufferExtended(), and xlogVacuumPage().

{
    volatile BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
        bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
    else
        bufHdr = &BufferDescriptors[buffer - 1];

    /* pinned, so OK to read tag without spinlock */
    return bufHdr->tag.blockNum;
}

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer  ) 

Definition at line 2076 of file bufmgr.c.

References Assert, BufferDescriptors, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, DataChecksumsEnabled(), LockBufHdr, PageGetLSN, and UnlockBufHdr.

Referenced by gistScanPage(), and XLogCheckBuffer().

{
    volatile BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];
    char                *page = BufferGetPage(buffer);
    XLogRecPtr           lsn;

    /*
     * If we don't need locking for correctness, fastpath out.
     */
    if (!DataChecksumsEnabled() || BufferIsLocal(buffer))
        return PageGetLSN(page);

    /* Make sure we've got a real buffer, and that we hold a pin on it. */
    Assert(BufferIsValid(buffer));
    Assert(BufferIsPinned(buffer));

    LockBufHdr(bufHdr);
    lsn = PageGetLSN(page);
    UnlockBufHdr(bufHdr);

    return lsn;
}

void BufferGetTag ( Buffer  buffer,
RelFileNode rnode,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 1875 of file bufmgr.c.

References Assert, buftag::blockNum, BufferDescriptors, BufferIsLocal, BufferIsPinned, buftag::forkNum, LocalBufferDescriptors, buftag::rnode, and sbufdesc::tag.

Referenced by fsm_search_avail(), log_newpage_buffer(), and XLogCheckBuffer().

{
    volatile BufferDesc *bufHdr;

    /* Do the same checks as BufferGetBlockNumber. */
    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
        bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
    else
        bufHdr = &BufferDescriptors[buffer - 1];

    /* pinned, so OK to read tag without spinlock */
    *rnode = bufHdr->tag.rnode;
    *forknum = bufHdr->tag.forkNum;
    *blknum = bufHdr->tag.blockNum;
}
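
A minimal usage sketch ("buf" assumed already pinned by the caller); the buffer's tag is returned through the three out-parameters:

RelFileNode rnode;
ForkNumber  forknum;
BlockNumber blkno;

BufferGetTag(buf, &rnode, &forknum, &blkno);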

bool BufferIsPermanent ( Buffer  buffer  ) 

Definition at line 2046 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferDescriptors, BufferIsLocal, BufferIsPinned, BufferIsValid, and sbufdesc::flags.

Referenced by SetHintBits().

{
    volatile BufferDesc *bufHdr;

    /* Local buffers are used only for temp relations. */
    if (BufferIsLocal(buffer))
        return false;

    /* Make sure we've got a real buffer, and that we hold a pin on it. */
    Assert(BufferIsValid(buffer));
    Assert(BufferIsPinned(buffer));

    /*
     * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
     * need not bother with the buffer header spinlock.  Even if someone else
     * changes the buffer header flags while we're doing this, we assume that
     * changing an aligned 2-byte BufFlags value is atomic, so we'll read the
     * old value or the new value, but not random garbage.
     */
    bufHdr = &BufferDescriptors[buffer - 1];
    return (bufHdr->flags & BM_PERMANENT) != 0;
}

Size BufferShmemSize ( void   ) 

Definition at line 164 of file buf_init.c.

References add_size(), mul_size(), NBuffers, and StrategyShmemSize().

Referenced by CreateSharedMemoryAndSemaphores().

{
    Size        size = 0;

    /* size of buffer descriptors */
    size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));

    /* size of data pages */
    size = add_size(size, mul_size(NBuffers, BLCKSZ));

    /* size of stuff controlled by freelist.c */
    size = add_size(size, StrategyShmemSize());

    return size;
}

void BufmgrCommit ( void   ) 

Definition at line 1840 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

{
    /* Nothing to do in bufmgr anymore... */
}

void CheckPointBuffers ( int  flags  ) 

Definition at line 1823 of file bufmgr.c.

References BufferSync(), CheckpointStats, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and smgrsync().

Referenced by CheckPointGuts().

{
    TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
    CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
    BufferSync(flags);
    CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
    TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
    smgrsync();
    CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
    TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
}

bool ConditionalLockBuffer ( Buffer  buffer  ) 
bool ConditionalLockBufferForCleanup ( Buffer  buffer  ) 

Definition at line 2900 of file bufmgr.c.

References Assert, BUFFER_LOCK_UNLOCK, BufferDescriptors, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), LocalRefCount, LockBuffer(), LockBufHdr, PrivateRefCount, sbufdesc::refcount, and UnlockBufHdr.

Referenced by heap_page_prune_opt(), lazy_scan_heap(), and lazy_vacuum_heap().

{
    volatile BufferDesc *bufHdr;

    Assert(BufferIsValid(buffer));

    if (BufferIsLocal(buffer))
    {
        /* There should be exactly one pin */
        Assert(LocalRefCount[-buffer - 1] > 0);
        if (LocalRefCount[-buffer - 1] != 1)
            return false;
        /* Nobody else to wait for */
        return true;
    }

    /* There should be exactly one local pin */
    Assert(PrivateRefCount[buffer - 1] > 0);
    if (PrivateRefCount[buffer - 1] != 1)
        return false;

    /* Try to acquire lock */
    if (!ConditionalLockBuffer(buffer))
        return false;

    bufHdr = &BufferDescriptors[buffer - 1];
    LockBufHdr(bufHdr);
    Assert(bufHdr->refcount > 0);
    if (bufHdr->refcount == 1)
    {
        /* Successfully acquired exclusive lock with pincount 1 */
        UnlockBufHdr(bufHdr);
        return true;
    }

    /* Failed, so release the lock */
    UnlockBufHdr(bufHdr);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    return false;
}
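
Because this function never blocks, callers such as lazy_scan_heap() simply skip pages for which the cleanup lock is unavailable. A minimal sketch of that pattern ("buf" assumed pinned):

if (ConditionalLockBufferForCleanup(buf))
{
    /* exclusive lock with pin count 1: safe to rearrange the page */
    /* ... prune or defragment here ... */
    UnlockReleaseBuffer(buf);
}
else
    ReleaseBuffer(buf);         /* couldn't get the cleanup lock; move on */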

void DropDatabaseBuffers ( Oid  dbid  ) 

Definition at line 2284 of file bufmgr.c.

References BufferDescriptors, RelFileNode::dbNode, i, InvalidateBuffer(), LockBufHdr, NBuffers, buftag::rnode, sbufdesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), and dropdb().

{
    int         i;

    /*
     * We needn't consider local buffers, since by assumption the target
     * database isn't our own.
     */

    for (i = 0; i < NBuffers; i++)
    {
        volatile BufferDesc *bufHdr = &BufferDescriptors[i];

        /*
         * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
         * and saves some cycles.
         */
        if (bufHdr->tag.rnode.dbNode != dbid)
            continue;

        LockBufHdr(bufHdr);
        if (bufHdr->tag.rnode.dbNode == dbid)
            InvalidateBuffer(bufHdr);   /* releases spinlock */
        else
            UnlockBufHdr(bufHdr);
    }
}

void DropRelFileNodeBuffers ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
BlockNumber  firstDelBlock 
)

Definition at line 2126 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, BufferDescriptors, DropRelFileNodeLocalBuffers(), buftag::forkNum, i, InvalidateBuffer(), LockBufHdr, MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, sbufdesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkfork(), and smgrtruncate().

{
    int         i;

    /* If it's a local relation, it's localbuf.c's problem. */
    if (RelFileNodeBackendIsTemp(rnode))
    {
        if (rnode.backend == MyBackendId)
            DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock);
        return;
    }

    for (i = 0; i < NBuffers; i++)
    {
        volatile BufferDesc *bufHdr = &BufferDescriptors[i];

        /*
         * We can make this a tad faster by prechecking the buffer tag before
         * we attempt to lock the buffer; this saves a lot of lock
         * acquisitions in typical cases.  It should be safe because the
         * caller must have AccessExclusiveLock on the relation, or some other
         * reason to be certain that no one is loading new pages of the rel
         * into the buffer pool.  (Otherwise we might well miss such pages
         * entirely.)  Therefore, while the tag might be changing while we
         * look at it, it can't be changing *to* a value we care about, only
         * *away* from such a value.  So false negatives are impossible, and
         * false positives are safe because we'll recheck after getting the
         * buffer lock.
         *
         * We could check forkNum and blockNum as well as the rnode, but the
         * incremental win from doing so seems small.
         */
        if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
            continue;

        LockBufHdr(bufHdr);
        if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
            bufHdr->tag.forkNum == forkNum &&
            bufHdr->tag.blockNum >= firstDelBlock)
            InvalidateBuffer(bufHdr);   /* releases spinlock */
        else
            UnlockBufHdr(bufHdr);
    }
}

void DropRelFileNodesAllBuffers ( RelFileNodeBackend rnodes,
int  nnodes 
)

Definition at line 2182 of file bufmgr.c.

References BufferDescriptors, DropRelFileNodeAllLocalBuffers(), i, InvalidateBuffer(), LockBufHdr, MyBackendId, NBuffers, RelFileNodeBackend::node, NULL, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, rnode_comparator(), sbufdesc::tag, and UnlockBufHdr.

Referenced by smgrdounlink(), and smgrdounlinkall().

{
    int         i,
                n = 0;
    RelFileNode *nodes;
    bool        use_bsearch;

    if (nnodes == 0)
        return;

    nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */

    /* If it's a local relation, it's localbuf.c's problem. */
    for (i = 0; i < nnodes; i++)
    {
        if (RelFileNodeBackendIsTemp(rnodes[i]))
        {
            if (rnodes[i].backend == MyBackendId)
                DropRelFileNodeAllLocalBuffers(rnodes[i].node);
        }
        else
            nodes[n++] = rnodes[i].node;
    }

    /*
     * If there are no non-local relations, then we're done. Release the memory
     * and return.
     */
    if (n == 0)
    {
        pfree(nodes);
        return;
    }

    /*
     * For a low number of relations to drop just use a simple walk through, to
     * save the bsearch overhead. The threshold to use is rather a guess than
     * an exactly determined value, as it depends on many factors (CPU and RAM
     * speeds, amount of shared buffers etc.).
     */
    use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;

    /* sort the list of rnodes if necessary */
    if (use_bsearch)
        pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);

    for (i = 0; i < NBuffers; i++)
    {
        RelFileNode *rnode = NULL;
        volatile BufferDesc *bufHdr = &BufferDescriptors[i];

        /*
         * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
         * and saves some cycles.
         */

        if (!use_bsearch)
        {
            int     j;

            for (j = 0; j < n; j++)
            {
                if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
                {
                    rnode = &nodes[j];
                    break;
                }
            }
        }
        else
        {
            rnode = bsearch((const void *) &(bufHdr->tag.rnode),
                            nodes, n, sizeof(RelFileNode),
                            rnode_comparator);
        }

        /* buffer doesn't belong to any of the given relfilenodes; skip it */
        if (rnode == NULL)
            continue;

        LockBufHdr(bufHdr);
        if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
            InvalidateBuffer(bufHdr);   /* releases spinlock */
        else
            UnlockBufHdr(bufHdr);
    }

    pfree(nodes);
}

void FlushDatabaseBuffers ( Oid  dbid  ) 

Definition at line 2474 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptors, sbufdesc::content_lock, CurrentResourceOwner, RelFileNode::dbNode, sbufdesc::flags, FlushBuffer(), LockBufHdr, LW_SHARED, LWLockAcquire(), LWLockRelease(), NULL, PinBuffer_Locked(), ResourceOwnerEnlargeBuffers(), buftag::rnode, sbufdesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

{
    int         i;
    volatile BufferDesc *bufHdr;

    /* Make sure we can handle the pin inside the loop */
    ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

    for (i = 0; i < NBuffers; i++)
    {
        bufHdr = &BufferDescriptors[i];

        /*
         * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
         * and saves some cycles.
         */
        if (bufHdr->tag.rnode.dbNode != dbid)
            continue;

        LockBufHdr(bufHdr);
        if (bufHdr->tag.rnode.dbNode == dbid &&
            (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
        {
            PinBuffer_Locked(bufHdr);
            LWLockAcquire(bufHdr->content_lock, LW_SHARED);
            FlushBuffer(bufHdr, NULL);
            LWLockRelease(bufHdr->content_lock);
            UnpinBuffer(bufHdr, true);
        }
        else
            UnlockBufHdr(bufHdr);
    }
}

void FlushRelationBuffers ( Relation  rel  ) 

Definition at line 2384 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_VALID, BufferDescriptors, ErrorContextCallback::callback, sbufdesc::content_lock, CurrentResourceOwner, error_context_stack, sbufdesc::flags, FlushBuffer(), buftag::forkNum, LocalBufferDescriptors, LocalBufHdrGetBlock, LockBufHdr, LW_SHARED, LWLockAcquire(), LWLockRelease(), NLocBuffer, PageSetChecksumInplace(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, RelationUsesLocalBuffers, RelFileNodeEquals, ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), sbufdesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by ATExecSetTableSpace(), and heap_sync().

{
    int         i;
    volatile BufferDesc *bufHdr;

    /* Open rel at the smgr level if not already done */
    RelationOpenSmgr(rel);

    if (RelationUsesLocalBuffers(rel))
    {
        for (i = 0; i < NLocBuffer; i++)
        {
            bufHdr = &LocalBufferDescriptors[i];
            if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
                (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
            {
                ErrorContextCallback    errcallback;
                Page                    localpage;

                localpage = (char *) LocalBufHdrGetBlock(bufHdr);

                /* Setup error traceback support for ereport() */
                errcallback.callback = local_buffer_write_error_callback;
                errcallback.arg = (void *) bufHdr;
                errcallback.previous = error_context_stack;
                error_context_stack = &errcallback;

                PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);

                smgrwrite(rel->rd_smgr,
                          bufHdr->tag.forkNum,
                          bufHdr->tag.blockNum,
                          localpage,
                          false);

                bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);

                /* Pop the error context stack */
                error_context_stack = errcallback.previous;
            }
        }

        return;
    }

    /* Make sure we can handle the pin inside the loop */
    ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

    for (i = 0; i < NBuffers; i++)
    {
        bufHdr = &BufferDescriptors[i];

        /*
         * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
         * and saves some cycles.
         */
        if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
            continue;

        LockBufHdr(bufHdr);
        if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
            (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
        {
            PinBuffer_Locked(bufHdr);
            LWLockAcquire(bufHdr->content_lock, LW_SHARED);
            FlushBuffer(bufHdr, rel->rd_smgr);
            LWLockRelease(bufHdr->content_lock);
            UnpinBuffer(bufHdr, true);
        }
        else
            UnlockBufHdr(bufHdr);
    }
}

void FreeAccessStrategy ( BufferAccessStrategy  strategy  ) 

Definition at line 462 of file freelist.c.

References NULL, and pfree().

Referenced by FreeBulkInsertState(), heap_endscan(), and initscan().

{
    /* don't crash if called on a "default" strategy */
    if (strategy != NULL)
        pfree(strategy);
}

BufferAccessStrategy GetAccessStrategy ( BufferAccessStrategyType  btype  ) 

Definition at line 407 of file freelist.c.

References BAS_BULKREAD, BAS_BULKWRITE, BAS_NORMAL, BAS_VACUUM, BufferAccessStrategyData::btype, elog, ERROR, Min, NBuffers, offsetof, palloc0(), and BufferAccessStrategyData::ring_size.

Referenced by do_autovacuum(), GetBulkInsertState(), initscan(), pgstat_heap(), pgstat_index(), pgstatindex(), and vacuum().

{
    BufferAccessStrategy strategy;
    int         ring_size;

    /*
     * Select ring size to use.  See buffer/README for rationales.
     *
     * Note: if you change the ring size for BAS_BULKREAD, see also
     * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     */
    switch (btype)
    {
        case BAS_NORMAL:
            /* if someone asks for NORMAL, just give 'em a "default" object */
            return NULL;

        case BAS_BULKREAD:
            ring_size = 256 * 1024 / BLCKSZ;
            break;
        case BAS_BULKWRITE:
            ring_size = 16 * 1024 * 1024 / BLCKSZ;
            break;
        case BAS_VACUUM:
            ring_size = 256 * 1024 / BLCKSZ;
            break;

        default:
            elog(ERROR, "unrecognized buffer access strategy: %d",
                 (int) btype);
            return NULL;        /* keep compiler quiet */
    }

    /* Make sure ring isn't an undue fraction of shared buffers */
    ring_size = Min(NBuffers / 8, ring_size);

    /* Allocate the object and initialize all elements to zeroes */
    strategy = (BufferAccessStrategy)
        palloc0(offsetof(BufferAccessStrategyData, buffers) +
                ring_size * sizeof(Buffer));

    /* Set fields that don't start out zero */
    strategy->btype = btype;
    strategy->ring_size = ring_size;

    return strategy;
}
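
With the standard BLCKSZ of 8192, the ring sizes above work out to 32 buffers (256 kB) for BAS_BULKREAD and BAS_VACUUM and 2048 buffers (16 MB) for BAS_BULKWRITE, before the NBuffers / 8 cap is applied.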

bool HoldingBufferPinThatDelaysRecovery ( void   ) 

Definition at line 2874 of file bufmgr.c.

References GetStartupBufferPinWaitBufId(), and PrivateRefCount.

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

{
    int         bufid = GetStartupBufferPinWaitBufId();

    /*
     * If we get woken slowly then it's possible that the Startup process was
     * already woken by other backends before we got here. Also possible that
     * we get here by multiple interrupts or interrupts at inappropriate
     * times, so make sure we do nothing if the bufid is not set.
     */
    if (bufid < 0)
        return false;

    if (PrivateRefCount[bufid] > 0)
        return true;

    return false;
}

void IncrBufferRefCount ( Buffer  buffer  ) 
void InitBufferPool ( void   ) 

Definition at line 73 of file buf_init.c.

References Assert, buf, sbufdesc::buf_hdr_lock, sbufdesc::buf_id, BufferBlocks, CLEAR_BUFFERTAG, sbufdesc::content_lock, sbufdesc::flags, sbufdesc::freeNext, i, sbufdesc::io_in_progress_lock, LWLockAssign(), NBuffers, sbufdesc::refcount, ShmemInitStruct(), SpinLockInit, StrategyInitialize(), sbufdesc::tag, sbufdesc::usage_count, and sbufdesc::wait_backend_pid.

Referenced by CreateSharedMemoryAndSemaphores().

{
    bool        foundBufs,
                foundDescs;

    BufferDescriptors = (BufferDesc *)
        ShmemInitStruct("Buffer Descriptors",
                        NBuffers * sizeof(BufferDesc), &foundDescs);

    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
                        NBuffers * (Size) BLCKSZ, &foundBufs);

    if (foundDescs || foundBufs)
    {
        /* both should be present or neither */
        Assert(foundDescs && foundBufs);
        /* note: this path is only taken in EXEC_BACKEND case */
    }
    else
    {
        BufferDesc *buf;
        int         i;

        buf = BufferDescriptors;

        /*
         * Initialize all the buffer headers.
         */
        for (i = 0; i < NBuffers; buf++, i++)
        {
            CLEAR_BUFFERTAG(buf->tag);
            buf->flags = 0;
            buf->usage_count = 0;
            buf->refcount = 0;
            buf->wait_backend_pid = 0;

            SpinLockInit(&buf->buf_hdr_lock);

            buf->buf_id = i;

            /*
             * Initially link all the buffers together as unused. Subsequent
             * management of this list is done by freelist.c.
             */
            buf->freeNext = i + 1;

            buf->io_in_progress_lock = LWLockAssign();
            buf->content_lock = LWLockAssign();
        }

        /* Correct last entry of linked list */
        BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
    }

    /* Init other shared buffer-management stuff */
    StrategyInitialize(!foundDescs);
}

void InitBufferPoolAccess ( void   ) 

Definition at line 145 of file buf_init.c.

References calloc, ereport, errcode(), errmsg(), FATAL, NBuffers, and PrivateRefCount.

Referenced by BaseInit().

{
    /*
     * Allocate and zero local arrays of per-buffer info.
     */
    PrivateRefCount = (int32 *) calloc(NBuffers, sizeof(int32));
    if (!PrivateRefCount)
        ereport(FATAL,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));
}

void InitBufferPoolBackend ( void   ) 

Definition at line 1741 of file bufmgr.c.

References AtProcExit_Buffers(), and on_shmem_exit().

Referenced by AuxiliaryProcessMain(), and InitPostgres().
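
The body (not shown above) just registers the cleanup hook named in the references:

{
    on_shmem_exit(AtProcExit_Buffers, 0);
}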

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 2746 of file bufmgr.c.

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptors, BufferIsLocal, BufferIsValid, sbufdesc::content_lock, elog, ERROR, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_doinsert(), _bt_endpoint(), _bt_first(), _bt_getbuf(), _bt_getroot(), _bt_killitems(), _bt_next(), _bt_relandgetbuf(), _hash_chgbufaccess(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), acquire_sample_rows(), bitgetpage(), bt_metap(), bt_page_items(), bt_page_stats(), btvacuumpage(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), copy_heap_data(), count_nondeletable_pages(), entryGetNextItem(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetTupleForTrigger(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertItemPointers(), ginInsertValue(), GinNewBuffer(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistbulkdelete(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuumcleanup(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_newpage(), heap_xlog_visible(), heapgetpage(), heapgettup(), index_fetch_heap(), IndexBuildHeapScan(), lazy_scan_heap(), LockBufferForCleanup(), moveRightIfItNeeded(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex(), pgstatindex(), read_seq_tuple(), RelationGetBufferForTuple(), RestoreBackupBlockContents(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), systable_recheck_tuple(), UnlockReleaseBuffer(), validate_index_heapscan(), visibilitymap_clear(), visibilitymap_set(), visibilitymap_truncate(), XLogReadBuffer(), and XLogRecordPageWithFreeSpace().

{
    volatile BufferDesc *buf;

    Assert(BufferIsValid(buffer));
    if (BufferIsLocal(buffer))
        return;                 /* local buffers need no lock */

    buf = &(BufferDescriptors[buffer - 1]);

    if (mode == BUFFER_LOCK_UNLOCK)
        LWLockRelease(buf->content_lock);
    else if (mode == BUFFER_LOCK_SHARE)
        LWLockAcquire(buf->content_lock, LW_SHARED);
    else if (mode == BUFFER_LOCK_EXCLUSIVE)
        LWLockAcquire(buf->content_lock, LW_EXCLUSIVE);
    else
        elog(ERROR, "unrecognized buffer lock mode: %d", mode);
}

void LockBufferForCleanup ( Buffer  buffer  ) 

Definition at line 2802 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferDescriptors, BufferIsLocal, BufferIsValid, elog, ERROR, sbufdesc::flags, InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr, MyProcPid, NULL, PrivateRefCount, ProcWaitForSignal(), sbufdesc::refcount, ResolveRecoveryConflictWithBufferPin(), SetStartupBufferPinWaitBufId(), UnlockBufHdr, and sbufdesc::wait_backend_pid.

Referenced by btree_xlog_vacuum(), btvacuumpage(), btvacuumscan(), ginVacuumPostingTreeLeaves(), heap_xlog_clean(), lazy_scan_heap(), and RestoreBackupBlockContents().

{
    volatile BufferDesc *bufHdr;

    Assert(BufferIsValid(buffer));
    Assert(PinCountWaitBuf == NULL);

    if (BufferIsLocal(buffer))
    {
        /* There should be exactly one pin */
        if (LocalRefCount[-buffer - 1] != 1)
            elog(ERROR, "incorrect local pin count: %d",
                 LocalRefCount[-buffer - 1]);
        /* Nobody else to wait for */
        return;
    }

    /* There should be exactly one local pin */
    if (PrivateRefCount[buffer - 1] != 1)
        elog(ERROR, "incorrect local pin count: %d",
             PrivateRefCount[buffer - 1]);

    bufHdr = &BufferDescriptors[buffer - 1];

    for (;;)
    {
        /* Try to acquire lock */
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        LockBufHdr(bufHdr);
        Assert(bufHdr->refcount > 0);
        if (bufHdr->refcount == 1)
        {
            /* Successfully acquired exclusive lock with pincount 1 */
            UnlockBufHdr(bufHdr);
            return;
        }
        /* Failed, so mark myself as waiting for pincount 1 */
        if (bufHdr->flags & BM_PIN_COUNT_WAITER)
        {
            UnlockBufHdr(bufHdr);
            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
            elog(ERROR, "multiple backends attempting to wait for pincount 1");
        }
        bufHdr->wait_backend_pid = MyProcPid;
        bufHdr->flags |= BM_PIN_COUNT_WAITER;
        PinCountWaitBuf = bufHdr;
        UnlockBufHdr(bufHdr);
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

        /* Wait to be signaled by UnpinBuffer() */
        if (InHotStandby)
        {
            /* Publish the bufid that Startup process waits on */
            SetStartupBufferPinWaitBufId(buffer - 1);
            /* Set alarm and then wait to be signaled by UnpinBuffer() */
            ResolveRecoveryConflictWithBufferPin();
            /* Reset the published bufid */
            SetStartupBufferPinWaitBufId(-1);
        }
        else
            ProcWaitForSignal();

        PinCountWaitBuf = NULL;
        /* Loop back and try again */
    }
}

void MarkBufferDirty ( Buffer  buffer  ) 

Definition at line 984 of file bufmgr.c.

References Assert, BM_DIRTY, BufferDescriptors, BufferIsLocal, BufferIsValid, sbufdesc::content_lock, elog, ERROR, sbufdesc::flags, LockBufHdr, LWLockHeldByMe(), MarkLocalBufferDirty(), pgBufferUsage, PrivateRefCount, sbufdesc::refcount, BufferUsage::shared_blks_dirtied, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and VacuumPageDirty.

Referenced by _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_newroot(), _bt_pagedel(), _bt_restore_meta(), _bt_split(), _hash_addovflpage(), _hash_chgbufaccess(), _hash_wrtbuf(), addLeafTuple(), AlterSequence(), btree_xlog_delete(), btree_xlog_delete_page(), btree_xlog_insert(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginInsertValue(), ginRedoCreateIndex(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumPage(), ginUpdateStats(), ginVacuumPostingTreeLeaves(), gistbuild(), gistbuildempty(), gistbulkdelete(), gistplacetopage(), gistRedoClearFollowRight(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), heap_delete(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_clean(), heap_xlog_delete(), heap_xlog_freeze(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_newpage(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_heap(), lazy_vacuum_page(), moveLeafs(), nextval_internal(), RestoreBackupBlockContents(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoCreateIndex(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_set(), visibilitymap_truncate(), and writeListPage().

{
    volatile BufferDesc *bufHdr;

    if (!BufferIsValid(buffer))
        elog(ERROR, "bad buffer ID: %d", buffer);

    if (BufferIsLocal(buffer))
    {
        MarkLocalBufferDirty(buffer);
        return;
    }

    bufHdr = &BufferDescriptors[buffer - 1];

    Assert(PrivateRefCount[buffer - 1] > 0);
    /* unfortunately we can't check if the lock is held exclusively */
    Assert(LWLockHeldByMe(bufHdr->content_lock));

    LockBufHdr(bufHdr);

    Assert(bufHdr->refcount > 0);

    /*
     * If the buffer was not dirty already, do vacuum accounting.
     */
    if (!(bufHdr->flags & BM_DIRTY))
    {
        VacuumPageDirty++;
        pgBufferUsage.shared_blks_dirtied++;
        if (VacuumCostActive)
            VacuumCostBalance += VacuumCostPageDirty;
    }

    bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);

    UnlockBufHdr(bufHdr);
}
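
The write-ahead-logging convention (see src/backend/access/transam/README) requires that the page change and MarkBufferDirty() happen inside a critical section, before XLogInsert(). A sketch of that ordering; buf is assumed pinned, and rmid, info, and rdata stand in for real WAL record setup that is not shown here:

    XLogRecPtr  recptr;

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    START_CRIT_SECTION();

    /* ... apply the change to BufferGetPage(buf) ... */
    MarkBufferDirty(buf);       /* must precede XLogInsert() */

    recptr = XLogInsert(rmid, info, rdata);
    PageSetLSN(BufferGetPage(buf), recptr);

    END_CRIT_SECTION();
    UnlockReleaseBuffer(buf);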

void MarkBufferDirtyHint ( Buffer  buffer  ) 

Definition at line 2585 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BufferDescriptors, BufferGetPage, BufferIsLocal, BufferIsValid, sbufdesc::content_lock, DataChecksumsEnabled(), PGXACT::delayChkpt, elog, ERROR, sbufdesc::flags, LockBufHdr, LWLockHeldByMe(), MarkLocalBufferDirty(), MyPgXact, PageSetLSN, PrivateRefCount, RecoveryInProgress(), sbufdesc::refcount, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), btvacuumpage(), FreeSpaceMapTruncateRel(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), hashgettuple(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

{
    volatile BufferDesc *bufHdr;
    Page    page = BufferGetPage(buffer);

    if (!BufferIsValid(buffer))
        elog(ERROR, "bad buffer ID: %d", buffer);

    if (BufferIsLocal(buffer))
    {
        MarkLocalBufferDirty(buffer);
        return;
    }

    bufHdr = &BufferDescriptors[buffer - 1];

    Assert(PrivateRefCount[buffer - 1] > 0);
    /* here, either share or exclusive lock is OK */
    Assert(LWLockHeldByMe(bufHdr->content_lock));

    /*
     * This routine might get called many times on the same page, if we are
     * making the first scan after commit of an xact that added/deleted many
     * tuples. So, be as quick as we can if the buffer is already dirty.  We do
     * this by not acquiring spinlock if it looks like the status bits are
     * already set.  Since we make this test unlocked, there's a chance we
     * might fail to notice that the flags have just been cleared, and so fail
     * to reset them, due to memory-ordering issues.  But since this function
     * is only intended to be used in cases where failing to write out the data
     * would be harmless anyway, it doesn't really matter.
     */
    if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
        (BM_DIRTY | BM_JUST_DIRTIED))
    {
        XLogRecPtr  lsn = InvalidXLogRecPtr;
        bool        dirtied = false;
        bool        delayChkpt = false;

        /*
         * If checksums are enabled, and the buffer is permanent, then a full
         * page image may be required even for some hint bit updates to protect
         * against torn pages. This full page image is only necessary if the
         * hint bit update is the first change to the page since the last
         * checkpoint.
         *
         * We don't check full_page_writes here because that logic is
         * included when we call XLogInsert() since the value changes
         * dynamically.
         */
        if (DataChecksumsEnabled() && (bufHdr->flags & BM_PERMANENT))
        {
            /*
             * If we're in recovery we cannot dirty a page because of a hint.
             * We can set the hint, just not dirty the page as a result, so
             * the hint is lost when we evict the page or shut down.
             *
             * See src/backend/storage/page/README for longer discussion.
             */
            if (RecoveryInProgress())
                return;

            /*
             * If the block is already dirty because we either made a change
             * or set a hint already, then we don't need to write a full page
             * image.  Note that aggressive cleaning of blocks
             * dirtied by hint bit setting would increase the call rate.
             * Bulk setting of hint bits would reduce the call rate...
             *
             * We must issue the WAL record before we mark the buffer dirty.
             * Otherwise we might write the page before we write the WAL.
             * That causes a race condition, since a checkpoint might occur
             * between writing the WAL record and marking the buffer dirty.
             * We solve that with a kluge, but one that is already in use
             * during transaction commit to prevent race conditions.
             * Basically, we simply prevent the checkpoint WAL record from
             * being written until we have marked the buffer dirty. We don't
             * start the checkpoint flush until we have marked dirty, so our
             * checkpoint must flush the change to disk successfully or the
             * checkpoint never gets written, in which case crash recovery
             * will fix things up.
             *
             * It's possible we may enter here without an xid, so it is
             * essential that CreateCheckpoint waits for virtual transactions
             * rather than full transactionids.
             */
            MyPgXact->delayChkpt = delayChkpt = true;
            lsn = XLogSaveBufferForHint(buffer);
        }

        LockBufHdr(bufHdr);
        Assert(bufHdr->refcount > 0);
        if (!(bufHdr->flags & BM_DIRTY))
        {
            dirtied = true;     /* Means "will be dirtied by this action" */

            /*
             * Set the page LSN if we wrote a backup block. We aren't
             * supposed to set this when only holding a share lock but
             * as long as we serialise it somehow we're OK. We choose to
             * set LSN while holding the buffer header lock, which causes
             * any reader of an LSN who holds only a share lock to also
             * obtain a buffer header lock before using PageGetLSN(),
             * which is enforced in BufferGetLSNAtomic().
             *
             * If checksums are enabled, you might think we should reset the
             * checksum here. That will happen when the page is written
             * sometime later in this checkpoint cycle.
             */
            if (!XLogRecPtrIsInvalid(lsn))
                PageSetLSN(page, lsn);
        }
        bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
        UnlockBufHdr(bufHdr);

        if (delayChkpt)
            MyPgXact->delayChkpt = false;

        if (dirtied)
        {
            VacuumPageDirty++;
            if (VacuumCostActive)
                VacuumCostBalance += VacuumCostPageDirty;
        }
    }
}
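
Hint-bit setters such as SetHintBits() may hold only a share lock, which is why this entry point exists alongside MarkBufferDirty(). A minimal sketch, assuming buf is already pinned and share-locked and tuple points into its page (both hypothetical here):

    tuple->t_infomask |= HEAP_XMIN_COMMITTED;   /* the hint itself */
    MarkBufferDirtyHint(buf);   /* with checksums on, skips dirtying in recovery */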

void PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 125 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), ereport, errcode(), errmsg(), ERROR, INIT_BUFFERTAG, LocalPrefetchBuffer(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, RelationData::rd_smgr, RELATION_IS_OTHER_TEMP, RelationIsValid, RelationOpenSmgr, RelationUsesLocalBuffers, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by BitmapHeapNext().

{
#ifdef USE_PREFETCH
    Assert(RelationIsValid(reln));
    Assert(BlockNumberIsValid(blockNum));

    /* Open it at the smgr level if not already done */
    RelationOpenSmgr(reln);

    if (RelationUsesLocalBuffers(reln))
    {
        /* see comments in ReadBufferExtended */
        if (RELATION_IS_OTHER_TEMP(reln))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                errmsg("cannot access temporary tables of other sessions")));

        /* pass it off to localbuf.c */
        LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum);
    }
    else
    {
        BufferTag   newTag;     /* identity of requested block */
        uint32      newHash;    /* hash value for newTag */
        LWLockId    newPartitionLock;   /* buffer partition lock for it */
        int         buf_id;

        /* create a tag so we can lookup the buffer */
        INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
                       forkNum, blockNum);

        /* determine its hash code and partition lock ID */
        newHash = BufTableHashCode(&newTag);
        newPartitionLock = BufMappingPartitionLock(newHash);

        /* see if the block is in the buffer pool already */
        LWLockAcquire(newPartitionLock, LW_SHARED);
        buf_id = BufTableLookup(&newTag, newHash);
        LWLockRelease(newPartitionLock);

        /* If not in buffers, initiate prefetch */
        if (buf_id < 0)
            smgrprefetch(reln->rd_smgr, forkNum, blockNum);

        /*
         * If the block *is* in buffers, we do nothing.  This is not really
         * ideal: the block might be just about to be evicted, which would be
         * stupid since we know we are going to need it soon.  But the only
         * easy answer is to bump the usage_count, which does not seem like a
         * great solution: when the caller does ultimately touch the block,
         * usage_count would get bumped again, resulting in too much
         * favoritism for blocks that are involved in a prefetch sequence. A
         * real fix would involve some additional per-buffer state, and it's
         * not clear that there's enough of a problem to justify that.
         */
    }
#endif   /* USE_PREFETCH */
}

void PrintBufferLeakWarning ( Buffer  buffer  ) 

Definition at line 1782 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BufferDescriptors, BufferIsLocal, BufferIsValid, elog, sbufdesc::flags, buftag::forkNum, LocalBufferDescriptors, LocalRefCount, MyBackendId, pfree(), PrivateRefCount, sbufdesc::refcount, relpathbackend(), buftag::rnode, sbufdesc::tag, and WARNING.

Referenced by AtEOXact_Buffers(), AtEOXact_LocalBuffers(), AtProcExit_Buffers(), AtProcExit_LocalBuffers(), and ResourceOwnerReleaseInternal().

{
    volatile BufferDesc *buf;
    int32       loccount;
    char       *path;
    BackendId   backend;

    Assert(BufferIsValid(buffer));
    if (BufferIsLocal(buffer))
    {
        buf = &LocalBufferDescriptors[-buffer - 1];
        loccount = LocalRefCount[-buffer - 1];
        backend = MyBackendId;
    }
    else
    {
        buf = &BufferDescriptors[buffer - 1];
        loccount = PrivateRefCount[buffer - 1];
        backend = InvalidBackendId;
    }

    /* theoretically we should lock the bufhdr here */
    path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
    elog(WARNING,
         "buffer refcount leak: [%03d] "
         "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
         buffer, path,
         buf->tag.blockNum, buf->flags,
         buf->refcount, loccount);
    pfree(path);
}

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)
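
The body of ReadBuffer() is not reproduced above; it is a convenience wrapper that reads from the main fork with default options, equivalent to:

    return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
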
Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 228 of file bufmgr.c.

References buf, ereport, errcode(), errmsg(), ERROR, pgstat_count_buffer_hit, pgstat_count_buffer_read, RelationData::rd_rel, RelationData::rd_smgr, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationOpenSmgr.

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), acquire_sample_rows(), btvacuumpage(), btvacuumscan(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistbuildempty(), gistbulkdelete(), gistvacuumcleanup(), heapgetpage(), lazy_scan_heap(), lazy_vacuum_heap(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatindex(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), and vm_readbuf().

{
    bool        hit;
    Buffer      buf;

    /* Open it at the smgr level if not already done */
    RelationOpenSmgr(reln);

    /*
     * Reject attempts to read non-local temporary relations; we would be
     * likely to get wrong data since we have no visibility into the owning
     * session's local buffers.
     */
    if (RELATION_IS_OTHER_TEMP(reln))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot access temporary tables of other sessions")));

    /*
     * Read the buffer, and update pgstat counters to reflect a cache hit or
     * miss.
     */
    pgstat_count_buffer_read(reln);
    buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence,
                            forkNum, blockNum, mode, strategy, &hit);
    if (hit)
        pgstat_count_buffer_hit(reln);
    return buf;
}
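
The strategy argument lets large scans confine themselves to a small ring of buffers instead of flushing the whole shared cache. A sketch of a bulk read, with rel hypothetical:

    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                             RBM_NORMAL, strategy);

        /* ... process the page under an appropriate lock ... */
        ReleaseBuffer(buf);
    }
    FreeAccessStrategy(strategy);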

Buffer ReadBufferWithoutRelcache ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 270 of file bufmgr.c.

References Assert, InRecovery, InvalidBackendId, ReadBuffer_common(), RELPERSISTENCE_PERMANENT, and smgropen().

Referenced by XLogReadBufferExtended().

{
    bool        hit;

    SMgrRelation smgr = smgropen(rnode, InvalidBackendId);

    Assert(InRecovery);

    return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
                             mode, strategy, &hit);
}
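
Recovery code reaches this through XLogReadBufferExtended(), where no Relation is available. A direct call looks like this (rnode and blkno hypothetical):

    Buffer      buf = ReadBufferWithoutRelcache(rnode, MAIN_FORKNUM, blkno,
                                                RBM_ZERO_ON_ERROR, NULL);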

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 2032 of file bufmgr.c.

References RelationData::rd_smgr, RelationOpenSmgr, and smgrnblocks().

Referenced by _hash_getnewbuf(), and _hash_metapinit().

{
    /* Open it at the smgr level if not already done */
    RelationOpenSmgr(relation);

    return smgrnblocks(relation->rd_smgr, forkNum);
}
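
For example, a caller interested in a non-main fork passes the fork number explicitly (rel hypothetical):

    BlockNumber fsm_blocks = RelationGetNumberOfBlocksInFork(rel, FSM_FORKNUM);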

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 1037 of file bufmgr.c.

References Assert, buftag::blockNum, BufferDescriptors, BufferIsLocal, BufferIsValid, CurrentResourceOwner, buftag::forkNum, LocalBufferDescriptors, LocalRefCount, PrivateRefCount, RelationData::rd_node, ReadBuffer(), RelFileNodeEquals, ResourceOwnerForgetBuffer(), buftag::rnode, sbufdesc::tag, and UnpinBuffer().

Referenced by _bt_relandgetbuf(), bitgetpage(), entryGetNextItem(), ginFindLeafPage(), ginInsertValue(), index_fetch_heap(), moveRightIfItNeeded(), and scanPostingTree().

{
    ForkNumber  forkNum = MAIN_FORKNUM;
    volatile BufferDesc *bufHdr;

    if (BufferIsValid(buffer))
    {
        if (BufferIsLocal(buffer))
        {
            Assert(LocalRefCount[-buffer - 1] > 0);
            bufHdr = &LocalBufferDescriptors[-buffer - 1];
            if (bufHdr->tag.blockNum == blockNum &&
                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
                bufHdr->tag.forkNum == forkNum)
                return buffer;
            ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
            LocalRefCount[-buffer - 1]--;
        }
        else
        {
            Assert(PrivateRefCount[buffer - 1] > 0);
            bufHdr = &BufferDescriptors[buffer - 1];
            /* we have pin, so it's ok to examine tag without spinlock */
            if (bufHdr->tag.blockNum == blockNum &&
                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
                bufHdr->tag.forkNum == forkNum)
                return buffer;
            UnpinBuffer(bufHdr, true);
        }
    }

    return ReadBuffer(relation, blockNum);
}
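
Index scans use this to hop between pages while keeping the pin whenever the target turns out to be the page already held; compare _bt_relandgetbuf(). A sketch, with rel and next_blkno hypothetical; note the buffer must be unlocked (but still pinned) before the call:

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);    /* drop the lock, keep the pin */
    buf = ReleaseAndReadBuffer(buf, rel, next_blkno);
    LockBuffer(buf, BUFFER_LOCK_SHARE);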

void ReleaseBuffer ( Buffer  buffer  ) 

Definition at line 2512 of file bufmgr.c.

References Assert, BufferDescriptors, BufferIsLocal, BufferIsValid, CurrentResourceOwner, elog, ERROR, LocalRefCount, PrivateRefCount, ResourceOwnerForgetBuffer(), and UnpinBuffer().

Referenced by _bt_getbuf(), _hash_dropbuf(), AfterTriggerExecute(), btendscan(), btmarkpos(), btrescan(), btrestrpos(), entryGetNextItem(), EvalPlanQualFetch(), EvalPlanQualFetchRowMarks(), ExecClearTuple(), ExecDelete(), ExecEndIndexOnlyScan(), ExecLockRows(), ExecMaterializeSlot(), ExecStoreMinimalTuple(), ExecStoreTuple(), FreeBulkInsertState(), freeGinBtreeStack(), freeScanKeys(), fsm_vacuum_page(), get_raw_page_internal(), GetRecordedFreeSpace(), GetTupleForTrigger(), ginDeletePage(), ginFindParents(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), heap_delete(), heap_endscan(), heap_fetch(), heap_hot_search(), heap_insert(), heap_multi_insert(), heap_rescan(), heap_restrpos(), heap_update(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), heapgettup_pagemode(), index_endscan(), index_getnext_tid(), index_rescan(), lazy_scan_heap(), lazy_vacuum_heap(), pgstatindex(), ReadBufferBI(), RelationGetBufferForTuple(), ResourceOwnerReleaseInternal(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), TidNext(), UnlockReleaseBuffer(), visibilitymap_count(), visibilitymap_pin(), visibilitymap_test(), and XLogReadBufferExtended().

{
    volatile BufferDesc *bufHdr;

    if (!BufferIsValid(buffer))
        elog(ERROR, "bad buffer ID: %d", buffer);

    ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);

    if (BufferIsLocal(buffer))
    {
        Assert(LocalRefCount[-buffer - 1] > 0);
        LocalRefCount[-buffer - 1]--;
        return;
    }

    bufHdr = &BufferDescriptors[buffer - 1];

    Assert(PrivateRefCount[buffer - 1] > 0);

    if (PrivateRefCount[buffer - 1] > 1)
        PrivateRefCount[buffer - 1]--;
    else
        UnpinBuffer(bufHdr, false);
}

void UnlockBuffers ( void   ) 

Definition at line 2720 of file bufmgr.c.

References BM_PIN_COUNT_WAITER, sbufdesc::flags, LockBufHdr, MyProcPid, UnlockBufHdr, and sbufdesc::wait_backend_pid.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

{
    volatile BufferDesc *buf = PinCountWaitBuf;

    if (buf)
    {
        LockBufHdr(buf);

        /*
         * Don't complain if flag bit not set; it could have been reset but we
         * got a cancel/die interrupt before getting the signal.
         */
        if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
            buf->wait_backend_pid == MyProcPid)
            buf->flags &= ~BM_PIN_COUNT_WAITER;

        UnlockBufHdr(buf);

        PinCountWaitBuf = NULL;
    }
}

void UnlockReleaseBuffer ( Buffer  buffer  ) 

Definition at line 2544 of file bufmgr.c.

References BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_relbuf(), _bt_restore_meta(), _hash_relbuf(), _hash_wrtbuf(), acquire_sample_rows(), allocNewBuffer(), AlterSequence(), bt_metap(), bt_page_items(), bt_page_stats(), btree_xlog_delete(), btree_xlog_delete_get_latestRemovedXid(), btree_xlog_delete_page(), btree_xlog_insert(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_vacuum(), count_nondeletable_pages(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), fsm_search(), fsm_set_and_search(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginContinueSplit(), ginDeletePage(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), ginRedoCreateIndex(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumPage(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistbulkdelete(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuumcleanup(), heap_delete(), heap_get_latest_tid(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_clean(), heap_xlog_delete(), heap_xlog_freeze(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_newpage(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_heap(), lazy_vacuum_heap(), moveLeafs(), nextval_internal(), pg_sequence_parameters(), pgstat_gist_page(), pgstat_heap(), pgstatginindex(), ResetSequence(), RestoreBackupBlockContents(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoCreateIndex(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), visibilitymap_truncate(), writeListPage(), and XLogRecordPageWithFreeSpace().
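
The function body is missing from the listing above; as the References line suggests, it is simply the composition of the two calls:

{
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buffer);
}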


Variable Documentation

int bgwriter_lru_maxpages

Definition at line 71 of file bufmgr.c.

Referenced by BgBufferSync().

double bgwriter_lru_multiplier

Definition at line 72 of file bufmgr.c.

Referenced by BgBufferSync().

PGDLLIMPORT char* BufferBlocks

Definition at line 22 of file buf_init.c.

Referenced by InitBufferPool().

PGDLLIMPORT Block* LocalBufferBlockPointers

Definition at line 44 of file localbuf.c.

Referenced by InitLocalBuffers().

PGDLLIMPORT int32* LocalRefCount
PGDLLIMPORT int NBuffers
PGDLLIMPORT int NLocBuffer
PGDLLIMPORT int32* PrivateRefCount

int target_prefetch_pages

Definition at line 80 of file bufmgr.c.

Referenced by assign_effective_io_concurrency(), and BitmapHeapNext().

bool track_io_timing

Definition at line 73 of file bufmgr.c.

Referenced by FlushBuffer(), and ReadBuffer_common().

bool zero_damaged_pages

Definition at line 70 of file bufmgr.c.

Referenced by mdread(), and ReadBuffer_common().