Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Typedefs | Functions | Variables

freelist.c File Reference

#include "postgres.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
Include dependency graph for freelist.c:

Go to the source code of this file.

Data Structures

struct  BufferStrategyControl
struct  BufferAccessStrategyData

Typedefs

typedef struct BufferAccessStrategyData BufferAccessStrategyData

Functions

static volatile BufferDesc * GetBufferFromRing (BufferAccessStrategy strategy)
static void AddBufferToRing (BufferAccessStrategy strategy, volatile BufferDesc *buf)
volatile BufferDesc * StrategyGetBuffer (BufferAccessStrategy strategy, bool *lock_held)
void StrategyFreeBuffer (volatile BufferDesc *buf)
int StrategySyncStart (uint32 *complete_passes, uint32 *num_buf_alloc)
void StrategyNotifyBgWriter (Latch *bgwriterLatch)
Size StrategyShmemSize (void)
void StrategyInitialize (bool init)
BufferAccessStrategy GetAccessStrategy (BufferAccessStrategyType btype)
void FreeAccessStrategy (BufferAccessStrategy strategy)
bool StrategyRejectBuffer (BufferAccessStrategy strategy, volatile BufferDesc *buf)

Variables

static BufferStrategyControl * StrategyControl = NULL

Typedef Documentation


Function Documentation

static void AddBufferToRing ( BufferAccessStrategy  strategy,
volatile BufferDesc * buf 
) [static]
void FreeAccessStrategy ( BufferAccessStrategy  strategy  ) 

Definition at line 462 of file freelist.c.

References NULL, and pfree().

Referenced by FreeBulkInsertState(), heap_endscan(), and initscan().

{
    /* A "default" strategy is a NULL pointer, so there is nothing to release */
    if (strategy == NULL)
        return;

    pfree(strategy);
}

BufferAccessStrategy GetAccessStrategy ( BufferAccessStrategyType  btype  ) 

Definition at line 407 of file freelist.c.

References BAS_BULKREAD, BAS_BULKWRITE, BAS_NORMAL, BAS_VACUUM, BufferAccessStrategyData::btype, elog, ERROR, Min, NBuffers, offsetof, palloc0(), and BufferAccessStrategyData::ring_size.

Referenced by do_autovacuum(), GetBulkInsertState(), initscan(), pgstat_heap(), pgstat_index(), pgstatindex(), and vacuum().

{
    BufferAccessStrategy result;
    int         nslots;

    /*
     * Choose how many ring slots this access pattern gets; the reasoning
     * behind each figure is in buffer/README.
     *
     * Note: the BAS_BULKREAD figure is related to SYNC_SCAN_REPORT_INTERVAL
     * in access/heap/syncscan.c, so look there too before changing it.
     */
    switch (btype)
    {
        case BAS_NORMAL:
            /* NORMAL is served by the "default" object, i.e. no object */
            return NULL;

        case BAS_BULKREAD:
            nslots = 256 * 1024 / BLCKSZ;
            break;
        case BAS_BULKWRITE:
            nslots = 16 * 1024 * 1024 / BLCKSZ;
            break;
        case BAS_VACUUM:
            nslots = 256 * 1024 / BLCKSZ;
            break;

        default:
            elog(ERROR, "unrecognized buffer access strategy: %d",
                 (int) btype);
            return NULL;        /* keep compiler quiet */
    }

    /* Never let the ring claim more than one eighth of shared buffers */
    if (NBuffers / 8 < nslots)
        nslots = NBuffers / 8;

    /*
     * Allocate header plus one Buffer per slot.  palloc0 hands back zeroed
     * memory, so only the nonzero fields need explicit assignment below.
     */
    result = (BufferAccessStrategy)
        palloc0(offsetof(BufferAccessStrategyData, buffers) +
                nslots * sizeof(Buffer));

    result->btype = btype;
    result->ring_size = nslots;

    return result;
}

static volatile BufferDesc * GetBufferFromRing ( BufferAccessStrategy  strategy  )  [static]

Definition at line 476 of file freelist.c.

References buf, BufferDescriptors, BufferAccessStrategyData::buffers, BufferAccessStrategyData::current, BufferAccessStrategyData::current_was_in_ring, InvalidBuffer, LockBufHdr, sbufdesc::refcount, BufferAccessStrategyData::ring_size, UnlockBufHdr, and sbufdesc::usage_count.

Referenced by StrategyGetBuffer().

{
    volatile BufferDesc *desc;
    Buffer      slot_buf;

    /* Step to the following ring slot, wrapping around past the last one */
    strategy->current++;
    if (strategy->current >= strategy->ring_size)
        strategy->current = 0;

    /*
     * An empty slot means the ring is not fully populated yet.  Report
     * "nothing available" so the caller allocates through the normal path
     * and then stores the new buffer here via AddBufferToRing.
     */
    slot_buf = strategy->buffers[strategy->current];
    if (slot_buf == InvalidBuffer)
    {
        strategy->current_was_in_ring = false;
        return NULL;
    }

    /*
     * The slot's buffer is reusable only when it is unpinned and its
     * usage_count is at most 1.  We expect 1 from our own earlier use of
     * this ring element, though the clock sweep may have decremented it
     * since; anything higher means someone else has touched the buffer.
     * Pinned buffers are never eligible.
     */
    desc = &BufferDescriptors[slot_buf - 1];
    LockBufHdr(desc);
    if (desc->refcount != 0 || desc->usage_count > 1)
    {
        /*
         * Not reusable: drop the header lock and have the caller allocate
         * normally; it will overwrite this slot via AddBufferToRing.
         */
        UnlockBufHdr(desc);
        strategy->current_was_in_ring = false;
        return NULL;
    }

    /* Reusable: hand it back with the buffer header still locked */
    strategy->current_was_in_ring = true;
    return desc;
}

void StrategyFreeBuffer ( volatile BufferDesc * buf  ) 

Definition at line 242 of file freelist.c.

References sbufdesc::buf_id, BufFreelistLock, BufferStrategyControl::firstFreeBuffer, sbufdesc::freeNext, FREENEXT_NOT_IN_LIST, BufferStrategyControl::lastFreeBuffer, LW_EXCLUSIVE, LWLockAcquire(), and LWLockRelease().

Referenced by InvalidateBuffer().

{
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * Callers may hand us a buffer that is already on the freelist; link it
     * in only when it is not, so the list stays intact.
     */
    if (buf->freeNext == FREENEXT_NOT_IN_LIST)
    {
        int         old_head = StrategyControl->firstFreeBuffer;

        /* Push onto the front of the list */
        buf->freeNext = old_head;

        /* A negative old head means the list was empty: we are the tail too */
        if (old_head < 0)
            StrategyControl->lastFreeBuffer = buf->buf_id;

        StrategyControl->firstFreeBuffer = buf->buf_id;
    }

    LWLockRelease(BufFreelistLock);
}

volatile BufferDesc* StrategyGetBuffer ( BufferAccessStrategy  strategy,
bool * lock_held 
)

Definition at line 112 of file freelist.c.

References AddBufferToRing(), Assert, BufferStrategyControl::bgwriterLatch, buf, BufferDescriptors, BufFreelistLock, BufferStrategyControl::completePasses, elog, ERROR, BufferStrategyControl::firstFreeBuffer, sbufdesc::freeNext, FREENEXT_NOT_IN_LIST, GetBufferFromRing(), LockBufHdr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NBuffers, BufferStrategyControl::nextVictimBuffer, NULL, BufferStrategyControl::numBufferAllocs, sbufdesc::refcount, SetLatch(), UnlockBufHdr, and sbufdesc::usage_count.

Referenced by BufferAlloc().

{
    /*
     * Select a victim buffer, trying three sources in order: the caller's
     * strategy ring (if a strategy was passed), the shared freelist, and
     * finally the clock sweep over all buffers.
     *
     * Every successful return hands back a buffer whose header is still
     * locked: each return path below follows a LockBufHdr with no matching
     * UnlockBufHdr (GetBufferFromRing does the same for the ring path).
     *
     * *lock_held tells the caller whether BufFreelistLock is also still held
     * on return: false when the buffer came from the ring, true when it came
     * from the freelist or the clock sweep.
     *
     * If the clock sweep sees NBuffers pinned buffers in a row, the function
     * reports ERROR "no unpinned buffers available".
     */
    volatile BufferDesc *buf;
    Latch      *bgwriterLatch;
    int         trycounter;     /* pinned buffers left to see before giving up */

    /*
     * If given a strategy object, see whether it can select a buffer. We
     * assume strategy objects don't need the BufFreelistLock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy);
        if (buf != NULL)
        {
            /* Ring hit: BufFreelistLock was never taken on this path */
            *lock_held = false;
            return buf;
        }
    }

    /* Nope, so lock the freelist */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.  Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * If bgwriterLatch is set, we need to waken the bgwriter, but we should
     * not do so while holding BufFreelistLock; so release and re-grab.  This
     * is annoyingly tedious, but it happens at most once per bgwriter cycle,
     * so the performance hit is minimal.
     */
    bgwriterLatch = StrategyControl->bgwriterLatch;
    if (bgwriterLatch)
    {
        /* Clear the shared pointer first so the latch is set only once */
        StrategyControl->bgwriterLatch = NULL;
        LWLockRelease(BufFreelistLock);
        SetLatch(bgwriterLatch);
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    }

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            /* Remember the new buffer in the caller's ring, if it has one */
            if (strategy != NULL)
                AddBufferToRing(strategy, buf);
            return buf;         /* buffer header lock is still held */
        }
        UnlockBufHdr(buf);
    }

    /* Nothing on the freelist, so run the "clock sweep" algorithm */
    trycounter = NBuffers;
    for (;;)
    {
        buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

        /* Advance the clock hand, counting each wraparound as a full pass */
        if (++StrategyControl->nextVictimBuffer >= NBuffers)
        {
            StrategyControl->nextVictimBuffer = 0;
            StrategyControl->completePasses++;
        }

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; decrement the usage_count (unless pinned) and keep scanning.
         */
        LockBufHdr(buf);
        if (buf->refcount == 0)
        {
            if (buf->usage_count > 0)
            {
                buf->usage_count--;
                /* We changed state, so restart the give-up countdown */
                trycounter = NBuffers;
            }
            else
            {
                /* Found a usable buffer */
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                return buf;     /* buffer header lock is still held */
            }
        }
        else if (--trycounter == 0)
        {
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            /* Drop the header lock before reporting the error */
            UnlockBufHdr(buf);
            elog(ERROR, "no unpinned buffers available");
        }
        UnlockBufHdr(buf);
    }
}

void StrategyInitialize ( bool  init  ) 

Definition at line 342 of file freelist.c.

References Assert, BufferStrategyControl::bgwriterLatch, BufferStrategyControl::completePasses, BufferStrategyControl::firstFreeBuffer, InitBufTable(), BufferStrategyControl::lastFreeBuffer, NBuffers, BufferStrategyControl::nextVictimBuffer, NUM_BUFFER_PARTITIONS, BufferStrategyControl::numBufferAllocs, and ShmemInitStruct().

Referenced by InitBufferPool().

{
    bool        already_exists;

    /*
     * Set up the shared buffer lookup hashtable.
     *
     * Running out of lookup entries is not tolerable, so the table must be
     * sized generously.  Steady-state use is NBuffers entries, but
     * BufferAlloc() inserts the new entry before removing the old one, and
     * in principle that can happen in every partition at once.  Hence the
     * worst case is NBuffers + NUM_BUFFER_PARTITIONS entries.
     */
    InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

    /* Attach to the shared strategy control block, creating it if needed */
    StrategyControl = (BufferStrategyControl *)
        ShmemInitStruct("Buffer Strategy Status",
                        sizeof(BufferStrategyControl),
                        &already_exists);

    if (already_exists)
    {
        /* Block was set up earlier; caller must not be asking to init */
        Assert(!init);
        return;
    }

    /* First-time setup: happens once only, usually in the postmaster */
    Assert(init);

    /*
     * Take over the complete chain of free buffers, which we assume
     * InitBufferPool() has already linked together.
     */
    StrategyControl->firstFreeBuffer = 0;
    StrategyControl->lastFreeBuffer = NBuffers - 1;

    /* Clock sweep starts at the first buffer */
    StrategyControl->nextVictimBuffer = 0;

    /* Statistics start from zero */
    StrategyControl->completePasses = 0;
    StrategyControl->numBufferAllocs = 0;

    /* No bgwriter wakeup has been requested yet */
    StrategyControl->bgwriterLatch = NULL;
}

void StrategyNotifyBgWriter ( Latch * bgwriterLatch  ) 

Definition at line 299 of file freelist.c.

References BufferStrategyControl::bgwriterLatch, BufFreelistLock, LW_EXCLUSIVE, LWLockAcquire(), and LWLockRelease().

Referenced by BackgroundWriterMain().

{
    /*
     * Record the given latch pointer in the shared strategy control block.
     * StrategyGetBuffer reads this field under BufFreelistLock and, when it
     * is non-NULL, clears it and sets the latch.
     *
     * We acquire the BufFreelistLock just to ensure that the store appears
     * atomic to StrategyGetBuffer.  The bgwriter should call this rather
     * infrequently, so there's no performance penalty from being safe.
     */
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    StrategyControl->bgwriterLatch = bgwriterLatch;
    LWLockRelease(BufFreelistLock);
}

bool StrategyRejectBuffer ( BufferAccessStrategy  strategy,
volatile BufferDesc * buf 
)

Definition at line 547 of file freelist.c.

References BAS_BULKREAD, BufferAccessStrategyData::btype, BufferDescriptorGetBuffer, BufferAccessStrategyData::buffers, BufferAccessStrategyData::current, and BufferAccessStrategyData::current_was_in_ring.

Referenced by BufferAlloc().

{
    /* Rejection applies to bulk-read strategies only */
    if (strategy->btype != BAS_BULKREAD)
        return false;

    /*
     * If the current buffer did not come out of the ring, or the ring slot
     * no longer refers to this buffer, stay out of the way of the normal
     * buffer-replacement strategy.
     */
    if (!strategy->current_was_in_ring)
        return false;
    if (strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
        return false;

    /*
     * Drop the dirty buffer from the ring.  Without this, a ring whose
     * members are all dirty would make the caller loop forever.
     */
    strategy->buffers[strategy->current] = InvalidBuffer;

    return true;
}

Size StrategyShmemSize ( void   ) 

Definition at line 321 of file freelist.c.

References add_size(), BufTableShmemSize(), MAXALIGN, NBuffers, and NUM_BUFFER_PARTITIONS.

Referenced by BufferShmemSize().

{
    Size        total;

    /* Lookup hash table; see StrategyInitialize for how its size is chosen */
    total = add_size(0, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));

    /* Plus the shared replacement strategy control block, max-aligned */
    return add_size(total, MAXALIGN(sizeof(BufferStrategyControl)));
}

int StrategySyncStart ( uint32 * complete_passes,
uint32 * num_buf_alloc 
)

Variable Documentation

Definition at line 52 of file freelist.c.