Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Typedefs | Enumerations | Functions | Variables

slru.c File Reference

#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "miscadmin.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruFlushData

Defines

#define SlruFileName(ctl, path, seg)   snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
#define MAX_FLUSH_BUFFERS   16
#define SlruRecentlyUsed(shared, slotno)

Typedefs

typedef struct SlruFlushData SlruFlushData
typedef struct SlruFlushData * SlruFlush

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED, SLRU_SEEK_FAILED, SLRU_READ_FAILED, SLRU_WRITE_FAILED,
  SLRU_FSYNC_FAILED, SLRU_CLOSE_FAILED
}

Functions

static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruFlush fdata)
static bool SlruPhysicalReadPage (SlruCtl ctl, int pageno, int slotno)
static bool SlruPhysicalWritePage (SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
static void SlruReportIOError (SlruCtl ctl, int pageno, TransactionId xid)
static int SlruSelectLRUPage (SlruCtl ctl, int pageno)
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int segpage, void *data)
Size SimpleLruShmemSize (int nslots, int nlsns)
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, LWLockId ctllock, const char *subdir)
int SimpleLruZeroPage (SlruCtl ctl, int pageno)
int SimpleLruReadPage (SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int pageno, TransactionId xid)
void SimpleLruWritePage (SlruCtl ctl, int slotno)
void SimpleLruFlush (SlruCtl ctl, bool checkpoint)
void SimpleLruTruncate (SlruCtl ctl, int cutoffPage)
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int segpage, void *data)
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int segpage, void *data)
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)

Variables

static SlruErrorCause slru_errcause
static int slru_errno

Define Documentation

#define MAX_FLUSH_BUFFERS   16

Definition at line 72 of file slru.c.

Referenced by SlruPhysicalWritePage().

#define SlruFileName (   ctl,
  path,
  seg 
)    snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)

Definition at line 62 of file slru.c.

Referenced by SlruPhysicalReadPage(), SlruPhysicalWritePage(), and SlruReportIOError().

#define SlruRecentlyUsed (   shared,
  slotno 
)
Value:
/*
 * Mark slot "slotno" as the most recently used one.
 *
 * If the slot's page_lru_count already equals shared->cur_lru_count nothing
 * is written; otherwise cur_lru_count is incremented and the new value is
 * stored in page_lru_count[slotno].  Both macro arguments are evaluated more
 * than once, so they must be free of side effects.
 */
do { \
        int     new_lru_count = (shared)->cur_lru_count; \
        if (new_lru_count != (shared)->page_lru_count[slotno]) { \
            (shared)->cur_lru_count = ++new_lru_count; \
            (shared)->page_lru_count[slotno] = new_lru_count; \
        } \
    } while (0)

Definition at line 102 of file slru.c.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().


Typedef Documentation

typedef struct SlruFlushData* SlruFlush

Definition at line 81 of file slru.c.

typedef struct SlruFlushData SlruFlushData

Enumeration Type Documentation

Enumerator:
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 112 of file slru.c.

/*
 * Identifies which file operation failed.  The physical read/write routines
 * store one of these in slru_errcause (with errno saved in slru_errno), and
 * SlruReportIOError() switches on it to choose the error detail message.
 */
{
    SLRU_OPEN_FAILED,           /* OpenTransientFile() returned < 0 */
    SLRU_SEEK_FAILED,           /* lseek() returned < 0 */
    SLRU_READ_FAILED,           /* read() did not return BLCKSZ bytes */
    SLRU_WRITE_FAILED,          /* write() did not return BLCKSZ bytes */
    SLRU_FSYNC_FAILED,          /* pg_fsync() returned nonzero */
    SLRU_CLOSE_FAILED           /* CloseTransientFile() returned nonzero */
} SlruErrorCause;


Function Documentation

void SimpleLruFlush ( SlruCtl  ctl,
bool  checkpoint 
)

Definition at line 1034 of file slru.c.

References Assert, CloseTransientFile(), SlruSharedData::ControlLock, SlruCtlData::do_fsync, SlruFlushData::fd, i, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruFlushData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pg_fsync(), SlruFlushData::segno, SlruCtlData::shared, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruInternalWritePage(), and SlruReportIOError().

Referenced by CheckPointCLOG(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), ShutdownCLOG(), ShutdownMultiXact(), and ShutdownSUBTRANS().

/*
 * Write out every dirty buffer, then fsync (if ctl->do_fsync) and close each
 * segment file that was opened while doing so.
 *
 * checkpoint - when true, the "slot is clean after writing" assertion is
 *              skipped, because another process may already have dirtied
 *              the slot again.
 *
 * If any fsync or close fails, the last such failure is reported through
 * SlruReportIOError() after all files have been processed.
 */
{
    SlruShared  shared = ctl->shared;
    SlruFlushData flush;
    bool        failed = false;
    int         errpage = 0;
    int         slot;
    int         n;

    /* No segment files are open yet. */
    flush.num_files = 0;

    /* Phase 1: push dirty pages to the kernel while holding the lock. */
    LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

    slot = 0;
    while (slot < shared->num_slots)
    {
        SlruInternalWritePage(ctl, slot, &flush);

        /*
         * Outside a checkpoint the slot must now be empty or clean.  During
         * a checkpoint someone else may have re-dirtied it, which is fine.
         */
        Assert(checkpoint ||
               shared->page_status[slot] == SLRU_PAGE_EMPTY ||
               (shared->page_status[slot] == SLRU_PAGE_VALID &&
                !shared->page_dirty[slot]));

        slot++;
    }

    LWLockRelease(shared->ControlLock);

    /* Phase 2: make the writes durable and release the descriptors. */
    for (n = 0; n < flush.num_files; n++)
    {
        if (ctl->do_fsync)
        {
            if (pg_fsync(flush.fd[n]) != 0)
            {
                slru_errcause = SLRU_FSYNC_FAILED;
                slru_errno = errno;
                errpage = flush.segno[n] * SLRU_PAGES_PER_SEGMENT;
                failed = true;
            }
        }

        if (CloseTransientFile(flush.fd[n]) != 0)
        {
            slru_errcause = SLRU_CLOSE_FAILED;
            slru_errno = errno;
            errpage = flush.segno[n] * SLRU_PAGES_PER_SEGMENT;
            failed = true;
        }
    }

    if (failed)
        SlruReportIOError(ctl, errpage, InvalidTransactionId);
}

void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
LWLockId  ctllock,
const char *  subdir 
)

Definition at line 163 of file slru.c.

References Assert, SlruSharedData::buffer_locks, BUFFERALIGN, SlruSharedData::ControlLock, SlruSharedData::cur_lru_count, SlruCtlData::Dir, SlruCtlData::do_fsync, SlruSharedData::group_lsn, IsUnderPostmaster, SlruSharedData::lsn_groups_per_page, LWLockAssign(), MAXALIGN, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, ShmemInitStruct(), SimpleLruShmemSize(), and StrNCpy.

Referenced by AsyncShmemInit(), CLOGShmemInit(), MultiXactShmemInit(), OldSerXidInit(), and SUBTRANSShmemInit().

/*
 * Set up an SLRU: obtain its shared area through ShmemInitStruct() and fill
 * in the caller's unshared control struct.
 *
 * name    - name passed to ShmemInitStruct()
 * nslots  - number of page buffers
 * nlsns   - LSN groups per page (0 means no group_lsn array is set up)
 * ctllock - lock stored as shared->ControlLock
 * subdir  - directory path copied into ctl->Dir
 *
 * When IsUnderPostmaster is false the shared area is initialized here: the
 * header is zeroed, the per-slot arrays are carved out in the same order and
 * with the same alignment that SimpleLruShmemSize() accounts for, and every
 * slot starts out empty with its own buffer lock.
 */
{
    bool        found;
    SlruShared  shared;

    shared = (SlruShared) ShmemInitStruct(name,
                                          SimpleLruShmemSize(nslots, nlsns),
                                          &found);

    if (IsUnderPostmaster)
    {
        /* The area must already have been set up. */
        Assert(found);
    }
    else
    {
        char       *base = (char *) shared;
        char       *bufptr;
        Size        used;
        int         i;

        Assert(!found);

        memset(shared, 0, sizeof(SlruSharedData));

        /* Scalar header fields (latest_page_number is set later). */
        shared->ControlLock = ctllock;
        shared->num_slots = nslots;
        shared->lsn_groups_per_page = nlsns;
        shared->cur_lru_count = 0;

        /* Lay out the per-slot arrays right after the header. */
        used = MAXALIGN(sizeof(SlruSharedData));

        shared->page_buffer = (char **) (base + used);
        used += MAXALIGN(nslots * sizeof(char *));

        shared->page_status = (SlruPageStatus *) (base + used);
        used += MAXALIGN(nslots * sizeof(SlruPageStatus));

        shared->page_dirty = (bool *) (base + used);
        used += MAXALIGN(nslots * sizeof(bool));

        shared->page_number = (int *) (base + used);
        used += MAXALIGN(nslots * sizeof(int));

        shared->page_lru_count = (int *) (base + used);
        used += MAXALIGN(nslots * sizeof(int));

        shared->buffer_locks = (LWLockId *) (base + used);
        used += MAXALIGN(nslots * sizeof(LWLockId));

        if (nlsns > 0)
        {
            shared->group_lsn = (XLogRecPtr *) (base + used);
            used += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
        }

        /* Page buffers follow, starting on a buffer-aligned offset. */
        bufptr = base + BUFFERALIGN(used);
        for (i = 0; i < nslots; i++)
        {
            shared->page_buffer[i] = bufptr;
            shared->page_status[i] = SLRU_PAGE_EMPTY;
            shared->page_dirty[i] = false;
            shared->page_lru_count[i] = 0;
            shared->buffer_locks[i] = LWLockAssign();
            bufptr += BLCKSZ;
        }
    }

    /*
     * Fill in the process-local control struct.  PagePrecedes is assumed to
     * have been set by the caller already.
     */
    ctl->shared = shared;
    ctl->do_fsync = true;       /* default behavior */
    StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
}

int SimpleLruReadPage ( SlruCtl  ctl,
int  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 358 of file slru.c.

References Assert, SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalReadPage(), SlruRecentlyUsed, SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), OldSerXidAdd(), RecordNewMultiXact(), SimpleLruReadPage_ReadOnly(), StartupMultiXact(), SubTransSetParent(), TransactionIdSetPageStatus(), and TrimCLOG().

/*
 * Locate pageno in the buffer slots, reading it from disk if it is not
 * already buffered, and return the number of the slot holding it.
 *
 * ctl      - SLRU control struct
 * pageno   - page to find or read
 * write_ok - if true, a slot that is in WRITE_IN_PROGRESS state is returned
 *            immediately instead of waiting for the write to finish
 * xid      - passed through to SlruReportIOError() if the read fails
 *
 * ControlLock is released around the physical read without having been
 * acquired in this function, so the caller is expected to hold it on entry;
 * it is re-acquired in exclusive mode once the read is done.
 */
{
    SlruShared  shared = ctl->shared;

    /* Outer loop handles restart if we must wait for someone else's I/O */
    for (;;)
    {
        int         slotno;
        bool        ok;

        /* See if page already is in memory; if not, pick victim slot */
        slotno = SlruSelectLRUPage(ctl, pageno);

        /* Did we find the page in memory? */
        if (shared->page_number[slotno] == pageno &&
            shared->page_status[slotno] != SLRU_PAGE_EMPTY)
        {
            /*
             * If page is still being read in, we must wait for I/O.  Likewise
             * if the page is being written and the caller said that's not OK.
             */
            if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
                (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
                 !write_ok))
            {
                SimpleLruWaitIO(ctl, slotno);
                /* Now we must recheck state from the top */
                continue;
            }
            /* Otherwise, it's ready to use */
            SlruRecentlyUsed(shared, slotno);
            return slotno;
        }

        /* We found no match; assert we selected a freeable slot */
        Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
               (shared->page_status[slotno] == SLRU_PAGE_VALID &&
                !shared->page_dirty[slotno]));

        /* Mark the slot read-busy */
        shared->page_number[slotno] = pageno;
        shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
        shared->page_dirty[slotno] = false;

        /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
        LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);

        /* Release control lock while doing I/O */
        LWLockRelease(shared->ControlLock);

        /* Do the read */
        ok = SlruPhysicalReadPage(ctl, pageno, slotno);

        /* Set the LSNs for this newly read-in page to zero */
        SimpleLruZeroLSNs(ctl, slotno);

        /* Re-acquire control lock and update page state */
        LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

        Assert(shared->page_number[slotno] == pageno &&
               shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
               !shared->page_dirty[slotno]);

        /* A failed read leaves the slot empty rather than half-filled */
        shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;

        /* Dropping the buffer lock wakes anyone waiting in SimpleLruWaitIO */
        LWLockRelease(shared->buffer_locks[slotno]);

        /* Now it's okay to ereport if we failed */
        if (!ok)
            SlruReportIOError(ctl, pageno, xid);

        SlruRecentlyUsed(shared, slotno);
        return slotno;
    }
}

int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)

Definition at line 450 of file slru.c.

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruReadPage(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, and SlruRecentlyUsed.

Referenced by asyncQueueReadAllNotifications(), OldSerXidGetMinConflictCommitSeqNo(), SubTransGetParent(), TransactionIdGetStatus(), and TruncateMultiXact().

/*
 * Variant of SimpleLruReadPage() that first looks for the page while holding
 * ControlLock only in shared mode.
 *
 * If a slot already holds pageno and is neither empty nor still being read
 * in, that slot is returned with the shared lock still held.  Otherwise the
 * lock is dropped, re-taken in exclusive mode, and the work is handed to
 * SimpleLruReadPage() with write_ok = true.
 */
{
    SlruShared  shared = ctl->shared;
    int         slot;

    /* Fast path: search the buffers under a shared lock only */
    LWLockAcquire(shared->ControlLock, LW_SHARED);

    slot = 0;
    while (slot < shared->num_slots)
    {
        bool        usable;

        usable = shared->page_number[slot] == pageno &&
            shared->page_status[slot] != SLRU_PAGE_EMPTY &&
            shared->page_status[slot] != SLRU_PAGE_READ_IN_PROGRESS;

        if (usable)
        {
            /* See comments for SlruRecentlyUsed macro */
            SlruRecentlyUsed(shared, slot);
            return slot;
        }

        slot++;
    }

    /* Slow path: upgrade to the exclusive lock and do a regular read */
    LWLockRelease(shared->ControlLock);
    LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

    return SimpleLruReadPage(ctl, pageno, true, xid);
}

Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 143 of file slru.c.

References BUFFERALIGN, and MAXALIGN.

Referenced by AsyncShmemSize(), CLOGShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), and SUBTRANSShmemSize().

/*
 * Compute the amount of shared memory needed for an SLRU with "nslots" page
 * buffers and "nlsns" LSN groups per page.
 *
 * The total is the aligned header, each aligned per-slot array, the optional
 * group_lsn array (only when nlsns > 0), and finally nslots buffers of
 * BLCKSZ bytes starting on a buffer-aligned offset.  The layout mirrors the
 * one SimpleLruInit() carves out.
 */
{
    Size        total;

    /* we assume nslots isn't so large as to risk overflow */

    /* fixed-size header */
    total = MAXALIGN(sizeof(SlruSharedData));

    /* per-slot bookkeeping arrays */
    total += MAXALIGN(nslots * sizeof(char *));         /* page_buffer[] */
    total += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
    total += MAXALIGN(nslots * sizeof(bool));           /* page_dirty[] */
    total += MAXALIGN(nslots * sizeof(int));            /* page_number[] */
    total += MAXALIGN(nslots * sizeof(int));            /* page_lru_count[] */
    total += MAXALIGN(nslots * sizeof(LWLockId));       /* buffer_locks[] */

    /* optional per-page LSN groups */
    if (nlsns > 0)
        total += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */

    /* the page buffers themselves */
    total = BUFFERALIGN(total);
    total += BLCKSZ * nslots;

    return total;
}

void SimpleLruTruncate ( SlruCtl  ctl,
int  cutoffPage 
)

Definition at line 1097 of file slru.c.

References SlruSharedData::ControlLock, SlruCtlData::Dir, ereport, errmsg(), SlruSharedData::latest_page_number, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NULL, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), TruncateCLOG(), TruncateMultiXact(), and TruncateSUBTRANS().

/*
 * Remove the segment files that precede the segment containing cutoffPage.
 *
 * cutoffPage is first rounded down to a segment boundary.  Buffered pages
 * that precede the cutoff (according to ctl->PagePrecedes) are then emptied,
 * writing them or waiting for their pending I/O when they are not simply
 * clean; the slot scan restarts from the top after each such wait.  Finally
 * SlruScanDirectory() is run with SlruScanDirCbDeleteCutoff.
 *
 * If latest_page_number precedes the cutoff, a LOG message is emitted and
 * nothing is truncated.
 */
{
    SlruShared  shared = ctl->shared;
    int         slotno;

    /*
     * The cutoff point is the start of the segment containing cutoffPage.
     */
    cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;

    /*
     * Scan shared memory and remove any pages preceding the cutoff page, to
     * ensure we won't rewrite them later.  (Since this is normally called in
     * or just after a checkpoint, any dirty pages should have been flushed
     * already ... we're just being extra careful here.)
     */
    LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

restart:;

    /*
     * While we are holding the lock, make an important safety check: the
     * planned cutoff point must be <= the current endpoint page. Otherwise we
     * have already wrapped around, and proceeding with the truncation would
     * risk removing the current segment.
     */
    if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
    {
        LWLockRelease(shared->ControlLock);
        ereport(LOG,
          (errmsg("could not truncate directory \"%s\": apparent wraparound",
                  ctl->Dir)));
        return;
    }

    for (slotno = 0; slotno < shared->num_slots; slotno++)
    {
        /* Skip slots that are empty or hold pages at/after the cutoff */
        if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
            continue;
        if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
            continue;

        /*
         * If page is clean, just change state to EMPTY (expected case).
         */
        if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
            !shared->page_dirty[slotno])
        {
            shared->page_status[slotno] = SLRU_PAGE_EMPTY;
            continue;
        }

        /*
         * Hmm, we have (or may have) I/O operations acting on the page, so
         * we've got to wait for them to finish and then start again. This is
         * the same logic as in SlruSelectLRUPage.  (XXX if page is dirty,
         * wouldn't it be OK to just discard it without writing it?  For now,
         * keep the logic the same as it was.)
         */
        if (shared->page_status[slotno] == SLRU_PAGE_VALID)
            SlruInternalWritePage(ctl, slotno, NULL);
        else
            SimpleLruWaitIO(ctl, slotno);
        /* The lock was dropped meanwhile, so every slot must be rechecked */
        goto restart;
    }

    LWLockRelease(shared->ControlLock);

    /* Now we can remove the old segment(s) */
    (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
}

static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
) [static]

Definition at line 304 of file slru.c.

References SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlruCtlData::shared, SLRU_PAGE_READ_IN_PROGRESS, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruInternalWritePage(), and SlruSelectLRUPage().

/*
 * Wait for I/O that is in progress on the given slot to finish.
 *
 * The control lock is released for the duration of the wait and re-acquired
 * in exclusive mode afterwards, so the caller is expected to hold it on
 * entry and must re-examine the slot's state on return.  The wait itself is
 * done by acquiring, and immediately releasing, the slot's buffer lock in
 * shared mode.
 */
{
    SlruShared  shared = ctl->shared;

    /* See notes at top of file */
    LWLockRelease(shared->ControlLock);
    LWLockAcquire(shared->buffer_locks[slotno], LW_SHARED);
    LWLockRelease(shared->buffer_locks[slotno]);
    LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

    /*
     * If the slot is still in an io-in-progress state, then either someone
     * already started a new I/O on the slot, or a previous I/O failed and
     * neglected to reset the page state.  That shouldn't happen, really, but
     * it seems worth a few extra cycles to check and recover from it. We can
     * cheaply test for failure by seeing if the buffer lock is still held (we
     * assume that transaction abort would release the lock).
     */
    if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
        shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
    {
        /* Getting the lock without waiting means nobody is doing the I/O */
        if (LWLockConditionalAcquire(shared->buffer_locks[slotno], LW_SHARED))
        {
            /* indeed, the I/O must have failed */
            if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
                shared->page_status[slotno] = SLRU_PAGE_EMPTY;
            else    /* write_in_progress */
            {
                /* The write never completed, so the page is still dirty */
                shared->page_status[slotno] = SLRU_PAGE_VALID;
                shared->page_dirty[slotno] = true;
            }
            LWLockRelease(shared->buffer_locks[slotno]);
        }
    }
}

void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)
static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
) [static]

Definition at line 287 of file slru.c.

References SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, MemSet, and SlruCtlData::shared.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

/*
 * Reset to zero all group LSN entries belonging to the given slot.
 *
 * Does nothing when the SLRU was set up without LSN groups
 * (lsn_groups_per_page <= 0).
 */
{
    SlruShared  shared = ctl->shared;

    /* No LSN tracking configured: nothing to clear */
    if (shared->lsn_groups_per_page <= 0)
        return;

    /* The slot's entries are contiguous, lsn_groups_per_page of them */
    MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
           shared->lsn_groups_per_page * sizeof(XLogRecPtr));
}

int SimpleLruZeroPage ( SlruCtl  ctl,
int  pageno 
)

Definition at line 246 of file slru.c.

References Assert, SlruSharedData::latest_page_number, MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruRecentlyUsed, and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), AsyncShmemInit(), OldSerXidAdd(), ZeroCLOGPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

/*
 * Set up a buffer slot to hold a brand-new, all-zeroes copy of pageno and
 * return that slot's number.
 *
 * The slot is marked valid and dirty, its LSN entries are cleared, and
 * pageno is recorded as shared->latest_page_number.  Nothing is written to
 * disk here.
 */
{
    SlruShared  shared = ctl->shared;
    int         slot = SlruSelectLRUPage(ctl, pageno);

    /* The chosen slot must be reusable, or already hold this very page */
    Assert(shared->page_status[slot] == SLRU_PAGE_EMPTY ||
           (shared->page_status[slot] == SLRU_PAGE_VALID &&
            !shared->page_dirty[slot]) ||
           shared->page_number[slot] == pageno);

    /* Claim the slot for this page; it is dirty from the start */
    shared->page_number[slot] = pageno;
    shared->page_status[slot] = SLRU_PAGE_VALID;
    shared->page_dirty[slot] = true;
    SlruRecentlyUsed(shared, slot);

    /* Clear the page contents ... */
    MemSet(shared->page_buffer[slot], 0, BLCKSZ);

    /* ... and the LSNs associated with it */
    SimpleLruZeroLSNs(ctl, slot);

    /* A freshly zeroed page is taken to be the latest active one */
    shared->latest_page_number = pageno;

    return slot;
}

static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruFlush  fdata 
) [static]

Definition at line 490 of file slru.c.

References Assert, SlruSharedData::buffer_locks, CloseTransientFile(), SlruSharedData::ControlLock, SlruFlushData::fd, i, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruFlushData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruFlush(), SimpleLruTruncate(), SimpleLruWritePage(), and SlruSelectLRUPage().

/*
 * Write the page held in the given slot out to disk, if it is valid and
 * dirty and still the same page that was in the slot on entry.
 *
 * fdata - if non-NULL, handed to SlruPhysicalWritePage(); should the write
 *         fail, every file descriptor recorded in it is closed here.  NULL
 *         is passed straight through as well.
 *
 * ControlLock is released around the physical write without having been
 * acquired in this function, so the caller is expected to hold it on entry;
 * it is re-acquired in exclusive mode afterwards.  On failure the page is
 * marked dirty again and SlruReportIOError() is called.
 */
{
    SlruShared  shared = ctl->shared;
    int         pageno = shared->page_number[slotno];
    bool        ok;

    /* If a write is in progress, wait for it to finish */
    while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
           shared->page_number[slotno] == pageno)
    {
        SimpleLruWaitIO(ctl, slotno);
    }

    /*
     * Do nothing if page is not dirty, or if buffer no longer contains the
     * same page we were called for.
     */
    if (!shared->page_dirty[slotno] ||
        shared->page_status[slotno] != SLRU_PAGE_VALID ||
        shared->page_number[slotno] != pageno)
        return;

    /*
     * Mark the slot write-busy, and clear the dirtybit.  After this point, a
     * transaction status update on this page will mark it dirty again.
     */
    shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
    shared->page_dirty[slotno] = false;

    /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
    LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);

    /* Release control lock while doing I/O */
    LWLockRelease(shared->ControlLock);

    /* Do the write */
    ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);

    /* If we failed, and we're in a flush, better close the files */
    if (!ok && fdata)
    {
        int         i;

        for (i = 0; i < fdata->num_files; i++)
            CloseTransientFile(fdata->fd[i]);
    }

    /* Re-acquire control lock and update page state */
    LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

    Assert(shared->page_number[slotno] == pageno &&
           shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);

    /* If we failed to write, mark the page dirty again */
    if (!ok)
        shared->page_dirty[slotno] = true;

    shared->page_status[slotno] = SLRU_PAGE_VALID;

    /* Dropping the buffer lock lets waiters in SimpleLruWaitIO proceed */
    LWLockRelease(shared->buffer_locks[slotno]);

    /* Now it's okay to ereport if we failed */
    if (!ok)
        SlruReportIOError(ctl, pageno, InvalidTransactionId);
}

static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int  pageno,
int  slotno 
) [static]

Definition at line 578 of file slru.c.

References CloseTransientFile(), ereport, errmsg(), SlruFlushData::fd, InRecovery, LOG, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, read, SlruFlushData::segno, SlruCtlData::shared, slru_errcause, slru_errno, and SlruFileName.

Referenced by SimpleLruReadPage().

/*
 * Physically read page "pageno" from its segment file into the buffer of
 * slot "slotno".
 *
 * Returns true on success.  On failure returns false after recording the
 * failing step in slru_errcause and the errno value in slru_errno; nothing
 * is reported from here.
 *
 * Special case: when the segment file does not exist (ENOENT) and we are
 * InRecovery, a LOG message is emitted, the buffer is zero-filled and the
 * read counts as successful.
 */
{
    SlruShared  shared = ctl->shared;
    char        path[MAXPGPATH];
    int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
    int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
    int         offset = rpageno * BLCKSZ;
    bool        io_ok = true;
    int         fd;

    SlruFileName(ctl, path, segno);

    /*
     * After a crash and restart we may be asked to set commit status for
     * transactions whose bits live in segments that were already truncated
     * (see notes in SlruPhysicalWritePage).  So during recovery a missing
     * file is tolerated and read as zeroes.
     */
    fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
    if (fd < 0)
    {
        if (errno == ENOENT && InRecovery)
        {
            ereport(LOG,
                    (errmsg("file \"%s\" doesn't exist, reading as zeroes",
                            path)));
            MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
            return true;
        }

        slru_errcause = SLRU_OPEN_FAILED;
        slru_errno = errno;
        return false;
    }

    /* Position at the page, then read exactly one block */
    if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
    {
        slru_errcause = SLRU_SEEK_FAILED;
        slru_errno = errno;
        io_ok = false;
    }
    else
    {
        errno = 0;
        if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
        {
            slru_errcause = SLRU_READ_FAILED;
            slru_errno = errno;
            io_ok = false;
        }
    }

    if (!io_ok)
    {
        /* Error already recorded; the close result is of no interest */
        CloseTransientFile(fd);
        return false;
    }

    if (CloseTransientFile(fd) != 0)
    {
        slru_errcause = SLRU_CLOSE_FAILED;
        slru_errno = errno;
        return false;
    }

    return true;
}

static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int  pageno,
int  slotno,
SlruFlush  fdata 
) [static]

Definition at line 655 of file slru.c.

References CloseTransientFile(), SlruCtlData::do_fsync, END_CRIT_SECTION, SlruFlushData::fd, fd(), SlruSharedData::group_lsn, i, SlruSharedData::lsn_groups_per_page, MAX_FLUSH_BUFFERS, NULL, SlruFlushData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), SlruFlushData::segno, SlruCtlData::shared, slru_errcause, slru_errno, SlruFileName, START_CRIT_SECTION, write, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

/*
 * Physically write the buffer of slot "slotno" to the segment file that
 * contains page "pageno".
 *
 * Returns true on success.  On failure returns false after recording the
 * failing step in slru_errcause and the errno value in slru_errno; nothing
 * is reported from here.
 *
 * fdata - if non-NULL, already-open descriptors are looked up in it by
 *         segment number, and a newly opened file is added to it (up to
 *         MAX_FLUSH_BUFFERS entries).  Files tracked there are left open
 *         and un-fsynced on return; SimpleLruFlush() fsyncs and closes them.
 *         If fdata is NULL, or its table is full, the file is fsynced (when
 *         ctl->do_fsync is set) and closed before returning.
 *
 * When group LSNs are in use, WAL is flushed up to the largest LSN recorded
 * for the slot before any data is written.
 */
{
    SlruShared  shared = ctl->shared;
    int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
    int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
    int         offset = rpageno * BLCKSZ;
    char        path[MAXPGPATH];
    int         fd = -1;

    /*
     * Honor the write-WAL-before-data rule, if appropriate, so that we do not
     * write out data before associated WAL records.  This is the same action
     * performed during FlushBuffer() in the main buffer manager.
     */
    if (shared->group_lsn != NULL)
    {
        /*
         * We must determine the largest async-commit LSN for the page. This
         * is a bit tedious, but since this entire function is a slow path
         * anyway, it seems better to do this here than to maintain a per-page
         * LSN variable (which'd need an extra comparison in the
         * transaction-commit path).
         */
        XLogRecPtr  max_lsn;
        int         lsnindex,
                    lsnoff;

        /* The slot's LSN entries are contiguous; start with the first one */
        lsnindex = slotno * shared->lsn_groups_per_page;
        max_lsn = shared->group_lsn[lsnindex++];
        for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
        {
            XLogRecPtr  this_lsn = shared->group_lsn[lsnindex++];

            if (max_lsn < this_lsn)
                max_lsn = this_lsn;
        }

        if (!XLogRecPtrIsInvalid(max_lsn))
        {
            /*
             * As noted above, elog(ERROR) is not acceptable here, so if
             * XLogFlush were to fail, we must PANIC.  This isn't much of a
             * restriction because XLogFlush is just about all critical
             * section anyway, but let's make sure.
             */
            START_CRIT_SECTION();
            XLogFlush(max_lsn);
            END_CRIT_SECTION();
        }
    }

    /*
     * During a Flush, we may already have the desired file open.
     */
    if (fdata)
    {
        int         i;

        for (i = 0; i < fdata->num_files; i++)
        {
            if (fdata->segno[i] == segno)
            {
                fd = fdata->fd[i];
                break;
            }
        }
    }

    /* fd is still -1 if no already-open descriptor was found above */
    if (fd < 0)
    {
        /*
         * If the file doesn't already exist, we should create it.  It is
         * possible for this to need to happen when writing a page that's not
         * first in its segment; we assume the OS can cope with that. (Note:
         * it might seem that it'd be okay to create files only when
         * SimpleLruZeroPage is called for the first page of a segment.
         * However, if after a crash and restart the REDO logic elects to
         * replay the log from a checkpoint before the latest one, then it's
         * possible that we will get commands to set transaction status of
         * transactions that have already been truncated from the commit log.
         * Easiest way to deal with that is to accept references to
         * nonexistent files here and in SlruPhysicalReadPage.)
         *
         * Note: it is possible for more than one backend to be executing this
         * code simultaneously for different pages of the same file. Hence,
         * don't use O_EXCL or O_TRUNC or anything like that.
         */
        SlruFileName(ctl, path, segno);
        fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY,
                               S_IRUSR | S_IWUSR);
        if (fd < 0)
        {
            slru_errcause = SLRU_OPEN_FAILED;
            slru_errno = errno;
            return false;
        }

        if (fdata)
        {
            if (fdata->num_files < MAX_FLUSH_BUFFERS)
            {
                /* Remember the descriptor so later pages can reuse it */
                fdata->fd[fdata->num_files] = fd;
                fdata->segno[fdata->num_files] = segno;
                fdata->num_files++;
            }
            else
            {
                /*
                 * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
                 * fall back to treating it as a standalone write.
                 */
                fdata = NULL;
            }
        }
    }

    /*
     * From here on, "!fdata" means this function owns fd and must close it
     * on every exit path; otherwise the descriptor stays open in fdata.
     */
    if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
    {
        slru_errcause = SLRU_SEEK_FAILED;
        slru_errno = errno;
        if (!fdata)
            CloseTransientFile(fd);
        return false;
    }

    errno = 0;
    if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
    {
        /* if write didn't set errno, assume problem is no disk space */
        if (errno == 0)
            errno = ENOSPC;
        slru_errcause = SLRU_WRITE_FAILED;
        slru_errno = errno;
        if (!fdata)
            CloseTransientFile(fd);
        return false;
    }

    /*
     * If not part of Flush, need to fsync now.  We assume this happens
     * infrequently enough that it's not a performance issue.
     */
    if (!fdata)
    {
        if (ctl->do_fsync && pg_fsync(fd))
        {
            slru_errcause = SLRU_FSYNC_FAILED;
            slru_errno = errno;
            CloseTransientFile(fd);
            return false;
        }

        if (CloseTransientFile(fd))
        {
            slru_errcause = SLRU_CLOSE_FAILED;
            slru_errno = errno;
            return false;
        }
    }

    return true;
}

static void SlruReportIOError ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
) [static]

Definition at line 823 of file slru.c.

References elog, ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, SlruFlushData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName.

Referenced by SimpleLruFlush(), SimpleLruReadPage(), and SlruInternalWritePage().

{
    /*
     * Raise an ERROR describing the I/O failure previously recorded in
     * slru_errcause and slru_errno.  pageno selects the segment file name
     * and byte offset quoted in the detail message; xid is the transaction
     * id quoted in the primary message.
     */
    int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
    int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
    int         offset = rpageno * BLCKSZ;
    char        path[MAXPGPATH];

    /* Rebuild the path of the segment file that contains pageno. */
    SlruFileName(ctl, path, segno);

    /*
     * Put the saved error code back into errno before reporting: the detail
     * strings below use %m, and errcode_for_file_access() is called without
     * arguments, so both presumably consult errno at this point.
     */
    errno = slru_errno;
    switch (slru_errcause)
    {
        case SLRU_OPEN_FAILED:
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not access status of transaction %u", xid),
                     errdetail("Could not open file \"%s\": %m.", path)));
            break;
        case SLRU_SEEK_FAILED:
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not access status of transaction %u", xid),
                 errdetail("Could not seek in file \"%s\" to offset %u: %m.",
                           path, offset)));
            break;
        case SLRU_READ_FAILED:
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not access status of transaction %u", xid),
               errdetail("Could not read from file \"%s\" at offset %u: %m.",
                         path, offset)));
            break;
        case SLRU_WRITE_FAILED:
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not access status of transaction %u", xid),
                errdetail("Could not write to file \"%s\" at offset %u: %m.",
                          path, offset)));
            break;
        case SLRU_FSYNC_FAILED:
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not access status of transaction %u", xid),
                     errdetail("Could not fsync file \"%s\": %m.",
                               path)));
            break;
        case SLRU_CLOSE_FAILED:
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not access status of transaction %u", xid),
                     errdetail("Could not close file \"%s\": %m.",
                               path)));
            break;
        default:
            /* can't get here, we trust */
            elog(ERROR, "unrecognized SimpleLru error cause: %d",
                 (int) slru_errcause);
            break;
    }
}

bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1213 of file slru.c.

References DEBUG2, SlruCtlData::Dir, ereport, errmsg(), MAXPGPATH, snprintf(), and unlink().

Referenced by AsyncShmemInit().

{
    char        target[MAXPGPATH];

    /* Full path of the scanned file inside this SLRU's directory. */
    snprintf(target, MAXPGPATH, "%s/%s", ctl->Dir, filename);

    ereport(DEBUG2,
            (errmsg("removing file \"%s\"", target)));

    /* Removal is best-effort; the result of unlink() is not examined. */
    (void) unlink(target);

    /* Returning false asks the directory scan to continue. */
    return false;
}

static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
) [static]

Definition at line 1192 of file slru.c.

References DEBUG2, SlruCtlData::Dir, ereport, errmsg(), MAXPGPATH, SlruCtlData::PagePrecedes, snprintf(), and unlink().

Referenced by SimpleLruTruncate().

{
    int         cutoff = *(int *) data;
    char        target[MAXPGPATH];

    /* Leave the file alone unless segpage precedes the cutoff page. */
    if (!ctl->PagePrecedes(segpage, cutoff))
        return false;           /* keep going */

    /* Full path of the scanned file inside this SLRU's directory. */
    snprintf(target, MAXPGPATH, "%s/%s", ctl->Dir, filename);

    ereport(DEBUG2,
            (errmsg("removing file \"%s\"", target)));

    /* Removal is best-effort; the result of unlink() is not examined. */
    (void) unlink(target);

    return false;               /* keep going */
}

bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1175 of file slru.c.

References SlruCtlData::PagePrecedes.

Referenced by TruncateCLOG().

{
    int         requested = *(int *) data;
    int         segStart;

    /* Round the requested cutoff down to a multiple of the segment size. */
    segStart = requested - (requested % SLRU_PAGES_PER_SEGMENT);

    /*
     * true: found a segment preceding the cutoff, so stop iterating.
     * false: keep going.
     */
    return ctl->PagePrecedes(segpage, segStart) ? true : false;
}

bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1236 of file slru.c.

References AllocateDir(), callback(), dirent::d_name, DEBUG2, SlruCtlData::Dir, elog, FreeDir(), NULL, and ReadDir().

Referenced by AsyncShmemInit(), SimpleLruTruncate(), TruncateCLOG(), and TruncateMultiXact().

{
    bool        stopped = false;
    DIR        *dir;
    struct dirent *entry;

    dir = AllocateDir(ctl->Dir);
    while ((entry = ReadDir(dir, ctl->Dir)) != NULL)
    {
        char       *name = entry->d_name;
        int         segno;
        int         segpage;

        /*
         * Only names consisting of exactly four characters drawn from
         * 0-9 and A-F are handed to the callback; skip everything else.
         */
        if (strlen(name) != 4 ||
            strspn(name, "0123456789ABCDEF") != 4)
            continue;

        /* The name is the hex segment number; derive its first page. */
        segno = (int) strtol(name, NULL, 16);
        segpage = segno * SLRU_PAGES_PER_SEGMENT;

        elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
             ctl->Dir, name);

        /* A true result from the callback ends the scan early. */
        stopped = callback(ctl, name, segpage, data);
        if (stopped)
            break;
    }
    FreeDir(dir);

    /* The last callback result, or false if no callback was invoked. */
    return stopped;
}

static int SlruSelectLRUPage ( SlruCtl  ctl,
int  pageno 
) [static]

Definition at line 897 of file slru.c.

References SlruSharedData::cur_lru_count, SlruSharedData::latest_page_number, NULL, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

{
    /*
     * Choose the buffer slot to use for page pageno and return its slot
     * number.  The result is one of: a non-EMPTY slot already assigned to
     * pageno, an EMPTY slot, or a VALID slot that is not dirty.  Along the
     * way this may write out a dirty victim page or wait for an in-progress
     * I/O, after which the whole selection is redone from scratch.
     */
    SlruShared  shared = ctl->shared;

    /* Outer loop handles restart after I/O */
    for (;;)
    {
        int         slotno;
        int         cur_count;
        int         bestvalidslot = 0;  /* keep compiler quiet */
        int         best_valid_delta = -1;
        int         best_valid_page_number = 0; /* keep compiler quiet */
        int         bestinvalidslot = 0;        /* keep compiler quiet */
        int         best_invalid_delta = -1;
        int         best_invalid_page_number = 0;       /* keep compiler quiet */

        /* See if page already has a buffer assigned */
        for (slotno = 0; slotno < shared->num_slots; slotno++)
        {
            if (shared->page_number[slotno] == pageno &&
                shared->page_status[slotno] != SLRU_PAGE_EMPTY)
                return slotno;
        }

        /*
         * If we find any EMPTY slot, just select that one. Else choose a
         * victim page to replace.  We normally take the least recently used
         * valid page, but we will never take the slot containing
         * latest_page_number, even if it appears least recently used.  We
         * will select a slot that is already I/O busy only if there is no
         * other choice: a read-busy slot will not be least recently used once
         * the read finishes, and waiting for an I/O on a write-busy slot is
         * inferior to just picking some other slot.  Testing shows the slot
         * we pick instead will often be clean, allowing us to begin a read at
         * once.
         *
         * Normally the page_lru_count values will all be different and so
         * there will be a well-defined LRU page.  But since we allow
         * concurrent execution of SlruRecentlyUsed() within
         * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
         * acquire the same lru_count values.  In that case we break ties by
         * choosing the furthest-back page.
         *
         * Notice that this next line forcibly advances cur_lru_count to a
         * value that is certainly beyond any value that will be in the
         * page_lru_count array after the loop finishes.  This ensures that
         * the next execution of SlruRecentlyUsed will mark the page newly
         * used, even if it's for a page that has the current counter value.
         * That gets us back on the path to having good data when there are
         * multiple pages with the same lru_count.
         */
        cur_count = (shared->cur_lru_count)++;
        for (slotno = 0; slotno < shared->num_slots; slotno++)
        {
            int         this_delta;
            int         this_page_number;

            if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
                return slotno;
            /* Age of this slot: larger delta means less recently used. */
            this_delta = cur_count - shared->page_lru_count[slotno];
            if (this_delta < 0)
            {
                /*
                 * Clean up in case shared updates have caused cur_count
                 * increments to get "lost".  We back off the page counts,
                 * rather than trying to increase cur_count, to avoid any
                 * question of infinite loops or failure in the presence of
                 * wrapped-around counts.
                 */
                shared->page_lru_count[slotno] = cur_count;
                this_delta = 0;
            }
            this_page_number = shared->page_number[slotno];
            /* The slot holding the latest page is never a candidate. */
            if (this_page_number == shared->latest_page_number)
                continue;
            if (shared->page_status[slotno] == SLRU_PAGE_VALID)
            {
                /* Track the oldest VALID slot; ties go to the earlier page. */
                if (this_delta > best_valid_delta ||
                    (this_delta == best_valid_delta &&
                     ctl->PagePrecedes(this_page_number,
                                       best_valid_page_number)))
                {
                    bestvalidslot = slotno;
                    best_valid_delta = this_delta;
                    best_valid_page_number = this_page_number;
                }
            }
            else
            {
                /*
                 * Slot is neither EMPTY nor VALID (i.e. I/O busy, per the
                 * discussion above); track the oldest such slot separately
                 * so we know which I/O to wait for if nothing else is usable.
                 */
                if (this_delta > best_invalid_delta ||
                    (this_delta == best_invalid_delta &&
                     ctl->PagePrecedes(this_page_number,
                                       best_invalid_page_number)))
                {
                    bestinvalidslot = slotno;
                    best_invalid_delta = this_delta;
                    best_invalid_page_number = this_page_number;
                }
            }
        }

        /*
         * If all pages (except possibly the latest one) are I/O busy, we'll
         * have to wait for an I/O to complete and then retry.  In that
         * unhappy case, we choose to wait for the I/O on the least recently
         * used slot, on the assumption that it was likely initiated first of
         * all the I/Os in progress and may therefore finish first.
         *
         * best_valid_delta is still -1 here only if no VALID candidate was
         * seen, since every this_delta compared above is >= 0.
         */
        if (best_valid_delta < 0)
        {
            SimpleLruWaitIO(ctl, bestinvalidslot);
            continue;
        }

        /*
         * If the selected page is clean, we're set.
         */
        if (!shared->page_dirty[bestvalidslot])
            return bestvalidslot;

        /*
         * Write the page.
         */
        SlruInternalWritePage(ctl, bestvalidslot, NULL);

        /*
         * Now loop back and try again.  This is the easiest way of dealing
         * with corner cases such as the victim page being re-dirtied while we
         * wrote it.
         */
    }
}


Variable Documentation

int slru_errno [static]