Header And Logo

PostgreSQL
| The world's most advanced open source database.

buf_internals.h

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * buf_internals.h
00004  *    Internal definitions for buffer manager and the buffer replacement
00005  *    strategy.
00006  *
00007  *
00008  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00009  * Portions Copyright (c) 1994, Regents of the University of California
00010  *
00011  * src/include/storage/buf_internals.h
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #ifndef BUFMGR_INTERNALS_H
00016 #define BUFMGR_INTERNALS_H
00017 
00018 #include "storage/buf.h"
00019 #include "storage/latch.h"
00020 #include "storage/lwlock.h"
00021 #include "storage/shmem.h"
00022 #include "storage/smgr.h"
00023 #include "storage/spin.h"
00024 #include "utils/relcache.h"
00025 
00026 
00027 /*
00028  * Flags for buffer descriptors
00029  *
00030  * Note: BM_TAG_VALID essentially means that there is a buffer hashtable
00031  * entry associated with the buffer's tag.
00032  */
00033 #define BM_DIRTY                (1 << 0)        /* data needs writing */
00034 #define BM_VALID                (1 << 1)        /* data is valid */
00035 #define BM_TAG_VALID            (1 << 2)        /* tag is assigned */
00036 #define BM_IO_IN_PROGRESS       (1 << 3)        /* read or write in progress */
00037 #define BM_IO_ERROR             (1 << 4)        /* previous I/O failed */
00038 #define BM_JUST_DIRTIED         (1 << 5)        /* dirtied since write started */
00039 #define BM_PIN_COUNT_WAITER     (1 << 6)        /* have waiter for sole pin */
00040 #define BM_CHECKPOINT_NEEDED    (1 << 7)        /* must write for checkpoint */
00041 #define BM_PERMANENT            (1 << 8)        /* permanent relation (not
00042                                                  * unlogged) */
00043 
00044 typedef bits16 BufFlags;        /* bitmask of BM_* flags; bits 0-8 are defined above */
00045 
00046 /*
00047  * The maximum allowed value of usage_count represents a tradeoff between
00048  * accuracy and speed of the clock-sweep buffer management algorithm.  A
00049  * large value (comparable to NBuffers) would approximate LRU semantics.
00050  * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of
00051  * clock sweeps to find a free buffer, so in practice we don't want the
00052  * value to be very large.
00053  */
00054 #define BM_MAX_USAGE_COUNT  5   /* maximum allowed usage_count value */
00055 
00056 /*
00057  * Buffer tag identifies which disk block the buffer contains.
00058  *
00059  * Note: the BufferTag data must be sufficient to determine where to write the
00060  * block, without reference to pg_class or pg_tablespace entries.  It's
00061  * possible that the backend flushing the buffer doesn't even believe the
00062  * relation is visible yet (its xact may have started before the xact that
00063  * created the rel).  The storage manager must be able to cope anyway.
00064  *
00065  * Note: if there are any pad bytes in the struct, INIT_BUFFERTAG will have
00066  * to be fixed to zero them, since this struct is used as a hash key.
00067  */
00068 typedef struct buftag
00069 {
00070     RelFileNode rnode;          /* physical relation identifier */
00071     ForkNumber  forkNum;        /* which fork of the relation */
00072     BlockNumber blockNum;       /* blknum relative to begin of reln */
00073 } BufferTag;
00074 /* Reset tag (a): every field is set to its Invalid* value.  (a) is evaluated 5 times. */
00075 #define CLEAR_BUFFERTAG(a) \
00076 ( \
00077     (a).rnode.spcNode = InvalidOid, \
00078     (a).rnode.dbNode = InvalidOid, \
00079     (a).rnode.relNode = InvalidOid, \
00080     (a).forkNum = InvalidForkNumber, \
00081     (a).blockNum = InvalidBlockNumber \
00082 )
00083 /* Fill tag (a) from the given rnode, fork and block number.  (a) is evaluated 3 times. */
00084 #define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \
00085 ( \
00086     (a).rnode = (xx_rnode), \
00087     (a).forkNum = (xx_forkNum), \
00088     (a).blockNum = (xx_blockNum) \
00089 )
00090 /* True iff tags (a) and (b) agree on rnode (per RelFileNodeEquals), blockNum and forkNum. */
00091 #define BUFFERTAGS_EQUAL(a,b) \
00092 ( \
00093     RelFileNodeEquals((a).rnode, (b).rnode) && \
00094     (a).blockNum == (b).blockNum && \
00095     (a).forkNum == (b).forkNum \
00096 )
00097 
00098 /*
00099  * The shared buffer mapping table is partitioned to reduce contention.
00100  * To determine which partition lock a given tag requires, compute the tag's
00101  * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
00102  * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
00103  */
00104 #define BufTableHashPartition(hashcode) \
00105     ((hashcode) % NUM_BUFFER_PARTITIONS)        /* partition number for this hash code */
00106 #define BufMappingPartitionLock(hashcode) \
00107     ((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))    /* that partition's LWLockId */
00108 
00109 /*
00110  *  BufferDesc -- shared descriptor/state data for a single shared buffer.
00111  *
00112  * Note: buf_hdr_lock must be held to examine or change the tag, flags,
00113  * usage_count, refcount, or wait_backend_pid fields.  buf_id field never
00114  * changes after initialization, so does not need locking.  freeNext is
00115  * protected by the BufFreelistLock not buf_hdr_lock.  The LWLocks can take
00116  * care of themselves.  The buf_hdr_lock is *not* used to control access to
00117  * the data in the buffer!
00118  *
00119  * An exception is that if we have the buffer pinned, its tag can't change
00120  * underneath us, so we can examine the tag without locking the spinlock.
00121  * Also, in places we do one-time reads of the flags without bothering to
00122  * lock the spinlock; this is generally for situations where we don't expect
00123  * the flag bit being tested to be changing.
00124  *
00125  * We can't physically remove items from a disk page if another backend has
00126  * the buffer pinned.  Hence, a backend may need to wait for all other pins
00127  * to go away.  This is signaled by storing its own PID into
00128  * wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER.  At present,
00129  * there can be only one such waiter per buffer.
00130  *
00131  * We use this same struct for local buffer headers, but the lock fields
00132  * are not used and not all of the flag bits are useful either.
00133  */
00134 typedef struct sbufdesc
00135 {
00136     BufferTag   tag;            /* ID of page contained in buffer */
00137     BufFlags    flags;          /* BM_* bits; see definitions above */
00138     uint16      usage_count;    /* usage counter for clock sweep code */
00139     unsigned    refcount;       /* # of backends holding pins on buffer */
00140     int         wait_backend_pid;       /* backend PID of pin-count waiter */
00141 
00142     slock_t     buf_hdr_lock;   /* spinlock protecting the above fields */
00143 
00144     int         buf_id;         /* buffer's index number (from 0) */
00145     int         freeNext;       /* freelist link: next index or a FREENEXT_* value */
00146 
00147     LWLockId    io_in_progress_lock;    /* to wait for I/O to complete */
00148     LWLockId    content_lock;   /* to lock access to buffer contents */
00149 } BufferDesc;
00150 /* Map a descriptor to its buffer number: buf_id counts from 0, so the result is buf_id + 1. */
00151 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
00152 
00153 /*
00154  * The freeNext field is either the index of the next freelist entry,
00155  * or one of these special values:
00156  */
00157 #define FREENEXT_END_OF_LIST    (-1)    /* no next entry: end of the freelist */
00158 #define FREENEXT_NOT_IN_LIST    (-2)    /* buffer is not in the freelist */
00159 
00160 /*
00161  * Macros for acquiring/releasing a shared buffer header's buf_hdr_lock spinlock.
00162  * Do not apply these to local buffers!
00163  *
00164  * Note: as a general coding rule, if you are using these then you probably
00165  * need to be using a volatile-qualified pointer to the buffer header, to
00166  * ensure that the compiler doesn't rearrange accesses to the header to
00167  * occur before or after the spinlock is acquired/released.
00168  */
00169 #define LockBufHdr(bufHdr)      SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
00170 #define UnlockBufHdr(bufHdr)    SpinLockRelease(&(bufHdr)->buf_hdr_lock)
00171 
00172 
00173 /* defined in buf_init.c; presumably the shared buffer headers -- confirm there */
00174 extern PGDLLIMPORT BufferDesc *BufferDescriptors;
00175 
00176 /* defined in localbuf.c; presumably the local buffer headers -- confirm there */
00177 extern BufferDesc *LocalBufferDescriptors;
00178 
00179 
00180 /*
00181  * Internal routines: only called by bufmgr
00182  */
00183 
00184 /* freelist.c -- Strategy* routines (declared here, defined in freelist.c) */
00185 extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
00186                   bool *lock_held);
00187 extern void StrategyFreeBuffer(volatile BufferDesc *buf);
00188 extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
00189                      volatile BufferDesc *buf);
00190 
00191 extern int  StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
00192 extern void StrategyNotifyBgWriter(Latch *bgwriterLatch);
00193 
00194 extern Size StrategyShmemSize(void);
00195 extern void StrategyInitialize(bool init);
00196 
00197 /* buf_table.c -- BufTable* routines; hashcode args are presumably BufTableHashCode() results */
00198 extern Size BufTableShmemSize(int size);
00199 extern void InitBufTable(int size);
00200 extern uint32 BufTableHashCode(BufferTag *tagPtr);
00201 extern int  BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
00202 extern int  BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
00203 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
00204 
00205 /* localbuf.c -- routines operating on local buffers (declared here, defined in localbuf.c) */
00206 extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
00207                     BlockNumber blockNum);
00208 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
00209                  BlockNumber blockNum, bool *foundPtr);
00210 extern void MarkLocalBufferDirty(Buffer buffer);
00211 extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
00212                             BlockNumber firstDelBlock);
00213 extern void DropRelFileNodeAllLocalBuffers(RelFileNode rnode);
00214 extern void AtEOXact_LocalBuffers(bool isCommit);
00215 
00216 #endif   /* BUFMGR_INTERNALS_H */