Header And Logo

PostgreSQL
| The world's most advanced open source database.

xlog.h

Go to the documentation of this file.
00001 /*
00002  * xlog.h
00003  *
00004  * PostgreSQL transaction log manager
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  * src/include/access/xlog.h
00010  */
00011 #ifndef XLOG_H
00012 #define XLOG_H
00013 
00014 #include "access/rmgr.h"
00015 #include "access/xlogdefs.h"
00016 #include "datatype/timestamp.h"
00017 #include "lib/stringinfo.h"
00018 #include "storage/buf.h"
00019 #include "utils/pg_crc.h"
00020 
00021 /*
00022  * The overall layout of an XLOG record is:
00023  *      Fixed-size header (XLogRecord struct)
00024  *      rmgr-specific data
00025  *      BkpBlock
00026  *      backup block data
00027  *      BkpBlock
00028  *      backup block data
00029  *      ...
00030  *
00031  * where there can be zero to four backup blocks (as signaled by xl_info flag
00032  * bits).  XLogRecord structs always start on MAXALIGN boundaries in the WAL
00033  * files, and we round up SizeOfXLogRecord so that the rmgr data is also
00034  * guaranteed to begin on a MAXALIGN boundary.  However, no padding is added
00035  * to align BkpBlock structs or backup block data.
00036  *
00037  * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
00038  * and also not any backup blocks.  xl_tot_len counts everything.  Neither
00039  * length field is rounded up to an alignment boundary.
00040  */
00041 typedef struct XLogRecord
00042 {
00043     uint32      xl_tot_len;     /* total len of entire record (header + rmgr data + backup blocks); not alignment-rounded */
00044     TransactionId xl_xid;       /* xact id */
00045     uint32      xl_len;         /* len of rmgr data only (excludes this header and any backup blocks); not alignment-rounded */
00046     uint8       xl_info;        /* flag bits: low 4 bits (XLR_INFO_MASK) are used by XLOG, high 4 may be used by rmgr */
00047     RmgrId      xl_rmid;        /* resource manager for this record */
00048     /* 2 bytes of padding here, initialize to zero */
00049     XLogRecPtr  xl_prev;        /* ptr to previous record in log */
00050     pg_crc32    xl_crc;         /* CRC for this record */
00051 
00052     /* If MAXALIGN==8, there are 4 wasted bytes here */
00053 
00054     /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT (rmgr data begins SizeOfXLogRecord bytes in; see XLogRecGetData) */
00055 
00056 } XLogRecord;
00057 
00058 #define SizeOfXLogRecord    MAXALIGN(sizeof(XLogRecord))    /* header size rounded up so that the rmgr data also begins on a MAXALIGN boundary */
00059 
00060 #define XLogRecGetData(record)  ((char*) (record) + SizeOfXLogRecord)   /* char pointer to the first byte past the rounded-up header, i.e. the rmgr data */
00061 
00062 /*
00063  * XLOG uses only low 4 bits of xl_info.  High 4 bits may be used by rmgr.
00064  */
00065 #define XLR_INFO_MASK           0x0F    /* the low 4 bits of xl_info, i.e. the bits XLOG itself uses */
00066 
00067 /*
00068  * If we backed up any disk blocks with the XLOG record, we use flag bits in
00069  * xl_info to signal it.  We support backup of up to 4 disk blocks per XLOG
00070  * record.
00071  */
00072 #define XLR_BKP_BLOCK_MASK      0x0F    /* all info bits used for bkp blocks (same value as XLR_INFO_MASK) */
00073 #define XLR_MAX_BKP_BLOCKS      4       /* one xl_info flag bit per backup block */
00074 #define XLR_BKP_BLOCK(iblk)     (0x08 >> (iblk))        /* iblk in 0..3: block 0 -> 0x08, 1 -> 0x04, 2 -> 0x02, 3 -> 0x01 */
00075 
00076 /* Sync methods */
00077 #define SYNC_METHOD_FSYNC       0
00078 #define SYNC_METHOD_FDATASYNC   1
00079 #define SYNC_METHOD_OPEN        2       /* for O_SYNC */
00080 #define SYNC_METHOD_FSYNC_WRITETHROUGH  3
00081 #define SYNC_METHOD_OPEN_DSYNC  4       /* for O_DSYNC */
00082 extern int  sync_method;        /* NOTE(review): presumably holds one of the SYNC_METHOD_* codes above -- confirm against the definition */
00083 
00084 /*
00085  * The rmgr data to be written by XLogInsert() is defined by a chain of
00086  * one or more XLogRecData structs.  (Multiple structs would be used when
00087  * parts of the source data aren't physically adjacent in memory, or when
00088  * multiple associated buffers need to be specified.)
00089  *
00090  * If buffer is valid then XLOG will check if buffer must be backed up
00091  * (ie, whether this is first change of that page since last checkpoint).
00092  * If so, the whole page contents are attached to the XLOG record, and XLOG
00093  * sets XLR_BKP_BLOCK(N) bit in xl_info.  Note that the buffer must be pinned
00094  * and exclusive-locked by the caller, so that it won't change under us.
00095  * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
00096  * this XLogRecData struct into the XLOG record, since we assume it's present
00097  * in the buffer.  Therefore, rmgr redo routines MUST pay attention to
00098  * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
00099  * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
00100  * value (ignoring InvalidBuffer) appearing in the rdata chain.
00101  *
00102  * When buffer is valid, caller must set buffer_std to indicate whether the
00103  * page uses standard pd_lower/pd_upper header fields.  If this is true, then
00104  * XLOG is allowed to omit the free space between pd_lower and pd_upper from
00105  * the backed-up page image.  Note that even when buffer_std is false, the
00106  * page MUST have an LSN field as its first eight bytes!
00107  *
00108  * Note: data can be NULL to indicate no rmgr data associated with this chain
00109  * entry.  This can be sensible (ie, not a wasted entry) if buffer is valid.
00110  * The implication is that the buffer has been changed by the operation being
00111  * logged, and so may need to be backed up, but the change can be redone using
00112  * only information already present elsewhere in the XLOG entry.
00113  */
00114 typedef struct XLogRecData
00115 {
00116     char       *data;           /* start of rmgr data to include; may be NULL when this entry carries no rmgr data */
00117     uint32      len;            /* length of rmgr data to include */
00118     Buffer      buffer;         /* buffer associated with data, if any; caller must hold it pinned and exclusive-locked */
00119     bool        buffer_std;     /* buffer has standard pd_lower/pd_upper; caller must set this whenever buffer is valid */
00120     struct XLogRecData *next;   /* next struct in chain, or NULL */
00121 } XLogRecData;
00122 
00123 extern PGDLLIMPORT TimeLineID ThisTimeLineID;   /* current TLI */
00124 
00125 /*
00126  * Prior to 8.4, all activity during recovery was carried out by the startup
00127  * process. This local variable continues to be used in many parts of the
00128  * code to indicate actions taken by RecoveryManagers. Other processes that
00129  * potentially perform work during recovery should check RecoveryInProgress()
00130  * instead of this variable.  See XLogCtl notes in xlog.c.
00131  */
00132 extern bool InRecovery;         /* only valid in the startup process */
00133 
00134 /*
00135  * Like InRecovery, standbyState is only valid in the startup process.
00136  * In all other processes it will have the value STANDBY_DISABLED (so
00137  * InHotStandby will read as FALSE).
00138  *
00139  * In DISABLED state, we're performing crash recovery or hot standby was
00140  * disabled in postgresql.conf.
00141  *
00142  * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but
00143  * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record
00144  * to initialize our master-transaction tracking system.
00145  *
00146  * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING
00147  * state. The tracked information might still be incomplete, so we can't allow
00148  * connections yet, but redo functions must update the in-memory state when
00149  * appropriate.
00150  *
00151  * In SNAPSHOT_READY mode, we have full knowledge of transactions that are
00152  * (or were) running in the master at the current WAL location. Snapshots
00153  * can be taken, and read-only queries can be run.
00154  */
00155 typedef enum
00156 {
00157     STANDBY_DISABLED,           /* crash recovery, or hot standby disabled in postgresql.conf */
00158     STANDBY_INITIALIZED,        /* InitRecoveryTransactionEnvironment has run; no RUNNING_XACTS or shutdown-checkpoint record processed yet */
00159     STANDBY_SNAPSHOT_PENDING,   /* xact tracking initialized but possibly incomplete; connections not yet allowed */
00160     STANDBY_SNAPSHOT_READY      /* full knowledge of the master's xacts; snapshots and read-only queries allowed */
00161 } HotStandbyState;
00162 
00163 extern HotStandbyState standbyState;    /* only valid in the startup process; reads as STANDBY_DISABLED in all others */
00164 
00165 #define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING)    /* relies on the enum order above: true for SNAPSHOT_PENDING and SNAPSHOT_READY */
00166 
00167 /*
00168  * Recovery target type.
00169  * Only set during a Point in Time recovery, not when standby_mode = on
00170  */
00171 typedef enum
00172 {
00173     RECOVERY_TARGET_UNSET,      /* first enumerator, so it has the value 0 */
00174     RECOVERY_TARGET_XID,        /* NOTE(review): presumably a target given as a transaction id -- confirm */
00175     RECOVERY_TARGET_TIME,       /* NOTE(review): presumably a target given as a timestamp -- confirm */
00176     RECOVERY_TARGET_NAME        /* NOTE(review): presumably a named restore point (cf. XLogRestorePoint) -- confirm */
00177 } RecoveryTargetType;
00178 
00179 extern XLogRecPtr XactLastRecEnd;
00180 
00181 extern bool reachedConsistency;
00182 
00183 /* these variables are GUC parameters related to XLOG */
00184 extern int  CheckPointSegments;
00185 extern int  wal_keep_segments;
00186 extern int  XLOGbuffers;
00187 extern int  XLogArchiveTimeout;
00188 extern bool XLogArchiveMode;    /* tested, together with wal_level, by XLogArchivingActive() */
00189 extern char *XLogArchiveCommand;    /* XLogArchiveCommandSet() reads element [0], so this must be non-NULL when that macro is used */
00190 extern bool EnableHotStandby;
00191 extern bool fullPageWrites;
00192 extern bool log_checkpoints;
00193 
00194 /* WAL levels */
00195 typedef enum WalLevel
00196 {
00197     WAL_LEVEL_MINIMAL = 0,      /* lowest level; values must stay in ascending order, the macros below compare with >= */
00198     WAL_LEVEL_ARCHIVE,          /* threshold for XLogArchivingActive() and XLogIsNeeded() */
00199     WAL_LEVEL_HOT_STANDBY       /* threshold for XLogStandbyInfoActive() */
00200 } WalLevel;
00201 extern int  wal_level;          /* declared as int, but compared against the WalLevel enumerators below */
00202 
00203 #define XLogArchivingActive()   (XLogArchiveMode && wal_level >= WAL_LEVEL_ARCHIVE)
00204 #define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')     /* true when the command string is non-empty */
00205 
00206 /*
00207  * Is WAL-logging necessary for archival or log-shipping, or can we skip
00208  * WAL-logging if we fsync() the data before committing instead?
00209  */
00210 #define XLogIsNeeded() (wal_level >= WAL_LEVEL_ARCHIVE)
00211 
00212 /* Do we need to WAL-log information required only for Hot Standby? */
00213 #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_HOT_STANDBY)
00214 
00215 #ifdef WAL_DEBUG
00216 extern bool XLOG_DEBUG;         /* declared only when WAL_DEBUG is defined at compile time */
00217 #endif                          /* WAL_DEBUG */
00218 
00219 /*
00220  * OR-able request flag bits for checkpoints.  The "cause" bits are used only
00221  * for logging purposes.  Note: the flags must be defined so that it's
00222  * sensible to OR together request flags arising from different requestors.
00223  */
00224 
00225 /* These directly affect the behavior of CreateCheckPoint and subsidiaries */
00226 #define CHECKPOINT_IS_SHUTDOWN  0x0001  /* Checkpoint is for shutdown */
00227 #define CHECKPOINT_END_OF_RECOVERY  0x0002      /* Like shutdown checkpoint,
00228                                                  * but issued at end of WAL
00229                                                  * recovery */
00230 #define CHECKPOINT_IMMEDIATE    0x0004  /* Do it without delays */
00231 #define CHECKPOINT_FORCE        0x0008  /* Force even if no activity */
00232 /* These are important to RequestCheckpoint */
00233 #define CHECKPOINT_WAIT         0x0010  /* Wait for completion */
00234 /* These indicate the cause of a checkpoint request; they are used only for logging purposes */
00235 #define CHECKPOINT_CAUSE_XLOG   0x0020  /* XLOG consumption */
00236 #define CHECKPOINT_CAUSE_TIME   0x0040  /* Elapsed time */
00237 
00238 /* Checkpoint statistics */
00239 typedef struct CheckpointStatsData
00240 {
00241     TimestampTz ckpt_start_t;   /* start of checkpoint */
00242     TimestampTz ckpt_write_t;   /* start of flushing buffers */
00243     TimestampTz ckpt_sync_t;    /* start of fsyncs */
00244     TimestampTz ckpt_sync_end_t;    /* end of fsyncs */
00245     TimestampTz ckpt_end_t;     /* end of checkpoint */
00246 
00247     int         ckpt_bufs_written;      /* # of buffers written */
00248 
00249     int         ckpt_segs_added;    /* # of new xlog segments created */
00250     int         ckpt_segs_removed;      /* # of xlog segments deleted */
00251     int         ckpt_segs_recycled;     /* # of xlog segments recycled */
00252 
00253     int         ckpt_sync_rels; /* # of relations synced */
00254     uint64      ckpt_longest_sync;      /* Longest sync for one relation (NOTE(review): time unit not stated in this header -- confirm) */
00255     uint64      ckpt_agg_sync_time;     /* The sum of all the individual sync
00256                                          * times, which is not necessarily the
00257                                          * same as the total elapsed time for
00258                                          * the entire sync phase. */
00259 } CheckpointStatsData;
00260 
00261 extern CheckpointStatsData CheckpointStats;     /* a single shared instance of the statistics struct above */
00262 
00263 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);     /* rdata: chain of one or more XLogRecData structs defining the rmgr data to write */
00264 extern void XLogFlush(XLogRecPtr RecPtr);
00265 extern bool XLogBackgroundFlush(void);
00266 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
00267 extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);    /* NOTE(review): use_existent is passed by pointer, so possibly in/out -- confirm */
00268 extern int  XLogFileOpen(XLogSegNo segno);
00269 
00270 extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer);
00271 
00272 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
00273 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
00274 
00275 extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
00276                    int block_index,     /* NOTE(review): presumably 0..XLR_MAX_BKP_BLOCKS-1, matching XLR_BKP_BLOCK(iblk) -- confirm */
00277                    bool get_cleanup_lock, bool keep_buffer);
00278 
00279 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
00280 extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);       /* NOTE(review): xl_info presumably mirrors XLogRecord.xl_info -- confirm */
00281 
00282 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
00283 
00284 extern bool RecoveryInProgress(void);   /* what processes other than startup should check during recovery, rather than InRecovery */
00285 extern bool HotStandbyActive(void);
00286 extern bool XLogInsertAllowed(void);
00287 extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);  /* NOTE(review): both pointer arguments look like outputs -- confirm */
00288 extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);  /* NOTE(review): replayTLI looks like an output argument -- confirm, including whether NULL is accepted */
00289 extern XLogRecPtr GetXLogInsertRecPtr(void);
00290 extern XLogRecPtr GetXLogWriteRecPtr(void);
00291 extern bool RecoveryIsPaused(void);
00292 extern void SetRecoveryPause(bool recoveryPause);
00293 extern TimestampTz GetLatestXTime(void);
00294 extern TimestampTz GetCurrentChunkReplayStartTime(void);
00295 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);   /* NOTE(review): ownership of the returned string is not stated here -- confirm who frees it */
00296 
00297 extern void UpdateControlFile(void);
00298 extern uint64 GetSystemIdentifier(void);
00299 extern bool DataChecksumsEnabled(void);
00300 extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
00301 extern Size XLOGShmemSize(void);
00302 extern void XLOGShmemInit(void);
00303 extern void BootStrapXLOG(void);
00304 extern void StartupXLOG(void);
00305 extern void ShutdownXLOG(int code, Datum arg);
00306 extern void InitXLOGAccess(void);
00307 extern void CreateCheckPoint(int flags);       /* flags: OR of the CHECKPOINT_* request bits defined above */
00308 extern bool CreateRestartPoint(int flags);     /* NOTE(review): flags presumably also CHECKPOINT_* bits -- confirm */
00309 extern void XLogPutNextOid(Oid nextOid);
00310 extern XLogRecPtr XLogRestorePoint(const char *rpName);
00311 extern void UpdateFullPageWrites(void);
00312 extern XLogRecPtr GetRedoRecPtr(void);
00313 extern XLogRecPtr GetInsertRecPtr(void);
00314 extern XLogRecPtr GetFlushRecPtr(void);
00315 extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);     /* NOTE(review): both pointer arguments look like outputs -- confirm */
00316 
00317 extern bool CheckPromoteSignal(void);
00318 extern void WakeupRecovery(void);
00319 extern void SetWalWriterSleeping(bool sleeping);
00320 
00321 /*
00322  * Starting/stopping a base backup
00323  */
00324 extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
00325                    TimeLineID *starttli_p, char **labelfile);  /* NOTE(review): starttli_p and labelfile look like output arguments -- confirm */
00326 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
00327                   TimeLineID *stoptli_p);      /* NOTE(review): stoptli_p looks like an output argument -- confirm */
00328 extern void do_pg_abort_backup(void);
00329 
00330 /* File path names (all relative to $PGDATA) */
00331 #define BACKUP_LABEL_FILE       "backup_label"
00332 #define BACKUP_LABEL_OLD        "backup_label.old"     /* NOTE(review): presumably the name a previous label file is kept under -- confirm */
00333 
00334 #endif   /* XLOG_H */