Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

log.h

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: log.h,v 12.12 2005/10/20 18:57:05 bostic Exp $
00008  */
00009 
00010 #ifndef _LOG_H_
00011 #define _LOG_H_
00012 
00013 /*******************************************************
00014  * DBREG:
00015  *      The DB file register code keeps track of open files.  It's stored
00016  *      in the log subsystem's shared region, and so appears in the log.h
00017  *      header file, but is logically separate.
00018  *******************************************************/
00019 /*
00020  * DB_ENTRY -- one slot in the per-process table mapping log file ids to DBs.
00021  */
00022 typedef struct __db_entry {
00023         DB      *dbp;                   /* Open dbp for this file id. */
00024         int     deleted;                /* File was not found during open. */
00025 } DB_ENTRY;
00026 
00027 /*
00028  * FNAME --
00029  *      File name and logging id for one file; linked on a file name queue.
00030  */
00031 struct __fname {
00032         SH_TAILQ_ENTRY q;               /* File name queue. */
00033 
00034         int32_t   id;                   /* Logging file id. */
00035         DBTYPE    s_type;               /* Saved DB type. */
00036 
00037         roff_t    name_off;             /* Name offset. */
00038         db_pgno_t meta_pgno;            /* Page number of the meta page. */
00039         u_int8_t  ufid[DB_FILE_ID_LEN]; /* Unique file id. */
00040 
00041         u_int32_t create_txnid;         /*
00042                                          * Txn ID of the DB create, stored so
00043                                          * we can log it at register time.
00044                                          */
00045 #define DB_FNAME_NOTLOGGED      0x01    /* Log of close failed. */
00046 #define DB_FNAME_DURABLE        0x02    /* File is durable. */
00047         u_int32_t flags;                /* DB_FNAME_* bits defined above. */
00048 };
00049 
00050 /* Opcodes carried in file open/close register (DBREG) log records. */
00051 #define DBREG_CHKPNT    1               /* Checkpoint: file name/id dump. */
00052 #define DBREG_CLOSE     2               /* File close. */
00053 #define DBREG_OPEN      3               /* File open. */
00054 #define DBREG_PREOPEN   4               /* Open in mpool only. */
00055 #define DBREG_RCLOSE    5               /* File close after recovery. */
00056 #define DBREG_REOPEN    6               /* Open for in-memory database. */
00057 
00058 /*******************************************************
00059  * LOG:
00060  *      The log subsystem information.
00061  *******************************************************/
00062 struct __db_log;        typedef struct __db_log DB_LOG;         /* Per-process log structure. */
00063 struct __hdr;           typedef struct __hdr HDR;               /* Log record header. */
00064 struct __log;           typedef struct __log LOG;               /* Shared log region. */
00065 struct __log_persist;   typedef struct __log_persist LOGP;      /* Persistent log information. */
00066 
00067 #define LFPREFIX        "log."          /* Log file name prefix. */
00068 #define LFNAME          "log.%010d"     /* Log file name template: 10 zero-padded digits. */
00069 #define LFNAME_V1       "log.%05d"      /* Rev 1 log file name template: 5 zero-padded digits. */
00070 
00071 #define LG_MAX_DEFAULT          (10 * MEGABYTE) /* 10 MB. */
00072 #define LG_MAX_INMEM            (256 * 1024)    /* 256 KB. */
00073 #define LG_BSIZE_DEFAULT        (32 * 1024)     /* 32 KB. */
00074 #define LG_BSIZE_INMEM          (1 * MEGABYTE)  /* 1 MB. */
00075 #define LG_BASE_REGION_SIZE     (60 * 1024)     /* 60 KB. */
00076 
00077 /*
00078  * DB_LOG --
00079  *      Per-process log structure; fields are grouped by locking rule.
00080  */
00081 struct __db_log {
00082         /*
00083          * These fields need to be protected for multi-threaded support.
00084          */
00085         db_mutex_t mtx_dbreg;           /* Mutex for thread protection. */
00086 
00087         DB_ENTRY *dbentry;              /* Recovery file-id mapping. */
00088 #define DB_GROW_SIZE    64
00089         int32_t dbentry_cnt;            /* Entries.  Grows by DB_GROW_SIZE. */
00090 
00091         /*
00092          * These fields are only accessed when the region lock is held, so
00093          * they do not have to be protected by the thread lock as well.
00094          */
00095         u_int32_t lfname;               /* Log file "name". */
00096         DB_FH    *lfhp;                 /* Log file handle. */
00097 
00098         u_int8_t *bufp;                 /* Region buffer. */
00099 
00100         /* These fields are not thread protected. */
00101         DB_ENV   *dbenv;                /* Reference to error information. */
00102         REGINFO   reginfo;              /* Region information. */
00103 
00104 #define DBLOG_RECOVER           0x01    /* We are in recovery. */
00105 #define DBLOG_FORCE_OPEN        0x02    /* Force the DB open even if it appears
00106                                          * to be deleted. */
00107         u_int32_t flags;                /* DBLOG_* bits defined above. */
00108 };
00109 
00110 /*
00111  * HDR --
00112  *      Log record header; the trailing 'size' field is in-memory only.
00113  */
00114 struct __hdr {
00115         u_int32_t prev;                 /* Previous offset. */
00116         u_int32_t len;                  /* Current length. */
00117         u_int8_t  chksum[DB_MAC_KEY];   /* Current checksum. */
00118         u_int8_t  iv[DB_IV_BYTES];      /* IV */
00119         u_int32_t orig_size;            /* Original size of log record */
00120         /* !!! - 'size' is not written to log, must be last in hdr */
00121         size_t    size;                 /* Size of header to use */
00122 };
00123 
00124 /*
00125  * We use HDR internally, and then when we write out, we write out
00126  * prev, len, and then a 4-byte checksum if normal operation or
00127  * a crypto-checksum and IV and original size if running in crypto
00128  * mode.  We must store the original size in case we pad.  Set the
00129  * size when we set up the header.  We compute a DB_MAC_KEY size
00130  * checksum regardless, but we can safely just use the first 4 bytes.
00131  */
00132 #define HDR_NORMAL_SZ   12      /* prev + len + 4-byte checksum. */
00133 #define HDR_CRYPTO_SZ   (12 + DB_MAC_KEY + DB_IV_BYTES) /* Parenthesized so it is safe inside larger expressions. */
00134 
00135 struct __log_persist {                  /* LOGP: persistent log information. */
00136         u_int32_t magic;                /* DB_LOGMAGIC */
00137         u_int32_t version;              /* DB_LOGVERSION */
00138 
00139         u_int32_t log_size;             /* Log file size. */
00140         u_int32_t notused;              /* Historically the log file mode. */
00141 };
00142 
00143 /* Lock/unlock the log region as a whole, using mtx_region of the LOG at lg_handle's reginfo.primary. */
00144 #define LOG_SYSTEM_LOCK(dbenv)                                          \
00145         MUTEX_LOCK(dbenv, ((LOG *)((DB_LOG *)                           \
00146             (dbenv)->lg_handle)->reginfo.primary)->mtx_region)
00147 #define LOG_SYSTEM_UNLOCK(dbenv)                                        \
00148         MUTEX_UNLOCK(dbenv, ((LOG *)((DB_LOG *)                         \
00149             (dbenv)->lg_handle)->reginfo.primary)->mtx_region)
00150 
00151 /*
00152  * LOG --
00153  *      Shared log region.  One of these is allocated in shared memory,
00154  *      and describes the log.
00155  */
00156 struct __log {
00157         db_mutex_t mtx_region;          /* Region mutex. */
00158 
00159         db_mutex_t mtx_filelist;        /* Mutex guarding file name list. */
00160 
00161         LOGP    persist;                /* Persistent information. */
00162 
00163         SH_TAILQ_HEAD(__fq1) fq;        /* List of file names. */
00164         int32_t fid_max;                /* Max fid allocated. */
00165         roff_t  free_fid_stack;         /* Stack of free file ids. */
00166         u_int   free_fids;              /* Height of free fid stack. */
00167         u_int   free_fids_alloced;      /* N free fid slots allocated. */
00168 
00169         /*
00170          * The lsn LSN is the file offset that we're about to write and which
00171          * we will return to the user.
00172          */
00173         DB_LSN    lsn;                  /* LSN at current file offset. */
00174 
00175         /*
00176          * The f_lsn LSN is the LSN (returned to the user) that "owns" the
00177          * first byte of the buffer.  If the record associated with the LSN
00178          * spans buffers, it may not reflect the physical file location of
00179          * the first byte of the buffer.
00180          */
00181         DB_LSN    f_lsn;                /* LSN of first byte in the buffer. */
00182         size_t    b_off;                /* Current offset in the buffer. */
00183         u_int32_t w_off;                /* Current write offset in the file. */
00184         u_int32_t len;                  /* Length of the last record. */
00185 
00186         DB_LSN    active_lsn;           /* Oldest active LSN in the buffer. */
00187         size_t    a_off;                /* Offset in the buffer of first active
00188                                            file. */
00189 
00190         /*
00191          * The s_lsn LSN is the last LSN that we know is on disk, not just
00192          * written, but synced.  This field is protected by the flush mutex
00193          * rather than by the region mutex.
00194          */
00195         db_mutex_t mtx_flush;           /* Mutex guarding flushing. */
00196         int        in_flush;            /* Log flush in progress. */
00197         DB_LSN     s_lsn;               /* LSN of the last sync. */
00198 
00199         DB_LOG_STAT stat;               /* Log statistics. */
00200 
00201         /*
00202          * !!!
00203          * NOTE: the next 11 fields, waiting_lsn, verify_lsn, max_wait_lsn,
00204          * max_perm_lsn, wait_recs, rcvd_recs, ready_lsn and bulk_* are NOT
00205          * protected by the log region lock.  They are protected by
00206          * REP->mtx_clientdb.  If you need access to both, you must acquire
00207          * REP->mtx_clientdb before acquiring the log region lock.
00208          *
00209          * The waiting_lsn is used by the replication system.  It is the
00210          * first LSN that we are holding without putting in the log, because
00211          * we received one or more log records out of order.  Associated with
00212          * the waiting_lsn is the number of log records that we still have to
00213          * receive before we decide that we should request it again.
00214          *
00215          * The max_wait_lsn is used to control retransmission in the face
00216          * of dropped messages.  If we are requesting all records from the
00217          * current gap (i.e., chunk of the log that we are missing), then
00218          * the max_wait_lsn contains the first LSN that we are known to have
00219          * in the __db.rep.db.  If we requested only a single record, then
00220          * the max_wait_lsn has the LSN of that record we requested.
00221          */
00222         DB_LSN    waiting_lsn;          /* First log record after a gap. */
00223         DB_LSN    verify_lsn;           /* LSN we are waiting to verify. */
00224         DB_LSN    max_wait_lsn;         /* Maximum LSN requested. */
00225         DB_LSN    max_perm_lsn;         /* Maximum PERMANENT LSN processed. */
00226         u_int32_t wait_recs;            /* Records to wait before requesting. */
00227         u_int32_t rcvd_recs;            /* Records received while waiting. */
00228         /*
00229          * The ready_lsn is also used by the replication system.  It is the
00230          * next LSN we expect to receive.  It's normally equal to "lsn",
00231          * except at the beginning of a log file, at which point it's set
00232          * to the LSN of the first record of the new file (after the
00233          * header), rather than to 0.
00234          */
00235         DB_LSN    ready_lsn;            /* Next LSN we expect to receive. */
00236         /*
00237          * The bulk_buf is used by replication for bulk transfer.  While this
00238          * is protected by REP->mtx_clientdb, this doesn't contend with the
00239          * above fields because the above are used by clients and the bulk
00240          * fields below are used by a master.
00241          */
00242         roff_t    bulk_buf;             /* Bulk transfer buffer in region. */
00243         uintptr_t bulk_off;             /* Current offset into bulk buffer. */
00244         u_int32_t bulk_len;             /* Length of buffer. */
00245         u_int32_t bulk_flags;           /* Bulk buffer flags. */
00246 
00247         /*
00248          * During initialization, the log system walks forward through the
00249          * last log file to find its end.  If it runs into a checkpoint
00250          * while it's doing so, it caches it here so that the transaction
00251          * system doesn't need to walk through the file again on its
00252          * initialization.
00253          */
00254         DB_LSN  cached_ckp_lsn;         /* Checkpoint cached during log init. */
00255 
00256         u_int32_t regionmax;            /* Configured size of the region. */
00257 
00258         roff_t    buffer_off;           /* Log buffer offset in the region. */
00259         u_int32_t buffer_size;          /* Log buffer size. */
00260 
00261         u_int32_t log_size;             /* Log file's size. */
00262         u_int32_t log_nsize;            /* Next log file's size. */
00263 
00264         int       filemode;             /* Log file permissions mode. */
00265 
00266         /*
00267          * DB_LOG_AUTOREMOVE and DB_LOG_INMEMORY: not protected by a mutex,
00268          * all we care about is if they're zero or non-zero.
00269          */
00270         int       db_log_autoremove;    /* DB_LOG_AUTOREMOVE: zero or non-zero. */
00271         int       db_log_inmemory;      /* DB_LOG_INMEMORY: zero or non-zero. */
00272 
00273         u_int32_t ncommit;              /* Number of txns waiting to commit. */
00274         DB_LSN    t_lsn;                /* LSN of first commit */
00275         SH_TAILQ_HEAD(__commit) commits;/* list of txns waiting to commit. */
00276         SH_TAILQ_HEAD(__free) free_commits;/* free list of commit structs. */
00277 
00278         /*
00279          * In-memory logs maintain a list of the start positions of all log
00280          * files currently active in the in-memory buffer.  This is to make the
00281          * lookup from LSN to log buffer offset efficient.
00282          */
00283         SH_TAILQ_HEAD(__logfile) logfiles;      /* Active log file start positions. */
00284         SH_TAILQ_HEAD(__free_logfile) free_logfiles;    /* Presumably the free list for logfiles entries -- confirm. */
00285 };
00286 
00287 /*
00288  * __db_commit structure --
00289  *      One of these is allocated for each transaction waiting to commit.
00290  */
00291 struct __db_commit {
00292         db_mutex_t      mtx_txnwait;    /* Mutex for txn to wait on. */
00293         DB_LSN          lsn;            /* LSN of commit record. */
00294         SH_TAILQ_ENTRY  links;          /* Either on free or waiting list. */
00295 
00296 #define DB_COMMIT_FLUSH         0x0001  /* Flush the log when you wake up. */
00297         u_int32_t       flags;          /* DB_COMMIT_* bits defined above. */
00298 };
00299 
00300 /*
00301  * CHECK_LSN --
00302  *      Check for the proper progression of Log Sequence Numbers.  If we
00303  *      are rolling forward, the LSN on the page must be greater than or
00304  *      equal to the previous LSN in the log record.  We ignore NOT LOGGED
00305  *      LSNs: the user did an unlogged update, and we should eventually see
00306  *      a log record that matches and continue forward.  If truncate is
00307  *      supported, a ZERO LSN implies a page allocated prior to the recovery
00308  *      start point and truncated later in the log; allocating a later page
00309  *      extends the file, leaving a hole, so we ignore this page until it
00310  *      is truncated again.  When the check trips, the macro stores the
00311  *      result of __db_check_lsn(e, lsn, prev) in the caller's "ret" and
00312  *      jumps to the caller's "out" label.  Expands to a bare if statement.
00313  */
00314 
00315 #ifdef HAVE_FTRUNCATE
00316 #define CHECK_LSN(e, redo, cmp, lsn, prev)                              \
00317         if (DB_REDO(redo) && (cmp) < 0 &&                               \
00318             ((!IS_NOT_LOGGED_LSN(*(lsn)) && !IS_ZERO_LSN(*(lsn))) ||    \
00319             IS_REP_CLIENT(e))) {                                        \
00320                 ret = __db_check_lsn(e, lsn, prev);                     \
00321                 goto out;                                               \
00322         }
00323 #else
00324 #define CHECK_LSN(e, redo, cmp, lsn, prev)                              \
00325         if (DB_REDO(redo) && (cmp) < 0 &&                               \
00326             (!IS_NOT_LOGGED_LSN(*(lsn)) || IS_REP_CLIENT(e))) {         \
00327                 ret = __db_check_lsn(e, lsn, prev);                     \
00328                 goto out;                                               \
00329         }
00330 #endif
00331 
00332 /*
00333  * Helper for in-memory logs -- a file / buffer offset (b_off) pair that
00334  * can be linked on a queue.  NOTE(review): no range check is defined here.
00335  */
00336 struct __db_filestart {
00337         u_int32_t       file;           /* Presumably the log file number -- confirm. */
00338         size_t          b_off;          /* Buffer offset paired with file. */
00339 
00340         SH_TAILQ_ENTRY  links;          /* Queue linkage; presumably LOG's logfiles/free_logfiles -- confirm. */
00341 };
00342 
00343 #define RINGBUF_LEN(lp, start, end) /* Distance from start forward to end, wrapping. */ \
00344         ((end) > (start) ?                                              \
00345             (end) - (start) : (lp)->buffer_size - ((start) - (end)))
00346 
00347 /*
00348  * DB_SET_TXN_LSNP -- for generated logging routines.  Points *llsnp at the
00349  * txn's last_lsn, then follows parent links to the outermost txn and points
00350  * *blsnp at its begin_lsn, but only while that begin_lsn is still zero.
00351  */
00352 #undef DB_SET_TXN_LSNP
00353 #define DB_SET_TXN_LSNP(txn, blsnp, llsnp) do {                         \
00354         TXN_DETAIL *__detail = (txn)->td;                               \
00355         DB_LSN *__first;                                                \
00356         *(llsnp) = &__detail->last_lsn;                                 \
00357         for (; __detail->parent != INVALID_ROFF;                        \
00358             __detail = R_ADDR(&(txn)->mgrp->reginfo, __detail->parent)) \
00359                 ;                                                       \
00360         __first = &__detail->begin_lsn;                                 \
00361         if (IS_ZERO_LSN(*__first))                                      \
00362                 *(blsnp) = __first;                                     \
00363 } while (0)
00364 
00365 /*
00366  * Selectors used in __log_backup to determine which LSN in the
00367  * checkpoint record to compare and return.
00368  */
00369 #define CKPLSN_CMP      0
00370 #define LASTCKP_CMP     1
00371 
00372 /*
00373  * logfile_validity -- validity status of a log file examined by
00374  * __log_valid(); enumerators take implicit values 0..4 in order.
00375  */
00376 typedef enum {
00377         DB_LV_INCOMPLETE,
00378         DB_LV_NONEXISTENT,
00379         DB_LV_NORMAL,
00380         DB_LV_OLD_READABLE,
00381         DB_LV_OLD_UNREADABLE
00382 } logfile_validity;
00383 
00384 #include "dbinc_auto/dbreg_auto.h"
00385 #include "dbinc_auto/dbreg_ext.h"
00386 #include "dbinc_auto/log_ext.h"
00387 #endif /* !_LOG_H_ */

Generated on Sun Dec 25 12:14:22 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2