Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

log.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: log.c,v 12.15 2005/10/14 15:20:24 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <ctype.h>
00016 #include <stdlib.h>
00017 #include <string.h>
00018 #endif
00019 
00020 #include "db_int.h"
00021 #include "dbinc/crypto.h"
00022 #include "dbinc/hmac.h"
00023 #include "dbinc/log.h"
00024 #include "dbinc/txn.h"
00025 
00026 static int      __log_init __P((DB_ENV *, DB_LOG *));
00027 static int      __log_recover __P((DB_LOG *));
00028 static size_t   __log_region_size __P((DB_ENV *));
00029 static int      __log_zero __P((DB_ENV *, DB_LSN *, DB_LSN *));
00030 
00031 /*
00032  * __log_open --
00033  *      Internal version of log_open: only called from DB_ENV->open.
00034  *
00035  * PUBLIC: int __log_open __P((DB_ENV *));
00036  */
00037 int
00038 __log_open(dbenv)
00039         DB_ENV *dbenv;
00040 {
00041         DB_LOG *dblp;
00042         LOG *lp;
00043         u_int8_t *bulk;
00044         int region_locked, ret;
00045 
00046         region_locked = 0;
00047 
00048         /* Create/initialize the DB_LOG structure. */
00049         if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOG), &dblp)) != 0)
00050                 return (ret);
00051         dblp->dbenv = dbenv;
00052 
00053         /* Set the default buffer size, if not otherwise configured. */
00054         if (dbenv->lg_bsize == 0)
00055                 dbenv->lg_bsize = F_ISSET(dbenv, DB_ENV_LOG_INMEMORY) ?
00056                     LG_BSIZE_INMEM : LG_BSIZE_DEFAULT;
00057 
00058         /* Join/create the log region. */
00059         dblp->reginfo.dbenv = dbenv;
00060         dblp->reginfo.type = REGION_TYPE_LOG;
00061         dblp->reginfo.id = INVALID_REGION_ID;
00062         dblp->reginfo.flags = REGION_JOIN_OK;
00063         if (F_ISSET(dbenv, DB_ENV_CREATE))
00064                 F_SET(&dblp->reginfo, REGION_CREATE_OK);
00065         if ((ret = __db_r_attach(
00066             dbenv, &dblp->reginfo, __log_region_size(dbenv))) != 0)
00067                 goto err;
00068 
00069         /* If we created the region, initialize it. */
00070         if (F_ISSET(&dblp->reginfo, REGION_CREATE))
00071                 if ((ret = __log_init(dbenv, dblp)) != 0)
00072                         goto err;
00073 
00074         /* Set the local addresses. */
00075         lp = dblp->reginfo.primary =
00076             R_ADDR(&dblp->reginfo, dblp->reginfo.rp->primary);
00077         dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off);
00078 
00079         /*
00080          * If the region is threaded, we have to lock the DBREG list, and we
00081          * need to allocate a mutex for that purpose.
00082          */
00083         if ((ret = __mutex_alloc(dbenv,
00084             MTX_LOG_REGION, DB_MUTEX_THREAD, &dblp->mtx_dbreg)) != 0)
00085                 goto err;
00086 
00087         /*
00088          * Set the handle -- we may be about to run recovery, which allocates
00089          * log cursors.  Log cursors require logging be already configured,
00090          * and the handle being set is what demonstrates that.
00091          *
00092          * If we created the region, run recovery.  If that fails, make sure
00093          * we reset the log handle before cleaning up, otherwise we will try
00094          * and clean up again in the mainline DB_ENV initialization code.
00095          */
00096         dbenv->lg_handle = dblp;
00097 
00098         if (F_ISSET(&dblp->reginfo, REGION_CREATE)) {
00099                 /*
00100                  * We first take the log file size from the environment, if
00101                  * specified.  If that wasn't set, default it.  Regardless,
00102                  * recovery may set it from the persistent information in a
00103                  * log file header.
00104                  */
00105                 if (lp->log_size == 0)
00106                         lp->log_size = F_ISSET(dbenv, DB_ENV_LOG_INMEMORY) ?
00107                             LG_MAX_INMEM : LG_MAX_DEFAULT;
00108 
00109                 if ((ret = __log_recover(dblp)) != 0)
00110                         goto err;
00111 
00112                 /*
00113                  * If the next log file size hasn't been set yet, default it
00114                  * to the current log file size.
00115                  */
00116                 if (lp->log_nsize == 0)
00117                         lp->log_nsize = lp->log_size;
00118 
00119                 /*
00120                  * If we haven't written any log files, write the first one
00121                  * so that checkpoint gets a valid ckp_lsn value.
00122                  */
00123                 if (IS_INIT_LSN(lp->lsn) &&
00124                     (ret = __log_newfile(dblp, NULL, 0)) != 0)
00125                         goto err;
00126 
00127                 /*
00128                  * Initialize replication's next-expected LSN value
00129                  * and replication's bulk buffer.
00130                  */
00131                 lp->ready_lsn = lp->lsn;
00132                 if (IS_ENV_REPLICATED(dbenv)) {
00133                         if ((ret = __db_shalloc(&dblp->reginfo, MEGABYTE, 0,
00134                             &bulk)) != 0)
00135                                 goto err;
00136                         lp->bulk_buf = R_OFFSET(&dblp->reginfo, bulk);
00137                         lp->bulk_len = MEGABYTE;
00138                         lp->bulk_off = 0;
00139                 } else {
00140                         lp->bulk_buf = INVALID_ROFF;
00141                         lp->bulk_len = 0;
00142                         lp->bulk_off = 0;
00143                 }
00144         } else {
00145                 /*
00146                  * A process joining the region may have reset the log file
00147                  * size, too.  If so, it only affects the next log file we
00148                  * create.  We need to check that the size is reasonable given
00149                  * the buffer size in the region.
00150                  */
00151                 LOG_SYSTEM_LOCK(dbenv);
00152                 region_locked = 1;
00153 
00154                  if (dbenv->lg_size != 0) {
00155                         if ((ret =
00156                             __log_check_sizes(dbenv, dbenv->lg_size, 0)) != 0)
00157                                 goto err;
00158 
00159                         lp->log_nsize = dbenv->lg_size;
00160                  }
00161 
00162                 /* Migrate persistent flags from the region into the DB_ENV. */
00163                 if (lp->db_log_autoremove)
00164                         F_SET(dbenv, DB_ENV_LOG_AUTOREMOVE);
00165                 if (lp->db_log_inmemory)
00166                         F_SET(dbenv, DB_ENV_LOG_INMEMORY);
00167 
00168                 LOG_SYSTEM_UNLOCK(dbenv);
00169                 region_locked = 0;
00170         }
00171 
00172         return (0);
00173 
00174 err:    dbenv->lg_handle = NULL;
00175         if (dblp->reginfo.addr != NULL) {
00176                 if (region_locked)
00177                         LOG_SYSTEM_UNLOCK(dbenv);
00178                 (void)__db_r_detach(dbenv, &dblp->reginfo, 0);
00179         }
00180 
00181         (void)__mutex_free(dbenv, &dblp->mtx_dbreg);
00182         __os_free(dbenv, dblp);
00183 
00184         return (ret);
00185 }
00186 
00187 /*
00188  * __log_init --
00189  *      Initialize a log region in shared memory.
00190  */
00191 static int
00192 __log_init(dbenv, dblp)
00193         DB_ENV *dbenv;
00194         DB_LOG *dblp;
00195 {
00196         LOG *lp;
00197         int ret;
00198         void *p;
00199 
00200         /*
00201          * This is the first point where we can validate the buffer size,
00202          * because we know all three settings have been configured (file size,
00203          * buffer size and the in-memory flag).
00204          */
00205         if ((ret =
00206            __log_check_sizes(dbenv, dbenv->lg_size, dbenv->lg_bsize)) != 0)
00207                 return (ret);
00208 
00209         if ((ret = __db_shalloc(&dblp->reginfo,
00210             sizeof(*lp), 0, &dblp->reginfo.primary)) != 0)
00211                 goto mem_err;
00212         dblp->reginfo.rp->primary =
00213             R_OFFSET(&dblp->reginfo, dblp->reginfo.primary);
00214         lp = dblp->reginfo.primary;
00215         memset(lp, 0, sizeof(*lp));
00216 
00217         if ((ret =
00218             __mutex_alloc(dbenv, MTX_LOG_REGION, 0, &lp->mtx_region)) != 0)
00219                 return (ret);
00220 
00221         lp->fid_max = 0;
00222         SH_TAILQ_INIT(&lp->fq);
00223         lp->free_fid_stack = INVALID_ROFF;
00224         lp->free_fids = lp->free_fids_alloced = 0;
00225 
00226         /* Initialize LOG LSNs. */
00227         INIT_LSN(lp->lsn);
00228         INIT_LSN(lp->t_lsn);
00229 
00230         /*
00231          * It's possible to be waiting for an LSN of [1][0], if a replication
00232          * client gets the first log record out of order.  An LSN of [0][0]
00233          * signifies that we're not waiting.
00234          */
00235         ZERO_LSN(lp->waiting_lsn);
00236 
00237         /*
00238          * Log makes note of the fact that it ran into a checkpoint on
00239          * startup if it did so, as a recovery optimization.  A zero
00240          * LSN signifies that it hasn't found one [yet].
00241          */
00242         ZERO_LSN(lp->cached_ckp_lsn);
00243 
00244         if ((ret =
00245             __mutex_alloc(dbenv, MTX_LOG_FILENAME, 0, &lp->mtx_filelist)) != 0)
00246                 return (ret);
00247         if ((ret = __mutex_alloc(dbenv, MTX_LOG_FLUSH, 0, &lp->mtx_flush)) != 0)
00248                 return (ret);
00249 
00250         /* Initialize the buffer. */
00251         if ((ret = __db_shalloc(&dblp->reginfo, dbenv->lg_bsize, 0, &p)) != 0) {
00252 mem_err:        __db_err(dbenv, "Unable to allocate memory for the log buffer");
00253                 return (ret);
00254         }
00255         lp->regionmax = dbenv->lg_regionmax;
00256         lp->buffer_off = R_OFFSET(&dblp->reginfo, p);
00257         lp->buffer_size = dbenv->lg_bsize;
00258         lp->filemode = dbenv->lg_filemode;
00259         lp->log_size = lp->log_nsize = dbenv->lg_size;
00260 
00261         /* Initialize the commit Queue. */
00262         SH_TAILQ_INIT(&lp->free_commits);
00263         SH_TAILQ_INIT(&lp->commits);
00264         lp->ncommit = 0;
00265 
00266         /* Initialize the logfiles list for in-memory logs. */
00267         SH_TAILQ_INIT(&lp->logfiles);
00268         SH_TAILQ_INIT(&lp->free_logfiles);
00269 
00270         /*
00271          * Fill in the log's persistent header.  Don't fill in the log file
00272          * sizes, as they may change at any time and so have to be filled in
00273          * as each log file is created.
00274          */
00275         lp->persist.magic = DB_LOGMAGIC;
00276         lp->persist.version = DB_LOGVERSION;
00277         lp->persist.notused = 0;
00278 
00279         /* Migrate persistent flags from the DB_ENV into the region. */
00280         if (F_ISSET(dbenv, DB_ENV_LOG_AUTOREMOVE))
00281                 lp->db_log_autoremove = 1;
00282         if (F_ISSET(dbenv, DB_ENV_LOG_INMEMORY))
00283                 lp->db_log_inmemory = 1;
00284 
00285         return (0);
00286 }
00287 
00288 /*
00289  * __log_recover --
00290  *      Recover a log.
00291  */
00292 static int
00293 __log_recover(dblp)
00294         DB_LOG *dblp;
00295 {
00296         DBT dbt;
00297         DB_ENV *dbenv;
00298         DB_LOGC *logc;
00299         DB_LSN lsn;
00300         LOG *lp;
00301         u_int32_t cnt, rectype;
00302         int ret;
00303         logfile_validity status;
00304 
00305         logc = NULL;
00306         dbenv = dblp->dbenv;
00307         lp = dblp->reginfo.primary;
00308 
00309         /*
00310          * Find a log file.  If none exist, we simply return, leaving
00311          * everything initialized to a new log.
00312          */
00313         if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0)
00314                 return (ret);
00315         if (cnt == 0)
00316                 return (0);
00317 
00318         /*
00319          * If the last file is an old version, readable or no, start a new
00320          * file.  Don't bother finding the end of the last log file;
00321          * we assume that it's valid in its entirety, since the user
00322          * should have shut down cleanly or run recovery before upgrading.
00323          */
00324         if (status == DB_LV_OLD_READABLE || status == DB_LV_OLD_UNREADABLE) {
00325                 lp->lsn.file = lp->s_lsn.file = cnt + 1;
00326                 lp->lsn.offset = lp->s_lsn.offset = 0;
00327                 goto skipsearch;
00328         }
00329         DB_ASSERT(status == DB_LV_NORMAL);
00330 
00331         /*
00332          * We have the last useful log file and we've loaded any persistent
00333          * information.  Set the end point of the log past the end of the last
00334          * file. Read the last file, looking for the last checkpoint and
00335          * the log's end.
00336          */
00337         lp->lsn.file = cnt + 1;
00338         lp->lsn.offset = 0;
00339         lsn.file = cnt;
00340         lsn.offset = 0;
00341 
00342         /*
00343          * Allocate a cursor and set it to the first record.  This shouldn't
00344          * fail, leave error messages on.
00345          */
00346         if ((ret = __log_cursor(dbenv, &logc)) != 0)
00347                 return (ret);
00348         F_SET(logc, DB_LOG_LOCKED);
00349         memset(&dbt, 0, sizeof(dbt));
00350         if ((ret = __log_c_get(logc, &lsn, &dbt, DB_SET)) != 0)
00351                 goto err;
00352 
00353         /*
00354          * Read to the end of the file.  This may fail at some point, so
00355          * turn off error messages.
00356          */
00357         F_SET(logc, DB_LOG_SILENT_ERR);
00358         while (__log_c_get(logc, &lsn, &dbt, DB_NEXT) == 0) {
00359                 if (dbt.size < sizeof(u_int32_t))
00360                         continue;
00361                 memcpy(&rectype, dbt.data, sizeof(u_int32_t));
00362                 if (rectype == DB___txn_ckp)
00363                         /*
00364                          * If we happen to run into a checkpoint, cache its
00365                          * LSN so that the transaction system doesn't have
00366                          * to walk this log file again looking for it.
00367                          */
00368                         lp->cached_ckp_lsn = lsn;
00369         }
00370         F_CLR(logc, DB_LOG_SILENT_ERR);
00371 
00372         /*
00373          * We now know where the end of the log is.  Set the first LSN that
00374          * we want to return to an application and the LSN of the last known
00375          * record on disk.
00376          */
00377         lp->lsn = lsn;
00378         lp->s_lsn = lsn;
00379         lp->lsn.offset += logc->c_len;
00380         lp->s_lsn.offset += logc->c_len;
00381 
00382         /* Set up the current buffer information, too. */
00383         lp->len = logc->c_len;
00384         lp->a_off = 0;
00385         lp->b_off = 0;
00386         lp->w_off = lp->lsn.offset;
00387 
00388 skipsearch:
00389         if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
00390                 __db_msg(dbenv,
00391                     "Finding last valid log LSN: file: %lu offset %lu",
00392                     (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
00393 
00394 err:    if (logc != NULL)
00395                 (void)__log_c_close(logc);
00396 
00397         return (ret);
00398 }
00399 
00400 /*
00401  * __log_find --
00402  *      Try to find a log file.  If find_first is set, valp will contain
00403  * the number of the first readable log file, else it will contain the number
00404  * of the last log file (which may be too old to read).
00405  *
00406  * PUBLIC: int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *));
00407  */
00408 int
00409 __log_find(dblp, find_first, valp, statusp)
00410         DB_LOG *dblp;
00411         int find_first;
00412         u_int32_t *valp;
00413         logfile_validity *statusp;
00414 {
00415         DB_ENV *dbenv;
00416         LOG *lp;
00417         logfile_validity logval_status, status;
00418         struct __db_filestart *filestart;
00419         u_int32_t clv, logval;
00420         int cnt, fcnt, ret;
00421         const char *dir;
00422         char *c, **names, *p, *q, savech;
00423 
00424         dbenv = dblp->dbenv;
00425         lp = dblp->reginfo.primary;
00426         logval_status = status = DB_LV_NONEXISTENT;
00427 
00428         /* Return a value of 0 as the log file number on failure. */
00429         *valp = 0;
00430 
00431         if (lp->db_log_inmemory) {
00432                 filestart = find_first ?
00433                     SH_TAILQ_FIRST(&lp->logfiles, __db_filestart) :
00434                     SH_TAILQ_LAST(&lp->logfiles, links, __db_filestart);
00435                 if (filestart != NULL) {
00436                         *valp = filestart->file;
00437                         logval_status = DB_LV_NORMAL;
00438                 }
00439                 *statusp = logval_status;
00440                 return (0);
00441         }
00442 
00443         /* Find the directory name. */
00444         if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0)
00445                 return (ret);
00446         if ((q = __db_rpath(p)) == NULL) {
00447                 COMPQUIET(savech, 0);
00448                 dir = PATH_DOT;
00449         } else {
00450                 savech = *q;
00451                 *q = '\0';
00452                 dir = p;
00453         }
00454 
00455         /* Get the list of file names. */
00456         ret = __os_dirlist(dbenv, dir, &names, &fcnt);
00457 
00458         /*
00459          * !!!
00460          * We overwrote a byte in the string with a nul.  Restore the string
00461          * so that the diagnostic checks in the memory allocation code work
00462          * and any error messages display the right file name.
00463          */
00464         if (q != NULL)
00465                 *q = savech;
00466 
00467         if (ret != 0) {
00468                 __db_err(dbenv, "%s: %s", dir, db_strerror(ret));
00469                 __os_free(dbenv, p);
00470                 return (ret);
00471         }
00472 
00473         /* Search for a valid log file name. */
00474         for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) {
00475                 if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0)
00476                         continue;
00477 
00478                 /*
00479                  * Names of the form log\.[0-9]* are reserved for DB.  Other
00480                  * names sharing LFPREFIX, such as "log.db", are legal.
00481                  */
00482                 for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++)
00483                         if (!isdigit((int)*c))
00484                                 break;
00485                 if (*c != '\0')
00486                         continue;
00487 
00488                 /*
00489                  * Use atol, not atoi; if an "int" is 16-bits, the largest
00490                  * log file name won't fit.
00491                  */
00492                 clv = (u_int32_t)atol(names[cnt] + (sizeof(LFPREFIX) - 1));
00493 
00494                 /*
00495                  * If searching for the first log file, we want to return the
00496                  * oldest log file we can read, or, if no readable log files
00497                  * exist, the newest log file we can't read (the crossover
00498                  * point between the old and new versions of the log file).
00499                  *
00500                  * If we're searching for the last log file, we want to return
00501                  * the newest log file, period.
00502                  *
00503                  * Readable log files should never precede unreadable log
00504                  * files, that would mean the admin seriously screwed up.
00505                  */
00506                 if (find_first) {
00507                         if (logval != 0 &&
00508                             status != DB_LV_OLD_UNREADABLE && clv > logval)
00509                                 continue;
00510                 } else
00511                         if (logval != 0 && clv < logval)
00512                                 continue;
00513 
00514                 if ((ret = __log_valid(dblp, clv, 1, NULL, 0, &status)) != 0) {
00515                         __db_err(dbenv, "Invalid log file: %s: %s",
00516                             names[cnt], db_strerror(ret));
00517                         goto err;
00518                 }
00519                 switch (status) {
00520                 case DB_LV_NONEXISTENT:
00521                         /* __log_valid never returns DB_LV_NONEXISTENT. */
00522                         DB_ASSERT(0);
00523                         break;
00524                 case DB_LV_INCOMPLETE:
00525                         /*
00526                          * The last log file may not have been initialized --
00527                          * it's possible to create a log file but not write
00528                          * anything to it.  If performing recovery (that is,
00529                          * if find_first isn't set), ignore the file, it's
00530                          * not interesting.  If we're searching for the first
00531                          * log record, return the file (assuming we don't find
00532                          * something better), as the "real" first log record
00533                          * is likely to be in the log buffer, and we want to
00534                          * set the file LSN for our return.
00535                          */
00536                         if (find_first)
00537                                 goto found;
00538                         break;
00539                 case DB_LV_OLD_UNREADABLE:
00540                         /*
00541                          * If we're searching for the first log file, then we
00542                          * only want this file if we don't yet have a file or
00543                          * already have an unreadable file and this one is
00544                          * newer than that one.  If we're searching for the
00545                          * last log file, we always want this file because we
00546                          * wouldn't be here if it wasn't newer than our current
00547                          * choice.
00548                          */
00549                         if (!find_first || logval == 0 ||
00550                             (status == DB_LV_OLD_UNREADABLE && clv > logval))
00551                                 goto found;
00552                         break;
00553                 case DB_LV_NORMAL:
00554                 case DB_LV_OLD_READABLE:
00555 found:                  logval = clv;
00556                         logval_status = status;
00557                         break;
00558                 }
00559         }
00560 
00561         *valp = logval;
00562 
00563 err:    __os_dirfree(dbenv, names, fcnt);
00564         __os_free(dbenv, p);
00565         *statusp = logval_status;
00566 
00567         return (ret);
00568 }
00569 
00570 /*
00571  * log_valid --
00572  *      Validate a log file.  Returns an error code in the event of
00573  *      a fatal flaw in a the specified log file;  returns success with
00574  *      a code indicating the currentness and completeness of the specified
00575  *      log file if it is not unexpectedly flawed (that is, if it's perfectly
00576  *      normal, if it's zero-length, or if it's an old version).
00577  *
00578  * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int,
00579  * PUBLIC:     DB_FH **, u_int32_t, logfile_validity *));
00580  */
00581 int
00582 __log_valid(dblp, number, set_persist, fhpp, flags, statusp)
00583         DB_LOG *dblp;
00584         u_int32_t number;
00585         int set_persist;
00586         DB_FH **fhpp;
00587         u_int32_t flags;
00588         logfile_validity *statusp;
00589 {
00590         DB_CIPHER *db_cipher;
00591         DB_ENV *dbenv;
00592         DB_FH *fhp;
00593         HDR *hdr;
00594         LOG *lp;
00595         LOGP *persist;
00596         logfile_validity status;
00597         size_t hdrsize, nr, recsize;
00598         int is_hmac, ret;
00599         u_int8_t *tmp;
00600         char *fname;
00601 
00602         dbenv = dblp->dbenv;
00603         db_cipher = dbenv->crypto_handle;
00604         fhp = NULL;
00605         persist = NULL;
00606         status = DB_LV_NORMAL;
00607         tmp = NULL;
00608 
00609         /* Return the file handle to our caller, on request */
00610         if (fhpp != NULL)
00611                 *fhpp = NULL;
00612 
00613         if (flags == 0)
00614                 flags = DB_OSO_RDONLY | DB_OSO_SEQ;
00615         /* Try to open the log file. */
00616         if ((ret = __log_name(dblp, number, &fname, &fhp, flags)) != 0) {
00617                 __os_free(dbenv, fname);
00618                 return (ret);
00619         }
00620 
00621         hdrsize = HDR_NORMAL_SZ;
00622         is_hmac = 0;
00623         recsize = sizeof(LOGP);
00624         if (CRYPTO_ON(dbenv)) {
00625                 hdrsize = HDR_CRYPTO_SZ;
00626                 recsize = sizeof(LOGP);
00627                 recsize += db_cipher->adj_size(recsize);
00628                 is_hmac = 1;
00629         }
00630         if ((ret = __os_calloc(dbenv, 1, recsize + hdrsize, &tmp)) != 0)
00631                 goto err;
00632 
00633         hdr = (HDR *)tmp;
00634         persist = (LOGP *)(tmp + hdrsize);
00635         /*
00636          * Try to read the header.  This can fail if the log is truncated, or
00637          * if we find a preallocated log file where the header has not yet been
00638          * written, so we need to check whether the header is zero-filled.
00639          */
00640         if ((ret = __os_read(dbenv, fhp, tmp, recsize + hdrsize, &nr)) != 0 ||
00641             nr != recsize + hdrsize ||
00642             (hdr->len == 0 && persist->magic == 0 && persist->log_size == 0)) {
00643                 if (ret == 0)
00644                         status = DB_LV_INCOMPLETE;
00645                 else
00646                         /*
00647                          * The error was a fatal read error, not just an
00648                          * incompletely initialized log file.
00649                          */
00650                         __db_err(dbenv, "Ignoring log file: %s: %s",
00651                             fname, db_strerror(ret));
00652                 goto err;
00653         }
00654 
00655         /*
00656          * Now we have to validate the persistent record.  We have
00657          * several scenarios we have to deal with:
00658          *
00659          * 1.  User has crypto turned on:
00660          *      - They're reading an old, unencrypted log file
00661          *        .  We will fail the record size match check below.
00662          *      - They're reading a current, unencrypted log file
00663          *        .  We will fail the record size match check below.
00664          *      - They're reading an old, encrypted log file [NOT YET]
00665          *        .  After decryption we'll fail the version check.  [NOT YET]
00666          *      - They're reading a current, encrypted log file
00667          *        .  We should proceed as usual.
00668          * 2.  User has crypto turned off:
00669          *      - They're reading an old, unencrypted log file
00670          *        .  We will fail the version check.
00671          *      - They're reading a current, unencrypted log file
00672          *        .  We should proceed as usual.
00673          *      - They're reading an old, encrypted log file [NOT YET]
00674          *        .  We'll fail the magic number check (it is encrypted).
00675          *      - They're reading a current, encrypted log file
00676          *        .  We'll fail the magic number check (it is encrypted).
00677          */
00678         if (CRYPTO_ON(dbenv)) {
00679                 /*
00680                  * If we are trying to decrypt an unencrypted log
00681                  * we can only detect that by having an unreasonable
00682                  * data length for our persistent data.
00683                  */
00684                 if ((hdr->len - hdrsize) != sizeof(LOGP)) {
00685                         __db_err(dbenv, "log record size mismatch");
00686                         goto err;
00687                 }
00688                 /* Check the checksum and decrypt. */
00689                 if ((ret = __db_check_chksum(dbenv, db_cipher, &hdr->chksum[0],
00690                     (u_int8_t *)persist, hdr->len - hdrsize, is_hmac)) != 0) {
00691                         __db_err(dbenv, "log record checksum mismatch");
00692                         goto err;
00693                 }
00694                 if ((ret = db_cipher->decrypt(dbenv, db_cipher->data,
00695                     &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0)
00696                         goto err;
00697         }
00698 
00699         /* Validate the header. */
00700         if (persist->magic != DB_LOGMAGIC) {
00701                 __db_err(dbenv,
00702                     "Ignoring log file: %s: magic number %lx, not %lx",
00703                     fname, (u_long)persist->magic, (u_long)DB_LOGMAGIC);
00704                 ret = EINVAL;
00705                 goto err;
00706         }
00707 
00708         /*
00709          * Set our status code to indicate whether the log file belongs to an
00710          * unreadable or readable old version; leave it alone if and only if
00711          * the log file version is the current one.
00712          */
00713         if (persist->version > DB_LOGVERSION) {
00714                 /* This is a fatal error--the log file is newer than DB. */
00715                 __db_err(dbenv,
00716                     "Unacceptable log file %s: unsupported log version %lu",
00717                     fname, (u_long)persist->version);
00718                 ret = EINVAL;
00719                 goto err;
00720         } else if (persist->version < DB_LOGOLDVER) {
00721                 status = DB_LV_OLD_UNREADABLE;
00722                 /* This is a non-fatal error, but give some feedback. */
00723                 __db_err(dbenv,
00724                     "Skipping log file %s: historic log version %lu",
00725                     fname, (u_long)persist->version);
00726                 /*
00727                  * We don't want to set persistent info based on an unreadable
00728                  * region, so jump to "err".
00729                  */
00730                 goto err;
00731         } else if (persist->version < DB_LOGVERSION)
00732                 status = DB_LV_OLD_READABLE;
00733 
00734         /*
00735          * Only if we have a current log do we verify the checksum.  We could
00736          * not check the checksum before checking the magic and version because
00737          * old log headers put the length and checksum in a different location.
00738          */
00739         if (!CRYPTO_ON(dbenv) && ((ret = __db_check_chksum(dbenv,
00740             db_cipher, &hdr->chksum[0], (u_int8_t *)persist,
00741             hdr->len - hdrsize, is_hmac)) != 0)) {
00742                 __db_err(dbenv, "log record checksum mismatch");
00743                 goto err;
00744         }
00745 
00746         /*
00747          * If the log is readable so far and we're doing system initialization,
00748          * set the region's persistent information based on the headers.
00749          *
00750          * Override the current log file size.
00751          */
00752         if (set_persist) {
00753                 lp = dblp->reginfo.primary;
00754                 lp->log_size = persist->log_size;
00755         }
00756 
00757 err:    if (fname != NULL)
00758                 __os_free(dbenv, fname);
00759         if (ret == 0 && fhpp != NULL)
00760                 *fhpp = fhp;
00761         else
00762                 /* Must close on error or if we only used it locally. */
00763                 (void)__os_closehandle(dbenv, fhp);
00764         if (tmp != NULL)
00765                 __os_free(dbenv, tmp);
00766 
00767         *statusp = status;
00768 
00769         return (ret);
00770 }
00771 
00772 /*
00773  * __log_dbenv_refresh --
00774  *      Clean up after the log system on a close or failed open.
00775  *
00776  * PUBLIC: int __log_dbenv_refresh __P((DB_ENV *));
00777  */
00778 int
00779 __log_dbenv_refresh(dbenv)
00780         DB_ENV *dbenv;
00781 {
00782         DB_LOG *dblp;
00783         LOG *lp;
00784         REGINFO *reginfo;
00785         struct __fname *fnp;
00786         int ret, t_ret;
00787 
00788         dblp = dbenv->lg_handle;
00789         reginfo = &dblp->reginfo;
00790         lp = reginfo->primary;
00791 
00792         /* We may have opened files as part of XA; if so, close them. */
00793         ret = __dbreg_close_files(dbenv);
00794 
00795         /*
00796          * After we close the files, check for any unlogged closes left in
00797          * the shared memory queue.  If we find any, we need to panic the
00798          * region.  Note, just set "ret" -- a panic overrides any previously
00799          * set error return.
00800          */
00801         for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); fnp != NULL;
00802             fnp = SH_TAILQ_NEXT(fnp, q, __fname))
00803                 if (F_ISSET(fnp, DB_FNAME_NOTLOGGED))
00804                         ret = __db_panic(dbenv, EINVAL);
00805 
00806         /*
00807          * If a private region, return the memory to the heap.  Not needed for
00808          * filesystem-backed or system shared memory regions, that memory isn't
00809          * owned by any particular process.
00810          */
00811         if (F_ISSET(dbenv, DB_ENV_PRIVATE)) {
00812                 /* Discard the flush mutex. */
00813                 if ((t_ret =
00814                     __mutex_free(dbenv, &lp->mtx_flush)) != 0 && ret == 0)
00815                         ret = t_ret;
00816 
00817                 /* Discard the buffer. */
00818                 __db_shalloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off));
00819 
00820                 /* Discard stack of free file IDs. */
00821                 if (lp->free_fid_stack != INVALID_ROFF)
00822                         __db_shalloc_free(reginfo,
00823                             R_ADDR(reginfo, lp->free_fid_stack));
00824         }
00825 
00826         /* Discard the per-thread DBREG mutex. */
00827         if ((t_ret = __mutex_free(dbenv, &dblp->mtx_dbreg)) != 0 && ret == 0)
00828                 ret = t_ret;
00829 
00830         /* Detach from the region. */
00831         if ((t_ret = __db_r_detach(dbenv, reginfo, 0)) != 0 && ret == 0)
00832                 ret = t_ret;
00833 
00834         /* Close open files, release allocated memory. */
00835         if (dblp->lfhp != NULL) {
00836                 if ((t_ret =
00837                     __os_closehandle(dbenv, dblp->lfhp)) != 0 && ret == 0)
00838                         ret = t_ret;
00839                 dblp->lfhp = NULL;
00840         }
00841         if (dblp->dbentry != NULL)
00842                 __os_free(dbenv, dblp->dbentry);
00843 
00844         __os_free(dbenv, dblp);
00845 
00846         dbenv->lg_handle = NULL;
00847         return (ret);
00848 }
00849 
00850 /*
00851  * __log_get_cached_ckp_lsn --
00852  *      Retrieve any last checkpoint LSN that we may have found on startup.
00853  *
00854  * PUBLIC: int __log_get_cached_ckp_lsn __P((DB_ENV *, DB_LSN *));
00855  */
00856 int
00857 __log_get_cached_ckp_lsn(dbenv, ckp_lsnp)
00858         DB_ENV *dbenv;
00859         DB_LSN *ckp_lsnp;
00860 {
00861         DB_LOG *dblp;
00862         LOG *lp;
00863 
00864         dblp = (DB_LOG *)dbenv->lg_handle;
00865         lp = (LOG *)dblp->reginfo.primary;
00866 
00867         LOG_SYSTEM_LOCK(dbenv);
00868         *ckp_lsnp = lp->cached_ckp_lsn;
00869         LOG_SYSTEM_UNLOCK(dbenv);
00870 
00871         return (0);
00872 }
00873 
00874 /*
00875  * __log_region_mutex_count --
00876  *      Return the number of mutexes the log region will need.
00877  *
00878  * PUBLIC: u_int32_t __log_region_mutex_count __P((DB_ENV *));
00879  */
00880 u_int32_t
00881 __log_region_mutex_count(dbenv)
00882         DB_ENV *dbenv;
00883 {
00884         /*
00885          * We need a few assorted mutexes, and one per transaction waiting
00886          * on the group commit list.  We can't know how many that will be,
00887          * but it should be bounded by the maximum active transactions.
00888          */
00889         return (dbenv->tx_max + 5);
00890 }
00891 
00892 /*
00893  * __log_region_size --
00894  *      Return the amount of space needed for the log region.
00895  *      Make the region large enough to hold txn_max transaction
00896  *      detail structures  plus some space to hold thread handles
00897  *      and the beginning of the shalloc region and anything we
00898  *      need for mutex system resource recording.
00899  */
00900 static size_t
00901 __log_region_size(dbenv)
00902         DB_ENV *dbenv;
00903 {
00904         size_t s;
00905 
00906         s = dbenv->lg_regionmax + dbenv->lg_bsize;
00907 
00908         /*
00909          * If running with replication, add in space for bulk buffer.
00910          * Allocate a megabyte and a little bit more space.
00911          */
00912         if (IS_ENV_REPLICATED(dbenv))
00913                 s += MEGABYTE;
00914 
00915         return (s);
00916 }
00917 
00918 /*
00919  * __log_vtruncate
00920  *      This is a virtual truncate.  We set up the log indicators to
00921  * make everyone believe that the given record is the last one in the
00922  * log.  Returns with the next valid LSN (i.e., the LSN of the next
00923  * record to be written). This is used in replication to discard records
00924  * in the log file that do not agree with the master.
00925  *
00926  * PUBLIC: int __log_vtruncate __P((DB_ENV *, DB_LSN *, DB_LSN *, DB_LSN *));
00927  */
00928 int
00929 __log_vtruncate(dbenv, lsn, ckplsn, trunclsn)
00930         DB_ENV *dbenv;
00931         DB_LSN *lsn, *ckplsn, *trunclsn;
00932 {
00933         DBT log_dbt;
00934         DB_LOG *dblp;
00935         DB_LOGC *logc;
00936         DB_LSN end_lsn;
00937         LOG *lp;
00938         u_int32_t bytes, c_len;
00939         int ret, t_ret;
00940 
00941         /* Need to find out the length of this soon-to-be-last record. */
00942         if ((ret = __log_cursor(dbenv, &logc)) != 0)
00943                 return (ret);
00944         memset(&log_dbt, 0, sizeof(log_dbt));
00945         ret = __log_c_get(logc, lsn, &log_dbt, DB_SET);
00946         c_len = logc->c_len;
00947         if ((t_ret = __log_c_close(logc)) != 0 && ret == 0)
00948                 ret = t_ret;
00949         if (ret != 0)
00950                 return (ret);
00951 
00952         /* Now do the truncate. */
00953         dblp = (DB_LOG *)dbenv->lg_handle;
00954         lp = (LOG *)dblp->reginfo.primary;
00955 
00956         LOG_SYSTEM_LOCK(dbenv);
00957 
00958         /*
00959          * Flush the log so we can simply initialize the in-memory buffer
00960          * after the truncate.
00961          */
00962         if ((ret = __log_flush_int(dblp, NULL, 0)) != 0)
00963                 goto err;
00964 
00965         end_lsn = lp->lsn;
00966         lp->lsn = *lsn;
00967         lp->len = c_len;
00968         lp->lsn.offset += lp->len;
00969 
00970         if (lp->db_log_inmemory &&
00971             (ret = __log_inmem_lsnoff(dblp, &lp->lsn, &lp->b_off)) != 0)
00972                 goto err;
00973 
00974         /*
00975          * I am going to assume that the number of bytes written since
00976          * the last checkpoint doesn't exceed a 32-bit number.
00977          */
00978         DB_ASSERT(lp->lsn.file >= ckplsn->file);
00979         bytes = 0;
00980         if (ckplsn->file != lp->lsn.file) {
00981                 bytes = lp->log_size - ckplsn->offset;
00982                 if (lp->lsn.file > ckplsn->file + 1)
00983                         bytes += lp->log_size *
00984                             ((lp->lsn.file - ckplsn->file) - 1);
00985                 bytes += lp->lsn.offset;
00986         } else
00987                 bytes = lp->lsn.offset - ckplsn->offset;
00988 
00989         lp->stat.st_wc_mbytes += bytes / MEGABYTE;
00990         lp->stat.st_wc_bytes += bytes % MEGABYTE;
00991 
00992         /*
00993          * If the saved lsn is greater than our new end of log, reset it
00994          * to our current end of log.
00995          */
00996         MUTEX_LOCK(dbenv, lp->mtx_flush);
00997         if (log_compare(&lp->s_lsn, lsn) > 0)
00998                 lp->s_lsn = lp->lsn;
00999         MUTEX_UNLOCK(dbenv, lp->mtx_flush);
01000 
01001         /* Initialize the in-region buffer to a pristine state. */
01002         ZERO_LSN(lp->f_lsn);
01003         lp->w_off = lp->lsn.offset;
01004 
01005         if (trunclsn != NULL)
01006                 *trunclsn = lp->lsn;
01007 
01008         /* Truncate the log to the new point. */
01009         if ((ret = __log_zero(dbenv, &lp->lsn, &end_lsn)) != 0)
01010                 goto err;
01011 
01012 err:    LOG_SYSTEM_UNLOCK(dbenv);
01013         return (ret);
01014 }
01015 
01016 /*
01017  * __log_is_outdated --
01018  *      Used by the replication system to identify if a client's logs are too
01019  *      old.
01020  *
01021  * PUBLIC: int __log_is_outdated __P((DB_ENV *, u_int32_t, int *));
01022  */
01023 int
01024 __log_is_outdated(dbenv, fnum, outdatedp)
01025         DB_ENV *dbenv;
01026         u_int32_t fnum;
01027         int *outdatedp;
01028 {
01029         DB_LOG *dblp;
01030         LOG *lp;
01031         char *name;
01032         int ret;
01033         u_int32_t cfile;
01034         struct __db_filestart *filestart;
01035 
01036         dblp = dbenv->lg_handle;
01037 
01038         /*
01039          * The log represented by dbenv is compared to the file number passed
01040          * in fnum.  If the log file fnum does not exist and is lower-numbered
01041          * than the current logs, return *outdatedp non-zero, else we return 0.
01042          */
01043         if (F_ISSET(dbenv, DB_ENV_LOG_INMEMORY)) {
01044                 LOG_SYSTEM_LOCK(dbenv);
01045                 lp = (LOG *)dblp->reginfo.primary;
01046                 filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
01047                 *outdatedp = filestart == NULL ? 0 : (fnum < filestart->file);
01048                 LOG_SYSTEM_UNLOCK(dbenv);
01049                 return (0);
01050         }
01051 
01052         *outdatedp = 0;
01053         if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0)
01054                 return (ret);
01055 
01056         /* If the file exists, we're just fine. */
01057         if (__os_exists(name, NULL) == 0)
01058                 goto out;
01059 
01060         /*
01061          * It didn't exist, decide if the file number is too big or
01062          * too little.  If it's too little, then we need to indicate
01063          * that the LSN is outdated.
01064          */
01065         LOG_SYSTEM_LOCK(dbenv);
01066         lp = (LOG *)dblp->reginfo.primary;
01067         cfile = lp->lsn.file;
01068         LOG_SYSTEM_UNLOCK(dbenv);
01069 
01070         if (cfile > fnum)
01071                 *outdatedp = 1;
01072 out:    __os_free(dbenv, name);
01073         return (ret);
01074 }
01075 
01076 /*
01077  * __log_zero --
01078  *      Zero out the tail of a log after a truncate.
01079  */
01080 static int
01081 __log_zero(dbenv, from_lsn, to_lsn)
01082         DB_ENV *dbenv;
01083         DB_LSN *from_lsn, *to_lsn;
01084 {
01085         DB_FH *fhp;
01086         DB_LOG *dblp;
01087         LOG *lp;
01088         struct __db_filestart *filestart, *nextstart;
01089         size_t nbytes, len, nw;
01090         u_int32_t fn, mbytes, bytes;
01091         u_int8_t buf[4096];
01092         int ret;
01093         char *fname;
01094 
01095         dblp = dbenv->lg_handle;
01096         DB_ASSERT(log_compare(from_lsn, to_lsn) <= 0);
01097         if (log_compare(from_lsn, to_lsn) > 0) {
01098                 __db_err(dbenv,
01099                     "Warning: truncating to point beyond end of log");
01100                 return (0);
01101         }
01102 
01103         lp = (LOG *)dblp->reginfo.primary;
01104         if (lp->db_log_inmemory) {
01105                 /*
01106                  * Remove the first file if it is invalidated by this write.
01107                  * Log records can't be bigger than a file, so we only need to
01108                  * check the first file.
01109                  */
01110                 for (filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
01111                     filestart != NULL && from_lsn->file < filestart->file;
01112                     filestart = nextstart) {
01113                         nextstart = SH_TAILQ_NEXT(filestart,
01114                             links, __db_filestart);
01115                         SH_TAILQ_REMOVE(&lp->logfiles, filestart,
01116                             links, __db_filestart);
01117                         SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, filestart,
01118                             links, __db_filestart);
01119                 }
01120 
01121                 return (0);
01122         }
01123 
01124         /* Close any open file handles so unlinks don't fail. */
01125         if (dblp->lfhp != NULL) {
01126                 (void)__os_closehandle(dbenv, dblp->lfhp);
01127                 dblp->lfhp = NULL;
01128         }
01129 
01130         /* Throw away any extra log files that we have around. */
01131         for (fn = from_lsn->file + 1;; fn++) {
01132                 if (__log_name(dblp, fn, &fname, &fhp, DB_OSO_RDONLY) != 0) {
01133                         __os_free(dbenv, fname);
01134                         break;
01135                 }
01136                 (void)__os_closehandle(dbenv, fhp);
01137                 ret = __os_unlink(dbenv, fname);
01138                 __os_free(dbenv, fname);
01139                 if (ret != 0)
01140                         return (ret);
01141         }
01142 
01143         /* We removed some log files; have to 0 to end of file. */
01144         if ((ret =
01145             __log_name(dblp, from_lsn->file, &fname, &dblp->lfhp, 0)) != 0)
01146                 return (ret);
01147         __os_free(dbenv, fname);
01148         if ((ret = __os_ioinfo(dbenv,
01149             NULL, dblp->lfhp, &mbytes, &bytes, NULL)) != 0)
01150                 goto err;
01151         DB_ASSERT((mbytes * MEGABYTE + bytes) >= from_lsn->offset);
01152         len = (mbytes * MEGABYTE + bytes) - from_lsn->offset;
01153 
01154         memset(buf, 0, sizeof(buf));
01155 
01156         /* Initialize the write position. */
01157         if ((ret = __os_seek(dbenv,
01158             dblp->lfhp, 0, 0, from_lsn->offset, 0, DB_OS_SEEK_SET)) != 0)
01159                 goto err;
01160 
01161         while (len > 0) {
01162                 nbytes = len > sizeof(buf) ? sizeof(buf) : len;
01163                 if ((ret =
01164                     __os_write(dbenv, dblp->lfhp, buf, nbytes, &nw)) != 0)
01165                         goto err;
01166                 len -= nbytes;
01167         }
01168 
01169 err:    (void)__os_closehandle(dbenv, dblp->lfhp);
01170         dblp->lfhp = NULL;
01171 
01172         return (ret);
01173 }
01174 
01175 /*
01176  * __log_inmem_lsnoff --
01177  *      Find the offset in the buffer of a given LSN.
01178  *
01179  * PUBLIC: int __log_inmem_lsnoff __P((DB_LOG *, DB_LSN *, size_t *));
01180  */
01181 int
01182 __log_inmem_lsnoff(dblp, lsn, offsetp)
01183         DB_LOG *dblp;
01184         DB_LSN *lsn;
01185         size_t *offsetp;
01186 {
01187         LOG *lp;
01188         struct __db_filestart *filestart;
01189 
01190         lp = (LOG *)dblp->reginfo.primary;
01191 
01192         SH_TAILQ_FOREACH(filestart, &lp->logfiles, links, __db_filestart)
01193                 if (filestart->file == lsn->file) {
01194                         *offsetp =
01195                             (filestart->b_off + lsn->offset) % lp->buffer_size;
01196                         return (0);
01197                 }
01198 
01199         return (DB_NOTFOUND);
01200 }
01201 
01202 /*
01203  * __log_inmem_newfile --
01204  *      Records the offset of the beginning of a new file in the in-memory
01205  *      buffer.
01206  *
01207  * PUBLIC: int __log_inmem_newfile __P((DB_LOG *, u_int32_t));
01208  */
01209 int
01210 __log_inmem_newfile(dblp, file)
01211         DB_LOG *dblp;
01212         u_int32_t file;
01213 {
01214         HDR hdr;
01215         LOG *lp;
01216         struct __db_filestart *filestart;
01217         int ret;
01218 #ifdef DIAGNOSTIC
01219         struct __db_filestart *first, *last;
01220 #endif
01221 
01222         lp = (LOG *)dblp->reginfo.primary;
01223 
01224         /*
01225          * We write an empty header at the end of every in-memory log file.
01226          * This is used during cursor traversal to indicate when to switch the
01227          * LSN to the next file.
01228          */
01229         if (file > 1) {
01230                 memset(&hdr, 0, sizeof(HDR));
01231                 __log_inmem_copyin(dblp, lp->b_off, &hdr, sizeof(HDR));
01232                 lp->b_off = (lp->b_off + sizeof(HDR)) % lp->buffer_size;
01233         }
01234 
01235         filestart = SH_TAILQ_FIRST(&lp->free_logfiles, __db_filestart);
01236         if (filestart == NULL) {
01237                 if ((ret = __db_shalloc(&dblp->reginfo,
01238                     sizeof(struct __db_filestart), 0, &filestart)) != 0)
01239                         return (ret);
01240                 memset(filestart, 0, sizeof(*filestart));
01241         } else
01242                 SH_TAILQ_REMOVE(&lp->free_logfiles, filestart,
01243                     links, __db_filestart);
01244 
01245         filestart->file = file;
01246         filestart->b_off = lp->b_off;
01247 
01248 #ifdef DIAGNOSTIC
01249         first = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
01250         last = SH_TAILQ_LAST(&(lp)->logfiles, links, __db_filestart);
01251 
01252         /* Check that we don't wrap. */
01253         DB_ASSERT(!first || first == last ||
01254             RINGBUF_LEN(lp, first->b_off, lp->b_off) ==
01255             RINGBUF_LEN(lp, first->b_off, last->b_off) +
01256             RINGBUF_LEN(lp, last->b_off, lp->b_off));
01257 #endif
01258 
01259         SH_TAILQ_INSERT_TAIL(&lp->logfiles, filestart, links);
01260         return (0);
01261 }
01262 
01263 /*
01264  * __log_inmem_chkspace --
01265  *      Ensure that the requested amount of space is available in the buffer,
01266  *      and invalidate the region.
01267  *      Note: assumes that the region lock is held on entry.
01268  *
01269  * PUBLIC: int __log_inmem_chkspace __P((DB_LOG *, size_t));
01270  */
01271 int
01272 __log_inmem_chkspace(dblp, len)
01273         DB_LOG *dblp;
01274         size_t len;
01275 {
01276         DB_ENV *dbenv;
01277         LOG *lp;
01278         DB_LSN active_lsn, old_active_lsn;
01279         struct __db_filestart *filestart;
01280         int ret;
01281 
01282         dbenv = dblp->dbenv;
01283         lp = dblp->reginfo.primary;
01284 
01285         DB_ASSERT(lp->db_log_inmemory);
01286 
01287         /*
01288          * Allow room for an extra header so that we don't need to check for
01289          * space when switching files.
01290          */
01291         len += sizeof(HDR);
01292 
01293         /*
01294          * If transactions are enabled and we're about to fill available space,
01295          * update the active LSN and recheck.  If transactions aren't enabled,
01296          * don't even bother checking: in that case we can always overwrite old
01297          * log records, because we're never going to abort.
01298          */
01299         while (TXN_ON(dbenv) &&
01300             RINGBUF_LEN(lp, lp->b_off, lp->a_off) <= len) {
01301                 old_active_lsn = lp->active_lsn;
01302                 active_lsn = lp->lsn;
01303 
01304                 /*
01305                  * Drop the log region lock so we don't hold it while
01306                  * taking the transaction region lock.
01307                  */
01308                 LOG_SYSTEM_UNLOCK(dbenv);
01309                 if ((ret = __txn_getactive(dbenv, &active_lsn)) != 0)
01310                         return (ret);
01311                 LOG_SYSTEM_LOCK(dbenv);
01312                 active_lsn.offset = 0;
01313 
01314                 /* If we didn't make any progress, give up. */
01315                 if (log_compare(&active_lsn, &old_active_lsn) == 0) {
01316                         __db_err(dbenv,
01317       "In-memory log buffer is full (an active transaction spans the buffer)");
01318                         return (DB_LOG_BUFFER_FULL);
01319                 }
01320 
01321                 /* Make sure we're moving the region LSN forwards. */
01322                 if (log_compare(&active_lsn, &lp->active_lsn) > 0) {
01323                         lp->active_lsn = active_lsn;
01324                         (void)__log_inmem_lsnoff(dblp, &active_lsn,
01325                             &lp->a_off);
01326                 }
01327         }
01328 
01329         /*
01330          * Remove the first file if it is invalidated by this write.
01331          * Log records can't be bigger than a file, so we only need to
01332          * check the first file.
01333          */
01334         filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart);
01335         if (filestart != NULL &&
01336             RINGBUF_LEN(lp, lp->b_off, filestart->b_off) <= len) {
01337                 SH_TAILQ_REMOVE(&lp->logfiles, filestart,
01338                     links, __db_filestart);
01339                 SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, filestart,
01340                     links, __db_filestart);
01341                 lp->f_lsn.file = filestart->file + 1;
01342         }
01343 
01344         return (0);
01345 }
01346 
01347 /*
01348  * __log_inmem_copyout --
01349  *      Copies the given number of bytes from the buffer -- no checking.
01350  *      Note: assumes that the region lock is held on entry.
01351  *
01352  * PUBLIC: void __log_inmem_copyout __P((DB_LOG *, size_t, void *, size_t));
01353  */
01354 void
01355 __log_inmem_copyout(dblp, offset, buf, size)
01356         DB_LOG *dblp;
01357         size_t offset;
01358         void *buf;
01359         size_t size;
01360 {
01361         LOG *lp;
01362         size_t nbytes;
01363 
01364         lp = (LOG *)dblp->reginfo.primary;
01365         nbytes = (offset + size < lp->buffer_size) ?
01366             size : lp->buffer_size - offset;
01367         memcpy(buf, dblp->bufp + offset, nbytes);
01368         if (nbytes < size)
01369                 memcpy((u_int8_t *)buf + nbytes, dblp->bufp, size - nbytes);
01370 }
01371 
01372 /*
01373  * __log_inmem_copyin --
01374  *      Copies the given number of bytes into the buffer -- no checking.
01375  *      Note: assumes that the region lock is held on entry.
01376  *
01377  * PUBLIC: void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t));
01378  */
01379 void
01380 __log_inmem_copyin(dblp, offset, buf, size)
01381         DB_LOG *dblp;
01382         size_t offset;
01383         void *buf;
01384         size_t size;
01385 {
01386         LOG *lp;
01387         size_t nbytes;
01388 
01389         lp = (LOG *)dblp->reginfo.primary;
01390         nbytes = (offset + size < lp->buffer_size) ?
01391             size : lp->buffer_size - offset;
01392         memcpy(dblp->bufp + offset, buf, nbytes);
01393         if (nbytes < size)
01394                 memcpy(dblp->bufp, (u_int8_t *)buf + nbytes, size - nbytes);
01395 }

Generated on Sun Dec 25 12:14:40 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2