Berkeley DB 4.4.16: /home/huihoo/src/db/db-4.4.16/txn/txn

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1995, 1996
00009  *      The President and Fellows of Harvard University.  All rights reserved.
00010  *
00011  * This code is derived from software contributed to Berkeley by
00012  * Margo Seltzer.
00013  *
00014  * Redistribution and use in source and binary forms, with or without
00015  * modification, are permitted provided that the following conditions
00016  * are met:
00017  * 1. Redistributions of source code must retain the above copyright
00018  *    notice, this list of conditions and the following disclaimer.
00019  * 2. Redistributions in binary form must reproduce the above copyright
00020  *    notice, this list of conditions and the following disclaimer in the
00021  *    documentation and/or other materials provided with the distribution.
00022  * 3. Neither the name of the University nor the names of its contributors
00023  *    may be used to endorse or promote products derived from this software
00024  *    without specific prior written permission.
00025  *
00026  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00027  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00030  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00031  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00032  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00033  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00034  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00035  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00036  * SUCH DAMAGE.
00037  *
00038  * $Id: txn_chkpt.c,v 12.19 2005/10/20 18:57:13 bostic Exp $
00039  */
00040 
00041 #include "db_config.h"
00042 
00043 #ifndef NO_SYSTEM_INCLUDES
00044 #include <sys/types.h>
00045 #include <stdlib.h>
00046 
00047 #if TIME_WITH_SYS_TIME
00048 #include <sys/time.h>
00049 #include <time.h>
00050 #else
00051 #if HAVE_SYS_TIME_H
00052 #include <sys/time.h>
00053 #else
00054 #include <time.h>
00055 #endif
00056 #endif
00057 
00058 #include <string.h>
00059 #endif
00060 
00061 #include "db_int.h"
00062 #include "dbinc/db_shash.h"
00063 #include "dbinc/log.h"
00064 #include "dbinc/mp.h"
00065 #include "dbinc/txn.h"
00066 
00067 /*
00068  * __txn_checkpoint_pp --
00069  *      DB_ENV->txn_checkpoint pre/post processing.
00070  *
00071  * PUBLIC: int __txn_checkpoint_pp
00072  * PUBLIC:     __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
00073  */
00074 int
00075 __txn_checkpoint_pp(dbenv, kbytes, minutes, flags)
00076         DB_ENV *dbenv;
00077         u_int32_t kbytes, minutes, flags;
00078 {
00079         DB_THREAD_INFO *ip;
00080         int ret;
00081 
00082         PANIC_CHECK(dbenv);
00083         ENV_REQUIRES_CONFIG(dbenv,
00084             dbenv->tx_handle, "txn_checkpoint", DB_INIT_TXN);
00085 
00086         /*
00087          * On a replication client, all transactions are read-only; therefore,
00088          * a checkpoint is a null-op.
00089          *
00090          * We permit txn_checkpoint, instead of just rendering it illegal,
00091          * so that an application can just let a checkpoint thread continue
00092          * to operate as it gets promoted or demoted between being a
00093          * master and a client.
00094          */
00095         if (IS_REP_CLIENT(dbenv))
00096                 return (0);
00097 
00098         ENV_ENTER(dbenv, ip);
00099         REPLICATION_WRAP(dbenv,
00100             (__txn_checkpoint(dbenv, kbytes, minutes, flags)), ret);
00101         ENV_LEAVE(dbenv, ip);
00102         return (ret);
00103 }
00104 
00105 /*
00106  * __txn_checkpoint --
00107  *      DB_ENV->txn_checkpoint.
00108  *
00109  * PUBLIC: int __txn_checkpoint
00110  * PUBLIC:      __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
00111  */
00112 int
00113 __txn_checkpoint(dbenv, kbytes, minutes, flags)
00114         DB_ENV *dbenv;
00115         u_int32_t kbytes, minutes, flags;
00116 {
00117         DB_LSN ckp_lsn, last_ckp;
00118         DB_TXNMGR *mgr;
00119         DB_TXNREGION *region;
00120         REGENV *renv;
00121         REGINFO *infop;
00122         time_t last_ckp_time, now;
00123         u_int32_t bytes, gen, id, logflags, mbytes;
00124         int ret;
00125 
00126         ret = gen = 0;
00127         /*
00128          * A client will only call through here during recovery,
00129          * so just sync the Mpool and go home.
00130          */
00131         if (IS_REP_CLIENT(dbenv)) {
00132                 if (MPOOL_ON(dbenv) && (ret = __memp_sync(dbenv, NULL)) != 0) {
00133                         __db_err(dbenv,
00134                     "txn_checkpoint: failed to flush the buffer cache %s",
00135                             db_strerror(ret));
00136                         return (ret);
00137                 } else
00138                         return (0);
00139         }
00140 
00141         mgr = dbenv->tx_handle;
00142         region = mgr->reginfo.primary;
00143         infop = dbenv->reginfo;
00144         renv = infop->primary;
00145         /*
00146          * No mutex is needed as envid is read-only once it is set.
00147          */
00148         id = renv->envid;
00149 
00150         /*
00151          * The checkpoint LSN is an LSN such that all transactions begun before
00152          * it are complete.  Our first guess (corrected below based on the list
00153          * of active transactions) is the last-written LSN.
00154          */
00155         if ((ret = __log_current_lsn(dbenv, &ckp_lsn, &mbytes, &bytes)) != 0)
00156                 return (ret);
00157 
00158         if (!LF_ISSET(DB_FORCE)) {
00159                 /* Don't checkpoint a quiescent database. */
00160                 if (bytes == 0 && mbytes == 0)
00161                         return (0);
00162 
00163                 /*
00164                  * If either kbytes or minutes is non-zero, then only take the
00165                  * checkpoint if more than "minutes" minutes have passed or if
00166                  * more than "kbytes" of log data have been written since the
00167                  * last checkpoint.
00168                  */
00169                 if (kbytes != 0 &&
00170                     mbytes * 1024 + bytes / 1024 >= (u_int32_t)kbytes)
00171                         goto do_ckp;
00172 
00173                 if (minutes != 0) {
00174                         (void)time(&now);
00175 
00176                         TXN_SYSTEM_LOCK(dbenv);
00177                         last_ckp_time = region->time_ckp;
00178                         TXN_SYSTEM_UNLOCK(dbenv);
00179 
00180                         if (now - last_ckp_time >= (time_t)(minutes * 60))
00181                                 goto do_ckp;
00182                 }
00183 
00184                 /*
00185                  * If we checked time and data and didn't go to checkpoint,
00186                  * we're done.
00187                  */
00188                 if (minutes != 0 || kbytes != 0)
00189                         return (0);
00190         }
00191 
00192         /*
00193          * We must single thread checkpoints otherwise the chk_lsn may get out
00194          * of order.  We need to capture the start of the earliest currently
00195          * active transaction (chk_lsn) and then flush all buffers.  While
00196          * doing this we we could then be overtaken by another checkpoint that
00197          * sees a later chk_lsn but competes first.  An archive process could
00198          * then remove a log this checkpoint depends on.
00199          */
00200 do_ckp: MUTEX_LOCK(dbenv, region->mtx_ckp);
00201         if ((ret = __txn_getactive(dbenv, &ckp_lsn)) != 0)
00202                 goto err;
00203 
00204         if (MPOOL_ON(dbenv) && (ret = __memp_sync(dbenv, NULL)) != 0) {
00205                 __db_err(dbenv,
00206                     "txn_checkpoint: failed to flush the buffer cache %s",
00207                     db_strerror(ret));
00208                 goto err;
00209         }
00210 
00211         /*
00212          * Because we can't be a replication client here, and because
00213          * recovery (somewhat unusually) calls txn_checkpoint and expects
00214          * it to write a log message, LOGGING_ON is the correct macro here.
00215          */
00216         if (LOGGING_ON(dbenv)) {
00217                 TXN_SYSTEM_LOCK(dbenv);
00218                 last_ckp = region->last_ckp;
00219                 TXN_SYSTEM_UNLOCK(dbenv);
00220                 if (REP_ON(dbenv) && (ret = __rep_get_gen(dbenv, &gen)) != 0)
00221                         goto err;
00222 
00223                 /*
00224                  * Put out records for the open files before we log
00225                  * the checkpoint.  The records are certain to be at
00226                  * or after ckp_lsn, but before the checkpoint record
00227                  * itself, so they're sure to be included if we start
00228                  * recovery from the ckp_lsn contained in this
00229                  * checkpoint.
00230                  */
00231                 logflags = DB_LOG_PERM | DB_LOG_CHKPNT;
00232                 if (!IS_RECOVERING(dbenv))
00233                         logflags |= DB_FLUSH;
00234                 if ((ret = __dbreg_log_files(dbenv)) != 0 ||
00235                     (ret = __txn_ckp_log(dbenv, NULL, &ckp_lsn, logflags,
00236                     &ckp_lsn, &last_ckp, (int32_t)time(NULL), id, gen)) != 0) {
00237                         __db_err(dbenv,
00238                             "txn_checkpoint: log failed at LSN [%ld %ld] %s",
00239                             (long)ckp_lsn.file, (long)ckp_lsn.offset,
00240                             db_strerror(ret));
00241                         goto err;
00242                 }
00243 
00244                 if ((ret = __txn_updateckp(dbenv, &ckp_lsn)) != 0)
00245                         goto err;
00246         }
00247 
00248 err:    MUTEX_UNLOCK(dbenv, region->mtx_ckp);
00249         return (ret);
00250 }
00251 
00252 /*
00253  * __txn_getactive --
00254  *       Find the oldest active transaction and figure out its "begin" LSN.
00255  *       This is the lowest LSN we can checkpoint, since any record written
00256  *       after it may be involved in a transaction and may therefore need
00257  *       to be undone in the case of an abort.
00258  *
00259  *       We check both the file and offset for 0 since the lsn may be in
00260  *       transition.  If it is then we don't care about this txn because it
00261  *       must be starting after we set the initial value of lsnp in the caller.
00262  *       All txns must initalize their begin_lsn before writing to the log.
00263  *
00264  * PUBLIC: int __txn_getactive __P((DB_ENV *, DB_LSN *));
00265  */
00266 int
00267 __txn_getactive(dbenv, lsnp)
00268         DB_ENV *dbenv;
00269         DB_LSN *lsnp;
00270 {
00271         DB_TXNMGR *mgr;
00272         DB_TXNREGION *region;
00273         TXN_DETAIL *td;
00274 
00275         mgr = dbenv->tx_handle;
00276         region = mgr->reginfo.primary;
00277 
00278         TXN_SYSTEM_LOCK(dbenv);
00279         for (td = SH_TAILQ_FIRST(&region->active_txn, __txn_detail);
00280             td != NULL;
00281             td = SH_TAILQ_NEXT(td, links, __txn_detail))
00282                 if (td->begin_lsn.file != 0 &&
00283                     td->begin_lsn.offset != 0 &&
00284                     log_compare(&td->begin_lsn, lsnp) < 0)
00285                         *lsnp = td->begin_lsn;
00286         TXN_SYSTEM_UNLOCK(dbenv);
00287 
00288         return (0);
00289 }
00290 
00291 /*
00292  * __txn_getckp --
00293  *      Get the LSN of the last transaction checkpoint.
00294  *
00295  * PUBLIC: int __txn_getckp __P((DB_ENV *, DB_LSN *));
00296  */
00297 int
00298 __txn_getckp(dbenv, lsnp)
00299         DB_ENV *dbenv;
00300         DB_LSN *lsnp;
00301 {
00302         DB_LSN lsn;
00303         DB_TXNMGR *mgr;
00304         DB_TXNREGION *region;
00305 
00306         mgr = dbenv->tx_handle;
00307         region = mgr->reginfo.primary;
00308 
00309         TXN_SYSTEM_LOCK(dbenv);
00310         lsn = region->last_ckp;
00311         TXN_SYSTEM_UNLOCK(dbenv);
00312 
00313         if (IS_ZERO_LSN(lsn))
00314                 return (DB_NOTFOUND);
00315 
00316         *lsnp = lsn;
00317         return (0);
00318 }
00319 
00320 /*
00321  * __txn_updateckp --
00322  *      Update the last_ckp field in the transaction region.  This happens
00323  * at the end of a normal checkpoint and also when a replication client
00324  * receives a checkpoint record.
00325  *
00326  * PUBLIC: int __txn_updateckp __P((DB_ENV *, DB_LSN *));
00327  */
00328 int
00329 __txn_updateckp(dbenv, lsnp)
00330         DB_ENV *dbenv;
00331         DB_LSN *lsnp;
00332 {
00333         DB_TXNMGR *mgr;
00334         DB_TXNREGION *region;
00335 
00336         mgr = dbenv->tx_handle;
00337         region = mgr->reginfo.primary;
00338 
00339         /*
00340          * We want to make sure last_ckp only moves forward;  since we drop
00341          * locks above and in log_put, it's possible for two calls to
00342          * __txn_ckp_log to finish in a different order from how they were
00343          * called.
00344          */
00345         TXN_SYSTEM_LOCK(dbenv);
00346         if (log_compare(&region->last_ckp, lsnp) < 0) {
00347                 region->last_ckp = *lsnp;
00348                 (void)time(&region->time_ckp);
00349         }
00350         TXN_SYSTEM_UNLOCK(dbenv);
00351 
00352         return (0);
00353 }