Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

env_recover.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: env_recover.c,v 12.10 2005/10/19 15:14:11 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef lint
00013 static const char copyright[] =
00014     "Copyright (c) 1996-2005\nSleepycat Software Inc.  All rights reserved.\n";
00015 #endif
00016 
00017 #ifndef NO_SYSTEM_INCLUDES
00018 #include <sys/types.h>
00019 
00020 #if TIME_WITH_SYS_TIME
00021 #include <sys/time.h>
00022 #include <time.h>
00023 #else
00024 #if HAVE_SYS_TIME_H
00025 #include <sys/time.h>
00026 #else
00027 #include <time.h>
00028 #endif
00029 #endif
00030 
00031 #include <string.h>
00032 #endif
00033 
00034 #include "db_int.h"
00035 #include "dbinc/db_page.h"
00036 #include "dbinc/db_shash.h"
00037 #include "dbinc/log.h"
00038 #include "dbinc/txn.h"
00039 #include "dbinc/mp.h"
00040 #include "dbinc/db_am.h"
00041 
00042 static int    __db_log_corrupt __P((DB_ENV *, DB_LSN *));
00043 static int    __log_earliest __P((DB_ENV *, DB_LOGC *, int32_t *, DB_LSN *));
00044 static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int));
00045 
00046 /*
00047  * __db_apprec --
00048  *      Perform recovery.  If max_lsn is non-NULL, then we are trying
00049  * to synchronize this system up with another system that has a max
00050  * LSN of max_lsn, so we need to roll back sufficiently far for that
00051  * to work.  See __log_backup for details.
00052  *
00053  * PUBLIC: int __db_apprec __P((DB_ENV *, DB_LSN *, DB_LSN *, int, u_int32_t));
00054  */
00055 int
00056 __db_apprec(dbenv, max_lsn, trunclsn, update, flags)
00057         DB_ENV *dbenv;
00058         DB_LSN *max_lsn, *trunclsn;
00059         int update;
00060         u_int32_t flags;
00061 {
00062         DBT data;
00063         DB_LOGC *logc;
00064         DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn, tlsn;
00065         DB_TXNHEAD *txninfo;
00066         DB_TXNREGION *region;
00067         REGENV *renv;
00068         REGINFO *infop;
00069         __txn_ckp_args *ckp_args;
00070         time_t now, tlow;
00071         double nfiles;
00072         u_int32_t hi_txn, log_size, txnid;
00073         int32_t low;
00074         int have_rec, progress, ret, t_ret;
00075         char *p, *pass, t1[60], t2[60];
00076 
00077         COMPQUIET(nfiles, (double)0.001);
00078 
00079         logc = NULL;
00080         ckp_args = NULL;
00081         hi_txn = TXN_MAXIMUM;
00082         txninfo = NULL;
00083         pass = "initial";
00084         ZERO_LSN(lsn);
00085 
00086         /*
00087          * XXX
00088          * Get the log size.  No locking required because we're single-threaded
00089          * during recovery.
00090          */
00091         log_size =
00092            ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size;
00093 
00094         /*
00095          * If we need to, update the env handle timestamp.
00096          */
00097         if (update && REP_ON(dbenv)) {
00098                 infop = dbenv->reginfo;
00099                 renv = infop->primary;
00100                 (void)time(&renv->rep_timestamp);
00101         }
00102 
00103         /* Set in-recovery flags. */
00104         F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
00105         region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary;
00106         F_SET(region, TXN_IN_RECOVERY);
00107 
00108         /* Allocate a cursor for the log. */
00109         if ((ret = __log_cursor(dbenv, &logc)) != 0)
00110                 goto err;
00111 
00112         /*
00113          * If the user is specifying recovery to a particular point in time
00114          * or to a particular LSN, find the point to start recovery from.
00115          */
00116         ZERO_LSN(lowlsn);
00117         if (max_lsn != NULL) {
00118                 if ((ret = __log_backup(dbenv, logc, max_lsn, &lowlsn,
00119                     CKPLSN_CMP)) != 0)
00120                         goto err;
00121         } else if (dbenv->tx_timestamp != 0) {
00122                 if ((ret = __log_earliest(dbenv, logc, &low, &lowlsn)) != 0)
00123                         goto err;
00124                 if ((int32_t)dbenv->tx_timestamp < low) {
00125                         (void)snprintf(t1, sizeof(t1),
00126                             "%s", ctime(&dbenv->tx_timestamp));
00127                         if ((p = strchr(t1, '\n')) != NULL)
00128                                 *p = '\0';
00129                         tlow = (time_t)low;
00130                         (void)snprintf(t2, sizeof(t2), "%s", ctime(&tlow));
00131                         if ((p = strchr(t2, '\n')) != NULL)
00132                                 *p = '\0';
00133                         __db_err(dbenv,
00134                     "Invalid recovery timestamp %s; earliest time is %s",
00135                             t1, t2);
00136                         ret = EINVAL;
00137                         goto err;
00138                 }
00139         }
00140 
00141         /*
00142          * Recovery is done in three passes:
00143          * Pass #0:
00144          *      We need to find the position from which we will open files.
00145          *      We need to open files beginning with the earlier of the
00146          *      most recent checkpoint LSN and a checkpoint LSN before the
00147          *      recovery timestamp, if specified.  We need to be before the
00148          *      most recent checkpoint LSN because we are going to collect
00149          *      information about which transactions were begun before we
00150          *      start rolling forward.  Those that were should never be undone
00151          *      because queue cannot use LSNs to determine what operations can
00152          *      safely be aborted and it cannot rollback operations in
00153          *      transactions for which there may be records not processed
00154          *      during recovery.  We need to consider earlier points in time
00155          *      in case we are recovering to a particular timestamp.
00156          *
00157          * Pass #1:
00158          *      Read forward through the log from the position found in pass 0
00159          *      opening and closing files, and recording transactions for which
00160          *      we've seen their first record (the transaction's prev_lsn is
00161          *      0,0).  At the end of this pass, we know all transactions for
00162          *      which we've seen begins and we have the "current" set of files
00163          *      open.
00164          *
00165          * Pass #2:
00166          *      Read backward through the log undoing any uncompleted TXNs.
00167          *      There are four cases:
00168          *          1.  If doing catastrophic recovery, we read to the
00169          *              beginning of the log
00170          *          2.  If we are doing normal reovery, then we have to roll
00171          *              back to the most recent checkpoint LSN.
00172          *          3.  If we are recovering to a point in time, then we have
00173          *              to roll back to the checkpoint whose ckp_lsn is earlier
00174          *              than the specified time.  __log_earliest will figure
00175          *              this out for us.
00176          *          4.  If we are recovering back to a particular LSN, then
00177          *              we have to roll back to the checkpoint whose ckp_lsn
00178          *              is earlier than the max_lsn.  __log_backup will figure
00179          *              that out for us.
00180          *      In case 2, "uncompleted TXNs" include all those who committed
00181          *      after the user's specified timestamp.
00182          *
00183          * Pass #3:
00184          *      Read forward through the log from the LSN found in pass #2,
00185          *      redoing any committed TXNs (which committed after any user-
00186          *      specified rollback point).  During this pass, checkpoint
00187          *      file information is ignored, and file openings and closings
00188          *      are redone.
00189          *
00190          * ckp_lsn   -- lsn of the last checkpoint or the first in the log.
00191          * first_lsn -- the lsn where the forward passes begin.
00192          * last_lsn  -- the last lsn in the log, used for feedback
00193          * lowlsn    -- the lsn we are rolling back to, if we are recovering
00194          *              to a point in time.
00195          * lsn       -- temporary use lsn.
00196          * stop_lsn  -- the point at which forward roll should stop
00197          */
00198 
00199         /*
00200          * Find out the last lsn, so that we can estimate how far along we
00201          * are in recovery.  This will help us determine how much log there
00202          * is between the first LSN that we're going to be working with and
00203          * the last one.  We assume that each of the three phases takes the
00204          * same amount of time (a false assumption) and then use the %-age
00205          * of the amount of log traversed to figure out how much of the
00206          * pass we've accomplished.
00207          *
00208          * If we can't find any log records, we're kind of done.
00209          */
00210 #ifdef UMRW
00211         ZERO_LSN(last_lsn);
00212 #endif
00213         memset(&data, 0, sizeof(data));
00214         if ((ret = __log_c_get(logc, &last_lsn, &data, DB_LAST)) != 0) {
00215                 if (ret == DB_NOTFOUND)
00216                         ret = 0;
00217                 else
00218                         __db_err(dbenv, "Last log record not found");
00219                 goto err;
00220         }
00221 
00222         do {
00223                 /* txnid is after rectype, which is a u_int32. */
00224                 memcpy(&txnid,
00225                     (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
00226 
00227                 if (txnid != 0)
00228                         break;
00229         } while ((ret = __log_c_get(logc, &lsn, &data, DB_PREV)) == 0);
00230 
00231         /*
00232          * There are no transactions, so there is nothing to do unless
00233          * we're recovering to an LSN.  If we are, we need to proceed since
00234          * we'll still need to do a vtruncate based on information we haven't
00235          * yet collected.
00236          */
00237         if (ret == DB_NOTFOUND)
00238                 ret = 0;
00239         else if (ret != 0)
00240                 goto err;
00241 
00242         hi_txn = txnid;
00243 
00244         /*
00245          * Pass #0
00246          * Find the LSN from which we begin OPENFILES.
00247          *
00248          * If this is a catastrophic recovery, or if no checkpoint exists
00249          * in the log, the LSN is the first LSN in the log.
00250          *
00251          * Otherwise, it is the minimum of (1) the LSN in the last checkpoint
00252          * and (2) the LSN in the checkpoint before any specified recovery
00253          * timestamp or max_lsn.
00254          */
00255         /*
00256          * Get the first LSN in the log; it's an initial default
00257          * even if this is not a catastrophic recovery.
00258          */
00259         if ((ret = __log_c_get(logc, &ckp_lsn, &data, DB_FIRST)) != 0) {
00260                 if (ret == DB_NOTFOUND)
00261                         ret = 0;
00262                 else
00263                         __db_err(dbenv, "First log record not found");
00264                 goto err;
00265         }
00266         first_lsn = ckp_lsn;
00267         have_rec = 1;
00268 
00269         if (!LF_ISSET(DB_RECOVER_FATAL)) {
00270                 if ((ret = __txn_getckp(dbenv, &ckp_lsn)) == 0 &&
00271                     (ret = __log_c_get(logc, &ckp_lsn, &data, DB_SET)) == 0) {
00272                         /* We have a recent checkpoint.  This is LSN (1). */
00273                         if ((ret = __txn_ckp_read(dbenv,
00274                             data.data, &ckp_args)) != 0) {
00275                                 __db_err(dbenv,
00276                             "Invalid checkpoint record at [%ld][%ld]",
00277                                     (u_long)ckp_lsn.file,
00278                                     (u_long)ckp_lsn.offset);
00279                                 goto err;
00280                         }
00281                         first_lsn = ckp_args->ckp_lsn;
00282                         __os_free(dbenv, ckp_args);
00283                         have_rec = 0;
00284                 }
00285 
00286                 /*
00287                  * If LSN (2) exists, use it if it's before LSN (1).
00288                  * (If LSN (1) doesn't exist, first_lsn is the
00289                  * beginning of the log, so will "win" this check.)
00290                  *
00291                  * XXX
00292                  * In the recovery-to-a-timestamp case, lowlsn is chosen by
00293                  * __log_earliest, and is the checkpoint LSN of the
00294                  * *earliest* checkpoint in the unreclaimed log.  I
00295                  * (krinsky) believe that we could optimize this by looking
00296                  * instead for the LSN of the *latest* checkpoint before
00297                  * the timestamp of interest, but I'm not sure that this
00298                  * is worth doing right now.  (We have to look for lowlsn
00299                  * and low anyway, to make sure the requested timestamp is
00300                  * somewhere in the logs we have, and all that's required
00301                  * is that we pick *some* checkpoint after the beginning of
00302                  * the logs and before the timestamp.
00303                  */
00304                 if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) &&
00305                     log_compare(&lowlsn, &first_lsn) < 0) {
00306                         DB_ASSERT(have_rec == 0);
00307                         first_lsn = lowlsn;
00308                 }
00309         }
00310 
00311         /* Get the record at first_lsn if we don't have it already. */
00312         if (!have_rec &&
00313             (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) {
00314                 __db_err(dbenv, "Checkpoint LSN record [%ld][%ld] not found",
00315                     (u_long)first_lsn.file, (u_long)first_lsn.offset);
00316                 goto err;
00317         }
00318 
00319         if (dbenv->db_feedback != NULL) {
00320                 if (last_lsn.file == first_lsn.file)
00321                         nfiles = (double)
00322                             (last_lsn.offset - first_lsn.offset) / log_size;
00323                 else
00324                         nfiles = (double)(last_lsn.file - first_lsn.file) +
00325                             (double)((log_size - first_lsn.offset) +
00326                             last_lsn.offset) / log_size;
00327                 /* We are going to divide by nfiles; make sure it isn't 0. */
00328                 if (nfiles < 0.001)
00329                         nfiles = 0.001;
00330         }
00331 
00332         /* Find a low txnid. */
00333         ret = 0;
00334         if (hi_txn != 0) do {
00335                 /* txnid is after rectype, which is a u_int32. */
00336                 memcpy(&txnid,
00337                     (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
00338 
00339                 if (txnid != 0)
00340                         break;
00341         } while ((ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) == 0);
00342 
00343         /*
00344          * There are no transactions and we're not recovering to an LSN (see
00345          * above), so there is nothing to do.
00346          */
00347         if (ret == DB_NOTFOUND) {
00348                 if (log_compare(&lsn, &last_lsn) != 0)
00349                         ret = __db_log_corrupt(dbenv, &lsn);
00350                 else
00351                         ret = 0;
00352         }
00353 
00354         /* Reset to the first lsn. */
00355         if (ret != 0 ||
00356             (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0)
00357                 goto err;
00358 
00359         /* Initialize the transaction list. */
00360         if ((ret =
00361             __db_txnlist_init(dbenv, txnid, hi_txn, max_lsn, &txninfo)) != 0)
00362                 goto err;
00363 
00364         /*
00365          * Pass #1
00366          * Run forward through the log starting at the first relevant lsn.
00367          */
00368         if ((ret = __env_openfiles(dbenv, logc,
00369             txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
00370                 goto err;
00371 
00372         /* If there were no transactions, then we can bail out early. */
00373         if (hi_txn == 0 && max_lsn == NULL)
00374                 goto done;
00375 
00376         /*
00377          * Pass #2.
00378          *
00379          * We used first_lsn to tell us how far back we need to recover,
00380          * use it here.
00381          */
00382 
00383         if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
00384                 __db_msg(dbenv, "Recovery starting from [%lu][%lu]",
00385                     (u_long)first_lsn.file, (u_long)first_lsn.offset);
00386 
00387         pass = "backward";
00388         for (ret = __log_c_get(logc, &lsn, &data, DB_LAST);
00389             ret == 0 && log_compare(&lsn, &first_lsn) >= 0;
00390             ret = __log_c_get(logc, &lsn, &data, DB_PREV)) {
00391                 if (dbenv->db_feedback != NULL) {
00392                         progress = 34 + (int)(33 * (__lsn_diff(&first_lsn,
00393                             &last_lsn, &lsn, log_size, 0) / nfiles));
00394                         dbenv->db_feedback(dbenv, DB_RECOVER, progress);
00395                 }
00396                 tlsn = lsn;
00397                 ret = __db_dispatch(dbenv, dbenv->recover_dtab,
00398                     dbenv->recover_dtab_size, &data, &tlsn,
00399                     DB_TXN_BACKWARD_ROLL, txninfo);
00400                 if (ret != 0) {
00401                         if (ret != DB_TXN_CKP)
00402                                 goto msgerr;
00403                         else
00404                                 ret = 0;
00405                 }
00406         }
00407         if (ret == DB_NOTFOUND) {
00408                 if (log_compare(&lsn, &first_lsn) > 0)
00409                         ret = __db_log_corrupt(dbenv, &lsn);
00410                 else
00411                         ret = 0;
00412         }
00413         if (ret != 0)
00414                 goto err;
00415 
00416         /*
00417          * Pass #3.  If we are recovering to a timestamp or to an LSN,
00418          * we need to make sure that we don't roll-forward beyond that
00419          * point because there may be non-transactional operations (e.g.,
00420          * closes that would fail).  The last_lsn variable is used for
00421          * feedback calculations, but use it to set an initial stopping
00422          * point for the forward pass, and then reset appropriately to
00423          * derive a real stop_lsn that tells how far the forward pass
00424          * should go.
00425          */
00426         pass = "forward";
00427         stop_lsn = last_lsn;
00428         if (max_lsn != NULL || dbenv->tx_timestamp != 0)
00429                 stop_lsn = ((DB_TXNHEAD *)txninfo)->maxlsn;
00430 
00431         for (ret = __log_c_get(logc, &lsn, &data, DB_NEXT);
00432             ret == 0; ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) {
00433                 if (dbenv->db_feedback != NULL) {
00434                         progress = 67 + (int)(33 * (__lsn_diff(&first_lsn,
00435                             &last_lsn, &lsn, log_size, 1) / nfiles));
00436                         dbenv->db_feedback(dbenv, DB_RECOVER, progress);
00437                 }
00438                 tlsn = lsn;
00439                 ret = __db_dispatch(dbenv, dbenv->recover_dtab,
00440                     dbenv->recover_dtab_size, &data, &tlsn,
00441                     DB_TXN_FORWARD_ROLL, txninfo);
00442                 if (ret != 0) {
00443                         if (ret != DB_TXN_CKP)
00444                                 goto msgerr;
00445                         else
00446                                 ret = 0;
00447                 }
00448                 /*
00449                  * If we are recovering to a timestamp or an LSN,
00450                  * we need to make sure that we don't try to roll
00451                  * forward beyond the soon-to-be end of log.
00452                  */
00453                 if (log_compare(&lsn, &stop_lsn) >= 0)
00454                         break;
00455 
00456         }
00457         if (ret == DB_NOTFOUND)
00458                 ret = __db_log_corrupt(dbenv, &lsn);
00459         if (ret != 0)
00460                 goto err;
00461 
00462 #ifndef HAVE_FTRUNCATE
00463         /*
00464          * Process any pages that were on the limbo list and move them to
00465          * the free list.  Do this before checkpointing the database.
00466          */
00467         if ((ret = __db_do_the_limbo(dbenv, NULL, NULL, txninfo,
00468               dbenv->tx_timestamp != 0 ? LIMBO_TIMESTAMP : LIMBO_RECOVER)) != 0)
00469                 goto err;
00470 #endif
00471 
00472         if (max_lsn == NULL)
00473                 region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid;
00474 
00475         if (dbenv->tx_timestamp != 0) {
00476                 /* We are going to truncate, so we'd best close the cursor. */
00477                 if (logc != NULL && (ret = __log_c_close(logc)) != 0)
00478                         goto err;
00479                 logc = NULL;
00480                 /* Flush everything to disk, we are losing the log. */
00481                 if ((ret = __memp_sync(dbenv, NULL)) != 0)
00482                         goto err;
00483                 region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
00484                 if ((ret = __log_vtruncate(dbenv,
00485                     &((DB_TXNHEAD *)txninfo)->maxlsn,
00486                     &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn)) != 0)
00487                         goto err;
00488 
00489 #ifndef HAVE_FTRUNCATE
00490                 /*
00491                  * Generate logging compensation records.
00492                  * If we crash during/after vtruncate we may have
00493                  * pages missing from the free list since they
00494                  * if we roll things further back from here.
00495                  * These pages are only known in memory at this pont.
00496                  */
00497                  if ((ret = __db_do_the_limbo(dbenv,
00498                        NULL, NULL, txninfo, LIMBO_COMPENSATE)) != 0)
00499                         goto err;
00500 #endif
00501         }
00502 
00503 done:
00504         /* Take a checkpoint here to force any dirty data pages to disk. */
00505         if ((ret = __txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0)
00506                 goto err;
00507 
00508         /* Close all the db files that are open. */
00509         if ((ret = __dbreg_close_files(dbenv)) != 0)
00510                 goto err;
00511 
00512         if (max_lsn != NULL) {
00513                 if (!IS_ZERO_LSN(((DB_TXNHEAD *)txninfo)->ckplsn))
00514                         region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
00515                 else if ((ret =
00516                     __txn_findlastckp(dbenv, &region->last_ckp, max_lsn)) != 0)
00517                         goto err;
00518 
00519                 /* We are going to truncate, so we'd best close the cursor. */
00520                 if (logc != NULL && (ret = __log_c_close(logc)) != 0)
00521                         goto err;
00522                 if ((ret = __log_vtruncate(dbenv,
00523                     max_lsn, &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn)) != 0)
00524                         goto err;
00525 
00526                 /*
00527                  * Now we need to open files that should be open in order for
00528                  * client processing to continue.  However, since we've
00529                  * truncated the log, we need to recompute from where the
00530                  * openfiles pass should begin.
00531                  */
00532                 if ((ret = __log_cursor(dbenv, &logc)) != 0)
00533                         goto err;
00534                 if ((ret =
00535                     __log_c_get(logc, &first_lsn, &data, DB_FIRST)) != 0) {
00536                         if (ret == DB_NOTFOUND)
00537                                 ret = 0;
00538                         else
00539                                 __db_err(dbenv, "First log record not found");
00540                         goto err;
00541                 }
00542                 if ((ret = __txn_getckp(dbenv, &first_lsn)) == 0 &&
00543                     (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) == 0) {
00544                         /* We have a recent checkpoint.  This is LSN (1). */
00545                         if ((ret = __txn_ckp_read(dbenv,
00546                             data.data, &ckp_args)) != 0) {
00547                                 __db_err(dbenv,
00548                             "Invalid checkpoint record at [%ld][%ld]",
00549                                     (u_long)first_lsn.file,
00550                                     (u_long)first_lsn.offset);
00551                                 goto err;
00552                         }
00553                         first_lsn = ckp_args->ckp_lsn;
00554                         __os_free(dbenv, ckp_args);
00555                 }
00556                 if ((ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0)
00557                         goto err;
00558                 if ((ret = __env_openfiles(dbenv, logc,
00559                     txninfo, &data, &first_lsn, NULL, nfiles, 1)) != 0)
00560                         goto err;
00561         } else if (region->stat.st_nrestores == 0) {
00562                 /*
00563                  * If there are no prepared transactions that need resolution,
00564                  * we need to reset the transaction ID space and log this fact.
00565                  */
00566                 if ((ret = __txn_reset(dbenv)) != 0)
00567                         goto err;
00568         } else {
00569                 /*
00570                  * If we have restored prepared txns then they are in process
00571                  * as far as replication is concerned.
00572                  */
00573                 if (REP_ON(dbenv))
00574                         ((DB_REP *)dbenv->rep_handle)->region->op_cnt =
00575                             region->stat.st_nrestores;
00576                 if ((ret = __txn_recycle_id(dbenv)) != 0)
00577                         goto err;
00578         }
00579 
00580         if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) {
00581                 (void)time(&now);
00582                 __db_msg(dbenv, "Recovery complete at %.24s", ctime(&now));
00583                 __db_msg(dbenv, "%s %lx %s [%lu][%lu]",
00584                     "Maximum transaction ID",
00585                     (u_long)(txninfo == NULL ?
00586                         TXN_MINIMUM : ((DB_TXNHEAD *)txninfo)->maxid),
00587                     "Recovery checkpoint",
00588                     (u_long)region->last_ckp.file,
00589                     (u_long)region->last_ckp.offset);
00590         }
00591 
00592         if (0) {
00593 msgerr:         __db_err(dbenv,
00594                     "Recovery function for LSN %lu %lu failed on %s pass",
00595                     (u_long)lsn.file, (u_long)lsn.offset, pass);
00596         }
00597 
00598 err:    if (logc != NULL && (t_ret = __log_c_close(logc)) != 0 && ret == 0)
00599                 ret = t_ret;
00600 
00601         if (txninfo != NULL)
00602                 __db_txnlist_end(dbenv, txninfo);
00603 
00604         dbenv->tx_timestamp = 0;
00605 
00606         F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
00607         F_CLR(region, TXN_IN_RECOVERY);
00608 
00609         return (ret);
00610 }
00611 
00612 /*
00613  * Figure out how many logfiles we have processed.  If we are moving
00614  * forward (is_forward != 0), then we're computing current - low.  If
00615  * we are moving backward, we are computing high - current.  max is
00616  * the number of bytes per logfile.
00617  */
00618 static double
00619 __lsn_diff(low, high, current, max, is_forward)
00620         DB_LSN *low, *high, *current;
00621         u_int32_t max;
00622         int is_forward;
00623 {
00624         double nf;
00625 
00626         /*
00627          * There are three cases in each direction.  If you are in the
00628          * same file, then all you need worry about is the difference in
00629          * offsets.  If you are in different files, then either your offsets
00630          * put you either more or less than the integral difference in the
00631          * number of files -- we need to handle both of these.
00632          */
00633         if (is_forward) {
00634                 if (current->file == low->file)
00635                         nf = (double)(current->offset - low->offset) / max;
00636                 else if (current->offset < low->offset)
00637                         nf = (double)((current->file - low->file) - 1) +
00638                             (double)((max - low->offset) + current->offset) /
00639                             max;
00640                 else
00641                         nf = (double)(current->file - low->file) +
00642                             (double)(current->offset - low->offset) / max;
00643         } else {
00644                 if (current->file == high->file)
00645                         nf = (double)(high->offset - current->offset) / max;
00646                 else if (current->offset > high->offset)
00647                         nf = (double)((high->file - current->file) - 1) +
00648                             (double)
00649                             ((max - current->offset) + high->offset) / max;
00650                 else
00651                         nf = (double)(high->file - current->file) +
00652                             (double)(high->offset - current->offset) / max;
00653         }
00654         return (nf);
00655 }
00656 
00657 /*
00658  * __log_backup --
00659  *
00660  * This is used to find the earliest log record to process when a client
00661  * is trying to sync up with a master whose max LSN is less than this
00662  * client's max lsn; we want to roll back everything after that.
00663  * Also used in the verify phase to walk back via checkpoints.
00664  *
00665  * Find the latest checkpoint whose ckp_lsn is less than the max lsn.
00666  * PUBLIC: int    __log_backup __P((DB_ENV *, DB_LOGC *, DB_LSN *,
00667  * PUBLIC:    DB_LSN *, u_int32_t));
00668  */
00669 int
00670 __log_backup(dbenv, logc, max_lsn, start_lsn, cmp)
00671         DB_ENV *dbenv;
00672         DB_LOGC *logc;
00673         DB_LSN *max_lsn, *start_lsn;
00674         u_int32_t cmp;
00675 {
00676         DB_LSN cmp_lsn, lsn;
00677         DBT data;
00678         __txn_ckp_args *ckp_args;
00679         int lcmp, ret;
00680 
00681         memset(&data, 0, sizeof(data));
00682         ckp_args = NULL;
00683 
00684         if (cmp != CKPLSN_CMP && cmp != LASTCKP_CMP)
00685                 return (EINVAL);
00686 
00687         if ((ret = __txn_getckp(dbenv, &lsn)) != 0)
00688                 goto err;
00689         /*
00690          * Cmp tells us whether to check the ckp_lsn or the last_ckp
00691          * fields in the checkpoint record.
00692          */
00693         while ((ret = __log_c_get(logc, &lsn, &data, DB_SET)) == 0) {
00694                 if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0)
00695                         return (ret);
00696                 if (cmp == CKPLSN_CMP) {
00697                         /*
00698                          * Follow checkpoints through the log until
00699                          * we find one with a ckp_lsn less than
00700                          * or equal max_lsn.
00701                          */
00702                         cmp_lsn = ckp_args->ckp_lsn;
00703                         lcmp = (log_compare(&cmp_lsn, max_lsn) <= 0);
00704                 } else {
00705                         /*
00706                          * When we're walking back through the checkpoints
00707                          * we want the LSN of this checkpoint strictly less
00708                          * than the max_lsn (also a ckp LSN).
00709                          */
00710                         cmp_lsn = lsn;
00711                         lcmp = (log_compare(&cmp_lsn, max_lsn) < 0);
00712                 }
00713                 if (lcmp) {
00714                         *start_lsn = cmp_lsn;
00715                         break;
00716                 }
00717 
00718                 lsn = ckp_args->last_ckp;
00719                 /*
00720                  * If there are no more checkpoints behind us, we're
00721                  * done.  Break with DB_NOTFOUND.
00722                  */
00723                 if (IS_ZERO_LSN(lsn)) {
00724                         ret = DB_NOTFOUND;
00725                         break;
00726                 }
00727                 __os_free(dbenv, ckp_args);
00728         }
00729 
00730         if (ckp_args != NULL)
00731                 __os_free(dbenv, ckp_args);
00732         /*
00733          * For CKPLSN_CMP if we walked back through all the checkpoints,
00734          * set the cursor on the first log record.  For LASTCKP_CMP
00735          * we want to return 0,0 in start_lsn.
00736          */
00737 err:    if (IS_ZERO_LSN(*start_lsn) && cmp == CKPLSN_CMP &&
00738             (ret == 0 || ret == DB_NOTFOUND))
00739                 ret = __log_c_get(logc, start_lsn, &data, DB_FIRST);
00740         return (ret);
00741 }
00742 
00743 /*
00744  * __log_earliest --
00745  *
00746  * Return the earliest recovery point for the log files present.  The
00747  * earliest recovery time is the time stamp of the first checkpoint record
00748  * whose checkpoint LSN is greater than the first LSN we process.
00749  */
00750 static int
00751 __log_earliest(dbenv, logc, lowtime, lowlsn)
00752         DB_ENV *dbenv;
00753         DB_LOGC *logc;
00754         int32_t *lowtime;
00755         DB_LSN *lowlsn;
00756 {
00757         DB_LSN first_lsn, lsn;
00758         DBT data;
00759         __txn_ckp_args *ckpargs;
00760         u_int32_t rectype;
00761         int cmp, ret;
00762 
00763         memset(&data, 0, sizeof(data));
00764         /*
00765          * Read forward through the log looking for the first checkpoint
00766          * record whose ckp_lsn is greater than first_lsn.
00767          */
00768 
00769         for (ret = __log_c_get(logc, &first_lsn, &data, DB_FIRST);
00770             ret == 0; ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) {
00771                 memcpy(&rectype, data.data, sizeof(rectype));
00772                 if (rectype != DB___txn_ckp)
00773                         continue;
00774                 if ((ret = __txn_ckp_read(dbenv, data.data, &ckpargs)) == 0) {
00775                         cmp = log_compare(&ckpargs->ckp_lsn, &first_lsn);
00776                         *lowlsn = ckpargs->ckp_lsn;
00777                         *lowtime = ckpargs->timestamp;
00778 
00779                         __os_free(dbenv, ckpargs);
00780                         if (cmp >= 0)
00781                                 break;
00782                 }
00783         }
00784 
00785         return (ret);
00786 }
00787 
00788 /*
00789  * __env_openfiles --
00790  * Perform the pass of recovery that opens files.  This is used
00791  * both during regular recovery and an initial call to txn_recover (since
00792  * we need files open in order to abort prepared, but not yet committed
00793  * transactions).
00794  *
00795  * See the comments in db_apprec for a detailed description of the
00796  * various recovery passes.
00797  *
00798  * If we are not doing feedback processing (i.e., we are doing txn_recover
00799  * processing and in_recovery is zero), then last_lsn can be NULL.
00800  *
00801  * PUBLIC: int __env_openfiles __P((DB_ENV *, DB_LOGC *,
00802  * PUBLIC:     void *, DBT *, DB_LSN *, DB_LSN *, double, int));
00803  */
00804 int
00805 __env_openfiles(dbenv, logc, txninfo,
00806     data, open_lsn, last_lsn, nfiles, in_recovery)
00807         DB_ENV *dbenv;
00808         DB_LOGC *logc;
00809         void *txninfo;
00810         DBT *data;
00811         DB_LSN *open_lsn, *last_lsn;
00812         int in_recovery;
00813         double nfiles;
00814 {
00815         DB_LSN lsn, tlsn;
00816         u_int32_t log_size;
00817         int progress, ret;
00818 
00819         /*
00820          * XXX
00821          * Get the log size.  No locking required because we're single-threaded
00822          * during recovery.
00823          */
00824         log_size =
00825            ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size;
00826 
00827         lsn = *open_lsn;
00828         for (;;) {
00829                 if (in_recovery && dbenv->db_feedback != NULL) {
00830                         DB_ASSERT(last_lsn != NULL);
00831                         progress = (int)(33 * (__lsn_diff(open_lsn,
00832                            last_lsn, &lsn, log_size, 1) / nfiles));
00833                         dbenv->db_feedback(dbenv, DB_RECOVER, progress);
00834                 }
00835                 tlsn = lsn;
00836                 ret = __db_dispatch(dbenv,
00837                     dbenv->recover_dtab, dbenv->recover_dtab_size, data, &tlsn,
00838                     in_recovery ? DB_TXN_OPENFILES : DB_TXN_POPENFILES,
00839                     txninfo);
00840                 if (ret != 0 && ret != DB_TXN_CKP) {
00841                         __db_err(dbenv,
00842                             "Recovery function for LSN %lu %lu failed",
00843                             (u_long)lsn.file, (u_long)lsn.offset);
00844                         break;
00845                 }
00846                 if ((ret = __log_c_get(logc, &lsn, data, DB_NEXT)) != 0) {
00847                         if (ret == DB_NOTFOUND) {
00848                                 if (last_lsn != NULL &&
00849                                    log_compare(&lsn, last_lsn) != 0)
00850                                         ret = __db_log_corrupt(dbenv, &lsn);
00851                                 else
00852                                         ret = 0;
00853                         }
00854                         break;
00855                 }
00856         }
00857 
00858         return (ret);
00859 }
00860 
00861 static int
00862 __db_log_corrupt(dbenv, lsnp)
00863         DB_ENV *dbenv;
00864         DB_LSN *lsnp;
00865 {
00866         __db_err(dbenv, "Log file corrupt at LSN: [%lu][%lu]",
00867              (u_long)lsnp->file, (u_long)lsnp->offset);
00868         return (EINVAL);
00869 }

Generated on Sun Dec 25 12:14:24 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2