Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

rep_verify.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 2004-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: rep_verify.c,v 12.21 2005/10/19 19:06:37 sue Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #if TIME_WITH_SYS_TIME
00014 #include <sys/time.h>
00015 #include <time.h>
00016 #else
00017 #if HAVE_SYS_TIME_H
00018 #include <sys/time.h>
00019 #else
00020 #include <time.h>
00021 #endif
00022 #endif
00023 
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #endif
00027 
00028 #include "db_int.h"
00029 #include "dbinc/db_page.h"
00030 #include "dbinc/db_am.h"
00031 #include "dbinc/log.h"
00032 #include "dbinc/txn.h"
00033 
00034 static int __rep_dorecovery __P((DB_ENV *, DB_LSN *, DB_LSN *));
00035 
00036 /*
00037  * __rep_verify --
00038  *      Handle a REP_VERIFY message.
00039  *
00040  * PUBLIC: int __rep_verify __P((DB_ENV *, REP_CONTROL *, DBT *, int, time_t));
00041  */
00042 int
00043 __rep_verify(dbenv, rp, rec, eid, savetime)
00044         DB_ENV *dbenv;
00045         REP_CONTROL *rp;
00046         DBT *rec;
00047         int eid;
00048         time_t savetime;
00049 {
00050         DB_LOG *dblp;
00051         DB_LOGC *logc;
00052         DB_LSN lsn;
00053         DB_REP *db_rep;
00054         DBT mylog;
00055         LOG *lp;
00056         REP *rep;
00057         u_int32_t rectype;
00058         int match, ret, t_ret;
00059 
00060         ret = 0;
00061         db_rep = dbenv->rep_handle;
00062         rep = db_rep->region;
00063         dblp = dbenv->lg_handle;
00064         lp = dblp->reginfo.primary;
00065 
00066         if (IS_ZERO_LSN(lp->verify_lsn))
00067                 return (ret);
00068 
00069         if ((ret = __log_cursor(dbenv, &logc)) != 0)
00070                 return (ret);
00071         memset(&mylog, 0, sizeof(mylog));
00072         if ((ret = __log_c_get(logc, &rp->lsn, &mylog, DB_SET)) != 0)
00073                 goto err;;
00074         match = 0;
00075         memcpy(&rectype, mylog.data, sizeof(rectype));
00076         if (mylog.size == rec->size &&
00077             memcmp(mylog.data, rec->data, rec->size) == 0)
00078                 match = 1;
00079         /*
00080          * If we don't have a match, backup to the previous
00081          * identification record and try again.
00082          */
00083         if (match == 0) {
00084                 ZERO_LSN(lsn);
00085                 if ((ret = __rep_log_backup(logc, &lsn)) == 0) {
00086                         MUTEX_LOCK(dbenv, rep->mtx_clientdb);
00087                         lp->verify_lsn = lsn;
00088                         lp->rcvd_recs = 0;
00089                         lp->wait_recs = rep->request_gap;
00090                         MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00091                         (void)__rep_send_message(dbenv, eid, REP_VERIFY_REQ,
00092                             &lsn, NULL, 0, DB_REP_ANYWHERE);
00093                 } else if (ret == DB_NOTFOUND) {
00094                         /*
00095                          * We've either run out of records because
00096                          * logs have been removed or we've rolled back
00097                          * all the way to the beginning.  In the latter
00098                          * we don't think these sites were ever part of
00099                          * the same environment and we'll say so.
00100                          * In the former, request internal backup.
00101                          */
00102                         if (rp->lsn.file == 1) {
00103                                 __db_err(dbenv,
00104                 "Client was never part of master's environment");
00105                                 ret = DB_REP_JOIN_FAILURE;
00106                         } else {
00107                                 rep->stat.st_outdated++;
00108 
00109                                 LOG_SYSTEM_LOCK(dbenv);
00110                                 lsn = lp->lsn;
00111                                 LOG_SYSTEM_UNLOCK(dbenv);
00112                                 REP_SYSTEM_LOCK(dbenv);
00113                                 F_CLR(rep, REP_F_RECOVER_VERIFY);
00114                                 if (FLD_ISSET(rep->config, REP_C_NOAUTOINIT))
00115                                         ret = DB_REP_JOIN_FAILURE;
00116                                 else {
00117                                         F_SET(rep, REP_F_RECOVER_UPDATE);
00118                                         ZERO_LSN(rep->first_lsn);
00119                                 }
00120                                 REP_SYSTEM_UNLOCK(dbenv);
00121                                 if (ret == 0)
00122                                         (void)__rep_send_message(dbenv,
00123                                             eid, REP_UPDATE_REQ, NULL,
00124                                             NULL, 0, DB_REP_ANYWHERE);
00125                         }
00126                 }
00127         } else
00128                 ret = __rep_verify_match(dbenv, &rp->lsn, savetime);
00129 
00130 err:    if ((t_ret = __log_c_close(logc)) != 0 && ret == 0)
00131                 ret = t_ret;
00132         return (ret);
00133 }
00134 
00135 /*
00136  * __rep_verify_fail --
00137  *      Handle a REP_VERIFY_FAIL message.
00138  *
00139  * PUBLIC: int __rep_verify_fail __P((DB_ENV *, REP_CONTROL *, int));
00140  */
00141 int
00142 __rep_verify_fail(dbenv, rp, eid)
00143         DB_ENV *dbenv;
00144         REP_CONTROL *rp;
00145         int eid;
00146 {
00147         DB_LOG *dblp;
00148         DB_REP *db_rep;
00149         LOG *lp;
00150         REP *rep;
00151         int ret;
00152 
00153         ret = 0;
00154         db_rep = dbenv->rep_handle;
00155         rep = db_rep->region;
00156         dblp = dbenv->lg_handle;
00157         lp = dblp->reginfo.primary;
00158 
00159         /*
00160          * If any recovery flags are set, but not VERIFY,
00161          * then we ignore this message.  We are already
00162          * in the middle of updating.
00163          */
00164         if (F_ISSET(rep, REP_F_RECOVER_MASK) &&
00165             !F_ISSET(rep, REP_F_RECOVER_VERIFY))
00166                 return (0);
00167         rep->stat.st_outdated++;
00168 
00169         MUTEX_LOCK(dbenv, rep->mtx_clientdb);
00170         REP_SYSTEM_LOCK(dbenv);
00171         /*
00172          * We don't want an old or delayed VERIFY_FAIL
00173          * message to throw us into internal initialization
00174          * when we shouldn't be.
00175          *
00176          * Only go into internal initialization if:
00177          * We are set for AUTOINIT mode.
00178          * We are in RECOVER_VERIFY and this LSN == verify_lsn.
00179          * We are not in any RECOVERY and we are expecting
00180          *    an LSN that no longer exists on the master.
00181          * Otherwise, ignore this message.
00182          */
00183         if (FLD_ISSET(rep->config, REP_C_NOAUTOINIT) &&
00184             ((F_ISSET(rep, REP_F_RECOVER_VERIFY) &&
00185             log_compare(&rp->lsn, &lp->verify_lsn) == 0) ||
00186             (F_ISSET(rep, REP_F_RECOVER_MASK) == 0 &&
00187             log_compare(&rp->lsn, &lp->ready_lsn) >= 0))) {
00188                 ret = DB_REP_JOIN_FAILURE;
00189                 goto unlock;
00190         }
00191         if (((F_ISSET(rep, REP_F_RECOVER_VERIFY)) &&
00192             log_compare(&rp->lsn, &lp->verify_lsn) == 0) ||
00193             (F_ISSET(rep, REP_F_RECOVER_MASK) == 0 &&
00194             log_compare(&rp->lsn, &lp->ready_lsn) >= 0)) {
00195                 F_CLR(rep, REP_F_RECOVER_VERIFY);
00196                 F_SET(rep, REP_F_RECOVER_UPDATE);
00197                 ZERO_LSN(rep->first_lsn);
00198                 lp->wait_recs = rep->request_gap;
00199                 REP_SYSTEM_UNLOCK(dbenv);
00200                 MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00201                 (void)__rep_send_message(dbenv,
00202                     eid, REP_UPDATE_REQ, NULL, NULL, 0, 0);
00203         } else {
00204 unlock:         REP_SYSTEM_UNLOCK(dbenv);
00205                 MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00206         }
00207         return (ret);
00208 }
00209 
00210 /*
00211  * __rep_verify_req --
00212  *      Handle a REP_VERIFY_REQ message.
00213  *
00214  * PUBLIC: int __rep_verify_req __P((DB_ENV *, REP_CONTROL *, int));
00215  */
00216 int
00217 __rep_verify_req(dbenv, rp, eid)
00218         DB_ENV *dbenv;
00219         REP_CONTROL *rp;
00220         int eid;
00221 {
00222         DB_LOGC *logc;
00223         DB_REP *db_rep;
00224         DBT *d, data_dbt;
00225         REP *rep;
00226         u_int32_t type;
00227         int old, ret;
00228 
00229         ret = 0;
00230         db_rep = dbenv->rep_handle;
00231         rep = db_rep->region;
00232 
00233         type = REP_VERIFY;
00234         if ((ret = __log_cursor(dbenv, &logc)) != 0)
00235                 return (ret);
00236         d = &data_dbt;
00237         memset(d, 0, sizeof(data_dbt));
00238         F_SET(logc, DB_LOG_SILENT_ERR);
00239         ret = __log_c_get(logc, &rp->lsn, d, DB_SET);
00240         /*
00241          * If the LSN was invalid, then we might get a not
00242          * found, we might get an EIO, we could get anything.
00243          * If we get a DB_NOTFOUND, then there is a chance that
00244          * the LSN comes before the first file present in which
00245          * case we need to return a fail so that the client can return
00246          * a DB_OUTDATED.
00247          *
00248          * If we're a client servicing this request and we get a
00249          * NOTFOUND, return it so the caller can rerequest from
00250          * a better source.
00251          */
00252         if (ret == DB_NOTFOUND) {
00253                 if (F_ISSET(rep, REP_F_CLIENT))
00254                         goto notfound;
00255                 else if (__log_is_outdated(dbenv, rp->lsn.file, &old) == 0 &&
00256                     old != 0)
00257                         type = REP_VERIFY_FAIL;
00258         }
00259 
00260         if (ret != 0)
00261                 d = NULL;
00262 
00263         (void)__rep_send_message(dbenv, eid, type, &rp->lsn, d, 0, 0);
00264 notfound:
00265         ret = __log_c_close(logc);
00266         return (ret);
00267 }
00268 
00269 static int
00270 __rep_dorecovery(dbenv, lsnp, trunclsnp)
00271         DB_ENV *dbenv;
00272         DB_LSN *lsnp, *trunclsnp;
00273 {
00274         DB_LSN lsn;
00275         DB_REP *db_rep;
00276         DBT mylog;
00277         DB_LOGC *logc;
00278         int ret, t_ret, update;
00279         u_int32_t rectype;
00280         __txn_regop_args *txnrec;
00281 
00282         db_rep = dbenv->rep_handle;
00283 
00284         /* Figure out if we are backing out any committed transactions. */
00285         if ((ret = __log_cursor(dbenv, &logc)) != 0)
00286                 return (ret);
00287 
00288         memset(&mylog, 0, sizeof(mylog));
00289         update = 0;
00290         while (update == 0 &&
00291             (ret = __log_c_get(logc, &lsn, &mylog, DB_PREV)) == 0 &&
00292             log_compare(&lsn, lsnp) > 0) {
00293                 memcpy(&rectype, mylog.data, sizeof(rectype));
00294                 if (rectype == DB___txn_regop) {
00295                         if ((ret =
00296                             __txn_regop_read(dbenv, mylog.data, &txnrec)) != 0)
00297                                 goto err;
00298                         if (txnrec->opcode != TXN_ABORT)
00299                                 update = 1;
00300                         __os_free(dbenv, txnrec);
00301                 }
00302         }
00303 
00304         /*
00305          * If we successfully run recovery, we've opened all the necessary
00306          * files.  We are guaranteed to be single-threaded here, so no mutex
00307          * is necessary.
00308          */
00309         if ((ret = __db_apprec(dbenv, lsnp, trunclsnp, update, 0)) == 0)
00310                 F_SET(db_rep, DBREP_OPENFILES);
00311 
00312 err:    if ((t_ret = __log_c_close(logc)) != 0 && ret == 0)
00313                 ret = t_ret;
00314 
00315         return (ret);
00316 }
00317 
00318 /*
00319  * __rep_verify_match --
00320  *      We have just received a matching log record during verification.
00321  * Figure out if we're going to need to run recovery. If so, wait until
00322  * everything else has exited the library.  If not, set up the world
00323  * correctly and move forward.
00324  *
00325  * PUBLIC: int __rep_verify_match __P((DB_ENV *, DB_LSN *, time_t));
00326  */
00327 int
00328 __rep_verify_match(dbenv, reclsnp, savetime)
00329         DB_ENV *dbenv;
00330         DB_LSN *reclsnp;
00331         time_t savetime;
00332 {
00333         DB_LOG *dblp;
00334         DB_LSN trunclsn;
00335         DB_REP *db_rep;
00336         LOG *lp;
00337         REGENV *renv;
00338         REGINFO *infop;
00339         REP *rep;
00340         int done, master, ret;
00341         u_int32_t unused;
00342 
00343         dblp = dbenv->lg_handle;
00344         db_rep = dbenv->rep_handle;
00345         rep = db_rep->region;
00346         lp = dblp->reginfo.primary;
00347         ret = 0;
00348         infop = dbenv->reginfo;
00349         renv = infop->primary;
00350 
00351         /*
00352          * Check if the savetime is different than our current time stamp.
00353          * If it is, then we're racing with another thread trying to recover
00354          * and we lost.  We must give up.
00355          */
00356         MUTEX_LOCK(dbenv, rep->mtx_clientdb);
00357         done = savetime != renv->rep_timestamp;
00358         if (done) {
00359                 MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00360                 return (0);
00361         }
00362         ZERO_LSN(lp->verify_lsn);
00363         MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00364 
00365         /*
00366          * Make sure the world hasn't changed while we tried to get
00367          * the lock.  If it hasn't then it's time for us to kick all
00368          * operations out of DB and run recovery.
00369          */
00370         REP_SYSTEM_LOCK(dbenv);
00371         if (!F_ISSET(rep, REP_F_RECOVER_LOG) &&
00372             (F_ISSET(rep, REP_F_READY) || rep->in_recovery != 0)) {
00373                 rep->stat.st_msgs_recover++;
00374                 goto errunlock;
00375         }
00376 
00377         if ((ret = __rep_lockout(dbenv, rep, 1)) != 0)
00378                 goto errunlock;
00379 
00380         /* OK, everyone is out, we can now run recovery. */
00381         REP_SYSTEM_UNLOCK(dbenv);
00382 
00383         if ((ret = __rep_dorecovery(dbenv, reclsnp, &trunclsn)) != 0) {
00384                 REP_SYSTEM_LOCK(dbenv);
00385                 rep->in_recovery = 0;
00386                 F_CLR(rep, REP_F_READY);
00387                 goto errunlock;
00388         }
00389 
00390         /*
00391          * The log has been truncated (either directly by us or by __db_apprec)
00392          * We want to make sure we're waiting for the LSN at the new end-of-log,
00393          * not some later point.
00394          */
00395         MUTEX_LOCK(dbenv, rep->mtx_clientdb);
00396         lp->ready_lsn = trunclsn;
00397         ZERO_LSN(lp->waiting_lsn);
00398         ZERO_LSN(lp->max_wait_lsn);
00399         lp->max_perm_lsn = *reclsnp;
00400         lp->wait_recs = 0;
00401         lp->rcvd_recs = 0;
00402         ZERO_LSN(lp->verify_lsn);
00403 
00404         /*
00405          * Discard any log records we have queued;  we're about to re-request
00406          * them, and can't trust the ones in the queue.  We need to set the
00407          * DB_AM_RECOVER bit in this handle, so that the operation doesn't
00408          * deadlock.
00409          */
00410         F_SET(db_rep->rep_db, DB_AM_RECOVER);
00411         MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00412         ret = __db_truncate(db_rep->rep_db, NULL, &unused);
00413         MUTEX_LOCK(dbenv, rep->mtx_clientdb);
00414         F_CLR(db_rep->rep_db, DB_AM_RECOVER);
00415 
00416         REP_SYSTEM_LOCK(dbenv);
00417         rep->stat.st_log_queued = 0;
00418         rep->in_recovery = 0;
00419         F_CLR(rep, REP_F_NOARCHIVE | REP_F_RECOVER_MASK);
00420 
00421         if (ret != 0)
00422                 goto errunlock2;
00423 
00424         /*
00425          * If the master_id is invalid, this means that since
00426          * the last record was sent, somebody declared an
00427          * election and we may not have a master to request
00428          * things of.
00429          *
00430          * This is not an error;  when we find a new master,
00431          * we'll re-negotiate where the end of the log is and
00432          * try to bring ourselves up to date again anyway.
00433          *
00434          * !!!
00435          * We cannot assert the election flags though because
00436          * somebody may have declared an election and then
00437          * got an error, thus clearing the election flags
00438          * but we still have an invalid master_id.
00439          */
00440         master = rep->master_id;
00441         REP_SYSTEM_UNLOCK(dbenv);
00442         if (master == DB_EID_INVALID) {
00443                 MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00444                 ret = 0;
00445         } else {
00446                 /*
00447                  * We're making an ALL_REQ.  But now that we've
00448                  * cleared the flags, we're likely receiving new
00449                  * log records from the master, resulting in a gap
00450                  * immediately.  So to avoid multiple data streams,
00451                  * set the wait_recs value high now to give the master
00452                  * a chance to start sending us these records before
00453                  * the gap code re-requests the same gap.  Wait_recs
00454                  * will get reset once we start receiving these
00455                  * records.
00456                  */
00457                 lp->wait_recs = rep->max_gap;
00458                 MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00459                 (void)__rep_send_message(dbenv,
00460                     master, REP_ALL_REQ, reclsnp, NULL, 0, DB_REP_ANYWHERE);
00461         }
00462         if (0) {
00463 errunlock2:     MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
00464 errunlock:      REP_SYSTEM_UNLOCK(dbenv);
00465         }
00466         return (ret);
00467 }
00468 
00469 /*
00470  * __rep_log_backup --
00471  *
00472  * In the verify handshake, we walk backward looking for
00473  * identification records.  Those are the only record types
00474  * we verify and match on.
00475  *
00476  * PUBLIC: int __rep_log_backup __P((DB_LOGC *, DB_LSN *));
00477  */
00478 int
00479 __rep_log_backup(logc, lsn)
00480         DB_LOGC *logc;
00481         DB_LSN *lsn;
00482 {
00483         DBT mylog;
00484         u_int32_t rectype;
00485         int ret;
00486 
00487         ret = 0;
00488         memset(&mylog, 0, sizeof(mylog));
00489         while ((ret = __log_c_get(logc, lsn, &mylog, DB_PREV)) == 0) {
00490                 /*
00491                  * Look at the record type.  Only txn_regop and txn_ckp
00492                  * are interesting to us.
00493                  */
00494                 memcpy(&rectype, mylog.data, sizeof(rectype));
00495                 if (rectype == DB___txn_ckp || rectype == DB___txn_regop)
00496                         break;
00497         }
00498         return (ret);
00499 }

Generated on Sun Dec 25 12:14:45 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2