Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

log_get.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: log_get.c,v 12.16 2005/10/21 17:13:42 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <string.h>
00016 #endif
00017 
00018 #include "db_int.h"
00019 #include "dbinc/crypto.h"
00020 #include "dbinc/db_page.h"
00021 #include "dbinc/hmac.h"
00022 #include "dbinc/log.h"
00023 #include "dbinc/hash.h"
00024 
/*
 * RLOCK --
 *      Log region lock state as tracked by __log_c_get_int and
 *      __log_c_inregion:
 *
 *      L_ALREADY       the cursor has DB_LOG_LOCKED set; this code neither
 *                      takes nor releases the lock.
 *      L_ACQUIRED      this code called LOG_SYSTEM_LOCK and is responsible
 *                      for the matching LOG_SYSTEM_UNLOCK.
 *      L_NONE          the lock has not been taken.
 */
00025 typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK;
00026 
/* Forward declarations of the static functions in this file. */
00027 static int __log_c_close_pp __P((DB_LOGC *, u_int32_t));
00028 static int __log_c_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
00029 static int __log_c_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
00030 static int __log_c_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *));
00031 static int __log_c_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **));
00032 static int __log_c_inregion __P((DB_LOGC *,
00033                DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **, int *));
00034 static int __log_c_io __P((DB_LOGC *,
00035                u_int32_t, u_int32_t, void *, size_t *, int *));
00036 static int __log_c_ondisk __P((DB_LOGC *,
00037                DB_LSN *, DB_LSN *, u_int32_t, HDR *, u_int8_t **, int *));
00038 static int __log_c_set_maxrec __P((DB_LOGC *, char *));
00039 static int __log_c_shortread __P((DB_LOGC *, DB_LSN *, int));
00040 
00041 /*
00042  * __log_cursor_pp --
00043  *      DB_ENV->log_cursor
       *
       *      Argument-checking wrapper around __log_cursor.  It runs
       *      PANIC_CHECK, requires dbenv->lg_handle via ENV_REQUIRES_CONFIG
       *      (DB_INIT_LOG), validates flags with __db_fchk, and then calls
       *      __log_cursor inside REPLICATION_WRAP, bracketed by
       *      ENV_ENTER/ENV_LEAVE.
       *
       *      Returns the __db_fchk error, or whatever REPLICATION_WRAP
       *      leaves in ret.  NOTE(review): PANIC_CHECK, ENV_REQUIRES_CONFIG
       *      and ENV_ENTER are macros defined elsewhere and presumably can
       *      return early on failure -- confirm against their definitions.
00044  *
00045  * PUBLIC: int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t));
00046  */
00047 int
00048 __log_cursor_pp(dbenv, logcp, flags)
00049         DB_ENV *dbenv;
00050         DB_LOGC **logcp;
00051         u_int32_t flags;
00052 {
00053         DB_THREAD_INFO *ip;
00054         int ret;
00055 
00056         PANIC_CHECK(dbenv);
00057         ENV_REQUIRES_CONFIG(dbenv,
00058             dbenv->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG);
00059 
00060         /*
               * Validate arguments.  0 is passed as the permitted-flags
               * argument; presumably any non-zero flags value is rejected
               * -- confirm against __db_fchk.
               */
00061         if ((ret = __db_fchk(dbenv, "DB_ENV->log_cursor", flags, 0)) != 0)
00062                 return (ret);
00063 
              /* Create the cursor; *logcp is set by __log_cursor. */
00064         ENV_ENTER(dbenv, ip);
00065         REPLICATION_WRAP(dbenv, (__log_cursor(dbenv, logcp)), ret);
00066         ENV_LEAVE(dbenv, ip);
00067         return (ret);
00068 }
00069 
00070 /*
00071  * __log_cursor --
00072  *      Create a log cursor.
00073  *
00074  * PUBLIC: int __log_cursor __P((DB_ENV *, DB_LOGC **));
00075  */
00076 int
00077 __log_cursor(dbenv, logcp)
00078         DB_ENV *dbenv;
00079         DB_LOGC **logcp;
00080 {
00081         DB_LOGC *logc;
00082         int ret;
00083 
00084         *logcp = NULL;
00085 
00086         /* Allocate memory for the cursor. */
00087         if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOGC), &logc)) != 0)
00088                 return (ret);
00089 
00090         logc->bp_size = DB_LOGC_BUF_SIZE;
00091         /*
00092          * Set this to something positive.
00093          */
00094         logc->bp_maxrec = MEGABYTE;
00095         if ((ret = __os_malloc(dbenv, logc->bp_size, &logc->bp)) != 0) {
00096                 __os_free(dbenv, logc);
00097                 return (ret);
00098         }
00099 
00100         logc->dbenv = dbenv;
00101         logc->close = __log_c_close_pp;
00102         logc->get = __log_c_get_pp;
00103 
00104         *logcp = logc;
00105         return (0);
00106 }
00107 
00108 /*
00109  * __log_c_close_pp --
00110  *      DB_LOGC->close pre/post processing.
       *
       *      Installed as logc->close by __log_cursor.  Validates flags with
       *      __db_fchk (0 is passed as the permitted-flags argument), then
       *      runs __log_c_close inside REPLICATION_WRAP, bracketed by
       *      ENV_ENTER/ENV_LEAVE.  Returns the __db_fchk error, or whatever
       *      REPLICATION_WRAP leaves in ret.
00111  */
00112 static int
00113 __log_c_close_pp(logc, flags)
00114         DB_LOGC *logc;
00115         u_int32_t flags;
00116 {
00117         DB_THREAD_INFO *ip;
00118         DB_ENV *dbenv;
00119         int ret;
00120 
              /*
               * Cache the environment up front: __log_c_close frees logc,
               * and dbenv is still needed by ENV_LEAVE afterwards.
               */
00121         dbenv = logc->dbenv;
00122 
00123         PANIC_CHECK(dbenv);
00124         if ((ret = __db_fchk(dbenv, "DB_LOGC->close", flags, 0)) != 0)
00125                 return (ret);
00126 
00127         ENV_ENTER(dbenv, ip);
00128         REPLICATION_WRAP(dbenv, (__log_c_close(logc)), ret);
00129         ENV_LEAVE(dbenv, ip);
00130         return (ret);
00131 }
00132 
00133 /*
00134  * __log_c_close --
00135  *      DB_LOGC->close.
00136  *
00137  * PUBLIC: int __log_c_close __P((DB_LOGC *));
00138  */
00139 int
00140 __log_c_close(logc)
00141         DB_LOGC *logc;
00142 {
00143         DB_ENV *dbenv;
00144 
00145         dbenv = logc->dbenv;
00146 
00147         if (logc->c_fhp != NULL) {
00148                 (void)__os_closehandle(dbenv, logc->c_fhp);
00149                 logc->c_fhp = NULL;
00150         }
00151 
00152         if (logc->c_dbt.data != NULL)
00153                 __os_free(dbenv, logc->c_dbt.data);
00154 
00155         __os_free(dbenv, logc->bp);
00156         __os_free(dbenv, logc);
00157 
00158         return (0);
00159 }
00160 
00161 /*
00162  * __log_c_get_pp --
00163  *      DB_LOGC->get pre/post processing.
00164  */
00165 static int
00166 __log_c_get_pp(logc, alsn, dbt, flags)
00167         DB_LOGC *logc;
00168         DB_LSN *alsn;
00169         DBT *dbt;
00170         u_int32_t flags;
00171 {
00172         DB_ENV *dbenv;
00173         DB_THREAD_INFO *ip;
00174         int ret;
00175 
00176         dbenv = logc->dbenv;
00177 
00178         PANIC_CHECK(dbenv);
00179 
00180         /* Validate arguments. */
00181         switch (flags) {
00182         case DB_CURRENT:
00183         case DB_FIRST:
00184         case DB_LAST:
00185         case DB_NEXT:
00186         case DB_PREV:
00187                 break;
00188         case DB_SET:
00189                 if (IS_ZERO_LSN(*alsn)) {
00190                         __db_err(dbenv, "DB_LOGC->get: invalid LSN: %lu/%lu",
00191                             (u_long)alsn->file, (u_long)alsn->offset);
00192                         return (EINVAL);
00193                 }
00194                 break;
00195         default:
00196                 return (__db_ferr(dbenv, "DB_LOGC->get", 1));
00197         }
00198 
00199         ENV_ENTER(dbenv, ip);
00200         REPLICATION_WRAP(dbenv, (__log_c_get(logc, alsn, dbt, flags)), ret);
00201         ENV_LEAVE(dbenv, ip);
00202         return (ret);
00203 }
00204 
00205 /*
00206  * __log_c_get --
00207  *      DB_LOGC->get.
00208  *
00209  * PUBLIC: int __log_c_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
00210  */
00211 int
00212 __log_c_get(logc, alsn, dbt, flags)
00213         DB_LOGC *logc;
00214         DB_LSN *alsn;
00215         DBT *dbt;
00216         u_int32_t flags;
00217 {
00218         DB_ENV *dbenv;
00219         DB_LSN saved_lsn;
00220         int ret;
00221 
00222         dbenv = logc->dbenv;
00223 
00224         /*
00225          * On error, we take care not to overwrite the caller's LSN.  This
00226          * is because callers looking for the end of the log loop using the
00227          * DB_NEXT flag, and expect to take the last successful lsn out of
00228          * the passed-in structure after DB_LOGC->get fails with DB_NOTFOUND.
00229          *
00230          * !!!
00231          * This line is often flagged an uninitialized memory read during a
00232          * Purify or similar tool run, as the application didn't initialize
00233          * *alsn.  If the application isn't setting the DB_SET flag, there is
00234          * no reason it should have initialized *alsn, but we can't know that
00235          * and we want to make sure we never overwrite whatever the application
00236          * put in there.
00237          */
00238         saved_lsn = *alsn;
00239 
00240         /*
00241          * If we get one of the log's header records as a result of doing a
00242          * DB_FIRST, DB_NEXT, DB_LAST or DB_PREV, repeat the operation, log
00243          * file header records aren't useful to applications.
00244          */
00245         if ((ret = __log_c_get_int(logc, alsn, dbt, flags)) != 0) {
00246                 *alsn = saved_lsn;
00247                 return (ret);
00248         }
00249         if (alsn->offset == 0 && (flags == DB_FIRST ||
00250             flags == DB_NEXT || flags == DB_LAST || flags == DB_PREV)) {
00251                 switch (flags) {
00252                 case DB_FIRST:
00253                         flags = DB_NEXT;
00254                         break;
00255                 case DB_LAST:
00256                         flags = DB_PREV;
00257                         break;
00258                 case DB_NEXT:
00259                 case DB_PREV:
00260                 default:
00261                         break;
00262                 }
00263                 if (F_ISSET(dbt, DB_DBT_MALLOC)) {
00264                         __os_free(dbenv, dbt->data);
00265                         dbt->data = NULL;
00266                 }
00267                 if ((ret = __log_c_get_int(logc, alsn, dbt, flags)) != 0) {
00268                         *alsn = saved_lsn;
00269                         return (ret);
00270                 }
00271         }
00272 
00273         return (0);
00274 }
00275 
00276 /*
00277  * __log_c_get_int --
00278  *      Get a log record; internal version.
       *
       *      Works out the LSN of the requested record (nlsn) from flags and
       *      the cursor's position (c_lsn, c_len, c_prev), then looks for the
       *      record in three places, in order: the cursor's own buffer
       *      (__log_c_incursor), the log region's buffer (__log_c_inregion)
       *      and the on-disk log files (__log_c_ondisk).  Records that did
       *      not come whole out of the region buffer are checksummed.
       *
       *      On success the record body (the bytes after the header) is
       *      copied into dbt, decrypted if CRYPTO_ON, and both *alsn and the
       *      cursor's c_lsn/c_len/c_prev are updated; 0 is returned.
       *
       *      Errors visible here: DB_NOTFOUND when there is no such record,
       *      EINVAL for an unrecognized flag, EIO for a checksum failure
       *      when the cursor has DB_LOG_SILENT_ERR set, the __db_panic
       *      result for a reported checksum mismatch or a zero-length record
       *      met while moving backwards, EAGAIN when decryption fails, or
       *      any error returned by a helper.
       *
       *      The log region lock is taken lazily and released before
       *      returning, unless the cursor has DB_LOG_LOCKED set, in which
       *      case the lock is left alone.
00279  */
00280 static int
00281 __log_c_get_int(logc, alsn, dbt, flags)
00282         DB_LOGC *logc;
00283         DB_LSN *alsn;
00284         DBT *dbt;
00285         u_int32_t flags;
00286 {
00287         DB_CIPHER *db_cipher;
00288         DB_ENV *dbenv;
00289         DB_LOG *dblp;
00290         DB_LSN last_lsn, nlsn;
00291         HDR hdr;
00292         LOG *lp;
00293         RLOCK rlock;
00294         logfile_validity status;
00295         u_int32_t cnt;
00296         u_int8_t *rp;
00297         int eof, is_hmac, need_cksum, ret;
00298 
00299         dbenv = logc->dbenv;
00300         db_cipher = dbenv->crypto_handle;
00301         dblp = dbenv->lg_handle;
00302         lp = dblp->reginfo.primary;
00303         is_hmac = 0;
00304 
00305         /*
00306          * We don't acquire the log region lock until we need it, and we
00307          * release it as soon as we're done.
00308          */
00309         rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE;
00310 
              /* Start from the cursor's position; each case below adjusts it. */
00311         nlsn = logc->c_lsn;
00312         switch (flags) {
00313         case DB_NEXT:                           /* Next log record. */
00314                 if (!IS_ZERO_LSN(nlsn)) {
00315                         /* Increment the cursor by the cursor record size. */
00316                         nlsn.offset += logc->c_len;
00317                         break;
00318                 }
                      /* The cursor is not positioned yet: act as DB_FIRST. */
00319                 flags = DB_FIRST;
00320                 /* FALLTHROUGH */
00321         case DB_FIRST:                          /* First log record. */
00322                 /* Find the first log file. */
00323                 if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
00324                         goto err;
00325 
00326                 /*
00327                  * DB_LV_INCOMPLETE:
00328                  *      Theoretically, the log file we want could be created
00329                  *      but not yet written, the "first" log record must be
00330                  *      in the log buffer.
00331                  * DB_LV_NORMAL:
00332                  * DB_LV_OLD_READABLE:
00333                  *      We found a log file we can read.
00334                  * DB_LV_NONEXISTENT:
00335                  *      No log files exist, the "first" log record must be in
00336                  *      the log buffer.
00337                  * DB_LV_OLD_UNREADABLE:
00338                  *      No readable log files exist, we're at the cross-over
00339                  *      point between two versions.  The "first" log record
00340                  *      must be in the log buffer.
00341                  */
00342                 switch (status) {
00343                 case DB_LV_INCOMPLETE:
00344                         DB_ASSERT(lp->lsn.file == cnt);
00345                         /* FALLTHROUGH */
00346                 case DB_LV_NORMAL:
00347                 case DB_LV_OLD_READABLE:
00348                         nlsn.file = cnt;
00349                         break;
00350                 case DB_LV_NONEXISTENT:
00351                         nlsn.file = 1;
00352                         DB_ASSERT(lp->lsn.file == nlsn.file);
00353                         break;
00354                 case DB_LV_OLD_UNREADABLE:
00355                         nlsn.file = cnt + 1;
00356                         DB_ASSERT(lp->lsn.file == nlsn.file);
00357                         break;
00358                 }
00359                 nlsn.offset = 0;
00360                 break;
00361         case DB_CURRENT:                        /* Current log record. */
00362                 break;
00363         case DB_PREV:                           /* Previous log record. */
00364                 if (!IS_ZERO_LSN(nlsn)) {
00365                         /* If at start-of-file, move to the previous file. */
00366                         if (nlsn.offset == 0) {
00367                                 if (nlsn.file == 1) {
00368                                         ret = DB_NOTFOUND;
00369                                         goto err;
00370                                 }
                                      /*
                                       * For on-disk logs the previous file
                                       * must validate as readable; the check
                                       * is skipped for in-memory logs.
                                       */
00371                                 if ((!lp->db_log_inmemory &&
00372                                     (__log_valid(dblp, nlsn.file - 1, 0, NULL,
00373                                     0, &status) != 0 ||
00374                                     (status != DB_LV_NORMAL &&
00375                                     status != DB_LV_OLD_READABLE)))) {
00376                                         ret = DB_NOTFOUND;
00377                                         goto err;
00378                                 }
00379 
00380                                 --nlsn.file;
00381                         }
00382                         nlsn.offset = logc->c_prev;
00383                         break;
00384                 }
                      /* The cursor is not positioned yet: act as DB_LAST. */
00385                 /* FALLTHROUGH */
00386         case DB_LAST:                           /* Last log record. */
                      /* lp->lsn and lp->len are read under the region lock. */
00387                 if (rlock == L_NONE) {
00388                         rlock = L_ACQUIRED;
00389                         LOG_SYSTEM_LOCK(dbenv);
00390                 }
                      /*
                       * NOTE(review): presumably lp->len is the length of the
                       * last record written, so this backs up from the end of
                       * the log to the start of that record -- confirm against
                       * the LOG structure.
                       */
00391                 nlsn.file = lp->lsn.file;
00392                 nlsn.offset = lp->lsn.offset - lp->len;
00393                 break;
00394         case DB_SET:                            /* Set log record. */
00395                 nlsn = *alsn;
00396                 break;
00397         default:
00398                 DB_ASSERT(0);
00399                 ret = EINVAL;
00400                 goto err;
00401         }
00402 
              /*
               * This block is only ever entered through "goto next_file";
               * the if (0) keeps the straight-line path from executing it.
               */
00403         if (0) {                                /* Move to the next file. */
00404 next_file:      ++nlsn.file;
00405                 nlsn.offset = 0;
00406         }
00407 
00408         /*
00409          * The above switch statement should have set nlsn to the lsn of
00410          * the requested record.
00411          */
00412 
              /* The on-disk header is larger when encryption is configured. */
00413         if (CRYPTO_ON(dbenv)) {
00414                 hdr.size = HDR_CRYPTO_SZ;
00415                 is_hmac = 1;
00416         } else {
00417                 hdr.size = HDR_NORMAL_SZ;
00418                 is_hmac = 0;
00419         }
00420 
00421         /*
00422          * Check to see if the record is in the cursor's buffer -- if so,
00423          * we'll need to checksum it.
00424          */
00425         if ((ret = __log_c_incursor(logc, &nlsn, &hdr, &rp)) != 0)
00426                 goto err;
00427         if (rp != NULL)
00428                 goto cksum;
00429 
00430         /*
00431          * Look to see if we're moving backward in the log with the last record
00432          * coming from the disk -- it means the record can't be in the region's
00433          * buffer.  Else, check the region's buffer.
00434          *
00435          * If the record isn't in the region's buffer, then either logs are
00436          * in-memory, and we're done, or we're going to have to read the
00437          * record from disk.  We want to make a point of not reading past the
00438          * end of the logical log (after recovery, there may be data after the
00439          * end of the logical log, not to mention the log file may have been
00440          * pre-allocated).  So, zero out last_lsn, and initialize it inside
00441          * __log_c_inregion -- if it's still zero when we check it in
00442          * __log_c_ondisk, that's OK, it just means the logical end of the log
00443          * isn't an issue for this request.
00444          */
00445         ZERO_LSN(last_lsn);
00446         if (!F_ISSET(logc, DB_LOG_DISK) ||
00447             log_compare(&nlsn, &logc->c_lsn) > 0) {
00448                 F_CLR(logc, DB_LOG_DISK);
00449 
00450                 if ((ret = __log_c_inregion(logc,
00451                     &nlsn, &rlock, &last_lsn, &hdr, &rp, &need_cksum)) != 0)
00452                         goto err;
00453                 if (rp != NULL) {
00454                         /*
00455                          * If we read the entire record from the in-memory log
00456                          * buffer, we don't need to checksum it, nor do we need
00457                          * to worry about vtruncate issues.
00458                          */
00459                         if (need_cksum)
00460                                 goto cksum;
00461                         goto from_memory;
00462                 }
                      /* In-memory logs have no disk copy; rp is NULL here. */
00463                 if (lp->db_log_inmemory)
00464                         goto nohdr;
00465         }
00466 
00467         /*
00468          * We have to read from an on-disk file to retrieve the record.
00469          * If we ever can't retrieve the record at offset 0, we're done,
00470          * return EOF/DB_NOTFOUND.
00471          *
00472          * Discard the region lock if we're still holding it, the on-disk
00473          * reading routines don't need it.
00474          */
00475         if (rlock == L_ACQUIRED) {
00476                 rlock = L_NONE;
00477                 LOG_SYSTEM_UNLOCK(dbenv);
00478         }
00479         if ((ret = __log_c_ondisk(
00480             logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0)
00481                 goto err;
00482         if (eof) {
00483                 /*
00484                  * Only DB_NEXT automatically moves to the next file, and
00485                  * it only happens once.
                       *
                       * Returning directly, without going through err, is
                       * fine for the region lock: a lock acquired by this
                       * call was released just above.
00486                  */
00487                 if (flags != DB_NEXT || nlsn.offset == 0)
00488                         return (DB_NOTFOUND);
00489                 goto next_file;
00490         }
              /* Remember that the cursor's last record came from disk. */
00491         F_SET(logc, DB_LOG_DISK);
00492 
00493 cksum:  /*
00494          * Discard the region lock if we're still holding it.  (The path to
00495          * get here is we acquired the region lock because of the caller's
00496          * flag argument, but we found the record in the in-memory or cursor
00497          * buffers.  Improbable, but it's easy to avoid.)
00498          */
00499         if (rlock == L_ACQUIRED) {
00500                 rlock = L_NONE;
00501                 LOG_SYSTEM_UNLOCK(dbenv);
00502         }
00503 
00504         /*
00505          * Checksum: there are two types of errors -- a configuration error
00506          * or a checksum mismatch.  The former is always bad.  The latter is
00507          * OK if we're searching for the end of the log, and very, very bad
00508          * if we're reading random log records.
               *
               * NOTE(review): the "ret == 0" test below cannot be true, since
               * it sits inside the ret != 0 branch; only ret == -1 is mapped
               * to EIO.
00509          */
00510         if ((ret = __db_check_chksum(dbenv, db_cipher,
00511             hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) {
00512                 if (F_ISSET(logc, DB_LOG_SILENT_ERR)) {
00513                         if (ret == 0 || ret == -1)
00514                                 ret = EIO;
00515                 } else if (ret == -1) {
00516                         __db_err(dbenv,
00517                     "DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch",
00518                             (u_long)nlsn.file, (u_long)nlsn.offset);
00519                         __db_err(dbenv,
00520                     "DB_LOGC->get: catastrophic recovery may be required");
00521                         ret = __db_panic(dbenv, DB_RUNRECOVERY);
00522                 }
00523                 goto err;
00524         }
00525 
00526         /*
00527          * If we got a 0-length record, that means we're in the midst of
00528          * some bytes that got 0'd as the result of a vtruncate.  We're
00529          * going to have to retry.
00530          */
00531         if (hdr.len == 0) {
00532 nohdr:          switch (flags) {
00533                 case DB_FIRST:
00534                 case DB_NEXT:
00535                         /* Zero'd records always indicate the end of a file. */
00536                         goto next_file;
00537                 case DB_LAST:
00538                 case DB_PREV:
00539                         /*
00540                          * We should never get here.  If we recover a log
00541                          * file with 0's at the end, we'll treat the 0'd
00542                          * headers as the end of log and ignore them.  If
00543                          * we're reading backwards from another file, then
00544                          * the first record in that new file should have its
00545                          * prev field set correctly.
00546                          */
00547                         __db_err(dbenv,
00548                 "Encountered zero length records while traversing backwards");
00549                         DB_ASSERT(0);
00550                         ret = __db_panic(dbenv, DB_RUNRECOVERY);
00551                         goto err;
00552                 case DB_SET:
00553                 default:
00554                         /* Return the 0-length record. */
                              /*
                               * NOTE(review): when this point is reached via
                               * "goto nohdr" from the in-memory path, rp is
                               * NULL, and the code below still passes
                               * rp + hdr.size to __db_retcopy -- confirm that
                               * DB_SET/DB_CURRENT cannot arrive here that way.
                               */
00555                         break;
00556                 }
00557         }
00558 
00559 from_memory:
00560         /*
00561          * Discard the region lock if we're still holding it.  (The path to
00562          * get here is we acquired the region lock because of the caller's
00563          * flag argument, but we found the record in the in-memory or cursor
00564          * buffers.  Improbable, but it's easy to avoid.)
00565          */
00566         if (rlock == L_ACQUIRED) {
00567                 rlock = L_NONE;
00568                 LOG_SYSTEM_UNLOCK(dbenv);
00569         }
00570 
00571         /* Copy the record into the user's DBT. */
00572         if ((ret = __db_retcopy(dbenv, dbt, rp + hdr.size,
00573             (u_int32_t)(hdr.len - hdr.size),
00574             &logc->c_dbt.data, &logc->c_dbt.ulen)) != 0)
00575                 goto err;
00576 
00577         if (CRYPTO_ON(dbenv)) {
                      /* Any decryption failure is reported as EAGAIN. */
00578                 if ((ret = db_cipher->decrypt(dbenv, db_cipher->data,
00579                     hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) {
00580                         ret = EAGAIN;
00581                         goto err;
00582                 }
00583                 /*
00584                  * Return the original log record size to the user,
00585                  * even though we've allocated more than that, possibly.
00586                  * The log record is decrypted in the user dbt, not in
00587                  * the buffer, so we must do this here after decryption,
00588                  * not adjust the len passed to the __db_retcopy call.
00589                  */
00590                 dbt->size = hdr.orig_size;
00591         }
00592 
00593         /* Update the cursor and the returned LSN. */
00594         *alsn = nlsn;
00595         logc->c_lsn = nlsn;
00596         logc->c_len = hdr.len;
00597         logc->c_prev = hdr.prev;
00598 
              /*
               * Shared exit for success and failure (ret is 0 on success).
               * Only a lock taken by this call is released; an L_ALREADY lock
               * is left held.
               */
00599 err:    if (rlock == L_ACQUIRED)
00600                 LOG_SYSTEM_UNLOCK(dbenv);
00601 
00602         return (ret);
00603 }
00604 
00605 /*
00606  * __log_c_incursor --
00607  *      Check to see if the requested record is in the cursor's buffer.
00608  */
00609 static int
00610 __log_c_incursor(logc, lsn, hdr, pp)
00611         DB_LOGC *logc;
00612         DB_LSN *lsn;
00613         HDR *hdr;
00614         u_int8_t **pp;
00615 {
00616         u_int8_t *p;
00617         int eof;
00618 
00619         *pp = NULL;
00620 
00621         /*
00622          * Test to see if the requested LSN could be part of the cursor's
00623          * buffer.
00624          *
00625          * The record must be part of the same file as the cursor's buffer.
00626          * The record must start at a byte offset equal to or greater than
00627          * the cursor buffer.
00628          * The record must not start at a byte offset after the cursor
00629          * buffer's end.
00630          */
00631         if (logc->bp_lsn.file != lsn->file)
00632                 return (0);
00633         if (logc->bp_lsn.offset > lsn->offset)
00634                 return (0);
00635         if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size)
00636                 return (0);
00637 
00638         /*
00639          * Read the record's header and check if the record is entirely held
00640          * in the buffer.  If the record is not entirely held, get it again.
00641          * (The only advantage in having part of the record locally is that
00642          * we might avoid a system call because we already have the HDR in
00643          * memory.)
00644          *
00645          * If the header check fails for any reason, it must be because the
00646          * LSN is bogus.  Fail hard.
00647          */
00648         p = logc->bp + (lsn->offset - logc->bp_lsn.offset);
00649         memcpy(hdr, p, hdr->size);
00650         if (__log_c_hdrchk(logc, lsn, hdr, &eof))
00651                 return (DB_NOTFOUND);
00652         if (eof || logc->bp_lsn.offset + logc->bp_rlen < lsn->offset + hdr->len)
00653                 return (0);
00654 
00655         *pp = p;                                /* Success. */
00656 
00657         return (0);
00658 }
00659 
00660 /*
00661  * __log_c_inregion --
00662  *      Check to see if the requested record is in the region's buffer.
00663  */
00664 static int
00665 __log_c_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
00666         DB_LOGC *logc;
00667         DB_LSN *lsn, *last_lsn;
00668         RLOCK *rlockp;
00669         HDR *hdr;
00670         u_int8_t **pp;
00671         int *need_cksump;
00672 {
00673         DB_ENV *dbenv;
00674         DB_LOG *dblp;
00675         LOG *lp;
00676         size_t b_region, len, nr;
00677         u_int32_t b_disk;
00678         int eof, ret;
00679         u_int8_t *p;
00680 
00681         dbenv = logc->dbenv;
00682         dblp = dbenv->lg_handle;
00683         lp = ((DB_LOG *)logc->dbenv->lg_handle)->reginfo.primary;
00684 
00685         ret = 0;
00686         b_region = 0;
00687         *pp = NULL;
00688         *need_cksump = 0;
00689 
00690         /* If we haven't yet acquired the log region lock, do so. */
00691         if (*rlockp == L_NONE) {
00692                 *rlockp = L_ACQUIRED;
00693                 LOG_SYSTEM_LOCK(dbenv);
00694         }
00695 
00696         /*
00697          * The routines to read from disk must avoid reading past the logical
00698          * end of the log, so pass that information back to it.
00699          *
00700          * Since they're reading directly from the disk, they must also avoid
00701          * reading past the offset we've written out.  If the log was
00702          * truncated, it's possible that there are zeroes or garbage on
00703          * disk after this offset, and the logical end of the log can
00704          * come later than this point if the log buffer isn't empty.
00705          */
00706         *last_lsn = lp->lsn;
00707         if (!lp->db_log_inmemory && last_lsn->offset > lp->w_off)
00708                 last_lsn->offset = lp->w_off;
00709 
00710         /*
00711          * Test to see if the requested LSN could be part of the region's
00712          * buffer.
00713          *
00714          * During recovery, we read the log files getting the information to
00715          * initialize the region.  In that case, the region's lsn field will
00716          * not yet have been filled in, use only the disk.
00717          *
00718          * The record must not start at a byte offset after the region buffer's
00719          * end, since that means the request is for a record after the end of
00720          * the log.  Do this test even if the region's buffer is empty -- after
00721          * recovery, the log files may continue past the declared end-of-log,
00722          * and the disk reading routine will incorrectly attempt to read the
00723          * remainder of the log.
00724          *
00725          * Otherwise, test to see if the region's buffer actually has what we
00726          * want:
00727          *
00728          * The buffer must have some useful content.
00729          * The record must be in the same file as the region's buffer and must
00730          * start at a byte offset equal to or greater than the region's buffer.
00731          */
00732         if (IS_ZERO_LSN(lp->lsn))
00733                 return (0);
00734         if (log_compare(lsn, &lp->lsn) >= 0)
00735                 return (DB_NOTFOUND);
00736         else if (lp->db_log_inmemory) {
00737                 if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0)
00738                         return (ret);
00739         } else if (lp->b_off == 0 || log_compare(lsn, &lp->f_lsn) < 0)
00740                 return (0);
00741 
00742         /*
00743          * The current contents of the cursor's buffer will be useless for a
00744          * future call, we're about to overwrite it -- trash it rather than
00745          * try and make it look correct.
00746          */
00747         logc->bp_rlen = 0;
00748 
00749         /*
00750          * If the requested LSN is greater than the region buffer's first
00751          * byte, we know the entire record is in the buffer on a good LSN.
00752          *
00753          * If we're given a bad LSN, the "entire" record might not be in
00754          * our buffer in order to fail at the chksum.  __log_c_hdrchk made
00755          * sure our dest buffer fits, via bp_maxrec, but we also need to
00756          * make sure we don't run off the end of this buffer, the src.
00757          *
00758          * There is one case where the header check can fail: on a scan through
00759          * in-memory logs, when we reach the end of a file we can read an empty
00760          * header.  In that case, it's safe to return zero, here: it will be
00761          * caught in our caller.  Otherwise, the LSN is bogus.  Fail hard.
00762          */
00763         if (lp->db_log_inmemory || log_compare(lsn, &lp->f_lsn) > 0) {
00764                 if (!lp->db_log_inmemory)
00765                         b_region = lsn->offset - lp->w_off;
00766                 __log_inmem_copyout(dblp, b_region, hdr, hdr->size);
00767                 if (__log_c_hdrchk(logc, lsn, hdr, &eof) != 0)
00768                         return (DB_NOTFOUND);
00769                 if (eof)
00770                         return (0);
00771                 if (lp->db_log_inmemory) {
00772                         if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len)
00773                                 return (DB_NOTFOUND);
00774                 } else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
00775                         return (DB_NOTFOUND);
00776                 if (logc->bp_size <= hdr->len) {
00777                         len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
00778                         if ((ret =
00779                             __os_realloc(logc->dbenv, len, &logc->bp)) != 0)
00780                                  return (ret);
00781                         logc->bp_size = (u_int32_t)len;
00782                 }
00783                 __log_inmem_copyout(dblp, b_region, logc->bp, hdr->len);
00784                 *pp = logc->bp;
00785                 return (0);
00786         }
00787 
00788         DB_ASSERT(!lp->db_log_inmemory);
00789 
00790         /*
00791          * There's a partial record, that is, the requested record starts
00792          * in a log file and finishes in the region buffer.  We have to
00793          * find out how many bytes of the record are in the region buffer
00794          * so we can copy them out into the cursor buffer.  First, check
00795          * to see if the requested record is the only record in the region
00796          * buffer, in which case we should copy the entire region buffer.
00797          *
00798          * Else, walk back through the region's buffer to find the first LSN
00799          * after the record that crosses the buffer boundary -- we can detect
00800          * that LSN, because its "prev" field will reference the record we
00801          * want.  The bytes we need to copy from the region buffer are the
00802          * bytes up to the record we find.  The bytes we'll need to allocate
00803          * to hold the log record are the bytes between the two offsets.
00804          */
00805         b_disk = lp->w_off - lsn->offset;
00806         if (lp->b_off <= lp->len)
00807                 b_region = (u_int32_t)lp->b_off;
00808         else
00809                 for (p = dblp->bufp + (lp->b_off - lp->len);;) {
00810                         memcpy(hdr, p, hdr->size);
00811                         if (hdr->prev == lsn->offset) {
00812                                 b_region = (u_int32_t)(p - dblp->bufp);
00813                                 break;
00814                         }
00815                         p = dblp->bufp + (hdr->prev - lp->w_off);
00816                 }
00817 
00818         /*
00819          * If we don't have enough room for the record, we have to allocate
00820          * space.  We have to do it while holding the region lock, which is
00821          * truly annoying, but there's no way around it.  This call is why
00822          * we allocate cursor buffer space when allocating the cursor instead
00823          * of waiting.
00824          */
00825         if (logc->bp_size <= b_region + b_disk) {
00826                 len = (size_t)DB_ALIGN((uintmax_t)(b_region + b_disk) * 2, 128);
00827                 if ((ret = __os_realloc(logc->dbenv, len, &logc->bp)) != 0)
00828                         return (ret);
00829                 logc->bp_size = (u_int32_t)len;
00830         }
00831 
00832         /* Copy the region's bytes to the end of the cursor's buffer. */
00833         p = (logc->bp + logc->bp_size) - b_region;
00834         memcpy(p, dblp->bufp, b_region);
00835 
00836         /* Release the region lock. */
00837         if (*rlockp == L_ACQUIRED) {
00838                 *rlockp = L_NONE;
00839                 LOG_SYSTEM_UNLOCK(dbenv);
00840         }
00841 
00842         /*
00843          * Read the rest of the information from disk.  Neither short reads
00844          * or EOF are acceptable, the bytes we want had better be there.
00845          */
00846         if (b_disk != 0) {
00847                 p -= b_disk;
00848                 nr = b_disk;
00849                 if ((ret = __log_c_io(
00850                     logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0)
00851                         return (ret);
00852                 if (nr < b_disk)
00853                         return (__log_c_shortread(logc, lsn, 0));
00854 
00855                 /* We read bytes from the disk, we'll need to checksum them. */
00856                 *need_cksump = 1;
00857         }
00858 
00859         /* Copy the header information into the caller's structure. */
00860         memcpy(hdr, p, hdr->size);
00861 
00862         *pp = p;
00863         return (0);
00864 }
00865 
00866 /*
00867  * __log_c_ondisk --
00868  *      Read a record off disk.
00869  */
00870 static int
00871 __log_c_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp)
00872         DB_LOGC *logc;
00873         DB_LSN *lsn, *last_lsn;
00874         u_int32_t flags;
00875         int *eofp;
00876         HDR *hdr;
00877         u_int8_t **pp;
00878 {
00879         DB_ENV *dbenv;
00880         size_t len, nr;
00881         u_int32_t offset;
00882         int ret;
00883 
00884         dbenv = logc->dbenv;
00885         *eofp = 0;
00886 
00887         nr = hdr->size;
00888         if ((ret =
00889             __log_c_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0)
00890                 return (ret);
00891         if (*eofp)
00892                 return (0);
00893 
00894         /*
00895          * If the read was successful, but we can't read a full header, assume
00896          * we've hit EOF.  We can't check that the header has been partially
00897          * zeroed out, but it's unlikely that this is caused by a write failure
00898          * since the header is written as a single write call and it's less
00899          * than sector.
00900          */
00901         if (nr < hdr->size) {
00902                 *eofp = 1;
00903                 return (0);
00904         }
00905 
00906         /* Check the HDR. */
00907         if ((ret = __log_c_hdrchk(logc, lsn, hdr, eofp)) != 0)
00908                 return (ret);
00909         if (*eofp)
00910                 return (0);
00911 
00912         /*
00913          * Regardless of how we return, the previous contents of the cursor's
00914          * buffer are useless -- trash it.
00915          */
00916         logc->bp_rlen = 0;
00917 
00918         /*
00919          * Otherwise, we now (finally!) know how big the record is.  (Maybe
00920          * we should have just stuck the length of the record into the LSN!?)
00921          * Make sure we have enough space.
00922          */
00923         if (logc->bp_size <= hdr->len) {
00924                 len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
00925                 if ((ret = __os_realloc(dbenv, len, &logc->bp)) != 0)
00926                         return (ret);
00927                 logc->bp_size = (u_int32_t)len;
00928         }
00929 
00930         /*
00931          * If we're moving forward in the log file, read this record in at the
00932          * beginning of the buffer.  Otherwise, read this record in at the end
00933          * of the buffer, making sure we don't try and read before the start
00934          * of the file.  (We prefer positioning at the end because transaction
00935          * aborts use DB_SET to move backward through the log and we might get
00936          * lucky.)
00937          *
00938          * Read a buffer's worth, without reading past the logical EOF.  The
00939          * last_lsn may be a zero LSN, but that's OK, the test works anyway.
00940          */
00941         if (flags == DB_FIRST || flags == DB_NEXT)
00942                 offset = lsn->offset;
00943         else if (lsn->offset + hdr->len < logc->bp_size)
00944                 offset = 0;
00945         else
00946                 offset = (lsn->offset + hdr->len) - logc->bp_size;
00947 
00948         nr = logc->bp_size;
00949         if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset)
00950                 nr = last_lsn->offset - offset;
00951 
00952         if ((ret =
00953             __log_c_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0)
00954                 return (ret);
00955 
00956         /*
00957          * We should have at least gotten the bytes up-to-and-including the
00958          * record we're reading.
00959          */
00960         if (nr < (lsn->offset + hdr->len) - offset)
00961                 return (__log_c_shortread(logc, lsn, 1));
00962 
00963         /*
00964          * Set up the return information.
00965          *
00966          * !!!
00967          * No need to set the bp_lsn.file field, __log_c_io set it for us.
00968          */
00969         logc->bp_rlen = (u_int32_t)nr;
00970         logc->bp_lsn.offset = offset;
00971 
00972         *pp = logc->bp + (lsn->offset - offset);
00973 
00974         return (0);
00975 }
00976 
00977 /*
00978  * __log_c_hdrchk --
00979  *
00980  * Check for corrupted HDRs before we use them to allocate memory or find
00981  * records.
00982  *
00983  * If the log files were pre-allocated, a zero-filled HDR structure is the
00984  * logical file end.  However, we can see buffers filled with 0's during
00985  * recovery, too (because multiple log buffers were written asynchronously,
00986  * and one made it to disk before a different one that logically precedes
00987  * it in the log file.
00988  *
00989  * Check for impossibly large records.  The malloc should fail later, but we
00990  * have customers that run mallocs that treat all allocation failures as fatal
00991  * errors.
00992  *
00993  * Note that none of this is necessarily something awful happening.  We let
00994  * the application hand us any LSN they want, and it could be a pointer into
00995  * the middle of a log record, there's no way to tell.
00996  */
00997 static int
00998 __log_c_hdrchk(logc, lsn, hdr, eofp)
00999         DB_LOGC *logc;
01000         DB_LSN *lsn;
01001         HDR *hdr;
01002         int *eofp;
01003 {
01004         DB_ENV *dbenv;
01005         int ret;
01006 
01007         dbenv = logc->dbenv;
01008 
01009         /*
01010          * Check EOF before we do any other processing.
01011          */
01012         if (eofp != NULL) {
01013                 if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) {
01014                         *eofp = 1;
01015                         return (0);
01016                 }
01017                 *eofp = 0;
01018         }
01019 
01020         /*
01021          * Sanity check the log record's size.
01022          * We must check it after "virtual" EOF above.
01023          */
01024         if (hdr->len <= hdr->size)
01025                 goto err;
01026 
01027         /*
01028          * If the cursor's max-record value isn't yet set, it means we aren't
01029          * reading these records from a log file and no check is necessary.
01030          */
01031         if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) {
01032                 /*
01033                  * If we fail the check, there's the pathological case that
01034                  * we're reading the last file, it's growing, and our initial
01035                  * check information was wrong.  Get it again, to be sure.
01036                  */
01037                 if ((ret = __log_c_set_maxrec(logc, NULL)) != 0) {
01038                         __db_err(dbenv, "DB_LOGC->get: %s", db_strerror(ret));
01039                         return (ret);
01040                 }
01041                 if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec)
01042                         goto err;
01043         }
01044         return (0);
01045 
01046 err:    if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
01047                 __db_err(dbenv,
01048                     "DB_LOGC->get: LSN %lu/%lu: invalid log record header",
01049                     (u_long)lsn->file, (u_long)lsn->offset);
01050         return (EIO);
01051 }
01052 
01053 /*
01054  * __log_c_io --
01055  *      Read records from a log file.
01056  */
01057 static int
01058 __log_c_io(logc, fnum, offset, p, nrp, eofp)
01059         DB_LOGC *logc;
01060         u_int32_t fnum, offset;
01061         void *p;
01062         size_t *nrp;
01063         int *eofp;
01064 {
01065         DB_ENV *dbenv;
01066         DB_LOG *dblp;
01067         LOG *lp;
01068         int ret;
01069         char *np;
01070 
01071         dbenv = logc->dbenv;
01072         dblp = dbenv->lg_handle;
01073         lp = dblp->reginfo.primary;
01074 
01075         /*
01076          * If we've switched files, discard the current file handle and acquire
01077          * a new one.
01078          */
01079         if (logc->c_fhp != NULL && logc->bp_lsn.file != fnum) {
01080                 ret = __os_closehandle(dbenv, logc->c_fhp);
01081                 logc->c_fhp = NULL;
01082                 logc->bp_lsn.file = 0;
01083 
01084                 if (ret != 0)
01085                         return (ret);
01086         }
01087         if (logc->c_fhp == NULL) {
01088                 if ((ret = __log_name(dblp, fnum,
01089                     &np, &logc->c_fhp, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) {
01090                         /*
01091                          * If we're allowed to return EOF, assume that's the
01092                          * problem, set the EOF status flag and return 0.
01093                          */
01094                         if (eofp != NULL) {
01095                                 *eofp = 1;
01096                                 ret = 0;
01097                         } else if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
01098                                 __db_err(dbenv, "DB_LOGC->get: %s: %s",
01099                                     np, db_strerror(ret));
01100                         __os_free(dbenv, np);
01101                         return (ret);
01102                 }
01103 
01104                 if ((ret = __log_c_set_maxrec(logc, np)) != 0) {
01105                         __db_err(dbenv,
01106                             "DB_LOGC->get: %s: %s", np, db_strerror(ret));
01107                         __os_free(dbenv, np);
01108                         return (ret);
01109                 }
01110                 __os_free(dbenv, np);
01111 
01112                 logc->bp_lsn.file = fnum;
01113         }
01114 
01115         /* Seek to the record's offset. */
01116         if ((ret = __os_seek(dbenv,
01117             logc->c_fhp, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) {
01118                 if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
01119                         __db_err(dbenv,
01120                             "DB_LOGC->get: LSN: %lu/%lu: seek: %s",
01121                             (u_long)fnum, (u_long)offset, db_strerror(ret));
01122                 return (ret);
01123         }
01124 
01125         /* Read the data. */
01126         ++lp->stat.st_rcount;
01127         if ((ret = __os_read(dbenv, logc->c_fhp, p, *nrp, nrp)) != 0) {
01128                 if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
01129                         __db_err(dbenv,
01130                             "DB_LOGC->get: LSN: %lu/%lu: read: %s",
01131                             (u_long)fnum, (u_long)offset, db_strerror(ret));
01132                 return (ret);
01133         }
01134 
01135         return (0);
01136 }
01137 
01138 /*
01139  * __log_c_shortread --
01140  *      Read was short -- return a consistent error message and error.
01141  */
01142 static int
01143 __log_c_shortread(logc, lsn, check_silent)
01144         DB_LOGC *logc;
01145         DB_LSN *lsn;
01146         int check_silent;
01147 {
01148         if (!check_silent || !F_ISSET(logc, DB_LOG_SILENT_ERR))
01149                 __db_err(logc->dbenv, "DB_LOGC->get: LSN: %lu/%lu: short read",
01150                     (u_long)lsn->file, (u_long)lsn->offset);
01151         return (EIO);
01152 }
01153 
01154 /*
01155  * __log_c_set_maxrec --
01156  *      Bound the maximum log record size in a log file.
01157  */
01158 static int
01159 __log_c_set_maxrec(logc, np)
01160         DB_LOGC *logc;
01161         char *np;
01162 {
01163         DB_ENV *dbenv;
01164         DB_LOG *dblp;
01165         LOG *lp;
01166         u_int32_t mbytes, bytes;
01167         int ret;
01168 
01169         dbenv = logc->dbenv;
01170         dblp = dbenv->lg_handle;
01171 
01172         /*
01173          * We don't want to try and allocate huge chunks of memory because
01174          * applications with error-checking malloc's often consider that a
01175          * hard failure.  If we're about to look at a corrupted record with
01176          * a bizarre size, we need to know before trying to allocate space
01177          * to hold it.  We could read the persistent data at the beginning
01178          * of the file but that's hard -- we may have to decrypt it, checksum
01179          * it and so on.  Stat the file instead.
01180          */
01181         if (logc->c_fhp != NULL) {
01182                 if ((ret = __os_ioinfo(dbenv, np, logc->c_fhp,
01183                     &mbytes, &bytes, NULL)) != 0)
01184                         return (ret);
01185                 if (logc->bp_maxrec < (mbytes * MEGABYTE + bytes))
01186                         logc->bp_maxrec = mbytes * MEGABYTE + bytes;
01187         }
01188 
01189         /*
01190          * If reading from the log file currently being written, we could get
01191          * an incorrect size, that is, if the cursor was opened on the file
01192          * when it had only a few hundred bytes, and then the cursor used to
01193          * move forward in the file, after more log records were written, the
01194          * original stat value would be wrong.  Use the maximum of the current
01195          * log file size and the size of the buffer -- that should represent
01196          * the max of any log record currently in the file.
01197          *
01198          * The log buffer size is set when the environment is opened and never
01199          * changed, we don't need a lock on it.
01200          */
01201         lp = dblp->reginfo.primary;
01202         if (logc->bp_maxrec < lp->buffer_size)
01203                 logc->bp_maxrec = lp->buffer_size;
01204 
01205         return (0);
01206 }
01207 
01208 #ifdef HAVE_REPLICATION
01209 /*
01210  * __log_rep_split --
01211  *      - Split a log buffer into individual records.
01212  *
01213  * This is used by a replication client to process a bulk log message from the
01214  * master and convert it into individual __rep_apply requests.
01215  *
01216  * PUBLIC: int __log_rep_split __P((DB_ENV *, REP_CONTROL *, DBT *, DB_LSN *));
01217  */
01218 int
01219 __log_rep_split(dbenv, rp, rec, ret_lsnp)
01220         DB_ENV *dbenv;
01221         REP_CONTROL *rp;
01222         DBT *rec;
01223         DB_LSN *ret_lsnp;
01224 {
01225         DB_LSN save_lsn, tmp_lsn;
01226         DB_REP *db_rep;
01227         DBT logrec;
01228         REP *rep;
01229         REP_CONTROL tmprp;
01230         u_int32_t len;
01231         int is_dup, is_perm, ret, save_ret;
01232         u_int8_t *p, *ep;
01233 #ifdef DIAGNOSTIC
01234         DB_MSGBUF mb;
01235 #endif
01236 
01237         memset(&logrec, 0, sizeof(logrec));
01238         memset(&save_lsn, 0, sizeof(save_lsn));
01239         memset(&tmp_lsn, 0, sizeof(tmp_lsn));
01240         /*
01241          * We're going to be modifying the rp LSN contents so make
01242          * our own private copy to play with.
01243          */
01244         memcpy(&tmprp, rp, sizeof(tmprp));
01245         /*
01246          * We send the bulk buffer on a PERM record, so often we will have
01247          * DB_LOG_PERM set.  However, we only want to mark the last LSN
01248          * we have as a PERM record.  So clear it here, and when we're on
01249          * the last record below, set it.
01250          */
01251         is_perm = F_ISSET(rp, DB_LOG_PERM);
01252         F_CLR(&tmprp, DB_LOG_PERM);
01253         ret = save_ret = 0;
01254         db_rep = dbenv->rep_handle;
01255         rep = db_rep->region;
01256         for (ep = (u_int8_t *)rec->data + rec->size, p = (u_int8_t *)rec->data;
01257             p < ep; ) {
01258                 /*
01259                  * First thing in the buffer is the length.  Then the LSN
01260                  * of this record, then the record itself.
01261                  */
01262                 /*
01263                  * XXX
01264                  * If/when we add architecture neutral log files we may want
01265                  * to send/receive these lengths in network byte order.
01266                  */
01267                 memcpy(&len, p, sizeof(len));
01268                 p += sizeof(len);
01269                 memcpy(&tmprp.lsn, p, sizeof(DB_LSN));
01270                 p += sizeof(DB_LSN);
01271                 logrec.data = p;
01272                 logrec.size = len;
01273                 RPRINT(dbenv, rep, (dbenv, &mb,
01274                     "log_rep_split: Processing LSN [%lu][%lu]",
01275                     (u_long)tmprp.lsn.file, (u_long)tmprp.lsn.offset));
01276                 RPRINT(dbenv, rep, (dbenv, &mb,
01277     "log_rep_split: p %#lx ep %#lx logrec data %#lx, size %lu (%#lx)",
01278                     P_TO_ULONG(p), P_TO_ULONG(ep), P_TO_ULONG(logrec.data),
01279                     (u_long)logrec.size, (u_long)logrec.size));
01280                 is_dup = 0;
01281                 p += len;
01282                 if (p >= ep && is_perm)
01283                         F_SET(&tmprp, DB_LOG_PERM);
01284                 ret = __rep_apply(dbenv, &tmprp, &logrec, &tmp_lsn, &is_dup);
01285                 RPRINT(dbenv, rep, (dbenv, &mb,
01286                     "log_split: rep_apply ret %d, tmp_lsn [%lu][%lu]",
01287                     ret, (u_long)tmp_lsn.file, (u_long)tmp_lsn.offset));
01288 #if 0
01289                 /*
01290                  * This buffer may be old and we've already gotten these
01291                  * records.  Short-circuit processing this buffer.
01292                  */
01293                 if (is_dup)
01294                         goto out;
01295 #endif
01296                 switch (ret) {
01297                 /*
01298                  * If we received the pieces we need for running recovery,
01299                  * short-circuit because recovery will truncate the log to
01300                  * the LSN we want anyway.
01301                  */
01302                 case DB_REP_LOGREADY:
01303                         goto out;
01304                 /*
01305                  * If we just handled a special record, retain that information.
01306                  */
01307                 case DB_REP_ISPERM:
01308                 case DB_REP_NOTPERM:
01309                 case DB_REP_STARTUPDONE:
01310                         save_ret = ret;
01311                         save_lsn = tmp_lsn;
01312                         ret = 0;
01313                         break;
01314                 /*
01315                  * Normal processing, do nothing, just continue.
01316                  */
01317                 case 0:
01318                         break;
01319                 /*
01320                  * If we get an error, then stop immediately.
01321                  */
01322                 default:
01323                         goto out;
01324                 }
01325         }
01326 out:
01327         /*
01328          * If we finish processing successfully, set our return values
01329          * based on what we saw.
01330          */
01331         if (ret == 0) {
01332                 ret = save_ret;
01333                 *ret_lsnp = save_lsn;
01334         }
01335         return (ret);
01336 }
01337 #endif

Generated on Sun Dec 25 12:14:40 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2