Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

log_put.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: log_put.c,v 12.22 2005/10/31 02:22:30 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #if TIME_WITH_SYS_TIME
00016 #include <sys/time.h>
00017 #include <time.h>
00018 #else
00019 #if HAVE_SYS_TIME_H
00020 #include <sys/time.h>
00021 #else
00022 #include <time.h>
00023 #endif
00024 #endif
00025 
00026 #include <stdio.h>
00027 #include <string.h>
00028 #endif
00029 
00030 #include "db_int.h"
00031 #include "dbinc/crypto.h"
00032 #include "dbinc/hmac.h"
00033 #include "dbinc/log.h"
00034 #include "dbinc/txn.h"
00035 
00036 static int __log_encrypt_record __P((DB_ENV *, DBT *, HDR *, u_int32_t));
00037 static int __log_file __P((DB_ENV *, const DB_LSN *, char *, size_t));
00038 static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
00039 static int __log_flush_commit __P((DB_ENV *, const DB_LSN *, u_int32_t));
00040 static int __log_newfh __P((DB_LOG *, int));
00041 static int __log_put_next __P((DB_ENV *,
00042     DB_LSN *, const DBT *, HDR *, DB_LSN *));
00043 static int __log_putr __P((DB_LOG *,
00044     DB_LSN *, const DBT *, u_int32_t, HDR *));
00045 static int __log_write __P((DB_LOG *, void *, u_int32_t));
00046 
00047 /*
00048  * __log_put_pp --
00049  *      DB_ENV->log_put pre/post processing.
       *
       *      Rejects flags outside the set listed in the __db_fchk call, rejects
       *      DB_LOG_WRNOSYNC combined with DB_FLUSH, and returns EINVAL when
       *      IS_REP_CLIENT(dbenv) is true.  Otherwise __log_put is invoked
       *      through REPLICATION_WRAP, bracketed by ENV_ENTER/ENV_LEAVE, and
       *      ret is returned.
       *
       *      NOTE(review): PANIC_CHECK, ENV_REQUIRES_CONFIG and REPLICATION_WRAP
       *      are macros defined elsewhere; presumably the first two can return
       *      early and the last stores __log_put's result in ret -- confirm.
00050  *
00051  * PUBLIC: int __log_put_pp __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
00052  */
00053 int
00054 __log_put_pp(dbenv, lsnp, udbt, flags)
00055         DB_ENV *dbenv;
00056         DB_LSN *lsnp;
00057         const DBT *udbt;
00058         u_int32_t flags;
00059 {
00060         DB_THREAD_INFO *ip;
00061         int ret;
00062
00063         PANIC_CHECK(dbenv);
00064         ENV_REQUIRES_CONFIG(dbenv,
00065             dbenv->lg_handle, "DB_ENV->log_put", DB_INIT_LOG);
00066
00067         /* Validate arguments: check for allowed flags. */
00068         if ((ret = __db_fchk(dbenv, "DB_ENV->log_put", flags,
00069             DB_LOG_CHKPNT | DB_LOG_COMMIT |
00070             DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_PERM | DB_LOG_WRNOSYNC)) != 0)
00071                 return (ret);
00072
00073         /* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */
00074         if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH))
00075                 return (__db_ferr(dbenv, "DB_ENV->log_put", 1));
00076
00077         /* Replication clients should never write log records. */
00078         if (IS_REP_CLIENT(dbenv)) {
00079                 __db_err(dbenv,
00080                     "DB_ENV->log_put is illegal on replication clients");
00081                 return (EINVAL);
00082         }
00083
              /* All argument checks are done before the environment is entered. */
00084         ENV_ENTER(dbenv, ip);
00085         REPLICATION_WRAP(dbenv, (__log_put(dbenv, lsnp, udbt, flags)), ret);
00086         ENV_LEAVE(dbenv, ip);
00087         return (ret);
00088 }
00089 
00090 /*
00091  * __log_put --
00092  *      DB_ENV->log_put.
       *
       *      dbenv: environment handle.
       *      lsnp:  out; receives the LSN assigned to the record.
       *      udbt:  the caller's record; never modified through this pointer
       *             (a local DBT copy "t" is used instead).
       *      flags: DB_FLUSH and DB_LOG_* flags (see __log_put_pp for the set
       *             accepted from the public API).
       *
       *      Outline: optionally copy the record, encrypt and checksum it
       *      before taking the log region lock, put it with __log_put_next
       *      under the lock, and on a replication master drop the lock and
       *      send the record (bulk or single message).  Then flush/write if
       *      requested, update statistics, release the lock and any copy, and
       *      finally run __log_autoremove if a file switch happened.
       *
       *      Returns 0 on success or an error value; failures reached through
       *      panic_check on a replication master are turned into __db_panic.
00093  *
00094  * PUBLIC: int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
00095  */
00096 int
00097 __log_put(dbenv, lsnp, udbt, flags)
00098         DB_ENV *dbenv;
00099         DB_LSN *lsnp;
00100         const DBT *udbt;
00101         u_int32_t flags;
00102 {
00103         DB_CIPHER *db_cipher;
00104         DBT *dbt, t;
00105         DB_LOG *dblp;
00106         DB_LSN lsn, old_lsn;
00107         DB_REP *db_rep;
00108         HDR hdr;
00109         LOG *lp;
00110         REP *rep;
00111         REP_BULK bulk;
00112         int lock_held, need_free, ret;
00113         u_int8_t *key;
00114
00115         dblp = dbenv->lg_handle;
00116         lp = dblp->reginfo.primary;
00117         db_cipher = dbenv->crypto_handle;
00118         db_rep = dbenv->rep_handle;
00119         if (db_rep != NULL)
00120                 rep = db_rep->region;
00121         else
00122                 rep = NULL;
00123
              /*
               * Work on a shallow copy of the caller's DBT; until the copy
               * below is made, t.data still points at the caller's buffer.
               * old_lsn stays zero unless __log_put_next switches files.
               */
00124         dbt = &t;
00125         t = *udbt;
00126         lock_held = need_free = 0;
00127         ZERO_LSN(old_lsn);
00128
00129         /*
00130          * If we are not a rep application, but are sharing a master rep env,
00131          * we should not be writing log records.
00132          */
00133         if (IS_REP_MASTER(dbenv) && dbenv->rep_send == NULL) {
00134                 __db_err(dbenv, "%s %s",
00135                     "Non-replication DB_ENV handle attempting",
00136                     "to modify a replicated environment");
00137                 return (EINVAL);
00138         }
00139
00140         /*
00141          * If we are coming from the logging code, we use an internal flag,
00142          * DB_LOG_NOCOPY, because we know we can overwrite/encrypt the log
00143          * record in place.  Otherwise, if a user called log_put then we
00144          * must copy it to new memory so that we know we can write it.
00145          *
00146          * We also must copy it to new memory if we are a replication master
00147          * so that we retain an unencrypted copy of the log record to send
00148          * to clients.
00149          */
00150         if (!LF_ISSET(DB_LOG_NOCOPY) || IS_REP_MASTER(dbenv)) {
00151                 if (CRYPTO_ON(dbenv))
00152                         t.size += db_cipher->adj_size(udbt->size);
00153                 if ((ret = __os_calloc(dbenv, 1, t.size, &t.data)) != 0)
00154                         goto err;
00155                 need_free = 1;
00156                 memcpy(t.data, udbt->data, udbt->size);
00157         }
00158         if ((ret = __log_encrypt_record(dbenv, dbt, &hdr, udbt->size)) != 0)
00159                 goto err;
00160         if (CRYPTO_ON(dbenv))
00161                 key = db_cipher->mac_key;
00162         else
00163                 key = NULL;
00164
00165         /* Before we grab the region lock, calculate the record's checksum. */
00166         __db_chksum(dbt->data, dbt->size, key, hdr.chksum);
00167
00168         LOG_SYSTEM_LOCK(dbenv);
00169         lock_held = 1;
00170
00171         if ((ret = __log_put_next(dbenv, &lsn, dbt, &hdr, &old_lsn)) != 0)
00172                 goto panic_check;
00173
00174         /*
00175          * Assign the return LSN before dropping the region lock.  Necessary
00176          * in case the lsn is a begin_lsn from a TXN_DETAIL structure passed
00177          * in by the logging routines.
00178          */
00179         *lsnp = lsn;
00180
00181         if (IS_REP_MASTER(dbenv)) {
00182                 /*
00183                  * Replication masters need to drop the lock to send messages,
00184                  * but want to drop and reacquire it a minimal number of times.
00185                  */
00186                 LOG_SYSTEM_UNLOCK(dbenv);
00187                 lock_held = 0;
00188
00189                 /*
00190                  * If we changed files and we're in a replicated environment,
00191                  * we need to inform our clients now that we've dropped the
00192                  * region lock.
00193                  *
00194                  * Note that a failed NEWFILE send is a dropped message that
00195                  * our client can handle, so we can ignore it.  It's possible
00196                  * that the record we already put is a commit, so we don't just
00197                  * want to return failure.
00198                  */
00199                 if (!IS_ZERO_LSN(old_lsn))
00200                         (void)__rep_send_message(dbenv, DB_EID_BROADCAST,
00201                             REP_NEWFILE, &old_lsn, NULL, 0, 0);
00202
00203                 /*
00204                  * If we're doing bulk processing put it in the bulk buffer.
00205                  */
00206                 ret = 0;
00207                 if (FLD_ISSET(rep->config, REP_C_BULK)) {
00208                         /*
00209                          * Bulk could have been turned on by another process.
00210                          * If so, set the address into the bulk region now.
00211                          */
00212                         if (db_rep->bulk == NULL)
00213                                 db_rep->bulk = R_ADDR(&dblp->reginfo,
00214                                     lp->bulk_buf);
00215                         memset(&bulk, 0, sizeof(bulk));
00216                         bulk.addr = db_rep->bulk;
00217                         bulk.offp = &lp->bulk_off;
00218                         bulk.len = lp->bulk_len;
00219                         bulk.type = REP_BULK_LOG;
00220                         bulk.eid = DB_EID_BROADCAST;
00221                         bulk.flagsp = &lp->bulk_flags;
00222                         ret = __rep_bulk_message(dbenv, &bulk, NULL,
00223                             &lsn, udbt, flags);
00224                 }
                      /*
                       * Fall back to a single message when bulk is off or the
                       * bulk call reported DB_REP_BULKOVF.
                       */
00225                 if (!FLD_ISSET(rep->config, REP_C_BULK) ||
00226                     ret == DB_REP_BULKOVF) {
00227                         /*
00228                          * Then send the log record itself on to our clients.
00229                          */
00230                         /*
00231                          * !!!
00232                          * In the crypto case, we MUST send the udbt, not the
00233                          * now-encrypted dbt.  Clients have no way to decrypt
00234                          * without the header.
00235                          */
00236                         ret = __rep_send_message(dbenv, DB_EID_BROADCAST,
00237                             REP_LOG, &lsn, udbt, flags, 0);
00238                 }
00239                 /*
00240                  * If the send fails and we're a commit or checkpoint,
00241                  * there's nothing we can do;  the record's in the log.
00242                  * Flush it, even if we're running with TXN_NOSYNC,
00243                  * on the grounds that it should be in durable
00244                  * form somewhere.
00245                  */
00246                 if (ret != 0 && LF_ISSET(DB_LOG_PERM))
00247                         LF_SET(DB_FLUSH);
                      /*
                       * NOTE(review): ret is not cleared here.  If the send
                       * failed and neither DB_FLUSH nor DB_LOG_WRNOSYNC ends up
                       * set, the non-zero send result is what this function
                       * returns, although the record has already been put --
                       * confirm callers expect that.
                       */
00248         }
00249
00250         /*
00251          * If needed, do a flush.  Note that failures at this point
00252          * are only permissible if we know we haven't written a commit
00253          * record;  __log_flush_commit is responsible for enforcing this.
00254          *
00255          * If a flush is not needed, see if WRITE_NOSYNC was set and we
00256          * need to write out the log buffer.
00257          */
00258         if (LF_ISSET(DB_FLUSH | DB_LOG_WRNOSYNC)) {
00259                 if (!lock_held) {
00260                         LOG_SYSTEM_LOCK(dbenv);
00261                         lock_held = 1;
00262                 }
00263                 if ((ret = __log_flush_commit(dbenv, &lsn, flags)) != 0)
00264                         goto panic_check;
00265         }
00266
00267         /*
00268          * If flushed a checkpoint record, reset the "bytes since the last
00269          * checkpoint" counters.
               *
               * NOTE(review): on a replication master with no flush requested,
               * lock_held is 0 here, so the statistics below are updated
               * without the region lock -- confirm this is intended.
00270          */
00271         if (LF_ISSET(DB_LOG_CHKPNT))
00272                 lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
00273
00274         /* Increment count of records added to the log. */
00275         ++lp->stat.st_record;
00276
              /*
               * The if (0) keeps the panic_check code out of the normal
               * fall-through path; it is entered only through the gotos above.
               */
00277         if (0) {
00278 panic_check:    /*
00279                  * Writing log records cannot fail if we're a replication
00280                  * master.  The reason is that once we send the record to
00281                  * replication clients, the transaction can no longer
00282                  * abort, otherwise the master would be out of sync with
00283                  * the rest of the replication group.  Panic the system.
00284                  */
00285                 if (ret != 0 && IS_REP_MASTER(dbenv))
00286                         ret = __db_panic(dbenv, ret);
00287         }
00288
              /* Common exit: release the lock and the private copy, if any. */
00289 err:    if (lock_held)
00290                 LOG_SYSTEM_UNLOCK(dbenv);
00291         if (need_free)
00292                 __os_free(dbenv, dbt->data);
00293
00294         /*
00295          * If auto-remove is set and we switched files, remove unnecessary
00296          * log files.
00297          */
00298         if (ret == 0 && !IS_ZERO_LSN(old_lsn) && lp->db_log_autoremove)
00299                 __log_autoremove(dbenv);
00300
00301         return (ret);
00302 }
00303 
00304 /*
00305  * __log_current_lsn --
00306  *      Return the current LSN.
       *
       *      lsnp:    out; receives the LSN computed below.
       *      mbytesp: out, may be NULL; receives lp->stat.st_wc_mbytes.
       *      bytesp:  out; receives lp->stat.st_wc_bytes plus the current
       *               buffer offset.  Written only when mbytesp is non-NULL.
       *
       *      Everything is read under LOG_SYSTEM_LOCK.  Always returns 0.
00307  *
00308  * PUBLIC: int __log_current_lsn
00309  * PUBLIC:     __P((DB_ENV *, DB_LSN *, u_int32_t *, u_int32_t *));
00310  */
00311 int
00312 __log_current_lsn(dbenv, lsnp, mbytesp, bytesp)
00313         DB_ENV *dbenv;
00314         DB_LSN *lsnp;
00315         u_int32_t *mbytesp, *bytesp;
00316 {
00317         DB_LOG *dblp;
00318         LOG *lp;
00319
00320         dblp = dbenv->lg_handle;
00321         lp = dblp->reginfo.primary;
00322
00323         LOG_SYSTEM_LOCK(dbenv);
00324
00325         /*
00326          * We are trying to get the LSN of the last entry in the log.  We use
00327          * this in three places: 1) DB_ENV->txn_checkpoint uses it as a first
00328          * value when trying to compute an LSN such that all transactions begun
00329          * before it are complete.   2) DB_ENV->txn_begin uses it as the
00330          * begin_lsn. 3) While opening a file to see if we've gotten rid of
00331          * too many log files.
00332          *
00333          * Typically, it's easy to get the last written LSN, you simply look
00334          * at the current log pointer and back up the number of bytes of the
00335          * last log record.  However, if the last thing we did was write the
00336          * log header of a new log file, then, this doesn't work, so we return
00337          * the first log record that will be written in this new file.
00338          */
00339         *lsnp = lp->lsn;
00340         if (lp->lsn.offset > lp->len)
00341                 lsnp->offset -= lp->len;
00342
00343         /*
00344          * Since we're holding the log region lock, return the bytes put into
00345          * the log since the last checkpoint, transaction checkpoint needs it.
00346          *
00347          * We add the current buffer offset so as to count bytes that have not
00348          * yet been written, but are sitting in the log buffer.
               *
               * Only mbytesp is tested: bytesp is dereferenced whenever mbytesp
               * is non-NULL, so callers must pass both or neither.
00349          */
00350         if (mbytesp != NULL) {
00351                 *mbytesp = lp->stat.st_wc_mbytes;
00352                 *bytesp = (u_int32_t)(lp->stat.st_wc_bytes + lp->b_off);
00353         }
00354
00355         LOG_SYSTEM_UNLOCK(dbenv);
00356
00357         return (0);
00358 }
00359 
00360 /*
00361  * __log_put_next --
00362  *      Put the given record as the next in the log, wherever that may
00363  * turn out to be.
       *
       *      dbenv:    environment whose lg_handle is written to.
       *      lsn:      out; LSN at which the record is placed.
       *      dbt, hdr: record body and header, passed on to __log_putr.
       *      old_lsnp: out; set to the pre-switch lp->lsn only when a new log
       *                file was started, otherwise left untouched.
       *
       *      Returns 0 on success, EINVAL if the record cannot fit in a log
       *      file, or the error from __log_newfile/__log_putr.
       *
       *      The caller in this file, __log_put, holds the log region lock
       *      across this call.
00364  */
00365 static int
00366 __log_put_next(dbenv, lsn, dbt, hdr, old_lsnp)
00367         DB_ENV *dbenv;
00368         DB_LSN *lsn;
00369         const DBT *dbt;
00370         HDR *hdr;
00371         DB_LSN *old_lsnp;
00372 {
00373         DB_LOG *dblp;
00374         DB_LSN old_lsn;
00375         LOG *lp;
00376         int newfile, ret;
00377
00378         dblp = dbenv->lg_handle;
00379         lp = dblp->reginfo.primary;
00380
00381         /*
00382          * Save a copy of lp->lsn before we might decide to switch log
00383          * files and change it.  If we do switch log files, and we're
00384          * doing replication, we'll need to tell our clients about the
00385          * switch, and they need to receive a NEWFILE message
00386          * with this "would-be" LSN in order to know they're not
00387          * missing any log records.
00388          */
00389         old_lsn = lp->lsn;
00390         newfile = 0;
00391
00392         /*
00393          * If this information won't fit in the file, or if we're a
00394          * replication client environment and have been told to do so,
00395          * swap files.
               *
               * NOTE(review): the condition below tests only for a zero offset
               * and for the record not fitting; no replication-client check is
               * visible here -- confirm whether the sentence above is stale.
00396          */
00397         if (lp->lsn.offset == 0 ||
00398             lp->lsn.offset + hdr->size + dbt->size > lp->log_size) {
                      /*
                       * Even an empty file holds a LOGP record first, so the
                       * record must fit next to it.  The whole sum is cast for
                       * the %lu conversion: sizeof yields a size_t, which need
                       * not have the same width as u_long.
                       */
00399                 if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_size) {
00400                         __db_err(dbenv,
00401             "DB_ENV->log_put: record larger than maximum file size (%lu > %lu)",
00402                             (u_long)(hdr->size + sizeof(LOGP) + dbt->size),
00403                             (u_long)lp->log_size);
00404                         return (EINVAL);
00405                 }
00406
00407                 if ((ret = __log_newfile(dblp, NULL, 0)) != 0)
00408                         return (ret);
00409
00410                 /*
00411                  * Flag that we switched files, in case we're a master
00412                  * and need to send this information to our clients.
00413                  * We postpone doing the actual send until we can
00414                  * safely release the log region lock and are doing so
00415                  * anyway.
00416                  */
00417                 newfile = 1;
00418         }
00419
00420         /*
00421          * The offset into the log file at this point is the LSN where
00422          * we're about to put this record, and is the LSN the caller wants.
00423          */
00424         *lsn = lp->lsn;
00425
00426         /* If we switched log files, let our caller know where. */
00427         if (newfile)
00428                 *old_lsnp = old_lsn;
00429
00430         /* Actually put the record. */
00431         return (__log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len, hdr));
00432 }
00433 
00434 /*
00435  * __log_flush_commit --
00436  *      Flush a record.
       *
       *      dbenv: environment handle.
       *      lsnp:  LSN of the record just put; copied to a local flush_lsn.
       *      flags: the log_put flags; DB_FLUSH, DB_LOG_WRNOSYNC and
       *             DB_LOG_COMMIT are examined here.
       *
       *      Returns 0 on success.  On failure the error is returned, except
       *      that for a DB_LOG_COMMIT record whose LSN is in a different file
       *      than lp->lsn or below lp->w_off the failure is ignored and 0 is
       *      returned.
00437  */
00438 static int
00439 __log_flush_commit(dbenv, lsnp, flags)
00440         DB_ENV *dbenv;
00441         const DB_LSN *lsnp;
00442         u_int32_t flags;
00443 {
00444         DB_LOG *dblp;
00445         DB_LSN flush_lsn;
00446         LOG *lp;
00447         int ret;
00448
00449         dblp = dbenv->lg_handle;
00450         lp = dblp->reginfo.primary;
00451         flush_lsn = *lsnp;
00452
00453         ret = 0;
00454
00455         /*
00456          * DB_FLUSH:
00457          *      Flush a record for which the DB_FLUSH flag to log_put was set.
00458          *
00459          * DB_LOG_WRNOSYNC:
00460          *      If there's anything in the current log buffer, write it out.
               *
               * The else branch is taken for any call without DB_FLUSH; it
               * does nothing for in-memory logs or an empty buffer, and clears
               * lp->b_off only after a successful write.
00461          */
00462         if (LF_ISSET(DB_FLUSH))
00463                 ret = __log_flush_int(dblp, &flush_lsn, 1);
00464         else if (!lp->db_log_inmemory && lp->b_off != 0)
00465                 if ((ret = __log_write(dblp,
00466                     dblp->bufp, (u_int32_t)lp->b_off)) == 0)
00467                         lp->b_off = 0;
00468
00469         /*
00470          * If a flush supporting a transaction commit fails, we must abort the
00471          * transaction.  (If we aren't doing a commit, return the failure;
00472          * if the commit we care about made it to disk successfully, we just
00473          * ignore the failure, because there's no way to undo the commit.)
00474          */
00475         if (ret == 0 || !LF_ISSET(DB_LOG_COMMIT))
00476                 return (ret);
00477
00478         if (flush_lsn.file != lp->lsn.file || flush_lsn.offset < lp->w_off)
00479                 return (0);
00480
00481         /*
00482          * Else, make sure that the commit record does not get out after we
00483          * abort the transaction.  Do this by overwriting the commit record
00484          * in the buffer.  (Note that other commits in this buffer will wait
00485          * until a successful write happens, we do not wake them.)  We point
00486          * at the right part of the buffer and write an abort record over the
00487          * commit.  We must then try and flush the buffer again, since the
00488          * interesting part of the buffer may have actually made it out to
00489          * disk before there was a failure, we can't know for sure.
00490          */
00491         if (__txn_force_abort(dbenv,
00492             dblp->bufp + flush_lsn.offset - lp->w_off) == 0)
00493                 (void)__log_flush_int(dblp, &flush_lsn, 0);
00494
              /* The original flush error is returned; the retry result is discarded. */
00495         return (ret);
00496 }
00497 
00498 /*
00499  * __log_newfile --
00500  *      Initialize and switch to a new log file.  (Note that this is
00501  * called both when no log yet exists and when we fill a log file.)
       *
       *      dblp:    log handle.
       *      lsnp:    if non-NULL, receives lp->lsn after the persistent
       *               header record has been put.
       *      logfile: if non-zero, the file number to switch to; 0 means
       *               "advance to the next file", which is only done when the
       *               current offset is non-zero.
       *
       *      Returns 0 on success, or the first error from the flush, file
       *      handle, in-memory setup, allocation, encryption or put steps.
00502  *
00503  * PUBLIC: int __log_newfile __P((DB_LOG *, DB_LSN *, u_int32_t));
00504  */
00505 int
00506 __log_newfile(dblp, lsnp, logfile)
00507         DB_LOG *dblp;
00508         DB_LSN *lsnp;
00509         u_int32_t logfile;
00510 {
00511         DB_CIPHER *db_cipher;
00512         DB_ENV *dbenv;
00513         DB_LSN lsn;
00514         DBT t;
00515         HDR hdr;
00516         LOG *lp;
00517         int need_free, ret;
00518         u_int32_t lastoff;
00519         size_t tsize;
00520         u_int8_t *tmp;
00521
00522         dbenv = dblp->dbenv;
00523         lp = dblp->reginfo.primary;
00524
00525         /*
00526          * If we're not specifying a specific log file number and we're
00527          * not at the beginning of a file already, start a new one.
00528          */
00529         if (logfile == 0 && lp->lsn.offset != 0) {
00530                 /*
00531                  * Flush the log so this file is out and can be closed.  We
00532                  * cannot release the region lock here because we need to
00533                  * protect the end of the file while we switch.  In
00534                  * particular, a thread with a smaller record than ours
00535                  * could detect that there is space in the log. Even
00536                  * blocking that event by declaring the file full would
00537                  * require all threads to wait here so that the lsn.file
00538                  * can be moved ahead after the flush completes.  This
00539                  * probably can be changed if we had an lsn for the
00540                  * previous file and one for the current, but it does not
00541                  * seem like this would get much more throughput, if any.
00542                  */
00543                 if ((ret = __log_flush_int(dblp, NULL, 0)) != 0)
00544                         return (ret);
00545
00546                 /*
00547                  * Save the last known offset from the previous file, we'll
00548                  * need it to initialize the persistent header information.
00549                  */
00550                 lastoff = lp->lsn.offset;
00551
00552                 /* Point the current LSN to the new file. */
00553                 ++lp->lsn.file;
00554                 lp->lsn.offset = 0;
00555
00556                 /* Reset the file write offset. */
00557                 lp->w_off = 0;
00558         } else
00559                 lastoff = 0;
00560
00561         /*
00562          * Replication may require we reset the log file name space entirely.
00563          * In that case we also force a file switch so that replication can
00564          * clean up old files.
00565          */
00566         if (logfile != 0) {
00567                 lp->lsn.file = logfile;
00568                 lp->lsn.offset = 0;
00569                 lp->w_off = 0;
00570                 if ((ret = __log_newfh(dblp, 1)) != 0)
00571                         return (ret);
00572         }
00573
00574         DB_ASSERT(lp->db_log_inmemory || lp->b_off == 0);
00575         if (lp->db_log_inmemory &&
00576             (ret = __log_inmem_newfile(dblp, lp->lsn.file)) != 0)
00577                 return (ret);
00578
00579         /*
00580          * Insert persistent information as the first record in every file.
00581          * Note that the previous length is wrong for the very first record
00582          * of the log, but that's okay, we check for it during retrieval.
00583          */
00584         memset(&t, 0, sizeof(t));
00585         memset(&hdr, 0, sizeof(HDR));
00586
              /*
               * Build the LOGP record in a scratch buffer, sized to leave room
               * for the cipher's adjustment when encryption is on.
               */
00587         need_free = 0;
00588         tsize = sizeof(LOGP);
00589         db_cipher = dbenv->crypto_handle;
00590         if (CRYPTO_ON(dbenv))
00591                 tsize += db_cipher->adj_size(tsize);
00592         if ((ret = __os_calloc(dbenv, 1, tsize, &tmp)) != 0)
00593                 return (ret);
              /*
               * Copy log_nsize into both the region's log_size and the
               * persistent header before the header is copied out.
               */
00594         lp->persist.log_size = lp->log_size = lp->log_nsize;
00595         memcpy(tmp, &lp->persist, sizeof(LOGP));
00596         t.data = tmp;
00597         t.size = (u_int32_t)tsize;
00598         need_free = 1;
00599
00600         if ((ret =
00601             __log_encrypt_record(dbenv, &t, &hdr, (u_int32_t)tsize)) != 0)
00602                 goto err;
00603         __db_chksum(t.data, t.size,
00604             (CRYPTO_ON(dbenv)) ? db_cipher->mac_key : NULL, hdr.chksum);
00605         lsn = lp->lsn;
              /*
               * prev is 0 when no file was closed above; otherwise it is the
               * old end-of-file offset minus lp->len, the length __log_putr
               * recorded for the last record put.
               */
00606         if ((ret = __log_putr(dblp, &lsn,
00607             &t, lastoff == 0 ? 0 : lastoff - lp->len, &hdr)) != 0)
00608                 goto err;
00609
00610         /* Update the LSN information returned to the caller. */
00611         if (lsnp != NULL)
00612                 *lsnp = lp->lsn;
00613
00614 err:    if (need_free)
00615                 __os_free(dbenv, tmp);
00616         return (ret);
00617 }
00618 
00619 /*
00620  * __log_putr --
00621  *      Actually put a record into the log.
       *
       *      dblp: log handle.
       *      lsn:  LSN passed through to __log_fill for header and data.
       *      dbt:  record body.
       *      prev: value stored in the header's prev field.
       *      h:    record header, or NULL to use a zeroed local header whose
       *            size is chosen by CRYPTO_ON(dbenv).
       *
       *      On success lp->len is set to, and lp->lsn.offset advanced by, the
       *      header plus data size, and 0 is returned.  On failure lp->w_off,
       *      lp->b_off and lp->f_lsn are restored to their values on entry and
       *      the error is returned; a failure while re-reading the first
       *      buffer goes through __db_panic instead.
00622  */
00623 static int
00624 __log_putr(dblp, lsn, dbt, prev, h)
00625         DB_LOG *dblp;
00626         DB_LSN *lsn;
00627         const DBT *dbt;
00628         u_int32_t prev;
00629         HDR *h;
00630 {
00631         DB_CIPHER *db_cipher;
00632         DB_ENV *dbenv;
00633         DB_LSN f_lsn;
00634         LOG *lp;
00635         HDR tmp, *hdr;
00636         int ret, t_ret;
00637         size_t b_off, nr;
00638         u_int32_t w_off;
00639
00640         dbenv = dblp->dbenv;
00641         lp = dblp->reginfo.primary;
00642
00643         /*
00644          * If we weren't given a header, use a local one.
00645          */
00646         db_cipher = dbenv->crypto_handle;
00647         if (h == NULL) {
00648                 hdr = &tmp;
00649                 memset(hdr, 0, sizeof(HDR));
00650                 if (CRYPTO_ON(dbenv))
00651                         hdr->size = HDR_CRYPTO_SZ;
00652                 else
00653                         hdr->size = HDR_NORMAL_SZ;
00654         } else
00655                 hdr = h;
00656
00657         /* Save our position in case we fail. */
00658         b_off = lp->b_off;
00659         w_off = lp->w_off;
00660         f_lsn = lp->f_lsn;
00661
00662         /*
00663          * Initialize the header.  If we just switched files, lsn.offset will
00664          * be 0, and what we really want is the offset of the previous record
00665          * in the previous file.  Fortunately, prev holds the value we want.
00666          */
00667         hdr->prev = prev;
00668         hdr->len = (u_int32_t)hdr->size + dbt->size;
00669
00670         /*
00671          * If we were passed in a nonzero checksum, our caller calculated
00672          * the checksum before acquiring the log mutex, as an optimization.
00673          *
00674          * If our caller calculated a real checksum of 0, we'll needlessly
00675          * recalculate it.  C'est la vie;  there's no out-of-bounds value
00676          * here.
               *
               * Only the first element of the checksum array is examined.
00677          */
00678         if (hdr->chksum[0] == 0)
00679                 __db_chksum(dbt->data, dbt->size,
00680                     (CRYPTO_ON(dbenv)) ? db_cipher->mac_key : NULL,
00681                     hdr->chksum);
00682
00683         if (lp->db_log_inmemory && (ret = __log_inmem_chkspace(dblp,
00684             (u_int32_t)hdr->size + dbt->size)) != 0)
00685                 goto err;
00686
              /* Header first, then the record body. */
00687         if ((ret = __log_fill(dblp, lsn, hdr, (u_int32_t)hdr->size)) != 0)
00688                 goto err;
00689
00690         if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
00691                 goto err;
00692
00693         lp->len = (u_int32_t)(hdr->size + dbt->size);
00694         lp->lsn.offset += (u_int32_t)(hdr->size + dbt->size);
00695         return (0);
00696 err:
00697         /*
00698          * If we wrote more than one buffer before failing, get the
00699          * first one back.  The extra buffers will fail the checksums
00700          * and be ignored.
               *
               * The saved b_off bytes are re-read from the saved file offset
               * w_off into the log buffer.
00701          */
00702         if (w_off + lp->buffer_size < lp->w_off) {
00703                 DB_ASSERT(!lp->db_log_inmemory);
00704                 if ((t_ret = __os_seek(dbenv,
00705                     dblp->lfhp, 0, 0, w_off, 0, DB_OS_SEEK_SET)) != 0 ||
00706                     (t_ret = __os_read(dbenv, dblp->lfhp, dblp->bufp,
00707                     b_off, &nr)) != 0)
00708                         return (__db_panic(dbenv, t_ret));
00709                 if (nr != b_off) {
00710                         __db_err(dbenv, "Short read while restoring log");
00711                         return (__db_panic(dbenv, EIO));
00712                 }
00713         }
00714
00715         /* Reset to where we started. */
00716         lp->w_off = w_off;
00717         lp->b_off = b_off;
00718         lp->f_lsn = f_lsn;
00719
00720         return (ret);
00721 }
00722 
00723 /*
00724  * __log_flush_pp --
00725  *      DB_ENV->log_flush pre/post processing.
       *
       *      Runs __log_flush through REPLICATION_WRAP between ENV_ENTER and
       *      ENV_LEAVE and returns ret.  lsn is passed through unchanged; a
       *      NULL lsn is accepted by __log_flush.
       *
       *      NOTE(review): PANIC_CHECK and ENV_REQUIRES_CONFIG are macros
       *      defined elsewhere; presumably they can return early -- confirm.
00726  *
00727  * PUBLIC: int __log_flush_pp __P((DB_ENV *, const DB_LSN *));
00728  */
00729 int
00730 __log_flush_pp(dbenv, lsn)
00731         DB_ENV *dbenv;
00732         const DB_LSN *lsn;
00733 {
00734         DB_THREAD_INFO *ip;
00735         int ret;
00736
00737         PANIC_CHECK(dbenv);
00738         ENV_REQUIRES_CONFIG(dbenv,
00739             dbenv->lg_handle, "DB_ENV->log_flush", DB_INIT_LOG);
00740
00741         ENV_ENTER(dbenv, ip);
00742         REPLICATION_WRAP(dbenv, (__log_flush(dbenv, lsn)), ret);
00743         ENV_LEAVE(dbenv, ip);
00744         return (ret);
00745 }
00746 
00747 /*
00748  * See if we need to wait.  s_lsn is not locked so some care is needed.
00749  * The sync point can only move forward.  The lsnp->file cannot be
00750  * greater than the s_lsn.file.  If the file we want is in the past
00751  * we are done.  If the file numbers are the same check the offset.
00752  * This all assumes we can read a 32-bit quantity in one state or
00753  * the other, not in transition.
       *
       * The macro is true when s_lsn is in a later file than lsnp, or in the
       * same file at a strictly greater offset.  Both arguments are evaluated
       * more than once.
00754  */
00755 #define ALREADY_FLUSHED(lp, lsnp)                               \
00756         (((lp)->s_lsn.file > (lsnp)->file) ||           \
00757         ((lp)->s_lsn.file == (lsnp)->file &&            \
00758             (lp)->s_lsn.offset > (lsnp)->offset))
00759 
00760 /*
00761  * __log_flush --
00762  *      DB_ENV->log_flush
       *
       *      dbenv: environment handle.
       *      lsn:   LSN to flush through, or NULL (passed on to
       *             __log_flush_int, which treats NULL as "flush the entire
       *             log").
       *
       *      Returns 0 without taking the lock when ALREADY_FLUSHED is true
       *      for a non-NULL lsn; otherwise returns the result of
       *      __log_flush_int, called with release == 1 under LOG_SYSTEM_LOCK.
00763  *
00764  * PUBLIC: int __log_flush __P((DB_ENV *, const DB_LSN *));
00765  */
00766 int
00767 __log_flush(dbenv, lsn)
00768         DB_ENV *dbenv;
00769         const DB_LSN *lsn;
00770 {
00771         DB_LOG *dblp;
00772         LOG *lp;
00773         int ret;
00774
00775         dblp = dbenv->lg_handle;
00776         lp = dblp->reginfo.primary;
              /* Unlocked fast path; see the comment above ALREADY_FLUSHED. */
00777         if (lsn != NULL && ALREADY_FLUSHED(lp, lsn))
00778                 return (0);
00779         LOG_SYSTEM_LOCK(dbenv);
00780         ret = __log_flush_int(dblp, lsn, 1);
00781         LOG_SYSTEM_UNLOCK(dbenv);
00782         return (ret);
00783 }
00784 
00785 /*
00786  * __log_flush_int --
00787  *      Write all records less than or equal to the specified LSN; internal
00788  *      version.  Called with the log region locked, as __log_flush does.
00789  *
00790  * PUBLIC: int __log_flush_int __P((DB_LOG *, const DB_LSN *, int));
00791  */
00792 int
00793 __log_flush_int(dblp, lsnp, release)
00794         DB_LOG *dblp;
00795         const DB_LSN *lsnp;     /* NULL means flush the entire log. */
00796         int release;            /* Non-zero: may drop the region lock. */
00797 {
00798         struct __db_commit *commit;
00799         DB_ENV *dbenv;
00800         DB_LSN flush_lsn, f_lsn;
00801         LOG *lp;
00802         size_t b_off;
00803         u_int32_t ncommit, w_off;
00804         int do_flush, first, ret;
00805 
00806         dbenv = dblp->dbenv;
00807         lp = dblp->reginfo.primary;
00808         ncommit = 0;
00809         ret = 0;
00810 
00811         if (lp->db_log_inmemory) {      /* No file: just advance s_lsn. */
00812                 lp->s_lsn = lp->lsn;
00813                 ++lp->stat.st_scount;
00814                 return (0);
00815         }
00816 
00817         /*
00818          * If no LSN specified, flush the entire log by setting the flush LSN
00819          * to the last LSN written in the log.  Otherwise, check that the LSN
00820          * isn't a non-existent record for the log.
00821          */
00822         if (lsnp == NULL) {
00823                 flush_lsn.file = lp->lsn.file;
00824                 flush_lsn.offset = lp->lsn.offset - lp->len;
00825         } else if (lsnp->file > lp->lsn.file ||
00826             (lsnp->file == lp->lsn.file &&
00827             lsnp->offset > lp->lsn.offset - lp->len)) {
00828                 __db_err(dbenv,
00829     "DB_ENV->log_flush: LSN of %lu/%lu past current end-of-log of %lu/%lu",
00830                     (u_long)lsnp->file, (u_long)lsnp->offset,
00831                     (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
00832                 __db_err(dbenv, "%s %s %s",
00833                     "Database environment corrupt; the wrong log files may",
00834                     "have been removed or incompatible database files imported",
00835                     "from another environment");
00836                 return (__db_panic(dbenv, DB_RUNRECOVERY));
00837         } else {
00838                 if (ALREADY_FLUSHED(lp, lsnp))
00839                         return (0);
00840                 flush_lsn = *lsnp;
00841         }
00842 
00843         /*
00844          * If a flush is in progress and we're allowed to do so, drop
00845          * the region lock and block waiting for the next flush.
00846          */
00847         if (release && lp->in_flush != 0) {
00848                 if ((commit = SH_TAILQ_FIRST(
00849                     &lp->free_commits, __db_commit)) == NULL) {
00850                         if ((ret = __db_shalloc(&dblp->reginfo,
00851                             sizeof(struct __db_commit), 0, &commit)) != 0)
00852                                 goto flush;
00853                         memset(commit, 0, sizeof(*commit));
00854                         if ((ret = __mutex_alloc(dbenv, MTX_TXN_COMMIT,
00855                             DB_MUTEX_SELF_BLOCK, &commit->mtx_txnwait)) != 0) {
00856                                 __db_shalloc_free(&dblp->reginfo, commit);
00857                                 return (ret);
00858                         }
00859                         MUTEX_LOCK(dbenv, commit->mtx_txnwait);
00860                 } else
00861                         SH_TAILQ_REMOVE(
00862                             &lp->free_commits, commit, links, __db_commit);
00863 
00864                 lp->ncommit++;
00865 
00866                 /*
00867                  * Flushes may be requested out of LSN order;  be
00868                  * sure we only move lp->t_lsn forward.
00869                  */
00870                 if (log_compare(&lp->t_lsn, &flush_lsn) < 0)
00871                         lp->t_lsn = flush_lsn;
00872 
00873                 commit->lsn = flush_lsn;
00874                 SH_TAILQ_INSERT_HEAD(
00875                     &lp->commits, commit, links, __db_commit);
00876                 LOG_SYSTEM_UNLOCK(dbenv);
00877                 /* Wait here for the in-progress flush to finish. */
00878                 MUTEX_LOCK(dbenv, commit->mtx_txnwait);
00879                 LOG_SYSTEM_LOCK(dbenv);
00880 
00881                 lp->ncommit--;
00882                 /*
00883                  * Grab the flag before freeing the struct to see if
00884                  * we need to flush the log to commit.  If so,
00885                  * use the maximal lsn for any committing thread.
00886                  */
00887                 do_flush = F_ISSET(commit, DB_COMMIT_FLUSH);
00888                 F_CLR(commit, DB_COMMIT_FLUSH);
00889                 SH_TAILQ_INSERT_HEAD(
00890                     &lp->free_commits, commit, links, __db_commit);
00891                 if (do_flush) {
00892                         lp->in_flush--;
00893                         flush_lsn = lp->t_lsn;
00894                 } else
00895                         return (0);
00896         }
00897 
00898         /*
00899          * Protect flushing with its own mutex so we can release
00900          * the region lock except during file switches.
00901          */
00902 flush:  MUTEX_LOCK(dbenv, lp->mtx_flush);
00903 
00904         /*
00905          * If the LSN is less than or equal to the last-sync'd LSN, we're done.
00906          * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte
00907          * after the byte we absolutely know was written to disk, so the test
00908          * is <, not <=.  NOTE(review): ret can still hold a __db_shalloc error.
00909          */
00910         if (flush_lsn.file < lp->s_lsn.file ||
00911             (flush_lsn.file == lp->s_lsn.file &&
00912             flush_lsn.offset < lp->s_lsn.offset)) {
00913                 MUTEX_UNLOCK(dbenv, lp->mtx_flush);
00914                 goto done;
00915         }
00916 
00917         /*
00918          * We may need to write the current buffer.  We have to write the
00919          * current buffer if the flush LSN is greater than or equal to the
00920          * buffer's starting LSN.
00921          *
00922          * Otherwise, it's still possible that this thread may never have
00923          * written to this log file.  Acquire a file descriptor if we don't
00924          * already have one.
00925          */
00926         if (lp->b_off != 0 && log_compare(&flush_lsn, &lp->f_lsn) >= 0) {
00927                 if ((ret = __log_write(dblp,
00928                     dblp->bufp, (u_int32_t)lp->b_off)) != 0) {
00929                         MUTEX_UNLOCK(dbenv, lp->mtx_flush);
00930                         goto done;
00931                 }
00932 
00933                 lp->b_off = 0;
00934         } else if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file)
00935                 if ((ret = __log_newfh(dblp, 0)) != 0) {
00936                         MUTEX_UNLOCK(dbenv, lp->mtx_flush);
00937                         goto done;
00938                 }
00939 
00940         /*
00941          * We are going to flush, release the region.
00942          * First get the current state of the buffer since
00943          * another write may come in, but we may not flush it.
00944          */
00945         b_off = lp->b_off;
00946         w_off = lp->w_off;
00947         f_lsn = lp->f_lsn;
00948         lp->in_flush++;
00949         if (release)
00950                 LOG_SYSTEM_UNLOCK(dbenv);
00951 
00952         /* Sync all writes to disk. */
00953         if ((ret = __os_fsync(dbenv, dblp->lfhp)) != 0) {
00954                 MUTEX_UNLOCK(dbenv, lp->mtx_flush);
00955                 if (release)
00956                         LOG_SYSTEM_LOCK(dbenv);
00957                 ret = __db_panic(dbenv, ret);
00958                 return (ret);
00959         }
00960 
00961         /*
00962          * Set the last-synced LSN.
00963          * This value must be set to the LSN past the last complete
00964          * record that has been flushed.  This is at least the first
00965          * lsn, f_lsn.  If the buffer is empty, b_off == 0, then
00966          * we can move up to write point since the first lsn is not
00967          * set for the new buffer.
00968          */
00969         lp->s_lsn = f_lsn;
00970         if (b_off == 0)
00971                 lp->s_lsn.offset = w_off;
00972 
00973         MUTEX_UNLOCK(dbenv, lp->mtx_flush);
00974         if (release)
00975                 LOG_SYSTEM_LOCK(dbenv);
00976 
00977         lp->in_flush--;
00978         ++lp->stat.st_scount;
00979 
00980         /*
00981          * How many flush calls (usually commits) did this call actually sync?
00982          * At least one, if it got here.
00983          */
00984         ncommit = 1;
00985 done:
00986         if (lp->ncommit != 0) {
00987                 first = 1;
00988                 for (commit = SH_TAILQ_FIRST(&lp->commits, __db_commit);
00989                     commit != NULL;
00990                     commit = SH_TAILQ_NEXT(commit, links, __db_commit))
00991                         if (log_compare(&lp->s_lsn, &commit->lsn) > 0) {
00992                                 MUTEX_UNLOCK(dbenv, commit->mtx_txnwait);
00993                                 SH_TAILQ_REMOVE(
00994                                     &lp->commits, commit, links, __db_commit);
00995                                 ncommit++;
00996                         } else if (first == 1) {
00997                                 F_SET(commit, DB_COMMIT_FLUSH);
00998                                 MUTEX_UNLOCK(dbenv, commit->mtx_txnwait);
00999                                 SH_TAILQ_REMOVE(
01000                                     &lp->commits, commit, links, __db_commit);
01001                                 /*
01002                                  * The first waiter not covered by this sync
01003                                  * will wake and flush.  If another thread
01004                                  * commits and flushes first we will waste a
01005                                  * trip through the mutex.
01006                                  */
01007                                 lp->in_flush++;
01008                                 first = 0;
01009                         }
01010         }
01011         if (lp->stat.st_maxcommitperflush < ncommit)
01012                 lp->stat.st_maxcommitperflush = ncommit;
01013         if (lp->stat.st_mincommitperflush > ncommit ||
01014             lp->stat.st_mincommitperflush == 0)
01015                 lp->stat.st_mincommitperflush = ncommit;
01016 
01017         return (ret);
01018 }
01019 
01020 /*
01021  * __log_fill --
01022  *      Write information into the log, buffering anything short of a block.
01023  */
01024 static int
01025 __log_fill(dblp, lsn, addr, len)
01026         DB_LOG *dblp;
01027         DB_LSN *lsn;
01028         void *addr;
01029         u_int32_t len;
01030 {
01031         LOG *region;
01032         u_int8_t *src;
01033         u_int32_t blksz, direct;
01034         size_t chunk, room;
01035         int ret;
01036 
01037         region = dblp->reginfo.primary;
01038         blksz = region->buffer_size;
01039 
01040         /* In-memory logs: copy in, then advance the offset with wrap-around. */
01041         if (region->db_log_inmemory) {
01042                 __log_inmem_copyin(dblp, region->b_off, addr, len);
01043                 region->b_off = (region->b_off + len) % region->buffer_size;
01044                 return (0);
01045         }
01046 
01047         for (src = addr; len > 0;) {
01048                 if (region->b_off == 0) {
01049                         /* New buffer: note the LSN owning its first byte. */
01050                         region->f_lsn = *lsn;
01051 
01052                         /*
01053                          * On a buffer boundary with enough data, write as
01054                          * many whole buffers as we can straight from the
01055                          * caller's memory, skipping the copy.
01056                          */
01057                         if (len >= blksz) {
01058                                 direct = (len / blksz) * blksz;
01059                                 ret = __log_write(dblp, src, direct);
01060                                 if (ret != 0)
01061                                         return (ret);
01062                                 src += direct;
01063                                 len -= direct;
01064                                 ++region->stat.st_wcount_fill;
01065                                 continue;
01066                         }
01067                 }
01068 
01069                 /* Copy whatever fits in the space left in the buffer. */
01070                 room = blksz - region->b_off;
01071                 chunk = room > len ? len : room;
01072                 memcpy(dblp->bufp + region->b_off, src, chunk);
01073                 src += chunk;
01074                 len -= (u_int32_t)chunk;
01075                 region->b_off += chunk;
01076 
01077                 /* Only a completely full buffer is written out here. */
01078                 if (region->b_off != blksz)
01079                         continue;
01080                 if ((ret = __log_write(dblp, dblp->bufp, blksz)) != 0)
01081                         return (ret);
01082                 region->b_off = 0;
01083                 ++region->stat.st_wcount_fill;
01084         }
01085         return (0);
01086 }
01087 
01088 /*
01089  * __log_write --
01090  *      Write the log buffer to disk at the current write offset.
01091  */
01092 static int
01093 __log_write(dblp, addr, len)
01094         DB_LOG *dblp;
01095         void *addr;
01096         u_int32_t len;
01097 {
01098         DB_ENV *dbenv;
01099         LOG *region;
01100         size_t nwritten;
01101         int extend, ret;
01102 
01103         dbenv = dblp->dbenv;
01104         region = dblp->reginfo.primary;
01105 
01106         DB_ASSERT(!region->db_log_inmemory);
01107 
01108         /*
01109          * Get a handle if no log file is open or the current file changed;
01110          * a zero write offset means we're creating the file.
01111          */
01112         if (dblp->lfhp == NULL || dblp->lfname != region->lsn.file) {
01113                 ret = __log_newfh(dblp, region->w_off == 0);
01114                 if (ret != 0)
01115                         return (ret);
01116         }
01117 
01118         /*
01119          * If we're writing the first block in a log file on a filesystem that
01120          * guarantees unwritten blocks are zero-filled, set the size of the file
01121          * in advance.  This increases sync performance on some systems, because
01122          * they don't need to update metadata on every sync.  Any error is ignored
01123          * -- we may have run out of disk space, but that's no reason to quit.
01124          */
01125         extend = region->w_off == 0;
01126 #ifdef HAVE_FILESYSTEM_NOTZERO
01127         if (extend && __os_fs_notzero())
01128                 extend = 0;
01129 #endif
01130         if (extend)
01131                 (void)__db_file_extend(dbenv, dblp->lfhp, region->log_size);
01132 
01133         /* Seek first (someone may have written since we last did), then write. */
01134         ret = __os_seek(dbenv,
01135             dblp->lfhp, 0, 0, region->w_off, 0, DB_OS_SEEK_SET);
01136         if (ret == 0)
01137                 ret = __os_write(dbenv, dblp->lfhp, addr, len, &nwritten);
01138         if (ret != 0)
01139                 return (ret);
01140 
01141         /* Update the seek offset. */
01142         region->w_off += len;
01143 
01144         /* Update written statistics, carrying whole megabytes. */
01145         region->stat.st_w_bytes += len;
01146         if (region->stat.st_w_bytes >= MEGABYTE) {
01147                 region->stat.st_w_bytes -= MEGABYTE;
01148                 ++region->stat.st_w_mbytes;
01149         }
01150         region->stat.st_wc_bytes += len;
01151         if (region->stat.st_wc_bytes >= MEGABYTE) {
01152                 region->stat.st_wc_bytes -= MEGABYTE;
01153                 ++region->stat.st_wc_mbytes;
01154         }
01155         ++region->stat.st_wcount;
01156 
01157         return (0);
01158 }
01159 
01160 /*
01161  * __log_file_pp --
01162  *      DB_ENV->log_file pre/post processing; rejects in-memory log setups.
01163  *
01164  * PUBLIC: int __log_file_pp __P((DB_ENV *, const DB_LSN *, char *, size_t));
01165  */
01166 int
01167 __log_file_pp(dbenv, lsn, namep, len)
01168         DB_ENV *dbenv;
01169         const DB_LSN *lsn;
01170         char *namep;
01171         size_t len;
01172 {
01173         DB_THREAD_INFO *ip;
01174         int ret;
01175 
01176         PANIC_CHECK(dbenv);
01177         ENV_REQUIRES_CONFIG(dbenv,
01178             dbenv->lg_handle, "DB_ENV->log_file", DB_INIT_LOG);
01179 
01180         /* The normal case first; the in-memory complaint is the fall-through. */
01181         if (!F_ISSET(dbenv, DB_ENV_LOG_INMEMORY)) {
01182                 ENV_ENTER(dbenv, ip);
01183                 REPLICATION_WRAP(dbenv,
01184                     (__log_file(dbenv, lsn, namep, len)), ret);
01185                 ENV_LEAVE(dbenv, ip);
01186                 return (ret);
01187         }
01188         __db_err(dbenv, "DB_ENV->log_file is illegal with in-memory logs.");
01189         return (EINVAL);
01190 }
01191 
01192 /*
01193  * __log_file --
01194  *      DB_ENV->log_file: copy the name of log file lsn->file into namep.
01195  */
01196 static int
01197 __log_file(dbenv, lsn, namep, len)
01198         DB_ENV *dbenv;
01199         const DB_LSN *lsn;
01200         char *namep;
01201         size_t len;
01202 {
01203         DB_LOG *dblp;
01204         int ret;
01205         char *name;
01206 
01207         dblp = dbenv->lg_handle;
01208         LOG_SYSTEM_LOCK(dbenv);
01209         ret = __log_name(dblp, lsn->file, &name, NULL, 0);
01210         LOG_SYSTEM_UNLOCK(dbenv);
01211         if (ret != 0)
01212                 return (ret);
01213 
01214         /* Copy the name if it fits in len bytes, else fail with EINVAL. */
01215         if (len < strlen(name) + 1) {
01216                 *namep = '\0';
01217                 __db_err(dbenv, "DB_ENV->log_file: name buffer is too short");
01218                 ret = EINVAL;
01219         } else
01220                 (void)strcpy(namep, name);
01221         /* __log_name allocated name: free it on the error path as well. */
01222         __os_free(dbenv, name);
01223         return (ret);
01224 }
01225 
01226 /*
01227  * __log_newfh --
01228  *      Acquire a file handle for the current log file, lp->lsn.file, after
01229  *      closing any handle already held.  A non-zero create allows creation.
01230  */
01231 static int
01232 __log_newfh(dblp, create)
01233         DB_LOG *dblp;
01234         int create;
01235 {
01236         DB_ENV *dbenv;
01237         LOG *lp;
01238         u_int32_t flags;
01239         int ret;
01240         logfile_validity status;
01241 
01242         dbenv = dblp->dbenv;
01243         lp = dblp->reginfo.primary;
01244 
01245         /* Close any previous file descriptor. */
01246         if (dblp->lfhp != NULL) {
01247                 (void)__os_closehandle(dbenv, dblp->lfhp);
01248                 dblp->lfhp = NULL;
01249         }
01250 
01251         flags = DB_OSO_SEQ |
01252             (create ? DB_OSO_CREATE : 0) |
01253             (F_ISSET(dbenv, DB_ENV_DIRECT_LOG) ? DB_OSO_DIRECT : 0) |
01254             (F_ISSET(dbenv, DB_ENV_DSYNC_LOG) ? DB_OSO_DSYNC : 0);
01255 
01256         /* Record and open the new file; file numbers are unsigned, hence %lu. */
01257         dblp->lfname = lp->lsn.file;
01258         if ((ret = __log_valid(dblp, dblp->lfname, 0, &dblp->lfhp,
01259             flags, &status)) != 0)
01260                 __db_err(dbenv, "DB_ENV->log_put: %lu: %s",
01261                     (u_long)lp->lsn.file, db_strerror(ret));
01262         else if (status != DB_LV_NORMAL && status != DB_LV_INCOMPLETE)
01263                 ret = DB_NOTFOUND;
01264         return (ret);
01265 }
01266 
01267 /*
01268  * __log_name --
01269  *      Return the log name for a particular file, and optionally open it.
01270  *
01271  * PUBLIC: int __log_name __P((DB_LOG *,
01272  * PUBLIC:     u_int32_t, char **, DB_FH **, u_int32_t));
01273  */
01274 int
01275 __log_name(dblp, filenumber, namep, fhpp, flags)
01276         DB_LOG *dblp;
01277         u_int32_t filenumber, flags;
01278         char **namep;
01279         DB_FH **fhpp;
01280 {
01281         DB_ENV *dbenv;
01282         LOG *lp;
01283         int mode, ret;
01284         char *oname;
01285         char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20];
01286 
01287         dbenv = dblp->dbenv;
01288         lp = dblp->reginfo.primary;
01289 
01290         DB_ASSERT(!lp->db_log_inmemory);
01291 
01292         /*
01293          * !!!
01294          * The semantics of this routine are bizarre.
01295          *
01296          * The reason for all of this is that we need a place where we can
01297          * intercept requests for log files, and, if appropriate, check for
01298          * both the old-style and new-style log file names.  The trick is
01299          * that all callers of this routine that are opening the log file
01300          * read-only want to use an old-style file name if they can't find
01301          * a match using a new-style name.  The only down-side is that some
01302          * callers may check for the old-style when they really don't need
01303          * to, but that shouldn't mess up anything, and we only check for
01304          * the old-style name when we've already failed to find a new-style
01305          * one.
01306          *
01307          * Create a new-style file name, and if we're not going to open the
01308          * file, return regardless.
01309          */
01310         (void)snprintf(new, sizeof(new), LFNAME, filenumber);
01311         if ((ret = __db_appname(dbenv,
01312             DB_APP_LOG, new, 0, NULL, namep)) != 0 || fhpp == NULL)
01313                 return (ret);
01314 
01315         /* The application may have specified an absolute file mode. */
01316         if (lp->filemode == 0)
01317                 mode = dbenv->db_mode;
01318         else {
01319                 LF_SET(DB_OSO_ABSMODE);
01320                 mode = lp->filemode;
01321         }
01322 
01323         /* Open the new-style file -- if we succeed, we're done. */
01324         if ((ret = __os_open_extend(dbenv, *namep, 0, flags, mode, fhpp)) == 0)
01325                 return (0);
01326 
01327         /*
01328          * If the open failed for reason other than the file
01329          * not being there, complain loudly, the wrong user
01330          * probably started up the application.
01331          */
01332         if (ret != ENOENT) {
01333                 __db_err(dbenv,
01334                      "%s: log file unreadable: %s", *namep, db_strerror(ret));
01335                 return (__db_panic(dbenv, ret));
01336         }
01337 
01338         /*
01339          * The open failed... if the DB_RDONLY flag isn't set, we're done,
01340          * the caller isn't interested in old-style files.
01341          */
01342         if (!LF_ISSET(DB_OSO_RDONLY)) {
01343                 __db_err(dbenv,
01344                     "%s: log file open failed: %s", *namep, db_strerror(ret));
01345                 return (__db_panic(dbenv, ret));
01346         }
01347 
01348         /* Create an old-style file name. */
01349         (void)snprintf(old, sizeof(old), LFNAME_V1, filenumber);
01350         if ((ret = __db_appname(dbenv, DB_APP_LOG, old, 0, NULL, &oname)) != 0)
01351                 goto err;
01352 
01353         /*
01354          * Open the old-style file -- if we succeed, we're done.  Free the
01355          * space allocated for the new-style name and return the old-style
01356          * name to the caller.
01357          */
01358         if ((ret = __os_open(dbenv, oname, flags, mode, fhpp)) == 0) {
01359                 __os_free(dbenv, *namep);
01360                 *namep = oname;
01361                 return (0);
01362         }
01363 
01364         /*
01365          * Couldn't find either style of name -- return the new-style name
01366          * for the caller's error message.  If it's an old-style name that's
01367          * actually missing we're going to confuse the user with the error
01368          * message, but that implies that not only were we looking for an
01369          * old-style name, but we expected it to exist and we weren't just
01370          * looking for any log file.  That's not a likely error.
01371          */
01372 err:    __os_free(dbenv, oname);
01373         return (ret);
01374 }
01375 
01376 /*
01377  * __log_rep_put --
01378  *      Short-circuit way for replication clients to put records into the
01379  * log.  Replication clients' logs need to be laid out exactly as their
01380  * masters' are, so we let replication take responsibility for when the log
01381  * gets flushed, when log switches files, etc.  This is just a thin PUBLIC
01382  * wrapper for __log_putr with a slightly prettier interface.
01383  *
01384  * Note that the REP->mtx_clientdb should be held when this is called.
01385  * Note that we acquire the log region mutex while holding mtx_clientdb.
01386  *
01387  * PUBLIC: int __log_rep_put __P((DB_ENV *, DB_LSN *, const DBT *));
01388  */
01389 int
01390 __log_rep_put(dbenv, lsnp, rec)
01391         DB_ENV *dbenv;
01392         DB_LSN *lsnp;
01393         const DBT *rec;
01394 {
01395         DB_CIPHER *cipher;
01396         DB_LOG *logp;
01397         DBT copy;
01398         HDR hdr;
01399         LOG *region;
01400         int allocated, ret;
01401 
01402         logp = dbenv->lg_handle;
01403         region = logp->reginfo.primary;
01404         allocated = 0;
01405 
01406         LOG_SYSTEM_LOCK(dbenv);
01407         memset(&hdr, 0, sizeof(HDR));
01408         cipher = (DB_CIPHER *)dbenv->crypto_handle;
01409 
01410         /* Work on a private copy, grown by the cipher's size adjustment. */
01411         copy = *rec;
01412         if (CRYPTO_ON(dbenv))
01413                 copy.size += cipher->adj_size(rec->size);
01414         ret = __os_calloc(dbenv, 1, copy.size, &copy.data);
01415         if (ret == 0) {
01416                 allocated = 1;
01417                 memcpy(copy.data, rec->data, rec->size);
01418                 ret = __log_encrypt_record(dbenv, &copy, &hdr, rec->size);
01419         }
01420         if (ret == 0) {
01421                 __db_chksum(copy.data, copy.size,
01422                     (CRYPTO_ON(dbenv)) ? cipher->mac_key : NULL, hdr.chksum);
01423                 DB_ASSERT(log_compare(lsnp, &region->lsn) == 0);
01424                 ret = __log_putr(logp,
01425                     lsnp, &copy, region->lsn.offset - region->len, &hdr);
01426         }
01427 
01428         /* !!! Assume caller holds REP->mtx_clientdb to modify ready_lsn. */
01429         region->ready_lsn = region->lsn;
01430         LOG_SYSTEM_UNLOCK(dbenv);
01431         if (allocated)
01432                 __os_free(dbenv, copy.data);
01433         return (ret);
01434 }
01435 
01436 /* __log_encrypt_record -- set hdr's sizes and, with crypto on, encrypt dbt. */
01437 static int
01438 __log_encrypt_record(dbenv, dbt, hdr, orig)
01439         DB_ENV *dbenv;
01440         DBT *dbt;
01441         HDR *hdr;
01442         u_int32_t orig;
01443 {
01444         DB_CIPHER *cipher;
01445 
01446         /* Unencrypted records only need the normal header size. */
01447         if (!CRYPTO_ON(dbenv)) {
01448                 hdr->size = HDR_NORMAL_SZ;
01449                 return (0);
01450         }
01451 
01452         cipher = (DB_CIPHER *)dbenv->crypto_handle;
01453         hdr->size = HDR_CRYPTO_SZ;
01454         hdr->orig_size = orig;
01455         return (cipher->encrypt(dbenv,
01456             cipher->data, hdr->iv, dbt->data, dbt->size));
01457 }

Generated on Sun Dec 25 12:14:41 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2