Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

txn.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1995, 1996
00009  *      The President and Fellows of Harvard University.  All rights reserved.
00010  *
00011  * This code is derived from software contributed to Berkeley by
00012  * Margo Seltzer.
00013  *
00014  * Redistribution and use in source and binary forms, with or without
00015  * modification, are permitted provided that the following conditions
00016  * are met:
00017  * 1. Redistributions of source code must retain the above copyright
00018  *    notice, this list of conditions and the following disclaimer.
00019  * 2. Redistributions in binary form must reproduce the above copyright
00020  *    notice, this list of conditions and the following disclaimer in the
00021  *    documentation and/or other materials provided with the distribution.
00022  * 3. Neither the name of the University nor the names of its contributors
00023  *    may be used to endorse or promote products derived from this software
00024  *    without specific prior written permission.
00025  *
00026  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00027  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00030  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00031  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00032  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00033  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00034  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00035  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00036  * SUCH DAMAGE.
00037  *
00038  * $Id: txn.c,v 12.34 2005/11/01 00:44:35 bostic Exp $
00039  */
00040 
00041 #include "db_config.h"
00042 
00043 #ifndef NO_SYSTEM_INCLUDES
00044 #include <sys/types.h>
00045 #include <stdlib.h>
00046 
00047 #if TIME_WITH_SYS_TIME
00048 #include <sys/time.h>
00049 #include <time.h>
00050 #else
00051 #if HAVE_SYS_TIME_H
00052 #include <sys/time.h>
00053 #else
00054 #include <time.h>
00055 #endif
00056 #endif
00057 
00058 #include <string.h>
00059 #endif
00060 
00061 #include "db_int.h"
00062 #include "dbinc/crypto.h"
00063 #include "dbinc/hmac.h"
00064 #include "dbinc/db_page.h"
00065 #include "dbinc/db_shash.h"
00066 #include "dbinc/hash.h"
00067 #include "dbinc/lock.h"
00068 #include "dbinc/log.h"
00069 #include "dbinc/txn.h"
00070 
/*
 * SET_LOG_FLAGS --
 *      Store in lflags the log-write flags for a transaction's commit or
 *      abort record.  DB_LOG_COMMIT | DB_LOG_PERM is always set.  The
 *      handle's own flags win, in the order TXN_SYNC, TXN_WRITE_NOSYNC,
 *      TXN_NOSYNC; only when the handle sets none of them are the
 *      environment's DB_ENV_TXN_NOSYNC / DB_ENV_TXN_WRITE_NOSYNC flags
 *      consulted, and DB_FLUSH is the final default.
 *
 *      The arguments are expanded more than once; pass side-effect free
 *      expressions.
 */
#define SET_LOG_FLAGS(dbenv, txn, lflags)                               \
        do {                                                            \
                lflags = DB_LOG_COMMIT | DB_LOG_PERM;                   \
                if (F_ISSET(txn, TXN_SYNC)) {                           \
                        lflags |= DB_FLUSH;                             \
                } else if (F_ISSET(txn, TXN_WRITE_NOSYNC)) {            \
                        lflags |= DB_LOG_WRNOSYNC;                      \
                } else if (F_ISSET(txn, TXN_NOSYNC) ||                  \
                    F_ISSET(dbenv, DB_ENV_TXN_NOSYNC)) {                \
                        /* No sync requested: add nothing. */           \
                } else if (F_ISSET(dbenv, DB_ENV_TXN_WRITE_NOSYNC)) {   \
                        lflags |= DB_LOG_WRNOSYNC;                      \
                } else {                                                \
                        lflags |= DB_FLUSH;                             \
                }                                                       \
        } while (0)
00086 
00087 /*
00088  * __txn_isvalid enumerated types.  We cannot simply use the transaction
00089  * statuses, because different statuses need to be handled differently
00090  * depending on the caller.
 *
 * Each value names the operation on whose behalf __txn_isvalid is being
 * called; it is the second argument of that routine.
00091  */
00092 typedef enum {
00093         TXN_OP_ABORT,           /* Passed by __txn_abort. */
00094         TXN_OP_COMMIT,          /* Passed by __txn_commit. */
00095         TXN_OP_DISCARD,         /* Presumably the discard path; caller not visible here. */
00096         TXN_OP_PREPARE          /* Presumably the prepare path; caller not visible here. */
00097 } txnop_t;
00098 
/*
 * Forward declarations of the file-local routines.  The parameter lists are
 * wrapped in __P(()) to match the old-style (K&R) function definitions used
 * throughout this file.
 */
00099 static int  __txn_abort_pp __P((DB_TXN *));
00100 static int  __txn_begin_int __P((DB_TXN *, int));
00101 static int  __txn_commit_pp __P((DB_TXN *, u_int32_t));
00102 static int  __txn_discard __P((DB_TXN *, u_int32_t));
00103 static int  __txn_dispatch_undo
00104                 __P((DB_ENV *, DB_TXN *, DBT *, DB_LSN *, void *));
00105 static int  __txn_end __P((DB_TXN *, int));
00106 static int  __txn_isvalid __P((const DB_TXN *, txnop_t));
00107 static int  __txn_undo __P((DB_TXN *));
00108 static void __txn_set_txn_lsnp __P((DB_TXN *, DB_LSN **, DB_LSN **));
00109 
00110 /*
00111  * __txn_begin_pp --
00112  *      DB_ENV->txn_begin pre/post processing.
00113  *
00114  * PUBLIC: int __txn_begin_pp __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t));
00115  */
00116 int
00117 __txn_begin_pp(dbenv, parent, txnpp, flags)
00118         DB_ENV *dbenv;
00119         DB_TXN *parent, **txnpp;
00120         u_int32_t flags;
00121 {
00122         DB_THREAD_INFO *ip;
00123         int rep_check, ret;
00124 
00125         PANIC_CHECK(dbenv);
00126         ENV_REQUIRES_CONFIG(dbenv, dbenv->tx_handle, "txn_begin", DB_INIT_TXN);
00127 
00128         if ((ret = __db_fchk(dbenv,
00129             "txn_begin", flags,
00130             DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_NOWAIT |
00131             DB_TXN_NOSYNC | DB_TXN_SYNC | DB_TXN_WRITE_NOSYNC)) != 0)
00132                 return (ret);
00133         if ((ret = __db_fcchk(dbenv, "txn_begin", flags,
00134             DB_TXN_WRITE_NOSYNC | DB_TXN_NOSYNC, DB_TXN_SYNC)) != 0)
00135                 return (ret);
00136         if ((ret = __db_fcchk(dbenv, "txn_begin",
00137             flags, DB_TXN_WRITE_NOSYNC, DB_TXN_NOSYNC)) != 0)
00138                 return (ret);
00139 
00140         ENV_ENTER(dbenv, ip);
00141 
00142         if (parent == NULL) {
00143                 rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0;
00144                 if (rep_check && (ret = __op_rep_enter(dbenv)) != 0)
00145                         goto err;
00146         } else
00147                 rep_check = 0;
00148         ret = __txn_begin(dbenv, parent, txnpp, flags);
00149         /*
00150          * We only decrement the count if the operation fails.
00151          * Otherwise the count will be decremented when the
00152          * txn is resolved by txn_commit, txn_abort, etc.
00153          */
00154         if (ret != 0 && rep_check)
00155                 (void)__op_rep_exit(dbenv);
00156 
00157 err:    ENV_LEAVE(dbenv, ip);
00158         return (ret);
00159 }
00160 
00161 /*
00162  * __txn_begin --
00163  *      DB_ENV->txn_begin.
00164  *
00165  * This is a wrapper to the actual begin process.  Normal transaction begin
00166  * allocates a DB_TXN structure for the caller, while XA transaction begin
00167  * does not.  Other than that, both call into common __txn_begin_int code.
00168  *
00169  * Internally, we use TXN_DETAIL structures, but the DB_TXN structure
00170  * provides access to the transaction ID and the offset in the transaction
00171  * region of the TXN_DETAIL structure.
00172  *
 * Parameters:
 *      dbenv  - environment whose tx_handle manages the new transaction.
 *      parent - enclosing transaction, or NULL for a top-level transaction.
 *      txnpp  - set to NULL on entry and to the new handle only on success.
 *      flags  - DB_READ_COMMITTED, DB_READ_UNCOMMITTED, DB_TXN_NOSYNC,
 *               DB_TXN_SYNC, DB_TXN_NOWAIT and DB_TXN_WRITE_NOSYNC are each
 *               mirrored into the matching TXN_* flag on the handle.
 *
 * Returns 0 on success, otherwise the error from the failing helper.
 *
 * NOTE(review): the err label only frees the DB_TXN.  When it is reached
 * after __txn_begin_int has succeeded (one of the lock-timeout calls
 * failed), the handle has already been put on mgr->txn_chain by
 * __txn_begin_int (TXN_MALLOC is set) and, for a child, on both kids lists
 * below; nothing here unlinks it or releases the TXN_DETAIL.  Confirm
 * whether that path needs additional cleanup.
 *
00173  * PUBLIC: int __txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t));
00174  */
00175 int
00176 __txn_begin(dbenv, parent, txnpp, flags)
00177         DB_ENV *dbenv;
00178         DB_TXN *parent, **txnpp;
00179         u_int32_t flags;
00180 {
00181         DB_LOCKREGION *region;
00182         DB_TXN *txn;
00183         TXN_DETAIL *ptd, *td;
00184         int ret;
00185 
00186         *txnpp = NULL;
00187         if ((ret = __os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0)
00188                 return (ret);
00189 
        /* Fill in the process-local handle before the common begin code. */
00190         txn->mgrp = dbenv->tx_handle;
00191         txn->parent = parent;
00192         TAILQ_INIT(&txn->kids);
00193         TAILQ_INIT(&txn->events);
00194         STAILQ_INIT(&txn->logs);
        /* TXN_MALLOC makes __txn_begin_int add the handle to mgr->txn_chain. */
00195         txn->flags = TXN_MALLOC;
        /* LF_ISSET takes no flags argument; presumably it tests `flags'. */
00196         if (LF_ISSET(DB_READ_COMMITTED))
00197                 F_SET(txn, TXN_READ_COMMITTED);
00198         if (LF_ISSET(DB_READ_UNCOMMITTED))
00199                 F_SET(txn, TXN_READ_UNCOMMITTED);
00200         if (LF_ISSET(DB_TXN_NOSYNC))
00201                 F_SET(txn, TXN_NOSYNC);
00202         if (LF_ISSET(DB_TXN_SYNC))
00203                 F_SET(txn, TXN_SYNC);
00204         if (LF_ISSET(DB_TXN_NOWAIT))
00205                 F_SET(txn, TXN_NOWAIT);
00206         if (LF_ISSET(DB_TXN_WRITE_NOSYNC))
00207                 F_SET(txn, TXN_WRITE_NOSYNC);
00208 
00209         if ((ret = __txn_begin_int(txn, 0)) != 0)
00210                 goto err;
00211         td = txn->td;
00212 
        /*
         * Link the child under its parent twice: the handle on the
         * parent's process-local kids list and the detail structure on
         * the parent's shared kids list.
         */
00213         if (parent != NULL) {
00214                 ptd = parent->td;
00215                 TAILQ_INSERT_HEAD(&parent->kids, txn, klinks);
00216                 SH_TAILQ_INSERT_HEAD(&ptd->kids, td, klinks, __txn_detail);
00217         }
00218 
00219         if (LOCKING_ON(dbenv)) {
00220                 region = ((DB_LOCKTAB *)dbenv->lk_handle)->reginfo.primary;
00221                 if (parent != NULL) {
00222                         ret = __lock_inherit_timeout(dbenv,
00223                             parent->txnid, txn->txnid);
00224                         /* No parent locker set yet. */
                        /*
                         * Clearing the local `parent' makes the region
                         * default timeout below apply instead.
                         */
00225                         if (ret == EINVAL) {
00226                                 parent = NULL;
00227                                 ret = 0;
00228                         }
00229                         if (ret != 0)
00230                                 goto err;
00231                 }
00232 
00233                 /*
00234                  * Parent is NULL if we have no parent
00235                  * or it has no timeouts set.
00236                  */
00237                 if (parent == NULL && region->tx_timeout != 0)
00238                         if ((ret = __lock_set_timeout(dbenv, txn->txnid,
00239                             region->tx_timeout, DB_SET_TXN_TIMEOUT)) != 0)
00240                                 goto err;
00241         }
00242 
00243         *txnpp = txn;
00244         return (0);
00245 
00246 err:
00247         __os_free(dbenv, txn);
00248         return (ret);
00249 }
00250 
00251 /*
00252  * __txn_xa_begin --
00253  *      XA version of txn_begin.
00254  *
00255  * PUBLIC: int __txn_xa_begin __P((DB_ENV *, DB_TXN *));
00256  */
00257 int
00258 __txn_xa_begin(dbenv, txn)
00259         DB_ENV *dbenv;
00260         DB_TXN *txn;
00261 {
00262         PANIC_CHECK(dbenv);
00263 
00264         /*
00265          * We need to initialize the transaction structure, but must be careful
00266          * not to smash the links.  We manually initialize the structure.
00267          */
00268         txn->mgrp = dbenv->tx_handle;
00269         TAILQ_INIT(&txn->kids);
00270         TAILQ_INIT(&txn->events);
00271         STAILQ_INIT(&txn->logs);
00272         txn->parent = NULL;
00273         txn->txnid = TXN_INVALID;
00274         txn->cursors = 0;
00275         memset(&txn->lock_timeout, 0, sizeof(db_timeout_t));
00276         memset(&txn->expire, 0, sizeof(db_timeout_t));
00277 
00278         return (__txn_begin_int(txn, 0));
00279 }
00280 
00281 /*
00282  * __txn_recycle_id --
00283  *      Find a range of useable transaction ids.
00284  *
00285  * PUBLIC: int __txn_recycle_id __P((DB_ENV *));
00286  */
00287 int
00288 __txn_recycle_id(dbenv)
00289         DB_ENV *dbenv;
00290 {
00291         DB_LSN null_lsn;
00292         DB_TXNMGR *mgr;
00293         DB_TXNREGION *region;
00294         TXN_DETAIL *td;
00295         u_int32_t *ids;
00296         int nids, ret;
00297 
00298         mgr = dbenv->tx_handle;
00299         region = mgr->reginfo.primary;
00300 
00301         if ((ret = __os_malloc(dbenv,
00302             sizeof(u_int32_t) * region->maxtxns, &ids)) != 0)
00303                 return (ret);
00304         nids = 0;
00305         for (td = SH_TAILQ_FIRST(&region->active_txn, __txn_detail);
00306             td != NULL;
00307             td = SH_TAILQ_NEXT(td, links, __txn_detail))
00308                 ids[nids++] = td->txnid;
00309         region->last_txnid = TXN_MINIMUM - 1;
00310         region->cur_maxid = TXN_MAXIMUM;
00311         if (nids != 0)
00312                 __db_idspace(ids, nids,
00313                     &region->last_txnid, &region->cur_maxid);
00314         __os_free(dbenv, ids);
00315         /*
00316          * Check LOGGING_ON rather than DBENV_LOGGING as
00317          * we want to emit this record at the end of recovery.
00318          */
00319         if (LOGGING_ON(dbenv))
00320             ret = __txn_recycle_log(dbenv, NULL, &null_lsn,
00321                 0, region->last_txnid + 1, region->cur_maxid);
00322 
00323         return (ret);
00324 }
00325 
00326 /*
00327  * __txn_compensate_begin
00328  *      Begin an compensation transaction.  This is a special interface
00329  * that is used only for transactions that must be started to compensate
00330  * for actions during an abort.  Currently only used for allocations.
00331  *
00332  * PUBLIC: int __txn_compensate_begin __P((DB_ENV *, DB_TXN **));
00333  */
00334 int
00335 __txn_compensate_begin(dbenv, txnpp)
00336         DB_ENV *dbenv;
00337         DB_TXN **txnpp;
00338 {
00339         DB_TXN *txn;
00340         int ret;
00341 
00342         PANIC_CHECK(dbenv);
00343 
00344         if ((ret = __os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0)
00345                 return (ret);
00346 
00347         txn->mgrp = dbenv->tx_handle;
00348         TAILQ_INIT(&txn->kids);
00349         TAILQ_INIT(&txn->events);
00350         STAILQ_INIT(&txn->logs);
00351         txn->flags = TXN_COMPENSATE | TXN_MALLOC;
00352 
00353         *txnpp = txn;
00354         return (__txn_begin_int(txn, 1));
00355 }
00356 
00357 /*
00358  * __txn_begin_int --
00359  *      Normal DB version of txn_begin.
 *
 * Common begin code: allocates a TXN_DETAIL in the transaction region,
 * assigns the next transaction id, and installs the method pointers on
 * the process-local handle.
 *
 * Parameters:
 *      txn      - handle whose mgrp, flags and parent fields have already
 *                 been set by the caller.
 *      internal - when non-zero, the check that refuses to start while
 *                 st_nrestores is non-zero is skipped.
 *
 * Returns 0 on success; EINVAL when refused because of recovery state;
 * otherwise the error from __txn_recycle_id, __db_shalloc or
 * __lock_addfamilylocker.
 *
 * NOTE(review): the return after a failed __lock_addfamilylocker leaves
 * the new TXN_DETAIL on the region's active list with the statistics
 * already incremented; no cleanup for it is visible in this function.
00360  */
00361 static int
00362 __txn_begin_int(txn, internal)
00363         DB_TXN *txn;
00364         int internal;
00365 {
00366         DB_ENV *dbenv;
00367         DB_TXNMGR *mgr;
00368         DB_TXNREGION *region;
00369         TXN_DETAIL *td;
00370         u_int32_t id;
00371         int ret;
00372 
00373         mgr = txn->mgrp;
00374         dbenv = mgr->dbenv;
00375         region = mgr->reginfo.primary;
00376 
        /* Everything up to TXN_SYSTEM_UNLOCK runs under the region lock. */
00377         TXN_SYSTEM_LOCK(dbenv);
        /* Only compensating transactions may begin during recovery. */
00378         if (!F_ISSET(txn, TXN_COMPENSATE) && F_ISSET(region, TXN_IN_RECOVERY)) {
00379                 __db_err(dbenv, "operation not permitted during recovery");
00380                 ret = EINVAL;
00381                 goto err;
00382         }
00383 
00384         /* Make sure that we aren't still recovering prepared transactions. */
00385         if (!internal && region->stat.st_nrestores != 0) {
00386                 __db_err(dbenv,
00387     "recovery of prepared but not yet committed transactions is incomplete");
00388                 ret = EINVAL;
00389                 goto err;
00390         }
00391 
00392         /*
00393          * Allocate a new transaction id. Our current valid range can span
00394          * the maximum valid value, so check for it and wrap manually.
00395          */
00396         if (region->last_txnid == TXN_MAXIMUM &&
00397             region->cur_maxid != TXN_MAXIMUM)
00398                 region->last_txnid = TXN_MINIMUM - 1;
00399 
        /* The current range is used up: compute a fresh one. */
00400         if (region->last_txnid == region->cur_maxid &&
00401             (ret = __txn_recycle_id(dbenv)) != 0)
00402                 goto err;
00403 
00404         /* Allocate a new transaction detail structure. */
00405         if ((ret =
00406             __db_shalloc(&mgr->reginfo, sizeof(TXN_DETAIL), 0, &td)) != 0) {
00407                 __db_err(dbenv,
00408                     "Unable to allocate memory for transaction detail");
00409                 goto err;
00410         }
00411 
00412         /* Place transaction on active transaction list. */
00413         SH_TAILQ_INSERT_HEAD(&region->active_txn, td, links, __txn_detail);
00414 
        /* Take the next id and update the begin/active statistics. */
00415         id = ++region->last_txnid;
00416         ++region->stat.st_nbegins;
00417         if (++region->stat.st_nactive > region->stat.st_maxnactive)
00418                 region->stat.st_maxnactive = region->stat.st_nactive;
00419 
        /* Initialize every field of the freshly allocated detail. */
00420         td->txnid = id;
00421         dbenv->thread_id(dbenv, &td->pid, &td->tid);
00422         ZERO_LSN(td->last_lsn);
00423         ZERO_LSN(td->begin_lsn);
00424         SH_TAILQ_INIT(&td->kids);
        /* The parent is recorded as a region offset, not as a pointer. */
00425         if (txn->parent != NULL)
00426                 td->parent = R_OFFSET(&mgr->reginfo, txn->parent->td);
00427         else
00428                 td->parent = INVALID_ROFF;
00429         td->name = INVALID_ROFF;
00430         td->status = TXN_RUNNING;
00431         td->flags = 0;
00432         td->xa_status = 0;
00433 
00434         TXN_SYSTEM_UNLOCK(dbenv);
00435 
00436         txn->txnid = id;
00437         txn->td  = td;
00438 
        /* Install the DB_TXN method table. */
00439         txn->abort = __txn_abort_pp;
00440         txn->commit = __txn_commit_pp;
00441         txn->discard = __txn_discard;
00442         txn->get_name = __txn_get_name;
00443         txn->id = __txn_id;
00444         txn->prepare = __txn_prepare;
00445         txn->set_txn_lsnp = __txn_set_txn_lsnp;
00446         txn->set_name = __txn_set_name;
00447         txn->set_timeout = __txn_set_timeout;
00448 
00449         /*
00450          * If this is a transaction family, we must link the child to the
00451          * maximal grandparent in the lock table for deadlock detection.
00452          */
00453         if (txn->parent != NULL && LOCKING_ON(dbenv))
00454                 if ((ret = __lock_addfamilylocker(dbenv,
00455                     txn->parent->txnid, txn->txnid)) != 0)
00456                         return (ret);
00457 
        /*
         * Handles flagged TXN_MALLOC are tracked on the manager's chain,
         * under the manager's own mutex rather than the region lock.
         */
00458         if (F_ISSET(txn, TXN_MALLOC)) {
00459                 MUTEX_LOCK(dbenv, mgr->mutex);
00460                 TAILQ_INSERT_TAIL(&mgr->txn_chain, txn, links);
00461                 MUTEX_UNLOCK(dbenv, mgr->mutex);
00462         }
00463 
00464         return (0);
00465 
        /* Reached only while the region lock is still held. */
00466 err:    TXN_SYSTEM_UNLOCK(dbenv);
00467         return (ret);
00468 }
00469 
00470 /*
00471  * __txn_continue
00472  *      Fill in the fields of the local transaction structure given
00473  *      the detail transaction structure.
00474  *
00475  * PUBLIC: void __txn_continue __P((DB_ENV *, DB_TXN *, TXN_DETAIL *));
00476  */
00477 void
00478 __txn_continue(env, txn, td)
00479         DB_ENV *env;
00480         DB_TXN *txn;
00481         TXN_DETAIL *td;
00482 {
00483         txn->mgrp = env->tx_handle;
00484         txn->parent = NULL;
00485         txn->txnid = td->txnid;
00486         txn->td = td;
00487 
00488         txn->abort = __txn_abort_pp;
00489         txn->commit = __txn_commit_pp;
00490         txn->discard = __txn_discard;
00491         txn->get_name = __txn_get_name;
00492         txn->id = __txn_id;
00493         txn->prepare = __txn_prepare;
00494         txn->set_name = __txn_set_name;
00495 
00496         txn->flags = 0;
00497         if (F_ISSET(td, TXN_DTL_RESTORED))
00498                 F_SET(txn, TXN_RESTORED);
00499 }
00500 
00501 /*
00502  * __txn_commit_pp --
00503  *      Interface routine to TXN->commit.
00504  */
00505 static int
00506 __txn_commit_pp(txn, flags)
00507         DB_TXN *txn;
00508         u_int32_t flags;
00509 {
00510         DB_ENV *dbenv;
00511         DB_THREAD_INFO *ip;
00512         int not_child, ret, t_ret;
00513 
00514         dbenv = txn->mgrp->dbenv;
00515         not_child = txn->parent == NULL;
00516 
00517         ENV_ENTER(dbenv, ip);
00518 
00519         ret = __txn_commit(txn, flags);
00520         if (not_child && IS_ENV_REPLICATED(dbenv) &&
00521             (t_ret = __op_rep_exit(dbenv)) != 0 && ret == 0)
00522                 ret = t_ret;
00523         ENV_LEAVE(dbenv, ip);
00524         return (ret);
00525 }
00526 
00527 /*
00528  * __txn_commit --
00529  *      Commit a transaction.
00530  *
00531  * PUBLIC: int __txn_commit __P((DB_TXN *, u_int32_t));
00532  */
00533 int
00534 __txn_commit(txn, flags)
00535         DB_TXN *txn;
00536         u_int32_t flags;
00537 {
00538         DBT list_dbt;
00539         DB_ENV *dbenv;
00540         DB_LOCKREQ request;
00541         DB_TXN *kid;
00542         REGENV *renv;
00543         REGINFO *infop;
00544         TXN_DETAIL *td;
00545         u_int32_t id, lflags;
00546         int ret, t_ret;
00547 
00548         dbenv = txn->mgrp->dbenv;
00549         td = txn->td;
00550 
00551         PANIC_CHECK(dbenv);
00552 
00553         if ((ret = __txn_isvalid(txn, TXN_OP_COMMIT)) != 0)
00554                 return (ret);
00555 
00556         infop = dbenv->reginfo;
00557         renv = infop->primary;
00558         /*
00559          * No mutex is needed as envid is read-only once it is set.
00560          */
00561         id = renv->envid;
00562 
00563         /*
00564          * We clear flags that are incorrect, ignoring any flag errors, and
00565          * default to synchronous operations.  By definition, transaction
00566          * handles are dead when we return, and this error should never
00567          * happen, but we don't want to fail in the field 'cause the app is
00568          * specifying the wrong flag for some reason.
00569          */
00570         if (__db_fchk(dbenv, "DB_TXN->commit", flags,
00571             DB_TXN_NOSYNC | DB_TXN_SYNC | DB_TXN_WRITE_NOSYNC) != 0)
00572                 flags = DB_TXN_SYNC;
00573         if (__db_fcchk(dbenv, "DB_TXN->commit", flags,
00574             DB_TXN_SYNC, DB_TXN_NOSYNC | DB_TXN_WRITE_NOSYNC) != 0)
00575                 flags = DB_TXN_SYNC;
00576 
00577         if (LF_ISSET(DB_TXN_WRITE_NOSYNC)) {
00578                 F_CLR(txn, TXN_SYNC_FLAGS);
00579                 F_SET(txn, TXN_WRITE_NOSYNC);
00580         }
00581         if (LF_ISSET(DB_TXN_NOSYNC)) {
00582                 F_CLR(txn, TXN_SYNC_FLAGS);
00583                 F_SET(txn, TXN_NOSYNC);
00584         }
00585         if (LF_ISSET(DB_TXN_SYNC)) {
00586                 F_CLR(txn, TXN_SYNC_FLAGS);
00587                 F_SET(txn, TXN_SYNC);
00588         }
00589 
00590         /*
00591          * Commit any unresolved children.  If anyone fails to commit,
00592          * then try to abort the rest of the kids and then abort the parent.
00593          * Abort should never fail; if it does, we bail out immediately.
00594          */
00595         while ((kid = TAILQ_FIRST(&txn->kids)) != NULL)
00596                 if ((ret = __txn_commit(kid, flags)) != 0)
00597                         while ((kid = TAILQ_FIRST(&txn->kids)) != NULL)
00598                                 if ((t_ret = __txn_abort(kid)) != 0)
00599                                         return (__db_panic(dbenv, t_ret));
00600 
00601         /*
00602          * If there are any log records, write a log record and sync the log,
00603          * else do no log writes.  If the commit is for a child transaction,
00604          * we do not need to commit the child synchronously since it may still
00605          * abort (if its parent aborts), and otherwise its parent or ultimate
00606          * ancestor will write synchronously.
00607          */
00608         if (DBENV_LOGGING(dbenv) && (!IS_ZERO_LSN(td->last_lsn) ||
00609             STAILQ_FIRST(&txn->logs) != NULL)) {
00610                 if (txn->parent == NULL) {
00611                         /*
00612                          * We are about to free all the read locks for this
00613                          * transaction below.  Some of those locks might be
00614                          * handle locks which should not be freed, because
00615                          * they will be freed when the handle is closed. Check
00616                          * the events and preprocess any trades now so we don't
00617                          * release the locks below.
00618                          */
00619                         if ((ret =
00620                             __txn_doevents(dbenv, txn, TXN_PREPARE, 1)) != 0)
00621                                 goto err;
00622 
00623                         memset(&request, 0, sizeof(request));
00624                         if (LOCKING_ON(dbenv)) {
00625                                 request.op = DB_LOCK_PUT_READ;
00626                                 if (IS_REP_MASTER(dbenv) &&
00627                                     !IS_ZERO_LSN(td->last_lsn)) {
00628                                         memset(&list_dbt, 0, sizeof(list_dbt));
00629                                         request.obj = &list_dbt;
00630                                 }
00631                                 ret = __lock_vec(dbenv,
00632                                     txn->txnid, 0, &request, 1, NULL);
00633                         }
00634 
00635                         if (ret == 0 && !IS_ZERO_LSN(td->last_lsn)) {
00636                                 SET_LOG_FLAGS(dbenv, txn, lflags);
00637                                 ret = __txn_regop_log(dbenv, txn,
00638                                     &td->last_lsn, lflags, TXN_COMMIT,
00639                                     (int32_t)time(NULL), id, request.obj);
00640                         }
00641 
00642                         if (request.obj != NULL && request.obj->data != NULL)
00643                                 __os_free(dbenv, request.obj->data);
00644                         if (ret != 0)
00645                                 goto err;
00646                 } else {
00647                         /* Log the commit in the parent! */
00648                         if (!IS_ZERO_LSN(td->last_lsn) &&
00649                             (ret = __txn_child_log(dbenv, txn->parent,
00650                             &((TXN_DETAIL *)txn->parent->td)->last_lsn,
00651                             0, txn->txnid, &td->last_lsn)) != 0) {
00652                                 goto err;
00653                         }
00654                         if (STAILQ_FIRST(&txn->logs) != NULL) {
00655                                 /*
00656                                  * Put the child first so we back it out first.
00657                                  * All records are undone in reverse order.
00658                                  */
00659                                 STAILQ_CONCAT(&txn->logs, &txn->parent->logs);
00660                                 txn->parent->logs = txn->logs;
00661                                 STAILQ_INIT(&txn->logs);
00662                         }
00663 
00664                         F_SET(txn->parent, TXN_CHILDCOMMIT);
00665                 }
00666         }
00667 
00668         /*
00669          * Process any aborted pages from our children.  We delay putting pages
00670          * on the free list that are newly allocated and then aborted so we can
00671          * undo other allocations, if necessary, without worrying about these
00672          * pages which were not on the free list before.
00673          */
00674         if (txn->txn_list != NULL) {
00675 #ifndef HAVE_FTRUNCATE
00676                 t_ret = __db_do_the_limbo(dbenv,
00677                       NULL, txn, txn->txn_list, LIMBO_NORMAL);
00678                 if (t_ret != 0 && ret == 0)
00679                         ret = t_ret;
00680 #endif
00681                 __db_txnlist_end(dbenv, txn->txn_list);
00682                 txn->txn_list = NULL;
00683         }
00684 
00685         if (ret != 0)
00686                 goto err;
00687 
00688         /* This is OK because __txn_end can only fail with a panic. */
00689         return (__txn_end(txn, 1));
00690 
00691 err:    /*
00692          * If we are prepared, then we "must" be able to commit.  We panic here
00693          * because even though the coordinator might be able to retry it is not
00694          * clear it would know to do that.  Otherwise  we'll try to abort.  If
00695          * that is successful, then we return whatever was in ret (that is, the
00696          * reason we failed).  If the abort was unsuccessful, abort probably
00697          * returned DB_RUNRECOVERY and we need to propagate that up.
00698          */
00699         if (td->status == TXN_PREPARED)
00700                 return (__db_panic(dbenv, ret));
00701 
00702         if ((t_ret = __txn_abort(txn)) != 0)
00703                 ret = t_ret;
00704         return (ret);
00705 }
00706 
00707 /*
00708  * __txn_abort_pp --
00709  *      Interface routine to TXN->abort.
00710  */
00711 static int
00712 __txn_abort_pp(txn)
00713         DB_TXN *txn;
00714 {
00715         DB_ENV *dbenv;
00716         DB_THREAD_INFO *ip;
00717         int not_child, ret, t_ret;
00718 
00719         dbenv = txn->mgrp->dbenv;
00720         not_child = txn->parent == NULL;
00721 
00722         ENV_ENTER(dbenv, ip);
00723 
00724         ret = __txn_abort(txn);
00725         if (not_child && IS_ENV_REPLICATED(dbenv) &&
00726             (t_ret = __op_rep_exit(dbenv)) != 0 && ret == 0)
00727                 ret = t_ret;
00728         ENV_LEAVE(dbenv, ip);
00729         return (ret);
00730 }
00731 
00732 /*
00733  * __txn_abort --
00734  *      Abort a transaction.
00735  *
 * Aborts any unresolved children, then (unless nothing was logged) drops
 * the transaction's timeouts, runs the lock request below, undoes the
 * transaction via __txn_undo, logs an abort record if the transaction was
 * prepared, and finishes with __txn_end(txn, 0).
 *
 * Returns the value of __txn_end on success.  Every failure inside this
 * function is turned into a panic via __db_panic; a failing child abort
 * is returned as-is.
 *
 * NOTE(review): only request.op and request.obj are assigned before
 * __lock_vec is called, whereas __txn_commit zeroes the whole DB_LOCKREQ
 * first.  Confirm that no other field is read for DB_LOCK_UPGRADE_WRITE.
 *
00736  * PUBLIC: int __txn_abort __P((DB_TXN *));
00737  */
00738 int
00739 __txn_abort(txn)
00740         DB_TXN *txn;
00741 {
00742         DB_ENV *dbenv;
00743         DB_LOCKREQ request;
00744         DB_TXN *kid;
00745         REGENV *renv;
00746         REGINFO *infop;
00747         TXN_DETAIL *td;
00748         u_int32_t id, lflags;
00749         int ret;
00750 
00751         dbenv = txn->mgrp->dbenv;
00752         td = txn->td;
00753 
00754         PANIC_CHECK(dbenv);
00755 
00756         /* Ensure that abort always fails fatally. */
00757         if ((ret = __txn_isvalid(txn, TXN_OP_ABORT)) != 0)
00758                 return (__db_panic(dbenv, ret));
00759 
00760         /*
00761          * Try to abort any unresolved children.
00762          *
00763          * Abort either succeeds or panics the region.  As soon as we
00764          * see any failure, we just get out of here and return the panic
00765          * up.
00766          */
00767         while ((kid = TAILQ_FIRST(&txn->kids)) != NULL)
00768                 if ((ret = __txn_abort(kid)) != 0)
00769                         return (ret);
00770 
00771         infop = dbenv->reginfo;
00772         renv = infop->primary;
00773         /*
00774          * No mutex is needed as envid is read-only once it is set.
00775          */
00776         id = renv->envid;
00777 
00778         /*
00779          * Fast path -- no need to do anything fancy if there were no
00780          * modifications (e.g., log records) for this transaction.
00781          * We still call txn_undo to cleanup the txn_list from our
00782          * children.
00783          */
        /* Both jumps skip the lock handling below; `done' also skips undo. */
00784         if (IS_ZERO_LSN(td->last_lsn) && STAILQ_FIRST(&txn->logs) == NULL) {
00785                 if (txn->txn_list == NULL)
00786                         goto done;
00787                 else
00788                         goto undo;
00789         }
00790 
00791         if (LOCKING_ON(dbenv)) {
00792                 /*
00793                  * We are about to free all the read locks for this transaction
00794                  * below.  Some of those locks might be handle locks which
00795                  * should not be freed, because they will be freed when the
00796                  * handle is closed.  Check the events and preprocess any
00797                  * trades now so that we don't release the locks below.
00798                  */
00799                 if ((ret = __txn_doevents(dbenv, txn, TXN_ABORT, 1)) != 0)
00800                         return (__db_panic(dbenv, ret));
00801 
00802                 /* Turn off timeouts. */
00803                 if ((ret = __lock_set_timeout(dbenv,
00804                     txn->txnid, 0, DB_SET_TXN_TIMEOUT)) != 0)
00805                         return (__db_panic(dbenv, ret));
00806 
00807                 if ((ret = __lock_set_timeout(dbenv,
00808                     txn->txnid, 0, DB_SET_LOCK_TIMEOUT)) != 0)
00809                         return (__db_panic(dbenv, ret));
00810 
                /* Single-element lock request, issued with DB_LOCK_ABORT. */
00811                 request.op = DB_LOCK_UPGRADE_WRITE;
00812                 request.obj = NULL;
00813                 if ((ret = __lock_vec(
00814                     dbenv, txn->txnid, DB_LOCK_ABORT, &request, 1, NULL)) != 0)
00815                         return (__db_panic(dbenv, ret));
00816         }
00817 undo:   if ((ret = __txn_undo(txn)) != 0)
00818                 return (__db_panic(dbenv, ret));
00819 
00820         /*
00821          * Normally, we do not need to log aborts.  However, if we
00822          * are a distributed transaction (i.e., we have a prepare),
00823          * then we log the abort so we know that this transaction
00824          * was actually completed.
00825          */
        /* lflags is computed unconditionally but only used when logging. */
00826 done:   SET_LOG_FLAGS(dbenv, txn, lflags);
00827         if (DBENV_LOGGING(dbenv) && td->status == TXN_PREPARED &&
00828             (ret = __txn_regop_log(dbenv, txn, &td->last_lsn,
00829             lflags, TXN_ABORT, (int32_t)time(NULL), id, NULL)) != 0)
00830                 return (__db_panic(dbenv, ret));
00831 
00832         /* __txn_end always panics if it errors, so pass the return along. */
00833         return (__txn_end(txn, 0));
00834 }
00835 
00836 /*
00837  * __txn_discard --
00838  *      Interface routine to TXN->discard.
00839  */
00840 static int
00841 __txn_discard(txn, flags)
00842         DB_TXN *txn;
00843         u_int32_t flags;
00844 {
00845         DB_ENV *dbenv;
00846         DB_THREAD_INFO *ip;
00847         int ret;
00848 
00849         dbenv = txn->mgrp->dbenv;
00850 
00851         ENV_ENTER(dbenv, ip);
00852         ret = __txn_discard_int(txn, flags);
00853         ENV_LEAVE(dbenv, ip);
00854         return (ret);
00855 }
00856 
00857 /*
00858  * __txn_discard --
00859  *      Free the per-process resources associated with this txn handle.
00860  *
00861  * PUBLIC: int __txn_discard_int __P((DB_TXN *, u_int32_t flags));
00862  */
00863 int
00864 __txn_discard_int(txn, flags)
00865         DB_TXN *txn;
00866         u_int32_t flags;
00867 {
00868         DB_ENV *dbenv;
00869         DB_TXN *freep;
00870         DB_TXNMGR *mgr;
00871         int ret;
00872 
00873         COMPQUIET(flags, 0);
00874 
00875         mgr = txn->mgrp;
00876         dbenv = mgr->dbenv;
00877         freep = NULL;
00878 
00879         PANIC_CHECK(dbenv);
00880 
00881         if ((ret = __txn_isvalid(txn, TXN_OP_DISCARD)) != 0)
00882                 return (ret);
00883 
00884         /* Should be no children. */
00885         DB_ASSERT(TAILQ_FIRST(&txn->kids) == NULL);
00886 
00887         /* Free the space. */
00888         MUTEX_LOCK(dbenv, mgr->mutex);
00889         mgr->n_discards++;
00890         if (F_ISSET(txn, TXN_MALLOC)) {
00891                 TAILQ_REMOVE(&mgr->txn_chain, txn, links);
00892                 freep = txn;
00893         }
00894         MUTEX_UNLOCK(dbenv, mgr->mutex);
00895         if (freep != NULL)
00896                 __os_free(dbenv, freep);
00897 
00898         return (0);
00899 }
00900 
/*
 * __txn_prepare --
 *      Flush the log so a future commit is guaranteed to succeed.
 *
 *      txn is the transaction to prepare; gid is the global transaction
 *      ID, copied into the transaction detail unless the XA status is
 *      ENDED or SUSPENDED (see below).
 *
 *      Steps, in order: validate the handle, commit any remaining
 *      children, run the prepare events, release read locks, write the
 *      prepare log record, and finally mark the transaction TXN_PREPARED.
 *      Returns 0 on success; otherwise the first error encountered, in
 *      which case the status is left unchanged.
 *
 * PUBLIC: int __txn_prepare __P((DB_TXN *, u_int8_t *));
 */
int
__txn_prepare(txn, gid)
        DB_TXN *txn;
        u_int8_t *gid;
{
        DBT list_dbt, xid;
        DB_ENV *dbenv;
        DB_LOCKREQ request;
        DB_THREAD_INFO *ip;
        DB_TXN *kid;
        TXN_DETAIL *td;
        u_int32_t lflags;
        int ret;

        dbenv = txn->mgrp->dbenv;
        td = txn->td;

        PANIC_CHECK(dbenv);

        /*
         * Validation happens before ENV_ENTER, so this early return does
         * not need a matching ENV_LEAVE.
         */
        if ((ret = __txn_isvalid(txn, TXN_OP_PREPARE)) != 0)
                return (ret);

        ENV_ENTER(dbenv, ip);

        /*
         * Commit any unresolved children.  Each successful commit is
         * expected to unlink the child from txn->kids, which is what
         * terminates this loop.
         */
        while ((kid = TAILQ_FIRST(&txn->kids)) != NULL)
                if ((ret = __txn_commit(kid, DB_TXN_NOSYNC)) != 0)
                        goto err;

#ifndef HAVE_FTRUNCATE
        /*
         * Without ftruncate support, process the transaction's list (if
         * it has one) in LIMBO_PREPARE mode.
         */
        if (txn->txn_list != NULL  &&
            (ret = __db_do_the_limbo(dbenv,
            NULL, txn, txn->txn_list, LIMBO_PREPARE)) != 0)
                goto err;
#endif
        /*
         * Run the prepare events before the read locks are released
         * below.  NOTE(review): the final argument of 1 presumably
         * requests preprocessing only -- confirm against __txn_doevents.
         */
        if ((ret = __txn_doevents(dbenv, txn, TXN_PREPARE, 1)) != 0)
                goto err;

        /*
         * Zero the request even when locking is off: request.obj is
         * passed to the log call below and must be NULL in that case.
         */
        memset(&request, 0, sizeof(request));
        if (LOCKING_ON(dbenv)) {
                /* Give up the read locks held by this transaction. */
                request.op = DB_LOCK_PUT_READ;
                /*
                 * On a replication master with logged changes, hand
                 * __lock_vec a DBT.  Whatever it leaves in list_dbt.data
                 * is logged with the prepare record and freed afterwards.
                 */
                if (IS_REP_MASTER(dbenv) &&
                    !IS_ZERO_LSN(td->last_lsn)) {
                        memset(&list_dbt, 0, sizeof(list_dbt));
                        request.obj = &list_dbt;
                }
                if ((ret = __lock_vec(dbenv,
                    txn->txnid, 0, &request, 1, NULL)) != 0)
                        goto err;

        }
        if (DBENV_LOGGING(dbenv)) {
                memset(&xid, 0, sizeof(xid));
                /*
                 * In XA, the global transaction ID in the txn_detail
                 * structure is already set; in a non-XA environment, we
                 * must set it here.  XA requires that the transaction be
                 * either ENDED or SUSPENDED when prepare is called, so we
                 * know that if the xa_status isn't in one of those states,
                 * then we are calling prepare directly and we need to fill
                 * in the td->xid.
                 */
                if (td->xa_status != TXN_XA_ENDED &&
                    td->xa_status != TXN_XA_SUSPENDED)
                        /* Regular prepare; fill in the gid. */
                        memcpy(td->xid, gid, sizeof(td->xid));

                xid.size = sizeof(td->xid);
                xid.data = td->xid;

                lflags = DB_LOG_COMMIT | DB_LOG_PERM | DB_FLUSH;
                if ((ret = __txn_xa_regop_log(dbenv, txn, &td->last_lsn,
                    lflags, TXN_PREPARE, &xid, td->format, td->gtrid, td->bqual,
                    &td->begin_lsn, request.obj)) != 0) {
                        __db_err(dbenv, "DB_TXN->prepare: log_write failed %s",
                            db_strerror(ret));
                }
                /*
                 * Free the lock-list buffer whether or not the log write
                 * succeeded, and only then act on a log failure.
                 */
                if (request.obj != NULL && request.obj->data != NULL)
                        __os_free(dbenv, request.obj->data);
                if (ret != 0)
                        goto err;

        }

        /* Everything succeeded: publish the new status under the mutex. */
        MUTEX_LOCK(dbenv, txn->mgrp->mutex);
        td->status = TXN_PREPARED;
        MUTEX_UNLOCK(dbenv, txn->mgrp->mutex);
err:    ENV_LEAVE(dbenv, ip);
        return (ret);
}
00995 
00996 /*
00997  * __txn_id --
00998  *      Return the transaction ID.
00999  *
01000  * PUBLIC: u_int32_t __txn_id __P((DB_TXN *));
01001  */
01002 u_int32_t
01003 __txn_id(txn)
01004         DB_TXN *txn;
01005 {
01006         return (txn->txnid);
01007 }
01008 
01009 /*
01010  * __txn_get_name --
01011  *      Get a descriptive string from a transaction.
01012  *
01013  * PUBLIC: int __txn_get_name __P((DB_TXN *, const char **));
01014  */
01015 int
01016 __txn_get_name(txn, namep)
01017         DB_TXN *txn;
01018         const char **namep;
01019 {
01020         *namep = txn->name;
01021 
01022         return (0);
01023 }
01024 
01025 /*
01026  * __txn_set_name --
01027  *      Set a descriptive string for a transaction.
01028  *
01029  * PUBLIC: int __txn_set_name __P((DB_TXN *, const char *));
01030  */
01031 int
01032 __txn_set_name(txn, name)
01033         DB_TXN *txn;
01034         const char *name;
01035 {
01036         DB_ENV *dbenv;
01037         DB_TXNMGR *mgr;
01038         TXN_DETAIL *td;
01039         size_t len;
01040         int ret;
01041         char *p;
01042 
01043         mgr = txn->mgrp;
01044         dbenv = mgr->dbenv;
01045         td = txn->td;
01046         len = strlen(name) + 1;
01047 
01048         if ((ret = __os_realloc(dbenv, len, &txn->name)) != 0)
01049                 return (ret);
01050         memcpy(txn->name, name, len);
01051 
01052         if (td->name != INVALID_ROFF) {
01053                 __db_shalloc_free(
01054                     &mgr->reginfo, R_ADDR(&mgr->reginfo, td->name));
01055                 td->name = INVALID_ROFF;
01056         }
01057         if ((ret = __db_shalloc(&mgr->reginfo, len, 0, &p)) != 0) {
01058                 __db_err(dbenv,
01059                     "Unable to allocate memory for transaction name");
01060 
01061                 __os_free(dbenv, txn->name);
01062                 txn->name = NULL;
01063 
01064                 return (ret);
01065         }
01066         td->name = R_OFFSET(&mgr->reginfo, p);
01067         memcpy(p, name, len);
01068 
01069 #ifdef DIAGNOSTIC
01070         /*
01071          * If DIAGNOSTIC is set, map the name into the log so users can track
01072          * operations through the log.
01073          */
01074         if (DBENV_LOGGING(dbenv))
01075                 (void)__log_printf(dbenv, txn,
01076                     "transaction %#lx named %s", (u_long)txn->txnid, name);
01077 #endif
01078 
01079         return (0);
01080 }
01081 
01082 /*
01083  * __txn_set_timeout --
01084  *      DB_ENV->set_txn_timeout.
01085  * PUBLIC: int  __txn_set_timeout __P((DB_TXN *, db_timeout_t, u_int32_t));
01086  */
01087 int
01088 __txn_set_timeout(txn, timeout, op)
01089         DB_TXN *txn;
01090         db_timeout_t timeout;
01091         u_int32_t op;
01092 {
01093         if (op != DB_SET_TXN_TIMEOUT &&  op != DB_SET_LOCK_TIMEOUT)
01094                 return (__db_ferr(txn->mgrp->dbenv, "DB_TXN->set_timeout", 0));
01095 
01096         return (__lock_set_timeout(
01097             txn->mgrp->dbenv, txn->txnid, timeout, op));
01098 }
01099 
01100 /*
01101  * __txn_isvalid --
01102  *      Return 0 if the DB_TXN is reasonable, otherwise panic.
01103  */
01104 static int
01105 __txn_isvalid(txn, op)
01106         const DB_TXN *txn;
01107         txnop_t op;
01108 {
01109         DB_ENV *dbenv;
01110         DB_TXNMGR *mgr;
01111         DB_TXNREGION *region;
01112         TXN_DETAIL *td;
01113 
01114         mgr = txn->mgrp;
01115         dbenv = mgr->dbenv;
01116         region = mgr->reginfo.primary;
01117 
01118         /* Check for recovery. */
01119         if (!F_ISSET(txn, TXN_COMPENSATE) &&
01120             F_ISSET(region, TXN_IN_RECOVERY)) {
01121                 __db_err(dbenv, "operation not permitted during recovery");
01122                 goto err;
01123         }
01124 
01125         /* Check for live cursors. */
01126         if (txn->cursors != 0) {
01127                 __db_err(dbenv, "transaction has active cursors");
01128                 goto err;
01129         }
01130 
01131         /* Check transaction's state. */
01132         td = txn->td;
01133 
01134         /* Handle any operation specific checks. */
01135         switch (op) {
01136         case TXN_OP_DISCARD:
01137                 /*
01138                  * Since we're just tossing the per-process space; there are
01139                  * a lot of problems with the transaction that we can tolerate.
01140                  */
01141 
01142                 /* Transaction is already been reused. */
01143                 if (txn->txnid != td->txnid)
01144                         return (0);
01145 
01146                 /*
01147                  * What we've got had better be either a prepared or
01148                  * restored transaction.
01149                  */
01150                 if (td->status != TXN_PREPARED &&
01151                     !F_ISSET(td, TXN_DTL_RESTORED)) {
01152                         __db_err(dbenv, "not a restored transaction");
01153                         return (__db_panic(dbenv, EINVAL));
01154                 }
01155 
01156                 return (0);
01157         case TXN_OP_PREPARE:
01158                 if (txn->parent != NULL) {
01159                         /*
01160                          * This is not fatal, because you could imagine an
01161                          * application that simply prepares everybody because
01162                          * it doesn't distinguish between children and parents.
01163                          * I'm not arguing this is good, but I could imagine
01164                          * someone doing it.
01165                          */
01166                         __db_err(dbenv,
01167                             "Prepare disallowed on child transactions");
01168                         return (EINVAL);
01169                 }
01170                 break;
01171         case TXN_OP_ABORT:
01172         case TXN_OP_COMMIT:
01173         default:
01174                 break;
01175         }
01176 
01177         switch (td->status) {
01178         case TXN_PREPARED:
01179                 if (op == TXN_OP_PREPARE) {
01180                         __db_err(dbenv, "transaction already prepared");
01181                         /*
01182                          * Txn_prepare doesn't blow away the user handle, so
01183                          * in this case, give the user the opportunity to
01184                          * abort or commit.
01185                          */
01186                         return (EINVAL);
01187                 }
01188                 break;
01189         case TXN_RUNNING:
01190                 break;
01191         case TXN_ABORTED:
01192         case TXN_COMMITTED:
01193         default:
01194                 __db_err(dbenv, "transaction already %s",
01195                     td->status == TXN_COMMITTED ? "committed" : "aborted");
01196                 goto err;
01197         }
01198 
01199         return (0);
01200 
01201 err:    /*
01202          * If there's a serious problem with the transaction, panic.  TXN
01203          * handles are dead by definition when we return, and if you use
01204          * a cursor you forgot to close, we have no idea what will happen.
01205          */
01206         return (__db_panic(dbenv, EINVAL));
01207 }
01208 
/*
 * __txn_end --
 *      Internal transaction end routine, shared by commit and abort.
 *
 *      txn is the transaction being ended; is_commit is non-zero for a
 *      commit and zero for an abort.
 *
 *      Runs the end-of-transaction events, releases (or passes to the
 *      parent) the transaction's locks, frees the shared-region detail
 *      structure, updates the region statistics, and frees the
 *      per-process handle state.  Returns 0 on success; any internal
 *      failure is converted to a panic.
 */
static int
__txn_end(txn, is_commit)
        DB_TXN *txn;
        int is_commit;
{
        DB_ENV *dbenv;
        DB_LOCKREQ request;
        DB_TXNLOGREC *lr;
        DB_TXNMGR *mgr;
        DB_TXNREGION *region;
        TXN_DETAIL *ptd, *td;
        int do_closefiles, ret;

        mgr = txn->mgrp;
        dbenv = mgr->dbenv;
        region = mgr->reginfo.primary;
        do_closefiles = 0;

        /* Process the commit or abort events, as selected by is_commit. */
        if ((ret = __txn_doevents(dbenv,
            txn, is_commit ? TXN_COMMIT : TXN_ABORT, 0)) != 0)
                return (__db_panic(dbenv, ret));

        /*
         * Release the locks.
         *
         * __txn_end cannot return a simple error, we MUST return
         * success/failure from commit or abort, ignoring any internal
         * errors.  So, we panic if something goes wrong.  We can't
         * deadlock here because we're not acquiring any new locks,
         * so DB_LOCK_DEADLOCK is just as fatal as any other error.
         *
         * Only a committing child uses DB_LOCK_INHERIT; a top-level
         * transaction, or any abort, uses DB_LOCK_PUT_ALL.
         */
        if (LOCKING_ON(dbenv)) {
                request.op = txn->parent == NULL ||
                    is_commit == 0 ? DB_LOCK_PUT_ALL : DB_LOCK_INHERIT;
                request.obj = NULL;
                if ((ret = __lock_vec(dbenv,
                    txn->txnid, 0, &request, 1, NULL)) != 0)
                        return (__db_panic(dbenv, ret));
        }

        /*
         * End the transaction: everything up to TXN_SYSTEM_UNLOCK works
         * on shared-region state and runs under the region lock.
         */
        TXN_SYSTEM_LOCK(dbenv);

        td = txn->td;
        SH_TAILQ_REMOVE(&region->active_txn, td, links, __txn_detail);
        if (F_ISSET(td, TXN_DTL_RESTORED)) {
                /*
                 * Remember whether this was the last restored
                 * transaction; the follow-up work is done after the
                 * lock is dropped.
                 */
                region->stat.st_nrestores--;
                do_closefiles = region->stat.st_nrestores == 0;
        }

        /* Free the region copy of the transaction name, if any. */
        if (td->name != INVALID_ROFF) {
                __db_shalloc_free(
                    &mgr->reginfo, R_ADDR(&mgr->reginfo, td->name));
                td->name = INVALID_ROFF;
        }
        /* Unlink the detail from its parent's list of children. */
        if (txn->parent != NULL) {
                ptd = txn->parent->td;
                SH_TAILQ_REMOVE(&ptd->kids, td, klinks, __txn_detail);
        }
        /* td must not be used after this point. */
        __db_shalloc_free(&mgr->reginfo, td);

        if (is_commit)
                region->stat.st_ncommits++;
        else
                region->stat.st_naborts++;
        --region->stat.st_nactive;

        TXN_SYSTEM_UNLOCK(dbenv);

        /*
         * The transaction cannot get more locks, remove its locker info,
         * if any.
         */
        if (LOCKING_ON(dbenv) && (ret =
            __lock_freefamilylocker(dbenv->lk_handle, txn->txnid)) != 0)
                return (__db_panic(dbenv, ret));
        /* Unlink the per-process handle from its parent's handle list. */
        if (txn->parent != NULL)
                TAILQ_REMOVE(&txn->parent->kids, txn, klinks);

        /* Free the space: queued log records, the name, then the handle. */
        while ((lr = STAILQ_FIRST(&txn->logs)) != NULL) {
                STAILQ_REMOVE(&txn->logs, lr, __txn_logrec, links);
                __os_free(dbenv, lr);
        }
        if (txn->name != NULL) {
                __os_free(dbenv, txn->name);
                txn->name = NULL;
        }
        if (F_ISSET(txn, TXN_MALLOC)) {
                MUTEX_LOCK(dbenv, mgr->mutex);
                TAILQ_REMOVE(&mgr->txn_chain, txn, links);
                MUTEX_UNLOCK(dbenv, mgr->mutex);

                /* txn may be invalid from here on; use only dbenv/mgr. */
                __os_free(dbenv, txn);
        }

        /*
         * The last restored transaction has just been resolved: close the
         * files with DBLOG_RECOVER set, reset the discard count and force
         * a checkpoint.  Failures here are deliberately ignored.
         */
        if (do_closefiles) {
                F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
                (void)__dbreg_close_files(dbenv);
                F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
                mgr->n_discards = 0;
                (void)__txn_checkpoint(dbenv, 0, 0, DB_FORCE);
        }

        return (0);
}
01320 
01321 static int
01322 __txn_dispatch_undo(dbenv, txn, rdbt, key_lsn, txnlist)
01323         DB_ENV *dbenv;
01324         DB_TXN *txn;
01325         DBT *rdbt;
01326         DB_LSN *key_lsn;
01327         void *txnlist;
01328 {
01329         int ret;
01330 
01331         ret = __db_dispatch(dbenv, dbenv->recover_dtab,
01332             dbenv->recover_dtab_size, rdbt, key_lsn, DB_TXN_ABORT, txnlist);
01333         if (ret == DB_SURPRISE_KID) {
01334                 F_SET(txn, TXN_CHILDCOMMIT);
01335                 ret = 0;
01336         }
01337         if (ret == 0 && F_ISSET(txn, TXN_CHILDCOMMIT) && IS_ZERO_LSN(*key_lsn))
01338                 ret = __db_txnlist_lsnget(dbenv, txnlist, key_lsn, 0);
01339 
01340         return (ret);
01341 }
01342 
/*
 * __txn_undo --
 *      Undo the changes made by the transaction txn.
 *
 *      Records queued on the handle (txn->logs) are undone first, then
 *      the records in the log, walking backwards from the transaction's
 *      last_lsn until a zero LSN is reached.  Returns 0 on success
 *      (immediately, if logging is not configured) or the first error.
 */
static int
__txn_undo(txn)
        DB_TXN *txn;
{
        DBT rdbt;
        DB_ENV *dbenv;
        DB_LOGC *logc;
        DB_LSN key_lsn;
        DB_TXN *ptxn;
        DB_TXNHEAD *txnlist;
        DB_TXNLOGREC *lr;
        DB_TXNMGR *mgr;
        int ret, t_ret;

        mgr = txn->mgrp;
        dbenv = mgr->dbenv;
        logc = NULL;
        txnlist = NULL;
        ret = 0;

        /* Without logging there is nothing to undo. */
        if (!DBENV_LOGGING(dbenv))
                return (0);

        /*
         * This is the simplest way to code this, but if the mallocs during
         * recovery turn out to be a performance issue, we can do the
         * allocation here and use DB_DBT_USERMEM.
         */
        memset(&rdbt, 0, sizeof(rdbt));

        /*
         * Allocate a txnlist for children and aborted page allocs.
         * We need to associate the list with the maximal parent
         * so that aborted pages are recovered when that transaction
         * is committed or aborted.
         *
         * After this loop ptxn is the top-most ancestor of txn, or NULL
         * if txn has no parent.
         */
        for (ptxn = txn->parent; ptxn != NULL && ptxn->parent != NULL;)
                ptxn = ptxn->parent;

        /*
         * Use the top-most ancestor's list if it has one, else our own;
         * failing both, create a list and attach it to the ancestor when
         * there is one.  Nothing needs cleaning up if the creation fails.
         */
        if (ptxn != NULL && ptxn->txn_list != NULL)
                txnlist = ptxn->txn_list;
        else if (txn->txn_list != NULL)
                txnlist = txn->txn_list;
        else if ((ret = __db_txnlist_init(dbenv, 0, 0, NULL, &txnlist)) != 0)
                return (ret);
        else if (ptxn != NULL)
                ptxn->txn_list = txnlist;

        /*
         * Take log records from the linked list stored in the transaction,
         * then from the log.
         */
        for (lr = STAILQ_FIRST(&txn->logs);
            lr != NULL; lr = STAILQ_NEXT(lr, links)) {
                rdbt.data = lr->data;
                rdbt.size = 0;
                LSN_NOT_LOGGED(key_lsn);
                ret =
                    __txn_dispatch_undo(dbenv, txn, &rdbt, &key_lsn, txnlist);
                if (ret != 0) {
                        __db_err(dbenv,
                            "DB_TXN->abort: In-memory log undo failed: %s",
                            db_strerror(ret));
                        goto err;
                }
        }

        /* Start the log walk at the transaction's most recent record. */
        key_lsn = ((TXN_DETAIL *)txn->td)->last_lsn;

        /* A log cursor is only opened when there is something to read. */
        if (!IS_ZERO_LSN(key_lsn) &&
             (ret = __log_cursor(dbenv, &logc)) != 0)
                goto err;

        while (!IS_ZERO_LSN(key_lsn)) {
                /*
                 * The dispatch routine returns the lsn of the record
                 * before the current one in the key_lsn argument.
                 */
                if ((ret = __log_c_get(logc, &key_lsn, &rdbt, DB_SET)) == 0) {
                        ret = __txn_dispatch_undo(dbenv,
                            txn, &rdbt, &key_lsn, txnlist);
                }

                if (ret != 0) {
                        __db_err(dbenv,
                    "DB_TXN->abort: Log undo failed for LSN: %lu %lu: %s",
                            (u_long)key_lsn.file, (u_long)key_lsn.offset,
                            db_strerror(ret));
                        goto err;
                }
        }

#ifndef HAVE_FTRUNCATE
        /* The result of this call becomes the function's return value. */
        ret = __db_do_the_limbo(dbenv, ptxn, txn, txnlist, LIMBO_NORMAL);
#endif

        /* Closing the cursor must not mask an earlier error. */
err:    if (logc != NULL && (t_ret = __log_c_close(logc)) != 0 && ret == 0)
                ret = t_ret;

        /*
         * With no ancestor to hold on to the list, end it here; otherwise
         * it is left for the top-most ancestor.
         */
        if (ptxn == NULL && txnlist != NULL)
                __db_txnlist_end(dbenv, txnlist);
        return (ret);
}
01450 
01451 /*
01452  * __txn_activekids --
01453  *      Return if this transaction has any active children.
01454  *
01455  * PUBLIC: int __txn_activekids __P((DB_ENV *, u_int32_t, DB_TXN *));
01456  */
01457 int
01458 __txn_activekids(dbenv, rectype, txn)
01459         DB_ENV *dbenv;
01460         u_int32_t rectype;
01461         DB_TXN *txn;
01462 {
01463         /*
01464          * On a child commit, we know that there are children (i.e., the
01465          * committing child at the least.  In that case, skip this check.
01466          */
01467         if (F_ISSET(txn, TXN_COMPENSATE) || rectype == DB___txn_child)
01468                 return (0);
01469 
01470         if (TAILQ_FIRST(&txn->kids) != NULL) {
01471                 __db_err(dbenv, "Child transaction is active");
01472                 return (EPERM);
01473         }
01474         return (0);
01475 }
01476 
/*
 * __txn_force_abort --
 *      Force an abort record into the log if the commit record
 *      failed to get to disk.
 *
 *      buffer points at the start of the commit record, header included,
 *      inside the log buffer.  The record is modified in place: its
 *      opcode is overwritten with TXN_ABORT and the header checksum is
 *      recomputed.  With encryption on, the record body is decrypted
 *      before the change and re-encrypted afterwards.
 *
 *      Returns 0 on success; a decrypt or encrypt failure panics the
 *      environment.
 *
 * PUBLIC: int __txn_force_abort __P((DB_ENV *, u_int8_t *));
 */
int
__txn_force_abort(dbenv, buffer)
        DB_ENV *dbenv;
        u_int8_t *buffer;
{
        DB_CIPHER *db_cipher;
        HDR *hdr;
        u_int32_t hdrlen, offset, opcode, sum_len;
        u_int8_t *bp, *key, chksum[DB_MAC_KEY];
        size_t hdrsize, rec_len;
        int ret;

        db_cipher = dbenv->crypto_handle;

        /*
         * This routine depends on the layout of HDR and the __txn_regop
         * __txn_xa_regop records in txn.src.  We are passed the beginning
         * of the commit record in the log buffer and overwrite the
         * commit with an abort and recalculate the checksum.
         */
        hdrsize = CRYPTO_ON(dbenv) ? HDR_CRYPTO_SZ : HDR_NORMAL_SZ;

        /*
         * The header's len field is copied out with memcpy rather than
         * read through hdr.  The record body length is that value minus
         * the header size.
         */
        hdr = (HDR *)buffer;
        memcpy(&hdrlen, buffer + SSZ(HDR, len), sizeof(hdr->len));
        rec_len = hdrlen - hdrsize;

        /*
         * Byte offset of the opcode inside the record body: it follows
         * two u_int32_t fields and one DB_LSN.
         * NOTE(review): presumably the record type, the transaction ID
         * and the previous LSN -- confirm against txn.src.
         */
        offset = sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN);
        if (CRYPTO_ON(dbenv)) {
                /* Keyed checksum of DB_MAC_KEY bytes; decrypt the body. */
                key = db_cipher->mac_key;
                sum_len = DB_MAC_KEY;
                if ((ret = db_cipher->decrypt(dbenv, db_cipher->data,
                    &hdr->iv[0], buffer + hdrsize, rec_len)) != 0)
                        return (__db_panic(dbenv, ret));
        } else {
                /* Unkeyed checksum; only a u_int32_t's worth is stored. */
                key = NULL;
                sum_len = sizeof(u_int32_t);
        }
        /* Overwrite the opcode in place. */
        bp = buffer + hdrsize + offset;
        opcode = TXN_ABORT;
        memcpy(bp, &opcode, sizeof(opcode));

        /* Re-encrypt the modified body using the IV stored in the header. */
        if (CRYPTO_ON(dbenv) &&
            (ret = db_cipher->encrypt(dbenv,
            db_cipher->data, &hdr->iv[0], buffer + hdrsize, rec_len)) != 0)
                return (__db_panic(dbenv, ret));

        /* Checksum the final body bytes and store the result in the header. */
        __db_chksum(buffer + hdrsize, rec_len, key, chksum);
        memcpy(buffer + SSZA(HDR, chksum), chksum, sum_len);

        return (0);
}
01535 
01536 /*
01537  * __txn_preclose
01538  *      Before we can close an environment, we need to check if we
01539  * were in the midst of taking care of restored transactions.  If
01540  * so, then we need to close the files that we opened.
01541  *
01542  * PUBLIC: int __txn_preclose __P((DB_ENV *));
01543  */
01544 int
01545 __txn_preclose(dbenv)
01546         DB_ENV *dbenv;
01547 {
01548         DB_TXNMGR *mgr;
01549         DB_TXNREGION *region;
01550         int do_closefiles, ret;
01551 
01552         mgr = dbenv->tx_handle;
01553         region = mgr->reginfo.primary;
01554         do_closefiles = 0;
01555 
01556         TXN_SYSTEM_LOCK(dbenv);
01557         if (region != NULL &&
01558             region->stat.st_nrestores <= mgr->n_discards &&
01559             mgr->n_discards != 0)
01560                 do_closefiles = 1;
01561         TXN_SYSTEM_UNLOCK(dbenv);
01562 
01563         if (do_closefiles) {
01564                 /*
01565                  * Set the DBLOG_RECOVER flag while closing these
01566                  * files so they do not create additional log records
01567                  * that will confuse future recoveries.
01568                  */
01569                 F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
01570                 ret = __dbreg_close_files(dbenv);
01571                 F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
01572         } else
01573                 ret = 0;
01574 
01575         return (ret);
01576 }
01577 
01578 /*
01579  * __txn_reset --
01580  *      Reset the last txnid to its minimum value, and log the reset.
01581  *
01582  * PUBLIC: int __txn_reset __P((DB_ENV *));
01583  */
01584 int
01585 __txn_reset(dbenv)
01586         DB_ENV *dbenv;
01587 {
01588         DB_LSN scrap;
01589         DB_TXNREGION *region;
01590 
01591         region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary;
01592         region->last_txnid = TXN_MINIMUM;
01593 
01594         DB_ASSERT(LOGGING_ON(dbenv));
01595         return (__txn_recycle_log(dbenv,
01596             NULL, &scrap, 0, TXN_MINIMUM, TXN_MAXIMUM));
01597 }
01598 
01599 /*
01600  * txn_set_txn_lsnp --
01601  *      Set the pointer to the begin_lsn field if that field is zero.
01602  *      Set the pointer to the last_lsn field.
01603  */
01604 static void
01605 __txn_set_txn_lsnp(txn, blsnp, llsnp)
01606         DB_TXN *txn;
01607         DB_LSN **blsnp;
01608         DB_LSN **llsnp;
01609 {
01610         DB_LSN *lsnp;
01611         TXN_DETAIL *td;
01612 
01613         td = txn->td;
01614         *llsnp = &td->last_lsn;
01615         while (td->parent != INVALID_ROFF)
01616                 td = R_ADDR(&txn->mgrp->reginfo, td->parent);
01617 
01618         lsnp = &td->begin_lsn;
01619         if (IS_ZERO_LSN(*lsnp))
01620                 *blsnp = lsnp;
01621 }

Generated on Sun Dec 25 12:14:56 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2