Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

db.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994, 1995, 1996
00009  *      Keith Bostic.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993, 1994, 1995
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * Redistribution and use in source and binary forms, with or without
00016  * modification, are permitted provided that the following conditions
00017  * are met:
00018  * 1. Redistributions of source code must retain the above copyright
00019  *    notice, this list of conditions and the following disclaimer.
00020  * 2. Redistributions in binary form must reproduce the above copyright
00021  *    notice, this list of conditions and the following disclaimer in the
00022  *    documentation and/or other materials provided with the distribution.
00023  * 3. Neither the name of the University nor the names of its contributors
00024  *    may be used to endorse or promote products derived from this software
00025  *    without specific prior written permission.
00026  *
00027  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00028  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00029  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00030  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00031  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00032  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00033  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00034  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00035  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00036  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00037  * SUCH DAMAGE.
00038  *
00039  * $Id: db.c,v 12.22 2005/11/12 17:41:44 bostic Exp $
00040  */
00041 
00042 #include "db_config.h"
00043 
00044 #ifndef NO_SYSTEM_INCLUDES
00045 #include <sys/types.h>
00046 
00047 #include <string.h>
00048 #endif
00049 
00050 #include "db_int.h"
00051 #include "dbinc/db_page.h"
00052 #include "dbinc/db_shash.h"
00053 #include "dbinc/db_swap.h"
00054 #include "dbinc/btree.h"
00055 #include "dbinc/fop.h"
00056 #include "dbinc/hash.h"
00057 #include "dbinc/lock.h"
00058 #include "dbinc/log.h"
00059 #include "dbinc/mp.h"
00060 #include "dbinc/qam.h"
00061 #include "dbinc/txn.h"
00062 
/*
 * Forward declarations for helpers private to this file.
 *
 * __db_disassociate is called from __db_refresh to detach each secondary
 * index from a primary that is being closed.
 *
 * The remaining prototypes exist only in CONFIG_TEST builds; their
 * definitions are not in this part of the file.
 */
00063 static int __db_disassociate __P((DB *));
00064 
00065 #ifdef CONFIG_TEST
00066 static void __db_makecopy __P((DB_ENV *, const char *, const char *));
00067 static int  __db_testdocopy __P((DB_ENV *, const char *));
00068 static int  __qam_testdocopy __P((DB *, const char *));
00069 #endif
00070 
00071 /*
00072  * DB.C --
00073  *      This file contains the utility functions for the DBP layer.
00074  */
00075 
00076 /*
00077  * __db_master_open --
00078  *      Open up a handle on a master database.
00079  *
00080  * PUBLIC: int __db_master_open __P((DB *,
00081  * PUBLIC:     DB_TXN *, const char *, u_int32_t, int, DB **));
00082  */
00083 int
00084 __db_master_open(subdbp, txn, name, flags, mode, dbpp)
00085         DB *subdbp;
00086         DB_TXN *txn;
00087         const char *name;
00088         u_int32_t flags;
00089         int mode;
00090         DB **dbpp;
00091 {
00092         DB *dbp;
00093         int ret;
00094 
00095         *dbpp = NULL;
00096 
00097         /* Open up a handle on the main database. */
00098         if ((ret = db_create(&dbp, subdbp->dbenv, 0)) != 0)
00099                 return (ret);
00100 
00101         /*
00102          * It's always a btree.
00103          * Run in the transaction we've created.
00104          * Set the pagesize in case we're creating a new database.
00105          * Flag that we're creating a database with subdatabases.
00106          */
00107         dbp->pgsize = subdbp->pgsize;
00108         F_SET(dbp, DB_AM_SUBDB);
00109         F_SET(dbp, F_ISSET(subdbp,
00110             DB_AM_RECOVER | DB_AM_SWAP |
00111             DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE));
00112 
00113         /*
00114          * If there was a subdb specified, then we only want to apply
00115          * DB_EXCL to the subdb, not the actual file.  We only got here
00116          * because there was a subdb specified.
00117          */
00118         LF_CLR(DB_EXCL);
00119         LF_SET(DB_RDWRMASTER);
00120         if ((ret = __db_open(dbp,
00121             txn, name, NULL, DB_BTREE, flags, mode, PGNO_BASE_MD)) != 0)
00122                 goto err;
00123 
00124         /*
00125          * Verify that pagesize is the same on both.  The items in dbp were now
00126          * initialized from the meta page.  The items in dbp were set in
00127          * __db_dbopen when we either read or created the master file.  Other
00128          * items such as checksum and encryption are checked when we read the
00129          * meta-page.  So we do not check those here.  However, if the
00130          * meta-page caused checksumming to be turned on and it wasn't already,
00131          * set it here.
00132          */
00133         if (F_ISSET(dbp, DB_AM_CHKSUM))
00134                 F_SET(subdbp, DB_AM_CHKSUM);
00135         if (subdbp->pgsize != 0 && dbp->pgsize != subdbp->pgsize) {
00136                 ret = EINVAL;
00137                 __db_err(dbp->dbenv,
00138                     "Different pagesize specified on existent file");
00139                 goto err;
00140         }
00141 err:
00142         if (ret != 0 && !F_ISSET(dbp, DB_AM_DISCARD))
00143                 (void)__db_close(dbp, txn, 0);
00144         else
00145                 *dbpp = dbp;
00146         return (ret);
00147 }
00148 
00149 /*
00150  * __db_master_update --
00151  *      Add/Open/Remove/Rename a subdatabase in a master database.
       *
       *      mdbp:    open handle on the master database.
       *      sdbp:    subdatabase handle; its meta_pgno is filled in from
       *               the master record (MU_OPEN, MU_REMOVE) or from the
       *               newly allocated meta page (MU_OPEN with DB_CREATE).
       *      txn:     transaction used for the cursors.
       *      subdb:   subdatabase name; used as the lookup key without its
       *               nul terminator.
       *      type:    access method; only consulted when creating, to pick
       *               P_HASHMETA (DB_HASH) or P_BTREEMETA (anything else).
       *      action:  MU_OPEN, MU_REMOVE or MU_RENAME.
       *      newname: new key for MU_RENAME; not referenced otherwise.
       *      flags:   DB_CREATE / DB_EXCL are examined.
       *
       *      Returns 0 on success.  Failures visible here include EEXIST
       *      (rename target exists, or DB_CREATE|DB_EXCL on an existing
       *      subdatabase), ENOENT (MU_OPEN without DB_CREATE on a missing
       *      subdatabase), or whatever the cursor/page routines returned.
00152  *
00153  * PUBLIC: int __db_master_update __P((DB *, DB *, DB_TXN *, const char *,
00154  * PUBLIC:     DBTYPE, mu_action, const char *, u_int32_t));
00155  */
00156 int
00157 __db_master_update(mdbp, sdbp, txn, subdb, type, action, newname, flags)
00158         DB *mdbp, *sdbp;
00159         DB_TXN *txn;
00160         const char *subdb;
00161         DBTYPE type;
00162         mu_action action;
00163         const char *newname;
00164         u_int32_t flags;
00165 {
00166         DB_ENV *dbenv;
00167         DBC *dbc, *ndbc;
00168         DBT key, data, ndata;
00169         PAGE *p, *r;
00170         db_pgno_t t_pgno;
00171         int modify, ret, t_ret;
00172 
00173         dbenv = mdbp->dbenv;
              /* NULL-initialize everything the shared cleanup code tests. */
00174         dbc = ndbc = NULL;
00175         p = NULL;
00176 
00177         memset(&key, 0, sizeof(key));
00178         memset(&data, 0, sizeof(data));
00179 
00180         /* Might we modify the master database?  If so, we'll need to lock. */
00181         modify = (action != MU_OPEN || LF_ISSET(DB_CREATE)) ? 1 : 0;
00182 
00183         /*
00184          * Open up a cursor.  If this is CDB and we're creating the database,
00185          * make it an update cursor.
00186          */
00187         if ((ret = __db_cursor(mdbp, txn, &dbc,
00188             (CDB_LOCKING(dbenv) && modify) ? DB_WRITECURSOR : 0)) != 0)
00189                 goto err;
00190 
00191         /*
00192          * Point the cursor at the record.
00193          *
00194          * If we're removing or potentially creating an entry, lock the page
00195          * with DB_RMW.
00196          *
00197          * We do multiple cursor operations with the cursor in some cases and
00198          * subsequently access the data DBT information.  Set DB_DBT_MALLOC so
00199          * we don't risk modification of the data between our uses of it.
00200          *
00201          * !!!
00202          * We don't include the name's nul termination in the database.
00203          */
00204         key.data = (void *)subdb;
00205         key.size = (u_int32_t)strlen(subdb);
00206         F_SET(&data, DB_DBT_MALLOC);
00207 
              /* The result is deliberately not checked here; each case does so. */
00208         ret = __db_c_get(dbc, &key, &data,
00209             DB_SET | ((STD_LOCKING(dbc) && modify) ? DB_RMW : 0));
00210 
00211         /*
00212          * What we do next--whether or not we found a record for the
00213          * specified subdatabase--depends on what the specified action is.
00214          * Handle ret appropriately as the first statement of each case.
00215          */
00216         switch (action) {
00217         case MU_REMOVE:
00218                 /*
00219                  * We should have found something if we're removing it.  Note
00220                  * that in the common case where the DB we're asking to remove
00221                  * doesn't exist, we won't get this far;  __db_subdb_remove
00222                  * will already have returned an error from __db_open.
00223                  */
00224                 if (ret != 0)
00225                         goto err;
00226 
00227                 /*
00228                  * Delete the subdatabase entry first;  if this fails,
00229                  * we don't want to touch the actual subdb pages.
00230                  */
00231                 if ((ret = __db_c_del(dbc, 0)) != 0)
00232                         goto err;
00233 
00234                 /*
00235                  * We're handling actual data, not on-page meta-data,
00236                  * so it hasn't been converted to/from opposite
00237                  * endian architectures.  Do it explicitly, now.
00238                  */
00239                 memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
00240                 DB_NTOHL(&sdbp->meta_pgno);
00241                 if ((ret =
00242                     __memp_fget(mdbp->mpf, &sdbp->meta_pgno, 0, &p)) != 0)
00243                         goto err;
00244 
00245                 /* Free the root on the master db if it was created. */
00246                 if (TYPE(p) == P_BTREEMETA &&
00247                     ((BTMETA *)p)->root != PGNO_INVALID) {
00248                         if ((ret = __memp_fget(mdbp->mpf,
00249                              &((BTMETA *)p)->root, 0, &r)) != 0)
00250                                 goto err;
00251 
00252                         /* Free and put the page. */
                              /*
                               * NOTE(review): r is not released in the cleanup
                               * code below, so this relies on __db_free giving
                               * the page back even when it fails -- confirm.
                               */
00253                         if ((ret = __db_free(dbc, r)) != 0) {
00254                                 r = NULL;
00255                                 goto err;
00256                         }
00257                 }
00258                 /* Free and put the page. */
                      /*
                       * p is cleared on both paths so the cleanup code does
                       * not put the meta page a second time.
                       */
00259                 if ((ret = __db_free(dbc, p)) != 0) {
00260                         p = NULL;
00261                         goto err;
00262                 }
00263                 p = NULL;
00264                 break;
00265         case MU_RENAME:
00266                 /* We should have found something if we're renaming it. */
00267                 if (ret != 0)
00268                         goto err;
00269 
00270                 /*
00271                  * Before we rename, we need to make sure we're not
00272                  * overwriting another subdatabase, or else this operation
00273                  * won't be undoable.  Open a second cursor and check
00274                  * for the existence of newname;  it shouldn't appear under
00275                  * us since we hold the metadata lock.
00276                  */
00277                 if ((ret = __db_cursor(mdbp, txn, &ndbc, 0)) != 0)
00278                         goto err;
                      /* From here on, key refers to newname, not subdb. */
00279                 key.data = (void *)newname;
00280                 key.size = (u_int32_t)strlen(newname);
00281 
00282                 /*
00283                  * We don't actually care what the meta page of the potentially-
00284                  * overwritten DB is;  we just care about existence.
00285                  */
                      /* A zeroed partial, user-memory DBT asks for no data bytes. */
00286                 memset(&ndata, 0, sizeof(ndata));
00287                 F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
00288 
00289                 if ((ret = __db_c_get(ndbc, &key, &ndata, DB_SET)) == 0) {
00290                         /* A subdb called newname exists.  Bail. */
00291                         ret = EEXIST;
00292                         __db_err(dbenv, "rename: database %s exists", newname);
00293                         goto err;
00294                 } else if (ret != DB_NOTFOUND)
00295                         goto err;
00296 
00297                 /*
00298                  * Now do the put first;  we don't want to lose our
00299                  * sole reference to the subdb.  Use the second cursor
00300                  * so that the first one continues to point to the old record.
00301                  */
00302                 if ((ret = __db_c_put(ndbc, &key, &data, DB_KEYFIRST)) != 0)
00303                         goto err;
00304                 if ((ret = __db_c_del(dbc, 0)) != 0) {
00305                         /*
00306                          * If the delete fails, try to delete the record
00307                          * we just put, in case we're not txn-protected.
00308                          */
00309                         (void)__db_c_del(ndbc, 0);
00310                         goto err;
00311                 }
00312 
00313                 break;
00314         case MU_OPEN:
00315                 /*
00316                  * Get the subdatabase information.  If it already exists,
00317                  * copy out the page number and we're done.
00318                  */
00319                 switch (ret) {
00320                 case 0:
                              /* Exclusive create of an existing subdatabase fails. */
00321                         if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) {
00322                                 ret = EEXIST;
00323                                 goto err;
00324                         }
                              /* Stored page number is in network order; convert it. */
00325                         memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
00326                         DB_NTOHL(&sdbp->meta_pgno);
00327                         goto done;
00328                 case DB_NOTFOUND:
                              /* Leave the inner switch and create the subdatabase. */
00329                         if (LF_ISSET(DB_CREATE))
00330                                 break;
00331                         /*
00332                          * No db_err, it is reasonable to remove a
00333                          * nonexistent db.
00334                          */
00335                         ret = ENOENT;
00336                         goto err;
00337                 default:
00338                         goto err;
00339                 }
00340 
00341                 /* Create a subdatabase. */
00342                 if ((ret = __db_new(dbc,
00343                     type == DB_HASH ? P_HASHMETA : P_BTREEMETA, &p)) != 0)
00344                         goto err;
00345                 sdbp->meta_pgno = PGNO(p);
00346 
00347                 /*
00348                  * XXX
00349                  * We're handling actual data, not on-page meta-data, so it
00350                  * hasn't been converted to/from opposite endian architectures.
00351                  * Do it explicitly, now.
00352                  */
00353                 t_pgno = PGNO(p);
00354                 DB_HTONL(&t_pgno);
00355                 memset(&ndata, 0, sizeof(ndata));
00356                 ndata.data = &t_pgno;
00357                 ndata.size = sizeof(db_pgno_t);
00358                 if ((ret = __db_c_put(dbc, &key, &ndata, DB_KEYLAST)) != 0)
00359                         goto err;
00360                 F_SET(sdbp, DB_AM_CREATED);
00361                 break;
00362         }
00363 
      /* Success and failure share one cleanup path; ret tells them apart. */
00364 err:
00365 done:   /*
00366          * If we allocated a page: if we're successful, mark the page dirty
00367          * and return it to the cache, otherwise, discard/free it.
00368          */
00369         if (p != NULL) {
00370                 if (ret == 0) {
00371                         if ((t_ret =
00372                             __memp_fput(mdbp->mpf, p, DB_MPOOL_DIRTY)) != 0)
00373                                 ret = t_ret;
00374                 } else
00375                         (void)__memp_fput(mdbp->mpf, p, 0);
00376         }
00377 
00378         /* Discard the cursor(s) and data. */
              /*
               * data.data was filled in under DB_DBT_MALLOC by the initial
               * lookup, so it is ours to free.  A cursor-close error is only
               * reported when no earlier error is already pending.
               */
00379         if (data.data != NULL)
00380                 __os_ufree(dbenv, data.data);
00381         if (dbc != NULL && (t_ret = __db_c_close(dbc)) != 0 && ret == 0)
00382                 ret = t_ret;
00383         if (ndbc != NULL && (t_ret = __db_c_close(ndbc)) != 0 && ret == 0)
00384                 ret = t_ret;
00385 
00386         return (ret);
00387 }
00388 
00389 /*
00390  * __db_dbenv_setup --
00391  *      Set up the underlying environment during a db_open.
00392  *
00393  * PUBLIC: int __db_dbenv_setup __P((DB *,
00394  * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t, u_int32_t));
00395  */
00396 int
00397 __db_dbenv_setup(dbp, txn, fname, dname, id, flags)
00398         DB *dbp;
00399         DB_TXN *txn;
00400         const char *fname, *dname;
00401         u_int32_t id, flags;
00402 {
00403         DB *ldbp;
00404         DB_ENV *dbenv;
00405         u_int32_t maxid;
00406         int ret;
00407 
00408         dbenv = dbp->dbenv;
00409 
00410         /* If we don't yet have an environment, it's time to create it. */
00411         if (!F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) {
00412                 /* Make sure we have at least DB_MINCACHE pages in our cache. */
00413                 if (dbenv->mp_gbytes == 0 &&
00414                     dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE &&
00415                     (ret = __memp_set_cachesize(
00416                     dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0)
00417                         return (ret);
00418 
00419                 if ((ret = __env_open(dbenv, NULL, DB_CREATE |
00420                     DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0)
00421                         return (ret);
00422         }
00423 
00424         /* Join the underlying cache. */
00425         if ((!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) &&
00426             (ret = __db_dbenv_mpool(dbp, fname, flags)) != 0)
00427                 return (ret);
00428 
00429         /* We may need a per-thread mutex. */
00430         if (LF_ISSET(DB_THREAD) && (ret = __mutex_alloc(
00431             dbenv, MTX_DB_HANDLE, DB_MUTEX_THREAD, &dbp->mutex)) != 0)
00432                 return (ret);
00433 
00434         /*
00435          * Set up a bookkeeping entry for this database in the log region,
00436          * if such a region exists.  Note that even if we're in recovery
00437          * or a replication client, where we won't log registries, we'll
00438          * still need an FNAME struct, so LOGGING_ON is the correct macro.
00439          */
00440         if (LOGGING_ON(dbenv) && dbp->log_filename == NULL &&
00441             (ret = __dbreg_setup(dbp,
00442             F_ISSET(dbp, DB_AM_INMEM) ? dname : fname, id)) != 0)
00443                 return (ret);
00444 
00445         /*
00446          * If we're actively logging and our caller isn't a recovery function
00447          * that already did so, then assign this dbp a log fileid.
00448          */
00449         if (DBENV_LOGGING(dbenv) && !F_ISSET(dbp, DB_AM_RECOVER) &&
00450 #if !defined(DEBUG_ROP)
00451             !F_ISSET(dbp, DB_AM_RDONLY) &&
00452 #endif
00453             (ret = __dbreg_new_id(dbp, txn)) != 0)
00454                 return (ret);
00455 
00456         /*
00457          * Insert ourselves into the DB_ENV's dblist.  We allocate a
00458          * unique ID to each {fileid, meta page number} pair, and to
00459          * each temporary file (since they all have a zero fileid).
00460          * This ID gives us something to use to tell which DB handles
00461          * go with which databases in all the cursor adjustment
00462          * routines, where we don't want to do a lot of ugly and
00463          * expensive memcmps.
00464          */
00465         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
00466         for (maxid = 0, ldbp = LIST_FIRST(&dbenv->dblist);
00467             ldbp != NULL; ldbp = LIST_NEXT(ldbp, dblistlinks)) {
00468                 if (!F_ISSET(dbp, DB_AM_INMEM)) {
00469                         if (memcmp(ldbp->fileid, dbp->fileid, DB_FILE_ID_LEN)
00470                             == 0 && ldbp->meta_pgno == dbp->meta_pgno)
00471                                 break;
00472                 } else if (dname != NULL) {
00473                         if (F_ISSET(ldbp, DB_AM_INMEM) &&
00474                             strcmp(ldbp->dname, dname) == 0)
00475                                 break;
00476                 }
00477                 if (ldbp->adj_fileid > maxid)
00478                         maxid = ldbp->adj_fileid;
00479         }
00480 
00481         /*
00482          * If ldbp is NULL, we didn't find a match, or we weren't
00483          * really looking because fname is NULL.  Assign the dbp an
00484          * adj_fileid one higher than the largest we found, and
00485          * insert it at the head of the master dbp list.
00486          *
00487          * If ldbp is not NULL, it is a match for our dbp.  Give dbp
00488          * the same ID that ldbp has, and add it after ldbp so they're
00489          * together in the list.
00490          */
00491         if (ldbp == NULL) {
00492                 dbp->adj_fileid = maxid + 1;
00493                 LIST_INSERT_HEAD(&dbenv->dblist, dbp, dblistlinks);
00494         } else {
00495                 dbp->adj_fileid = ldbp->adj_fileid;
00496                 LIST_INSERT_AFTER(ldbp, dbp, dblistlinks);
00497         }
00498         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
00499 
00500         return (0);
00501 }
00502 
00503 /*
00504  * __db_dbenv_mpool --
00505  *      Set up the underlying environment cache during a db_open.
00506  *
00507  * PUBLIC: int __db_dbenv_mpool __P((DB *, const char *, u_int32_t));
00508  */
00509 int
00510 __db_dbenv_mpool(dbp, fname, flags)
00511         DB *dbp;
00512         const char *fname;
00513         u_int32_t flags;
00514 {
00515         DB_ENV *dbenv;
00516         DBT pgcookie;
00517         DB_MPOOLFILE *mpf;
00518         DB_PGINFO pginfo;
00519         int fidset, ftype, ret;
00520         int32_t lsn_off;
00521         u_int8_t nullfid[DB_FILE_ID_LEN];
00522         u_int32_t clear_len;
00523 
00524         COMPQUIET(mpf, NULL);
00525 
00526         dbenv = dbp->dbenv;
00527         lsn_off = 0;
00528 
00529         /* It's possible that this database is already open. */
00530         if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
00531                 return (0);
00532 
00533         /*
00534          * If we need to pre- or post-process a file's pages on I/O, set the
00535          * file type.  If it's a hash file, always call the pgin and pgout
00536          * routines.  This means that hash files can never be mapped into
00537          * process memory.  If it's a btree file and requires swapping, we
00538          * need to page the file in and out.  This has to be right -- we can't
00539          * mmap files that are being paged in and out.
00540          */
00541         switch (dbp->type) {
00542         case DB_BTREE:
00543         case DB_RECNO:
00544                 ftype = F_ISSET(dbp, DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM)
00545                     ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
00546                 clear_len = CRYPTO_ON(dbenv) ?
00547                     (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
00548                     DB_PAGE_DB_LEN;
00549                 break;
00550         case DB_HASH:
00551                 ftype = DB_FTYPE_SET;
00552                 clear_len = CRYPTO_ON(dbenv) ?
00553                     (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
00554                     DB_PAGE_DB_LEN;
00555                 break;
00556         case DB_QUEUE:
00557                 ftype = F_ISSET(dbp,
00558                     DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) ?
00559                     DB_FTYPE_SET : DB_FTYPE_NOTSET;
00560 
00561                 /*
00562                  * If we came in here without a pagesize set, then we need
00563                  * to mark the in-memory handle as having clear_len not
00564                  * set, because we don't really know the clear length or
00565                  * the page size yet (since the file doesn't yet exist).
00566                  */
00567                 clear_len = dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET;
00568                 break;
00569         case DB_UNKNOWN:
00570                 /*
00571                  * If we're running in the verifier, our database might
00572                  * be corrupt and we might not know its type--but we may
00573                  * still want to be able to verify and salvage.
00574                  *
00575                  * If we can't identify the type, it's not going to be safe
00576                  * to call __db_pgin--we pretty much have to give up all
00577                  * hope of salvaging cross-endianness.  Proceed anyway;
00578                  * at worst, the database will just appear more corrupt
00579                  * than it actually is, but at best, we may be able
00580                  * to salvage some data even with no metadata page.
00581                  */
00582                 if (F_ISSET(dbp, DB_AM_VERIFYING)) {
00583                         ftype = DB_FTYPE_NOTSET;
00584                         clear_len = DB_PAGE_DB_LEN;
00585                         break;
00586                 }
00587 
00588                 /*
00589                  * This might be an in-memory file and we won't know its
00590                  * file type until after we open it and read the meta-data
00591                  * page.
00592                  */
00593                 if (F_ISSET(dbp, DB_AM_INMEM)) {
00594                         clear_len = DB_CLEARLEN_NOTSET;
00595                         ftype = DB_FTYPE_NOTSET;
00596                         lsn_off = DB_LSN_OFF_NOTSET;
00597                         break;
00598                 }
00599                 /* FALLTHROUGH */
00600         default:
00601                 return (__db_unknown_type(dbenv, "DB->open", dbp->type));
00602         }
00603 
00604         mpf = dbp->mpf;
00605 
00606         memset(nullfid, 0, DB_FILE_ID_LEN);
00607         fidset = memcmp(nullfid, dbp->fileid, DB_FILE_ID_LEN);
00608         if (fidset)
00609                 (void)__memp_set_fileid(mpf, dbp->fileid);
00610 
00611         (void)__memp_set_clear_len(mpf, clear_len);
00612         (void)__memp_set_ftype(mpf, ftype);
00613         (void)__memp_set_lsn_offset(mpf, lsn_off);
00614 
00615         pginfo.db_pagesize = dbp->pgsize;
00616         pginfo.flags =
00617             F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
00618         pginfo.type = dbp->type;
00619         pgcookie.data = &pginfo;
00620         pgcookie.size = sizeof(DB_PGINFO);
00621         (void)__memp_set_pgcookie(mpf, &pgcookie);
00622 
00623         if ((ret = __memp_fopen(mpf, NULL, fname,
00624             LF_ISSET(DB_CREATE | DB_DURABLE_UNKNOWN |
00625                 DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE) |
00626             (F_ISSET(dbenv, DB_ENV_DIRECT_DB) ? DB_DIRECT : 0) |
00627             (F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_TXN_NOT_DURABLE : 0),
00628             0, dbp->pgsize)) != 0) {
00629                 /*
00630                  * The open didn't work; we need to reset the mpf,
00631                  * retaining the in-memory semantics (if any).
00632                  */
00633                 (void)__memp_fclose(dbp->mpf, 0);
00634                 (void)__memp_fcreate(dbenv, &dbp->mpf);
00635                 if (F_ISSET(dbp, DB_AM_INMEM))
00636                         MAKE_INMEM(dbp);
00637                 return (ret);
00638         }
00639 
00640         /*
00641          * Set the open flag.  We use it to mean that the dbp has gone
00642          * through mpf setup, including dbreg_register.  Also, below,
00643          * the underlying access method open functions may want to do
00644          * things like acquire cursors, so the open flag has to be set
00645          * before calling them.
00646          */
00647         F_SET(dbp, DB_AM_OPEN_CALLED);
00648         if (!fidset && fname != NULL) {
00649                 (void)__memp_get_fileid(dbp->mpf, dbp->fileid);
00650                 dbp->preserve_fid = 1;
00651         }
00652 
00653         return (0);
00654 }
00655 
00656 /*
00657  * __db_close --
00658  *      DB->close method.
00659  *
00660  * PUBLIC: int __db_close __P((DB *, DB_TXN *, u_int32_t));
00661  */
00662 int
00663 __db_close(dbp, txn, flags)
00664         DB *dbp;
00665         DB_TXN *txn;
00666         u_int32_t flags;
00667 {
00668         DB_ENV *dbenv;
00669         int db_ref, deferred_close, ret, t_ret;
00670 
00671         dbenv = dbp->dbenv;
00672         deferred_close = ret = 0;
00673 
00674         /*
00675          * Validate arguments, but as a DB handle destructor, we can't fail.
00676          *
00677          * Check for consistent transaction usage -- ignore errors.  Only
00678          * internal callers specify transactions, so it's a serious problem
00679          * if we get error messages.
00680          */
00681         if (txn != NULL)
00682                 (void)__db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0);
00683 
00684         /* Refresh the structure and close any underlying resources. */
00685         ret = __db_refresh(dbp, txn, flags, &deferred_close, 0);
00686 
00687         /*
00688          * If we've deferred the close because the logging of the close failed,
00689          * return our failure right away without destroying the handle.
00690          */
00691         if (deferred_close)
00692                 return (ret);
00693 
00694         /* !!!
00695          * This code has an apparent race between the moment we read and
00696          * decrement dbenv->db_ref and the moment we check whether it's 0.
00697          * However, if the environment is DBLOCAL, the user shouldn't have a
00698          * reference to the dbenv handle anyway;  the only way we can get
00699          * multiple dbps sharing a local dbenv is if we open them internally
00700          * during something like a subdatabase open.  If any such thing is
00701          * going on while the user is closing the original dbp with a local
00702          * dbenv, someone's already badly screwed up, so there's no reason
00703          * to bother engineering around this possibility.
00704          */
00705         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
00706         db_ref = --dbenv->db_ref;
00707         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
00708         if (F_ISSET(dbenv, DB_ENV_DBLOCAL) && db_ref == 0 &&
00709             (t_ret = __env_close(dbenv, 0)) != 0 && ret == 0)
00710                 ret = t_ret;
00711 
00712         /* Free the database handle. */
00713         memset(dbp, CLEAR_BYTE, sizeof(*dbp));
00714         __os_free(dbenv, dbp);
00715 
00716         return (ret);
00717 }
00718 
00719 /*
00720  * __db_refresh --
00721  *      Refresh the DB structure, releasing any allocated resources.
00722  * This does most of the work of closing files now because refresh
00723  * is what is used during abort processing (since we can't destroy
00724  * the actual handle) and during abort processing, we may have a
00725  * fully opened handle.
00726  *
00727  * PUBLIC: int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int));
00728  */
00729 int
00730 __db_refresh(dbp, txn, flags, deferred_closep, reuse)
00731         DB *dbp;
00732         DB_TXN *txn;
00733         u_int32_t flags;
00734         int *deferred_closep, reuse;
00735 {
00736         DB *sdbp;
00737         DBC *dbc;
00738         DB_ENV *dbenv;
00739         DB_LOCKREQ lreq;
00740         REGENV *renv;
00741         REGINFO *infop;
00742         u_int32_t save_flags;
00743         int resync, ret, t_ret;
00744 
00745         ret = 0;
00746 
00747         dbenv = dbp->dbenv;
00748         infop = dbenv->reginfo;
00749         if (infop != NULL)
00750                 renv = infop->primary;
00751         else
00752                 renv = NULL;
00753 
00754         /* If never opened, or not currently open, it's easy. */
00755         if (!F_ISSET(dbp, DB_AM_OPEN_CALLED))
00756                 goto never_opened;
00757 
00758         /*
00759          * If we have any secondary indices, disassociate them from us.
00760          * We don't bother with the mutex here;  it only protects some
00761          * of the ops that will make us core-dump mid-close anyway, and
00762          * if you're trying to do something with a secondary *while* you're
00763          * closing the primary, you deserve what you get.  The disassociation
00764          * is mostly done just so we can close primaries and secondaries in
00765          * any order--but within one thread of control.
00766          */
00767         for (sdbp = LIST_FIRST(&dbp->s_secondaries);
00768             sdbp != NULL; sdbp = LIST_NEXT(sdbp, s_links)) {
00769                 LIST_REMOVE(sdbp, s_links);
00770                 if ((t_ret = __db_disassociate(sdbp)) != 0 && ret == 0)
00771                         ret = t_ret;
00772         }
00773 
00774         /*
00775          * Sync the underlying access method.  Do before closing the cursors
00776          * because DB->sync allocates cursors in order to write Recno backing
00777          * source text files.
00778          *
00779          * Sync is slow on some systems, notably Solaris filesystems where the
00780          * entire buffer cache is searched.  If we're in recovery, don't flush
00781          * the file, it's not necessary.
00782          */
00783         if (!LF_ISSET(DB_NOSYNC) &&
00784             !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) &&
00785             (t_ret = __db_sync(dbp)) != 0 && ret == 0)
00786                 ret = t_ret;
00787 
00788         /*
00789          * Go through the active cursors and call the cursor recycle routine,
00790          * which resolves pending operations and moves the cursors onto the
00791          * free list.  Then, walk the free list and call the cursor destroy
00792          * routine.  Note that any failure on a close is considered "really
00793          * bad" and we just break out of the loop and force forward.
00794          */
00795         resync = TAILQ_FIRST(&dbp->active_queue) == NULL ? 0 : 1;
00796         while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
00797                 if ((t_ret = __db_c_close(dbc)) != 0) {
00798                         if (ret == 0)
00799                                 ret = t_ret;
00800                         break;
00801                 }
00802 
00803         while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
00804                 if ((t_ret = __db_c_destroy(dbc)) != 0) {
00805                         if (ret == 0)
00806                                 ret = t_ret;
00807                         break;
00808                 }
00809 
00810         /*
00811          * Close any outstanding join cursors.  Join cursors destroy themselves
00812          * on close and have no separate destroy routine.  We don't have to set
00813          * the resync flag here, because join cursors aren't write cursors.
00814          */
00815         while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL)
00816                 if ((t_ret = __db_join_close(dbc)) != 0) {
00817                         if (ret == 0)
00818                                 ret = t_ret;
00819                         break;
00820                 }
00821 
00822         /*
00823          * Sync the memory pool, even though we've already called DB->sync,
00824          * because closing cursors can dirty pages by deleting items they
00825          * referenced.
00826          *
00827          * Sync is slow on some systems, notably Solaris filesystems where the
00828          * entire buffer cache is searched.  If we're in recovery, don't flush
00829          * the file, it's not necessary.
00830          */
00831         if (resync && !LF_ISSET(DB_NOSYNC) &&
00832             !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) &&
00833             (t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0)
00834                 ret = t_ret;
00835 
00836 never_opened:
00837         /*
00838          * At this point, we haven't done anything to render the DB
00839          * handle unusable, at least by a transaction abort.  Take the
00840          * opportunity now to log the file close.  If this log fails
00841          * and we're in a transaction, we have to bail out of the attempted
00842          * close; we'll need a dbp in order to successfully abort the
00843          * transaction, and we can't conjure a new one up because we haven't
00844          * gotten out the dbreg_register record that represents the close.
00845          * In this case, we put off actually closing the dbp until we've
00846          * performed the abort.
00847          */
00848         if (!reuse && LOGGING_ON(dbp->dbenv)) {
00849                 /*
00850                  * Discard the log file id, if any.  We want to log the close
00851                  * if and only if this is not a recovery dbp or a client dbp,
00852                  * or a dead dbp handle.
00853                  */
00854                 DB_ASSERT(renv != NULL);
00855                 if (F_ISSET(dbp, DB_AM_RECOVER) || IS_REP_CLIENT(dbenv) ||
00856                     dbp->timestamp != renv->rep_timestamp)
00857                         t_ret = __dbreg_revoke_id(dbp, 0, DB_LOGFILEID_INVALID);
00858                 else {
00859                         if ((t_ret = __dbreg_close_id(dbp,
00860                             txn, DBREG_CLOSE)) != 0 && txn != NULL) {
00861                                 /*
00862                                  * We're in a txn and the attempt to log the
00863                                  * close failed;  let the txn subsystem know
00864                                  * that we need to destroy this dbp once we're
00865                                  * done with the abort, then bail from the
00866                                  * close.
00867                                  *
00868                                  * Note that if the attempt to put off the
00869                                  * close -also- fails--which it won't unless
00870                                  * we're out of heap memory--we're really
00871                                  * screwed.  Panic.
00872                                  */
00873                                 if ((ret =
00874                                     __txn_closeevent(dbenv, txn, dbp)) != 0)
00875                                         return (__db_panic(dbenv, ret));
00876                                 if (deferred_closep != NULL)
00877                                         *deferred_closep = 1;
00878                                 return (t_ret);
00879                         }
00880                         /*
00881                          * If dbreg_close_id failed and we were not in a
00882                          * transaction, then we need to finish this close
00883                          * because the caller can't do anything with the
00884                          * handle after we return an error.  We rely on
00885                          * dbreg_close_id to mark the entry in some manner
00886                          * so that we do not do a clean shutdown of this
00887                          * environment.  If shutdown isn't clean, then the
00888                          * application *must* run recovery and that will
00889                          * generate the RCLOSE record.
00890                          */
00891                 }
00892 
00893                 if (ret == 0)
00894                         ret = t_ret;
00895 
00896                 /* Discard the log FNAME. */
00897                 if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0)
00898                         ret = t_ret;
00899         }
00900 
00901         /* Close any handle we've been holding since the open.  */
00902         if (dbp->saved_open_fhp != NULL &&
00903             (t_ret = __os_closehandle(dbenv, dbp->saved_open_fhp)) != 0 &&
00904             ret == 0)
00905                 ret = t_ret;
00906 
00907         /*
00908          * Remove this DB handle from the DB_ENV's dblist, if it's been added.
00909          *
00910          * Close our reference to the underlying cache while locked, we don't
00911          * want to race with a thread searching for our underlying cache link
00912          * while opening a DB handle.
00913          */
00914         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
00915         if (!reuse && dbp->dblistlinks.le_prev != NULL) {
00916                 LIST_REMOVE(dbp, dblistlinks);
00917                 dbp->dblistlinks.le_prev = NULL;
00918         }
00919 
00920         /* Close the memory pool file handle. */
00921         if (dbp->mpf != NULL) {
00922                 if ((t_ret = __memp_fclose(dbp->mpf,
00923                     F_ISSET(dbp, DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) != 0 &&
00924                     ret == 0)
00925                         ret = t_ret;
00926                 dbp->mpf = NULL;
00927                 if (reuse &&
00928                     (t_ret = __memp_fcreate(dbenv, &dbp->mpf)) != 0 &&
00929                     ret == 0)
00930                         ret = t_ret;
00931         }
00932 
00933         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
00934 
00935         /*
00936          * Call the access specific close function.
00937          *
00938          * We do this here rather than in __db_close as we need to do this when
00939          * aborting an open so that file descriptors are closed and abort of
00940          * renames can succeed on platforms that lock open files (such as
00941          * Windows).  In particular, we need to ensure that all the extents
00942          * associated with a queue are closed so that queue renames can be
00943          * aborted.
00944          *
00945          * It is also important that we do this before releasing the handle
00946          * lock, because dbremove and dbrename assume that once they have the
00947          * handle lock, it is safe to modify the underlying file(s).
00948          *
00949          * !!!
00950          * Because of where these functions are called in the DB handle close
00951          * process, these routines can't do anything that would dirty pages or
00952          * otherwise affect closing down the database.  Specifically, we can't
00953          * abort and recover any of the information they control.
00954          */
00955         if ((t_ret = __bam_db_close(dbp)) != 0 && ret == 0)
00956                 ret = t_ret;
00957         if ((t_ret = __ham_db_close(dbp)) != 0 && ret == 0)
00958                 ret = t_ret;
00959         if ((t_ret = __qam_db_close(dbp, dbp->flags)) != 0 && ret == 0)
00960                 ret = t_ret;
00961 
00962         /*
00963          * !!!
00964          * At this point, the access-method specific information has been
00965          * freed.  From now on, we can use the dbp, but not touch any
00966          * access-method specific data.
00967          */
00968 
00969         if (!reuse && dbp->lid != DB_LOCK_INVALIDID) {
00970                 /* We may have pending trade operations on this dbp. */
00971                 if (txn != NULL)
00972                         __txn_remlock(dbenv, txn, &dbp->handle_lock, dbp->lid);
00973 
00974                 /* We may be holding the handle lock; release it. */
00975                 lreq.op = DB_LOCK_PUT_ALL;
00976                 lreq.obj = NULL;
00977                 if ((t_ret = __lock_vec(dbenv,
00978                     dbp->lid, 0, &lreq, 1, NULL)) != 0 && ret == 0)
00979                         ret = t_ret;
00980 
00981                 if ((t_ret = __lock_id_free(dbenv, dbp->lid)) != 0 && ret == 0)
00982                         ret = t_ret;
00983                 dbp->lid = DB_LOCK_INVALIDID;
00984                 LOCK_INIT(dbp->handle_lock);
00985         }
00986 
00987         /*
00988          * If this is a temporary file (un-named in-memory file), then
00989          * discard the locker ID allocated as the fileid.
00990          */
00991         if (LOCKING_ON(dbenv) &&
00992             F_ISSET(dbp, DB_AM_INMEM) && !dbp->preserve_fid &&
00993             *(u_int32_t *)dbp->fileid != DB_LOCK_INVALIDID &&
00994             (t_ret = __lock_id_free(dbenv, *(u_int32_t *)dbp->fileid)) != 0 &&
00995             ret == 0)
00996                 ret = t_ret;
00997 
00998         if (reuse) {
00999                 /*
01000                  * If we are reusing this dbp, then we're done now. Re-init
01001                  * the handle, preserving important flags, and then return.
01002                  * This code is borrowed from __db_init, which does more
01003                  * than we can do here.
01004                  */
01005                 save_flags = F_ISSET(dbp, DB_AM_INMEM | DB_AM_TXN);
01006 
01007                 /*
01008                  * XXX If this is an XA handle, we'll want to specify 
01009                  * DB_XA_CREATE.
01010                  */
01011                 if ((ret = __bam_db_create(dbp)) != 0)
01012                         return (ret);
01013                 if ((ret = __ham_db_create(dbp)) != 0)
01014                         return (ret);
01015                 if ((ret = __qam_db_create(dbp)) != 0)
01016                         return (ret);
01017 
01018                 /* Restore flags */
01019                 dbp->flags = dbp->orig_flags | save_flags;
01020 
01021                 if (FLD_ISSET(save_flags, DB_AM_INMEM)) {
01022                         /*
01023                          * If this is inmem, then it may have a fileid
01024                          * even if it was never opened, and we need to
01025                          * clear out that fileid.
01026                          */
01027                         memset(dbp->fileid, 0, sizeof(dbp->fileid));
01028                         MAKE_INMEM(dbp);
01029                 }
01030                 return (ret);
01031         }
01032 
01033         dbp->type = DB_UNKNOWN;
01034 
01035         /* Discard the thread mutex. */
01036         if ((t_ret = __mutex_free(dbenv, &dbp->mutex)) != 0 && ret == 0)
01037                 ret = t_ret;
01038 
01039         /* Discard any memory allocated for the file and database names. */
01040         if (dbp->fname != NULL) {
01041                 __os_free(dbp->dbenv, dbp->fname);
01042                 dbp->fname = NULL;
01043         }
01044         if (dbp->dname != NULL) {
01045                 __os_free(dbp->dbenv, dbp->dname);
01046                 dbp->dname = NULL;
01047         }
01048 
01049         /* Discard any memory used to store returned data. */
01050         if (dbp->my_rskey.data != NULL)
01051                 __os_free(dbp->dbenv, dbp->my_rskey.data);
01052         if (dbp->my_rkey.data != NULL)
01053                 __os_free(dbp->dbenv, dbp->my_rkey.data);
01054         if (dbp->my_rdata.data != NULL)
01055                 __os_free(dbp->dbenv, dbp->my_rdata.data);
01056 
01057         /* For safety's sake;  we may refresh twice. */
01058         memset(&dbp->my_rskey, 0, sizeof(DBT));
01059         memset(&dbp->my_rkey, 0, sizeof(DBT));
01060         memset(&dbp->my_rdata, 0, sizeof(DBT));
01061 
01062         /* Clear out fields that normally get set during open. */
01063         memset(dbp->fileid, 0, sizeof(dbp->fileid));
01064         dbp->adj_fileid = 0;
01065         dbp->meta_pgno = 0;
01066         dbp->cur_lid = DB_LOCK_INVALIDID;
01067         dbp->associate_lid = DB_LOCK_INVALIDID;
01068         dbp->cl_id = 0;
01069         dbp->open_flags = 0;
01070 
01071         /*
01072          * If we are being refreshed with a txn specified, then we need
01073          * to make sure that we clear out the lock handle field, because
01074          * releasing all the locks for this transaction will release this
01075          * lock and we don't want close to stumble upon this handle and
01076          * try to close it.
01077          */
01078         if (txn != NULL)
01079                 LOCK_INIT(dbp->handle_lock);
01080 
01081         /* Reset flags to whatever the user configured. */
01082         dbp->flags = dbp->orig_flags;
01083 
01084         return (ret);
01085 }
01086 
01087 /*
01088  * __db_log_page
01089  *      Log a meta-data or root page during a subdatabase create operation.
01090  *
01091  * PUBLIC: int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *));
01092  */
01093 int
01094 __db_log_page(dbp, txn, lsn, pgno, page)
01095         DB *dbp;
01096         DB_TXN *txn;
01097         DB_LSN *lsn;
01098         db_pgno_t pgno;
01099         PAGE *page;
01100 {
01101         DBT page_dbt;
01102         DB_LSN new_lsn;
01103         int ret;
01104 
01105         if (!LOGGING_ON(dbp->dbenv) || txn == NULL)
01106                 return (0);
01107 
01108         memset(&page_dbt, 0, sizeof(page_dbt));
01109         page_dbt.size = dbp->pgsize;
01110         page_dbt.data = page;
01111 
01112         ret = __crdel_metasub_log(dbp, txn, &new_lsn, 0, pgno, &page_dbt, lsn);
01113 
01114         if (ret == 0)
01115                 page->lsn = new_lsn;
01116         return (ret);
01117 }
01118 
01119 /*
01120  * __db_backup_name
01121  *      Create the backup file name for a given file.
01122  *
01123  * PUBLIC: int __db_backup_name __P((DB_ENV *,
01124  * PUBLIC:     const char *, DB_TXN *, char **));
01125  */
01126 #undef  BACKUP_PREFIX
01127 #define BACKUP_PREFIX   "__db"
01128 
01129 #undef  MAX_LSN_TO_TEXT
01130 #define MAX_LSN_TO_TEXT 17
01131 
01132 int
01133 __db_backup_name(dbenv, name, txn, backup)
01134         DB_ENV *dbenv;
01135         const char *name;
01136         DB_TXN *txn;
01137         char **backup;
01138 {
01139         DB_LSN lsn;
01140         size_t len;
01141         int ret;
01142         char *p, *retp;
01143 
01144         /*
01145          * Part of the name may be a full path, so we need to make sure that
01146          * we allocate enough space for it, even in the case where we don't
01147          * use the entire filename for the backup name.
01148          */
01149         len = strlen(name) + strlen(BACKUP_PREFIX) + 1 + MAX_LSN_TO_TEXT;
01150         if ((ret = __os_malloc(dbenv, len, &retp)) != 0)
01151                 return (ret);
01152 
01153         /*
01154          * Create the name.  Backup file names are in one of three forms:
01155          *
01156          *      In a transactional env: __db.LSN(8).LSN(8)
01157          * and
01158          *      In VXWORKS (where we want 8.3 support)
01159          * and
01160          *      in any other non-transactional env: __db.FILENAME
01161          *
01162          * If the transaction doesn't have a current LSN, we write a dummy
01163          * log record to force it, so we ensure all tmp names are unique.
01164          *
01165          * In addition, the name passed may contain an env-relative path.
01166          * In that case, put the __db. in the right place (in the last
01167          * component of the pathname).
01168          *
01169          * There are four cases here:
01170          *      1. simple path w/out transaction
01171          *      2. simple path + transaction
01172          *      3. multi-component path w/out transaction
01173          *      4. multi-component path + transaction
01174          */
01175         p = __db_rpath(name);
01176         if (txn == NULL) {
01177 #ifdef HAVE_VXWORKS
01178             { int i, n;
01179                 /* On VxWorks we must support 8.3 names. */
01180                 if (p == NULL)                          /* Case 1. */
01181                         n = snprintf(retp,
01182                             len, "%s%.4s.tmp", BACKUP_PREFIX, name);
01183                 else                            /* Case 3. */
01184                         n = snprintf(retp, len, "%.*s%s%.4s.tmp",
01185                             (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1);
01186 
01187                 /*
01188                  * Overwrite "." in the characters copied from the name.
01189                  * If we backup 8 characters from the end, we're guaranteed
01190                  * to a) include the four bytes we copied from the name
01191                  * and b) not run off the beginning of the string.
01192                  */
01193                 for (i = 0, p = (retp + n) - 8; i < 4; p++, i++)
01194                         if (*p == '.')
01195                                 *p = '_';
01196             }
01197 #else
01198                 if (p == NULL)                          /* Case 1. */
01199                         snprintf(retp, len, "%s.%s", BACKUP_PREFIX, name);
01200                 else                                    /* Case 3. */
01201                         snprintf(retp, len, "%.*s%s.%s",
01202                             (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1);
01203 #endif
01204         } else {
01205                 lsn = ((TXN_DETAIL *)txn->td)->last_lsn;
01206                 if (IS_ZERO_LSN(lsn)) {
01207                         /*
01208                          * Write dummy log record.   The two choices for dummy
01209                          * log records are __db_noop_log and __db_debug_log;
01210                          * unfortunately __db_noop_log requires a valid dbp,
01211                          * and we aren't guaranteed to be able to pass one in
01212                          * here.
01213                          */
01214                         if ((ret = __db_debug_log(dbenv,
01215                             txn, &lsn, 0, NULL, 0, NULL, NULL, 0)) != 0) {
01216                                 __os_free(dbenv, retp);
01217                                 return (ret);
01218                         }
01219                 }
01220 
01221                 if (p == NULL)                          /* Case 2. */
01222                         snprintf(retp, len,
01223                             "%s.%x.%x", BACKUP_PREFIX, lsn.file, lsn.offset);
01224                 else                                    /* Case 4. */
01225                         snprintf(retp, len, "%.*s%x.%x",
01226                             (int)(p - name) + 1, name, lsn.file, lsn.offset);
01227         }
01228 
01229         *backup = retp;
01230         return (0);
01231 }
01232 
01233 /*
01234  * __dblist_get --
01235  *      Get the first element of dbenv->dblist with
01236  *      dbp->adj_fileid matching adjid.
01237  *
01238  * PUBLIC: DB *__dblist_get __P((DB_ENV *, u_int32_t));
01239  */
01240 DB *
01241 __dblist_get(dbenv, adjid)
01242         DB_ENV *dbenv;
01243         u_int32_t adjid;
01244 {
01245         DB *dbp;
01246 
01247         for (dbp = LIST_FIRST(&dbenv->dblist);
01248             dbp != NULL && dbp->adj_fileid != adjid;
01249             dbp = LIST_NEXT(dbp, dblistlinks))
01250                 ;
01251 
01252         return (dbp);
01253 }
01254 
01255 /*
01256  * __db_disassociate --
01257  *      Destroy the association between a given secondary and its primary.
01258  */
01259 static int
01260 __db_disassociate(sdbp)
01261         DB *sdbp;
01262 {
01263         DBC *dbc;
01264         int ret, t_ret;
01265 
01266         ret = 0;
01267 
01268         sdbp->s_callback = NULL;
01269         sdbp->s_primary = NULL;
01270         sdbp->get = sdbp->stored_get;
01271         sdbp->close = sdbp->stored_close;
01272 
01273         /*
01274          * Complain, but proceed, if we have any active cursors.  (We're in
01275          * the middle of a close, so there's really no turning back.)
01276          */
01277         if (sdbp->s_refcnt != 1 ||
01278             TAILQ_FIRST(&sdbp->active_queue) != NULL ||
01279             TAILQ_FIRST(&sdbp->join_queue) != NULL) {
01280                 __db_err(sdbp->dbenv,
01281     "Closing a primary DB while a secondary DB has active cursors is unsafe");
01282                 ret = EINVAL;
01283         }
01284         sdbp->s_refcnt = 0;
01285 
01286         while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
01287                 if ((t_ret = __db_c_destroy(dbc)) != 0 && ret == 0)
01288                         ret = t_ret;
01289 
01290         F_CLR(sdbp, DB_AM_SECONDARY);
01291         return (ret);
01292 }
01293 
01294 #ifdef CONFIG_TEST
01295 /*
01296  * __db_testcopy
01297  *      Create a copy of all backup files and our "main" DB.
01298  *
01299  * PUBLIC: #ifdef CONFIG_TEST
01300  * PUBLIC: int __db_testcopy __P((DB_ENV *, DB *, const char *));
01301  * PUBLIC: #endif
01302  */
01303 int
01304 __db_testcopy(dbenv, dbp, name)
01305         DB_ENV *dbenv;
01306         DB *dbp;
01307         const char *name;
01308 {
01309         DB_MPOOL *dbmp;
01310         DB_MPOOLFILE *mpf;
01311 
01312         DB_ASSERT(dbp != NULL || name != NULL);
01313 
01314         if (name == NULL) {
01315                 dbmp = dbenv->mp_handle;
01316                 mpf = dbp->mpf;
01317                 name = R_ADDR(dbmp->reginfo, mpf->mfp->path_off);
01318         }
01319 
01320         if (dbp != NULL && dbp->type == DB_QUEUE)
01321                 return (__qam_testdocopy(dbp, name));
01322         else
01323                 return (__db_testdocopy(dbenv, name));
01324 }
01325 
01326 static int
01327 __qam_testdocopy(dbp, name)
01328         DB *dbp;
01329         const char *name;
01330 {
01331         QUEUE_FILELIST *filelist, *fp;
01332         char buf[256], *dir;
01333         int ret;
01334 
01335         filelist = NULL;
01336         if ((ret = __db_testdocopy(dbp->dbenv, name)) != 0)
01337                 return (ret);
01338         if (dbp->mpf != NULL &&
01339             (ret = __qam_gen_filelist(dbp, &filelist)) != 0)
01340                 return (ret);
01341 
01342         if (filelist == NULL)
01343                 return (0);
01344         dir = ((QUEUE *)dbp->q_internal)->dir;
01345         for (fp = filelist; fp->mpf != NULL; fp++) {
01346                 snprintf(buf, sizeof(buf),
01347                     QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id);
01348                 if ((ret = __db_testdocopy(dbp->dbenv, buf)) != 0)
01349                         return (ret);
01350         }
01351 
01352         __os_free(dbp->dbenv, filelist);
01353         return (0);
01354 }
01355 
01356 /*
01357  * __db_testdocopy
01358  *      Create a copy of all backup files and our "main" DB.
01359  *
01360  */
01361 static int
01362 __db_testdocopy(dbenv, name)
01363         DB_ENV *dbenv;
01364         const char *name;
01365 {
01366         size_t len;
01367         int dircnt, i, ret;
01368         char *backup, *copy, *dir, **namesp, *p, *real_name;
01369 
01370         dircnt = 0;
01371         copy = backup = NULL;
01372         namesp = NULL;
01373 
01374         /* Get the real backing file name. */
01375         if ((ret = __db_appname(dbenv,
01376             DB_APP_DATA, name, 0, NULL, &real_name)) != 0)
01377                 return (ret);
01378 
01379         /*
01380          * Maximum size of file, including adding a ".afterop".
01381          */
01382         len = strlen(real_name) +
01383             strlen(BACKUP_PREFIX) + 1 + MAX_LSN_TO_TEXT + 9;
01384 
01385         if ((ret = __os_malloc(dbenv, len, &copy)) != 0)
01386                 goto err;
01387 
01388         if ((ret = __os_malloc(dbenv, len, &backup)) != 0)
01389                 goto err;
01390 
01391         /*
01392          * First copy the file itself.
01393          */
01394         snprintf(copy, len, "%s.afterop", real_name);
01395         __db_makecopy(dbenv, real_name, copy);
01396 
01397         if ((ret = __os_strdup(dbenv, real_name, &dir)) != 0)
01398                 goto err;
01399         __os_free(dbenv, real_name);
01400         real_name = NULL;
01401 
01402         /*
01403          * Create the name.  Backup file names are of the form:
01404          *
01405          *      __db.name.0x[lsn-file].0x[lsn-offset]
01406          *
01407          * which guarantees uniqueness.  We want to look for the
01408          * backup name, followed by a '.0x' (so that if they have
01409          * files named, say, 'a' and 'abc' we won't match 'abc' when
01410          * looking for 'a'.
01411          */
01412         snprintf(backup, len, "%s.%s.0x", BACKUP_PREFIX, name);
01413 
01414         /*
01415          * We need the directory path to do the __os_dirlist.
01416          */
01417         p = __db_rpath(dir);
01418         if (p != NULL)
01419                 *p = '\0';
01420         ret = __os_dirlist(dbenv, dir, &namesp, &dircnt);
01421 #if DIAGNOSTIC
01422         /*
01423          * XXX
01424          * To get the memory guard code to work because it uses strlen and we
01425          * just moved the end of the string somewhere sooner.  This causes the
01426          * guard code to fail because it looks at one byte past the end of the
01427          * string.
01428          */
01429         *p = '/';
01430 #endif
01431         __os_free(dbenv, dir);
01432         if (ret != 0)
01433                 goto err;
01434         for (i = 0; i < dircnt; i++) {
01435                 /*
01436                  * Need to check if it is a backup file for this.
01437                  * No idea what namesp[i] may be or how long, so
01438                  * must use strncmp and not memcmp.  We don't want
01439                  * to use strcmp either because we are only matching
01440                  * the first part of the real file's name.  We don't
01441                  * know its LSN's.
01442                  */
01443                 if (strncmp(namesp[i], backup, strlen(backup)) == 0) {
01444                         if ((ret = __db_appname(dbenv, DB_APP_DATA,
01445                             namesp[i], 0, NULL, &real_name)) != 0)
01446                                 goto err;
01447 
01448                         /*
01449                          * This should not happen.  Check that old
01450                          * .afterop files aren't around.
01451                          * If so, just move on.
01452                          */
01453                         if (strstr(real_name, ".afterop") != NULL) {
01454                                 __os_free(dbenv, real_name);
01455                                 real_name = NULL;
01456                                 continue;
01457                         }
01458                         snprintf(copy, len, "%s.afterop", real_name);
01459                         __db_makecopy(dbenv, real_name, copy);
01460                         __os_free(dbenv, real_name);
01461                         real_name = NULL;
01462                 }
01463         }
01464 
01465 err:    if (backup != NULL)
01466                 __os_free(dbenv, backup);
01467         if (copy != NULL)
01468                 __os_free(dbenv, copy);
01469         if (namesp != NULL)
01470                 __os_dirfree(dbenv, namesp, dircnt);
01471         if (real_name != NULL)
01472                 __os_free(dbenv, real_name);
01473         return (ret);
01474 }
01475 
01476 static void
01477 __db_makecopy(dbenv, src, dest)
01478         DB_ENV *dbenv;
01479         const char *src, *dest;
01480 {
01481         DB_FH *rfhp, *wfhp;
01482         size_t rcnt, wcnt;
01483         char *buf;
01484 
01485         rfhp = wfhp = NULL;
01486 
01487         if (__os_malloc(dbenv, 1024, &buf) != 0)
01488                 return;
01489 
01490         if (__os_open(dbenv,
01491             src, DB_OSO_RDONLY, __db_omode(OWNER_RW), &rfhp) != 0)
01492                 goto err;
01493         if (__os_open(dbenv, dest,
01494             DB_OSO_CREATE | DB_OSO_TRUNC, __db_omode(OWNER_RW), &wfhp) != 0)
01495                 goto err;
01496 
01497         for (;;)
01498                 if (__os_read(dbenv, rfhp, buf, 1024, &rcnt) < 0 || rcnt == 0 ||
01499                     __os_write(dbenv, wfhp, buf, rcnt, &wcnt) < 0)
01500                         break;
01501 
01502 err:    if (buf != NULL)
01503                 __os_free(dbenv, buf);
01504         if (rfhp != NULL)
01505                 (void)__os_closehandle(dbenv, rfhp);
01506         if (wfhp != NULL)
01507                 (void)__os_closehandle(dbenv, wfhp);
01508 }
01509 #endif

Generated on Sun Dec 25 12:14:18 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2