Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

fop_util.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 2001-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: fop_util.c,v 12.19 2005/10/27 01:26:00 mjc Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <string.h>
00016 #endif
00017 
00018 #include "db_int.h"
00019 #include "dbinc/db_page.h"
00020 #include "dbinc/db_shash.h"
00021 #include "dbinc/db_am.h"
00022 #include "dbinc/hash.h"
00023 #include "dbinc/fop.h"
00024 #include "dbinc/lock.h"
00025 #include "dbinc/mp.h"
00026 #include "dbinc/log.h"
00027 #include "dbinc/txn.h"
00028 
00029 static int __fop_set_pgsize __P((DB *, DB_FH *, const char *));
00030 static int __fop_inmem_create __P((DB *, const char *, DB_TXN *, u_int32_t));
00031 static int __fop_inmem_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *));
00032 static int __fop_inmem_read_meta __P((DB *, const char *, u_int32_t));
00033 static int __fop_inmem_swap __P((DB *, DB *, DB_TXN *,
00034                const char *, const char *, const char *, u_int32_t));
00035 static int __fop_ondisk_dummy __P((DB *,
00036                DB_TXN *, const char *, u_int8_t *, u_int32_t));
00037 static int __fop_ondisk_swap __P((DB *, DB *, DB_TXN *,
00038                const char *, const char *, const char *, u_int32_t, u_int32_t));
00039 
00040 /*
00041  * Acquire the environment meta-data lock.  The parameters are the
00042  * environment (ENV), the locker id to use in acquiring the lock (ID)
00043  * and a pointer to a DB_LOCK (L), which is handed to __lock_get.
       *
       * The lock is requested in DB_LOCK_WRITE mode on a fixed lock object (a
       * u_int32_t holding the value 1), and only when LOCKING_ON(ENV) is true;
       * otherwise the macro does nothing.
       *
       * The macro is not self-contained: it assigns to a variable named "ret"
       * and jumps to a label named "err" when __lock_get fails, so both must
       * exist in the function that expands it.
       *
       * NOTE(review): only the data and size fields of the DBT are filled in;
       * presumably __lock_get reads no other field -- confirm.
00044  *
00045  * !!!
00046  * Turn off locking for Critical Path.  The application must do its own
00047  * synchronization of open/create.  Two threads creating and opening a
00048  * file at the same time may have unpredictable results.  When
       * CRITICALPATH_10266 is defined the macro expands to (0) and uses none
       * of its arguments, nor ret or err.
00049  */
00050 #ifdef CRITICALPATH_10266
00051 #define GET_ENVLOCK(ENV, ID, L) (0)
00052 #else
00053 #define GET_ENVLOCK(ENV, ID, L) do {                                    \
00054         DBT __dbt;                                                      \
00055         u_int32_t __lockval;                                            \
00056                                                                         \
00057         if (LOCKING_ON((ENV))) {                                        \
00058                 __lockval = 1;                                          \
00059                 __dbt.data = &__lockval;                                \
00060                 __dbt.size = sizeof(__lockval);                         \
00061                 if ((ret = __lock_get((ENV), (ID),                      \
00062                     0, &__dbt, DB_LOCK_WRITE, (L))) != 0)               \
00063                         goto err;                                       \
00064         }                                                               \
00065 } while (0)
00066 #endif
00067 
      /*
       * RESET_MPF --
       *      Throw away the mpool file handle of DB handle D and give D a newly
       *      created one.  F is passed through as the flags argument of
       *      __memp_fclose.
       *
       *      The result of __memp_fclose is ignored (cast to void).  D->mpf is
       *      set to NULL and DB_AM_OPEN_CALLED is cleared on D before
       *      __memp_fcreate is called.  If __memp_fcreate fails, the error is
       *      stored in a variable named "ret" and control jumps to a label
       *      named "err"; both must exist in the function that expands the
       *      macro.
       */
00068 #define RESET_MPF(D, F) do {                                            \
00069         (void)__memp_fclose((D)->mpf, (F));                             \
00070         (D)->mpf = NULL;                                                \
00071         F_CLR((D), DB_AM_OPEN_CALLED);                                  \
00072         if ((ret = __memp_fcreate((D)->dbenv, &(D)->mpf)) != 0)         \
00073                 goto err;                                               \
00074 } while (0)
00075 
00076 /*
00077  * If we open a file handle and our caller is doing fcntl(2) locking,
00078  * we can't close the handle because that would discard the caller's
00079  * lock. Save it until we close or refresh the DB handle.
00080  */
00081 #define CLOSE_HANDLE(D, F) {                                            \
00082         if ((F) != NULL) {                                              \
00083                 if (LF_ISSET(DB_FCNTL_LOCKING))                         \
00084                         (D)->saved_open_fhp = (F);                      \
00085                 else if ((t_ret =                                       \
00086                     __os_closehandle((D)->dbenv, (F))) != 0) {          \
00087                         if (ret == 0)                                   \
00088                                 ret = t_ret;                            \
00089                         goto err;                                       \
00090                 }                                                       \
00091                 (F) = NULL;                                             \
00092         }                                                               \
00093 }
00094 
00095 /*
00096  * __fop_lock_handle --
00097  *
00098  * Get the handle lock for a database.  If the envlock is specified, do this
00099  * as a lock_vec call that releases the environment lock before acquiring the
00100  * handle lock.
       *
       * Parameters:
       *      dbenv   environment the lock calls are made against.
       *      dbp     DB handle; its fileid and meta_pgno identify the lock
       *              object, and the granted lock is stored in
       *              dbp->handle_lock.
       *      locker  locker id used for the request; also recorded in
       *              dbp->cur_lid.
       *      mode    lock mode requested for the handle lock.
       *      elockp  environment lock to release as part of the same
       *              __lock_vec call, or NULL to do a plain __lock_get.
       *      flags   flags passed to __lock_get / __lock_vec (after
       *              DB_TEST_SUBLOCKS has been applied to them).
       *
       * Returns 0 on success, otherwise the error from the lock call.
       *
       * Returns 0 without doing anything when locking is not configured or
       * the handle has DB_AM_COMPENSATE or DB_AM_RECOVER set; note that in
       * that case *elockp is not released.
00101  *
00102  * PUBLIC: int __fop_lock_handle __P((DB_ENV *,
00103  * PUBLIC:     DB *, u_int32_t, db_lockmode_t, DB_LOCK *, u_int32_t));
00104  *
00105  */
00106 int
00107 __fop_lock_handle(dbenv, dbp, locker, mode, elockp, flags)
00108         DB_ENV *dbenv;
00109         DB *dbp;
00110         u_int32_t locker;
00111         db_lockmode_t mode;
00112         DB_LOCK *elockp;
00113         u_int32_t flags;
00114 {
00115         DBT fileobj;
00116         DB_LOCKREQ reqs[2], *ereq;
00117         DB_LOCK_ILOCK lock_desc;
00118         int ret;
00119 
00120         if (!LOCKING_ON(dbenv) ||
00121             F_ISSET(dbp, DB_AM_COMPENSATE | DB_AM_RECOVER))
00122                 return (0);
00123 
00124         /*
00125          * If we are in recovery, the only locking we should be
00126          * doing is on the global environment.  So no handle lock is
               * taken; the environment lock, if one was passed, is simply
               * released.
00127          */
00128         if (IS_RECOVERING(dbenv))
00129                 return (elockp == NULL ? 0 : __ENV_LPUT(dbenv, *elockp));
00130 
              /*
               * The lock object is a DB_LOCK_ILOCK built from the file id and
               * the meta-data page number, tagged as a handle lock.
               */
00131         memcpy(lock_desc.fileid, dbp->fileid, DB_FILE_ID_LEN);
00132         lock_desc.pgno = dbp->meta_pgno;
00133         lock_desc.type = DB_HANDLE_LOCK;
00134 
00135         memset(&fileobj, 0, sizeof(fileobj));
00136         fileobj.data = &lock_desc;
00137         fileobj.size = sizeof(lock_desc);
00138         DB_TEST_SUBLOCKS(dbenv, flags);
00139         if (elockp == NULL)
00140                 ret = __lock_get(dbenv, locker,
00141                     flags, &fileobj, mode, &dbp->handle_lock);
00142         else {
                      /* Request 0 drops the env lock, request 1 gets ours. */
00143                 reqs[0].op = DB_LOCK_PUT;
00144                 reqs[0].lock = *elockp;
00145                 reqs[1].op = DB_LOCK_GET;
00146                 reqs[1].mode = mode;
00147                 reqs[1].obj = &fileobj;
00148                 reqs[1].timeout = 0;
                      /*
                       * On success the env lock is gone, so mark the caller's
                       * copy as unset.  On failure it is marked unset only if
                       * the request reported through ereq is not the first
                       * one -- presumably ereq identifies the request that
                       * failed, so this means the put itself went through;
                       * confirm against __lock_vec.
                       */
00149                 if ((ret = __lock_vec(dbenv,
00150                     locker, flags, reqs, 2, &ereq)) == 0) {
00151                         dbp->handle_lock = reqs[1].lock;
00152                         LOCK_INIT(*elockp);
00153                 } else if (ereq != reqs)
00154                         LOCK_INIT(*elockp);
00155         }
00156 
              /* Recorded whether or not the lock request succeeded. */
00157         dbp->cur_lid = locker;
00158         return (ret);
00159 }
00160 
00161 /*
00162  * __fop_file_setup --
00163  *
00164  * Perform all the needed checking and locking to open up or create a
00165  * file.
00166  *
00167  * There's a reason we don't push this code down into the buffer cache.
00168  * The problem is that there's no information external to the file that
00169  * we can use as a unique ID.  UNIX has dev/inode pairs, but they are
00170  * not necessarily unique after reboot, if the file was mounted via NFS.
00171  * Windows has similar problems, as the FAT filesystem doesn't maintain
00172  * dev/inode numbers across reboot.  So, we must get something from the
00173  * file we can use to ensure that, even after a reboot, the file we're
00174  * joining in the cache is the right file for us to join.  The solution
00175  * we use is to maintain a file ID that's stored in the database, and
00176  * that's why we have to open and read the file before calling into the
00177  * buffer cache or obtaining a lock (we use this unique fileid to lock
00178  * as well as to identify like files in the cache).
00179  *
00180  * There are a couple of idiosyncrasies that this code must support, in
00181  * particular, DB_TRUNCATE and DB_FCNTL_LOCKING.  First, we disallow
00182  * DB_TRUNCATE in the presence of transactions, since opening a file with
00183  * O_TRUNC will result in data being lost in an unrecoverable fashion.
00184  * We also disallow DB_TRUNCATE if locking is enabled, because even in
00185  * the presence of locking, we cannot avoid race conditions, so allowing
00186  * DB_TRUNCATE with locking would be misleading.  See SR [#7345] for more
00187  * details.
00188  *
00189  * However, if you are running with neither locking nor transactions, then
00190  * you can specify DB_TRUNCATE, and if you do so, we will truncate the file
00191  * regardless of its contents.
00192  *
00193  * FCNTL locking introduces another set of complications.  First, the only
00194  * reason we support the DB_FCNTL_LOCKING flag is for historic compatibility
00195  * with programs like Sendmail and Postfix.  In these cases, the caller may
00196  * already have a lock on the file; we need to make sure that any file handles
00197  * we open remain open, because if we were to close them, the lock held by the
00198  * caller would go away.  Furthermore, Sendmail and/or Postfix need the ability
00199  * to create databases in empty files.  So, when you're doing FCNTL locking,
00200  * it's reasonable that you are trying to create a database into a 0-length
00201  * file and we allow it, while under normal conditions, we do not create
00202  * databases if the files already exist and are not Berkeley DB files.
       *
       * Parameters:
       *      dbp     DB handle being opened; its lid, handle_lock, fileid and
       *              flags are updated here.
       *      txn     enclosing transaction, or NULL.
       *      name    file (or in-memory database) name; a NULL name is treated
       *              as "does not exist".
       *      mode    file mode used for creation; 0 selects "rw-rw----".
       *      flags   open flags; DB_CREATE, DB_EXCL, DB_RDONLY, DB_TRUNCATE and
       *              DB_FCNTL_LOCKING are examined here.
       *      retidp  receives the id of the child transaction used to create
       *              the file, or TXN_INVALID if there was none.
       *
       * Returns 0 on success and a non-zero error otherwise.  On error the
       * handle lock (when txn is NULL), the environment lock and any locker
       * id allocated here are released.
00203  *
00204  * PUBLIC: int __fop_file_setup __P((DB *,
00205  * PUBLIC:     DB_TXN *, const char *, int, u_int32_t, u_int32_t *));
00206  */
00207 int
00208 __fop_file_setup(dbp, txn, name, mode, flags, retidp)
00209         DB *dbp;
00210         DB_TXN *txn;
00211         const char *name;
00212         int mode;
00213         u_int32_t flags, *retidp;
00214 {
00215         DB_ENV *dbenv;
00216         DB_FH *fhp;
00217         DB_LOCK elock;
00218         DB_TXN *stxn;
00219         DBTYPE save_type;
00220         size_t len;
00221         u_int32_t dflags, locker, oflags;
00222         u_int8_t mbuf[DBMETASIZE];
00223         int created_locker, create_ok, ret, retries, t_ret, tmp_created;
00224         int was_inval;
00225         char *real_name, *real_tmpname, *tmpname;
00226 
00227         *retidp = TXN_INVALID;
00228 
00229         dbenv = dbp->dbenv;
00230         fhp = NULL;
00231         LOCK_INIT(elock);
00232         stxn = NULL;
00233         created_locker = tmp_created = was_inval = 0;
00234         real_name = real_tmpname = tmpname = NULL;
00235         dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
00236 
00237         ret = 0;
00238         retries = 0;
00239         save_type = dbp->type;
00240 
00241         /*
00242          * Get a lockerid for this handle.  There are paths through queue
00243          * rename and remove where this dbp already has a locker, so make
00244          * sure we don't clobber it and conflict.
00245          */
00246         if (LOCKING_ON(dbenv) &&
00247             !F_ISSET(dbp, DB_AM_COMPENSATE) &&
00248             !F_ISSET(dbp, DB_AM_RECOVER) &&
00249             dbp->lid == DB_LOCK_INVALIDID) {
00250                 if ((ret = __lock_id(dbenv, &dbp->lid, NULL)) != 0)
00251                         goto err;
00252                 created_locker = 1;
00253         }
00254         LOCK_INIT(dbp->handle_lock);
00255 
00256         locker = txn == NULL ? dbp->lid : txn->txnid;
00257 
00258         oflags = 0;
00259         if (F_ISSET(dbp, DB_AM_INMEM))
00260                 real_name = (char *)name;
00261         else {
00262                 /* Get the real backing file name. */
00263                 if ((ret = __db_appname(dbenv,
00264                     DB_APP_DATA, name, 0, NULL, &real_name)) != 0)
00265                         goto err;
00266 
00267                 /* Fill in the default file mode. */
00268                 if (mode == 0)
00269                         mode = __db_omode("rw-rw----");
00270 
00271                 if (LF_ISSET(DB_RDONLY))
00272                         oflags |= DB_OSO_RDONLY;
00273                 if (LF_ISSET(DB_TRUNCATE))
00274                         oflags |= DB_OSO_TRUNC;
00275         }
00276 
              /*
               * Remember whether creation was requested, then clear DB_CREATE
               * from flags; it is set again only once we decide to create.
               */
00277         retries = 0;
00278         create_ok = LF_ISSET(DB_CREATE);
00279         LF_CLR(DB_CREATE);
00280 
00281 retry:
00282         /*
00283          * If we cannot create the file, only retry a few times.  We
00284          * think we might be in a race with another create, but it could
00285          * be that the backup filename exists (that is, is left over from
00286          * a previous crash).
               *
               * Every jump back to this label leaves the error that caused
               * the retry in ret, so giving up here returns that error.
00287          */
00288         if (++retries > DB_RETRY) {
00289                 __db_err(dbenv, "__fop_file_setup:  Retry limit (%d) exceeded",
00290                     DB_RETRY);
00291                 goto err;
00292         }
00293         if (!F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER))
00294                 GET_ENVLOCK(dbenv, locker, &elock);
00295         if (name == NULL)
00296                 ret = ENOENT;
00297         else if (F_ISSET(dbp, DB_AM_INMEM)) {
00298                 ret = __db_dbenv_mpool(dbp, name, flags);
00299                 /*
00300                  * We are using __db_dbenv_mpool as a check for existence.
00301                  * However, __db_dbenv_mpool does an actual open and there
00302                  * are scenarios where the object exists, but cannot be
00303                  * opened, because our settings don't match those internally.
00304                  * We need to check for that explicitly.  We'll need the
00305                  * mpool open to read the meta-data page, so we're going to
00306                  * have to temporarily turn this dbp into an UNKNOWN one.
00307                  */
00308                 if (ret == EINVAL) {
00309                         was_inval = 1;
00310                         save_type = dbp->type;
00311                         dbp->type = DB_UNKNOWN;
00312                         ret = __db_dbenv_mpool(dbp, name, flags);
00313                         dbp->type = save_type;
00314                 }
00315         } else
00316                 ret = __os_exists(real_name, NULL);
00317 
00318         if (ret == 0) {
00319                 /*
00320                  * If the file exists, there are 5 possible cases:
00321                  * 1. DB_EXCL was specified so this is an error, unless
00322                  *      this is a file left around after a rename and we
00323                  *      are in the same transaction.  This gets decomposed
00324                  *      into several subcases, because we check for various
00325                  *      errors before we know we're in rename.
00326                  * 2. We are truncating, and it doesn't matter what kind
00327                  *      of file it is, we should open/create it.
00328                  * 3. It is 0-length, we are not doing transactions (i.e.,
00329                  *      we are sendmail), we should open/create into it.
00330                  *      -- on-disk files only!
00331                  * 4. It is a Berkeley DB file and we should simply open it.
00332                  * 5. It is not a BDB file and we should return an error.
00333                  */
00334 
00335                 /* Open file (if there is one). */
00336 reopen:         if (!F_ISSET(dbp, DB_AM_INMEM) &&
00337                     (ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
00338                         goto err;
00339 
00340                 /* Case 2: DB_TRUNCATE: we must do the creation in place. */
00341                 if (LF_ISSET(DB_TRUNCATE)) {
00342                         if (LF_ISSET(DB_EXCL)) {
00343                                 /* Case 1a: DB_EXCL and DB_TRUNCATE. */
00344                                 ret = EEXIST;
00345                                 goto err;
00346                         }
00347                         tmpname = (char *)name;
00348                         goto creat2;
00349                 }
00350 
00351                 /* Cases 1,3-5: we need to read the meta-data page. */
00352                 if (F_ISSET(dbp, DB_AM_INMEM))
00353                         ret = __fop_inmem_read_meta(dbp, name, flags);
00354                 else {
00355                         ret = __fop_read_meta(dbenv, real_name, mbuf,
00356                             sizeof(mbuf), fhp,
00357                             LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0,
00358                             &len);
00359 
00360                         /* Case 3: 0-length, no txns. */
00361                         if (ret != 0 && len == 0 && txn == NULL) {
00362                                 if (LF_ISSET(DB_EXCL)) {
00363                                         /*
00364                                          * Case 1b: DB_EXCL and
00365                                          * 0-length file exists.
00366                                          */
00367                                         ret = EEXIST;
00368                                         goto err;
00369                                 }
00370                                 tmpname = (char *)name;
00371                                 goto creat2;
00372                         }
00373 
00374                         /* Case 4: This is a valid file. */
00375                         if (ret == 0)
00376                                 ret = __db_meta_setup(dbenv, dbp,
00377                                     real_name, (DBMETA *)mbuf, flags, 1);
00378 
00379                 }
00380 
00381                 /* Case 5: Invalid file. */
00382                 if (ret != 0)
00383                         goto err;
00384 
00385                 /* Now, get our handle lock. */
00386                 if ((ret = __fop_lock_handle(dbenv,
00387                     dbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) == 0) {
00388                         if ((ret = __ENV_LPUT(dbenv, elock)) != 0)
00389                                 goto err;
00390                 } else if (ret != DB_LOCK_NOTGRANTED ||
00391                     (txn != NULL && F_ISSET(txn, TXN_NOWAIT)))
00392                         goto err;
00393                 else {
00394                         /*
00395                          * We were unable to acquire the handle lock without
00396                          * blocking.  The fact that we are blocking might mean
00397                          * that someone else is trying to delete the file.
00398                          * Since some platforms cannot delete files while they
00399                          * are open (Windows), we are going to have to close
00400                          * the file.  This would be a problem if we were doing
00401                          * FCNTL locking, because our closing the handle would
00402                          * release the FCNTL locks.  Fortunately, if we are
00403                          * doing FCNTL locking, then we should never fail to
00404                          * acquire our handle lock, so we should never get here.
00405                          * We assert it here to make sure we aren't destroying
00406                          * any application level FCNTL semantics.
00407                          */
00408                         DB_ASSERT(!LF_ISSET(DB_FCNTL_LOCKING));
00409                         if (!F_ISSET(dbp, DB_AM_INMEM)) {
00410                                 if ((ret = __os_closehandle(dbenv, fhp)) != 0)
00411                                         goto err;
00412                                 fhp = NULL;
00413                         }
00414                         if ((ret = __fop_lock_handle(dbenv,
00415                             dbp, locker, DB_LOCK_READ, &elock, 0)) != 0) {
00416                                 if (F_ISSET(dbp, DB_AM_INMEM))
00417                                         RESET_MPF(dbp, 0);
00418                                 goto err;
00419                         }
00420 
00421                         /*
00422                          * It's possible that our DBP was initialized
00423                          * with a different file last time we opened it.
00424                          * Therefore, we need to reset the DBP type and then
00425                          * re-read the meta-data page and reset any other
00426                          * fields that __db_meta_setup initializes.  We
00427                          * need to shut down this dbp and reopen for in-memory
00428                          * named databases. Unfortunately __db_refresh is
00429                          * pretty aggressive at the shutting down, so we need
00430                          * to do a bunch of restoration.
00431                          * XXX it would be nice to pull refresh apart into
00432                          * the stuff you need to do to call __db_dbenv_mpool
00433                          * and the stuff you can really throw away.
00434                          */
00435                         if (F_ISSET(dbp, DB_AM_INMEM)) {
00436                                 if ((ret = __db_refresh(dbp,
00437                                     txn, DB_NOSYNC, NULL, 1)) != 0)
00438                                         goto err;
00439                                 ret = __db_dbenv_mpool(dbp, name, flags);
00440                         } else
00441                                 ret = __os_open(dbenv, real_name, 0, 0, &fhp);
00442 
00443                         if (ret != 0) {
                                      /*
                                       * Release the handle lock through t_ret
                                       * so that ret keeps the reopen error.  If
                                       * the retry limit is then hit at the
                                       * retry label, that error is returned
                                       * instead of 0.
                                       */
00444                                 t_ret =
00445                                     __ENV_LPUT(dbenv, dbp->handle_lock);
00446                                 LOCK_INIT(dbp->handle_lock);
                                      if (t_ret != 0) {
                                              ret = t_ret;
00447                                         goto err;
00448                                 }
00449                                 goto retry;
00450                         }
00451 
00452                         dbp->type = save_type;
00453                         if (F_ISSET(dbp, DB_AM_INMEM))
00454                                 ret = __fop_inmem_read_meta(dbp, name, flags);
00455                         else if ((ret =
00456                             __fop_read_meta(dbenv, real_name, mbuf,
00457                             sizeof(mbuf), fhp,
00458                             LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0,
00459                             &len)) != 0 ||
00460                             (ret = __db_meta_setup(dbenv, dbp, real_name,
00461                             (DBMETA *)mbuf, flags, 1)) != 0)
00462                                 goto err;
00463 
00464                 }
00465 
00466                 /* If we got here, then we have the handle lock. */
00467 
00468                 /*
00469                  * Check for a file in the midst of a rename.  If we find that
00470                  * the file is in the midst of a rename, it must be the case
00471                  * that it is in our current transaction (else we would still
00472                  * be blocking), so we can continue along and create a new file
00473                  * with the same name.  In that case, we have to close the file
00474                  * handle because we reuse it below.  This is a case where
00475                  * a 'was_inval' above is OK.
00476                  */
00477                 if (F_ISSET(dbp, DB_AM_IN_RENAME)) {
00478                         was_inval = 0;
00479                         if (create_ok) {
00480                                 if (F_ISSET(dbp, DB_AM_INMEM)) {
00481                                         RESET_MPF(dbp, DB_MPOOL_DISCARD);
00482                                 } else if ((ret =
00483                                     __os_closehandle(dbenv, fhp)) != 0)
00484                                         goto err;
00485                                 LF_SET(DB_CREATE);
00486                                 goto create;
00487                         } else {
00488                                 ret = ENOENT;
00489                                 goto err;
00490                         }
00491                 }
00492 
00493                 /* If we get here, a was_inval is bad. */
00494                 if (was_inval) {
00495                         ret = EINVAL;
00496                         goto err;
00497                 }
00498 
00499                 /*
00500                  * Now, case 1: check for DB_EXCL, because the file that exists
00501                  * is not in the middle of a rename, so we have an error.  This
00502                  * is a weird case, but we need to make sure that we don't
00503                  * continue to hold the handle lock, since technically, we
00504                  * should not have been allowed to open it.
00505                  */
00506                 if (LF_ISSET(DB_EXCL)) {
00507                         ret = __ENV_LPUT(dbenv, dbp->handle_lock);
00508                         LOCK_INIT(dbp->handle_lock);
00509                         if (ret == 0)
00510                                 ret = EEXIST;
00511                         goto err;
00512                 }
00513                 goto done;
00514         }
00515 
00516         /* File does not exist. */
00517 #ifdef  HAVE_VXWORKS
00518         /*
00519          * VxWorks can return file-system specific error codes if the
00520          * file does not exist, not ENOENT.
00521          */
00522         if (!create_ok)
00523 #else
00524         if (!create_ok || ret != ENOENT)
00525 #endif
00526                 goto err;
00527         LF_SET(DB_CREATE);
00528         ret = 0;
00529 
00530         /*
00531          * We need to create file, which means that we need to set up the file,
00532          * the fileid and the locks.  Then we need to call the appropriate
00533          * routines to create meta-data pages.  For in-memory files, we retain
00534          * the environment lock, while for on-disk files, we drop the env lock
00535          * and create into a temporary.
00536          */
00537         if (!F_ISSET(dbp, DB_AM_INMEM) &&
00538             (ret = __ENV_LPUT(dbenv, elock)) != 0)
00539                 goto err;
00540 
00541 create: if (txn != NULL && IS_REP_CLIENT(dbenv)) {
00542                 __db_err(dbenv,
00543                     "Transactional create on replication client disallowed");
00544                 ret = EINVAL;
00545                 goto err;
00546         }
00547 
00548         if (F_ISSET(dbp, DB_AM_INMEM))
00549                 ret = __fop_inmem_create(dbp, name, txn, flags);
00550         else {
00551                 if ((ret = __db_backup_name(dbenv, name, txn, &tmpname)) != 0)
00552                         goto err;
00553                 if (TXN_ON(dbenv) && txn != NULL &&
00554                     (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0)
00555                         goto err;
00556                 if ((ret = __fop_create(dbenv,
00557                     stxn, &fhp, tmpname, DB_APP_DATA, mode, dflags)) != 0) {
00558                         /*
00559                          * If we don't have transactions there is a race on
00560                          * creating the temp file.
00561                          */
00562                         if (!TXN_ON(dbenv) && ret == EEXIST) {
00563                                 __os_free(dbenv, tmpname);
00564                                 tmpname = NULL;
00565                                 __os_yield(dbenv, 1);
00566                                 goto retry;
00567                         }
00568                         goto err;
00569                 }
00570                 tmp_created = 1;
00571         }
00572 
00573 creat2: if (!F_ISSET(dbp, DB_AM_INMEM)) {
00574                 if ((ret = __db_appname(dbenv,
00575                     DB_APP_DATA, tmpname, 0, NULL, &real_tmpname)) != 0)
00576                         goto err;
00577 
00578                 /* Set the pagesize if it isn't yet set. */
00579                 if (dbp->pgsize == 0 &&
00580                     (ret = __fop_set_pgsize(dbp, fhp, real_tmpname)) != 0)
00581                         goto errmsg;
00582 
00583                 /* Construct a file_id. */
00584                 if ((ret =
00585                     __os_fileid(dbenv, real_tmpname, 1, dbp->fileid)) != 0)
00586                         goto errmsg;
00587         }
00588 
00589         if ((ret = __db_new_file(dbp,
00590             F_ISSET(dbp, DB_AM_INMEM) ? txn : stxn, fhp, tmpname)) != 0)
00591                 goto err;
00592 
00593         /*
00594          * We need to close the handle here on platforms where remove and
00595          * rename fail if a handle is open (including Windows).
00596          */
00597         CLOSE_HANDLE(dbp, fhp);
00598 
00599         /*
00600          * Now move the file into place unless we are creating in place (because
00601          * we created a database in a file that started out 0-length).  If
00602          * this is an in-memory file, we may or may not hold the environment
00603          * lock depending on how we got here.
00604          */
00605         if (!F_ISSET(dbp, DB_AM_COMPENSATE) &&
00606             !F_ISSET(dbp, DB_AM_RECOVER) && !LOCK_ISSET(elock))
00607                 GET_ENVLOCK(dbenv, locker, &elock);
00608 
00609         if (F_ISSET(dbp, DB_AM_IN_RENAME)) {
00610                 F_CLR(dbp, DB_AM_IN_RENAME);
00611                 __txn_remrem(dbenv, txn, real_name);
00612         } else if (name == tmpname) {
00613                 /* We created it in place. */
00614         } else if (!F_ISSET(dbp, DB_AM_INMEM) &&
00615             __os_exists(real_name, NULL) == 0) {
00616                 /*
00617                  * Someone managed to create the file; remove our temp
00618                  * and try to open the file that now exists.
00619                  */
00620                 (void)__fop_remove(dbenv,
00621                     NULL, dbp->fileid, tmpname, DB_APP_DATA, dflags);
00622                 (void)__ENV_LPUT(dbenv, dbp->handle_lock);
00623                 LOCK_INIT(dbp->handle_lock);
00624 
00625                 if (stxn != NULL) {
00626                         ret = __txn_abort(stxn);
00627                         stxn = NULL;
00628                 }
00629                 if (ret != 0)
00630                         goto err;
00631                 goto reopen;
00632         }
00633 
00634         if (name != NULL && (ret = __fop_lock_handle(dbenv,
00635             dbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0)
00636                 goto err;
00637         if (tmpname != NULL && tmpname != name && (ret = __fop_rename(dbenv,
00638             stxn, tmpname, name, dbp->fileid, DB_APP_DATA, dflags)) != 0)
00639                 goto err;
00640 
00641         if (stxn != NULL) {
00642                 *retidp = stxn->txnid;
00643                 ret = __txn_commit(stxn, 0);
00644                 stxn = NULL;
00645         } else
00646                 *retidp = TXN_INVALID;
00647 
00648         if (ret != 0)
00649                 goto err;
00650 
00651         F_SET(dbp, DB_AM_CREATED);
00652 
              /*
               * The error block is only ever entered through its labels; it
               * falls through into the common cleanup at done.
               */
00653         if (0) {
00654 errmsg:         __db_err(dbenv, "%s: %s", name, db_strerror(ret));
00655 
00656 err:            CLOSE_HANDLE(dbp, fhp);
00657                 if (stxn != NULL)
00658                         (void)__txn_abort(stxn);
00659                 if (tmp_created && txn == NULL)
00660                         (void)__fop_remove(dbenv,
00661                             NULL, NULL, tmpname, DB_APP_DATA, dflags);
00662                 if (txn == NULL)
00663                         (void)__ENV_LPUT(dbenv, dbp->handle_lock);
00664                 (void)__ENV_LPUT(dbenv, elock);
00665                 if (created_locker) {
00666                         (void)__lock_id_free(dbenv, dbp->lid);
00667                         dbp->lid = DB_LOCK_INVALIDID;
00668                 }
00669         }
00670 
00671 done:   /*
               * Close (or, for fcntl locking, save) the file handle before
               * freeing anything.  CLOSE_HANDLE jumps to err when the close
               * fails, and err falls through to this point again; closing
               * first means the frees below are reached only once on that
               * path.
               */
              CLOSE_HANDLE(dbp, fhp);

              /*
00672          * There are cases where real_name and tmpname take on the
00673          * exact same string, so we need to make sure that we do not
00674          * free twice.
00675          */
00676         if (tmpname != NULL && tmpname != name)
00677                 __os_free(dbenv, tmpname);
00678         if (real_name != name && real_name != NULL)
00679                 __os_free(dbenv, real_name);
00680         if (real_tmpname != NULL)
00681                 __os_free(dbenv, real_tmpname);
00683 
00684         return (ret);
00685 }
00686 
00687 /*
00688  * __fop_set_pgsize --
00689  *      Set the page size based on file information.
00690  */
00691 static int
00692 __fop_set_pgsize(dbp, fhp, name)
00693         DB *dbp;
00694         DB_FH *fhp;
00695         const char *name;
00696 {
00697         DB_ENV *dbenv;
00698         u_int32_t iopsize;
00699         int ret;
00700 
00701         dbenv = dbp->dbenv;
00702 
00703         /*
00704          * Use the filesystem's optimum I/O size as the pagesize if a pagesize
00705          * not specified.  Some filesystems have 64K as their optimum I/O size,
00706          * but as that results in fairly large default caches, we limit the
00707          * default pagesize to 16K.
00708          */
00709         if ((ret = __os_ioinfo(dbenv, name, fhp, NULL, NULL, &iopsize)) != 0) {
00710                 __db_err(dbenv, "%s: %s", name, db_strerror(ret));
00711                 return (ret);
00712         }
00713         if (iopsize < 512)
00714                 iopsize = 512;
00715         if (iopsize > 16 * 1024)
00716                 iopsize = 16 * 1024;
00717 
00718         /*
00719          * Sheer paranoia, but we don't want anything that's not a power-of-2
00720          * (we rely on that for alignment of various types on the pages), and
00721          * we want a multiple of the sector size as well.  If the value
00722          * we got out of __os_ioinfo looks bad, use a default instead.
00723          */
00724         if (!IS_VALID_PAGESIZE(iopsize))
00725                 iopsize = DB_DEF_IOSIZE;
00726 
00727         dbp->pgsize = iopsize;
00728         F_SET(dbp, DB_AM_PGDEF);
00729 
00730         return (0);
00731 }
00732 
00733 /*
00734  * __fop_subdb_setup --
00735  *
00736  * Subdb setup is significantly simpler than file setup.  In terms of
00737  * locking, for the duration of the operation/transaction, the locks on
00738  * the meta-data page will suffice to protect us from simultaneous operations
00739  * on the sub-database.  Before we complete the operation though, we'll get a
00740  * handle lock on the subdatabase so that no one else can try to remove it
00741  * while we've got it open.  We use an object that looks like the meta-data
00742  * page lock with a different type (DB_HANDLE_LOCK) for the long-term handle
00743  * locks.
       *
       * dbp is the subdatabase handle being set up and txn the enclosing
       * transaction (or NULL).  mname, flags and mode are handed to
       * __db_master_open to open the master database; name, when non-NULL,
       * is the subdatabase entry passed to __db_master_update (MU_OPEN).
       *
       * Returns 0 on success.  If opening the master fails, that error is
       * returned immediately; otherwise the master handle is always closed
       * before returning and the first error encountered is the one reported.
00744  *
00745  * PUBLIC: int __fop_subdb_setup __P((DB *, DB_TXN *,
00746  * PUBLIC:     const char *, const char *, int, u_int32_t));
00747  */
00748 int
00749 __fop_subdb_setup(dbp, txn, mname, name, mode, flags)
00750         DB *dbp;
00751         DB_TXN *txn;
00752         const char *mname, *name;
00753         int mode;
00754         u_int32_t flags;
00755 {
00756         DB *mdbp;
00757         DB_ENV *dbenv;
00758         db_lockmode_t lkmode;
00759         int ret, t_ret;
00760 
00761         mdbp = NULL;
00762         dbenv = dbp->dbenv;
00763 
00764         if ((ret = __db_master_open(dbp, txn, mname, flags, mode, &mdbp)) != 0)
00765                 return (ret);
00766         /*
00767          * If we created this file, then we need to set the DISCARD flag so
00768          * that if we fail in the middle of this routine, we discard from the
00769          * mpool any pages that we just created.
00770          */
00771         if (F_ISSET(mdbp, DB_AM_CREATED))
00772                 F_SET(mdbp, DB_AM_DISCARD);
00773 
00774         /*
00775          * We are going to close this instance of the master, so we can
00776          * steal its handle instead of reopening a handle on the database.
00777          */
00778         if (LF_ISSET(DB_FCNTL_LOCKING)) {
00779                 dbp->saved_open_fhp = mdbp->saved_open_fhp;
00780                 mdbp->saved_open_fhp = NULL;
00781         }
00782 
00783         /* Copy the pagesize and set the sub-database flag. */
00784         dbp->pgsize = mdbp->pgsize;
00785         F_SET(dbp, DB_AM_SUBDB);
00786 
              /* With no subdatabase name there is no master entry to update. */
00787         if (name != NULL && (ret = __db_master_update(mdbp, dbp, txn,
00788             name, dbp->type, MU_OPEN, NULL, flags)) != 0)
00789                 goto err;
00790 
00791         /*
00792          * Hijack the master's locker ID as well, so that our locks don't
00793          * conflict with the master's.  Since we're closing the master,
00794          * that lid would just have been freed anyway.  Once we've gotten
00795          * the locker id, we need to acquire the handle lock for this
00796          * subdatabase.
00797          */
00798         dbp->lid = mdbp->lid;
00799         mdbp->lid = DB_LOCK_INVALIDID;
00800 
00801         DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, mname);
00802 
00803         /*
00804          * We copy our fileid from our master so that we all open
00805          * the same file in mpool.  We'll use the meta-pgno to lock
00806          * so that we end up with different handle locks.
00807          */
00808 
00809         memcpy(dbp->fileid, mdbp->fileid, DB_FILE_ID_LEN);
              /*
               * A write lock is requested when this handle created the
               * subdatabase or DB_WRITEOPEN was passed; otherwise a read lock.
               * The locker is the transaction's id when there is a transaction.
               */
00810         lkmode = F_ISSET(dbp, DB_AM_CREATED) || LF_ISSET(DB_WRITEOPEN) ?
00811             DB_LOCK_WRITE : DB_LOCK_READ;
00812         if ((ret = __fop_lock_handle(dbenv, dbp,
00813             txn == NULL ? dbp->lid : txn->txnid, lkmode, NULL,
00814             NOWAIT_FLAG(txn))) != 0)
00815                 goto err;
00816 
00817         if ((ret = __db_init_subdb(mdbp, dbp, name, txn)) != 0) {
00818                 /*
00819                  * If there was no transaction and we created this database,
00820                  * then we need to undo the update of the master database.
00821                  */
00822                 if (F_ISSET(dbp, DB_AM_CREATED) && txn == NULL)
00823                         (void)__db_master_update(mdbp, dbp, txn,
00824                             name, dbp->type, MU_REMOVE, NULL, 0);
00825                 F_CLR(dbp, DB_AM_CREATED);
00826                 goto err;
00827         }
00828 
00829         /*
00830          * XXX
00831          * This should have been done at the top of this routine.  The problem
00832          * is that __db_init_subdb() uses "standard" routines to process the
00833          * meta-data page and set information in the DB handle based on it.
00834          * Those routines have to deal with swapped pages and will normally set
00835          * the DB_AM_SWAP flag.  However, we use the master's metadata page and
00836          * that has already been swapped, so they get the is-swapped test wrong.
00837          */
00838         F_CLR(dbp, DB_AM_SWAP);
00839         F_SET(dbp, F_ISSET(mdbp, DB_AM_SWAP));
00840 
00841         /*
00842          * In the file create case, these happen in separate places so we have
00843          * two different tests.  They end up in the same place for subdbs, but
00844          * for compatibility with file testing, we put them both here anyway.
00845          */
00846         DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, mname);
00847         DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, mname);
00848 
00849         /*
00850          * File exists and we have the appropriate locks; we should now
00851          * process a normal open.
00852          */
00853         if (F_ISSET(mdbp, DB_AM_CREATED)) {
00854                 F_SET(dbp, DB_AM_CREATED_MSTR);
00855                 F_CLR(mdbp, DB_AM_DISCARD);
00856         }
00857 
              /*
               * The block below is entered only by jumping to one of its labels;
               * the success path skips it.  With no transaction, the handle lock
               * is released here.  NOTE(review): presumably a transaction's
               * resolution releases it otherwise -- confirm.
               */
00858         if (0) {
00859 err:
00860 DB_TEST_RECOVERY_LABEL
00861                 if (txn == NULL)
00862                         (void)__ENV_LPUT(dbenv, dbp->handle_lock);
00863         }
00864 
00865         /*
00866          * The master's handle lock is under the control of the
00867          * subdb (it acquired the master's locker).  We want to
00868          * keep the master's handle lock so that no one can remove
00869          * the file while the subdb is open.  If we register the
00870          * trade event and then invalidate the copy of the lock
00871          * in the master's handle, that will accomplish this.  However,
00872          * before we register this event, we'd better remove any
00873          * events that we've already registered for the master.
00874          */
00875         if (!F_ISSET(dbp, DB_AM_RECOVER) && txn != NULL) {
00876                 /* Unregister old master events. */
00877                  __txn_remlock(dbenv,
00878                     txn, &mdbp->handle_lock, DB_LOCK_INVALIDID);
00879 
00880                 /* Now register the new event. */
                      /* t_ret replaces ret only when no earlier error is pending. */
00881                 if ((t_ret = __txn_lockevent(dbenv, txn, dbp,
00882                     &mdbp->handle_lock, dbp->lid == DB_LOCK_INVALIDID ?
00883                     mdbp->lid : dbp->lid)) != 0 && ret == 0)
00884                         ret = t_ret;
00885         }
00886         LOCK_INIT(mdbp->handle_lock);
00887 
00888         /*
00889          * If the master was created, we need to sync so that the metadata
00890          * page is correct on disk for recovery, since it isn't read through
00891          * mpool.  If we're opening a subdb in an existing file, we can skip
00892          * the sync.
00893          */
00894         if ((t_ret =__db_close(mdbp, txn,
00895             F_ISSET(dbp, DB_AM_CREATED_MSTR) ? 0 : DB_NOSYNC)) != 0 &&
00896             ret == 0)
00897                 ret = t_ret;
00898         return (ret);
00899 }
00900 
00901 /*
00902  * __fop_remove_setup --
00903  *      Open handle appropriately and lock for removal of a database file.
       *
       *      dbp is the handle for the database being removed, txn the
       *      enclosing transaction (or NULL) and name the database name
       *      handed to the open and meta-data routines.  On success the
       *      handle's write lock has been acquired and, for a database that
       *      is not in-memory, DB_AM_DISCARD is set on dbp.
       *
       *      Returns 0 on success, ENOENT if the handle is flagged
       *      DB_AM_IN_RENAME, the __db_panic() result if the mpool reference
       *      count shows the file is already open, or the error from
       *      whichever call failed.
00904  *
00905  * PUBLIC: int __fop_remove_setup __P((DB *,
00906  * PUBLIC:      DB_TXN *, const char *, u_int32_t));
00907  */
00908 int
00909 __fop_remove_setup(dbp, txn, name, flags)
00910         DB *dbp;
00911         DB_TXN *txn;
00912         const char *name;
00913         u_int32_t flags;
00914 {
00915         DB_ENV *dbenv;
00916         DB_FH *fhp;
00917         DB_LOCK elock;
00918         u_int32_t refcnt;
00919         u_int8_t mbuf[DBMETASIZE];
00920         int ret;
00921 
00922         COMPQUIET(flags, 0);
00923         dbenv = dbp->dbenv;
00924         PANIC_CHECK(dbenv);
00925         LOCK_INIT(elock);
00926         fhp = NULL;
00927         ret = 0;
00928 
00929         /* Create locker if necessary. */
              /* Control returns to "retry" after blocking on the handle lock. */
00930 retry:  if (LOCKING_ON(dbenv)) {
00931                 if (txn != NULL)
00932                         dbp->lid = txn->txnid;
00933                 else if (dbp->lid == DB_LOCK_INVALIDID) {
00934                         if ((ret = __lock_id(dbenv, &dbp->lid, NULL)) != 0)
00935                                 goto err;
00936                 }
00937         }
00938 
00939         /*
00940          * We are about to open a file handle and then possibly close it.
00941          * We cannot close handles if we are doing FCNTL locking.  However,
00942          * there is no way to pass the FCNTL flag into this routine via the
00943          * user API.  The only way we can get in here and be doing FCNTL
00944          * locking is if we are trying to clean up an open that was called
00945          * with FCNTL locking.  In that case, saved_open_fhp should already
00946          * be set.  So, we use that field to tell us whether we must avoid
00947          * closing the handle.
00948          */
00949         fhp = dbp->saved_open_fhp;
00950         DB_ASSERT(LF_ISSET(DB_FCNTL_LOCKING) || fhp == NULL);
00951 
00952         /*
00953          * Lock environment to protect file open.  That will enable us to
00954          * read the meta-data page and get the fileid so that we can lock
00955          * the handle.
00956          */
00957         GET_ENVLOCK(dbenv, dbp->lid, &elock);
00958 
00959         /* Open database. */
              /* A handle inherited from saved_open_fhp is reused, not reopened. */
00960         if (F_ISSET(dbp, DB_AM_INMEM))
00961                 ret = __db_dbenv_mpool(dbp, name, flags);
00962         else if (fhp == NULL)
00963                 ret = __os_open(dbenv, name, DB_OSO_RDONLY, 0, &fhp);
00964         if (ret != 0)
00965                 goto err;
00966 
00967         /* Get meta-data */
00968         if (F_ISSET(dbp, DB_AM_INMEM))
00969                 ret = __fop_inmem_read_meta(dbp, name, flags);
00970         else if ((ret = __fop_read_meta(dbenv,
00971             name, mbuf, sizeof(mbuf), fhp, 0, NULL)) == 0)
00972                 ret = __db_meta_setup(dbenv,
00973                     dbp, name, (DBMETA *)mbuf, flags, 1);
00974         if (ret != 0)
00975                 goto err;
00976 
00977         /*
00978          * Now, get the handle lock.  We first try with NOWAIT, because if
00979          * we have to wait, we're going to have to close the file and reopen
00980          * it, so that if there is someone else removing it, our open doesn't
00981          * prevent that.
00982          */
00983         if ((ret = __fop_lock_handle(dbenv,
00984             dbp, dbp->lid, DB_LOCK_WRITE, NULL, DB_LOCK_NOWAIT)) != 0) {
00985                 /*
00986                  * Close the file, block on the lock, clean up the dbp, and
00987                  * then start all over again.
00988                  */
00989                 if (!F_ISSET(dbp, DB_AM_INMEM) && !LF_ISSET(DB_FCNTL_LOCKING)) {
00990                         (void)__os_closehandle(dbenv, fhp);
00991                         fhp = NULL;
00992                 }
                      /*
                       * Only DB_LOCK_NOTGRANTED is retried, and not when the
                       * transaction is flagged TXN_NOWAIT; anything else is an
                       * error.
                       */
00993                 if (ret != DB_LOCK_NOTGRANTED ||
00994                     (txn != NULL && F_ISSET(txn, TXN_NOWAIT)))
00995                         goto err;
00996                 else if ((ret = __fop_lock_handle(dbenv,
00997                     dbp, dbp->lid, DB_LOCK_WRITE, &elock, 0)) != 0)
00998                         goto err;
00999 
01000                 if (F_ISSET(dbp, DB_AM_INMEM)) {
01001                         (void)__lock_put(dbenv, &dbp->handle_lock);
01002                         (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 1);
01003                 } else {
01004                         if (txn != NULL)
01005                                 dbp->lid = DB_LOCK_INVALIDID;
01006                         (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 0);
01007                 }
01008                 goto retry;
01009         } else if ((ret = __ENV_LPUT(dbenv, elock)) != 0)
01010                 goto err;
01011 
01012         /* Check if the file is already open. */
01013         if ((ret = __memp_get_refcnt(dbenv, dbp->fileid, &refcnt)) != 0)
01014                 goto err;
01015 
01016         /*
01017          * Now, error check.  If the file is already open, then we must have
01018          * it open (since we got the lock) and we need to panic, because this
01019          * is a self deadlock and the application has a bug. If the file isn't
01020          * open, but it's in the midst of a rename then this file doesn't
01021          * really exist.  Note that in-memory files will always have an
01022          * artificially incremented ref count.
01023          */
01024         if ((F_ISSET(dbp, DB_AM_INMEM) && refcnt != 2) ||
01025             (!F_ISSET(dbp, DB_AM_INMEM) && refcnt != 0)) {
01026                 __db_err(dbenv,
01027 "Attempting to remove file open in current transaction causing self-deadlock");
01028                 ret = __db_panic(dbenv, DB_LOCK_DEADLOCK);
01029         } else if (F_ISSET(dbp, DB_AM_IN_RENAME))
01030                 ret = ENOENT;
01031 
              /* Entered only via "goto err"; straight-line code skips it. */
01032         if (0) {
01033 err:            (void)__ENV_LPUT(dbenv, elock);
01034         }
              /* Under FCNTL locking the file handle is left open. */
01035         if (fhp != NULL && !LF_ISSET(DB_FCNTL_LOCKING))
01036                 (void)__os_closehandle(dbenv, fhp);
01037         /*
01038          * If this is a real file and we are going to proceed with the removal,
01039          * then we need to make sure that we don't leave any pages around in the
01040          * mpool since the file is closed and will be reopened again before
01041          * access.  However, this might be an in-memory file, in which case
01042          * we will handle the discard from the mpool later as it's the "real"
01043          * removal of the database.
01044          */
01045         if (ret == 0 && !F_ISSET(dbp, DB_AM_INMEM))
01046                 F_SET(dbp, DB_AM_DISCARD);
01047         return (ret);
01048 }
01049 
01050 /*
01051  * __fop_read_meta --
01052  *      Read the meta-data page from a file and return it in buf.
01053  *
01054  * PUBLIC: int __fop_read_meta __P((DB_ENV *, const char *,
01055  * PUBLIC:     u_int8_t *, size_t, DB_FH *, int, size_t *));
01056  */
01057 int
01058 __fop_read_meta(dbenv, name, buf, size, fhp, errok, nbytesp)
01059         DB_ENV *dbenv;
01060         const char *name;
01061         u_int8_t *buf;
01062         size_t size;
01063         DB_FH *fhp;
01064         int errok;
01065         size_t *nbytesp;
01066 {
01067         size_t nr;
01068         int ret;
01069 
01070         /*
01071          * Our caller wants to know the number of bytes read, even if we
01072          * return an error.
01073          */
01074         if (nbytesp != NULL)
01075                 *nbytesp = 0;
01076 
01077         nr = 0;
01078         ret = __os_read(dbenv, fhp, buf, size, &nr);
01079         if (nbytesp != NULL)
01080                 *nbytesp = nr;
01081 
01082         if (ret != 0) {
01083                 if (!errok)
01084                         __db_err(dbenv, "%s: %s", name, db_strerror(ret));
01085                 goto err;
01086         }
01087 
01088         if (nr != size) {
01089                 if (!errok)
01090                         __db_err(dbenv,
01091                             "%s: unexpected file type or format", name);
01092                 ret = EINVAL;
01093         }
01094 
01095 err:
01096         return (ret);
01097 }
01098 
01099 /*
01100  * __fop_dummy --
01101  *      This implements the creation and name swapping of dummy files that
01102  * we use for remove and rename (remove is simply a rename with a delayed
01103  * remove).
01104  *
01105  * PUBLIC: int __fop_dummy __P((DB *,
01106  * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t));
01107  */
01108 int
01109 __fop_dummy(dbp, txn, old, new, flags)
01110         DB *dbp;
01111         DB_TXN *txn;
01112         const char *old, *new;
01113         u_int32_t flags;
01114 {
01115         DB *tmpdbp;
01116         DB_ENV *dbenv;
01117         DB_TXN *stxn;
01118         char *back;
01119         int ret, t_ret;
01120         u_int8_t mbuf[DBMETASIZE];
01121         u_int32_t locker;
01122 
01123         dbenv = dbp->dbenv;
01124         back = NULL;
01125         stxn = NULL;
01126         tmpdbp = NULL;
01127 
01128         DB_ASSERT(txn != NULL);
01129         locker = txn->txnid;
01130 
01131         /*
01132          * Begin sub transaction to encapsulate the rename.  Note that we
01133          * expect the inmem_swap calls to complete the sub-transaction,
01134          * aborting on error and committing on success.
01135          */
01136         if (TXN_ON(dbenv) && (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0)
01137                 goto err;
01138 
01139         /* We need to create a dummy file as a place holder. */
01140         if ((ret = __db_backup_name(dbenv, new, stxn, &back)) != 0)
01141                 goto err;
01142         /* Create a dummy dbp handle. */
01143         if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0)
01144                 goto err;
01145 
01146         memset(mbuf, 0, sizeof(mbuf));
01147         ret = F_ISSET(dbp, DB_AM_INMEM) ?
01148             __fop_inmem_dummy(tmpdbp, stxn, back, mbuf) :
01149             __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf, flags);
01150 
01151         if (ret != 0)
01152                 goto err;
01153 
01154         ret = F_ISSET(dbp, DB_AM_INMEM) ?
01155             __fop_inmem_swap(dbp, tmpdbp, stxn, old, new, back, locker) :
01156             __fop_ondisk_swap(dbp, tmpdbp, stxn, old, new, back, locker, flags);
01157         stxn = NULL;
01158         if (ret != 0)
01159                 goto err;
01160 
01161 err:    if (stxn != NULL)
01162                 (void)__txn_abort(stxn);
01163         if (tmpdbp != NULL &&
01164             (t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0)
01165                 ret = t_ret;
01166         if (back != NULL)
01167                 __os_free(dbenv, back);
01168         return (ret);
01169 }
01170 
01171 /*
01172  * __fop_dbrename --
01173  *      Do the appropriate file locking and file system operations
01174  * to effect a dbrename in the absence of transactions (__fop_dummy
01175  * and the subsequent calls in __db_rename do the work for the
01176  * transactional case).
01177  *
01178  * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *));
01179  */
01180 int
01181 __fop_dbrename(dbp, old, new)
01182         DB *dbp;
01183         const char *old, *new;
01184 {
01185         DB_ENV *dbenv;
01186         DB_LOCK elock;
01187         char *real_new, *real_old;
01188         int ret, t_ret;
01189 
01190         dbenv = dbp->dbenv;
01191         real_new = NULL;
01192         real_old = NULL;
01193         LOCK_INIT(elock);
01194 
01195         if (F_ISSET(dbp, DB_AM_INMEM)) {
01196                 real_new = (char *)new;
01197                 real_old = (char *)old;
01198         } else {
01199                 /* Get full names. */
01200                 if ((ret = __db_appname(dbenv,
01201                     DB_APP_DATA, new, 0, NULL, &real_new)) != 0)
01202                         goto err;
01203 
01204                 if ((ret = __db_appname(dbenv,
01205                     DB_APP_DATA, old, 0, NULL, &real_old)) != 0)
01206                         goto err;
01207 
01208         }
01209 
01210         /*
01211          * It is an error to rename a file over one that already exists,
01212          * as that wouldn't be transaction-safe.  We check explicitly
01213          * for ondisk files, but it's done memp_nameop for in-memory ones.
01214          */
01215         GET_ENVLOCK(dbenv, dbp->lid, &elock);
01216         ret = F_ISSET(dbp, DB_AM_INMEM) ? ENOENT :
01217             __os_exists(real_new, NULL);
01218 
01219         if (ret == 0) {
01220                 ret = EEXIST;
01221                 __db_err(dbenv, "rename: file %s exists", real_new);
01222                 goto err;
01223         }
01224 
01225         ret = __memp_nameop(dbenv,
01226             dbp->fileid, new, real_old, real_new, F_ISSET(dbp, DB_AM_INMEM));
01227 
01228 err:    if ((t_ret = __ENV_LPUT(dbenv, elock)) != 0 && ret == 0)
01229                 ret = t_ret;
01230         if (!F_ISSET(dbp, DB_AM_INMEM) && real_old != NULL)
01231                 __os_free(dbenv, real_old);
01232         if (!F_ISSET(dbp, DB_AM_INMEM) && real_new != NULL)
01233                 __os_free(dbenv, real_new);
01234         return (ret);
01235 }
01236 
01237 static int
01238 __fop_inmem_create(dbp, name, txn, flags)
01239         DB *dbp;
01240         const char *name;
01241         DB_TXN *txn;
01242         u_int32_t flags;
01243 {
01244         DB_ENV *dbenv;
01245         DB_LSN lsn;
01246         DBT fid_dbt, name_dbt;
01247         int ret;
01248         int32_t lfid;
01249         u_int32_t *p32;
01250 
01251         dbenv = dbp->dbenv;
01252 
01253         MAKE_INMEM(dbp);
01254 
01255         /* Set the pagesize if it isn't yet set. */
01256         if (dbp->pgsize == 0)
01257                 dbp->pgsize = DB_DEF_IOSIZE;
01258 
01259         /*
01260          * Construct a file_id.
01261          *
01262          * If this file has no name, then we only need a fileid for locking.
01263          * If this file has a name, we need the fileid both for locking and
01264          * matching in the memory pool.  So, with unnamed in-memory databases,
01265          * use a lock_id.  For named in-memory files, we need to find a value
01266          * that we can use to uniquely identify a name/fid pair.  We use a
01267          * combination of a unique id (__os_unique_id) and a hash of the
01268          * original name.
01269          */
01270         if (name == NULL) {
01271                 if (LOCKING_ON(dbenv) && (ret =
01272                     __lock_id(dbenv, (u_int32_t *)dbp->fileid, NULL)) != 0)
01273                         goto err;
01274         }  else {
01275                 p32 = (u_int32_t *)(&dbp->fileid[0]);
01276                 __os_unique_id(dbenv, p32);
01277                 p32++;
01278                 (void)strncpy(
01279                     (char *)p32, name, DB_FILE_ID_LEN - sizeof(u_int32_t));
01280                 dbp->preserve_fid = 1;
01281 
01282                 if (DBENV_LOGGING(dbenv) && dbp->log_filename != NULL)
01283                         memcpy(dbp->log_filename->ufid,
01284                             dbp->fileid, DB_FILE_ID_LEN);
01285         }
01286 
01287         /* Now, set the fileid. */
01288         if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0)
01289                 goto err;
01290 
01291         if ((ret = __db_dbenv_mpool(dbp, name, flags)) != 0)
01292                 goto err;
01293 
01294         if (name != NULL && DBENV_LOGGING(dbenv)) {
01295                 memset(&name_dbt, 0, sizeof(name_dbt));
01296                 name_dbt.data = (void *)name;
01297                 name_dbt.size = (u_int32_t)strlen(name) + 1;
01298                 memset(&fid_dbt, 0, sizeof(fid_dbt));
01299                 fid_dbt.data = dbp->fileid;
01300                 fid_dbt.size = DB_FILE_ID_LEN;
01301                 lfid = dbp->log_filename == NULL ?
01302                     DB_LOGFILEID_INVALID : dbp->log_filename->id;
01303                 if ((ret = __crdel_inmem_create_log(dbenv, txn,
01304                     &lsn, 0, lfid, &name_dbt, &fid_dbt, dbp->pgsize)) != 0)
01305                         goto err;
01306         }
01307 
01308         F_SET(dbp, DB_AM_CREATED);
01309 
01310 err:
01311         return (ret);
01312 }
01313 
01314 static int
01315 __fop_inmem_read_meta(dbp, name, flags)
01316         DB *dbp;
01317         const char *name;
01318         u_int32_t flags;
01319 {
01320         DBMETA *metap;
01321         db_pgno_t pgno;
01322         int ret, t_ret;
01323 
01324         pgno  = PGNO_BASE_MD;
01325         if ((ret = __memp_fget(dbp->mpf, &pgno, 0, &metap)) != 0)
01326                 return (ret);
01327         ret = __db_meta_setup(dbp->dbenv, dbp, name, metap, flags, 1);
01328 
01329         if ((t_ret = __memp_fput(dbp->mpf, metap, 0)) && ret == 0)
01330                 ret = t_ret;
01331 
01332         return (ret);
01333 }
01334 
01335 static int
01336 __fop_ondisk_dummy(dbp, txn, name, mbuf, flags)
01337         DB *dbp;
01338         DB_TXN *txn;
01339         const char *name;
01340         u_int8_t *mbuf;
01341         u_int32_t flags;
01342 {
01343         DB_ENV *dbenv;
01344         int ret;
01345         char *realname;
01346         u_int32_t dflags;
01347 
01348         realname = NULL;
01349         dbenv = dbp->dbenv;
01350         dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
01351 
01352         if ((ret = __db_appname(dbenv,
01353             DB_APP_DATA, name, flags, NULL, &realname)) != 0)
01354                 goto err;
01355 
01356         if ((ret = __fop_create(dbenv,
01357             txn, NULL, name, DB_APP_DATA, 0, dflags)) != 0)
01358                 goto err;
01359 
01360         if ((ret =
01361             __os_fileid(dbenv, realname, 1, ((DBMETA *)mbuf)->uid)) != 0)
01362                 goto err;
01363 
01364         ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC;
01365         if ((ret = __fop_write(dbenv, txn, name,
01366             DB_APP_DATA, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0)
01367                 goto err;
01368 
01369         memcpy(dbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN);
01370 
01371 err:    if (realname != NULL)
01372                 __os_free(dbenv, realname);
01373 
01374         return (ret);
01375 }
01376 
01377 static int
01378 __fop_inmem_dummy(dbp, txn, name, mbuf)
01379         DB *dbp;
01380         DB_TXN *txn;
01381         const char *name;
01382         u_int8_t *mbuf;
01383 {
01384         DBMETA *metap;
01385         db_pgno_t pgno;
01386         int ret, t_ret;
01387 
01388         if ((ret = __fop_inmem_create(dbp, name, txn, DB_CREATE)) != 0)
01389                 return (ret);
01390 
01391         pgno  = PGNO_BASE_MD;
01392         if ((ret =
01393             __memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &metap)) != 0)
01394                 return (ret);
01395         /* Check file existed. */
01396         if (metap->magic != 0)
01397                 ret = EEXIST;
01398         else
01399                 metap->magic = DB_RENAMEMAGIC;
01400 
01401         /* Copy the fileid onto the meta-data page. */
01402         memcpy(metap->uid, dbp->fileid, DB_FILE_ID_LEN);
01403 
01404         if ((t_ret = __memp_fput(dbp->mpf,
01405             metap, ret == 0 ? DB_MPOOL_DIRTY : DB_MPOOL_DISCARD)) != 0 &&
01406             ret == 0)
01407                 ret = t_ret;
01408 
01409         if (ret != 0)
01410                 goto err;
01411 
01412         ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC;
01413 
01414 err:    return (ret);
01415 }
01416 
01417 static int
01418 __fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker, flags)
01419         DB *dbp, *tmpdbp;
01420         DB_TXN *txn;
01421         const char *old, *new, *back;
01422         u_int32_t locker, flags;
01423 {
01424         DB_ENV *dbenv;
01425         DB_FH *fhp;
01426         DB_LOCK elock;
01427         DB_LSN lsn;
01428         DBT fiddbt, namedbt, tmpdbt;
01429         DB_TXN *parent;
01430         char *realold, *realnew;
01431         int ret, t_ret;
01432         u_int8_t mbuf[DBMETASIZE];
01433         u_int32_t child_txnid, dflags;
01434 
01435         DB_ASSERT(txn != NULL);
01436         DB_ASSERT(old != NULL);
01437 
01438         dbenv = dbp->dbenv;
01439         realold = realnew = NULL;
01440         LOCK_INIT(elock);
01441         fhp = NULL;
01442         dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
01443 
01444         if ((ret =
01445             __db_appname(dbenv, DB_APP_DATA, new, 0, NULL, &realnew)) != 0)
01446                 goto err;
01447 
01448         /* Now, lock the name space while we initialize this file. */
01449 retry:  GET_ENVLOCK(dbenv, locker, &elock);
01450         if (__os_exists(realnew, NULL) == 0) {
01451                 /*
01452                  * It is possible that the only reason this file exists is
01453                  * because we've done a previous rename of it and we have
01454                  * left a placeholder here.  We need to check for that case
01455                  * and allow this rename to succeed if that's the case.
01456                  */
01457                 if ((ret = __os_open(dbenv, realnew, 0, 0, &fhp)) != 0)
01458                         goto err;
01459                 if ((ret = __fop_read_meta(dbenv,
01460                     realnew, mbuf, sizeof(mbuf), fhp, 0, NULL)) != 0 ||
01461                     (ret = __db_meta_setup(dbenv,
01462                     tmpdbp, realnew, (DBMETA *)mbuf, 0, 1)) != 0) {
01463                         ret = EEXIST;
01464                         goto err;
01465                 }
01466 
01467                 /*
01468                  * Now, try to acquire the handle lock.  If the handle is locked
01469                  * by our current, transaction, then we'll get it and life is
01470                  * good.
01471                  *
01472                  * Alternately, it's not locked at all, we'll get the lock, but
01473                  * we will realize it exists and consider this an error.
01474                  *
01475                  * However, if it's held by another transaction, then there
01476                  * could be two different scenarios: 1) the file is in the
01477                  * midst of being created or deleted and when that transaction
01478                  * is over, we might be able to proceed. 2) the file is open
01479                  * and exists and we should report an error. In order to
01480                  * distinguish these two cases, we do the following. First, we
01481                  * try to acquire a READLOCK.  If the handle is in the midst of
01482                  * being created, then we'll block because a writelock is held.
01483                  * In that case, we should request a blocking write, and when we
01484                  * get the lock, we should then go back and check to see if the
01485                  * object exists and start all over again.
01486                  *
01487                  * If we got the READLOCK, then either no one is holding the
01488                  * lock or someone has an open handle and the fact that the file
01489                  * exists is problematic.  So, in this case, we request the
01490                  * WRITELOCK non-blocking -- if it succeeds, we're golden.  If
01491                  * it fails, then the file exists and we return EEXIST.
01492                  */
01493                 if ((ret = __fop_lock_handle(dbenv,
01494                     tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) {
01495                         /*
01496                          * Someone holds a writelock.  Try for the WRITELOCK
01497                          * and after we get it, retry.
01498                          */
01499                         if ((ret = __fop_lock_handle(dbenv, tmpdbp,
01500                             locker, DB_LOCK_WRITE, &elock, 0)) != 0)
01501                                 goto err;
01502 
01503                         /*
01504                          * We now have the write lock; release it and start
01505                          * over.
01506                          */
01507                         (void)__lock_put(dbenv, &tmpdbp->handle_lock);
01508                         (void)__db_refresh(tmpdbp, NULL, 0, NULL, 0);
01509                         goto retry;
01510                 } else {
01511                         /* We got the read lock; try to upgrade it. */
01512                         ret = __fop_lock_handle(dbenv,
01513                             tmpdbp, locker, DB_LOCK_WRITE,
01514                             NULL, DB_LOCK_UPGRADE | DB_LOCK_NOWAIT);
01515                         if (ret != 0) {
01516                                 /*
01517                                  * We did not get the writelock, so someone
01518                                  * has the handle open.  This is an error.
01519                                  */
01520                                 (void)__lock_put(dbenv, &tmpdbp->handle_lock);
01521                                 ret = EEXIST;
01522                         } else  if (F_ISSET(tmpdbp, DB_AM_IN_RENAME))
01523                                 /* We got the lock and are renaming it. */
01524                                 ret = 0;
01525                         else { /* We got the lock, but the file exists. */
01526                                 (void)__lock_put(dbenv, &tmpdbp->handle_lock);
01527                                 ret = EEXIST;
01528                         }
01529                 }
01530                 if ((t_ret = __os_closehandle(dbenv, fhp)) != 0 && ret == 0)
01531                         ret = t_ret;
01532                 fhp = NULL;
01533                 if (ret != 0)
01534                         goto err;
01535         }
01536 
01537         /*
01538          * While we have the namespace locked, do the renames and then
01539          * swap for the handle lock.
01540          */
01541         if ((ret = __fop_rename(dbenv,
01542             txn, old, new, dbp->fileid, DB_APP_DATA, dflags)) != 0)
01543                 goto err;
01544         if ((ret = __fop_rename(dbenv,
01545             txn, back, old, tmpdbp->fileid, DB_APP_DATA, dflags)) != 0)
01546                 goto err;
01547         if ((ret = __fop_lock_handle(dbenv,
01548             tmpdbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0)
01549                 goto err;
01550 
01551         /*
01552          * We just acquired a transactional lock on the tmp handle.
01553          * We need to null out the tmp handle's lock so that it
01554          * doesn't create problems for us in the close path.
01555          */
01556         LOCK_INIT(tmpdbp->handle_lock);
01557 
01558         /* Commit the child. */
01559         child_txnid = txn->txnid;
01560         parent = txn->parent;
01561         ret = __txn_commit(txn, 0);
01562         txn = NULL;
01563 
01564         /* Now log the child information in the parent. */
01565         memset(&fiddbt, 0, sizeof(fiddbt));
01566         memset(&tmpdbt, 0, sizeof(fiddbt));
01567         memset(&namedbt, 0, sizeof(namedbt));
01568         fiddbt.data = dbp->fileid;
01569         fiddbt.size = DB_FILE_ID_LEN;
01570         tmpdbt.data = tmpdbp->fileid;
01571         tmpdbt.size = DB_FILE_ID_LEN;
01572         namedbt.data = (void *)old;
01573         namedbt.size = (u_int32_t)strlen(old) + 1;
01574         if ((t_ret = __fop_file_remove_log(dbenv,
01575             parent, &lsn, 0, &fiddbt, &tmpdbt, &namedbt,
01576             (u_int32_t)DB_APP_DATA, child_txnid)) != 0 && ret == 0)
01577                 ret = t_ret;
01578 
01579         /* This is a delayed delete of the dummy file. */
01580         if ((ret = __db_appname(dbenv,
01581             DB_APP_DATA, old, flags, NULL, &realold)) != 0)
01582                 goto err;
01583 
01584         if ((ret = __txn_remevent(dbenv, parent, realold, NULL, 0)) != 0)
01585                 goto err;
01586 
01587 err:    if (txn != NULL)        /* Ret must already be set, so void abort. */
01588                 (void)__txn_abort(txn);
01589 
01590         (void)__ENV_LPUT(dbenv, elock);
01591         if (realnew != NULL)
01592                 __os_free(dbenv, realnew);
01593         if (realold != NULL)
01594                 __os_free(dbenv, realold);
01595         return (ret);
01596 }
01597 
01598 static int
01599 __fop_inmem_swap(olddbp, backdbp, txn, old, new, back, locker)
01600         DB *olddbp, *backdbp;
01601         DB_TXN *txn;
01602         const char *old, *new, *back;
01603         u_int32_t locker;
01604 {
01605         DB_ENV *dbenv;
01606         DB_LOCK elock;
01607         DB_LSN lsn;
01608         DB_TXN *parent;
01609         DBT fid_dbt, n1_dbt, n2_dbt;
01610         DB *tmpdbp;
01611         int ret, t_ret;
01612 
01613         dbenv = olddbp->dbenv;
01614         parent = txn->parent;
01615 retry:  LOCK_INIT(elock);
01616         if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0)
01617                 return (ret);
01618         MAKE_INMEM(tmpdbp);
01619 
01620         GET_ENVLOCK(dbenv, locker, &elock);
01621         if ((ret = __db_dbenv_mpool(tmpdbp, new, 0)) == 0) {
01622                 /*
01623                  * It is possible that the only reason this database exists is
01624                  * because we've done a previous rename of it and we have
01625                  * left a placeholder here.  We need to check for that case
01626                  * and allow this rename to succeed if that's the case.
01627                  */
01628 
01629                 if ((ret = __fop_inmem_read_meta(tmpdbp, new, 0)) != 0) {
01630                         ret = EEXIST;
01631                         goto err;
01632                 }
01633 
01634                 /*
01635                  * Now, try to acquire the handle lock.  If it's from our txn,
01636                  * then we'll get the lock.  If it's not, then someone else has
01637                  * it locked.  See the comments in __fop_ondisk_swap for
01638                  * details.
01639                  */
01640                 if ((ret = __fop_lock_handle(dbenv,
01641                     tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) {
01642                         /*
01643                          * Someone holds a writelock.  Try for the WRITELOCK
01644                          * and after we get it, retry.
01645                          */
01646                         if ((ret = __fop_lock_handle(dbenv, tmpdbp,
01647                             locker, DB_LOCK_WRITE, &elock, 0)) != 0)
01648                                 goto err;
01649 
01650                         /* We now have the write lock; release it and start over. */
01651                         (void)__lock_put(dbenv, &tmpdbp->handle_lock);
01652                         (void)__db_close(tmpdbp, NULL, DB_NOSYNC);
01653                         (void)__ENV_LPUT(dbenv, elock);
01654                         goto retry;
01655                 } else {
01656                         (void)__lock_put(dbenv, &tmpdbp->handle_lock);
01657                         if (!F_ISSET(tmpdbp, DB_AM_IN_RENAME))
01658                                 ret = EEXIST;
01659                 }
01660                 if (ret != 0)
01661                         goto err;
01662         }
01663 
01664         /* Log the renames. */
01665         if (LOGGING_ON(dbenv)) {
01666                 /* Rename old to new. */
01667                 memset(&fid_dbt, 0, sizeof(fid_dbt));
01668                 fid_dbt.data = olddbp->fileid;
01669                 fid_dbt.size = DB_FILE_ID_LEN;
01670                 memset(&n1_dbt, 0, sizeof(n1_dbt));
01671                 n1_dbt.data = (void *)old;
01672                 n1_dbt.size = (u_int32_t)strlen(old) + 1;
01673                 memset(&n2_dbt, 0, sizeof(n2_dbt));
01674                 n2_dbt.data = (void *)new;
01675                 n2_dbt.size = (u_int32_t)strlen(new) + 1;
01676                 if ((ret = __crdel_inmem_rename_log(dbenv, txn, &lsn, 0,
01677                     &n1_dbt, &n2_dbt, &fid_dbt)) != 0)
01678                         goto err;
01679 
01680                 /* Rename back to old */
01681                 fid_dbt.data = backdbp->fileid;
01682                 n2_dbt.data = (char *)back;
01683                 n2_dbt.size = (u_int32_t)strlen(back) + 1;
01684                 if ((ret = __crdel_inmem_rename_log(dbenv, txn, &lsn, 0,
01685                     &n2_dbt, &n1_dbt, &fid_dbt)) != 0)
01686                         goto err;
01687         }
01688 
01689         /*
01690          * While we have the namespace locked, do the renames and then
01691          * swap for the handle lock.   If we ran into a file in the midst
01692          * of rename, then we need to delete it first, else nameop is
01693          * going to consider it an error.
01694          */
01695         if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) {
01696                 if ((ret = __memp_nameop(dbenv,
01697                     tmpdbp->fileid, NULL, new, NULL, 1)) != 0)
01698                         goto err;
01699                 __txn_remrem(dbenv, parent, new);
01700         }
01701 
01702         if ((ret = __memp_nameop(dbenv, olddbp->fileid, new, old, new, 1)) != 0)
01703                 goto err;
01704         if ((ret =
01705             __memp_nameop(dbenv, backdbp->fileid, old, back, old, 1)) != 0)
01706                 goto err;
01707 
01708         if ((ret = __fop_lock_handle(dbenv,
01709             tmpdbp, locker, DB_LOCK_WRITE, &elock, 0)) != 0)
01710                 goto err;
01711 
01712         /*
01713          * We just acquired a transactional lock on the tmp handle.
01714          * We need to null out the tmp handle's lock so that it
01715          * doesn't create problems for us in the close path.
01716          */
01717         LOCK_INIT(tmpdbp->handle_lock);
01718 
01719         DB_ASSERT(txn != NULL);
01720 
01721         /* Commit the child. */
01722         ret = __txn_commit(txn, 0);
01723         txn = NULL;
01724 
01725         if ((ret = __db_inmem_remove(backdbp, parent, old)) != 0)
01726                 goto err;
01727 
01728 err:    (void)__ENV_LPUT(dbenv, elock);
01729 
01730         if (txn != NULL)
01731                 (void)__txn_abort(txn);
01732 
01733         if ((t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0)
01734                 ret = t_ret;
01735 
01736         return (ret);
01737 }

Generated on Sun Dec 25 12:14:28 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2