Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

mp_fopen.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: mp_fopen.c,v 12.16 2005/10/31 02:22:31 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <string.h>
00016 #endif
00017 
00018 #include "db_int.h"
00019 #include "dbinc/db_shash.h"
00020 #include "dbinc/log.h"
00021 #include "dbinc/mp.h"
00022 
00023 /*
00024  * __memp_fopen_pp --
00025  *      DB_MPOOLFILE->open pre/post processing.
00026  *
00027  * PUBLIC: int __memp_fopen_pp
00028  * PUBLIC:     __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t));
00029  */
00030 int
00031 __memp_fopen_pp(dbmfp, path, flags, mode, pagesize)
00032         DB_MPOOLFILE *dbmfp;
00033         const char *path;
00034         u_int32_t flags;
00035         int mode;
00036         size_t pagesize;
00037 {
00038         DB_ENV *dbenv;
00039         DB_THREAD_INFO *ip;
00040         int ret;
00041 
00042         dbenv = dbmfp->dbenv;
00043 
00044         PANIC_CHECK(dbenv);
00045 
00046         /* Validate arguments. */
00047         if ((ret = __db_fchk(dbenv, "DB_MPOOLFILE->open", flags,
00048             DB_CREATE | DB_DIRECT | DB_EXTENT |
00049             DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE)) != 0)
00050                 return (ret);
00051 
00052         /*
00053          * Require a non-zero, power-of-two pagesize, smaller than the
00054          * clear length.
00055          */
00056         if (pagesize == 0 || !POWER_OF_TWO(pagesize)) {
00057                 __db_err(dbenv,
00058                     "DB_MPOOLFILE->open: page sizes must be a power-of-2");
00059                 return (EINVAL);
00060         }
00061         if (dbmfp->clear_len > pagesize) {
00062                 __db_err(dbenv,
00063                     "DB_MPOOLFILE->open: clear length larger than page size");
00064                 return (EINVAL);
00065         }
00066 
00067         /* Read-only checks, and local flag. */
00068         if (LF_ISSET(DB_RDONLY) && path == NULL) {
00069                 __db_err(dbenv,
00070                     "DB_MPOOLFILE->open: temporary files can't be readonly");
00071                 return (EINVAL);
00072         }
00073 
00074         ENV_ENTER(dbenv, ip);
00075         REPLICATION_WRAP(dbenv,
00076             (__memp_fopen(dbmfp, NULL, path, flags, mode, pagesize)), ret);
00077         ENV_LEAVE(dbenv, ip);
00078         return (ret);
00079 }
00080 
00081 /*
00082  * __memp_fopen --
00083  *      DB_MPOOLFILE->open.
00084  *
00085  * PUBLIC: int __memp_fopen __P((DB_MPOOLFILE *,
00086  * PUBLIC:     MPOOLFILE *, const char *, u_int32_t, int, size_t));
00087  */
00088 int
00089 __memp_fopen(dbmfp, mfp, path, flags, mode, pgsize)
00090         DB_MPOOLFILE *dbmfp;
00091         MPOOLFILE *mfp;
00092         const char *path;
00093         u_int32_t flags;
00094         int mode;
00095         size_t pgsize;
00096 {
00097         DB_ENV *dbenv;
00098         DB_MPOOL *dbmp;
00099         DB_MPOOLFILE *tmp_dbmfp;
00100         MPOOL *mp;
00101         db_pgno_t last_pgno;
00102         size_t maxmap;
00103         u_int32_t mbytes, bytes, oflags, pagesize;
00104         int created_fileid, refinc, ret;
00105         char *rpath;
00106         void *p;
00107 
00108         dbenv = dbmfp->dbenv;
00109         dbmp = dbenv->mp_handle;
00110         mp = dbmp->reginfo[0].primary;
00111         created_fileid = refinc = ret = 0;
00112         rpath = NULL;
00113 
00114         /*
00115          * We're keeping the page size as a size_t in the public API, but
00116          * it's a u_int32_t everywhere internally.
00117          */
00118         pagesize = (u_int32_t)pgsize;
00119 
00120         /*
00121          * We're called internally with a specified mfp, in which case the
00122          * path is NULL, but we'll get the path from the underlying region
00123          * information.  Otherwise, if the path is NULL, it's a temporary
00124          * file -- we know we can't join any existing files, and we'll delay
00125          * the open until we actually need to write the file.
00126          */
00127         DB_ASSERT(mfp == NULL || path == NULL);
00128 
00129         /* If this handle is already open, return. */
00130         if (F_ISSET(dbmfp, MP_OPEN_CALLED))
00131                 return (0);
00132 
00133         if (path == NULL && mfp == NULL)
00134                 goto alloc;
00135 
00136         /*
00137          * If there's no backing file, we can join existing files in the cache,
00138          * but there's nothing to read from disk.
00139          */
00140         if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
00141                 goto check_mpoolfile;
00142 
00143         /*
00144          * Our caller may be able to tell us which underlying MPOOLFILE we
00145          * need a handle for.
00146          */
00147         if (mfp != NULL) {
00148                 /*
00149                  * Deadfile can only be set if mpf_cnt goes to zero (or if we
00150                  * failed creating the file DB_AM_DISCARD).  Increment the ref
00151                  * count so the file cannot become dead and be unlinked.
00152                  */
00153                 MUTEX_LOCK(dbenv, mfp->mutex);
00154                 if (!mfp->deadfile) {
00155                         ++mfp->mpf_cnt;
00156                         refinc = 1;
00157                 }
00158                 MUTEX_UNLOCK(dbenv, mfp->mutex);
00159 
00160                 /*
00161                  * Test one last time to see if the file is dead -- it may have
00162                  * been removed.  This happens when a checkpoint trying to open
00163                  * the file to flush a buffer races with the Db::remove method.
00164                  * The error will be ignored, so don't output an error message.
00165                  */
00166                 if (mfp->deadfile)
00167                         return (EINVAL);
00168         }
00169 
00170         /* Convert MP open flags to DB OS-layer open flags. */
00171         oflags = 0;
00172         if (LF_ISSET(DB_CREATE))
00173                 oflags |= DB_OSO_CREATE;
00174         if (LF_ISSET(DB_DIRECT))
00175                 oflags |= DB_OSO_DIRECT;
00176         if (LF_ISSET(DB_RDONLY)) {
00177                 F_SET(dbmfp, MP_READONLY);
00178                 oflags |= DB_OSO_RDONLY;
00179         }
00180 
00181         /*
00182          * XXX
00183          * A grievous layering violation, the DB_DSYNC_DB flag was left in
00184          * the DB_ENV structure and not driven through the cache API.  This
00185          * needs to be fixed when the general API configuration is fixed.
00186          */
00187         if (F_ISSET(dbenv, DB_ENV_DSYNC_DB))
00188                 oflags |= DB_OSO_DSYNC;
00189 
00190         /*
00191          * Get the real name for this file and open it.
00192          *
00193          * Supply a page size so os_open can decide whether to turn buffering
00194          * off if the DB_DIRECT_DB flag is set.
00195          *
00196          * Acquire the region lock if we're using a path from an underlying
00197          * MPOOLFILE -- there's a race in accessing the path name stored in
00198          * the region, __memp_nameop may be simultaneously renaming the file.
00199          */
00200         if (mfp != NULL) {
00201                 MPOOL_SYSTEM_LOCK(dbenv);
00202                 path = R_ADDR(dbmp->reginfo, mfp->path_off);
00203         }
00204         if ((ret =
00205             __db_appname(dbenv, DB_APP_DATA, path, 0, NULL, &rpath)) == 0)
00206                 ret = __os_open_extend(dbenv,
00207                     rpath, (u_int32_t)pagesize, oflags, mode, &dbmfp->fhp);
00208         if (mfp != NULL)
00209                 MPOOL_SYSTEM_UNLOCK(dbenv);
00210 
00211         if (ret != 0) {
00212                 /* If it's a Queue extent file, it may not exist, that's OK. */
00213                 if (!LF_ISSET(DB_EXTENT))
00214                         __db_err(dbenv, "%s: %s", rpath, db_strerror(ret));
00215                 goto err;
00216         }
00217 
00218         /*
00219          * Cache file handles are shared, and have mutexes to protect the
00220          * underlying file handle across seek and read/write calls.
00221          */
00222         dbmfp->fhp->ref = 1;
00223         if ((ret = __mutex_alloc(
00224             dbenv, MTX_MPOOL_FH, DB_MUTEX_THREAD, &dbmfp->fhp->mtx_fh)) != 0)
00225                 goto err;
00226 
00227         /*
00228          * Figure out the file's size.
00229          *
00230          * !!!
00231          * We can't use off_t's here, or in any code in the mainline library
00232          * for that matter.  (We have to use them in the os stubs, of course,
00233          * as there are system calls that take them as arguments.)  The reason
00234          * is some customers build in environments where an off_t is 32-bits,
00235          * but still run where offsets are 64-bits, and they pay us a lot of
00236          * money.
00237          */
00238         if ((ret = __os_ioinfo(
00239             dbenv, rpath, dbmfp->fhp, &mbytes, &bytes, NULL)) != 0) {
00240                 __db_err(dbenv, "%s: %s", rpath, db_strerror(ret));
00241                 goto err;
00242         }
00243 
00244         /*
00245          * Get the file id if we weren't given one.  Generated file id's
00246          * don't use timestamps, otherwise there'd be no chance of any
00247          * other process joining the party.
00248          */
00249         if (!F_ISSET(dbmfp, MP_FILEID_SET)) {
00250                 if  ((ret = __os_fileid(dbenv, rpath, 0, dbmfp->fileid)) != 0)
00251                         goto err;
00252                 created_fileid = 1;
00253         }
00254 
00255         if (mfp != NULL)
00256                 goto have_mfp;
00257 
00258 check_mpoolfile:
00259         /*
00260          * Walk the list of MPOOLFILE's, looking for a matching file.
00261          *
00262          * The fileID is a filesystem unique number (e.g., a UNIX dev/inode
00263          * pair) plus a timestamp.  If files are removed and created in less
00264          * than a second, the fileID can be repeated.  The problem with
00265          * repetition happens when the file that previously had the fileID
00266          * value still has pages in the pool, since we don't want to use them
00267          * to satisfy requests for the new file.
00268          *
00269          * Because the DB_TRUNCATE flag reuses the dev/inode pair, repeated
00270          * opens with that flag set guarantees matching fileIDs when the
00271          * machine can open a file and then re-open with truncate within a
00272          * second.  For this reason, we pass that flag down, and, if we find
00273          * a matching entry, we ensure that it's never found again, and we
00274          * create a new entry for the current request.
00275          */
00276         MPOOL_SYSTEM_LOCK(dbenv);
00277         for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
00278             mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
00279                 /* Skip dead files and temporary files. */
00280                 if (mfp->deadfile || F_ISSET(mfp, MP_TEMP))
00281                         continue;
00282 
00283                 /*
00284                  * Any remaining DB_MPOOL_NOFILE databases are in-memory
00285                  * named databases and need only match other in-memory
00286                  * databases with the same name.
00287                  */
00288                 if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) {
00289                         if (!mfp->no_backing_file)
00290                                 continue;
00291 
00292                         if (strcmp(path, R_ADDR(dbmp->reginfo, mfp->path_off)))
00293                                 continue;
00294 
00295                         /*
00296                          * We matched an in-memory file; grab the fileid if
00297                          * it is set in the region, but not in the dbmfp.
00298                          */
00299                         if (!F_ISSET(dbmfp, MP_FILEID_SET))
00300                                 __memp_set_fileid(dbmfp,
00301                                     R_ADDR(dbmp->reginfo, mfp->fileid_off));
00302                 } else
00303                         if (memcmp(dbmfp->fileid, R_ADDR(dbmp->reginfo,
00304                             mfp->fileid_off), DB_FILE_ID_LEN) != 0)
00305                                 continue;
00306 
00307                 /*
00308                  * If the file is being truncated, remove it from the system
00309                  * and create a new entry.
00310                  *
00311                  * !!!
00312                  * We should be able to set mfp to NULL and break out of the
00313                  * loop, but I like the idea of checking all the entries.
00314                  */
00315                 if (LF_ISSET(DB_TRUNCATE)) {
00316                         MUTEX_LOCK(dbenv, mfp->mutex);
00317                         mfp->deadfile = 1;
00318                         MUTEX_UNLOCK(dbenv, mfp->mutex);
00319                         continue;
00320                 }
00321 
00322                 /*
00323                  * Some things about a file cannot be changed: the clear length,
00324                  * page size, or LSN location.  However, if this is an attempt
00325                  * to open a named in-memory file, we may not yet have that
00326                  * information. so accept uninitialized entries.
00327                  *
00328                  * The file type can change if the application's pre- and post-
00329                  * processing needs change.  For example, an application that
00330                  * created a hash subdatabase in a database that was previously
00331                  * all btree.
00332                  *
00333                  * !!!
00334                  * We do not check to see if the pgcookie information changed,
00335                  * or update it if it is.
00336                  */
00337                 if ((dbmfp->clear_len != DB_CLEARLEN_NOTSET &&
00338                     mfp->clear_len != DB_CLEARLEN_NOTSET &&
00339                     dbmfp->clear_len != mfp->clear_len) ||
00340                     (pagesize != 0 && pagesize != mfp->stat.st_pagesize) ||
00341                     (dbmfp->lsn_offset != -1 &&
00342                     mfp->lsn_off != DB_LSN_OFF_NOTSET &&
00343                     dbmfp->lsn_offset != mfp->lsn_off)) {
00344                         __db_err(dbenv,
00345                     "%s: clear length, page size or LSN location changed",
00346                             path);
00347                         MPOOL_SYSTEM_UNLOCK(dbenv);
00348                         ret = EINVAL;
00349                         goto err;
00350                 }
00351 
00352                 /*
00353                  * Check to see if this file has died while we waited.
00354                  *
00355                  * We normally don't lock the deadfile field when we read it as
00356                  * we only care if the field is zero or non-zero.  We do lock
00357                  * on read when searching for a matching MPOOLFILE so that two
00358                  * threads of control don't race between setting the deadfile
00359                  * bit and incrementing the reference count, that is, a thread
00360                  * of control decrementing the reference count and then setting
00361                  * deadfile because the reference count is 0 blocks us finding
00362                  * the file without knowing it's about to be marked dead.
00363                  */
00364                 MUTEX_LOCK(dbenv, mfp->mutex);
00365                 if (mfp->deadfile) {
00366                         MUTEX_UNLOCK(dbenv, mfp->mutex);
00367                         continue;
00368                 }
00369                 ++mfp->mpf_cnt;
00370                 refinc = 1;
00371                 MUTEX_UNLOCK(dbenv, mfp->mutex);
00372 
00373                 /* Initialize any fields that are not yet set. */
00374                 if (dbmfp->ftype != 0)
00375                         mfp->ftype = dbmfp->ftype;
00376                 if (dbmfp->clear_len != DB_CLEARLEN_NOTSET)
00377                         mfp->clear_len = dbmfp->clear_len;
00378                 if (dbmfp->lsn_offset != -1)
00379                         mfp->lsn_off = dbmfp->lsn_offset;
00380 
00381                 break;
00382         }
00383         MPOOL_SYSTEM_UNLOCK(dbenv);
00384 
00385         if (mfp != NULL)
00386                 goto have_mfp;
00387 
00388 alloc:  /*
00389          * If we get here and we created a FILEID, then it's OK to set
00390          * the dbmfp as having its FILEID_SET, because we aren't trying
00391          * to match an existing file in the mpool.
00392          */
00393         if (created_fileid)
00394                 F_SET(dbmfp, MP_FILEID_SET);
00395         /*
00396          * If we didn't find the file and this is an in-memory file, then
00397          *  the create flag should be set.
00398          */
00399         if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE) &&
00400             !LF_ISSET(DB_CREATE)) {
00401                 ret = ENOENT;
00402                 goto err;
00403         }
00404 
00405         /* Allocate and initialize a new MPOOLFILE. */
00406         if ((ret = __memp_alloc(
00407             dbmp, dbmp->reginfo, NULL, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
00408                 goto err;
00409         memset(mfp, 0, sizeof(MPOOLFILE));
00410         mfp->mpf_cnt = 1;
00411         mfp->ftype = dbmfp->ftype;
00412         mfp->stat.st_pagesize = pagesize;
00413         mfp->lsn_off = dbmfp->lsn_offset;
00414         mfp->clear_len = dbmfp->clear_len;
00415         mfp->priority = dbmfp->priority;
00416         if (dbmfp->gbytes != 0 || dbmfp->bytes != 0) {
00417                 mfp->maxpgno = (db_pgno_t)
00418                     (dbmfp->gbytes * (GIGABYTE / mfp->stat.st_pagesize));
00419                 mfp->maxpgno += (db_pgno_t)
00420                     ((dbmfp->bytes + mfp->stat.st_pagesize - 1) /
00421                     mfp->stat.st_pagesize);
00422         }
00423         if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
00424                 mfp->no_backing_file = 1;
00425         if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_UNLINK))
00426                 mfp->unlink_on_close = 1;
00427 
00428         if (LF_ISSET(DB_DURABLE_UNKNOWN | DB_RDONLY))
00429                 F_SET(mfp, MP_DURABLE_UNKNOWN);
00430         if (LF_ISSET(DB_DIRECT))
00431                 F_SET(mfp, MP_DIRECT);
00432         if (LF_ISSET(DB_EXTENT))
00433                 F_SET(mfp, MP_EXTENT);
00434         if (LF_ISSET(DB_TXN_NOT_DURABLE))
00435                 F_SET(mfp, MP_NOT_DURABLE);
00436         F_SET(mfp, MP_CAN_MMAP);
00437 
00438         /*
00439          * An in-memory database with no name is a temp file.  Named
00440          * in-memory databases get an artificially  bumped reference
00441          * count so they don't disappear on close; they need a remove
00442          * to make them disappear.
00443          */
00444         if (path == NULL)
00445                 F_SET(mfp, MP_TEMP);
00446         else if (FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
00447                 mfp->mpf_cnt++;
00448 
00449         if (path != NULL && !FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE)) {
00450                 /*
00451                  * Don't permit files that aren't a multiple of the pagesize,
00452                  * and find the number of the last page in the file, all the
00453                  * time being careful not to overflow 32 bits.
00454                  *
00455                  * During verify or recovery, we might have to cope with a
00456                  * truncated file; if the file size is not a multiple of the
00457                  * page size, round down to a page, we'll take care of the
00458                  * partial page outside the mpool system.
00459                  */
00460                 if (bytes % pagesize != 0) {
00461                         if (LF_ISSET(DB_ODDFILESIZE))
00462                                 bytes -= (u_int32_t)(bytes % pagesize);
00463                         else {
00464                                 __db_err(dbenv,
00465                     "%s: file size not a multiple of the pagesize", rpath);
00466                                 ret = EINVAL;
00467                                 goto err;
00468                         }
00469                 }
00470 
00471                 /*
00472                  * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a
00473                  * page get, we have to increment the last page in the file.
00474                  * Figure it out and save it away.
00475                  *
00476                  * Note correction: page numbers are zero-based, not 1-based.
00477                  */
00478                 last_pgno = (db_pgno_t)(mbytes * (MEGABYTE / pagesize));
00479                 last_pgno += (db_pgno_t)(bytes / pagesize);
00480                 if (last_pgno != 0)
00481                         --last_pgno;
00482                 mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
00483 
00484                 /*
00485                  * Get the file ID if we weren't given one.  Generated file ID's
00486                  * don't use timestamps, otherwise there'd be no chance of any
00487                  * other process joining the party.
00488                  */
00489                 if (!F_ISSET(dbmfp, MP_FILEID_SET) &&
00490                     (ret = __os_fileid(dbenv, rpath, 0, dbmfp->fileid)) != 0)
00491                         goto err;
00492 
00493         }
00494 
00495         /* Copy the file identification string into shared memory. */
00496         if (F_ISSET(dbmfp, MP_FILEID_SET)) {
00497                 if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
00498                     NULL, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
00499                         goto err;
00500                 memcpy(p, dbmfp->fileid, DB_FILE_ID_LEN);
00501         }
00502 
00503         /* Copy the file path into shared memory. */
00504         if (path != NULL) {
00505                 if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
00506                     NULL, strlen(path) + 1, &mfp->path_off, &p)) != 0)
00507                         goto err;
00508                 memcpy(p, path, strlen(path) + 1);
00509         }
00510 
00511         /* Copy the page cookie into shared memory. */
00512         if (dbmfp->pgcookie == NULL || dbmfp->pgcookie->size == 0) {
00513                 mfp->pgcookie_len = 0;
00514                 mfp->pgcookie_off = 0;
00515         } else {
00516                 if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
00517                     NULL, dbmfp->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
00518                         goto err;
00519                 memcpy(p, dbmfp->pgcookie->data, dbmfp->pgcookie->size);
00520                 mfp->pgcookie_len = dbmfp->pgcookie->size;
00521         }
00522 
00523         if ((ret =
00524             __mutex_alloc(dbenv, MTX_MPOOLFILE_HANDLE, 0, &mfp->mutex)) != 0)
00525                 goto err;
00526 
00527         /*
00528          * Prepend the MPOOLFILE to the list of MPOOLFILE's.
00529          */
00530         MPOOL_SYSTEM_LOCK(dbenv);
00531         SH_TAILQ_INSERT_HEAD(&mp->mpfq, mfp, q, __mpoolfile);
00532         MPOOL_SYSTEM_UNLOCK(dbenv);
00533 
00534 have_mfp:
00535         /*
00536          * We need to verify that all handles open a file either durable or not
00537          * durable.  This needs to be cross process and cross sub-databases, so
00538          * mpool is the place to do it.
00539          */
00540         if (!LF_ISSET(DB_DURABLE_UNKNOWN | DB_RDONLY)) {
00541                 if (F_ISSET(mfp, MP_DURABLE_UNKNOWN)) {
00542                         if (LF_ISSET(MP_NOT_DURABLE))
00543                                 F_SET(mfp, MP_NOT_DURABLE);
00544                         F_CLR(mfp, MP_DURABLE_UNKNOWN);
00545                 } else if (!LF_ISSET(DB_TXN_NOT_DURABLE) !=
00546                     !F_ISSET(mfp, MP_NOT_DURABLE)) {
00547                         __db_err(dbenv,
00548              "Cannot open DURABLE and NOT DURABLE handles in the same file");
00549                         ret = EINVAL;
00550                         goto err;
00551                 }
00552         }
00553         /*
00554          * All paths to here have initialized the mfp variable to reference
00555          * the selected (or allocated) MPOOLFILE.
00556          */
00557         dbmfp->mfp = mfp;
00558 
00559         /*
00560          * Check to see if we can mmap the file.  If a file:
00561          *      + isn't temporary
00562          *      + is read-only
00563          *      + doesn't require any pgin/pgout support
00564          *      + the DB_NOMMAP flag wasn't set (in either the file open or
00565          *        the environment in which it was opened)
00566          *      + and is less than mp_mmapsize bytes in size
00567          *
00568          * we can mmap it instead of reading/writing buffers.  Don't do error
00569          * checking based on the mmap call failure.  We want to do normal I/O
00570          * on the file if the reason we failed was because the file was on an
00571          * NFS mounted partition, and we can fail in buffer I/O just as easily
00572          * as here.
00573          *
00574          * We'd like to test to see if the file is too big to mmap.  Since we
00575          * don't know what size or type off_t's or size_t's are, or the largest
00576          * unsigned integral type is, or what random insanity the local C
00577          * compiler will perpetrate, doing the comparison in a portable way is
00578          * flatly impossible.  Hope that mmap fails if the file is too large.
00579          */
00580 #define DB_MAXMMAPSIZE  (10 * 1024 * 1024)      /* 10 MB. */
00581         if (F_ISSET(mfp, MP_CAN_MMAP)) {
00582                 maxmap = dbenv->mp_mmapsize == 0 ?
00583                     DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
00584                 if (path == NULL ||
00585                     FLD_ISSET(dbmfp->config_flags, DB_MPOOL_NOFILE))
00586                         F_CLR(mfp, MP_CAN_MMAP);
00587                 else if (!F_ISSET(dbmfp, MP_READONLY))
00588                         F_CLR(mfp, MP_CAN_MMAP);
00589                 else if (dbmfp->ftype != 0)
00590                         F_CLR(mfp, MP_CAN_MMAP);
00591                 else if (LF_ISSET(DB_NOMMAP) || F_ISSET(dbenv, DB_ENV_NOMMAP))
00592                         F_CLR(mfp, MP_CAN_MMAP);
00593                 else {
00594                         MPOOL_SYSTEM_LOCK(dbenv);
00595                         maxmap = mp->mp_mmapsize == 0 ?
00596                             DB_MAXMMAPSIZE : mp->mp_mmapsize;
00597                         MPOOL_SYSTEM_UNLOCK(dbenv);
00598                         if (mbytes > maxmap / MEGABYTE ||
00599                             (mbytes == maxmap / MEGABYTE &&
00600                             bytes >= maxmap % MEGABYTE))
00601                                 F_CLR(mfp, MP_CAN_MMAP);
00602                 }
00603 
00604                 dbmfp->addr = NULL;
00605                 if (F_ISSET(mfp, MP_CAN_MMAP)) {
00606                         dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
00607                         if (__os_mapfile(dbenv, rpath,
00608                             dbmfp->fhp, dbmfp->len, 1, &dbmfp->addr) != 0) {
00609                                 dbmfp->addr = NULL;
00610                                 F_CLR(mfp, MP_CAN_MMAP);
00611                         }
00612                 }
00613         }
00614 
00615         F_SET(dbmfp, MP_OPEN_CALLED);
00616 
00617         /*
00618          * Share the underlying file descriptor if that's possible.
00619          *
00620          * Add the file to the process' list of DB_MPOOLFILEs.
00621          */
00622         MUTEX_LOCK(dbenv, dbmp->mutex);
00623 
00624         if (dbmfp->fhp != NULL)
00625                 for (tmp_dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
00626                     tmp_dbmfp != NULL; tmp_dbmfp = TAILQ_NEXT(tmp_dbmfp, q))
00627                         if (dbmfp->mfp == tmp_dbmfp->mfp &&
00628                             (F_ISSET(dbmfp, MP_READONLY) ||
00629                             !F_ISSET(tmp_dbmfp, MP_READONLY))) {
00630                                 __mutex_free(dbenv, &dbmfp->fhp->mtx_fh);
00631                                 (void)__os_closehandle(dbenv, dbmfp->fhp);
00632                                 ++tmp_dbmfp->fhp->ref;
00633                                 dbmfp->fhp = tmp_dbmfp->fhp;
00634                                 break;
00635                         }
00636 
00637         TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
00638 
00639         MUTEX_UNLOCK(dbenv, dbmp->mutex);
00640 
00641         if (0) {
00642 err:            if (refinc) {
00643                         /*
00644                          * If mpf_cnt goes to zero here and unlink_on_close is
00645                          * set, then we missed the last close, but there was an
00646                          * error trying to open the file, so we probably cannot
00647                          * unlink it anyway.
00648                          */
00649                         MUTEX_LOCK(dbenv, mfp->mutex);
00650                         --mfp->mpf_cnt;
00651                         MUTEX_UNLOCK(dbenv, mfp->mutex);
00652                 }
00653 
00654         }
00655         if (rpath != NULL)
00656                 __os_free(dbenv, rpath);
00657         return (ret);
00658 }
00659 
00660 /*
00661  * memp_fclose_pp --
00662  *      DB_MPOOLFILE->close pre/post processing.
00663  *
00664  * PUBLIC: int __memp_fclose_pp __P((DB_MPOOLFILE *, u_int32_t));
00665  */
00666 int
00667 __memp_fclose_pp(dbmfp, flags)
00668         DB_MPOOLFILE *dbmfp;
00669         u_int32_t flags;
00670 {
00671         DB_ENV *dbenv;
00672         DB_THREAD_INFO *ip;
00673         int ret;
00674 
00675         dbenv = dbmfp->dbenv;
00676 
00677         /*
00678          * Validate arguments, but as a handle destructor, we can't fail.
00679          *
00680          * !!!
00681          * DB_MPOOL_DISCARD: Undocumented flag: DB private.
00682          */
00683         (void)__db_fchk(dbenv, "DB_MPOOLFILE->close", flags, DB_MPOOL_DISCARD);
00684 
00685         ENV_ENTER(dbenv, ip);
00686         REPLICATION_WRAP(dbenv, (__memp_fclose(dbmfp, flags)), ret);
00687         ENV_LEAVE(dbenv, ip);
00688         return (ret);
00689 }
00690 
00691 /*
00692  * __memp_fclose --
00693  *      DB_MPOOLFILE->close.
00694  *
00695  * PUBLIC: int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t));
00696  */
00697 int
00698 __memp_fclose(dbmfp, flags)
00699         DB_MPOOLFILE *dbmfp;
00700         u_int32_t flags;
00701 {
00702         DB_ENV *dbenv;
00703         DB_MPOOL *dbmp;
00704         MPOOLFILE *mfp;
00705         char *rpath;
00706         u_int32_t ref;
00707         int deleted, ret, t_ret;
00708 
00709         dbenv = dbmfp->dbenv;
00710         dbmp = dbenv->mp_handle;
00711         ret = 0;
00712 
00713         /*
00714          * Remove the DB_MPOOLFILE from the process' list.
00715          *
00716          * It's possible the underlying mpool cache may never have been created.
00717          * In that case, all we have is a structure, discard it.
00718          *
00719          * It's possible the DB_MPOOLFILE was never added to the DB_MPOOLFILE
00720          * file list, check the MP_OPEN_CALLED flag to be sure.
00721          */
00722         if (dbmp == NULL)
00723                 goto done;
00724 
00725         MUTEX_LOCK(dbenv, dbmp->mutex);
00726 
00727         DB_ASSERT(dbmfp->ref >= 1);
00728         if ((ref = --dbmfp->ref) == 0 && F_ISSET(dbmfp, MP_OPEN_CALLED))
00729                 TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
00730 
00731         /*
00732          * Decrement the file descriptor's ref count -- if we're the last ref,
00733          * we'll discard the file descriptor.
00734          */
00735         if (ref == 0 && dbmfp->fhp != NULL && --dbmfp->fhp->ref > 0)
00736                 dbmfp->fhp = NULL;
00737         MUTEX_UNLOCK(dbenv, dbmp->mutex);
00738         if (ref != 0)
00739                 return (0);
00740 
00741         /* Complain if pinned blocks never returned. */
00742         if (dbmfp->pinref != 0) {
00743                 __db_err(dbenv, "%s: close: %lu blocks left pinned",
00744                     __memp_fn(dbmfp), (u_long)dbmfp->pinref);
00745                 ret = __db_panic(dbenv, DB_RUNRECOVERY);
00746         }
00747 
00748         /* Discard any mmap information. */
00749         if (dbmfp->addr != NULL &&
00750             (ret = __os_unmapfile(dbenv, dbmfp->addr, dbmfp->len)) != 0)
00751                 __db_err(dbenv, "%s: %s", __memp_fn(dbmfp), db_strerror(ret));
00752 
00753         /*
00754          * Close the file and discard the descriptor structure; temporary
00755          * files may not yet have been created.
00756          */
00757         if (dbmfp->fhp != NULL) {
00758                 if ((t_ret =
00759                     __mutex_free(dbenv, &dbmfp->fhp->mtx_fh)) != 0 && ret == 0)
00760                         ret = t_ret;
00761                 if ((t_ret = __os_closehandle(dbenv, dbmfp->fhp)) != 0) {
00762                         __db_err(dbenv, "%s: %s",
00763                             __memp_fn(dbmfp), db_strerror(t_ret));
00764                         if (ret == 0)
00765                                 ret = t_ret;
00766                 }
00767                 dbmfp->fhp = NULL;
00768         }
00769 
00770         /*
00771          * Discard our reference on the underlying MPOOLFILE, and close it
00772          * if it's no longer useful to anyone.  It possible the open of the
00773          * file never happened or wasn't successful, in which case, mpf will
00774          * be NULL and MP_OPEN_CALLED will not be set.
00775          */
00776         mfp = dbmfp->mfp;
00777         DB_ASSERT((F_ISSET(dbmfp, MP_OPEN_CALLED) && mfp != NULL) ||
00778             (!F_ISSET(dbmfp, MP_OPEN_CALLED) && mfp == NULL));
00779         if (!F_ISSET(dbmfp, MP_OPEN_CALLED))
00780                 goto done;
00781 
00782         /*
00783          * If it's a temp file, all outstanding references belong to unflushed
00784          * buffers.  (A temp file can only be referenced by one DB_MPOOLFILE).
00785          * We don't care about preserving any of those buffers, so mark the
00786          * MPOOLFILE as dead so that even the dirty ones just get discarded
00787          * when we try to flush them.
00788          */
00789         deleted = 0;
00790         MUTEX_LOCK(dbenv, mfp->mutex);
00791         if (--mfp->mpf_cnt == 0 || LF_ISSET(DB_MPOOL_DISCARD)) {
00792                 if (LF_ISSET(DB_MPOOL_DISCARD) ||
00793                     F_ISSET(mfp, MP_TEMP) || mfp->unlink_on_close) {
00794                         mfp->deadfile = 1;
00795                 }
00796                 if (mfp->unlink_on_close) {
00797                         if ((t_ret = __db_appname(dbmp->dbenv,
00798                             DB_APP_DATA, R_ADDR(dbmp->reginfo,
00799                             mfp->path_off), 0, NULL, &rpath)) != 0 && ret == 0)
00800                                 ret = t_ret;
00801                         if (t_ret == 0) {
00802                                 if ((t_ret = __os_unlink(
00803                                     dbmp->dbenv, rpath)) != 0 && ret == 0)
00804                                         ret = t_ret;
00805                                 __os_free(dbenv, rpath);
00806                         }
00807                 }
00808                 if (mfp->block_cnt == 0) {
00809                         if ((t_ret =
00810                             __memp_mf_discard(dbmp, mfp)) != 0 && ret == 0)
00811                                 ret = t_ret;
00812                         deleted = 1;
00813                 }
00814         }
00815         if (deleted == 0)
00816                 MUTEX_UNLOCK(dbenv, mfp->mutex);
00817 
00818 done:   /* Discard the DB_MPOOLFILE structure. */
00819         if (dbmfp->pgcookie != NULL) {
00820                 __os_free(dbenv, dbmfp->pgcookie->data);
00821                 __os_free(dbenv, dbmfp->pgcookie);
00822         }
00823         __os_free(dbenv, dbmfp);
00824 
00825         return (ret);
00826 }
00827 
00828 /*
00829  * __memp_mf_discard --
00830  *      Discard an MPOOLFILE.
00831  *
00832  * PUBLIC: int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *));
00833  */
00834 int
00835 __memp_mf_discard(dbmp, mfp)
00836         DB_MPOOL *dbmp;
00837         MPOOLFILE *mfp;
00838 {
00839         DB_ENV *dbenv;
00840         DB_MPOOL_STAT *sp;
00841         MPOOL *mp;
00842         int need_sync, ret, t_ret;
00843 
00844         dbenv = dbmp->dbenv;
00845         mp = dbmp->reginfo[0].primary;
00846         ret = 0;
00847 
00848         /*
00849          * Expects caller to be holding the MPOOLFILE mutex.
00850          *
00851          * When discarding a file, we have to flush writes from it to disk.
00852          * The scenario is that dirty buffers from this file need to be
00853          * flushed to satisfy a future checkpoint, but when the checkpoint
00854          * calls mpool sync, the sync code won't know anything about them.
00855          * Ignore files not written, discarded, or only temporary.
00856          */
00857         need_sync =
00858            mfp->file_written && !mfp->deadfile && !F_ISSET(mfp, MP_TEMP);
00859 
00860         /*
00861          * We have to release the MPOOLFILE mutex before acquiring the region
00862          * mutex so we don't deadlock.  Make sure nobody ever looks at this
00863          * structure again.
00864          */
00865         mfp->deadfile = 1;
00866 
00867         /* Discard the mutex we're holding and return it too the pool. */
00868         MUTEX_UNLOCK(dbenv, mfp->mutex);
00869         if ((t_ret = __mutex_free(dbenv, &mfp->mutex)) != 0 && ret == 0)
00870                 ret = t_ret;
00871 
00872         /* Lock the region and delete from the list of MPOOLFILEs. */
00873         MPOOL_SYSTEM_LOCK(dbenv);
00874         SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
00875 
00876         if (need_sync &&
00877             (t_ret = __memp_mf_sync(dbmp, mfp, 1)) != 0 && ret == 0)
00878                 ret = t_ret;
00879 
00880         /* Copy the statistics into the region. */
00881         sp = &mp->stat;
00882         sp->st_cache_hit += mfp->stat.st_cache_hit;
00883         sp->st_cache_miss += mfp->stat.st_cache_miss;
00884         sp->st_map += mfp->stat.st_map;
00885         sp->st_page_create += mfp->stat.st_page_create;
00886         sp->st_page_in += mfp->stat.st_page_in;
00887         sp->st_page_out += mfp->stat.st_page_out;
00888 
00889         /* Free the space. */
00890         if (mfp->path_off != 0)
00891                 __db_shalloc_free(&dbmp->reginfo[0],
00892                     R_ADDR(dbmp->reginfo, mfp->path_off));
00893         if (mfp->fileid_off != 0)
00894                 __db_shalloc_free(&dbmp->reginfo[0],
00895                     R_ADDR(dbmp->reginfo, mfp->fileid_off));
00896         if (mfp->pgcookie_off != 0)
00897                 __db_shalloc_free(&dbmp->reginfo[0],
00898                     R_ADDR(dbmp->reginfo, mfp->pgcookie_off));
00899         __db_shalloc_free(&dbmp->reginfo[0], mfp);
00900 
00901         MPOOL_SYSTEM_UNLOCK(dbenv);
00902 
00903         return (ret);
00904 }
00905 
00906 /*
00907  * __memp_inmemlist --
00908  *      Return a list of the named in-memory databases.
00909  *
00910  * PUBLIC: int __memp_inmemlist __P((DB_ENV *, char ***, int *));
00911  */
00912 int
00913 __memp_inmemlist(dbenv, namesp, cntp)
00914         DB_ENV *dbenv;
00915         char ***namesp;
00916         int *cntp;
00917 {
00918         DB_MPOOL *dbmp;
00919         MPOOL *mp;
00920         MPOOLFILE *mfp;
00921 
00922         int arraysz, cnt, ret;
00923         char **names;
00924 
00925         names = NULL;
00926         dbmp = dbenv->mp_handle;
00927         mp = dbmp->reginfo[0].primary;
00928 
00929         arraysz = cnt = 0;
00930         MPOOL_SYSTEM_LOCK(dbenv);
00931         for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
00932             mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
00933                 /* Skip dead files and temporary files. */
00934                 if (mfp->deadfile || F_ISSET(mfp, MP_TEMP))
00935                         continue;
00936 
00937                 /* Skip entries that allow files. */
00938                 if (!mfp->no_backing_file)
00939                         continue;
00940 
00941                 /* We found one. */
00942                 if (cnt >= arraysz) {
00943                         arraysz += 100;
00944                         if ((ret = __os_realloc(dbenv,
00945                             (u_int)arraysz * sizeof(names[0]), &names)) != 0)
00946                                 goto nomem;
00947                 }
00948                 if ((ret = __os_strdup(dbenv,
00949                     R_ADDR(dbmp->reginfo, mfp->path_off), &names[cnt])) != 0)
00950                         goto nomem;
00951 
00952                 cnt++;
00953         }
00954         MPOOL_SYSTEM_UNLOCK(dbenv);
00955         *namesp = names;
00956         *cntp = cnt;
00957         return (0);
00958 
00959 nomem:  MPOOL_SYSTEM_UNLOCK(dbenv);
00960         if (names != NULL) {
00961                 while (--cnt >= 0)
00962                         __os_free(dbenv, names[cnt]);
00963                 __os_free(dbenv, names);
00964         }
00965 
00966         /* Make sure we don't return any garbage. */
00967         *cntp = 0;
00968         *namesp = NULL;
00969         return (ret);
00970 }

Generated on Sun Dec 25 12:14:41 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2