Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

os_map.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: os_map.c,v 12.3 2005/07/21 01:36:18 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 #ifdef HAVE_MMAP
00015 #include <sys/mman.h>
00016 #endif
00017 
00018 #ifdef HAVE_SHMGET
00019 #include <sys/ipc.h>
00020 #include <sys/shm.h>
00021 #endif
00022 
00023 #include <string.h>
00024 #endif
00025 
00026 #include "db_int.h"
00027 
00028 #ifdef HAVE_MMAP
00029 static int __os_map __P((DB_ENV *, char *, DB_FH *, size_t, int, int, void **));
00030 #endif /* HAVE_MMAP */
00031 #ifndef HAVE_SHMGET
00032 static int __db_nosystemmem __P((DB_ENV *));
00033 #endif /* !HAVE_SHMGET */
00034 
00035 /*
00036  * __os_r_sysattach --
00037  *      Create/join a shared memory region; on success infop->addr is set.
00038  *      Returns 0, or a non-zero error (errno value, EINVAL, EAGAIN, DB_OPNOTSUP).
00039  * PUBLIC: int __os_r_sysattach __P((DB_ENV *, REGINFO *, REGION *));
00040  */
00041 int
00042 __os_r_sysattach(dbenv, infop, rp)
00043         DB_ENV *dbenv;
00044         REGINFO *infop;
00045         REGION *rp;
00046 {
00047         if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
00048                 /*
00049                  * If the region is in system memory on UNIX, we use shmget(2).
00050                  *
00051                  * !!!
00052                  * There exist spinlocks that don't work in shmget memory, e.g.,
00053                  * the HP/UX msemaphore interface.  If we don't have locks that
00054                  * will work in shmget memory, we better be private and not be
00055                  * threaded.  If we reach this point, we know we're public, so
00056                  * it's an error.
00057                  */
00058 #if defined(HAVE_MUTEX_HPPA_MSEM_INIT)
00059                 __db_err(dbenv,
00060             "architecture does not support locks inside system shared memory");
00061                 return (EINVAL);
00062 #endif /* HAVE_MUTEX_HPPA_MSEM_INIT */
00063 #if defined(HAVE_SHMGET)
00064                 {
00065                 key_t segid;
00066                 int id, mode, ret;
00067 
00068                 /*
00069                  * We could potentially create based on REGION_CREATE_OK, but
00070                  * that's dangerous -- we might get crammed in sideways if
00071                  * some of the expected regions exist but others do not.  Also,
00072                  * if the requested size differs from an existing region's
00073                  * actual size, then all sorts of nasty things can happen.
00074                  * Basing create solely on REGION_CREATE is much safer -- a
00075                  * recovery will get us straightened out.
00076                  */
00077                 if (F_ISSET(infop, REGION_CREATE)) {
00078                         /*
00079                          * The application must give us a base System V IPC key
00080                          * value.  Adjust that value based on the region's ID,
00081                          * and correct so the user's original value appears in
00082                          * the ipcs output.
00083                          */
00084                         if (dbenv->shm_key == INVALID_REGION_SEGID) {
00085                                 __db_err(dbenv,
00086                             "no base system shared memory ID specified");
00087                                 return (EINVAL);
00088                         }
00089                         segid = (key_t)(dbenv->shm_key + (infop->id - 1));
00090 
00091                         /*
00092                          * If the key maps to an existing region, assume the
00093                          * application crashed and we're restarting.  Delete the
00094                          * old region and re-try.  If that fails, return an error;
00095                          * the application will have to select a different segment
00096                          * ID or clean up some other way.
00097                          */
00098                         if ((id = shmget(segid, 0, 0)) != -1) {
00099                                 (void)shmctl(id, IPC_RMID, NULL);
00100                                 if ((id = shmget(segid, 0, 0)) != -1) {
00101                                         __db_err(dbenv,
00102                 "shmget: key: %ld: shared system memory region already exists",
00103                                             (long)segid);
00104                                         return (EAGAIN);
00105                                 }
00106                         }
00107 
00108                         /*
00109                          * Map the DbEnv::open method file mode permissions to
00110                          * shmget call permissions.
00111                          */
00112                         mode = IPC_CREAT | __db_shm_mode(dbenv);
00113                         if ((id = shmget(segid, rp->size, mode)) == -1) {
00114                                 ret = __os_get_errno();
00115                                 __db_err(dbenv,
00116         "shmget: key: %ld: unable to create shared system memory region: %s",
00117                                     (long)segid, strerror(ret));
00118                                 return (ret);
00119                         }
00120                         rp->segid = id;
00121                 } else
00122                         id = rp->segid;
00123 
00124                 if ((infop->addr = shmat(id, NULL, 0)) == (void *)-1) {
00125                         infop->addr = NULL;
00126                         ret = __os_get_errno();
00127                         __db_err(dbenv,
00128         "shmat: id %d: unable to attach to shared system memory region: %s",
00129                             id, strerror(ret));
00130                         return (ret);
00131                 }
00132 
00133                 return (0);
00134                 }
00135 #else /* !HAVE_SHMGET */
00136                 return (__db_nosystemmem(dbenv));
00137 #endif /* HAVE_SHMGET */
00138         }
00139 
00140 #ifdef HAVE_MMAP
00141         {
00142         DB_FH *fhp;
00143         int ret;
00144 
00145         fhp = NULL;
00146 
00147         /*
00148          * Try to open/create the shared region file.  We DO NOT need to ensure
00149          * that multiple threads/processes attempting to simultaneously create
00150          * the region are properly ordered, our caller has already taken care
00151          * of that.
00152          */
00153         if ((ret = __os_open(dbenv, infop->name,
00154             DB_OSO_REGION |
00155             (F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE : 0),
00156             dbenv->db_mode, &fhp)) != 0)
00157                 __db_err(dbenv, "%s: %s", infop->name, db_strerror(ret));
00158 
00159         /*
00160          * If we created the file, grow it to its full size before mapping
00161          * it in.  We really want to avoid touching the buffer cache after
00162          * mmap(2) is called, doing anything else confuses the hell out of
00163          * systems without merged VM/buffer cache systems, or, more to the
00164          * point, *badly* merged VM/buffer cache systems.
00165          */
00166         if (ret == 0 && F_ISSET(infop, REGION_CREATE)) {
00167                 if (F_ISSET(dbenv, DB_ENV_REGION_INIT))
00168                         ret = __db_file_write(dbenv, "region file", fhp,
00169                             rp->size / MEGABYTE, rp->size % MEGABYTE, 0x00);
00170                 else
00171                         ret = __db_file_extend(dbenv, fhp, rp->size);
00172         }
00173 
00174         /* Map the file in (skipped if any step above failed). */
00175         if (ret == 0)
00176                 ret = __os_map(dbenv,
00177                     infop->name, fhp, rp->size, 1, 0, &infop->addr);
00178 
00179         if (fhp != NULL)
00180                 (void)__os_closehandle(dbenv, fhp);
00181 
00182         return (ret);
00183         }
00184 #else /* !HAVE_MMAP */
00185         COMPQUIET(infop, NULL);
00186         COMPQUIET(rp, NULL);
00187         __db_err(dbenv,
00188             "architecture lacks mmap(2), shared environments not possible");
00189         return (DB_OPNOTSUP);
00190 #endif /* HAVE_MMAP */
00191 }
00192 
00193 /*
00194  * __os_r_sysdetach --
00195  *      Detach from a shared memory region; if destroy is set, also remove it.
00196  *      Returns 0 on success, otherwise a non-zero error value.
00197  * PUBLIC: int __os_r_sysdetach __P((DB_ENV *, REGINFO *, int));
00198  */
00199 int
00200 __os_r_sysdetach(dbenv, infop, destroy)
00201         DB_ENV *dbenv;
00202         REGINFO *infop;
00203         int destroy;
00204 {
00205         REGION *rp;
00206 
00207         rp = infop->rp;
00208 
00209         if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
00210 #ifdef HAVE_SHMGET
00211                 int ret, segid;
00212 
00213                 /*
00214                  * We may be about to remove the memory referenced by rp,
00215                  * save the segment ID, and (optionally) wipe the original.
00216                  */
00217                 segid = rp->segid;
00218                 if (destroy)
00219                         rp->segid = INVALID_REGION_SEGID;
00220 
00221                 if (shmdt(infop->addr) != 0) {
00222                         ret = __os_get_errno();
00223                         __db_err(dbenv, "shmdt: %s", strerror(ret));
00224                         return (ret);
00225                 }
00226 
00227                 if (destroy && shmctl(segid, IPC_RMID,
00228                     NULL) != 0 && (ret = __os_get_errno()) != EINVAL) {
00229                         __db_err(dbenv,
00230             "shmctl: id %d: unable to delete system shared memory region: %s",
00231                             segid, strerror(ret));
00232                         return (ret);
00233                 }
00234 
00235                 return (0);
00236 #else /* !HAVE_SHMGET */
00237                 return (__db_nosystemmem(dbenv));
00238 #endif /* HAVE_SHMGET */
00239         }
00240 
00241 #ifdef HAVE_MMAP
00242 #ifdef HAVE_MUNLOCK
00243         if (F_ISSET(dbenv, DB_ENV_LOCKDOWN))
00244                 (void)munlock(infop->addr, rp->size);
00245 #endif /* HAVE_MUNLOCK */
00246         if (munmap(infop->addr, rp->size) != 0) {
00247                 int ret;
00248 
00249                 ret = __os_get_errno();
00250                 __db_err(dbenv, "munmap: %s", strerror(ret));
00251                 return (ret);
00252         }
00253         /* Return the unlink's own status; errno may be stale (or 0) by now. */
00254         if (destroy)
00255                 return (__os_region_unlink(dbenv, infop->name));
00256 
00257         return (0);
00258 #else /* !HAVE_MMAP */
00259         COMPQUIET(destroy, 0);
00260         return (EINVAL);
00261 #endif /* HAVE_MMAP */
00262 }
00263 
00264 /*
00265  * __os_mapfile --
00266  *      Map in a shared memory file (a non-region mapping of fhp, len bytes).
00267  *      Returns EINVAL when mmap support is not compiled in, or on QNX.
00268  * PUBLIC: int __os_mapfile __P((DB_ENV *,
00269  * PUBLIC:     char *, DB_FH *, size_t, int, void **));
00270  */
00271 int
00272 __os_mapfile(dbenv, path, fhp, len, is_rdonly, addrp)
00273         DB_ENV *dbenv;
00274         char *path;
00275         DB_FH *fhp;
00276         int is_rdonly;
00277         size_t len;
00278         void **addrp;
00279 {
00280 #if defined(HAVE_MMAP) && !defined(HAVE_QNX)
00281         return (__os_map(dbenv, path, fhp, len, 0, is_rdonly, addrp));
00282 #else /* !HAVE_MMAP || HAVE_QNX */
00283         COMPQUIET(dbenv, NULL);
00284         COMPQUIET(path, NULL);
00285         COMPQUIET(fhp, NULL);
00286         COMPQUIET(is_rdonly, 0);
00287         COMPQUIET(len, 0);
00288         COMPQUIET(addrp, NULL);
00289         return (EINVAL);
00290 #endif /* HAVE_MMAP && !HAVE_QNX */
00291 }
00292 
00293 /*
00294  * __os_unmapfile --
00295  *      Unmap len bytes at addr, via the user's j_unmap replacement if one is set.
00296  *      Returns the j_unmap/munmap status, or EINVAL without mmap support.
00297  * PUBLIC: int __os_unmapfile __P((DB_ENV *, void *, size_t));
00298  */
00299 int
00300 __os_unmapfile(dbenv, addr, len)
00301         DB_ENV *dbenv;
00302         void *addr;
00303         size_t len;
00304 {
00305         int ret;
00306 
00307         /* If the user replaced the map call, call through their interface. */
00308         if (DB_GLOBAL(j_unmap) != NULL)
00309                 return (DB_GLOBAL(j_unmap)(addr, len));
00310 
00311 #ifdef HAVE_MMAP
00312 #ifdef HAVE_MUNLOCK
00313         if (F_ISSET(dbenv, DB_ENV_LOCKDOWN))
00314                 RETRY_CHK((munlock(addr, len)), ret);
00315 #else /* !HAVE_MUNLOCK */
00316         COMPQUIET(dbenv, NULL);
00317 #endif /* HAVE_MUNLOCK */
00318         RETRY_CHK((munmap(addr, len)), ret);
00319         return (ret);
00320 #else /* !HAVE_MMAP */
00321         COMPQUIET(dbenv, NULL);
00322 
00323         return (EINVAL);
00324 #endif /* HAVE_MMAP */
00325 }
00326 
00327 #ifdef HAVE_MMAP
00328 /*
00329  * __os_map --
00330  *      Map len bytes of fhp with mmap(2), or call the user's j_map replacement.
00331  */
00332 static int
00333 __os_map(dbenv, path, fhp, len, is_region, is_rdonly, addrp)
00334         DB_ENV *dbenv;
00335         char *path;
00336         DB_FH *fhp;
00337         int is_region, is_rdonly;
00338         size_t len;
00339         void **addrp;
00340 {
00341         void *p;
00342         int flags, prot, ret;
00343 
00344         /* If the user replaced the map call, call through their interface. */
00345         if (DB_GLOBAL(j_map) != NULL)
00346                 return (DB_GLOBAL(j_map)
00347                     (path, len, is_region, is_rdonly, addrp));
00348 
00349         /* Check for illegal usage. */
00350         DB_ASSERT(F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1);
00351 
00352         /*
00353          * If it's read-only, it's private, and if it's not, it's shared.
00354          * Don't bother with an additional parameter.
00355          */
00356         flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED;
00357 
00358 #ifdef MAP_FILE
00359         /*
00360          * Historically, MAP_FILE was required for mapping regular files,
00361          * even though it was the default.  Some systems have it, some
00362          * don't, some that have it set it to 0.
00363          */
00364         flags |= MAP_FILE;
00365 #endif /* MAP_FILE */
00366 
00367         /*
00368          * I know of no systems that implement the flag to tell the system
00369          * that the region contains semaphores, but it's not an unreasonable
00370          * thing to do, and has been part of the design since forever.  I
00371          * don't think anyone will object, but don't set it for read-only
00372          * files, it doesn't make sense.
00373          */
00374 #ifdef MAP_HASSEMAPHORE
00375         if (is_region && !is_rdonly)
00376                 flags |= MAP_HASSEMAPHORE;
00377 #else /* !MAP_HASSEMAPHORE */
00378         COMPQUIET(is_region, 0);
00379 #endif /* MAP_HASSEMAPHORE */
00380 
00381         /*
00382          * FreeBSD:
00383          * Causes data dirtied via this VM map to be flushed to physical media
00384          * only when necessary (usually by the pager) rather than gratuitously.
00385          * Typically this prevents the update daemons from flushing pages
00386          * dirtied through such maps and thus allows efficient sharing of
00387          * memory across unassociated processes using a file-backed shared
00388          * memory map.
00389          */
00390 #ifdef MAP_NOSYNC
00391         flags |= MAP_NOSYNC;
00392 #endif /* MAP_NOSYNC */
00393 
00394         prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE);
00395 
00396         /*
00397          * XXX
00398          * Work around a bug in the VMS V7.1 mmap() implementation.  To map
00399          * a file into memory on VMS it needs to be opened in a certain way,
00400          * originally.  To get the file opened in that certain way, the VMS
00401          * mmap() closes the file and re-opens it.  When it does this, it
00402          * doesn't flush any caches out to disk before closing.  The problem
00403          * this causes us is that when the memory cache doesn't get written
00404          * out, the file isn't big enough to match the memory chunk and the
00405          * mmap() call fails.  This call to fsync() fixes the problem.  DEC
00406          * thinks this isn't a bug because of language in XPG5 discussing user
00407          * responsibility for on-disk and in-memory synchronization.
00408          */
00409 #ifdef VMS
00410         if (__os_fsync(dbenv, fhp) == -1)
00411                 return (__os_get_errno());
00412 #endif /* VMS */
00413 
00414         /* MAP_FAILED was not defined in early mmap implementations. */
00415 #ifndef MAP_FAILED
00416 #define MAP_FAILED      -1
00417 #endif /* !MAP_FAILED */
00418         if ((p = mmap(NULL,
00419             len, prot, flags, fhp->fd, (off_t)0)) == (void *)MAP_FAILED) {
00420                 ret = __os_get_errno();
00421                 __db_err(dbenv, "mmap: %s", strerror(ret));
00422                 return (ret);
00423         }
00424 
00425 #ifdef HAVE_MLOCK
00426         /*
00427          * If it's a region, we want to make sure that the memory isn't paged.
00428          * For example, Solaris will page large mpools because it thinks that
00429          * I/O buffer memory is more important than we are.  The mlock system
00430          * call may or may not succeed (mlock is restricted to the super-user
00431          * on some systems).  Currently, the only other use of mmap in DB is
00432          * to map read-only databases -- we don't want them paged, either, so
00433          * the call isn't conditional on is_region, only on DB_ENV_LOCKDOWN.
00434          */
00435         if (F_ISSET(dbenv, DB_ENV_LOCKDOWN) && mlock(p, len) != 0) {
00436                 ret = __os_get_errno();
00437                 (void)munmap(p, len);
00438                 __db_err(dbenv, "mlock: %s", strerror(ret));
00439                 return (ret);
00440         }
00441 #else /* !HAVE_MLOCK */
00442         COMPQUIET(dbenv, NULL);
00443 #endif /* HAVE_MLOCK */
00444 
00445         *addrp = p;
00446         return (0);
00447 }
00448 #endif /* HAVE_MMAP */
00449 
00450 #ifndef HAVE_SHMGET
00451 /*
00452  * __db_nosystemmem --
00453  *      Report that system-memory environments are unsupported; returns DB_OPNOTSUP.
00454  */
00455 static int
00456 __db_nosystemmem(dbenv)
00457         DB_ENV *dbenv;
00458 {
00459         __db_err(dbenv,
00460             "architecture doesn't support environments in system memory");
00461         return (DB_OPNOTSUP);
00462 }
00463 #endif /* !HAVE_SHMGET */

Generated on Sun Dec 25 12:14:42 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2