Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

os_map.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: os_map.c,v 12.2 2005/06/16 20:23:29 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #include "db_int.h"
00013 
00014 static int __os_map
00015   __P((DB_ENV *, char *, REGINFO *, DB_FH *, size_t, int, int, int, void **));
00016 static int __os_unique_name __P((_TCHAR *, HANDLE, _TCHAR *, size_t));
00017 
00018 /*
00019  * __os_r_sysattach --
00020  *      Create/join a shared memory region.
00021  */
00022 int
00023 __os_r_sysattach(dbenv, infop, rp)
00024         DB_ENV *dbenv;
00025         REGINFO *infop;
00026         REGION *rp;
00027 {
00028         DB_FH *fhp;
00029         int is_system, ret;
00030 
00031         /*
00032          * Try to open/create the file.  We DO NOT need to ensure that multiple
00033          * threads/processes attempting to simultaneously create the region are
00034          * properly ordered, our caller has already taken care of that.
00035          */
00036         if ((ret = __os_open(dbenv, infop->name,
00037             F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE: 0,
00038             dbenv->db_mode, &fhp)) != 0) {
00039                 __db_err(dbenv, "%s: %s", infop->name, db_strerror(ret));
00040                 return (ret);
00041         }
00042 
00043         /*
00044          * On Windows/9X, files that are opened by multiple processes do not
00045          * share data correctly.  For this reason, the DB_SYSTEM_MEM flag is
00046          * implied for any application that does not specify the DB_PRIVATE
00047          * flag.
00048          */
00049         is_system = F_ISSET(dbenv, DB_ENV_SYSTEM_MEM) ||
00050             (!F_ISSET(dbenv, DB_ENV_PRIVATE) && __os_is_winnt() == 0);
00051 
00052         /*
00053          * Map the file in.  If we're creating an in-system-memory region,
00054          * specify a segment ID (which is never used again) so that the
00055          * calling code writes out the REGENV_REF structure to the primary
00056          * environment file.
00057          */
00058         ret = __os_map(dbenv, infop->name, infop, fhp, rp->size,
00059            1, is_system, 0, &infop->addr);
00060         if (ret == 0 && is_system == 1)
00061                 rp->segid = 1;
00062 
00063         (void)__os_closehandle(dbenv, fhp);
00064 
00065         return (ret);
00066 }
00067 
00068 /*
00069  * __os_r_sysdetach --
00070  *      Detach from a shared memory region.
00071  */
00072 int
00073 __os_r_sysdetach(dbenv, infop, destroy)
00074         DB_ENV *dbenv;
00075         REGINFO *infop;
00076         int destroy;
00077 {
00078         int ret, t_ret;
00079 
00080         if (infop->wnt_handle != NULL) {
00081                 (void)CloseHandle(infop->wnt_handle);
00082                 infop->wnt_handle = NULL;
00083         }
00084 
00085         ret = !UnmapViewOfFile(infop->addr) ? __os_get_errno() : 0;
00086         if (ret != 0)
00087                 __db_err(dbenv, "UnmapViewOfFile: %s", strerror(ret));
00088 
00089         if (!F_ISSET(dbenv, DB_ENV_SYSTEM_MEM) && destroy) {
00090                 if (F_ISSET(dbenv, DB_ENV_OVERWRITE))
00091                         (void)__db_file_multi_write(dbenv, infop->name);
00092                 if ((t_ret = __os_unlink(dbenv, infop->name)) != 0 && ret == 0)
00093                         ret = t_ret;
00094         }
00095 
00096         return (ret);
00097 }
00098 
00099 /*
00100  * __os_mapfile --
00101  *      Map in a shared memory file.
00102  */
00103 int
00104 __os_mapfile(dbenv, path, fhp, len, is_rdonly, addr)
00105         DB_ENV *dbenv;
00106         char *path;
00107         DB_FH *fhp;
00108         int is_rdonly;
00109         size_t len;
00110         void **addr;
00111 {
00112         /* If the user replaced the map call, call through their interface. */
00113         if (DB_GLOBAL(j_map) != NULL)
00114                 return (DB_GLOBAL(j_map)(path, len, 0, is_rdonly, addr));
00115 
00116         return (__os_map(dbenv, path, NULL, fhp, len, 0, 0, is_rdonly, addr));
00117 }
00118 
00119 /*
00120  * __os_unmapfile --
00121  *      Unmap the shared memory file.
00122  */
00123 int
00124 __os_unmapfile(dbenv, addr, len)
00125         DB_ENV *dbenv;
00126         void *addr;
00127         size_t len;
00128 {
00129         /* If the user replaced the map call, call through their interface. */
00130         if (DB_GLOBAL(j_unmap) != NULL)
00131                 return (DB_GLOBAL(j_unmap)(addr, len));
00132 
00133         return (!UnmapViewOfFile(addr) ? __os_get_errno() : 0);
00134 }
00135 
00136 /*
00137  * __os_unique_name --
00138  *      Create a unique identifying name from a pathname (may be absolute or
00139  *      relative) and/or a file descriptor.
00140  *
00141  *      The name returned must be unique (different files map to different
00142  *      names), and repeatable (same files, map to same names).  It's not
00143  *      so easy to do by name.  Should handle not only:
00144  *
00145  *              foo.bar == ./foo.bar == c:/whatever_path/foo.bar
00146  *
00147  *      but also understand that:
00148  *
00149  *              foo.bar == Foo.Bar      (FAT file system)
00150  *              foo.bar != Foo.Bar      (NTFS)
00151  *
00152  *      The best solution is to use the file index, found in the file
00153  *      information structure (similar to UNIX inode #).
00154  *
00155  *      When a file is deleted, its file index may be reused,
00156  *      but if the unique name has not gone from its namespace,
00157  *      we may get a conflict.  So to ensure some tie in to the
00158  *      original pathname, we also use the creation time and the
00159  *      file basename.  This is not a perfect system, but it
00160  *      should work for all but anamolous test cases.
00161  *
00162  */
00163 static int
00164 __os_unique_name(orig_path, hfile, result_path, result_path_len)
00165         _TCHAR *orig_path, *result_path;
00166         HANDLE hfile;
00167         size_t result_path_len;
00168 {
00169         BY_HANDLE_FILE_INFORMATION fileinfo;
00170         _TCHAR *basename, *p;
00171 
00172         /*
00173          * In Windows, pathname components are delimited by '/' or '\', and
00174          * if neither is present, we need to strip off leading drive letter
00175          * (e.g. c:foo.txt).
00176          */
00177         basename = _tcsrchr(orig_path, '/');
00178         p = _tcsrchr(orig_path, '\\');
00179         if (basename == NULL || (p != NULL && p > basename))
00180                 basename = p;
00181         if (basename == NULL)
00182                 basename = _tcsrchr(orig_path, ':');
00183 
00184         if (basename == NULL)
00185                 basename = orig_path;
00186         else
00187                 basename++;
00188 
00189         if (!GetFileInformationByHandle(hfile, &fileinfo))
00190                 return (__os_get_errno());
00191 
00192         (void)_sntprintf(result_path, result_path_len,
00193             _T("__db_shmem.%8.8lx.%8.8lx.%8.8lx.%8.8lx.%8.8lx.%s"),
00194             fileinfo.dwVolumeSerialNumber,
00195             fileinfo.nFileIndexHigh,
00196             fileinfo.nFileIndexLow,
00197             fileinfo.ftCreationTime.dwHighDateTime,
00198             fileinfo.ftCreationTime.dwHighDateTime,
00199             basename);
00200 
00201         return (0);
00202 }
00203 
00204 /*
00205  * __os_map --
00206  *      The mmap(2) function for Windows.
00207  */
00208 static int
00209 __os_map(dbenv, path, infop, fhp, len, is_region, is_system, is_rdonly, addr)
00210         DB_ENV *dbenv;
00211         REGINFO *infop;
00212         char *path;
00213         DB_FH *fhp;
00214         int is_region, is_system, is_rdonly;
00215         size_t len;
00216         void **addr;
00217 {
00218         HANDLE hMemory;
00219         int ret, use_pagefile;
00220         _TCHAR *tpath, shmem_name[MAXPATHLEN];
00221         void *pMemory;
00222 
00223         ret = 0;
00224         if (infop != NULL)
00225                 infop->wnt_handle = NULL;
00226 
00227         use_pagefile = is_region && is_system;
00228 
00229         /*
00230          * If creating a region in system space, get a matching name in the
00231          * paging file namespace.
00232          */
00233         if (use_pagefile) {
00234                 TO_TSTRING(dbenv, path, tpath, ret);
00235                 if (ret != 0)
00236                         return (ret);
00237                 ret = __os_unique_name(tpath, fhp->handle,
00238                     shmem_name, sizeof(shmem_name));
00239                 FREE_STRING(dbenv, tpath);
00240                 if (ret != 0)
00241                         return (ret);
00242         }
00243 
00244         /*
00245          * XXX
00246          * DB: We have not implemented copy-on-write here.
00247          *
00248          * If this is an region in system memory, we try to open it using the
00249          * OpenFileMapping() first, and only call CreateFileMapping() if we're
00250          * really creating the section.  There are two reasons:
00251          *
00252          * 1) We only create the mapping if we have newly created the region.
00253          *    This avoids a long-running problem caused by Windows reference
00254          *    counting, where regions that are closed by all processes are
00255          *    deleted.  It turns out that just checking for a zeroed region
00256          *    is not good enough. See [#4882] and [#7127] for the details.
00257          *
00258          * 2) CreateFileMapping seems to mess up making the commit charge to
00259          *    the process. It thinks, incorrectly, that when we want to join a
00260          *    previously existing section, that it should make a commit charge
00261          *    for the whole section.  In fact, there is no new committed memory
00262          *    whatever.  The call can fail if there is insufficient memory free
00263          *    to handle the erroneous commit charge.  So, we find that the
00264          *    bogus commit is not made if we call OpenFileMapping.
00265          */
00266         hMemory = NULL;
00267         if (use_pagefile) {
00268                 hMemory = OpenFileMapping(
00269                     is_rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS,
00270                     0, shmem_name);
00271 
00272                 if (hMemory == NULL && F_ISSET(infop, REGION_CREATE_OK))
00273                         hMemory = CreateFileMapping((HANDLE)-1, 0,
00274                             is_rdonly ? PAGE_READONLY : PAGE_READWRITE,
00275                             0, (DWORD)len, shmem_name);
00276         } else
00277                 hMemory = CreateFileMapping(fhp->handle, 0,
00278                     is_rdonly ? PAGE_READONLY : PAGE_READWRITE,
00279                     0, (DWORD)len, NULL);
00280 
00281         if (hMemory == NULL) {
00282                 ret = __os_get_errno();
00283                 __db_err(dbenv, "OpenFileMapping: %s", strerror(ret));
00284                 return (__db_panic(dbenv, ret));
00285         }
00286 
00287         pMemory = MapViewOfFile(hMemory,
00288             (is_rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS), 0, 0, len);
00289         if (pMemory == NULL) {
00290                 ret = __os_get_errno();
00291                 __db_err(dbenv, "MapViewOfFile: %s", strerror(ret));
00292                 return (__db_panic(dbenv, ret));
00293         }
00294 
00295         /*
00296          * XXX
00297          * It turns out that the kernel object underlying the named section
00298          * is reference counted, but that the call to MapViewOfFile() above
00299          * does NOT increment the reference count! So, if we close the handle
00300          * here, the kernel deletes the object from the kernel namespace.
00301          * When a second process comes along to join the region, the kernel
00302          * happily creates a new object with the same name, but completely
00303          * different identity. The two processes then have distinct isolated
00304          * mapped sections, not at all what was wanted. Not closing the handle
00305          * here fixes this problem.  We carry the handle around in the region
00306          * structure so we can close it when unmap is called.
00307          */
00308         if (use_pagefile && infop != NULL)
00309                 infop->wnt_handle = hMemory;
00310         else
00311                 CloseHandle(hMemory);
00312 
00313         *addr = pMemory;
00314         return (ret);
00315 }

Generated on Sun Dec 25 12:14:42 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2