00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "db_config.h"
00011
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 #include <sys/stat.h>
00015
00016 #include <string.h>
00017 #endif
00018
00019 #include "db_int.h"
00020
/*
 * Forward declarations for the file-local (static) helpers defined later in
 * this file.  __os_zerofill is only declared, defined and called when
 * HAVE_FILESYSTEM_NOTZERO is defined; __os_physwrite is always present.
 */
00021 #ifdef HAVE_FILESYSTEM_NOTZERO
00022 static int __os_zerofill __P((DB_ENV *, DB_FH *));
00023 #endif
00024 static int __os_physwrite __P((DB_ENV *, DB_FH *, void *, size_t, size_t *));
00025
00026
00027
00028
00029
00030
00031
00032
00033 int
00034 __os_io(dbenv, op, fhp, pgno, pagesize, buf, niop)
00035 DB_ENV *dbenv;
00036 int op;
00037 DB_FH *fhp;
00038 db_pgno_t pgno;
00039 u_int32_t pagesize;
00040 u_int8_t *buf;
00041 size_t *niop;
00042 {
00043 #if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
00044 ssize_t nio;
00045 #endif
00046 int ret;
00047
00048
00049 DB_ASSERT(F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1);
00050
00051 #if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
00052 switch (op) {
00053 case DB_IO_READ:
00054 if (DB_GLOBAL(j_read) != NULL)
00055 goto slow;
00056 nio = DB_GLOBAL(j_pread) != NULL ? DB_GLOBAL(j_pread)
00057 (fhp->fd, buf, pagesize, (off_t)pgno * pagesize) :
00058 pread(fhp->fd, buf, pagesize, (off_t)pgno * pagesize);
00059 break;
00060 case DB_IO_WRITE:
00061 if (DB_GLOBAL(j_write) != NULL)
00062 goto slow;
00063 #ifdef HAVE_FILESYSTEM_NOTZERO
00064 if (__os_fs_notzero())
00065 goto slow;
00066 #endif
00067 nio = DB_GLOBAL(j_pwrite) != NULL ? DB_GLOBAL(j_pwrite)
00068 (fhp->fd, buf, pagesize, (off_t)pgno * pagesize) :
00069 pwrite(fhp->fd, buf, pagesize, (off_t)pgno * pagesize);
00070 break;
00071 default:
00072 return (EINVAL);
00073 }
00074 if (nio == (ssize_t)pagesize) {
00075 *niop = pagesize;
00076 return (0);
00077 }
00078 slow:
00079 #endif
00080 MUTEX_LOCK(dbenv, fhp->mtx_fh);
00081
00082 if ((ret = __os_seek(dbenv, fhp,
00083 pagesize, pgno, 0, 0, DB_OS_SEEK_SET)) != 0)
00084 goto err;
00085 switch (op) {
00086 case DB_IO_READ:
00087 ret = __os_read(dbenv, fhp, buf, pagesize, niop);
00088 break;
00089 case DB_IO_WRITE:
00090 ret = __os_write(dbenv, fhp, buf, pagesize, niop);
00091 break;
00092 default:
00093 ret = EINVAL;
00094 break;
00095 }
00096
00097 err: MUTEX_UNLOCK(dbenv, fhp->mtx_fh);
00098
00099 return (ret);
00100
00101 }
00102
00103
00104
00105
00106
00107
00108
00109 int
00110 __os_read(dbenv, fhp, addr, len, nrp)
00111 DB_ENV *dbenv;
00112 DB_FH *fhp;
00113 void *addr;
00114 size_t len;
00115 size_t *nrp;
00116 {
00117 size_t offset;
00118 ssize_t nr;
00119 int ret;
00120 u_int8_t *taddr;
00121
00122 ret = 0;
00123
00124
00125 DB_ASSERT(F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1);
00126
00127 if (DB_GLOBAL(j_read) != NULL) {
00128 *nrp = len;
00129 if (DB_GLOBAL(j_read)(fhp->fd, addr, len) != (ssize_t)len) {
00130 ret = __os_get_errno();
00131 __db_err(dbenv, "read: %#lx, %lu: %s",
00132 P_TO_ULONG(addr), (u_long)len, strerror(ret));
00133 }
00134 return (ret);
00135 }
00136
00137 for (taddr = addr, offset = 0;
00138 offset < len; taddr += nr, offset += (u_int32_t)nr) {
00139 RETRY_CHK(((nr = read(
00140 fhp->fd, taddr, len - offset)) < 0 ? 1 : 0), ret);
00141 if (nr == 0 || ret != 0)
00142 break;
00143 }
00144 *nrp = (size_t)(taddr - (u_int8_t *)addr);
00145 if (ret != 0)
00146 __db_err(dbenv, "read: %#lx, %lu: %s",
00147 P_TO_ULONG(taddr), (u_long)len - offset, strerror(ret));
00148 return (ret);
00149 }
00150
00151
00152
00153
00154
00155
00156
00157 int
00158 __os_write(dbenv, fhp, addr, len, nwp)
00159 DB_ENV *dbenv;
00160 DB_FH *fhp;
00161 void *addr;
00162 size_t len;
00163 size_t *nwp;
00164 {
00165
00166 DB_ASSERT(F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1);
00167
00168 #ifdef HAVE_FILESYSTEM_NOTZERO
00169
00170 if (__os_fs_notzero()) {
00171 int ret;
00172 if ((ret = __os_zerofill(dbenv, fhp)) != 0)
00173 return (ret);
00174 }
00175 #endif
00176 return (__os_physwrite(dbenv, fhp, addr, len, nwp));
00177 }
00178
00179
00180
00181
00182
00183 static int
00184 __os_physwrite(dbenv, fhp, addr, len, nwp)
00185 DB_ENV *dbenv;
00186 DB_FH *fhp;
00187 void *addr;
00188 size_t len;
00189 size_t *nwp;
00190 {
00191 size_t offset;
00192 ssize_t nw;
00193 int ret;
00194 u_int8_t *taddr;
00195
00196 ret = 0;
00197
00198 #if defined(HAVE_FILESYSTEM_NOTZERO) && defined(DIAGNOSTIC)
00199 if (__os_fs_notzero()) {
00200 struct stat sb;
00201 off_t cur_off;
00202
00203 DB_ASSERT(fstat(fhp->fd, &sb) != -1 &&
00204 (cur_off = lseek(fhp->fd, (off_t)0, SEEK_CUR)) != -1 &&
00205 cur_off <= sb.st_size);
00206 }
00207 #endif
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 PANIC_CHECK(dbenv);
00221
00222 if (DB_GLOBAL(j_write) != NULL) {
00223 *nwp = len;
00224 if (DB_GLOBAL(j_write)(fhp->fd, addr, len) != (ssize_t)len) {
00225 ret = __os_get_errno();
00226 __db_err(dbenv, "write: %#lx, %lu: %s",
00227 P_TO_ULONG(addr), (u_long)len, strerror(ret));
00228 }
00229 return (ret);
00230 }
00231
00232 for (taddr = addr, offset = 0;
00233 offset < len; taddr += nw, offset += (u_int32_t)nw) {
00234 RETRY_CHK(((nw = write(
00235 fhp->fd, taddr, len - offset)) < 0 ? 1 : 0), ret);
00236 if (ret != 0)
00237 break;
00238 }
00239 *nwp = len;
00240 if (ret != 0)
00241 __db_err(dbenv, "write: %#lx, %lu: %s",
00242 P_TO_ULONG(taddr), (u_long)len - offset, strerror(ret));
00243 return (ret);
00244 }
00245
00246 #ifdef HAVE_FILESYSTEM_NOTZERO
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 static int
00261 __os_zerofill(dbenv, fhp)
00262 DB_ENV *dbenv;
00263 DB_FH *fhp;
00264 {
00265 off_t stat_offset, write_offset;
00266 size_t blen, nw;
00267 u_int32_t bytes, mbytes;
00268 int group_sync, need_free, ret;
00269 u_int8_t buf[8 * 1024], *bp;
00270
00271
00272 write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset;
00273
00274
00275 if ((ret = __os_ioinfo(dbenv, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
00276 return (ret);
00277 stat_offset = (off_t)mbytes * MEGABYTE + bytes;
00278
00279
00280 if (stat_offset >= write_offset)
00281 return (0);
00282
00283
00284 #undef ZF_LARGE_WRITE
00285 #define ZF_LARGE_WRITE (64 * 1024)
00286 if (write_offset - stat_offset > ZF_LARGE_WRITE) {
00287 if ((ret = __os_calloc(dbenv, 1, ZF_LARGE_WRITE, &bp)) != 0)
00288 return (ret);
00289 blen = ZF_LARGE_WRITE;
00290 need_free = 1;
00291 } else {
00292 bp = buf;
00293 blen = sizeof(buf);
00294 need_free = 0;
00295 memset(buf, 0, sizeof(buf));
00296 }
00297
00298
00299 if ((ret = __os_seek(
00300 dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET)) != 0)
00301 goto err;
00302
00303
00304
00305
00306
00307
00308
00309 for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
00310 if (write_offset - stat_offset <= blen) {
00311 blen = (size_t)(write_offset - stat_offset);
00312 if (group_sync && (ret = __os_fsync(dbenv, fhp)) != 0)
00313 goto err;
00314 }
00315 if ((ret = __os_physwrite(dbenv, fhp, bp, blen, &nw)) != 0)
00316 goto err;
00317 stat_offset += blen;
00318 }
00319 if ((ret = __os_fsync(dbenv, fhp)) != 0)
00320 goto err;
00321
00322
00323 mbytes = (u_int32_t)(write_offset / MEGABYTE);
00324 bytes = (u_int32_t)(write_offset % MEGABYTE);
00325 ret = __os_seek(dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET);
00326
00327 err: if (need_free)
00328 __os_free(dbenv, bp);
00329 return (ret);
00330 }
00331 #endif