00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "db_config.h"
00011
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014
00015 #include <string.h>
00016 #endif
00017
00018 #include "db_int.h"
00019 #include "dbinc/db_shash.h"
00020 #include "dbinc/log.h"
00021 #include "dbinc/mp.h"
00022
00023 /* __memp_fget_pp -- */
00024 /* Pre/post-processing wrapper around __memp_fget(): checks the handle */
00025 /* and the flags, enters the environment (and replication, when the */
00026 /* environment is replicated), then forwards the call unchanged. */
00027 /* Returns 0 on success or a non-zero error code. On failure the */
00028 /* replication/environment entries made here are undone before returning; */
00029 /* on success they are deliberately left in place (see the end of the body). */
00030 int
00031 __memp_fget_pp(dbmfp, pgnoaddr, flags, addrp)
00032 DB_MPOOLFILE *dbmfp; /* memory pool file handle; must already be open */
00033 db_pgno_t *pgnoaddr; /* page number, passed through to __memp_fget() */
00034 u_int32_t flags; /* 0 or exactly one of DB_MPOOL_CREATE, _LAST, _NEW */
00035 void *addrp; /* passed through; __memp_fget() stores the page address via it */
00036 {
00037 DB_ENV *dbenv;
00038 DB_THREAD_INFO *ip;
00039 int rep_check, ret;
00040
00041 dbenv = dbmfp->dbenv;
00042
00043 PANIC_CHECK(dbenv);
00044 MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->get");
00045
00046 /* Validate arguments. */
00047 /* */
00048 /* Two checks are applied to a non-zero flags value: */
00049 /* 1. __db_fchk() is handed OKFLAGS as the acceptable set (presumably it */
00050 /* fails when any other bit is set -- confirm against __db_fchk). */
00051 /* 2. The switch then requires flags to equal exactly one of the three */
00052 /* values, so combinations such as CREATE|NEW fall into the default */
00053 /* case and are reported through __db_ferr(). */
00054 /* */
00055 /* Note that DB_MPOOL_FREE, which __memp_fget() itself handles, is not in */
00056 /* OKFLAGS and therefore cannot be passed through this wrapper. */
00057 /* Both validation failures return before ENV_ENTER, so there is nothing */
00058 /* to undo on those paths. */
00059 #define OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
00060 if (flags != 0) {
00061 if ((ret = __db_fchk(dbenv, "memp_fget", flags, OKFLAGS)) != 0)
00062 return (ret);
00063
00064 switch (flags) {
00065 case DB_MPOOL_CREATE:
00066 case DB_MPOOL_LAST:
00067 case DB_MPOOL_NEW:
00068 break;
00069 default:
00070 return (__db_ferr(dbenv, "memp_fget", 1));
00071 }
00072 }
00073
00074 ENV_ENTER(dbenv, ip);
00075
00076 rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0;
00077 if (rep_check && (ret = __op_rep_enter(dbenv)) != 0)
00078 goto err;
00079 ret = __memp_fget(dbmfp, pgnoaddr, flags, addrp);
00080
00081 /* __op_rep_exit() is called here only when the get failed. On success */
00082 /* the __op_rep_enter() above is intentionally left unbalanced by this */
00083 /* function; presumably it is balanced when the page is released again */
00084 /* -- TODO confirm against the page-put path. */
00085 if (ret != 0 && rep_check)
00086 (void)__op_rep_exit(dbenv);
00087
00088 /* Likewise ENV_LEAVE is only executed on failure; success returns without it. */
00089 err: if (ret != 0)
00090 ENV_LEAVE(dbenv, ip);
00091
00092 return (ret);
00093 }
00094
00095 /* __memp_fget -- */
00096 /* Get a page of the file: find it in the buffer cache or, on a miss, */
00097 /* allocate a buffer for it and either create it or read it in. */
00098 /* Returns 0 and stores the page address through addrp on success; on */
00099 /* failure returns non-zero and leaves NULL stored through addrp. */
00100 /* A page returned from the cache has had its buffer reference count */
00101 /* incremented here; the mapped-file shortcut below takes no reference. */
00102 int
00103 __memp_fget(dbmfp, pgnoaddr, flags, addrp)
00104 DB_MPOOLFILE *dbmfp; /* memory pool file handle */
00105 db_pgno_t *pgnoaddr; /* in: page wanted; out: page chosen for LAST/NEW */
00106 u_int32_t flags; /* 0, DB_MPOOL_CREATE, _LAST, _NEW or _FREE */
00107 void *addrp; /* used as a void **: receives the page address */
00108 {
00109 enum { FIRST_FOUND, FIRST_MISS, SECOND_FOUND, SECOND_MISS } state;
00110 BH *alloc_bhp, *bhp;
00111 DB_ENV *dbenv;
00112 DB_MPOOL *dbmp;
00113 DB_MPOOL_HASH *hp;
00114 MPOOL *c_mp, *mp;
00115 MPOOLFILE *mfp;
00116 roff_t mf_offset;
00117 u_int32_t n_cache, st_hsearch;
00118 int b_incr, extending, first, ret;
00119 /* Default result: no page address until we succeed. */
00120 *(void **)addrp = NULL;
00121
00122 dbenv = dbmfp->dbenv;
00123 dbmp = dbenv->mp_handle;
00124
00125 c_mp = NULL;
00126 mp = dbmp->reginfo[0].primary;
00127 mfp = dbmfp->mfp;
00128 mf_offset = R_OFFSET(dbmp->reginfo, mfp);
00129 alloc_bhp = bhp = NULL;
00130 hp = NULL;
00131 b_incr = extending = ret = 0;
00132 /* b_incr records whether we currently hold a reference we must undo on error. */
00133 switch (flags) {
00134 case DB_MPOOL_LAST:
00135 /* Use the file's current last_pgno, read under the system lock. */
00136 MPOOL_SYSTEM_LOCK(dbenv);
00137 *pgnoaddr = mfp->last_pgno;
00138 MPOOL_SYSTEM_UNLOCK(dbenv);
00139 break;
00140 case DB_MPOOL_NEW:
00141 /* A new page has no number yet, so there is nothing to look up: */
00142 /* skip the mapped-file check and the cache search and go straight */
00143 /* to allocation, where *pgnoaddr is assigned (last_pgno + 1). */
00144
00145 goto alloc;
00146 case DB_MPOOL_CREATE:
00147 default:
00148 break;
00149 }
00150
00151 /* Mapped-file shortcut. */
00152 /* */
00153 /* When all three conditions hold: */
00154 /* - the handle has a mapping (dbmfp->addr != NULL), */
00155 /* - the shared file state still has MP_CAN_MMAP set, and */
00156 /* - the page number is <= mfp->orig_last_pgno, */
00157 /* the page is returned as a pointer straight into the mapping, computed */
00158 /* as base address + page number * page size. */
00159 /* */
00160 /* NOTE(review): orig_last_pgno is presumably the last page that existed */
00161 /* when the file was mapped, i.e. the extent the mapping covers -- confirm */
00162 /* where the field is set. */
00163 /* */
00164 /* On this path no buffer header is looked up, no reference count is */
00165 /* taken and no hash bucket is locked; the function returns immediately. */
00166 /* The st_map statistic is incremented without any lock being taken in */
00167 /* this function. */
00168 /* */
00169 /* DB_MPOOL_NEW never reaches this test (it jumped to alloc above); */
00170 /* every other flag value, including DB_MPOOL_FREE, does. */
00171
00172
00173 if (dbmfp->addr != NULL &&
00174 F_ISSET(mfp, MP_CAN_MMAP) && *pgnoaddr <= mfp->orig_last_pgno) {
00175 *(void **)addrp = (u_int8_t *)dbmfp->addr +
00176 (*pgnoaddr * mfp->stat.st_pagesize);
00177 ++mfp->stat.st_map;
00178 return (0);
00179 }
00180
00181 hb_search:
00182 /* Pick the cache region and, within it, the hash bucket for this page. */
00183 /* Both are derived from the file's region offset and the page number. */
00184 /* We come back here after allocating, because the page may have been */
00185 /* entered into the cache by someone else in the meantime. */
00186
00187 n_cache = NCACHE(mp, mf_offset, *pgnoaddr);
00188 c_mp = dbmp->reginfo[n_cache].primary;
00189 hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
00190 hp = &hp[NBUCKET(c_mp, mf_offset, *pgnoaddr)];
00191
00192 /* Walk the bucket's chain, under the bucket mutex, looking for the page. */
00193 retry: st_hsearch = 0;
00194 MUTEX_LOCK(dbenv, hp->mtx_hash);
00195 for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
00196 bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
00197 ++st_hsearch;
00198 if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
00199 continue;
00200
00201 /* Match: same page number and same file. */
00202 /* */
00203 /* Take a reference on the buffer. The count is compared against */
00204 /* UINT16_MAX first so that the increment below cannot wrap it; */
00205 /* hitting the limit is treated as fatal (__db_panic). */
00206
00207 if (bhp->ref == UINT16_MAX) {
00208 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00209 /* b_incr is still 0 here, so err will not touch bhp or the bucket. */
00210 __db_err(dbenv,
00211 "%s: page %lu: reference count overflow",
00212 __memp_fn(dbmfp), (u_long)bhp->pgno);
00213 ret = __db_panic(dbenv, EINVAL);
00214 goto err;
00215 }
00216 ++bhp->ref;
00217 b_incr = 1;
00218
00219 /* Wait while the buffer has BH_LOCKED set (skipped entirely when */
00220 /* the environment has DB_ENV_NOLOCKING set). */
00221 /* NOTE(review): BH_LOCKED presumably marks a buffer that another */
00222 /* thread is reading or writing -- confirm where it is set. */
00223 /* The wait itself is done by acquiring and immediately releasing */
00224 /* the buffer's own mutex with the bucket mutex dropped. */
00225
00226 for (first = 1; F_ISSET(bhp, BH_LOCKED) &&
00227 !F_ISSET(dbenv, DB_ENV_NOLOCKING); first = 0) {
00228 /* On the second and later passes, if ref_sync is non-zero, give */
00229 /* up our reference, release the bucket, yield and restart the */
00230 /* whole bucket search. */
00231 /* NOTE(review): ref_sync presumably means a sync is waiting for */
00232 /* references to drain -- TODO confirm. */
00233 if (!first && bhp->ref_sync != 0) {
00234 --bhp->ref;
00235 b_incr = 0;
00236 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00237 __os_yield(dbenv, 1);
00238 goto retry;
00239 }
00240
00241 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00242
00243 /* The bucket mutex is released before blocking on the buffer */
00244 /* mutex; our reference (taken above) stays in place meanwhile. */
00245 /* After the first pass, yield the processor before trying */
00246 /* again (presumably to let the buffer's holder make progress). */
00247
00248 if (!first)
00249 __os_yield(dbenv, 1);
00250
00251 MUTEX_LOCK(dbenv, bhp->mtx_bh);
00252 /* Acquired only to wait; nothing is done while holding it. */
00253 MUTEX_UNLOCK(dbenv, bhp->mtx_bh);
00254 MUTEX_LOCK(dbenv, hp->mtx_hash);
00255 }
00256
00257 ++mfp->stat.st_cache_hit;
00258 break;
00259 }
00260
00261 /* Update the bucket search statistics for this cache region: */
00262 /* one more search, the longest chain walked so far, and the running */
00263 /* total of buffers examined. */
00264
00265 ++c_mp->stat.st_hash_searches;
00266 if (st_hsearch > c_mp->stat.st_hash_longest)
00267 c_mp->stat.st_hash_longest = st_hsearch;
00268 c_mp->stat.st_hash_examined += st_hsearch;
00269
00270 /* Classify where we are. The hash bucket mutex is held at this point. */
00271 /* */
00272 /* Two facts determine the state: */
00273 /* bhp != NULL -- the search above found the page; */
00274 /* alloc_bhp != NULL -- we have already been through the alloc code */
00275 /* once and are holding a freshly allocated buffer. */
00276 /* */
00277 /* FIRST_FOUND: found on the first search; use (or free) the buffer. */
00278 /* FIRST_MISS: not found and nothing allocated yet; drop the bucket */
00279 /* mutex, allocate a buffer, then search again. */
00280 /* SECOND_FOUND: we allocated, but the page appeared in the cache in */
00281 /* the meantime; free our allocation and use the one */
00282 /* found (unless DB_MPOOL_NEW, which must retry). */
00283 /* SECOND_MISS: we allocated and the page is still absent; install */
00284 /* our buffer in the bucket. */
00285 /* */
00286 /* The search is repeated after allocating because the bucket mutex is */
00287 /* not held across the allocation. */
00288
00289
00290
00291
00292
00293 state = bhp == NULL ?
00294 (alloc_bhp == NULL ? FIRST_MISS : SECOND_MISS) :
00295 (alloc_bhp == NULL ? FIRST_FOUND : SECOND_FOUND);
00296 switch (state) {
00297 case FIRST_FOUND:
00298 /* DB_MPOOL_FREE: discard the buffer. That is only done when ours is */
00299 /* the sole reference (ref == 1 after the increment above); any other */
00300 /* count means someone else has the page pinned, which is fatal. */
00301 /* NOTE(review): the success path returns without unlocking the bucket, */
00302 /* so __memp_bhfree presumably releases hp->mtx_hash itself -- confirm. */
00303 if (flags == DB_MPOOL_FREE) {
00304 if (bhp->ref == 1)
00305 return (__memp_bhfree(
00306 dbmp, hp, bhp, BH_FREE_FREEMEM));
00307 __db_err(dbenv,
00308 "File %s: freeing pinned buffer for page %lu",
00309 __memp_fns(dbmp, mfp), (u_long)*pgnoaddr);
00310 ret = __db_panic(dbenv, EINVAL);
00311 goto err;
00312 }
00313
00314 /* Otherwise simply use the buffer we found. */
00315 break;
00316 case FIRST_MISS:
00317 /* Not in the cache. Release the bucket mutex before allocating: */
00318 /* the code below takes the system lock and allocates from the */
00319 /* region, neither of which is done while holding the bucket here. */
00320
00321
00322 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00323
00324 /* Asked to free a page that is not cached: nothing to do. */
00325
00326
00327 if (flags == DB_MPOOL_FREE)
00328 return (0);
00329
00330 alloc:
00331 /* Entered by falling through from FIRST_MISS, by the DB_MPOOL_NEW */
00332 /* jump at the top, and by the retry jumps below. No bucket mutex is */
00333 /* held here. COMPQUIET presumably only quiets a compiler warning */
00334 /* about n_cache -- confirm the macro's definition. */
00335 COMPQUIET(n_cache, 0);
00336 /* Decide, under the system lock, whether the request is legal and whether it extends the file. */
00337 extending = ret = 0;
00338 MPOOL_SYSTEM_LOCK(dbenv);
00339 switch (flags) {
00340 case DB_MPOOL_NEW:
00341 extending = 1;
00342 if (mfp->maxpgno != 0 &&
00343 mfp->last_pgno >= mfp->maxpgno) {
00344 __db_err(dbenv, "%s: file limited to %lu pages",
00345 __memp_fn(dbmfp), (u_long)mfp->maxpgno);
00346 ret = ENOSPC;
00347 } else
00348 *pgnoaddr = mfp->last_pgno + 1;
00349 break;
00350 case DB_MPOOL_CREATE:
00351 if (mfp->maxpgno != 0 && *pgnoaddr > mfp->maxpgno) {
00352 __db_err(dbenv, "%s: file limited to %lu pages",
00353 __memp_fn(dbmfp), (u_long)mfp->maxpgno);
00354 ret = ENOSPC;
00355 } else
00356 extending = *pgnoaddr > mfp->last_pgno;
00357 break;
00358 default:
00359 ret = *pgnoaddr > mfp->last_pgno ? DB_PAGE_NOTFOUND : 0;
00360 break;
00361 }
00362 MPOOL_SYSTEM_UNLOCK(dbenv);
00363 if (ret != 0)
00364 goto err;
00365
00366 /* Recompute the cache region now that the page number is final. */
00367 /* This is required for DB_MPOOL_NEW, which reaches alloc without */
00368 /* having gone through hb_search, so n_cache and c_mp are not yet set */
00369 /* for the page it has just been assigned. */
00370
00371 mf_offset = R_OFFSET(dbmp->reginfo, mfp);
00372 n_cache = NCACHE(mp, mf_offset, *pgnoaddr);
00373 c_mp = dbmp->reginfo[n_cache].primary;
00374
00375 /* Allocate a buffer from that region; the result lands in alloc_bhp. */
00376 if ((ret = __memp_alloc(dbmp,
00377 &dbmp->reginfo[n_cache], mfp, 0, NULL, &alloc_bhp)) != 0)
00378 goto err;
00379 #ifdef DIAGNOSTIC
00380 if ((uintptr_t)alloc_bhp->buf & (sizeof(size_t) - 1)) {
00381 __db_err(dbenv,
00382 "DB_MPOOLFILE->get: buffer data is NOT size_t aligned");
00383 ret = __db_panic(dbenv, EINVAL);
00384 goto err;
00385 }
00386 #endif
00387
00388 /* When extending the file, retake the system lock for the last_pgno */
00389 /* re-check and update below. It is released either in the region */
00390 /* mismatch branch or right after last_pgno has been updated. */
00391 if (extending)
00392 MPOOL_SYSTEM_LOCK(dbenv);
00393
00394 /* The system lock was not held across __memp_alloc, so last_pgno may */
00395 /* have moved since the page number was chosen above. */
00396 /* */
00397 /* For DB_MPOOL_NEW the page number is therefore re-derived from the */
00398 /* current last_pgno. If it changed, the new number may belong to a */
00399 /* different cache region than the one the buffer was allocated from. */
00400 /* In that case the buffer cannot be used: */
00401 /* - drop the system lock, */
00402 /* - return the buffer to its region under the region lock and */
00403 /* decrement that region's page count, */
00404 /* - clear alloc_bhp and start over at alloc. */
00405 /* */
00406 /* If the region is unchanged, the buffer is kept and we continue */
00407 /* with the updated page number. */
00408 /* */
00409 /* DB_MPOOL_NEW always has extending set, so the system lock is held */
00410 /* whenever this test can be true. */
00411
00412
00413
00414
00415
00416
00417
00418
00419 if (flags == DB_MPOOL_NEW && *pgnoaddr != mfp->last_pgno + 1) {
00420 *pgnoaddr = mfp->last_pgno + 1;
00421 if (n_cache != NCACHE(mp, mf_offset, *pgnoaddr)) {
00422 /* Wrong region for the new page number: undo and retry. */
00423 /* n_cache and c_mp still describe the region the buffer */
00424 /* came from, which is the one it must be freed to. */
00425
00426 MPOOL_SYSTEM_UNLOCK(dbenv);
00427
00428 MPOOL_REGION_LOCK(
00429 dbenv, &dbmp->reginfo[n_cache]);
00430 __db_shalloc_free(
00431 &dbmp->reginfo[n_cache], alloc_bhp);
00432 c_mp->stat.st_pages--;
00433 MPOOL_REGION_UNLOCK(
00434 dbenv, &dbmp->reginfo[n_cache]);
00435
00436 alloc_bhp = NULL;
00437 goto alloc;
00438 }
00439 }
00440
00441 /* Record the file extension: advance last_pgno if the page lies */
00442 /* beyond it, then drop the system lock taken above. */
00443 /* NOTE(review): ret is always 0 when this point is reached (it was */
00444 /* tested after the flag switch and after __memp_alloc), so the */
00445 /* "ret != 0" test inside this block appears to be dead code. */
00446 if (extending) {
00447 if (*pgnoaddr > mfp->last_pgno)
00448 mfp->last_pgno = *pgnoaddr;
00449
00450 MPOOL_SYSTEM_UNLOCK(dbenv);
00451 if (ret != 0)
00452 goto err;
00453 }
00454 goto hb_search; /* search again, now holding alloc_bhp */
00455 case SECOND_FOUND:
00456 /* We allocated a buffer, but the second search found the page */
00457 /* already in the cache. Use the cached buffer (unless */
00458 /* DB_MPOOL_NEW) and give back the one we allocated. */
00459 /* */
00460 /* The region lock is taken only after the bucket mutex has been */
00461 /* released, and the bucket is re-acquired afterwards. The */
00462 /* reference taken during the search is still held while the */
00463 /* bucket is unlocked. */
00464
00465
00466
00467 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00468 MPOOL_REGION_LOCK(dbenv, &dbmp->reginfo[n_cache]);
00469 __db_shalloc_free(&dbmp->reginfo[n_cache], alloc_bhp);
00470 c_mp->stat.st_pages--;
00471 alloc_bhp = NULL;
00472 MPOOL_REGION_UNLOCK(dbenv, &dbmp->reginfo[n_cache]);
00473
00474 /* DB_MPOOL_NEW does not accept a buffer that was already in the */
00475 /* cache: release our reference on it and go back to alloc, where */
00476 /* a fresh page number is chosen. */
00477 /* NOTE(review): the --bhp->ref below runs after hp->mtx_hash was */
00478 /* released above, whereas the other decrements in this function */
00479 /* are made with the bucket mutex held -- confirm this is safe. */
00480
00481
00482 if (flags == DB_MPOOL_NEW) {
00483 --bhp->ref;
00484 b_incr = 0;
00485 goto alloc;
00486 }
00487
00488 /* We can use the page we found: re-acquire the bucket mutex. */
00489 MUTEX_LOCK(dbenv, hp->mtx_hash);
00490 break;
00491 case SECOND_MISS:
00492 /* Still not in the cache after allocating: the buffer we */
00493 /* allocated becomes the page's buffer. Ownership moves from */
00494 /* alloc_bhp to bhp so the err path does not free it twice. */
00495
00496
00497 bhp = alloc_bhp;
00498 alloc_bhp = NULL;
00499
00500 /* From here on the buffer counts as referenced by us: with */
00501 /* b_incr set and ref == 1, any later "goto err" hands the */
00502 /* buffer to __memp_bhfree instead of leaking it. */
00503
00504
00505
00506
00507 b_incr = 1;
00508
00509 /* Initialize the header and append it to the bucket's chain. */
00510 memset(bhp, 0, sizeof(BH));
00511 bhp->ref = 1;
00512 bhp->priority = UINT32_MAX;
00513 bhp->pgno = *pgnoaddr;
00514 bhp->mf_offset = mf_offset;
00515 SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
00516
00517 /* Allocate the per-buffer mutex. */
00518 /* NOTE(review): if this fails we reach err with the header */
00519 /* already linked into the bucket, no buffer mutex, and */
00520 /* mfp->block_cnt not yet incremented; err then calls */
00521 /* __memp_bhfree -- verify it tolerates that state. */
00522
00523 if ((ret = __mutex_alloc(
00524 dbenv, MTX_MPOOL_BUFFER, 0, &bhp->mtx_bh)) != 0)
00525 goto err;
00526 /* hash_priority mirrors the priority of the first buffer in the bucket. */
00527 hp->hash_priority =
00528 SH_TAILQ_FIRSTP(&hp->hash_bucket, __bh)->priority;
00529
00530 /* A page created by extending the file starts out dirty. */
00531 if (extending) {
00532 ++hp->hash_page_dirty;
00533 F_SET(bhp, BH_DIRTY | BH_DIRTY_CREATE);
00534 }
00535
00536 /* Fill in the page contents. */
00537 /* */
00538 /* Extending the file (new page): */
00539 /* - clear_len not set: zero the whole page; */
00540 /* - clear_len set: zero only the first clear_len bytes and, in */
00541 /* DIAGNOSTIC or UMRW builds, fill the remainder with */
00542 /* CLEAR_BYTE; */
00543 /* - if the page was requested with DB_MPOOL_CREATE and the file */
00544 /* has a non-zero ftype, set BH_CALLPGIN so the page-in step */
00545 /* further down is run on it; */
00546 /* - count it in st_page_create. */
00547 /* */
00548 /* Existing page: */
00549 /* - set BH_TRASH, which makes the code after the switch read */
00550 /* the page in via __memp_pgread; */
00551 /* - count it in st_cache_miss. */
00552
00553
00554 if (extending) {
00555 if (mfp->clear_len == DB_CLEARLEN_NOTSET)
00556 memset(bhp->buf, 0, mfp->stat.st_pagesize);
00557 else {
00558 memset(bhp->buf, 0, mfp->clear_len);
00559 #if defined(DIAGNOSTIC) || defined(UMRW)
00560 memset(bhp->buf + mfp->clear_len, CLEAR_BYTE,
00561 mfp->stat.st_pagesize - mfp->clear_len);
00562 #endif
00563 }
00564
00565 if (flags == DB_MPOOL_CREATE && mfp->ftype != 0)
00566 F_SET(bhp, BH_CALLPGIN);
00567
00568 ++mfp->stat.st_page_create;
00569 } else {
00570 F_SET(bhp, BH_TRASH);
00571 ++mfp->stat.st_cache_miss;
00572 }
00573
00574 /* Count the new buffer against the file, under the file's mutex. */
00575 MUTEX_LOCK(dbenv, mfp->mutex);
00576 ++mfp->block_cnt;
00577 MUTEX_UNLOCK(dbenv, mfp->mutex);
00578 }
00579
00580 DB_ASSERT(bhp->ref != 0);
00581
00582 /* For a buffer that was already in the cache and whose only */
00583 /* reference is now ours, set its priority to the maximum and move it */
00584 /* to the tail of the bucket, then refresh hash_priority from the new */
00585 /* head. A SECOND_MISS buffer was inserted at the tail with that */
00586 /* priority already, so it is skipped. */
00587
00588 if (state != SECOND_MISS && bhp->ref == 1) {
00589 bhp->priority = UINT32_MAX;
00590 SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
00591 SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
00592 hp->hash_priority =
00593 SH_TAILQ_FIRSTP(&hp->hash_bucket, __bh)->priority;
00594 }
00595
00596 /* A buffer flagged BH_TRASH has no valid contents and is read in */
00597 /* here. This covers both a buffer we just installed for an existing */
00598 /* page and a cached buffer that was found with the flag already set. */
00599 /* */
00600 /* __memp_pgread is passed the bucket mutex, which is held at this */
00601 /* point (presumably so it can release it around the I/O -- confirm). */
00602 /* Its last argument is 1 when DB_MPOOL_CREATE is set in flags */
00603 /* (LF_ISSET presumably tests the local "flags" -- confirm), and 0 */
00604 /* otherwise; presumably it permits creating a page that does not */
00605 /* exist in the file yet -- TODO confirm against __memp_pgread. */
00606 /* */
00607 /* On failure we go to err with b_incr set and the bucket held. */
00608
00609 if (F_ISSET(bhp, BH_TRASH) &&
00610 (ret = __memp_pgread(dbmfp,
00611 hp->mtx_hash, bhp, LF_ISSET(DB_MPOOL_CREATE) ? 1 : 0)) != 0)
00612 goto err;
00613
00614 /* If the buffer is flagged BH_CALLPGIN, run __memp_pg on it (third */
00615 /* argument 1) and clear the flag once that succeeds, so it is not */
00616 /* repeated for this buffer. */
00617
00618
00619 if (F_ISSET(bhp, BH_CALLPGIN)) {
00620 if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
00621 goto err;
00622 F_CLR(bhp, BH_CALLPGIN);
00623 }
00624 #ifdef DIAGNOSTIC
00625 __memp_check_order(hp);
00626 #endif
00627 /* Done with the bucket; our reference remains on the buffer. */
00628 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00629
00630 #ifdef DIAGNOSTIC
00631 /* Diagnostic builds count pages handed out per handle, under the system lock. */
00632 MPOOL_SYSTEM_LOCK(dbenv);
00633 ++dbmfp->pinref;
00634 MPOOL_SYSTEM_UNLOCK(dbenv);
00635
00636 /* With DB_ENV_YIELDCPU set, yield the processor on every get */
00637 /* (presumably to make scheduling-dependent bugs easier to hit). */
00638
00639
00640 if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
00641 __os_yield(dbenv, 1);
00642 #endif
00643
00644 *(void **)addrp = bhp->buf;
00645 return (0);
00646
00647 err:
00648 /* Undo the reference we hold, if any. If ours is the only one the */
00649 /* buffer is discarded through __memp_bhfree; otherwise the count is */
00650 /* decremented and the bucket mutex released. The bucket is only */
00651 /* unlocked in the else branch, so __memp_bhfree presumably unlocks it. */
00652 if (b_incr) {
00653 if (bhp->ref == 1)
00654 (void)__memp_bhfree(dbmp, hp, bhp, BH_FREE_FREEMEM);
00655 else {
00656 --bhp->ref;
00657 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00658 }
00659 }
00660
00661 /* Give back a buffer that was allocated but never installed. */
00662 if (alloc_bhp != NULL) {
00663 MPOOL_REGION_LOCK(dbenv, &dbmp->reginfo[n_cache]);
00664 __db_shalloc_free(&dbmp->reginfo[n_cache], alloc_bhp);
00665 c_mp->stat.st_pages--;
00666 MPOOL_REGION_UNLOCK(dbenv, &dbmp->reginfo[n_cache]);
00667 }
00668
00669 return (ret);
00670 }