00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "db_config.h"
00011
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014
00015 #include <stdlib.h>
00016 #endif
00017
00018 #include "db_int.h"
00019 #include "dbinc/db_shash.h"
00020 #include "dbinc/log.h"
00021 #include "dbinc/mp.h"
00022
00023 typedef struct {
00024 DB_MPOOL_HASH *track_hp;
00025
00026 roff_t track_off;
00027 db_pgno_t track_pgno;
00028 } BH_TRACK;
00029
00030 static int __bhcmp __P((const void *, const void *));
00031 static int __memp_close_flush_files __P((DB_ENV *, DB_MPOOL *, int));
00032 static int __memp_sync_files __P((DB_ENV *, DB_MPOOL *));
00033
00034
00035
00036
00037
00038
00039
00040 int
00041 __memp_sync_pp(dbenv, lsnp)
00042 DB_ENV *dbenv;
00043 DB_LSN *lsnp;
00044 {
00045 DB_THREAD_INFO *ip;
00046 int ret;
00047
00048 PANIC_CHECK(dbenv);
00049 ENV_REQUIRES_CONFIG(dbenv,
00050 dbenv->mp_handle, "memp_sync", DB_INIT_MPOOL);
00051
00052
00053
00054
00055
00056 if (lsnp != NULL)
00057 ENV_REQUIRES_CONFIG(dbenv,
00058 dbenv->lg_handle, "memp_sync", DB_INIT_LOG);
00059
00060 ENV_ENTER(dbenv, ip);
00061 REPLICATION_WRAP(dbenv, (__memp_sync(dbenv, lsnp)), ret);
00062 ENV_LEAVE(dbenv, ip);
00063 return (ret);
00064 }
00065
00066
00067
00068
00069
00070
00071
00072 int
00073 __memp_sync(dbenv, lsnp)
00074 DB_ENV *dbenv;
00075 DB_LSN *lsnp;
00076 {
00077 DB_MPOOL *dbmp;
00078 MPOOL *mp;
00079 int ret;
00080
00081 dbmp = dbenv->mp_handle;
00082 mp = dbmp->reginfo[0].primary;
00083
00084
00085 if (lsnp != NULL) {
00086 MPOOL_SYSTEM_LOCK(dbenv);
00087 if (log_compare(lsnp, &mp->lsn) <= 0) {
00088 *lsnp = mp->lsn;
00089
00090 MPOOL_SYSTEM_UNLOCK(dbenv);
00091 return (0);
00092 }
00093 MPOOL_SYSTEM_UNLOCK(dbenv);
00094 }
00095
00096 if ((ret = __memp_sync_int(dbenv, NULL, 0, DB_SYNC_CACHE, NULL)) != 0)
00097 return (ret);
00098
00099 if (lsnp != NULL) {
00100 MPOOL_SYSTEM_LOCK(dbenv);
00101 if (log_compare(lsnp, &mp->lsn) > 0)
00102 mp->lsn = *lsnp;
00103 MPOOL_SYSTEM_UNLOCK(dbenv);
00104 }
00105
00106 return (0);
00107 }
00108
00109
00110
00111
00112
00113
00114
00115 int
00116 __memp_fsync_pp(dbmfp)
00117 DB_MPOOLFILE *dbmfp;
00118 {
00119 DB_ENV *dbenv;
00120 DB_THREAD_INFO *ip;
00121 int ret;
00122
00123 dbenv = dbmfp->dbenv;
00124
00125 PANIC_CHECK(dbenv);
00126 MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->sync");
00127
00128 ENV_ENTER(dbenv, ip);
00129 REPLICATION_WRAP(dbenv, (__memp_fsync(dbmfp)), ret);
00130 ENV_LEAVE(dbenv, ip);
00131 return (ret);
00132 }
00133
00134
00135
00136
00137
00138
00139
00140 int
00141 __memp_fsync(dbmfp)
00142 DB_MPOOLFILE *dbmfp;
00143 {
00144 MPOOLFILE *mfp;
00145
00146 mfp = dbmfp->mfp;
00147
00148
00149
00150
00151
00152
00153
00154 if (F_ISSET(dbmfp, MP_READONLY))
00155 return (0);
00156
00157 if (F_ISSET(dbmfp->mfp, MP_TEMP) || dbmfp->mfp->no_backing_file)
00158 return (0);
00159
00160 if (mfp->file_written == 0)
00161 return (0);
00162
00163 return (__memp_sync_int(dbmfp->dbenv, dbmfp, 0, DB_SYNC_FILE, NULL));
00164 }
00165
00166
00167
00168
00169
00170
00171
00172 int
00173 __mp_xxx_fh(dbmfp, fhp)
00174 DB_MPOOLFILE *dbmfp;
00175 DB_FH **fhp;
00176 {
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191 if ((*fhp = dbmfp->fhp) != NULL)
00192 return (0);
00193
00194 return (__memp_sync_int(dbmfp->dbenv, dbmfp, 0, DB_SYNC_FILE, NULL));
00195 }
00196
00197
00198
00199
00200
00201
00202
00203
00204 int
00205 __memp_sync_int(dbenv, dbmfp, trickle_max, op, wrotep)
00206 DB_ENV *dbenv;
00207 DB_MPOOLFILE *dbmfp;
00208 u_int32_t trickle_max, *wrotep;
00209 db_sync_op op;
00210 {
00211 BH *bhp;
00212 BH_TRACK *bharray;
00213 DB_MPOOL *dbmp;
00214 DB_MPOOL_HASH *hp;
00215 MPOOL *c_mp, *mp;
00216 MPOOLFILE *mfp;
00217 db_mutex_t mutex;
00218 roff_t last_mf_offset;
00219 u_int32_t ar_cnt, ar_max, i, n_cache, remaining, wrote;
00220 int filecnt, hb_lock, maxopenfd, maxwrite, maxwrite_sleep;
00221 int pass, ret, t_ret, wait_cnt, write_cnt;
00222
00223 dbmp = dbenv->mp_handle;
00224 mp = dbmp->reginfo[0].primary;
00225 last_mf_offset = INVALID_ROFF;
00226 filecnt = pass = wrote = 0;
00227
00228
00229 MPOOL_SYSTEM_LOCK(dbenv);
00230 maxopenfd = mp->mp_maxopenfd;
00231 maxwrite = mp->mp_maxwrite;
00232 maxwrite_sleep = mp->mp_maxwrite_sleep;
00233 MPOOL_SYSTEM_UNLOCK(dbenv);
00234
00235
00236 ar_max = mp->nreg * mp->htab_buckets;
00237 if ((ret =
00238 __os_malloc(dbenv, ar_max * sizeof(BH_TRACK), &bharray)) != 0)
00239 return (ret);
00240
00241
00242
00243
00244
00245
00246 for (ar_cnt = 0, n_cache = 0; n_cache < mp->nreg; ++n_cache) {
00247 c_mp = dbmp->reginfo[n_cache].primary;
00248
00249 hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
00250 for (i = 0; i < c_mp->htab_buckets; i++, hp++) {
00251
00252
00253
00254
00255
00256
00257 if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
00258 continue;
00259
00260 MUTEX_LOCK(dbenv, hp->mtx_hash);
00261 for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
00262 bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
00263
00264 if (bhp->ref == 0 && !F_ISSET(bhp, BH_DIRTY))
00265 continue;
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278 if (op == DB_SYNC_FILE &&
00279 !F_ISSET(bhp, BH_DIRTY))
00280 continue;
00281
00282 mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
00283
00284
00285
00286
00287
00288
00289 if (mfp->no_backing_file ||
00290 F_ISSET(mfp, MP_TEMP))
00291 continue;
00292
00293
00294
00295
00296
00297 if (dbmfp != NULL && mfp != dbmfp->mfp)
00298 continue;
00299
00300
00301
00302
00303
00304 if (dbmfp == NULL && mfp->lsn_off == -1)
00305 continue;
00306
00307
00308 bharray[ar_cnt].track_hp = hp;
00309 bharray[ar_cnt].track_pgno = bhp->pgno;
00310 bharray[ar_cnt].track_off = bhp->mf_offset;
00311 ar_cnt++;
00312
00313
00314
00315
00316
00317
00318
00319 if (ar_cnt >= ar_max) {
00320 if ((ret = __os_realloc(dbenv,
00321 (ar_max * 2) * sizeof(BH_TRACK),
00322 &bharray)) != 0)
00323 break;
00324 ar_max *= 2;
00325 }
00326 }
00327 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00328
00329 if (ret != 0)
00330 goto err;
00331 }
00332 }
00333
00334
00335 if (ar_cnt == 0)
00336 goto done;
00337
00338
00339
00340
00341
00342
00343 if (ar_cnt > 1)
00344 qsort(bharray, ar_cnt, sizeof(BH_TRACK), __bhcmp);
00345
00346
00347
00348
00349
00350 if (op == DB_SYNC_TRICKLE && ar_cnt > trickle_max)
00351 ar_cnt = trickle_max;
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361 if (LOGGING_ON(dbenv) && (ret = __log_flush(dbenv, NULL)) != 0)
00362 goto err;
00363
00364
00365
00366
00367
00368
00369 for (i = pass = write_cnt = 0, remaining = ar_cnt; remaining > 0; ++i) {
00370 if (i >= ar_cnt) {
00371 i = 0;
00372 ++pass;
00373 __os_sleep(dbenv, 1, 0);
00374 }
00375 if ((hp = bharray[i].track_hp) == NULL)
00376 continue;
00377
00378
00379 mutex = hp->mtx_hash;
00380 MUTEX_LOCK(dbenv, mutex);
00381 for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
00382 bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
00383 if (bhp->pgno == bharray[i].track_pgno &&
00384 bhp->mf_offset == bharray[i].track_off)
00385 break;
00386
00387
00388
00389
00390
00391
00392
00393
00394 if (bhp == NULL || (bhp->ref == 0 && !F_ISSET(bhp, BH_DIRTY))) {
00395 MUTEX_UNLOCK(dbenv, mutex);
00396 --remaining;
00397 bharray[i].track_hp = NULL;
00398 continue;
00399 }
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412 if (F_ISSET(bhp, BH_LOCKED) || (bhp->ref != 0 && pass < 2)) {
00413 MUTEX_UNLOCK(dbenv, mutex);
00414 if (op != DB_SYNC_CACHE && op != DB_SYNC_FILE) {
00415 --remaining;
00416 bharray[i].track_hp = NULL;
00417 }
00418 continue;
00419 }
00420
00421
00422
00423
00424
00425
00426
00427 bhp->ref_sync = bhp->ref;
00428
00429
00430 ++bhp->ref;
00431 F_SET(bhp, BH_LOCKED);
00432 MUTEX_LOCK(dbenv, bhp->mtx_bh);
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447 MUTEX_UNLOCK(dbenv, mutex);
00448 for (wait_cnt = 1;
00449 bhp->ref_sync != 0 && wait_cnt < 4; ++wait_cnt)
00450 __os_sleep(dbenv, 1, 0);
00451 MUTEX_LOCK(dbenv, mutex);
00452 hb_lock = 1;
00453
00454
00455
00456
00457
00458 if (maxopenfd != 0 && bhp->mf_offset != last_mf_offset) {
00459 if (++filecnt >= maxopenfd) {
00460 filecnt = 0;
00461 if ((ret = __memp_close_flush_files(
00462 dbenv, dbmp, 1)) != 0)
00463 break;
00464 }
00465 last_mf_offset = bhp->mf_offset;
00466 }
00467
00468
00469
00470
00471
00472 if (bhp->ref_sync == 0) {
00473 --remaining;
00474 bharray[i].track_hp = NULL;
00475 }
00476
00477
00478
00479
00480
00481 if (bhp->ref_sync == 0 && F_ISSET(bhp, BH_DIRTY)) {
00482 hb_lock = 0;
00483 MUTEX_UNLOCK(dbenv, mutex);
00484
00485 mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
00486 if ((ret = __memp_bhwrite(dbmp, hp, mfp, bhp, 1)) == 0)
00487 ++wrote;
00488 else
00489 __db_err(dbenv, "%s: unable to flush page: %lu",
00490 __memp_fns(dbmp, mfp), (u_long)bhp->pgno);
00491
00492
00493
00494
00495
00496 if (maxwrite != 0 && ++write_cnt >= maxwrite) {
00497 write_cnt = 0;
00498 __os_sleep(dbenv, 0, (u_long)maxwrite_sleep);
00499 }
00500 }
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518 if (F_ISSET(bhp, BH_LOCKED)) {
00519 F_CLR(bhp, BH_LOCKED);
00520 MUTEX_UNLOCK(dbenv, bhp->mtx_bh);
00521
00522 if (!hb_lock)
00523 MUTEX_LOCK(dbenv, mutex);
00524 }
00525
00526
00527
00528
00529
00530 bhp->ref_sync = 0;
00531
00532
00533 --bhp->ref;
00534 MUTEX_UNLOCK(dbenv, mutex);
00535
00536 if (ret != 0)
00537 break;
00538 }
00539
00540 done:
00541
00542
00543
00544
00545
00546
00547 if (ret == 0 && (op == DB_SYNC_CACHE || op == DB_SYNC_FILE)) {
00548 if (dbmfp == NULL)
00549 ret = __memp_sync_files(dbenv, dbmp);
00550 else
00551 ret = __os_fsync(dbenv, dbmfp->fhp);
00552 }
00553
00554
00555 if ((t_ret = __memp_close_flush_files(dbenv, dbmp, 0)) != 0 && ret == 0)
00556 ret = t_ret;
00557
00558 err: __os_free(dbenv, bharray);
00559 if (wrotep != NULL)
00560 *wrotep = wrote;
00561
00562 return (ret);
00563 }
00564
00565
00566
00567
00568
00569 static
00570 int __memp_sync_files(dbenv, dbmp)
00571 DB_ENV *dbenv;
00572 DB_MPOOL *dbmp;
00573 {
00574 DB_MPOOLFILE *dbmfp;
00575 MPOOL *mp;
00576 MPOOLFILE *mfp, *next_mfp;
00577 int need_discard_pass, ret, t_ret;
00578
00579 need_discard_pass = ret = 0;
00580 mp = dbmp->reginfo[0].primary;
00581
00582 MPOOL_SYSTEM_LOCK(dbenv);
00583 for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
00584 mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
00585 if (!mfp->file_written || mfp->no_backing_file ||
00586 mfp->deadfile || F_ISSET(mfp, MP_TEMP))
00587 continue;
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608 MUTEX_LOCK(dbenv, mfp->mutex);
00609 if (!mfp->file_written || mfp->deadfile) {
00610 MUTEX_UNLOCK(dbenv, mfp->mutex);
00611 continue;
00612 }
00613 MPOOL_SYSTEM_UNLOCK(dbenv);
00614 ++mfp->mpf_cnt;
00615 MUTEX_UNLOCK(dbenv, mfp->mutex);
00616
00617
00618
00619
00620
00621 MUTEX_LOCK(dbenv, dbmp->mutex);
00622 for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
00623 dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) {
00624 if (dbmfp->mfp != mfp || F_ISSET(dbmfp, MP_READONLY))
00625 continue;
00626
00627
00628
00629
00630
00631 ++dbmfp->ref;
00632 break;
00633 }
00634 MUTEX_UNLOCK(dbenv, dbmp->mutex);
00635
00636
00637 if (dbmfp == NULL) {
00638 if ((t_ret = __memp_mf_sync(dbmp, mfp, 0)) != 0) {
00639 __db_err(dbenv,
00640 "%s: unable to flush: %s", (char *)
00641 R_ADDR(dbmp->reginfo, mfp->path_off),
00642 db_strerror(t_ret));
00643 if (ret == 0)
00644 ret = t_ret;
00645 }
00646 } else {
00647 if ((t_ret =
00648 __os_fsync(dbenv, dbmfp->fhp)) != 0 && ret == 0)
00649 ret = t_ret;
00650
00651 if ((t_ret = __memp_fclose(dbmfp, 0)) != 0 && ret == 0)
00652 ret = t_ret;
00653 }
00654
00655
00656
00657
00658
00659
00660
00661
00662 MPOOL_SYSTEM_LOCK(dbenv);
00663 MUTEX_LOCK(dbenv, mfp->mutex);
00664 --mfp->mpf_cnt;
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680 if (mfp->mpf_cnt == 0 || (mfp->mpf_cnt == 1 &&
00681 dbmfp != NULL && F_ISSET(dbmfp, MP_FLUSH))) {
00682 mfp->file_written = 0;
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692 if (mfp->mpf_cnt == 0 && mfp->block_cnt == 0)
00693 need_discard_pass = 1;
00694 }
00695
00696
00697 MUTEX_UNLOCK(dbenv, mfp->mutex);
00698 }
00699
00700
00701
00702
00703
00704
00705
00706 if (need_discard_pass)
00707 for (mfp = SH_TAILQ_FIRST(
00708 &mp->mpfq, __mpoolfile); mfp != NULL; mfp = next_mfp) {
00709 next_mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile);
00710
00711
00712
00713
00714
00715
00716 if (mfp->block_cnt != 0 || mfp->mpf_cnt != 0)
00717 continue;
00718
00719 MUTEX_LOCK(dbenv, mfp->mutex);
00720 if (mfp->block_cnt == 0 && mfp->mpf_cnt == 0)
00721 (void)__memp_mf_discard(dbmp, mfp);
00722 else
00723 MUTEX_UNLOCK(dbenv, mfp->mutex);
00724 }
00725 MPOOL_SYSTEM_UNLOCK(dbenv);
00726
00727 return (ret);
00728 }
00729
00730
00731
00732
00733
00734
00735
00736 int
00737 __memp_mf_sync(dbmp, mfp, region_locked)
00738 DB_MPOOL *dbmp;
00739 MPOOLFILE *mfp;
00740 int region_locked;
00741 {
00742 DB_ENV *dbenv;
00743 DB_FH *fhp;
00744 int ret, t_ret;
00745 char *rpath;
00746
00747 dbenv = dbmp->dbenv;
00748
00749
00750
00751
00752
00753 if (!region_locked)
00754 MPOOL_SYSTEM_LOCK(dbenv);
00755
00756 if ((ret = __db_appname(dbenv, DB_APP_DATA,
00757 R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) == 0) {
00758 if ((ret = __os_open(dbenv, rpath, 0, 0, &fhp)) == 0) {
00759 ret = __os_fsync(dbenv, fhp);
00760 if ((t_ret =
00761 __os_closehandle(dbenv, fhp)) != 0 && ret == 0)
00762 ret = t_ret;
00763 }
00764 __os_free(dbenv, rpath);
00765 }
00766
00767 if (!region_locked)
00768 MPOOL_SYSTEM_UNLOCK(dbenv);
00769
00770 return (ret);
00771 }
00772
00773
00774
00775
00776
00777 static int
00778 __memp_close_flush_files(dbenv, dbmp, dosync)
00779 DB_ENV *dbenv;
00780 DB_MPOOL *dbmp;
00781 int dosync;
00782 {
00783 DB_MPOOLFILE *dbmfp;
00784 MPOOLFILE *mfp;
00785 int ret;
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800 retry: MUTEX_LOCK(dbenv, dbmp->mutex);
00801 for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
00802 dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
00803 if (F_ISSET(dbmfp, MP_FLUSH)) {
00804 F_CLR(dbmfp, MP_FLUSH);
00805 MUTEX_UNLOCK(dbenv, dbmp->mutex);
00806 if (dosync) {
00807
00808
00809
00810
00811
00812
00813
00814 mfp = dbmfp->mfp;
00815 if (mfp->mpf_cnt == 1) {
00816 MUTEX_LOCK(dbenv, mfp->mutex);
00817 if (mfp->mpf_cnt == 1)
00818 mfp->file_written = 0;
00819 MUTEX_UNLOCK(dbenv, mfp->mutex);
00820 }
00821 if ((ret = __os_fsync(dbenv, dbmfp->fhp)) != 0)
00822 return (ret);
00823 }
00824 if ((ret = __memp_fclose(dbmfp, 0)) != 0)
00825 return (ret);
00826 goto retry;
00827 }
00828 MUTEX_UNLOCK(dbenv, dbmp->mutex);
00829
00830 return (0);
00831 }
00832
00833 static int
00834 __bhcmp(p1, p2)
00835 const void *p1, *p2;
00836 {
00837 BH_TRACK *bhp1, *bhp2;
00838
00839 bhp1 = (BH_TRACK *)p1;
00840 bhp2 = (BH_TRACK *)p2;
00841
00842
00843 if (bhp1->track_off < bhp2->track_off)
00844 return (-1);
00845 if (bhp1->track_off > bhp2->track_off)
00846 return (1);
00847
00848
00849
00850
00851
00852
00853 if (bhp1->track_pgno < bhp2->track_pgno)
00854 return (-1);
00855 if (bhp1->track_pgno > bhp2->track_pgno)
00856 return (1);
00857 return (0);
00858 }