Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

mp_fput.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: mp_fput.c,v 12.7 2005/10/07 20:21:33 ubell Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #endif
00016 
00017 #include "db_int.h"
00018 #include "dbinc/db_shash.h"
00019 #include "dbinc/log.h"
00020 #include "dbinc/mp.h"
00021 
00022 static int __memp_reset_lru __P((DB_ENV *, REGINFO *));
00023 
00024 /*
00025  * __memp_fput_pp --
00026  *      DB_MPOOLFILE->put pre/post processing.
00027  *
00028  * PUBLIC: int __memp_fput_pp __P((DB_MPOOLFILE *, void *, u_int32_t));
00029  */
00030 int
00031 __memp_fput_pp(dbmfp, pgaddr, flags)
00032         DB_MPOOLFILE *dbmfp;
00033         void *pgaddr;
00034         u_int32_t flags;
00035 {
00036         DB_ENV *dbenv;
00037         DB_THREAD_INFO *ip;
00038         int ret, t_ret;
00039 
00040         dbenv = dbmfp->dbenv;
00041         PANIC_CHECK(dbenv);
00042 
00043         ENV_ENTER(dbenv, ip);
00044 
00045         ret = __memp_fput(dbmfp, pgaddr, flags);
00046         if (IS_ENV_REPLICATED(dbenv) &&
00047             (t_ret = __op_rep_exit(dbenv)) != 0 && ret == 0)
00048                 ret = t_ret;
00049 
00050         ENV_LEAVE(dbenv, ip);
00051         return (ret);
00052 }
00053 
00054 /*
00055  * __memp_fput --
00056  *      DB_MPOOLFILE->put.
00057  *
00058  * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
00059  */
00060 int
00061 __memp_fput(dbmfp, pgaddr, flags)
00062         DB_MPOOLFILE *dbmfp;
00063         void *pgaddr;
00064         u_int32_t flags;
00065 {
00066         BH *fbhp, *bhp, *prev;
00067         DB_ENV *dbenv;
00068         DB_MPOOL *dbmp;
00069         DB_MPOOL_HASH *hp;
00070         MPOOL *c_mp;
00071         MPOOLFILE *mfp;
00072         u_int32_t n_cache;
00073         int adjust, ret, t_ret;
00074 
00075         dbenv = dbmfp->dbenv;
00076         MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->put");
00077         dbmp = dbenv->mp_handle;
00078         ret = 0;
00079 
00080         /*
00081          * Check arguments, but don't fail because we want to unpin the page
00082          * regardless.  The problem is when running with replication.  There
00083          * is a reference count we incremented when __memp_fget was called,
00084          * and we need to unpin the page and decrement that reference count.
00085          * If we see flag problems, mark the page dirty.
00086          */
00087         if (flags) {
00088                 if (__db_fchk(dbenv, "memp_fput", flags,
00089                     DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD) != 0 ||
00090                     __db_fcchk(dbenv, "memp_fput", flags,
00091                     DB_MPOOL_CLEAN, DB_MPOOL_DIRTY) != 0) {
00092                         flags = DB_MPOOL_DIRTY;
00093                         ret = EINVAL;
00094                         DB_ASSERT(0);
00095                 }
00096 
00097                 if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
00098                         __db_err(dbenv,
00099                             "%s: dirty flag set for readonly file page",
00100                             __memp_fn(dbmfp));
00101                         flags = 0;
00102                         ret = EINVAL;
00103                         DB_ASSERT(0);
00104                 }
00105         }
00106 
00107         /*
00108          * If we're mapping the file, there's nothing to do.  Because we can
00109          * stop mapping the file at any time, we have to check on each buffer
00110          * to see if the address we gave the application was part of the map
00111          * region.
00112          */
00113         if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
00114             (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
00115                 return (0);
00116 
00117 #ifdef DIAGNOSTIC
00118         /*
00119          * Decrement the per-file pinned buffer count (mapped pages aren't
00120          * counted).
00121          */
00122         MPOOL_SYSTEM_LOCK(dbenv);
00123         if (dbmfp->pinref == 0) {
00124                 MPOOL_SYSTEM_UNLOCK(dbenv);
00125                 __db_err(dbenv,
00126                     "%s: more pages returned than retrieved", __memp_fn(dbmfp));
00127                 return (__db_panic(dbenv, EACCES));
00128         }
00129         --dbmfp->pinref;
00130         MPOOL_SYSTEM_UNLOCK(dbenv);
00131 #endif
00132 
00133         /* Convert a page address to a buffer header and hash bucket. */
00134         bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
00135         n_cache = NCACHE(dbmp->reginfo[0].primary, bhp->mf_offset, bhp->pgno);
00136         c_mp = dbmp->reginfo[n_cache].primary;
00137         hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
00138         hp = &hp[NBUCKET(c_mp, bhp->mf_offset, bhp->pgno)];
00139 
00140         MUTEX_LOCK(dbenv, hp->mtx_hash);
00141 
00142         /* Set/clear the page bits. */
00143         if (LF_ISSET(DB_MPOOL_CLEAN) &&
00144             F_ISSET(bhp, BH_DIRTY) && !F_ISSET(bhp, BH_DIRTY_CREATE)) {
00145                 DB_ASSERT(hp->hash_page_dirty != 0);
00146                 --hp->hash_page_dirty;
00147                 F_CLR(bhp, BH_DIRTY);
00148         }
00149         if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
00150                 ++hp->hash_page_dirty;
00151                 F_SET(bhp, BH_DIRTY);
00152         }
00153         if (LF_ISSET(DB_MPOOL_DISCARD))
00154                 F_SET(bhp, BH_DISCARD);
00155 
00156         /*
00157          * Check for a reference count going to zero.  This can happen if the
00158          * application returns a page twice.
00159          */
00160         if (bhp->ref == 0) {
00161                 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00162                 __db_err(dbenv, "%s: page %lu: unpinned page returned",
00163                     __memp_fn(dbmfp), (u_long)bhp->pgno);
00164                 return (__db_panic(dbenv, EACCES));
00165         }
00166 
00167         /* Note the activity so allocation won't decide to quit. */
00168         ++c_mp->put_counter;
00169 
00170         /*
00171          * Mark the file dirty.  Check for a dirty bit on the buffer as well
00172          * as the dirty flag because the buffer might have been marked dirty
00173          * in the DB_MPOOLFILE->set method.
00174          */
00175         mfp = dbmfp->mfp;
00176         if (LF_ISSET(DB_MPOOL_DIRTY) || F_ISSET(bhp, BH_DIRTY))
00177                 mfp->file_written = 1;
00178 
00179         /*
00180          * If more than one reference to the page or a reference other than a
00181          * thread waiting to flush the buffer to disk, we're done.  Ignore the
00182          * discard flags (for now) and leave the buffer's priority alone.
00183          */
00184         if (--bhp->ref > 1 || (bhp->ref == 1 && !F_ISSET(bhp, BH_LOCKED))) {
00185                 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00186                 return (0);
00187         }
00188 
00189         /* Update priority values. */
00190         if (F_ISSET(bhp, BH_DISCARD) || mfp->priority == MPOOL_PRI_VERY_LOW)
00191                 bhp->priority = 0;
00192         else {
00193                 /*
00194                  * We don't lock the LRU counter or the stat.st_pages field, if
00195                  * we get garbage (which won't happen on a 32-bit machine), it
00196                  * only means a buffer has the wrong priority.
00197                  */
00198                 bhp->priority = c_mp->lru_count;
00199 
00200                 adjust = 0;
00201                 if (mfp->priority != 0)
00202                         adjust =
00203                             (int)c_mp->stat.st_pages / mfp->priority;
00204                 if (F_ISSET(bhp, BH_DIRTY))
00205                         adjust += c_mp->stat.st_pages / MPOOL_PRI_DIRTY;
00206 
00207                 if (adjust > 0) {
00208                         if (UINT32_MAX - bhp->priority >= (u_int32_t)adjust)
00209                                 bhp->priority += adjust;
00210                 } else if (adjust < 0)
00211                         if (bhp->priority > (u_int32_t)-adjust)
00212                                 bhp->priority += adjust;
00213         }
00214 
00215         /*
00216          * Buffers on hash buckets are sorted by priority -- move the buffer
00217          * to the correct position in the list.
00218          */
00219         if ((fbhp =
00220              SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) ==
00221              SH_TAILQ_LAST(&hp->hash_bucket, hq, __bh))
00222                 goto done;
00223 
00224         if (fbhp == bhp)
00225                 fbhp = SH_TAILQ_NEXT(fbhp, hq, __bh);
00226         SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
00227 
00228         for (prev = NULL; fbhp != NULL;
00229             prev = fbhp, fbhp = SH_TAILQ_NEXT(fbhp, hq, __bh))
00230                 if (fbhp->priority > bhp->priority)
00231                         break;
00232         if (prev == NULL)
00233                 SH_TAILQ_INSERT_HEAD(&hp->hash_bucket, bhp, hq, __bh);
00234         else
00235                 SH_TAILQ_INSERT_AFTER(&hp->hash_bucket, prev, bhp, hq, __bh);
00236 
00237 done:
00238         /* Reset the hash bucket's priority. */
00239         hp->hash_priority = SH_TAILQ_FIRSTP(&hp->hash_bucket, __bh)->priority;
00240 
00241 #ifdef DIAGNOSTIC
00242         __memp_check_order(hp);
00243 #endif
00244 
00245         /*
00246          * The sync code has a separate counter for buffers on which it waits.
00247          * It reads that value without holding a lock so we update it as the
00248          * last thing we do.  Once that value goes to 0, we won't see another
00249          * reference to that buffer being returned to the cache until the sync
00250          * code has finished, so we're safe as long as we don't let the value
00251          * go to 0 before we finish with the buffer.
00252          */
00253         if (F_ISSET(bhp, BH_LOCKED) && bhp->ref_sync != 0)
00254                 --bhp->ref_sync;
00255 
00256         MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00257 
00258         /*
00259          * On every buffer put we update the buffer generation number and check
00260          * for wraparound.
00261          */
00262         if (++c_mp->lru_count == UINT32_MAX)
00263                 if ((t_ret =
00264                     __memp_reset_lru(dbenv, dbmp->reginfo)) != 0 && ret == 0)
00265                         ret = t_ret;
00266 
00267         return (ret);
00268 }
00269 
00270 /*
00271  * __memp_reset_lru --
00272  *      Reset the cache LRU counter.
00273  */
00274 static int
00275 __memp_reset_lru(dbenv, infop)
00276         DB_ENV *dbenv;
00277         REGINFO *infop;
00278 {
00279         BH *bhp;
00280         DB_MPOOL_HASH *hp;
00281         MPOOL *c_mp;
00282         u_int32_t bucket;
00283 
00284         c_mp = infop->primary;
00285 
00286         /*
00287          * Update the counter so all future allocations will start at the
00288          * bottom.
00289          */
00290         c_mp->lru_count -= MPOOL_BASE_DECREMENT;
00291 
00292         /* Adjust the priority of every buffer in the system. */
00293         for (hp = R_ADDR(infop, c_mp->htab),
00294             bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
00295                 /*
00296                  * Skip empty buckets.
00297                  *
00298                  * We can check for empty buckets before locking as we
00299                  * only care if the pointer is zero or non-zero.
00300                  */
00301                 if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
00302                         continue;
00303 
00304                 MUTEX_LOCK(dbenv, hp->mtx_hash);
00305                 for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
00306                     bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
00307                         if (bhp->priority != UINT32_MAX &&
00308                             bhp->priority > MPOOL_BASE_DECREMENT)
00309                                 bhp->priority -= MPOOL_BASE_DECREMENT;
00310                 MUTEX_UNLOCK(dbenv, hp->mtx_hash);
00311         }
00312 
00313         return (0);
00314 }

Generated on Sun Dec 25 12:14:41 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2