hash_stat.c

/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2005
 *      Sleepycat Software.  All rights reserved.
 *
 * $Id: hash_stat.c,v 12.1 2005/06/16 20:22:53 bostic Exp $
 */

#include "db_config.h"

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <string.h>
#endif

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_shash.h"
#include "dbinc/btree.h"
#include "dbinc/hash.h"
#include "dbinc/mp.h"

#ifdef HAVE_STATISTICS
static int __ham_stat_callback __P((DB *, PAGE *, void *, int *));

/*
 * __ham_stat --
 *      Gather the hash statistics.
 *
 * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t));
 */
int
__ham_stat(dbc, spp, flags)
        DBC *dbc;
        void *spp;
        u_int32_t flags;
{
        DB *dbp;
        DB_ENV *dbenv;
        DB_HASH_STAT *sp;
        DB_MPOOLFILE *mpf;
        HASH_CURSOR *hcp;
        PAGE *h;
        db_pgno_t pgno;
        int ret;

        dbp = dbc->dbp;
        dbenv = dbp->dbenv;

        mpf = dbp->mpf;
        sp = NULL;

        hcp = (HASH_CURSOR *)dbc->internal;

        if ((ret = __ham_get_meta(dbc)) != 0)
                goto err;

        /* Allocate and clear the structure. */
        if ((ret = __os_umalloc(dbenv, sizeof(*sp), &sp)) != 0)
                goto err;
        memset(sp, 0, sizeof(*sp));
        /* Copy the fields that we have. */
        sp->hash_nkeys = hcp->hdr->dbmeta.key_count;
        sp->hash_ndata = hcp->hdr->dbmeta.record_count;
        sp->hash_pagesize = dbp->pgsize;
        sp->hash_buckets = hcp->hdr->max_bucket + 1;
        sp->hash_magic = hcp->hdr->dbmeta.magic;
        sp->hash_version = hcp->hdr->dbmeta.version;
        sp->hash_metaflags = hcp->hdr->dbmeta.flags;
        sp->hash_ffactor = hcp->hdr->ffactor;

        if (flags == DB_FAST_STAT || flags == DB_CACHED_COUNTS)
                goto done;

        /* Walk the free list, counting pages. */
        for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free;
            pgno != PGNO_INVALID;) {
                ++sp->hash_free;

                if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
                        goto err;

                pgno = h->next_pgno;
                (void)__memp_fput(mpf, h, 0);
        }

        /* Now traverse the rest of the table. */
        sp->hash_nkeys = 0;
        sp->hash_ndata = 0;
        if ((ret = __ham_traverse(dbc,
            DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0)
                goto err;

        if (!F_ISSET(dbp, DB_AM_RDONLY)) {
                if ((ret = __ham_dirty_meta(dbc)) != 0)
                        goto err;
                hcp->hdr->dbmeta.key_count = sp->hash_nkeys;
                hcp->hdr->dbmeta.record_count = sp->hash_ndata;
        }

done:   if ((ret = __ham_release_meta(dbc)) != 0)
                goto err;

        *(DB_HASH_STAT **)spp = sp;
        return (0);

err:    if (sp != NULL)
                __os_ufree(dbenv, sp);

        if (hcp->hdr != NULL)
                (void)__ham_release_meta(dbc);

        return (ret);
}
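
/*
 * Illustrative sketch (not part of hash_stat.c): applications normally reach
 * __ham_stat through the public DB->stat interface, which for a hash database
 * fills in a DB_HASH_STAT structure like the one built above.  The helper
 * name and error handling below are hypothetical; DB_FAST_STAT asks only for
 * the cheap, cached fields and skips the full page traversal.
 */
#if 0   /* example only */
#include <stdio.h>
#include <stdlib.h>
#include <db.h>

static int
print_hash_counts(DB *dbp)
{
        DB_HASH_STAT *sp;
        int ret;

        /* Fast path: metadata counts only, no page traversal. */
        if ((ret = dbp->stat(dbp, NULL, &sp, DB_FAST_STAT)) != 0)
                return (ret);

        printf("buckets:    %lu\n", (unsigned long)sp->hash_buckets);
        printf("keys:       %lu\n", (unsigned long)sp->hash_nkeys);
        printf("data items: %lu\n", (unsigned long)sp->hash_ndata);

        /* The stat structure is allocated on our behalf; release it. */
        free(sp);
        return (0);
}
#endif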

/*
 * __ham_stat_print --
 *      Display hash statistics.
 *
 * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t));
 */
int
__ham_stat_print(dbc, flags)
        DBC *dbc;
        u_int32_t flags;
{
        static const FN fn[] = {
                { DB_HASH_DUP,          "duplicates" },
                { DB_HASH_SUBDB,        "multiple-databases" },
                { DB_HASH_DUPSORT,      "sorted duplicates" },
                { 0,                    NULL }
        };
        DB *dbp;
        DB_ENV *dbenv;
        DB_HASH_STAT *sp;
        int lorder, ret;
        const char *s;

        dbp = dbc->dbp;
        dbenv = dbp->dbenv;

        if ((ret = __ham_stat(dbc, &sp, 0)) != 0)
                return (ret);

        if (LF_ISSET(DB_STAT_ALL)) {
                __db_msg(dbenv, "%s", DB_GLOBAL(db_line));
                __db_msg(dbenv, "Default Hash database information:");
        }
        __db_msg(dbenv, "%lx\tHash magic number", (u_long)sp->hash_magic);
        __db_msg(dbenv,
            "%lu\tHash version number", (u_long)sp->hash_version);
        (void)__db_get_lorder(dbp, &lorder);
        switch (lorder) {
        case 1234:
                s = "Little-endian";
                break;
        case 4321:
                s = "Big-endian";
                break;
        default:
                s = "Unrecognized byte order";
                break;
        }
        __db_msg(dbenv, "%s\tByte order", s);
        __db_prflags(dbenv, NULL, sp->hash_metaflags, fn, NULL, "\tFlags");
        __db_dl(dbenv,
            "Underlying database page size", (u_long)sp->hash_pagesize);
        __db_dl(dbenv, "Specified fill factor", (u_long)sp->hash_ffactor);
        __db_dl(dbenv,
            "Number of keys in the database", (u_long)sp->hash_nkeys);
        __db_dl(dbenv,
            "Number of data items in the database", (u_long)sp->hash_ndata);

        __db_dl(dbenv, "Number of hash buckets", (u_long)sp->hash_buckets);
        __db_dl_pct(dbenv, "Number of bytes free on bucket pages",
            (u_long)sp->hash_bfree, DB_PCT_PG(
            sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff");

        __db_dl(dbenv,
            "Number of overflow pages", (u_long)sp->hash_bigpages);
        __db_dl_pct(dbenv, "Number of bytes free in overflow pages",
            (u_long)sp->hash_big_bfree, DB_PCT_PG(
            sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff");

        __db_dl(dbenv,
            "Number of bucket overflow pages", (u_long)sp->hash_overflows);
        __db_dl_pct(dbenv,
            "Number of bytes free in bucket overflow pages",
            (u_long)sp->hash_ovfl_free, DB_PCT_PG(
            sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff");

        __db_dl(dbenv, "Number of duplicate pages", (u_long)sp->hash_dup);
        __db_dl_pct(dbenv, "Number of bytes free in duplicate pages",
            (u_long)sp->hash_dup_free, DB_PCT_PG(
            sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff");

        __db_dl(dbenv,
            "Number of pages on the free list", (u_long)sp->hash_free);

        __os_ufree(dbenv, sp);

        return (0);
}
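
/*
 * Illustrative sketch (not part of hash_stat.c): the output above is normally
 * requested through the public DB->stat_print interface, which routes to
 * __ham_stat_print for databases of type DB_HASH.  DB_STAT_ALL adds the
 * "Default Hash database information:" banner.  A minimal, hypothetical
 * wrapper:
 */
#if 0   /* example only */
#include <db.h>

static int
dump_hash_stats(DB *dbp)
{
        return (dbp->stat_print(dbp, DB_STAT_ALL));
}
#endif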

static int
__ham_stat_callback(dbp, pagep, cookie, putp)
        DB *dbp;
        PAGE *pagep;
        void *cookie;
        int *putp;
{
        DB_HASH_STAT *sp;
        DB_BTREE_STAT bstat;
        db_indx_t indx, len, off, tlen, top;
        u_int8_t *hk;
        int ret;

        *putp = 0;
        sp = cookie;

        switch (pagep->type) {
        case P_INVALID:
                /*
                 * Hash pages may be wholly zeroed; this is not a bug.
                 * Obviously such pages have no data, so we can just proceed.
                 */
                break;
        case P_HASH:
                /*
                 * We count the buckets and the overflow pages
                 * separately and tally their bytes separately
                 * as well.  We need to figure out if this page
                 * is a bucket.
                 */
                if (PREV_PGNO(pagep) == PGNO_INVALID)
                        sp->hash_bfree += P_FREESPACE(dbp, pagep);
                else {
                        sp->hash_overflows++;
                        sp->hash_ovfl_free += P_FREESPACE(dbp, pagep);
                }
                top = NUM_ENT(pagep);
                /* Correct for on-page duplicates and deleted items. */
                for (indx = 0; indx < top; indx += P_INDX) {
                        switch (*H_PAIRDATA(dbp, pagep, indx)) {
                        case H_OFFDUP:
                                break;
                        case H_OFFPAGE:
                        case H_KEYDATA:
                                sp->hash_ndata++;
                                break;
                        case H_DUPLICATE:
                                tlen = LEN_HDATA(dbp, pagep, 0, indx);
                                hk = H_PAIRDATA(dbp, pagep, indx);
                                for (off = 0; off < tlen;
                                    off += len + 2 * sizeof(db_indx_t)) {
                                        sp->hash_ndata++;
                                        memcpy(&len,
                                            HKEYDATA_DATA(hk)
                                            + off, sizeof(db_indx_t));
                                }
                                break;
                        default:
                                return (__db_pgfmt(dbp->dbenv, PGNO(pagep)));
                        }
                }
                sp->hash_nkeys += H_NUMPAIRS(pagep);
                break;
        case P_IBTREE:
        case P_IRECNO:
        case P_LBTREE:
        case P_LRECNO:
        case P_LDUP:
                /*
                 * These are all btree pages; build a fresh btree stat
                 * cookie and hand them to the btree stat callback, then
                 * fold the relevant fields into our stat structure.
                 */
                memset(&bstat, 0, sizeof(bstat));
                if ((ret = __bam_stat_callback(dbp, pagep, &bstat, putp)) != 0)
                        return (ret);
                sp->hash_dup++;
                sp->hash_dup_free += bstat.bt_leaf_pgfree +
                    bstat.bt_dup_pgfree + bstat.bt_int_pgfree;
                sp->hash_ndata += bstat.bt_ndata;
                break;
        case P_OVERFLOW:
                sp->hash_bigpages++;
                sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep);
                break;
        default:
                return (__db_pgfmt(dbp->dbenv, PGNO(pagep)));
        }

        return (0);
}
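
/*
 * Illustrative sketch (not part of hash_stat.c): the H_DUPLICATE branch above
 * walks an on-page duplicate set in which each element appears to be encoded
 * as a db_indx_t length, the data bytes, and the length repeated, which is
 * why the loop advances by len + 2 * sizeof(db_indx_t).  The hypothetical
 * helper below restates that walk over a raw duplicate-set buffer and
 * returns the element count.
 */
#if 0   /* example only */
static u_int32_t
count_onpage_dups(data, tlen)
        const u_int8_t *data;
        db_indx_t tlen;
{
        db_indx_t len, off;
        u_int32_t count;

        for (count = 0, off = 0; off < tlen;
            off += len + 2 * sizeof(db_indx_t)) {
                /* Read the leading copy of the element length. */
                memcpy(&len, data + off, sizeof(db_indx_t));
                ++count;
        }
        return (count);
}
#endif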

/*
 * __ham_print_cursor --
 *      Display the current cursor.
 *
 * PUBLIC: void __ham_print_cursor __P((DBC *));
 */
void
__ham_print_cursor(dbc)
        DBC *dbc;
{
        static const FN fn[] = {
                { H_CONTINUE,   "H_CONTINUE" },
                { H_DELETED,    "H_DELETED" },
                { H_DIRTY,      "H_DIRTY" },
                { H_DUPONLY,    "H_DUPONLY" },
                { H_EXPAND,     "H_EXPAND" },
                { H_ISDUP,      "H_ISDUP" },
                { H_NEXT_NODUP, "H_NEXT_NODUP" },
                { H_NOMORE,     "H_NOMORE" },
                { H_OK,         "H_OK" },
                { 0,            NULL }
        };
        DB_ENV *dbenv;
        HASH_CURSOR *cp;

        dbenv = dbc->dbp->dbenv;
        cp = (HASH_CURSOR *)dbc->internal;

        STAT_ULONG("Bucket traversing", cp->bucket);
        STAT_ULONG("Bucket locked", cp->lbucket);
        STAT_ULONG("Duplicate set offset", cp->dup_off);
        STAT_ULONG("Current duplicate length", cp->dup_len);
        STAT_ULONG("Total duplicate set length", cp->dup_tlen);
        STAT_ULONG("Bytes needed for add", cp->seek_size);
        STAT_ULONG("Page on which we can insert", cp->seek_found_page);
        STAT_ULONG("Order", cp->order);
        __db_prflags(dbenv, NULL, cp->flags, fn, NULL, "\tInternal Flags");
}

#else /* !HAVE_STATISTICS */

int
__ham_stat(dbc, spp, flags)
        DBC *dbc;
        void *spp;
        u_int32_t flags;
{
        COMPQUIET(spp, NULL);
        COMPQUIET(flags, 0);

        return (__db_stat_not_built(dbc->dbp->dbenv));
}
#endif

/*
 * __ham_traverse --
 *      Traverse an entire hash table.  We use the callback so that we
 * can use this both for stat collection and for deallocation.
 *
 * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t,
 * PUBLIC:     int (*)(DB *, PAGE *, void *, int *), void *, int));
 */
int
__ham_traverse(dbc, mode, callback, cookie, look_past_max)
        DBC *dbc;
        db_lockmode_t mode;
        int (*callback) __P((DB *, PAGE *, void *, int *));
        void *cookie;
        int look_past_max;
{
        DB *dbp;
        DBC *opd;
        DB_MPOOLFILE *mpf;
        HASH_CURSOR *hcp;
        HKEYDATA *hk;
        db_pgno_t pgno, opgno;
        int did_put, i, ret, t_ret;
        u_int32_t bucket, spares_entry;

        dbp = dbc->dbp;
        opd = NULL;
        mpf = dbp->mpf;
        hcp = (HASH_CURSOR *)dbc->internal;
        ret = 0;

        /*
         * In a perfect world, we could simply read each page in the file
         * and look at its page type to tally the information necessary.
         * Unfortunately, the bucket locking that hash tables do to make
         * locking easy makes this a pain in the butt.  We have to traverse
         * duplicate, overflow and big pages from the bucket so that we
         * don't access anything that isn't properly locked.
         */
        for (bucket = 0;; bucket++) {
                /*
                 * We put the loop exit condition check here because writing
                 * it inside the for statement made for a really vile
                 * extended ?: that made SCO's compiler drop core.
                 *
                 * If look_past_max is not set, we can stop at max_bucket;
                 * if it is set, we need to include pages that are part of
                 * the current doubling but beyond the highest bucket we've
                 * split into, as well as pages from a "future" doubling
                 * that may have been created within an aborted
                 * transaction.  To do this, keep looping (and incrementing
                 * bucket) until the corresponding spares array entries
                 * cease to be defined.
                 */
                if (look_past_max) {
                        spares_entry = __db_log2(bucket + 1);
                        if (spares_entry >= NCACHED ||
                            hcp->hdr->spares[spares_entry] == 0)
                                break;
                } else {
                        if (bucket > hcp->hdr->max_bucket)
                                break;
                }

                hcp->bucket = bucket;
                hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket);
                for (ret = __ham_get_cpage(dbc, mode); ret == 0;
                    ret = __ham_next_cpage(dbc, pgno, 0)) {

                        /*
                         * If we are cleaning up pages past the max_bucket,
                         * then they may be on the free list and have their
                         * next pointers set, but they should be ignored.  In
                         * fact, we really ought to just skip anybody who is
                         * not a valid page.
                         */
                        if (TYPE(hcp->page) == P_INVALID)
                                break;
                        pgno = NEXT_PGNO(hcp->page);

                        /*
                         * Go through each item on the page checking for
                         * duplicates (in which case we have to count the
                         * duplicate pages) or big key/data items (in which
                         * case we have to count those pages).
                         */
                        for (i = 0; i < NUM_ENT(hcp->page); i++) {
                                hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i);
                                switch (HPAGE_PTYPE(hk)) {
                                case H_OFFDUP:
                                        memcpy(&opgno, HOFFDUP_PGNO(hk),
                                            sizeof(db_pgno_t));
                                        if ((ret = __db_c_newopd(dbc,
                                            opgno, NULL, &opd)) != 0)
                                                return (ret);
                                        if ((ret = __bam_traverse(opd,
                                            DB_LOCK_READ, opgno,
                                            callback, cookie))
                                            != 0)
                                                goto err;
                                        if ((ret = __db_c_close(opd)) != 0)
                                                return (ret);
                                        opd = NULL;
                                        break;
                                case H_OFFPAGE:
                                        /*
                                         * We are about to get a big page
                                         * which will use the same spot that
                                         * the current page uses, so we need
                                         * to restore the current page before
                                         * looking at it again.
                                         */
                                        memcpy(&opgno, HOFFPAGE_PGNO(hk),
                                            sizeof(db_pgno_t));
                                        if ((ret = __db_traverse_big(dbp,
                                            opgno, callback, cookie)) != 0)
                                                goto err;
                                        break;
                                case H_KEYDATA:
                                case H_DUPLICATE:
                                        break;
                                default:
                                        DB_ASSERT(0);
                                        ret = EINVAL;
                                        goto err;
                                }
                        }

                        /* Call the callback on main pages. */
                        if ((ret = callback(dbp,
                            hcp->page, cookie, &did_put)) != 0)
                                goto err;

                        if (did_put)
                                hcp->page = NULL;
                        if (pgno == PGNO_INVALID)
                                break;
                }
                if (ret != 0)
                        goto err;

                if (hcp->page != NULL) {
                        if ((ret = __memp_fput(mpf, hcp->page, 0)) != 0)
                                return (ret);
                        hcp->page = NULL;
                }
        }
err:    if (opd != NULL &&
            (t_ret = __db_c_close(opd)) != 0 && ret == 0)
                ret = t_ret;
        return (ret);
}
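
/*
 * Illustrative sketch (not part of hash_stat.c): __ham_traverse drives a
 * callback of type int (*)(DB *, PAGE *, void *, int *) over every page
 * reachable from each bucket, as __ham_stat does with __ham_stat_callback
 * above.  The hypothetical callback below just counts pages by type into a
 * caller-supplied cookie; it leaves *putp zero because it does not release
 * the page itself.
 */
#if 0   /* example only */
struct page_counts {
        u_long hash, big, btree, other;
};

static int
count_pages_callback(dbp, pagep, cookie, putp)
        DB *dbp;
        PAGE *pagep;
        void *cookie;
        int *putp;
{
        struct page_counts *pc;

        COMPQUIET(dbp, NULL);

        pc = cookie;
        *putp = 0;

        switch (TYPE(pagep)) {
        case P_HASH:
                pc->hash++;
                break;
        case P_OVERFLOW:
                pc->big++;
                break;
        case P_IBTREE:
        case P_IRECNO:
        case P_LBTREE:
        case P_LRECNO:
        case P_LDUP:
                pc->btree++;
                break;
        default:
                pc->other++;
                break;
        }
        return (0);
}

/*
 * Usage from inside the access method, given an open hash cursor dbc:
 *      struct page_counts pc;
 *      memset(&pc, 0, sizeof(pc));
 *      ret = __ham_traverse(dbc, DB_LOCK_READ, count_pages_callback, &pc, 0);
 */
#endif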
