Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

hash_verify.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1999-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: hash_verify.c,v 12.8 2005/06/16 20:22:54 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <string.h>
00016 #endif
00017 
00018 #include "db_int.h"
00019 #include "dbinc/db_page.h"
00020 #include "dbinc/db_shash.h"
00021 #include "dbinc/db_verify.h"
00022 #include "dbinc/btree.h"
00023 #include "dbinc/hash.h"
00024 #include "dbinc/mp.h"
00025 
/*
 * Prototypes for helpers that are private to this file (all declared
 * static).  __P wraps each parameter list.
 */
00026 static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t));
00027 static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t,
00028     u_int32_t));
00029 static int __ham_vrfy_item __P((DB *,
00030     VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t));
00031 
00032 /*
00033  * __ham_vrfy_meta --
00034  *      Verify the hash-specific part of a metadata page.
00035  *
00036  *      Note that unlike btree, we don't save things off, because we
00037  *      will need most everything again to verify each page and the
00038  *      amount of state here is significant.
00039  *
00040  * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *,
00041  * PUBLIC:     db_pgno_t, u_int32_t));
00042  */
00043 int
00044 __ham_vrfy_meta(dbp, vdp, m, pgno, flags)
00045         DB *dbp;
00046         VRFY_DBINFO *vdp;
00047         HMETA *m;
00048         db_pgno_t pgno;
00049         u_int32_t flags;
00050 {
00051         HASH *hashp;
00052         VRFY_PAGEINFO *pip;
00053         int i, ret, t_ret, isbad;
00054         u_int32_t pwr, mbucket;
00055         u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
00056 
00057         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00058                 return (ret);
00059         isbad = 0;
00060 
00061         hashp = dbp->h_internal;
00062 
00063         if (hashp != NULL && hashp->h_hash != NULL)
00064                 hfunc = hashp->h_hash;
00065         else
00066                 hfunc = __ham_func5;
00067 
00068         /*
00069          * If we haven't already checked the common fields in pagezero,
00070          * check them.
00071          */
00072         if (!F_ISSET(pip, VRFY_INCOMPLETE) &&
00073             (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) {
00074                 if (ret == DB_VERIFY_BAD)
00075                         isbad = 1;
00076                 else
00077                         goto err;
00078         }
00079 
00080         /* h_charkey */
00081         if (!LF_ISSET(DB_NOORDERCHK))
00082                 if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) {
00083                         EPRINT((dbp->dbenv,
00084 "Page %lu: database has custom hash function; reverify with DB_NOORDERCHK set",
00085                             (u_long)pgno));
00086                         /*
00087                          * Return immediately;  this is probably a sign of user
00088                          * error rather than database corruption, so we want to
00089                          * avoid extraneous errors.
00090                          */
00091                         isbad = 1;
00092                         goto err;
00093                 }
00094 
00095         /* max_bucket must be less than the last pgno. */
00096         if (m->max_bucket > vdp->last_pgno) {
00097                 EPRINT((dbp->dbenv,
00098                     "Page %lu: Impossible max_bucket %lu on meta page",
00099                     (u_long)pgno, (u_long)m->max_bucket));
00100                 /*
00101                  * Most other fields depend somehow on max_bucket, so
00102                  * we just return--there will be lots of extraneous
00103                  * errors.
00104                  */
00105                 isbad = 1;
00106                 goto err;
00107         }
00108 
00109         /*
00110          * max_bucket, high_mask and low_mask: high_mask must be one
00111          * less than the next power of two above max_bucket, and
00112          * low_mask must be one less than the power of two below it.
00113          */
00114         pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1);
00115         if (m->high_mask != pwr - 1) {
00116                 EPRINT((dbp->dbenv,
00117                     "Page %lu: incorrect high_mask %lu, should be %lu",
00118                     (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1));
00119                 isbad = 1;
00120         }
00121         pwr >>= 1;
00122         if (m->low_mask != pwr - 1) {
00123                 EPRINT((dbp->dbenv,
00124                     "Page %lu: incorrect low_mask %lu, should be %lu",
00125                     (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1));
00126                 isbad = 1;
00127         }
00128 
00129         /* ffactor: no check possible. */
00130         pip->h_ffactor = m->ffactor;
00131 
00132         /*
00133          * nelem: just make sure it's not astronomical for now. This is the
00134          * same check that hash_upgrade does, since there was a bug in 2.X
00135          * which could make nelem go "negative".
00136          */
00137         if (m->nelem > 0x80000000) {
00138                 EPRINT((dbp->dbenv,
00139                     "Page %lu: suspiciously high nelem of %lu",
00140                     (u_long)pgno, (u_long)m->nelem));
00141                 isbad = 1;
00142                 pip->h_nelem = 0;
00143         } else
00144                 pip->h_nelem = m->nelem;
00145 
00146         /* flags */
00147         if (F_ISSET(&m->dbmeta, DB_HASH_DUP))
00148                 F_SET(pip, VRFY_HAS_DUPS);
00149         if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT))
00150                 F_SET(pip, VRFY_HAS_DUPSORT);
00151         /* XXX: Why is the DB_HASH_SUBDB flag necessary? */
00152 
00153         /* spares array */
00154         for (i = 0; m->spares[i] != 0 && i < NCACHED; i++) {
00155                 /*
00156                  * We set mbucket to the maximum bucket that would use a given
00157                  * spares entry;  we want to ensure that it's always less
00158                  * than last_pgno.
00159                  */
00160                 mbucket = (1 << i) - 1;
00161                 if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) {
00162                         EPRINT((dbp->dbenv,
00163                             "Page %lu: spares array entry %d is invalid",
00164                             (u_long)pgno, i));
00165                         isbad = 1;
00166                 }
00167         }
00168 
00169 err:    if ((t_ret =
00170             __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
00171                 ret = t_ret;
00172         if (LF_ISSET(DB_SALVAGE) &&
00173            (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
00174                 ret = t_ret;
00175         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00176 }
00177 
00178 /*
00179  * __ham_vrfy --
00180  *      Verify hash page.
00181  *
00182  * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00183  * PUBLIC:     u_int32_t));
00184  */
00185 int
00186 __ham_vrfy(dbp, vdp, h, pgno, flags)
00187         DB *dbp;
00188         VRFY_DBINFO *vdp;
00189         PAGE *h;
00190         db_pgno_t pgno;
00191         u_int32_t flags;
00192 {
00193         VRFY_PAGEINFO *pip;
00194         u_int32_t ent, himark, inpend;
00195         db_indx_t *inp;
00196         int isbad, ret, t_ret;
00197 
00198         isbad = 0;
00199         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00200                 return (ret);
00201 
00202         if (TYPE(h) != P_HASH) {
00203                 TYPE_ERR_PRINT(dbp->dbenv, "__ham_vrfy", pgno, TYPE(h));
00204                 DB_ASSERT(0);
00205                 ret = EINVAL;
00206                 goto err;
00207         }
00208 
00209         /* Verify and save off fields common to all PAGEs. */
00210         if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
00211                 if (ret == DB_VERIFY_BAD)
00212                         isbad = 1;
00213                 else
00214                         goto err;
00215         }
00216 
00217         /*
00218          * Verify inp[].  Each offset from 0 to NUM_ENT(h) must be lower
00219          * than the previous one, higher than the current end of the inp array,
00220          * and lower than the page size.
00221          *
00222          * In any case, we return immediately if things are bad, as it would
00223          * be unsafe to proceed.
00224          */
00225         inp = P_INP(dbp, h);
00226         for (ent = 0, himark = dbp->pgsize,
00227             inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h);
00228             ent < NUM_ENT(h); ent++)
00229                 if (inp[ent] >= himark) {
00230                         EPRINT((dbp->dbenv,
00231                             "Page %lu: item %lu is out of order or nonsensical",
00232                             (u_long)pgno, (u_long)ent));
00233                         isbad = 1;
00234                         goto err;
00235                 } else if (inpend >= himark) {
00236                         EPRINT((dbp->dbenv,
00237                             "Page %lu: entries array collided with data",
00238                             (u_long)pgno));
00239                         isbad = 1;
00240                         goto err;
00241 
00242                 } else {
00243                         himark = inp[ent];
00244                         inpend += sizeof(db_indx_t);
00245                         if ((ret = __ham_vrfy_item(
00246                             dbp, vdp, pgno, h, ent, flags)) != 0)
00247                                 goto err;
00248                 }
00249 
00250 err:    if ((t_ret =
00251             __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
00252                 ret = t_ret;
00253         return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret);
00254 }
00255 
00256 /*
00257  * __ham_vrfy_item --
00258  *      Given a hash page and an offset, sanity-check the item itself,
00259  *      and save off any overflow items or off-page dup children as necessary.
00260  */
00261 static int
00262 __ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
00263         DB *dbp;
00264         VRFY_DBINFO *vdp;
00265         db_pgno_t pgno;
00266         PAGE *h;
00267         u_int32_t i, flags;
00268 {
00269         HOFFPAGE hop;
00270         HOFFDUP hod;
00271         VRFY_CHILDINFO child;
00272         VRFY_PAGEINFO *pip;
00273         db_indx_t offset, len, dlen, elen;
00274         int ret, t_ret;
00275         u_int8_t *databuf;
00276 
00277         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00278                 return (ret);
00279 
00280         switch (HPAGE_TYPE(dbp, h, i)) {
00281         case H_KEYDATA:
00282                 /* Nothing to do here--everything but the type field is data */
00283                 break;
00284         case H_DUPLICATE:
00285                 /* Are we a datum or a key?  Better be the former. */
00286                 if (i % 2 == 0) {
00287                         EPRINT((dbp->dbenv,
00288                             "Page %lu: hash key stored as duplicate item %lu",
00289                             (u_long)pip->pgno, (u_long)i));
00290                 }
00291                 /*
00292                  * Dups are encoded as a series within a single HKEYDATA,
00293                  * in which each dup is surrounded by a copy of its length
00294                  * on either side (so that the series can be walked in either
00295                  * direction.  We loop through this series and make sure
00296                  * each dup is reasonable.
00297                  *
00298                  * Note that at this point, we've verified item i-1, so
00299                  * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]).
00300                  */
00301                 len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
00302                 databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i));
00303                 for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) {
00304                         memcpy(&dlen, databuf + offset, sizeof(db_indx_t));
00305 
00306                         /* Make sure the length is plausible. */
00307                         if (offset + DUP_SIZE(dlen) > len) {
00308                                 EPRINT((dbp->dbenv,
00309                             "Page %lu: duplicate item %lu has bad length",
00310                                     (u_long)pip->pgno, (u_long)i));
00311                                 ret = DB_VERIFY_BAD;
00312                                 goto err;
00313                         }
00314 
00315                         /*
00316                          * Make sure the second copy of the length is the
00317                          * same as the first.
00318                          */
00319                         memcpy(&elen,
00320                             databuf + offset + dlen + sizeof(db_indx_t),
00321                             sizeof(db_indx_t));
00322                         if (elen != dlen) {
00323                                 EPRINT((dbp->dbenv,
00324                 "Page %lu: duplicate item %lu has two different lengths",
00325                                     (u_long)pip->pgno, (u_long)i));
00326                                 ret = DB_VERIFY_BAD;
00327                                 goto err;
00328                         }
00329                 }
00330                 F_SET(pip, VRFY_HAS_DUPS);
00331                 if (!LF_ISSET(DB_NOORDERCHK) &&
00332                     __ham_dups_unsorted(dbp, databuf, len))
00333                         F_SET(pip, VRFY_DUPS_UNSORTED);
00334                 break;
00335         case H_OFFPAGE:
00336                 /* Offpage item.  Make sure pgno is sane, save off. */
00337                 memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE);
00338                 if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno ||
00339                     hop.pgno == PGNO_INVALID) {
00340                         EPRINT((dbp->dbenv,
00341                             "Page %lu: offpage item %lu has bad pgno %lu",
00342                             (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno));
00343                         ret = DB_VERIFY_BAD;
00344                         goto err;
00345                 }
00346                 memset(&child, 0, sizeof(VRFY_CHILDINFO));
00347                 child.pgno = hop.pgno;
00348                 child.type = V_OVERFLOW;
00349                 child.tlen = hop.tlen; /* This will get checked later. */
00350                 if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0)
00351                         goto err;
00352                 break;
00353         case H_OFFDUP:
00354                 /* Offpage duplicate item.  Same drill. */
00355                 memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE);
00356                 if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno ||
00357                     hod.pgno == PGNO_INVALID) {
00358                         EPRINT((dbp->dbenv,
00359                             "Page %lu: offpage item %lu has bad page number",
00360                             (u_long)pip->pgno, (u_long)i));
00361                         ret = DB_VERIFY_BAD;
00362                         goto err;
00363                 }
00364                 memset(&child, 0, sizeof(VRFY_CHILDINFO));
00365                 child.pgno = hod.pgno;
00366                 child.type = V_DUPLICATE;
00367                 if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0)
00368                         goto err;
00369                 F_SET(pip, VRFY_HAS_DUPS);
00370                 break;
00371         default:
00372                 EPRINT((dbp->dbenv,
00373                     "Page %lu: item %lu has bad type",
00374                     (u_long)pip->pgno, (u_long)i));
00375                 ret = DB_VERIFY_BAD;
00376                 break;
00377         }
00378 
00379 err:    if ((t_ret =
00380             __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
00381                 ret = t_ret;
00382         return (ret);
00383 }
00384 
00385 /*
00386  * __ham_vrfy_structure --
00387  *      Verify the structure of a hash database.
00388  *
00389  * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
00390  * PUBLIC:     u_int32_t));
00391  */
00392 int
00393 __ham_vrfy_structure(dbp, vdp, meta_pgno, flags)
00394         DB *dbp;
00395         VRFY_DBINFO *vdp;
00396         db_pgno_t meta_pgno;
00397         u_int32_t flags;
00398 {
00399         DB *pgset;
00400         DB_MPOOLFILE *mpf;
00401         HMETA *m;
00402         PAGE *h;
00403         VRFY_PAGEINFO *pip;
00404         int isbad, p, ret, t_ret;
00405         db_pgno_t pgno;
00406         u_int32_t bucket, spares_entry;
00407 
00408         mpf = dbp->mpf;
00409         pgset = vdp->pgset;
00410         h = NULL;
00411         ret = isbad = 0;
00412 
00413         if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, &p)) != 0)
00414                 return (ret);
00415         if (p != 0) {
00416                 EPRINT((dbp->dbenv,
00417                     "Page %lu: Hash meta page referenced twice",
00418                     (u_long)meta_pgno));
00419                 return (DB_VERIFY_BAD);
00420         }
00421         if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0)
00422                 return (ret);
00423 
00424         /* Get the meta page;  we'll need it frequently. */
00425         if ((ret = __memp_fget(mpf, &meta_pgno, 0, &m)) != 0)
00426                 return (ret);
00427 
00428         /* Loop through bucket by bucket. */
00429         for (bucket = 0; bucket <= m->max_bucket; bucket++)
00430                 if ((ret =
00431                     __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) {
00432                         if (ret == DB_VERIFY_BAD)
00433                                 isbad = 1;
00434                         else
00435                                 goto err;
00436                     }
00437 
00438         /*
00439          * There may be unused hash pages corresponding to buckets
00440          * that have been allocated but not yet used.  These may be
00441          * part of the current doubling above max_bucket, or they may
00442          * correspond to buckets that were used in a transaction
00443          * that then aborted.
00444          *
00445          * Loop through them, as far as the spares array defines them,
00446          * and make sure they're all empty.
00447          *
00448          * Note that this should be safe, since we've already verified
00449          * that the spares array is sane.
00450          */
00451         for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1),
00452             spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) {
00453                 pgno = BS_TO_PAGE(bucket, m->spares);
00454                 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00455                         goto err;
00456 
00457                 /* It's okay if these pages are totally zeroed;  unmark it. */
00458                 F_CLR(pip, VRFY_IS_ALLZEROES);
00459 
00460                 /* It's also OK if this page is simply invalid. */
00461                 if (pip->type == P_INVALID) {
00462                         if ((ret = __db_vrfy_putpageinfo(dbp->dbenv,
00463                             vdp, pip)) != 0)
00464                                 goto err;
00465                         continue;
00466                 }
00467 
00468                 if (pip->type != P_HASH) {
00469                         EPRINT((dbp->dbenv,
00470                             "Page %lu: hash bucket %lu maps to non-hash page",
00471                             (u_long)pgno, (u_long)bucket));
00472                         isbad = 1;
00473                 } else if (pip->entries != 0) {
00474                         EPRINT((dbp->dbenv,
00475                     "Page %lu: non-empty page in unused hash bucket %lu",
00476                             (u_long)pgno, (u_long)bucket));
00477                         isbad = 1;
00478                 } else {
00479                         if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
00480                                 goto err;
00481                         if (p != 0) {
00482                                 EPRINT((dbp->dbenv,
00483                                     "Page %lu: above max_bucket referenced",
00484                                     (u_long)pgno));
00485                                 isbad = 1;
00486                         } else {
00487                                 if ((ret =
00488                                     __db_vrfy_pgset_inc(pgset, pgno)) != 0)
00489                                         goto err;
00490                                 if ((ret = __db_vrfy_putpageinfo(dbp->dbenv,
00491                                     vdp, pip)) != 0)
00492                                         goto err;
00493                                 continue;
00494                         }
00495                 }
00496 
00497                 /* If we got here, it's an error. */
00498                 (void)__db_vrfy_putpageinfo(dbp->dbenv, vdp, pip);
00499                 goto err;
00500         }
00501 
00502 err:    if ((t_ret = __memp_fput(mpf, m, 0)) != 0)
00503                 return (t_ret);
00504         if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0)
00505                 return (t_ret);
00506         return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret);
00507 }
00508 
00509 /*
00510  * __ham_vrfy_bucket --
00511  *      Verify a given bucket.
 *
 *      Starting from the bucket's first page (computed from the spares
 *      array), follow the next_pgno chain.  For each page: make sure it
 *      has not been counted before, compare its duplicate flags with
 *      those recorded for the meta page, verify every overflow chain
 *      and off-page duplicate tree hanging off it, and, when nothing
 *      has gone wrong so far and DB_NOORDERCHK is not set, check that
 *      its keys hash to this bucket.
 *
 *      Returns 0 if the bucket is sound, DB_VERIFY_BAD if a check
 *      failed and no other error occurred, or the non-zero value
 *      returned by a failing helper.
00512  */
00513 static int
00514 __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
00515         DB *dbp;
00516         VRFY_DBINFO *vdp;
00517         HMETA *m;
00518         u_int32_t bucket, flags;
00519 {
00520         HASH *hashp;
00521         VRFY_CHILDINFO *child;
00522         VRFY_PAGEINFO *mip, *pip;
00523         int ret, t_ret, isbad, p;
00524         db_pgno_t pgno, next_pgno;
00525         DBC *cc;
00526         u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
00527 
00528         isbad = 0;
        /* NULL means "nothing to release" to the cleanup code at err. */
00529         pip = NULL;
00530         cc = NULL;
00531 
        /* Use the handle's hash function if one is set, else __ham_func5. */
00532         hashp = dbp->h_internal;
00533         if (hashp != NULL && hashp->h_hash != NULL)
00534                 hfunc = hashp->h_hash;
00535         else
00536                 hfunc = __ham_func5;
00537 
        /* mip: the verification info recorded for the meta page itself. */
00538         if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0)
00539                 return (ret);
00540 
00541         /* Calculate the first pgno for this bucket. */
00542         pgno = BS_TO_PAGE(bucket, m->spares);
00543 
00544         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00545                 goto err;
00546 
00547         /* Make sure we got a plausible page number. */
00548         if (pgno > vdp->last_pgno || pip->type != P_HASH) {
00549                 EPRINT((dbp->dbenv,
00550                     "Page %lu: impossible first page in bucket %lu",
00551                     (u_long)pgno, (u_long)bucket));
00552                 /* Unsafe to continue. */
00553                 isbad = 1;
00554                 goto err;
00555         }
00556 
        /* The head of a bucket chain has no predecessor. */
00557         if (pip->prev_pgno != PGNO_INVALID) {
00558                 EPRINT((dbp->dbenv,
00559                     "Page %lu: first page in hash bucket %lu has a prev_pgno",
00560                     (u_long)pgno, (u_long)bucket));
00561                 isbad = 1;
00562         }
00563 
00564         /*
00565          * Set flags for dups and sorted dups.
         * These are OR'ed into the local copy of flags and so reach the
         * overflow and duplicate-tree checks called below.
00566          */
00567         flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? ST_DUPOK : 0;
00568         flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? ST_DUPSORT : 0;
00569 
00570         /* Loop until we find a fatal bug, or until we run out of pages. */
00571         for (;;) {
00572                 /* Provide feedback on our progress to the application. */
00573                 if (!LF_ISSET(DB_SALVAGE))
00574                         __db_vrfy_struct_feedback(dbp, vdp);
00575 
                /*
                 * Each page may be counted only once; a repeat means
                 * the chain revisits a page.
                 */
00576                 if ((ret = __db_vrfy_pgset_get(vdp->pgset, pgno, &p)) != 0)
00577                         goto err;
00578                 if (p != 0) {
00579                         EPRINT((dbp->dbenv,
00580                             "Page %lu: hash page referenced twice",
00581                             (u_long)pgno));
00582                         isbad = 1;
00583                         /* Unsafe to continue. */
00584                         goto err;
00585                 } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, pgno)) != 0)
00586                         goto err;
00587 
00588                 /*
00589                  * Hash pages that nothing has ever hashed to may never
00590                  * have actually come into existence, and may appear to be
00591                  * entirely zeroed.  This is acceptable, and since there's
00592                  * no real way for us to know whether this has actually
00593                  * occurred, we clear the "wholly zeroed" flag on every
00594                  * hash page.  A wholly zeroed page, by nature, will appear
00595                  * to have no flags set and zero entries, so should
00596                  * otherwise verify correctly.
00597                  */
00598                 F_CLR(pip, VRFY_IS_ALLZEROES);
00599 
00600                 /* If we have dups, our meta page had better know about it. */
00601                 if (F_ISSET(pip, VRFY_HAS_DUPS) &&
00602                     !F_ISSET(mip, VRFY_HAS_DUPS)) {
00603                         EPRINT((dbp->dbenv,
00604                     "Page %lu: duplicates present in non-duplicate database",
00605                             (u_long)pgno));
00606                         isbad = 1;
00607                 }
00608 
00609                 /*
00610                  * If the database has sorted dups, this page had better
00611                  * not have unsorted ones.
00612                  */
00613                 if (F_ISSET(mip, VRFY_HAS_DUPSORT) &&
00614                     F_ISSET(pip, VRFY_DUPS_UNSORTED)) {
00615                         EPRINT((dbp->dbenv,
00616                             "Page %lu: unsorted dups in sorted-dup database",
00617                             (u_long)pgno));
00618                         isbad = 1;
00619                 }
00620 
00621                 /* Walk overflow chains and offpage dup trees. */
00622                 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
00623                         goto err;
                /*
                 * ret doubles as the iteration status: the loop stops
                 * as soon as ccset/ccnext returns non-zero.  That final
                 * value is not examined; it is overwritten by the
                 * ccclose call after the loop.  Inside the body, a
                 * DB_VERIFY_BAD from a checker is folded into isbad and
                 * the walk moves on to the next child.
                 */
00624                 for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0;
00625                     ret = __db_vrfy_ccnext(cc, &child))
00626                         if (child->type == V_OVERFLOW) {
00627                                 if ((ret = __db_vrfy_ovfl_structure(dbp, vdp,
00628                                     child->pgno, child->tlen,
00629                                     flags | ST_OVFL_LEAF)) != 0) {
00630                                         if (ret == DB_VERIFY_BAD)
00631                                                 isbad = 1;
00632                                         else
00633                                                 goto err;
00634                                 }
00635                         } else if (child->type == V_DUPLICATE) {
                                /*
                                 * Any non-zero duptype result marks the
                                 * bucket bad and skips the subtree walk
                                 * for this child.
                                 */
00636                                 if ((ret = __db_vrfy_duptype(dbp,
00637                                     vdp, child->pgno, flags)) != 0) {
00638                                         isbad = 1;
00639                                         continue;
00640                                 }
00641                                 if ((ret = __bam_vrfy_subtree(dbp, vdp,
00642                                     child->pgno, NULL, NULL,
00643                                     flags | ST_RECNUM | ST_DUPSET | ST_TOPLEVEL,
00644                                     NULL, NULL, NULL)) != 0) {
00645                                         if (ret == DB_VERIFY_BAD)
00646                                                 isbad = 1;
00647                                         else
00648                                                 goto err;
00649                                 }
00650                         }
00651                 if ((ret = __db_vrfy_ccclose(cc)) != 0)
00652                         goto err;
                /* Closed; keep the err path from closing it again. */
00653                 cc = NULL;
00654 
00655                 /* If it's safe to check that things hash properly, do so. */
00656                 if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) &&
00657                     (ret = __ham_vrfy_hashing(dbp, pip->entries,
00658                     m, bucket, pgno, flags, hfunc)) != 0) {
00659                         if (ret == DB_VERIFY_BAD)
00660                                 isbad = 1;
00661                         else
00662                                 goto err;
00663                 }
00664 
                /*
                 * Save the link before giving pip back, then clear pip
                 * so the err path does not release it a second time.
                 */
00665                 next_pgno = pip->next_pgno;
00666                 ret = __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip);
00667 
00668                 pip = NULL;
00669                 if (ret != 0)
00670                         goto err;
00671 
00672                 if (next_pgno == PGNO_INVALID)
00673                         break;          /* End of the bucket. */
00674 
00675                 /* We already checked this, but just in case... */
00676                 if (!IS_VALID_PGNO(next_pgno)) {
00677                         DB_ASSERT(0);
00678                         EPRINT((dbp->dbenv,
00679                             "Page %lu: hash page has bad next_pgno",
00680                             (u_long)pgno));
00681                         isbad = 1;
00682                         goto err;
00683                 }
00684 
00685                 if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
00686                         goto err;
00687 
                /* The next page must point back at the one we came from. */
00688                 if (pip->prev_pgno != pgno) {
00689                         EPRINT((dbp->dbenv,
00690                             "Page %lu: hash page has bad prev_pgno",
00691                             (u_long)next_pgno));
00692                         isbad = 1;
00693                 }
00694                 pgno = next_pgno;
00695         }
00696 
        /*
         * Common exit: close the child cursor if still open, give back
         * the meta page info and any page info still held, and keep the
         * first error seen.
         */
00697 err:    if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
00698                 ret = t_ret;
00699         if (mip != NULL && ((t_ret =
00700             __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0)
00701                 ret = t_ret;
00702         if (pip != NULL && ((t_ret =
00703             __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0)
00704                 ret = t_ret;
00705         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00706 }
00707 
00708 /*
00709  * __ham_vrfy_hashing --
00710  *      Verify that all items on a given hash page hash correctly.
00711  *
00712  * PUBLIC: int __ham_vrfy_hashing __P((DB *,
00713  * PUBLIC:     u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t,
00714  * PUBLIC:     u_int32_t (*) __P((DB *, const void *, u_int32_t))));
00715  */
00716 int
00717 __ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc)
00718         DB *dbp;
00719         u_int32_t nentries;
00720         HMETA *m;
00721         u_int32_t thisbucket;
00722         db_pgno_t pgno;
00723         u_int32_t flags;
00724         u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
00725 {
00726         DBT dbt;
00727         DB_MPOOLFILE *mpf;
00728         PAGE *h;
00729         db_indx_t i;
00730         int ret, t_ret, isbad;
00731         u_int32_t hval, bucket;
00732 
00733         mpf = dbp->mpf;
00734         ret = isbad = 0;
00735 
00736         memset(&dbt, 0, sizeof(DBT));
00737         F_SET(&dbt, DB_DBT_REALLOC);
00738 
00739         if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
00740                 return (ret);
00741 
00742         for (i = 0; i < nentries; i += 2) {
00743                 /*
00744                  * We've already verified the page integrity and that of any
00745                  * overflow chains linked off it;  it is therefore safe to use
00746                  * __db_ret.  It's also not all that much slower, since we have
00747                  * to copy every hash item to deal with alignment anyway;  we
00748                  * can tweak this a bit if this proves to be a bottleneck,
00749                  * but for now, take the easy route.
00750                  */
00751                 if ((ret = __db_ret(dbp, h, i, &dbt, NULL, NULL)) != 0)
00752                         goto err;
00753                 hval = hfunc(dbp, dbt.data, dbt.size);
00754 
00755                 bucket = hval & m->high_mask;
00756                 if (bucket > m->max_bucket)
00757                         bucket = bucket & m->low_mask;
00758 
00759                 if (bucket != thisbucket) {
00760                         EPRINT((dbp->dbenv,
00761                             "Page %lu: item %lu hashes incorrectly",
00762                             (u_long)pgno, (u_long)i));
00763                         isbad = 1;
00764                 }
00765         }
00766 
00767 err:    if (dbt.data != NULL)
00768                 __os_ufree(dbp->dbenv, dbt.data);
00769         if ((t_ret = __memp_fput(mpf, h, 0)) != 0)
00770                 return (t_ret);
00771 
00772         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00773 }
00774 
00775 /*
00776  * __ham_salvage --
00777  *      Safely dump out anything that looks like a key on an alleged
00778  *      hash page.
00779  *
00780  * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *,
00781  * PUBLIC:     void *, int (*)(void *, const void *), u_int32_t));
00782  */
00783 int
00784 __ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
00785         DB *dbp;
00786         VRFY_DBINFO *vdp;
00787         db_pgno_t pgno;
00788         PAGE *h;
00789         void *handle;
00790         int (*callback) __P((void *, const void *));
00791         u_int32_t flags;
00792 {
00793         DBT dbt, unkdbt;
00794         db_pgno_t dpgno;
00795         int ret, err_ret, t_ret;
00796         u_int32_t himark, i;
00797         u_int8_t *hk, *p;
00798         void *buf;
00799         db_indx_t dlen, len, tlen;
00800 
00801         memset(&dbt, 0, sizeof(DBT));
00802         dbt.flags = DB_DBT_REALLOC;
00803 
00804         memset(&unkdbt, 0, sizeof(DBT));
00805         unkdbt.size = (u_int32_t)strlen("UNKNOWN") + 1;
00806         unkdbt.data = "UNKNOWN";
00807 
00808         err_ret = 0;
00809 
00810         /*
00811          * Allocate a buffer for overflow items.  Start at one page;
00812          * __db_safe_goff will realloc as needed.
00813          */
00814         if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &buf)) != 0)
00815                 return (ret);
00816 
00817         himark = dbp->pgsize;
00818         for (i = 0;; i++) {
00819                 /* If we're not aggressive, break when we hit NUM_ENT(h). */
00820                 if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h))
00821                         break;
00822 
00823                 /* Verify the current item. */
00824                 ret = __db_vrfy_inpitem(dbp,
00825                     h, pgno, i, 0, flags, &himark, NULL);
00826                 /* If this returned a fatality, it's time to break. */
00827                 if (ret == DB_VERIFY_FATAL)
00828                         break;
00829 
00830                 if (ret == 0) {
00831                         /* Set len to total entry length. */
00832                         len = LEN_HITEM(dbp, h, dbp->pgsize, i);
00833                         hk = P_ENTRY(dbp, h, i);
00834                         if (len == 0 || len > dbp->pgsize ||
00835                             (u_int32_t)(hk + len - (u_int8_t *)h) >
00836                             dbp->pgsize) {
00837                                 /* Item is unsafely large; skip it. */
00838                                 err_ret = DB_VERIFY_BAD;
00839                                 continue;
00840                         }
00841                         switch (HPAGE_PTYPE(hk)) {
00842                         default:
00843                                 if (!LF_ISSET(DB_AGGRESSIVE))
00844                                         break;
00845                                 err_ret = DB_VERIFY_BAD;
00846                                 break;
00847                         case H_KEYDATA:
00848                                 /* Update len to size of item. */
00849                                 len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
00850 keydata:                        memcpy(buf, HKEYDATA_DATA(hk), len);
00851                                 dbt.size = len;
00852                                 dbt.data = buf;
00853                                 if ((ret = __db_vrfy_prdbt(&dbt,
00854                                     0, " ", handle, callback, 0, vdp)) != 0)
00855                                         err_ret = ret;
00856                                 break;
00857                         case H_OFFPAGE:
00858                                 if (len < HOFFPAGE_SIZE) {
00859                                         err_ret = DB_VERIFY_BAD;
00860                                         continue;
00861                                 }
00862                                 memcpy(&dpgno,
00863                                     HOFFPAGE_PGNO(hk), sizeof(dpgno));
00864                                 if ((ret = __db_safe_goff(dbp, vdp,
00865                                     dpgno, &dbt, &buf, flags)) != 0) {
00866                                         err_ret = ret;
00867                                         (void)__db_vrfy_prdbt(&unkdbt, 0, " ",
00868                                             handle, callback, 0, vdp);
00869                                         break;
00870                                 }
00871                                 if ((ret = __db_vrfy_prdbt(&dbt,
00872                                     0, " ", handle, callback, 0, vdp)) != 0)
00873                                         err_ret = ret;
00874                                 break;
00875                         case H_OFFDUP:
00876                                 if (len < HOFFDUP_SIZE) {
00877                                         err_ret = DB_VERIFY_BAD;
00878                                         continue;
00879                                 }
00880                                 memcpy(&dpgno,
00881                                     HOFFDUP_PGNO(hk), sizeof(dpgno));
00882                                 /* UNKNOWN iff pgno is bad or we're a key. */
00883                                 if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) {
00884                                         if ((ret =
00885                                             __db_vrfy_prdbt(&unkdbt, 0, " ",
00886                                             handle, callback, 0, vdp)) != 0)
00887                                                 err_ret = ret;
00888                                 } else if ((ret = __db_salvage_duptree(dbp,
00889                                     vdp, dpgno, &dbt, handle, callback,
00890                                     flags | SA_SKIPFIRSTKEY)) != 0)
00891                                         err_ret = ret;
00892                                 break;
00893                         case H_DUPLICATE:
00894                                 len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
00895                                 /*
00896                                  * We're a key;  printing dups will seriously
00897                                  * foul the output.  If we're being aggressive,
00898                                  * pretend this is a key and let the app.
00899                                  * programmer sort out the mess.
00900                                  */
00901                                 if (i % 2 == 0) {
00902                                         err_ret = ret;
00903                                         if (LF_ISSET(DB_AGGRESSIVE))
00904                                                 goto keydata;
00905                                         break;
00906                                 }
00907 
00908                                 /*
00909                                  * Check if too small to have any data.
00910                                  * But first, we have to update the len to
00911                                  * reflect the size of the data not the
00912                                  * size of the on-page entry.
00913                                  */
00914                                 if (len <
00915                                     HKEYDATA_SIZE(2 * sizeof(db_indx_t))) {
00916                                         err_ret = DB_VERIFY_BAD;
00917                                         continue;
00918                                 }
00919 
00920                                 /* Loop until we hit the total length. */
00921                                 for (tlen = 0; tlen + sizeof(db_indx_t) < len;
00922                                     tlen += dlen) {
00923                                         p = HKEYDATA_DATA(hk) + tlen;
00924                                         tlen += sizeof(db_indx_t);
00925                                         memcpy(&dlen, p, sizeof(db_indx_t));
00926                                         p += sizeof(db_indx_t);
00927                                         /*
00928                                          * If dlen is too long, print all the
00929                                          * rest of the dup set in a chunk.
00930                                          */
00931                                         if (dlen + tlen > len)
00932                                                 dlen = len - tlen;
00933                                         memcpy(buf, p, dlen);
00934                                         dbt.size = dlen;
00935                                         dbt.data = buf;
00936                                         if ((ret = __db_vrfy_prdbt(&dbt, 0, " ",
00937                                             handle, callback, 0, vdp)) != 0)
00938                                                 err_ret = ret;
00939                                         tlen += sizeof(db_indx_t);
00940                                 }
00941                                 break;
00942                         }
00943                 }
00944         }
00945 
00946         __os_free(dbp->dbenv, buf);
00947         if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
00948                 return (t_ret);
00949         return ((ret == 0 && err_ret != 0) ? err_ret : ret);
00950 }
00951 
00952 /*
00953  * __ham_meta2pgset --
00954  *      Return the set of hash pages corresponding to the given
00955  *      known-good meta page.
00956  *
00957  * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t,
00958  * PUBLIC:     DB *));
00959  */
00960 int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
00961         DB *dbp;
00962         VRFY_DBINFO *vdp;
00963         HMETA *hmeta;
00964         u_int32_t flags;
00965         DB *pgset;
00966 {
00967         DB_MPOOLFILE *mpf;
00968         PAGE *h;
00969         db_pgno_t pgno;
00970         u_int32_t bucket, totpgs;
00971         int ret, val;
00972 
00973         /*
00974          * We don't really need flags, but leave them for consistency with
00975          * __bam_meta2pgset.
00976          */
00977         COMPQUIET(flags, 0);
00978 
00979         DB_ASSERT(pgset != NULL);
00980 
00981         mpf = dbp->mpf;
00982         totpgs = 0;
00983 
00984         /*
00985          * Loop through all the buckets, pushing onto pgset the corresponding
00986          * page(s) for each one.
00987          */
00988         for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
00989                 pgno = BS_TO_PAGE(bucket, hmeta->spares);
00990 
00991                 /*
00992                  * We know the initial pgno is safe because the spares array has
00993                  * been verified.
00994                  *
00995                  * Safely walk the list of pages in this bucket.
00996                  */
00997                 for (;;) {
00998                         if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
00999                                 return (ret);
01000                         if (TYPE(h) == P_HASH) {
01001 
01002                                 /*
01003                                  * Make sure we don't go past the end of
01004                                  * pgset.
01005                                  */
01006                                 if (++totpgs > vdp->last_pgno) {
01007                                         (void)__memp_fput(mpf, h, 0);
01008                                         return (DB_VERIFY_BAD);
01009                                 }
01010                                 if ((ret =
01011                                     __db_vrfy_pgset_inc(pgset, pgno)) != 0) {
01012                                         (void)__memp_fput(mpf, h, 0);
01013                                         return (ret);
01014                                 }
01015 
01016                                 pgno = NEXT_PGNO(h);
01017                         } else
01018                                 pgno = PGNO_INVALID;
01019 
01020                         if ((ret = __memp_fput(mpf, h, 0)) != 0)
01021                                 return (ret);
01022 
01023                         /* If the new pgno is wonky, go onto the next bucket. */
01024                         if (!IS_VALID_PGNO(pgno) ||
01025                             pgno == PGNO_INVALID)
01026                                 break;
01027 
01028                         /*
01029                          * If we've touched this page before, we have a cycle;
01030                          * go on to the next bucket.
01031                          */
01032                         if ((ret = __db_vrfy_pgset_get(pgset, pgno, &val)) != 0)
01033                                 return (ret);
01034                         if (val != 0)
01035                                 break;
01036                 }
01037         }
01038         return (0);
01039 }
01040 
01041 /*
01042  * __ham_dups_unsorted --
01043  *      Takes a known-safe hash duplicate set and its total length.
01044  *      Returns 1 if there are out-of-order duplicates in this set,
01045  *      0 if there are not.
01046  */
01047 static int
01048 __ham_dups_unsorted(dbp, buf, len)
01049         DB *dbp;
01050         u_int8_t *buf;
01051         u_int32_t len;
01052 {
01053         DBT a, b;
01054         db_indx_t offset, dlen;
01055         int (*func) __P((DB *, const DBT *, const DBT *));
01056 
01057         memset(&a, 0, sizeof(DBT));
01058         memset(&b, 0, sizeof(DBT));
01059 
01060         func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
01061 
01062         /*
01063          * Loop through the dup set until we hit the end or we find
01064          * a pair of dups that's out of order.  b is always the current
01065          * dup, a the one before it.
01066          */
01067         for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) {
01068                 memcpy(&dlen, buf + offset, sizeof(db_indx_t));
01069                 b.data = buf + offset + sizeof(db_indx_t);
01070                 b.size = dlen;
01071 
01072                 if (a.data != NULL && func(dbp, &a, &b) > 0)
01073                         return (1);
01074 
01075                 a.data = b.data;
01076                 a.size = b.size;
01077         }
01078 
01079         return (0);
01080 }

Generated on Sun Dec 25 12:14:30 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2