Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

bt_verify.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1999-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: bt_verify.c,v 12.13 2005/11/11 20:27:49 ubell Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <string.h>
00016 #endif
00017 
00018 #include "db_int.h"
00019 #include "dbinc/db_page.h"
00020 #include "dbinc/db_shash.h"
00021 #include "dbinc/db_verify.h"
00022 #include "dbinc/btree.h"
00023 #include "dbinc/mp.h"
00024 
00025 static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *));
00026 static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00027     db_indx_t *, u_int32_t));
00028 static int __bam_vrfy_treeorder __P((DB *, db_pgno_t, PAGE *, BINTERNAL *,
00029     BINTERNAL *, int (*)(DB *, const DBT *, const DBT *), u_int32_t));
00030 static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00031     db_indx_t *, u_int32_t));
00032 
00033 /*
00034  * __bam_vrfy_meta --
00035  *      Verify the btree-specific part of a metadata page.
00036  *
00037  * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *,
00038  * PUBLIC:     db_pgno_t, u_int32_t));
00039  */
00040 int
00041 __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
00042         DB *dbp;
00043         VRFY_DBINFO *vdp;
00044         BTMETA *meta;
00045         db_pgno_t pgno;
00046         u_int32_t flags;
00047 {
00048         DB_ENV *dbenv;
00049         VRFY_PAGEINFO *pip;
00050         int isbad, t_ret, ret;
00051         db_indx_t ovflsize;
00052 
00053         dbenv = dbp->dbenv;
00054         isbad = 0;
00055 
00056         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00057                 return (ret);
00058 
00059         /*
00060          * If VRFY_INCOMPLETE is not set, then we didn't come through
00061          * __db_vrfy_pagezero and didn't incompletely
00062          * check this page--we haven't checked it at all.
00063          * Thus we need to call __db_vrfy_meta and check the common fields.
00064          *
00065          * If VRFY_INCOMPLETE is set, we've already done all the same work
00066          * in __db_vrfy_pagezero, so skip the check.
00067          */
00068         if (!F_ISSET(pip, VRFY_INCOMPLETE) &&
00069             (ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) {
00070                 if (ret == DB_VERIFY_BAD)
00071                         isbad = 1;
00072                 else
00073                         goto err;
00074         }
00075 
00076         /* bt_minkey:  must be >= 2; must produce sensible ovflsize */
00077 
00078         /* avoid division by zero */
00079         ovflsize = meta->minkey > 0 ?
00080             B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0;
00081 
00082         if (meta->minkey < 2 ||
00083             ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
00084                 pip->bt_minkey = 0;
00085                 isbad = 1;
00086                 EPRINT((dbenv,
00087             "Page %lu: nonsensical bt_minkey value %lu on metadata page",
00088                     (u_long)pgno, (u_long)meta->minkey));
00089         } else
00090                 pip->bt_minkey = meta->minkey;
00091 
00092         /* re_len: no constraints on this (may be zero or huge--we make rope) */
00093         pip->re_pad = meta->re_pad;
00094         pip->re_len = meta->re_len;
00095 
00096         /*
00097          * The root must not be current page or 0 and it must be within
00098          * database.  If this metadata page is the master meta data page
00099          * of the file, then the root page had better be page 1.
00100          */
00101         pip->root = 0;
00102         if (meta->root == PGNO_INVALID ||
00103             meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
00104             (pgno == PGNO_BASE_MD && meta->root != 1)) {
00105                 isbad = 1;
00106                 EPRINT((dbenv,
00107                     "Page %lu: nonsensical root page %lu on metadata page",
00108                     (u_long)pgno, (u_long)meta->root));
00109         } else
00110                 pip->root = meta->root;
00111 
00112         /* Flags. */
00113         if (F_ISSET(&meta->dbmeta, BTM_RENUMBER))
00114                 F_SET(pip, VRFY_IS_RRECNO);
00115 
00116         if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) {
00117                 /*
00118                  * If this is a master db meta page, it had better not have
00119                  * duplicates.
00120                  */
00121                 if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
00122                         isbad = 1;
00123                         EPRINT((dbenv,
00124 "Page %lu: Btree metadata page has both duplicates and multiple databases",
00125                             (u_long)pgno));
00126                 }
00127                 F_SET(pip, VRFY_HAS_SUBDBS);
00128         }
00129 
00130         if (F_ISSET(&meta->dbmeta, BTM_DUP))
00131                 F_SET(pip, VRFY_HAS_DUPS);
00132         if (F_ISSET(&meta->dbmeta, BTM_DUPSORT))
00133                 F_SET(pip, VRFY_HAS_DUPSORT);
00134         if (F_ISSET(&meta->dbmeta, BTM_RECNUM))
00135                 F_SET(pip, VRFY_HAS_RECNUMS);
00136         if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
00137                 EPRINT((dbenv,
00138     "Page %lu: Btree metadata page illegally has both recnums and dups",
00139                     (u_long)pgno));
00140                 isbad = 1;
00141         }
00142 
00143         if (F_ISSET(&meta->dbmeta, BTM_RECNO)) {
00144                 F_SET(pip, VRFY_IS_RECNO);
00145                 dbp->type = DB_RECNO;
00146         } else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
00147                 isbad = 1;
00148                 EPRINT((dbenv,
00149     "Page %lu: metadata page has renumber flag set but is not recno",
00150                     (u_long)pgno));
00151         }
00152 
00153         if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
00154                 EPRINT((dbenv,
00155                     "Page %lu: recno metadata page specifies duplicates",
00156                     (u_long)pgno));
00157                 isbad = 1;
00158         }
00159 
00160         if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN))
00161                 F_SET(pip, VRFY_IS_FIXEDLEN);
00162         else if (pip->re_len > 0) {
00163                 /*
00164                  * It's wrong to have an re_len if it's not a fixed-length
00165                  * database
00166                  */
00167                 isbad = 1;
00168                 EPRINT((dbenv,
00169                     "Page %lu: re_len of %lu in non-fixed-length database",
00170                     (u_long)pgno, (u_long)pip->re_len));
00171         }
00172 
00173         /*
00174          * We do not check that the rest of the page is 0, because it may
00175          * not be and may still be correct.
00176          */
00177 
00178 err:    if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
00179                 ret = t_ret;
00180         if (LF_ISSET(DB_SALVAGE) &&
00181            (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
00182                 ret = t_ret;
00183         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00184 }
00185 
00186 /*
00187  * __ram_vrfy_leaf --
00188  *      Verify a recno leaf page.
00189  *
00190  * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00191  * PUBLIC:     u_int32_t));
00192  */
00193 int
00194 __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
00195         DB *dbp;
00196         VRFY_DBINFO *vdp;
00197         PAGE *h;
00198         db_pgno_t pgno;
00199         u_int32_t flags;
00200 {
00201         BKEYDATA *bk;
00202         DB_ENV *dbenv;
00203         VRFY_PAGEINFO *pip;
00204         db_indx_t i;
00205         int ret, t_ret, isbad;
00206         u_int32_t re_len_guess, len;
00207 
00208         dbenv = dbp->dbenv;
00209         isbad = 0;
00210 
00211         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00212                 return (ret);
00213 
00214         if (TYPE(h) != P_LRECNO) {
00215                 /* We should not have been called. */
00216                 TYPE_ERR_PRINT(dbenv, "__ram_vrfy_leaf", pgno, TYPE(h));
00217                 DB_ASSERT(0);
00218                 ret = EINVAL;
00219                 goto err;
00220         }
00221 
00222         /*
00223          * Verify (and, if relevant, save off) page fields common to
00224          * all PAGEs.
00225          */
00226         if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
00227                 if (ret == DB_VERIFY_BAD)
00228                         isbad = 1;
00229                 else
00230                         goto err;
00231         }
00232 
00233         /*
00234          * Verify inp[].  Return immediately if it returns DB_VERIFY_BAD;
00235          * further checks are dangerous.
00236          */
00237         if ((ret = __bam_vrfy_inp(dbp,
00238             vdp, h, pgno, &pip->entries, flags)) != 0)
00239                 goto err;
00240 
00241         if (F_ISSET(pip, VRFY_HAS_DUPS)) {
00242                 EPRINT((dbenv,
00243                     "Page %lu: Recno database has dups", (u_long)pgno));
00244                 ret = DB_VERIFY_BAD;
00245                 goto err;
00246         }
00247 
00248         /*
00249          * Walk through inp and see if the lengths of all the records are the
00250          * same--if so, this may be a fixed-length database, and we want to
00251          * save off this value.  We know inp to be safe if we've gotten this
00252          * far.
00253          */
00254         re_len_guess = 0;
00255         for (i = 0; i < NUM_ENT(h); i++) {
00256                 bk = GET_BKEYDATA(dbp, h, i);
00257                 /* KEYEMPTY.  Go on. */
00258                 if (B_DISSET(bk->type))
00259                         continue;
00260                 if (bk->type == B_OVERFLOW)
00261                         len = ((BOVERFLOW *)bk)->tlen;
00262                 else if (bk->type == B_KEYDATA)
00263                         len = bk->len;
00264                 else {
00265                         isbad = 1;
00266                         EPRINT((dbenv,
00267                             "Page %lu: nonsensical type for item %lu",
00268                             (u_long)pgno, (u_long)i));
00269                         continue;
00270                 }
00271                 if (re_len_guess == 0)
00272                         re_len_guess = len;
00273 
00274                 /*
00275                  * Is this item's len the same as the last one's?  If not,
00276                  * reset to 0 and break--we don't have a single re_len.
00277                  * Otherwise, go on to the next item.
00278                  */
00279                 if (re_len_guess != len) {
00280                         re_len_guess = 0;
00281                         break;
00282                 }
00283         }
00284         pip->re_len = re_len_guess;
00285 
00286         /* Save off record count. */
00287         pip->rec_cnt = NUM_ENT(h);
00288 
00289 err:    if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
00290                 ret = t_ret;
00291         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00292 }
00293 
00294 /*
00295  * __bam_vrfy --
00296  *      Verify a btree leaf or internal page.
00297  *
00298  * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00299  * PUBLIC:     u_int32_t));
00300  */
00301 int
00302 __bam_vrfy(dbp, vdp, h, pgno, flags)
00303         DB *dbp;
00304         VRFY_DBINFO *vdp;
00305         PAGE *h;
00306         db_pgno_t pgno;
00307         u_int32_t flags;
00308 {
00309         DB_ENV *dbenv;
00310         VRFY_PAGEINFO *pip;
00311         int ret, t_ret, isbad;
00312 
00313         dbenv = dbp->dbenv;
00314         isbad = 0;
00315 
00316         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00317                 return (ret);
00318 
00319         switch (TYPE(h)) {
00320         case P_IBTREE:
00321         case P_IRECNO:
00322         case P_LBTREE:
00323         case P_LDUP:
00324                 break;
00325         default:
00326                 TYPE_ERR_PRINT(dbenv, "__bam_vrfy", pgno, TYPE(h));
00327                 DB_ASSERT(0);
00328                 ret = EINVAL;
00329                 goto err;
00330         }
00331 
00332         /*
00333          * Verify (and, if relevant, save off) page fields common to
00334          * all PAGEs.
00335          */
00336         if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
00337                 if (ret == DB_VERIFY_BAD)
00338                         isbad = 1;
00339                 else
00340                         goto err;
00341         }
00342 
00343         /*
00344          * The record count is, on internal pages, stored in an overloaded
00345          * next_pgno field.  Save it off;  we'll verify it when we check
00346          * overall database structure.  We could overload the field
00347          * in VRFY_PAGEINFO, too, but this seems gross, and space
00348          * is not at such a premium.
00349          */
00350         pip->rec_cnt = RE_NREC(h);
00351 
00352         /*
00353          * Verify inp[].
00354          */
00355         if (TYPE(h) == P_IRECNO) {
00356                 if ((ret = __ram_vrfy_inp(dbp,
00357                     vdp, h, pgno, &pip->entries, flags)) != 0)
00358                         goto err;
00359         } else if ((ret = __bam_vrfy_inp(dbp,
00360             vdp, h, pgno, &pip->entries, flags)) != 0) {
00361                 if (ret == DB_VERIFY_BAD)
00362                         isbad = 1;
00363                 else
00364                         goto err;
00365                 EPRINT((dbenv,
00366                     "Page %lu: item order check unsafe: skipping",
00367                     (u_long)pgno));
00368         } else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
00369             __bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) {
00370                 /*
00371                  * We know that the elements of inp are reasonable.
00372                  *
00373                  * Check that elements fall in the proper order.
00374                  */
00375                 if (ret == DB_VERIFY_BAD)
00376                         isbad = 1;
00377                 else
00378                         goto err;
00379         }
00380 
00381 err:    if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
00382                 ret = t_ret;
00383         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00384 }
00385 
00386 /*
00387  * __ram_vrfy_inp --
00388  *      Verify that all entries in a P_IRECNO inp[] array are reasonable,
00389  *      and count them.  Note that P_LRECNO uses __bam_vrfy_inp;
00390  *      P_IRECNOs are a special, and simpler, case, since they have
00391  *      RINTERNALs rather than BKEYDATA/BINTERNALs.
00392  */
00393 static int
00394 __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
00395         DB *dbp;
00396         VRFY_DBINFO *vdp;
00397         PAGE *h;
00398         db_pgno_t pgno;
00399         db_indx_t *nentriesp;
00400         u_int32_t flags;
00401 {
00402         DB_ENV *dbenv;
00403         RINTERNAL *ri;
00404         VRFY_CHILDINFO child;
00405         VRFY_PAGEINFO *pip;
00406         int ret, t_ret, isbad;
00407         u_int32_t himark, i, offset, nentries;
00408         db_indx_t *inp;
00409         u_int8_t *pagelayout, *p;
00410 
00411         dbenv = dbp->dbenv;
00412         isbad = 0;
00413         memset(&child, 0, sizeof(VRFY_CHILDINFO));
00414         nentries = 0;
00415         pagelayout = NULL;
00416 
00417         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00418                 return (ret);
00419 
00420         if (TYPE(h) != P_IRECNO) {
00421                 TYPE_ERR_PRINT(dbenv, "__ram_vrfy_inp", pgno, TYPE(h));
00422                 DB_ASSERT(0);
00423                 ret = EINVAL;
00424                 goto err;
00425         }
00426 
00427         himark = dbp->pgsize;
00428         if ((ret = __os_malloc(dbenv, dbp->pgsize, &pagelayout)) != 0)
00429                 goto err;
00430         memset(pagelayout, 0, dbp->pgsize);
00431         inp = P_INP(dbp, h);
00432         for (i = 0; i < NUM_ENT(h); i++) {
00433                 if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) {
00434                         EPRINT((dbenv,
00435                             "Page %lu: entries listing %lu overlaps data",
00436                             (u_long)pgno, (u_long)i));
00437                         ret = DB_VERIFY_BAD;
00438                         goto err;
00439                 }
00440                 offset = inp[i];
00441                 /*
00442                  * Check that the item offset is reasonable:  it points
00443                  * somewhere after the inp array and before the end of the
00444                  * page.
00445                  */
00446                 if (offset <= (u_int32_t)((u_int8_t *)inp + i -
00447                     (u_int8_t *)h) ||
00448                     offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) {
00449                         isbad = 1;
00450                         EPRINT((dbenv,
00451                             "Page %lu: bad offset %lu at index %lu",
00452                             (u_long)pgno, (u_long)offset, (u_long)i));
00453                         continue;
00454                 }
00455 
00456                 /* Update the high-water mark (what HOFFSET should be) */
00457                 if (offset < himark)
00458                         himark = offset;
00459 
00460                 nentries++;
00461 
00462                 /* Make sure this RINTERNAL is not multiply referenced. */
00463                 ri = GET_RINTERNAL(dbp, h, i);
00464                 if (pagelayout[offset] == 0) {
00465                         pagelayout[offset] = 1;
00466                         child.pgno = ri->pgno;
00467                         child.type = V_RECNO;
00468                         child.nrecs = ri->nrecs;
00469                         if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
00470                                 goto err;
00471                 } else {
00472                         EPRINT((dbenv,
00473                 "Page %lu: RINTERNAL structure at offset %lu referenced twice",
00474                             (u_long)pgno, (u_long)offset));
00475                         isbad = 1;
00476                 }
00477         }
00478 
00479         for (p = pagelayout + himark;
00480             p < pagelayout + dbp->pgsize;
00481             p += RINTERNAL_SIZE)
00482                 if (*p != 1) {
00483                         EPRINT((dbenv,
00484                             "Page %lu: gap between items at offset %lu",
00485                             (u_long)pgno, (u_long)(p - pagelayout)));
00486                         isbad = 1;
00487                 }
00488 
00489         if ((db_indx_t)himark != HOFFSET(h)) {
00490                 EPRINT((dbenv,
00491                     "Page %lu: bad HOFFSET %lu, appears to be %lu",
00492                     (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark));
00493                 isbad = 1;
00494         }
00495 
00496         *nentriesp = nentries;
00497 
00498 err:    if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
00499                 ret = t_ret;
00500         if (pagelayout != NULL)
00501                 __os_free(dbenv, pagelayout);
00502         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00503 }
00504 
/*
 * VRFY_ITEM --
 *	Marks stored in the per-offset pagelayout array of __bam_vrfy_inp:
 *	nothing recorded at this offset yet, an item begins here, or an
 *	item ends here.
 */
typedef enum {
	VRFY_ITEM_NOTSET = 0,
	VRFY_ITEM_BEGIN,
	VRFY_ITEM_END
} VRFY_ITEM;
00506 
00507 /*
00508  * __bam_vrfy_inp --
00509  *      Verify that all entries in inp[] array are reasonable;
00510  *      count them.
00511  */
00512 static int
00513 __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
00514         DB *dbp;
00515         VRFY_DBINFO *vdp;
00516         PAGE *h;
00517         db_pgno_t pgno;
00518         db_indx_t *nentriesp;
00519         u_int32_t flags;
00520 {
00521         BKEYDATA *bk;
00522         BOVERFLOW *bo;
00523         DB_ENV *dbenv;
00524         VRFY_CHILDINFO child;
00525         VRFY_ITEM *pagelayout;
00526         VRFY_PAGEINFO *pip;
00527         u_int32_t himark, offset;               /*
00528                                                  * These would be db_indx_ts
00529                                                  * but for alignment.
00530                                                  */
00531         u_int32_t i, endoff, nentries;
00532         int isbad, initem, isdupitem, ret, t_ret;
00533 
00534         dbenv = dbp->dbenv;
00535         isbad = isdupitem = 0;
00536         nentries = 0;
00537         memset(&child, 0, sizeof(VRFY_CHILDINFO));
00538         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00539                 return (ret);
00540 
00541         switch (TYPE(h)) {
00542         case P_IBTREE:
00543         case P_LBTREE:
00544         case P_LDUP:
00545         case P_LRECNO:
00546                 break;
00547         default:
00548                 /*
00549                  * In the salvager, we might call this from a page which
00550                  * we merely suspect is a btree page.  Otherwise, it
00551                  * shouldn't get called--if it is, that's a verifier bug.
00552                  */
00553                 if (LF_ISSET(DB_SALVAGE))
00554                         break;
00555                 TYPE_ERR_PRINT(dbenv, "__bam_vrfy_inp", pgno, TYPE(h));
00556                 DB_ASSERT(0);
00557                 ret = EINVAL;
00558                 goto err;
00559         }
00560 
00561         /*
00562          * Loop through inp[], the array of items, until we either
00563          * run out of entries or collide with the data.  Keep track
00564          * of h_offset in himark.
00565          *
00566          * For each element in inp[i], make sure it references a region
00567          * that starts after the end of the inp array (as defined by
00568          * NUM_ENT(h)), ends before the beginning of the page, doesn't
00569          * overlap any other regions, and doesn't have a gap between
00570          * it and the region immediately after it.
00571          */
00572         himark = dbp->pgsize;
00573         if ((ret = __os_calloc(
00574             dbenv, dbp->pgsize, sizeof(pagelayout[0]), &pagelayout)) != 0)
00575                 goto err;
00576         for (i = 0; i < NUM_ENT(h); i++) {
00577                 switch (ret = __db_vrfy_inpitem(dbp,
00578                     h, pgno, i, 1, flags, &himark, &offset)) {
00579                 case 0:
00580                         break;
00581                 case DB_VERIFY_BAD:
00582                         isbad = 1;
00583                         continue;
00584                 case DB_VERIFY_FATAL:
00585                         isbad = 1;
00586                         goto err;
00587                 default:
00588                         DB_ASSERT(ret != 0);
00589                         break;
00590                 }
00591 
00592                 /*
00593                  * We now have a plausible beginning for the item, and we know
00594                  * its length is safe.
00595                  *
00596                  * Mark the beginning and end in pagelayout so we can make sure
00597                  * items have no overlaps or gaps.
00598                  */
00599                 bk = GET_BKEYDATA(dbp, h, i);
00600                 if (pagelayout[offset] == VRFY_ITEM_NOTSET)
00601                         pagelayout[offset] = VRFY_ITEM_BEGIN;
00602                 else if (pagelayout[offset] == VRFY_ITEM_BEGIN) {
00603                         /*
00604                          * Having two inp entries that point at the same patch
00605                          * of page is legal if and only if the page is
00606                          * a btree leaf and they're onpage duplicate keys--
00607                          * that is, if (i % P_INDX) == 0.
00608                          */
00609                         if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) {
00610                                 /* Flag for later. */
00611                                 F_SET(pip, VRFY_HAS_DUPS);
00612 
00613                                 /* Bump up nentries so we don't undercount. */
00614                                 nentries++;
00615 
00616                                 /*
00617                                  * We'll check to make sure the end is
00618                                  * equal, too.
00619                                  */
00620                                 isdupitem = 1;
00621                         } else {
00622                                 isbad = 1;
00623                                 EPRINT((dbenv, "Page %lu: duplicated item %lu",
00624                                     (u_long)pgno, (u_long)i));
00625                         }
00626                 }
00627 
00628                 /*
00629                  * Mark the end.  Its location varies with the page type
00630                  * and the item type.
00631                  *
00632                  * If the end already has a sign other than 0, do nothing--
00633                  * it's an overlap that we'll catch later.
00634                  */
00635                 switch (B_TYPE(bk->type)) {
00636                 case B_KEYDATA:
00637                         if (TYPE(h) == P_IBTREE)
00638                                 /* It's a BINTERNAL. */
00639                                 endoff = offset + BINTERNAL_SIZE(bk->len) - 1;
00640                         else
00641                                 endoff = offset + BKEYDATA_SIZE(bk->len) - 1;
00642                         break;
00643                 case B_DUPLICATE:
00644                         /*
00645                          * Flag that we have dups; we'll check whether
00646                          * that's okay during the structure check.
00647                          */
00648                         F_SET(pip, VRFY_HAS_DUPS);
00649                         /* FALLTHROUGH */
00650                 case B_OVERFLOW:
00651                         /*
00652                          * Overflow entries on internal pages are stored
00653                          * as the _data_ of a BINTERNAL;  overflow entries
00654                          * on leaf pages are stored as the entire entry.
00655                          */
00656                         endoff = offset +
00657                             ((TYPE(h) == P_IBTREE) ?
00658                             BINTERNAL_SIZE(BOVERFLOW_SIZE) :
00659                             BOVERFLOW_SIZE) - 1;
00660                         break;
00661                 default:
00662                         /*
00663                          * We'll complain later;  for now, just mark
00664                          * a minimum.
00665                          */
00666                         endoff = offset + BKEYDATA_SIZE(0) - 1;
00667                         break;
00668                 }
00669 
00670                 /*
00671                  * If this is an onpage duplicate key we've seen before,
00672                  * the end had better coincide too.
00673                  */
00674                 if (isdupitem && pagelayout[endoff] != VRFY_ITEM_END) {
00675                         EPRINT((dbenv, "Page %lu: duplicated item %lu",
00676                             (u_long)pgno, (u_long)i));
00677                         isbad = 1;
00678                 } else if (pagelayout[endoff] == VRFY_ITEM_NOTSET)
00679                         pagelayout[endoff] = VRFY_ITEM_END;
00680                 isdupitem = 0;
00681 
00682                 /*
00683                  * There should be no deleted items in a quiescent tree,
00684                  * except in recno.
00685                  */
00686                 if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
00687                         isbad = 1;
00688                         EPRINT((dbenv, "Page %lu: item %lu marked deleted",
00689                             (u_long)pgno, (u_long)i));
00690                 }
00691 
00692                 /*
00693                  * Check the type and such of bk--make sure it's reasonable
00694                  * for the pagetype.
00695                  */
00696                 switch (B_TYPE(bk->type)) {
00697                 case B_KEYDATA:
00698                         /*
00699                          * This is a normal, non-overflow BKEYDATA or BINTERNAL.
00700                          * The only thing to check is the len, and that's
00701                          * already been done.
00702                          */
00703                         break;
00704                 case B_DUPLICATE:
00705                         if (TYPE(h) == P_IBTREE) {
00706                                 isbad = 1;
00707                                 EPRINT((dbenv,
00708     "Page %lu: duplicate page referenced by internal btree page at item %lu",
00709                                     (u_long)pgno, (u_long)i));
00710                                 break;
00711                         } else if (TYPE(h) == P_LRECNO) {
00712                                 isbad = 1;
00713                                 EPRINT((dbenv,
00714         "Page %lu: duplicate page referenced by recno page at item %lu",
00715                                     (u_long)pgno, (u_long)i));
00716                                 break;
00717                         }
00718                         /* FALLTHROUGH */
00719                 case B_OVERFLOW:
00720                         bo = (TYPE(h) == P_IBTREE) ?
00721                             (BOVERFLOW *)(((BINTERNAL *)bk)->data) :
00722                             (BOVERFLOW *)bk;
00723 
00724                         if (B_TYPE(bk->type) == B_OVERFLOW)
00725                                 /* Make sure tlen is reasonable. */
00726                                 if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
00727                                         isbad = 1;
00728                                         EPRINT((dbenv,
00729                                 "Page %lu: impossible tlen %lu, item %lu",
00730                                             (u_long)pgno,
00731                                             (u_long)bo->tlen, (u_long)i));
00732                                         /* Don't save as a child. */
00733                                         break;
00734                                 }
00735 
00736                         if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
00737                             bo->pgno == PGNO_INVALID) {
00738                                 isbad = 1;
00739                                 EPRINT((dbenv,
00740                             "Page %lu: offpage item %lu has bad pgno %lu",
00741                                     (u_long)pgno, (u_long)i, (u_long)bo->pgno));
00742                                 /* Don't save as a child. */
00743                                 break;
00744                         }
00745 
00746                         child.pgno = bo->pgno;
00747                         child.type = (B_TYPE(bk->type) == B_OVERFLOW ?
00748                             V_OVERFLOW : V_DUPLICATE);
00749                         child.tlen = bo->tlen;
00750                         if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
00751                                 goto err;
00752                         break;
00753                 default:
00754                         isbad = 1;
00755                         EPRINT((dbenv, "Page %lu: item %lu of invalid type %lu",
00756                             (u_long)pgno, (u_long)i, (u_long)B_TYPE(bk->type)));
00757                         break;
00758                 }
00759         }
00760 
00761         /*
00762          * Now, loop through and make sure the items are contiguous and
00763          * non-overlapping.
00764          */
00765         initem = 0;
00766         for (i = himark; i < dbp->pgsize; i++)
00767                 if (initem == 0)
00768                         switch (pagelayout[i]) {
00769                         case VRFY_ITEM_NOTSET:
00770                                 /* May be just for alignment. */
00771                                 if (i != DB_ALIGN(i, sizeof(u_int32_t)))
00772                                         continue;
00773 
00774                                 isbad = 1;
00775                                 EPRINT((dbenv,
00776                                     "Page %lu: gap between items at offset %lu",
00777                                     (u_long)pgno, (u_long)i));
00778                                 /* Find the end of the gap */
00779                                 for (; pagelayout[i + 1] == VRFY_ITEM_NOTSET &&
00780                                     (size_t)(i + 1) < dbp->pgsize; i++)
00781                                         ;
00782                                 break;
00783                         case VRFY_ITEM_BEGIN:
00784                                 /* We've found an item. Check its alignment. */
00785                                 if (i != DB_ALIGN(i, sizeof(u_int32_t))) {
00786                                         isbad = 1;
00787                                         EPRINT((dbenv,
00788                                             "Page %lu: offset %lu unaligned",
00789                                             (u_long)pgno, (u_long)i));
00790                                 }
00791                                 initem = 1;
00792                                 nentries++;
00793                                 break;
00794                         case VRFY_ITEM_END:
00795                                 /*
00796                                  * We've hit the end of an item even though
00797                                  * we don't think we're in one;  must
00798                                  * be an overlap.
00799                                  */
00800                                 isbad = 1;
00801                                 EPRINT((dbenv,
00802                                     "Page %lu: overlapping items at offset %lu",
00803                                     (u_long)pgno, (u_long)i));
00804                                 break;
00805                         }
00806                 else
00807                         switch (pagelayout[i]) {
00808                         case VRFY_ITEM_NOTSET:
00809                                 /* In the middle of an item somewhere. Okay. */
00810                                 break;
00811                         case VRFY_ITEM_END:
00812                                 /* End of an item; switch to out-of-item mode.*/
00813                                 initem = 0;
00814                                 break;
00815                         case VRFY_ITEM_BEGIN:
00816                                 /*
00817                                  * Hit a second item beginning without an
00818                                  * end.  Overlap.
00819                                  */
00820                                 isbad = 1;
00821                                 EPRINT((dbenv,
00822                                     "Page %lu: overlapping items at offset %lu",
00823                                     (u_long)pgno, (u_long)i));
00824                                 break;
00825                         }
00826 
00827         __os_free(dbenv, pagelayout);
00828 
00829         /* Verify HOFFSET. */
00830         if ((db_indx_t)himark != HOFFSET(h)) {
00831                 EPRINT((dbenv, "Page %lu: bad HOFFSET %lu, appears to be %lu",
00832                     (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark));
00833                 isbad = 1;
00834         }
00835 
00836 err:    if (nentriesp != NULL)
00837                 *nentriesp = nentries;
00838 
00839         if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
00840                 ret = t_ret;
00841 
00842         return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
00843 }
00844 
00845 /*
00846  * __bam_vrfy_itemorder --
00847  *      Make sure the items on a page sort correctly.
00848  *
00849  *      Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are
00850  *      reasonable;  be sure that __bam_vrfy_inp has been called first.
00851  *
00852  *      If ovflok is set, it also assumes that overflow page chains
00853  *      hanging off the current page have been sanity-checked, and so we
00854  *      can use __bam_cmp to verify their ordering.  If it is not set,
00855  *      and we run into an overflow page, carp and return DB_VERIFY_BAD;
00856  *      we shouldn't be called if any exist.
00857  *
00858  * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *,
00859  * PUBLIC:     db_pgno_t, u_int32_t, int, int, u_int32_t));
00860  */
00861 int
00862 __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
00863         DB *dbp;
00864         VRFY_DBINFO *vdp;
00865         PAGE *h;
00866         db_pgno_t pgno;
00867         u_int32_t nentries;
00868         int ovflok, hasdups;
00869         u_int32_t flags;
00870 {
00871         BINTERNAL *bi;
00872         BKEYDATA *bk;
00873         BOVERFLOW *bo;
00874         BTREE *bt;
00875         DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
00876         DB_ENV *dbenv;
00877         VRFY_PAGEINFO *pip;
00878         db_indx_t i;
00879         int cmp, freedup_1, freedup_2, isbad, ret, t_ret;
00880         int (*dupfunc) __P((DB *, const DBT *, const DBT *));
00881         int (*func) __P((DB *, const DBT *, const DBT *));
00882         void *buf1, *buf2, *tmpbuf;
00883 
00884         /*
00885          * We need to work in the ORDERCHKONLY environment where we might
00886          * not have a pip, but we also may need to work in contexts where
00887          * NUM_ENT isn't safe.
00888          */
00889         if (vdp != NULL) {
00890                 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00891                         return (ret);
00892                 nentries = pip->entries;
00893         } else
00894                 pip = NULL;
00895 
00896         dbenv = dbp->dbenv;
00897         ret = isbad = 0;
00898         bo = NULL;                      /* Shut up compiler. */
00899 
00900         memset(&dbta, 0, sizeof(DBT));
00901         F_SET(&dbta, DB_DBT_REALLOC);
00902 
00903         memset(&dbtb, 0, sizeof(DBT));
00904         F_SET(&dbtb, DB_DBT_REALLOC);
00905 
00906         buf1 = buf2 = NULL;
00907 
00908         DB_ASSERT(!LF_ISSET(DB_NOORDERCHK));
00909 
00910         dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
00911         if (TYPE(h) == P_LDUP)
00912                 func = dupfunc;
00913         else {
00914                 func = __bam_defcmp;
00915                 if (dbp->bt_internal != NULL) {
00916                         bt = (BTREE *)dbp->bt_internal;
00917                         if (bt->bt_compare != NULL)
00918                                 func = bt->bt_compare;
00919                 }
00920         }
00921 
00922         /*
00923          * We alternate our use of dbta and dbtb so that we can walk
00924          * through the page key-by-key without copying a dbt twice.
00925          * p1 is always the dbt for index i - 1, and p2 for index i.
00926          */
00927         p1 = &dbta;
00928         p2 = &dbtb;
00929 
00930         /*
00931          * Loop through the entries.  nentries ought to contain the
00932          * actual count, and so is a safe way to terminate the loop;  whether
00933          * we inc. by one or two depends on whether we're a leaf page--
00934          * on a leaf page, we care only about keys.  On internal pages
00935          * and LDUP pages, we want to check the order of all entries.
00936          *
00937          * Note that on IBTREE pages, we start with item 1, since item
00938          * 0 doesn't get looked at by __bam_cmp.
00939          */
00940         for (i = (TYPE(h) == P_IBTREE) ? 1 : 0; i < nentries;
00941             i += (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX) {
00942                 /*
00943                  * Put key i-1, now in p2, into p1, by swapping DBTs and bufs.
00944                  */
00945                 tmp = p1;
00946                 p1 = p2;
00947                 p2 = tmp;
00948                 tmpbuf = buf1;
00949                 buf1 = buf2;
00950                 buf2 = tmpbuf;
00951 
00952                 /*
00953                  * Get key i into p2.
00954                  */
00955                 switch (TYPE(h)) {
00956                 case P_IBTREE:
00957                         bi = GET_BINTERNAL(dbp, h, i);
00958                         if (B_TYPE(bi->type) == B_OVERFLOW) {
00959                                 bo = (BOVERFLOW *)(bi->data);
00960                                 goto overflow;
00961                         } else {
00962                                 p2->data = bi->data;
00963                                 p2->size = bi->len;
00964                         }
00965 
00966                         /*
00967                          * The leftmost key on an internal page must be
00968                          * len 0, since it's just a placeholder and
00969                          * automatically sorts less than all keys.
00970                          *
00971                          * XXX
00972                          * This criterion does not currently hold!
00973                          * See todo list item #1686.  Meanwhile, it's harmless
00974                          * to just not check for it.
00975                          */
00976 #if 0
00977                         if (i == 0 && bi->len != 0) {
00978                                 isbad = 1;
00979                                 EPRINT((dbenv,
00980                 "Page %lu: lowest key on internal page of nonzero length",
00981                                     (u_long)pgno));
00982                         }
00983 #endif
00984                         break;
00985                 case P_LBTREE:
00986                 case P_LDUP:
00987                         bk = GET_BKEYDATA(dbp, h, i);
00988                         if (B_TYPE(bk->type) == B_OVERFLOW) {
00989                                 bo = (BOVERFLOW *)bk;
00990                                 goto overflow;
00991                         } else {
00992                                 p2->data = bk->data;
00993                                 p2->size = bk->len;
00994                         }
00995                         break;
00996                 default:
00997                         /*
00998                          * This means our caller screwed up and sent us
00999                          * an inappropriate page.
01000                          */
01001                         TYPE_ERR_PRINT(dbenv,
01002                             "__bam_vrfy_itemorder", pgno, TYPE(h))
01003                         DB_ASSERT(0);
01004                         ret = EINVAL;
01005                         goto err;
01006                 }
01007 
01008                 if (0) {
01009                         /*
01010                          * If ovflok != 1, we can't safely go chasing
01011                          * overflow pages with the normal routines now;
01012                          * they might be unsafe or nonexistent.  Mark this
01013                          * page as incomplete and return.
01014                          *
01015                          * Note that we don't need to worry about freeing
01016                          * buffers, since they can't have been allocated
01017                          * if overflow items are unsafe.
01018                          */
01019 overflow:               if (!ovflok) {
01020                                 F_SET(pip, VRFY_INCOMPLETE);
01021                                 goto err;
01022                         }
01023 
01024                         /*
01025                          * Overflow items are safe to chase.  Do so.
01026                          * Fetch the overflow item into p2->data,
01027                          * NULLing it or reallocing it as appropriate.
01028                          *
01029                          * (We set p2->data to buf2 before the call
01030                          * so we're sure to realloc if we can and if p2
01031                          * was just pointing at a non-overflow item.)
01032                          */
01033                         p2->data = buf2;
01034                         if ((ret = __db_goff(dbp,
01035                             p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
01036                                 isbad = 1;
01037                                 EPRINT((dbenv,
01038                             "Page %lu: error %lu in fetching overflow item %lu",
01039                                     (u_long)pgno, (u_long)ret, (u_long)i));
01040                         }
01041                         /* In case it got realloc'ed and thus changed. */
01042                         buf2 = p2->data;
01043                 }
01044 
01045                 /* Compare with the last key. */
01046                 if (p1->data != NULL && p2->data != NULL) {
01047                         cmp = func(dbp, p1, p2);
01048 
01049                         /* comparison succeeded */
01050                         if (cmp > 0) {
01051                                 isbad = 1;
01052                                 EPRINT((dbenv,
01053                                     "Page %lu: out-of-order key at entry %lu",
01054                                     (u_long)pgno, (u_long)i));
01055                                 /* proceed */
01056                         } else if (cmp == 0) {
01057                                 /*
01058                                  * If they compared equally, this
01059                                  * had better be a (sub)database with dups.
01060                                  * Mark it so we can check during the
01061                                  * structure check.
01062                                  */
01063                                 if (pip != NULL)
01064                                         F_SET(pip, VRFY_HAS_DUPS);
01065                                 else if (hasdups == 0) {
01066                                         isbad = 1;
01067                                         EPRINT((dbenv,
01068         "Page %lu: database with no duplicates has duplicated keys",
01069                                             (u_long)pgno));
01070                                 }
01071 
01072                                 /*
01073                                  * If we're a btree leaf, check to see
01074                                  * if the data items of these on-page dups are
01075                                  * in sorted order.  If not, flag this, so
01076                                  * that we can make sure during the
01077                                  * structure checks that the DUPSORT flag
01078                                  * is unset.
01079                                  *
01080                                  * At this point i points to a duplicate key.
01081                                  * Compare the datum before it (same key)
01082                                  * to the datum after it, i.e. i-1 to i+1.
01083                                  */
01084                                 if (TYPE(h) == P_LBTREE) {
01085                                         /*
01086                                          * Unsafe;  continue and we'll pick
01087                                          * up the bogus nentries later.
01088                                          */
01089                                         if (i + 1 >= (db_indx_t)nentries)
01090                                                 continue;
01091 
01092                                         /*
01093                                          * We don't bother with clever memory
01094                                          * management with on-page dups,
01095                                          * as it's only really a big win
01096                                          * in the overflow case, and overflow
01097                                          * dups are probably (?) rare.
01098                                          */
01099                                         if (((ret = __bam_safe_getdata(dbp,
01100                                             h, i - 1, ovflok, &dup_1,
01101                                             &freedup_1)) != 0) ||
01102                                             ((ret = __bam_safe_getdata(dbp,
01103                                             h, i + 1, ovflok, &dup_2,
01104                                             &freedup_2)) != 0))
01105                                                 goto err;
01106 
01107                                         /*
01108                                          * If either of the data are NULL,
01109                                          * it's because they're overflows and
01110                                          * it's not safe to chase them now.
01111                                          * Mark an incomplete and return.
01112                                          */
01113                                         if (dup_1.data == NULL ||
01114                                             dup_2.data == NULL) {
01115                                                 DB_ASSERT(!ovflok);
01116                                                 F_SET(pip, VRFY_INCOMPLETE);
01117                                                 goto err;
01118                                         }
01119 
01120                                         /*
01121                                          * If the dups are out of order,
01122                                          * flag this.  It's not an error
01123                                          * until we do the structure check
01124                                          * and see whether DUPSORT is set.
01125                                          */
01126                                         if (dupfunc(dbp, &dup_1, &dup_2) > 0)
01127                                                 F_SET(pip, VRFY_DUPS_UNSORTED);
01128 
01129                                         if (freedup_1)
01130                                                 __os_ufree(dbenv, dup_1.data);
01131                                         if (freedup_2)
01132                                                 __os_ufree(dbenv, dup_2.data);
01133                                 }
01134                         }
01135                 }
01136         }
01137 
01138 err:    if (pip != NULL && ((t_ret =
01139             __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0) && ret == 0)
01140                 ret = t_ret;
01141 
01142         if (buf1 != NULL)
01143                 __os_ufree(dbenv, buf1);
01144         if (buf2 != NULL)
01145                 __os_ufree(dbenv, buf2);
01146 
01147         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01148 }
01149 
01150 /*
01151  * __bam_vrfy_structure --
01152  *      Verify the tree structure of a btree database (including the master
01153  *      database containing subdbs).
01154  *
01155  * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
01156  * PUBLIC:     u_int32_t));
01157  */
01158 int
01159 __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
01160         DB *dbp;
01161         VRFY_DBINFO *vdp;
01162         db_pgno_t meta_pgno;
01163         u_int32_t flags;
01164 {
01165         DB *pgset;
01166         DB_ENV *dbenv;
01167         VRFY_PAGEINFO *mip, *rip;
01168         db_pgno_t root, p;
01169         int t_ret, ret;
01170         u_int32_t nrecs, level, relen, stflags;
01171 
01172         dbenv = dbp->dbenv;
01173         mip = rip = 0;
01174         pgset = vdp->pgset;
01175 
01176         if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0)
01177                 return (ret);
01178 
01179         if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, (int *)&p)) != 0)
01180                 goto err;
01181         if (p != 0) {
01182                 EPRINT((dbenv,
01183                     "Page %lu: btree metadata page observed twice",
01184                     (u_long)meta_pgno));
01185                 ret = DB_VERIFY_BAD;
01186                 goto err;
01187         }
01188         if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0)
01189                 goto err;
01190 
01191         root = mip->root;
01192 
01193         if (root == 0) {
01194                 EPRINT((dbenv,
01195                     "Page %lu: btree metadata page has no root",
01196                     (u_long)meta_pgno));
01197                 ret = DB_VERIFY_BAD;
01198                 goto err;
01199         }
01200 
01201         if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0)
01202                 goto err;
01203 
01204         switch (rip->type) {
01205         case P_IBTREE:
01206         case P_LBTREE:
01207                 stflags = flags | ST_TOPLEVEL;
01208                 if (F_ISSET(mip, VRFY_HAS_DUPS))
01209                         stflags |= ST_DUPOK;
01210                 if (F_ISSET(mip, VRFY_HAS_DUPSORT))
01211                         stflags |= ST_DUPSORT;
01212                 if (F_ISSET(mip, VRFY_HAS_RECNUMS))
01213                         stflags |= ST_RECNUM;
01214                 ret = __bam_vrfy_subtree(dbp,
01215                     vdp, root, NULL, NULL, stflags, NULL, NULL, NULL);
01216                 break;
01217         case P_IRECNO:
01218         case P_LRECNO:
01219                 stflags = flags | ST_RECNUM | ST_IS_RECNO | ST_TOPLEVEL;
01220                 if (mip->re_len > 0)
01221                         stflags |= ST_RELEN;
01222                 if ((ret = __bam_vrfy_subtree(dbp, vdp,
01223                     root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0)
01224                         goto err;
01225                 /*
01226                  * Even if mip->re_len > 0, re_len may come back zero if the
01227                  * tree is empty.  It should be okay to just skip the check in
01228                  * this case, as if there are any non-deleted keys at all,
01229                  * that should never happen.
01230                  */
01231                 if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
01232                         EPRINT((dbenv,
01233                             "Page %lu: recno database has bad re_len %lu",
01234                             (u_long)meta_pgno, (u_long)relen));
01235                         ret = DB_VERIFY_BAD;
01236                         goto err;
01237                 }
01238                 ret = 0;
01239                 break;
01240         case P_LDUP:
01241                 EPRINT((dbenv,
01242                     "Page %lu: duplicate tree referenced from metadata page",
01243                     (u_long)meta_pgno));
01244                 ret = DB_VERIFY_BAD;
01245                 break;
01246         default:
01247                 EPRINT((dbenv,
01248             "Page %lu: btree root of incorrect type %lu on metadata page",
01249                     (u_long)meta_pgno, (u_long)rip->type));
01250                 ret = DB_VERIFY_BAD;
01251                 break;
01252         }
01253 
01254 err:    if (mip != NULL && ((t_ret =
01255             __db_vrfy_putpageinfo(dbenv, vdp, mip)) != 0) && ret == 0)
01256                 ret = t_ret;
01257         if (rip != NULL && ((t_ret =
01258             __db_vrfy_putpageinfo(dbenv, vdp, rip)) != 0) && ret == 0)
01259                 ret = t_ret;
01260         return (ret);
01261 }
01262 
01263 /*
01264  * __bam_vrfy_subtree--
01265  *      Verify a subtree (or entire) btree with specified root.
01266  *
01267  *      Note that this is public because it must be called to verify
01268  *      offpage dup trees, including from hash.
01269  *
01270  * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *,
01271  * PUBLIC:     void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *));
01272  */
01273 int
01274 __bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
01275         DB *dbp;
01276         VRFY_DBINFO *vdp;
01277         db_pgno_t pgno;
01278         void *l, *r;
01279         u_int32_t flags, *levelp, *nrecsp, *relenp;
01280 {
01281         BINTERNAL *li, *ri, *lp, *rp;
01282         DB *pgset;
01283         DBC *cc;
01284         DB_ENV *dbenv;
01285         DB_MPOOLFILE *mpf;
01286         PAGE *h;
01287         VRFY_CHILDINFO *child;
01288         VRFY_PAGEINFO *pip;
01289         db_indx_t i;
01290         db_pgno_t next_pgno, prev_pgno;
01291         db_recno_t child_nrecs, nrecs;
01292         u_int32_t child_level, child_relen, j, level, relen, stflags;
01293         u_int8_t leaf_type;
01294         int (*func) __P((DB *, const DBT *, const DBT *));
01295         int isbad, p, ret, t_ret, toplevel;
01296 
01297         dbenv = dbp->dbenv;
01298         mpf = dbp->mpf;
01299         ret = isbad = 0;
01300         nrecs = 0;
01301         h = NULL;
01302         relen = 0;
01303         leaf_type = P_INVALID;
01304         next_pgno = prev_pgno = PGNO_INVALID;
01305         rp = (BINTERNAL *)r;
01306         lp = (BINTERNAL *)l;
01307 
01308         /* Provide feedback on our progress to the application. */
01309         if (!LF_ISSET(DB_SALVAGE))
01310                 __db_vrfy_struct_feedback(dbp, vdp);
01311 
01312         if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
01313                 return (ret);
01314 
01315         cc = NULL;
01316         level = pip->bt_level;
01317 
01318         toplevel = LF_ISSET(ST_TOPLEVEL) ? 1 : 0;
01319         LF_CLR(ST_TOPLEVEL);
01320 
01321         /*
01322          * If this is the root, initialize the vdp's prev- and next-pgno
01323          * accounting.
01324          *
01325          * For each leaf page we hit, we'll want to make sure that
01326          * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is
01327          * our page number.  Then, we'll set vdp->next_pgno to pip->next_pgno
01328          * and vdp->prev_pgno to our page number, and the next leaf page in
01329          * line should be able to do the same verification.
01330          */
01331         if (toplevel) {
01332                 /*
01333                  * Cache the values stored in the vdp so that if we're an
01334                  * auxiliary tree such as an off-page duplicate set, our
01335                  * caller's leaf page chain doesn't get lost.
01336                  */
01337                 prev_pgno = vdp->prev_pgno;
01338                 next_pgno = vdp->next_pgno;
01339                 leaf_type = vdp->leaf_type;
01340                 vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID;
01341                 vdp->leaf_type = P_INVALID;
01342         }
01343 
01344         /*
01345          * We are recursively descending a btree, starting from the root
01346          * and working our way out to the leaves.
01347          *
01348          * There are five cases we need to deal with:
01349          *      1. pgno is a recno leaf page.  Any children are overflows.
01350          *      2. pgno is a duplicate leaf page.  Any children
01351          *         are overflow pages;  traverse them, and then return
01352          *         level and nrecs.
01353          *      3. pgno is an ordinary leaf page.  Check whether dups are
01354          *         allowed, and if so, traverse any off-page dups or
01355          *         overflows.  Then return nrecs and level.
01356          *      4. pgno is a recno internal page.  Recursively check any
01357          *         child pages, making sure their levels are one lower
01358          *         and their nrecs sum to ours.
01359          *      5. pgno is a btree internal page.  Same as #4, plus we
01360          *         must verify that for each pair of BINTERNAL entries
01361          *         N and N+1, the leftmost item on N's child sorts
01362          *         greater than N, and the rightmost item on N's child
01363          *         sorts less than N+1.
01364          *
01365          * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE),
01366          * we need to verify the internal sort order is correct if,
01367          * due to overflow items, we were not able to do so earlier.
01368          */
01369         switch (pip->type) {
01370         case P_LRECNO:
01371         case P_LDUP:
01372         case P_LBTREE:
01373                 /*
01374                  * Cases 1, 2 and 3.
01375                  *
01376                  * We're some sort of leaf page;  verify
01377                  * that our linked list of leaves is consistent.
01378                  */
01379                 if (vdp->leaf_type == P_INVALID) {
01380                         /*
01381                          * First leaf page.  Set the type that all its
01382                          * successors should be, and verify that our prev_pgno
01383                          * is PGNO_INVALID.
01384                          */
01385                         vdp->leaf_type = pip->type;
01386                         if (pip->prev_pgno != PGNO_INVALID)
01387                                 goto bad_prev;
01388                 } else {
01389                         /*
01390                          * Successor leaf page. Check our type, the previous
01391                          * page's next_pgno, and our prev_pgno.
01392                          */
01393                         if (pip->type != vdp->leaf_type) {
01394                                 isbad = 1;
01395                                 EPRINT((dbenv,
01396         "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)",
01397                                     (u_long)pip->pgno, (u_long)pip->type,
01398                                     (u_long)vdp->leaf_type));
01399                         }
01400 
01401                         /*
01402                          * Don't do the prev/next_pgno checks if we've lost
01403                          * leaf pages due to another corruption.
01404                          */
01405                         if (!F_ISSET(vdp, VRFY_LEAFCHAIN_BROKEN)) {
01406                                 if (pip->pgno != vdp->next_pgno) {
01407                                         isbad = 1;
01408                                         EPRINT((dbenv,
01409         "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)",
01410                                             (u_long)vdp->prev_pgno,
01411                                             (u_long)vdp->next_pgno,
01412                                             (u_long)pip->pgno));
01413                                 }
01414                                 if (pip->prev_pgno != vdp->prev_pgno) {
01415 bad_prev:                               isbad = 1;
01416                                         EPRINT((dbenv,
01417     "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)",
01418                                             (u_long)pip->pgno,
01419                                             (u_long)pip->prev_pgno,
01420                                             (u_long)vdp->prev_pgno));
01421                                 }
01422                         }
01423                 }
01424                 vdp->prev_pgno = pip->pgno;
01425                 vdp->next_pgno = pip->next_pgno;
01426                 F_CLR(vdp, VRFY_LEAFCHAIN_BROKEN);
01427 
01428                 /*
01429                  * Overflow pages are common to all three leaf types;
01430                  * traverse the child list, looking for overflows.
01431                  */
01432                 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
01433                         goto err;
01434                 for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
01435                     ret = __db_vrfy_ccnext(cc, &child))
01436                         if (child->type == V_OVERFLOW &&
01437                             (ret = __db_vrfy_ovfl_structure(dbp, vdp,
01438                             child->pgno, child->tlen,
01439                             flags | ST_OVFL_LEAF)) != 0) {
01440                                 if (ret == DB_VERIFY_BAD)
01441                                         isbad = 1;
01442                                 else
01443                                         goto done;
01444                         }
01445 
01446                 if ((ret = __db_vrfy_ccclose(cc)) != 0)
01447                         goto err;
01448                 cc = NULL;
01449 
01450                 /* Case 1 */
01451                 if (pip->type == P_LRECNO) {
01452                         if (!LF_ISSET(ST_IS_RECNO) &&
01453                             !(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) {
01454                                 isbad = 1;
01455                                 EPRINT((dbenv,
01456                                     "Page %lu: recno leaf page non-recno tree",
01457                                     (u_long)pgno));
01458                                 goto done;
01459                         }
01460                         goto leaf;
01461                 } else if (LF_ISSET(ST_IS_RECNO)) {
01462                         /*
01463                          * It's a non-recno leaf.  Had better not be a recno
01464                          * subtree.
01465                          */
01466                         isbad = 1;
01467                         EPRINT((dbenv,
01468                             "Page %lu: non-recno leaf page in recno tree",
01469                             (u_long)pgno));
01470                         goto done;
01471                 }
01472 
01473                 /* Case 2--no more work. */
01474                 if (pip->type == P_LDUP)
01475                         goto leaf;
01476 
01477                 /* Case 3 */
01478 
01479                 /* Check if we have any dups. */
01480                 if (F_ISSET(pip, VRFY_HAS_DUPS)) {
01481                         /* If dups aren't allowed in this btree, trouble. */
01482                         if (!LF_ISSET(ST_DUPOK)) {
01483                                 isbad = 1;
01484                                 EPRINT((dbenv,
01485                                     "Page %lu: duplicates in non-dup btree",
01486                                     (u_long)pgno));
01487                         } else {
01488                                 /*
01489                                  * We correctly have dups.  If any are off-page,
01490                                  * traverse those btrees recursively.
01491                                  */
01492                                 if ((ret =
01493                                     __db_vrfy_childcursor(vdp, &cc)) != 0)
01494                                         goto err;
01495                                 for (ret = __db_vrfy_ccset(cc, pgno, &child);
01496                                     ret == 0;
01497                                     ret = __db_vrfy_ccnext(cc, &child)) {
01498                                         stflags = flags | ST_RECNUM | ST_DUPSET;
01499                                         /* Skip any overflow entries. */
01500                                         if (child->type == V_DUPLICATE) {
01501                                                 if ((ret = __db_vrfy_duptype(
01502                                                     dbp, vdp, child->pgno,
01503                                                     stflags)) != 0) {
01504                                                         isbad = 1;
01505                                                         /* Next child. */
01506                                                         continue;
01507                                                 }
01508                                                 if ((ret = __bam_vrfy_subtree(
01509                                                     dbp, vdp, child->pgno, NULL,
01510                                                     NULL, stflags | ST_TOPLEVEL,
01511                                                     NULL, NULL, NULL)) != 0) {
01512                                                         if (ret ==
01513                                                             DB_VERIFY_BAD)
01514                                                                 isbad = 1;
01515                                                         else
01516                                                                 goto err;
01517                                                 }
01518                                         }
01519                                 }
01520 
01521                                 if ((ret = __db_vrfy_ccclose(cc)) != 0)
01522                                         goto err;
01523                                 cc = NULL;
01524 
01525                                 /*
01526                                  * If VRFY_DUPS_UNSORTED is set,
01527                                  * ST_DUPSORT had better not be.
01528                                  */
01529                                 if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
01530                                     LF_ISSET(ST_DUPSORT)) {
01531                                         isbad = 1;
01532                                         EPRINT((dbenv,
01533                     "Page %lu: unsorted duplicate set in sorted-dup database",
01534                                             (u_long)pgno));
01535                                 }
01536                         }
01537                 }
01538                 goto leaf;
01539         case P_IBTREE:
01540         case P_IRECNO:
01541                 /* We handle these below. */
01542                 break;
01543         default:
01544                 /*
01545                  * If a P_IBTREE or P_IRECNO contains a reference to an
01546                  * invalid page, we'll wind up here;  handle it gracefully.
01547                  * Note that the code at the "done" label assumes that the
01548                  * current page is a btree/recno one of some sort;  this
01549                  * is not the case here, so we goto err.
01550                  *
01551                  * If the page is entirely zeroed, its pip->type will be a lie
01552                  * (we assumed it was a hash page, as they're allowed to be
01553                  * zeroed);  handle this case specially.
01554                  */
01555                 if (F_ISSET(pip, VRFY_IS_ALLZEROES))
01556                         ZEROPG_ERR_PRINT(dbenv, pgno, "btree or recno page");
01557                 else
01558                         EPRINT((dbenv,
01559             "Page %lu: btree or recno page is of inappropriate type %lu",
01560                             (u_long)pgno, (u_long)pip->type));
01561 
01562                 /*
01563                  * We probably lost a leaf page (or more if this was an
01564                  * internal page) from our prev/next_pgno chain.  Flag
01565                  * that this is expected;  we don't want or need to
01566                  * spew error messages about erroneous prev/next_pgnos,
01567                  * since that's probably not the real problem.
01568                  */
01569                 F_SET(vdp, VRFY_LEAFCHAIN_BROKEN);
01570 
01571                 ret = DB_VERIFY_BAD;
01572                 goto err;
01573         }
01574 
01575         /*
01576          * Cases 4 & 5: This is a btree or recno internal page.  For each child,
01577          * recurse, keeping a running count of nrecs and making sure the level
01578          * is always reasonable.
01579          */
01580         if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
01581                 goto err;
01582         for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
01583             ret = __db_vrfy_ccnext(cc, &child))
01584                 if (child->type == V_RECNO) {
01585                         if (pip->type != P_IRECNO) {
01586                                 TYPE_ERR_PRINT(dbenv, "__bam_vrfy_subtree",
01587                                     pgno, pip->type);
01588                                 DB_ASSERT(0);
01589                                 ret = EINVAL;
01590                                 goto err;
01591                         }
01592                         if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno,
01593                             NULL, NULL, flags, &child_level, &child_nrecs,
01594                             &child_relen)) != 0) {
01595                                 if (ret == DB_VERIFY_BAD)
01596                                         isbad = 1;
01597                                 else
01598                                         goto done;
01599                         }
01600 
01601                         if (LF_ISSET(ST_RELEN)) {
01602                                 if (relen == 0)
01603                                         relen = child_relen;
01604                                 /*
01605                                  * child_relen may be zero if the child subtree
01606                                  * is empty.
01607                                  */
01608                                 else if (child_relen > 0 &&
01609                                     relen != child_relen) {
01610                                         isbad = 1;
01611                                         EPRINT((dbenv,
01612                            "Page %lu: recno page returned bad re_len %lu",
01613                                             (u_long)child->pgno,
01614                                             (u_long)child_relen));
01615                                 }
01616                                 if (relenp)
01617                                         *relenp = relen;
01618                         }
01619                         if (LF_ISSET(ST_RECNUM)) {
01620                                 if (child->nrecs != child_nrecs) {
01621                                         isbad = 1;
01622                                         EPRINT((dbenv,
01623                 "Page %lu: record count incorrect: actual %lu, in record %lu",
01624                                             (u_long)child->pgno,
01625                                             (u_long)child_nrecs,
01626                                             (u_long)child->nrecs));
01627                                 }
01628                                 nrecs += child_nrecs;
01629                         }
01630                         if (isbad == 0 && level != child_level + 1) {
01631                                 isbad = 1;
01632                                 EPRINT((dbenv,
01633                 "Page %lu: recno level incorrect: got %lu, expected %lu",
01634                                     (u_long)child->pgno, (u_long)child_level,
01635                                     (u_long)(level - 1)));
01636                         }
01637                 } else if (child->type == V_OVERFLOW) {
01638                         /*
01639                          * It is possible for one internal page to reference
01640                          * a single overflow page twice, if all the items
01641                          * in the subtree referenced by slot 0 are deleted,
01642                          * then a similar number of items are put back
01643                          * before the key that formerly had been in slot 1.
01644                          *
01645                          * (Btree doesn't look at the key in slot 0, so the
01646                          * fact that the key formerly at slot 1 is the "wrong"
01647                          * parent of the stuff in the slot 0 subtree isn't
01648                          * really incorrect.)
01649                          *
01650                          * __db_vrfy_ovfl_structure is designed to be
01651                          * efficiently called multiple times for multiple
01652                          * references;  call it here as many times as is
01653                          * appropriate.
01654                          */
01655 
01656                         /* Otherwise, __db_vrfy_childput would be broken. */
01657                         DB_ASSERT(child->refcnt >= 1);
01658 
01659                         /*
01660                          * An overflow referenced more than twice here
01661                          * shouldn't happen.
01662                          */
01663                         if (child->refcnt > 2) {
01664                                 isbad = 1;
01665                                 EPRINT((dbenv,
01666     "Page %lu: overflow page %lu referenced more than twice from internal page",
01667                                     (u_long)pgno, (u_long)child->pgno));
01668                         } else
01669                                 for (j = 0; j < child->refcnt; j++)
01670                                         if ((ret = __db_vrfy_ovfl_structure(dbp,
01671                                             vdp, child->pgno, child->tlen,
01672                                             flags)) != 0) {
01673                                                 if (ret == DB_VERIFY_BAD)
01674                                                         isbad = 1;
01675                                                 else
01676                                                         goto done;
01677                                         }
01678                 }
01679 
01680         if ((ret = __db_vrfy_ccclose(cc)) != 0)
01681                 goto err;
01682         cc = NULL;
01683 
01684         /* We're done with case 4. */
01685         if (pip->type == P_IRECNO)
01686                 goto done;
01687 
01688         /*
01689          * Case 5.  Btree internal pages.
01690          * As described above, we need to iterate through all the
01691          * items on the page and make sure that our children sort appropriately
01692          * with respect to them.
01693          *
01694          * For each entry, li will be the "left-hand" key for the entry
01695          * itself, which must sort lower than all entries on its child;
01696          * ri will be the key to its right, which must sort greater.
01697          */
01698         if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
01699                 goto err;
01700         for (i = 0; i < pip->entries; i += O_INDX) {
01701                 li = GET_BINTERNAL(dbp, h, i);
01702                 ri = (i + O_INDX < pip->entries) ?
01703                     GET_BINTERNAL(dbp, h, i + O_INDX) : rp;
01704 
01705                 /*
01706                  * The leftmost key is forcibly sorted less than all entries,
01707                  * so don't bother passing it.
01708                  */
01709                 if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno,
01710                     i == 0 ? NULL : li, ri, flags, &child_level,
01711                     &child_nrecs, NULL)) != 0) {
01712                         if (ret == DB_VERIFY_BAD)
01713                                 isbad = 1;
01714                         else
01715                                 goto done;
01716                 }
01717 
01718                 if (LF_ISSET(ST_RECNUM)) {
01719                         /*
01720                          * Keep a running tally on the actual record count so
01721                          * we can return it to our parent (if we have one) or
01722                          * compare it to the NRECS field if we're a root page.
01723                          */
01724                         nrecs += child_nrecs;
01725 
01726                         /*
01727                          * Make sure the actual record count of the child
01728                          * is equal to the value in the BINTERNAL structure.
01729                          */
01730                         if (li->nrecs != child_nrecs) {
01731                                 isbad = 1;
01732                                 EPRINT((dbenv,
01733         "Page %lu: item %lu has incorrect record count of %lu, should be %lu",
01734                                     (u_long)pgno, (u_long)i, (u_long)li->nrecs,
01735                                     (u_long)child_nrecs));
01736                         }
01737                 }
01738 
01739                 if (level != child_level + 1) {
01740                         isbad = 1;
01741                         EPRINT((dbenv,
01742                 "Page %lu: Btree level incorrect: got %lu, expected %lu",
01743                             (u_long)li->pgno,
01744                             (u_long)child_level, (u_long)(level - 1)));
01745                 }
01746         }
01747 
01748         if (0) {
01749 leaf:           level = LEAFLEVEL;
01750                 if (LF_ISSET(ST_RECNUM))
01751                         nrecs = pip->rec_cnt;
01752 
01753                 /* XXX
01754                  * We should verify that the record count on a leaf page
01755                  * is the sum of the number of keys and the number of
01756                  * records in its off-page dups.  This requires looking
01757                  * at the page again, however, and it may all be changing
01758                  * soon, so for now we don't bother.
01759                  */
01760 
01761                 if (LF_ISSET(ST_RELEN) && relenp)
01762                         *relenp = pip->re_len;
01763         }
01764 done:   if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
01765                 /*
01766                  * During the page-by-page pass, item order verification was
01767                  * not finished due to the presence of overflow items.  If
01768                  * isbad == 0, though, it's now safe to do so, as we've
01769                  * traversed any child overflow pages.  Do it.
01770                  */
01771                 if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
01772                         goto err;
01773                 if ((ret = __bam_vrfy_itemorder(dbp,
01774                     vdp, h, pgno, 0, 1, 0, flags)) != 0)
01775                         goto err;
01776                 F_CLR(pip, VRFY_INCOMPLETE);
01777         }
01778 
01779         /*
01780          * It's possible to get to this point with a page that has no
01781          * items, but without having detected any sort of failure yet.
01782          * Having zero items is legal if it's a leaf--it may be the
01783          * root page in an empty tree, or the tree may have been
01784          * modified with the DB_REVSPLITOFF flag set (there's no way
01785          * to tell from what's on disk).  For an internal page,
01786          * though, having no items is a problem (all internal pages
01787          * must have children).
01788          */
01789         if (isbad == 0 && ret == 0) {
01790                 if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
01791                         goto err;
01792 
01793                 if (NUM_ENT(h) == 0 && ISINTERNAL(h)) {
01794                         isbad = 1;
01795                         EPRINT((dbenv,
01796                     "Page %lu: internal page is empty and should not be",
01797                             (u_long)pgno));
01798                         goto err;
01799                 }
01800         }
01801 
01802         /*
01803          * Our parent has sent us BINTERNAL pointers to parent records
01804          * so that we can verify our place with respect to them.  If it's
01805          * appropriate--we have a default sort function--verify this.
01806          */
01807         if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) {
01808                 if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
01809                         goto err;
01810 
01811                 /*
01812                  * __bam_vrfy_treeorder needs to know what comparison function
01813                  * to use.  If ST_DUPSET is set, we're in a duplicate tree
01814                  * and we use the duplicate comparison function;  otherwise,
01815                  * use the btree one.  If unset, use the default, of course.
01816                  */
01817                 func = LF_ISSET(ST_DUPSET) ? dbp->dup_compare :
01818                     ((BTREE *)dbp->bt_internal)->bt_compare;
01819                 if (func == NULL)
01820                         func = __bam_defcmp;
01821 
01822                 if ((ret = __bam_vrfy_treeorder(
01823                     dbp, pgno, h, lp, rp, func, flags)) != 0) {
01824                         if (ret == DB_VERIFY_BAD)
01825                                 isbad = 1;
01826                         else
01827                                 goto err;
01828                 }
01829         }
01830 
01831         /*
01832          * This is guaranteed to succeed for leaf pages, but no harm done.
01833          *
01834          * Internal pages below the top level do not store their own
01835          * record numbers, so we skip them.
01836          */
01837         if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
01838                 isbad = 1;
01839                 EPRINT((dbenv,
01840                     "Page %lu: bad record count: has %lu records, claims %lu",
01841                     (u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt));
01842         }
01843 
01844         if (levelp)
01845                 *levelp = level;
01846         if (nrecsp)
01847                 *nrecsp = nrecs;
01848 
01849         pgset = vdp->pgset;
01850         if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
01851                 goto err;
01852         if (p != 0) {
01853                 isbad = 1;
01854                 EPRINT((dbenv, "Page %lu: linked twice", (u_long)pgno));
01855         } else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0)
01856                 goto err;
01857 
01858         if (toplevel)
01859                 /*
01860                  * The last page's next_pgno in the leaf chain should have been
01861                  * PGNO_INVALID.
01862                  */
01863                 if (vdp->next_pgno != PGNO_INVALID) {
01864                         isbad = 1;
01865                         EPRINT((dbenv, "Page %lu: unterminated leaf chain",
01866                             (u_long)vdp->prev_pgno));
01867                 }
01868 
01869 err:    if (toplevel) {
01870                 /* Restore our caller's settings. */
01871                 vdp->next_pgno = next_pgno;
01872                 vdp->prev_pgno = prev_pgno;
01873                 vdp->leaf_type = leaf_type;
01874         }
01875 
01876         if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
01877                 ret = t_ret;
01878         if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
01879                 ret = t_ret;
01880         if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
01881                 ret = t_ret;
01882         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01883 }
01884 
01885 /*
01886  * __bam_vrfy_treeorder --
01887  *      Verify that the lowest key on a page sorts greater than the
01888  *      BINTERNAL which points to it (lp), and the highest key
01889  *      sorts less than the BINTERNAL above that (rp).
01890  *
01891  *      If lp is NULL, this means that it was the leftmost key on the
01892  *      parent, which (regardless of sort function) sorts less than
01893  *      all keys.  No need to check it.
01894  *
01895  *      If rp is NULL, lp was the highest key on the parent, so there's
01896  *      no higher key we must sort less than.
01897  */
01898 static int
01899 __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
01900         DB *dbp;
01901         db_pgno_t pgno;
01902         PAGE *h;
01903         BINTERNAL *lp, *rp;
01904         int (*func) __P((DB *, const DBT *, const DBT *));
01905         u_int32_t flags;
01906 {
01907         BOVERFLOW *bo;
01908         DB_ENV *dbenv;
01909         DBT dbt;
01910         db_indx_t last;
01911         int ret, cmp;
01912 
01913         dbenv = dbp->dbenv;
01914         memset(&dbt, 0, sizeof(DBT));
01915         F_SET(&dbt, DB_DBT_MALLOC);
01916         ret = 0;
01917 
01918         /*
01919          * Empty pages are sorted correctly by definition.  We check
01920          * to see whether they ought to be empty elsewhere;  leaf
01921          * pages legally may be.
01922          */
01923         if (NUM_ENT(h) == 0)
01924                 return (0);
01925 
01926         switch (TYPE(h)) {
01927         case P_IBTREE:
01928         case P_LDUP:
01929                 last = NUM_ENT(h) - O_INDX;
01930                 break;
01931         case P_LBTREE:
01932                 last = NUM_ENT(h) - P_INDX;
01933                 break;
01934         default:
01935                 TYPE_ERR_PRINT(dbenv, "__bam_vrfy_treeorder", pgno, TYPE(h));
01936                 DB_ASSERT(0);
01937                 return (EINVAL);
01938         }
01939 
01940         /*
01941          * The key on page h, the child page, is more likely to be
01942          * an overflow page, so we pass its offset, rather than lp/rp's,
01943          * into __bam_cmp.  This will take advantage of __db_moff.
01944          */
01945 
01946         /*
01947          * Skip first-item check if we're an internal page--the first
01948          * entry on an internal page is treated specially by __bam_cmp,
01949          * so what's on the page shouldn't matter.  (Plus, since we're passing
01950          * our page and item 0 as to __bam_cmp, we'll sort before our
01951          * parent and falsely report a failure.)
01952          */
01953         if (lp != NULL && TYPE(h) != P_IBTREE) {
01954                 if (lp->type == B_KEYDATA) {
01955                         dbt.data = lp->data;
01956                         dbt.size = lp->len;
01957                 } else if (lp->type == B_OVERFLOW) {
01958                         bo = (BOVERFLOW *)lp->data;
01959                         if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno,
01960                             NULL, NULL)) != 0)
01961                                 return (ret);
01962                 } else {
01963                         DB_ASSERT(0);
01964                         EPRINT((dbenv,
01965                             "Page %lu: unknown type for internal record",
01966                             (u_long)PGNO(h)));
01967                         return (EINVAL);
01968                 }
01969 
01970                 /* On error, fall through, free if needed, and return. */
01971                 if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) {
01972                         if (cmp > 0) {
01973                                 EPRINT((dbenv,
01974             "Page %lu: first item on page sorted greater than parent entry",
01975                                     (u_long)PGNO(h)));
01976                                 ret = DB_VERIFY_BAD;
01977                         }
01978                 } else
01979                         EPRINT((dbenv,
01980                             "Page %lu: first item on page had comparison error",
01981                             (u_long)PGNO(h)));
01982 
01983                 if (dbt.data != lp->data)
01984                         __os_ufree(dbenv, dbt.data);
01985                 if (ret != 0)
01986                         return (ret);
01987         }
01988 
01989         if (rp != NULL) {
01990                 if (rp->type == B_KEYDATA) {
01991                         dbt.data = rp->data;
01992                         dbt.size = rp->len;
01993                 } else if (rp->type == B_OVERFLOW) {
01994                         bo = (BOVERFLOW *)rp->data;
01995                         if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno,
01996                             NULL, NULL)) != 0)
01997                                 return (ret);
01998                 } else {
01999                         DB_ASSERT(0);
02000                         EPRINT((dbenv,
02001                             "Page %lu: unknown type for internal record",
02002                             (u_long)PGNO(h)));
02003                         return (EINVAL);
02004                 }
02005 
02006                 /* On error, fall through, free if needed, and return. */
02007                 if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) {
02008                         if (cmp < 0) {
02009                                 EPRINT((dbenv,
02010             "Page %lu: last item on page sorted greater than parent entry",
02011                                     (u_long)PGNO(h)));
02012                                 ret = DB_VERIFY_BAD;
02013                         }
02014                 } else
02015                         EPRINT((dbenv,
02016                             "Page %lu: last item on page had comparison error",
02017                             (u_long)PGNO(h)));
02018 
02019                 if (dbt.data != rp->data)
02020                         __os_ufree(dbenv, dbt.data);
02021         }
02022 
02023         return (ret);
02024 }
02025 
02026 /*
02027  * __bam_salvage --
02028  *      Safely dump out anything that looks like a key on an alleged
02029  *      btree leaf page.
02030  *
02031  * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t,
02032  * PUBLIC:     PAGE *, void *, int (*)(void *, const void *), DBT *,
02033  * PUBLIC:     u_int32_t));
02034  */
02035 int
02036 __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
02037         DB *dbp;
02038         VRFY_DBINFO *vdp;
02039         db_pgno_t pgno;
02040         u_int32_t pgtype;
02041         PAGE *h;
02042         void *handle;
02043         int (*callback) __P((void *, const void *));
02044         DBT *key;
02045         u_int32_t flags;
02046 {
02047         BKEYDATA *bk;
02048         BOVERFLOW *bo;
02049         DBT dbt, unknown_key, unknown_data;
02050         DB_ENV *dbenv;
02051         VRFY_ITEM *pgmap;
02052         db_indx_t i, last, beg, end, *inp;
02053         u_int32_t himark;
02054         void *ovflbuf;
02055         int ret, t_ret, t2_ret;
02056 
02057         dbenv = dbp->dbenv;
02058         ovflbuf = pgmap = NULL;
02059         inp = P_INP(dbp, h);
02060 
02061         memset(&dbt, 0, sizeof(DBT));
02062         dbt.flags = DB_DBT_REALLOC;
02063 
02064         memset(&unknown_key, 0, sizeof(DBT));
02065         unknown_key.size = (u_int32_t)strlen("UNKNOWN_KEY");
02066         unknown_key.data = "UNKNOWN_KEY";
02067         memset(&unknown_data, 0, sizeof(DBT));
02068         unknown_data.size = (u_int32_t)strlen("UNKNOWN_DATA");
02069         unknown_data.data = "UNKNOWN_DATA";
02070 
02071         /*
02072          * Allocate a buffer for overflow items.  Start at one page;
02073          * __db_safe_goff will realloc as needed.
02074          */
02075         if ((ret = __os_malloc(dbenv, dbp->pgsize, &ovflbuf)) != 0)
02076                 goto err;
02077 
02078         if (LF_ISSET(DB_AGGRESSIVE) && (ret =
02079             __os_calloc(dbenv, dbp->pgsize, sizeof(pgmap[0]), &pgmap)) != 0)
02080                 goto err;
02081 
02082         /*
02083          * Loop through the inp array, spitting out key/data pairs.
02084          *
02085          * If we're salvaging normally, loop from 0 through NUM_ENT(h).  If
02086          * we're being aggressive, loop until we hit the end of the page --
02087          * NUM_ENT() may be bogus.
02088          */
02089         himark = dbp->pgsize;
02090         for (i = 0, last = UINT16_MAX;; i += O_INDX) {
02091                 /* If we're not aggressive, break when we hit NUM_ENT(h). */
02092                 if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h))
02093                         break;
02094 
02095                 /* Verify the current item. */
02096                 t_ret =
02097                     __db_vrfy_inpitem(dbp, h, pgno, i, 1, flags, &himark, NULL);
02098 
02099                 if (t_ret != 0) {
02100                         /*
02101                          * If this is a btree leaf and we've printed out a key
02102                          * but not its associated data item, fix this imbalance
02103                          * by printing an "UNKNOWN_DATA".
02104                          */
02105                         if (pgtype == P_LBTREE && i % P_INDX == 1 &&
02106                             last == i - 1 && (t2_ret = __db_vrfy_prdbt(
02107                             &unknown_data,
02108                             0, " ", handle, callback, 0, vdp)) != 0) {
02109                                 if (ret == 0)
02110                                         ret = t2_ret;
02111                                 goto err;
02112                         }
02113 
02114                         /*
02115                          * Don't return DB_VERIFY_FATAL; it's private and means
02116                          * only that we can't go on with this page, not with
02117                          * the whole database.  It's not even an error if we've
02118                          * run into it after NUM_ENT(h).
02119                          */
02120                         if (t_ret == DB_VERIFY_FATAL) {
02121                                 if (i < NUM_ENT(h) && ret == 0)
02122                                         ret = DB_VERIFY_BAD;
02123                                 break;
02124                         }
02125                         continue;
02126                 }
02127 
02128                 /*
02129                  * If this returned 0, it's safe to print or (carefully)
02130                  * try to fetch.
02131                  *
02132                  * We only print deleted items if DB_AGGRESSIVE is set.
02133                  */
02134                 bk = GET_BKEYDATA(dbp, h, i);
02135                 if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
02136                         continue;
02137 
02138                 /*
02139                  * If this is a btree leaf and we're about to print out a data
02140                  * item for which we didn't print out a key, fix this imbalance
02141                  * by printing an "UNKNOWN_KEY".
02142                  */
02143                 if (pgtype == P_LBTREE && i % P_INDX == 1 &&
02144                     last != i - 1 && (t_ret = __db_vrfy_prdbt(
02145                     &unknown_key, 0, " ", handle, callback, 0, vdp)) != 0) {
02146                         if (ret == 0)
02147                                 ret = t_ret;
02148                         goto err;
02149                 }
02150                 last = i;
02151 
02152                 /*
02153                  * We're going to go try to print the next item.  If key is
02154                  * non-NULL, we're a dup page, so we've got to print the key
02155                  * first, unless SA_SKIPFIRSTKEY is set and we're on the first
02156                  * entry.
02157                  */
02158                 if (key != NULL && (i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY)))
02159                         if ((t_ret = __db_vrfy_prdbt(key,
02160                             0, " ", handle, callback, 0, vdp)) != 0) {
02161                                 if (ret == 0)
02162                                         ret = t_ret;
02163                                 goto err;
02164                         }
02165 
02166                 beg = inp[i];
02167                 switch (B_TYPE(bk->type)) {
02168                 case B_DUPLICATE:
02169                         end = beg + BOVERFLOW_SIZE - 1;
02170                         /*
02171                          * If we're not on a normal btree leaf page, there
02172                          * shouldn't be off-page dup sets.  Something's
02173                          * confused; just drop it, and the code to pick up
02174                          * unlinked offpage dup sets will print it out
02175                          * with key "UNKNOWN" later.
02176                          */
02177                         if (pgtype != P_LBTREE)
02178                                 break;
02179 
02180                         bo = (BOVERFLOW *)bk;
02181 
02182                         /*
02183                          * If the page number is unreasonable, or if this is
02184                          * supposed to be a key item, output "UNKNOWN_KEY" --
02185                          * the best we can do is run into the data items in
02186                          * the unlinked offpage dup pass.
02187                          */
02188                         if (!IS_VALID_PGNO(bo->pgno) || (i % P_INDX == 0)) {
02189                                 /* Not much to do on failure. */
02190                                 if ((t_ret = __db_vrfy_prdbt(&unknown_key,
02191                                     0, " ", handle, callback, 0, vdp)) != 0) {
02192                                         if (ret == 0)
02193                                                 ret = t_ret;
02194                                         goto err;
02195                                 }
02196                                 break;
02197                         }
02198 
02199                         /* Don't stop on error. */
02200                         if ((t_ret = __db_salvage_duptree(dbp,
02201                             vdp, bo->pgno, &dbt, handle, callback,
02202                             flags | SA_SKIPFIRSTKEY)) != 0 && ret == 0)
02203                                 ret = t_ret;
02204 
02205                         break;
02206                 case B_KEYDATA:
02207                         end = (db_indx_t)DB_ALIGN(
02208                             beg + bk->len, sizeof(u_int32_t)) - 1;
02209                         dbt.data = bk->data;
02210                         dbt.size = bk->len;
02211                         if ((t_ret = __db_vrfy_prdbt(&dbt,
02212                             0, " ", handle, callback, 0, vdp)) != 0) {
02213                                 if (ret == 0)
02214                                         ret = t_ret;
02215                                 goto err;
02216                         }
02217                         break;
02218                 case B_OVERFLOW:
02219                         end = beg + BOVERFLOW_SIZE - 1;
02220                         bo = (BOVERFLOW *)bk;
02221 
02222                         /* Don't stop on error. */
02223                         if ((t_ret = __db_safe_goff(dbp, vdp,
02224                             bo->pgno, &dbt, &ovflbuf, flags)) != 0 && ret == 0)
02225                                 ret = t_ret;
02226                         if ((t_ret = __db_vrfy_prdbt(
02227                             t_ret == 0 ? &dbt : &unknown_key,
02228                             0, " ", handle, callback, 0, vdp)) != 0 && ret == 0)
02229                                 ret = t_ret;
02230                         break;
02231                 default:
02232                         /*
02233                          * We should never get here; __db_vrfy_inpitem should
02234                          * not be returning 0 if bk->type is unrecognizable.
02235                          */
02236                         DB_ASSERT(0);
02237                         if (ret == 0)
02238                                 ret = EINVAL;
02239                         goto err;
02240                 }
02241 
02242                 /*
02243                  * If we're being aggressive, mark the beginning and end of
02244                  * the item; we'll come back and print whatever "junk" is in
02245                  * the gaps in case we had any bogus inp elements and thereby
02246                  * missed stuff.
02247                  */
02248                 if (LF_ISSET(DB_AGGRESSIVE)) {
02249                         pgmap[beg] = VRFY_ITEM_BEGIN;
02250                         pgmap[end] = VRFY_ITEM_END;
02251                 }
02252         }
02253 
02254 err:    if (pgmap != NULL)
02255                 __os_free(dbenv, pgmap);
02256         if (ovflbuf != NULL)
02257                 __os_free(dbenv, ovflbuf);
02258 
02259         /* Mark this page as done. */
02260         if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0)
02261                 ret = t_ret;
02262 
02263         return (ret);
02264 }
02265 
02266 /*
02267  * __bam_salvage_walkdupint --
02268  *      Walk a known-good btree or recno internal page which is part of
02269  *      a dup tree, calling __db_salvage_duptree on each child page.
02270  *
02271  * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *,
02272  * PUBLIC:     DBT *, void *, int (*)(void *, const void *), u_int32_t));
02273  */
02274 int
02275 __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
02276         DB *dbp;
02277         VRFY_DBINFO *vdp;
02278         PAGE *h;
02279         DBT *key;
02280         void *handle;
02281         int (*callback) __P((void *, const void *));
02282         u_int32_t flags;
02283 {
02284         RINTERNAL *ri;
02285         BINTERNAL *bi;
02286         int ret, t_ret;
02287         db_indx_t i;
02288 
02289         ret = 0;
02290         for (i = 0; i < NUM_ENT(h); i++) {
02291                 switch (TYPE(h)) {
02292                 case P_IBTREE:
02293                         bi = GET_BINTERNAL(dbp, h, i);
02294                         if ((t_ret = __db_salvage_duptree(dbp,
02295                             vdp, bi->pgno, key, handle, callback, flags)) != 0)
02296                                 ret = t_ret;
02297                         break;
02298                 case P_IRECNO:
02299                         ri = GET_RINTERNAL(dbp, h, i);
02300                         if ((t_ret = __db_salvage_duptree(dbp,
02301                             vdp, ri->pgno, key, handle, callback, flags)) != 0)
02302                                 ret = t_ret;
02303                         break;
02304                 default:
02305                         __db_err(dbp->dbenv,
02306                             "__bam_salvage_walkdupint called on non-int. page");
02307                         DB_ASSERT(0);
02308                         return (EINVAL);
02309                 }
02310                 /* Pass SA_SKIPFIRSTKEY, if set, on to the 0th child only. */
02311                 flags &= ~LF_ISSET(SA_SKIPFIRSTKEY);
02312         }
02313 
02314         return (ret);
02315 }
02316 
02317 /*
02318  * __bam_meta2pgset --
02319  *      Given a known-good meta page, return in pgsetp a 0-terminated list of
02320  *      db_pgno_t's corresponding to the pages in the btree.
02321  *
02322  *      We do this by a somewhat sleazy method, to avoid having to traverse the
02323  *      btree structure neatly:  we walk down the left side to the very
02324  *      first leaf page, then we mark all the pages in the chain of
02325  *      NEXT_PGNOs (being wary of cycles and invalid ones), then we
02326  *      consolidate our scratch array into a nice list, and return.  This
02327  *      avoids the memory management hassles of recursion and the
02328  *      trouble of walking internal pages--they just don't matter, except
02329  *      for the left branch.
02330  *
02331  * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *,
02332  * PUBLIC:     u_int32_t, DB *));
02333  */
02334 int
02335 __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
02336         DB *dbp;
02337         VRFY_DBINFO *vdp;
02338         BTMETA *btmeta;
02339         u_int32_t flags;
02340         DB *pgset;
02341 {
02342         BINTERNAL *bi;
02343         DB_MPOOLFILE *mpf;
02344         PAGE *h;
02345         RINTERNAL *ri;
02346         db_pgno_t current, p;
02347         int err_ret, ret;
02348 
02349         mpf = dbp->mpf;
02350         h = NULL;
02351         ret = err_ret = 0;
02352         DB_ASSERT(pgset != NULL);
02353         for (current = btmeta->root;;) {
02354                 if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) {
02355                         err_ret = DB_VERIFY_BAD;
02356                         goto err;
02357                 }
02358                 if ((ret = __memp_fget(mpf, &current, 0, &h)) != 0) {
02359                         err_ret = ret;
02360                         goto err;
02361                 }
02362 
02363                 switch (TYPE(h)) {
02364                 case P_IBTREE:
02365                 case P_IRECNO:
02366                         if ((ret = __bam_vrfy(dbp,
02367                             vdp, h, current, flags | DB_NOORDERCHK)) != 0) {
02368                                 err_ret = ret;
02369                                 goto err;
02370                         }
02371                         if (TYPE(h) == P_IBTREE) {
02372                                 bi = GET_BINTERNAL(dbp, h, 0);
02373                                 current = bi->pgno;
02374                         } else {        /* P_IRECNO */
02375                                 ri = GET_RINTERNAL(dbp, h, 0);
02376                                 current = ri->pgno;
02377                         }
02378                         break;
02379                 case P_LBTREE:
02380                 case P_LRECNO:
02381                         goto traverse;
02382                 default:
02383                         err_ret = DB_VERIFY_BAD;
02384                         goto err;
02385                 }
02386 
02387                 if ((ret = __memp_fput(mpf, h, 0)) != 0)
02388                         err_ret = ret;
02389                 h = NULL;
02390         }
02391 
02392         /*
02393          * At this point, current is the pgno of leaf page h, the 0th in the
02394          * tree we're concerned with.
02395          */
02396 traverse:
02397         while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
02398                 if (h == NULL &&
02399                     (ret = __memp_fget(mpf, &current, 0, &h)) != 0) {
02400                         err_ret = ret;
02401                         break;
02402                 }
02403 
02404                 if ((ret = __db_vrfy_pgset_get(pgset, current, (int *)&p)) != 0)
02405                         goto err;
02406 
02407                 if (p != 0) {
02408                         /*
02409                          * We've found a cycle.  Return success anyway--
02410                          * our caller may as well use however much of
02411                          * the pgset we've come up with.
02412                          */
02413                         break;
02414                 }
02415                 if ((ret = __db_vrfy_pgset_inc(pgset, current)) != 0)
02416                         goto err;
02417 
02418                 current = NEXT_PGNO(h);
02419                 if ((ret = __memp_fput(mpf, h, 0)) != 0)
02420                         err_ret = ret;
02421                 h = NULL;
02422         }
02423 
02424 err:    if (h != NULL)
02425                 (void)__memp_fput(mpf, h, 0);
02426 
02427         return (ret == 0 ? err_ret : ret);
02428 }
02429 
02430 /*
02431  * __bam_safe_getdata --
02432  *
02433  *      Utility function for __bam_vrfy_itemorder.  Safely gets the datum at
02434  *      index i, page h, and sticks it in DBT dbt.  If ovflok is 1 and i's an
02435  *      overflow item, we do a safe_goff to get the item and signal that we need
02436  *      to free dbt->data;  if ovflok is 0, we leaves the DBT zeroed.
02437  */
02438 static int
02439 __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp)
02440         DB *dbp;
02441         PAGE *h;
02442         u_int32_t i;
02443         int ovflok;
02444         DBT *dbt;
02445         int *freedbtp;
02446 {
02447         BKEYDATA *bk;
02448         BOVERFLOW *bo;
02449 
02450         memset(dbt, 0, sizeof(DBT));
02451         *freedbtp = 0;
02452 
02453         bk = GET_BKEYDATA(dbp, h, i);
02454         if (B_TYPE(bk->type) == B_OVERFLOW) {
02455                 if (!ovflok)
02456                         return (0);
02457 
02458                 bo = (BOVERFLOW *)bk;
02459                 F_SET(dbt, DB_DBT_MALLOC);
02460 
02461                 *freedbtp = 1;
02462                 return (__db_goff(dbp, dbt, bo->tlen, bo->pgno, NULL, NULL));
02463         } else {
02464                 dbt->data = bk->data;
02465                 dbt->size = bk->len;
02466         }
02467 
02468         return (0);
02469 }

Generated on Sun Dec 25 12:14:14 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2