Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

bt_rsearch.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994, 1995, 1996
00009  *      Keith Bostic.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * Redistribution and use in source and binary forms, with or without
00016  * modification, are permitted provided that the following conditions
00017  * are met:
00018  * 1. Redistributions of source code must retain the above copyright
00019  *    notice, this list of conditions and the following disclaimer.
00020  * 2. Redistributions in binary form must reproduce the above copyright
00021  *    notice, this list of conditions and the following disclaimer in the
00022  *    documentation and/or other materials provided with the distribution.
00023  * 3. Neither the name of the University nor the names of its contributors
00024  *    may be used to endorse or promote products derived from this software
00025  *    without specific prior written permission.
00026  *
00027  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00028  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00029  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00030  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00031  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00032  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00033  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00034  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00035  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00036  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00037  * SUCH DAMAGE.
00038  *
00039  * $Id: bt_rsearch.c,v 12.5 2005/08/08 03:37:05 ubell Exp $
00040  */
00041 
00042 #include "db_config.h"
00043 
00044 #ifndef NO_SYSTEM_INCLUDES
00045 #include <sys/types.h>
00046 #endif
00047 
00048 #include "db_int.h"
00049 #include "dbinc/db_page.h"
00050 #include "dbinc/btree.h"
00051 #include "dbinc/db_shash.h"
00052 #include "dbinc/lock.h"
00053 #include "dbinc/mp.h"
00054 
00055 /*
00056  * __bam_rsearch --
00057  *      Search a btree for a record number.
00058  *
00059  * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
00060  */
00061 int
00062 __bam_rsearch(dbc, recnop, flags, stop, exactp)
00063         DBC *dbc;
00064         db_recno_t *recnop;
00065         u_int32_t flags;
00066         int stop, *exactp;
00067 {
00068         BINTERNAL *bi;
00069         BTREE_CURSOR *cp;
00070         DB *dbp;
00071         DB_LOCK lock;
00072         DB_MPOOLFILE *mpf;
00073         PAGE *h;
00074         RINTERNAL *ri;
00075         db_indx_t adjust, deloffset, indx, top;
00076         db_lockmode_t lock_mode;
00077         db_pgno_t pg;
00078         db_recno_t recno, t_recno, total;
00079         int ret, stack, t_ret;
00080 
00081         dbp = dbc->dbp;
00082         mpf = dbp->mpf;
00083         cp = (BTREE_CURSOR *)dbc->internal;
00084         h = NULL;
00085 
00086         BT_STK_CLR(cp);
00087 
00088         /*
00089          * There are several ways we search a btree tree.  The flags argument
00090          * specifies if we're acquiring read or write locks and if we are
00091          * locking pairs of pages.  In addition, if we're adding or deleting
00092          * an item, we have to lock the entire tree, regardless.  See btree.h
00093          * for more details.
00094          *
00095          * If write-locking pages, we need to know whether or not to acquire a
00096          * write lock on a page before getting it.  This depends on how deep it
00097          * is in tree, which we don't know until we acquire the root page.  So,
00098          * if we need to lock the root page we may have to upgrade it later,
00099          * because we won't get the correct lock initially.
00100          *
00101          * Retrieve the root page.
00102          */
00103 
00104         if ((ret = __bam_get_root(dbc, cp->root, stop, flags, &stack)) != 0)
00105                 return (ret);
00106         lock_mode = cp->csp->lock_mode;
00107         lock = cp->csp->lock;
00108         h = cp->csp->page;
00109 
00110         BT_STK_CLR(cp);
00111         /*
00112          * If appending to the tree, set the record number now -- we have the
00113          * root page locked.
00114          *
00115          * Delete only deletes exact matches, read only returns exact matches.
00116          * Note, this is different from __bam_search(), which returns non-exact
00117          * matches for read.
00118          *
00119          * The record may not exist.  We can only return the correct location
00120          * for the record immediately after the last record in the tree, so do
00121          * a fast check now.
00122          */
00123         total = RE_NREC(h);
00124         if (LF_ISSET(S_APPEND)) {
00125                 *exactp = 0;
00126                 *recnop = recno = total + 1;
00127         } else {
00128                 recno = *recnop;
00129                 if (recno <= total)
00130                         *exactp = 1;
00131                 else {
00132                         *exactp = 0;
00133                         if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
00134                                 /*
00135                                  * Keep the page locked for serializability.
00136                                  *
00137                                  * XXX
00138                                  * This leaves the root page locked, which will
00139                                  * eliminate any concurrency.  A possible fix
00140                                  * would be to lock the last leaf page instead.
00141                                  */
00142                                 ret = __memp_fput(mpf, h, 0);
00143                                 if ((t_ret =
00144                                     __TLPUT(dbc, lock)) != 0 && ret == 0)
00145                                         ret = t_ret;
00146                                 return (ret == 0 ? DB_NOTFOUND : ret);
00147                         }
00148                 }
00149         }
00150 
00151         /*
00152          * !!!
00153          * Record numbers in the tree are 0-based, but the recno is
00154          * 1-based.  All of the calculations below have to take this
00155          * into account.
00156          */
00157         for (total = 0;;) {
00158                 switch (TYPE(h)) {
00159                 case P_LBTREE:
00160                 case P_LDUP:
00161                         recno -= total;
00162                         /*
00163                          * There may be logically deleted records on the page.
00164                          * If there are enough, the record may not exist.
00165                          */
00166                         if (TYPE(h) == P_LBTREE) {
00167                                 adjust = P_INDX;
00168                                 deloffset = O_INDX;
00169                         } else {
00170                                 adjust = O_INDX;
00171                                 deloffset = 0;
00172                         }
00173                         for (t_recno = 0, indx = 0;; indx += adjust) {
00174                                 if (indx >= NUM_ENT(h)) {
00175                                         *exactp = 0;
00176                                         if (!LF_ISSET(S_PAST_EOF) ||
00177                                             recno > t_recno + 1) {
00178                                                 ret = __memp_fput(mpf, h, 0);
00179                                                 h = NULL;
00180                                                 if ((t_ret = __TLPUT(dbc,
00181                                                     lock)) != 0 && ret == 0)
00182                                                         ret = t_ret;
00183                                                 if (ret == 0)
00184                                                         ret = DB_NOTFOUND;
00185                                                 goto err;
00186                                         }
00187                                 }
00188                                 if (!B_DISSET(GET_BKEYDATA(dbp, h,
00189                                     indx + deloffset)->type) &&
00190                                     ++t_recno == recno)
00191                                         break;
00192                         }
00193 
00194                         /* Correct from 1-based to 0-based for a page offset. */
00195                         BT_STK_ENTER(dbp->dbenv,
00196                             cp, h, indx, lock, lock_mode, ret);
00197                         if (ret != 0)
00198                                 goto err;
00199                         return (0);
00200                 case P_IBTREE:
00201                         for (indx = 0, top = NUM_ENT(h);;) {
00202                                 bi = GET_BINTERNAL(dbp, h, indx);
00203                                 if (++indx == top || total + bi->nrecs >= recno)
00204                                         break;
00205                                 total += bi->nrecs;
00206                         }
00207                         pg = bi->pgno;
00208                         break;
00209                 case P_LRECNO:
00210                         recno -= total;
00211 
00212                         /* Correct from 1-based to 0-based for a page offset. */
00213                         --recno;
00214                         BT_STK_ENTER(dbp->dbenv,
00215                             cp, h, recno, lock, lock_mode, ret);
00216                         if (ret != 0)
00217                                 goto err;
00218                         return (0);
00219                 case P_IRECNO:
00220                         for (indx = 0, top = NUM_ENT(h);;) {
00221                                 ri = GET_RINTERNAL(dbp, h, indx);
00222                                 if (++indx == top || total + ri->nrecs >= recno)
00223                                         break;
00224                                 total += ri->nrecs;
00225                         }
00226                         pg = ri->pgno;
00227                         break;
00228                 default:
00229                         return (__db_pgfmt(dbp->dbenv, h->pgno));
00230                 }
00231                 --indx;
00232 
00233                 /* Return if this is the lowest page wanted. */
00234                 if (stop == LEVEL(h)) {
00235                         BT_STK_ENTER(dbp->dbenv,
00236                             cp, h, indx, lock, lock_mode, ret);
00237                         if (ret != 0)
00238                                 goto err;
00239                         return (0);
00240                 }
00241                 if (stack) {
00242                         BT_STK_PUSH(dbp->dbenv,
00243                             cp, h, indx, lock, lock_mode, ret);
00244                         if (ret != 0)
00245                                 goto err;
00246                         h = NULL;
00247 
00248                         lock_mode = DB_LOCK_WRITE;
00249                         if ((ret =
00250                             __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00251                                 goto err;
00252                 } else {
00253                         /*
00254                          * Decide if we want to return a pointer to the next
00255                          * page in the stack.  If we do, write lock it and
00256                          * never unlock it.
00257                          */
00258                         if ((LF_ISSET(S_PARENT) &&
00259                             (u_int8_t)(stop + 1) >= (u_int8_t)(LEVEL(h) - 1)) ||
00260                             (LEVEL(h) - 1) == LEAFLEVEL)
00261                                 stack = 1;
00262 
00263                         if ((ret = __memp_fput(mpf, h, 0)) != 0)
00264                                 goto err;
00265                         h = NULL;
00266 
00267                         lock_mode = stack &&
00268                             LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
00269                         if ((ret = __db_lget(dbc,
00270                             LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
00271                                 /*
00272                                  * If we fail, discard the lock we held.  This
00273                                  * is OK because this only happens when we are
00274                                  * descending the tree holding read-locks.
00275                                  */
00276                                 (void)__LPUT(dbc, lock);
00277                                 goto err;
00278                         }
00279                 }
00280 
00281                 if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0)
00282                         goto err;
00283         }
00284         /* NOTREACHED */
00285 
00286 err:    if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
00287                 ret = t_ret;
00288 
00289         BT_STK_POP(cp);
00290         __bam_stkrel(dbc, 0);
00291 
00292         return (ret);
00293 }
00294 
00295 /*
00296  * __bam_adjust --
00297  *      Adjust the tree after adding or deleting a record.
00298  *
00299  * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
00300  */
00301 int
00302 __bam_adjust(dbc, adjust)
00303         DBC *dbc;
00304         int32_t adjust;
00305 {
00306         BTREE_CURSOR *cp;
00307         DB *dbp;
00308         DB_MPOOLFILE *mpf;
00309         EPG *epg;
00310         PAGE *h;
00311         db_pgno_t root_pgno;
00312         int ret;
00313 
00314         dbp = dbc->dbp;
00315         mpf = dbp->mpf;
00316         cp = (BTREE_CURSOR *)dbc->internal;
00317         root_pgno = cp->root;
00318 
00319         /* Update the record counts for the tree. */
00320         for (epg = cp->sp; epg <= cp->csp; ++epg) {
00321                 h = epg->page;
00322                 if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
00323                         if (DBC_LOGGING(dbc)) {
00324                                 if ((ret = __bam_cadjust_log(dbp, dbc->txn,
00325                                     &LSN(h), 0, PGNO(h), &LSN(h),
00326                                     (u_int32_t)epg->indx, adjust,
00327                                     PGNO(h) == root_pgno ?
00328                                     CAD_UPDATEROOT : 0)) != 0)
00329                                         return (ret);
00330                         } else
00331                                 LSN_NOT_LOGGED(LSN(h));
00332 
00333                         if (TYPE(h) == P_IBTREE)
00334                                 GET_BINTERNAL(dbp, h, epg->indx)->nrecs +=
00335                                     adjust;
00336                         else
00337                                 GET_RINTERNAL(dbp, h, epg->indx)->nrecs +=
00338                                     adjust;
00339 
00340                         if (PGNO(h) == root_pgno)
00341                                 RE_NREC_ADJ(h, adjust);
00342 
00343                         if ((ret = __memp_fset(mpf, h, DB_MPOOL_DIRTY)) != 0)
00344                                 return (ret);
00345                 }
00346         }
00347         return (0);
00348 }
00349 
00350 /*
00351  * __bam_nrecs --
00352  *      Return the number of records in the tree.
00353  *
00354  * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
00355  */
00356 int
00357 __bam_nrecs(dbc, rep)
00358         DBC *dbc;
00359         db_recno_t *rep;
00360 {
00361         DB *dbp;
00362         DB_LOCK lock;
00363         DB_MPOOLFILE *mpf;
00364         PAGE *h;
00365         db_pgno_t pgno;
00366         int ret, t_ret;
00367 
00368         dbp = dbc->dbp;
00369         mpf = dbp->mpf;
00370 
00371         pgno = dbc->internal->root;
00372         if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
00373                 return (ret);
00374         if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
00375                 return (ret);
00376 
00377         *rep = RE_NREC(h);
00378 
00379         ret = __memp_fput(mpf, h, 0);
00380         if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
00381                 ret = t_ret;
00382 
00383         return (ret);
00384 }
00385 
00386 /*
00387  * __bam_total --
00388  *      Return the number of records below a page.
00389  *
00390  * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *));
00391  */
00392 db_recno_t
00393 __bam_total(dbp, h)
00394         DB *dbp;
00395         PAGE *h;
00396 {
00397         db_recno_t nrecs;
00398         db_indx_t indx, top;
00399 
00400         nrecs = 0;
00401         top = NUM_ENT(h);
00402 
00403         switch (TYPE(h)) {
00404         case P_LBTREE:
00405                 /* Check for logically deleted records. */
00406                 for (indx = 0; indx < top; indx += P_INDX)
00407                         if (!B_DISSET(
00408                             GET_BKEYDATA(dbp, h, indx + O_INDX)->type))
00409                                 ++nrecs;
00410                 break;
00411         case P_LDUP:
00412                 /* Check for logically deleted records. */
00413                 for (indx = 0; indx < top; indx += O_INDX)
00414                         if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
00415                                 ++nrecs;
00416                 break;
00417         case P_IBTREE:
00418                 for (indx = 0; indx < top; indx += O_INDX)
00419                         nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs;
00420                 break;
00421         case P_LRECNO:
00422                 nrecs = NUM_ENT(h);
00423                 break;
00424         case P_IRECNO:
00425                 for (indx = 0; indx < top; indx += O_INDX)
00426                         nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs;
00427                 break;
00428         }
00429 
00430         return (nrecs);
00431 }

Generated on Sun Dec 25 12:14:13 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2