Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

bt_search.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994, 1995, 1996
00009  *      Keith Bostic.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993, 1994, 1995
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * This code is derived from software contributed to Berkeley by
00016  * Mike Olson.
00017  *
00018  * Redistribution and use in source and binary forms, with or without
00019  * modification, are permitted provided that the following conditions
00020  * are met:
00021  * 1. Redistributions of source code must retain the above copyright
00022  *    notice, this list of conditions and the following disclaimer.
00023  * 2. Redistributions in binary form must reproduce the above copyright
00024  *    notice, this list of conditions and the following disclaimer in the
00025  *    documentation and/or other materials provided with the distribution.
00026  * 3. Neither the name of the University nor the names of its contributors
00027  *    may be used to endorse or promote products derived from this software
00028  *    without specific prior written permission.
00029  *
00030  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00031  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00032  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00033  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00034  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00035  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00036  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00037  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00038  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00039  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00040  * SUCH DAMAGE.
00041  *
00042  * $Id: bt_search.c,v 12.17 2005/11/10 21:17:13 ubell Exp $
00043  */
00044 
00045 #include "db_config.h"
00046 
00047 #ifndef NO_SYSTEM_INCLUDES
00048 #include <sys/types.h>
00049 
00050 #include <string.h>
00051 #endif
00052 
00053 #include "db_int.h"
00054 #include "dbinc/db_page.h"
00055 #include "dbinc/db_shash.h"
00056 #include "dbinc/btree.h"
00057 #include "dbinc/lock.h"
00058 #include "dbinc/mp.h"
00059 
00060 /*
00061  * __bam_get_root --
00062  *      Fetch the root of a tree and see if we want to keep
00063  * it in the stack.
00064  *
00065  * PUBLIC: int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *));
00066  */
00067 int
00068 __bam_get_root(dbc, pg, slevel, flags, stack)
00069         DBC *dbc;
00070         db_pgno_t pg;
00071         int slevel;
00072         u_int32_t flags;
00073         int *stack;
00074 {
00075         BTREE_CURSOR *cp;
00076         DB *dbp;
00077         DB_LOCK lock;
00078         DB_MPOOLFILE *mpf;
00079         PAGE *h;
00080         db_lockmode_t lock_mode;
00081         int ret, t_ret;
00082 
00083         dbp = dbc->dbp;
00084         mpf = dbp->mpf;
00085         cp = (BTREE_CURSOR *)dbc->internal;
00086         /*
00087          * If write-locking pages, we need to know whether or not to acquire a
00088          * write lock on a page before getting it.  This depends on how deep it
00089          * is in tree, which we don't know until we acquire the root page.  So,
00090          * if we need to lock the root page we may have to upgrade it later,
00091          * because we won't get the correct lock initially.
00092          *
00093          * Retrieve the root page.
00094          */
00095 try_again:
00096         *stack = LF_ISSET(S_STACK) &&
00097               (dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM));
00098         lock_mode = DB_LOCK_READ;
00099         if (*stack ||
00100             LF_ISSET(S_DEL) || (LF_ISSET(S_NEXT) && LF_ISSET(S_WRITE)))
00101                 lock_mode = DB_LOCK_WRITE;
00102         if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00103                 return (ret);
00104         if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
00105                 /* Did not read it, so we can release the lock */
00106                 (void)__LPUT(dbc, lock);
00107                 return (ret);
00108         }
00109 
00110         /*
00111          * Decide if we need to save this page; if we do, write lock it.
00112          * We deliberately don't lock-couple on this call.  If the tree
00113          * is tiny, i.e., one page, and two threads are busily updating
00114          * the root page, we're almost guaranteed deadlocks galore, as
00115          * each one gets a read lock and then blocks the other's attempt
00116          * for a write lock.
00117          */
00118         if (!*stack &&
00119             ((LF_ISSET(S_PARENT) && (u_int8_t)(slevel + 1) >= LEVEL(h)) ||
00120             (LF_ISSET(S_WRITE) && LEVEL(h) == LEAFLEVEL) ||
00121             (LF_ISSET(S_START) && slevel == LEVEL(h)))) {
00122                 if (!STD_LOCKING(dbc))
00123                         goto no_relock;
00124                 ret = __memp_fput(mpf, h, 0);
00125                 if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
00126                         ret = t_ret;
00127                 if (ret != 0)
00128                         return (ret);
00129                 lock_mode = DB_LOCK_WRITE;
00130                 if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00131                         return (ret);
00132                 if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
00133                         /* Did not read it, so we can release the lock */
00134                         (void)__LPUT(dbc, lock);
00135                         return (ret);
00136                 }
00137                 if (!((LF_ISSET(S_PARENT) &&
00138                     (u_int8_t)(slevel + 1) >= LEVEL(h)) ||
00139                     (LF_ISSET(S_WRITE) && LEVEL(h) == LEAFLEVEL) ||
00140                     (LF_ISSET(S_START) && slevel == LEVEL(h)))) {
00141                         /* Someone else split the root, start over. */
00142                         ret = __memp_fput(mpf, h, 0);
00143                         if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
00144                                 ret = t_ret;
00145                         if (ret != 0)
00146                                 return (ret);
00147                         goto try_again;
00148                 }
00149 no_relock:      *stack = 1;
00150         }
00151         BT_STK_ENTER(dbp->dbenv, cp, h, 0, lock, lock_mode, ret);
00152 
00153         return (ret);
00154 }
00155 
00156 /*
00157  * __bam_search --
00158  *      Search a btree for a key.
00159  *
00160  * PUBLIC: int __bam_search __P((DBC *, db_pgno_t,
00161  * PUBLIC:     const DBT *, u_int32_t, int, db_recno_t *, int *));
00162  */
00163 int
00164 __bam_search(dbc, root_pgno, key, flags, slevel, recnop, exactp)
00165         DBC *dbc;
00166         db_pgno_t root_pgno;
00167         const DBT *key;
00168         u_int32_t flags;
00169         int slevel, *exactp;
00170         db_recno_t *recnop;
00171 {
00172         BTREE *t;
00173         BTREE_CURSOR *cp;
00174         DB *dbp;
00175         DB_LOCK lock;
00176         DB_MPOOLFILE *mpf;
00177         PAGE *h;
00178         db_indx_t base, i, indx, *inp, lim;
00179         db_lockmode_t lock_mode;
00180         db_pgno_t pg;
00181         db_recno_t recno;
00182         int adjust, cmp, deloffset, ret, stack, t_ret;
00183         int (*func) __P((DB *, const DBT *, const DBT *));
00184 
00185         dbp = dbc->dbp;
00186         mpf = dbp->mpf;
00187         cp = (BTREE_CURSOR *)dbc->internal;
00188         h = NULL;
00189         t = dbp->bt_internal;
00190         recno = 0;
00191 
00192         BT_STK_CLR(cp);
00193 
00194         /*
00195          * There are several ways we search a btree tree.  The flags argument
00196          * specifies if we're acquiring read or write locks, if we position
00197          * to the first or last item in a set of duplicates, if we return
00198          * deleted items, and if we are locking pairs of pages.  In addition,
00199          * if we're modifying record numbers, we have to lock the entire tree
00200          * regardless.  See btree.h for more details.
00201          */
00202 
00203         if (root_pgno == PGNO_INVALID)
00204                 root_pgno = cp->root;
00205         if ((ret = __bam_get_root(dbc, root_pgno, slevel, flags, &stack)) != 0)
00206                 return (ret);
00207         lock_mode = cp->csp->lock_mode;
00208         lock = cp->csp->lock;
00209         h = cp->csp->page;
00210 
00211         BT_STK_CLR(cp);
00212 
00213         /* Choose a comparison function. */
00214         func = F_ISSET(dbc, DBC_OPD) ?
00215             (dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare) :
00216             t->bt_compare;
00217 
00218         for (;;) {
00219                 inp = P_INP(dbp, h);
00220                 adjust = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
00221                 if (LF_ISSET(S_MIN | S_MAX)) {
00222                         if (LF_ISSET(S_MIN) || NUM_ENT(h) == 0)
00223                                 indx = 0;
00224                         else if (TYPE(h) == P_LBTREE)
00225                                 indx = NUM_ENT(h) - 2;
00226                         else
00227                                 indx = NUM_ENT(h) - 1;
00228 
00229                         if (LEVEL(h) == LEAFLEVEL ||
00230                              (!LF_ISSET(S_START) && LEVEL(h) == slevel)) {
00231                                 if (LF_ISSET(S_NEXT))
00232                                         goto get_next;
00233                                 goto found;
00234                         }
00235                         goto next;
00236                 }
00237                 /*
00238                  * Do a binary search on the current page.  If we're searching
00239                  * a Btree leaf page, we have to walk the indices in groups of
00240                  * two.  If we're searching an internal page or a off-page dup
00241                  * page, they're an index per page item.  If we find an exact
00242                  * match on a leaf page, we're done.
00243                  */
00244                 for (base = 0,
00245                     lim = NUM_ENT(h) / (db_indx_t)adjust; lim != 0; lim >>= 1) {
00246                         indx = base + ((lim >> 1) * adjust);
00247                         if ((ret =
00248                             __bam_cmp(dbp, key, h, indx, func, &cmp)) != 0)
00249                                 goto err;
00250                         if (cmp == 0) {
00251                                 if (LEVEL(h) == LEAFLEVEL ||
00252                                     (!LF_ISSET(S_START) &&
00253                                     LEVEL(h) == slevel)) {
00254                                         if (LF_ISSET(S_NEXT))
00255                                                 goto get_next;
00256                                         goto found;
00257                                 }
00258                                 goto next;
00259                         }
00260                         if (cmp > 0) {
00261                                 base = indx + adjust;
00262                                 --lim;
00263                         }
00264                 }
00265 
00266                 /*
00267                  * No match found.  Base is the smallest index greater than
00268                  * key and may be zero or a last + O_INDX index.
00269                  *
00270                  * If it's a leaf page or the stopping point,
00271                  * return base as the "found" value.
00272                  * Delete only deletes exact matches.
00273                  */
00274                 if (LEVEL(h) == LEAFLEVEL ||
00275                     (!LF_ISSET(S_START) && LEVEL(h) == slevel)) {
00276                         *exactp = 0;
00277 
00278                         if (LF_ISSET(S_EXACT)) {
00279                                 ret = DB_NOTFOUND;
00280                                 goto err;
00281                         }
00282 
00283                         if (LF_ISSET(S_STK_ONLY)) {
00284                                 BT_STK_NUM(dbp->dbenv, cp, h, base, ret);
00285                                 if ((t_ret =
00286                                     __LPUT(dbc, lock)) != 0 && ret == 0)
00287                                         ret = t_ret;
00288                                 if ((t_ret =
00289                                     __memp_fput(mpf, h, 0)) != 0 && ret == 0)
00290                                         ret = t_ret;
00291                                 return (ret);
00292                         }
00293                         if (LF_ISSET(S_NEXT)) {
00294 get_next:                       /*
00295                                  * The caller could have asked for a NEXT
00296                                  * at the root if the tree recently collapsed.
00297                                  */
00298                                 if (PGNO(h) == root_pgno) {
00299                                         ret = DB_NOTFOUND;
00300                                         goto err;
00301                                 }
00302                                 /*
00303                                  * Save the root of the subtree
00304                                  * and drop the rest of the subtree
00305                                  * and search down again starting at
00306                                  * the next child.
00307                                  */
00308                                 if ((ret = __LPUT(dbc, lock)) != 0)
00309                                         goto err;
00310                                 if ((ret = __memp_fput(mpf, h, 0)) != 0)
00311                                         goto err;
00312                                 h = NULL;
00313                                 LF_SET(S_MIN);
00314                                 LF_CLR(S_NEXT);
00315                                 indx = cp->sp->indx + 1;
00316                                 if (indx == NUM_ENT(cp->sp->page)) {
00317                                         ret = DB_NOTFOUND;
00318                                         cp->csp++;
00319                                         goto err;
00320                                 }
00321                                 h = cp->sp->page;
00322                                 cp->sp->page = NULL;
00323                                 lock = cp->sp->lock;
00324                                 LOCK_INIT(cp->sp->lock);
00325                                 if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
00326                                         goto err;
00327                                 stack = 1;
00328                                 goto next;
00329                         }
00330 
00331                         /*
00332                          * !!!
00333                          * Possibly returning a deleted record -- DB_SET_RANGE,
00334                          * DB_KEYFIRST and DB_KEYLAST don't require an exact
00335                          * match, and we don't want to walk multiple pages here
00336                          * to find an undeleted record.  This is handled by the
00337                          * calling routine.
00338                          */
00339                         if (LF_ISSET(S_DEL) && cp->csp == cp->sp)
00340                                 cp->csp++;
00341                         BT_STK_ENTER(dbp->dbenv,
00342                             cp, h, base, lock, lock_mode, ret);
00343                         if (ret != 0)
00344                                 goto err;
00345                         return (0);
00346                 }
00347 
00348                 /*
00349                  * If it's not a leaf page, record the internal page (which is
00350                  * a parent page for the key).  Decrement the base by 1 if it's
00351                  * non-zero so that if a split later occurs, the inserted page
00352                  * will be to the right of the saved page.
00353                  */
00354                 indx = base > 0 ? base - O_INDX : base;
00355 
00356                 /*
00357                  * If we're trying to calculate the record number, sum up
00358                  * all the record numbers on this page up to the indx point.
00359                  */
00360 next:           if (recnop != NULL)
00361                         for (i = 0; i < indx; ++i)
00362                                 recno += GET_BINTERNAL(dbp, h, i)->nrecs;
00363 
00364                 pg = GET_BINTERNAL(dbp, h, indx)->pgno;
00365 
00366                 /* See if we are at the level to start stacking. */
00367                 if (LF_ISSET(S_START) && slevel == LEVEL(h))
00368                         stack = 1;
00369 
00370                 if (LF_ISSET(S_STK_ONLY)) {
00371                         if (slevel == LEVEL(h)) {
00372                                 BT_STK_NUM(dbp->dbenv, cp, h, indx, ret);
00373                                 if ((t_ret =
00374                                     __LPUT(dbc, lock)) != 0 && ret == 0)
00375                                         ret = t_ret;
00376                                 if ((t_ret =
00377                                     __memp_fput(mpf, h, 0)) != 0 && ret == 0)
00378                                         ret = t_ret;
00379                                 return (ret);
00380                         }
00381                         BT_STK_NUMPUSH(dbp->dbenv, cp, h, indx, ret);
00382                         (void)__memp_fput(mpf, h, 0);
00383                         h = NULL;
00384                         if ((ret = __db_lget(dbc,
00385                             LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
00386                                 /*
00387                                  * Discard our lock and return on failure.  This
00388                                  * is OK because it only happens when descending
00389                                  * the tree holding read-locks.
00390                                  */
00391                                 (void)__LPUT(dbc, lock);
00392                                 return (ret);
00393                         }
00394                 } else if (stack) {
00395                         /* Return if this is the lowest page wanted. */
00396                         if (LF_ISSET(S_PARENT) && slevel == LEVEL(h)) {
00397                                 BT_STK_ENTER(dbp->dbenv,
00398                                     cp, h, indx, lock, lock_mode, ret);
00399                                 if (ret != 0)
00400                                         goto err;
00401                                 return (0);
00402                         }
00403                         if (LF_ISSET(S_DEL) && NUM_ENT(h) > 1) {
00404                                 /*
00405                                  * There was a page with a singleton pointer
00406                                  * to a non-empty subtree.
00407                                  */
00408                                 cp->csp--;
00409                                 if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
00410                                         goto err;
00411                                 stack = 0;
00412                                 goto do_del;
00413                         }
00414                         BT_STK_PUSH(dbp->dbenv,
00415                             cp, h, indx, lock, lock_mode, ret);
00416                         if (ret != 0)
00417                                 goto err;
00418                         h = NULL;
00419 
00420                         lock_mode = DB_LOCK_WRITE;
00421                         if ((ret =
00422                             __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00423                                 goto err;
00424                 } else {
00425                         /*
00426                          * Decide if we want to return a reference to the next
00427                          * page in the return stack.  If so, lock it and never
00428                          * unlock it.
00429                          */
00430                         if ((LF_ISSET(S_PARENT) &&
00431                             (u_int8_t)(slevel + 1) >= (LEVEL(h) - 1)) ||
00432                             (LEVEL(h) - 1) == LEAFLEVEL)
00433                                 stack = 1;
00434 
00435                         /*
00436                          * Returning a subtree.  See if we have hit the start
00437                          * point if so save the parent and set stack.
00438                          * Otherwise free the parent and temporarily
00439                          * save this one.
00440                          * For S_DEL we need to find a page with 1 entry.
00441                          * For S_NEXT we want find the minimal subtree
00442                          * that contains the key and the next page.
00443                          * We save pages as long as we are at the right
00444                          * edge of the subtree.  When we leave the right
00445                          * edge, then drop the subtree.
00446                          */
00447                         if (!LF_ISSET(S_DEL | S_NEXT)) {
00448                                 if ((ret = __memp_fput(mpf, h, 0)) != 0)
00449                                         goto err;
00450                                 goto lock_next;
00451                         }
00452 
00453                         if ((LF_ISSET(S_DEL) && NUM_ENT(h) == 1)) {
00454                                 stack = 1;
00455                                 LF_SET(S_WRITE);
00456                                 /* Push the parent. */
00457                                 cp->csp++;
00458                                 /* Push this node. */
00459                                 BT_STK_PUSH(dbp->dbenv, cp, h,
00460                                      indx, lock, lock_mode, ret);
00461                                 if (ret != 0)
00462                                         goto err;
00463                                 LOCK_INIT(lock);
00464                         } else {
00465                         /*
00466                          * See if we want to save the tree so far.
00467                          * If we are looking for the next key,
00468                          * then we must save this node if we are
00469                          * at the end of the page.  If not then
00470                          * discard anything we have saved so far.
00471                          * For delete only keep one node until
00472                          * we find a singleton.
00473                          */
00474 do_del:                         if (cp->csp->page != NULL) {
00475                                         if (LF_ISSET(S_NEXT) &&
00476                                              indx == NUM_ENT(h) - 1)
00477                                                 cp->csp++;
00478                                         else if ((ret =
00479                                             __bam_stkrel(dbc, STK_NOLOCK)) != 0)
00480                                                 goto err;
00481                                 }
00482                                 /* Save this node. */
00483                                 BT_STK_ENTER(dbp->dbenv, cp,
00484                                     h, indx, lock, lock_mode, ret);
00485                                 if (ret != 0)
00486                                         goto err;
00487                                 LOCK_INIT(lock);
00488                         }
00489 
00490 lock_next:              h = NULL;
00491 
00492                         if (stack && LF_ISSET(S_WRITE))
00493                                 lock_mode = DB_LOCK_WRITE;
00494                         if ((ret = __db_lget(dbc,
00495                             LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
00496                                 /*
00497                                  * If we fail, discard the lock we held.  This
00498                                  * is OK because this only happens when we are
00499                                  * descending the tree holding read-locks.
00500                                  */
00501                                 (void)__LPUT(dbc, lock);
00502                                 if (LF_ISSET(S_DEL | S_NEXT))
00503                                         cp->csp++;
00504                                 goto err;
00505                         }
00506                 }
00507                 if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0)
00508                         goto err;
00509         }
00510         /* NOTREACHED */
00511 
00512 found:  *exactp = 1;
00513 
00514         /*
00515          * If we got here, we know that we have a Btree leaf or off-page
00516          * duplicates page.  If it's a Btree leaf page, we have to handle
00517          * on-page duplicates.
00518          *
00519          * If there are duplicates, go to the first/last one.  This is
00520          * safe because we know that we're not going to leave the page,
00521          * all duplicate sets that are not on overflow pages exist on a
00522          * single leaf page.
00523          */
00524         if (TYPE(h) == P_LBTREE && NUM_ENT(h) > P_INDX) {
00525                 if (LF_ISSET(S_DUPLAST))
00526                         while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
00527                             inp[indx] == inp[indx + P_INDX])
00528                                 indx += P_INDX;
00529                 else if (LF_ISSET(S_DUPFIRST))
00530                         while (indx > 0 &&
00531                             inp[indx] == inp[indx - P_INDX])
00532                                 indx -= P_INDX;
00533         }
00534 
00535         /*
00536          * Now check if we are allowed to return deleted items; if not, then
00537          * find the next (or previous) non-deleted duplicate entry.  (We do
00538          * not move from the original found key on the basis of the S_DELNO
00539          * flag.)
00540          */
00541         DB_ASSERT(recnop == NULL || LF_ISSET(S_DELNO));
00542         if (LF_ISSET(S_DELNO)) {
00543                 deloffset = TYPE(h) == P_LBTREE ? O_INDX : 0;
00544                 if (LF_ISSET(S_DUPLAST))
00545                         while (B_DISSET(GET_BKEYDATA(dbp,
00546                             h, indx + deloffset)->type) && indx > 0 &&
00547                             inp[indx] == inp[indx - adjust])
00548                                 indx -= adjust;
00549                 else
00550                         while (B_DISSET(GET_BKEYDATA(dbp,
00551                             h, indx + deloffset)->type) &&
00552                             indx < (db_indx_t)(NUM_ENT(h) - adjust) &&
00553                             inp[indx] == inp[indx + adjust])
00554                                 indx += adjust;
00555 
00556                 /*
00557                  * If we weren't able to find a non-deleted duplicate, return
00558                  * DB_NOTFOUND.
00559                  */
00560                 if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type)) {
00561                         ret = DB_NOTFOUND;
00562                         goto err;
00563                 }
00564 
00565                 /*
00566                  * Increment the record counter to point to the found element.
00567                  * Ignore any deleted key/data pairs.  There doesn't need to
00568                  * be any correction for duplicates, as Btree doesn't support
00569                  * duplicates and record numbers in the same tree.
00570                  */
00571                 if (recnop != NULL) {
00572                         DB_ASSERT(TYPE(h) == P_LBTREE);
00573 
00574                         for (i = 0; i < indx; i += P_INDX)
00575                                 if (!B_DISSET(
00576                                     GET_BKEYDATA(dbp, h, i + O_INDX)->type))
00577                                         ++recno;
00578 
00579                         /* Correct the number for a 0-base. */
00580                         *recnop = recno + 1;
00581                 }
00582         }
00583 
00584         if (LF_ISSET(S_STK_ONLY)) {
00585                 BT_STK_NUM(dbp->dbenv, cp, h, indx, ret);
00586                 if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
00587                         ret = t_ret;
00588                 if ((t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
00589                         ret = t_ret;
00590         } else {
00591                 if (LF_ISSET(S_DEL) && cp->csp == cp->sp)
00592                         cp->csp++;
00593                 BT_STK_ENTER(dbp->dbenv, cp, h, indx, lock, lock_mode, ret);
00594         }
00595         if (ret != 0)
00596                 goto err;
00597 
00598         return (0);
00599 
00600 err:    if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
00601                 ret = t_ret;
00602 
00603         /* Keep any not-found page locked for serializability. */
00604         if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
00605                 ret = t_ret;
00606 
00607         BT_STK_POP(cp);
00608         __bam_stkrel(dbc, 0);
00609 
00610         return (ret);
00611 }
00612 
00613 /*
00614  * __bam_stkrel --
00615  *      Release all pages currently held in the stack.
00616  *
00617  * PUBLIC: int __bam_stkrel __P((DBC *, u_int32_t));
00618  */
00619 int
00620 __bam_stkrel(dbc, flags)
00621         DBC *dbc;
00622         u_int32_t flags;
00623 {
00624         BTREE_CURSOR *cp;
00625         DB *dbp;
00626         DB_MPOOLFILE *mpf;
00627         EPG *epg;
00628         int ret, t_ret;
00629 
00630         dbp = dbc->dbp;
00631         mpf = dbp->mpf;
00632         cp = (BTREE_CURSOR *)dbc->internal;
00633 
00634         /*
00635          * Release inner pages first.
00636          *
00637          * The caller must be sure that setting STK_NOLOCK will not effect
00638          * either serializability or recoverability.
00639          */
00640         for (ret = 0, epg = cp->sp; epg <= cp->csp; ++epg) {
00641                 if (epg->page != NULL) {
00642                         if (LF_ISSET(STK_CLRDBC) && cp->page == epg->page) {
00643                                 cp->page = NULL;
00644                                 LOCK_INIT(cp->lock);
00645                         }
00646                         if ((t_ret =
00647                             __memp_fput(mpf, epg->page, 0)) != 0 && ret == 0)
00648                                 ret = t_ret;
00649                         /*
00650                          * XXX
00651                          * Temporary fix for #3243 -- under certain deadlock
00652                          * conditions we call here again and re-free the page.
00653                          * The correct fix is to never release a stack that
00654                          * doesn't hold items.
00655                          */
00656                         epg->page = NULL;
00657                 }
00658                 /*
00659                  * We set this if we need to release our pins,
00660                  * but are not logically ready to have the pages
00661                  * visible.
00662                  */
00663                 if (LF_ISSET(STK_PGONLY))
00664                         continue;
00665                 if (LF_ISSET(STK_NOLOCK)) {
00666                         if ((t_ret = __LPUT(dbc, epg->lock)) != 0 && ret == 0)
00667                                 ret = t_ret;
00668                 } else
00669                         if ((t_ret = __TLPUT(dbc, epg->lock)) != 0 && ret == 0)
00670                                 ret = t_ret;
00671         }
00672 
00673         /* Clear the stack, all pages have been released. */
00674         if (!LF_ISSET(STK_PGONLY))
00675                 BT_STK_CLR(cp);
00676 
00677         return (ret);
00678 }
00679 
00680 /*
00681  * __bam_stkgrow --
00682  *      Grow the stack.
00683  *
00684  * PUBLIC: int __bam_stkgrow __P((DB_ENV *, BTREE_CURSOR *));
00685  */
00686 int
00687 __bam_stkgrow(dbenv, cp)
00688         DB_ENV *dbenv;
00689         BTREE_CURSOR *cp;
00690 {
00691         EPG *p;
00692         size_t entries;
00693         int ret;
00694 
00695         entries = cp->esp - cp->sp;
00696 
00697         if ((ret = __os_calloc(dbenv, entries * 2, sizeof(EPG), &p)) != 0)
00698                 return (ret);
00699         memcpy(p, cp->sp, entries * sizeof(EPG));
00700         if (cp->sp != cp->stack)
00701                 __os_free(dbenv, cp->sp);
00702         cp->sp = p;
00703         cp->csp = p + entries;
00704         cp->esp = p + entries * 2;
00705         return (0);
00706 }

Generated on Sun Dec 25 12:14:13 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2