Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

bt_recno.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1997-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: bt_recno.c,v 12.6 2005/08/08 14:27:59 bostic Exp $
00008  */
00009 
00010 #include "db_config.h"
00011 
00012 #ifndef NO_SYSTEM_INCLUDES
00013 #include <sys/types.h>
00014 
00015 #include <string.h>
00016 #endif
00017 
00018 #include "db_int.h"
00019 #include "dbinc/db_page.h"
00020 #include "dbinc/btree.h"
00021 #include "dbinc/db_shash.h"
00022 #include "dbinc/lock.h"
00023 /* Prototypes for the static (file-local) routines declared in this file. */
00024 static int  __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
00025 static int  __ram_source __P((DB *));
00026 static int  __ram_sread __P((DBC *, db_recno_t));
00027 static int  __ram_update __P((DBC *, db_recno_t, int));
00028 
00029 /*
00030  * In recno, there are two meanings to the on-page "deleted" flag.  If we're
00031  * re-numbering records, it means the record was implicitly created.  We skip
00032  * over implicitly created records if doing a cursor "next" or "prev", and
00033  * return DB_KEYEMPTY if they're explicitly requested.  If not re-numbering
00034  * records, it means that the record was implicitly created, or was deleted.
00035  * We skip over implicitly created or deleted records if doing a cursor "next"
00036  * or "prev", and return DB_KEYEMPTY if they're explicitly requested.
00037  *
00038  * If we're re-numbering records, then we have to detect in the cursor that
00039  * a record was deleted, and adjust the cursor as necessary on the next get.
00040  * If we're not re-numbering records, then we can detect that a record has
00041  * been deleted by looking at the actual on-page record, so we completely
00042  * ignore the cursor's delete flag.  This is different from the B+tree code.
00043  * It also maintains whether the cursor references a deleted record in the
00044  * cursor, and it doesn't always check the on-page value.
00045  * CD_SET/CD_CLR do nothing, and CD_ISSET is 0, unless C_RENUMBER is set. */
00046 #define CD_SET(cp) {                                                    \
00047         if (F_ISSET(cp, C_RENUMBER))                                    \
00048                 F_SET(cp, C_DELETED);                                   \
00049 }
00050 #define CD_CLR(cp) {                                                    \
00051         if (F_ISSET(cp, C_RENUMBER)) {                                  \
00052                 F_CLR(cp, C_DELETED);                                   \
00053                 (cp)->order = INVALID_ORDER;                            \
00054         }                                                               \
00055 }
00056 #define CD_ISSET(cp)                                                    \
00057         (F_ISSET(cp, C_RENUMBER) && F_ISSET(cp, C_DELETED) ? 1 : 0)
00058 
00059 /*
00060  * Macros for comparing the ordering of two cursors.
00061  * cp1 comes before cp2 iff one of the following holds:
00062  *      cp1's recno is less than cp2's recno
00063  *      recnos are equal, both deleted, and cp1's order is less than cp2's
00064  *      recnos are equal, cp1 deleted, and cp2 not deleted
00065  * "Deleted" means CD_ISSET is true; each argument is expanded repeatedly. */
00066 #define C_LESSTHAN(cp1, cp2)                                            \
00067     (((cp1)->recno < (cp2)->recno) ||                                   \
00068     (((cp1)->recno == (cp2)->recno) &&                                  \
00069     ((CD_ISSET((cp1)) && CD_ISSET((cp2)) && (cp1)->order < (cp2)->order) || \
00070     (CD_ISSET((cp1)) && !CD_ISSET((cp2))))))
00071 
00072 /*
00073  * cp1 is equal to cp2 iff their recnos and delete flags are identical,
00074  * and if the delete flag is set their orders are also identical.
00075  * The delete flag is read via CD_ISSET; arguments are expanded repeatedly. */
00076 #define C_EQUAL(cp1, cp2)                                               \
00077     ((cp1)->recno == (cp2)->recno && CD_ISSET((cp1)) == CD_ISSET((cp2)) && \
00078     (!CD_ISSET((cp1)) || (cp1)->order == (cp2)->order))
00079 
00080 /*
00081  * Do we need to log the current cursor adjustment?  Only when DBC_LOGGING
00082  * is true and the cursor's transaction is non-NULL and has a parent. */
00083 #define CURADJ_LOG(dbc)                                                 \
00084         (DBC_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL)
00085 
00086 /*
00087  * After a search, copy the found page into the cursor, discarding any
00088  * currently held lock and taking over the lock recorded in cp->csp.
00089  * Uses a caller-scope variable named dbc; ret is only set if it was 0. */
00090 #define STACK_TO_CURSOR(cp, ret) {                                      \
00091         int __t_ret;                                                    \
00092         (cp)->page = (cp)->csp->page;                                   \
00093         (cp)->pgno = (cp)->csp->page->pgno;                             \
00094         (cp)->indx = (cp)->csp->indx;                                   \
00095         if ((__t_ret = __TLPUT(dbc, (cp)->lock)) != 0 && (ret) == 0)    \
00096                 ret = __t_ret;                                          \
00097         (cp)->lock = (cp)->csp->lock;                                   \
00098         (cp)->lock_mode = (cp)->csp->lock_mode;                         \
00099 }
00100 
00101 /*
00102  * __ram_open --
00103  *      Recno open function.  Starts up the tree, opens any configured
00104  *      backing source and, for DB_AM_SNAPSHOT, reads it in right away.
00105  * PUBLIC: int __ram_open __P((DB *,
00106  * PUBLIC:      DB_TXN *, const char *, db_pgno_t, u_int32_t));
00107  */
00108 int
00109 __ram_open(dbp, txn, name, base_pgno, flags)
00110         DB *dbp;
00111         DB_TXN *txn;
00112         const char *name;
00113         db_pgno_t base_pgno;
00114         u_int32_t flags;
00115 {
00116         BTREE *t;
00117         DBC *dbc;
00118         int ret, t_ret;
00119         /* name is not otherwise referenced in this function. */
00120         COMPQUIET(name, NULL);
00121         t = dbp->bt_internal;
00122 
00123         /* Start up the tree. */
00124         if ((ret = __bam_read_root(dbp, txn, base_pgno, flags)) != 0)
00125                 return (ret);
00126 
00127         /*
00128          * If the user specified a source tree, open it and map it in.
00129          *
00130          * !!!
00131          * We don't complain if the user specified transactions or threads.
00132          * It's possible to make it work, but you'd better know what you're
00133          * doing!
00134          */
00135         if (t->re_source != NULL && (ret = __ram_source(dbp)) != 0)
00136                 return (ret);
00137 
00138         /* If we're snapshotting an underlying source file, do it now. */
00139         if (F_ISSET(dbp, DB_AM_SNAPSHOT)) {
00140                 /* Allocate a cursor; note that NULL, not txn, is passed. */
00141                 if ((ret = __db_cursor(dbp, NULL, &dbc, 0)) != 0)
00142                         return (ret);
00143 
00144                 /* Do the snapshot; DB_NOTFOUND is not treated as an error. */
00145                 if ((ret = __ram_update(dbc,
00146                     DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND)
00147                         ret = 0;
00148 
00149                 /* Discard the cursor, keeping the first error seen. */
00150                 if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
00151                         ret = t_ret;
00152         }
00153 
00154         return (ret);
00155 }
00156 
00157 /*
00158  * __ram_append --
00159  *      Recno append function.  Adds data as a new record (DB_APPEND) and,
00160  *      on success, copies cp->recno back to the caller through key.
00161  * PUBLIC: int __ram_append __P((DBC *, DBT *, DBT *));
00162  */
00163 int
00164 __ram_append(dbc, key, data)
00165         DBC *dbc;
00166         DBT *key, *data;
00167 {
00168         BTREE_CURSOR *cp;
00169         int ret;
00170 
00171         cp = (BTREE_CURSOR *)dbc->internal;
00172 
00173         /*
00174          * Make sure we've read in all of the backing source file.  If
00175          * we found the record or it simply didn't exist, add the
00176          * user's record.
00177          */
00178         ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
00179         if (ret == 0 || ret == DB_NOTFOUND)
00180                 ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0);
00181 
00182         /* Return the record number, passing the cursor's rkey buffer. */
00183         if (ret == 0)
00184                 ret = __db_retcopy(dbc->dbp->dbenv, key, &cp->recno,
00185                     sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
00186         /* Any other error from the calls above is returned unchanged. */
00187         return (ret);
00188 }
00189 
00190 /*
00191  * __ram_c_del --
00192  *      Recno cursor->c_del function.  Removes the record at cp->recno, or,
00193  *      when not renumbering, replaces it with an empty marker item.
00194  * PUBLIC: int __ram_c_del __P((DBC *));
00195  */
00196 int
00197 __ram_c_del(dbc)
00198         DBC *dbc;
00199 {
00200         BKEYDATA bk;
00201         BTREE *t;
00202         BTREE_CURSOR *cp;
00203         DB *dbp;
00204         DB_LSN lsn;
00205         DBT hdr, data;
00206         int exact, ret, stack, t_ret;
00207 
00208         dbp = dbc->dbp;
00209         cp = (BTREE_CURSOR *)dbc->internal;
00210         t = dbp->bt_internal;
00211         stack = 0;              /* Set once there is a stack to release. */
00212 
00213         /*
00214          * The semantics of cursors during delete are as follows: in
00215          * non-renumbering recnos, records are replaced with a marker
00216          * containing a delete flag.  If the record referenced by this cursor
00217          * has already been deleted, we will detect that as part of the delete
00218          * operation, and fail.
00219          *
00220          * In renumbering recnos, cursors which represent deleted items
00221          * are flagged with the C_DELETED flag, and it is an error to
00222          * call c_del a second time without an intervening cursor motion.
00223          */
00224         if (CD_ISSET(cp))
00225                 return (DB_KEYEMPTY);
00226 
00227         /* Search the tree for the key; delete only deletes exact matches. */
00228         if ((ret = __bam_rsearch(dbc, &cp->recno, S_DELETE, 1, &exact)) != 0)
00229                 goto err;
00230         if (!exact) {
00231                 ret = DB_NOTFOUND;
00232                 goto err;
00233         }
00234         stack = 1;
00235 
00236         /* Copy the page into the cursor. */
00237         STACK_TO_CURSOR(cp, ret);
00238         if (ret != 0)
00239                 goto err;
00240 
00241         /*
00242          * If re-numbering records, the on-page deleted flag can only mean
00243          * that this record was implicitly created.  Applications aren't
00244          * permitted to delete records they never created, return an error.
00245          *
00246          * If not re-numbering records, the on-page deleted flag means that
00247          * this record was implicitly created, or, was deleted at some time.
00248          * The former is an error because applications aren't permitted to
00249          * delete records they never created, the latter is an error because
00250          * if the record was "deleted", we could never have found it.
00251          */
00252         if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) {
00253                 ret = DB_KEYEMPTY;
00254                 goto err;
00255         }
00256 
00257         if (F_ISSET(cp, C_RENUMBER)) {
00258                 /* Delete the item, adjust the counts, adjust the cursors. */
00259                 if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
00260                         goto err;
00261                 if ((ret = __bam_adjust(dbc, -1)) != 0)
00262                         goto err;
00263                 if (__ram_ca(dbc, CA_DELETE) > 0 &&
00264                     CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp, dbc->txn,
00265                     &lsn, 0, CA_DELETE, cp->root, cp->recno, cp->order)) != 0)
00266                         goto err;
00267 
00268                 /*
00269                  * If the page is empty, delete it.
00270                  *
00271                  * We never delete a root page.  First, root pages of primary
00272                  * databases never go away, recno or otherwise.  However, if
00273                  * it's the root page of an off-page duplicates database, then
00274                  * it can be deleted.   We don't delete it here because we have
00275                  * no way of telling the primary database page holder (e.g.,
00276                  * the hash access method) that its page element should be
00277                  * cleaned up because the underlying tree is gone.  So, we keep
00278                  * the page around until the last cursor referencing the empty
00279                  * tree is closed, and then clean it up.
00280                  */
00281                 if (NUM_ENT(cp->page) == 0 && PGNO(cp->page) != cp->root) {
00282                         /*
00283                          * We want to delete a single item out of the last page
00284                          * that we're not deleting.
00285                          */
00286                         ret = __bam_dpages(dbc, 0, 0);
00287 
00288                         /*
00289                          * Regardless of the return from __bam_dpages, it will
00290                          * discard our stack and pinned page.
00291                          */
00292                         stack = 0;
00293                         cp->page = NULL;
00294                 }
00295         } else {
00296                 /* Use a delete/put pair to replace the record with a marker. */
00297                 if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
00298                         goto err;
00299                 /* Build a zero-length B_KEYDATA header and empty data item. */
00300                 B_TSET(bk.type, B_KEYDATA, 1);
00301                 bk.len = 0;
00302                 memset(&hdr, 0, sizeof(hdr));
00303                 hdr.data = &bk;
00304                 hdr.size = SSZA(BKEYDATA, data);
00305                 memset(&data, 0, sizeof(data));
00306                 data.data = (void *)"";
00307                 data.size = 0;
00308                 if ((ret = __db_pitem(dbc,
00309                     cp->page, cp->indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0)
00310                         goto err;
00311         }
00312         /* Reached on both paths, including after a failed __bam_dpages. */
00313         t->re_modified = 1;
00314         /* Release the stack if still held; keep the first error seen. */
00315 err:    if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
00316                 ret = t_ret;
00317 
00318         return (ret);
00319 }
00320 
00321 /*
00322  * __ram_c_get --
00323  *      Recno cursor->c_get function.  Positions cp->recno according to
00324  *      flags, then searches the tree and copies the page into the cursor.
00325  * PUBLIC: int __ram_c_get
00326  * PUBLIC:     __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
00327  */
00328 int
00329 __ram_c_get(dbc, key, data, flags, pgnop)
00330         DBC *dbc;
00331         DBT *key, *data;
00332         u_int32_t flags;
00333         db_pgno_t *pgnop;
00334 {
00335         BTREE_CURSOR *cp;
00336         DB *dbp;
00337         int cmp, exact, ret;
00338         /* pgnop is not otherwise referenced in this function. */
00339         COMPQUIET(pgnop, NULL);
00340 
00341         dbp = dbc->dbp;
00342         cp = (BTREE_CURSOR *)dbc->internal;
00343         /* NOTE(review): LF_CLR presumably clears these bits in flags. */
00344         LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY);
00345 retry:  switch (flags) {
00346         case DB_CURRENT:
00347                 /*
00348                  * If we're using mutable records and the deleted flag is
00349                  * set, the cursor is pointing at a nonexistent record;
00350                  * return an error.
00351                  */
00352                 if (CD_ISSET(cp))
00353                         return (DB_KEYEMPTY);
00354                 break;
00355         case DB_NEXT_DUP:
00356                 /*
00357                  * If we're not in an off-page dup set, we know there's no
00358                  * next duplicate since recnos don't have them.  If we
00359                  * are in an off-page dup set, the next item assuredly is
00360                  * a dup, so we set flags to DB_NEXT and keep going.
00361                  */
00362                 if (!F_ISSET(dbc, DBC_OPD))
00363                         return (DB_NOTFOUND);
00364                 /* FALLTHROUGH */
00365         case DB_NEXT_NODUP:
00366                 /*
00367                  * Recno databases don't have duplicates, set flags to DB_NEXT
00368                  * and keep going.
00369                  */
00370                 /* FALLTHROUGH */
00371         case DB_NEXT:
00372                 flags = DB_NEXT;
00373                 /*
00374                  * If record numbers are mutable: if we just deleted a record,
00375                  * we have to avoid incrementing the record number so that we
00376                  * return the right record by virtue of renumbering the tree.
00377                  */
00378                 if (CD_ISSET(cp)) {
00379                         /*
00380                          * Clear the flag, we've moved off the deleted record.
00381                          */
00382                         CD_CLR(cp);
00383                         break;
00384                 }
00385                 /* An unpositioned cursor (RECNO_OOB) starts like DB_FIRST. */
00386                 if (cp->recno != RECNO_OOB) {
00387                         ++cp->recno;
00388                         break;
00389                 }
00390                 /* FALLTHROUGH */
00391         case DB_FIRST:
00392                 flags = DB_NEXT;
00393                 cp->recno = 1;
00394                 break;
00395         case DB_PREV_NODUP:
00396                 /*
00397                  * Recno databases don't have duplicates, set flags to DB_PREV
00398                  * and keep going.
00399                  */
00400                 /* FALLTHROUGH */
00401         case DB_PREV:
00402                 flags = DB_PREV;
00403                 if (cp->recno != RECNO_OOB) {
00404                         if (cp->recno == 1) {
00405                                 ret = DB_NOTFOUND;
00406                                 goto err;
00407                         }
00408                         --cp->recno;
00409                         break;
00410                 }
00411                 /* FALLTHROUGH */
00412         case DB_LAST:
00413                 flags = DB_PREV;
00414                 if (((ret = __ram_update(dbc,
00415                     DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND)
00416                         goto err;
00417                 if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0)
00418                         goto err;
00419                 if (cp->recno == 0) {
00420                         ret = DB_NOTFOUND;
00421                         goto err;
00422                 }
00423                 break;
00424         case DB_GET_BOTHC:
00425                 /*
00426                  * If we're doing a join and these are offpage dups,
00427                  * we want to keep searching forward from after the
00428                  * current cursor position.  Increment the recno by 1,
00429                  * then proceed as for a DB_SET.
00430                  *
00431                  * Otherwise, we know there are no additional matching
00432                  * data, as recnos don't have dups.  return DB_NOTFOUND.
00433                  */
00434                 if (F_ISSET(dbc, DBC_OPD)) {
00435                         cp->recno++;
00436                         break;
00437                 }
00438                 ret = DB_NOTFOUND;
00439                 goto err;
00440                 /* NOTREACHED */
00441         case DB_GET_BOTH:
00442         case DB_GET_BOTH_RANGE:
00443                 /*
00444                  * If we're searching a set of off-page dups, we start
00445                  * a new linear search from the first record.  Otherwise,
00446                  * we compare the single data item associated with the
00447                  * requested record for a match.
00448                  */
00449                 if (F_ISSET(dbc, DBC_OPD)) {
00450                         cp->recno = 1;
00451                         break;
00452                 }
00453                 /* FALLTHROUGH */
00454         case DB_SET:
00455         case DB_SET_RANGE:
00456                 if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0)
00457                         goto err;
00458                 break;
00459         default:
00460                 ret = __db_unknown_flag(dbp->dbenv, "__ram_c_get", flags);
00461                 goto err;
00462         }
00463 
00464         /*
00465          * For DB_PREV, DB_LAST, DB_SET and DB_SET_RANGE, we have already
00466          * called __ram_update() to make sure sufficient records have been
00467          * read from the backing source file.  Do it now for DB_CURRENT (if
00468          * the current record was deleted we may need more records from the
00469          * backing file for a DB_CURRENT operation), DB_FIRST and DB_NEXT.
00470          * (We don't have to test for flags == DB_FIRST, because the switch
00471          * statement above re-set flags to DB_NEXT in that case.)
00472          */
00473         if ((flags == DB_NEXT || flags == DB_CURRENT) && ((ret =
00474             __ram_update(dbc, cp->recno, 0)) != 0) && ret != DB_NOTFOUND)
00475                 goto err;
00476         /* The loop only repeats for the off-page-duplicate GET_BOTH cases. */
00477         for (;; ++cp->recno) {
00478                 /* Search the tree for the record. */
00479                 if ((ret = __bam_rsearch(dbc, &cp->recno,
00480                     F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
00481                     1, &exact)) != 0)
00482                         goto err;
00483                 if (!exact) {
00484                         ret = DB_NOTFOUND;
00485                         goto err;
00486                 }
00487 
00488                 /* Copy the page into the cursor. */
00489                 STACK_TO_CURSOR(cp, ret);
00490                 if (ret != 0)
00491                         goto err;
00492 
00493                 /*
00494                  * If re-numbering records, the on-page deleted flag means this
00495                  * record was implicitly created.  If not re-numbering records,
00496                  * the on-page deleted flag means this record was implicitly
00497                  * created, or, it was deleted at some time.  Regardless, we
00498                  * skip such records if doing cursor next/prev operations or
00499                  * walking through off-page duplicates, and fail if they were
00500                  * requested explicitly by the application.
00501                  */
00502                 if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type))
00503                         switch (flags) {
00504                         case DB_NEXT:
00505                         case DB_PREV:
00506                                 (void)__bam_stkrel(dbc, STK_CLRDBC);
00507                                 goto retry;     /* Steps recno once more. */
00508                         case DB_GET_BOTH:
00509                         case DB_GET_BOTH_RANGE:
00510                                 /*
00511                                  * If we're an OPD tree, we don't care about
00512                                  * matching a record number on a DB_GET_BOTH
00513                                  * -- everything belongs to the same tree.  A
00514                                  * normal recno should give up and return
00515                                  * DB_NOTFOUND if the matching recno is deleted.
00516                                  */
00517                                 if (F_ISSET(dbc, DBC_OPD)) {
00518                                         (void)__bam_stkrel(dbc, STK_CLRDBC);
00519                                         continue;
00520                                 }
00521                                 ret = DB_NOTFOUND;
00522                                 goto err;
00523                         default:
00524                                 ret = DB_KEYEMPTY;
00525                                 goto err;
00526                         }
00527                 /* For the GET_BOTH family, the data item must also match. */
00528                 if (flags == DB_GET_BOTH ||
00529                     flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
00530                         if ((ret = __bam_cmp(dbp, data,
00531                             cp->page, cp->indx, __bam_defcmp, &cmp)) != 0)
00532                                 return (ret);   /* NOTE(review): skips err:. */
00533                         if (cmp == 0)
00534                                 break;
00535                         if (!F_ISSET(dbc, DBC_OPD)) {
00536                                 ret = DB_NOTFOUND;
00537                                 goto err;
00538                         }
00539                         (void)__bam_stkrel(dbc, STK_CLRDBC);
00540                 } else
00541                         break;
00542         }
00543 
00544         /* Return the key if the user didn't give us one. */
00545         if (!F_ISSET(dbc, DBC_OPD)) {
00546                 if (flags != DB_GET_BOTH && flags != DB_GET_BOTH_RANGE &&
00547                     flags != DB_SET && flags != DB_SET_RANGE)
00548                         ret = __db_retcopy(dbp->dbenv,
00549                             key, &cp->recno, sizeof(cp->recno),
00550                             &dbc->rkey->data, &dbc->rkey->ulen);
00551                 F_SET(key, DB_DBT_ISSET);
00552         }
00553 
00554         /* The cursor was reset, no further delete adjustment is necessary. */
00555 err:    CD_CLR(cp);
00556 
00557         return (ret);
00558 }
00559 
00560 /*
00561  * __ram_c_put --
00562  *      Recno cursor->c_put function.  Stores data at, before or after the
00563  *      cursor's record as selected by flags, then adjusts other cursors.
00564  * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
00565  */
00566 int
00567 __ram_c_put(dbc, key, data, flags, pgnop)
00568         DBC *dbc;
00569         DBT *key, *data;
00570         u_int32_t flags;
00571         db_pgno_t *pgnop;
00572 {
00573         BTREE_CURSOR *cp;
00574         DB *dbp;
00575         DB_LSN lsn;
00576         int exact, nc, ret, t_ret;
00577         u_int32_t iiflags;
00578         void *arg;
00579         /* pgnop is not otherwise referenced in this function. */
00580         COMPQUIET(pgnop, NULL);
00581 
00582         dbp = dbc->dbp;
00583         cp = (BTREE_CURSOR *)dbc->internal;
00584 
00585         /*
00586          * DB_KEYFIRST and DB_KEYLAST mean different things if they're
00587          * used in an off-page duplicate tree.  If we're an off-page
00588          * duplicate tree, they really mean "put at the beginning of the
00589          * tree" and "put at the end of the tree" respectively, so translate
00590          * them to something else.
00591          */
00592         if (F_ISSET(dbc, DBC_OPD))
00593                 switch (flags) {
00594                 case DB_KEYFIRST:
00595                         cp->recno = 1;
00596                         flags = DB_BEFORE;
00597                         break;
00598                 case DB_KEYLAST:
00599                         if ((ret = __ram_add(dbc,
00600                             &cp->recno, data, DB_APPEND, 0)) != 0)
00601                                 return (ret);
00602                         if (CURADJ_LOG(dbc) &&
00603                             (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
00604                             CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
00605                                 return (ret);
00606                         return (0);
00607                 default:
00608                         break;
00609                 }
00610 
00611         /*
00612          * Handle normal DB_KEYFIRST/DB_KEYLAST;  for a recno, which has
00613          * no duplicates, these are identical and mean "put the given
00614          * datum at the given recno".
00615          *
00616          * Note that the code here used to be in __ram_put;  now, we
00617          * go through the access-method-common __db_put function, which
00618          * handles DB_NOOVERWRITE, so we and __ram_add don't have to.
00619          */
00620         if (flags == DB_KEYFIRST || flags == DB_KEYLAST) {
00621                 ret = __ram_getno(dbc, key, &cp->recno, 1);
00622                 if (ret == 0 || ret == DB_NOTFOUND)
00623                         ret = __ram_add(dbc, &cp->recno, data, 0, 0);
00624                 return (ret);
00625         }
00626 
00627         /*
00628          * If we're putting with a cursor that's marked C_DELETED, we need to
00629          * take special care;  the cursor doesn't "really" reference the item
00630          * corresponding to its current recno, but instead is "between" that
00631          * record and the current one.  Translate the actual insert into
00632          * DB_BEFORE, and let the __ram_ca work out the gory details of what
00633          * should wind up pointing where.
00634          */
00635         if (CD_ISSET(cp))
00636                 iiflags = DB_BEFORE;
00637         else
00638                 iiflags = flags;
00639         /* Re-entered from below after a successful __bam_split. */
00640 split:  if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
00641                 goto err;
00642         /*
00643          * An inexact match is okay;  it just means we're one record past the
00644          * end, which is reasonable if we're marked deleted.
00645          */
00646         DB_ASSERT(exact || CD_ISSET(cp));
00647 
00648         /* Copy the page into the cursor. */
00649         STACK_TO_CURSOR(cp, ret);
00650         if (ret != 0)
00651                 goto err;
00652         /* The stack is released whether or not the insert succeeded. */
00653         ret = __bam_iitem(dbc, key, data, iiflags, 0);
00654         t_ret = __bam_stkrel(dbc, STK_CLRDBC);
00655         /* A release failure replaces success or DB_NEEDSPLIT; else split. */
00656         if (t_ret != 0 && (ret == 0 || ret == DB_NEEDSPLIT))
00657                 ret = t_ret;
00658         else if (ret == DB_NEEDSPLIT) {
00659                 arg = &cp->recno;
00660                 if ((ret = __bam_split(dbc, arg, NULL)) != 0)
00661                         goto err;
00662                 goto split;
00663         }
00664         if (ret != 0)
00665                 goto err;
00666         /* This switch uses the caller's flags, not the remapped iiflags. */
00667         switch (flags) {                        /* Adjust the cursors. */
00668         case DB_AFTER:
00669                 nc = __ram_ca(dbc, CA_IAFTER);
00670 
00671                 /*
00672                  * We only need to adjust this cursor forward if we truly added
00673                  * the item after the current recno, rather than remapping it
00674                  * to DB_BEFORE.
00675                  */
00676                 if (iiflags == DB_AFTER)
00677                         ++cp->recno;
00678 
00679                 /* Only log if __ram_ca found any relevant cursors. */
00680                 if (nc > 0 && CURADJ_LOG(dbc) &&
00681                     (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IAFTER,
00682                     cp->root, cp->recno, cp->order)) != 0)
00683                         goto err;
00684                 break;
00685         case DB_BEFORE:
00686                 nc = __ram_ca(dbc, CA_IBEFORE);
00687                 --cp->recno;
00688 
00689                 /* Only log if __ram_ca found any relevant cursors. */
00690                 if (nc > 0 && CURADJ_LOG(dbc) &&
00691                     (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IBEFORE,
00692                     cp->root, cp->recno, cp->order)) != 0)
00693                         goto err;
00694                 break;
00695         case DB_CURRENT:
00696                 /*
00697                  * We only need to do an adjustment if we actually
00698                  * added an item, which we only would have done if the
00699                  * cursor was marked deleted.
00700                  *
00701                  * Only log if __ram_ca found any relevant cursors.
00702                  */
00703                 if (CD_ISSET(cp) && __ram_ca(dbc, CA_ICURRENT) > 0 &&
00704                     CURADJ_LOG(dbc) &&
00705                     (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
00706                     CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
00707                         goto err;
00708                 break;
00709         default:
00710                 break;
00711         }
00712 
00713         /* Return the key if we've created a new record. */
00714         if (!F_ISSET(dbc, DBC_OPD) && (flags == DB_AFTER || flags == DB_BEFORE))
00715                 ret = __db_retcopy(dbp->dbenv, key, &cp->recno,
00716                     sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
00717 
00718         /* The cursor was reset, no further delete adjustment is necessary. */
00719 err:    CD_CLR(cp);
00720 
00721         return (ret);
00722 }
00723 
00724 /*
00725  * __ram_ca --
00726  *      Adjust cursors.  Returns the number of relevant cursors.
00727  *
00728  * PUBLIC: int __ram_ca __P((DBC *, ca_recno_arg));
00729  */
00730 int
00731 __ram_ca(dbc_arg, op)
00732         DBC *dbc_arg;
00733         ca_recno_arg op;
00734 {
00735         BTREE_CURSOR *cp, *cp_arg;
00736         DB *dbp, *ldbp;
00737         DB_ENV *dbenv;
00738         DBC *dbc;
00739         db_recno_t recno;
00740         int adjusted, found;
00741         u_int32_t order;
00742 
00743         dbp = dbc_arg->dbp;
00744         dbenv = dbp->dbenv;
00745         cp_arg = (BTREE_CURSOR *)dbc_arg->internal;
00746         recno = cp_arg->recno;
00747 
00748         found = 0;
00749 
00750         /*
00751          * It only makes sense to adjust cursors if we're a renumbering
00752          * recno;  we should only be called if this is one.
00753          */
00754         DB_ASSERT(F_ISSET(cp_arg, C_RENUMBER));
00755 
00756         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
00757         /*
00758          * Adjust the cursors.  See the comment in __bam_ca_delete().
00759          */
00760         /*
00761          * If we're doing a delete, we need to find the highest
00762          * order of any cursor currently pointing at this item,
00763          * so we can assign a higher order to the newly deleted
00764          * cursor.  Unfortunately, this requires a second pass through
00765          * the cursor list.
00766          */
00767         if (op == CA_DELETE) {
00768                 order = 1;
00769                 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
00770                     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
00771                     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
00772                         MUTEX_LOCK(dbenv, dbp->mutex);
00773                         for (dbc = TAILQ_FIRST(&ldbp->active_queue);
00774                             dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
00775                                 cp = (BTREE_CURSOR *)dbc->internal;
00776                                 if (cp_arg->root == cp->root &&
00777                                     recno == cp->recno && CD_ISSET(cp) &&
00778                                     order <= cp->order)
00779                                         order = cp->order + 1;
00780                         }
00781                         MUTEX_UNLOCK(dbenv, dbp->mutex);
00782                 }
00783         } else
00784                 order = INVALID_ORDER;
00785 
00786         /* Now go through and do the actual adjustments. */
00787         for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
00788             ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
00789             ldbp = LIST_NEXT(ldbp, dblistlinks)) {
00790                 MUTEX_LOCK(dbenv, dbp->mutex);
00791                 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
00792                     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
00793                         cp = (BTREE_CURSOR *)dbc->internal;
00794                         if (cp_arg->root != cp->root)
00795                                 continue;
00796                         ++found;
00797                         adjusted = 0;
00798                         switch (op) {
00799                         case CA_DELETE:
00800                                 if (recno < cp->recno) {
00801                                         --cp->recno;
00802                                         /*
00803                                          * If the adjustment made them equal,
00804                                          * we have to merge the orders.
00805                                          */
00806                                         if (recno == cp->recno && CD_ISSET(cp))
00807                                                 cp->order += order;
00808                                 } else if (recno == cp->recno &&
00809                                     !CD_ISSET(cp)) {
00810                                         CD_SET(cp);
00811                                         cp->order = order;
00812                                 }
00813                                 break;
00814                         case CA_IBEFORE:
00815                                 /*
00816                                  * IBEFORE is just like IAFTER, except that we
00817                                  * adjust cursors on the current record too.
00818                                  */
00819                                 if (C_EQUAL(cp_arg, cp)) {
00820                                         ++cp->recno;
00821                                         adjusted = 1;
00822                                 }
00823                                 goto iafter;
00824                         case CA_ICURRENT:
00825 
00826                                 /*
00827                                  * If the original cursor wasn't deleted, we
00828                                  * just did a replacement and so there's no
00829                                  * need to adjust anything--we shouldn't have
00830                                  * gotten this far.  Otherwise, we behave
00831                                  * much like an IAFTER, except that all
00832                                  * cursors pointing to the current item get
00833                                  * marked undeleted and point to the new
00834                                  * item.
00835                                  */
00836                                 DB_ASSERT(CD_ISSET(cp_arg));
00837                                 if (C_EQUAL(cp_arg, cp)) {
00838                                         CD_CLR(cp);
00839                                         break;
00840                                 }
00841                                 /* FALLTHROUGH */
00842                         case CA_IAFTER:
00843 iafter:                         if (!adjusted && C_LESSTHAN(cp_arg, cp)) {
00844                                         ++cp->recno;
00845                                         adjusted = 1;
00846                                 }
00847                                 if (recno == cp->recno && adjusted)
00848                                         /*
00849                                          * If we've moved this cursor's recno,
00850                                          * split its order number--i.e.,
00851                                          * decrement it by enough so that
00852                                          * the lowest cursor moved has order 1.
00853                                          * cp_arg->order is the split point,
00854                                          * so decrement by one less than that.
00855                                          */
00856                                         cp->order -= (cp_arg->order - 1);
00857                                 break;
00858                         }
00859                 }
00860                 MUTEX_UNLOCK(dbp->dbenv, dbp->mutex);
00861         }
00862         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
00863 
00864         return (found);
00865 }
00866 
00867 /*
00868  * __ram_getno --
00869  *      Check the user's record number, and make sure we've seen it.
00870  *
00871  * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
00872  */
00873 int
00874 __ram_getno(dbc, key, rep, can_create)
00875         DBC *dbc;
00876         const DBT *key;
00877         db_recno_t *rep;
00878         int can_create;
00879 {
00880         DB *dbp;
00881         db_recno_t recno;
00882 
00883         dbp = dbc->dbp;
00884 
00885         /* Check the user's record number. */
00886         if ((recno = *(db_recno_t *)key->data) == 0) {
00887                 __db_err(dbp->dbenv, "illegal record number of 0");
00888                 return (EINVAL);
00889         }
00890         if (rep != NULL)
00891                 *rep = recno;
00892 
00893         /*
00894          * Btree can neither create records nor read them in.  Recno can
00895          * do both, see if we can find the record.
00896          */
00897         return (dbc->dbtype == DB_RECNO ?
00898             __ram_update(dbc, recno, can_create) : 0);
00899 }
00900 
00901 /*
00902  * __ram_update --
00903  *      Ensure the tree has records up to and including the specified one.
00904  */
00905 static int
00906 __ram_update(dbc, recno, can_create)
00907         DBC *dbc;
00908         db_recno_t recno;
00909         int can_create;
00910 {
00911         BTREE *t;
00912         DB *dbp;
00913         DBT *rdata;
00914         db_recno_t nrecs;
00915         int ret;
00916 
00917         dbp = dbc->dbp;
00918         t = dbp->bt_internal;
00919 
00920         /*
00921          * If we can't create records and we've read the entire backing input
00922          * file, we're done.
00923          */
00924         if (!can_create && t->re_eof)
00925                 return (0);
00926 
00927         /*
00928          * If we haven't seen this record yet, try to get it from the original
00929          * file.
00930          */
00931         if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
00932                 return (ret);
00933         if (!t->re_eof && recno > nrecs) {
00934                 if ((ret = __ram_sread(dbc, recno)) != 0 && ret != DB_NOTFOUND)
00935                         return (ret);
00936                 if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
00937                         return (ret);
00938         }
00939 
00940         /*
00941          * If we can create records, create empty ones up to the requested
00942          * record.
00943          */
00944         if (!can_create || recno <= nrecs + 1)
00945                 return (0);
00946 
00947         rdata = &dbc->my_rdata;
00948         rdata->flags = 0;
00949         rdata->size = 0;
00950 
00951         while (recno > ++nrecs)
00952                 if ((ret = __ram_add(dbc,
00953                     &nrecs, rdata, 0, BI_DELETED)) != 0)
00954                         return (ret);
00955         return (0);
00956 }
00957 
00958 /*
00959  * __ram_source --
00960  *      Load information about the backing file.
00961  */
00962 static int
00963 __ram_source(dbp)
00964         DB *dbp;
00965 {
00966         BTREE *t;
00967         char *source;
00968         int ret;
00969 
00970         t = dbp->bt_internal;
00971 
00972         /* Find the real name, and swap out the one we had before. */
00973         if ((ret = __db_appname(dbp->dbenv,
00974             DB_APP_DATA, t->re_source, 0, NULL, &source)) != 0)
00975                 return (ret);
00976         __os_free(dbp->dbenv, t->re_source);
00977         t->re_source = source;
00978 
00979         /*
00980          * !!!
00981          * It's possible that the backing source file is read-only.  We don't
00982          * much care other than we'll complain if there are any modifications
00983          * when it comes time to write the database back to the source.
00984          */
00985         if ((t->re_fp = fopen(t->re_source, "r")) == NULL) {
00986                 ret = __os_get_errno();
00987                 __db_err(dbp->dbenv, "%s: %s", t->re_source, db_strerror(ret));
00988                 return (ret);
00989         }
00990 
00991         t->re_eof = 0;
00992         return (0);
00993 }
00994 
00995 /*
00996  * __ram_writeback --
00997  *      Rewrite the backing file.
00998  *
00999  * PUBLIC: int __ram_writeback __P((DB *));
01000  */
01001 int
01002 __ram_writeback(dbp)
01003         DB *dbp;
01004 {
01005         BTREE *t;
01006         DB_ENV *dbenv;
01007         DBC *dbc;
01008         DBT key, data;
01009         FILE *fp;
01010         db_recno_t keyno;
01011         int ret, t_ret;
01012         u_int8_t delim, *pad;
01013 
01014         t = dbp->bt_internal;
01015         dbenv = dbp->dbenv;
01016         fp = NULL;
01017         pad = NULL;
01018 
01019         /* If the file wasn't modified, we're done. */
01020         if (!t->re_modified)
01021                 return (0);
01022 
01023         /* If there's no backing source file, we're done. */
01024         if (t->re_source == NULL) {
01025                 t->re_modified = 0;
01026                 return (0);
01027         }
01028 
01029         /*
01030          * We step through the records, writing each one out.  Use the record
01031          * number and the dbp->get() function, instead of a cursor, so we find
01032          * and write out "deleted" or non-existent records.  The DB handle may
01033          * be threaded, so allocate memory as we go.
01034          */
01035         memset(&key, 0, sizeof(key));
01036         key.size = sizeof(db_recno_t);
01037         key.data = &keyno;
01038         memset(&data, 0, sizeof(data));
01039         F_SET(&data, DB_DBT_REALLOC);
01040 
01041         /* Allocate a cursor. */
01042         if ((ret = __db_cursor(dbp, NULL, &dbc, 0)) != 0)
01043                 return (ret);
01044 
01045         /*
01046          * Read any remaining records into the tree.
01047          *
01048          * !!!
01049          * This is why we can't support transactions when applications specify
01050          * backing (re_source) files.  At this point we have to read in the
01051          * rest of the records from the file so that we can write all of the
01052          * records back out again, which could modify a page for which we'd
01053          * have to log changes and which we don't have locked.  This could be
01054          * partially fixed by taking a snapshot of the entire file during the
01055          * DB->open as DB->open is transaction protected.  But, if a checkpoint
01056          * occurs then, the part of the log holding the copy of the file could
01057          * be discarded, and that would make it impossible to recover in the
01058          * face of disaster.  This could all probably be fixed, but it would
01059          * require transaction protecting the backing source file.
01060          *
01061          * XXX
01062          * This could be made to work now that we have transactions protecting
01063          * file operations.  Margo has specifically asked for the privilege of
01064          * doing this work.
01065          */
01066         if ((ret =
01067             __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND)
01068                 goto err;
01069 
01070         /*
01071          * Close any existing file handle and re-open the file, truncating it.
01072          */
01073         if (t->re_fp != NULL) {
01074                 if (fclose(t->re_fp) != 0) {
01075                         ret = __os_get_errno();
01076                         goto err;
01077                 }
01078                 t->re_fp = NULL;
01079         }
01080         if ((fp = fopen(t->re_source, "w")) == NULL) {
01081                 ret = __os_get_errno();
01082                 __db_err(dbenv, "%s: %s", t->re_source, db_strerror(ret));
01083                 goto err;
01084         }
01085 
01086         /*
01087          * We'll need the delimiter if we're doing variable-length records,
01088          * and the pad character if we're doing fixed-length records.
01089          */
01090         delim = t->re_delim;
01091         for (keyno = 1;; ++keyno) {
01092                 switch (ret = __db_get(dbp, NULL, &key, &data, 0)) {
01093                 case 0:
01094                         if (data.size != 0 &&
01095                             fwrite(data.data, 1, data.size, fp) != data.size)
01096                                 goto write_err;
01097                         break;
01098                 case DB_KEYEMPTY:
01099                         if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
01100                                 if (pad == NULL) {
01101                                         if ((ret = __os_malloc(
01102                                             dbenv, t->re_len, &pad)) != 0)
01103                                                 goto err;
01104                                         memset(pad, t->re_pad, t->re_len);
01105                                 }
01106                                 if (fwrite(pad, 1, t->re_len, fp) != t->re_len)
01107                                         goto write_err;
01108                         }
01109                         break;
01110                 case DB_NOTFOUND:
01111                         ret = 0;
01112                         goto done;
01113                 default:
01114                         goto err;
01115                 }
01116                 if (!F_ISSET(dbp, DB_AM_FIXEDLEN) &&
01117                     fwrite(&delim, 1, 1, fp) != 1) {
01118 write_err:              ret = __os_get_errno();
01119                         __db_err(dbenv,
01120                             "%s: write failed to backing file: %s",
01121                             t->re_source, strerror(ret));
01122                         goto err;
01123                 }
01124         }
01125 
01126 err:
01127 done:   /* Close the file descriptor. */
01128         if (fp != NULL && fclose(fp) != 0) {
01129                 t_ret = __os_get_errno();
01130                 if (ret == 0)
01131                         ret = t_ret;
01132                 __db_err(dbenv, "%s: %s", t->re_source, db_strerror(t_ret));
01133         }
01134 
01135         /* Discard the cursor. */
01136         if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
01137                 ret = t_ret;
01138 
01139         /* Discard memory allocated to hold the data items. */
01140         if (data.data != NULL)
01141                 __os_ufree(dbenv, data.data);
01142         if (pad != NULL)
01143                 __os_free(dbenv, pad);
01144 
01145         if (ret == 0)
01146                 t->re_modified = 0;
01147 
01148         return (ret);
01149 }
01150 
01151 /*
01152  * __ram_sread --
01153  *      Read records from a source file.
01154  */
01155 static int
01156 __ram_sread(dbc, top)
01157         DBC *dbc;
01158         db_recno_t top;
01159 {
01160         BTREE *t;
01161         DB *dbp;
01162         DBT data, *rdata;
01163         db_recno_t recno;
01164         size_t len;
01165         int ch, ret, was_modified;
01166 
01167         t = dbc->dbp->bt_internal;
01168         dbp = dbc->dbp;
01169         was_modified = t->re_modified;
01170 
01171         if ((ret = __bam_nrecs(dbc, &recno)) != 0)
01172                 return (ret);
01173 
01174         /*
01175          * Use the record key return memory, it's only a short-term use.
01176          * The record data return memory is used by __bam_iitem, which
01177          * we'll indirectly call, so use the key so as not to collide.
01178          */
01179         len = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : 256;
01180         rdata = &dbc->my_rkey;
01181         if (rdata->ulen < len) {
01182                 if ((ret = __os_realloc(
01183                     dbp->dbenv, len, &rdata->data)) != 0) {
01184                         rdata->ulen = 0;
01185                         rdata->data = NULL;
01186                         return (ret);
01187                 }
01188                 rdata->ulen = (u_int32_t)len;
01189         }
01190 
01191         memset(&data, 0, sizeof(data));
01192         while (recno < top) {
01193                 data.data = rdata->data;
01194                 data.size = 0;
01195                 if (F_ISSET(dbp, DB_AM_FIXEDLEN))
01196                         for (len = t->re_len; len > 0; --len) {
01197                                 if ((ch = getc(t->re_fp)) == EOF) {
01198                                         if (data.size == 0)
01199                                                 goto eof;
01200                                         break;
01201                                 }
01202                                 ((u_int8_t *)data.data)[data.size++] = ch;
01203                         }
01204                 else
01205                         for (;;) {
01206                                 if ((ch = getc(t->re_fp)) == EOF) {
01207                                         if (data.size == 0)
01208                                                 goto eof;
01209                                         break;
01210                                 }
01211                                 if (ch == t->re_delim)
01212                                         break;
01213 
01214                                 ((u_int8_t *)data.data)[data.size++] = ch;
01215                                 if (data.size == rdata->ulen) {
01216                                         if ((ret = __os_realloc(dbp->dbenv,
01217                                             rdata->ulen *= 2,
01218                                             &rdata->data)) != 0) {
01219                                                 rdata->ulen = 0;
01220                                                 rdata->data = NULL;
01221                                                 return (ret);
01222                                         } else
01223                                                 data.data = rdata->data;
01224                                 }
01225                         }
01226 
01227                 /*
01228                  * Another process may have read this record from the input
01229                  * file and stored it into the database already, in which
01230                  * case we don't need to repeat that operation.  We detect
01231                  * this by checking if the last record we've read is greater
01232                  * or equal to the number of records in the database.
01233                  */
01234                 if (t->re_last >= recno) {
01235                         ++recno;
01236                         if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0)
01237                                 goto err;
01238                 }
01239                 ++t->re_last;
01240         }
01241 
01242         if (0) {
01243 eof:            t->re_eof = 1;
01244                 ret = DB_NOTFOUND;
01245         }
01246 err:    if (!was_modified)
01247                 t->re_modified = 0;
01248 
01249         return (ret);
01250 }
01251 
01252 /*
01253  * __ram_add --
01254  *      Add records into the tree.
01255  */
01256 static int
01257 __ram_add(dbc, recnop, data, flags, bi_flags)
01258         DBC *dbc;
01259         db_recno_t *recnop;
01260         DBT *data;
01261         u_int32_t flags, bi_flags;
01262 {
01263         BTREE_CURSOR *cp;
01264         int exact, ret, stack, t_ret;
01265 
01266         cp = (BTREE_CURSOR *)dbc->internal;
01267 
01268 retry:  /* Find the slot for insertion. */
01269         if ((ret = __bam_rsearch(dbc, recnop,
01270             S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0)
01271                 return (ret);
01272         stack = 1;
01273 
01274         /* Copy the page into the cursor. */
01275         STACK_TO_CURSOR(cp, ret);
01276         if (ret != 0)
01277                 goto err;
01278 
01279         /*
01280          * The application may modify the data based on the selected record
01281          * number.
01282          */
01283         if (flags == DB_APPEND && dbc->dbp->db_append_recno != NULL &&
01284             (ret = dbc->dbp->db_append_recno(dbc->dbp, data, *recnop)) != 0)
01285                 goto err;
01286 
01287         /*
01288          * Select the arguments for __bam_iitem() and do the insert.  If the
01289          * key is an exact match, or we're replacing the data item with a
01290          * new data item, replace the current item.  If the key isn't an exact
01291          * match, we're inserting a new key/data pair, before the search
01292          * location.
01293          */
01294         switch (ret = __bam_iitem(dbc,
01295             NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
01296         case 0:
01297                 /*
01298                  * Don't adjust anything.
01299                  *
01300                  * If we inserted a record, no cursors need adjusting because
01301                  * the only new record it's possible to insert is at the very
01302                  * end of the tree.  The necessary adjustments to the internal
01303                  * page counts were made by __bam_iitem().
01304                  *
01305                  * If we overwrote a record, no cursors need adjusting because
01306                  * future DBcursor->get calls will simply return the underlying
01307                  * record (there's no adjustment made for the DB_CURRENT flag
01308                  * when a cursor get operation immediately follows a cursor
01309                  * delete operation, and the normal adjustment for the DB_NEXT
01310                  * flag is still correct).
01311                  */
01312                 break;
01313         case DB_NEEDSPLIT:
01314                 /* Discard the stack of pages and split the page. */
01315                 (void)__bam_stkrel(dbc, STK_CLRDBC);
01316                 stack = 0;
01317 
01318                 if ((ret = __bam_split(dbc, recnop, NULL)) != 0)
01319                         goto err;
01320 
01321                 goto retry;
01322                 /* NOTREACHED */
01323         default:
01324                 goto err;
01325         }
01326 
01327 err:    if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
01328                 ret = t_ret;
01329 
01330         return (ret);
01331 }

Generated on Sun Dec 25 12:14:13 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2