Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

hash.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994
00009  *      Margo Seltzer.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993, 1994
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * This code is derived from software contributed to Berkeley by
00016  * Margo Seltzer.
00017  *
00018  * Redistribution and use in source and binary forms, with or without
00019  * modification, are permitted provided that the following conditions
00020  * are met:
00021  * 1. Redistributions of source code must retain the above copyright
00022  *    notice, this list of conditions and the following disclaimer.
00023  * 2. Redistributions in binary form must reproduce the above copyright
00024  *    notice, this list of conditions and the following disclaimer in the
00025  *    documentation and/or other materials provided with the distribution.
00026  * 3. Neither the name of the University nor the names of its contributors
00027  *    may be used to endorse or promote products derived from this software
00028  *    without specific prior written permission.
00029  *
00030  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00031  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00032  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00033  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00034  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00035  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00036  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00037  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00038  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00039  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00040  * SUCH DAMAGE.
00041  *
00042  * $Id: hash.c,v 12.10 2005/10/20 18:57:07 bostic Exp $
00043  */
00044 
00045 #include "db_config.h"
00046 
00047 #ifndef NO_SYSTEM_INCLUDES
00048 #include <sys/types.h>
00049 
00050 #include <string.h>
00051 #endif
00052 
00053 #include "db_int.h"
00054 #include "dbinc/db_page.h"
00055 #include "dbinc/db_shash.h"
00056 #include "dbinc/btree.h"
00057 #include "dbinc/hash.h"
00058 #include "dbinc/lock.h"
00059 #include "dbinc/mp.h"
00060 
00061 static int  __ham_bulk __P((DBC *, DBT *, u_int32_t));
00062 static int  __ham_c_close __P((DBC *, db_pgno_t, int *));
00063 static int  __ham_c_del __P((DBC *));
00064 static int  __ham_c_destroy __P((DBC *));
00065 static int  __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
00066 static int  __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
00067 static int  __ham_c_writelock __P((DBC *));
00068 static int  __ham_dup_return __P((DBC *, DBT *, u_int32_t));
00069 static int  __ham_expand_table __P((DBC *));
00070 static int  __ham_lookup __P((DBC *,
00071                 const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *));
00072 static int  __ham_overwrite __P((DBC *, DBT *, u_int32_t));
00073 
00074 /*
00075  * __ham_quick_delete --
00076  *      This function is called by __db_del when the appropriate conditions
00077  *      are met, and it performs the delete in the optimized way.
00078  *
00079  * PUBLIC: int __ham_quick_delete __P((DBC *));
00080  */
00081 int
00082 __ham_quick_delete(dbc)
00083         DBC *dbc;
00084 {
00085         int ret, t_ret;
00086 
00087         /*
00088          * When performing a DB->del operation not involving secondary indices
00089          * and not removing an off-page duplicate tree, we can speed things up
00090          * substantially by removing the entire duplicate set, if any is
00091          * present, in one operation, rather than by conjuring up and deleting
00092          * each of the items individually.  (All are stored in one big HKEYDATA
00093          * structure.)  We don't bother to distinguish on-page duplicate sets
00094          * from single, non-dup items;  they're deleted in exactly the same way.
00095          *
00096          * The cursor should be set to the first item in the duplicate set, or
00097          * to the sole key/data pair when the key does not have a duplicate set,
00098          * before the function is called.
00099          *
00100          * We do not need to call CDB_LOCKING_INIT, __db_del calls here with
00101          * a write cursor.
00102          *
00103          * Assert we're initialized, but not to an off-page duplicate.
00104          * Assert we're not using secondary indices.
00105          */
00106         DB_ASSERT(IS_INITIALIZED(dbc));
00107         DB_ASSERT(dbc->internal->opd == NULL);
00108         DB_ASSERT(!F_ISSET(dbc->dbp, DB_AM_SECONDARY));
00109         DB_ASSERT(LIST_FIRST(&dbc->dbp->s_secondaries) == NULL);
00110 
00111         if ((ret = __ham_get_meta(dbc)) != 0)
00112                 return (ret);
00113 
00114         if ((ret = __ham_c_writelock(dbc)) == 0)
00115                 ret = __ham_del_pair(dbc, 1);
00116 
00117         if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
00118                 ret = t_ret;
00119 
00120         return (ret);
00121 }
00122 
00123 /* ****************** CURSORS ********************************** */
00124 /*
00125  * __ham_c_init --
00126  *      Initialize the hash-specific portion of a cursor.
00127  *
00128  * PUBLIC: int __ham_c_init __P((DBC *));
00129  */
00130 int
00131 __ham_c_init(dbc)
00132         DBC *dbc;
00133 {
00134         DB_ENV *dbenv;
00135         HASH_CURSOR *new_curs;
00136         int ret;
00137 
00138         dbenv = dbc->dbp->dbenv;
00139         if ((ret = __os_calloc(dbenv,
00140             1, sizeof(struct cursor_t), &new_curs)) != 0)
00141                 return (ret);
00142         if ((ret = __os_malloc(dbenv,
00143             dbc->dbp->pgsize, &new_curs->split_buf)) != 0) {
00144                 __os_free(dbenv, new_curs);
00145                 return (ret);
00146         }
00147 
00148         dbc->internal = (DBC_INTERNAL *) new_curs;
00149         dbc->c_close = __db_c_close_pp;
00150         dbc->c_count = __db_c_count_pp;
00151         dbc->c_del = __db_c_del_pp;
00152         dbc->c_dup = __db_c_dup_pp;
00153         dbc->c_get = __db_c_get_pp;
00154         dbc->c_pget = __db_c_pget_pp;
00155         dbc->c_put = __db_c_put_pp;
00156         dbc->c_am_bulk = __ham_bulk;
00157         dbc->c_am_close = __ham_c_close;
00158         dbc->c_am_del = __ham_c_del;
00159         dbc->c_am_destroy = __ham_c_destroy;
00160         dbc->c_am_get = __ham_c_get;
00161         dbc->c_am_put = __ham_c_put;
00162         dbc->c_am_writelock = __ham_c_writelock;
00163 
00164         return (__ham_item_init(dbc));
00165 }
00166 
00167 /*
00168  * __ham_c_close --
00169  *      Close down the cursor from a single use.
00170  */
00171 static int
00172 __ham_c_close(dbc, root_pgno, rmroot)
00173         DBC *dbc;
00174         db_pgno_t root_pgno;
00175         int *rmroot;
00176 {
00177         DB_MPOOLFILE *mpf;
00178         HASH_CURSOR *hcp;
00179         HKEYDATA *dp;
00180         db_lockmode_t lock_mode;
00181         int doroot, gotmeta, ret, t_ret;
00182         u_int32_t dirty;
00183 
00184         COMPQUIET(rmroot, 0);
00185         mpf = dbc->dbp->mpf;
00186         dirty = 0;
00187         doroot = gotmeta = ret = 0;
00188         hcp = (HASH_CURSOR *) dbc->internal;
00189 
00190         /* Check for off page dups. */
00191         if (dbc->internal->opd != NULL) {
00192                 if ((ret = __ham_get_meta(dbc)) != 0)
00193                         goto done;
00194                 gotmeta = 1;
00195                 lock_mode = DB_LOCK_READ;
00196 
00197                 /* To support dirty reads we must reget the write lock. */
00198                 if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) &&
00199                      F_ISSET((BTREE_CURSOR *)
00200                      dbc->internal->opd->internal, C_DELETED))
00201                         lock_mode = DB_LOCK_WRITE;
00202 
00203                 if ((ret = __ham_get_cpage(dbc, lock_mode)) != 0)
00204                         goto out;
00205                 dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx);
00206 
00207                 /* If it's not a dup we aborted before we changed it. */
00208                 if (HPAGE_PTYPE(dp) == H_OFFDUP)
00209                         memcpy(&root_pgno,
00210                             HOFFPAGE_PGNO(dp), sizeof(db_pgno_t));
00211                 else
00212                         root_pgno = PGNO_INVALID;
00213 
00214                 if ((ret =
00215                     hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0)
00216                         goto out;
00217                 if (doroot != 0) {
00218                         if ((ret = __ham_del_pair(dbc, 1)) != 0)
00219                                 goto out;
00220                         dirty = DB_MPOOL_DIRTY;
00221                 }
00222         }
00223 
00224 out:    if (hcp->page != NULL && (t_ret =
00225             __memp_fput(mpf, hcp->page, dirty)) != 0 && ret == 0)
00226                 ret = t_ret;
00227         if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
00228                 ret = t_ret;
00229 
00230 done:   if ((t_ret = __ham_item_init(dbc)) != 0 && ret == 0)
00231                 ret = t_ret;
00232         return (ret);
00233 }
00234 
00235 /*
00236  * __ham_c_destroy --
00237  *      Cleanup the access method private part of a cursor.
00238  */
00239 static int
00240 __ham_c_destroy(dbc)
00241         DBC *dbc;
00242 {
00243         HASH_CURSOR *hcp;
00244 
00245         hcp = (HASH_CURSOR *)dbc->internal;
00246         if (hcp->split_buf != NULL)
00247                 __os_free(dbc->dbp->dbenv, hcp->split_buf);
00248         __os_free(dbc->dbp->dbenv, hcp);
00249 
00250         return (0);
00251 }
00252 
00253 /*
00254  * __ham_c_count --
00255  *      Return a count of on-page duplicates.
00256  *
00257  * PUBLIC: int __ham_c_count __P((DBC *, db_recno_t *));
00258  */
00259 int
00260 __ham_c_count(dbc, recnop)
00261         DBC *dbc;
00262         db_recno_t *recnop;
00263 {
00264         DB *dbp;
00265         DB_MPOOLFILE *mpf;
00266         HASH_CURSOR *hcp;
00267         db_indx_t len;
00268         db_recno_t recno;
00269         int ret, t_ret;
00270         u_int8_t *p, *pend;
00271 
00272         dbp = dbc->dbp;
00273         mpf = dbp->mpf;
00274         hcp = (HASH_CURSOR *)dbc->internal;
00275 
00276         recno = 0;
00277 
00278         if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0)
00279                 return (ret);
00280         if (hcp->indx >= NUM_ENT(hcp->page)) {
00281                 *recnop = 0;
00282                 goto err;
00283         }
00284 
00285         switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) {
00286         case H_KEYDATA:
00287         case H_OFFPAGE:
00288                 recno = 1;
00289                 break;
00290         case H_DUPLICATE:
00291                 p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
00292                 pend = p +
00293                     LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx);
00294                 for (; p < pend; recno++) {
00295                         /* p may be odd, so copy rather than just dereffing */
00296                         memcpy(&len, p, sizeof(db_indx_t));
00297                         p += 2 * sizeof(db_indx_t) + len;
00298                 }
00299 
00300                 break;
00301         default:
00302                 ret = __db_pgfmt(dbp->dbenv, hcp->pgno);
00303                 goto err;
00304         }
00305 
00306         *recnop = recno;
00307 
00308 err:    if ((t_ret = __memp_fput(mpf, hcp->page, 0)) != 0 && ret == 0)
00309                 ret = t_ret;
00310         hcp->page = NULL;
00311         return (ret);
00312 }
00313 
00314 static int
00315 __ham_c_del(dbc)
00316         DBC *dbc;
00317 {
00318         DB *dbp;
00319         DBT repldbt;
00320         DB_MPOOLFILE *mpf;
00321         HASH_CURSOR *hcp;
00322         int ret, t_ret;
00323 
00324         dbp = dbc->dbp;
00325         mpf = dbp->mpf;
00326         hcp = (HASH_CURSOR *)dbc->internal;
00327 
00328         if (F_ISSET(hcp, H_DELETED))
00329                 return (DB_NOTFOUND);
00330 
00331         if ((ret = __ham_get_meta(dbc)) != 0)
00332                 goto out;
00333 
00334         if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0)
00335                 goto out;
00336 
00337         /* Off-page duplicates. */
00338         if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP)
00339                 goto out;
00340 
00341         if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. */
00342                 if (hcp->dup_off == 0 &&
00343                     DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page,
00344                     hcp->hdr->dbmeta.pagesize, hcp->indx))
00345                         ret = __ham_del_pair(dbc, 1);
00346                 else {
00347                         repldbt.flags = 0;
00348                         F_SET(&repldbt, DB_DBT_PARTIAL);
00349                         repldbt.doff = hcp->dup_off;
00350                         repldbt.dlen = DUP_SIZE(hcp->dup_len);
00351                         repldbt.size = 0;
00352                         repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page,
00353                             hcp->indx));
00354                         if ((ret = __ham_replpair(dbc, &repldbt, 0)) == 0) {
00355                                 hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
00356                                 F_SET(hcp, H_DELETED);
00357                                 ret = __ham_c_update(dbc,
00358                                     DUP_SIZE(hcp->dup_len), 0, 1);
00359                         }
00360                 }
00361 
00362         } else /* Not a duplicate */
00363                 ret = __ham_del_pair(dbc, 1);
00364 
00365 out:    if (hcp->page != NULL) {
00366                 if ((t_ret = __memp_fput(mpf,
00367                     hcp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0)
00368                         ret = t_ret;
00369                 hcp->page = NULL;
00370         }
00371         if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
00372                 ret = t_ret;
00373         return (ret);
00374 }
00375 
00376 /*
00377  * __ham_c_dup --
00378  *      Duplicate a hash cursor, such that the new one holds appropriate
00379  *      locks for the position of the original.
00380  *
00381  * PUBLIC: int __ham_c_dup __P((DBC *, DBC *));
00382  */
00383 int
00384 __ham_c_dup(orig_dbc, new_dbc)
00385         DBC *orig_dbc, *new_dbc;
00386 {
00387         HASH_CURSOR *orig, *new;
00388         int ret;
00389 
00390         orig = (HASH_CURSOR *)orig_dbc->internal;
00391         new = (HASH_CURSOR *)new_dbc->internal;
00392 
00393         new->bucket = orig->bucket;
00394         new->lbucket = orig->lbucket;
00395         new->dup_off = orig->dup_off;
00396         new->dup_len = orig->dup_len;
00397         new->dup_tlen = orig->dup_tlen;
00398 
00399         if (F_ISSET(orig, H_DELETED))
00400                 F_SET(new, H_DELETED);
00401         if (F_ISSET(orig, H_ISDUP))
00402                 F_SET(new, H_ISDUP);
00403 
00404         /*
00405          * If the old cursor held a lock and we're not in transactions, get one
00406          * for the new one.   The reason that we don't need a new lock if we're
00407          * in a transaction is because we already hold a lock and will continue
00408          * to do so until commit, so there is no point in re-acquiring it. We
00409          * don't know if the old lock was a read or write lock, but it doesn't
00410          * matter. We'll get a read lock.  We know that this locker already
00411          * holds a lock of the correct type, so if we need a write lock and
00412          * request it, we know that we'll get it.
00413          */
00414         if (orig_dbc->txn == NULL && LOCK_ISSET(orig->lock))
00415                 if ((ret = __ham_lock_bucket(new_dbc, DB_LOCK_READ)) != 0)
00416                         return (ret);
00417 
00418         return (0);
00419 }
00420 
00421 static int
00422 __ham_c_get(dbc, key, data, flags, pgnop)
00423         DBC *dbc;
00424         DBT *key;
00425         DBT *data;
00426         u_int32_t flags;
00427         db_pgno_t *pgnop;
00428 {
00429         DB *dbp;
00430         DB_MPOOLFILE *mpf;
00431         HASH_CURSOR *hcp;
00432         db_lockmode_t lock_type;
00433         int get_key, ret, t_ret;
00434 
00435         hcp = (HASH_CURSOR *)dbc->internal;
00436         dbp = dbc->dbp;
00437         mpf = dbp->mpf;
00438 
00439         /* Clear OR'd in additional bits so we can check for flag equality. */
00440         if (F_ISSET(dbc, DBC_RMW))
00441                 lock_type = DB_LOCK_WRITE;
00442         else
00443                 lock_type = DB_LOCK_READ;
00444 
00445         if ((ret = __ham_get_meta(dbc)) != 0)
00446                 return (ret);
00447         hcp->seek_size = 0;
00448 
00449         ret = 0;
00450         get_key = 1;
00451         switch (flags) {
00452         case DB_PREV_NODUP:
00453                 F_SET(hcp, H_NEXT_NODUP);
00454                 /* FALLTHROUGH */
00455         case DB_PREV:
00456                 if (IS_INITIALIZED(dbc)) {
00457                         ret = __ham_item_prev(dbc, lock_type, pgnop);
00458                         break;
00459                 }
00460                 /* FALLTHROUGH */
00461         case DB_LAST:
00462                 ret = __ham_item_last(dbc, lock_type, pgnop);
00463                 break;
00464         case DB_NEXT_NODUP:
00465                 F_SET(hcp, H_NEXT_NODUP);
00466                 /* FALLTHROUGH */
00467         case DB_NEXT:
00468                 if (IS_INITIALIZED(dbc)) {
00469                         ret = __ham_item_next(dbc, lock_type, pgnop);
00470                         break;
00471                 }
00472                 /* FALLTHROUGH */
00473         case DB_FIRST:
00474                 ret = __ham_item_first(dbc, lock_type, pgnop);
00475                 break;
00476         case DB_NEXT_DUP:
00477                 /* cgetchk has already determined that the cursor is set. */
00478                 F_SET(hcp, H_DUPONLY);
00479                 ret = __ham_item_next(dbc, lock_type, pgnop);
00480                 break;
00481         case DB_SET:
00482         case DB_SET_RANGE:
00483         case DB_GET_BOTH:
00484         case DB_GET_BOTH_RANGE:
00485                 ret = __ham_lookup(dbc, key, 0, lock_type, pgnop);
00486                 get_key = 0;
00487                 break;
00488         case DB_GET_BOTHC:
00489                 F_SET(hcp, H_DUPONLY);
00490 
00491                 ret = __ham_item_next(dbc, lock_type, pgnop);
00492                 get_key = 0;
00493                 break;
00494         case DB_CURRENT:
00495                 /* cgetchk has already determined that the cursor is set. */
00496                 if (F_ISSET(hcp, H_DELETED)) {
00497                         ret = DB_KEYEMPTY;
00498                         goto err;
00499                 }
00500 
00501                 ret = __ham_item(dbc, lock_type, pgnop);
00502                 break;
00503         default:
00504                 ret = __db_unknown_flag(dbp->dbenv, "__ham_c_get", flags);
00505                 break;
00506         }
00507 
00508         /*
00509          * Must always enter this loop to do error handling and
00510          * check for big key/data pair.
00511          */
00512         for (;;) {
00513                 if (ret != 0 && ret != DB_NOTFOUND)
00514                         goto err;
00515                 else if (F_ISSET(hcp, H_OK)) {
00516                         if (*pgnop == PGNO_INVALID)
00517                                 ret = __ham_dup_return(dbc, data, flags);
00518                         break;
00519                 } else if (!F_ISSET(hcp, H_NOMORE)) {
00520                         __db_err(dbp->dbenv,
00521                             "H_NOMORE returned to __ham_c_get");
00522                         ret = EINVAL;
00523                         break;
00524                 }
00525 
00526                 /*
00527                  * Ran out of entries in a bucket; change buckets.
00528                  */
00529                 switch (flags) {
00530                         case DB_LAST:
00531                         case DB_PREV:
00532                         case DB_PREV_NODUP:
00533                                 ret = __memp_fput(mpf, hcp->page, 0);
00534                                 hcp->page = NULL;
00535                                 if (hcp->bucket == 0) {
00536                                         ret = DB_NOTFOUND;
00537                                         hcp->pgno = PGNO_INVALID;
00538                                         goto err;
00539                                 }
00540                                 F_CLR(hcp, H_ISDUP);
00541                                 hcp->bucket--;
00542                                 hcp->indx = NDX_INVALID;
00543                                 hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
00544                                 if (ret == 0)
00545                                         ret = __ham_item_prev(dbc,
00546                                             lock_type, pgnop);
00547                                 break;
00548                         case DB_FIRST:
00549                         case DB_NEXT:
00550                         case DB_NEXT_NODUP:
00551                                 ret = __memp_fput(mpf, hcp->page, 0);
00552                                 hcp->page = NULL;
00553                                 hcp->indx = NDX_INVALID;
00554                                 hcp->bucket++;
00555                                 F_CLR(hcp, H_ISDUP);
00556                                 hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
00557                                 if (hcp->bucket > hcp->hdr->max_bucket) {
00558                                         ret = DB_NOTFOUND;
00559                                         hcp->pgno = PGNO_INVALID;
00560                                         goto err;
00561                                 }
00562                                 if (ret == 0)
00563                                         ret = __ham_item_next(dbc,
00564                                             lock_type, pgnop);
00565                                 break;
00566                         case DB_GET_BOTH:
00567                         case DB_GET_BOTHC:
00568                         case DB_GET_BOTH_RANGE:
00569                         case DB_NEXT_DUP:
00570                         case DB_SET:
00571                         case DB_SET_RANGE:
00572                                 /* Key not found. */
00573                                 ret = DB_NOTFOUND;
00574                                 goto err;
00575                         case DB_CURRENT:
00576                                 /*
00577                                  * This should only happen if you are doing
00578                                  * deletes and reading with concurrent threads
00579                                  * and not doing proper locking.  We return
00580                                  * the same error code as we would if the
00581                                  * cursor were deleted.
00582                                  */
00583                                 ret = DB_KEYEMPTY;
00584                                 goto err;
00585                         default:
00586                                 DB_ASSERT(0);
00587                 }
00588         }
00589 
00590         if (get_key == 0)
00591                 F_SET(key, DB_DBT_ISSET);
00592 
00593 err:    if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
00594                 ret = t_ret;
00595 
00596         F_CLR(hcp, H_DUPONLY);
00597         F_CLR(hcp, H_NEXT_NODUP);
00598 
00599         return (ret);
00600 }
00601 
00602 /*
00603  * __ham_bulk -- Return bulk data from a hash table.
00604  */
00605 static int
00606 __ham_bulk(dbc, data, flags)
00607         DBC *dbc;
00608         DBT *data;
00609         u_int32_t flags;
00610 {
00611         DB *dbp;
00612         DB_MPOOLFILE *mpf;
00613         HASH_CURSOR *cp;
00614         PAGE *pg;
00615         db_indx_t dup_len, dup_off, dup_tlen, indx, *inp;
00616         db_lockmode_t lock_mode;
00617         db_pgno_t pgno;
00618         int32_t *endp, *offp, *saveoff;
00619         u_int32_t key_off, key_size, pagesize, size, space;
00620         u_int8_t *dbuf, *dp, *hk, *np, *tmp;
00621         int is_dup, is_key;
00622         int need_pg, next_key, no_dup, ret, t_ret;
00623 
00624         ret = 0;
00625         key_off = 0;
00626         dup_len = dup_off = dup_tlen = 0;
00627         size = 0;
00628         dbp = dbc->dbp;
00629         pagesize = dbp->pgsize;
00630         mpf = dbp->mpf;
00631         cp = (HASH_CURSOR *)dbc->internal;
00632         is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0;
00633         next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
00634         no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP;
00635         dbuf = data->data;
00636         np = dp = dbuf;
00637 
00638         /* Keep track of space that is left.  There is a termination entry */
00639         space = data->ulen;
00640         space -= sizeof(*offp);
00641 
00642         /* Build the offset/size table from the end up. */
00643         endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen);
00644         endp--;
00645         offp = endp;
00646 
00647         key_size = 0;
00648         lock_mode = F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE: DB_LOCK_READ;
00649 
00650 next_pg:
00651         need_pg = 1;
00652         indx = cp->indx;
00653         pg = cp->page;
00654         inp = P_INP(dbp, pg);
00655 
00656         do {
00657                 if (is_key) {
00658                         hk = H_PAIRKEY(dbp, pg, indx);
00659                         if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
00660                                 memcpy(&key_size,
00661                                     HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
00662                                 memcpy(&pgno,
00663                                     HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
00664                                 size = key_size;
00665                                 if (key_size > space)
00666                                         goto get_key_space;
00667                                 if ((ret = __bam_bulk_overflow(
00668                                     dbc, key_size, pgno, np)) != 0)
00669                                         return (ret);
00670                                 space -= key_size;
00671                                 key_off = (u_int32_t)(np - dbuf);
00672                                 np += key_size;
00673                         } else {
00674                                 if (need_pg) {
00675                                         dp = np;
00676                                         size = pagesize - HOFFSET(pg);
00677                                         if (space < size) {
00678 get_key_space:
00679                                                 if (offp == endp) {
00680                                                         data->size = (u_int32_t)
00681                                                             DB_ALIGN(size +
00682                                                             pagesize, 1024);
00683                                                         return
00684                                                             (DB_BUFFER_SMALL);
00685                                                 }
00686                                                 goto back_up;
00687                                         }
00688                                         memcpy(dp,
00689                                            (u_int8_t *)pg + HOFFSET(pg), size);
00690                                         need_pg = 0;
00691                                         space -= size;
00692                                         np += size;
00693                                 }
00694                                 key_size = LEN_HKEY(dbp, pg, pagesize, indx);
00695                                 key_off = ((inp[indx] - HOFFSET(pg)) +
00696                                     (u_int32_t)(dp - dbuf)) +
00697                                     SSZA(HKEYDATA, data);
00698                         }
00699                 }
00700 
00701                 hk = H_PAIRDATA(dbp, pg, indx);
00702                 switch (HPAGE_PTYPE(hk)) {
00703                 case H_DUPLICATE:
00704                 case H_KEYDATA:
00705                         if (need_pg) {
00706                                 dp = np;
00707                                 size = pagesize - HOFFSET(pg);
00708                                 if (space < size) {
00709 back_up:
00710                                         if (indx != 0) {
00711                                                 indx -= 2;
00712                                                 /* XXX
00713                                                  * It's not clear that this is
00714                                                  * the right way to fix this,
00715                                                  * but here goes.
00716                                                  * If we are backing up onto a
00717                                                  * duplicate, then we need to
00718                                                  * position ourselves at the
00719                                                  * end of the duplicate set.
00720                                                  * We probably need to make
00721                                                  * this work for H_OFFDUP too.
00722                                                  * It might be worth making a
00723                                                  * dummy cursor and calling
00724                                                  * __ham_item_prev.
00725                                                  */
00726                                                 tmp = H_PAIRDATA(dbp, pg, indx);
00727                                                 if (HPAGE_PTYPE(tmp) ==
00728                                                     H_DUPLICATE) {
00729                                                         dup_off = dup_tlen =
00730                                                             LEN_HDATA(dbp, pg,
00731                                                             pagesize, indx + 1);
00732                                                         memcpy(&dup_len,
00733                                                             HKEYDATA_DATA(tmp),
00734                                                             sizeof(db_indx_t));
00735                                                 } else  {
00736                                                         is_dup = 0;
00737                                                         dup_len = 0;
00738                                                         dup_off = 0;
00739                                                         dup_tlen = 0;
00740                                                         F_CLR(cp, H_ISDUP);
00741                                                 }
00742                                                 goto get_space;
00743                                         }
00744                                         /* indx == 0 */
00745                                         cp->dup_len = dup_len;
00746                                         cp->dup_off = dup_off;
00747                                         cp->dup_tlen = dup_tlen;
00748                                         if ((ret = __ham_item_prev(dbc,
00749                                             lock_mode, &pgno)) != 0) {
00750                                                 if (ret != DB_NOTFOUND)
00751                                                         return (ret);
00752                                                 if ((ret = __memp_fput(mpf,
00753                                                     cp->page, 0)) != 0)
00754                                                         return (ret);
00755                                                 cp->page = NULL;
00756                                                 if (cp->bucket == 0) {
00757                                                         cp->indx = indx =
00758                                                             NDX_INVALID;
00759                                                         goto get_space;
00760                                                 }
00761                                                 if ((ret =
00762                                                     __ham_get_meta(dbc)) != 0)
00763                                                         return (ret);
00764 
00765                                                 cp->bucket--;
00766                                                 cp->pgno = BUCKET_TO_PAGE(cp,
00767                                                     cp->bucket);
00768                                                 cp->indx = NDX_INVALID;
00769                                                 if ((ret = __ham_release_meta(
00770                                                     dbc)) != 0)
00771                                                         return (ret);
00772                                                 if ((ret = __ham_item_prev(dbc,
00773                                                     lock_mode, &pgno)) != 0)
00774                                                         return (ret);
00775                                         }
00776                                         indx = cp->indx;
00777 get_space:
00778                                         /*
00779                                          * See if we put any data in the buffer.
00780                                          */
00781                                         if (offp >= endp ||
00782                                             F_ISSET(dbc, DBC_TRANSIENT)) {
00783                                                 data->size = (u_int32_t)
00784                                                     DB_ALIGN(size +
00785                                                     data->ulen - space, 1024);
00786                                                 return (DB_BUFFER_SMALL);
00787                                         }
00788                                         /*
00789                                          * Don't continue;  we're all out
00790                                          * of space, even though we're
00791                                          * returning success.
00792                                          */
00793                                         next_key = 0;
00794                                         break;
00795                                 }
00796                                 memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size);
00797                                 need_pg = 0;
00798                                 space -= size;
00799                                 np += size;
00800                         }
00801 
00802                         /*
00803                          * We're about to crack the offset(s) and length(s)
00804                          * out of an H_KEYDATA or H_DUPLICATE item.
00805                          * There are three cases:
00806                          *   1. We were moved into a duplicate set by
00807                          *      the standard hash cursor code.  Respect
00808                          *      the dup_off and dup_tlen we were given.
00809                          *   2. We stumbled upon a duplicate set while
00810                          *      walking the page on our own.  We need to
00811                          *      recognize it as a dup and set dup_off and
00812                          *      dup_tlen.
00813                          *   3. The current item is not a dup.
00814                          */
00815                         if (F_ISSET(cp, H_ISDUP)) {
00816                                 /* Case 1 */
00817                                 is_dup = 1;
00818                                 dup_len = cp->dup_len;
00819                                 dup_off = cp->dup_off;
00820                                 dup_tlen = cp->dup_tlen;
00821                         } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) {
00822                                 /* Case 2 */
00823                                 is_dup = 1;
00824                                 /*
00825                                  * If we run out of memory and bail,
00826                                  * make sure the fact we're in a dup set
00827                                  * isn't ignored later.
00828                                  */
00829                                 F_SET(cp, H_ISDUP);
00830                                 dup_off = 0;
00831                                 memcpy(&dup_len,
00832                                     HKEYDATA_DATA(hk), sizeof(db_indx_t));
00833                                 dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx);
00834                         } else {
00835                                 /* Case 3 */
00836                                 is_dup = 0;
00837                                 dup_len = 0;
00838                                 dup_off = 0;
00839                                 dup_tlen = 0;
00840                         }
00841 
00842                         do {
00843                                 space -= (is_key ? 4 : 2) * sizeof(*offp);
00844                                 size += (is_key ? 4 : 2) * sizeof(*offp);
00845                                 /*
00846                                  * Since space is an unsigned, if we happen
00847                                  * to wrap, then this comparison will turn out
00848                                  * to be true.  XXX Wouldn't it be better to
00849                                  * simply check above that space is greater than
00850                                  * the value we're about to subtract???
00851                                  */
00852                                 if (space > data->ulen) {
00853                                         if (!is_dup || dup_off == 0)
00854                                                 goto back_up;
00855                                         dup_off -= (db_indx_t)
00856                                             DUP_SIZE((u_int32_t)offp[1]);
00857                                         goto get_space;
00858                                 }
00859                                 if (is_key) {
00860                                         *offp-- = (int32_t)key_off;
00861                                         *offp-- = (int32_t)key_size;
00862                                 }
00863                                 if (is_dup) {
00864                                         *offp-- = (int32_t)(
00865                                             ((inp[indx + 1] - HOFFSET(pg)) +
00866                                             dp - dbuf) + SSZA(HKEYDATA, data) +
00867                                             dup_off + sizeof(db_indx_t));
00868                                         memcpy(&dup_len,
00869                                             HKEYDATA_DATA(hk) + dup_off,
00870                                             sizeof(db_indx_t));
00871                                         dup_off += DUP_SIZE(dup_len);
00872                                         *offp-- = dup_len;
00873                                 } else {
00874                                         *offp-- = (int32_t)(
00875                                             ((inp[indx + 1] - HOFFSET(pg)) +
00876                                             dp - dbuf) + SSZA(HKEYDATA, data));
00877                                         *offp-- = LEN_HDATA(dbp, pg,
00878                                             pagesize, indx);
00879                                 }
00880                         } while (is_dup && dup_off < dup_tlen && no_dup == 0);
00881                         F_CLR(cp, H_ISDUP);
00882                         break;
00883                 case H_OFFDUP:
00884                         memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
00885                         space -= 2 * sizeof(*offp);
00886                         if (space > data->ulen)
00887                                 goto back_up;
00888 
00889                         if (is_key) {
00890                                 space -= 2 * sizeof(*offp);
00891                                 if (space > data->ulen)
00892                                         goto back_up;
00893                                 *offp-- = (int32_t)key_off;
00894                                 *offp-- = (int32_t)key_size;
00895                         }
00896                         saveoff = offp;
00897                         if ((ret = __bam_bulk_duplicates(dbc,
00898                             pgno, dbuf, is_key ? offp + 2 : NULL,
00899                             &offp, &np, &space, no_dup)) != 0) {
00900                                 if (ret == DB_BUFFER_SMALL) {
00901                                         size = space;
00902                                         space = 0;
00903                                         if (is_key && saveoff == offp) {
00904                                                 offp += 2;
00905                                                 goto back_up;
00906                                         }
00907                                         goto get_space;
00908                                 }
00909                                 return (ret);
00910                         }
00911                         break;
00912                 case H_OFFPAGE:
00913                         space -= (is_key ? 4 : 2) * sizeof(*offp);
00914                         if (space > data->ulen)
00915                                 goto back_up;
00916 
00917                         memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
00918                         memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
00919                         if (size > space)
00920                                 goto back_up;
00921 
00922                         if ((ret =
00923                             __bam_bulk_overflow(dbc, size, pgno, np)) != 0)
00924                                 return (ret);
00925 
00926                         if (is_key) {
00927                                 *offp-- = (int32_t)key_off;
00928                                 *offp-- = (int32_t)key_size;
00929                         }
00930 
00931                         *offp-- = (int32_t)(np - dbuf);
00932                         *offp-- = (int32_t)size;
00933 
00934                         np += size;
00935                         space -= size;
00936                         break;
00937                 default:
00938                         /* Do nothing. */
00939                         break;
00940                 }
00941         } while (next_key && (indx += 2) < NUM_ENT(pg));
00942 
00943         cp->indx = indx;
00944         cp->dup_len = dup_len;
00945         cp->dup_off = dup_off;
00946         cp->dup_tlen = dup_tlen;
00947 
00948         /* If we are off the page then try to the next page. */
00949         if (ret == 0 && next_key && indx >= NUM_ENT(pg)) {
00950                 if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0)
00951                         goto next_pg;
00952                 if (ret != DB_NOTFOUND)
00953                         return (ret);
00954                 if ((ret = __memp_fput(dbc->dbp->mpf, cp->page, 0)) != 0)
00955                         return (ret);
00956                 cp->page = NULL;
00957                 if ((ret = __ham_get_meta(dbc)) != 0)
00958                         return (ret);
00959 
00960                 cp->bucket++;
00961                 if (cp->bucket > cp->hdr->max_bucket) {
00962                         /*
00963                          * Restore cursor to its previous state.  We're past
00964                          * the last item in the last bucket, so the next
00965                          * DBC->c_get(DB_NEXT) will return DB_NOTFOUND.
00966                          */
00967                         cp->bucket--;
00968                         ret = DB_NOTFOUND;
00969                 } else {
00970                         /*
00971                          * Start on the next bucket.
00972                          *
00973                          * Note that if this new bucket happens to be empty,
00974                          * but there's another non-empty bucket after it,
00975                          * we'll return early.  This is a rare case, and we
00976                          * don't guarantee any particular number of keys
00977                          * returned on each call, so just let the next call
00978                          * to bulk get move forward by yet another bucket.
00979                          */
00980                         cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket);
00981                         cp->indx = NDX_INVALID;
00982                         F_CLR(cp, H_ISDUP);
00983                         ret = __ham_item_next(dbc, lock_mode, &pgno);
00984                 }
00985 
00986                 if ((t_ret = __ham_release_meta(dbc)) != 0)
00987                         return (t_ret);
00988                 if (ret == 0)
00989                         goto next_pg;
00990                 if (ret != DB_NOTFOUND)
00991                         return (ret);
00992         }
00993         *offp = -1;
00994         return (0);
00995 }
00996 
/*
 * __ham_c_put --
 *      Hash access-method cursor put: store data under a key, or relative
 *      to the cursor's current item, as selected by flags.
 *
 *      dbc:   cursor to operate on; hash state comes from dbc->internal.
 *      key:   key DBT, used by DB_KEYFIRST, DB_KEYLAST and DB_NODUPDATA.
 *      data:  data DBT to store.  A DB_DBT_PARTIAL put with a non-zero
 *             doff on a key that does not exist yet is zero-padded here.
 *      flags: DB_KEYFIRST, DB_KEYLAST, DB_NODUPDATA, DB_BEFORE, DB_AFTER
 *             or DB_CURRENT; anything else goes to __db_unknown_flag.
 *      pgnop: page number passed through to __ham_lookup, __ham_item and
 *             __ham_add_dup.  The overwrite/add-duplicate step only runs
 *             while *pgnop is PGNO_INVALID.
 *             NOTE(review): presumably a valid *pgnop names an off-page
 *             duplicate tree the caller finishes the put in -- confirm.
 *
 *      Returns 0 on success, DB_NOTFOUND when the cursor is flagged
 *      H_DELETED and flags is neither DB_KEYFIRST nor DB_KEYLAST, and
 *      otherwise an error from one of the helpers called.
 */
00997 static int
00998 __ham_c_put(dbc, key, data, flags, pgnop)
00999         DBC *dbc;
01000         DBT *key;
01001         DBT *data;
01002         u_int32_t flags;
01003         db_pgno_t *pgnop;
01004 {
01005         DB *dbp;
01006         DB_MPOOLFILE *mpf;
01007         DBT tmp_val, *myval;
01008         HASH_CURSOR *hcp;
01009         u_int32_t nbytes;
01010         int ret, t_ret;
01011 
01012         /*
01013          * The compiler doesn't realize that we only use this when ret is
01014          * equal to 0 and that if ret is equal to 0, that we must have set
01015          * myval.  So, we initialize it here to shut the compiler up.
01016          */
01017         COMPQUIET(myval, NULL);
01018 
01019         dbp = dbc->dbp;
01020         mpf = dbp->mpf;
01021         hcp = (HASH_CURSOR *)dbc->internal;
01022 
              /*
               * With H_DELETED set on the cursor, only DB_KEYFIRST and
               * DB_KEYLAST may proceed; every other flag gets DB_NOTFOUND.
               */
01023         if (F_ISSET(hcp, H_DELETED) &&
01024             flags != DB_KEYFIRST && flags != DB_KEYLAST)
01025                 return (DB_NOTFOUND);
01026 
01027         if ((ret = __ham_get_meta(dbc)) != 0)
01028                 goto err1;
01029 
01030         switch (flags) {
01031         case DB_KEYLAST:
01032         case DB_KEYFIRST:
01033         case DB_NODUPDATA:
                      /*
                       * nbytes is the on-page size of the pair: an HOFFPAGE
                       * entry for each item ISBIG reports as big, an HKEYDATA
                       * entry sized to the item otherwise.
                       */
01034                 nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
01035                     HKEYDATA_PSIZE(key->size)) +
01036                     (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
01037                     HKEYDATA_PSIZE(data->size));
01038                 if ((ret = __ham_lookup(dbc,
01039                     key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) {
                              /* The key is not in the table: add a new pair. */
01040                         ret = 0;
                              /*
                               * NOTE(review): seek_found_page looks like a page
                               * the lookup noted as having room for nbytes --
                               * confirm.  If it is set and is not the current
                               * page, release the current page and aim the
                               * cursor at it instead.
                               */
01041                         if (hcp->seek_found_page != PGNO_INVALID &&
01042                             hcp->seek_found_page != hcp->pgno) {
01043                                 if ((ret = __memp_fput(mpf, hcp->page, 0)) != 0)
01044                                         goto err2;
01045                                 hcp->page = NULL;
01046                                 hcp->pgno = hcp->seek_found_page;
01047                                 hcp->indx = NDX_INVALID;
01048                         }
01049 
01050                         if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
01051                                 /*
01052                                  * A partial put, but the key does not exist
01053                                  * and we are not beginning the write at 0.
01054                                  * We must create a data item padded up to doff
01055                                  * and then write the new bytes represented by
01056                                  * val.
01057                                  */
01058                                 if ((ret = __ham_init_dbt(dbp->dbenv, &tmp_val,
01059                                     data->size + data->doff,
01060                                     &dbc->my_rdata.data,
01061                                     &dbc->my_rdata.ulen)) == 0) {
01062                                         memset(tmp_val.data, 0, data->doff);
01063                                         memcpy((u_int8_t *)tmp_val.data +
01064                                             data->doff, data->data, data->size);
01065                                         myval = &tmp_val;
01066                                 }
01067                         } else
01068                                 myval = (DBT *)data;
01069 
01070                         if (ret == 0)
01071                                 ret = __ham_add_el(dbc, key, myval, H_KEYDATA);
01072                         goto done;
01073                 }
01074                 break;
01075         case DB_BEFORE:
01076         case DB_AFTER:
01077         case DB_CURRENT:
01078                 ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop);
01079                 break;
01080         default:
01081                 ret = __db_unknown_flag(dbp->dbenv, "__ham_c_put", flags);
01082                 break;
01083         }
01084 
              /*
               * The lookup or positioning succeeded.  Overwrite the item for
               * DB_CURRENT, or for a key-based put when neither the database
               * (DB_AM_DUP) nor the key DBT (DB_DBT_DUPOK) allows duplicates;
               * in every other case add a duplicate.
               */
01085         if (*pgnop == PGNO_INVALID && ret == 0) {
01086                 if (flags == DB_CURRENT ||
01087                     ((flags == DB_KEYFIRST ||
01088                     flags == DB_KEYLAST || flags == DB_NODUPDATA) &&
01089                     !(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))))
01090                         ret = __ham_overwrite(dbc, data, flags);
01091                 else
01092                         ret = __ham_add_dup(dbc, data, flags, pgnop);
01093         }
01094 
              /*
               * Hand the page back marked dirty.  An earlier error in ret is
               * kept; the page pointer is cleared only if the put worked.
               */
01095 done:   if (hcp->page != NULL) {
01096                 if ((t_ret = __memp_fput(mpf,
01097                     hcp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
01098                         ret = t_ret;
01099                 if (t_ret == 0)
01100                         hcp->page = NULL;
01101         }
01102 
              /* Grow the table if H_EXPAND is set and nothing has failed. */
01103         if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
01104                 ret = __ham_expand_table(dbc);
01105                 F_CLR(hcp, H_EXPAND);
01106                 /* Out of space is ignored, but only with no transaction. */
01107                 if (ret == ENOSPC && dbc->txn == NULL)
01108                         ret = 0;
01109         }
01110 
              /* Pairs with the __ham_get_meta call above. */
01111 err2:   if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
01112                 ret = t_ret;
01113 
01114 err1:   return (ret);
01115 }
01116 
01117 /********************************* UTILITIES ************************/
01118 
01119 /*
01120  * __ham_expand_table --
       *      Add one bucket (max_bucket + 1) to the hash table and split the
       *      records of the corresponding low-mask bucket into it.
       *
       *      When max_bucket equals high_mask the split starts a new
       *      doubling.  If no pages exist for that doubling yet
       *      (spares[logn + 1] is PGNO_INVALID) the group is allocated here
       *      by creating its last page beyond last_pgno of the meta-data
       *      page (the master meta-data page for a subdatabase).  With
       *      logging on, the change is logged before the pages are updated.
       *
       *      Returns 0 on success, otherwise the first error encountered.
01121  */
01122 static int
01123 __ham_expand_table(dbc)
01124         DBC *dbc;
01125 {
01126         DB *dbp;
01127         DB_LOCK metalock;
01128         DB_LSN lsn;
01129         DB_MPOOLFILE *mpf;
01130         DBMETA *mmeta;
01131         HASH_CURSOR *hcp;
01132         PAGE *h;
01133         db_pgno_t pgno, mpgno;
01134         u_int32_t dirty_meta, logn, newalloc, new_bucket, old_bucket;
01135         int got_meta, new_double, ret, t_ret;
01136 
01137         dbp = dbc->dbp;
01138         mpf = dbp->mpf;
01139         hcp = (HASH_CURSOR *)dbc->internal;
01140         if ((ret = __ham_dirty_meta(dbc)) != 0)
01141                 return (ret);
01142 
              /*
               * Until a subdatabase says otherwise below, the hash meta-data
               * page doubles as the page used for allocation.
               */
01143         LOCK_INIT(metalock);
01144         mmeta = (DBMETA *) hcp->hdr;
01145         mpgno = mmeta->pgno;
01146         h = NULL;
01147         dirty_meta = newalloc = 0;
01148         got_meta = 0;
01149 
01150         /*
01151          * If the split point is about to increase, make sure that we
01152          * have enough extra pages.  The calculation here is weird.
01153          * We'd like to do this after we've upped max_bucket, but it's
01154          * too late then because we've logged the meta-data split.  What
01155          * we'll do between then and now is increment max bucket and then
01156          * see what the log of one greater than that is; here we have to
01157          * look at the log of max + 2.  VERY NASTY STUFF.
01158          *
01159          * We figure out what we need to do, then we log it, then request
01160          * the pages from mpool.  We don't want to fail after extending
01161          * the file.
01162          *
01163          * If the page we are about to split into has already been allocated,
01164          * then we simply need to get it to get its LSN.  If it hasn't yet
01165          * been allocated, then we know its LSN is (0,0).
01166          */
01167 
              /*
               * new_bucket is the bucket being added and old_bucket the
               * existing bucket it is split from.  new_double is set when
               * max_bucket has reached high_mask, i.e. a new doubling starts.
               */
01168         new_bucket = hcp->hdr->max_bucket + 1;
01169         old_bucket = new_bucket & hcp->hdr->low_mask;
01170 
01171         new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask;
01172         logn = __db_log2(new_bucket);
01173 
01174         if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) {
01175                 /* Page exists; get it so we can get its LSN */
01176                 pgno = BUCKET_TO_PAGE(hcp, new_bucket);
01177                 if ((ret =
01178                     __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
01179                         goto err;
01180                 lsn = h->lsn;
01181         } else {
01182                 /* Get the master meta-data page to do allocation. */
01183                 if (F_ISSET(dbp, DB_AM_SUBDB)) {
01184                         mpgno = PGNO_BASE_MD;
01185                         if ((ret = __db_lget(dbc,
01186                            0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
01187                                 goto err;
01188                         if ((ret = __memp_fget(mpf, &mpgno, 0, &mmeta)) != 0)
01189                                 goto err;
01190                         got_meta = 1;
01191                 }
                      /* The new group starts right after the current last page. */
01192                 pgno = mmeta->last_pgno + 1;
01193                 ZERO_LSN(lsn);
01194                 newalloc = 1;
01195         }
01196 
01197         /* Log the meta-data split first. */
01198         if (DBC_LOGGING(dbc)) {
01199                 /*
01200                  * We always log the page number of the first page of
01201                  * the allocation group.  However, the LSN that we log
01202                  * is either the LSN on the first page (if we did not
01203                  * do the actual allocation here) or the LSN on the last
01204                  * page of the unit (if we did do the allocation here).
01205                  */
01206                 if ((ret = __ham_metagroup_log(dbp, dbc->txn,
01207                     &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn,
01208                     hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn,
01209                     pgno, &lsn, newalloc, mmeta->last_pgno)) != 0)
01210                         goto err;
01211         } else
01212                 LSN_NOT_LOGGED(lsn);
01213 
01214         hcp->hdr->dbmeta.lsn = lsn;
01215 
01216         if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) {
01217                 /*
01218                  * We need to begin a new doubling and we have not allocated
01219                  * any pages yet.  Read the last page in and initialize it to
01220                  * make the allocation contiguous.  The pgno we calculated
01221                  * above is the first page allocated. The entry in spares is
01222                  * that page number minus any buckets already allocated (it
01223                  * simplifies bucket to page translation).  After we've set
01224                  * that, we calculate the last pgno.
01225                  */
01226 
01227                 pgno += hcp->hdr->max_bucket;
01228 
01229                 if ((ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
01230                         goto err;
01231 
01232                 hcp->hdr->spares[logn + 1] =
01233                     (pgno - new_bucket) - hcp->hdr->max_bucket;
01234                 mmeta->last_pgno = pgno;
01235                 mmeta->lsn = lsn;
01236                 dirty_meta = DB_MPOOL_DIRTY;
01237 
01238                 P_INIT(h, dbp->pgsize,
01239                     pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
01240         }
01241 
              /*
               * h was fetched by exactly one of the two branches above.
               * NOTE(review): if this put fails, h is still non-NULL and is
               * put a second time at err -- confirm that is intended.
               */
01242         /* Write out whatever page we ended up modifying. */
01243         h->lsn = lsn;
01244         if ((ret = __memp_fput(mpf, h, DB_MPOOL_DIRTY)) != 0)
01245                 goto err;
01246         h = NULL;
01247 
01248         /*
01249          * Update the meta-data page of this hash database.
              * On a new doubling the old high mask becomes the low mask and
              * the new high mask is new_bucket OR'ed with it.
01250          */
01251         hcp->hdr->max_bucket = new_bucket;
01252         if (new_double) {
01253                 hcp->hdr->low_mask = hcp->hdr->high_mask;
01254                 hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask;
01255         }
01256 
01257         /* Relocate records to the new bucket */
01258         ret = __ham_split_page(dbc, old_bucket, new_bucket);
01259 
              /*
               * Common exit: return the master meta-data page if it was
               * fetched, release metalock, and put h back if still held.
               * An error already in ret is never overwritten.
               */
01260 err:    if (got_meta)
01261                 if ((t_ret =
01262                     __memp_fput(mpf, mmeta, dirty_meta)) != 0 && ret == 0)
01263                         ret = t_ret;
01264         if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
01265                         ret = t_ret;
01266         if (h != NULL)
01267                 if ((t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
01268                         ret = t_ret;
01269 
01270         return (ret);
01271 }
01272 
01273 /*
01274  * __ham_call_hash --
01275  *      Run the database's h_hash function over the len bytes at k, mask
01276  *      the result with high_mask, and re-mask it with low_mask when it
01277  *      lands beyond max_bucket.  Returns the resulting bucket number.
01278  *
01279  * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, u_int32_t));
01280  */
01281 u_int32_t
01282 __ham_call_hash(dbc, k, len)
01283         DBC *dbc;
01284         u_int8_t *k;
01285         u_int32_t len;
01286 {
01287         HASH *hash_info;
01288         HASH_CURSOR *cursor;
01289         u_int32_t hashval, slot;
01290 
01291         cursor = (HASH_CURSOR *)dbc->internal;
01292         hash_info = dbc->dbp->h_internal;
01293         hashval = (u_int32_t)(hash_info->h_hash(dbc->dbp, k, len));
01294         slot = hashval & cursor->hdr->high_mask;
01295         return (slot > cursor->hdr->max_bucket ?
01296             slot & cursor->hdr->low_mask : slot);
01297 }
01298 
01299 /*
01300  * Check for duplicates, and call __db_ret appropriately.  Release
01301  * everything held by the cursor.
01302  */
01303 static int
01304 __ham_dup_return(dbc, val, flags)
01305         DBC *dbc;
01306         DBT *val;
01307         u_int32_t flags;
01308 {
01309         DB *dbp;
01310         HASH_CURSOR *hcp;
01311         PAGE *pp;
01312         DBT *myval, tmp_val;
01313         db_indx_t ndx;
01314         db_pgno_t pgno;
01315         u_int32_t off, tlen;
01316         u_int8_t *hk, type;
01317         int cmp, ret;
01318         db_indx_t len;
01319 
01320         /* Check for duplicate and return the first one. */
01321         dbp = dbc->dbp;
01322         hcp = (HASH_CURSOR *)dbc->internal;
01323         ndx = H_DATAINDEX(hcp->indx);
01324         type = HPAGE_TYPE(dbp, hcp->page, ndx);
01325         pp = hcp->page;
01326         myval = val;
01327 
01328         /*
01329          * There are 4 cases:
01330          * 1. We are not in duplicate, simply return; the upper layer
01331          *    will do the right thing.
01332          * 2. We are looking at keys and stumbled onto a duplicate.
01333          * 3. We are in the middle of a duplicate set. (ISDUP set)
01334          * 4. We need to check for particular data match.
01335          */
01336 
01337         /* We should never get here with off-page dups. */
01338         DB_ASSERT(type != H_OFFDUP);
01339 
01340         /* Case 1 */
01341         if (type != H_DUPLICATE && flags != DB_GET_BOTH &&
01342             flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE)
01343                 return (0);
01344 
01345         /*
01346          * Here we check for the case where we just stumbled onto a
01347          * duplicate.  In this case, we do initialization and then
01348          * let the normal duplicate code handle it. (Case 2)
01349          */
01350         if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) {
01351                 F_SET(hcp, H_ISDUP);
01352                 hcp->dup_tlen = LEN_HDATA(dbp, hcp->page,
01353                     hcp->hdr->dbmeta.pagesize, hcp->indx);
01354                 hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
01355                 if (flags == DB_LAST ||
01356                     flags == DB_PREV || flags == DB_PREV_NODUP) {
01357                         hcp->dup_off = 0;
01358                         do {
01359                                 memcpy(&len,
01360                                     HKEYDATA_DATA(hk) + hcp->dup_off,
01361                                     sizeof(db_indx_t));
01362                                 hcp->dup_off += DUP_SIZE(len);
01363                         } while (hcp->dup_off < hcp->dup_tlen);
01364                         hcp->dup_off -= DUP_SIZE(len);
01365                 } else {
01366                         memcpy(&len,
01367                             HKEYDATA_DATA(hk), sizeof(db_indx_t));
01368                         hcp->dup_off = 0;
01369                 }
01370                 hcp->dup_len = len;
01371         }
01372 
01373         /*
01374          * If we are retrieving a specific key/data pair, then we
01375          * may need to adjust the cursor before returning data.
01376          * Case 4
01377          */
01378         if (flags == DB_GET_BOTH ||
01379             flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
01380                 if (F_ISSET(hcp, H_ISDUP)) {
01381                         /*
01382                          * If we're doing a join, search forward from the
01383                          * current position, not the beginning of the dup set.
01384                          */
01385                         if (flags == DB_GET_BOTHC)
01386                                 F_SET(hcp, H_CONTINUE);
01387 
01388                         __ham_dsearch(dbc, val, &off, &cmp, flags);
01389 
01390                         /*
01391                          * This flag is set nowhere else and is safe to
01392                          * clear unconditionally.
01393                          */
01394                         F_CLR(hcp, H_CONTINUE);
01395                         hcp->dup_off = off;
01396                 } else {
01397                         hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
01398                         if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
01399                                 memcpy(&tlen,
01400                                     HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
01401                                 memcpy(&pgno,
01402                                     HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
01403                                 if ((ret = __db_moff(dbp, val,
01404                                     pgno, tlen, dbp->dup_compare, &cmp)) != 0)
01405                                         return (ret);
01406                         } else {
01407                                 /*
01408                                  * We do not zero tmp_val since the comparison
01409                                  * routines may only look at data and size.
01410                                  */
01411                                 tmp_val.data = HKEYDATA_DATA(hk);
01412                                 tmp_val.size = LEN_HDATA(dbp, hcp->page,
01413                                     dbp->pgsize, hcp->indx);
01414                                 cmp = dbp->dup_compare == NULL ?
01415                                     __bam_defcmp(dbp, &tmp_val, val) :
01416                                     dbp->dup_compare(dbp, &tmp_val, val);
01417                         }
01418                 }
01419 
01420                 if (cmp != 0)
01421                         return (DB_NOTFOUND);
01422         }
01423 
01424         /*
01425          * If we're doing a bulk get, we don't want to actually return
01426          * the data:  __ham_bulk will take care of cracking out the
01427          * duplicates appropriately.
01428          *
01429          * The rest of this function calculates partial offsets and
01430          * handles the actual __db_ret, so just return if
01431          * DB_MULTIPLE(_KEY) is set.
01432          */
01433         if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY))
01434                 return (0);
01435 
01436         /*
01437          * Now, everything is initialized, grab a duplicate if
01438          * necessary.
01439          */
01440         if (F_ISSET(hcp, H_ISDUP)) {    /* Case 3 */
01441                 /*
01442                  * Copy the DBT in case we are retrieving into user
01443                  * memory and we need the parameters for it.  If the
01444                  * user requested a partial, then we need to adjust
01445                  * the user's parameters to get the partial of the
01446                  * duplicate which is itself a partial.
01447                  */
01448                 memcpy(&tmp_val, val, sizeof(*val));
01449 
01450                 if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
01451                         /*
01452                          * Take the user's length unless it would go
01453                          * beyond the end of the duplicate.
01454                          */
01455                         if (tmp_val.doff > hcp->dup_len)
01456                                 tmp_val.dlen = 0;
01457                         else if (tmp_val.dlen + tmp_val.doff > hcp->dup_len)
01458                                 tmp_val.dlen = hcp->dup_len - tmp_val.doff;
01459 
01460                 } else {
01461                         F_SET(&tmp_val, DB_DBT_PARTIAL);
01462                         tmp_val.dlen = hcp->dup_len;
01463                         tmp_val.doff = 0;
01464                 }
01465 
01466                 /*
01467                  * Set offset to the appropriate place within the
01468                  * current duplicate -- need to take into account
01469                  * both the dup_off and the current duplicate's
01470                  * length.
01471                  */
01472                 tmp_val.doff += hcp->dup_off + sizeof(db_indx_t);
01473 
01474                 myval = &tmp_val;
01475         }
01476 
01477         /*
01478          * Finally, if we had a duplicate, pp, ndx, and myval should be
01479          * set appropriately.
01480          */
01481         if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata->data,
01482             &dbc->rdata->ulen)) != 0)
01483                 return (ret);
01484 
01485         /*
01486          * In case we sent a temporary off to db_ret, set the real
01487          * return values.
01488          */
01489         val->data = myval->data;
01490         val->size = myval->size;
01491 
01492         F_SET(val, DB_DBT_ISSET);
01493 
01494         return (0);
01495 }
01496 
01497 static int
01498 __ham_overwrite(dbc, nval, flags)
01499         DBC *dbc;
01500         DBT *nval;
01501         u_int32_t flags;
01502 {
01503         DB *dbp;
01504         DB_ENV *dbenv;
01505         HASH_CURSOR *hcp;
01506         DBT *myval, tmp_val, tmp_val2;
01507         void *newrec;
01508         u_int8_t *hk, *p;
01509         u_int32_t len, nondup_size;
01510         db_indx_t newsize;
01511         int ret;
01512 
01513         dbp = dbc->dbp;
01514         dbenv = dbp->dbenv;
01515         hcp = (HASH_CURSOR *)dbc->internal;
01516         if (F_ISSET(hcp, H_ISDUP)) {
01517                 /*
01518                  * This is an overwrite of a duplicate. We should never
01519                  * be off-page at this point.
01520                  */
01521                 DB_ASSERT(hcp->opd == NULL);
01522                 /* On page dups */
01523                 if (F_ISSET(nval, DB_DBT_PARTIAL)) {
01524                         /*
01525                          * We're going to have to get the current item, then
01526                          * construct the record, do any padding and do a
01527                          * replace.
01528                          */
01529                         memset(&tmp_val, 0, sizeof(tmp_val));
01530                         if ((ret =
01531                             __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0)
01532                                 return (ret);
01533 
01534                         /* Figure out new size. */
01535                         nondup_size = tmp_val.size;
01536                         newsize = nondup_size;
01537 
01538                         /*
01539                          * Three cases:
01540                          * 1. strictly append (may need to allocate space
01541                          *      for pad bytes; really gross).
01542                          * 2. overwrite some and append.
01543                          * 3. strictly overwrite.
01544                          */
01545                         if (nval->doff > nondup_size)
01546                                 newsize +=
01547                                     ((nval->doff - nondup_size) + nval->size);
01548                         else if (nval->doff + nval->dlen > nondup_size)
01549                                 newsize += nval->size -
01550                                     (nondup_size - nval->doff);
01551                         else
01552                                 newsize += nval->size - nval->dlen;
01553 
01554                         /*
01555                          * Make sure that the new size doesn't put us over
01556                          * the onpage duplicate size in which case we need
01557                          * to convert to off-page duplicates.
01558                          */
01559                         if (ISBIG(hcp,
01560                             (hcp->dup_tlen - nondup_size) + newsize)) {
01561                                 if ((ret = __ham_dup_convert(dbc)) != 0)
01562                                         return (ret);
01563                                 return (hcp->opd->c_am_put(hcp->opd,
01564                                     NULL, nval, flags, NULL));
01565                         }
01566 
01567                         if ((ret = __os_malloc(dbp->dbenv,
01568                             DUP_SIZE(newsize), &newrec)) != 0)
01569                                 return (ret);
01570                         memset(&tmp_val2, 0, sizeof(tmp_val2));
01571                         F_SET(&tmp_val2, DB_DBT_PARTIAL);
01572 
01573                         /* Construct the record. */
01574                         p = newrec;
01575                         /* Initial size. */
01576                         memcpy(p, &newsize, sizeof(db_indx_t));
01577                         p += sizeof(db_indx_t);
01578 
01579                         /* First part of original record. */
01580                         len = nval->doff > tmp_val.size
01581                             ? tmp_val.size : nval->doff;
01582                         memcpy(p, tmp_val.data, len);
01583                         p += len;
01584 
01585                         if (nval->doff > tmp_val.size) {
01586                                 /* Padding */
01587                                 memset(p, 0, nval->doff - tmp_val.size);
01588                                 p += nval->doff - tmp_val.size;
01589                         }
01590 
01591                         /* New bytes */
01592                         memcpy(p, nval->data, nval->size);
01593                         p += nval->size;
01594 
01595                         /* End of original record (if there is any) */
01596                         if (nval->doff + nval->dlen < tmp_val.size) {
01597                                 len = (tmp_val.size - nval->doff) - nval->dlen;
01598                                 memcpy(p, (u_int8_t *)tmp_val.data +
01599                                     nval->doff + nval->dlen, len);
01600                                 p += len;
01601                         }
01602 
01603                         /* Final size. */
01604                         memcpy(p, &newsize, sizeof(db_indx_t));
01605 
01606                         /*
01607                          * Make sure that the caller isn't corrupting
01608                          * the sort order.
01609                          */
01610                         if (dbp->dup_compare != NULL) {
01611                                 tmp_val2.data =
01612                                     (u_int8_t *)newrec + sizeof(db_indx_t);
01613                                 tmp_val2.size = newsize;
01614                                 if (dbp->dup_compare(
01615                                     dbp, &tmp_val, &tmp_val2) != 0) {
01616                                         __os_free(dbenv, newrec);
01617                                         return (__db_duperr(dbp, flags));
01618                                 }
01619                         }
01620 
01621                         tmp_val2.data = newrec;
01622                         tmp_val2.size = DUP_SIZE(newsize);
01623                         tmp_val2.doff = hcp->dup_off;
01624                         tmp_val2.dlen = DUP_SIZE(hcp->dup_len);
01625 
01626                         ret = __ham_replpair(dbc, &tmp_val2, 0);
01627                         __os_free(dbenv, newrec);
01628 
01629                         /* Update cursor */
01630                         if (ret != 0)
01631                                 return (ret);
01632 
01633                         if (newsize > nondup_size)
01634                                 hcp->dup_tlen += (newsize - nondup_size);
01635                         else
01636                                 hcp->dup_tlen -= (nondup_size - newsize);
01637                         hcp->dup_len = newsize;
01638                         return (0);
01639                 } else {
01640                         /* Check whether we need to convert to off page. */
01641                         if (ISBIG(hcp,
01642                             (hcp->dup_tlen - hcp->dup_len) + nval->size)) {
01643                                 if ((ret = __ham_dup_convert(dbc)) != 0)
01644                                         return (ret);
01645                                 return (hcp->opd->c_am_put(hcp->opd,
01646                                     NULL, nval, flags, NULL));
01647                         }
01648 
01649                         /* Make sure we maintain sort order. */
01650                         if (dbp->dup_compare != NULL) {
01651                                 tmp_val2.data =
01652                                     HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page,
01653                                     hcp->indx)) + hcp->dup_off +
01654                                     sizeof(db_indx_t);
01655                                 tmp_val2.size = hcp->dup_len;
01656                                 if (dbp->dup_compare(
01657                                     dbp, nval, &tmp_val2) != 0) {
01658                                         __db_err(dbenv,
01659                         "Existing data sorts differently from put data");
01660                                         return (EINVAL);
01661                                 }
01662                         }
01663                         /* Overwriting a complete duplicate. */
01664                         if ((ret =
01665                             __ham_make_dup(dbp->dbenv, nval, &tmp_val,
01666                             &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0)
01667                                 return (ret);
01668                         /* Now fix what we are replacing. */
01669                         tmp_val.doff = hcp->dup_off;
01670                         tmp_val.dlen = DUP_SIZE(hcp->dup_len);
01671 
01672                         /* Update cursor */
01673                         if (nval->size > hcp->dup_len)
01674                                 hcp->dup_tlen += (nval->size - hcp->dup_len);
01675                         else
01676                                 hcp->dup_tlen -= (hcp->dup_len - nval->size);
01677                         hcp->dup_len = (db_indx_t)nval->size;
01678                 }
01679                 myval = &tmp_val;
01680         } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
01681                 /* Put/overwrite */
01682                 memcpy(&tmp_val, nval, sizeof(*nval));
01683                 F_SET(&tmp_val, DB_DBT_PARTIAL);
01684                 tmp_val.doff = 0;
01685                 hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
01686                 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
01687                         memcpy(&tmp_val.dlen,
01688                             HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
01689                 else
01690                         tmp_val.dlen = LEN_HDATA(dbp, hcp->page,
01691                             hcp->hdr->dbmeta.pagesize, hcp->indx);
01692                 myval = &tmp_val;
01693         } else
01694                 /* Regular partial put */
01695                 myval = nval;
01696 
01697         return (__ham_replpair(dbc, myval, 0));
01698 }
01699 
01700 /*
01701  * Given a key and a cursor, sets the cursor to the page/ndx on which
01702  * the key resides.  If the key is found, the cursor H_OK flag is set
01703  * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
01704  * If the key is not found, the H_OK flag is not set.  If the sought
01705  * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
01706  * are set indicating where an add might take place.  If it is 0,
01707  * non of the cursor pointer field are valid.
01708  */
01709 static int
01710 __ham_lookup(dbc, key, sought, mode, pgnop)
01711         DBC *dbc;
01712         const DBT *key;
01713         u_int32_t sought;
01714         db_lockmode_t mode;
01715         db_pgno_t *pgnop;
01716 {
01717         DB *dbp;
01718         HASH_CURSOR *hcp;
01719         db_pgno_t pgno;
01720         u_int32_t tlen;
01721         int match, ret;
01722         u_int8_t *hk, *dk;
01723 
01724         dbp = dbc->dbp;
01725         hcp = (HASH_CURSOR *)dbc->internal;
01726         /*
01727          * Set up cursor so that we're looking for space to add an item
01728          * as we cycle through the pages looking for the key.
01729          */
01730         if ((ret = __ham_item_reset(dbc)) != 0)
01731                 return (ret);
01732         hcp->seek_size = sought;
01733 
01734         hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size);
01735         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
01736 
01737         for (;;) {
01738                 *pgnop = PGNO_INVALID;
01739                 if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0)
01740                         return (ret);
01741 
01742                 if (F_ISSET(hcp, H_NOMORE))
01743                         break;
01744 
01745                 hk = H_PAIRKEY(dbp, hcp->page, hcp->indx);
01746                 switch (HPAGE_PTYPE(hk)) {
01747                 case H_OFFPAGE:
01748                         memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
01749                         if (tlen == key->size) {
01750                                 memcpy(&pgno,
01751                                     HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
01752                                 if ((ret = __db_moff(dbp,
01753                                     key, pgno, tlen, NULL, &match)) != 0)
01754                                         return (ret);
01755                                 if (match == 0)
01756                                         goto found_key;
01757                         }
01758                         break;
01759                 case H_KEYDATA:
01760                         if (key->size ==
01761                             LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx) &&
01762                             memcmp(key->data,
01763                             HKEYDATA_DATA(hk), key->size) == 0) {
01764                                 /* Found the key, check for data type. */
01765 found_key:                      F_SET(hcp, H_OK);
01766                                 dk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
01767                                 if (HPAGE_PTYPE(dk) == H_OFFDUP)
01768                                         memcpy(pgnop, HOFFDUP_PGNO(dk),
01769                                             sizeof(db_pgno_t));
01770                                 return (0);
01771                         }
01772                         break;
01773                 case H_DUPLICATE:
01774                 case H_OFFDUP:
01775                         /*
01776                          * These are errors because keys are never
01777                          * duplicated, only data items are.
01778                          */
01779                         return (__db_pgfmt(dbp->dbenv, PGNO(hcp->page)));
01780                 default:
01781                         return (__db_pgfmt(dbp->dbenv, PGNO(hcp->page)));
01782                 }
01783         }
01784 
01785         /*
01786          * Item was not found.
01787          */
01788 
01789         if (sought != 0)
01790                 return (ret);
01791 
01792         return (ret);
01793 }
01794 
01795 /*
01796  * __ham_init_dbt --
01797  *      Initialize a dbt using some possibly already allocated storage
01798  *      for items.
01799  *
01800  * PUBLIC: int __ham_init_dbt __P((DB_ENV *,
01801  * PUBLIC:     DBT *, u_int32_t, void **, u_int32_t *));
01802  */
01803 int
01804 __ham_init_dbt(dbenv, dbt, size, bufp, sizep)
01805         DB_ENV *dbenv;
01806         DBT *dbt;
01807         u_int32_t size;
01808         void **bufp;
01809         u_int32_t *sizep;
01810 {
01811         int ret;
01812 
01813         memset(dbt, 0, sizeof(*dbt));
01814         if (*sizep < size) {
01815                 if ((ret = __os_realloc(dbenv, size, bufp)) != 0) {
01816                         *sizep = 0;
01817                         return (ret);
01818                 }
01819                 *sizep = size;
01820         }
01821         dbt->data = *bufp;
01822         dbt->size = size;
01823         return (0);
01824 }
01825 
01826 /*
01827  * Adjust the cursor after an insert or delete.  The cursor passed is
01828  * the one that was operated upon; we just need to check any of the
01829  * others.
01830  *
01831  * len indicates the length of the item added/deleted
01832  * add indicates if the item indicated by the cursor has just been
01833  * added (add == 1) or deleted (add == 0).
01834  * dup indicates if the addition occurred into a duplicate set.
01835  *
01836  * PUBLIC: int __ham_c_update
01837  * PUBLIC:    __P((DBC *, u_int32_t, int, int));
01838  */
01839 int
01840 __ham_c_update(dbc, len, add, is_dup)
01841         DBC *dbc;
01842         u_int32_t len;
01843         int add, is_dup;
01844 {
01845         DB *dbp, *ldbp;
01846         DBC *cp;
01847         DB_ENV *dbenv;
01848         DB_LSN lsn;
01849         DB_TXN *my_txn;
01850         HASH_CURSOR *hcp, *lcp;
01851         int found, ret;
01852         u_int32_t order;
01853 
01854         dbp = dbc->dbp;
01855         dbenv = dbp->dbenv;
01856         hcp = (HASH_CURSOR *)dbc->internal;
01857 
01858         /*
01859          * Adjustment will only be logged if this is a subtransaction.
01860          * Only subtransactions can abort and effect their parent
01861          * transactions cursors.
01862          */
01863 
01864         my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
01865         found = 0;
01866 
01867         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
01868 
01869         /*
01870          * Calculate the order of this deleted record.
01871          * This will be one greater than any cursor that is pointing
01872          * at this record and already marked as deleted.
01873          */
01874         order = 0;
01875         if (!add) {
01876                 order = 1;
01877                 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
01878                     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
01879                     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
01880                         MUTEX_LOCK(dbenv, dbp->mutex);
01881                         for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
01882                             cp = TAILQ_NEXT(cp, links)) {
01883                                 if (cp == dbc || cp->dbtype != DB_HASH)
01884                                         continue;
01885                                 lcp = (HASH_CURSOR *)cp->internal;
01886                                 if (F_ISSET(lcp, H_DELETED) &&
01887                                     hcp->pgno == lcp->pgno &&
01888                                     hcp->indx == lcp->indx &&
01889                                     order <= lcp->order &&
01890                                     (!is_dup || hcp->dup_off == lcp->dup_off))
01891                                         order = lcp->order + 1;
01892                         }
01893                         MUTEX_UNLOCK(dbenv, dbp->mutex);
01894                 }
01895                 hcp->order = order;
01896         }
01897 
01898         for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
01899             ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
01900             ldbp = LIST_NEXT(ldbp, dblistlinks)) {
01901                 MUTEX_LOCK(dbenv, dbp->mutex);
01902                 for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
01903                     cp = TAILQ_NEXT(cp, links)) {
01904                         if (cp == dbc || cp->dbtype != DB_HASH)
01905                                 continue;
01906 
01907                         lcp = (HASH_CURSOR *)cp->internal;
01908 
01909                         if (lcp->pgno != hcp->pgno || lcp->indx == NDX_INVALID)
01910                                 continue;
01911 
01912                         if (my_txn != NULL && cp->txn != my_txn)
01913                                 found = 1;
01914 
01915                         if (!is_dup) {
01916                                 if (add) {
01917                                         /*
01918                                          * This routine is not called to add
01919                                          * non-dup records which are always put
01920                                          * at the end.  It is only called from
01921                                          * recovery in this case and the
01922                                          * cursor will be marked deleted.
01923                                          * We are "undeleting" so unmark all
01924                                          * cursors with the same order.
01925                                          */
01926                                         if (lcp->indx == hcp->indx &&
01927                                             F_ISSET(lcp, H_DELETED)) {
01928                                                 if (lcp->order == hcp->order)
01929                                                         F_CLR(lcp, H_DELETED);
01930                                                 else if (lcp->order >
01931                                                     hcp->order) {
01932 
01933                                                 /*
01934                                                  * If we've moved this cursor's
01935                                                  * index, split its order
01936                                                  * number--i.e., decrement it by
01937                                                  * enough so that the lowest
01938                                                  * cursor moved has order 1.
01939                                                  * cp_arg->order is the split
01940                                                  * point, so decrement by one
01941                                                  * less than that.
01942                                                  */
01943                                                         lcp->order -=
01944                                                             (hcp->order - 1);
01945                                                         lcp->indx += 2;
01946                                                 }
01947                                         } else if (lcp->indx >= hcp->indx)
01948                                                 lcp->indx += 2;
01949 
01950                                 } else {
01951                                         if (lcp->indx > hcp->indx) {
01952                                                 lcp->indx -= 2;
01953                                                 if (lcp->indx == hcp->indx &&
01954                                                     F_ISSET(lcp, H_DELETED))
01955                                                         lcp->order += order;
01956                                         } else if (lcp->indx == hcp->indx &&
01957                                             !F_ISSET(lcp, H_DELETED)) {
01958                                                 F_SET(lcp, H_DELETED);
01959                                                 F_CLR(lcp, H_ISDUP);
01960                                                 lcp->order = order;
01961                                         }
01962                                 }
01963                         } else if (lcp->indx == hcp->indx) {
01964                                 /*
01965                                  * Handle duplicates.  This routine is
01966                                  * only called for on page dups.
01967                                  * Off page dups are handled by btree/rtree
01968                                  * code.
01969                                  */
01970                                 if (add) {
01971                                         lcp->dup_tlen += len;
01972                                         if (lcp->dup_off == hcp->dup_off &&
01973                                             F_ISSET(hcp, H_DELETED) &&
01974                                             F_ISSET(lcp, H_DELETED)) {
01975                                                 /* Abort of a delete. */
01976                                                 if (lcp->order == hcp->order)
01977                                                         F_CLR(lcp, H_DELETED);
01978                                                 else if (lcp->order >
01979                                                     hcp->order) {
01980                                                         lcp->order -=
01981                                                             (hcp->order -1);
01982                                                         lcp->dup_off += len;
01983                                                 }
01984                                         } else if (lcp->dup_off >= hcp->dup_off)
01985                                                 lcp->dup_off += len;
01986                                 } else {
01987                                         lcp->dup_tlen -= len;
01988                                         if (lcp->dup_off > hcp->dup_off) {
01989                                                 lcp->dup_off -= len;
01990                                                 if (lcp->dup_off ==
01991                                                     hcp->dup_off &&
01992                                                     F_ISSET(lcp, H_DELETED))
01993                                                         lcp->order += order;
01994                                         } else if (lcp->dup_off ==
01995                                             hcp->dup_off &&
01996                                             !F_ISSET(lcp, H_DELETED)) {
01997                                                 F_SET(lcp, H_DELETED);
01998                                                 lcp->order = order;
01999                                         }
02000                                 }
02001                         }
02002                 }
02003                 MUTEX_UNLOCK(dbenv, dbp->mutex);
02004         }
02005         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
02006 
02007         if (found != 0 && DBC_LOGGING(dbc)) {
02008                 if ((ret = __ham_curadj_log(dbp, my_txn, &lsn, 0, hcp->pgno,
02009                     hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0)
02010                         return (ret);
02011         }
02012 
02013         return (0);
02014 }
02015 
02016 /*
02017  * __ham_get_clist --
02018  *
02019  * Get a list of cursors either on a particular bucket or on a particular
02020  * page and index combination.  The former is so that we can update
02021  * cursors on a split.  The latter is so we can update cursors when we
02022  * move items off page.
02023  *
02024  * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***));
02025  */
02026 int
02027 __ham_get_clist(dbp, pgno, indx, listp)
02028         DB *dbp;
02029         db_pgno_t pgno;
02030         u_int32_t indx;
02031         DBC ***listp;
02032 {
02033         DB *ldbp;
02034         DBC *cp;
02035         DB_ENV *dbenv;
02036         u_int nalloc, nused;
02037         int ret;
02038 
02039         /*
02040          * Assume that finding anything is the exception, so optimize for
02041          * the case where there aren't any.
02042          */
02043         nalloc = nused = 0;
02044         *listp = NULL;
02045         dbenv = dbp->dbenv;
02046 
02047         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
02048         for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
02049             ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
02050             ldbp = LIST_NEXT(ldbp, dblistlinks)) {
02051                 MUTEX_LOCK(dbenv, dbp->mutex);
02052                 for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
02053                     cp = TAILQ_NEXT(cp, links))
02054                         /*
02055                          * We match if cp->pgno matches the specified
02056                          * pgno, and if either the cp->indx matches
02057                          * or we weren't given an index.
02058                          */
02059                         if (cp->internal->pgno == pgno &&
02060                             (indx == NDX_INVALID ||
02061                             cp->internal->indx == indx)) {
02062                                 if (nused >= nalloc) {
02063                                         nalloc += 10;
02064                                         if ((ret = __os_realloc(dbp->dbenv,
02065                                             nalloc * sizeof(HASH_CURSOR *),
02066                                             listp)) != 0)
02067                                                 goto err;
02068                                 }
02069                                 (*listp)[nused++] = cp;
02070                         }
02071 
02072                 MUTEX_UNLOCK(dbp->dbenv, dbp->mutex);
02073         }
02074         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
02075 
02076         if (listp != NULL) {
02077                 if (nused >= nalloc) {
02078                         nalloc++;
02079                         if ((ret = __os_realloc(dbp->dbenv,
02080                             nalloc * sizeof(HASH_CURSOR *), listp)) != 0)
02081                                 return (ret);
02082                 }
02083                 (*listp)[nused] = NULL;
02084         }
02085         return (0);
02086 err:
02087         MUTEX_UNLOCK(dbp->dbenv, dbp->mutex);
02088         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
02089         return (ret);
02090 }
02091 
02092 static int
02093 __ham_c_writelock(dbc)
02094         DBC *dbc;
02095 {
02096         DB_LOCK tmp_lock;
02097         HASH_CURSOR *hcp;
02098         int ret;
02099 
02100         /*
02101          * All we need do is acquire the lock and let the off-page
02102          * dup tree do its thing.
02103          */
02104         if (!STD_LOCKING(dbc))
02105                 return (0);
02106 
02107         hcp = (HASH_CURSOR *)dbc->internal;
02108         ret = 0;
02109         if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode != DB_LOCK_WRITE)) {
02110                 tmp_lock = hcp->lock;
02111                 if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) == 0 &&
02112                     tmp_lock.mode != DB_LOCK_WWRITE)
02113                         ret = __LPUT(dbc, tmp_lock);
02114         }
02115         return (ret);
02116 }

Generated on Sun Dec 25 12:14:28 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2