Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

hash_page.c

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994
00009  *      Margo Seltzer.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993, 1994
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * This code is derived from software contributed to Berkeley by
00016  * Margo Seltzer.
00017  *
00018  * Redistribution and use in source and binary forms, with or without
00019  * modification, are permitted provided that the following conditions
00020  * are met:
00021  * 1. Redistributions of source code must retain the above copyright
00022  *    notice, this list of conditions and the following disclaimer.
00023  * 2. Redistributions in binary form must reproduce the above copyright
00024  *    notice, this list of conditions and the following disclaimer in the
00025  *    documentation and/or other materials provided with the distribution.
00026  * 3. Neither the name of the University nor the names of its contributors
00027  *    may be used to endorse or promote products derived from this software
00028  *    without specific prior written permission.
00029  *
00030  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00031  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00032  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00033  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00034  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00035  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00036  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00037  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00038  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00039  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00040  * SUCH DAMAGE.
00041  *
00042  * $Id: hash_page.c,v 12.7 2005/10/13 22:22:43 ubell Exp $
00043  */
00044 
00045 #include "db_config.h"
00046 
00047 /*
00048  * PACKAGE:  hashing
00049  *
00050  * DESCRIPTION:
00051  *      Page manipulation for hashing package.
00052  */
00053 
00054 #ifndef NO_SYSTEM_INCLUDES
00055 #include <sys/types.h>
00056 
00057 #include <string.h>
00058 #endif
00059 
00060 #include "db_int.h"
00061 #include "dbinc/db_page.h"
00062 #include "dbinc/db_shash.h"
00063 #include "dbinc/hash.h"
00064 #include "dbinc/lock.h"
00065 #include "dbinc/mp.h"
00066 
00067 static int __ham_c_delpg
00068     __P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *));
00069 
00070 /*
       * __ham_item --
       *      Validate the cursor's current position and report whether it
       *      refers to a usable key/data pair.
       *
       *      A cursor flagged H_DELETED is rejected with EINVAL.  Otherwise
       *      H_OK and H_NOMORE are cleared, a page is attached to the cursor
       *      through __ham_get_cpage() using lock mode "mode", and, while the
       *      index is at or past the page's entry count, the NEXT_PGNO links
       *      are followed (index reset to 0 on each new page).
       *
       *      If the data item at the index has type H_OFFDUP, the page number
       *      stored in it is copied to *pgnop; no other path in this function
       *      writes *pgnop.
       *
       *      Returns 0 with H_OK set on success, DB_NOTFOUND with H_NOMORE set
       *      when the page chain is exhausted, EINVAL for a deleted item, or
       *      the error returned by __ham_get_cpage()/__ham_next_cpage().
       *
00071  * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *));
00072  */
00073 int
00074 __ham_item(dbc, mode, pgnop)
00075         DBC *dbc;
00076         db_lockmode_t mode;
00077         db_pgno_t *pgnop;
00078 {
00079         DB *dbp;
00080         HASH_CURSOR *hcp;
00081         db_pgno_t next_pgno;
00082         int ret;
00083 
00084         dbp = dbc->dbp;
00085         hcp = (HASH_CURSOR *)dbc->internal;
00086 
00087         if (F_ISSET(hcp, H_DELETED)) {
00088                 __db_err(dbp->dbenv, "Attempt to return a deleted item");
00089                 return (EINVAL);
00090         }
00091         F_CLR(hcp, H_OK | H_NOMORE);
00092 
00093         /* Check if we need to get a page for this cursor. */
00094         if ((ret = __ham_get_cpage(dbc, mode)) != 0)
00095                 return (ret);
00096 
              /* Re-entered each time the cursor is moved to the next page. */
00097 recheck:
00098         /* Check if we are looking for space in which to insert an item. */
              /*
               * Only the first page with more free space than seek_size is
               * recorded: once seek_found_page is valid the test is false.
               */
00099         if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID &&
00100             hcp->seek_size < P_FREESPACE(dbp, hcp->page))
00101                 hcp->seek_found_page = hcp->pgno;
00102 
00103         /* Check for off-page duplicates. */
00104         if (hcp->indx < NUM_ENT(hcp->page) &&
00105             HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
00106                 memcpy(pgnop,
00107                     HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
00108                     sizeof(db_pgno_t));
00109                 F_SET(hcp, H_OK);
00110                 return (0);
00111         }
00112 
00113         /* Check if we need to go on to the next page. */
              /*
               * NOTE(review): the dup_len read below happens before the index
               * bounds check that follows it; presumably H_ISDUP is only set
               * while indx refers to a valid pair -- confirm.
               */
00114         if (F_ISSET(hcp, H_ISDUP))
00115                 /*
00116                  * ISDUP is set, and offset is at the beginning of the datum.
00117                  * We need to grab the length of the datum, then set the datum
00118                  * pointer to be the beginning of the datum.
00119                  */
00120                 memcpy(&hcp->dup_len,
00121                     HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) +
00122                     hcp->dup_off, sizeof(db_indx_t));
00123 
00124         if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) {
00125                 /* Fetch next page. */
00126                 if (NEXT_PGNO(hcp->page) == PGNO_INVALID) {
00127                         F_SET(hcp, H_NOMORE);
00128                         return (DB_NOTFOUND);
00129                 }
00130                 next_pgno = NEXT_PGNO(hcp->page);
00131                 hcp->indx = 0;
00132                 if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
00133                         return (ret);
00134                 goto recheck;
00135         }
00136 
00137         F_SET(hcp, H_OK);
00138         return (0);
00139 }
00140 
00141 /*
       * __ham_item_reset --
       *      Release the page currently attached to the cursor (if any) with
       *      __memp_fput() and flags 0, then reinitialize the cursor with
       *      __ham_item_init().
       *
       *      Both steps are always attempted.  The return value is the error
       *      from __memp_fput() if it failed, otherwise the result of
       *      __ham_item_init().
       *
00142  * PUBLIC: int __ham_item_reset __P((DBC *));
00143  */
00144 int
00145 __ham_item_reset(dbc)
00146         DBC *dbc;
00147 {
00148         DB *dbp;
00149         DB_MPOOLFILE *mpf;
00150         HASH_CURSOR *hcp;
00151         int ret, t_ret;
00152 
00153         dbp = dbc->dbp;
00154         mpf = dbp->mpf;
00155         hcp = (HASH_CURSOR *)dbc->internal;
00156 
00157         ret = 0;
00158         if (hcp->page != NULL)
00159                 ret = __memp_fput(mpf, hcp->page, 0);
00160 
              /* Keep the first failure; a later error must not mask it. */
00161         if ((t_ret = __ham_item_init(dbc)) != 0 && ret == 0)
00162                 ret = t_ret;
00163 
00164         return (ret);
00165 }
00166 
00167 /*
       * __ham_item_init --
       *      Put the hash cursor back into its "not positioned" state: the
       *      cursor's lock is handed to __TLPUT(), then the bucket, lock,
       *      duplicate, seek, flag and position fields are reset.
       *
       *      The page pointer is simply overwritten with NULL; nothing here
       *      calls __memp_fput(), so a caller that still holds a page must
       *      release it first (as __ham_item_reset() does).
       *
       *      Returns the result of __TLPUT().
       *
00168  * PUBLIC: int __ham_item_init __P((DBC *));
00169  */
00170 int
00171 __ham_item_init(dbc)
00172         DBC *dbc;
00173 {
00174         HASH_CURSOR *hcp;
00175         int ret;
00176 
00177         hcp = (HASH_CURSOR *)dbc->internal;
00178 
00179         /*
00180          * If this cursor still holds any locks, we must release them if
00181          * we are not running with transactions.
00182          */
00183         ret = __TLPUT(dbc, hcp->lock);
00184 
00185         /*
00186          * The following fields must *not* be initialized here because they
00187          * may have meaning across inits.
00188          *      hlock, hdr, split_buf, stats
00189          */
00190         hcp->bucket = BUCKET_INVALID;
00191         hcp->lbucket = BUCKET_INVALID;
00192         LOCK_INIT(hcp->lock);
00193         hcp->lock_mode = DB_LOCK_NG;
00194         hcp->dup_off = 0;
00195         hcp->dup_len = 0;
00196         hcp->dup_tlen = 0;
00197         hcp->seek_size = 0;
00198         hcp->seek_found_page = PGNO_INVALID;
00199         hcp->flags = 0;
00200 
              /* Position fields: no page number, no index, no pinned page. */
00201         hcp->pgno = PGNO_INVALID;
00202         hcp->indx = NDX_INVALID;
00203         hcp->page = NULL;
00204 
              /* The fields are reset even when the lock release failed. */
00205         return (ret);
00206 }
00207 
00208 /*
00209  * Returns the last item in a bucket.
00210  *
       * __ham_item_last --
       *      Reset the cursor, aim it at the bucket numbered hdr->max_bucket
       *      (page number derived with BUCKET_TO_PAGE), set H_OK and let
       *      __ham_item_prev() do the positioning.  Because the reset leaves
       *      the index at NDX_INVALID, __ham_item_prev() takes its "find the
       *      end of the bucket" path.
       *
       *      mode and pgnop are passed through to __ham_item_prev().
       *      Returns the error from __ham_item_reset(), or whatever
       *      __ham_item_prev() returns.
       *
00211  * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *));
00212  */
00213 int
00214 __ham_item_last(dbc, mode, pgnop)
00215         DBC *dbc;
00216         db_lockmode_t mode;
00217         db_pgno_t *pgnop;
00218 {
00219         HASH_CURSOR *hcp;
00220         int ret;
00221 
00222         hcp = (HASH_CURSOR *)dbc->internal;
00223         if ((ret = __ham_item_reset(dbc)) != 0)
00224                 return (ret);
00225 
00226         hcp->bucket = hcp->hdr->max_bucket;
00227         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
00228         F_SET(hcp, H_OK);
00229         return (__ham_item_prev(dbc, mode, pgnop));
00230 }
00231 
00232 /*
       * __ham_item_first --
       *      Reset the cursor, aim it at bucket 0 (page number derived with
       *      BUCKET_TO_PAGE), set H_OK and let __ham_item_next() do the
       *      positioning.  Because the reset leaves the index at NDX_INVALID,
       *      __ham_item_next() starts from index 0 of that page.
       *
       *      mode and pgnop are passed through to __ham_item_next().
       *      Returns the error from __ham_item_reset(), or whatever
       *      __ham_item_next() returns.
       *
00233  * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *));
00234  */
00235 int
00236 __ham_item_first(dbc, mode, pgnop)
00237         DBC *dbc;
00238         db_lockmode_t mode;
00239         db_pgno_t *pgnop;
00240 {
00241         HASH_CURSOR *hcp;
00242         int ret;
00243 
00244         hcp = (HASH_CURSOR *)dbc->internal;
00245         if ((ret = __ham_item_reset(dbc)) != 0)
00246                 return (ret);
00247         F_SET(hcp, H_OK);
00248         hcp->bucket = 0;
00249         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
00250         return (__ham_item_next(dbc, mode, pgnop));
00251 }
00252 
00253 /*
00254  * __ham_item_prev --
00255  *      Returns a pointer to key/data pair on a page.  In the case of
00256  *      bigkeys, just returns the page number and index of the bigkey
00257  *      pointer pair.
00258  *
       *      Moves the cursor one step backwards within its bucket.
       *
       *      dbc:   cursor to move.
       *      mode:  lock mode handed to __ham_get_cpage() and __ham_item().
       *      pgnop: receives the off-page duplicate page number when the
       *             current data item is of type H_OFFDUP; otherwise it is
       *             only passed on to __ham_item().
       *
       *      Returns the result of __ham_item() for the new position,
       *      DB_NOTFOUND with H_NOMORE set at the beginning of the bucket or
       *      for an empty bucket, or an error from the page-fetch helpers.
       *      When H_DUPONLY is set and the cursor would have to leave the
       *      current duplicate set, 0 is returned with H_OK cleared and
       *      H_NOMORE set, so callers must inspect the flags as well.
       *
00259  * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *));
00260  */
00261 int
00262 __ham_item_prev(dbc, mode, pgnop)
00263         DBC *dbc;
00264         db_lockmode_t mode;
00265         db_pgno_t *pgnop;
00266 {
00267         DB *dbp;
00268         HASH_CURSOR *hcp;
00269         db_pgno_t next_pgno;
00270         int ret;
00271 
00272         hcp = (HASH_CURSOR *)dbc->internal;
00273         dbp = dbc->dbp;
00274 
00275         /*
00276          * There are 5 cases for backing up in a hash file.
00277          * Case 1: In the middle of a page, no duplicates, just dec the index.
00278          * Case 2: In the middle of a duplicate set, back up one.
00279          * Case 3: At the beginning of a duplicate set, get out of set and
00280          *      back up to next key.
00281          * Case 4: At the beginning of a page; go to previous page.
00282          * Case 5: At the beginning of a bucket; go to prev bucket.
00283          */
00284         F_CLR(hcp, H_OK | H_NOMORE | H_DELETED);
00285 
00286         if ((ret = __ham_get_cpage(dbc, mode)) != 0)
00287                 return (ret);
00288 
00289         /*
00290          * First handle the duplicates.  Either you'll get the key here
00291          * or you'll exit the duplicate set and drop into the code below
00292          * to handle backing up through keys.
00293          */
00294         if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) {
00295                 if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) ==
00296                     H_OFFDUP) {
00297                         memcpy(pgnop,
00298                             HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
00299                             sizeof(db_pgno_t));
00300                         F_SET(hcp, H_OK);
00301                         return (0);
00302                 }
00303 
00304                 /* Duplicates are on-page. */
                      /*
                       * A nonzero offset means there is an earlier element in
                       * the set.  The db_indx_t stored immediately before the
                       * current offset is read as that element's length, and
                       * the offset is moved back by DUP_SIZE() of it.
                       */
00305                 if (hcp->dup_off != 0) {
00306                         memcpy(&hcp->dup_len, HKEYDATA_DATA(
00307                             H_PAIRDATA(dbp, hcp->page, hcp->indx))
00308                             + hcp->dup_off - sizeof(db_indx_t),
00309                             sizeof(db_indx_t));
00310                         hcp->dup_off -=
00311                             DUP_SIZE(hcp->dup_len);
00312                         return (__ham_item(dbc, mode, pgnop));
00313                 }
00314         }
00315 
00316         /*
00317          * If we get here, we are not in a duplicate set, and just need
00318          * to back up the cursor.  There are still three cases:
00319          * midpage, beginning of page, beginning of bucket.
00320          */
00321 
00322         if (F_ISSET(hcp, H_DUPONLY)) {
00323                 F_CLR(hcp, H_OK);
00324                 F_SET(hcp, H_NOMORE);
00325                 return (0);
00326         } else
00327                 /*
00328                  * We are no longer in a dup set;  flag this so the dup code
00329                  * will reinitialize should we stumble upon another one.
00330                  */
00331                 F_CLR(hcp, H_ISDUP);
00332 
00333         if (hcp->indx == 0) {           /* Beginning of page. */
00334                 hcp->pgno = PREV_PGNO(hcp->page);
00335                 if (hcp->pgno == PGNO_INVALID) {
00336                         /* Beginning of bucket. */
00337                         F_SET(hcp, H_NOMORE);
00338                         return (DB_NOTFOUND);
00339                 } else if ((ret =
00340                     __ham_next_cpage(dbc, hcp->pgno, 0)) != 0)
00341                         return (ret);
00342                 else
                              /*
                               * NOTE(review): if the previous page held no
                               * entries, indx would be 0 here and the
                               * "indx -= 2" below would wrap; presumably an
                               * empty page is never left in the middle of a
                               * chain -- confirm.
                               */
00343                         hcp->indx = NUM_ENT(hcp->page);
00344         }
00345 
00346         /*
00347          * Either we've got the cursor set up to be decremented, or we
00348          * have to find the end of a bucket.
00349          */
00350         if (hcp->indx == NDX_INVALID) {
00351                 DB_ASSERT(hcp->page != NULL);
00352 
                      /*
                       * Walk forward to the last page of the chain, leaving
                       * indx at that page's entry count.
                       */
00353                 hcp->indx = NUM_ENT(hcp->page);
00354                 for (next_pgno = NEXT_PGNO(hcp->page);
00355                     next_pgno != PGNO_INVALID;
00356                     next_pgno = NEXT_PGNO(hcp->page)) {
00357                         if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
00358                                 return (ret);
00359                         hcp->indx = NUM_ENT(hcp->page);
00360                 }
00361 
00362                 if (hcp->indx == 0) {
00363                         /* Bucket was empty. */
00364                         F_SET(hcp, H_NOMORE);
00365                         return (DB_NOTFOUND);
00366                 }
00367         }
00368 
              /* Step back over one key/data pair (two index slots). */
00369         hcp->indx -= 2;
00370 
00371         return (__ham_item(dbc, mode, pgnop));
00372 }
00373 
00374 /*
00375  * Sets the cursor to the next key/data pair on a page.
00376  *
       * __ham_item_next --
       *      Advance the cursor by one position and validate the result with
       *      __ham_item().  The step taken depends on the cursor flags: a
       *      deleted item, an unpositioned cursor (NDX_INVALID), H_NEXT_NODUP,
       *      an on-page duplicate set, or a plain key/data pair.
       *
       *      dbc:   cursor to move.
       *      mode:  lock mode handed to __ham_get_cpage() and __ham_item().
       *      pgnop: passed through to __ham_item().
       *
       *      Returns the result of __ham_item(), or the error from
       *      __ham_get_cpage().  When H_DUPONLY is set and the cursor would
       *      have to move past the current key, 0 is returned with H_OK
       *      cleared and H_NOMORE set, so callers must inspect the flags.
       *
00377  * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *));
00378  */
00379 int
00380 __ham_item_next(dbc, mode, pgnop)
00381         DBC *dbc;
00382         db_lockmode_t mode;
00383         db_pgno_t *pgnop;
00384 {
00385         HASH_CURSOR *hcp;
00386         int ret;
00387 
00388         hcp = (HASH_CURSOR *)dbc->internal;
00389 
00390         if ((ret = __ham_get_cpage(dbc, mode)) != 0)
00391                 return (ret);
00392 
00393         /*
00394          * Deleted on-page duplicates are a weird case. If we delete the last
00395          * one, then our cursor is at the very end of a duplicate set and
00396          * we actually need to go on to the next key.
00397          */
00398         if (F_ISSET(hcp, H_DELETED)) {
                      /*
                       * If none of the three tests below matches, the index
                       * and duplicate offset are left as they are; presumably
                       * the delete already moved the following item into this
                       * position -- confirm against the delete routines.
                       */
00399                 if (hcp->indx != NDX_INVALID &&
00400                     F_ISSET(hcp, H_ISDUP) &&
00401                     HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx))
00402                         == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) {
00403                         if (F_ISSET(hcp, H_DUPONLY)) {
00404                                 F_CLR(hcp, H_OK);
00405                                 F_SET(hcp, H_NOMORE);
00406                                 return (0);
00407                         } else {
00408                                 F_CLR(hcp, H_ISDUP);
00409                                 hcp->indx += 2;
00410                         }
00411                 } else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) {
00412                         F_CLR(hcp, H_OK);
00413                         F_SET(hcp, H_NOMORE);
00414                         return (0);
00415                 } else if (F_ISSET(hcp, H_ISDUP) &&
00416                     F_ISSET(hcp, H_NEXT_NODUP)) {
00417                         F_CLR(hcp, H_ISDUP);
00418                         hcp->indx += 2;
00419                 }
                      /* __ham_item() refuses a cursor still flagged H_DELETED. */
00420                 F_CLR(hcp, H_DELETED);
00421         } else if (hcp->indx == NDX_INVALID) {
                      /* Unpositioned cursor: start at the first slot. */
00422                 hcp->indx = 0;
00423                 F_CLR(hcp, H_ISDUP);
00424         } else if (F_ISSET(hcp, H_NEXT_NODUP)) {
                      /* Skip whatever duplicates remain: go to the next pair. */
00425                 hcp->indx += 2;
00426                 F_CLR(hcp, H_ISDUP);
00427         } else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) {
                      /*
                       * Inside an on-page duplicate set: move the offset past
                       * the current element, and leave the set for the next
                       * pair once the offset reaches the set's total length.
                       */
00428                 if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >=
00429                     hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) {
00430                         F_CLR(hcp, H_OK);
00431                         F_SET(hcp, H_NOMORE);
00432                         return (0);
00433                 }
00434                 hcp->dup_off += DUP_SIZE(hcp->dup_len);
00435                 if (hcp->dup_off >= hcp->dup_tlen) {
00436                         F_CLR(hcp, H_ISDUP);
00437                         hcp->indx += 2;
00438                 }
00439         } else if (F_ISSET(hcp, H_DUPONLY)) {
00440                 F_CLR(hcp, H_OK);
00441                 F_SET(hcp, H_NOMORE);
00442                 return (0);
00443         } else {
                      /* Plain pair: advance by one key/data pair. */
00444                 hcp->indx += 2;
00445                 F_CLR(hcp, H_ISDUP);
00446         }
00447 
00448         return (__ham_item(dbc, mode, pgnop));
00449 }
00450 
00451 /*
00452  * PUBLIC: void __ham_putitem __P((DB *, PAGE *p, const DBT *, int));
00453  *
00454  * This is a little bit sleazy in that we're overloading the meaning
00455  * of the H_OFFPAGE type here.  When we recover deletes, we have the
00456  * entire entry instead of having only the DBT, so we'll pass type
00457  * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
00458  * an H_KEYDATA around it.
       *
       * __ham_putitem --
       *      Append one item to page p at index NUM_ENT(p).  The item is
       *      placed immediately below the current HOFFSET(p); the new offset
       *      is stored both in the index array slot and in HOFFSET(p), and
       *      NUM_ENT(p) is incremented by one.
       *
       *      dbp:  database handle, used by the page-layout macros.
       *      p:    page to modify in place.
       *      dbt:  bytes to store (dbt->data, dbt->size).
       *      type: H_OFFPAGE copies dbt verbatim; any other value is passed
       *            to PUT_HKEYDATA() as the item type.
       *
       *      No free-space check is made here.
00459  */
00460 void
00461 __ham_putitem(dbp, p, dbt, type)
00462         DB *dbp;
00463         PAGE *p;
00464         const DBT *dbt;
00465         int type;
00466 {
00467         u_int16_t n, off;
00468         db_indx_t *inp;
00469 
00470         n = NUM_ENT(p);
00471         inp = P_INP(dbp, p);
00472 
00473         /* Put the item element on the page. */
00474         if (type == H_OFFPAGE) {
00475                 off = HOFFSET(p) - dbt->size;
00476                 HOFFSET(p) = inp[n] = off;
00477                 memcpy(P_ENTRY(dbp, p, n), dbt->data, dbt->size);
00478         } else {
                      /* HKEYDATA_SIZE() supplies the on-page size for the data. */
00479                 off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
00480                 HOFFSET(p) = inp[n] = off;
00481                 PUT_HKEYDATA(P_ENTRY(dbp, p, n), dbt->data, dbt->size, type);
00482         }
00483 
00484         /* Adjust page info. */
00485         NUM_ENT(p) += 1;
00486 }
00487 
00488 /*
00489  * PUBLIC: void __ham_reputpair  __P((DB *, PAGE *,
00490  * PUBLIC:    u_int32_t, const DBT *, const DBT *));
00491  *
00492  * This is a special case to restore a key/data pair to its original
00493  * location during recovery.  We are guaranteed that the pair fits
00494  * on the page and is not the last pair on the page (because if it's
00495  * the last pair, the normal insert works).
       *
       * __ham_reputpair --
       *      dbp:  database handle; supplies the page size and is used by
       *            the page-layout macros.
       *      p:    page to modify in place.
       *      ndx:  index at which the pair is re-inserted.
       *      key:  raw key bytes, copied verbatim (key->data, key->size).
       *      data: raw data bytes, copied verbatim (data->data, data->size).
       *
       *      Existing items from ndx onwards are moved towards lower offsets
       *      by key->size + data->size bytes, their index slots are moved up
       *      by two, and the new pair is written into the gap.  HOFFSET(p)
       *      and NUM_ENT(p) are updated to match.
00496  */
00497 void
00498 __ham_reputpair(dbp, p, ndx, key, data)
00499         DB *dbp;
00500         PAGE *p;
00501         u_int32_t ndx;
00502         const DBT *key, *data;
00503 {
00504         db_indx_t i, *inp, movebytes, newbytes;
00505         size_t psize;
00506         u_int8_t *from;
00507 
00508         psize = dbp->pgsize;
00509         inp = P_INP(dbp, p);
00510         /* First shuffle the existing items up on the page.  */
              /*
               * The region to move runs from HOFFSET(p) up to the data item
               * of the pair preceding ndx, or to the end of the page when
               * ndx is 0.  memmove is used because source and destination
               * overlap whenever movebytes exceeds newbytes.
               */
00511         movebytes = (db_indx_t)(
00512             (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p));
00513         newbytes = key->size + data->size;
00514         from = (u_int8_t *)p + HOFFSET(p);
00515         memmove(from - newbytes, from, movebytes);
00516 
00517         /*
00518          * Adjust the indices and move them up 2 spaces. Note that we
00519          * have to check the exit condition inside the loop just in case
00520          * we are dealing with index 0 (db_indx_t's are unsigned).
00521          */
00522         for (i = NUM_ENT(p) - 1; ; i-- ) {
00523                 inp[i + 2] = inp[i] - newbytes;
00524                 if (i == H_KEYINDEX(ndx))
00525                         break;
00526         }
00527 
00528         /* Put the key and data on the page. */
              /*
               * The key ends where the preceding pair's data begins (or at
               * the end of the page); the data sits directly below the key.
               */
00529         inp[H_KEYINDEX(ndx)] = (db_indx_t)(
00530             (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - key->size);
00531         inp[H_DATAINDEX(ndx)] = inp[H_KEYINDEX(ndx)] - data->size;
00532         memcpy(P_ENTRY(dbp, p, H_KEYINDEX(ndx)), key->data, key->size);
00533         memcpy(P_ENTRY(dbp, p, H_DATAINDEX(ndx)), data->data, data->size);
00534 
00535         /* Adjust page info. */
00536         HOFFSET(p) -= newbytes;
00537         NUM_ENT(p) += 2;
00538 }
00539 
00540 /*
       * __ham_del_pair --
       *      Delete the key/data pair at the cursor's current index and,
       *      when reclaim_page is nonzero and the page becomes empty, remove
       *      that page from its bucket chain.
       *
       *      dbc:          cursor positioned on the pair to delete.
       *      reclaim_page: nonzero to free a page emptied by the delete
       *                    (never the only page of a bucket).
       *
       *      Off-page key or data items are freed with __db_doff().  The
       *      delete is logged with __ham_insdel_log() when DBC_LOGGING(dbc)
       *      is true.  On return the cursor is flagged H_DELETED and H_OK is
       *      cleared; other cursors are adjusted through __ham_c_update()
       *      and __ham_c_delpg().
       *
       *      Returns 0 on success, or the first error reported by one of
       *      the helpers.
       *
00541  * PUBLIC: int __ham_del_pair __P((DBC *, int));
00542  */
00543 int
00544 __ham_del_pair(dbc, reclaim_page)
00545         DBC *dbc;
00546         int reclaim_page;
00547 {
00548         DB *dbp;
00549         DBT data_dbt, key_dbt;
00550         DB_LSN new_lsn, *n_lsn, tmp_lsn;
00551         DB_MPOOLFILE *mpf;
00552         HASH_CURSOR *hcp;
00553         PAGE *n_pagep, *nn_pagep, *p, *p_pagep;
00554         db_ham_mode op;
00555         db_indx_t ndx;
00556         db_pgno_t chg_pgno, pgno, tmp_pgno;
00557         u_int32_t order;
00558         int ret, t_ret;
00559 
00560         dbp = dbc->dbp;
00561         mpf = dbp->mpf;
00562         hcp = (HASH_CURSOR *)dbc->internal;
00563         n_pagep = p_pagep = nn_pagep = NULL;
00564         ndx = hcp->indx;
00565 
00566         if (hcp->page == NULL && (ret = __memp_fget(
00567             mpf, &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
00568                 return (ret);
00569         p = hcp->page;
00570 
00571         /*
00572          * We optimize for the normal case which is when neither the key nor
00573          * the data are large.  In this case, we write a single log record
00574          * and do the delete.  If either is large, we'll call __big_delete
00575          * to remove the big item and then update the page to remove the
00576          * entry referring to the big item.
00577          */
00578         if (HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) {
00579                 memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))),
00580                     sizeof(db_pgno_t));
00581                 ret = __db_doff(dbc, pgno);
00582         } else
00583                 ret = 0;
00584 
00585         if (ret == 0)
00586                 switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) {
00587                 case H_OFFPAGE:
00588                         memcpy(&pgno,
00589                             HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))),
00590                             sizeof(db_pgno_t));
00591                         ret = __db_doff(dbc, pgno);
00592                         break;
00593                 case H_OFFDUP:
00594                 case H_DUPLICATE:
00595                         /*
00596                          * If we delete a pair that is/was a duplicate, then
00597                          * we had better clear the flag so that we update the
00598                          * cursor appropriately.
00599                          */
00600                         F_CLR(hcp, H_ISDUP);
00601                         break;
00602                 default:
00603                         /* No-op */
00604                         break;
00605                 }
00606 
00607         if (ret)
00608                 return (ret);
00609 
00610         /* Now log the delete off this page. */
00611         if (DBC_LOGGING(dbc)) {
00612                 key_dbt.data = P_ENTRY(dbp, p, H_KEYINDEX(ndx));
00613                 key_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_KEYINDEX(ndx));
00614                 data_dbt.data = P_ENTRY(dbp, p, H_DATAINDEX(ndx));
00615                 data_dbt.size =
00616                     LEN_HITEM(dbp, p, dbp->pgsize, H_DATAINDEX(ndx));
00617 
00618                 if ((ret = __ham_insdel_log(dbp,
00619                     dbc->txn, &new_lsn, 0, DELPAIR, PGNO(p), (u_int32_t)ndx,
00620                     &LSN(p), &key_dbt, &data_dbt)) != 0)
00621                         return (ret);
00622         } else
00623                 LSN_NOT_LOGGED(new_lsn);
00624 
00625         /* Move lsn onto page. */
00626         LSN(p) = new_lsn;
00627 
00628         /* Do the delete. */
00629         __ham_dpair(dbp, p, ndx);
00630 
00631         /*
00632          * Mark item deleted so that we don't try to return it, and
00633          * so that we update the cursor correctly on the next call
00634          * to next.
00635          */
00636         F_SET(hcp, H_DELETED);
00637         F_CLR(hcp, H_OK);
00638 
00639         /*
00640          * Update cursors that are on the page where the delete happened.
00641          */
00642         if ((ret = __ham_c_update(dbc, 0, 0, 0)) != 0)
00643                 return (ret);
00644 
00645         /*
00646          * If we are locking, we will not maintain this, because it is
00647          * a hot spot.
00648          *
00649          * XXX
00650          * Perhaps we can retain incremental numbers and apply them later.
00651          */
00652         if (!STD_LOCKING(dbc)) {
00653                 --hcp->hdr->nelem;
00654                 if ((ret = __ham_dirty_meta(dbc)) != 0)
00655                         return (ret);
00656         }
00657 
00658         /*
00659          * If we need to reclaim the page, then check if the page is empty.
00660          * There are two cases.  If it's empty and it's not the first page
00661          * in the bucket (i.e., the bucket page) then we can simply remove
00662          * it. If it is the first chain in the bucket, then we need to copy
00663          * the second page into it and remove the second page.
00664          * If it's the only page in the bucket we leave it alone.
00665          */
00666         if (!reclaim_page ||
00667             NUM_ENT(p) != 0 ||
00668             (PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID))
00669                 return (__memp_fset(mpf, p, DB_MPOOL_DIRTY));
00670 
00671         if (PREV_PGNO(p) == PGNO_INVALID) {
00672                 /*
00673                  * First page in chain is empty and we know that there
00674                  * are more pages in the chain.
00675                  */
00676                 if ((ret = __memp_fget(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
00677                         return (ret);
00678 
00679                 if (NEXT_PGNO(n_pagep) != PGNO_INVALID && (ret =
00680                     __memp_fget(mpf, &NEXT_PGNO(n_pagep), 0, &nn_pagep)) != 0)
00681                         goto err;
00682 
00683                 if (DBC_LOGGING(dbc)) {
00684                         key_dbt.data = n_pagep;
00685                         key_dbt.size = dbp->pgsize;
00686                         if ((ret = __ham_copypage_log(dbp,
00687                             dbc->txn, &new_lsn, 0, PGNO(p),
00688                             &LSN(p), PGNO(n_pagep), &LSN(n_pagep),
00689                             NEXT_PGNO(n_pagep),
00690                             nn_pagep == NULL ? NULL : &LSN(nn_pagep),
00691                             &key_dbt)) != 0)
00692                                 goto err;
00693                 } else
00694                         LSN_NOT_LOGGED(new_lsn);
00695 
00696                 /* Move lsn onto page. */
00697                 LSN(p) = new_lsn;       /* Structure assignment. */
00698                 LSN(n_pagep) = new_lsn;
00699                 if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
00700                         LSN(nn_pagep) = new_lsn;
00701 
00702                 if (nn_pagep != NULL) {
00703                         PREV_PGNO(nn_pagep) = PGNO(p);
00704                         ret = __memp_fput(mpf, nn_pagep, DB_MPOOL_DIRTY);
00705                         /* Never put this page again on a later "goto err". */
00706                         nn_pagep = NULL;
00707                         if (ret != 0)
00708                                 goto err;
00709                 }
00710 
                      /*
                       * Copy the second page over the first, keeping the
                       * first page's own page number and LSN, and make it
                       * the head of the chain again.
                       */
00711                 tmp_pgno = PGNO(p);
00712                 tmp_lsn = LSN(p);
00713                 memcpy(p, n_pagep, dbp->pgsize);
00714                 PGNO(p) = tmp_pgno;
00715                 LSN(p) = tmp_lsn;
00716                 PREV_PGNO(p) = PGNO_INVALID;
00717 
00718                 /*
00719                  * Update cursors to reflect the fact that records
00720                  * on the second page have moved to the first page.
00721                  */
00722                 if ((ret = __ham_c_delpg(dbc, PGNO(n_pagep),
00723                     PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0)
00724                         goto err;
00725 
00726                 /*
00727                  * Update the cursor to reflect its new position.
00728                  */
00729                 hcp->indx = 0;
00730                 hcp->pgno = PGNO(p);
00731                 hcp->order += order;
00732 
00733                 if ((ret = __memp_fset(mpf, p, DB_MPOOL_DIRTY)) != 0)
00734                         goto err;
00735                 if ((ret = __db_free(dbc, n_pagep)) != 0) {
00736                         n_pagep = NULL;
00737                         goto err;
00738                 }
00739         } else {
00740                 if ((ret = __memp_fget(mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0)
00741                         goto err;
00742 
00743                 if (NEXT_PGNO(p) != PGNO_INVALID) {
00744                         if ((ret =
00745                             __memp_fget(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
00746                                 goto err;
00747                         n_lsn = &LSN(n_pagep);
00748                 } else {
00749                         n_pagep = NULL;
00750                         n_lsn = NULL;
00751                 }
00752 
00753                 if (DBC_LOGGING(dbc)) {
00754                         if ((ret = __ham_newpage_log(dbp, dbc->txn,
00755                             &new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep),
00756                             PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
00757                                 goto err;
00758                 } else
00759                         LSN_NOT_LOGGED(new_lsn);
00760 
00761                 /* Unlink p only after logging; a failed log changes nothing. */
00762                 NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
00763                 if (n_pagep != NULL)
00764                         PREV_PGNO(n_pagep) = PGNO(p_pagep);
00765                 /* Move lsn onto page. */
00766                 LSN(p_pagep) = new_lsn; /* Structure assignment. */
00767                 if (n_pagep)
00768                         LSN(n_pagep) = new_lsn;
00769                 LSN(p) = new_lsn;
00770 
00771                 if (NEXT_PGNO(p) == PGNO_INVALID) {
00772                         /*
00773                          * There is no next page; put the cursor on the
00774                          * previous page as if we'd deleted the last item
00775                          * on that page, with index after the last valid
00776                          * entry.
00777                          *
00778                          * The deleted flag was set up above.
00779                          */
00780                         hcp->pgno = PGNO(p_pagep);
00781                         hcp->indx = NUM_ENT(p_pagep);
00782                         op = DB_HAM_DELLASTPG;
00783                 } else {
00784                         /*
00785                          * There is a next page, so put the cursor at
00786                          * the beginning of it.
00787                          */
00788                         hcp->pgno = NEXT_PGNO(p);
00789                         hcp->indx = 0;
00790                         op = DB_HAM_DELMIDPG;
00791                 }
00792 
00793                 /*
00794                  * Since we are about to delete the cursor page and we have
00795                  * just moved the cursor, we need to make sure that the
00796                  * old page pointer isn't left hanging around in the cursor.
00797                  */
00798                 hcp->page = NULL;
00799                 chg_pgno = PGNO(p);
                      /*
                       * Both neighbours are put even if the free failed, so
                       * this path returns directly instead of using "err".
                       */
00800                 ret = __db_free(dbc, p);
00801                 if ((t_ret =
00802                     __memp_fput(mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
00803                         ret = t_ret;
00804                 if (n_pagep != NULL && (t_ret =
00805                     __memp_fput(mpf, n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
00806                         ret = t_ret;
00807                 if (ret != 0)
00808                         return (ret);
00809                 if ((ret = __ham_c_delpg(dbc,
00810                     chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0)
00811                         return (ret);
00812                 hcp->order += order;
00813         }
00814         return (ret);
00815 
00816 err:    /* Clean up any pages. */
              /* Only pointers that still hold a reference are non-NULL here. */
00817         if (n_pagep != NULL)
00818                 (void)__memp_fput(mpf, n_pagep, 0);
00819         if (nn_pagep != NULL)
00820                 (void)__memp_fput(mpf, nn_pagep, 0);
00821         if (p_pagep != NULL)
00822                 (void)__memp_fput(mpf, p_pagep, 0);
00823         return (ret);
00824 }
00825 
00826 /*
00827  * __ham_replpair --
00828  *      Given the key data indicated by the cursor, replace part/all of it
00829  *      according to the fields in the dbt.
       *
       *      dbc: cursor whose hcp->page/hcp->indx identify the pair to change.
       *      dbt: the new bytes (data/size) plus the range of old bytes they
       *           replace (doff/dlen).
       *      make_dup: not interpreted here; handed to __ham_replace_log on
       *           the in-place path.
       *
       *      Returns 0 on success, otherwise a non-zero error code from the
       *      call that failed.
00830  *
00831  * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t));
00832  */
00833 int
00834 __ham_replpair(dbc, dbt, make_dup)
00835         DBC *dbc;
00836         DBT *dbt;
00837         u_int32_t make_dup;
00838 {
00839         DB *dbp;
00840         DBT old_dbt, tdata, tmp;
00841         DB_ENV *dbenv;
00842         DB_LSN  new_lsn;
00843         HASH_CURSOR *hcp;
00844         u_int32_t change;
00845         u_int32_t dup_flag, len, memsize, newlen;
00846         int beyond_eor, is_big, is_plus, ret, type;
00847         u_int8_t *beg, *dest, *end, *hk, *src;
00848         void *memp;
00849 
00850         /*
00851          * Items that were already offpage (ISBIG) were handled before
00852          * we get in here.  So, we need only handle cases where the old
00853          * key is on a regular page.  That leaves us 6 cases:
00854          * 1. Original data onpage; new data is smaller
00855          * 2. Original data onpage; new data is the same size
00856          * 3. Original data onpage; new data is bigger, but not ISBIG,
00857          *    fits on page
00858          * 4. Original data onpage; new data is bigger, but not ISBIG,
00859          *    does not fit on page
00860          * 5. Original data onpage; New data is an off-page item.
00861          * 6. Original data was offpage; new item is smaller.
00862          *
00863          * Cases 1-3 are essentially the same (and should be the common case).
00864          * We handle 4-6 as delete and add.
00865          */
00866         dbp = dbc->dbp;
00867         dbenv = dbp->dbenv;
00868         hcp = (HASH_CURSOR *)dbc->internal;
00869 
00870         /*
00871          * We need to compute the number of bytes that we are adding or
00872          * removing from the entry.  Normally, we can simply subtract
00873          * the number of bytes we are replacing (dbt->dlen) from the
00874          * number of bytes we are inserting (dbt->size).  However, if
00875          * we are doing a partial put off the end of a record, then this
00876          * formula doesn't work, because we are essentially adding
00877          * new bytes.
               *
               * change is kept as an unsigned magnitude; is_plus records
               * whether the entry grows (1) or shrinks/stays the same (0).
00878          */
00879         if (dbt->size > dbt->dlen) {
00880                 change = dbt->size - dbt->dlen;
00881                 is_plus = 1;
00882         } else {
00883                 change = dbt->dlen - dbt->size;
00884                 is_plus = 0;
00885         }
00886 
00887         hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
00888         is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
00889 
              /* len is the current length of the data item being replaced. */
00890         if (is_big)
00891                 memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
00892         else
00893                 len = LEN_HKEYDATA(dbp, hcp->page,
00894                     dbp->pgsize, H_DATAINDEX(hcp->indx));
00895 
00896         beyond_eor = dbt->doff + dbt->dlen > len;
00897         if (beyond_eor) {
00898                 /*
00899                  * The change is beyond the end of record.  If change
00900                  * is a positive number, we can simply add the extension
00901                  * to it.  However, if change is negative, then we need
00902                  * to figure out if the extension is larger than the
00903                  * negative change.
00904                  */
00905                 if (is_plus)
00906                         change += dbt->doff + dbt->dlen - len;
00907                 else if (dbt->doff + dbt->dlen - len > change) {
00908                         /* Extension bigger than change */
00909                         is_plus = 1;
00910                         change = (dbt->doff + dbt->dlen - len) - change;
00911                 } else /* Extension is smaller than change. */
00912                         change -= (dbt->doff + dbt->dlen - len);
00913         }
00914 
00915         newlen = (is_plus ? len + change : len - change);
00916         if (ISBIG(hcp, newlen) ||
00917             (is_plus && change > P_FREESPACE(dbp, hcp->page)) ||
00918             beyond_eor || is_big) {
00919                 /* 
00920                  * If we are in cases 4 or 5 then is_plus will be true.
00921                  * If we don't have a transaction then we cannot roll back,
00922                  * make sure there is enough room for the new page.
00923                  */
00924                 if (is_plus && dbc->txn == NULL &&
00925                     dbp->mpf->mfp->maxpgno != 0 &&
00926                     dbp->mpf->mfp->maxpgno == dbp->mpf->mfp->last_pgno)
00927                         return (__db_space_err(dbp));
00928                 /*
00929                  * Cases 4-6 -- two subcases.
00930                  * A. This is not really a partial operation, but an overwrite.
00931                  *    Simple del and add works.
00932                  * B. This is a partial and we need to construct the data that
00933                  *    we are really inserting (yuck).
00934                  * In both cases, we need to grab the key off the page (in
00935                  * some cases we could do this outside of this routine; for
00936                  * cleanliness we do it here.  If you happen to be on a big
00937                  * key, this could be a performance hit).
00938                  */
00939                 memset(&tmp, 0, sizeof(tmp));
00940                 if ((ret =
00941                     __db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx),
00942                     &tmp, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
00943                         return (ret);
00944 
00945                 /* Preserve duplicate info. */
00946                 dup_flag = F_ISSET(hcp, H_ISDUP);
00947                 if (dbt->doff == 0 && dbt->dlen == len) {
00948                         ret = __ham_del_pair(dbc, 0);
00949                         if (ret == 0)
00950                             ret = __ham_add_el(dbc,
00951                                 &tmp, dbt, dup_flag ? H_DUPLICATE : H_KEYDATA);
00952                 } else {                                        /* Case B */
00953                         type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
00954                             HPAGE_PTYPE(hk) : H_KEYDATA;
00955                         memset(&tdata, 0, sizeof(tdata));
00956                         memp = NULL;
00957                         memsize = 0;
00958                         if ((ret = __db_ret(dbp, hcp->page,
00959                             H_DATAINDEX(hcp->indx), &tdata, &memp, &memsize))
00960                             != 0)
00961                                 goto err;
00962 
00963                         /* Now we can delete the item. */
00964                         if ((ret = __ham_del_pair(dbc, 0)) != 0) {
00965                                 __os_free(dbenv, memp);
00966                                 goto err;
00967                         }
00968 
00969                         /* Now shift old data around to make room for new. */
00970                         if (is_plus) {
00971                                 if ((ret = __os_realloc(dbenv,
00972                                     tdata.size + change, &tdata.data)) != 0) {
                                              /*
                                               * Assuming __os_realloc, like
                                               * realloc(3), leaves the old
                                               * allocation in place when it
                                               * fails, release the copy made
                                               * by __db_ret instead of
                                               * leaking it.
                                               */
                                              __os_free(dbenv, memp);
00973                                         return (ret);
                                      }
00974                                 memp = tdata.data;
00975                                 memsize = tdata.size + change;
00976                                 memset((u_int8_t *)tdata.data + tdata.size,
00977                                     0, change);
00978                         }
00979                         end = (u_int8_t *)tdata.data + tdata.size;
00980 
                              /*
                               * src is the first old byte after the replaced
                               * range; if any such bytes exist, slide them by
                               * change so the new data fits exactly.
                               */
00981                         src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
00982                         if (src < end && tdata.size > dbt->doff + dbt->dlen) {
00983                                 len = tdata.size - (dbt->doff + dbt->dlen);
00984                                 if (is_plus)
00985                                         dest = src + change;
00986                                 else
00987                                         dest = src - change;
00988                                 memmove(dest, src, len);
00989                         }
00990                         memcpy((u_int8_t *)tdata.data + dbt->doff,
00991                             dbt->data, dbt->size);
00992                         if (is_plus)
00993                                 tdata.size += change;
00994                         else
00995                                 tdata.size -= change;
00996 
00997                         /* Now add the pair. */
00998                         ret = __ham_add_el(dbc, &tmp, &tdata, type);
00999                         __os_free(dbenv, memp);
01000                 }
01001                 F_SET(hcp, dup_flag);
01002 err:            return (ret);
01003         }
01004 
01005         /*
01006          * Set up pointer into existing data. Do it before the log
01007          * message so we can use it inside of the log setup.
01008          */
01009         beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
01010         beg += dbt->doff;
01011 
01012         /*
01013          * If we are going to have to move bytes at all, figure out
01014          * all the parameters here.  Then log the call before moving
01015          * anything around.
01016          */
01017         if (DBC_LOGGING(dbc)) {
01018                 old_dbt.data = beg;
01019                 old_dbt.size = dbt->dlen;
01020                 if ((ret = __ham_replace_log(dbp,
01021                     dbc->txn, &new_lsn, 0, PGNO(hcp->page),
01022                     (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page),
01023                     (int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
01024                         return (ret);
01025 
01026         } else
01027                 LSN_NOT_LOGGED(new_lsn);
01028 
01029         LSN(hcp->page) = new_lsn;       /* Structure assignment. */
01030 
01031         __ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx),
01032             (int32_t)dbt->doff, change, is_plus, dbt);
01033 
01034         return (0);
01035 }
01036 
01037 /*
01038  * Replace data on a page with new data, possibly growing or shrinking what's
01039  * there.  This is called on two different occasions. On one (from replpair)
01040  * we are interested in changing only the data.  On the other (from recovery)
01041  * we are replacing the entire data (header and all) with a new element.  In
01042  * the latter case, the off argument is negative.
       * dbp: database handle; supplies the page size and is passed to the
       *      page-layout macros.
01043  * pagep: the page that we're changing
01044  * ndx: page index of the element that is growing/shrinking.
01045  * off: Offset at which we are beginning the replacement.
01046  * change: the number of bytes by which the element is growing/shrinking.
       *      This is a magnitude only; the direction comes from is_plus.
       * is_plus: non-zero if the element grows by change bytes, zero if it
       *      shrinks by change bytes.
01047  * dbt: the new data; written at offset off within the element's data, or
       *      over the whole entry (header included) when off is negative.
01048  *
01049  * PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t,
01050  * PUBLIC:     int32_t, u_int32_t,  int, DBT *));
01051  */
01052 void
01053 __ham_onpage_replace(dbp, pagep, ndx, off, change, is_plus, dbt)
01054         DB *dbp;
01055         PAGE *pagep;
01056         u_int32_t ndx;
01057         int32_t off;
01058         u_int32_t change;
01059         int is_plus;
01060         DBT *dbt;
01061 {
01062         db_indx_t i, *inp;
01063         int32_t len;
01064         size_t pgsize;
01065         u_int8_t *src, *dest;
01066         int zero_me;
01067 
01068         pgsize = dbp->pgsize;
01069         inp = P_INP(dbp, pagep);
01070         if (change != 0) {
                      /*
                       * The region to shift starts at HOFFSET(pagep) and ends
                       * at the replacement point; len is its size in bytes.
                       */
01071                 zero_me = 0;
01072                 src = (u_int8_t *)(pagep) + HOFFSET(pagep);
01073                 if (off < 0)
                              /* Whole-entry replacement: stop at the entry itself. */
01074                         len = inp[ndx] - HOFFSET(pagep);
01075                 else if ((u_int32_t)off >=
01076                     LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) {
                              /*
                               * off is at or past the end of the current data:
                               * shift through the end of the item and remember
                               * to zero the bytes that open up after it.
                               */
01077                         len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx))
01078                             + LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src);
01079                         zero_me = 1;
01080                 } else
                              /* Replacement starts inside the item: stop at off. */
01081                         len = (int32_t)(
01082                             (HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) -
01083                             src);
                      /*
                       * Growing moves the region toward lower addresses,
                       * shrinking moves it toward higher addresses.
                       */
01084                 if (is_plus)
01085                         dest = src - change;
01086                 else
01087                         dest = src + change;
01088                 memmove(dest, src, (size_t)len);
01089                 if (zero_me)
01090                         memset(dest + len, 0, change);
01091 
01092                 /* Now update the indices. */
                      /* Entries ndx through NUM_ENT - 1 shift by the same amount. */
01093                 for (i = ndx; i < NUM_ENT(pagep); i++) {
01094                         if (is_plus)
01095                                 inp[i] -= change;
01096                         else
01097                                 inp[i] += change;
01098                 }
01099                 if (is_plus)
01100                         HOFFSET(pagep) -= change;
01101                 else
01102                         HOFFSET(pagep) += change;
01103         }
              /*
               * Copy the new bytes in, using the entry position as it stands
               * after any index adjustment above.
               */
01104         if (off >= 0)
01105                 memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off,
01106                     dbt->data, dbt->size);
01107         else
01108                 memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size);
01109 }
01110 
01111 /*
01112  * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
01113  */
01114 int
01115 __ham_split_page(dbc, obucket, nbucket)
01116         DBC *dbc;
01117         u_int32_t obucket, nbucket;
01118 {
01119         DB *dbp;
01120         DBC **carray;
01121         DBT key, page_dbt;
01122         DB_ENV *dbenv;
01123         DB_LOCK block;
01124         DB_LSN new_lsn;
01125         DB_MPOOLFILE *mpf;
01126         HASH_CURSOR *hcp, *cp;
01127         PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
01128         db_indx_t n;
01129         db_pgno_t bucket_pgno, npgno, next_pgno;
01130         u_int32_t big_len, len;
01131         int found, i, ret, t_ret;
01132         void *big_buf;
01133 
01134         dbp = dbc->dbp;
01135         carray = NULL;
01136         dbenv = dbp->dbenv;
01137         mpf = dbp->mpf;
01138         hcp = (HASH_CURSOR *)dbc->internal;
01139         temp_pagep = old_pagep = new_pagep = NULL;
01140         npgno = PGNO_INVALID;
01141         LOCK_INIT(block);
01142 
01143         bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
01144         if ((ret = __db_lget(dbc,
01145             0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0)
01146                 goto err;
01147         if ((ret = __memp_fget(mpf,
01148             &bucket_pgno, DB_MPOOL_CREATE, &old_pagep)) != 0)
01149                 goto err;
01150 
01151         /* Properly initialize the new bucket page. */
01152         npgno = BUCKET_TO_PAGE(hcp, nbucket);
01153         if ((ret = __memp_fget(mpf, &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0)
01154                 goto err;
01155         P_INIT(new_pagep,
01156             dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
01157 
01158         temp_pagep = hcp->split_buf;
01159         memcpy(temp_pagep, old_pagep, dbp->pgsize);
01160 
01161         if (DBC_LOGGING(dbc)) {
01162                 page_dbt.size = dbp->pgsize;
01163                 page_dbt.data = old_pagep;
01164                 if ((ret = __ham_splitdata_log(dbp,
01165                     dbc->txn, &new_lsn, 0, SPLITOLD,
01166                     PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
01167                         goto err;
01168         } else
01169                 LSN_NOT_LOGGED(new_lsn);
01170 
01171         LSN(old_pagep) = new_lsn;       /* Structure assignment. */
01172 
01173         P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID,
01174             PGNO_INVALID, 0, P_HASH);
01175 
01176         big_len = 0;
01177         big_buf = NULL;
01178         key.flags = 0;
01179         while (temp_pagep != NULL) {
01180                 if ((ret = __ham_get_clist(dbp,
01181                     PGNO(temp_pagep), NDX_INVALID, &carray)) != 0)
01182                         goto err;
01183 
01184                 for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) {
01185                         if ((ret = __db_ret(dbp, temp_pagep,
01186                             H_KEYINDEX(n), &key, &big_buf, &big_len)) != 0)
01187                                 goto err;
01188 
01189                         if (__ham_call_hash(dbc, key.data, key.size) == obucket)
01190                                 pp = &old_pagep;
01191                         else
01192                                 pp = &new_pagep;
01193 
01194                         /*
01195                          * Figure out how many bytes we need on the new
01196                          * page to store the key/data pair.
01197                          */
01198                         len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
01199                             H_DATAINDEX(n)) +
01200                             LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
01201                             H_KEYINDEX(n)) +
01202                             2 * sizeof(db_indx_t);
01203 
01204                         if (P_FREESPACE(dbp, *pp) < len) {
01205                                 if (DBC_LOGGING(dbc)) {
01206                                         page_dbt.size = dbp->pgsize;
01207                                         page_dbt.data = *pp;
01208                                         if ((ret = __ham_splitdata_log(dbp,
01209                                             dbc->txn, &new_lsn, 0,
01210                                             SPLITNEW, PGNO(*pp), &page_dbt,
01211                                             &LSN(*pp))) != 0)
01212                                                 goto err;
01213                                 } else
01214                                         LSN_NOT_LOGGED(new_lsn);
01215                                 LSN(*pp) = new_lsn;
01216                                 if ((ret =
01217                                     __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0)
01218                                         goto err;
01219                         }
01220 
01221                         /* Check if we need to update a cursor. */
01222                         if (carray != NULL) {
01223                                 found = 0;
01224                                 for (i = 0; carray[i] != NULL; i++) {
01225                                         cp =
01226                                             (HASH_CURSOR *)carray[i]->internal;
01227                                         if (cp->pgno == PGNO(temp_pagep) &&
01228                                             cp->indx == n) {
01229                                                 cp->pgno = PGNO(*pp);
01230                                                 cp->indx = NUM_ENT(*pp);
01231                                                 found = 1;
01232                                         }
01233                                 }
01234                                 if (found && DBC_LOGGING(dbc) &&
01235                                     IS_SUBTRANSACTION(dbc->txn)) {
01236                                         if ((ret =
01237                                             __ham_chgpg_log(dbp,
01238                                             dbc->txn, &new_lsn, 0,
01239                                             DB_HAM_SPLIT, PGNO(temp_pagep),
01240                                             PGNO(*pp), n, NUM_ENT(*pp))) != 0)
01241                                                 goto err;
01242                                 }
01243                         }
01244                         __ham_copy_item(dbp, temp_pagep, H_KEYINDEX(n), *pp);
01245                         __ham_copy_item(dbp, temp_pagep, H_DATAINDEX(n), *pp);
01246                 }
01247                 next_pgno = NEXT_PGNO(temp_pagep);
01248 
01249                 /* Clear temp_page; if it's a link overflow page, free it. */
01250                 if (PGNO(temp_pagep) != bucket_pgno && (ret =
01251                     __db_free(dbc, temp_pagep)) != 0) {
01252                         temp_pagep = NULL;
01253                         goto err;
01254                 }
01255 
01256                 if (next_pgno == PGNO_INVALID)
01257                         temp_pagep = NULL;
01258                 else if ((ret = __memp_fget(
01259                     mpf, &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0)
01260                         goto err;
01261 
01262                 if (temp_pagep != NULL) {
01263                         if (DBC_LOGGING(dbc)) {
01264                                 page_dbt.size = dbp->pgsize;
01265                                 page_dbt.data = temp_pagep;
01266                                 if ((ret = __ham_splitdata_log(dbp,
01267                                     dbc->txn, &new_lsn, 0,
01268                                     SPLITOLD, PGNO(temp_pagep),
01269                                     &page_dbt, &LSN(temp_pagep))) != 0)
01270                                         goto err;
01271                         } else
01272                                 LSN_NOT_LOGGED(new_lsn);
01273                         LSN(temp_pagep) = new_lsn;
01274                 }
01275 
01276                 if (carray != NULL)     /* We never knew its size. */
01277                         __os_free(dbenv, carray);
01278                 carray = NULL;
01279         }
01280         if (big_buf != NULL)
01281                 __os_free(dbenv, big_buf);
01282 
01283         /*
01284          * If the original bucket spanned multiple pages, then we've got
01285          * a pointer to a page that used to be on the bucket chain.  It
01286          * should be deleted.
01287          */
01288         if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
01289             (ret = __db_free(dbc, temp_pagep)) != 0) {
01290                 temp_pagep = NULL;
01291                 goto err;
01292         }
01293 
01294         /*
01295          * Write new buckets out.
01296          */
01297         if (DBC_LOGGING(dbc)) {
01298                 page_dbt.size = dbp->pgsize;
01299                 page_dbt.data = old_pagep;
01300                 if ((ret = __ham_splitdata_log(dbp, dbc->txn,
01301                     &new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt,
01302                     &LSN(old_pagep))) != 0)
01303                         goto err;
01304                 LSN(old_pagep) = new_lsn;
01305 
01306                 page_dbt.data = new_pagep;
01307                 if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0,
01308                     SPLITNEW, PGNO(new_pagep), &page_dbt,
01309                     &LSN(new_pagep))) != 0)
01310                         goto err;
01311                 LSN(new_pagep) = new_lsn;
01312         } else {
01313                 LSN_NOT_LOGGED(LSN(old_pagep));
01314                 LSN_NOT_LOGGED(LSN(new_pagep));
01315         }
01316 
01317         ret = __memp_fput(mpf, old_pagep, DB_MPOOL_DIRTY);
01318         if ((t_ret =
01319             __memp_fput(mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
01320                 ret = t_ret;
01321 
01322         if (0) {
01323 err:            if (old_pagep != NULL)
01324                         (void)__memp_fput(mpf, old_pagep, DB_MPOOL_DIRTY);
01325                 if (new_pagep != NULL) {
01326                         P_INIT(new_pagep, dbp->pgsize,
01327                              npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
01328                         (void)__memp_fput(mpf, new_pagep, DB_MPOOL_DIRTY);
01329                 }
01330                 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
01331                         (void)__memp_fput(mpf, temp_pagep, DB_MPOOL_DIRTY);
01332         }
01333         if ((t_ret = __TLPUT(dbc, block)) != 0 && ret == 0)
01334                 ret = t_ret;
01335         if (carray != NULL)             /* We never knew its size. */
01336                 __os_free(dbenv, carray);
01337         return (ret);
01338 }
01339 
01340 /*
01341  * Add the given pair to the page.  The page in question may already be
01342  * held (i.e. it was already gotten).  If it is, then it is found in the
01343  * cursor (hcp->page); otherwise it is fetched here.  On return, hcp->page
01344  * and hcp->pgno refer to the page to which we just added something, which
01345  * may be a later page on the chain or a freshly added overflow page.
       *
       * key, val: the pair to add.  An item for which ISBIG is true is written
       *      with __db_poff and only an HOFFPAGE reference is put on the page.
       * type: the on-page type used for the data item when it is not big.
       *
       * Returns 0 on success or a non-zero error code.  H_EXPAND is set on
       * the cursor when an overflow page was added or the page now holds
       * more pairs than the fill factor.
01346  *
01347  * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int));
01348  */
01349 int
01350 __ham_add_el(dbc, key, val, type)
01351         DBC *dbc;
01352         const DBT *key, *val;
01353         int type;
01354 {
01355         const DBT *pkey, *pdata;
01356         DB *dbp;
01357         DBT key_dbt, data_dbt;
01358         DB_LSN new_lsn;
01359         DB_MPOOLFILE *mpf;
01360         HASH_CURSOR *hcp;
01361         HOFFPAGE doff, koff;
01362         db_pgno_t next_pgno, pgno;
01363         u_int32_t data_size, key_size;
01364         u_int32_t pages, pagespace, pairsize, rectype;
01365         int do_expand, is_keybig, is_databig, ret;
01366         int key_type, data_type;
01367 
01368         dbp = dbc->dbp;
01369         mpf = dbp->mpf;
01370         hcp = (HASH_CURSOR *)dbc->internal;
01371         do_expand = 0;
01372 
01373         pgno = hcp->seek_found_page != PGNO_INVALID ?
01374             hcp->seek_found_page : hcp->pgno;
01375         if (hcp->page == NULL &&
01376             (ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
01377                 return (ret);
01378 
              /*
               * key_size and data_size are the bytes each item takes on the
               * page: the item itself, or just an off-page reference if big.
               */
01379         key_size = HKEYDATA_PSIZE(key->size);
01380         data_size = HKEYDATA_PSIZE(val->size);
01381         is_keybig = ISBIG(hcp, key->size);
01382         is_databig = ISBIG(hcp, val->size);
01383         if (is_keybig)
01384                 key_size = HOFFPAGE_PSIZE;
01385         if (is_databig)
01386                 data_size = HOFFPAGE_PSIZE;
01387 
01388         pairsize = key_size + data_size;
01389 
01390         /* Advance to first page in chain with room for item. */
01391         while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) {
01392                 /*
01393                  * This may not be the end of the chain, but the pair may fit
01394                  * anyway.  Check if it's a bigpair that fits or a regular
01395                  * pair that fits.
01396                  */
01397                 if (P_FREESPACE(dbp, hcp->page) >= pairsize)
01398                         break;
01399                 next_pgno = NEXT_PGNO(hcp->page);
01400                 if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
01401                         return (ret);
01402         }
01403 
01404         /*
01405          * Check if we need to allocate a new page.
01406          */
01407         if (P_FREESPACE(dbp, hcp->page) < pairsize) {
01408                 do_expand = 1;
01409                 if ((ret = __ham_add_ovflpage(dbc,
01410                     (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0)
01411                         return (ret);
01412                 hcp->pgno = PGNO(hcp->page);
01413         }
01414 
01415         /*
01416          * If we don't have a transaction then make sure we will not
01417          * run out of file space before updating the key or data.
               *
               * The estimate has to cover whichever of the two items goes
               * off-page, so the comparison is made whether or not the key
               * is big, and the data's page count comes from val->size
               * (data_size already holds only HOFFPAGE_PSIZE here).
01418          */
01419         if (dbc->txn == NULL &&
01420             dbp->mpf->mfp->maxpgno != 0 && (is_keybig || is_databig)) {
01421                 pagespace = P_MAXSPACE(dbp, dbp->pgsize);
01422                 pages = 0;
01423                 /* Size each chain from the item itself, not its stub. */
01424                 if (is_databig)
01425                         pages = ((val->size - 1) / pagespace) + 1;
01426                 if (is_keybig)
01427                         pages += ((key->size - 1) / pagespace) + 1;
01428                 if (pages >
01429                     (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno))
01430                         return (__db_space_err(dbp));
01431         }
01432 
01433         /*
01434          * Update cursor.
01435          */
01436         hcp->indx = NUM_ENT(hcp->page);
01437         F_CLR(hcp, H_DELETED);
01438         if (is_keybig) {
01439                 koff.type = H_OFFPAGE;
01440                 UMRW_SET(koff.unused[0]);
01441                 UMRW_SET(koff.unused[1]);
01442                 UMRW_SET(koff.unused[2]);
01443                 if ((ret = __db_poff(dbc, key, &koff.pgno)) != 0)
01444                         return (ret);
01445                 koff.tlen = key->size;
01446                 key_dbt.data = &koff;
01447                 key_dbt.size = sizeof(koff);
01448                 pkey = &key_dbt;
01449                 key_type = H_OFFPAGE;
01450         } else {
01451                 pkey = key;
01452                 key_type = H_KEYDATA;
01453         }
01454 
01455         if (is_databig) {
01456                 doff.type = H_OFFPAGE;
01457                 UMRW_SET(doff.unused[0]);
01458                 UMRW_SET(doff.unused[1]);
01459                 UMRW_SET(doff.unused[2]);
01460                 if ((ret = __db_poff(dbc, val, &doff.pgno)) != 0)
01461                         return (ret);
01462                 doff.tlen = val->size;
01463                 data_dbt.data = &doff;
01464                 data_dbt.size = sizeof(doff);
01465                 pdata = &data_dbt;
01466                 data_type = H_OFFPAGE;
01467         } else {
01468                 pdata = val;
01469                 data_type = type;
01470         }
01471 
01472         if (DBC_LOGGING(dbc)) {
01473                 rectype = PUTPAIR;
01474                 if (is_databig)
01475                         rectype |= PAIR_DATAMASK;
01476                 if (is_keybig)
01477                         rectype |= PAIR_KEYMASK;
01478                 if (type == H_DUPLICATE)
01479                         rectype |= PAIR_DUPMASK;
01480 
01481                 if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0,
01482                     rectype, PGNO(hcp->page), (u_int32_t)NUM_ENT(hcp->page),
01483                     &LSN(hcp->page), pkey, pdata)) != 0)
01484                         return (ret);
01485         } else
01486                 LSN_NOT_LOGGED(new_lsn);
01487 
01488         /* Move lsn onto page. */
01489         LSN(hcp->page) = new_lsn;       /* Structure assignment. */
01490 
01491         __ham_putitem(dbp, hcp->page, pkey, key_type);
01492         __ham_putitem(dbp, hcp->page, pdata, data_type);
01493 
01494         /*
01495          * For splits, we are going to update item_info's page number
01496          * field, so that we can easily return to the same page the
01497          * next time we come in here.  For other operations, this shouldn't
01498          * matter, since odds are this is the last thing that happens before
01499          * we return to the user program.
01500          */
01501         hcp->pgno = PGNO(hcp->page);
01502 
01503         /*
01504          * XXX
01505          * Maybe keep incremental numbers here.
01506          */
01507         if (!STD_LOCKING(dbc)) {
01508                 hcp->hdr->nelem++;
01509                 if ((ret = __ham_dirty_meta(dbc)) != 0)
01510                         return (ret);
01511         }
01512 
01513         if (do_expand || (hcp->hdr->ffactor != 0 &&
01514             (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor))
01515                 F_SET(hcp, H_EXPAND);
01516         return (0);
01517 }
01518 
01519 /*
01520  * Special __putitem call used in splitting -- copies one entry to
01521  * another.  Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
01522  * H_DUPLICATE, H_OFFDUP).  Since we log splits at a high level, we
01523  * do not need to do any logging here.
01524  *
01525  * PUBLIC: void __ham_copy_item __P((DB *, PAGE *, u_int32_t, PAGE *));
01526  */
01527 void
01528 __ham_copy_item(dbp, src_page, src_ndx, dest_page)
01529         DB *dbp;
01530         PAGE *src_page;
01531         u_int32_t src_ndx;
01532         PAGE *dest_page;
01533 {
01534         u_int32_t len;
01535         size_t pgsize;
01536         void *src, *dest;
01537         db_indx_t *inp;
01538 
01539         pgsize = dbp->pgsize;
01540         inp = P_INP(dbp, dest_page);
01541         /*
01542          * Copy the key and data entries onto this new page.
01543          */
01544         src = P_ENTRY(dbp, src_page, src_ndx);
01545 
01546         /* Set up space on dest. */
01547         len = (u_int32_t)LEN_HITEM(dbp, src_page, pgsize, src_ndx);
01548         HOFFSET(dest_page) -= len;
01549         inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
01550         dest = P_ENTRY(dbp, dest_page, NUM_ENT(dest_page));
01551         NUM_ENT(dest_page)++;
01552 
01553         memcpy(dest, src, len);
01554 }
01555 
01556 /*
01557  *
01558  * Returns:
01559  *      0 on success -- pp points to new page.
01560  *      errno on error -- pp not valid.
01561  *
01562  * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
01563  */
01564 int
01565 __ham_add_ovflpage(dbc, pagep, release, pp)
01566         DBC *dbc;
01567         PAGE *pagep;
01568         int release;
01569         PAGE **pp;
01570 {
01571         DB *dbp;
01572         DB_LSN new_lsn;
01573         DB_MPOOLFILE *mpf;
01574         PAGE *new_pagep;
01575         int ret;
01576 
01577         dbp = dbc->dbp;
01578         mpf = dbp->mpf;
01579 
01580         if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0)
01581                 return (ret);
01582 
01583         if (DBC_LOGGING(dbc)) {
01584                 if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0,
01585                     PUTOVFL, PGNO(pagep), &LSN(pagep), PGNO(new_pagep),
01586                     &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) {
01587                         (void)__memp_fput(mpf, pagep, DB_MPOOL_DIRTY);
01588                         return (ret);
01589                 }
01590         } else
01591                 LSN_NOT_LOGGED(new_lsn);
01592 
01593         /* Move lsn onto page. */
01594         LSN(pagep) = LSN(new_pagep) = new_lsn;
01595         NEXT_PGNO(pagep) = PGNO(new_pagep);
01596 
01597         PREV_PGNO(new_pagep) = PGNO(pagep);
01598 
01599         if (release)
01600                 ret = __memp_fput(mpf, pagep, DB_MPOOL_DIRTY);
01601 
01602         *pp = new_pagep;
01603         return (ret);
01604 }
01605 
01606 /*
01607  * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t));
01608  */
01609 int
01610 __ham_get_cpage(dbc, mode)
01611         DBC *dbc;
01612         db_lockmode_t mode;
01613 {
01614         DB *dbp;
01615         DB_LOCK tmp_lock;
01616         DB_MPOOLFILE *mpf;
01617         HASH_CURSOR *hcp;
01618         int ret;
01619 
01620         dbp = dbc->dbp;
01621         mpf = dbp->mpf;
01622         hcp = (HASH_CURSOR *)dbc->internal;
01623         ret = 0;
01624 
01625         /*
01626          * There are four cases with respect to buckets and locks.
01627          * 1. If there is no lock held, then if we are locking, we should
01628          *    get the lock.
01629          * 2. If there is a lock held, it's for the current bucket, and it's
01630          *    for the right mode, we don't need to do anything.
01631          * 3. If there is a lock held for the current bucket but it's not
01632          *    strong enough, we need to upgrade.
01633          * 4. If there is a lock, but it's for a different bucket, then we need
01634          *    to release the existing lock and get a new lock.
01635          */
01636         LOCK_INIT(tmp_lock);
01637         if (STD_LOCKING(dbc)) {
01638                 if (hcp->lbucket != hcp->bucket) {      /* Case 4 */
01639                         if ((ret = __TLPUT(dbc, hcp->lock)) != 0)
01640                                 return (ret);
01641                         LOCK_INIT(hcp->lock);
01642                 }
01643 
01644                 /*
01645                  * See if we have the right lock.  If we are doing
01646                  * dirty reads we assume the write lock has been downgraded.
01647                  */
01648                 if ((LOCK_ISSET(hcp->lock) &&
01649                     ((hcp->lock_mode == DB_LOCK_READ ||
01650                     F_ISSET(dbp, DB_AM_READ_UNCOMMITTED)) &&
01651                     mode == DB_LOCK_WRITE))) {
01652                         /* Case 3. */
01653                         tmp_lock = hcp->lock;
01654                         LOCK_INIT(hcp->lock);
01655                 }
01656 
01657                 /* Acquire the lock. */
01658                 if (!LOCK_ISSET(hcp->lock))
01659                         /* Cases 1, 3, and 4. */
01660                         if ((ret = __ham_lock_bucket(dbc, mode)) != 0)
01661                                 return (ret);
01662 
01663                 if (ret == 0) {
01664                         hcp->lock_mode = mode;
01665                         hcp->lbucket = hcp->bucket;
01666                         /* Case 3: release the original lock. */
01667                         if ((ret = __ENV_LPUT(dbp->dbenv, tmp_lock)) != 0)
01668                                 return (ret);
01669                 } else if (LOCK_ISSET(tmp_lock))
01670                         hcp->lock = tmp_lock;
01671         }
01672 
01673         if (ret == 0 && hcp->page == NULL) {
01674                 if (hcp->pgno == PGNO_INVALID)
01675                         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
01676                 if ((ret = __memp_fget(mpf,
01677                     &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
01678                         return (ret);
01679         }
01680 
01681         return (0);
01682 }
01683 
01684 /*
01685  * Get a new page at the cursor, putting the last page if necessary.
01686  * If the flag is set to H_ISDUP, then we are talking about the
01687  * duplicate page, not the main page.
01688  *
01689  * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int));
01690  */
01691 int
01692 __ham_next_cpage(dbc, pgno, dirty)
01693         DBC *dbc;
01694         db_pgno_t pgno;
01695         int dirty;
01696 {
01697         DB *dbp;
01698         DB_MPOOLFILE *mpf;
01699         HASH_CURSOR *hcp;
01700         PAGE *p;
01701         int ret;
01702 
01703         dbp = dbc->dbp;
01704         mpf = dbp->mpf;
01705         hcp = (HASH_CURSOR *)dbc->internal;
01706 
01707         if (hcp->page != NULL && (ret =
01708             __memp_fput(mpf, hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0)
01709                 return (ret);
01710         hcp->page = NULL;
01711 
01712         if ((ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0)
01713                 return (ret);
01714 
01715         hcp->page = p;
01716         hcp->pgno = pgno;
01717         hcp->indx = 0;
01718 
01719         return (0);
01720 }
01721 
01722 /*
01723  * __ham_lock_bucket --
01724  *      Get the lock on a particular bucket.
01725  *
01726  * PUBLIC: int __ham_lock_bucket __P((DBC *, db_lockmode_t));
01727  */
01728 int
01729 __ham_lock_bucket(dbc, mode)
01730         DBC *dbc;
01731         db_lockmode_t mode;
01732 {
01733         HASH_CURSOR *hcp;
01734         db_pgno_t pgno;
01735         int gotmeta, ret;
01736 
01737         hcp = (HASH_CURSOR *)dbc->internal;
01738         gotmeta = hcp->hdr == NULL ? 1 : 0;
01739         if (gotmeta)
01740                 if ((ret = __ham_get_meta(dbc)) != 0)
01741                         return (ret);
01742         pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
01743         if (gotmeta)
01744                 if ((ret = __ham_release_meta(dbc)) != 0)
01745                         return (ret);
01746 
01747         ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock);
01748 
01749         hcp->lock_mode = mode;
01750         return (ret);
01751 }
01752 
01753 /*
01754  * __ham_dpair --
01755  *      Delete a pair on a page, paying no attention to what the pair
01756  *      represents.  The caller is responsible for freeing up duplicates
01757  *      or offpage entries that might be referenced by this pair.
01758  *
01759  *      Recovery assumes that this may be called without the metadata
01760  *      page pinned.
01761  *
01762  * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
01763  */
01764 void
01765 __ham_dpair(dbp, p, indx)
01766         DB *dbp;
01767         PAGE *p;
01768         u_int32_t indx;
01769 {
01770         db_indx_t delta, n, *inp;
01771         u_int8_t *dest, *src;
01772 
01773         inp = P_INP(dbp, p);
01774         /*
01775          * Compute "delta", the amount we have to shift all of the
01776          * offsets.  To find the delta, we just need to calculate
01777          * the size of the pair of elements we are removing.
01778          */
01779         delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx);
01780 
01781         /*
01782          * The hard case: we want to remove something other than
01783          * the last item on the page.  We need to shift data and
01784          * offsets down.
01785          */
01786         if ((db_indx_t)indx != NUM_ENT(p) - 2) {
01787                 /*
01788                  * Move the data: src is the first occupied byte on
01789                  * the page. (Length is delta.)
01790                  */
01791                 src = (u_int8_t *)p + HOFFSET(p);
01792 
01793                 /*
01794                  * Destination is delta bytes beyond src.  This might
01795                  * be an overlapping copy, so we have to use memmove.
01796                  */
01797                 dest = src + delta;
01798                 memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p));
01799         }
01800 
01801         /* Adjust page metadata. */
01802         HOFFSET(p) = HOFFSET(p) + delta;
01803         NUM_ENT(p) = NUM_ENT(p) - 2;
01804 
01805         /* Adjust the offsets. */
01806         for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++)
01807                 inp[n] = inp[n + 2] + delta;
01808 
01809 }
01810 
01811 /*
01812  * __ham_c_delpg --
01813  *
01814  * Adjust the cursors after we've emptied a page in a bucket, taking
01815  * care that when we move cursors pointing to deleted items, their
01816  * orders don't collide with the orders of cursors on the page we move
01817  * them to (since after this function is called, cursors with the same
01818  * index on the two pages will be otherwise indistinguishable--they'll
01819  * all have pgno new_pgno).  There are three cases:
01820  *
01821  *      1) The emptied page is the first page in the bucket.  In this
01822  *      case, we've copied all the items from the second page into the
01823  *      first page, so the first page is new_pgno and the second page is
01824  *      old_pgno.  new_pgno is empty, but can have deleted cursors
01825  *      pointing at indx 0, so we need to be careful of the orders
01826  *      there.  This is DB_HAM_DELFIRSTPG.
01827  *
01828  *      2) The page is somewhere in the middle of a bucket.  Our caller
01829  *      can just delete such a page, so it's old_pgno.  old_pgno is
01830  *      empty, but may have deleted cursors pointing at indx 0, so we
01831  *      need to be careful of indx 0 when we move those cursors to
01832  *      new_pgno.  This is DB_HAM_DELMIDPG.
01833  *
01834  *      3) The page is the last in a bucket.  Again the empty page is
01835  *      old_pgno, and again it should only have cursors that are deleted
01836  *      and at indx == 0.  This time, though, there's no next page to
01837  *      move them to, so we set them to indx == num_ent on the previous
01838  *      page--and indx == num_ent is the index whose cursors we need to
01839  *      be careful of.  This is DB_HAM_DELLASTPG.
01840  */
01841 static int
01842 __ham_c_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp)
01843         DBC *dbc;
01844         db_pgno_t old_pgno, new_pgno;
01845         u_int32_t num_ent;
01846         db_ham_mode op;
01847         u_int32_t *orderp;
01848 {
01849         DB *dbp, *ldbp;
01850         DB_ENV *dbenv;
01851         DB_LSN lsn;
01852         DB_TXN *my_txn;
01853         DBC *cp;
01854         HASH_CURSOR *hcp;
01855         int found, ret;
01856         db_indx_t indx;
01857         u_int32_t order;
01858 
01859         /* Which is the worrisome index? */
01860         indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0;
01861 
01862         dbp = dbc->dbp;
01863         dbenv = dbp->dbenv;
01864 
01865         my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
01866         found = 0;
01867 
01868         MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
01869         /*
01870          * Find the highest order of any cursor our movement
01871          * may collide with.
01872          */
01873         order = 1;
01874         for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
01875             ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
01876             ldbp = LIST_NEXT(ldbp, dblistlinks)) {
01877                 MUTEX_LOCK(dbenv, dbp->mutex);
01878                 for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
01879                     cp = TAILQ_NEXT(cp, links)) {
01880                         if (cp == dbc || cp->dbtype != DB_HASH)
01881                                 continue;
01882                         hcp = (HASH_CURSOR *)cp->internal;
01883                         if (hcp->pgno == new_pgno) {
01884                                 if (hcp->indx == indx &&
01885                                     F_ISSET(hcp, H_DELETED) &&
01886                                     hcp->order >= order)
01887                                         order = hcp->order + 1;
01888                                 DB_ASSERT(op != DB_HAM_DELFIRSTPG ||
01889                                     hcp->indx == NDX_INVALID ||
01890                                     (hcp->indx == 0 &&
01891                                     F_ISSET(hcp, H_DELETED)));
01892                         }
01893                 }
01894                 MUTEX_UNLOCK(dbenv, dbp->mutex);
01895         }
01896 
01897         for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
01898             ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
01899             ldbp = LIST_NEXT(ldbp, dblistlinks)) {
01900                 MUTEX_LOCK(dbenv, dbp->mutex);
01901                 for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
01902                     cp = TAILQ_NEXT(cp, links)) {
01903                         if (cp == dbc || cp->dbtype != DB_HASH)
01904                                 continue;
01905 
01906                         hcp = (HASH_CURSOR *)cp->internal;
01907 
01908                         if (hcp->pgno == old_pgno) {
01909                                 switch (op) {
01910                                 case DB_HAM_DELFIRSTPG:
01911                                         /*
01912                                          * We're moving all items,
01913                                          * regardless of index.
01914                                          */
01915                                         hcp->pgno = new_pgno;
01916 
01917                                         /*
01918                                          * But we have to be careful of
01919                                          * the order values.
01920                                          */
01921                                         if (hcp->indx == indx)
01922                                                 hcp->order += order;
01923                                         break;
01924                                 case DB_HAM_DELMIDPG:
01925                                         hcp->pgno = new_pgno;
01926                                         DB_ASSERT(hcp->indx == 0 &&
01927                                             F_ISSET(hcp, H_DELETED));
01928                                         hcp->order += order;
01929                                         break;
01930                                 case DB_HAM_DELLASTPG:
01931                                         hcp->pgno = new_pgno;
01932                                         DB_ASSERT(hcp->indx == 0 &&
01933                                             F_ISSET(hcp, H_DELETED));
01934                                         hcp->indx = indx;
01935                                         hcp->order += order;
01936                                         break;
01937                                 default:
01938                                         DB_ASSERT(0);
01939                                         return (__db_panic(dbenv, EINVAL));
01940                                 }
01941                                 if (my_txn != NULL && cp->txn != my_txn)
01942                                         found = 1;
01943                         }
01944                 }
01945                 MUTEX_UNLOCK(dbenv, dbp->mutex);
01946         }
01947         MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
01948 
01949         if (found != 0 && DBC_LOGGING(dbc)) {
01950                 if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, op,
01951                     old_pgno, new_pgno, indx, order)) != 0)
01952                         return (ret);
01953         }
01954         *orderp = order;
01955         return (0);
01956 }

Generated on Sun Dec 25 12:14:29 2005 for Berkeley DB 4.4.16 by doxygen 1.4.2