Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

db_page.h

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: db_page.h,v 12.6 2005/08/08 14:52:30 bostic Exp $
00008  */
00009 
00010 #ifndef _DB_PAGE_H_
00011 #define _DB_PAGE_H_
00012 
00013 #if defined(__cplusplus)
00014 extern "C" {
00015 #endif
00016 
00017 /*
00018  * DB page formats.
00019  *
00020  * !!!
00021  * This implementation requires that values within the following structures
00022  * NOT be padded -- note, ANSI C permits random padding within structures.
00023  * If your compiler pads randomly you can just forget ever making DB run on
00024  * your system.  In addition, no data type can require larger alignment than
00025  * its own size, e.g., a 4-byte data element may not require 8-byte alignment.
00026  *
00027  * Note that key/data lengths are often stored in db_indx_t's -- this is
00028  * not accidental, nor does it limit the key/data size.  If the key/data
00029  * item fits on a page, it's guaranteed to be small enough to fit into a
00030  * db_indx_t, and storing it in one saves space.
00031  */
00032 
00033 #define PGNO_INVALID    0       /* Invalid page number in any database (same value as PGNO_BASE_MD). */
00034 #define PGNO_BASE_MD    0       /* Base database: metadata page number. */
00035 
00036 /* Page types: values stored in the one-byte type field of a page header. */
00037 #define P_INVALID       0       /* Invalid page type. */
00038 #define __P_DUPLICATE   1       /* Duplicate. DEPRECATED in 3.1 */
00039 #define P_HASH          2       /* Hash. */
00040 #define P_IBTREE        3       /* Btree internal. */
00041 #define P_IRECNO        4       /* Recno internal. */
00042 #define P_LBTREE        5       /* Btree leaf. */
00043 #define P_LRECNO        6       /* Recno leaf. */
00044 #define P_OVERFLOW      7       /* Overflow. */
00045 #define P_HASHMETA      8       /* Hash metadata page. */
00046 #define P_BTREEMETA     9       /* Btree metadata page. */
00047 #define P_QAMMETA       10      /* Queue metadata page. */
00048 #define P_QAMDATA       11      /* Queue data page. */
00049 #define P_LDUP          12      /* Off-page duplicate leaf. */
00050 #define P_PAGETYPE_MAX  13      /* One more than the largest page type. */
00051 /* Flag to __db_new. */
00052 #define P_DONTEXTEND    0x8000  /* Don't allocate if there are no free pages. */
00053 
00054 /*
00055  * When we create pages in mpool, we ask mpool to clear some number of bytes
00056  * in the header.  This number must be at least as big as the regular page
00057  * headers and cover enough of the btree and hash meta-data pages to obliterate
00058  * the page type.
00059  */
00060 #define DB_PAGE_DB_LEN          32      /* Covers the type byte at offset 25. */
00061 #define DB_PAGE_QUEUE_LEN       0       /* NOTE(review): presumably the queue AM's clear length -- confirm. */
00062 
00063 /************************************************************************
00064  GENERIC METADATA PAGE HEADER
00065  *
00066  * !!!
00067  * The magic and version numbers have to be in the same place in all versions
00068  * of the metadata page, as the application may not have upgraded the database.
00069  ************************************************************************/
00070 typedef struct _dbmeta33 {
00071         DB_LSN    lsn;          /* 00-07: LSN. */
00072         db_pgno_t pgno;         /* 08-11: Current page number. */
00073         u_int32_t magic;        /* 12-15: Magic number. */
00074         u_int32_t version;      /* 16-19: Version. */
00075         u_int32_t pagesize;     /* 20-23: Pagesize. */
00076         u_int8_t  encrypt_alg;  /*    24: Encryption algorithm. */
00077         u_int8_t  type;         /*    25: Page type. */
00078 #define DBMETA_CHKSUM           0x01    /* Presumably a metaflags bit -- confirm. */
00079         u_int8_t  metaflags;    /*    26: Meta-only flags. */
00080         u_int8_t  unused1;      /*    27: Unused. */
00081         u_int32_t free;         /* 28-31: Free list page number. */
00082         db_pgno_t last_pgno;    /* 32-35: Page number of last page in db. */
00083         u_int32_t unused3;      /* 36-39: Unused. */
00084         u_int32_t key_count;    /* 40-43: Cached key count. */
00085         u_int32_t record_count; /* 44-47: Cached record count. */
00086         u_int32_t flags;        /* 48-51: Flags: unique to each AM. */
00087                                 /* 52-71: Unique file ID. */
00088         u_int8_t  uid[DB_FILE_ID_LEN];
00089 } DBMETA33, DBMETA;
00090 
00091 /************************************************************************
00092  BTREE METADATA PAGE LAYOUT
00093  ************************************************************************/
00094 typedef struct _btmeta33 {
00095 #define BTM_DUP         0x001   /*        Duplicates. */
00096 #define BTM_RECNO       0x002   /*        Recno tree. */
00097 #define BTM_RECNUM      0x004   /*        Btree: maintain record count. */
00098 #define BTM_FIXEDLEN    0x008   /*        Recno: fixed length records. */
00099 #define BTM_RENUMBER    0x010   /*        Recno: renumber on insert/delete. */
00100 #define BTM_SUBDB       0x020   /*        Subdatabases. */
00101 #define BTM_DUPSORT     0x040   /*        Duplicates are sorted. */
00102 #define BTM_MASK        0x07f   /*        All of the BTM_* bits above. */
00103         DBMETA  dbmeta;         /* 00-71: Generic meta-data header. */
00104 
00105         u_int32_t unused1;      /* 72-75: Unused space. */
00106         u_int32_t minkey;       /* 76-79: Btree: Minkey. */
00107         u_int32_t re_len;       /* 80-83: Recno: fixed-length record length. */
00108         u_int32_t re_pad;       /* 84-87: Recno: fixed-length record pad. */
00109         u_int32_t root;         /* 88-91: Root page. */
00110         u_int32_t unused2[92];  /* 92-459: Unused space. */
00111         u_int32_t crypto_magic;         /* 460-463: Crypto magic number */
00112         u_int32_t trash[3];             /* 464-475: Trash space - Do not use */
00113         u_int8_t iv[DB_IV_BYTES];       /* 476-491: Crypto IV (16 bytes) */
00114         u_int8_t chksum[DB_MAC_KEY];    /* 492-511: Page chksum (20 bytes) */
00115 
00116         /*
00117          * Minimum page size is 512.
00118          */
00119 } BTMETA33, BTMETA;
00120 
00121 /************************************************************************
00122  HASH METADATA PAGE LAYOUT
00123  ************************************************************************/
00124 typedef struct _hashmeta33 {
00125 #define DB_HASH_DUP     0x01    /*        Duplicates. */
00126 #define DB_HASH_SUBDB   0x02    /*        Subdatabases. */
00127 #define DB_HASH_DUPSORT 0x04    /*        Duplicates are sorted. */
00128         DBMETA dbmeta;          /* 00-71: Generic meta-data page header. */
00129 
00130         u_int32_t max_bucket;   /* 72-75: ID of Maximum bucket in use */
00131         u_int32_t high_mask;    /* 76-79: Modulo mask into table */
00132         u_int32_t low_mask;     /* 80-83: Modulo mask into table lower half */
00133         u_int32_t ffactor;      /* 84-87: Fill factor */
00134         u_int32_t nelem;        /* 88-91: Number of keys in hash table */
00135         u_int32_t h_charkey;    /* 92-95: Value of hash(CHARKEY) */
00136 #define NCACHED 32              /* number of spare points */
00137                                 /* 96-223: Spare pages for overflow */
00138         u_int32_t spares[NCACHED];
00139         u_int32_t unused[59];   /* 224-459: Unused space */
00140         u_int32_t crypto_magic; /* 460-463: Crypto magic number */
00141         u_int32_t trash[3];     /* 464-475: Trash space - Do not use */
00142         u_int8_t iv[DB_IV_BYTES];       /* 476-491: Crypto IV (16 bytes) */
00143         u_int8_t chksum[DB_MAC_KEY];    /* 492-511: Page chksum (20 bytes) */
00144 
00145         /*
00146          * Minimum page size is 512.
00147          */
00148 } HMETA33, HMETA;
00149 
00150 /************************************************************************
00151  QUEUE METADATA PAGE LAYOUT
00152  ************************************************************************/
00153 /*
00154  * QAM meta-data page structure: the generic header, the queue parameters,
00155  * then unused space and the crypto/checksum trailer ending at byte 511.
00156  */
00157 typedef struct _qmeta33 {
00158         DBMETA    dbmeta;       /* 00-71: Generic meta-data header. */
00159 
00160         u_int32_t first_recno;  /* 72-75: First not deleted record. */
00161         u_int32_t cur_recno;    /* 76-79: Next recno to be allocated. */
00162         u_int32_t re_len;       /* 80-83: Fixed-length record length. */
00163         u_int32_t re_pad;       /* 84-87: Fixed-length record pad. */
00164         u_int32_t rec_page;     /* 88-91: Records Per Page. */
00165         u_int32_t page_ext;     /* 92-95: Pages per extent */
00166 
00167         u_int32_t unused[91];   /* 96-459: Unused space */
00168         u_int32_t crypto_magic; /* 460-463: Crypto magic number */
00169         u_int32_t trash[3];     /* 464-475: Trash space - Do not use */
00170         u_int8_t iv[DB_IV_BYTES];       /* 476-491: Crypto IV (16 bytes) */
00171         u_int8_t chksum[DB_MAC_KEY];    /* 492-511: Page chksum (20 bytes) */
00172         /*
00173          * Minimum page size is 512.
00174          */
00175 } QMETA33, QMETA;
00176 
00177 /*
00178  * DBMETASIZE is the size of the buffer used by __db_file_setup and
00179  * DB->verify; it is guaranteed to be larger than any possible metadata
00180  * page size and smaller than any disk sector.
00181  */
00182 #define DBMETASIZE      512
00183 
00184 /************************************************************************
00185  BTREE/HASH MAIN PAGE LAYOUT
00186  ************************************************************************/
00187 /*
00188  *      +-----------------------------------+
00189  *      |    lsn    |   pgno    | prev pgno |
00190  *      +-----------------------------------+
00191  *      | next pgno |  entries  | hf offset |
00192  *      +-----------------------------------+
00193  *      |   level   |   type    |   chksum  |
00194  *      +-----------------------------------+
00195  *      |    iv     |   index   | free -->  |
00196  *      +-----------+-----------------------+
00197  *      |        F R E E A R E A            |
00198  *      +-----------------------------------+
00199  *      |              <-- free |   item    |
00200  *      +-----------------------------------+
00201  *      |   item    |   item    |   item    |
00202  *      +-----------------------------------+
00203  *
00204  * sizeof(PAGE) == 26 bytes + possibly 20 bytes of checksum and possibly
00205  * 16 bytes of IV (+ 2 bytes for alignment), and the following indices
00206  * are guaranteed to be two-byte aligned.  If we aren't doing crypto or
00207  * checksumming the bytes are reclaimed for data storage.
00208  *
00209  * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the
00210  * key for inp[1]'s data.  All other types of pages only contain single items.
00211  */
00212 typedef struct __pg_chksum {
00213         u_int8_t        unused[2];              /* Page bytes 26-27: For alignment */
00214         u_int8_t        chksum[4];              /* Page bytes 28-31: Checksum */
00215 } PG_CHKSUM;
00216 
00217 typedef struct __pg_crypto {
00218         u_int8_t        unused[2];              /* Page bytes 26-27: For alignment */
00219         u_int8_t        chksum[DB_MAC_KEY];     /* Page bytes 28-47: Checksum */
00220         u_int8_t        iv[DB_IV_BYTES];        /* Page bytes 48-63: IV */
00221         /* !!!
00222          * Must be 16-byte aligned for crypto (header ends at page byte 64).
00223          */
00224 } PG_CRYPTO;
00225 
00226 typedef struct _db_page {
00227         DB_LSN    lsn;          /* 00-07: Log sequence number. */
00228         db_pgno_t pgno;         /* 08-11: Current page number. */
00229         db_pgno_t prev_pgno;    /* 12-15: Previous page number (record count via RE_NREC). */
00230         db_pgno_t next_pgno;    /* 16-19: Next page number. */
00231         db_indx_t entries;      /* 20-21: Number of items (overflow: reference count). */
00232         db_indx_t hf_offset;    /* 22-23: High free byte page offset (overflow: data length). */
00233 
00234         /*
00235          * The btree levels are numbered from the leaf to the root, starting
00236          * with 1, so the leaf is level 1, its parent is level 2, and so on.
00237          * We maintain this level on all btree pages, but the only place that
00238          * we actually need it is on the root page.  It would not be difficult
00239          * to hide the byte on the root page once it becomes an internal page,
00240          * so we could get this byte back if we needed it for something else.
00241          */
00242 #define LEAFLEVEL         1     /* Level of a leaf page. */
00243 #define MAXBTREELEVEL   255     /* Largest value the u_int8_t level can hold. */
00244         u_int8_t  level;        /*    24: Btree tree level. */
00245         u_int8_t  type;         /*    25: Page type. */
00246 } PAGE;
00247 
00248 /*
00249  * SIZEOF_PAGE is the unpadded header length (fields end at byte 25).  Many
00250  * compilers pad sizeof(PAGE) out to the next 4-byte boundary, giving 28, so
00251  * sizeof() cannot be used: other things are placed directly after the page
00252  * header, starting at byte 26.
00253  */
00254 #define SIZEOF_PAGE     26
00255 /*
00256  * P_INP, P_IV, P_CHKSUM: the index array, IV and checksum after the header
00257  * (P_IV/P_CHKSUM: NULL if absent).  !!! DB_AM_ENCRYPT implies DB_AM_CHKSUM, so test it first.
00258  */
00259 #define P_INP(dbp, pg)                                                  \
00260         ((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE +                 \
00261         (F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) :            \
00262         (F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0))))
00263 
00264 #define P_IV(dbp, pg)                                                   \
00265         (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) +            \
00266         SIZEOF_PAGE + SSZA(PG_CRYPTO, iv))                              \
00267         : NULL)
00268 
00269 #define P_CHKSUM(dbp, pg)                                               \
00270         (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) +            \
00271         SIZEOF_PAGE + SSZA(PG_CRYPTO, chksum)) :                        \
00272         (F_ISSET((dbp), DB_AM_CHKSUM) ? ((u_int8_t *)(pg) +             \
00273         SIZEOF_PAGE + SSZA(PG_CHKSUM, chksum))                          \
00274         : NULL))
00275 
00276 /* PAGE element macros: each casts (p) to PAGE * and yields that field as an lvalue. */
00277 #define LSN(p)          (((PAGE *)(p))->lsn)
00278 #define PGNO(p)         (((PAGE *)(p))->pgno)
00279 #define PREV_PGNO(p)    (((PAGE *)(p))->prev_pgno)
00280 #define NEXT_PGNO(p)    (((PAGE *)(p))->next_pgno)
00281 #define NUM_ENT(p)      (((PAGE *)(p))->entries)
00282 #define HOFFSET(p)      (((PAGE *)(p))->hf_offset)
00283 #define LEVEL(p)        (((PAGE *)(p))->level)
00284 #define TYPE(p)         (((PAGE *)(p))->type)
00285 
00286 /************************************************************************
00287  QUEUE MAIN PAGE LAYOUT
00288  ************************************************************************/
00289 /*
00290  * Header sizes of the QPAGE structure below.  Used to reclaim space if
00291  * not doing crypto or checksumming.  If you change the QPAGE below you
00292  * MUST adjust this too.
00293  */
00294 #define QPAGE_NORMAL    28      /* Through unused2 (bytes 00-27). */
00295 #define QPAGE_CHKSUM    48      /* Through chksum (bytes 00-47). */
00296 #define QPAGE_SEC       64      /* Through iv (bytes 00-63). */
00297 
00298 typedef struct _qpage {
00299         DB_LSN    lsn;          /* 00-07: Log sequence number. */
00300         db_pgno_t pgno;         /* 08-11: Current page number. */
00301         u_int32_t unused0[3];   /* 12-23: Unused. */
00302         u_int8_t  unused1[1];   /*    24: Unused. */
00303         u_int8_t  type;         /*    25: Page type. */
00304         u_int8_t  unused2[2];   /* 26-27: Unused. */
00305         u_int8_t  chksum[DB_MAC_KEY]; /* 28-47: Checksum. */
00306         u_int8_t  iv[DB_IV_BYTES]; /* 48-63: IV. */
00307 } QPAGE;
00308 
00309 #define QPAGE_SZ(dbp)   /* Queue page header size for dbp's flags. */   \
00310         (F_ISSET((dbp), DB_AM_ENCRYPT) ? QPAGE_SEC :                    \
00311         (F_ISSET((dbp), DB_AM_CHKSUM) ? QPAGE_CHKSUM : QPAGE_NORMAL))
00312 /*
00313  * !!!
00314  * The next_pgno and prev_pgno fields are not maintained for btree and recno
00315  * internal pages.  Doing so only provides a minor performance improvement,
00316  * it's hard to do when deleting internal pages, and it increases the chance
00317  * of deadlock during deletes and splits because we have to re-link pages at
00318  * more than the leaf level.
00319  *
00320  * !!!
00321  * The btree/recno access method needs db_recno_t bytes on the root page to
00322  * record how many records are in the tree.  (Storing the count in the meta-data
00323  * page would create a second hot spot in actively modified trees; recomputing
00324  * it from the BINTERNAL fields on each access is the other alternative.)  So
00325  * PREV_PGNO is overloaded.  RE_NREC_ADJ/SET are expressions: add your own ';'.
00326  */
00327 #define RE_NREC(p)                                                      \
00328         ((TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) ? PREV_PGNO(p) :  \
00329         (db_pgno_t)(TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : NUM_ENT(p)))
00330 #define RE_NREC_ADJ(p, adj)                                             \
00331         (PREV_PGNO(p) += (adj))
00332 #define RE_NREC_SET(p, num)                                             \
00333         (PREV_PGNO(p) = (num))
00334 
00335 /*
00336  * Initialize a page header: set pgno, prev/next links, level and type, zero
00337  * the entry count and set the high free offset to pg_size, which is cast
00338  * (truncated) to db_indx_t.  pg is evaluated once per assignment.
00339  * !!!
00340  * Don't modify the page's LSN, code depends on it being unchanged after a
00341  * P_INIT call.
00342  */
00342 #define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do {     \
00343         PGNO(pg) = (n);                                                 \
00344         PREV_PGNO(pg) = (pg_prev);                                      \
00345         NEXT_PGNO(pg) = (pg_next);                                      \
00346         NUM_ENT(pg) = (0);                                              \
00347         HOFFSET(pg) = (db_indx_t)(pg_size);                             \
00348         LEVEL(pg) = (btl);                                              \
00349         TYPE(pg) = (pg_type);                                           \
00350 } while (0)
00351 
00352 /* Page header length: offset of the index array, taken from a page at address 0. */
00353 #define P_OVERHEAD(dbp) P_TO_UINT16(P_INP(dbp, 0))
00354 
00355 /* Offset of the first free byte: just past the last index slot. */
00356 #define LOFFSET(dbp, pg)                                                \
00357     (P_OVERHEAD(dbp) + NUM_ENT(pg) * sizeof(db_indx_t))
00358 
00359 /* Free space on a regular page: high free offset minus first free byte. */
00360 #define P_FREESPACE(dbp, pg)    (HOFFSET(pg) - LOFFSET(dbp, pg))
00361 
00362 /* Get a pointer to the bytes at a specific index: page base plus the offset stored in slot indx. */
00363 #define P_ENTRY(dbp, pg, indx)  ((u_int8_t *)(pg) + P_INP(dbp, pg)[indx])
00364 
00365 /************************************************************************
00366  OVERFLOW PAGE LAYOUT
00367  ************************************************************************/
00368 
00369 /*
00370  * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which
00371  * store a page number (the first page of the overflow item) and a length
00372  * (the total length of the overflow item).  The overflow item consists of
00373  * some number of overflow pages, linked by the next_pgno field of the page.
00374  * A next_pgno field of PGNO_INVALID flags the end of the overflow item.
00375  *
00376  * Overflow page overloads:
00377  *      The amount of overflow data stored on each page is stored in the
00378  *      hf_offset field (OV_LEN).
00379  *
00380  *      The implementation reference counts overflow items as it's possible
00381  *      for them to be promoted onto btree internal pages.  The reference
00382  *      count is stored in the entries field (OV_REF).
00383  */
00384 #define OV_LEN(p)       (((PAGE *)(p))->hf_offset)
00385 #define OV_REF(p)       (((PAGE *)(p))->entries)
00386 
00387 /* Largest number of data bytes an overflow page can hold. */
00388 #define P_MAXSPACE(dbp, psize)  ((psize) - P_OVERHEAD(dbp))
00389 
00390 /* Bytes still unused on an overflow page: capacity minus stored length. */
00391 #define P_OVFLSPACE(dbp, psize, pg)     (P_MAXSPACE(dbp, psize) - OV_LEN(pg))
00392 
00393 /************************************************************************
00394  HASH PAGE LAYOUT
00395  ************************************************************************/
00396 
00397 /* Each index references a group of bytes on the page; these are the item types. */
00398 #define H_KEYDATA       1       /* Key/data item. */
00399 #define H_DUPLICATE     2       /* Duplicate key/data item. */
00400 #define H_OFFPAGE       3       /* Overflow key/data item. */
00401 #define H_OFFDUP        4       /* Overflow page of duplicates. */
00402 
00403 /*
00404  * !!!
00405  * Items on hash pages are (potentially) unaligned, so we can never cast the
00406  * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as
00407  * we do with B+tree on-page structures.  Because we frequently want the type
00408  * field, which needs no alignment and is at the same offset in all three,
00409  * there's a pair of macros: one takes an item pointer, one a page and index.
00410  */
00411 #define HPAGE_PTYPE(p)          (*(u_int8_t *)(p))
00412 #define HPAGE_TYPE(dbp, pg, indx)       (*P_ENTRY(dbp, pg, indx))
00413 
00414 /*
00415  * The first and second types are H_KEYDATA and H_DUPLICATE, represented
00416  * by the HKEYDATA structure:
00417  *
00418  *      +-----------------------------------+
00419  *      |    type   | key/data ...          |
00420  *      +-----------------------------------+
00421  *
00422  * For duplicates, the data field encodes duplicate elements in the data
00423  * field:
00424  *
00425  *      +---------------------------------------------------------------+
00426  *      |    type   | len1 | element1 | len1 | len2 | element2 | len2   |
00427  *      +---------------------------------------------------------------+
00428  *
00429  * Thus, by keeping track of the offset in the element, we can do both
00430  * backward and forward traversal.  HKEYDATA_DATA(p) points at the data bytes.
00431  */
00432 typedef struct _hkeydata {
00433         u_int8_t  type;         /*    00: Item type (H_KEYDATA or H_DUPLICATE). */
00434         u_int8_t  data[1];      /* Variable length key/data item. */
00435 } HKEYDATA;
00436 #define HKEYDATA_DATA(p)        (((u_int8_t *)(p)) + SSZA(HKEYDATA, data))
00437 
00438 /*
00439  * The length of any hash item: the previous item's offset (the page size for
00440  * index 0) minus this item's offset.  indx is an element, not a PAIR, index.
00441  */
00442 #define LEN_HITEM(dbp, pg, pgsize, indx)                                \
00443         (((indx) == 0 ? (pgsize) :                                      \
00444         (P_INP(dbp, pg)[(indx) - 1])) - (P_INP(dbp, pg)[indx]))
00445 
00446 #define LEN_HKEYDATA(dbp, pg, psize, indx)                              \
00447         (db_indx_t)(LEN_HITEM(dbp, pg, psize, indx) - HKEYDATA_SIZE(0))
00448 
00449 /*
00450  * Page space required to add a new HKEYDATA item to the page: HKEYDATA_SIZE
00451  * excludes, and HKEYDATA_PSIZE includes, the db_indx_t index value.
00452  */
00453 #define HKEYDATA_SIZE(len)                                              \
00454         ((len) + SSZA(HKEYDATA, data))
00455 #define HKEYDATA_PSIZE(len)                                             \
00456         (HKEYDATA_SIZE(len) + sizeof(db_indx_t))
00457 
00458 /* Put a HKEYDATA item at pe.  The parameter is etype, not type, so the ->type member is not substituted. */
00459 #define PUT_HKEYDATA(pe, kd, len, etype) {                              \
00460         ((HKEYDATA *)(pe))->type = (etype);                             \
00461         memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), (kd), (len));       \
00462 }
00463 
00464 /*
00465  * Macros that describe the page layout in key/data pairs (data is at indx + 1).
00466  */
00467 #define H_NUMPAIRS(pg)                  (NUM_ENT(pg) / 2)
00468 #define H_KEYINDEX(indx)                (indx)
00469 #define H_DATAINDEX(indx)               ((indx) + 1)
00470 #define H_PAIRKEY(dbp, pg, indx)        P_ENTRY(dbp, pg, H_KEYINDEX(indx))
00471 #define H_PAIRDATA(dbp, pg, indx)       P_ENTRY(dbp, pg, H_DATAINDEX(indx))
00472 #define H_PAIRSIZE(dbp, pg, psize, indx)                                \
00473         (LEN_HITEM(dbp, pg, psize, H_KEYINDEX(indx)) +                  \
00474         LEN_HITEM(dbp, pg, psize, H_DATAINDEX(indx)))
00475 #define LEN_HDATA(dbp, p, psize, indx)                                  \
00476     LEN_HKEYDATA(dbp, p, psize, H_DATAINDEX(indx))
00477 #define LEN_HKEY(dbp, p, psize, indx)                                   \
00478     LEN_HKEYDATA(dbp, p, psize, H_KEYINDEX(indx))
00479 
00480 /*
00481  * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure; the HOFFPAGE_* macros yield u_int8_t pointers to its fields.
00482  */
00483 typedef struct _hoffpage {
00484         u_int8_t  type;         /*    00: Page type and delete flag. */
00485         u_int8_t  unused[3];    /* 01-03: Padding, unused. */
00486         db_pgno_t pgno;         /* 04-07: Offpage page number. */
00487         u_int32_t tlen;         /* 08-11: Total length of item. */
00488 } HOFFPAGE;
00489 
00490 #define HOFFPAGE_PGNO(p)        (((u_int8_t *)(p)) + SSZ(HOFFPAGE, pgno))
00491 #define HOFFPAGE_TLEN(p)        (((u_int8_t *)(p)) + SSZ(HOFFPAGE, tlen))
00492 
00493 /*
00494  * Page space required to add a new HOFFPAGE item to the page: HOFFPAGE_SIZE
00495  * excludes, and HOFFPAGE_PSIZE includes, the db_indx_t index value.
00496  */
00497 #define HOFFPAGE_SIZE           (sizeof(HOFFPAGE))
00498 #define HOFFPAGE_PSIZE          (HOFFPAGE_SIZE + sizeof(db_indx_t))
00499 
00500 /*
00501  * The fourth type is H_OFFDUP represented by the HOFFDUP structure; HOFFDUP_PGNO yields a u_int8_t pointer to pgno.
00502  */
00503 typedef struct _hoffdup {
00504         u_int8_t  type;         /*    00: Page type and delete flag. */
00505         u_int8_t  unused[3];    /* 01-03: Padding, unused. */
00506         db_pgno_t pgno;         /* 04-07: Offpage page number. */
00507 } HOFFDUP;
00508 #define HOFFDUP_PGNO(p)         (((u_int8_t *)(p)) + SSZ(HOFFDUP, pgno))
00509 
00510 /*
00511  * Page space required to add a new HOFFDUP item to the page, without the
00512  * index value (no HOFFDUP_PSIZE counterpart is defined alongside it here).
00513  */
00514 #define HOFFDUP_SIZE            (sizeof(HOFFDUP))
00515 
00516 /************************************************************************
00517  BTREE PAGE LAYOUT
00518  ************************************************************************/
00519 
00520 /* Each index references a group of bytes on the page; these are the item types. */
00521 #define B_KEYDATA       1       /* Key/data item. */
00522 #define B_DUPLICATE     2       /* Duplicate key/data item. */
00523 #define B_OVERFLOW      3       /* Overflow key/data item. */
00524 
00525 /*
00526  * We have to store a deleted entry flag in the page.   The reason is complex,
00527  * but the simple version is that we can't delete on-page items referenced by
00528  * a cursor -- the return order of subsequent insertions might be wrong.  The
00529  * delete flag is an overload of the top bit of the type byte.
00530  */
00531 #define B_DELETE        (0x80)
00532 #define B_DCLR(t)       ((t) &= ~B_DELETE)
00533 #define B_DSET(t)       ((t) |= B_DELETE)
00534 #define B_DISSET(t)     ((t) & B_DELETE)
00535 
00536 #define B_TYPE(t)       ((t) & ~B_DELETE)
00537 #define B_TSET(t, type, deleted) {                                      \
00538         (t) = (type);                                                   \
00539         if (deleted)                                                    \
00540                 B_DSET(t);                                              \
00541 }
00542 
00543 /*
00544  * The first type is B_KEYDATA, represented by the BKEYDATA structure:
00545  */
00546 typedef struct _bkeydata {
00547         db_indx_t len;          /* 00-01: Key/data item length. */
00548         u_int8_t  type;         /*    02: Page type AND DELETE FLAG. */
00549         u_int8_t  data[1];      /* Variable length key/data item. */
00550 } BKEYDATA;
00551 
00552 /* Get a BKEYDATA item for a specific index (P_ENTRY result cast to BKEYDATA *). */
00553 #define GET_BKEYDATA(dbp, pg, indx)                                     \
00554         ((BKEYDATA *)P_ENTRY(dbp, pg, indx))
00555 
00556 /*
00557  * Page space required to add a new BKEYDATA item to the page, without
00558  * (BKEYDATA_SIZE) and with (BKEYDATA_PSIZE) the index value.  The (u_int16_t)
00559  * cast avoids warnings: DB_ALIGN casts to uintmax_t, the cast converts it to
00560  * a small integral type so we don't get complaints when we assign the final
00561  * result to an integral type smaller than uintmax_t.
00562  */
00563 #define BKEYDATA_SIZE(len)                                              \
00564         ((u_int16_t)DB_ALIGN((len) + SSZA(BKEYDATA, data), sizeof(u_int32_t)))
00565 #define BKEYDATA_PSIZE(len)                                             \
00566         (BKEYDATA_SIZE(len) + sizeof(db_indx_t))
00567 
00568 /*
00569  * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
00570  * by the BOVERFLOW structure.
00571  */
00572 typedef struct _boverflow {
00573         db_indx_t unused1;      /* 00-01: Padding, unused. */
00574         u_int8_t  type;         /*    02: Page type AND DELETE FLAG. */
00575         u_int8_t  unused2;      /*    03: Padding, unused. */
00576         db_pgno_t pgno;         /* 04-07: Next page number. */
00577         u_int32_t tlen;         /* 08-11: Total length of item. */
00578 } BOVERFLOW;
00579 
00580 /* Get a BOVERFLOW item for a specific index (P_ENTRY result cast to BOVERFLOW *). */
00581 #define GET_BOVERFLOW(dbp, pg, indx)                                    \
00582         ((BOVERFLOW *)P_ENTRY(dbp, pg, indx))
00583 
00584 /*
00585  * Page space required to add a new BOVERFLOW item to the page, without
00586  * (BOVERFLOW_SIZE) and with (BOVERFLOW_PSIZE) the index value.
00587  */
00588 #define BOVERFLOW_SIZE                                                  \
00589         ((u_int16_t)DB_ALIGN(sizeof(BOVERFLOW), sizeof(u_int32_t)))
00590 #define BOVERFLOW_PSIZE                                                 \
00591         (BOVERFLOW_SIZE + sizeof(db_indx_t))
00592 
00593 #define BITEM_SIZE(bk)          /* On-page size of item bk, by type. */ \
00594         (B_TYPE((bk)->type) == B_KEYDATA ?                              \
00595         BKEYDATA_SIZE((bk)->len) : BOVERFLOW_SIZE)
00596 
00597 #define BITEM_PSIZE(bk)         /* Same, plus the index value. */       \
00598         (B_TYPE((bk)->type) == B_KEYDATA ?                              \
00599         BKEYDATA_PSIZE((bk)->len) : BOVERFLOW_PSIZE)
00600 
00601 /*
00602  * Btree leaf and hash page layouts group indices in sets of two, one for the
00603  * key and one for the data.  Everything else does it in sets of one to save
00604  * space.  Use the following macros so that it's real obvious what's going on.
00605  */
00606 #define O_INDX  1       /* One index slot. */
00607 #define P_INDX  2       /* Two index slots: a key/data pair. */
00608 
00609 /************************************************************************
00610  BTREE INTERNAL PAGE LAYOUT
00611  ************************************************************************/
00612 
00613 /*
00614  * Btree internal entry: fixed fields followed by the variable-length key.
00615  */
00616 typedef struct _binternal {
00617         db_indx_t  len;         /* 00-01: Key/data item length. */
00618         u_int8_t   type;        /*    02: Page type AND DELETE FLAG. */
00619         u_int8_t   unused;      /*    03: Padding, unused. */
00620         db_pgno_t  pgno;        /* 04-07: Page number of referenced page. */
00621         db_recno_t nrecs;       /* 08-11: Subtree record count. */
00622         u_int8_t   data[1];     /* Variable length key item. */
00623 } BINTERNAL;
00624 
00625 /* Get a BINTERNAL item for a specific index (P_ENTRY result cast to BINTERNAL *). */
00626 #define GET_BINTERNAL(dbp, pg, indx)                                    \
00627         ((BINTERNAL *)P_ENTRY(dbp, pg, indx))
00628 
00629 /*
00630  * Page space required to add a new BINTERNAL item to the page, without
00631  * (BINTERNAL_SIZE) and with (BINTERNAL_PSIZE) the index value.
00632  */
00633 #define BINTERNAL_SIZE(len)                                             \
00634         ((u_int16_t)DB_ALIGN((len) + SSZA(BINTERNAL, data), sizeof(u_int32_t)))
00635 #define BINTERNAL_PSIZE(len)                                            \
00636         (BINTERNAL_SIZE(len) + sizeof(db_indx_t))
00637 
00638 /************************************************************************
00639  RECNO INTERNAL PAGE LAYOUT
00640  ************************************************************************/
00641 
00642 /*
00643  * The recno internal entry: a child page number and its record count.
00644  */
00645 typedef struct _rinternal {
00646         db_pgno_t  pgno;        /* 00-03: Page number of referenced page. */
00647         db_recno_t nrecs;       /* 04-07: Subtree record count. */
00648 } RINTERNAL;
00649 
00650 /* Get a RINTERNAL item for a specific index (P_ENTRY result cast to RINTERNAL *). */
00651 #define GET_RINTERNAL(dbp, pg, indx)                                    \
00652         ((RINTERNAL *)P_ENTRY(dbp, pg, indx))
00653 
00654 /*
00655  * Page space required to add a new RINTERNAL item to the page, without
00656  * (RINTERNAL_SIZE) and with (RINTERNAL_PSIZE) the index value.
00657  */
00658 #define RINTERNAL_SIZE                                                  \
00659         ((u_int16_t)DB_ALIGN(sizeof(RINTERNAL), sizeof(u_int32_t)))
00660 #define RINTERNAL_PSIZE                                                 \
00661         (RINTERNAL_SIZE + sizeof(db_indx_t))
00662 
00663 struct pglist {
00664         db_pgno_t pgno;         /* Page number. */
00665         DB_LSN lsn;             /* NOTE(review): presumably the LSN of page pgno -- confirm. */
00666 };
00667 
00668 #if defined(__cplusplus)
00669 }
00670 #endif
00671 
00672 #endif /* !_DB_PAGE_H_ */

Generated on Sun Dec 25 12:14:22 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2