00001 /*- 00002 * See the file LICENSE for redistribution information. 00003 * 00004 * Copyright (c) 1996-2005 00005 * Sleepycat Software. All rights reserved. 00006 * 00007 * $Id: db_page.h,v 12.6 2005/08/08 14:52:30 bostic Exp $ 00008 */ 00009 00010 #ifndef _DB_PAGE_H_ 00011 #define _DB_PAGE_H_ 00012 00013 #if defined(__cplusplus) 00014 extern "C" { 00015 #endif 00016 00017 /* 00018 * DB page formats. 00019 * 00020 * !!! 00021 * This implementation requires that values within the following structures 00022 * NOT be padded -- note, ANSI C permits random padding within structures. 00023 * If your compiler pads randomly you can just forget ever making DB run on 00024 * your system. In addition, no data type can require larger alignment than 00025 * its own size, e.g., a 4-byte data element may not require 8-byte alignment. 00026 * 00027 * Note that key/data lengths are often stored in db_indx_t's -- this is 00028 * not accidental, nor does it limit the key/data size. If the key/data 00029 * item fits on a page, it's guaranteed to be small enough to fit into a 00030 * db_indx_t, and storing it in one saves space. 00031 */ 00032 00033 #define PGNO_INVALID 0 /* Invalid page number in any database. */ 00034 #define PGNO_BASE_MD 0 /* Base database: metadata page number. */ 00035 00036 /* Page types. */ 00037 #define P_INVALID 0 /* Invalid page type. */ 00038 #define __P_DUPLICATE 1 /* Duplicate. DEPRECATED in 3.1 */ 00039 #define P_HASH 2 /* Hash. */ 00040 #define P_IBTREE 3 /* Btree internal. */ 00041 #define P_IRECNO 4 /* Recno internal. */ 00042 #define P_LBTREE 5 /* Btree leaf. */ 00043 #define P_LRECNO 6 /* Recno leaf. */ 00044 #define P_OVERFLOW 7 /* Overflow. */ 00045 #define P_HASHMETA 8 /* Hash metadata page. */ 00046 #define P_BTREEMETA 9 /* Btree metadata page. */ 00047 #define P_QAMMETA 10 /* Queue metadata page. */ 00048 #define P_QAMDATA 11 /* Queue data page. */ 00049 #define P_LDUP 12 /* Off-page duplicate leaf. */ 00050 #define P_PAGETYPE_MAX 13 00051 /* Flag to __db_new */ 00052 #define P_DONTEXTEND 0x8000 /* Don't allocate if there are no free pages. */ 00053 00054 /* 00055 * When we create pages in mpool, we ask mpool to clear some number of bytes 00056 * in the header. This number must be at least as big as the regular page 00057 * headers and cover enough of the btree and hash meta-data pages to obliterate 00058 * the page type. 00059 */ 00060 #define DB_PAGE_DB_LEN 32 00061 #define DB_PAGE_QUEUE_LEN 0 00062 00063 /************************************************************************ 00064 GENERIC METADATA PAGE HEADER 00065 * 00066 * !!! 00067 * The magic and version numbers have to be in the same place in all versions 00068 * of the metadata page as the application may not have upgraded the database. 00069 ************************************************************************/ 00070 typedef struct _dbmeta33 { 00071 DB_LSN lsn; /* 00-07: LSN. */ 00072 db_pgno_t pgno; /* 08-11: Current page number. */ 00073 u_int32_t magic; /* 12-15: Magic number. */ 00074 u_int32_t version; /* 16-19: Version. */ 00075 u_int32_t pagesize; /* 20-23: Pagesize. */ 00076 u_int8_t encrypt_alg; /* 24: Encryption algorithm. */ 00077 u_int8_t type; /* 25: Page type. */ 00078 #define DBMETA_CHKSUM 0x01 00079 u_int8_t metaflags; /* 26: Meta-only flags */ 00080 u_int8_t unused1; /* 27: Unused. */ 00081 u_int32_t free; /* 28-31: Free list page number. */ 00082 db_pgno_t last_pgno; /* 32-35: Page number of last page in db. */ 00083 u_int32_t unused3; /* 36-39: Unused. */ 00084 u_int32_t key_count; /* 40-43: Cached key count. */ 00085 u_int32_t record_count; /* 44-47: Cached record count. */ 00086 u_int32_t flags; /* 48-51: Flags: unique to each AM. */ 00087 /* 52-71: Unique file ID. */ 00088 u_int8_t uid[DB_FILE_ID_LEN]; 00089 } DBMETA33, DBMETA; 00090 00091 /************************************************************************ 00092 BTREE METADATA PAGE LAYOUT 00093 ************************************************************************/ 00094 typedef struct _btmeta33 { 00095 #define BTM_DUP 0x001 /* Duplicates. */ 00096 #define BTM_RECNO 0x002 /* Recno tree. */ 00097 #define BTM_RECNUM 0x004 /* Btree: maintain record count. */ 00098 #define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. */ 00099 #define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */ 00100 #define BTM_SUBDB 0x020 /* Subdatabases. */ 00101 #define BTM_DUPSORT 0x040 /* Duplicates are sorted. */ 00102 #define BTM_MASK 0x07f 00103 DBMETA dbmeta; /* 00-71: Generic meta-data header. */ 00104 00105 u_int32_t unused1; /* 72-75: Unused space. */ 00106 u_int32_t minkey; /* 76-79: Btree: Minkey. */ 00107 u_int32_t re_len; /* 80-83: Recno: fixed-length record length. */ 00108 u_int32_t re_pad; /* 84-87: Recno: fixed-length record pad. */ 00109 u_int32_t root; /* 88-91: Root page. */ 00110 u_int32_t unused2[92]; /* 92-459: Unused space. */ 00111 u_int32_t crypto_magic; /* 460-463: Crypto magic number */ 00112 u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ 00113 u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ 00114 u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ 00115 00116 /* 00117 * Minimum page size is 512. 00118 */ 00119 } BTMETA33, BTMETA; 00120 00121 /************************************************************************ 00122 HASH METADATA PAGE LAYOUT 00123 ************************************************************************/ 00124 typedef struct _hashmeta33 { 00125 #define DB_HASH_DUP 0x01 /* Duplicates. */ 00126 #define DB_HASH_SUBDB 0x02 /* Subdatabases. */ 00127 #define DB_HASH_DUPSORT 0x04 /* Duplicates are sorted. */ 00128 DBMETA dbmeta; /* 00-71: Generic meta-data page header. */ 00129 00130 u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */ 00131 u_int32_t high_mask; /* 76-79: Modulo mask into table */ 00132 u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */ 00133 u_int32_t ffactor; /* 84-87: Fill factor */ 00134 u_int32_t nelem; /* 88-91: Number of keys in hash table */ 00135 u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */ 00136 #define NCACHED 32 /* number of spare points */ 00137 /* 96-223: Spare pages for overflow */ 00138 u_int32_t spares[NCACHED]; 00139 u_int32_t unused[59]; /* 224-459: Unused space */ 00140 u_int32_t crypto_magic; /* 460-463: Crypto magic number */ 00141 u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ 00142 u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ 00143 u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ 00144 00145 /* 00146 * Minimum page size is 512. 00147 */ 00148 } HMETA33, HMETA; 00149 00150 /************************************************************************ 00151 QUEUE METADATA PAGE LAYOUT 00152 ************************************************************************/ 00153 /* 00154 * QAM Meta data page structure 00155 * 00156 */ 00157 typedef struct _qmeta33 { 00158 DBMETA dbmeta; /* 00-71: Generic meta-data header. */ 00159 00160 u_int32_t first_recno; /* 72-75: First not deleted record. */ 00161 u_int32_t cur_recno; /* 76-79: Next recno to be allocated. */ 00162 u_int32_t re_len; /* 80-83: Fixed-length record length. */ 00163 u_int32_t re_pad; /* 84-87: Fixed-length record pad. */ 00164 u_int32_t rec_page; /* 88-91: Records Per Page. */ 00165 u_int32_t page_ext; /* 92-95: Pages per extent */ 00166 00167 u_int32_t unused[91]; /* 96-459: Unused space */ 00168 u_int32_t crypto_magic; /* 460-463: Crypto magic number */ 00169 u_int32_t trash[3]; /* 464-475: Trash space - Do not use */ 00170 u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */ 00171 u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */ 00172 /* 00173 * Minimum page size is 512. 00174 */ 00175 } QMETA33, QMETA; 00176 00177 /* 00178 * DBMETASIZE is a constant used by __db_file_setup and DB->verify 00179 * as a buffer which is guaranteed to be larger than any possible 00180 * metadata page size and smaller than any disk sector. 00181 */ 00182 #define DBMETASIZE 512 00183 00184 /************************************************************************ 00185 BTREE/HASH MAIN PAGE LAYOUT 00186 ************************************************************************/ 00187 /* 00188 * +-----------------------------------+ 00189 * | lsn | pgno | prev pgno | 00190 * +-----------------------------------+ 00191 * | next pgno | entries | hf offset | 00192 * +-----------------------------------+ 00193 * | level | type | chksum | 00194 * +-----------------------------------+ 00195 * | iv | index | free --> | 00196 * +-----------+-----------------------+ 00197 * | F R E E A R E A | 00198 * +-----------------------------------+ 00199 * | <-- free | item | 00200 * +-----------------------------------+ 00201 * | item | item | item | 00202 * +-----------------------------------+ 00203 * 00204 * sizeof(PAGE) == 26 bytes + possibly 20 bytes of checksum and possibly 00205 * 16 bytes of IV (+ 2 bytes for alignment), and the following indices 00206 * are guaranteed to be two-byte aligned. If we aren't doing crypto or 00207 * checksumming the bytes are reclaimed for data storage. 00208 * 00209 * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the 00210 * key for inp[1]'s data. All other types of pages only contain single items. 00211 */ 00212 typedef struct __pg_chksum { 00213 u_int8_t unused[2]; /* 26-27: For alignment */ 00214 u_int8_t chksum[4]; /* 28-31: Checksum */ 00215 } PG_CHKSUM; 00216 00217 typedef struct __pg_crypto { 00218 u_int8_t unused[2]; /* 26-27: For alignment */ 00219 u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */ 00220 u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */ 00221 /* !!! 00222 * Must be 16-byte aligned for crypto 00223 */ 00224 } PG_CRYPTO; 00225 00226 typedef struct _db_page { 00227 DB_LSN lsn; /* 00-07: Log sequence number. */ 00228 db_pgno_t pgno; /* 08-11: Current page number. */ 00229 db_pgno_t prev_pgno; /* 12-15: Previous page number. */ 00230 db_pgno_t next_pgno; /* 16-19: Next page number. */ 00231 db_indx_t entries; /* 20-21: Number of items on the page. */ 00232 db_indx_t hf_offset; /* 22-23: High free byte page offset. */ 00233 00234 /* 00235 * The btree levels are numbered from the leaf to the root, starting 00236 * with 1, so the leaf is level 1, its parent is level 2, and so on. 00237 * We maintain this level on all btree pages, but the only place that 00238 * we actually need it is on the root page. It would not be difficult 00239 * to hide the byte on the root page once it becomes an internal page, 00240 * so we could get this byte back if we needed it for something else. 00241 */ 00242 #define LEAFLEVEL 1 00243 #define MAXBTREELEVEL 255 00244 u_int8_t level; /* 24: Btree tree level. */ 00245 u_int8_t type; /* 25: Page type. */ 00246 } PAGE; 00247 00248 /* 00249 * With many compilers sizeof(PAGE) == 28, while SIZEOF_PAGE == 26. 00250 * We add in other things directly after the page header and need 00251 * the SIZEOF_PAGE. When giving the sizeof(), many compilers will 00252 * pad it out to the next 4-byte boundary. 00253 */ 00254 #define SIZEOF_PAGE 26 00255 /* 00256 * !!! 00257 * DB_AM_ENCRYPT always implies DB_AM_CHKSUM so that must come first. 00258 */ 00259 #define P_INP(dbp, pg) \ 00260 ((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE + \ 00261 (F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) : \ 00262 (F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0)))) 00263 00264 #define P_IV(dbp, pg) \ 00265 (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) + \ 00266 SIZEOF_PAGE + SSZA(PG_CRYPTO, iv)) \ 00267 : NULL) 00268 00269 #define P_CHKSUM(dbp, pg) \ 00270 (F_ISSET((dbp), DB_AM_ENCRYPT) ? ((u_int8_t *)(pg) + \ 00271 SIZEOF_PAGE + SSZA(PG_CRYPTO, chksum)) : \ 00272 (F_ISSET((dbp), DB_AM_CHKSUM) ? ((u_int8_t *)(pg) + \ 00273 SIZEOF_PAGE + SSZA(PG_CHKSUM, chksum)) \ 00274 : NULL)) 00275 00276 /* PAGE element macros. */ 00277 #define LSN(p) (((PAGE *)p)->lsn) 00278 #define PGNO(p) (((PAGE *)p)->pgno) 00279 #define PREV_PGNO(p) (((PAGE *)p)->prev_pgno) 00280 #define NEXT_PGNO(p) (((PAGE *)p)->next_pgno) 00281 #define NUM_ENT(p) (((PAGE *)p)->entries) 00282 #define HOFFSET(p) (((PAGE *)p)->hf_offset) 00283 #define LEVEL(p) (((PAGE *)p)->level) 00284 #define TYPE(p) (((PAGE *)p)->type) 00285 00286 /************************************************************************ 00287 QUEUE MAIN PAGE LAYOUT 00288 ************************************************************************/ 00289 /* 00290 * Sizes of page below. Used to reclaim space if not doing 00291 * crypto or checksumming. If you change the QPAGE below you 00292 * MUST adjust this too. 00293 */ 00294 #define QPAGE_NORMAL 28 00295 #define QPAGE_CHKSUM 48 00296 #define QPAGE_SEC 64 00297 00298 typedef struct _qpage { 00299 DB_LSN lsn; /* 00-07: Log sequence number. */ 00300 db_pgno_t pgno; /* 08-11: Current page number. */ 00301 u_int32_t unused0[3]; /* 12-23: Unused. */ 00302 u_int8_t unused1[1]; /* 24: Unused. */ 00303 u_int8_t type; /* 25: Page type. */ 00304 u_int8_t unused2[2]; /* 26-27: Unused. */ 00305 u_int8_t chksum[DB_MAC_KEY]; /* 28-47: Checksum */ 00306 u_int8_t iv[DB_IV_BYTES]; /* 48-63: IV */ 00307 } QPAGE; 00308 00309 #define QPAGE_SZ(dbp) \ 00310 (F_ISSET((dbp), DB_AM_ENCRYPT) ? QPAGE_SEC : \ 00311 F_ISSET((dbp), DB_AM_CHKSUM) ? QPAGE_CHKSUM : QPAGE_NORMAL) 00312 /* 00313 * !!! 00314 * The next_pgno and prev_pgno fields are not maintained for btree and recno 00315 * internal pages. Doing so only provides a minor performance improvement, 00316 * it's hard to do when deleting internal pages, and it increases the chance 00317 * of deadlock during deletes and splits because we have to re-link pages at 00318 * more than the leaf level. 00319 * 00320 * !!! 00321 * The btree/recno access method needs db_recno_t bytes of space on the root 00322 * page to specify how many records are stored in the tree. (The alternative 00323 * is to store the number of records in the meta-data page, which will create 00324 * a second hot spot in trees being actively modified, or recalculate it from 00325 * the BINTERNAL fields on each access.) Overload the PREV_PGNO field. 00326 */ 00327 #define RE_NREC(p) \ 00328 ((TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) ? PREV_PGNO(p) : \ 00329 (db_pgno_t)(TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : NUM_ENT(p))) 00330 #define RE_NREC_ADJ(p, adj) \ 00331 PREV_PGNO(p) += adj; 00332 #define RE_NREC_SET(p, num) \ 00333 PREV_PGNO(p) = (num); 00334 00335 /* 00336 * Initialize a page. 00337 * 00338 * !!! 00339 * Don't modify the page's LSN, code depends on it being unchanged after a 00340 * P_INIT call. 00341 */ 00342 #define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \ 00343 PGNO(pg) = (n); \ 00344 PREV_PGNO(pg) = (pg_prev); \ 00345 NEXT_PGNO(pg) = (pg_next); \ 00346 NUM_ENT(pg) = (0); \ 00347 HOFFSET(pg) = (db_indx_t)(pg_size); \ 00348 LEVEL(pg) = (btl); \ 00349 TYPE(pg) = (pg_type); \ 00350 } while (0) 00351 00352 /* Page header length (offset to first index). */ 00353 #define P_OVERHEAD(dbp) P_TO_UINT16(P_INP(dbp, 0)) 00354 00355 /* First free byte. */ 00356 #define LOFFSET(dbp, pg) \ 00357 (P_OVERHEAD(dbp) + NUM_ENT(pg) * sizeof(db_indx_t)) 00358 00359 /* Free space on a regular page. */ 00360 #define P_FREESPACE(dbp, pg) (HOFFSET(pg) - LOFFSET(dbp, pg)) 00361 00362 /* Get a pointer to the bytes at a specific index. */ 00363 #define P_ENTRY(dbp, pg, indx) ((u_int8_t *)pg + P_INP(dbp, pg)[indx]) 00364 00365 /************************************************************************ 00366 OVERFLOW PAGE LAYOUT 00367 ************************************************************************/ 00368 00369 /* 00370 * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which 00371 * store a page number (the first page of the overflow item) and a length 00372 * (the total length of the overflow item). The overflow item consists of 00373 * some number of overflow pages, linked by the next_pgno field of the page. 00374 * A next_pgno field of PGNO_INVALID flags the end of the overflow item. 00375 * 00376 * Overflow page overloads: 00377 * The amount of overflow data stored on each page is stored in the 00378 * hf_offset field. 00379 * 00380 * The implementation reference counts overflow items as it's possible 00381 * for them to be promoted onto btree internal pages. The reference 00382 * count is stored in the entries field. 00383 */ 00384 #define OV_LEN(p) (((PAGE *)p)->hf_offset) 00385 #define OV_REF(p) (((PAGE *)p)->entries) 00386 00387 /* Maximum number of bytes that you can put on an overflow page. */ 00388 #define P_MAXSPACE(dbp, psize) ((psize) - P_OVERHEAD(dbp)) 00389 00390 /* Free space on an overflow page. */ 00391 #define P_OVFLSPACE(dbp, psize, pg) (P_MAXSPACE(dbp, psize) - HOFFSET(pg)) 00392 00393 /************************************************************************ 00394 HASH PAGE LAYOUT 00395 ************************************************************************/ 00396 00397 /* Each index references a group of bytes on the page. */ 00398 #define H_KEYDATA 1 /* Key/data item. */ 00399 #define H_DUPLICATE 2 /* Duplicate key/data item. */ 00400 #define H_OFFPAGE 3 /* Overflow key/data item. */ 00401 #define H_OFFDUP 4 /* Overflow page of duplicates. */ 00402 00403 /* 00404 * !!! 00405 * Items on hash pages are (potentially) unaligned, so we can never cast the 00406 * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as 00407 * we do with B+tree on-page structures. Because we frequently want the type 00408 * field, it requires no alignment, and it's in the same location in all three 00409 * structures, there's a pair of macros. 00410 */ 00411 #define HPAGE_PTYPE(p) (*(u_int8_t *)p) 00412 #define HPAGE_TYPE(dbp, pg, indx) (*P_ENTRY(dbp, pg, indx)) 00413 00414 /* 00415 * The first and second types are H_KEYDATA and H_DUPLICATE, represented 00416 * by the HKEYDATA structure: 00417 * 00418 * +-----------------------------------+ 00419 * | type | key/data ... | 00420 * +-----------------------------------+ 00421 * 00422 * For duplicates, the data field encodes duplicate elements in the data 00423 * field: 00424 * 00425 * +---------------------------------------------------------------+ 00426 * | type | len1 | element1 | len1 | len2 | element2 | len2 | 00427 * +---------------------------------------------------------------+ 00428 * 00429 * Thus, by keeping track of the offset in the element, we can do both 00430 * backward and forward traversal. 00431 */ 00432 typedef struct _hkeydata { 00433 u_int8_t type; /* 00: Page type. */ 00434 u_int8_t data[1]; /* Variable length key/data item. */ 00435 } HKEYDATA; 00436 #define HKEYDATA_DATA(p) (((u_int8_t *)p) + SSZA(HKEYDATA, data)) 00437 00438 /* 00439 * The length of any HKEYDATA item. Note that indx is an element index, 00440 * not a PAIR index. 00441 */ 00442 #define LEN_HITEM(dbp, pg, pgsize, indx) \ 00443 (((indx) == 0 ? (pgsize) : \ 00444 (P_INP(dbp, pg)[(indx) - 1])) - (P_INP(dbp, pg)[indx])) 00445 00446 #define LEN_HKEYDATA(dbp, pg, psize, indx) \ 00447 (db_indx_t)(LEN_HITEM(dbp, pg, psize, indx) - HKEYDATA_SIZE(0)) 00448 00449 /* 00450 * Page space required to add a new HKEYDATA item to the page, with and 00451 * without the index value. 00452 */ 00453 #define HKEYDATA_SIZE(len) \ 00454 ((len) + SSZA(HKEYDATA, data)) 00455 #define HKEYDATA_PSIZE(len) \ 00456 (HKEYDATA_SIZE(len) + sizeof(db_indx_t)) 00457 00458 /* Put a HKEYDATA item at the location referenced by a page entry. */ 00459 #define PUT_HKEYDATA(pe, kd, len, type) { \ 00460 ((HKEYDATA *)pe)->type = type; \ 00461 memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len); \ 00462 } 00463 00464 /* 00465 * Macros the describe the page layout in terms of key-data pairs. 00466 */ 00467 #define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2) 00468 #define H_KEYINDEX(indx) (indx) 00469 #define H_DATAINDEX(indx) ((indx) + 1) 00470 #define H_PAIRKEY(dbp, pg, indx) P_ENTRY(dbp, pg, H_KEYINDEX(indx)) 00471 #define H_PAIRDATA(dbp, pg, indx) P_ENTRY(dbp, pg, H_DATAINDEX(indx)) 00472 #define H_PAIRSIZE(dbp, pg, psize, indx) \ 00473 (LEN_HITEM(dbp, pg, psize, H_KEYINDEX(indx)) + \ 00474 LEN_HITEM(dbp, pg, psize, H_DATAINDEX(indx))) 00475 #define LEN_HDATA(dbp, p, psize, indx) \ 00476 LEN_HKEYDATA(dbp, p, psize, H_DATAINDEX(indx)) 00477 #define LEN_HKEY(dbp, p, psize, indx) \ 00478 LEN_HKEYDATA(dbp, p, psize, H_KEYINDEX(indx)) 00479 00480 /* 00481 * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure: 00482 */ 00483 typedef struct _hoffpage { 00484 u_int8_t type; /* 00: Page type and delete flag. */ 00485 u_int8_t unused[3]; /* 01-03: Padding, unused. */ 00486 db_pgno_t pgno; /* 04-07: Offpage page number. */ 00487 u_int32_t tlen; /* 08-11: Total length of item. */ 00488 } HOFFPAGE; 00489 00490 #define HOFFPAGE_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, pgno)) 00491 #define HOFFPAGE_TLEN(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, tlen)) 00492 00493 /* 00494 * Page space required to add a new HOFFPAGE item to the page, with and 00495 * without the index value. 00496 */ 00497 #define HOFFPAGE_SIZE (sizeof(HOFFPAGE)) 00498 #define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t)) 00499 00500 /* 00501 * The fourth type is H_OFFDUP represented by the HOFFDUP structure: 00502 */ 00503 typedef struct _hoffdup { 00504 u_int8_t type; /* 00: Page type and delete flag. */ 00505 u_int8_t unused[3]; /* 01-03: Padding, unused. */ 00506 db_pgno_t pgno; /* 04-07: Offpage page number. */ 00507 } HOFFDUP; 00508 #define HOFFDUP_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFDUP, pgno)) 00509 00510 /* 00511 * Page space required to add a new HOFFDUP item to the page, with and 00512 * without the index value. 00513 */ 00514 #define HOFFDUP_SIZE (sizeof(HOFFDUP)) 00515 00516 /************************************************************************ 00517 BTREE PAGE LAYOUT 00518 ************************************************************************/ 00519 00520 /* Each index references a group of bytes on the page. */ 00521 #define B_KEYDATA 1 /* Key/data item. */ 00522 #define B_DUPLICATE 2 /* Duplicate key/data item. */ 00523 #define B_OVERFLOW 3 /* Overflow key/data item. */ 00524 00525 /* 00526 * We have to store a deleted entry flag in the page. The reason is complex, 00527 * but the simple version is that we can't delete on-page items referenced by 00528 * a cursor -- the return order of subsequent insertions might be wrong. The 00529 * delete flag is an overload of the top bit of the type byte. 00530 */ 00531 #define B_DELETE (0x80) 00532 #define B_DCLR(t) (t) &= ~B_DELETE 00533 #define B_DSET(t) (t) |= B_DELETE 00534 #define B_DISSET(t) ((t) & B_DELETE) 00535 00536 #define B_TYPE(t) ((t) & ~B_DELETE) 00537 #define B_TSET(t, type, deleted) { \ 00538 (t) = (type); \ 00539 if (deleted) \ 00540 B_DSET(t); \ 00541 } 00542 00543 /* 00544 * The first type is B_KEYDATA, represented by the BKEYDATA structure: 00545 */ 00546 typedef struct _bkeydata { 00547 db_indx_t len; /* 00-01: Key/data item length. */ 00548 u_int8_t type; /* 02: Page type AND DELETE FLAG. */ 00549 u_int8_t data[1]; /* Variable length key/data item. */ 00550 } BKEYDATA; 00551 00552 /* Get a BKEYDATA item for a specific index. */ 00553 #define GET_BKEYDATA(dbp, pg, indx) \ 00554 ((BKEYDATA *)P_ENTRY(dbp, pg, indx)) 00555 00556 /* 00557 * Page space required to add a new BKEYDATA item to the page, with and 00558 * without the index value. The (u_int16_t) cast avoids warnings: DB_ALIGN 00559 * casts to uintmax_t, the cast converts it to a small integral type so we 00560 * don't get complaints when we assign the final result to an integral type 00561 * smaller than uintmax_t. 00562 */ 00563 #define BKEYDATA_SIZE(len) \ 00564 (u_int16_t)DB_ALIGN((len) + SSZA(BKEYDATA, data), sizeof(u_int32_t)) 00565 #define BKEYDATA_PSIZE(len) \ 00566 (BKEYDATA_SIZE(len) + sizeof(db_indx_t)) 00567 00568 /* 00569 * The second and third types are B_DUPLICATE and B_OVERFLOW, represented 00570 * by the BOVERFLOW structure. 00571 */ 00572 typedef struct _boverflow { 00573 db_indx_t unused1; /* 00-01: Padding, unused. */ 00574 u_int8_t type; /* 02: Page type AND DELETE FLAG. */ 00575 u_int8_t unused2; /* 03: Padding, unused. */ 00576 db_pgno_t pgno; /* 04-07: Next page number. */ 00577 u_int32_t tlen; /* 08-11: Total length of item. */ 00578 } BOVERFLOW; 00579 00580 /* Get a BOVERFLOW item for a specific index. */ 00581 #define GET_BOVERFLOW(dbp, pg, indx) \ 00582 ((BOVERFLOW *)P_ENTRY(dbp, pg, indx)) 00583 00584 /* 00585 * Page space required to add a new BOVERFLOW item to the page, with and 00586 * without the index value. 00587 */ 00588 #define BOVERFLOW_SIZE \ 00589 ((u_int16_t)DB_ALIGN(sizeof(BOVERFLOW), sizeof(u_int32_t))) 00590 #define BOVERFLOW_PSIZE \ 00591 (BOVERFLOW_SIZE + sizeof(db_indx_t)) 00592 00593 #define BITEM_SIZE(bk) \ 00594 (B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_SIZE : \ 00595 BKEYDATA_SIZE((bk)->len)) 00596 00597 #define BITEM_PSIZE(bk) \ 00598 (B_TYPE((bk)->type) != B_KEYDATA ? BOVERFLOW_PSIZE : \ 00599 BKEYDATA_PSIZE((bk)->len)) 00600 00601 /* 00602 * Btree leaf and hash page layouts group indices in sets of two, one for the 00603 * key and one for the data. Everything else does it in sets of one to save 00604 * space. Use the following macros so that it's real obvious what's going on. 00605 */ 00606 #define O_INDX 1 00607 #define P_INDX 2 00608 00609 /************************************************************************ 00610 BTREE INTERNAL PAGE LAYOUT 00611 ************************************************************************/ 00612 00613 /* 00614 * Btree internal entry. 00615 */ 00616 typedef struct _binternal { 00617 db_indx_t len; /* 00-01: Key/data item length. */ 00618 u_int8_t type; /* 02: Page type AND DELETE FLAG. */ 00619 u_int8_t unused; /* 03: Padding, unused. */ 00620 db_pgno_t pgno; /* 04-07: Page number of referenced page. */ 00621 db_recno_t nrecs; /* 08-11: Subtree record count. */ 00622 u_int8_t data[1]; /* Variable length key item. */ 00623 } BINTERNAL; 00624 00625 /* Get a BINTERNAL item for a specific index. */ 00626 #define GET_BINTERNAL(dbp, pg, indx) \ 00627 ((BINTERNAL *)P_ENTRY(dbp, pg, indx)) 00628 00629 /* 00630 * Page space required to add a new BINTERNAL item to the page, with and 00631 * without the index value. 00632 */ 00633 #define BINTERNAL_SIZE(len) \ 00634 (u_int16_t)DB_ALIGN((len) + SSZA(BINTERNAL, data), sizeof(u_int32_t)) 00635 #define BINTERNAL_PSIZE(len) \ 00636 (BINTERNAL_SIZE(len) + sizeof(db_indx_t)) 00637 00638 /************************************************************************ 00639 RECNO INTERNAL PAGE LAYOUT 00640 ************************************************************************/ 00641 00642 /* 00643 * The recno internal entry. 00644 */ 00645 typedef struct _rinternal { 00646 db_pgno_t pgno; /* 00-03: Page number of referenced page. */ 00647 db_recno_t nrecs; /* 04-07: Subtree record count. */ 00648 } RINTERNAL; 00649 00650 /* Get a RINTERNAL item for a specific index. */ 00651 #define GET_RINTERNAL(dbp, pg, indx) \ 00652 ((RINTERNAL *)P_ENTRY(dbp, pg, indx)) 00653 00654 /* 00655 * Page space required to add a new RINTERNAL item to the page, with and 00656 * without the index value. 00657 */ 00658 #define RINTERNAL_SIZE \ 00659 (u_int16_t)DB_ALIGN(sizeof(RINTERNAL), sizeof(u_int32_t)) 00660 #define RINTERNAL_PSIZE \ 00661 (RINTERNAL_SIZE + sizeof(db_indx_t)) 00662 00663 struct pglist { 00664 db_pgno_t pgno; 00665 DB_LSN lsn; 00666 }; 00667 00668 #if defined(__cplusplus) 00669 } 00670 #endif 00671 00672 #endif /* !_DB_PAGE_H_ */