backends/quartz/btree.h

Go to the documentation of this file.
00001 /* btree.h: Btree implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2005,2006,2008 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #ifndef OM_HGUARD_BTREE_H
00023 #define OM_HGUARD_BTREE_H
00024 
00025 #include <xapian/visibility.h>
00026 
00027 #include <algorithm>
00028 #include <string>
00029 using std::string;
00030 
00031 #include "quartz_types.h"
00032 #include "btree_base.h"
00033 #include "btree_util.h"
00034 #include "bcursor.h"
00035 #include "noreturn.h"
00036 
00042 const string::size_type BTREE_MAX_KEY_LEN = 252;
00043 
00044 // FIXME: This named constant probably isn't used everywhere it should be...
00045 #define BYTES_PER_BLOCK_NUMBER 4
00046 
00047 /*  The B-tree blocks have a number of internal lengths and offsets held in 1, 2
00048     or 4 bytes. To make the coding a little clearer,
00049        we use  for
00050        ------  ---
00051        K1      the 1 byte length of key
00052        I2      the 2 byte length of an item (key-tag pair)
00053        D2      the 2 byte offset to the item from the directory
00054        C2      the 2 byte counter that ends each key and begins each tag
00055 */
00056 
00057 #define K1 1
00058 #define I2 2
00059 #define D2 2
00060 #define C2 2
00061 
00062 /*  and when getting K1 or setting D2, we use GETK, SETD defined as: */
00063 
00064 #define GETK(p, c)    GETINT1(p, c)
00065 #define SETD(p, c, x) SETINT2(p, c, x)
00066 
00067 /* A B-tree comprises (a) a base file, containing essential information (Block
00068    size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
00069    bitmap being set if the Nth block of the B-tree file is in use, and (c) a
00070    file DB containing the B-tree proper. The DB file is divided into a sequence
00071    of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
00072    use. Those in use are arranged in a tree.
00073 
00074    Each block, b, has a structure like this:
00075 
00076      R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
00077      <---------- D ----------> <-M->
00078 
00079    And then,
00080 
00081    R = REVISION(b)  is the revision number the B-tree had when the block was
00082                     written into the DB file.
00083    L = GET_LEVEL(b) is the level of the block, which is the number of levels
00084                     towards the root of the B-tree structure. So leaf blocks
00085                     have level 0 and the one root block has the highest level
00086                     equal to the number of levels in the B-tree.
00087    M = MAX_FREE(b)  is the size of the gap between the end of the directory and
00088                     the first item of data. (It is not necessarily the maximum
00089                     size among the bits of space that are free, but I can't
00090                     think of a better name.)
00091    T = TOTAL_FREE(b)is the total amount of free space left in b.
00092    D = DIR_END(b)   gives the offset to the end of the directory.
00093 
00094    o1, o2 ... oN are a directory of offsets to the N items held in the block.
00095    The items are key-tag pairs, and as they occur in the directory are ordered
00096    by the keys.
00097 
00098    An item has this form:
00099 
00100            I K key x C tag
00101              <--K-->
00102            <------I------>
00103 
00104    A long tag presented through the API is split up into C tags small enough to
00105    be accommodated in the blocks of the B-tree. The key is extended to include
00106    a counter, x, which runs from 1 to C. The key is preceded by a length, K,
00107    and the whole item with a length, I, as depicted above.
00108 
00109    Here are the corresponding definitions:
00110 
00111 */
00112 
00113 #define REVISION(b)      static_cast<unsigned int>(get_int4(b, 0))
00114 #define GET_LEVEL(b)     GETINT1(b, 4)
00115 #define MAX_FREE(b)      GETINT2(b, 5)
00116 #define TOTAL_FREE(b)    GETINT2(b, 7)
00117 #define DIR_END(b)       GETINT2(b, 9)
00118 #define DIR_START        11
00119 
00120 #define SET_REVISION(b, x)      set_int4(b, 0, x)
00121 #define SET_LEVEL(b, x)         SETINT1(b, 4, x)
00122 #define SET_MAX_FREE(b, x)      SETINT2(b, 5, x)
00123 #define SET_TOTAL_FREE(b, x)    SETINT2(b, 7, x)
00124 #define SET_DIR_END(b, x)       SETINT2(b, 9, x)
00125 
00128 #define SEQ_START_POINT (-10)
00129 
00132 #define BLOCK_CAPACITY 4
00133 
00134 
00135 
00136 /* if you've been reading the comments from the top, the next four procedures
00137    will not cause any headaches.
00138 
00139    Recall that item has this form:
00140 
00141            i k
00142            | |
00143            I K key x C tag
00144              <--K-->
00145            <------I------>
00146 
00147 
00148    item_of(p, c) returns i, the address of the item at block address p,
00149    directory offset c,
00150 
00151    component_of(p, c) returns the number marked 'x' above,
00152 
00153    components_of(p, c) returns the number marked 'C' above,
00154 */
00155 
00156 class XAPIAN_VISIBILITY_DEFAULT Key {
00157     const byte *p;
00158 public:
00159     explicit Key(const byte * p_) : p(p_) { }
00160     const byte * get_address() const { return p; }
00161     void read(string * key) const {
00162         key->assign(reinterpret_cast<const char *>(p + K1), length());
00163     }
00164     bool operator==(Key key2) const;
00165     bool operator!=(Key key2) const { return !(*this == key2); }
00166     bool operator<(Key key2) const;
00167     bool operator>=(Key key2) const { return !(*this < key2); }
00168     bool operator>(Key key2) const { return key2 < *this; }
00169     bool operator<=(Key key2) const { return !(key2 < *this); }
00170     int length() const {
00171         return GETK(p, 0) - C2 - K1;
00172     }
00173     char operator[](size_t i) const {
00174         return p[i + K1];
00175     }
00176 };
00177 
00178 // Item_wr wants to be "Item with non-const p and more methods" - we can't
00179 // achieve that nicely with inheritance, so we use a template base class.
00180 template <class T> class Item_base {
00181 protected:
00182     T p;
00183 public:
00184     /* Item from block address and offset to item pointer */
00185     Item_base(T p_, int c) : p(p_ + GETINT2(p_, c)) { }
00186     Item_base(T p_) : p(p_) { }
00187     T get_address() const { return p; }
00188     int size() const { return GETINT2(p, 0); } /* I in diagram above */
00189     int component_of() const {
00190         return GETINT2(p, GETK(p, I2) + I2 - C2);
00191     }
00192     int components_of() const {
00193         return GETINT2(p, GETK(p, I2) + I2);
00194     }
00195     Key key() const { return Key(p + I2); }
00196     void append_chunk(string * tag) const {
00197         /* number of bytes to extract from current component */
00198         int cd = GETK(p, I2) + I2 + C2;
00199         int l = size() - cd;
00200         tag->append(reinterpret_cast<const char *>(p + cd), l);
00201     }
00205     uint4 block_given_by() const {
00206         return get_int4(p, size() - BYTES_PER_BLOCK_NUMBER);
00207     }
00208 };
00209 
00210 class Item : public Item_base<const byte *> {
00211 public:
00212     /* Item from block address and offset to item pointer */
00213     Item(const byte * p_, int c) : Item_base<const byte *>(p_, c) { }
00214     Item(const byte * p_) : Item_base<const byte *>(p_) { }
00215 };
00216 
00217 class Item_wr : public Item_base<byte *> {
00218     void set_key_len(int x) { SETINT1(p, I2, x); }
00219 public:
00220     /* Item_wr from block address and offset to item pointer */
00221     Item_wr(byte * p_, int c) : Item_base<byte *>(p_, c) { }
00222     Item_wr(byte * p_) : Item_base<byte *>(p_) { }
00223     void set_component_of(int i) {
00224         SETINT2(p, GETK(p, I2) + I2 - C2, i);
00225     }
00226     void set_components_of(int m) {
00227         SETINT2(p, GETK(p, I2) + I2, m);
00228     }
00229     // Takes size as we may be truncating newkey.
00230     void set_key_and_block(Key newkey, int truncate_size, uint4 n) {
00231         int i = truncate_size;
00232         // Read the length now because we may be copying the key over itself.
00233         // FIXME that's stupid!  sort this out
00234         int newkey_len = newkey.length();
00235         int newsize = I2 + K1 + i + C2;
00236         // Item size (4 since tag contains block number)
00237         SETINT2(p, 0, newsize + 4);
00238         // Key size
00239         SETINT1(p, I2, newsize - I2);
00240         // Copy the main part of the key, possibly truncating.
00241         memmove(p + I2 + K1, newkey.get_address() + K1, i);
00242         // Copy the count part.
00243         memmove(p + I2 + K1 + i, newkey.get_address() + K1 + newkey_len, C2);
00244         // Set tag contents to block number
00245 //      set_block_given_by(n);
00246         set_int4(p, newsize, n);
00247     }
00248 
00252     void set_block_given_by(uint4 n) {
00253         set_int4(p, size() - BYTES_PER_BLOCK_NUMBER, n);
00254     }
00255     void set_size(int l) { SETINT2(p, 0, l); }
00258     void form_null_key(uint4 n) {
00259         set_int4(p, I2 + K1, n);
00260         set_key_len(K1);        /* null key */
00261         set_size(I2 + K1 + 4);  /* total length */
00262     }
00263     void form_key(const string & key_) {
00264         Assert(key_.length() <= BTREE_MAX_KEY_LEN);
00265 
00266         // This just so it doesn't fall over horribly in non-debug builds.
00267         string::size_type key_len = std::min(key_.length(), BTREE_MAX_KEY_LEN);
00268 
00269         set_key_len(key_len + K1 + C2);
00270         memmove(p + I2 + K1, key_.data(), key_len);
00271         set_component_of(1);
00272     }
00273     // FIXME passing cd here is icky
00274     void set_tag(int cd, const char *start, int len) {
00275         memmove(p + cd, start, len);
00276         set_size(cd + len);
00277     }
00278     void fake_root_item() {
00279         set_key_len(K1 + C2);   // null key length
00280         set_size(I2 + K1 + 2 * C2);   // length of the item
00281         set_component_of(1);
00282         set_components_of(1);
00283     }
00284 };
00285 
00286 // Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
00287 // With 10, overflow is practically impossible
00288 // FIXME: but we want it to be completely impossible...
00289 #define BTREE_CURSOR_LEVELS 10
00290 
00311 class XAPIAN_VISIBILITY_DEFAULT Btree {
00312     friend class Bcursor; /* Should probably fix this. */
00313     private:
00315         Btree(const Btree &);
00316 
00318         Btree & operator=(const Btree &);
00319 
00320     public:
00333         Btree(string path_, bool readonly_);
00334 
00340         ~Btree();
00341 
00345         void close();
00346 
00349         bool exists() const;
00350 
00359         void open();
00360 
00378         bool open(quartz_revision_number_t revision_);
00379 
00393         void commit(quartz_revision_number_t revision);
00394 
00400         void cancel();
00401 
00415         bool get_exact_entry(const string & key, string & tag) const;
00416 
00429         bool find_tag(const string &key, string * tag) const;
00430 
00433         void read_tag(Cursor * C_, string *tag) const;
00434 
00453         bool add(const string &key, string tag);
00454 
00472         bool del(const string &key);
00473 
00491         void create(unsigned int blocksize);
00492 
00493         void set_full_compaction(bool parity);
00494 
00505         quartz_revision_number_t get_latest_revision_number() const {
00506             return latest_revision_number;
00507         }
00508 
00517         quartz_revision_number_t get_open_revision_number() const {
00518             return revision_number;
00519         }
00520 
00527         quartz_tablesize_t get_entry_count() const {
00528             return item_count;
00529         }
00530 
00536         Bcursor * cursor_get() const;
00537 
00543         bool is_modified() const { return Btree_modified; }
00544 
00550         void set_max_item_size(size_t block_capacity) {
00551             if (block_capacity > 4) block_capacity = 4;
00552             max_item_size = (block_size - DIR_START - block_capacity * D2)
00553                 / block_capacity;
00554         }
00555 
00556     protected:
00557 
00562         bool do_open_to_read(bool revision_supplied, quartz_revision_number_t revision_);
00563 
00568         bool do_open_to_write(bool revision_supplied, quartz_revision_number_t revision_);
00569         bool basic_open(bool revision_supplied, quartz_revision_number_t revision);
00570 
00571         bool find(Cursor *) const;
00572         int delete_kt();
00573         void read_block(uint4 n, byte *p) const;
00574         void write_block(uint4 n, const byte *p) const;
00575         XAPIAN_NORETURN(void set_overwritten() const);
00576         void block_to_cursor(Cursor *C_, int j, uint4 n) const;
00577         void alter();
00578         void compact(byte *p);
00579         void enter_key(int j, Key prevkey, Key newkey);
00580         int mid_point(byte *p);
00581         void add_item_to_block(byte *p, Item_wr kt, int c);
00582         void add_item(Item_wr kt, int j);
00583         void delete_item(int j, bool repeatedly);
00584         int add_kt(bool found);
00585         void read_root();
00586         void split_root(uint4 split_n);
00587         void form_key(const string & key) const;
00588 
00590         quartz_revision_number_t revision_number;
00591 
00593         uint4 item_count;
00594 
00596         unsigned int block_size;
00597 
00601         mutable quartz_revision_number_t latest_revision_number;
00602 
00607         mutable bool both_bases;
00608 
00610         int base_letter;
00611 
00616         bool faked_root_block;
00617 
00621         bool sequential;
00622 
00624         int handle;
00625 
00627         int level;
00628 
00630         uint4 root;
00631 
00633         mutable Item_wr kt;
00634 
00636         byte * buffer;
00637 
00639         Btree_base base;
00640 
00642         char other_base_letter;
00643 
00645         string name;
00646 
00650         int seq_count;
00651 
00653         uint4 changed_n;
00654 
00657         int changed_c;
00658 
00660         size_t max_item_size;
00661 
00663         mutable bool Btree_modified;
00664 
00666         bool full_compaction;
00667 
00669         bool writable;
00670 
00672         bool dont_close_handle;
00673 
00674         /* B-tree navigation functions */
00675         bool prev(Cursor *C_, int j) const { return (this->*prev_ptr)(C_, j); }
00676         bool next(Cursor *C_, int j) const { return (this->*next_ptr)(C_, j); }
00677 
00678         bool (Btree::* prev_ptr)(Cursor *, int) const;
00679         bool (Btree::* next_ptr)(Cursor *, int) const;
00680 
00681         bool prev_default(Cursor *C_, int j) const;
00682         bool next_default(Cursor *C_, int j) const;
00683 
00684         bool prev_for_sequential(Cursor *C_, int dummy) const;
00685         bool next_for_sequential(Cursor *C_, int dummy) const;
00686 
00687         static int find_in_block(const byte * p, Key key, bool leaf, int c);
00688 
00692         static uint4 block_given_by(const byte * p, int c);
00693 
00694         mutable Cursor C[BTREE_CURSOR_LEVELS];
00695 
00701         byte * split_p;
00702 
00703         /* Debugging methods */
00704 //      void report_block_full(int m, int n, const byte * p);
00705 };
00706 
00707 #endif /* OM_HGUARD_BTREE_H */

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.