backends/flint/flint_table.h

Go to the documentation of this file.
00001 /* flint_table.h: Btree implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #ifndef OM_HGUARD_FLINT_TABLE_H
00023 #define OM_HGUARD_FLINT_TABLE_H
00024 
00025 #include <xapian/error.h>
00026 #include <xapian/visibility.h>
00027 
00028 #include <algorithm>
00029 #include <string>
00030 using std::string;
00031 
00032 #include "flint_types.h"
00033 #include "flint_btreebase.h"
00034 #include "flint_btreeutil.h"
00035 #include "flint_cursor.h"
00036 #include "noreturn.h"
00037 
00038 #include "stringutils.h"
00039 #include "utils.h"
00040 
00041 #include <zlib.h>
00042 
00043 #define DONT_COMPRESS -1
00044 
00050 #define FLINT_BTREE_MAX_KEY_LEN 252
00051 
00052 // FIXME: This named constant probably isn't used everywhere it should be...
00053 #define BYTES_PER_BLOCK_NUMBER 4
00054 
00055 /*  The B-tree blocks have a number of internal lengths and offsets held in 1, 2
00056     or 4 bytes. To make the coding a little clearer,
00057        we use  for
00058        ------  ---
00059        K1      the 1 byte length of key
00060        I2      the 2 byte length of an item (key-tag pair)
00061        D2      the 2 byte offset to the item from the directory
00062        C2      the 2 byte counter that ends each key and begins each tag
00063 */
00064 
00065 #define K1 1
00066 #define I2 2
00067 #define D2 2
00068 #define C2 2
00069 
00070 /*  and when getting K1 or setting D2, we use getK, setD defined as: */
00071 
00072 #define getK(p, c)    getint1(p, c)
00073 #define setD(p, c, x) setint2(p, c, x)
00074 
00075 /* if you've been reading the comments from the top, the next four procedures
00076    will not cause any headaches.
00077 
00078    Recall that item has this form:
00079 
00080            i k
00081            | |
00082            I K key x C tag
00083              <--K-->
00084            <------I------>
00085 
00086 
00087    item_of(p, c) returns i, the address of the item at block address p,
00088    directory offset c,
00089 
00090    component_of(p, c) returns the number marked 'x' above,
00091 
00092    components_of(p, c) returns the number marked 'C' above,
00093 */
00094 
00095 class XAPIAN_VISIBILITY_DEFAULT Key_ {
00096     const byte *p;
00097 public:
00098     explicit Key_(const byte * p_) : p(p_) { }
00099     const byte * get_address() const { return p; }
00100     void read(string * key) const {
00101         key->assign(reinterpret_cast<const char *>(p + K1), length());
00102     }
00103     bool operator==(Key_ key2) const;
00104     bool operator!=(Key_ key2) const { return !(*this == key2); }
00105     bool operator<(Key_ key2) const;
00106     bool operator>=(Key_ key2) const { return !(*this < key2); }
00107     bool operator>(Key_ key2) const { return key2 < *this; }
00108     bool operator<=(Key_ key2) const { return !(key2 < *this); }
00109     int length() const {
00110         return getK(p, 0) - C2 - K1;
00111     }
00112     char operator[](size_t i) const {
00113         return p[i + K1];
00114     }
00115 };
00116 
00117 // Item_wr_ wants to be "Item_ with non-const p and more methods" - we can't
00118 // achieve that nicely with inheritance, so we use a template base class.
00119 template <class T> class Item_base_ {
00120 protected:
00121     T p;
00122 public:
00123     /* Item_ from block address and offset to item pointer */
00124     Item_base_(T p_, int c) : p(p_ + getint2(p_, c)) { }
00125     Item_base_(T p_) : p(p_) { }
00126     T get_address() const { return p; }
00127     int size() const { return getint2(p, 0) & 0x7fff; } /* I in diagram above */
00128     bool get_compressed() const { return *p & 0x80; }
00129     int component_of() const {
00130         return getint2(p, getK(p, I2) + I2 - C2);
00131     }
00132     int components_of() const {
00133         return getint2(p, getK(p, I2) + I2);
00134     }
00135     Key_ key() const { return Key_(p + I2); }
00136     void append_chunk(string * tag) const {
00137         /* number of bytes to extract from current component */
00138         int cd = getK(p, I2) + I2 + C2;
00139         int l = size() - cd;
00140         tag->append(reinterpret_cast<const char *>(p + cd), l);
00141     }
00145     uint4 block_given_by() const {
00146         return getint4(p, size() - BYTES_PER_BLOCK_NUMBER);
00147     }
00148 };
00149 
00150 class Item_ : public Item_base_<const byte *> {
00151 public:
00152     /* Item_ from block address and offset to item pointer */
00153     Item_(const byte * p_, int c) : Item_base_<const byte *>(p_, c) { }
00154     Item_(const byte * p_) : Item_base_<const byte *>(p_) { }
00155 };
00156 
00157 class Item_wr_ : public Item_base_<byte *> {
00158     void set_key_len(int x) { setint1(p, I2, x); }
00159 public:
00160     /* Item_wr_ from block address and offset to item pointer */
00161     Item_wr_(byte * p_, int c) : Item_base_<byte *>(p_, c) { }
00162     Item_wr_(byte * p_) : Item_base_<byte *>(p_) { }
00163     void set_component_of(int i) {
00164         setint2(p, getK(p, I2) + I2 - C2, i);
00165     }
00166     void set_components_of(int m) {
00167         setint2(p, getK(p, I2) + I2, m);
00168     }
00169     // Takes size as we may be truncating newkey.
00170     void set_key_and_block(Key_ newkey, int truncate_size, uint4 n) {
00171         int i = truncate_size;
00172         // Read the length now because we may be copying the key over itself.
00173         // FIXME that's stupid!  sort this out
00174         int newkey_len = newkey.length();
00175         int newsize = I2 + K1 + i + C2;
00176         // Item size (4 since tag contains block number)
00177         setint2(p, 0, newsize + 4);
00178         // Key size
00179         setint1(p, I2, newsize - I2);
00180         // Copy the main part of the key, possibly truncating.
00181         memmove(p + I2 + K1, newkey.get_address() + K1, i);
00182         // Copy the count part.
00183         memmove(p + I2 + K1 + i, newkey.get_address() + K1 + newkey_len, C2);
00184         // Set tag contents to block number
00185 //      set_block_given_by(n);
00186         setint4(p, newsize, n);
00187     }
00188 
00192     void set_block_given_by(uint4 n) {
00193         setint4(p, size() - BYTES_PER_BLOCK_NUMBER, n);
00194     }
00195     void set_size(int l) { setint2(p, 0, l); }
00198     void form_null_key(uint4 n) {
00199         setint4(p, I2 + K1, n);
00200         set_key_len(K1);        /* null key */
00201         set_size(I2 + K1 + 4);  /* total length */
00202     }
00203     void form_key(const string & key_) {
00204         string::size_type key_len = key_.length();
00205         if (key_len > FLINT_BTREE_MAX_KEY_LEN) {
00206             // We check term length when a term is added to a document but
00207             // flint doubles zero bytes, so this can still happen for terms
00208             // which contain one or more zero bytes.
00209             string msg("Key too long: length was ");
00210             msg += om_tostring(key_len);
00211             msg += " bytes, maximum length of a key is "
00212                    STRINGIZE(FLINT_BTREE_MAX_KEY_LEN) " bytes";
00213             throw Xapian::InvalidArgumentError(msg);
00214         }
00215 
00216         set_key_len(key_len + K1 + C2);
00217         memmove(p + I2 + K1, key_.data(), key_len);
00218         set_component_of(1);
00219     }
00220     // FIXME passing cd here is icky
00221     void set_tag(int cd, const char *start, int len, bool compressed) {
00222         memmove(p + cd, start, len);
00223         set_size(cd + len);
00224         if (compressed) *p |= 0x80;
00225     }
00226     void fake_root_item() {
00227         set_key_len(K1 + C2);   // null key length
00228         set_size(I2 + K1 + 2 * C2);   // length of the item
00229         set_component_of(1);
00230         set_components_of(1);
00231     }
00232 };
00233 
00234 // Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
00235 // With 10, overflow is practically impossible
00236 // FIXME: but we want it to be completely impossible...
00237 #define BTREE_CURSOR_LEVELS 10
00238 
00259 class XAPIAN_VISIBILITY_DEFAULT FlintTable {
00260     friend class FlintCursor; /* Should probably fix this. */
00261     private:
00263         FlintTable(const FlintTable &);
00264 
00266         FlintTable & operator=(const FlintTable &);
00267 
00268     public:
00284         FlintTable(string path_, bool readonly_,
00285                    int compress_strategy_ = DONT_COMPRESS, bool lazy = false);
00286 
00292         ~FlintTable();
00293 
00299         void close(bool permanent=false);
00300 
00303         bool exists() const;
00304 
00313         void open();
00314 
00332         bool open(flint_revision_number_t revision_);
00333 
00347         void commit(flint_revision_number_t revision);
00348 
00354         void cancel();
00355 
00369         bool get_exact_entry(const string & key, string & tag) const;
00370 
00382         bool key_exists(const string &key) const;
00383 
00396         bool find_tag(const string &key, string * tag) const;
00397 
00406         bool read_tag(Cursor_ * C_, string *tag, bool keep_compressed) const;
00407 
00429         bool add(const string &key, string tag, bool already_compressed = false);
00430 
00448         bool del(const string &key);
00449 
00451         void erase();
00452 
00457         void set_block_size(unsigned int block_size_);
00458 
00461         unsigned int get_block_size() const { return block_size; }
00462 
00486         void create_and_open(unsigned int blocksize);
00487 
00488         void set_full_compaction(bool parity);
00489 
00500         flint_revision_number_t get_latest_revision_number() const {
00501             return latest_revision_number;
00502         }
00503 
00512         flint_revision_number_t get_open_revision_number() const {
00513             return revision_number;
00514         }
00515 
00522         flint_tablesize_t get_entry_count() const {
00523             return item_count;
00524         }
00525 
00531         FlintCursor * cursor_get() const;
00532 
00538         bool is_modified() const { return Btree_modified; }
00539 
00545         void set_max_item_size(size_t block_capacity) {
00546             if (block_capacity > 4) block_capacity = 4;
00547             max_item_size = (block_size - 11 /*DIR_START*/ - block_capacity * D2)
00548                 / block_capacity;
00549         }
00550 
00551     protected:
00552 
00557         bool do_open_to_read(bool revision_supplied, flint_revision_number_t revision_);
00558 
00563         bool do_open_to_write(bool revision_supplied,
00564                               flint_revision_number_t revision_,
00565                               bool create_db = false);
00566         bool basic_open(bool revision_supplied, flint_revision_number_t revision);
00567 
00568         bool find(Cursor_ *) const;
00569         int delete_kt();
00570         void read_block(uint4 n, byte *p) const;
00571         void write_block(uint4 n, const byte *p) const;
00572         XAPIAN_NORETURN(void set_overwritten() const);
00573         void block_to_cursor(Cursor_ *C_, int j, uint4 n) const;
00574         void alter();
00575         void compact(byte *p);
00576         void enter_key(int j, Key_ prevkey, Key_ newkey);
00577         int mid_point(byte *p);
00578         void add_item_to_block(byte *p, Item_wr_ kt, int c);
00579         void add_item(Item_wr_ kt, int j);
00580         void delete_item(int j, bool repeatedly);
00581         int add_kt(bool found);
00582         void read_root();
00583         void split_root(uint4 split_n);
00584         void form_key(const string & key) const;
00585 
00586         char other_base_letter() const {
00587            return (base_letter == 'A') ? 'B' : 'A';
00588         }
00589 
00591         flint_revision_number_t revision_number;
00592 
00594         uint4 item_count;
00595 
00597         unsigned int block_size;
00598 
00602         mutable flint_revision_number_t latest_revision_number;
00603 
00608         mutable bool both_bases;
00609 
00611         char base_letter;
00612 
00617         bool faked_root_block;
00618 
00622         bool sequential;
00623 
00625         int handle;
00626 
00628         int level;
00629 
00631         uint4 root;
00632 
00634         mutable Item_wr_ kt;
00635 
00637         byte * buffer;
00638 
00640         FlintTable_base base;
00641 
00643         string name;
00644 
00648         int seq_count;
00649 
00651         uint4 changed_n;
00652 
00655         int changed_c;
00656 
00658         size_t max_item_size;
00659 
00661         mutable bool Btree_modified;
00662 
00664         bool full_compaction;
00665 
00667         bool writable;
00668 
00669         /* B-tree navigation functions */
00670         bool prev(Cursor_ *C_, int j) const {
00671             if (sequential) return prev_for_sequential(C_, j);
00672             return prev_default(C_, j);
00673         }
00674 
00675         bool next(Cursor_ *C_, int j) const {
00676             if (sequential) return next_for_sequential(C_, j);
00677             return next_default(C_, j);
00678         }
00679 
00680         /* Default implementations. */
00681         bool prev_default(Cursor_ *C_, int j) const;
00682         bool next_default(Cursor_ *C_, int j) const;
00683 
00684         /* Implementations for sequential mode. */
00685         bool prev_for_sequential(Cursor_ *C_, int dummy) const;
00686         bool next_for_sequential(Cursor_ *C_, int dummy) const;
00687 
00688         static int find_in_block(const byte * p, Key_ key, bool leaf, int c);
00689 
00693         static uint4 block_given_by(const byte * p, int c);
00694 
00695         mutable Cursor_ C[BTREE_CURSOR_LEVELS];
00696 
00702         byte * split_p;
00703 
00706         int compress_strategy;
00707 
00709         bool lazy;
00710 
00711         /* Debugging methods */
00712 //      void report_block_full(int m, int n, const byte * p);
00713 };
00714 
00715 #endif /* OM_HGUARD_FLINT_TABLE_H */

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.