backends/quartz/btree_base.cc

Go to the documentation of this file.
00001 /* btree_base.cc: Btree base file implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2006 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #include <config.h>
00023 
00024 #include "safeerrno.h"
00025 
00026 #include "btree_base.h"
00027 #include "quartz_utils.h"
00028 #include "utils.h"
00029 #include <xapian/error.h>
00030 #include "omassert.h"
00031 
00032 #include <limits.h>
00033 
00034 #include <algorithm>
00035 
00036 using namespace std;
00037 
00038 /************ Base file parameters ************/
00039 
00066 #define CURR_FORMAT 5U
00067 
00068 Btree_base::Btree_base()
00069         : revision(0),
00070           block_size(0),
00071           root(0),
00072           level(0),
00073           bit_map_size(0),
00074           item_count(0),
00075           last_block(0),
00076           have_fakeroot(false),
00077           sequential(false),
00078           bit_map_low(0),
00079           bit_map0(0),
00080           bit_map(0)
00081 {
00082 }
00083 
00084 Btree_base::Btree_base(const string &name_, char ch)
00085         : revision(0),
00086           block_size(0),
00087           root(0),
00088           level(0),
00089           bit_map_size(0),
00090           item_count(0),
00091           last_block(0),
00092           have_fakeroot(false),
00093           sequential(false),
00094           bit_map_low(0),
00095           bit_map0(0),
00096           bit_map(0)
00097 {
00098     string err_msg;
00099     if (!read(name_, ch, err_msg)) {
00100         throw Xapian::DatabaseOpeningError(err_msg);
00101     }
00102 }
00103 
00104 Btree_base::Btree_base(const Btree_base &other)
00105         : revision(other.revision),
00106           block_size(other.block_size),
00107           root(other.root),
00108           level(other.level),
00109           bit_map_size(other.bit_map_size),
00110           item_count(other.item_count),
00111           last_block(other.last_block),
00112           have_fakeroot(other.have_fakeroot),
00113           sequential(other.sequential),
00114           bit_map_low(other.bit_map_low),
00115           bit_map0(0),
00116           bit_map(0)
00117 {
00118     try {
00119         bit_map0 = new byte[bit_map_size];
00120         bit_map = new byte[bit_map_size];
00121 
00122         memcpy(bit_map0, other.bit_map0, bit_map_size);
00123         memcpy(bit_map, other.bit_map, bit_map_size);
00124     } catch (...) {
00125         delete [] bit_map0;
00126         delete [] bit_map;
00127     }
00128 }
00129 
00130 void
00131 Btree_base::swap(Btree_base &other)
00132 {
00133     std::swap(revision, other.revision);
00134     std::swap(block_size, other.block_size);
00135     std::swap(root, other.root);
00136     std::swap(level, other.level);
00137     std::swap(bit_map_size, other.bit_map_size);
00138     std::swap(item_count, other.item_count);
00139     std::swap(last_block, other.last_block);
00140     std::swap(have_fakeroot, other.have_fakeroot);
00141     std::swap(sequential, other.sequential);
00142     std::swap(bit_map_low, other.bit_map_low);
00143     std::swap(bit_map0, other.bit_map0);
00144     std::swap(bit_map, other.bit_map);
00145 }
00146 
00147 Btree_base::~Btree_base()
00148 {
00149     delete [] bit_map;
00150     bit_map = 0;
00151     delete [] bit_map0;
00152     bit_map0 = 0;
00153 }
00154 
00155 bool
00156 Btree_base::do_unpack_uint(const char **start, const char *end,
00157                            uint4 *dest, string &err_msg, 
00158                            const string &basename,
00159                            const char *varname)
00160 {
00161     bool result = unpack_uint(start, end, dest);
00162     if (!result) {
00163         err_msg += "Unable to read " + string(varname) + " from " +
00164                     basename + "\n";
00165     }
00166     return result;
00167 }
00168 
00169 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
00170 do { \
00171     if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
00172         return false; \
00173     } \
00174 } while(0)
00175 
00176 /* How much of the base file to read at the first go (in bytes).
00177  * This should be big enough that the base file without bitmap
00178  * will fit in to this size with no problem.  Other than that
00179  * it's fairly arbitrary, but shouldn't be big enough to cause
00180  * serious memory problems!
00181  */
00182 #define REASONABLE_BASE_SIZE 1024
00183 
00184 bool
00185 Btree_base::read(const string & name, char ch, string &err_msg)
00186 {
00187     string basename = name + "base" + ch;
00188     int h = sys_open_to_read_no_except(basename);
00189     fdcloser closefd(h);
00190     if (h == -1) {
00191         err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
00192         return false;
00193     }
00194     string buf(sys_read_all_bytes(h, REASONABLE_BASE_SIZE));
00195 
00196     const char *start = buf.data();
00197     const char *end = start + buf.length();
00198 
00199     DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
00200     uint4 format;
00201     DO_UNPACK_UINT_ERRCHECK(&start, end, format);
00202     if (format != CURR_FORMAT) {
00203         err_msg += "Bad base file format " + om_tostring(format) + " in " +
00204                     basename + "\n";
00205         return false;
00206     }
00207     DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
00208     DO_UNPACK_UINT_ERRCHECK(&start, end, root);
00209     DO_UNPACK_UINT_ERRCHECK(&start, end, level);
00210     DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
00211     /* Now that we know the size of the bit map, (possibly)
00212      * read in more data from the base file in case
00213      * REASONABLE_BASE_SIZE is too small.  We need to update
00214      * start and end.
00215      */
00216     {
00217         unsigned long start_offset = start - buf.data();
00218         buf += sys_read_all_bytes(h, bit_map_size);
00219         start = buf.data() + start_offset;
00220         end = buf.data() + buf.length();
00221     }
00222     DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
00223     DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
00224     uint4 have_fakeroot_;
00225     DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
00226     have_fakeroot = have_fakeroot_;
00227 
00228     uint4 sequential_;
00229     DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
00230     sequential = sequential_;
00231 
00232     if (have_fakeroot && !sequential) {
00233         sequential = true; // FIXME : work out why we need this...
00234         /*
00235         err_msg += "Corrupt base file, `" + basename + "':\n"
00236                 "sequential must be set whenever have_fakeroot is set.\n" +
00237                 "sequential=" + (sequential?"true":"false") +
00238                 ", have_fakeroot=" + (have_fakeroot?"true":"false") + "\n";
00239         return false;
00240         */
00241     }
00242 
00243     uint4 revision2;
00244     DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
00245     if (revision != revision2) {
00246         err_msg += "Revision number mismatch in " +
00247                 basename + ": " +
00248                 om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
00249         return false;
00250     }
00251 
00252     /* Read the bitmap */
00253     if (uint4(end - start) <= bit_map_size) {
00254         err_msg += "Not enough space for bitmap in base file " +
00255                 basename + "\n";
00256         return false;
00257     }
00258 
00259     /* It's ok to delete a zero pointer */
00260     delete [] bit_map0;
00261     bit_map0 = 0;
00262     delete [] bit_map;
00263     bit_map = 0;
00264 
00265     bit_map0 = new byte[bit_map_size];
00266     bit_map = new byte[bit_map_size];
00267     memcpy(bit_map0, start, bit_map_size);
00268     memcpy(bit_map, bit_map0, bit_map_size);
00269     start += bit_map_size;
00270 
00271     uint4 revision3;
00272     if (!unpack_uint(&start, end, &revision3)) {
00273         err_msg += "Couldn't read revision2 from base file " +
00274         basename + "\n";
00275         return false;
00276     }
00277 
00278     if (revision != revision3) {
00279         err_msg += "Revision number mismatch in " +
00280                 basename + ": " +
00281                 om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
00282         return false;
00283     }
00284 
00285     if (start != end) {
00286         err_msg += "Junk at end of base file " + basename + "\n";
00287         return false;
00288     }
00289 
00290     return true;
00291 }
00292 
00293 void
00294 Btree_base::write_to_file(const string &filename)
00295 {
00296     calculate_last_block();
00297 
00298     string buf;
00299     buf += pack_uint(revision);
00300     buf += pack_uint(CURR_FORMAT);
00301     buf += pack_uint(block_size);
00302     buf += pack_uint(static_cast<uint4>(root));
00303     buf += pack_uint(static_cast<uint4>(level));
00304     buf += pack_uint(static_cast<uint4>(bit_map_size));
00305     buf += pack_uint(static_cast<uint4>(item_count));
00306     buf += pack_uint(static_cast<uint4>(last_block));
00307     buf += pack_uint(have_fakeroot);
00308     buf += pack_uint(sequential);
00309     buf += pack_uint(revision);
00310     if (bit_map_size) {
00311         buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
00312     }
00313     buf += pack_uint(revision);  // REVISION2
00314 
00315     int h = sys_open_to_write(filename);
00316     fdcloser closefd(h);
00317 
00318     sys_write_string(h, buf);
00319     sys_flush(h);
00320 }
00321 
00322 /*
00323    block_free_at_start(B, n) is true iff (if and only if) block n was free at
00324    the start of the transaction on the B-tree.
00325 */
00326 
00327 bool
00328 Btree_base::block_free_at_start(uint4 n) const
00329 {
00330     size_t i = n / CHAR_BIT;
00331     int bit = 0x1 << n % CHAR_BIT;
00332     return (bit_map0[i] & bit) == 0;
00333 }
00334 
00335 /* free_block(B, n) causes block n to be marked free in the bit map.
00336    B->bit_map_low is the lowest byte in the bit map known to have a free bit
00337    set. Searching starts from there when looking for a free block.
00338 */
00339 
00340 void
00341 Btree_base::free_block(uint4 n)
00342 {
00343     uint4 i = n / CHAR_BIT;
00344     int bit = 0x1 << n % CHAR_BIT;
00345     bit_map[i] &= ~ bit;
00346 
00347     if (bit_map_low > i)
00348         if ((bit_map0[i] & bit) == 0) /* free at start */
00349             bit_map_low = i;
00350 }
00351 
00352 /* extend(B) increases the size of the two bit maps in an obvious way.
00353    The bitmap file grows and shrinks as the DB file grows and shrinks in
00354    internal usage. But the DB file itself does not reduce in size, no matter
00355    how many blocks are freed.
00356 */
00357 
00358 #define BIT_MAP_INC 1000
00359     /* increase the bit map by this number of bytes if it overflows */
00360 
00361 void
00362 Btree_base::extend_bit_map()
00363 {
00364     int n = bit_map_size + BIT_MAP_INC;
00365     byte *new_bit_map0 = 0;
00366     byte *new_bit_map = 0;
00367 
00368     try {
00369         new_bit_map0 = new byte[n];
00370         new_bit_map = new byte[n];
00371 
00372         memcpy(new_bit_map0, bit_map0, bit_map_size);
00373         memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
00374         
00375         memcpy(new_bit_map, bit_map, bit_map_size);
00376         memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
00377     } catch (...) {
00378         delete [] new_bit_map0;
00379         delete [] new_bit_map;
00380         throw;
00381     }
00382     delete [] bit_map0;
00383     bit_map0 = new_bit_map0;
00384     delete [] bit_map;
00385     bit_map = new_bit_map;
00386     bit_map_size = n;
00387 }
00388 
00389 /* next_free_block(B) returns the number of the next available free block in
00390    the bitmap, marking it as 'in use' before returning. More precisely, we get
00391    a block that is both free now (in bit_map) and was free at the beginning of
00392    the transaction on the B-tree (in bit_map0).
00393 
00394    Starting at bit_map_low we go up byte at a time until we find a byte with a
00395    free (zero) bit, and then go up that byte bit at a time. If the bit map has
00396    no free bits it is extended so that it will have.
00397 */
00398 
00399 uint4
00400 Btree_base::next_free_block()
00401 {
00402     uint4 i;
00403     int x;
00404     for (i = bit_map_low;; i++)
00405     {  
00406         if (i >= bit_map_size) {
00407             extend_bit_map();
00408         }
00409         x = bit_map0[i] | bit_map[i];
00410         if (x != UCHAR_MAX) break;
00411     }
00412     uint4 n = i * CHAR_BIT;
00413     int d = 0x1;
00414     while ((x & d) != 0) { d <<= 1; n++; }
00415     bit_map[i] |= d;   /* set as 'in use' */
00416     bit_map_low = i;
00417     if (n > last_block) {
00418         last_block = n;
00419     }
00420     return n;
00421 }
00422 
00423 bool
00424 Btree_base::block_free_now(uint4 n)
00425 {
00426     uint4 i = n / CHAR_BIT;
00427     int bit = 0x1 << n % CHAR_BIT;
00428     return (bit_map[i] & bit) == 0;
00429 }
00430 
00431 void
00432 Btree_base::calculate_last_block()
00433 {
00434     if (bit_map_size == 0) {
00435         last_block = 0;
00436         return;
00437     }
00438     int i = bit_map_size - 1;
00439     while (bit_map[i] == 0 && i > 0) {
00440         i--;
00441     }
00442     bit_map_size = i + 1;
00443 
00444     int x = bit_map[i];
00445 
00446     /* Check for when there are no blocks */
00447     if (x == 0) {
00448         last_block = 0;
00449         return;
00450     }
00451     uint4 n = (i + 1) * CHAR_BIT - 1;
00452     int d = 0x1 << (CHAR_BIT - 1);
00453     while ((x & d) == 0) { d >>= 1; n--; }
00454 
00455     last_block = n;
00456 }
00457 
00458 bool
00459 Btree_base::is_empty() const
00460 {
00461     for (uint4 i = 0; i < bit_map_size; i++) {
00462         if (bit_map[i] != 0) {
00463             return false;
00464         }
00465     }
00466     return true;
00467 }
00468 
00469 void
00470 Btree_base::clear_bit_map()
00471 {
00472     memset(bit_map, 0, bit_map_size);
00473 }
00474 
00475 // We've commited, so "bitmap at start" needs to be reset to the current bitmap.
00476 void
00477 Btree_base::commit()
00478 {
00479     memcpy(bit_map0, bit_map, bit_map_size);
00480     bit_map_low = 0;
00481 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.