backends/flint/flint_btreebase.cc

Go to the documentation of this file.
00001 /* flint_btreebase.cc: Btree base file implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2006 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #include <config.h>
00023 
00024 #include "safeerrno.h"
00025 #ifdef __WIN32__
00026 # include "msvc_posix_wrapper.h"
00027 #endif
00028 
00029 #include "flint_btreebase.h"
00030 #include "flint_io.h"
00031 #include "flint_utils.h"
00032 #include "utils.h"
00033 #include <xapian/error.h>
00034 #include "omassert.h"
00035 
00036 #include <limits.h>
00037 
00038 #include <algorithm>
00039 #include <cstring>
00040 
00041 using namespace std;
00042 
00046 class fdcloser {
00047     public:
00048         fdcloser(int fd_) : fd(fd_) {}
00049         ~fdcloser() {
00050             if (fd >= 0) {
00051                 (void)close(fd);
00052             }
00053         }
00054     private:
00055         int fd;
00056 };
00057 
00058 /************ Base file parameters ************/
00059 
00086 #define CURR_FORMAT 5U
00087 
00088 FlintTable_base::FlintTable_base()
00089         : revision(0),
00090           block_size(0),
00091           root(0),
00092           level(0),
00093           bit_map_size(0),
00094           item_count(0),
00095           last_block(0),
00096           have_fakeroot(false),
00097           sequential(false),
00098           bit_map_low(0),
00099           bit_map0(0),
00100           bit_map(0)
00101 {
00102 }
00103 
00104 FlintTable_base::FlintTable_base(const FlintTable_base &other)
00105         : revision(other.revision),
00106           block_size(other.block_size),
00107           root(other.root),
00108           level(other.level),
00109           bit_map_size(other.bit_map_size),
00110           item_count(other.item_count),
00111           last_block(other.last_block),
00112           have_fakeroot(other.have_fakeroot),
00113           sequential(other.sequential),
00114           bit_map_low(other.bit_map_low),
00115           bit_map0(0),
00116           bit_map(0)
00117 {
00118     try {
00119         bit_map0 = new byte[bit_map_size];
00120         bit_map = new byte[bit_map_size];
00121 
00122         memcpy(bit_map0, other.bit_map0, bit_map_size);
00123         memcpy(bit_map, other.bit_map, bit_map_size);
00124     } catch (...) {
00125         delete [] bit_map0;
00126         delete [] bit_map;
00127     }
00128 }
00129 
00130 void
00131 FlintTable_base::swap(FlintTable_base &other)
00132 {
00133     std::swap(revision, other.revision);
00134     std::swap(block_size, other.block_size);
00135     std::swap(root, other.root);
00136     std::swap(level, other.level);
00137     std::swap(bit_map_size, other.bit_map_size);
00138     std::swap(item_count, other.item_count);
00139     std::swap(last_block, other.last_block);
00140     std::swap(have_fakeroot, other.have_fakeroot);
00141     std::swap(sequential, other.sequential);
00142     std::swap(bit_map_low, other.bit_map_low);
00143     std::swap(bit_map0, other.bit_map0);
00144     std::swap(bit_map, other.bit_map);
00145 }
00146 
00147 FlintTable_base::~FlintTable_base()
00148 {
00149     delete [] bit_map;
00150     bit_map = 0;
00151     delete [] bit_map0;
00152     bit_map0 = 0;
00153 }
00154 
00155 bool
00156 FlintTable_base::do_unpack_uint(const char **start, const char *end,
00157                            uint4 *dest, string &err_msg, 
00158                            const string &basename,
00159                            const char *varname)
00160 {
00161     bool result = unpack_uint(start, end, dest);
00162     if (!result) {
00163         err_msg += "Unable to read " + string(varname) + " from " +
00164                     basename + "\n";
00165     }
00166     return result;
00167 }
00168 
00169 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
00170 do { \
00171     if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
00172         return false; \
00173     } \
00174 } while(0)
00175 
00176 /* How much of the base file to read at the first go (in bytes).
00177  * This must be big enough that the base file without bitmap
00178  * will fit in to this size with no problem.  Other than that
00179  * it's fairly arbitrary, but shouldn't be big enough to cause
00180  * serious memory problems!
00181  */
00182 #define REASONABLE_BASE_SIZE 1024
00183 
00184 bool
00185 FlintTable_base::read(const string & name, char ch, string &err_msg)
00186 {
00187     string basename = name + "base" + ch;
00188 #ifdef __WIN32__
00189     int h = msvc_posix_open(basename.c_str(), O_RDONLY | O_BINARY);
00190 #else
00191     int h = open(basename.c_str(), O_RDONLY | O_BINARY);
00192 #endif
00193 
00194     if (h == -1) {
00195         err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
00196         return false;
00197     }
00198     fdcloser closefd(h);
00199 
00200     char buf[REASONABLE_BASE_SIZE];
00201 
00202     const char *start = buf;
00203     const char *end = buf + flint_io_read(h, buf, REASONABLE_BASE_SIZE, 0);
00204 
00205     DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
00206     uint4 format;
00207     DO_UNPACK_UINT_ERRCHECK(&start, end, format);
00208     if (format != CURR_FORMAT) {
00209         err_msg += "Bad base file format " + om_tostring(format) + " in " +
00210                     basename + "\n";
00211         return false;
00212     }
00213     DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
00214     DO_UNPACK_UINT_ERRCHECK(&start, end, root);
00215     DO_UNPACK_UINT_ERRCHECK(&start, end, level);
00216     DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
00217     DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
00218     DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
00219     uint4 have_fakeroot_;
00220     DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
00221     have_fakeroot = have_fakeroot_;
00222 
00223     uint4 sequential_;
00224     DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
00225     sequential = sequential_;
00226 
00227     if (have_fakeroot && !sequential) {
00228         sequential = true; // FIXME : work out why we need this...
00229         /*
00230         err_msg += "Corrupt base file, `" + basename + "':\n"
00231                 "sequential must be set whenever have_fakeroot is set.\n" +
00232                 "sequential=" + (sequential?"true":"false") +
00233                 ", have_fakeroot=" + (have_fakeroot?"true":"false") + "\n";
00234         return false;
00235         */
00236     }
00237 
00238     uint4 revision2;
00239     DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
00240     if (revision != revision2) {
00241         err_msg += "Revision number mismatch in " +
00242                 basename + ": " +
00243                 om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
00244         return false;
00245     }
00246 
00247     /* It's ok to delete a zero pointer */
00248     delete [] bit_map0;
00249     bit_map0 = 0;
00250     delete [] bit_map;
00251     bit_map = 0;
00252 
00253     bit_map0 = new byte[bit_map_size];
00254     bit_map = new byte[bit_map_size];
00255 
00256     size_t n = end - start;
00257     if (n < bit_map_size) {
00258         memcpy(bit_map0, start, n);
00259         (void)flint_io_read(h, reinterpret_cast<char *>(bit_map0) + n,
00260                             bit_map_size - n, bit_map_size - n);
00261         n = 0;
00262     } else {
00263         memcpy(bit_map0, start, bit_map_size);
00264         n -= bit_map_size;
00265         if (n) memmove(buf, start + bit_map_size, n);
00266     }
00267     memcpy(bit_map, bit_map0, bit_map_size);
00268 
00269     start = buf;
00270     end = buf + n;
00271     end += flint_io_read(h, buf + n, REASONABLE_BASE_SIZE - n, 0);
00272 
00273     uint4 revision3;
00274     if (!unpack_uint(&start, end, &revision3)) {
00275         err_msg += "Couldn't read revision3 from base file " +
00276         basename + "\n";
00277         return false;
00278     }
00279 
00280     if (revision != revision3) {
00281         err_msg += "Revision number mismatch in " +
00282                 basename + ": " +
00283                 om_tostring(revision) + " vs " + om_tostring(revision3) + "\n";
00284         return false;
00285     }
00286 
00287     if (start != end) {
00288         err_msg += "Junk at end of base file " + basename + "\n";
00289         return false;
00290     }
00291 
00292     return true;
00293 }
00294 
00295 void
00296 FlintTable_base::write_to_file(const string &filename)
00297 {
00298     calculate_last_block();
00299 
00300     string buf;
00301     buf += pack_uint(revision);
00302     buf += pack_uint(CURR_FORMAT);
00303     buf += pack_uint(block_size);
00304     buf += pack_uint(static_cast<uint4>(root));
00305     buf += pack_uint(static_cast<uint4>(level));
00306     buf += pack_uint(static_cast<uint4>(bit_map_size));
00307     buf += pack_uint(static_cast<uint4>(item_count));
00308     buf += pack_uint(static_cast<uint4>(last_block));
00309     buf += pack_uint(have_fakeroot);
00310     buf += pack_uint(sequential);
00311     buf += pack_uint(revision);
00312     if (bit_map_size > 0) {
00313         buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
00314     }
00315     buf += pack_uint(revision);  // REVISION2
00316 
00317 #ifdef __WIN32__
00318     int h = msvc_posix_open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
00319 #else
00320     int h = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
00321 #endif
00322     if (h < 0) {
00323         string message = string("Couldn't open base ")
00324                 + filename + " to write: " + strerror(errno);
00325         throw Xapian::DatabaseOpeningError(message);
00326     }
00327     fdcloser closefd(h);
00328 
00329     flint_io_write(h, buf.data(), buf.size());
00330     flint_io_sync(h);
00331 }
00332 
00333 /*
00334    block_free_at_start(B, n) is true iff (if and only if) block n was free at
00335    the start of the transaction on the B-tree.
00336 */
00337 
00338 bool
00339 FlintTable_base::block_free_at_start(uint4 n) const
00340 {
00341     size_t i = n / CHAR_BIT;
00342     int bit = 0x1 << n % CHAR_BIT;
00343     return (bit_map0[i] & bit) == 0;
00344 }
00345 
00346 /* free_block(B, n) causes block n to be marked free in the bit map.
00347    B->bit_map_low is the lowest byte in the bit map known to have a free bit
00348    set. Searching starts from there when looking for a free block.
00349 */
00350 
00351 void
00352 FlintTable_base::free_block(uint4 n)
00353 {
00354     uint4 i = n / CHAR_BIT;
00355     int bit = 0x1 << n % CHAR_BIT;
00356     bit_map[i] &= ~ bit;
00357 
00358     if (bit_map_low > i)
00359         if ((bit_map0[i] & bit) == 0) /* free at start */
00360             bit_map_low = i;
00361 }
00362 
00363 /* extend(B) increases the size of the two bit maps in an obvious way.
00364    The bitmap file grows and shrinks as the DB file grows and shrinks in
00365    internal usage. But the DB file itself does not reduce in size, no matter
00366    how many blocks are freed.
00367 */
00368 
00369 #define BIT_MAP_INC 1000
00370     /* increase the bit map by this number of bytes if it overflows */
00371 
00372 void
00373 FlintTable_base::extend_bit_map()
00374 {
00375     int n = bit_map_size + BIT_MAP_INC;
00376     byte *new_bit_map0 = 0;
00377     byte *new_bit_map = 0;
00378 
00379     try {
00380         new_bit_map0 = new byte[n];
00381         new_bit_map = new byte[n];
00382 
00383         memcpy(new_bit_map0, bit_map0, bit_map_size);
00384         memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
00385         
00386         memcpy(new_bit_map, bit_map, bit_map_size);
00387         memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
00388     } catch (...) {
00389         delete [] new_bit_map0;
00390         delete [] new_bit_map;
00391         throw;
00392     }
00393     delete [] bit_map0;
00394     bit_map0 = new_bit_map0;
00395     delete [] bit_map;
00396     bit_map = new_bit_map;
00397     bit_map_size = n;
00398 }
00399 
00400 /* next_free_block(B) returns the number of the next available free block in
00401    the bitmap, marking it as 'in use' before returning. More precisely, we get
00402    a block that is both free now (in bit_map) and was free at the beginning of
00403    the transaction on the B-tree (in bit_map0).
00404 
00405    Starting at bit_map_low we go up byte at a time until we find a byte with a
00406    free (zero) bit, and then go up that byte bit at a time. If the bit map has
00407    no free bits it is extended so that it will have.
00408 */
00409 
00410 uint4
00411 FlintTable_base::next_free_block()
00412 {
00413     uint4 i;
00414     int x;
00415     for (i = bit_map_low;; i++) {
00416         if (i >= bit_map_size) {
00417             extend_bit_map();
00418         }
00419         x = bit_map0[i] | bit_map[i];
00420         if (x != UCHAR_MAX) break;
00421     }
00422     uint4 n = i * CHAR_BIT;
00423     int d = 0x1;
00424     while ((x & d) != 0) { d <<= 1; n++; }
00425     bit_map[i] |= d;   /* set as 'in use' */
00426     bit_map_low = i;
00427     if (n > last_block) {
00428         last_block = n;
00429     }
00430     return n;
00431 }
00432 
00433 bool
00434 FlintTable_base::block_free_now(uint4 n)
00435 {
00436     uint4 i = n / CHAR_BIT;
00437     int bit = 0x1 << n % CHAR_BIT;
00438     return (bit_map[i] & bit) == 0;
00439 }
00440 
00441 void
00442 FlintTable_base::calculate_last_block()
00443 {
00444     if (bit_map_size == 0) {
00445         last_block = 0;
00446         return;
00447     }
00448     int i = bit_map_size - 1;
00449     while (bit_map[i] == 0 && i > 0) {
00450         i--;
00451     }
00452     bit_map_size = i + 1;
00453 
00454     int x = bit_map[i];
00455 
00456     /* Check for when there are no blocks */
00457     if (x == 0) {
00458         last_block = 0;
00459         return;
00460     }
00461     uint4 n = (i + 1) * CHAR_BIT - 1;
00462     int d = 0x1 << (CHAR_BIT - 1);
00463     while ((x & d) == 0) { d >>= 1; n--; }
00464 
00465     last_block = n;
00466 }
00467 
00468 bool
00469 FlintTable_base::is_empty() const
00470 {
00471     for (uint4 i = 0; i < bit_map_size; i++) {
00472         if (bit_map[i] != 0) {
00473             return false;
00474         }
00475     }
00476     return true;
00477 }
00478 
00479 void
00480 FlintTable_base::clear_bit_map()
00481 {
00482     memset(bit_map, 0, bit_map_size);
00483 }
00484 
00485 // We've commited, so "bitmap at start" needs to be reset to the current bitmap.
00486 void
00487 FlintTable_base::commit()
00488 {
00489     memcpy(bit_map0, bit_map, bit_map_size);
00490     bit_map_low = 0;
00491 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.