backends/flint/flint_database.cc

Go to the documentation of this file.
00001 /* flint_database.cc: flint database
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2001 Hein Ragas
00005  * Copyright 2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
00007  * Copyright 2006 Richard Boulton
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00022  * USA
00023  */
00024 
00025 #include <config.h>
00026 
00027 #include "flint_database.h"
00028 
00029 #include <xapian/error.h>
00030 #include <xapian/valueiterator.h>
00031 
00032 #include "autoptr.h"
00033 #include "contiguousalldocspostlist.h"
00034 #include "flint_alldocspostlist.h"
00035 #include "flint_alltermslist.h"
00036 #include "flint_document.h"
00037 #include "flint_lock.h"
00038 #include "flint_metadata.h"
00039 #include "flint_modifiedpostlist.h"
00040 #include "flint_positionlist.h"
00041 #include "flint_postlist.h"
00042 #include "flint_record.h"
00043 #include "flint_spellingwordslist.h"
00044 #include "flint_termlist.h"
00045 #include "flint_utils.h"
00046 #include "flint_values.h"
00047 #include "omdebug.h"
00048 #include "safeerrno.h"
00049 #include "safesysstat.h"
00050 #include "stringutils.h"
00051 #include "utils.h"
00052 
00053 #include <sys/types.h>
00054 
00055 #include <list>
00056 #include <string>
00057 
00058 using namespace std;
00059 using namespace Xapian;
00060 
// The maximum safe term length is determined by the postlist.  There we
// store the term followed by "\x00\x00" then a length byte, then up to
// 4 bytes of docid.  The Btree manager's key length limit is 252 bytes
// so the maximum safe term length is 252 - 2 - 1 - 4 = 245 bytes.  If
// the term contains zero bytes, the limit is lower (by one for each zero byte
// in the term).
//
// Terms longer than this are rejected with Xapian::InvalidArgumentError when
// a document is added.
#define MAX_SAFE_TERM_LENGTH 245

// Magic key in the postlist table (which corresponds to an invalid docid) is
// used to store the last used docid and total length of all documents.
//
// The key is a single zero byte: string("", 1) takes one character from the
// literal, which is its terminating NUL.
static const string METAINFO_KEY("", 1);
00072 
00073 /* This finds the tables, opens them at consistent revisions, manages
00074  * determining the current and next revision numbers, and stores handles
00075  * to the tables.
00076  */
00077 FlintDatabase::FlintDatabase(const string &flint_dir, int action,
00078                              unsigned int block_size)
00079         : db_dir(flint_dir),
00080           readonly(action == XAPIAN_DB_READONLY),
00081           version_file(db_dir),
00082           postlist_table(db_dir, readonly),
00083           position_table(db_dir, readonly),
00084           termlist_table(db_dir, readonly),
00085           value_table(db_dir, readonly),
00086           synonym_table(db_dir, readonly),
00087           spelling_table(db_dir, readonly),
00088           record_table(db_dir, readonly),
00089           lock(db_dir + "/flintlock"),
00090           total_length(0),
00091           lastdocid(0)
00092 {
00093     DEBUGCALL(DB, void, "FlintDatabase", flint_dir << ", " << action <<
00094               ", " << block_size);
00095 
00096     if (action == XAPIAN_DB_READONLY) {
00097         open_tables_consistent();
00098         return;
00099     }
00100 
00101     if (action != Xapian::DB_OPEN && !database_exists()) {
00102         // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
00103 
00104         // Create the directory for the database, if it doesn't exist
00105         // already.
00106         bool fail = false;
00107         struct stat statbuf;
00108         if (stat(db_dir, &statbuf) == 0) {
00109             if (!S_ISDIR(statbuf.st_mode)) fail = true;
00110         } else if (errno != ENOENT || mkdir(db_dir, 0755) == -1) {
00111             fail = true;
00112         }
00113         if (fail) {
00114             throw Xapian::DatabaseCreateError("Cannot create directory `" +
00115                                               db_dir + "'", errno);
00116         }
00117         get_database_write_lock();
00118 
00119         create_and_open_tables(block_size);
00120         return;
00121     }
00122 
00123     if (action == Xapian::DB_CREATE) {
00124         throw Xapian::DatabaseCreateError("Can't create new database at `" +
00125                                           db_dir + "': a database already exists and I was told "
00126                                           "not to overwrite it");
00127     }
00128 
00129     get_database_write_lock();
00130     // if we're overwriting, pretend the db doesn't exist
00131     // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
00132     if (action == Xapian::DB_CREATE_OR_OVERWRITE) {
00133         create_and_open_tables(block_size);
00134         return;
00135     }
00136 
00137     // Get latest consistent version
00138     open_tables_consistent();
00139 
00140     // Check that there are no more recent versions of tables.  If there
00141     // are, perform recovery by writing a new revision number to all
00142     // tables.
00143     if (record_table.get_open_revision_number() !=
00144         postlist_table.get_latest_revision_number()) {
00145         flint_revision_number_t new_revision = get_next_revision_number();
00146 
00147         set_revision_number(new_revision);
00148     }
00149 }
00150 
00151 FlintDatabase::~FlintDatabase()
00152 {
00153     DEBUGCALL(DB, void, "~FlintDatabase", "");
00154 }
00155 
00156 void
00157 FlintDatabase::read_metainfo()
00158 {
00159     DEBUGCALL(DB, void, "FlintDatabase::read_metainfo", "");
00160 
00161     string tag;
00162     if (!postlist_table.get_exact_entry(METAINFO_KEY, tag)) {
00163         lastdocid = 0;
00164         total_length = 0;
00165         return;
00166     }
00167 
00168     const char * data = tag.data();
00169     const char * end = data + tag.size();
00170     if (!unpack_uint(&data, end, &lastdocid) ||
00171         !unpack_uint_last(&data, end, &total_length)) {
00172         throw Xapian::DatabaseCorruptError("Meta information is corrupt.");
00173     }
00174 }
00175 
00176 bool
00177 FlintDatabase::database_exists() {
00178     DEBUGCALL(DB, bool, "FlintDatabase::database_exists", "");
00179     RETURN(record_table.exists() &&
00180            postlist_table.exists() &&
00181            termlist_table.exists());
00182 }
00183 
00184 void
00185 FlintDatabase::create_and_open_tables(unsigned int block_size)
00186 {
00187     DEBUGCALL(DB, void, "FlintDatabase::create_and_open_tables", "");
00188     // The caller is expected to create the database directory if it doesn't
00189     // already exist.
00190 
00191     // Create postlist_table first, and record_table last.  Existence of
00192     // record_table is considered to imply existence of the database.
00193     version_file.create();
00194     postlist_table.create_and_open(block_size);
00195     // The position table is created lazily, but erase it in case we're
00196     // overwriting an existing database and it already exists.
00197     position_table.erase();
00198     position_table.set_block_size(block_size);
00199 
00200     termlist_table.create_and_open(block_size);
00201     // The value table is created lazily, but erase it in case we're
00202     // overwriting an existing database and it already exists.
00203     value_table.erase();
00204     value_table.set_block_size(block_size);
00205 
00206     synonym_table.create_and_open(block_size);
00207     spelling_table.create_and_open(block_size);
00208     record_table.create_and_open(block_size);
00209 
00210     Assert(database_exists());
00211 
00212     // Check consistency
00213     flint_revision_number_t revision = record_table.get_open_revision_number();
00214     if (revision != termlist_table.get_open_revision_number() ||
00215         revision != postlist_table.get_open_revision_number()) {
00216         throw Xapian::DatabaseCreateError("Newly created tables are not in consistent state");
00217     }
00218 
00219     total_length = 0;
00220     lastdocid = 0;
00221 }
00222 
00223 void
00224 FlintDatabase::open_tables_consistent()
00225 {
00226     DEBUGCALL(DB, void, "FlintDatabase::open_tables_consistent", "");
00227     // Open record_table first, since it's the last to be written to,
00228     // and hence if a revision is available in it, it should be available
00229     // in all the other tables (unless they've moved on already).
00230     //
00231     // If we find that a table can't open the desired revision, we
00232     // go back and open record_table again, until record_table has
00233     // the same revision as the last time we opened it.
00234 
00235     flint_revision_number_t cur_rev = record_table.get_open_revision_number();
00236 
00237     // Check the version file unless we're reopening.
00238     if (cur_rev == 0) version_file.read_and_check(readonly);
00239 
00240     record_table.open();
00241     flint_revision_number_t revision = record_table.get_open_revision_number();
00242 
00243     if (cur_rev && cur_rev == revision) {
00244         // We're reopening a database and the revision hasn't changed so we
00245         // don't need to do anything.
00246         return;
00247     }
00248 
00249     // In case the position, value, synonym, and/or spelling tables don't
00250     // exist yet.
00251     unsigned int block_size = record_table.get_block_size();
00252     position_table.set_block_size(block_size);
00253     value_table.set_block_size(block_size);
00254     synonym_table.set_block_size(block_size);
00255     spelling_table.set_block_size(block_size);
00256 
00257     bool fully_opened = false;
00258     int tries = 100;
00259     int tries_left = tries;
00260     while (!fully_opened && (tries_left--) > 0) {
00261         if (spelling_table.open(revision) &&
00262             synonym_table.open(revision) &&
00263             value_table.open(revision) &&
00264             termlist_table.open(revision) &&
00265             position_table.open(revision) &&
00266             postlist_table.open(revision)) {
00267             // Everything now open at the same revision.
00268             fully_opened = true;
00269         } else {
00270             // Couldn't open consistent revision: two cases possible:
00271             // i)   An update has completed and a second one has begun since
00272             //      record was opened.  This leaves a consistent revision
00273             //      available, but not the one we were trying to open.
00274             // ii)  Tables have become corrupt / have no consistent revision
00275             //      available.  In this case, updates must have ceased.
00276             //
00277             // So, we reopen the record table, and check its revision number,
00278             // if it's changed we try the opening again, otherwise we give up.
00279             //
00280             record_table.open();
00281             flint_revision_number_t newrevision =
00282                     record_table.get_open_revision_number();
00283             if (revision == newrevision) {
00284                 // Revision number hasn't changed - therefore a second index
00285                 // sweep hasn't begun and the system must have failed.  Database
00286                 // is inconsistent.
00287                 throw Xapian::DatabaseCorruptError("Cannot open tables at consistent revisions");
00288             }
00289             revision = newrevision;
00290         }
00291     }
00292 
00293     if (!fully_opened) {
00294         throw Xapian::DatabaseModifiedError("Cannot open tables at stable revision - changing too fast");
00295     }
00296 
00297     read_metainfo();
00298 }
00299 
00300 void
00301 FlintDatabase::open_tables(flint_revision_number_t revision)
00302 {
00303     DEBUGCALL(DB, void, "FlintDatabase::open_tables", revision);
00304     version_file.read_and_check(readonly);
00305     record_table.open(revision);
00306 
00307     // In case the position, value, synonym, and/or spelling tables don't
00308     // exist yet.
00309     unsigned int block_size = record_table.get_block_size();
00310     position_table.set_block_size(block_size);
00311     value_table.set_block_size(block_size);
00312     synonym_table.set_block_size(block_size);
00313     spelling_table.set_block_size(block_size);
00314 
00315     spelling_table.open(revision);
00316     synonym_table.open(revision);
00317     value_table.open(revision);
00318     termlist_table.open(revision);
00319     position_table.open(revision);
00320     postlist_table.open(revision);
00321 }
00322 
00323 flint_revision_number_t
00324 FlintDatabase::get_revision_number() const
00325 {
00326     DEBUGCALL(DB, flint_revision_number_t, "FlintDatabase::get_revision_number", "");
00327     // We could use any table here, theoretically.
00328     RETURN(postlist_table.get_open_revision_number());
00329 }
00330 
00331 flint_revision_number_t
00332 FlintDatabase::get_next_revision_number() const
00333 {
00334     DEBUGCALL(DB, flint_revision_number_t, "FlintDatabase::get_next_revision_number", "");
00335     /* We _must_ use postlist_table here, since it is always the first
00336      * to be written, and hence will have the greatest available revision
00337      * number.
00338      */
00339     flint_revision_number_t new_revision =
00340             postlist_table.get_latest_revision_number();
00341     ++new_revision;
00342     RETURN(new_revision);
00343 }
00344 
00345 void
00346 FlintDatabase::set_revision_number(flint_revision_number_t new_revision)
00347 {
00348     DEBUGCALL(DB, void, "FlintDatabase::set_revision_number", new_revision);
00349     postlist_table.commit(new_revision);
00350     position_table.commit(new_revision);
00351     termlist_table.commit(new_revision);
00352     value_table.commit(new_revision);
00353     synonym_table.commit(new_revision);
00354     spelling_table.commit(new_revision);
00355     record_table.commit(new_revision);
00356 }
00357 
00358 void
00359 FlintDatabase::reopen()
00360 {
00361     DEBUGCALL(DB, void, "FlintDatabase::reopen", "");
00362     if (readonly) {
00363         open_tables_consistent();
00364     }
00365 }
00366 
00367 void
00368 FlintDatabase::get_database_write_lock()
00369 {
00370     DEBUGCALL(DB, void, "FlintDatabase::get_database_write_lock", "");
00371     FlintLock::reason why = lock.lock(true);
00372     if (why != FlintLock::SUCCESS) {
00373         if (why == FlintLock::UNKNOWN && !database_exists()) {
00374             string msg("No flint database found at path `");
00375             msg += db_dir;
00376             msg += '\'';
00377             throw Xapian::DatabaseOpeningError(msg);
00378         }
00379         string msg("Unable to acquire database write lock on ");
00380         msg += db_dir;
00381         if (why == FlintLock::INUSE) {
00382             msg += ": already locked";
00383         } else if (why == FlintLock::UNSUPPORTED) {
00384             msg += ": locking probably not supported by this FS";
00385         }
00386         throw Xapian::DatabaseLockError(msg);
00387     }
00388 }
00389 
00390 void
00391 FlintDatabase::modifications_failed(flint_revision_number_t old_revision,
00392                                     flint_revision_number_t new_revision,
00393                                     const string & msg)
00394 {
00395     // Modifications failed.  Wipe all the modifications from memory.
00396     try {
00397         // Discard any buffered changes and reinitialised cached values
00398         // from the table.
00399         cancel();
00400 
00401         // Reopen tables with old revision number.
00402         open_tables(old_revision);
00403 
00404         // Increase revision numbers to new revision number plus one,
00405         // writing increased numbers to all tables.
00406         ++new_revision;
00407         set_revision_number(new_revision);
00408     } catch (const Xapian::Error &e) {
00409         // Permanently close the table, since we can't get it into a
00410         // consistent state, to avoid risk of database corruption.
00411         postlist_table.close(true);
00412         position_table.close(true);
00413         termlist_table.close(true);
00414         value_table.close(true);
00415         synonym_table.close(true);
00416         spelling_table.close(true);
00417         record_table.close(true);
00418         throw Xapian::DatabaseError("Modifications failed (" + msg +
00419                                     "), and cannot set consistent table "
00420                                     "revision numbers: " + e.get_msg());
00421     }
00422 }
00423 
00424 void
00425 FlintDatabase::apply()
00426 {
00427     DEBUGCALL(DB, void, "FlintDatabase::apply", "");
00428     if (!postlist_table.is_modified() &&
00429         !position_table.is_modified() &&
00430         !termlist_table.is_modified() &&
00431         !value_table.is_modified() &&
00432         !synonym_table.is_modified() &&
00433         !spelling_table.is_modified() &&
00434         !record_table.is_modified()) {
00435         return;
00436     }
00437 
00438     flint_revision_number_t old_revision = get_revision_number();
00439     flint_revision_number_t new_revision = get_next_revision_number();
00440 
00441     try {
00442         set_revision_number(new_revision);
00443     } catch (const Xapian::Error &e) {
00444         modifications_failed(old_revision, new_revision, e.get_description());
00445         throw;
00446     } catch (...) {
00447         modifications_failed(old_revision, new_revision, "Unknown error");
00448         throw;
00449     }
00450 }
00451 
00452 void
00453 FlintDatabase::cancel()
00454 {
00455     DEBUGCALL(DB, void, "FlintDatabase::cancel", "");
00456     postlist_table.cancel();
00457     position_table.cancel();
00458     termlist_table.cancel();
00459     value_table.cancel();
00460     synonym_table.cancel();
00461     spelling_table.cancel();
00462     record_table.cancel();
00463 }
00464 
00465 Xapian::doccount
00466 FlintDatabase::get_doccount() const
00467 {
00468     DEBUGCALL(DB, Xapian::doccount, "FlintDatabase::get_doccount", "");
00469     RETURN(record_table.get_doccount());
00470 }
00471 
00472 Xapian::docid
00473 FlintDatabase::get_lastdocid() const
00474 {
00475     DEBUGCALL(DB, Xapian::docid, "FlintDatabase::get_lastdocid", "");
00476     RETURN(lastdocid);
00477 }
00478 
00479 Xapian::doclength
00480 FlintDatabase::get_avlength() const
00481 {
00482     DEBUGCALL(DB, Xapian::doclength, "FlintDatabase::get_avlength", "");
00483     Xapian::doccount doccount = record_table.get_doccount();
00484     if (doccount == 0) {
00485         // Avoid dividing by zero when there are no documents.
00486         RETURN(0);
00487     }
00488     RETURN(double(total_length) / doccount);
00489 }
00490 
00491 Xapian::doclength
00492 FlintDatabase::get_doclength(Xapian::docid did) const
00493 {
00494     DEBUGCALL(DB, Xapian::doclength, "FlintDatabase::get_doclength", did);
00495     Assert(did != 0);
00496     RETURN(termlist_table.get_doclength(did));
00497 }
00498 
00499 Xapian::doccount
00500 FlintDatabase::get_termfreq(const string & term) const
00501 {
00502     DEBUGCALL(DB, Xapian::doccount, "FlintDatabase::get_termfreq", term);
00503     Assert(!term.empty());
00504     RETURN(postlist_table.get_termfreq(term));
00505 }
00506 
00507 Xapian::termcount
00508 FlintDatabase::get_collection_freq(const string & term) const
00509 {
00510     DEBUGCALL(DB, Xapian::termcount, "FlintDatabase::get_collection_freq", term);
00511     Assert(!term.empty());
00512     RETURN(postlist_table.get_collection_freq(term));
00513 }
00514 
00515 bool
00516 FlintDatabase::term_exists(const string & term) const
00517 {
00518     DEBUGCALL(DB, bool, "FlintDatabase::term_exists", term);
00519     Assert(!term.empty());
00520     return postlist_table.term_exists(term);
00521 }
00522 
00523 bool
00524 FlintDatabase::has_positions() const
00525 {
00526     return position_table.get_entry_count() > 0;
00527 }
00528 
00529 LeafPostList *
00530 FlintDatabase::open_post_list(const string& term) const
00531 {
00532     DEBUGCALL(DB, LeafPostList *, "FlintDatabase::open_post_list", term);
00533     Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this);
00534 
00535     if (term.empty()) {
00536         Xapian::doccount doccount = get_doccount();
00537         if (lastdocid == doccount) {
00538             RETURN(new ContiguousAllDocsPostList(ptrtothis, doccount));
00539         }
00540         RETURN(new FlintAllDocsPostList(ptrtothis, doccount));
00541     }
00542 
00543     RETURN(new FlintPostList(ptrtothis, term));
00544 }
00545 
00546 TermList *
00547 FlintDatabase::open_term_list(Xapian::docid did) const
00548 {
00549     DEBUGCALL(DB, TermList *, "FlintDatabase::open_term_list", did);
00550     Assert(did != 0);
00551 
00552     Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this);
00553     RETURN(new FlintTermList(ptrtothis, did));
00554 }
00555 
00556 Xapian::Document::Internal *
00557 FlintDatabase::open_document(Xapian::docid did, bool lazy) const
00558 {
00559     DEBUGCALL(DB, Xapian::Document::Internal *, "FlintDatabase::open_document",
00560               did << ", " << lazy);
00561     Assert(did != 0);
00562 
00563     Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this);
00564     RETURN(new FlintDocument(ptrtothis,
00565                               &value_table,
00566                               &record_table,
00567                               did, lazy));
00568 }
00569 
00570 PositionList *
00571 FlintDatabase::open_position_list(Xapian::docid did, const string & term) const
00572 {
00573     Assert(did != 0);
00574 
00575     AutoPtr<FlintPositionList> poslist(new FlintPositionList());
00576     if (!poslist->read_data(&position_table, did, term)) {
00577         // Check that term / document combination exists.
00578         // If the doc doesn't exist, this will throw Xapian::DocNotFoundError:
00579         AutoPtr<TermList> tl(open_term_list(did));
00580         tl->skip_to(term);
00581         if (tl->at_end() || tl->get_termname() != term)
00582             throw Xapian::RangeError("Can't open position list: requested term is not present in document.");
00583         // FIXME: For 1.2.0, change this to just return an empty termlist.
00584         // If the user really needs to know, they can check themselves.
00585     }
00586 
00587     return poslist.release();
00588 }
00589 
00590 TermList *
00591 FlintDatabase::open_allterms(const string & prefix) const
00592 {
00593     DEBUGCALL(DB, TermList *, "FlintDatabase::open_allterms", "");
00594     RETURN(new FlintAllTermsList(Xapian::Internal::RefCntPtr<const FlintDatabase>(this),
00595                                  prefix));
00596 }
00597 
00598 TermList *
00599 FlintDatabase::open_spelling_termlist(const string & word) const
00600 {
00601     return spelling_table.open_termlist(word);
00602 }
00603 
00604 TermList *
00605 FlintDatabase::open_spelling_wordlist() const
00606 {
00607     FlintCursor * cursor = spelling_table.cursor_get();
00608     if (!cursor) return NULL;
00609     return new FlintSpellingWordsList(Xapian::Internal::RefCntPtr<const FlintDatabase>(this),
00610                                       cursor);
00611 }
00612 
00613 Xapian::doccount
00614 FlintDatabase::get_spelling_frequency(const string & word) const
00615 {
00616     return spelling_table.get_word_frequency(word);
00617 }
00618 
00619 TermList *
00620 FlintDatabase::open_synonym_termlist(const string & term) const
00621 {
00622     return synonym_table.open_termlist(term);
00623 }
00624 
00625 TermList *
00626 FlintDatabase::open_synonym_keylist(const string & prefix) const
00627 {
00628     FlintCursor * cursor = synonym_table.cursor_get();
00629     if (!cursor) return NULL;
00630     return new FlintSynonymTermList(Xapian::Internal::RefCntPtr<const FlintDatabase>(this),
00631                                     cursor, synonym_table.get_entry_count(),
00632                                     prefix);
00633 }
00634 
00635 TermList *
00636 FlintDatabase::open_metadata_keylist(const std::string &prefix) const
00637 {
00638     DEBUGCALL(DB, string, "FlintDatabase::open_metadata_keylist", "");
00639     FlintCursor * cursor = postlist_table.cursor_get();
00640     if (!cursor) return NULL;
00641     return new FlintMetadataTermList(Xapian::Internal::RefCntPtr<const FlintDatabase>(this),
00642                                      cursor, prefix);
00643 }
00644 
00645 string
00646 FlintDatabase::get_metadata(const string & key) const
00647 {
00648     DEBUGCALL(DB, string, "FlintDatabase::get_metadata", key);
00649     string btree_key("\x00\xc0", 2);
00650     btree_key += key;
00651     string tag;
00652     (void)postlist_table.get_exact_entry(btree_key, tag);
00653     RETURN(tag);
00654 }
00655 
00657 
00658 FlintWritableDatabase::FlintWritableDatabase(const string &dir, int action,
00659                                                int block_size)
00660         : FlintDatabase(dir, action, block_size),
00661           freq_deltas(),
00662           doclens(),
00663           mod_plists(),
00664           change_count(0),
00665           flush_threshold(0)
00666 {
00667     DEBUGCALL(DB, void, "FlintWritableDatabase", dir << ", " << action << ", "
00668               << block_size);
00669 
00670     const char *p = getenv("XAPIAN_FLUSH_THRESHOLD");
00671     if (p)
00672         flush_threshold = atoi(p);
00673     if (flush_threshold == 0)
00674         flush_threshold = 10000;
00675 }
00676 
00677 FlintWritableDatabase::~FlintWritableDatabase()
00678 {
00679     DEBUGCALL(DB, void, "~FlintWritableDatabase", "");
00680     dtor_called();
00681 }
00682 
00683 void
00684 FlintWritableDatabase::flush()
00685 {
00686     if (transaction_active())
00687         throw Xapian::InvalidOperationError("Can't flush during a transaction");
00688     if (change_count) flush_postlist_changes();
00689     apply();
00690 }
00691 
00692 void
00693 FlintWritableDatabase::flush_postlist_changes() const
00694 {
00695     postlist_table.merge_changes(mod_plists, doclens, freq_deltas);
00696 
00697     // Update the total document length and last used docid.
00698     string tag = pack_uint(lastdocid);
00699     tag += pack_uint_last(total_length);
00700     postlist_table.add(METAINFO_KEY, tag);
00701 
00702     freq_deltas.clear();
00703     doclens.clear();
00704     mod_plists.clear();
00705     change_count = 0;
00706 }
00707 
00708 Xapian::docid
00709 FlintWritableDatabase::add_document(const Xapian::Document & document)
00710 {
00711     DEBUGCALL(DB, Xapian::docid,
00712               "FlintWritableDatabase::add_document", document);
00713     // Make sure the docid counter doesn't overflow.
00714     if (lastdocid == Xapian::docid(-1))
00715         throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
00716     // Use the next unused document ID.
00717     RETURN(add_document_(++lastdocid, document));
00718 }
00719 
00720 Xapian::docid
00721 FlintWritableDatabase::add_document_(Xapian::docid did,
00722                                      const Xapian::Document & document)
00723 {
00724     DEBUGCALL(DB, Xapian::docid,
00725               "FlintWritableDatabase::add_document_", did << ", " << document);
00726     Assert(did != 0);
00727     try {
00728         // Add the record using that document ID.
00729         record_table.replace_record(document.get_data(), did);
00730 
00731         // Set the values.
00732         {
00733             Xapian::ValueIterator value = document.values_begin();
00734             Xapian::ValueIterator value_end = document.values_end();
00735             string s;
00736             value_table.encode_values(s, value, value_end);
00737             value_table.set_encoded_values(did, s);
00738         }
00739 
00740         flint_doclen_t new_doclen = 0;
00741         {
00742             Xapian::TermIterator term = document.termlist_begin();
00743             Xapian::TermIterator term_end = document.termlist_end();
00744             for ( ; term != term_end; ++term) {
00745                 termcount wdf = term.get_wdf();
00746                 // Calculate the new document length
00747                 new_doclen += wdf;
00748 
00749                 string tname = *term;
00750                 if (tname.size() > MAX_SAFE_TERM_LENGTH)
00751                     throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname);
00752                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00753                 i = freq_deltas.find(tname);
00754                 if (i == freq_deltas.end()) {
00755                     freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
00756                 } else {
00757                     ++i->second.first;
00758                     i->second.second += wdf;
00759                 }
00760 
00761                 // Add did to tname's postlist
00762                 map<string, map<docid, pair<char, termcount> > >::iterator j;
00763                 j = mod_plists.find(tname);
00764                 if (j == mod_plists.end()) {
00765                     map<docid, pair<char, termcount> > m;
00766                     j = mod_plists.insert(make_pair(tname, m)).first;
00767                 }
00768                 Assert(j->second.find(did) == j->second.end());
00769                 j->second.insert(make_pair(did, make_pair('A', wdf)));
00770 
00771                 if (term.positionlist_begin() != term.positionlist_end()) {
00772                     position_table.set_positionlist(
00773                         did, tname,
00774                         term.positionlist_begin(), term.positionlist_end());
00775                 }
00776             }
00777         }
00778         DEBUGLINE(DB, "Calculated doclen for new document " << did << " as " << new_doclen);
00779 
00780         // Set the termlist
00781         termlist_table.set_termlist(did, document, new_doclen);
00782 
00783         // Set the new document length
00784         Assert(doclens.find(did) == doclens.end());
00785         doclens[did] = new_doclen;
00786         total_length += new_doclen;
00787     } catch (...) {
00788         // If an error occurs while adding a document, or doing any other
00789         // transaction, the modifications so far must be cleared before
00790         // returning control to the user - otherwise partial modifications will
00791         // persist in memory, and eventually get written to disk.
00792         cancel();
00793         throw;
00794     }
00795 
00796     // FIXME: this should be done by checking memory usage, not the number of
00797     // changes.
00798     // We could also look at:
00799     // * mod_plists.size()
00800     // * doclens.size()
00801     // * freq_deltas.size()
00802     //
00803     // cout << "+++ mod_plists.size() " << mod_plists.size() <<
00804     //     ", doclens.size() " << doclens.size() <<
00805     //     ", freq_deltas.size() " << freq_deltas.size() << endl;
00806     if (++change_count >= flush_threshold) {
00807         flush_postlist_changes();
00808         if (!transaction_active()) apply();
00809     }
00810 
00811     RETURN(did);
00812 }
00813 
00814 void
00815 FlintWritableDatabase::delete_document(Xapian::docid did)
00816 {
00817     DEBUGCALL(DB, void, "FlintWritableDatabase::delete_document", did);
00818     Assert(did != 0);
00819 
00820     // Remove the record.  If this fails, just propagate the exception since
00821     // the state should still be consistent (most likely it's
00822     // DocNotFoundError).
00823     record_table.delete_record(did);
00824 
00825     try {
00826         // Remove the values
00827         value_table.delete_all_values(did);
00828 
00829         // OK, now add entries to remove the postings in the underlying record.
00830         Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this);
00831         FlintTermList termlist(ptrtothis, did);
00832 
00833         total_length -= termlist.get_doclength();
00834 
00835         termlist.next();
00836         while (!termlist.at_end()) {
00837             string tname = termlist.get_termname();
00838             position_table.delete_positionlist(did, tname);
00839             termcount wdf = termlist.get_wdf();
00840 
00841             map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00842             i = freq_deltas.find(tname);
00843             if (i == freq_deltas.end()) {
00844                 freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
00845             } else {
00846                 --i->second.first;
00847                 i->second.second -= wdf;
00848             }
00849 
00850             // Remove did from tname's postlist
00851             map<string, map<docid, pair<char, termcount> > >::iterator j;
00852             j = mod_plists.find(tname);
00853             if (j == mod_plists.end()) {
00854                 map<docid, pair<char, termcount> > m;
00855                 j = mod_plists.insert(make_pair(tname, m)).first;
00856             }
00857 
00858             map<docid, pair<char, termcount> >::iterator k;
00859             k = j->second.find(did);
00860             if (k == j->second.end()) {
00861                 j->second.insert(make_pair(did, make_pair('D', 0u)));
00862             } else {
00863                 // Deleting a document we added/modified since the last flush.
00864                 k->second = make_pair('D', 0u);
00865             }
00866 
00867             termlist.next();
00868         }
00869 
00870         // Remove the termlist.
00871         termlist_table.delete_termlist(did);
00872 
00873         // Remove the new doclength.
00874         doclens.erase(did);
00875     } catch (...) {
00876         // If an error occurs while deleting a document, or doing any other
00877         // transaction, the modifications so far must be cleared before
00878         // returning control to the user - otherwise partial modifications will
00879         // persist in memory, and eventually get written to disk.
00880         cancel();
00881         throw;
00882     }
00883 
00884     if (++change_count >= flush_threshold) {
00885         flush_postlist_changes();
00886         if (!transaction_active()) apply();
00887     }
00888 }
00889 
00890 void
00891 FlintWritableDatabase::replace_document(Xapian::docid did,
00892                                         const Xapian::Document & document)
00893 {
00894     DEBUGCALL(DB, void, "FlintWritableDatabase::replace_document", did << ", " << document);
00895     Assert(did != 0);
00896 
00897     try {
00898         if (did > lastdocid) {
00899             lastdocid = did;
00900             // If this docid is above the highwatermark, then we can't be
00901             // replacing an existing document.
00902             (void)add_document_(did, document);
00903             return;
00904         }
00905 
00906         // OK, now add entries to remove the postings in the underlying record.
00907         Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this);
00908         FlintTermList termlist(ptrtothis, did);
00909 
00910         termlist.next();
00911         while (!termlist.at_end()) {
00912             string tname = termlist.get_termname();
00913             termcount wdf = termlist.get_wdf();
00914 
00915             map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00916             i = freq_deltas.find(tname);
00917             if (i == freq_deltas.end()) {
00918                 freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
00919             } else {
00920                 --i->second.first;
00921                 i->second.second -= wdf;
00922             }
00923 
00924             // Remove did from tname's postlist
00925             map<string, map<docid, pair<char, termcount> > >::iterator j;
00926             j = mod_plists.find(tname);
00927             if (j == mod_plists.end()) {
00928                 map<docid, pair<char, termcount> > m;
00929                 j = mod_plists.insert(make_pair(tname, m)).first;
00930             }
00931 
00932             map<docid, pair<char, termcount> >::iterator k;
00933             k = j->second.find(did);
00934             if (k == j->second.end()) {
00935                 j->second.insert(make_pair(did, make_pair('D', 0u)));
00936             } else {
00937                 // Modifying a document we added/modified since the last flush.
00938                 k->second = make_pair('D', 0u);
00939             }
00940 
00941             termlist.next();
00942         }
00943 
00944         total_length -= termlist.get_doclength();
00945 
00946         // Replace the record
00947         record_table.replace_record(document.get_data(), did);
00948 
00949         // FIXME: we read the values delete them and then replace in case
00950         // they come from where they're going!  Better to ask Document
00951         // nicely and shortcut in this case!
00952         {
00953             Xapian::ValueIterator value = document.values_begin();
00954             Xapian::ValueIterator value_end = document.values_end();
00955             string s;
00956             value_table.encode_values(s, value, value_end);
00957 
00958             // Replace the values.
00959             value_table.delete_all_values(did);
00960             value_table.set_encoded_values(did, s);
00961         }
00962 
00963         flint_doclen_t new_doclen = 0;
00964         {
00965             Xapian::TermIterator term = document.termlist_begin();
00966             Xapian::TermIterator term_end = document.termlist_end();
00967             for ( ; term != term_end; ++term) {
00968                 // Calculate the new document length
00969                 termcount wdf = term.get_wdf();
00970                 new_doclen += wdf;
00971 
00972                 string tname = *term;
00973                 if (tname.size() > MAX_SAFE_TERM_LENGTH)
00974                     throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname);
00975                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00976                 i = freq_deltas.find(tname);
00977                 if (i == freq_deltas.end()) {
00978                     freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
00979                 } else {
00980                     ++i->second.first;
00981                     i->second.second += wdf;
00982                 }
00983 
00984                 // Add did to tname's postlist
00985                 map<string, map<docid, pair<char, termcount> > >::iterator j;
00986                 j = mod_plists.find(tname);
00987                 if (j == mod_plists.end()) {
00988                     map<docid, pair<char, termcount> > m;
00989                     j = mod_plists.insert(make_pair(tname, m)).first;
00990                 }
00991                 map<docid, pair<char, termcount> >::iterator k;
00992                 k = j->second.find(did);
00993                 if (k != j->second.end()) {
00994                     Assert(k->second.first == 'D');
00995                     k->second.first = 'M';
00996                     k->second.second = wdf;
00997                 } else {
00998                     j->second.insert(make_pair(did, make_pair('A', wdf)));
00999                 }
01000 
01001                 PositionIterator it = term.positionlist_begin();
01002                 PositionIterator it_end = term.positionlist_end();
01003                 if (it != it_end) {
01004                     position_table.set_positionlist(
01005                         did, tname, it, it_end);
01006                 } else {
01007                     position_table.delete_positionlist(did, tname);
01008                 }
01009             }
01010         }
01011         DEBUGLINE(DB, "Calculated doclen for replacement document " << did << " as " << new_doclen);
01012 
01013         // Set the termlist
01014         termlist_table.set_termlist(did, document, new_doclen);
01015 
01016         // Set the new document length
01017         doclens[did] = new_doclen;
01018         total_length += new_doclen;
01019     } catch (const Xapian::DocNotFoundError &) {
01020         (void)add_document_(did, document);
01021         return;
01022     } catch (...) {
01023         // If an error occurs while replacing a document, or doing any other
01024         // transaction, the modifications so far must be cleared before
01025         // returning control to the user - otherwise partial modifications will
01026         // persist in memory, and eventually get written to disk.
01027         cancel();
01028         throw;
01029     }
01030 
01031     if (++change_count >= flush_threshold) {
01032         flush_postlist_changes();
01033         if (!transaction_active()) apply();
01034     }
01035 }
01036 
01037 Xapian::doclength
01038 FlintWritableDatabase::get_doclength(Xapian::docid did) const
01039 {
01040     DEBUGCALL(DB, Xapian::doclength, "FlintWritableDatabase::get_doclength", did);
01041     map<docid, termcount>::const_iterator i = doclens.find(did);
01042     if (i != doclens.end()) RETURN(i->second);
01043 
01044     RETURN(FlintDatabase::get_doclength(did));
01045 }
01046 
01047 Xapian::doccount
01048 FlintWritableDatabase::get_termfreq(const string & tname) const
01049 {
01050     DEBUGCALL(DB, Xapian::doccount, "FlintWritableDatabase::get_termfreq", tname);
01051     Xapian::doccount termfreq = FlintDatabase::get_termfreq(tname);
01052     map<string, pair<termcount_diff, termcount_diff> >::const_iterator i;
01053     i = freq_deltas.find(tname);
01054     if (i != freq_deltas.end()) termfreq += i->second.first;
01055     RETURN(termfreq);
01056 }
01057 
01058 Xapian::termcount
01059 FlintWritableDatabase::get_collection_freq(const string & tname) const
01060 {
01061     DEBUGCALL(DB, Xapian::termcount, "FlintWritableDatabase::get_collection_freq", tname);
01062     Xapian::termcount collfreq = FlintDatabase::get_collection_freq(tname);
01063 
01064     map<string, pair<termcount_diff, termcount_diff> >::const_iterator i;
01065     i = freq_deltas.find(tname);
01066     if (i != freq_deltas.end()) collfreq += i->second.second;
01067 
01068     RETURN(collfreq);
01069 }
01070 
01071 bool
01072 FlintWritableDatabase::term_exists(const string & tname) const
01073 {
01074     DEBUGCALL(DB, bool, "FlintWritableDatabase::term_exists", tname);
01075     RETURN(get_termfreq(tname) != 0);
01076 }
01077 
01078 LeafPostList *
01079 FlintWritableDatabase::open_post_list(const string& tname) const
01080 {
01081     DEBUGCALL(DB, LeafPostList *, "FlintWritableDatabase::open_post_list", tname);
01082     Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this);
01083 
01084     if (tname.empty()) {
01085         Xapian::doccount doccount = get_doccount();
01086         if (lastdocid == doccount) {
01087             RETURN(new ContiguousAllDocsPostList(ptrtothis, doccount));
01088         }
01089         RETURN(new FlintAllDocsPostList(ptrtothis, doccount));
01090     }
01091 
01092     map<string, map<docid, pair<char, termcount> > >::const_iterator j;
01093     j = mod_plists.find(tname);
01094     if (j != mod_plists.end()) {
01095         // We've got buffered changes to this term's postlist, so we need to
01096         // use a FlintModifiedPostList.
01097         RETURN(new FlintModifiedPostList(ptrtothis, tname, j->second));
01098     }
01099 
01100     RETURN(new FlintPostList(ptrtothis, tname));
01101 }
01102 
01103 TermList *
01104 FlintWritableDatabase::open_allterms(const string & prefix) const
01105 {
01106     DEBUGCALL(DB, TermList *, "FlintWritableDatabase::open_allterms", "");
01107     // If there are changes, terms may have been added or removed, and so we
01108     // need to flush (but don't commit - there may be a transaction in progress.
01109     if (change_count) flush_postlist_changes();
01110     RETURN(FlintDatabase::open_allterms(prefix));
01111 }
01112 
01113 void
01114 FlintWritableDatabase::cancel()
01115 {
01116     FlintDatabase::cancel();
01117     read_metainfo();
01118     freq_deltas.clear();
01119     doclens.clear();
01120     mod_plists.clear();
01121     change_count = 0;
01122 }
01123 
01124 void
01125 FlintWritableDatabase::add_spelling(const string & word,
01126                                     Xapian::termcount freqinc) const
01127 {
01128     spelling_table.add_word(word, freqinc);
01129 }
01130 
01131 void
01132 FlintWritableDatabase::remove_spelling(const string & word,
01133                                        Xapian::termcount freqdec) const
01134 {
01135     spelling_table.remove_word(word, freqdec);
01136 }
01137 
01138 TermList *
01139 FlintWritableDatabase::open_spelling_wordlist() const
01140 {
01141     spelling_table.merge_changes();
01142     return FlintDatabase::open_spelling_wordlist();
01143 }
01144 
01145 TermList *
01146 FlintWritableDatabase::open_synonym_keylist(const string & prefix) const
01147 {
01148     synonym_table.merge_changes();
01149     return FlintDatabase::open_synonym_keylist(prefix);
01150 }
01151 
01152 void
01153 FlintWritableDatabase::add_synonym(const string & term,
01154                                    const string & synonym) const
01155 {
01156     synonym_table.add_synonym(term, synonym);
01157 }
01158 
01159 void
01160 FlintWritableDatabase::remove_synonym(const string & term,
01161                                       const string & synonym) const
01162 {
01163     synonym_table.remove_synonym(term, synonym);
01164 }
01165 
01166 void
01167 FlintWritableDatabase::clear_synonyms(const string & term) const
01168 {
01169     synonym_table.clear_synonyms(term);
01170 }
01171 
01172 void
01173 FlintWritableDatabase::set_metadata(const string & key, const string & value)
01174 {
01175     DEBUGCALL(DB, string, "FlintWritableDatabase::set_metadata",
01176               key << ", " << value);
01177     string btree_key("\x00\xc0", 2);
01178     btree_key += key;
01179     if (value.empty()) {
01180         postlist_table.del(btree_key);
01181     } else {
01182         postlist_table.add(btree_key, value);
01183     }
01184 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.