backends/quartz/quartz_database.cc

Go to the documentation of this file.
00001 /* quartz_database.cc: quartz database
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2001 Hein Ragas
00005  * Copyright 2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
00007  * Copyright 2006 Richard Boulton
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00022  * USA
00023  */
00024 
00025 #include <config.h>
00026 
00027 #include "safeerrno.h"
00028 
00029 #include "quartz_database.h"
00030 #include "utils.h"
00031 #include "omdebug.h"
00032 #include "autoptr.h"
00033 #include <xapian/error.h>
00034 #include <xapian/valueiterator.h>
00035 
00036 #include "quartz_postlist.h"
00037 #include "quartz_alldocspostlist.h"
00038 #include "quartz_termlist.h"
00039 #include "quartz_positionlist.h"
00040 #include "quartz_utils.h"
00041 #include "quartz_record.h"
00042 #include "quartz_values.h"
00043 #include "quartz_document.h"
00044 #include "quartz_alltermslist.h"
00045 
00046 #include <sys/types.h>
00047 #include "safesysstat.h"
00048 #include "safefcntl.h"
00049 #include "safeunistd.h"
00050 #ifdef HAVE_SYS_UTSNAME_H
00051 # include <sys/utsname.h>
00052 #endif
00053 
00054 #ifdef __CYGWIN__
00055 # include "safewindows.h"
00056 # include <sys/cygwin.h>
00057 #endif
00058 
00059 #include <list>
00060 #include <string>
00061 
00062 using namespace std;
00063 using namespace Xapian;
00064 
00065 /* This finds the tables, opens them at consistent revisions, manages
00066  * determining the current and next revision numbers, and stores handles
00067  * to the tables.
00068  */
00069 QuartzDatabase::QuartzDatabase(const string &quartz_dir, int action,
00070                                unsigned int block_size)
00071         : db_dir(quartz_dir),
00072           readonly(action == OM_DB_READONLY),
00073           metafile(db_dir + "/meta"),
00074           postlist_table(db_dir, readonly),
00075           positionlist_table(db_dir, readonly),
00076           termlist_table(db_dir, readonly),
00077           value_table(db_dir, readonly),
00078           record_table(db_dir, readonly),
00079           log(db_dir + "/log")
00080 {
00081     DEBUGCALL(DB, void, "QuartzDatabase", quartz_dir << ", " << action <<
00082               ", " << block_size);
00083     static const char *acts[] = {
00084         "Open readonly", "Create or open", "Create", "Create or overwrite",
00085         "Open" // , "Overwrite"
00086     };
00087     log.make_entry(string(acts[action]) + " database at `" + db_dir + "'");
00088 
00089     // set cache size parameters, etc, here.
00090 
00091     // open environment here
00092 
00093     bool dbexists = database_exists();
00094     // open tables
00095     if (action == OM_DB_READONLY) {
00096         if (!dbexists) {
00097             // Catch pre-0.6 Xapian databases and give a better error
00098             if (file_exists(db_dir + "/attribute_DB"))
00099                 throw Xapian::DatabaseVersionError("Cannot open database at `" + db_dir + "' - it was created by a pre-0.6 version of Xapian");
00100             throw Xapian::DatabaseOpeningError("Cannot open database at `" + db_dir + "' - it does not exist");
00101         }
00102         // Can still allow searches even if recovery is needed
00103         open_tables_consistent();
00104     } else {
00105         if (!dbexists) {
00106             // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
00107             if (action == Xapian::DB_OPEN) {
00108                 // Catch pre-0.6 Xapian databases and give a better error
00109                 if (file_exists(db_dir + "/attribute_DB"))
00110                     throw Xapian::DatabaseVersionError("Cannot open database at `" + db_dir + "' - it was created by a pre-0.6 version of Xapian");
00111                 throw Xapian::DatabaseOpeningError("Cannot open database at `" + db_dir + "' - it does not exist");
00112             }
00113 
00114             // Create the directory for the database, if it doesn't exist
00115             // already.
00116             bool fail = false;
00117             struct stat statbuf;
00118             if (stat(db_dir, &statbuf) == 0) {
00119                 if (!S_ISDIR(statbuf.st_mode)) fail = true;
00120             } else if (errno != ENOENT || mkdir(db_dir, 0755) == -1) {
00121                 fail = true;
00122             }
00123             if (fail) {
00124                 throw Xapian::DatabaseCreateError("Cannot create directory `"
00125                                                    + db_dir + "'", errno);
00126             }
00127             get_database_write_lock();
00128 
00129             create_and_open_tables(block_size);
00130             return;
00131         }
00132 
00133         log.make_entry("Old database exists");
00134         if (action == Xapian::DB_CREATE) {
00135             throw Xapian::DatabaseCreateError("Can't create new database at `" +
00136                     db_dir + "': a database already exists and I was told "
00137                     "not to overwrite it");
00138         }
00139 
00140         get_database_write_lock();
00141         // if we're overwriting, pretend the db doesn't exists
00142         // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
00143         if (action == Xapian::DB_CREATE_OR_OVERWRITE) {
00144             create_and_open_tables(block_size);
00145             return;
00146         }
00147 
00148         // Get latest consistent version
00149         open_tables_consistent();
00150 
00151         // Check that there are no more recent versions of tables.  If there
00152         // are, perform recovery by writing a new revision number to all
00153         // tables.
00154         if (record_table.get_open_revision_number() !=
00155             postlist_table.get_latest_revision_number()) {
00156             quartz_revision_number_t new_revision = get_next_revision_number();
00157 
00158             log.make_entry("Detected partially applied changes, updating "
00159                             "all revision numbers to consistent state (" +
00160                             om_tostring(new_revision) + ") to proceed - "
00161                             "this will remove partial changes");
00162             postlist_table.commit(new_revision);
00163             positionlist_table.commit(new_revision);
00164             termlist_table.commit(new_revision);
00165             value_table.commit(new_revision);
00166             record_table.commit(new_revision);
00167         }
00168         if (record_table.get_doccount() == 0) {
00169             record_table.set_total_length_and_lastdocid(0, record_table.get_lastdocid());
00170         }
00171     }
00172 }
00173 
00174 QuartzDatabase::~QuartzDatabase()
00175 {
00176     DEBUGCALL(DB, void, "~QuartzDatabase", "");
00177     // Only needed for a writable database: dtor_called();
00178     log.make_entry("Closing database");
00179     if (!readonly) release_database_write_lock();
00180 }
00181 
00182 bool
00183 QuartzDatabase::database_exists() {
00184     DEBUGCALL(DB, bool, "QuartzDatabase::database_exists", "");
00185     return record_table.exists() &&
00186            postlist_table.exists() &&
00187            positionlist_table.exists() &&
00188            termlist_table.exists() &&
00189            value_table.exists();
00190 }
00191 
00192 void
00193 QuartzDatabase::create_and_open_tables(unsigned int block_size)
00194 {
00195     DEBUGCALL(DB, void, "QuartzDatabase::create_and_open_tables", "");
00196     //FIXME - check that database directory exists.
00197 
00198     log.make_entry("Creating new database");
00199     // Create postlist_table first, and record_table last.  Existence of
00200     // record_table is considered to imply existence of the database.
00201     metafile.create();
00202     postlist_table.create(block_size);
00203     positionlist_table.create(block_size);
00204     termlist_table.create(block_size);
00205     value_table.create(block_size);
00206     record_table.create(block_size);
00207 
00208     Assert(database_exists());
00209 
00210     log.make_entry("Opening new database");
00211     metafile.open();
00212     record_table.open();
00213     value_table.open();
00214     termlist_table.open();
00215     positionlist_table.open();
00216     postlist_table.open();
00217 
00218     // Check consistency
00219     quartz_revision_number_t revision = record_table.get_open_revision_number();
00220     if (revision != value_table.get_open_revision_number() ||
00221         revision != termlist_table.get_open_revision_number() ||
00222         revision != positionlist_table.get_open_revision_number() ||
00223         revision != postlist_table.get_open_revision_number()) {
00224         log.make_entry("Revisions are not consistent: have " +
00225                         om_tostring(revision) + ", " +
00226                         om_tostring(value_table.get_open_revision_number()) + ", " +
00227                         om_tostring(termlist_table.get_open_revision_number()) + ", " +
00228                         om_tostring(positionlist_table.get_open_revision_number()) + " and " +
00229                         om_tostring(postlist_table.get_open_revision_number()));
00230         throw Xapian::DatabaseCreateError("Newly created tables are not in consistent state");
00231     }
00232     log.make_entry("Opened tables at revision " + om_tostring(revision));
00233     record_table.set_total_length_and_lastdocid(0, 0);
00234 }
00235 
00236 void
00237 QuartzDatabase::open_tables_consistent()
00238 {
00239     DEBUGCALL(DB, void, "QuartzDatabase::open_tables_consistent", "");
00240     // Open record_table first, since it's the last to be written to,
00241     // and hence if a revision is available in it, it should be available
00242     // in all the other tables (unless they've moved on already).
00243     //
00244     // If we find that a table can't open the desired revision, we
00245     // go back and open record_table again, until record_table has
00246     // the same revision as the last time we opened it.
00247 
00248     log.make_entry("Opening tables at latest consistent revision");
00249     metafile.open();
00250     record_table.open();
00251     quartz_revision_number_t revision = record_table.get_open_revision_number();
00252 
00253     bool fully_opened = false;
00254     int tries = 100;
00255     int tries_left = tries;
00256     while (!fully_opened && (tries_left--) > 0) {
00257         log.make_entry("Trying revision " + om_tostring(revision));
00258 
00259         bool opened;
00260         opened = value_table.open(revision);
00261         if (opened) opened = termlist_table.open(revision);
00262         if (opened) opened = positionlist_table.open(revision);
00263         if (opened) opened = postlist_table.open(revision);
00264         if (opened) {
00265             fully_opened = true;
00266         } else {
00267             // Couldn't open consistent revision: two cases possible:
00268             // i)   An update has completed and a second one has begun since
00269             //      record was opened.  This leaves a consistent revision
00270             //      available, but not the one we were trying to open.
00271             // ii)  Tables have become corrupt / have no consistent revision
00272             //      available.  In this case, updates must have ceased.
00273             //
00274             // So, we reopen the record table, and check its revision number,
00275             // if it's changed we try the opening again, otherwise we give up.
00276             //
00277             record_table.open();
00278             quartz_revision_number_t newrevision =
00279                     record_table.get_open_revision_number();
00280             if (revision == newrevision) {
00281                 // Revision number hasn't changed - therefore a second index
00282                 // sweep hasn't begun and the system must have failed.  Database
00283                 // is inconsistent.
00284                 log.make_entry("Cannot open all tables at revision in record table: " + om_tostring(revision));
00285                 throw Xapian::DatabaseCorruptError("Cannot open tables at consistent revisions");
00286             }
00287             revision = newrevision;
00288         }
00289     }
00290 
00291     if (!fully_opened) {
00292         log.make_entry("Cannot open all tables in a consistent state - keep changing too fast, giving up after " + om_tostring(tries) + " attempts");
00293         throw Xapian::DatabaseModifiedError("Cannot open tables at stable revision - changing too fast");
00294     }
00295 
00296     log.make_entry("Opened tables at revision " + om_tostring(revision));
00297 }
00298 
00299 void
00300 QuartzDatabase::open_tables(quartz_revision_number_t revision)
00301 {
00302     DEBUGCALL(DB, void, "QuartzDatabase::open_tables", revision);
00303     log.make_entry("Opening tables at revision " + om_tostring(revision));
00304     metafile.open();
00305     record_table.open(revision);
00306     value_table.open(revision);
00307     termlist_table.open(revision);
00308     positionlist_table.open(revision);
00309     postlist_table.open(revision);
00310     log.make_entry("Opened tables at revision " + om_tostring(revision));
00311 }
00312 
00313 quartz_revision_number_t
00314 QuartzDatabase::get_revision_number() const
00315 {
00316     DEBUGCALL(DB, quartz_revision_number_t, "QuartzDatabase::get_revision_number", "");
00317     // We could use any table here, theoretically.
00318     RETURN(postlist_table.get_open_revision_number());
00319 }
00320 
00321 quartz_revision_number_t
00322 QuartzDatabase::get_next_revision_number() const
00323 {
00324     DEBUGCALL(DB, quartz_revision_number_t, "QuartzDatabase::get_next_revision_number", "");
00325     /* We _must_ use postlist_table here, since it is always the first
00326      * to be written, and hence will have the greatest available revision
00327      * number.
00328      */
00329     quartz_revision_number_t new_revision =
00330             postlist_table.get_latest_revision_number();
00331     new_revision += 1;
00332     RETURN(new_revision);
00333 }
00334 
00335 void
00336 QuartzDatabase::set_revision_number(quartz_revision_number_t new_revision)
00337 {
00338     DEBUGCALL(DB, void, "QuartzDatabase::set_revision_number", new_revision);
00339     postlist_table.commit(new_revision);
00340     positionlist_table.commit(new_revision);
00341     termlist_table.commit(new_revision);
00342     value_table.commit(new_revision);
00343     record_table.commit(new_revision);
00344 }
00345 
00346 void
00347 QuartzDatabase::reopen()
00348 {
00349     DEBUGCALL(DB, void, "QuartzDatabase::reopen", "");
00350     if (readonly) {
00351         open_tables_consistent();
00352     }
00353 }
00354 
00355 void
00356 QuartzDatabase::get_database_write_lock()
00357 {
00358     DEBUGCALL(DB, void, "QuartzDatabase::get_database_write_lock", "");
00359     // FIXME:: have a backoff strategy to avoid stalling on a stale lockfile
00360 #ifdef HAVE_SYS_UTSNAME_H
00361     const char *hostname;
00362     struct utsname host;
00363     if (!uname(&host)) {
00364         host.nodename[0] = '\0';
00365     }
00366     hostname = host.nodename;
00367 #elif defined(HAVE_GETHOSTNAME)
00368     char hostname[256];
00369     if (gethostname(hostname, sizeof hostname) == -1) {
00370         *hostname = '\0';
00371     }
00372 #else
00373     const char *hostname = "";
00374 #endif
00375     string tempname = db_dir + "/db_lock.tmp." + om_tostring(getpid()) + "." +
00376             hostname + "." +
00377             om_tostring(reinterpret_cast<long>(this)); /* should work within
00378                                                           one process too! */
00379     DEBUGLINE(DB, "Temporary file " << tempname << " created");
00380     int num_tries = 5;
00381     while (true) {
00382         num_tries--;
00383         if (num_tries < 0) {
00384             throw Xapian::DatabaseLockError("Unable to acquire database write lock "
00385                                       + db_dir + "/db_lock");
00386         }
00387 
00388         int tempfd = open(tempname.c_str(), O_CREAT | O_EXCL, 0600);
00389         if (tempfd < 0) {
00390             throw Xapian::DatabaseLockError("Unable to create " + tempname +
00391                                       ": " + strerror(errno),
00392                                       errno);
00393         }
00394 
00395 #if defined __CYGWIN__
00396         close(tempfd);
00397         // Cygwin carefully tries to recreate Unix semantics for rename(), so
00398         // we can't use rename for locking.  And link() works on NTFS but not
00399         // FAT.  So we use the underlying API call and translate the paths.
00400         char fr[MAX_PATH], to[MAX_PATH];
00401         cygwin_conv_to_win32_path(tempname.c_str(), fr);
00402         cygwin_conv_to_win32_path((db_dir + "/db_lock").c_str(), to);
00403         if (MoveFile(fr, to)) {
00404             return;
00405         }
00406 #elif defined __WIN32__
00407         // MS Windows can't rename an open file, so make sure we close it
00408         // first.
00409         close(tempfd);
00410         // MS Windows doesn't support link(), but rename() won't overwrite an
00411         // existing file, which is exactly the semantics we want.
00412         if (rename(tempname.c_str(), (db_dir + "/db_lock").c_str()) == 0) {
00413             return;
00414         }
00415 #else
00416         /* Now link(2) the temporary file to the lockfile name.
00417          * If either link() returns 0, or the temporary file has
00418          * link count 2 afterwards, then the lock succeeded.
00419          * Otherwise, it failed.  (Reference: Linux open() manpage)
00420          */
00421         /* FIXME: sort out all these unlinks */
00422         int result = link(tempname, db_dir + "/db_lock");
00423         if (result == 0) {
00424             close(tempfd);
00425             unlink(tempname);
00426             return;
00427         }
00428 #ifdef XAPIAN_DEBUG_VERBOSE
00429         int link_errno = errno;
00430 #endif
00431         struct stat statbuf;
00432         int statresult = fstat(tempfd, &statbuf);
00433         int fstat_errno = errno;
00434         close(tempfd);
00435         unlink(tempname);
00436         if (statresult != 0) {
00437             throw Xapian::DatabaseLockError("Unable to fstat() temporary file " +
00438                                       tempname + " while locking: " +
00439                                       strerror(fstat_errno));
00440         }
00441         if (statbuf.st_nlink == 2) {
00442             /* success */
00443             return;
00444         }
00445         DEBUGLINE(DB, "link() returned " << result << "(" <<
00446                   strerror(link_errno) << ")");
00447         DEBUGLINE(DB, "Links in statbuf: " << statbuf.st_nlink);
00448         /* also failed */
00449 #endif
00450     }
00451 }
00452 
00453 void
00454 QuartzDatabase::release_database_write_lock()
00455 {
00456     DEBUGCALL(DB, void, "QuartzDatabase::release_database_write_lock", "");
00457     unlink(db_dir + "/db_lock");
00458 }
00459 
00460 void
00461 QuartzDatabase::apply()
00462 {
00463     DEBUGCALL(DB, void, "QuartzDatabase::apply", "");
00464     if (!postlist_table.is_modified() &&
00465         !positionlist_table.is_modified() &&
00466         !termlist_table.is_modified() &&
00467         !value_table.is_modified() &&
00468         !record_table.is_modified()) {
00469         log.make_entry("No modifications to apply");
00470         return;
00471     }
00472 
00473     quartz_revision_number_t old_revision = get_revision_number();
00474     quartz_revision_number_t new_revision = get_next_revision_number();
00475 
00476     log.make_entry("Applying modifications.  New revision number is " + om_tostring(new_revision));
00477 
00478     try {
00479         postlist_table.commit(new_revision);
00480         positionlist_table.commit(new_revision);
00481         termlist_table.commit(new_revision);
00482         value_table.commit(new_revision);
00483         record_table.commit(new_revision);
00484 
00485         log.make_entry("Modifications succeeded");
00486     } catch (...) {
00487         // Modifications failed.  Wipe all the modifications from memory.
00488         log.make_entry("Attempted modifications failed.  Wiping partial modifications");
00489 
00490         // Reopen tables with old revision number.
00491         log.make_entry("Reopening tables without modifications: old revision is " + om_tostring(old_revision));
00492         open_tables(old_revision);
00493 
00494         // Increase revision numbers to new revision number plus one,
00495         // writing increased numbers to all tables.
00496         new_revision += 1;
00497         log.make_entry("Increasing revision number in all tables to " + om_tostring(new_revision));
00498 
00499         try {
00500             set_revision_number(new_revision);
00501 
00502             // This cancel() causes any buffered changes to be thrown away,
00503             // and the buffer to be reinitialised with the old entry count.
00504             cancel();
00505         } catch (const Xapian::Error & e) {
00506             string msg("Setting revision number failed: ");
00507             msg += e.get_description();
00508             log.make_entry(msg);
00509             throw Xapian::DatabaseError("Modifications failed, and cannot set revision numbers in database to a consistent state");
00510         }
00511         throw;
00512     }
00513 }
00514 
00515 void
00516 QuartzDatabase::cancel()
00517 {
00518     DEBUGCALL(DB, void, "QuartzDatabase::cancel", "");
00519     postlist_table.cancel();
00520     positionlist_table.cancel();
00521     termlist_table.cancel();
00522     value_table.cancel();
00523     record_table.cancel();
00524 }
00525 
00526 Xapian::doccount
00527 QuartzDatabase::get_doccount() const
00528 {
00529     DEBUGCALL(DB, Xapian::doccount, "QuartzDatabase::get_doccount", "");
00530     RETURN(record_table.get_doccount());
00531 }
00532 
00533 Xapian::docid
00534 QuartzDatabase::get_lastdocid() const
00535 {
00536     DEBUGCALL(DB, Xapian::docid, "QuartzDatabase::get_lastdocid", "");
00537     RETURN(record_table.get_lastdocid());
00538 }
00539 
00540 Xapian::doclength
00541 QuartzDatabase::get_avlength() const
00542 {
00543     DEBUGCALL(DB, Xapian::doclength, "QuartzDatabase::get_avlength", "");
00544     Xapian::doccount docs = record_table.get_doccount();
00545     if (docs == 0) RETURN(0);
00546     RETURN(double(record_table.get_total_length()) / docs);
00547 }
00548 
00549 Xapian::doclength
00550 QuartzDatabase::get_doclength(Xapian::docid did) const
00551 {
00552     DEBUGCALL(DB, Xapian::doclength, "QuartzDatabase::get_doclength", did);
00553     Assert(did != 0);
00554 
00555     QuartzTermList termlist(0, &termlist_table, did, 0);
00556     RETURN(termlist.get_doclength());
00557 }
00558 
00559 Xapian::doccount
00560 QuartzDatabase::get_termfreq(const string & tname) const
00561 {
00562     DEBUGCALL(DB, Xapian::doccount, "QuartzDatabase::get_termfreq", tname);
00563     Assert(!tname.empty());
00564 
00565     RETURN(postlist_table.get_termfreq(tname));
00566     RETURN(postlist_table.get_collection_freq(tname));
00567 }
00568 
00569 Xapian::termcount
00570 QuartzDatabase::get_collection_freq(const string & tname) const
00571 {
00572     DEBUGCALL(DB, Xapian::termcount, "QuartzDatabase::get_collection_freq", tname);
00573     Assert(!tname.empty());
00574 
00575     RETURN(postlist_table.get_collection_freq(tname));
00576 }
00577 
00578 bool
00579 QuartzDatabase::term_exists(const string & tname) const
00580 {
00581     DEBUGCALL(DB, bool, "QuartzDatabase::term_exists", tname);
00582     Assert(!tname.empty());
00583     AutoPtr<Bcursor> cursor(postlist_table.cursor_get());
00584     // FIXME: nasty C&P from backends/quartz/quartz_postlist.cc
00585     string key = pack_string_preserving_sort(tname);
00586     return cursor->find_entry(key);
00587 }
00588 
00589 bool
00590 QuartzDatabase::has_positions() const
00591 {
00592     return positionlist_table.get_entry_count() > 0;
00593 }
00594 
00595 
00596 LeafPostList *
00597 QuartzDatabase::open_post_list(const string& tname) const
00598 {
00599     DEBUGCALL(DB, LeafPostList *, "QuartzDatabase::open_post_list", tname);
00600     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
00601 
00602     if (tname.empty()) {
00603         RETURN(new QuartzAllDocsPostList(ptrtothis,
00604                                          &termlist_table,
00605                                          get_doccount()));
00606     }
00607 
00608     RETURN(new QuartzPostList(ptrtothis,
00609                               &postlist_table,
00610                               &positionlist_table,
00611                               tname));
00612 }
00613 
00614 TermList *
00615 QuartzDatabase::open_term_list(Xapian::docid did) const
00616 {
00617     DEBUGCALL(DB, TermList *, "QuartzDatabase::open_term_list", did);
00618     Assert(did != 0);
00619 
00620     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
00621     RETURN(new QuartzTermList(ptrtothis, &termlist_table, did, get_doccount()));
00622 }
00623 
00624 Xapian::Document::Internal *
00625 QuartzDatabase::open_document(Xapian::docid did, bool lazy) const
00626 {
00627     DEBUGCALL(DB, Xapian::Document::Internal *, "QuartzDatabase::open_document",
00628               did << ", " << lazy);
00629     Assert(did != 0);
00630 
00631     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
00632     RETURN(new QuartzDocument(ptrtothis,
00633                               &value_table,
00634                               &record_table,
00635                               did, lazy));
00636 }
00637 
00638 PositionList *
00639 QuartzDatabase::open_position_list(Xapian::docid did,
00640                                    const string & tname) const
00641 {
00642     Assert(did != 0);
00643 
00644     AutoPtr<QuartzPositionList> poslist(new QuartzPositionList());
00645     poslist->read_data(&positionlist_table, did, tname);
00646     if (poslist->get_size() == 0) {
00647         // Check that term / document combination exists.
00648         // If the doc doesn't exist, this will throw Xapian::DocNotFoundError:
00649         AutoPtr<TermList> tl(open_term_list(did));
00650         tl->skip_to(tname);
00651         if (tl->at_end() || tl->get_termname() != tname)
00652             throw Xapian::RangeError("Can't open position list: requested term is not present in document.");
00653     }
00654 
00655     return poslist.release();
00656 }
00657 
00658 TermList *
00659 QuartzDatabase::open_allterms(const string & prefix) const
00660 {
00661     DEBUGCALL(DB, TermList *, "QuartzDatabase::open_allterms", "");
00662     AutoPtr<Bcursor> pl_cursor(postlist_table.cursor_get());
00663     RETURN(new QuartzAllTermsList(Xapian::Internal::RefCntPtr<const QuartzDatabase>(this),
00664                                   pl_cursor, postlist_table.get_entry_count(), prefix));
00665 }
00666 
00667 size_t QuartzWritableDatabase::flush_threshold = 0;
00668 
00669 QuartzWritableDatabase::QuartzWritableDatabase(const string &dir, int action,
00670                                                int block_size)
00671         : freq_deltas(),
00672           doclens(),
00673           mod_plists(),
00674           database_ro(dir, action, block_size),
00675           total_length(database_ro.record_table.get_total_length()),
00676           lastdocid(database_ro.get_lastdocid()),
00677           changes_made(0)
00678 {
00679     DEBUGCALL(DB, void, "QuartzWritableDatabase", dir << ", " << action << ", "
00680               << block_size);
00681     if (flush_threshold == 0) {
00682         const char *p = getenv("XAPIAN_FLUSH_THRESHOLD");
00683         if (p) flush_threshold = atoi(p);
00684     }
00685     if (flush_threshold == 0) flush_threshold = 10000;
00686 }
00687 
00688 QuartzWritableDatabase::~QuartzWritableDatabase()
00689 {
00690     DEBUGCALL(DB, void, "~QuartzWritableDatabase", "");
00691     dtor_called();
00692 }
00693 
00694 void
00695 QuartzWritableDatabase::flush()
00696 {
00697     if (transaction_active())
00698         throw Xapian::InvalidOperationError("Can't flush during a transaction");
00699     if (changes_made) do_flush_const();
00700 }
00701 
00702 void
00703 QuartzWritableDatabase::do_flush_const() const
00704 {
00705     DEBUGCALL(DB, void, "QuartzWritableDatabase::do_flush_const", "");
00706 
00707     database_ro.postlist_table.merge_changes(mod_plists, doclens, freq_deltas);
00708 
00709     // Update the total document length and last used docid.
00710     database_ro.record_table.set_total_length_and_lastdocid(total_length,
00711                                                             lastdocid);
00712     database_ro.apply();
00713     freq_deltas.clear();
00714     doclens.clear();
00715     mod_plists.clear();
00716     changes_made = 0;
00717 }
00718 
00719 Xapian::docid
00720 QuartzWritableDatabase::add_document(const Xapian::Document & document)
00721 {
00722     DEBUGCALL(DB, Xapian::docid,
00723               "QuartzWritableDatabase::add_document", document);
00724     // Make sure the docid counter doesn't overflow.
00725     if (lastdocid == Xapian::docid(-1))
00726         throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
00727     // Use the next unused document ID.
00728     RETURN(add_document_(++lastdocid, document));
00729 }
00730 
00731 Xapian::docid
00732 QuartzWritableDatabase::add_document_(Xapian::docid did,
00733                                       const Xapian::Document & document)
00734 {
00735     Assert(did != 0);
00736     try {
00737         // Add the record using that document ID.
00738         database_ro.record_table.replace_record(document.get_data(), did);
00739 
00740         // Set the values.
00741         {
00742             Xapian::ValueIterator value = document.values_begin();
00743             Xapian::ValueIterator value_end = document.values_end();
00744             string s;
00745             database_ro.value_table.encode_values(s, value, value_end);
00746             database_ro.value_table.set_encoded_values(did, s);
00747         }
00748 
00749         quartz_doclen_t new_doclen = 0;
00750         {
00751             Xapian::TermIterator term = document.termlist_begin();
00752             Xapian::TermIterator term_end = document.termlist_end();
00753             for ( ; term != term_end; ++term) {
00754                 termcount wdf = term.get_wdf();
00755                 // Calculate the new document length
00756                 new_doclen += wdf;
00757 
00758                 string tname = *term;
00759                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00760                 i = freq_deltas.find(tname);
00761                 if (i == freq_deltas.end()) {
00762                     freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
00763                 } else {
00764                     ++i->second.first;
00765                     i->second.second += wdf;
00766                 }
00767 
00768                 // Add did to tname's postlist
00769                 map<string, map<docid, pair<char, termcount> > >::iterator j;
00770                 j = mod_plists.find(tname);
00771                 if (j == mod_plists.end()) {
00772                     map<docid, pair<char, termcount> > m;
00773                     j = mod_plists.insert(make_pair(tname, m)).first;
00774                 }
00775                 Assert(j->second.find(did) == j->second.end());
00776                 j->second.insert(make_pair(did, make_pair('A', wdf)));
00777 
00778                 if (term.positionlist_begin() != term.positionlist_end()) {
00779                     database_ro.positionlist_table.set_positionlist(
00780                         did, tname,
00781                         term.positionlist_begin(), term.positionlist_end());
00782                 }
00783             }
00784         }
00785 
00786         // Set the termlist
00787         database_ro.termlist_table.set_entries(did,
00788                 document.termlist_begin(), document.termlist_end(),
00789                 new_doclen, false);
00790 
00791         // Set the new document length
00792         Assert(doclens.find(did) == doclens.end());
00793         doclens[did] = new_doclen;
00794         total_length += new_doclen;
00795     } catch (...) {
00796         // If an error occurs while adding a document, or doing any other
00797         // transaction, the modifications so far must be cleared before
00798         // returning control to the user - otherwise partial modifications will
00799         // persist in memory, and eventually get written to disk.
00800         cancel();
00801         throw;
00802     }
00803 
00804     // FIXME: this should be done by checking memory usage, not the number of
00805     // changes.
00806     // We could also look at:
00807     // * mod_plists.size()
00808     // * doclens.size()
00809     // * freq_deltas.size()
00810     //
00811     // cout << "+++ mod_plists.size() " << mod_plists.size() <<
00812     //     ", doclens.size() " << doclens.size() <<
00813     //     ", freq_deltas.size() " << freq_deltas.size() << endl;
00814     if (++changes_made >= flush_threshold && !transaction_active())
00815         do_flush_const();
00816 
00817     return did;
00818 }
00819 
00820 void
00821 QuartzWritableDatabase::delete_document(Xapian::docid did)
00822 {
00823     DEBUGCALL(DB, void, "QuartzWritableDatabase::delete_document", did);
00824     Assert(did != 0);
00825 
00826     // Remove the record.  If this fails, just propagate the exception since
00827     // the state should still be consistent (most likely it's
00828     // DocNotFoundError).
00829     database_ro.record_table.delete_record(did);
00830 
00831     try {
00832         // Remove the values
00833         database_ro.value_table.delete_all_values(did);
00834 
00835         // OK, now add entries to remove the postings in the underlying record.
00836         Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
00837         QuartzTermList termlist(ptrtothis,
00838                                 &database_ro.termlist_table,
00839                                 did, get_doccount());
00840 
00841         total_length -= termlist.get_doclength();
00842 
00843         termlist.next();
00844         while (!termlist.at_end()) {
00845             string tname = termlist.get_termname();
00846             database_ro.positionlist_table.delete_positionlist(did, tname);
00847             termcount wdf = termlist.get_wdf();
00848 
00849             map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00850             i = freq_deltas.find(tname);
00851             if (i == freq_deltas.end()) {
00852                 freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
00853             } else {
00854                 --i->second.first;
00855                 i->second.second -= wdf;
00856             }
00857 
00858             // Remove did from tname's postlist
00859             map<string, map<docid, pair<char, termcount> > >::iterator j;
00860             j = mod_plists.find(tname);
00861             if (j == mod_plists.end()) {
00862                 map<docid, pair<char, termcount> > m;
00863                 j = mod_plists.insert(make_pair(tname, m)).first;
00864             }
00865 
00866             map<docid, pair<char, termcount> >::iterator k;
00867             k = j->second.find(did);
00868             if (k == j->second.end()) {
00869                 j->second.insert(make_pair(did, make_pair('D', 0u)));
00870             } else {
00871                 // Deleting a document we added/modified since the last flush.
00872                 k->second = make_pair('D', 0u);
00873             }
00874 
00875             termlist.next();
00876         }
00877 
00878         // Remove the termlist.
00879         database_ro.termlist_table.delete_termlist(did);
00880 
00881         // Remove the new doclength.
00882         doclens.erase(did);
00883     } catch (...) {
00884         // If an error occurs while deleting a document, or doing any other
00885         // transaction, the modifications so far must be cleared before
00886         // returning control to the user - otherwise partial modifications will
00887         // persist in memory, and eventually get written to disk.
00888         cancel();
00889         throw;
00890     }
00891 
00892     if (++changes_made >= flush_threshold && !transaction_active())
00893         do_flush_const();
00894 }
00895 
00896 void
00897 QuartzWritableDatabase::replace_document(Xapian::docid did,
00898                                          const Xapian::Document & document)
00899 {
00900     DEBUGCALL(DB, void, "QuartzWritableDatabase::replace_document", did << ", " << document);
00901     Assert(did != 0);
00902 
00903     try {
00904         if (did > lastdocid) {
00905             lastdocid = did;
00906             // If this docid is above the highwatermark, then we can't be
00907             // replacing an existing document.
00908             (void)add_document_(did, document);
00909             return;
00910         }
00911 
00912         // OK, now add entries to remove the postings in the underlying record.
00913         Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
00914         QuartzTermList termlist(ptrtothis,
00915                                 &database_ro.termlist_table,
00916                                 did, get_doccount());
00917 
00918         termlist.next();
00919         while (!termlist.at_end()) {
00920             string tname = termlist.get_termname();
00921             termcount wdf = termlist.get_wdf();
00922 
00923             map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00924             i = freq_deltas.find(tname);
00925             if (i == freq_deltas.end()) {
00926                 freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
00927             } else {
00928                 --i->second.first;
00929                 i->second.second -= wdf;
00930             }
00931 
00932             // Remove did from tname's postlist
00933             map<string, map<docid, pair<char, termcount> > >::iterator j;
00934             j = mod_plists.find(tname);
00935             if (j == mod_plists.end()) {
00936                 map<docid, pair<char, termcount> > m;
00937                 j = mod_plists.insert(make_pair(tname, m)).first;
00938             }
00939 
00940             map<docid, pair<char, termcount> >::iterator k;
00941             k = j->second.find(did);
00942             if (k == j->second.end()) {
00943                 j->second.insert(make_pair(did, make_pair('D', 0u)));
00944             } else {
00945                 // Modifying a document we added/modified since the last flush.
00946                 k->second = make_pair('D', 0u);
00947             }
00948 
00949             termlist.next();
00950         }
00951 
00952         total_length -= termlist.get_doclength();
00953 
00954         // Replace the record
00955         database_ro.record_table.replace_record(document.get_data(), did);
00956 
00957         // FIXME: we read the values delete them and then replace in case
00958         // they come from where they're going!  Better to ask Document
00959         // nicely and shortcut in this case!
00960         {
00961             Xapian::ValueIterator value = document.values_begin();
00962             Xapian::ValueIterator value_end = document.values_end();
00963             string s;
00964             database_ro.value_table.encode_values(s, value, value_end);
00965 
00966             // Replace the values.
00967             database_ro.value_table.delete_all_values(did);
00968             database_ro.value_table.set_encoded_values(did, s);
00969         }
00970 
00971         quartz_doclen_t new_doclen = 0;
00972         {
00973             Xapian::TermIterator term = document.termlist_begin();
00974             Xapian::TermIterator term_end = document.termlist_end();
00975             for ( ; term != term_end; ++term) {
00976                 // Calculate the new document length
00977                 termcount wdf = term.get_wdf();
00978                 new_doclen += wdf;
00979 
00980                 string tname = *term;
00981                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00982                 i = freq_deltas.find(tname);
00983                 if (i == freq_deltas.end()) {
00984                     freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
00985                 } else {
00986                     ++i->second.first;
00987                     i->second.second += wdf;
00988                 }
00989 
00990                 // Add did to tname's postlist
00991                 map<string, map<docid, pair<char, termcount> > >::iterator j;
00992                 j = mod_plists.find(tname);
00993                 if (j == mod_plists.end()) {
00994                     map<docid, pair<char, termcount> > m;
00995                     j = mod_plists.insert(make_pair(tname, m)).first;
00996                 }
00997                 map<docid, pair<char, termcount> >::iterator k;
00998                 k = j->second.find(did);
00999                 if (k != j->second.end()) {
01000                     Assert(k->second.first == 'D');
01001                     k->second.first = 'M';
01002                     k->second.second = wdf;
01003                 } else {
01004                     j->second.insert(make_pair(did, make_pair('A', wdf)));
01005                 }
01006 
01007                 PositionIterator it = term.positionlist_begin();
01008                 PositionIterator it_end = term.positionlist_end();
01009                 if (it != it_end) {
01010                     database_ro.positionlist_table.set_positionlist(
01011                         did, tname, it, it_end);
01012                 } else {
01013                     database_ro.positionlist_table.delete_positionlist(did, tname);
01014                 }
01015             }
01016         }
01017 
01018         // Set the termlist
01019         database_ro.termlist_table.set_entries(did,
01020                 document.termlist_begin(), document.termlist_end(),
01021                 new_doclen, false);
01022 
01023         // Set the new document length
01024         doclens[did] = new_doclen;
01025         total_length += new_doclen;
01026     } catch (const Xapian::DocNotFoundError &) {
01027         (void)add_document_(did, document);
01028         return;
01029     } catch (...) {
01030         // If an error occurs while replacing a document, or doing any other
01031         // transaction, the modifications so far must be cleared before
01032         // returning control to the user - otherwise partial modifications will
01033         // persist in memory, and eventually get written to disk.
01034         cancel();
01035         throw;
01036     }
01037 
01038     if (++changes_made >= flush_threshold && !transaction_active())
01039         do_flush_const();
01040 }
01041 
01042 Xapian::doccount
01043 QuartzWritableDatabase::get_doccount() const
01044 {
01045     DEBUGCALL(DB, Xapian::doccount, "QuartzWritableDatabase::get_doccount", "");
01046     RETURN(database_ro.get_doccount());
01047 }
01048 
01049 Xapian::docid
01050 QuartzWritableDatabase::get_lastdocid() const
01051 {
01052     DEBUGCALL(DB, Xapian::docid, "QuartzWritableDatabase::get_lastdocid", "");
01053     RETURN(lastdocid);
01054 }
01055 
01056 Xapian::doclength
01057 QuartzWritableDatabase::get_avlength() const
01058 {
01059     DEBUGCALL(DB, Xapian::doclength, "QuartzWritableDatabase::get_avlength", "");
01060     Xapian::doccount docs = database_ro.get_doccount();
01061     if (docs == 0) RETURN(0);
01062     RETURN(double(total_length) / docs);
01063 }
01064 
01065 Xapian::doclength
01066 QuartzWritableDatabase::get_doclength(Xapian::docid did) const
01067 {
01068     DEBUGCALL(DB, Xapian::doclength, "QuartzWritableDatabase::get_doclength", did);
01069     map<docid, termcount>::const_iterator i = doclens.find(did);
01070     if (i != doclens.end()) RETURN(i->second);
01071 
01072     RETURN(database_ro.get_doclength(did));
01073 }
01074 
01075 Xapian::doccount
01076 QuartzWritableDatabase::get_termfreq(const string & tname) const
01077 {
01078     DEBUGCALL(DB, Xapian::doccount, "QuartzWritableDatabase::get_termfreq", tname);
01079     Xapian::doccount termfreq = database_ro.get_termfreq(tname);
01080     map<string, pair<termcount_diff, termcount_diff> >::const_iterator i;
01081     i = freq_deltas.find(tname);
01082     if (i != freq_deltas.end()) termfreq += i->second.first;
01083     RETURN(termfreq);
01084 }
01085 
01086 Xapian::termcount
01087 QuartzWritableDatabase::get_collection_freq(const string & tname) const
01088 {
01089     DEBUGCALL(DB, Xapian::termcount, "QuartzWritableDatabase::get_collection_freq", tname);
01090     Xapian::termcount collfreq = database_ro.get_collection_freq(tname);
01091 
01092     map<string, pair<termcount_diff, termcount_diff> >::const_iterator i;
01093     i = freq_deltas.find(tname);
01094     if (i != freq_deltas.end()) collfreq += i->second.second;
01095 
01096     RETURN(collfreq);
01097 }
01098 
01099 bool
01100 QuartzWritableDatabase::term_exists(const string & tname) const
01101 {
01102     DEBUGCALL(DB, bool, "QuartzWritableDatabase::term_exists", tname);
01103     RETURN(get_termfreq(tname) != 0);
01104 }
01105 
01106 bool
01107 QuartzWritableDatabase::has_positions() const
01108 {
01109     return database_ro.has_positions();
01110 }
01111 
01112 
01113 LeafPostList *
01114 QuartzWritableDatabase::open_post_list(const string& tname) const
01115 {
01116     DEBUGCALL(DB, LeafPostList *, "QuartzWritableDatabase::open_post_list", tname);
01117     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
01118 
01119     if (tname.empty()) {
01120         RETURN(new QuartzAllDocsPostList(ptrtothis,
01121                                          &database_ro.termlist_table,
01122                                          get_doccount()));
01123     }
01124 
01125     // Need to flush iff we've got buffered changes to this term's postlist.
01126     map<string, map<docid, pair<char, termcount> > >::const_iterator j;
01127     j = mod_plists.find(tname);
01128     if (j != mod_plists.end()) {
01129         if (transaction_active())
01130             throw Xapian::UnimplementedError("Can't open modified postlist during a transaction");
01131         do_flush_const();
01132     }
01133 
01134     RETURN(new QuartzPostList(ptrtothis,
01135                               &database_ro.postlist_table,
01136                               &database_ro.positionlist_table,
01137                               tname));
01138 }
01139 
01140 TermList *
01141 QuartzWritableDatabase::open_term_list(Xapian::docid did) const
01142 {
01143     DEBUGCALL(DB, TermList *, "QuartzWritableDatabase::open_term_list",
01144               did);
01145     Assert(did != 0);
01146 
01147     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
01148     RETURN(new QuartzTermList(ptrtothis, &database_ro.termlist_table, did,
01149                               get_doccount()));
01150 }
01151 
01152 Xapian::Document::Internal *
01153 QuartzWritableDatabase::open_document(Xapian::docid did, bool lazy) const
01154 {
01155     DEBUGCALL(DB, Xapian::Document::Internal *, "QuartzWritableDatabase::open_document",
01156               did << ", " << lazy);
01157     Assert(did != 0);
01158 
01159     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
01160     RETURN(new QuartzDocument(ptrtothis,
01161                               &database_ro.value_table,
01162                               &database_ro.record_table,
01163                               did, lazy));
01164 }
01165 
01166 PositionList *
01167 QuartzWritableDatabase::open_position_list(Xapian::docid did,
01168                                    const string & tname) const
01169 {
01170     Assert(did != 0);
01171 
01172     AutoPtr<QuartzPositionList> poslist(new QuartzPositionList());
01173     poslist->read_data(&database_ro.positionlist_table, did, tname);
01174     if (poslist->get_size() == 0) {
01175         // Check that term / document combination exists.
01176         // If the doc doesn't exist, this will throw Xapian::DocNotFoundError:
01177         AutoPtr<TermList> tl(open_term_list(did));
01178         tl->skip_to(tname);
01179         if (tl->at_end() || tl->get_termname() != tname)
01180             throw Xapian::RangeError("Can't open position list: requested term is not present in document.");
01181     }
01182 
01183     return poslist.release();
01184 }
01185 
01186 TermList *
01187 QuartzWritableDatabase::open_allterms(const string & prefix) const
01188 {
01189     DEBUGCALL(DB, TermList *, "QuartzWritableDatabase::open_allterms", "");
01190     if (transaction_active())
01191         throw Xapian::UnimplementedError("Can't open allterms iterator during a transaction");
01192     // Terms may have been added or removed, so we need to flush.
01193     if (changes_made) do_flush_const();
01194     QuartzPostListTable *t = &database_ro.postlist_table;
01195     AutoPtr<Bcursor> pl_cursor(t->cursor_get());
01196     RETURN(new QuartzAllTermsList(Xapian::Internal::RefCntPtr<const QuartzWritableDatabase>(this),
01197                                   pl_cursor, t->get_entry_count(), prefix));
01198 }
01199 
01200 void
01201 QuartzWritableDatabase::cancel()
01202 {
01203     database_ro.cancel();
01204     total_length = database_ro.record_table.get_total_length();
01205     lastdocid = database_ro.get_lastdocid();
01206     freq_deltas.clear();
01207     doclens.clear();
01208     mod_plists.clear();
01209     changes_made = 0;
01210 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.