backends/flint/flint_termlist.cc

Go to the documentation of this file.
00001 /* flint_termlist.cc: Termlists in a flint database
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2006,2007 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 
00025 #include <xapian/error.h>
00026 
00027 #include "expandweight.h"
00028 #include "flint_positionlist.h"
00029 #include "flint_termlist.h"
00030 #include "flint_utils.h"
00031 #include "omassert.h"
00032 #include "utils.h"
00033 
00034 using namespace std;
00035 
00036 FlintTermList::FlintTermList(Xapian::Internal::RefCntPtr<const FlintDatabase> db_,
00037                              Xapian::docid did_)
00038         : db(db_), did(did_), current_wdf(0), current_termfreq(0)
00039 {
00040     DEBUGCALL(DB, void, "FlintTermList",
00041               "[RefCntPtr<const FlintDatabase>], " << did_);
00042 
00043     if (!db->termlist_table.get_exact_entry(flint_docid_to_key(did), data))
00044         throw Xapian::DocNotFoundError("No termlist for document " + om_tostring(did));
00045 
00046     pos = data.data();
00047     end = pos + data.size();
00048 
00049     if (pos == end) {
00050         doclen = 0;
00051         termlist_size = 0;
00052         return;
00053     }
00054 
00055     // Read doclen
00056     if (!unpack_uint(&pos, end, &doclen)) {
00057         const char *msg;
00058         if (pos == 0) {
00059             msg = "Too little data for doclen in termlist";
00060         } else {
00061             msg = "Overflowed value for doclen in termlist";
00062         }
00063         throw Xapian::DatabaseCorruptError(msg);
00064     }
00065 
00066     // Read termlist_size
00067     if (!unpack_uint(&pos, end, &termlist_size)) {
00068         const char *msg;
00069         if (pos == 0) {
00070             msg = "Too little data for list size in termlist";
00071         } else {
00072             msg = "Overflowed value for list size in termlist";
00073         }
00074         throw Xapian::DatabaseCorruptError(msg);
00075     }
00076 
00077     // See comment in FlintTermListTable::set_termlist() in
00078     // flint_termlisttable.cc for an explanation of this!
00079     if (pos != end && *pos == '0') ++pos;
00080 }
00081 
00082 flint_doclen_t
00083 FlintTermList::get_doclength() const
00084 {
00085     DEBUGCALL(DB, flint_doclen_t, "FlintTermList::get_doclength", "");
00086     RETURN(doclen);
00087 }
00088 
00089 Xapian::termcount
00090 FlintTermList::get_approx_size() const
00091 {
00092     DEBUGCALL(DB, Xapian::termcount, "FlintTermList::get_approx_size", "");
00093     RETURN(termlist_size);
00094 }
00095 
00096 void
00097 FlintTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
00098 {
00099     DEBUGCALL(DB, void, "FlintTermList::accumulate_stats", "[stats&]");
00100     Assert(!at_end());
00101     stats.accumulate(current_wdf, doclen, get_termfreq(), db->get_doccount());
00102 }
00103 
00104 string
00105 FlintTermList::get_termname() const
00106 {
00107     DEBUGCALL(DB, string, "FlintTermList::get_termname", "");
00108     RETURN(current_term);
00109 }
00110 
00111 Xapian::termcount
00112 FlintTermList::get_wdf() const
00113 {
00114     DEBUGCALL(DB, Xapian::termcount, "FlintTermList::get_wdf", "");
00115     RETURN(current_wdf);
00116 }
00117 
00118 Xapian::doccount
00119 FlintTermList::get_termfreq() const
00120 {
00121     DEBUGCALL(DB, Xapian::doccount, "FlintTermList::get_termfreq", "");
00122     if (current_termfreq == 0)
00123         current_termfreq = db->get_termfreq(current_term);
00124     RETURN(current_termfreq);
00125 }
00126 
00127 TermList *
00128 FlintTermList::next()
00129 {
00130     DEBUGCALL(DB, TermList *, "FlintTermList::next", "");
00131     Assert(!at_end());
00132     if (pos == end) {
00133         pos = NULL;
00134         RETURN(NULL);
00135     }
00136 
00137     // Reset to 0 to indicate that the termfreq needs to be read.
00138     current_termfreq = 0;
00139 
00140     bool wdf_in_reuse = false;
00141     if (!current_term.empty()) {
00142         // Find out how much of the previous term to reuse.
00143         size_t len = static_cast<unsigned char>(*pos++);
00144         if (len > current_term.size()) {
00145             // The wdf is also stored in the "reuse" byte.
00146             wdf_in_reuse = true;
00147             size_t divisor = current_term.size() + 1;
00148             current_wdf = len / divisor - 1;
00149             len %= divisor;
00150         }
00151         current_term.resize(len);
00152     }
00153 
00154     // Append the new tail to form the next term.
00155     size_t append_len = static_cast<unsigned char>(*pos++);
00156     current_term.append(pos, append_len);
00157     pos += append_len;
00158 
00159     // Read the wdf if it wasn't packed into the reuse byte.
00160     if (!wdf_in_reuse && !unpack_uint(&pos, end, &current_wdf)) {
00161         const char *msg;
00162         if (pos == 0) {
00163             msg = "Too little data for wdf in termlist";
00164         } else {
00165             msg = "Overflowed value for wdf in termlist";
00166         }
00167         throw Xapian::DatabaseCorruptError(msg);
00168     }
00169 
00170     RETURN(NULL);
00171 }
00172 
00173 bool
00174 FlintTermList::at_end() const
00175 {
00176     DEBUGCALL(DB, bool, "FlintTermList::at_end", "");
00177     RETURN(pos == NULL);
00178 }
00179 
00180 Xapian::termcount
00181 FlintTermList::positionlist_count() const
00182 {
00183     DEBUGCALL(DB, Xapian::termcount, "FlintTermList::positionlist_count", "");
00184     RETURN(db->position_table.positionlist_count(did, current_term));
00185 }
00186 
00187 Xapian::PositionIterator
00188 FlintTermList::positionlist_begin() const
00189 {
00190     DEBUGCALL(DB, Xapian::PositionIterator, "FlintTermList::positionlist_begin", "");
00191     return Xapian::PositionIterator(
00192             new FlintPositionList(&db->position_table, did, current_term));
00193 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.