00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include <xapian/error.h>
00026
00027 #include "expandweight.h"
00028 #include "flint_positionlist.h"
00029 #include "flint_termlist.h"
00030 #include "flint_utils.h"
00031 #include "omassert.h"
00032 #include "utils.h"
00033
00034 using namespace std;
00035
00036 FlintTermList::FlintTermList(Xapian::Internal::RefCntPtr<const FlintDatabase> db_,
00037 Xapian::docid did_)
00038 : db(db_), did(did_), current_wdf(0), current_termfreq(0)
00039 {
00040 DEBUGCALL(DB, void, "FlintTermList",
00041 "[RefCntPtr<const FlintDatabase>], " << did_);
00042
00043 if (!db->termlist_table.get_exact_entry(flint_docid_to_key(did), data))
00044 throw Xapian::DocNotFoundError("No termlist for document " + om_tostring(did));
00045
00046 pos = data.data();
00047 end = pos + data.size();
00048
00049 if (pos == end) {
00050 doclen = 0;
00051 termlist_size = 0;
00052 return;
00053 }
00054
00055
00056 if (!unpack_uint(&pos, end, &doclen)) {
00057 const char *msg;
00058 if (pos == 0) {
00059 msg = "Too little data for doclen in termlist";
00060 } else {
00061 msg = "Overflowed value for doclen in termlist";
00062 }
00063 throw Xapian::DatabaseCorruptError(msg);
00064 }
00065
00066
00067 if (!unpack_uint(&pos, end, &termlist_size)) {
00068 const char *msg;
00069 if (pos == 0) {
00070 msg = "Too little data for list size in termlist";
00071 } else {
00072 msg = "Overflowed value for list size in termlist";
00073 }
00074 throw Xapian::DatabaseCorruptError(msg);
00075 }
00076
00077
00078
00079 if (pos != end && *pos == '0') ++pos;
00080 }
00081
00082 flint_doclen_t
00083 FlintTermList::get_doclength() const
00084 {
00085 DEBUGCALL(DB, flint_doclen_t, "FlintTermList::get_doclength", "");
00086 RETURN(doclen);
00087 }
00088
00089 Xapian::termcount
00090 FlintTermList::get_approx_size() const
00091 {
00092 DEBUGCALL(DB, Xapian::termcount, "FlintTermList::get_approx_size", "");
00093 RETURN(termlist_size);
00094 }
00095
00096 void
00097 FlintTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
00098 {
00099 DEBUGCALL(DB, void, "FlintTermList::accumulate_stats", "[stats&]");
00100 Assert(!at_end());
00101 stats.accumulate(current_wdf, doclen, get_termfreq(), db->get_doccount());
00102 }
00103
00104 string
00105 FlintTermList::get_termname() const
00106 {
00107 DEBUGCALL(DB, string, "FlintTermList::get_termname", "");
00108 RETURN(current_term);
00109 }
00110
00111 Xapian::termcount
00112 FlintTermList::get_wdf() const
00113 {
00114 DEBUGCALL(DB, Xapian::termcount, "FlintTermList::get_wdf", "");
00115 RETURN(current_wdf);
00116 }
00117
00118 Xapian::doccount
00119 FlintTermList::get_termfreq() const
00120 {
00121 DEBUGCALL(DB, Xapian::doccount, "FlintTermList::get_termfreq", "");
00122 if (current_termfreq == 0)
00123 current_termfreq = db->get_termfreq(current_term);
00124 RETURN(current_termfreq);
00125 }
00126
00127 TermList *
00128 FlintTermList::next()
00129 {
00130 DEBUGCALL(DB, TermList *, "FlintTermList::next", "");
00131 Assert(!at_end());
00132 if (pos == end) {
00133 pos = NULL;
00134 RETURN(NULL);
00135 }
00136
00137
00138 current_termfreq = 0;
00139
00140 bool wdf_in_reuse = false;
00141 if (!current_term.empty()) {
00142
00143 size_t len = static_cast<unsigned char>(*pos++);
00144 if (len > current_term.size()) {
00145
00146 wdf_in_reuse = true;
00147 size_t divisor = current_term.size() + 1;
00148 current_wdf = len / divisor - 1;
00149 len %= divisor;
00150 }
00151 current_term.resize(len);
00152 }
00153
00154
00155 size_t append_len = static_cast<unsigned char>(*pos++);
00156 current_term.append(pos, append_len);
00157 pos += append_len;
00158
00159
00160 if (!wdf_in_reuse && !unpack_uint(&pos, end, ¤t_wdf)) {
00161 const char *msg;
00162 if (pos == 0) {
00163 msg = "Too little data for wdf in termlist";
00164 } else {
00165 msg = "Overflowed value for wdf in termlist";
00166 }
00167 throw Xapian::DatabaseCorruptError(msg);
00168 }
00169
00170 RETURN(NULL);
00171 }
00172
00173 bool
00174 FlintTermList::at_end() const
00175 {
00176 DEBUGCALL(DB, bool, "FlintTermList::at_end", "");
00177 RETURN(pos == NULL);
00178 }
00179
00180 Xapian::termcount
00181 FlintTermList::positionlist_count() const
00182 {
00183 DEBUGCALL(DB, Xapian::termcount, "FlintTermList::positionlist_count", "");
00184 RETURN(db->position_table.positionlist_count(did, current_term));
00185 }
00186
00187 Xapian::PositionIterator
00188 FlintTermList::positionlist_begin() const
00189 {
00190 DEBUGCALL(DB, Xapian::PositionIterator, "FlintTermList::positionlist_begin", "");
00191 return Xapian::PositionIterator(
00192 new FlintPositionList(&db->position_table, did, current_term));
00193 }