00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include <xapian/error.h>
00026 #include "expandweight.h"
00027 #include "quartz_termlist.h"
00028 #include "quartz_utils.h"
00029 #include "utils.h"
00030
00031 #include <algorithm>
00032 using namespace std;
00033
00034 void
00035 QuartzTermListTable::set_entries(Xapian::docid did,
00036 Xapian::TermIterator t,
00037 const Xapian::TermIterator &t_end,
00038 quartz_doclen_t doclen_,
00039 bool store_termfreqs)
00040 {
00041 DEBUGCALL(DB, void, "QuartzTermList::set_entries", did << ", " << t << ", " << t_end << ", " << doclen_ << ", " << store_termfreqs);
00042 string tag = pack_uint(doclen_);
00043
00044 string v;
00045 string prev_term;
00046 Xapian::doccount size = 0;
00047 for ( ; t != t_end; ++t) {
00048 bool stored_wdf = false;
00049
00050 if (!prev_term.empty()) {
00051 string::size_type len = min(prev_term.length(), (*t).length());
00052 string::size_type i;
00053 for (i = 0; i < len; ++i) {
00054 if (prev_term[i] != (*t)[i]) break;
00055 }
00056
00057 string::size_type x;
00058 x = (t.get_wdf() + 1) * (prev_term.length() + 1) + i;
00059 if (x < 256) {
00060
00061 v += char(x);
00062 stored_wdf = true;
00063 } else {
00064 v += char(i);
00065 }
00066 v += char((*t).length() - i);
00067 v += (*t).substr(i);
00068 } else {
00069 v += char((*t).length());
00070 v += *t;
00071 }
00072 prev_term = *t;
00073
00074 if (!stored_wdf) v += pack_uint(t.get_wdf());
00075 if (store_termfreqs) v += pack_uint(t.get_termfreq());
00076 ++size;
00077 }
00078 tag += pack_uint(size);
00079 tag += pack_bool(store_termfreqs);
00080 tag += v;
00081 add(quartz_docid_to_key(did), tag);
00082
00083 DEBUGLINE(DB, "QuartzTermList::set_entries() - new entry is `" + tag + "'");
00084 }
00085
00086 void
00087 QuartzTermListTable::delete_termlist(Xapian::docid did)
00088 {
00089 DEBUGCALL_STATIC(DB, void, "QuartzTermList::delete_termlist", did);
00090 del(quartz_docid_to_key(did));
00091 }
00092
00093
00094 QuartzTermList::QuartzTermList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_,
00095 const Btree * table_,
00096 Xapian::docid did_,
00097 Xapian::doccount doccount_)
00098 : this_db(this_db_), did(did_), table(table_),
00099 have_finished(false), current_wdf(0), has_termfreqs(false),
00100 current_termfreq(0), doccount(doccount_)
00101 {
00102 DEBUGCALL(DB, void, "QuartzTermList", "[this_db_], " << table_ << ", "
00103 << did << ", " << doccount_);
00104
00105 string key(quartz_docid_to_key(did));
00106
00107 if (!table->get_exact_entry(key, termlist_part))
00108 throw Xapian::DocNotFoundError("Can't read termlist for document "
00109 + om_tostring(did) + ": Not found");
00110
00111 DEBUGLINE(DB, "QuartzTermList::QuartzTermList() - data is `" + termlist_part + "'");
00112
00113 pos = termlist_part.data();
00114 end = pos + termlist_part.size();
00115
00116
00117 if (!unpack_uint(&pos, end, &doclen)) {
00118 if (pos != 0) throw Xapian::RangeError("doclen out of range.");
00119 throw Xapian::DatabaseCorruptError("Unexpected end of data when reading doclen.");
00120 }
00121
00122
00123 if (!unpack_uint(&pos, end, &termlist_size)) {
00124 if (pos != 0) throw Xapian::RangeError("Size of termlist out of range.");
00125 throw Xapian::DatabaseCorruptError("Unexpected end of data when reading termlist.");
00126 }
00127
00128
00129 if (!unpack_bool(&pos, end, &has_termfreqs)) {
00130 Assert(pos == 0);
00131 throw Xapian::DatabaseCorruptError("Unexpected end of data when reading termlist.");
00132 }
00133 }
00134
00135 Xapian::termcount
00136 QuartzTermList::get_approx_size() const
00137 {
00138 DEBUGCALL(DB, Xapian::termcount, "QuartzTermList::get_approx_size", "");
00139 RETURN(termlist_size);
00140 }
00141
00142 quartz_doclen_t
00143 QuartzTermList::get_doclength() const
00144 {
00145 DEBUGCALL(DB, quartz_doclen_t, "QuartzTermList::get_doclength", "");
00146 RETURN(doclen);
00147 }
00148
00149
00150 TermList *
00151 QuartzTermList::next()
00152 {
00153 DEBUGCALL(DB, TermList *, "QuartzTermList::next", "");
00154 if (pos == end) {
00155 have_finished = true;
00156 RETURN(0);
00157 }
00158 bool got_wdf = false;
00159
00160 if (!current_tname.empty()) {
00161 string::size_type len = static_cast<unsigned char>(*pos++);
00162 if (len > current_tname.length()) {
00163
00164 current_wdf = len / (current_tname.length() + 1) - 1;
00165 len %= (current_tname.length() + 1);
00166 got_wdf = true;
00167 }
00168 current_tname.resize(len);
00169 }
00170
00171
00172 string::size_type len = static_cast<unsigned char>(*pos++);
00173 current_tname.append(pos, len);
00174 pos += len;
00175
00176 if (!got_wdf) {
00177
00178 if (!unpack_uint(&pos, end, ¤t_wdf)) {
00179 if (pos == 0) throw Xapian::DatabaseCorruptError("Unexpected end of data when reading termlist.");
00180 throw Xapian::RangeError("Size of wdf out of range, in termlist.");
00181 }
00182 }
00183
00184
00185 if (has_termfreqs) {
00186 if (!unpack_uint(&pos, end, ¤t_termfreq)) {
00187 if (pos == 0) throw Xapian::DatabaseCorruptError("Unexpected end of data when reading termlist.");
00188 throw Xapian::RangeError("Size of term frequency out of range, in termlist.");
00189 }
00190 } else {
00191 current_termfreq = 0;
00192 }
00193
00194 DEBUGLINE(DB, "QuartzTermList::next() -" <<
00195 " current_tname=" << current_tname <<
00196 " current_wdf=" << current_wdf <<
00197 " current_termfreq=" << current_termfreq);
00198 RETURN(0);
00199 }
00200
00201 bool
00202 QuartzTermList::at_end() const
00203 {
00204 DEBUGCALL(DB, bool, "QuartzTermList::at_end", "");
00205 RETURN(have_finished);
00206 }
00207
00208 string
00209 QuartzTermList::get_termname() const
00210 {
00211 DEBUGCALL(DB, string, "QuartzTermList::get_termname", "");
00212 RETURN(current_tname);
00213 }
00214
00215 Xapian::termcount
00216 QuartzTermList::get_wdf() const
00217 {
00218 DEBUGCALL(DB, Xapian::termcount, "QuartzTermList::get_wdf", "");
00219 RETURN(current_wdf);
00220 }
00221
00222 Xapian::doccount
00223 QuartzTermList::get_termfreq() const
00224 {
00225 DEBUGCALL(DB, Xapian::doccount, "QuartzTermList::get_termfreq", "");
00226 if (current_termfreq == 0)
00227 current_termfreq = this_db->get_termfreq(current_tname);
00228 RETURN(current_termfreq);
00229 }
00230
00231 void
00232 QuartzTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
00233 {
00234 DEBUGCALL(DB, void, "QuartzTermList::accumulate_stats", "[stats&]");
00235 Assert(!have_finished);
00236 stats.accumulate(current_wdf, doclen, get_termfreq(), doccount);
00237 }
00238
00239 Xapian::termcount
00240 QuartzTermList::positionlist_count() const
00241 {
00242 throw Xapian::UnimplementedError("QuartzTermList::positionlist_count() not implemented");
00243 }
00244
00245 Xapian::PositionIterator
00246 QuartzTermList::positionlist_begin() const
00247 {
00248 DEBUGCALL(DB, Xapian::PositionIterator, "QuartzTermList::positionlist_begin", "");
00249 return Xapian::PositionIterator(this_db->open_position_list(did, current_tname));
00250 }