00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include <xapian/document.h>
00024 #include <xapian/error.h>
00025 #include <xapian/positioniterator.h>
00026 #include <xapian/termiterator.h>
00027 #include <xapian/valueiterator.h>
00028
00029 #include "omassert.h"
00030 #include "omenquireinternal.h"
00031 #include "serialise.h"
00032 #include "serialise-double.h"
00033 #include "stats.h"
00034 #include "utils.h"
00035
00036 #include <string>
00037 #include <string.h>
00038
00039 using namespace std;
00040
/** Encode a length as a variable-width byte string.
 *
 *  Lengths below 255 are written as a single byte holding the value.
 *  Anything larger is written as a 0xff marker byte followed by
 *  (len - 255) split into 7-bit groups, least significant group first;
 *  the final group has its top bit set to mark the end.
 *
 *  @param len  The length to encode.
 *  @return     The encoded bytes.
 */
std::string
encode_length(size_t len)
{
    // Short form: the value itself fits in one byte.
    if (len < 255)
        return std::string(1, static_cast<char>(len));

    // Long form: marker byte, then the excess over 255 in 7-bit groups.
    std::string encoded(1, '\xff');
    size_t excess = len - 255;
    do {
        unsigned char group = static_cast<unsigned char>(excess & 0x7f);
        excess >>= 7;
        // Only the last group carries the terminator bit.
        if (excess == 0)
            group |= 0x80;
        encoded += static_cast<char>(group);
    } while (excess != 0);
    return encoded;
}
00062
00063 size_t
00064 decode_length(const char ** p, const char *end, bool check_remaining)
00065 {
00066 if (*p == end) {
00067 throw Xapian::NetworkError("Bad encoded length: no data");
00068 }
00069
00070 size_t len = static_cast<unsigned char>(*(*p)++);
00071 if (len == 0xff) {
00072 len = 0;
00073 unsigned char ch;
00074 int shift = 0;
00075 do {
00076 if (*p == end || shift > 28)
00077 throw Xapian::NetworkError("Bad encoded length: insufficient data");
00078 ch = *(*p)++;
00079 len |= size_t(ch & 0x7f) << shift;
00080 shift += 7;
00081 } while ((ch & 0x80) == 0);
00082 len += 255;
00083 }
00084 if (check_remaining && len > size_t(end - *p)) {
00085 throw Xapian::NetworkError("Bad encoded length: length greater than data");
00086 }
00087 return len;
00088 }
00089
00090 string
00091 serialise_error(const Xapian::Error &e)
00092 {
00093 string result;
00094 result += encode_length(strlen(e.get_type()));
00095 result += e.get_type();
00096 result += encode_length(e.get_context().length());
00097 result += e.get_context();
00098 result += encode_length(e.get_msg().length());
00099 result += e.get_msg();
00100
00101 const char * err = e.get_error_string();
00102 if (err) result += err;
00103 return result;
00104 }
00105
00106 void
00107 unserialise_error(const string &serialised_error, const string &prefix,
00108 const string &new_context)
00109 {
00110
00111 const char * p = serialised_error.c_str();
00112 const char * end = p + serialised_error.size();
00113 size_t len;
00114 len = decode_length(&p, end, true);
00115 if (len == 7 && memcmp(p, "UNKNOWN", 7) == 0) {
00116 throw Xapian::InternalError("UNKNOWN");
00117 }
00118 string type(p, len);
00119 p += len;
00120
00121 len = decode_length(&p, end, true);
00122 string context(p, len);
00123 p += len;
00124
00125 len = decode_length(&p, end, true);
00126 string msg(prefix);
00127 msg.append(p, len);
00128 p += len;
00129
00130 const char * error_string = (p == end) ? NULL : p;
00131
00132 if (!context.empty() && !new_context.empty()) {
00133 msg += "; context was: ";
00134 msg += context;
00135 context = new_context;
00136 }
00137
00138 #include <xapian/errordispatch.h>
00139
00140 msg = "Unknown remote exception type " + type + ": " + msg;
00141 throw Xapian::InternalError(msg, context);
00142 }
00143
00144 string serialise_stats(const Stats &stats)
00145 {
00146 string result;
00147
00148 result += encode_length(stats.collection_size);
00149 result += encode_length(stats.rset_size);
00150 result += serialise_double(stats.average_length);
00151
00152 map<string, Xapian::doccount>::const_iterator i;
00153
00154 result += encode_length(stats.termfreq.size());
00155 for (i = stats.termfreq.begin(); i != stats.termfreq.end(); ++i) {
00156 result += encode_length(i->first.size());
00157 result += i->first;
00158 result += encode_length(i->second);
00159 }
00160
00161 for (i = stats.reltermfreq.begin(); i != stats.reltermfreq.end(); ++i) {
00162 result += encode_length(i->first.size());
00163 result += i->first;
00164 result += encode_length(i->second);
00165 }
00166
00167 return result;
00168 }
00169
00170 Stats
00171 unserialise_stats(const string &s)
00172 {
00173 const char * p = s.c_str();
00174 const char * p_end = p + s.size();
00175
00176 Stats stat;
00177
00178 stat.collection_size = decode_length(&p, p_end, false);
00179 stat.rset_size = decode_length(&p, p_end, false);
00180 stat.average_length = unserialise_double(&p, p_end);
00181
00182 size_t n = decode_length(&p, p_end, false);
00183 while (n--) {
00184 size_t len = decode_length(&p, p_end, true);
00185 string term(p, len);
00186 p += len;
00187 stat.termfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
00188 }
00189
00190 while (p != p_end) {
00191 size_t len = decode_length(&p, p_end, true);
00192 string term(p, len);
00193 p += len;
00194 stat.reltermfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
00195 }
00196
00197 return stat;
00198 }
00199
00200 string
00201 serialise_mset_pre_30_5(const Xapian::MSet &mset)
00202 {
00203 string result;
00204
00205 result += encode_length(mset.get_firstitem());
00206 result += encode_length(mset.get_matches_lower_bound());
00207 result += encode_length(mset.get_matches_estimated());
00208 result += encode_length(mset.get_matches_upper_bound());
00209 result += serialise_double(mset.get_max_possible());
00210 result += serialise_double(mset.get_max_attained());
00211 result += encode_length(mset.size());
00212 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
00213 result += serialise_double(i.get_weight());
00214 result += encode_length(*i);
00215 result += encode_length(i.get_collapse_key().size());
00216 result += i.get_collapse_key();
00217 result += encode_length(i.get_collapse_count());
00218 }
00219
00220 const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
00221 = mset.internal->termfreqandwts;
00222
00223 map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
00224 for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
00225 result += encode_length(j->first.size());
00226 result += j->first;
00227 result += encode_length(j->second.termfreq);
00228 result += serialise_double(j->second.termweight);
00229 }
00230
00231 return result;
00232 }
00233
00234 string
00235 serialise_mset(const Xapian::MSet &mset)
00236 {
00237 string result;
00238
00239 result += encode_length(mset.get_firstitem());
00240 result += encode_length(mset.get_matches_lower_bound());
00241 result += encode_length(mset.get_matches_estimated());
00242 result += encode_length(mset.get_matches_upper_bound());
00243 result += serialise_double(mset.get_max_possible());
00244 result += serialise_double(mset.get_max_attained());
00245
00246 result += serialise_double(mset.internal->percent_factor);
00247
00248 result += encode_length(mset.size());
00249 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
00250 result += serialise_double(i.get_weight());
00251 result += encode_length(*i);
00252 result += encode_length(i.get_collapse_key().size());
00253 result += i.get_collapse_key();
00254 result += encode_length(i.get_collapse_count());
00255 }
00256
00257 const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
00258 = mset.internal->termfreqandwts;
00259
00260 map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
00261 for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
00262 result += encode_length(j->first.size());
00263 result += j->first;
00264 result += encode_length(j->second.termfreq);
00265 result += serialise_double(j->second.termweight);
00266 }
00267
00268 return result;
00269 }
00270
00271 Xapian::MSet
00272 unserialise_mset(const string &s)
00273 {
00274 const char * p = s.data();
00275 const char * p_end = p + s.size();
00276
00277 Xapian::doccount firstitem = decode_length(&p, p_end, false);
00278 Xapian::doccount matches_lower_bound = decode_length(&p, p_end, false);
00279 Xapian::doccount matches_estimated = decode_length(&p, p_end, false);
00280 Xapian::doccount matches_upper_bound = decode_length(&p, p_end, false);
00281 Xapian::weight max_possible = unserialise_double(&p, p_end);
00282 Xapian::weight max_attained = unserialise_double(&p, p_end);
00283
00284 double percent_factor = unserialise_double(&p, p_end);
00285
00286 vector<Xapian::Internal::MSetItem> items;
00287 size_t msize = decode_length(&p, p_end, false);
00288 while (msize-- > 0) {
00289 Xapian::weight wt = unserialise_double(&p, p_end);
00290 Xapian::docid did = decode_length(&p, p_end, false);
00291 size_t len = decode_length(&p, p_end, true);
00292 string key(p, len);
00293 p += len;
00294 items.push_back(Xapian::Internal::MSetItem(wt, did, key,
00295 decode_length(&p, p_end, false)));
00296 }
00297
00298 map<string, Xapian::MSet::Internal::TermFreqAndWeight> terminfo;
00299 while (p != p_end) {
00300 Xapian::MSet::Internal::TermFreqAndWeight tfaw;
00301 size_t len = decode_length(&p, p_end, true);
00302 string term(p, len);
00303 p += len;
00304 tfaw.termfreq = decode_length(&p, p_end, false);
00305 tfaw.termweight = unserialise_double(&p, p_end);
00306 terminfo.insert(make_pair(term, tfaw));
00307 }
00308
00309 return Xapian::MSet(new Xapian::MSet::Internal(
00310 firstitem,
00311 matches_upper_bound,
00312 matches_lower_bound,
00313 matches_estimated,
00314 max_possible, max_attained,
00315 items, terminfo, percent_factor));
00316 }
00317
00318 string
00319 serialise_rset(const Xapian::RSet &rset)
00320 {
00321 const set<Xapian::docid> & items = rset.internal->get_items();
00322 string result;
00323 set<Xapian::docid>::const_iterator i;
00324 Xapian::docid lastdid = 0;
00325 for (i = items.begin(); i != items.end(); ++i) {
00326 Xapian::docid did = *i;
00327 result += encode_length(did - lastdid - 1);
00328 lastdid = did;
00329 }
00330 return result;
00331 }
00332
00333 Xapian::RSet
00334 unserialise_rset(const string &s)
00335 {
00336 Xapian::RSet rset;
00337
00338 const char * p = s.data();
00339 const char * p_end = p + s.size();
00340
00341 Xapian::docid did = 0;
00342 while (p != p_end) {
00343 did += decode_length(&p, p_end, false) + 1;
00344 rset.add_document(did);
00345 }
00346
00347 return rset;
00348 }
00349
00350 string
00351 serialise_document(const Xapian::Document &doc)
00352 {
00353 string result;
00354
00355 size_t n = doc.values_count();
00356 result += encode_length(doc.values_count());
00357 Xapian::ValueIterator value;
00358 for (value = doc.values_begin(); value != doc.values_end(); ++value) {
00359 result += encode_length(value.get_valueno());
00360 result += encode_length((*value).size());
00361 result += *value;
00362 --n;
00363 }
00364 Assert(n == 0);
00365
00366 result += encode_length(doc.termlist_count());
00367 Xapian::TermIterator term;
00368 n = doc.termlist_count();
00369 for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) {
00370 result += encode_length((*term).size());
00371 result += *term;
00372 result += encode_length(term.get_wdf());
00373
00374 result += encode_length(term.positionlist_count());
00375 Xapian::PositionIterator pos;
00376 Xapian::termpos oldpos = 0;
00377 size_t x = term.positionlist_count();
00378 for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) {
00379 Xapian::termpos diff = *pos - oldpos;
00380 string delta = encode_length(diff);
00381 result += delta;
00382 oldpos = *pos;
00383 --x;
00384 }
00385 Assert(x == 0);
00386 --n;
00387 }
00388 Assert(n == 0);
00389
00390 result += doc.get_data();
00391 return result;
00392 }
00393
00394 Xapian::Document
00395 unserialise_document(const string &s)
00396 {
00397 Xapian::Document doc;
00398 const char * p = s.data();
00399 const char * p_end = p + s.size();
00400
00401 size_t n_values = decode_length(&p, p_end, false);
00402 while (n_values--) {
00403 Xapian::valueno valno = decode_length(&p, p_end, false);
00404 size_t len = decode_length(&p, p_end, true);
00405 doc.add_value(valno, string(p, len));
00406 p += len;
00407 }
00408
00409 size_t n_terms = decode_length(&p, p_end, false);
00410 while (n_terms--) {
00411 size_t len = decode_length(&p, p_end, true);
00412 string term(p, len);
00413 p += len;
00414
00415
00416 Xapian::termcount wdf = decode_length(&p, p_end, false);
00417 doc.add_term(term, wdf);
00418
00419 size_t n_pos = decode_length(&p, p_end, false);
00420 Xapian::termpos pos = 0;
00421 while (n_pos--) {
00422 pos += decode_length(&p, p_end, false);
00423 doc.add_posting(term, pos, 0);
00424 }
00425 }
00426
00427 doc.set_data(string(p, p_end - p));
00428 return doc;
00429 }