00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024 #include "omdebug.h"
00025 #include "quartz_postlist.h"
00026 #include "quartz_utils.h"
00027 #include "bcursor.h"
00028 #include "database.h"
00029 #include "noreturn.h"
00030 #include "utils.h"
00031
00033 static void
00034 make_key(const string & tname, Xapian::docid did, string & key)
00035 {
00036 key = pack_string_preserving_sort(tname);
00037 key += pack_uint_preserving_sort(did);
00038 }
00039
00040 static void
00041 make_key(const string & tname, string & key)
00042 {
00043 key = pack_string_preserving_sort(tname);
00044 }
00045
00046
00047
00048
00049
00050
00051 const unsigned int CHUNKSIZE = 2000;
00052
00059 class QPostlistChunkWriter {
00060 public:
00061 QPostlistChunkWriter(const string &orig_key_,
00062 bool is_first_chunk_,
00063 const string &tname_,
00064 bool is_last_chunk_);
00065
00067 void append(Btree * table, Xapian::docid did,
00068 Xapian::termcount wdf, quartz_doclen_t doclen);
00069
00071 void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
00072 const string & s) {
00073 Assert(!started);
00074 first_did = first_did_;
00075 current_did = current_did_;
00076 if (!s.empty()) {
00077 chunk.append(s);
00078 started = true;
00079 }
00080 }
00081
00086 void flush(Btree *table);
00087
00088 private:
00089 string orig_key;
00090 string tname;
00091 bool is_first_chunk;
00092 bool is_last_chunk;
00093 bool started;
00094
00095 Xapian::docid first_did;
00096 Xapian::docid current_did;
00097
00098 string chunk;
00099 };
00100
00101
00102
00104 XAPIAN_NORETURN(static void report_read_error(const char * position));
00105 static void report_read_error(const char * position)
00106 {
00107 if (position == 0) {
00108
00109 throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
00110 }
00111
00112 throw Xapian::RangeError("Value in posting list too large.");
00113 }
00114
00115 static inline bool get_tname_from_key(const char **src, const char *end,
00116 string &tname)
00117 {
00118 return unpack_string_preserving_sort(src, end, tname);
00119 }
00120
00121 static inline bool
00122 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
00123 {
00124 string tname_in_key;
00125
00126
00127 if (!get_tname_from_key(keypos, keyend, tname_in_key)) {
00128 report_read_error(*keypos);
00129 }
00130
00131
00132 return tname_in_key == tname;
00133 }
00134
00135 static inline bool
00136 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
00137 {
00138 if (*keypos == keyend) return false;
00139
00140 return check_tname_in_key_lite(keypos, keyend, tname);
00141 }
00142
00144 static Xapian::docid
00145 read_start_of_first_chunk(const char ** posptr,
00146 const char * end,
00147 Xapian::doccount * number_of_entries_ptr,
00148 Xapian::termcount * collection_freq_ptr)
00149 {
00150 DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk",
00151 (const void *)posptr << ", " <<
00152 (const void *)end << ", " <<
00153 (void *)number_of_entries_ptr << ", " <<
00154 (void *)collection_freq_ptr);
00155
00156 QuartzPostList::read_number_of_entries(posptr, end,
00157 number_of_entries_ptr, collection_freq_ptr);
00158 if (number_of_entries_ptr)
00159 DEBUGLINE(DB, "number_of_entries = " << *number_of_entries_ptr);
00160 if (collection_freq_ptr)
00161 DEBUGLINE(DB, "collection_freq = " << *collection_freq_ptr);
00162
00163 Xapian::docid did;
00164
00165 if (!unpack_uint(posptr, end, &did))
00166 report_read_error(*posptr);
00167 ++did;
00168 DEBUGLINE(DB, "doc_id = " << did);
00169 RETURN(did);
00170 }
00171
00172 Xapian::doccount
00173 QuartzPostListTable::get_termfreq(const string & term) const
00174 {
00175 string key, tag;
00176 make_key(term, key);
00177 if (!get_exact_entry(key, tag)) return 0;
00178
00179 Xapian::doccount termfreq;
00180 const char * p = tag.data();
00181 QuartzPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
00182 return termfreq;
00183 }
00184
00185 Xapian::termcount
00186 QuartzPostListTable::get_collection_freq(const string & term) const
00187 {
00188 string key, tag;
00189 make_key(term, key);
00190 if (!get_exact_entry(key, tag)) return 0;
00191
00192 Xapian::termcount collfreq;
00193 const char * p = tag.data();
00194 QuartzPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
00195 return collfreq;
00196 }
00197
00198 static inline void read_did_increase(const char ** posptr,
00199 const char * end,
00200 Xapian::docid * did_ptr)
00201 {
00202 Xapian::docid did_increase;
00203 if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
00204 *did_ptr += did_increase + 1;
00205 }
00206
00208 static inline void read_wdf_and_length(const char ** posptr,
00209 const char * end,
00210 Xapian::termcount * wdf_ptr,
00211 quartz_doclen_t * doclength_ptr)
00212 {
00213 if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
00214 if (!unpack_uint(posptr, end, doclength_ptr)) report_read_error(*posptr);
00215 }
00216
00218 static Xapian::docid
00219 read_start_of_chunk(const char ** posptr,
00220 const char * end,
00221 Xapian::docid first_did_in_chunk,
00222 bool * is_last_chunk_ptr)
00223 {
00224 DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk",
00225 reinterpret_cast<const void*>(posptr) << ", " <<
00226 reinterpret_cast<const void*>(end) << ", " <<
00227 first_did_in_chunk << ", " <<
00228 reinterpret_cast<const void*>(is_last_chunk_ptr));
00229
00230
00231 if (!unpack_bool(posptr, end, is_last_chunk_ptr))
00232 report_read_error(*posptr);
00233 if (is_last_chunk_ptr)
00234 DEBUGLINE(DB, "is_last_chunk = " << *is_last_chunk_ptr);
00235
00236
00237 Xapian::docid increase_to_last;
00238 if (!unpack_uint(posptr, end, &increase_to_last))
00239 report_read_error(*posptr);
00240 ++increase_to_last;
00241 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
00242 DEBUGLINE(DB, "last_did_in_chunk = " << last_did_in_chunk);
00243 RETURN(last_did_in_chunk);
00244 }
00245
00246 static string make_wdf_and_length(Xapian::termcount wdf, quartz_doclen_t doclength)
00247 {
00248 return pack_uint(wdf) + pack_uint(doclength);
00249 }
00250
00251 static void write_start_of_chunk(string & chunk,
00252 unsigned int start_of_chunk_header,
00253 unsigned int end_of_chunk_header,
00254 bool is_last_chunk,
00255 Xapian::docid first_did_in_chunk,
00256 Xapian::docid last_did_in_chunk)
00257 {
00258 Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
00259 Assert(last_did_in_chunk >= first_did_in_chunk);
00260 Xapian::docid increase_to_last = last_did_in_chunk - first_did_in_chunk;
00261
00262 chunk.replace(start_of_chunk_header,
00263 end_of_chunk_header - start_of_chunk_header,
00264 pack_bool(is_last_chunk) + pack_uint(increase_to_last - 1));
00265
00266
00267
00268 }
00269
00274 class QPostlistChunkReader {
00275 public:
00281 QPostlistChunkReader(Xapian::docid first_did, const string & data_)
00282 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
00283 {
00284 if (!at_end) read_wdf_and_length(&pos, end, &wdf, &doclength);
00285 }
00286
00287 Xapian::docid get_docid() const {
00288 return did;
00289 }
00290 Xapian::termcount get_wdf() const {
00291 return wdf;
00292 }
00293 quartz_doclen_t get_doclength() const {
00294 return doclength;
00295 }
00296
00297 bool is_at_end() const {
00298 return at_end;
00299 }
00300
00303 void next();
00304
00305 private:
00306 string data;
00307
00308 const char *pos;
00309 const char *end;
00310
00311 bool at_end;
00312
00313 Xapian::docid did;
00314 Xapian::termcount wdf;
00315 quartz_doclen_t doclength;
00316 };
00317
00318 void
00319 QPostlistChunkReader::next()
00320 {
00321 if (pos == end) {
00322 at_end = true;
00323 } else {
00324 read_did_increase(&pos, end, &did);
00325 read_wdf_and_length(&pos, end, &wdf, &doclength);
00326 }
00327 }
00328
00329 QPostlistChunkWriter::QPostlistChunkWriter(const string &orig_key_,
00330 bool is_first_chunk_,
00331 const string &tname_,
00332 bool is_last_chunk_)
00333 : orig_key(orig_key_),
00334 tname(tname_), is_first_chunk(is_first_chunk_),
00335 is_last_chunk(is_last_chunk_),
00336 started(false)
00337 {
00338 DEBUGCALL(DB, void, "QPostlistChunkWriter::QPostlistChunkWriter",
00339 orig_key_ << ", " << is_first_chunk_ << ", " << tname_ << ", " <<
00340 is_last_chunk_);
00341 }
00342
00343 void
00344 QPostlistChunkWriter::append(Btree * table, Xapian::docid did,
00345 Xapian::termcount wdf, quartz_doclen_t doclen)
00346 {
00347 if (!started) {
00348 started = true;
00349 first_did = did;
00350 } else {
00351 Assert(did > current_did);
00352
00353 if (chunk.size() >= CHUNKSIZE) {
00354 bool save_is_last_chunk = is_last_chunk;
00355 is_last_chunk = false;
00356 flush(table);
00357 is_last_chunk = save_is_last_chunk;
00358 is_first_chunk = false;
00359 first_did = did;
00360 chunk = "";
00361 make_key(tname, first_did, orig_key);
00362 } else {
00363 chunk.append(pack_uint(did - current_did - 1));
00364 }
00365 }
00366 current_did = did;
00367 chunk.append(make_wdf_and_length(wdf, doclen));
00368 }
00369
00372 static inline string
00373 make_start_of_first_chunk(Xapian::termcount entries,
00374 Xapian::termcount collectionfreq,
00375 Xapian::docid new_did)
00376 {
00377 return pack_uint(entries) + pack_uint(collectionfreq) + pack_uint(new_did - 1);
00378 }
00379
00382 static inline string
00383 make_start_of_chunk(bool new_is_last_chunk,
00384 Xapian::docid new_first_did,
00385 Xapian::docid new_final_did)
00386 {
00387 Assert(new_final_did >= new_first_did);
00388 return pack_bool(new_is_last_chunk) +
00389 pack_uint(new_final_did - new_first_did - 1);
00390 }
00391
00392 void
00393 QPostlistChunkWriter::flush(Btree *table)
00394 {
00395 DEBUGCALL(DB, void, "QPostlistChunkWriter::flush", table);
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405 if (!started) {
00406
00407
00408
00409
00410
00411
00412
00413
00414 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): deleting chunk");
00415 Assert(!orig_key.empty());
00416 if (is_first_chunk) {
00417 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): deleting first chunk");
00418 if (is_last_chunk) {
00419
00420
00421
00422 table->del(orig_key);
00423 return;
00424 }
00425
00426
00427
00428
00429
00430
00431 AutoPtr<Bcursor> cursor(table->cursor_get());
00432
00433 if (!cursor->find_entry(orig_key)) {
00434 throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
00435 }
00436
00437
00438
00439 Xapian::termcount num_ent, coll_freq;
00440 {
00441 cursor->read_tag();
00442 const char *tagpos = cursor->current_tag.data();
00443 const char *tagend = tagpos + cursor->current_tag.size();
00444
00445 (void)read_start_of_first_chunk(&tagpos, tagend,
00446 &num_ent, &coll_freq);
00447 }
00448
00449
00450 cursor->next();
00451 if (cursor->after_end()) {
00452 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00453 }
00454 const char *kpos = cursor->current_key.data();
00455 const char *kend = kpos + cursor->current_key.size();
00456 if (!check_tname_in_key(&kpos, kend, tname)) {
00457 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00458 }
00459
00460
00461 Xapian::docid new_first_did;
00462 if (!unpack_uint_preserving_sort(&kpos, kend,
00463 &new_first_did)) {
00464 report_read_error(kpos);
00465 }
00466
00467 cursor->read_tag();
00468 const char *tagpos = cursor->current_tag.data();
00469 const char *tagend = tagpos + cursor->current_tag.size();
00470
00471
00472 bool new_is_last_chunk;
00473 Xapian::docid new_last_did_in_chunk =
00474 read_start_of_chunk(&tagpos, tagend, new_first_did,
00475 &new_is_last_chunk);
00476
00477 string chunk_data(tagpos, tagend);
00478
00479
00480 table->del(cursor->current_key);
00481
00482
00483 string tag;
00484 tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
00485 tag += make_start_of_chunk(new_is_last_chunk,
00486 new_first_did,
00487 new_last_did_in_chunk);
00488 tag += chunk_data;
00489 table->add(orig_key, tag);
00490 return;
00491 }
00492
00493 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): deleting secondary chunk");
00494
00495
00496
00497
00498
00499 table->del(orig_key);
00500
00501 if (is_last_chunk) {
00502 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): deleting secondary last chunk");
00503
00504 AutoPtr<Bcursor> cursor(table->cursor_get());
00505
00506
00507
00508 if (cursor->find_entry(orig_key)) {
00509 throw Xapian::DatabaseCorruptError("Quartz key not deleted as we expected");
00510 }
00511
00512 const char * keypos = cursor->current_key.data();
00513 const char * keyend = keypos + cursor->current_key.size();
00514 if (!check_tname_in_key(&keypos, keyend, tname)) {
00515 throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
00516 }
00517
00518 bool is_prev_first_chunk = (keypos == keyend);
00519
00520
00521 cursor->read_tag();
00522 string tag = cursor->current_tag;
00523
00524 const char *tagpos = tag.data();
00525 const char *tagend = tagpos + tag.size();
00526
00527
00528 Xapian::docid first_did_in_chunk;
00529 if (is_prev_first_chunk) {
00530 first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
00531 0, 0);
00532 } else {
00533 if (!unpack_uint_preserving_sort(&keypos, keyend,
00534 &first_did_in_chunk))
00535 report_read_error(keypos);
00536 }
00537 bool wrong_is_last_chunk;
00538 string::size_type start_of_chunk_header = tagpos - tag.data();
00539 Xapian::docid last_did_in_chunk =
00540 read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
00541 &wrong_is_last_chunk);
00542 string::size_type end_of_chunk_header = tagpos - tag.data();
00543
00544
00545 write_start_of_chunk(tag,
00546 start_of_chunk_header,
00547 end_of_chunk_header,
00548 true,
00549 first_did_in_chunk,
00550 last_did_in_chunk);
00551 table->add(cursor->current_key, tag);
00552 }
00553 } else {
00554 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): updating chunk which still has items in it");
00555
00556
00557
00558
00559
00560
00561 string tag;
00562
00563
00564
00565
00566 if (is_first_chunk) {
00567
00568
00569
00570 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
00571 string key;
00572 make_key(tname, key);
00573 bool ok = table->get_exact_entry(key, tag);
00574 (void)ok;
00575 Assert(ok);
00576 Assert(!tag.empty());
00577
00578 Xapian::termcount num_ent, coll_freq;
00579 {
00580 const char * tagpos = tag.data();
00581 const char * tagend = tagpos + tag.size();
00582 (void)read_start_of_first_chunk(&tagpos, tagend,
00583 &num_ent, &coll_freq);
00584 }
00585
00586 tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
00587
00588 tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
00589 tag += chunk;
00590 table->add(key, tag);
00591 return;
00592 }
00593
00594 DEBUGLINE(DB, "QPostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606 const char *keypos = orig_key.data();
00607 const char *keyend = keypos + orig_key.size();
00608 if (!check_tname_in_key(&keypos, keyend, tname)) {
00609 throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
00610 }
00611 Xapian::docid initial_did;
00612 if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
00613 report_read_error(keypos);
00614 }
00615 string new_key;
00616 if (initial_did != first_did) {
00617
00618
00619
00620
00621 make_key(tname, first_did, new_key);
00622 table->del(orig_key);
00623 } else {
00624 new_key = orig_key;
00625 }
00626
00627
00628 tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
00629
00630 tag += chunk;
00631 table->add(new_key, tag);
00632 }
00633 }
00634
00639 void QuartzPostList::read_number_of_entries(const char ** posptr,
00640 const char * end,
00641 Xapian::doccount * number_of_entries_ptr,
00642 Xapian::termcount * collection_freq_ptr)
00643 {
00644 if (!unpack_uint(posptr, end, number_of_entries_ptr))
00645 report_read_error(*posptr);
00646 if (!unpack_uint(posptr, end, collection_freq_ptr))
00647 report_read_error(*posptr);
00648 }
00649
00668 QuartzPostList::QuartzPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_,
00669 const Btree * table_,
00670 const Btree * positiontable_,
00671 const string & tname_)
00672 : this_db(this_db_),
00673 table(table_),
00674 positiontable(positiontable_),
00675 tname(tname_),
00676 cursor(table->cursor_get()),
00677 is_at_end(false),
00678 have_started(false)
00679 {
00680 DEBUGCALL(DB, void, "QuartzPostList::QuartzPostList",
00681 this_db_.get() << ", " << table_ << ", " <<
00682 positiontable_ << ", " << tname_);
00683 string key;
00684 make_key(tname, key);
00685 int found = cursor->find_entry(key);
00686 if (!found) {
00687 number_of_entries = 0;
00688 is_at_end = true;
00689 pos = 0;
00690 end = 0;
00691 first_did_in_chunk = 0;
00692 last_did_in_chunk = 0;
00693 return;
00694 }
00695 cursor->read_tag();
00696 pos = cursor->current_tag.data();
00697 end = pos + cursor->current_tag.size();
00698
00699 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00700 first_did_in_chunk = did;
00701 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00702 &is_last_chunk);
00703 read_wdf_and_length(&pos, end, &wdf, &doclength);
00704 }
00705
00706 QuartzPostList::~QuartzPostList()
00707 {
00708 DEBUGCALL(DB, void, "QuartzPostList::~QuartzPostList", "");
00709 }
00710
00711 bool
00712 QuartzPostList::next_in_chunk()
00713 {
00714 DEBUGCALL(DB, bool, "QuartzPostList::next_in_chunk", "");
00715 if (pos == end) RETURN(false);
00716
00717 read_did_increase(&pos, end, &did);
00718 read_wdf_and_length(&pos, end, &wdf, &doclength);
00719
00720
00721 Assert(did <= last_did_in_chunk);
00722 Assert(did < last_did_in_chunk || pos == end);
00723 Assert(pos != end || did == last_did_in_chunk);
00724
00725 RETURN(true);
00726 }
00727
00728 void
00729 QuartzPostList::next_chunk()
00730 {
00731 DEBUGCALL(DB, void, "QuartzPostList::next_chunk", "");
00732 if (is_last_chunk) {
00733 is_at_end = true;
00734 return;
00735 }
00736
00737 cursor->next();
00738 if (cursor->after_end()) {
00739 is_at_end = true;
00740 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00741 tname + "'");
00742 }
00743 const char * keypos = cursor->current_key.data();
00744 const char * keyend = keypos + cursor->current_key.size();
00745
00746 if (!check_tname_in_key_lite(&keypos, keyend, tname)) {
00747 is_at_end = true;
00748 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00749 tname + "'");
00750 }
00751
00752 Xapian::docid newdid;
00753 if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
00754 report_read_error(keypos);
00755 }
00756 if (newdid <= did) {
00757 throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
00758 om_tostring(newdid) +
00759 ") is not greater than final document ID in previous chunk (" +
00760 om_tostring(did) + ")");
00761 }
00762 did = newdid;
00763
00764 cursor->read_tag();
00765 pos = cursor->current_tag.data();
00766 end = pos + cursor->current_tag.size();
00767
00768 first_did_in_chunk = did;
00769 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00770 &is_last_chunk);
00771 read_wdf_and_length(&pos, end, &wdf, &doclength);
00772 }
00773
00774 PositionList *
00775 QuartzPostList::read_position_list()
00776 {
00777 DEBUGCALL(DB, PositionList *, "QuartzPostList::read_position_list", "");
00778
00779 positionlist.read_data(positiontable, did, tname);
00780
00781 RETURN(&positionlist);
00782 }
00783
00784 PositionList *
00785 QuartzPostList::open_position_list() const
00786 {
00787 DEBUGCALL(DB, PositionList *, "QuartzPostList::open_position_list", "");
00788
00789 AutoPtr<QuartzPositionList> poslist(new QuartzPositionList());
00790 poslist->read_data(positiontable, did, tname);
00791
00792 RETURN(poslist.release());
00793 }
00794
00795 PostList *
00796 QuartzPostList::next(Xapian::weight w_min)
00797 {
00798 DEBUGCALL(DB, PostList *, "QuartzPostList::next", w_min);
00799 (void)w_min;
00800
00801 if (!have_started) {
00802 have_started = true;
00803 } else {
00804 if (!next_in_chunk()) next_chunk();
00805 }
00806
00807 DEBUGLINE(DB, string("Moved to ") <<
00808 (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
00809 om_tostring(did) + ", " + om_tostring(wdf) + ", " +
00810 om_tostring(doclength)));
00811
00812 RETURN(NULL);
00813 }
00814
00815 bool
00816 QuartzPostList::current_chunk_contains(Xapian::docid desired_did)
00817 {
00818 DEBUGCALL(DB, bool, "QuartzPostList::current_chunk_contains", desired_did);
00819 if (desired_did >= first_did_in_chunk &&
00820 desired_did <= last_did_in_chunk) {
00821 RETURN(true);
00822 }
00823 RETURN(false);
00824 }
00825
00826 void
00827 QuartzPostList::move_to_chunk_containing(Xapian::docid desired_did)
00828 {
00829 DEBUGCALL(DB, void,
00830 "QuartzPostList::move_to_chunk_containing", desired_did);
00831 string key;
00832 make_key(tname, desired_did, key);
00833 (void) cursor->find_entry(key);
00834 Assert(!cursor->after_end());
00835
00836 const char * keypos = cursor->current_key.data();
00837 const char * keyend = keypos + cursor->current_key.size();
00838
00839 if (!check_tname_in_key_lite(&keypos, keyend, tname)) {
00840
00841 is_at_end = true;
00842 is_last_chunk = true;
00843 return;
00844 }
00845 is_at_end = false;
00846
00847 cursor->read_tag();
00848 pos = cursor->current_tag.data();
00849 end = pos + cursor->current_tag.size();
00850
00851 if (keypos == keyend) {
00852
00853 #ifdef XAPIAN_DEBUG
00854 Xapian::doccount old_number_of_entries = number_of_entries;
00855 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00856 Assert(old_number_of_entries == number_of_entries);
00857 #else
00858 did = read_start_of_first_chunk(&pos, end, 0, 0);
00859 #endif
00860 } else {
00861
00862 if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
00863 report_read_error(keypos);
00864 }
00865 }
00866
00867 first_did_in_chunk = did;
00868 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00869 &is_last_chunk);
00870 read_wdf_and_length(&pos, end, &wdf, &doclength);
00871
00872
00873
00874 if (desired_did > last_did_in_chunk) next_chunk();
00875 }
00876
00877 bool
00878 QuartzPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
00879 {
00880 DEBUGCALL(DB, bool,
00881 "QuartzPostList::move_forward_in_chunk_to_at_least", desired_did);
00882 if (desired_did > last_did_in_chunk) {
00883 pos = end;
00884 RETURN(false);
00885 }
00886 while (did < desired_did) {
00887
00888
00889 bool at_end_of_chunk = !next_in_chunk();
00890 if (at_end_of_chunk) RETURN(false);
00891 }
00892 RETURN(true);
00893 }
00894
00895 PostList *
00896 QuartzPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
00897 {
00898 DEBUGCALL(DB, PostList *,
00899 "QuartzPostList::skip_to", desired_did << ", " << w_min);
00900 (void)w_min;
00901
00902
00903 have_started = true;
00904
00905
00906 if (is_at_end || desired_did <= did) RETURN(NULL);
00907
00908
00909 if (!current_chunk_contains(desired_did)) {
00910 move_to_chunk_containing(desired_did);
00911
00912
00913 if (is_at_end) RETURN(NULL);
00914 }
00915
00916
00917 bool have_document = move_forward_in_chunk_to_at_least(desired_did);
00918 #ifdef XAPIAN_DEBUG
00919 Assert(have_document);
00920 #else
00921 (void)have_document;
00922 #endif
00923
00924 DEBUGLINE(DB, string("Skipped to ") <<
00925 (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
00926 om_tostring(did) + ", " + om_tostring(wdf) + ", " +
00927 om_tostring(doclength) + "."));
00928
00929 RETURN(NULL);
00930 }
00931
00932 string
00933 QuartzPostList::get_description() const
00934 {
00935 return tname + ":" + om_tostring(number_of_entries);
00936 }
00937
00938
00939 Xapian::docid
00940 QuartzPostListTable::get_chunk(const string &tname,
00941 Xapian::docid did, bool adding,
00942 QPostlistChunkReader ** from, QPostlistChunkWriter **to)
00943 {
00944
00945 string key;
00946 make_key(tname, did, key);
00947
00948
00949 AutoPtr<Bcursor> cursor(cursor_get());
00950
00951 cursor->find_entry(key);
00952 Assert(!cursor->after_end());
00953
00954 const char * keypos = cursor->current_key.data();
00955 const char * keyend = keypos + cursor->current_key.size();
00956
00957 if (!check_tname_in_key(&keypos, keyend, tname)) {
00958
00959 if (!adding)
00960 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
00961
00962 *from = NULL;
00963 *to = new QPostlistChunkWriter("", true, tname, true);
00964 return Xapian::docid(-1);
00965 }
00966
00967
00968
00969 bool is_first_chunk = (keypos == keyend);
00970
00971 cursor->read_tag();
00972 const char * pos = cursor->current_tag.data();
00973 const char * end = pos + cursor->current_tag.size();
00974 Xapian::docid first_did_in_chunk;
00975 if (is_first_chunk) {
00976 first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
00977 } else {
00978 if (!unpack_uint_preserving_sort(&keypos, keyend,
00979 &first_did_in_chunk)) {
00980 report_read_error(keypos);
00981 }
00982 }
00983
00984 bool is_last_chunk;
00985 Xapian::docid last_did_in_chunk;
00986 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
00987 *to = new QPostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
00988 is_last_chunk);
00989 if (did > last_did_in_chunk) {
00990
00991
00992
00993 *from = NULL;
00994 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
00995 string(pos, end));
00996 } else {
00997 *from = new QPostlistChunkReader(first_did_in_chunk, string(pos, end));
00998 }
00999 if (is_last_chunk) return Xapian::docid(-1);
01000
01001
01002 cursor->next();
01003 if (cursor->after_end()) {
01004 throw Xapian::DatabaseCorruptError("Expected another key but found none");
01005 }
01006 const char *kpos = cursor->current_key.data();
01007 const char *kend = kpos + cursor->current_key.size();
01008 if (!check_tname_in_key(&kpos, kend, tname)) {
01009 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
01010 }
01011
01012
01013 Xapian::docid first_did_of_next_chunk;
01014 if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
01015 report_read_error(kpos);
01016 }
01017 return first_did_of_next_chunk - 1;
01018 }
01019
01020 void
01021 QuartzPostListTable::merge_changes(
01022 const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
01023 const map<Xapian::docid, Xapian::termcount> & doclens,
01024 const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
01025 {
01026 map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
01027 for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
01028 if (i->second.empty()) continue;
01029 string tname = i->first;
01030 {
01031
01032
01033 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
01034 Assert(deltas != freq_deltas.end());
01035
01036 string current_key;
01037 make_key(tname, current_key);
01038 string tag;
01039 (void)get_exact_entry(current_key, tag);
01040
01041
01042 const char *pos = tag.data();
01043 const char *end = pos + tag.size();
01044 Xapian::termcount termfreq, collfreq;
01045 Xapian::docid firstdid, lastdid;
01046 bool islast;
01047 if (pos == end) {
01048 termfreq = 0;
01049 collfreq = 0;
01050 firstdid = 0;
01051 lastdid = 0;
01052 islast = true;
01053 } else {
01054 firstdid = read_start_of_first_chunk(&pos, end,
01055 &termfreq, &collfreq);
01056
01057 lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
01058 }
01059
01060 termfreq += deltas->second.first;
01061 if (termfreq == 0) {
01062
01063
01064 del(current_key);
01065 if (islast) continue;
01066 AutoPtr<Bcursor> cursor(cursor_get());
01067 (void)cursor->find_entry(current_key);
01068
01069 cursor->next();
01070 while (!cursor->after_end()) {
01071 const char *kpos = cursor->current_key.data();
01072 const char *kend = kpos + cursor->current_key.size();
01073 if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
01074 cursor->del();
01075 }
01076 continue;
01077 }
01078 collfreq += deltas->second.second;
01079
01080
01081 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
01082 newhdr += make_start_of_chunk(islast, firstdid, lastdid);
01083 if (pos == end) {
01084 add(current_key, newhdr);
01085 } else {
01086 Assert((size_t)(pos - tag.data()) <= tag.size());
01087 tag.replace(0, pos - tag.data(), newhdr);
01088 add(current_key, tag);
01089 }
01090 }
01091 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
01092 j = i->second.begin();
01093 Assert(j != i->second.end());
01094
01095 Xapian::docid max_did;
01096 QPostlistChunkReader *from;
01097 QPostlistChunkWriter *to;
01098 max_did = get_chunk(tname, j->first, j->second.first == 'A',
01099 &from, &to);
01100 for ( ; j != i->second.end(); ++j) {
01101 Xapian::docid did = j->first;
01102
01103 next_chunk:
01104 if (from) while (!from->is_at_end()) {
01105 Xapian::docid copy_did = from->get_docid();
01106 if (copy_did >= did) {
01107 if (copy_did == did) {
01108 Assert(j->second.first != 'A');
01109 from->next();
01110 }
01111 break;
01112 }
01113 to->append(this, copy_did,
01114 from->get_wdf(), from->get_doclength());
01115 from->next();
01116 }
01117 if ((!from || from->is_at_end()) && did > max_did) {
01118 delete from;
01119 to->flush(this);
01120 delete to;
01121 max_did = get_chunk(tname, did, false, &from, &to);
01122 goto next_chunk;
01123 }
01124
01125 if (j->second.first != 'D') {
01126 map<Xapian::docid, Xapian::termcount>::const_iterator k = doclens.find(did);
01127 Assert(k != doclens.end());
01128 Xapian::termcount new_doclen = k->second;
01129 Xapian::termcount new_wdf = j->second.second;
01130
01131 to->append(this, did, new_wdf, new_doclen);
01132 }
01133 }
01134
01135 if (from) {
01136 while (!from->is_at_end()) {
01137 to->append(this, from->get_docid(),
01138 from->get_wdf(), from->get_doclength());
01139 from->next();
01140 }
01141 delete from;
01142 }
01143 to->flush(this);
01144 delete to;
01145 }
01146 }