00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024 #include "flint_postlist.h"
00025
00026 #include "flint_cursor.h"
00027 #include "flint_database.h"
00028 #include "flint_utils.h"
00029 #include "noreturn.h"
00030 #include "omdebug.h"
00031 #include "utils.h"
00032
00033 Xapian::doccount
00034 FlintPostListTable::get_termfreq(const string & term) const
00035 {
00036 string key = make_key(term);
00037 string tag;
00038 if (!get_exact_entry(key, tag)) return 0;
00039
00040 Xapian::doccount termfreq;
00041 const char * p = tag.data();
00042 FlintPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
00043 return termfreq;
00044 }
00045
00046 Xapian::termcount
00047 FlintPostListTable::get_collection_freq(const string & term) const
00048 {
00049 string key = make_key(term);
00050 string tag;
00051 if (!get_exact_entry(key, tag)) return 0;
00052
00053 Xapian::termcount collfreq;
00054 const char * p = tag.data();
00055 FlintPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
00056 return collfreq;
00057 }
00058
00059
00060
00061
00062
00063
00064 const unsigned int CHUNKSIZE = 2000;
00065
00072 class PostlistChunkWriter {
00073 public:
00074 PostlistChunkWriter(const string &orig_key_,
00075 bool is_first_chunk_,
00076 const string &tname_,
00077 bool is_last_chunk_);
00078
00080 void append(FlintTable * table, Xapian::docid did,
00081 Xapian::termcount wdf, flint_doclen_t doclen);
00082
00084 void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
00085 const string & s) {
00086 Assert(!started);
00087 first_did = first_did_;
00088 current_did = current_did_;
00089 if (!s.empty()) {
00090 chunk.append(s);
00091 started = true;
00092 }
00093 }
00094
00099 void flush(FlintTable *table);
00100
00101 private:
00102 string orig_key;
00103 string tname;
00104 bool is_first_chunk;
00105 bool is_last_chunk;
00106 bool started;
00107
00108 Xapian::docid first_did;
00109 Xapian::docid current_did;
00110
00111 string chunk;
00112 };
00113
00114
00115
00117 XAPIAN_NORETURN(static void report_read_error(const char * position));
00118 static void report_read_error(const char * position)
00119 {
00120 if (position == 0) {
00121
00122 throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
00123 }
00124
00125 throw Xapian::RangeError("Value in posting list too large.");
00126 }
00127
00128 static inline bool get_tname_from_key(const char **src, const char *end,
00129 string &tname)
00130 {
00131 return unpack_string_preserving_sort(src, end, tname);
00132 }
00133
00134 static inline bool
00135 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
00136 {
00137 string tname_in_key;
00138
00139
00140 if (!get_tname_from_key(keypos, keyend, tname_in_key)) {
00141 report_read_error(*keypos);
00142 }
00143
00144
00145 return tname_in_key == tname;
00146 }
00147
00148 static inline bool
00149 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
00150 {
00151 if (*keypos == keyend) return false;
00152
00153 return check_tname_in_key_lite(keypos, keyend, tname);
00154 }
00155
00157 static Xapian::docid
00158 read_start_of_first_chunk(const char ** posptr,
00159 const char * end,
00160 Xapian::doccount * number_of_entries_ptr,
00161 Xapian::termcount * collection_freq_ptr)
00162 {
00163 DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk",
00164 (const void *)posptr << ", " <<
00165 (const void *)end << ", " <<
00166 (void *)number_of_entries_ptr << ", " <<
00167 (void *)collection_freq_ptr);
00168
00169 FlintPostList::read_number_of_entries(posptr, end,
00170 number_of_entries_ptr, collection_freq_ptr);
00171 if (number_of_entries_ptr)
00172 DEBUGLINE(DB, "number_of_entries = " << *number_of_entries_ptr);
00173 if (collection_freq_ptr)
00174 DEBUGLINE(DB, "collection_freq = " << *collection_freq_ptr);
00175
00176 Xapian::docid did;
00177
00178 if (!unpack_uint(posptr, end, &did))
00179 report_read_error(*posptr);
00180 ++did;
00181 DEBUGLINE(DB, "doc_id = " << did);
00182 RETURN(did);
00183 }
00184
00185 static inline void read_did_increase(const char ** posptr,
00186 const char * end,
00187 Xapian::docid * did_ptr)
00188 {
00189 Xapian::docid did_increase;
00190 if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
00191 *did_ptr += did_increase + 1;
00192 }
00193
00195 static inline void read_wdf_and_length(const char ** posptr,
00196 const char * end,
00197 Xapian::termcount * wdf_ptr,
00198 flint_doclen_t * doclength_ptr)
00199 {
00200 if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
00201 if (!unpack_uint(posptr, end, doclength_ptr)) report_read_error(*posptr);
00202 }
00203
00205 static Xapian::docid
00206 read_start_of_chunk(const char ** posptr,
00207 const char * end,
00208 Xapian::docid first_did_in_chunk,
00209 bool * is_last_chunk_ptr)
00210 {
00211 DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk",
00212 reinterpret_cast<const void*>(posptr) << ", " <<
00213 reinterpret_cast<const void*>(end) << ", " <<
00214 first_did_in_chunk << ", " <<
00215 reinterpret_cast<const void*>(is_last_chunk_ptr));
00216
00217
00218 if (!unpack_bool(posptr, end, is_last_chunk_ptr))
00219 report_read_error(*posptr);
00220 if (is_last_chunk_ptr)
00221 DEBUGLINE(DB, "is_last_chunk = " << *is_last_chunk_ptr);
00222
00223
00224 Xapian::docid increase_to_last;
00225 if (!unpack_uint(posptr, end, &increase_to_last))
00226 report_read_error(*posptr);
00227 ++increase_to_last;
00228 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
00229 DEBUGLINE(DB, "last_did_in_chunk = " << last_did_in_chunk);
00230 RETURN(last_did_in_chunk);
00231 }
00232
00233 static string make_wdf_and_length(Xapian::termcount wdf, flint_doclen_t doclength)
00234 {
00235 return pack_uint(wdf) + pack_uint(doclength);
00236 }
00237
00238 static void write_start_of_chunk(string & chunk,
00239 unsigned int start_of_chunk_header,
00240 unsigned int end_of_chunk_header,
00241 bool is_last_chunk,
00242 Xapian::docid first_did_in_chunk,
00243 Xapian::docid last_did_in_chunk)
00244 {
00245 Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
00246 Assert(last_did_in_chunk >= first_did_in_chunk);
00247 Xapian::docid increase_to_last = last_did_in_chunk - first_did_in_chunk;
00248
00249 chunk.replace(start_of_chunk_header,
00250 end_of_chunk_header - start_of_chunk_header,
00251 pack_bool(is_last_chunk) + pack_uint(increase_to_last - 1));
00252
00253
00254
00255 }
00256
00261 class PostlistChunkReader {
00262 public:
00268 PostlistChunkReader(Xapian::docid first_did, const string & data_)
00269 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
00270 {
00271 if (!at_end) read_wdf_and_length(&pos, end, &wdf, &doclength);
00272 }
00273
00274 Xapian::docid get_docid() const {
00275 return did;
00276 }
00277 Xapian::termcount get_wdf() const {
00278 return wdf;
00279 }
00280 flint_doclen_t get_doclength() const {
00281 DEBUGCALL(DB, flint_doclen_t, "PostlistChunkReader::get_doclength", "");
00282 RETURN(doclength);
00283 }
00284
00285 bool is_at_end() const {
00286 return at_end;
00287 }
00288
00291 void next();
00292
00293 private:
00294 string data;
00295
00296 const char *pos;
00297 const char *end;
00298
00299 bool at_end;
00300
00301 Xapian::docid did;
00302 Xapian::termcount wdf;
00303 flint_doclen_t doclength;
00304 };
00305
00306 void
00307 PostlistChunkReader::next()
00308 {
00309 if (pos == end) {
00310 at_end = true;
00311 } else {
00312 read_did_increase(&pos, end, &did);
00313 read_wdf_and_length(&pos, end, &wdf, &doclength);
00314 }
00315 }
00316
00317 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
00318 bool is_first_chunk_,
00319 const string &tname_,
00320 bool is_last_chunk_)
00321 : orig_key(orig_key_),
00322 tname(tname_), is_first_chunk(is_first_chunk_),
00323 is_last_chunk(is_last_chunk_),
00324 started(false)
00325 {
00326 DEBUGCALL(DB, void, "PostlistChunkWriter::PostlistChunkWriter",
00327 orig_key_ << ", " << is_first_chunk_ << ", " << tname_ << ", " <<
00328 is_last_chunk_);
00329 }
00330
00331 void
00332 PostlistChunkWriter::append(FlintTable * table, Xapian::docid did,
00333 Xapian::termcount wdf, flint_doclen_t doclen)
00334 {
00335 if (!started) {
00336 started = true;
00337 first_did = did;
00338 } else {
00339 Assert(did > current_did);
00340
00341 if (chunk.size() >= CHUNKSIZE) {
00342 bool save_is_last_chunk = is_last_chunk;
00343 is_last_chunk = false;
00344 flush(table);
00345 is_last_chunk = save_is_last_chunk;
00346 is_first_chunk = false;
00347 first_did = did;
00348 chunk = "";
00349 orig_key = FlintPostListTable::make_key(tname, first_did);
00350 } else {
00351 chunk.append(pack_uint(did - current_did - 1));
00352 }
00353 }
00354 current_did = did;
00355 chunk.append(make_wdf_and_length(wdf, doclen));
00356 }
00357
00360 static inline string
00361 make_start_of_first_chunk(Xapian::termcount entries,
00362 Xapian::termcount collectionfreq,
00363 Xapian::docid new_did)
00364 {
00365 return pack_uint(entries) + pack_uint(collectionfreq) + pack_uint(new_did - 1);
00366 }
00367
00370 static inline string
00371 make_start_of_chunk(bool new_is_last_chunk,
00372 Xapian::docid new_first_did,
00373 Xapian::docid new_final_did)
00374 {
00375 Assert(new_final_did >= new_first_did);
00376 return pack_bool(new_is_last_chunk) +
00377 pack_uint(new_final_did - new_first_did - 1);
00378 }
00379
00380 void
00381 PostlistChunkWriter::flush(FlintTable *table)
00382 {
00383 DEBUGCALL(DB, void, "PostlistChunkWriter::flush", table);
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 if (!started) {
00394
00395
00396
00397
00398
00399
00400
00401
00402 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
00403 Assert(!orig_key.empty());
00404 if (is_first_chunk) {
00405 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
00406 if (is_last_chunk) {
00407
00408
00409
00410 table->del(orig_key);
00411 return;
00412 }
00413
00414
00415
00416
00417
00418
00419 AutoPtr<FlintCursor> cursor(table->cursor_get());
00420
00421 if (!cursor->find_entry(orig_key)) {
00422 throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
00423 }
00424
00425
00426
00427 Xapian::termcount num_ent, coll_freq;
00428 {
00429 cursor->read_tag();
00430 const char *tagpos = cursor->current_tag.data();
00431 const char *tagend = tagpos + cursor->current_tag.size();
00432
00433 (void)read_start_of_first_chunk(&tagpos, tagend,
00434 &num_ent, &coll_freq);
00435 }
00436
00437
00438 cursor->next();
00439 if (cursor->after_end()) {
00440 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00441 }
00442 const char *kpos = cursor->current_key.data();
00443 const char *kend = kpos + cursor->current_key.size();
00444 if (!check_tname_in_key(&kpos, kend, tname)) {
00445 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00446 }
00447
00448
00449 Xapian::docid new_first_did;
00450 if (!unpack_uint_preserving_sort(&kpos, kend,
00451 &new_first_did)) {
00452 report_read_error(kpos);
00453 }
00454
00455 cursor->read_tag();
00456 const char *tagpos = cursor->current_tag.data();
00457 const char *tagend = tagpos + cursor->current_tag.size();
00458
00459
00460 bool new_is_last_chunk;
00461 Xapian::docid new_last_did_in_chunk =
00462 read_start_of_chunk(&tagpos, tagend, new_first_did,
00463 &new_is_last_chunk);
00464
00465 string chunk_data(tagpos, tagend);
00466
00467
00468 table->del(cursor->current_key);
00469
00470
00471 string tag;
00472 tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
00473 tag += make_start_of_chunk(new_is_last_chunk,
00474 new_first_did,
00475 new_last_did_in_chunk);
00476 tag += chunk_data;
00477 table->add(orig_key, tag);
00478 return;
00479 }
00480
00481 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
00482
00483
00484
00485
00486
00487 table->del(orig_key);
00488
00489 if (is_last_chunk) {
00490 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
00491
00492 AutoPtr<FlintCursor> cursor(table->cursor_get());
00493
00494
00495
00496 if (cursor->find_entry(orig_key)) {
00497 throw Xapian::DatabaseCorruptError("Flint key not deleted as we expected");
00498 }
00499
00500 const char * keypos = cursor->current_key.data();
00501 const char * keyend = keypos + cursor->current_key.size();
00502 if (!check_tname_in_key(&keypos, keyend, tname)) {
00503 throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
00504 }
00505
00506 bool is_prev_first_chunk = (keypos == keyend);
00507
00508
00509 cursor->read_tag();
00510 string tag = cursor->current_tag;
00511
00512 const char *tagpos = tag.data();
00513 const char *tagend = tagpos + tag.size();
00514
00515
00516 Xapian::docid first_did_in_chunk;
00517 if (is_prev_first_chunk) {
00518 first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
00519 0, 0);
00520 } else {
00521 if (!unpack_uint_preserving_sort(&keypos, keyend,
00522 &first_did_in_chunk))
00523 report_read_error(keypos);
00524 }
00525 bool wrong_is_last_chunk;
00526 string::size_type start_of_chunk_header = tagpos - tag.data();
00527 Xapian::docid last_did_in_chunk =
00528 read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
00529 &wrong_is_last_chunk);
00530 string::size_type end_of_chunk_header = tagpos - tag.data();
00531
00532
00533 write_start_of_chunk(tag,
00534 start_of_chunk_header,
00535 end_of_chunk_header,
00536 true,
00537 first_did_in_chunk,
00538 last_did_in_chunk);
00539 table->add(cursor->current_key, tag);
00540 }
00541 } else {
00542 DEBUGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
00543
00544
00545
00546
00547
00548
00549 string tag;
00550
00551
00552
00553
00554 if (is_first_chunk) {
00555
00556
00557
00558 DEBUGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
00559 string key = FlintPostListTable::make_key(tname);
00560 bool ok = table->get_exact_entry(key, tag);
00561 (void)ok;
00562 Assert(ok);
00563 Assert(!tag.empty());
00564
00565 Xapian::termcount num_ent, coll_freq;
00566 {
00567 const char * tagpos = tag.data();
00568 const char * tagend = tagpos + tag.size();
00569 (void)read_start_of_first_chunk(&tagpos, tagend,
00570 &num_ent, &coll_freq);
00571 }
00572
00573 tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
00574
00575 tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
00576 tag += chunk;
00577 table->add(key, tag);
00578 return;
00579 }
00580
00581 DEBUGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593 const char *keypos = orig_key.data();
00594 const char *keyend = keypos + orig_key.size();
00595 if (!check_tname_in_key(&keypos, keyend, tname)) {
00596 throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
00597 }
00598 Xapian::docid initial_did;
00599 if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
00600 report_read_error(keypos);
00601 }
00602 string new_key;
00603 if (initial_did != first_did) {
00604
00605
00606
00607
00608 new_key = FlintPostListTable::make_key(tname, first_did);
00609 table->del(orig_key);
00610 } else {
00611 new_key = orig_key;
00612 }
00613
00614
00615 tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
00616
00617 tag += chunk;
00618 table->add(new_key, tag);
00619 }
00620 }
00621
00626 void FlintPostList::read_number_of_entries(const char ** posptr,
00627 const char * end,
00628 Xapian::doccount * number_of_entries_ptr,
00629 Xapian::termcount * collection_freq_ptr)
00630 {
00631 if (!unpack_uint(posptr, end, number_of_entries_ptr))
00632 report_read_error(*posptr);
00633 if (!unpack_uint(posptr, end, collection_freq_ptr))
00634 report_read_error(*posptr);
00635 }
00636
00656 FlintPostList::FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_,
00657 const string & tname_)
00658 : this_db(this_db_),
00659 tname(tname_),
00660 have_started(false),
00661 cursor(this_db->postlist_table.cursor_get()),
00662 is_at_end(false)
00663 {
00664 DEBUGCALL(DB, void, "FlintPostList::ostList",
00665 this_db_.get() << ", " << tname_);
00666 string key = FlintPostListTable::make_key(tname);
00667 int found = cursor->find_entry(key);
00668 if (!found) {
00669 number_of_entries = 0;
00670 is_at_end = true;
00671 pos = 0;
00672 end = 0;
00673 first_did_in_chunk = 0;
00674 last_did_in_chunk = 0;
00675 return;
00676 }
00677 cursor->read_tag();
00678 pos = cursor->current_tag.data();
00679 end = pos + cursor->current_tag.size();
00680
00681 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00682 first_did_in_chunk = did;
00683 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00684 &is_last_chunk);
00685 read_wdf_and_length(&pos, end, &wdf, &doclength);
00686 }
00687
00688 FlintPostList::~FlintPostList()
00689 {
00690 DEBUGCALL(DB, void, "FlintPostList::~FlintPostList", "");
00691 }
00692
00693 bool
00694 FlintPostList::next_in_chunk()
00695 {
00696 DEBUGCALL(DB, bool, "FlintPostList::next_in_chunk", "");
00697 if (pos == end) RETURN(false);
00698
00699 read_did_increase(&pos, end, &did);
00700 read_wdf_and_length(&pos, end, &wdf, &doclength);
00701
00702
00703 Assert(did <= last_did_in_chunk);
00704 Assert(did < last_did_in_chunk || pos == end);
00705 Assert(pos != end || did == last_did_in_chunk);
00706
00707 RETURN(true);
00708 }
00709
00710 void
00711 FlintPostList::next_chunk()
00712 {
00713 DEBUGCALL(DB, void, "FlintPostList::next_chunk", "");
00714 if (is_last_chunk) {
00715 is_at_end = true;
00716 return;
00717 }
00718
00719 cursor->next();
00720 if (cursor->after_end()) {
00721 is_at_end = true;
00722 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00723 tname + "'");
00724 }
00725 const char * keypos = cursor->current_key.data();
00726 const char * keyend = keypos + cursor->current_key.size();
00727
00728 if (!check_tname_in_key_lite(&keypos, keyend, tname)) {
00729 is_at_end = true;
00730 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00731 tname + "'");
00732 }
00733
00734 Xapian::docid newdid;
00735 if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
00736 report_read_error(keypos);
00737 }
00738 if (newdid <= did) {
00739 throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
00740 om_tostring(newdid) +
00741 ") is not greater than final document ID in previous chunk (" +
00742 om_tostring(did) + ")");
00743 }
00744 did = newdid;
00745
00746 cursor->read_tag();
00747 pos = cursor->current_tag.data();
00748 end = pos + cursor->current_tag.size();
00749
00750 first_did_in_chunk = did;
00751 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00752 &is_last_chunk);
00753 read_wdf_and_length(&pos, end, &wdf, &doclength);
00754 }
00755
00756 PositionList *
00757 FlintPostList::read_position_list()
00758 {
00759 DEBUGCALL(DB, PositionList *, "FlintPostList::read_position_list", "");
00760 positionlist.read_data(&this_db->position_table, did, tname);
00761 RETURN(&positionlist);
00762 }
00763
00764 PositionList *
00765 FlintPostList::open_position_list() const
00766 {
00767 DEBUGCALL(DB, PositionList *, "FlintPostList::open_position_list", "");
00768 RETURN(new FlintPositionList(&this_db->position_table, did, tname));
00769 }
00770
00771 PostList *
00772 FlintPostList::next(Xapian::weight w_min)
00773 {
00774 DEBUGCALL(DB, PostList *, "FlintPostList::next", w_min);
00775 (void)w_min;
00776
00777 if (!have_started) {
00778 have_started = true;
00779 } else {
00780 if (!next_in_chunk()) next_chunk();
00781 }
00782
00783 DEBUGLINE(DB, string("Moved to ") <<
00784 (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
00785 om_tostring(did) + ", " + om_tostring(wdf) + ", " +
00786 om_tostring(doclength)));
00787
00788 RETURN(NULL);
00789 }
00790
00791 bool
00792 FlintPostList::current_chunk_contains(Xapian::docid desired_did)
00793 {
00794 DEBUGCALL(DB, bool, "FlintPostList::current_chunk_contains", desired_did);
00795 if (desired_did >= first_did_in_chunk &&
00796 desired_did <= last_did_in_chunk) {
00797 RETURN(true);
00798 }
00799 RETURN(false);
00800 }
00801
00802 void
00803 FlintPostList::move_to_chunk_containing(Xapian::docid desired_did)
00804 {
00805 DEBUGCALL(DB, void,
00806 "FlintPostList::move_to_chunk_containing", desired_did);
00807 (void)cursor->find_entry(FlintPostListTable::make_key(tname, desired_did));
00808 Assert(!cursor->after_end());
00809
00810 const char * keypos = cursor->current_key.data();
00811 const char * keyend = keypos + cursor->current_key.size();
00812
00813 if (!check_tname_in_key_lite(&keypos, keyend, tname)) {
00814
00815 is_at_end = true;
00816 is_last_chunk = true;
00817 return;
00818 }
00819 is_at_end = false;
00820
00821 cursor->read_tag();
00822 pos = cursor->current_tag.data();
00823 end = pos + cursor->current_tag.size();
00824
00825 if (keypos == keyend) {
00826
00827 #ifdef XAPIAN_DEBUG
00828 Xapian::doccount old_number_of_entries = number_of_entries;
00829 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00830 Assert(old_number_of_entries == number_of_entries);
00831 #else
00832 did = read_start_of_first_chunk(&pos, end, NULL, NULL);
00833 #endif
00834 } else {
00835
00836 if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
00837 report_read_error(keypos);
00838 }
00839 }
00840
00841 first_did_in_chunk = did;
00842 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00843 &is_last_chunk);
00844 read_wdf_and_length(&pos, end, &wdf, &doclength);
00845
00846
00847
00848 if (desired_did > last_did_in_chunk) next_chunk();
00849 }
00850
00851 bool
00852 FlintPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
00853 {
00854 DEBUGCALL(DB, bool,
00855 "FlintPostList::move_forward_in_chunk_to_at_least", desired_did);
00856 if (desired_did > last_did_in_chunk) {
00857 pos = end;
00858 RETURN(false);
00859 }
00860 while (did < desired_did) {
00861
00862
00863 bool at_end_of_chunk = !next_in_chunk();
00864 if (at_end_of_chunk) RETURN(false);
00865 }
00866 RETURN(true);
00867 }
00868
00869 PostList *
00870 FlintPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
00871 {
00872 DEBUGCALL(DB, PostList *,
00873 "FlintPostList::skip_to", desired_did << ", " << w_min);
00874 (void)w_min;
00875
00876
00877 have_started = true;
00878
00879
00880 if (is_at_end || desired_did <= did) RETURN(NULL);
00881
00882
00883 if (!current_chunk_contains(desired_did)) {
00884 move_to_chunk_containing(desired_did);
00885
00886
00887 if (is_at_end) RETURN(NULL);
00888 }
00889
00890
00891 bool have_document = move_forward_in_chunk_to_at_least(desired_did);
00892 #ifdef XAPIAN_DEBUG
00893 Assert(have_document);
00894 #else
00895 (void)have_document;
00896 #endif
00897
00898 DEBUGLINE(DB, string("Skipped to ") <<
00899 (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
00900 om_tostring(did) + ", " + om_tostring(wdf) + ", " +
00901 om_tostring(doclength) + "."));
00902
00903 RETURN(NULL);
00904 }
00905
00906 string
00907 FlintPostList::get_description() const
00908 {
00909 return tname + ":" + om_tostring(number_of_entries);
00910 }
00911
00912
00913 Xapian::docid
00914 FlintPostListTable::get_chunk(const string &tname,
00915 Xapian::docid did, bool adding,
00916 PostlistChunkReader ** from, PostlistChunkWriter **to)
00917 {
00918
00919 string key = make_key(tname, did);
00920
00921
00922 AutoPtr<FlintCursor> cursor(cursor_get());
00923
00924 cursor->find_entry(key);
00925 Assert(!cursor->after_end());
00926
00927 const char * keypos = cursor->current_key.data();
00928 const char * keyend = keypos + cursor->current_key.size();
00929
00930 if (!check_tname_in_key(&keypos, keyend, tname)) {
00931
00932 if (!adding)
00933 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
00934
00935 *from = NULL;
00936 *to = new PostlistChunkWriter("", true, tname, true);
00937 return Xapian::docid(-1);
00938 }
00939
00940
00941
00942 bool is_first_chunk = (keypos == keyend);
00943
00944 cursor->read_tag();
00945 const char * pos = cursor->current_tag.data();
00946 const char * end = pos + cursor->current_tag.size();
00947 Xapian::docid first_did_in_chunk;
00948 if (is_first_chunk) {
00949 first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
00950 } else {
00951 if (!unpack_uint_preserving_sort(&keypos, keyend,
00952 &first_did_in_chunk)) {
00953 report_read_error(keypos);
00954 }
00955 }
00956
00957 bool is_last_chunk;
00958 Xapian::docid last_did_in_chunk;
00959 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
00960 *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
00961 is_last_chunk);
00962 if (did > last_did_in_chunk) {
00963
00964
00965
00966 *from = NULL;
00967 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
00968 string(pos, end));
00969 } else {
00970 *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
00971 }
00972 if (is_last_chunk) return Xapian::docid(-1);
00973
00974
00975 cursor->next();
00976 if (cursor->after_end()) {
00977 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00978 }
00979 const char *kpos = cursor->current_key.data();
00980 const char *kend = kpos + cursor->current_key.size();
00981 if (!check_tname_in_key(&kpos, kend, tname)) {
00982 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00983 }
00984
00985
00986 Xapian::docid first_did_of_next_chunk;
00987 if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
00988 report_read_error(kpos);
00989 }
00990 return first_did_of_next_chunk - 1;
00991 }
00992
00993 void
00994 FlintPostListTable::merge_changes(
00995 const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
00996 const map<Xapian::docid, Xapian::termcount> & doclens,
00997 const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
00998 {
00999 DEBUGCALL(DB, void, "FlintPostListTable::merge_changes", "mod_plists, doclens, freq_deltas");
01000 map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
01001 for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
01002 if (i->second.empty()) continue;
01003 string tname = i->first;
01004 {
01005
01006
01007 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
01008 Assert(deltas != freq_deltas.end());
01009
01010 string current_key = make_key(tname);
01011 string tag;
01012 (void)get_exact_entry(current_key, tag);
01013
01014
01015 const char *pos = tag.data();
01016 const char *end = pos + tag.size();
01017 Xapian::termcount termfreq, collfreq;
01018 Xapian::docid firstdid, lastdid;
01019 bool islast;
01020 if (pos == end) {
01021 termfreq = 0;
01022 collfreq = 0;
01023 firstdid = 0;
01024 lastdid = 0;
01025 islast = true;
01026 } else {
01027 firstdid = read_start_of_first_chunk(&pos, end,
01028 &termfreq, &collfreq);
01029
01030 lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
01031 }
01032
01033 termfreq += deltas->second.first;
01034 if (termfreq == 0) {
01035
01036
01037 if (islast) {
01038
01039 del(current_key);
01040 continue;
01041 }
01042 AutoPtr<FlintCursor> cursor(cursor_get());
01043 bool found = cursor->find_entry(current_key);
01044 Assert(found);
01045 if (!found) continue;
01046 while (cursor->del()) {
01047 const char *kpos = cursor->current_key.data();
01048 const char *kend = kpos + cursor->current_key.size();
01049 if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
01050 }
01051 continue;
01052 }
01053 collfreq += deltas->second.second;
01054
01055
01056 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
01057 newhdr += make_start_of_chunk(islast, firstdid, lastdid);
01058 if (pos == end) {
01059 add(current_key, newhdr);
01060 } else {
01061 Assert((size_t)(pos - tag.data()) <= tag.size());
01062 tag.replace(0, pos - tag.data(), newhdr);
01063 add(current_key, tag);
01064 }
01065 }
01066 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
01067 j = i->second.begin();
01068 Assert(j != i->second.end());
01069
01070 Xapian::docid max_did;
01071 PostlistChunkReader *from;
01072 PostlistChunkWriter *to;
01073 max_did = get_chunk(tname, j->first, j->second.first == 'A',
01074 &from, &to);
01075 for ( ; j != i->second.end(); ++j) {
01076 Xapian::docid did = j->first;
01077
01078 next_chunk:
01079 DEBUGLINE(DB, "Updating tname=" << tname << ", did=" << did);
01080 if (from) while (!from->is_at_end()) {
01081 Xapian::docid copy_did = from->get_docid();
01082 if (copy_did >= did) {
01083 if (copy_did == did) {
01084 Assert(j->second.first != 'A');
01085 from->next();
01086 }
01087 break;
01088 }
01089 to->append(this, copy_did,
01090 from->get_wdf(), from->get_doclength());
01091 from->next();
01092 }
01093 if ((!from || from->is_at_end()) && did > max_did) {
01094 delete from;
01095 to->flush(this);
01096 delete to;
01097 max_did = get_chunk(tname, did, false, &from, &to);
01098 goto next_chunk;
01099 }
01100
01101 if (j->second.first != 'D') {
01102 map<Xapian::docid, Xapian::termcount>::const_iterator k = doclens.find(did);
01103 Assert(k != doclens.end());
01104 Xapian::termcount new_doclen = k->second;
01105 Xapian::termcount new_wdf = j->second.second;
01106
01107 to->append(this, did, new_wdf, new_doclen);
01108 }
01109 }
01110
01111 if (from) {
01112 while (!from->is_at_end()) {
01113 to->append(this, from->get_docid(),
01114 from->get_wdf(), from->get_doclength());
01115 from->next();
01116 }
01117 delete from;
01118 }
01119 to->flush(this);
01120 delete to;
01121 }
01122 }