00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023 #include <stdio.h>
00024
00025 #include "omassert.h"
00026 #include "omdebug.h"
00027 #include "multi_postlist.h"
00028 #include "utils.h"
00029
00030 #ifdef XAPIAN_DEBUG_PARANOID
00031 #include "xapian/database.h"
00032 #endif
00033
00034 MultiPostList::MultiPostList(std::vector<LeafPostList *> & pls,
00035 const Xapian::Database &this_db_)
00036 : postlists(pls),
00037 this_db(this_db_),
00038 finished(false),
00039 currdoc(0),
00040 freq_initialised(false)
00041 {
00042 multiplier = pls.size();
00043 }
00044
00045
00046 MultiPostList::~MultiPostList()
00047 {
00048 std::vector<LeafPostList *>::iterator i;
00049 for (i = postlists.begin(); i != postlists.end(); i++) {
00050 delete *i;
00051 }
00052 postlists.clear();
00053 }
00054
00055 Xapian::doccount
00056 MultiPostList::get_termfreq() const
00057 {
00058 if (freq_initialised) return termfreq;
00059 DEBUGLINE(DB, "Calculating multiple term frequencies");
00060
00061
00062 termfreq = 0;
00063 std::vector<LeafPostList *>::const_iterator i;
00064 for (i = postlists.begin(); i != postlists.end(); i++) {
00065 termfreq += (*i)->get_termfreq();
00066 }
00067
00068 freq_initialised = true;
00069 return termfreq;
00070 }
00071
00072 Xapian::docid
00073 MultiPostList::get_docid() const
00074 {
00075 DEBUGCALL(DB, Xapian::docid, "MultiPostList::get_docid", "");
00076 Assert(!at_end());
00077 Assert(currdoc != 0);
00078 RETURN(currdoc);
00079 }
00080
00081 Xapian::doclength
00082 MultiPostList::get_doclength() const
00083 {
00084 DEBUGCALL(DB, Xapian::doclength, "MultiPostList::get_doclength", "");
00085 Assert(!at_end());
00086 Assert(currdoc != 0);
00087 Xapian::doclength result = postlists[(currdoc - 1) % multiplier]->get_doclength();
00088 AssertEqParanoid(result, this_db.get_doclength(get_docid()));
00089 RETURN(result);
00090 }
00091
00092 Xapian::termcount
00093 MultiPostList::get_wdf() const
00094 {
00095 return postlists[(currdoc - 1) % multiplier]->get_wdf();
00096 }
00097
00098 PositionList *
00099 MultiPostList::read_position_list()
00100 {
00101 return postlists[(currdoc - 1) % multiplier]->read_position_list();
00102 }
00103
00104 PositionList *
00105 MultiPostList::open_position_list() const
00106 {
00107 return postlists[(currdoc - 1) % multiplier]->open_position_list();
00108 }
00109
00110 PostList *
00111 MultiPostList::next(Xapian::weight w_min)
00112 {
00113 DEBUGCALL(DB, PostList *, "MultiPostList::next", w_min);
00114 Assert(!at_end());
00115
00116 Xapian::docid newdoc = 0;
00117 Xapian::docid offset = 1;
00118 std::vector<LeafPostList *>::iterator i;
00119 for (i = postlists.begin(); i != postlists.end(); i++) {
00120 if (!(*i)->at_end()) {
00121 Xapian::docid id = ((*i)->get_docid() - 1) * multiplier + offset;
00122
00123 if (currdoc >= id) {
00124 (*i)->next(w_min);
00125 if (!(*i)->at_end()) {
00126 id = ((*i)->get_docid() - 1) * multiplier + offset;
00127 if (newdoc == 0 || id < newdoc) newdoc = id;
00128 }
00129 } else {
00130 if (newdoc == 0 || id < newdoc) newdoc = id;
00131 }
00132 }
00133 offset++;
00134 }
00135 if (newdoc) {
00136 DEBUGLINE(DB, "MultiPostList::next() newdoc=" << newdoc <<
00137 " (olddoc=" << currdoc << ")");
00138 currdoc = newdoc;
00139 } else {
00140 DEBUGLINE(DB, "MultiPostList::next() finished" <<
00141 " (olddoc=" << currdoc << ")");
00142 finished = true;
00143 }
00144 RETURN(NULL);
00145 }
00146
00147 PostList *
00148 MultiPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00149 {
00150 DEBUGCALL(DB, PostList *, "MultiPostList::skip_to", did << ", " << w_min);
00151 Assert(!at_end());
00152 Xapian::docid newdoc = 0;
00153 Xapian::docid offset = 0;
00154 Xapian::docid realdid = (did - 1) / multiplier + 2;
00155 Xapian::doccount dbnumber = (did - 1) % multiplier;
00156 std::vector<LeafPostList *>::iterator i;
00157 for (i = postlists.begin(); i != postlists.end(); i++) {
00158 if (offset == dbnumber) --realdid;
00159 ++offset;
00160 Assert((realdid - 1) * multiplier + offset >= did);
00161 Assert((realdid - 1) * multiplier + offset < did + multiplier);
00162 if (!(*i)->at_end()) {
00163 (*i)->skip_to(realdid, w_min);
00164 if (!(*i)->at_end()) {
00165 Xapian::docid id = ((*i)->get_docid() - 1) * multiplier + offset;
00166 if (newdoc == 0 || id < newdoc) newdoc = id;
00167 }
00168 }
00169 }
00170 if (newdoc) {
00171 currdoc = newdoc;
00172 } else {
00173 finished = true;
00174 }
00175 RETURN(NULL);
00176 }
00177
00178 bool
00179 MultiPostList::at_end() const
00180 {
00181 return finished;
00182 }
00183
00184 std::string
00185 MultiPostList::get_description() const
00186 {
00187 std::string desc = "[";
00188
00189 std::vector<LeafPostList *>::const_iterator i;
00190 for (i = postlists.begin(); i != postlists.end(); i++) {
00191 desc += (*i)->get_description();
00192 if (i != postlists.end()) desc += ",";
00193 }
00194
00195 return desc + "]:" + om_tostring(get_termfreq());
00196 }