backends/multi/multi_postlist.cc

Go to the documentation of this file.
00001 /* multi_postlist.cc: interface to multiple database access
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2005,2007 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #include <config.h>
00023 #include <stdio.h>
00024 
00025 #include "omassert.h"
00026 #include "omdebug.h"
00027 #include "multi_postlist.h"
00028 #include "utils.h"
00029 
00030 #ifdef XAPIAN_DEBUG_PARANOID
00031 #include "xapian/database.h"
00032 #endif
00033 
00034 MultiPostList::MultiPostList(std::vector<LeafPostList *> & pls,
00035                              const Xapian::Database &this_db_)
00036         : postlists(pls),
00037           this_db(this_db_),
00038           finished(false),
00039           currdoc(0),
00040           freq_initialised(false)
00041 {
00042     multiplier = pls.size();
00043 }
00044 
00045 
00046 MultiPostList::~MultiPostList()
00047 {
00048     std::vector<LeafPostList *>::iterator i;
00049     for (i = postlists.begin(); i != postlists.end(); i++) {
00050         delete *i;
00051     }
00052     postlists.clear();
00053 }
00054 
00055 Xapian::doccount
00056 MultiPostList::get_termfreq() const
00057 {
00058     if (freq_initialised) return termfreq;
00059     DEBUGLINE(DB, "Calculating multiple term frequencies");
00060 
00061     // Calculate and remember the termfreq
00062     termfreq = 0;
00063     std::vector<LeafPostList *>::const_iterator i;
00064     for (i = postlists.begin(); i != postlists.end(); i++) {
00065         termfreq += (*i)->get_termfreq();
00066     }
00067 
00068     freq_initialised = true;
00069     return termfreq;
00070 }
00071 
00072 Xapian::docid
00073 MultiPostList::get_docid() const
00074 {
00075     DEBUGCALL(DB, Xapian::docid, "MultiPostList::get_docid", "");
00076     Assert(!at_end());
00077     Assert(currdoc != 0);
00078     RETURN(currdoc);
00079 }
00080 
00081 Xapian::doclength
00082 MultiPostList::get_doclength() const
00083 {
00084     DEBUGCALL(DB, Xapian::doclength, "MultiPostList::get_doclength", "");
00085     Assert(!at_end());
00086     Assert(currdoc != 0);
00087     Xapian::doclength result = postlists[(currdoc - 1) % multiplier]->get_doclength();
00088     AssertEqParanoid(result, this_db.get_doclength(get_docid()));
00089     RETURN(result);
00090 }
00091 
00092 Xapian::termcount
00093 MultiPostList::get_wdf() const
00094 {
00095     return postlists[(currdoc - 1) % multiplier]->get_wdf();
00096 }
00097 
00098 PositionList *
00099 MultiPostList::read_position_list()
00100 {
00101     return postlists[(currdoc - 1) % multiplier]->read_position_list();
00102 }
00103 
00104 PositionList *
00105 MultiPostList::open_position_list() const
00106 {
00107     return postlists[(currdoc - 1) % multiplier]->open_position_list();
00108 }
00109 
00110 PostList *
00111 MultiPostList::next(Xapian::weight w_min)
00112 {
00113     DEBUGCALL(DB, PostList *, "MultiPostList::next", w_min);
00114     Assert(!at_end());
00115 
00116     Xapian::docid newdoc = 0;
00117     Xapian::docid offset = 1;
00118     std::vector<LeafPostList *>::iterator i;
00119     for (i = postlists.begin(); i != postlists.end(); i++) {
00120         if (!(*i)->at_end()) {
00121             Xapian::docid id = ((*i)->get_docid() - 1) * multiplier + offset;
00122             // Check if it needs to be advanced
00123             if (currdoc >= id) {
00124                 (*i)->next(w_min);
00125                 if (!(*i)->at_end()) {
00126                     id = ((*i)->get_docid() - 1) * multiplier + offset;
00127                     if (newdoc == 0 || id < newdoc) newdoc = id;
00128                 }
00129             } else {
00130                 if (newdoc == 0 || id < newdoc) newdoc = id;
00131             }
00132         }
00133         offset++;
00134     }
00135     if (newdoc) {
00136         DEBUGLINE(DB, "MultiPostList::next() newdoc=" << newdoc <<
00137                   " (olddoc=" << currdoc << ")");
00138         currdoc = newdoc;
00139     } else {
00140         DEBUGLINE(DB, "MultiPostList::next() finished" <<
00141                   " (olddoc=" << currdoc << ")");
00142         finished = true;
00143     }
00144     RETURN(NULL);
00145 }
00146 
00147 PostList *
00148 MultiPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00149 {
00150     DEBUGCALL(DB, PostList *, "MultiPostList::skip_to", did << ", " << w_min);
00151     Assert(!at_end());
00152     Xapian::docid newdoc = 0;
00153     Xapian::docid offset = 0;
00154     Xapian::docid realdid = (did - 1) / multiplier + 2;
00155     Xapian::doccount dbnumber = (did - 1) % multiplier;
00156     std::vector<LeafPostList *>::iterator i;
00157     for (i = postlists.begin(); i != postlists.end(); i++) {    
00158         if (offset == dbnumber) --realdid;
00159         ++offset;
00160         Assert((realdid - 1) * multiplier + offset >= did);
00161         Assert((realdid - 1) * multiplier + offset < did + multiplier);
00162         if (!(*i)->at_end()) {
00163             (*i)->skip_to(realdid, w_min);
00164             if (!(*i)->at_end()) {
00165                 Xapian::docid id = ((*i)->get_docid() - 1) * multiplier + offset;
00166                 if (newdoc == 0 || id < newdoc) newdoc = id;
00167             }
00168         }
00169     }
00170     if (newdoc) {
00171         currdoc = newdoc;
00172     } else {
00173         finished = true;
00174     }
00175     RETURN(NULL);
00176 }
00177 
00178 bool
00179 MultiPostList::at_end() const
00180 {
00181     return finished;
00182 }
00183 
00184 std::string
00185 MultiPostList::get_description() const
00186 {
00187     std::string desc = "[";
00188 
00189     std::vector<LeafPostList *>::const_iterator i;
00190     for (i = postlists.begin(); i != postlists.end(); i++) {
00191         desc += (*i)->get_description();
00192         if (i != postlists.end()) desc += ",";
00193     }
00194 
00195     return desc + "]:" + om_tostring(get_termfreq());
00196 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.