matcher/mergepostlist.cc

Go to the documentation of this file.
00001 /* mergepostlist.cc: MERGE of two posting lists
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2006 Olly Betts
00006  * Copyright 2007 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 #include "multimatch.h"
00026 #include "emptypostlist.h"
00027 #include "mergepostlist.h"
00028 #include "branchpostlist.h"
00029 #include "omdebug.h"
00030 #include <xapian/errorhandler.h>
00031 
00032 // NB don't prune - even with one sublist we still translate docids...
00033 
00034 MergePostList::MergePostList(std::vector<PostList *> plists_,
00035                              MultiMatch *matcher_,
00036                              Xapian::ErrorHandler * errorhandler_)
00037         : plists(plists_), current(-1), matcher(matcher_),
00038           errorhandler(errorhandler_)
00039 {
00040     DEBUGCALL(MATCH, void, "MergePostList::MergePostList", "std::vector<PostList *>");
00041 }
00042 
00043 MergePostList::~MergePostList()
00044 {
00045     DEBUGCALL(MATCH, void, "MergePostList::~MergePostList", "");
00046     std::vector<PostList *>::const_iterator i;
00047     for (i = plists.begin(); i != plists.end(); i++) {
00048         delete *i;
00049     }
00050 }
00051 
00052 PostList *
00053 MergePostList::next(Xapian::weight w_min)
00054 {
00055     DEBUGCALL(MATCH, PostList *, "MergePostList::next", w_min);
00056     DEBUGLINE(MATCH, "current = " << current);
00057     if (current == -1) current = 0;
00058     do {
00059         // FIXME: should skip over Remote matchers which aren't ready yet
00060         // and come back to them later...
00061         try {
00062             next_handling_prune(plists[current], w_min, matcher);
00063             if (!plists[current]->at_end()) break;
00064             current++;
00065         } catch (Xapian::Error & e) {
00066             if (errorhandler) {
00067                 DEBUGLINE(EXCEPTION, "Calling error handler in MergePostList::next().");
00068                 (*errorhandler)(e);
00069                 // Continue match without this sub-postlist.
00070                 delete plists[current];
00071                 plists[current] = new EmptyPostList;
00072             } else {
00073                 throw;
00074             }
00075         }
00076     } while (unsigned(current) < plists.size());
00077     DEBUGLINE(MATCH, "current = " << current);
00078     RETURN(NULL);
00079 }
00080 
00081 PostList *
00082 MergePostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00083 {
00084     DEBUGCALL(MATCH, PostList *, "MergePostList::skip_to", did << ", " << w_min);
00085     (void)did;
00086     (void)w_min;
00087     // MergePostList doesn't return documents in docid order, so skip_to
00088     // isn't a meaningful operation.
00089     throw Xapian::InvalidOperationError("MergePostList doesn't support skip_to");
00090 }
00091 
00092 Xapian::termcount
00093 MergePostList::get_wdf() const
00094 {
00095     DEBUGCALL(MATCH, Xapian::termcount, "MergePostList::get_wdf", "");
00096     RETURN(plists[current]->get_wdf());
00097 }
00098 
00099 Xapian::doccount
00100 MergePostList::get_termfreq_max() const
00101 {
00102     DEBUGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_max", "");
00103     // sum of termfreqs for all children
00104     Xapian::doccount total = 0;
00105     vector<PostList *>::const_iterator i;
00106     for (i = plists.begin(); i != plists.end(); i++) {
00107         total += (*i)->get_termfreq_max();
00108     }
00109     return total;
00110 }
00111 
00112 Xapian::doccount
00113 MergePostList::get_termfreq_min() const
00114 {
00115     DEBUGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_min", "");
00116     // sum of termfreqs for all children
00117     Xapian::doccount total = 0;
00118     vector<PostList *>::const_iterator i;
00119     for (i = plists.begin(); i != plists.end(); i++) {
00120         total += (*i)->get_termfreq_min();
00121     }
00122     return total;
00123 }
00124 
00125 Xapian::doccount
00126 MergePostList::get_termfreq_est() const
00127 {
00128     DEBUGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_est", "");
00129     // sum of termfreqs for all children
00130     Xapian::doccount total = 0;
00131     vector<PostList *>::const_iterator i;
00132     for (i = plists.begin(); i != plists.end(); i++) {
00133         total += (*i)->get_termfreq_est();
00134     }
00135     return total;
00136 }
00137 
00138 Xapian::docid
00139 MergePostList::get_docid() const
00140 {
00141     DEBUGCALL(MATCH, Xapian::docid, "MergePostList::get_docid", "");
00142     Assert(current != -1);
00143     // FIXME: this needs fixing so we can prune plists - see MultiPostlist
00144     // for code which does this...
00145     RETURN((plists[current]->get_docid() - 1) * plists.size() + current + 1);
00146 }
00147 
00148 Xapian::weight
00149 MergePostList::get_weight() const
00150 {
00151     DEBUGCALL(MATCH, Xapian::weight, "MergePostList::get_weight", "");
00152     Assert(current != -1);
00153     return plists[current]->get_weight();
00154 }
00155 
00156 const string *
00157 MergePostList::get_collapse_key() const
00158 {
00159     DEBUGCALL(MATCH, string *, "MergePostList::get_collapse_key", "");
00160     Assert(current != -1);
00161     return plists[current]->get_collapse_key();
00162 }
00163 
00164 Xapian::weight
00165 MergePostList::get_maxweight() const
00166 {
00167     DEBUGCALL(MATCH, Xapian::weight, "MergePostList::get_maxweight", "");
00168     return w_max;
00169 }
00170 
00171 Xapian::weight
00172 MergePostList::recalc_maxweight()
00173 {
00174     DEBUGCALL(MATCH, Xapian::weight, "MergePostList::recalc_maxweight", "");
00175     w_max = 0;
00176     vector<PostList *>::iterator i;
00177     for (i = plists.begin(); i != plists.end(); i++) {
00178         try {
00179             Xapian::weight w = (*i)->recalc_maxweight();
00180             if (w > w_max) w_max = w;
00181         } catch (Xapian::Error & e) {
00182             if (errorhandler) {
00183                 DEBUGLINE(EXCEPTION, "Calling error handler in MergePostList::recalc_maxweight().");
00184                 (*errorhandler)(e);
00185 
00186                 if (current == i - plists.begin()) {
00187                     // Fatal error
00188                     throw;
00189                 }
00190                 // Continue match without this sub-postlist.
00191                 delete (*i);
00192                 *i = new EmptyPostList;
00193             } else {
00194                 throw;
00195             }
00196         }
00197     }
00198     return w_max;
00199 }
00200 
00201 bool
00202 MergePostList::at_end() const
00203 {
00204     DEBUGCALL(MATCH, bool, "MergePostList::at_end", "");
00205     Assert(current != -1);
00206     return unsigned(current) >= plists.size();    
00207 }
00208 
00209 string
00210 MergePostList::get_description() const
00211 {
00212     string desc = "( Merge ";
00213     vector<PostList *>::const_iterator i;
00214     for (i = plists.begin(); i != plists.end(); i++) {
00215         desc += (*i)->get_description() + " ";
00216     }
00217     return desc + ")";
00218 }
00219 
00220 Xapian::doclength
00221 MergePostList::get_doclength() const
00222 {
00223     DEBUGCALL(MATCH, Xapian::doclength, "MergePostList::get_doclength", "");
00224     Assert(current != -1);
00225     return plists[current]->get_doclength();
00226 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.