matcher/orpostlist.cc

Go to the documentation of this file.
00001 /* orpostlist.cc: OR of two posting lists
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2001,2002 Ananova Ltd
00005  * Copyright 2003,2004,2007 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 
00025 #include "orpostlist.h"
00026 #include "andpostlist.h"
00027 #include "andmaybepostlist.h"
00028 #include "omassert.h"
00029 #include "omdebug.h"
00030 
00031 #include <algorithm>
00032 
00033 OrPostList::OrPostList(PostList *left_,
00034                        PostList *right_,
00035                        MultiMatch *matcher_,
00036                        Xapian::doccount dbsize_)
00037         : BranchPostList(left_, right_, matcher_),
00038           lhead(0), rhead(0), lmax(0), rmax(0), minmax(0), dbsize(dbsize_)
00039 {
00040     DEBUGCALL(MATCH, void, "OrPostList", left_ << ", " << right_ << ", " << matcher_ << ", " << dbsize_);
00041 }
00042 
00043 PostList *
00044 OrPostList::next(Xapian::weight w_min)
00045 {
00046     DEBUGCALL(MATCH, PostList *, "OrPostList::next", w_min);
00047     if (w_min > minmax) {
00048         // we can replace the OR with another operator
00049         PostList *ret;
00050         if (w_min > lmax) {
00051             if (w_min > rmax) {
00052                 DEBUGLINE(MATCH, "OR -> AND");
00053                 ret = new AndPostList(l, r, matcher, dbsize, true);
00054                 skip_to_handling_prune(ret, std::max(lhead, rhead) + 1, w_min,
00055                                        matcher);
00056             } else {
00057                 DEBUGLINE(MATCH, "OR -> AND MAYBE (1)");
00058                 ret = new AndMaybePostList(r, l, matcher, dbsize, rhead, lhead);
00059                 next_handling_prune(ret, w_min, matcher);
00060             }
00061         } else {
00062             // w_min > rmax since w_min > minmax but not (w_min > lmax)
00063             Assert(w_min > rmax);
00064             DEBUGLINE(MATCH, "OR -> AND MAYBE (2)");
00065             ret = new AndMaybePostList(l, r, matcher, dbsize, lhead, rhead);
00066             next_handling_prune(ret, w_min, matcher);
00067         }
00068 
00069         l = r = NULL;
00070         RETURN(ret);
00071     }
00072 
00073     bool ldry = false;
00074     bool rnext = false;
00075 
00076     if (lhead <= rhead) {
00077         if (lhead == rhead) rnext = true;
00078         next_handling_prune(l, w_min - rmax, matcher);
00079         if (l->at_end()) ldry = true;
00080     } else {
00081         rnext = true;
00082     }
00083 
00084     if (rnext) {
00085         next_handling_prune(r, w_min - lmax, matcher);
00086         if (r->at_end()) {
00087             PostList *ret = l;
00088             l = NULL;
00089             RETURN(ret);
00090         }
00091         rhead = r->get_docid();
00092     }
00093 
00094     if (!ldry) {
00095         lhead = l->get_docid();
00096         RETURN(NULL);
00097     }
00098 
00099     PostList *ret = r;
00100     r = NULL;
00101     RETURN(ret);
00102 }
00103 
00104 PostList *
00105 OrPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00106 {
00107     DEBUGCALL(MATCH, PostList *, "OrPostList::skip_to", did << ", " << w_min);
00108     if (w_min > minmax) {
00109         // we can replace the OR with another operator
00110         PostList *ret;
00111         if (w_min > lmax) {
00112             if (w_min > rmax) {
00113                 DEBUGLINE(MATCH, "OR -> AND (in skip_to)");
00114                 ret = new AndPostList(l, r, matcher, dbsize, true);
00115                 did = std::max(did, std::max(lhead, rhead));
00116             } else {
00117                 DEBUGLINE(MATCH, "OR -> AND MAYBE (in skip_to) (1)");
00118                 ret = new AndMaybePostList(r, l, matcher, dbsize, rhead, lhead);
00119                 did = std::max(did, rhead);
00120             }
00121         } else {
00122             // w_min > rmax since w_min > minmax but not (w_min > lmax)
00123             Assert(w_min > rmax);
00124             DEBUGLINE(MATCH, "OR -> AND MAYBE (in skip_to) (2)");
00125             ret = new AndMaybePostList(l, r, matcher, dbsize, lhead, rhead);
00126             did = std::max(did, lhead);
00127         }
00128 
00129         l = r = NULL;
00130         skip_to_handling_prune(ret, did, w_min, matcher);
00131         RETURN(ret);
00132     }
00133 
00134     bool ldry = false;
00135     if (lhead < did) {
00136         skip_to_handling_prune(l, did, w_min - rmax, matcher);
00137         ldry = l->at_end();
00138     }
00139 
00140     if (rhead < did) {
00141         skip_to_handling_prune(r, did, w_min - lmax, matcher);
00142 
00143         if (r->at_end()) {
00144             PostList *ret = l;
00145             l = NULL;
00146             RETURN(ret);
00147         }
00148         rhead = r->get_docid();
00149     }
00150 
00151     if (!ldry) {
00152         lhead = l->get_docid();
00153         RETURN(NULL);
00154     }
00155 
00156     PostList *ret = r;
00157     r = NULL;
00158     RETURN(ret);
00159 }
00160 
00161 Xapian::doccount
00162 OrPostList::get_termfreq_max() const
00163 {
00164     DEBUGCALL(MATCH, Xapian::doccount, "OrPostList::get_termfreq_max", "");
00165     RETURN(std::min(l->get_termfreq_max() + r->get_termfreq_max(), dbsize));
00166 }
00167 
00168 Xapian::doccount
00169 OrPostList::get_termfreq_min() const
00170 {
00171     DEBUGCALL(MATCH, Xapian::doccount, "OrPostList::get_termfreq_min", "");
00172     RETURN(std::max(l->get_termfreq_min(), r->get_termfreq_min()));
00173 }
00174 
00175 Xapian::doccount
00176 OrPostList::get_termfreq_est() const
00177 {
00178     DEBUGCALL(MATCH, Xapian::doccount, "OrPostList::get_termfreq_est", "");
00179     // Estimate assuming independence:
00180     // P(l or r) = P(l) + P(r) - P(l) . P(r)
00181     double lest = static_cast<double>(l->get_termfreq_est());
00182     double rest = static_cast<double>(r->get_termfreq_est());
00183     double est = lest + rest - (lest * rest / dbsize);
00184     RETURN(static_cast<Xapian::doccount>(est + 0.5));
00185 }
00186 
00187 Xapian::docid
00188 OrPostList::get_docid() const
00189 {
00190     DEBUGCALL(MATCH, Xapian::docid, "OrPostList::get_docid", "");
00191     Assert(lhead != 0 && rhead != 0); // check we've started
00192     RETURN(std::min(lhead, rhead));
00193 }
00194 
00195 // only called if we are doing a probabilistic OR
00196 Xapian::weight
00197 OrPostList::get_weight() const
00198 {
00199     DEBUGCALL(MATCH, Xapian::weight, "OrPostList::get_weight", "");
00200     Assert(lhead != 0 && rhead != 0); // check we've started
00201     if (lhead < rhead) RETURN(l->get_weight());
00202     if (lhead > rhead) RETURN(r->get_weight());
00203     RETURN(l->get_weight() + r->get_weight());
00204 }
00205 
00206 // only called if we are doing a probabilistic operation
00207 Xapian::weight
00208 OrPostList::get_maxweight() const
00209 {
00210     DEBUGCALL(MATCH, Xapian::weight, "OrPostList::get_maxweight", "");
00211     RETURN(lmax + rmax);
00212 }
00213 
00214 Xapian::weight
00215 OrPostList::recalc_maxweight()
00216 {
00217     DEBUGCALL(MATCH, Xapian::weight, "OrPostList::recalc_maxweight", "");
00218     lmax = l->recalc_maxweight();
00219     rmax = r->recalc_maxweight();
00220     minmax = std::min(lmax, rmax);
00221     RETURN(OrPostList::get_maxweight());
00222 }
00223 
00224 bool
00225 OrPostList::at_end() const
00226 {
00227     DEBUGCALL(MATCH, bool, "OrPostList::at_end", "");
00228     // Can never really happen - OrPostList next/skip_to autoprune
00229     AssertParanoid(!(l->at_end()) && !(r->at_end()));
00230     RETURN(false);
00231 }
00232 
00233 std::string
00234 OrPostList::get_description() const
00235 {
00236     return "(" + l->get_description() + " Or " + r->get_description() + ")";
00237 }
00238 
00239 Xapian::doclength
00240 OrPostList::get_doclength() const
00241 {
00242     DEBUGCALL(MATCH, Xapian::doclength, "OrPostList::get_doclength", "");
00243     Xapian::doclength doclength;
00244 
00245     Assert(lhead != 0 && rhead != 0); // check we've started
00246     if (lhead > rhead) {
00247         doclength = r->get_doclength();
00248         DEBUGLINE(MATCH, "OrPostList::get_doclength() [right docid=" 
00249                   << rhead << "] = " << doclength);
00250     } else {
00251         doclength = l->get_doclength();
00252         DEBUGLINE(MATCH, "OrPostList::get_doclength() [left docid="
00253                   << lhead << "] = " << doclength);
00254     }
00255 
00256     RETURN(doclength);
00257 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.