matcher/xorpostlist.cc

Go to the documentation of this file.
00001 /* xorpostlist.cc: XOR of two posting lists
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2003,2004,2007 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 
00025 #include "xorpostlist.h"
00026 #include "andnotpostlist.h"
00027 #include "omassert.h"
00028 #include "omdebug.h"
00029 
00030 // for XOR we just pass w_min through unchanged since both branches matching
00031 // doesn't cause a match
00032 
00033 inline PostList *
00034 XorPostList::advance_to_next_match(Xapian::weight w_min)
00035 {
00036     DEBUGCALL(MATCH, PostList *, "XorPostList::advance_to_next_match", w_min);
00037     while (rhead == lhead) {
00038         next_handling_prune(l, w_min, matcher);
00039         next_handling_prune(r, w_min, matcher);
00040         if (l->at_end()) {
00041             if (r->at_end()) {
00042                 lhead = 0;
00043                 RETURN(NULL);
00044             }
00045             PostList *ret = r;
00046             r = NULL;
00047             RETURN(ret);
00048         }
00049         if (r->at_end()) {
00050             PostList *ret = l;
00051             l = NULL;
00052             RETURN(ret);
00053         }
00054         lhead = l->get_docid();
00055         rhead = r->get_docid();
00056     }
00057     RETURN(NULL);
00058 }
00059 
00060 XorPostList::XorPostList(PostList *left_,
00061                          PostList *right_,
00062                          MultiMatch *matcher_,
00063                          Xapian::doccount dbsize_)
00064         : BranchPostList(left_, right_, matcher_),
00065           lhead(0),
00066           rhead(0),
00067           dbsize(dbsize_)
00068 {
00069     DEBUGCALL(MATCH, void, "XorPostList", left_ << ", " << right_ << ", " << matcher_ << ", " << dbsize_);
00070 }
00071 
00072 PostList *
00073 XorPostList::next(Xapian::weight w_min)
00074 {
00075     DEBUGCALL(MATCH, PostList *, "XorPostList::next", w_min);
00076     if (w_min > minmax) {
00077         // we can replace the XOR with another operator (or run dry)
00078         PostList *ret;
00079         if (w_min > lmax) {
00080             if (w_min > rmax) {
00081                 DEBUGLINE(MATCH, "XOR drops below w_min");
00082                 // neither side is weighty enough, so run dry
00083                 lhead = 0;
00084                 RETURN(NULL);
00085             }
00086             DEBUGLINE(MATCH, "XOR -> AND NOT (1)");
00087             ret = new AndNotPostList(r, l, matcher, dbsize);
00088         } else {
00089             // w_min > rmax since w_min > minmax but not (w_min > lmax)
00090             Assert(w_min > rmax);
00091             DEBUGLINE(MATCH, "XOR -> AND NOT (2)");
00092             ret = new AndNotPostList(l, r, matcher, dbsize);
00093         }
00094 
00095         l = r = NULL;
00096         next_handling_prune(ret, w_min, matcher);
00097         RETURN(ret);
00098     }
00099 
00100     bool ldry = false;
00101     bool rnext = false;
00102 
00103     if (lhead <= rhead) {
00104         // lhead == rhead should only happen on first next
00105         if (lhead == rhead) rnext = true;
00106         next_handling_prune(l, w_min, matcher);
00107         if (l->at_end()) ldry = true;
00108     } else {
00109         rnext = true;
00110     }
00111 
00112     if (rnext) {
00113         next_handling_prune(r, w_min, matcher);
00114         if (r->at_end()) {
00115             PostList *ret = l;
00116             l = NULL;
00117             RETURN(ret);
00118         }
00119         rhead = r->get_docid();
00120     }
00121 
00122     if (ldry) {
00123         PostList *ret = r;
00124         r = NULL;
00125         RETURN(ret);
00126     }
00127 
00128     lhead = l->get_docid();
00129     RETURN(advance_to_next_match(w_min));
00130 }
00131 
00132 PostList *
00133 XorPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00134 {
00135     DEBUGCALL(MATCH, PostList *, "XorPostList::skip_to", did << ", " << w_min);
00136     if (w_min > minmax) {
00137         // we can replace the XOR with another operator (or run dry)
00138         PostList *ret, *ret2;
00139         if (w_min > lmax) {
00140             if (w_min > rmax) {
00141                 DEBUGLINE(MATCH, "XOR drops below w_min");
00142                 // neither side is weighty enough, so run dry
00143                 lhead = 0;
00144                 RETURN(NULL);
00145             }
00146             DEBUGLINE(MATCH, "XOR -> AND NOT (in skip_to) (1)");
00147             AndNotPostList *ret3 = new AndNotPostList(r, l, matcher, dbsize);
00148             did = std::max(did, rhead);
00149             ret2 = ret3->sync_and_skip_to(did, w_min, rhead, lhead);
00150             ret = ret3;
00151         } else {
00152             // w_min > rmax since w_min > minmax but not (w_min > lmax)
00153             Assert(w_min > rmax);
00154             DEBUGLINE(MATCH, "XOR -> AND NOT (in skip_to) (2)");
00155             AndNotPostList *ret3 = new AndNotPostList(l, r, matcher, dbsize);
00156             did = std::max(did, lhead);
00157             ret2 = ret3->sync_and_skip_to(did, w_min, lhead, rhead);
00158             ret = ret3;
00159         }
00160 
00161         l = r = NULL;
00162         if (ret2) {
00163             delete ret;
00164             ret = ret2;
00165         }
00166         RETURN(ret);
00167     }
00168 
00169     bool ldry = false;
00170     if (lhead < did) {
00171         skip_to_handling_prune(l, did, w_min, matcher);
00172         ldry = l->at_end();
00173     }
00174 
00175     if (rhead < did) {
00176         skip_to_handling_prune(r, did, w_min, matcher);
00177 
00178         if (r->at_end()) {
00179             PostList *ret = l;
00180             l = NULL;
00181             RETURN(ret);
00182         }
00183         rhead = r->get_docid();
00184     }
00185 
00186     if (ldry) {
00187         PostList *ret = r;
00188         r = NULL;
00189         RETURN(ret);
00190     }
00191 
00192     lhead = l->get_docid();
00193     RETURN(advance_to_next_match(w_min));
00194 }
00195 
00196 Xapian::doccount
00197 XorPostList::get_termfreq_max() const
00198 {
00199     DEBUGCALL(MATCH, Xapian::doccount, "XorPostList::get_termfreq_max", "");
00200     return l->get_termfreq_max() + r->get_termfreq_max();
00201 }
00202 
00203 Xapian::doccount
00204 XorPostList::get_termfreq_min() const
00205 {
00206     DEBUGCALL(MATCH, Xapian::doccount, "XorPostList::get_termfreq_min", "");
00207     // Min = freq_min(a or b) - freq_max(a and b)
00208     //     = max(a_min, b_min) - min(a_max, b_max)
00209     //     = min(b_min - a_max, a_min - b_max)
00210     Xapian::doccount r_min = r->get_termfreq_min();
00211     Xapian::doccount l_min = l->get_termfreq_min();
00212     Xapian::doccount r_max = r->get_termfreq_max();
00213     Xapian::doccount l_max = l->get_termfreq_max();
00214     Xapian::doccount termfreq_min = 0u;
00215     if (r_min > l_max)
00216         termfreq_min = r_min - l_max;
00217     if (l_min > r_max && (l_min - r_max) > termfreq_min)
00218         termfreq_min = l_min - r_max;
00219 
00220     return termfreq_min;
00221 }
00222 
00223 Xapian::doccount
00224 XorPostList::get_termfreq_est() const
00225 {
00226     DEBUGCALL(MATCH, Xapian::doccount, "XorPostList::get_termfreq_est", "");
00227     // Estimate assuming independence:
00228     // P(l xor r) = P(l) + P(r) - 2 . P(l) . P(r)
00229     double lest = static_cast<double>(l->get_termfreq_est());
00230     double rest = static_cast<double>(r->get_termfreq_est());
00231     double est = lest + rest - (2.0 * lest * rest / dbsize);
00232     RETURN(static_cast<Xapian::doccount>(est + 0.5));
00233 }
00234 
00235 Xapian::docid
00236 XorPostList::get_docid() const
00237 {
00238     DEBUGCALL(MATCH, Xapian::docid, "XorPostList::get_docid", "");
00239     Assert(lhead != 0 && rhead != 0); // check we've started
00240     return std::min(lhead, rhead);
00241 }
00242 
00243 // only called if we are doing a probabilistic XOR
00244 Xapian::weight
00245 XorPostList::get_weight() const
00246 {
00247     DEBUGCALL(MATCH, Xapian::weight, "XorPostList::get_weight", "");
00248     Assert(lhead != 0 && rhead != 0); // check we've started
00249     if (lhead < rhead) return l->get_weight();
00250     Assert(lhead > rhead);
00251     return r->get_weight();
00252 }
00253 
00254 // only called if we are doing a probabilistic operation
00255 Xapian::weight
00256 XorPostList::get_maxweight() const
00257 {
00258     DEBUGCALL(MATCH, Xapian::weight, "XorPostList::get_maxweight", "");
00259     return std::max(lmax, rmax);
00260 }
00261 
00262 Xapian::weight
00263 XorPostList::recalc_maxweight()
00264 {
00265     DEBUGCALL(MATCH, Xapian::weight, "XorPostList::recalc_maxweight", "");
00266     lmax = l->recalc_maxweight();
00267     rmax = r->recalc_maxweight();
00268     minmax = std::min(lmax, rmax);
00269     return XorPostList::get_maxweight();
00270 }
00271 
00272 bool
00273 XorPostList::at_end() const
00274 {
00275     DEBUGCALL(MATCH, bool, "XorPostList::at_end", "");
00276     return lhead == 0;
00277 }
00278 
00279 std::string
00280 XorPostList::get_description() const
00281 {
00282     return "(" + l->get_description() + " Xor " + r->get_description() + ")";
00283 }
00284 
00285 Xapian::doclength
00286 XorPostList::get_doclength() const
00287 {
00288     DEBUGCALL(MATCH, Xapian::doclength, "XorPostList::get_doclength", "");
00289     Assert(lhead != 0 && rhead != 0); // check we've started
00290     if (lhead < rhead) return l->get_doclength();
00291     Assert(lhead > rhead);
00292     return r->get_doclength();
00293 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.