matcher/andnotpostlist.cc

Go to the documentation of this file.
00001 /* andnotpostlist.cc: Return items which are in A, unless they're in B
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2003,2004,2007 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 #include "andnotpostlist.h"
00025 #include "omdebug.h"
00026 #include <algorithm>
00027 
00028 PostList *
00029 AndNotPostList::advance_to_next_match(Xapian::weight w_min, PostList *ret)
00030 {
00031     DEBUGCALL(MATCH, PostList *, "AndNotPostList::advance_to_next_match", w_min << ", " << ret);
00032     handle_prune(l, ret);
00033     if (l->at_end()) {
00034         lhead = 0;
00035         RETURN(NULL);
00036     }
00037     lhead = l->get_docid();
00038 
00039     while (rhead <= lhead) {
00040         if (lhead == rhead) {
00041             next_handling_prune(l, w_min, matcher);
00042             if (l->at_end()) {
00043                 lhead = 0;
00044                 RETURN(NULL);
00045             }
00046             lhead = l->get_docid();
00047         }
00048         skip_to_handling_prune(r, lhead, 0, matcher);
00049         if (r->at_end()) {
00050             ret = l;
00051             l = NULL;
00052             RETURN(ret);
00053         }
00054         rhead = r->get_docid();
00055     }
00056     RETURN(NULL);
00057 }
00058 
00059 AndNotPostList::AndNotPostList(PostList *left_,
00060                                PostList *right_,
00061                                MultiMatch *matcher_,
00062                                Xapian::doccount dbsize_)
00063         : BranchPostList(left_, right_, matcher_),
00064           lhead(0), rhead(0), dbsize(dbsize_)
00065 {
00066     DEBUGCALL(MATCH, void, "AndNotPostList", left_ << ", " << right_ << ", " << matcher_ << ", " << dbsize_);
00067 }
00068 
00069 PostList *
00070 AndNotPostList::next(Xapian::weight w_min)
00071 {
00072     DEBUGCALL(MATCH, PostList *, "AndNotPostList::next", w_min);
00073     RETURN(advance_to_next_match(w_min, l->next(w_min)));
00074 }
00075 
00076 PostList *
00077 AndNotPostList::sync_and_skip_to(Xapian::docid id,
00078                                  Xapian::weight w_min,
00079                                  Xapian::docid lh,
00080                                  Xapian::docid rh)
00081 {
00082     DEBUGCALL(MATCH, PostList *, "AndNotPostList::sync_and_skip_to", id << ", " << w_min << ", " << lh << ", " << rh);
00083     lhead = lh;
00084     rhead = rh;
00085     RETURN(skip_to(id, w_min));
00086 }
00087 
00088 PostList *
00089 AndNotPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00090 {
00091     DEBUGCALL(MATCH, PostList *, "AndNotPostList::skip_to", did << ", " << w_min);
00092     if (did <= lhead) RETURN(NULL);
00093     RETURN(advance_to_next_match(w_min, l->skip_to(did, w_min)));
00094 }
00095 
00096 Xapian::doccount
00097 AndNotPostList::get_termfreq_max() const
00098 {
00099     DEBUGCALL(MATCH, Xapian::doccount, "AndNotPostList::get_termfreq_max", "");
00100     // Max is when as many docs as possible on left, and none excluded.
00101     RETURN(l->get_termfreq_max());
00102 }
00103 
00104 Xapian::doccount
00105 AndNotPostList::get_termfreq_min() const
00106 {
00107     DEBUGCALL(MATCH, Xapian::doccount, "AndNotPostList::get_termfreq_min", "");
00108     // Min is when as few docs as possible on left, and maximum are excluded.
00109     Xapian::doccount l_min = l->get_termfreq_min();
00110     Xapian::doccount r_max = r->get_termfreq_max();
00111     if (l_min > r_max) RETURN(l_min - r_max);
00112     RETURN(0u);
00113 }
00114 
00115 Xapian::doccount
00116 AndNotPostList::get_termfreq_est() const
00117 {
00118     DEBUGCALL(MATCH, Xapian::doccount, "AndNotPostList::get_termfreq_est", "");
00119     // Estimate assuming independence:
00120     // P(l and r) = P(l) . P(r)
00121     // P(l not r) = P(l) - P(l and r) = P(l) . ( 1 - P(r))
00122     double est = l->get_termfreq_est() *
00123              (1.0 - double(r->get_termfreq_est()) / dbsize);
00124     RETURN(static_cast<Xapian::doccount>(est + 0.5));
00125 }
00126 
00127 Xapian::docid
00128 AndNotPostList::get_docid() const
00129 {
00130     DEBUGCALL(MATCH, Xapian::docid, "AndNotPostList::get_docid", "");
00131     RETURN(lhead);
00132 }
00133 
00134 // only called if we are doing a probabilistic AND NOT
00135 Xapian::weight
00136 AndNotPostList::get_weight() const
00137 {
00138     DEBUGCALL(MATCH, Xapian::weight, "AndNotPostList::get_weight", "");
00139     RETURN(l->get_weight());
00140 }
00141 
00142 // only called if we are doing a probabilistic AND NOT
00143 Xapian::weight
00144 AndNotPostList::get_maxweight() const
00145 {
00146     DEBUGCALL(MATCH, Xapian::weight, "AndNotPostList::get_maxweight", "");
00147     RETURN(l->get_maxweight());
00148 }
00149 
00150 Xapian::weight
00151 AndNotPostList::recalc_maxweight()
00152 {
00153     DEBUGCALL(MATCH, Xapian::weight, "AndNotPostList::recalc_maxweight", "");
00154     RETURN(l->recalc_maxweight());
00155 }
00156 
00157 bool
00158 AndNotPostList::at_end() const
00159 {
00160     DEBUGCALL(MATCH, bool, "AndNotPostList::at_end", "");
00161     RETURN(lhead == 0);
00162 }
00163 
00164 std::string
00165 AndNotPostList::get_description() const
00166 {
00167     return "(" + l->get_description() + " AndNot " + r->get_description() + ")";
00168 }
00169 
00170 Xapian::doclength
00171 AndNotPostList::get_doclength() const
00172 {
00173     DEBUGCALL(MATCH, Xapian::doclength, "AndNotPostList::get_doclength", "");
00174     RETURN(l->get_doclength());
00175 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.