matcher/multiandpostlist.cc

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include "multiandpostlist.h"
00024 #include "omassert.h"
00025 
00026 MultiAndPostList::~MultiAndPostList()
00027 {
00028     if (plist) {
00029         for (size_t i = 0; i < n_kids; ++i) {
00030             delete plist[i];
00031         }
00032         delete [] plist;
00033     }
00034     delete [] max_wt;
00035 }
00036 
00037 Xapian::doccount
00038 MultiAndPostList::get_termfreq_min() const
00039 {
00040     // The number of matching documents is minimised when we have the minimum
00041     // number of matching documents from each sub-postlist, and these are
00042     // maximally disjoint.
00043     Xapian::doccount sum = plist[0]->get_termfreq_min();
00044     if (sum) {
00045         for (size_t i = 1; i < n_kids; ++i) {
00046             Xapian::doccount sum_old = sum;
00047             sum += plist[i]->get_termfreq_min();
00048             // If sum < sum_old, the calculation overflowed and the true sum
00049             // must be > db_size.  Since we added a value <= db_size,
00050             // subtracting db_size must un-overflow us.
00051             if (sum >= sum_old && sum <= db_size) {
00052                 // It's possible there's no overlap.
00053                 return 0;
00054             }
00055             sum -= db_size;
00056         }
00057         AssertParanoid(sum <= MultiAndPostList::get_termfreq_est());
00058     }
00059     return sum;
00060 }
00061 
00062 Xapian::doccount
00063 MultiAndPostList::get_termfreq_max() const
00064 {
00065     // We can't match more documents than the least of our sub-postlists.
00066     Xapian::doccount result = plist[0]->get_termfreq_max();
00067     for (size_t i = 1; i < n_kids; ++i) {
00068         Xapian::doccount tf = plist[i]->get_termfreq_max();
00069         if (tf < result) result = tf;
00070     }
00071     return result;
00072 }
00073 
00074 Xapian::doccount
00075 MultiAndPostList::get_termfreq_est() const
00076 {
00077     // We calculate the estimate assuming independence.  With this assumption,
00078     // the estimate is the product of the estimates for the sub-postlists
00079     // divided by db_size (n_kids - 1) times.
00080     double result(plist[0]->get_termfreq_est());
00081     for (size_t i = 1; i < n_kids; ++i) {
00082         result = (result * plist[i]->get_termfreq_est()) / db_size;
00083     }
00084     return static_cast<Xapian::doccount>(result + 0.5);
00085 }
00086 
00087 Xapian::weight
00088 MultiAndPostList::get_maxweight() const
00089 {
00090     return max_total;
00091 }
00092 
00093 Xapian::docid
00094 MultiAndPostList::get_docid() const
00095 {
00096     return did;
00097 }
00098 
00099 Xapian::doclength
00100 MultiAndPostList::get_doclength() const
00101 {
00102     Assert(did);
00103     Xapian::doclength doclength = plist[0]->get_doclength();
00104     for (size_t i = 1; i < n_kids; ++i) {
00105         AssertEqDouble(doclength, plist[i]->get_doclength());
00106     }
00107     return doclength;
00108 }
00109 
00110 Xapian::weight
00111 MultiAndPostList::get_weight() const
00112 {
00113     Assert(did);
00114     Xapian::weight result = 0;
00115     for (size_t i = 0; i < n_kids; ++i) {
00116         result += plist[i]->get_weight();
00117     }
00118     return result;
00119 }
00120 
00121 bool
00122 MultiAndPostList::at_end() const
00123 {
00124     return (did == 0);
00125 }
00126 
00127 Xapian::weight
00128 MultiAndPostList::recalc_maxweight()
00129 {
00130     max_total = 0.0;
00131     for (size_t i = 0; i < n_kids; ++i) {
00132         Xapian::weight new_max = plist[i]->recalc_maxweight();
00133         max_wt[i] = new_max;
00134         max_total += new_max;
00135     }
00136     return max_total;
00137 }
00138 
00139 PostList *
00140 MultiAndPostList::find_next_match(Xapian::weight w_min)
00141 {
00142 advanced_plist0:
00143     if (plist[0]->at_end()) {
00144         did = 0;
00145         return NULL;
00146     }
00147     did = plist[0]->get_docid();
00148     for (size_t i = 1; i < n_kids; ++i) {
00149         bool valid;
00150         check_helper(i, did, w_min, valid);
00151         if (!valid) {
00152             next_helper(0, w_min);
00153             goto advanced_plist0;
00154         }
00155         if (plist[i]->at_end()) {
00156             did = 0;
00157             return NULL;
00158         }
00159         Xapian::docid new_did = plist[i]->get_docid();
00160         if (new_did != did) {
00161             skip_to_helper(0, new_did, w_min);
00162             goto advanced_plist0;
00163         }
00164     }
00165     return NULL;
00166 }
00167 
00168 PostList *
00169 MultiAndPostList::next(Xapian::weight w_min)
00170 {
00171     next_helper(0, w_min);
00172     return find_next_match(w_min);
00173 }
00174 
00175 PostList *
00176 MultiAndPostList::skip_to(Xapian::docid did_min, Xapian::weight w_min)
00177 {
00178     skip_to_helper(0, did_min, w_min);
00179     return find_next_match(w_min);
00180 }
00181 
00182 std::string
00183 MultiAndPostList::get_description() const
00184 {
00185     string desc("(");
00186     desc += plist[0]->get_description();
00187     for (size_t i = 1; i < n_kids; ++i) {
00188         desc += " AND ";
00189         desc += plist[i]->get_description();
00190     }
00191     desc += ')';
00192     return desc;
00193 }
00194 
00195 Xapian::termcount
00196 MultiAndPostList::get_wdf() const
00197 {
00198     Xapian::termcount totwdf = 0;
00199     for (size_t i = 0; i < n_kids; ++i) {
00200         totwdf += plist[i]->get_wdf();
00201     }
00202     return totwdf;
00203 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.