matcher/localmatch.cc

Go to the documentation of this file.
00001 /* localmatch.cc
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
00006  * Copyright 2007 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 
00026 #include "localmatch.h"
00027 
00028 #include "autoptr.h"
00029 #include "extraweightpostlist.h"
00030 #include "leafpostlist.h"
00031 #include "omdebug.h"
00032 #include "omqueryinternal.h"
00033 #include "queryoptimiser.h"
00034 #include "scaleweight.h"
00035 #include "weightinternal.h"
00036 #include "stats.h"
00037 
00038 #include <cfloat>
00039 #include <cmath>
00040 #include <map>
00041 
00042 LocalSubMatch::LocalSubMatch(const Xapian::Database::Internal *db_,
00043         const Xapian::Query::Internal * query, Xapian::termcount qlen_,
00044         const Xapian::RSet & omrset,
00045         const Xapian::Weight *wt_factory_)
00046         : orig_query(*query), qlen(qlen_), db(db_),
00047           rset(db, omrset), wt_factory(wt_factory_)
00048 {
00049     DEBUGCALL(MATCH, void, "LocalSubMatch::LocalSubMatch",
00050               db << ", " << query << ", " << qlen_ << ", " << omrset << ", " <<
00051               ", [wt_factory]");
00052 }
00053 
00054 bool
00055 LocalSubMatch::prepare_match(bool /*nowait*/, Stats & total_stats)
00056 {
00057     DEBUGCALL(MATCH, bool, "LocalSubMatch::prepare_match", "/*nowait*/");
00058     Stats my_stats;
00059 
00060     // Set the collection statistics.
00061     my_stats.collection_size = db->get_doccount();
00062     my_stats.average_length = db->get_avlength();
00063 
00064     // Get the term-frequencies and relevant term-frequencies.
00065     Xapian::TermIterator titer = orig_query.get_terms();
00066     Xapian::TermIterator terms_end(NULL);
00067     for ( ; titer != terms_end; ++titer) {
00068         if ((*titer).empty()) {
00069             my_stats.set_termfreq(*titer, db->get_doccount());
00070         } else {
00071             my_stats.set_termfreq(*titer, db->get_termfreq(*titer));
00072         }
00073         rset.will_want_reltermfreq(*titer);
00074     }
00075     rset.contribute_stats(my_stats);
00076 
00077     // Contribute the calculated statistics.
00078     total_stats += my_stats;
00079     RETURN(true);
00080 }
00081 
00082 void
00083 LocalSubMatch::start_match(Xapian::doccount, Xapian::doccount,
00084                            Xapian::doccount, const Stats & total_stats)
00085 {
00086     // Set the statistics for the whole collection.
00087     stats = &total_stats;
00088 }
00089 
00090 PostList *
00091 LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,
00092         map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts)
00093 {
00094     DEBUGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info",
00095               matcher << ", [termfreqandwts]");
00096 
00097     // Build the postlist tree for the query.  This calls
00098     // LocalSubMatch::postlist_from_op_leaf_query() for each term in the query,
00099     // which builds term_info as a side effect.
00100     QueryOptimiser opt(*db, *this, matcher);
00101     PostList * pl = opt.optimise_query(&orig_query);
00102     if (termfreqandwts) *termfreqandwts = term_info;
00103 
00104     // We only need an ExtraWeightPostList if there's an extra weight
00105     // contribution.
00106     AutoPtr<Xapian::Weight> extra_wt;
00107     // FIXME:1.1: create the Xapian::Weight::Internal directly.
00108     extra_wt = wt_factory->create(stats->create_weight_internal(), qlen, 1, "");
00109     if (extra_wt->get_maxextra() != 0.0) {
00110         pl = new ExtraWeightPostList(pl, extra_wt.release(), matcher);
00111     }
00112 
00113     RETURN(pl);
00114 }
00115 
00116 PostList *
00117 LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query,
00118                                            double factor)
00119 {
00120     DEBUGCALL(MATCH, PostList *, "LocalSubMatch::postlist_from_op_leaf_query",
00121               query << ", " << factor);
00122     Assert(query);
00123     AssertEq(query->op, Xapian::Query::Internal::OP_LEAF);
00124     Assert(query->subqs.empty());
00125     bool boolean = (factor == 0.0);
00126     AutoPtr<Xapian::Weight> wt;
00127     if (!boolean) {
00128         // FIXME:
00129         // pass factor to Weight::create() - and have a shim class for classes
00130         // which don't understand it...
00131         // FIXME:1.1: create the Xapian::Weight::Internal directly.
00132         wt = wt_factory->create(stats->create_weight_internal(query->tname),
00133                                 qlen, query->wqf, query->tname);
00134         if (fabs(factor - 1.0) > DBL_EPSILON) {
00135             wt = new ScaleWeight(wt.release(), factor);
00136         }
00137     }
00138 
00139     map<string, Xapian::MSet::Internal::TermFreqAndWeight>::iterator i;
00140     i = term_info.find(query->tname);
00141     if (i == term_info.end()) {
00142         Xapian::doccount tf = stats->get_termfreq(query->tname);
00143         Xapian::weight weight = boolean ? 0 : wt->get_maxpart();
00144         Xapian::MSet::Internal::TermFreqAndWeight info(tf, weight);
00145         term_info.insert(make_pair(query->tname, info));
00146     } else if (!boolean) {
00147         i->second.termweight += wt->get_maxpart();
00148     }
00149 
00150     LeafPostList * pl = db->open_post_list(query->tname);
00151     // The default for LeafPostList is to return 0 weight and maxweight which
00152     // is the same as boolean weighting.
00153     if (!boolean) pl->set_termweight(wt.release());
00154     RETURN(pl);
00155 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.