00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include <math.h>
00026
00027 #include "expandweight.h"
00028 #include "termlist.h"
00029
00030 #include "omassert.h"
00031 #include "omdebug.h"
00032
00033 using namespace std;
00034
00035 namespace Xapian {
00036 namespace Internal {
00037
00038 ExpandWeight::ExpandWeight(const Xapian::Database &db_,
00039 Xapian::doccount rsize_,
00040 bool use_exact_termfreq_,
00041 double expand_k_)
00042 : db(db_),
00043 rsize(rsize_),
00044 use_exact_termfreq(use_exact_termfreq_),
00045 expand_k(expand_k_)
00046 {
00047 DEBUGCALL(MATCH, void, "ExpandWeight", db_ << ", " << rsize_ << ", " << use_exact_termfreq_ << ", " << expand_k_);
00048 }
00049
00050 Xapian::weight
00051 ExpandWeight::get_weight(TermList * merger, const std::string &tname) const
00052 {
00053 DEBUGCALL(MATCH, Xapian::weight, "ExpandWeight::get_weight", "[merger], " << tname);
00054 ExpandStats stats(db.get_avlength(), expand_k);
00055 merger->accumulate_stats(stats);
00056 double termfreq = double(stats.termfreq);
00057 const double rtermfreq = stats.rtermfreq;
00058
00059 Xapian::doccount dbsize = db.get_doccount();
00060 if (stats.dbsize != dbsize) {
00061 if (!use_exact_termfreq) {
00062 termfreq *= double(dbsize) / stats.dbsize;
00063 DEBUGLINE(EXPAND, "Approximating termfreq of `" << tname << "': " <<
00064 stats.termfreq << " * " << dbsize << " / " <<
00065 stats.dbsize << " = " << termfreq << " (true value is:" <<
00066 db.get_termfreq(tname) << ")");
00067
00068
00069
00070
00071
00072 if (termfreq < rtermfreq) {
00073 termfreq = rtermfreq;
00074 } else {
00075 const double upper_bound = dbsize - rsize + rtermfreq;
00076 if (termfreq > upper_bound) termfreq = upper_bound;
00077 }
00078 } else {
00079 termfreq = db.get_termfreq(tname);
00080 DEBUGLINE(EXPAND, "Asked database for termfreq of `" << tname <<
00081 "': " << termfreq);
00082 }
00083 }
00084
00085 DEBUGMSG(EXPAND, "ExpandWeight::get_weight: "
00086 "N=" << dbsize << ", "
00087 "n=" << termfreq << ", "
00088 "R=" << rsize << ", "
00089 "r=" << rtermfreq << ", "
00090 "mult=" << stats.multiplier);
00091
00092 Xapian::weight tw;
00093 tw = (rtermfreq + 0.5) * (dbsize - rsize - termfreq + rtermfreq + 0.5) /
00094 ((rsize - rtermfreq + 0.5) * (termfreq - rtermfreq + 0.5));
00095 Assert(tw > 0);
00096
00097
00098
00099
00100
00101 if (tw < 2) {
00102 tw = tw / 2 + 1;
00103 }
00104 tw = log(tw);
00105
00106 DEBUGLINE(EXPAND, " => Term weight = " << tw <<
00107 " Expand weight = " << stats.multiplier * tw);
00108
00109
00110 RETURN(stats.multiplier * tw);
00111 }
00112
00113 }
00114 }