00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include <math.h>
00027
00028 #include <xapian/enquire.h>
00029
00030 #include "omassert.h"
00031 #include "omdebug.h"
00032 #include "serialise-double.h"
00033 #include "weightinternal.h"
00034
00035 using namespace std;
00036
00037 namespace Xapian {
00038
00039 TradWeight * TradWeight::clone() const {
00040 return new TradWeight(param_k);
00041 }
00042
00043 string TradWeight::name() const { return "Trad"; }
00044
00045 string TradWeight::serialise() const {
00046 return serialise_double(param_k);
00047 }
00048
00049 TradWeight * TradWeight::unserialise(const string & s) const {
00050 const char *p = s.data();
00051 const char *p_end = p + s.size();
00052 double param_k_ = unserialise_double(&p, p_end);
00053
00054 return new TradWeight(param_k_);
00055 }
00056
00057
00058 void
00059 TradWeight::calc_termweight() const
00060 {
00061 DEBUGCALL(MATCH, void, "TradWeight::calc_termweight", "");
00062
00063 lenpart = internal->average_length;
00064
00065 if (lenpart != 0) lenpart = param_k / lenpart;
00066
00067 Xapian::doccount termfreq = internal->termfreq;
00068
00069 DEBUGLINE(WTCALC, "Statistics: N=" << internal->collection_size <<
00070 " n_t=" << termfreq << " lenpart=" << lenpart);
00071
00072 Xapian::weight tw = 0;
00073 if (internal->rset_size != 0) {
00074 Xapian::doccount rtermfreq = internal->reltermfreq;
00075
00076 DEBUGLINE(WTCALC, " R=" << internal->rset_size << " r_t=" << rtermfreq);
00077
00078
00079
00080
00081
00082
00083 Assert(termfreq >= rtermfreq);
00084 Assert(termfreq <= internal->collection_size - internal->rset_size + rtermfreq);
00085
00086 tw = ((rtermfreq + 0.5) *
00087 (internal->collection_size - internal->rset_size - termfreq + rtermfreq + 0.5)) /
00088 ((internal->rset_size - rtermfreq + 0.5) *
00089 (termfreq - rtermfreq + 0.5));
00090 } else {
00091 tw = (internal->collection_size - termfreq + 0.5) / (termfreq + 0.5);
00092 }
00093
00094 Assert(tw > 0);
00095
00096
00097
00098
00099
00100 if (tw < 2) {
00101 tw = tw / 2 + 1;
00102 }
00103 tw = log(tw);
00104
00105 DEBUGLINE(WTCALC, " => termweight = " << tw);
00106 termweight = tw;
00107 weight_calculated = true;
00108 }
00109
00110 Xapian::weight
00111 TradWeight::get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const
00112 {
00113 DEBUGCALL(MATCH, Xapian::weight, "TradWeight::get_sumpart", wdf << ", " << len);
00114 if (!weight_calculated) calc_termweight();
00115
00116 Xapian::weight wt = double(wdf) / (len * lenpart + wdf);
00117
00118 wt *= termweight;
00119
00120 RETURN(wt);
00121 }
00122
00123 Xapian::weight
00124 TradWeight::get_maxpart() const
00125 {
00126 DEBUGCALL(MATCH, Xapian::weight, "TradWeight::get_maxpart", "");
00127 if (!weight_calculated) calc_termweight();
00128
00129 RETURN(termweight);
00130 }
00131
00132 Xapian::weight
00133 TradWeight::get_sumextra(Xapian::doclength ) const
00134 {
00135 DEBUGCALL(MATCH, Xapian::weight, "TradWeight::get_sumextra", "/*len*/");
00136 RETURN(0);
00137 }
00138
00139 Xapian::weight
00140 TradWeight::get_maxextra() const
00141 {
00142 DEBUGCALL(MATCH, Xapian::weight, "TradWeight::get_maxextra", "");
00143 RETURN(0);
00144 }
00145
00146 bool TradWeight::get_sumpart_needs_doclength() const {
00147 if (!weight_calculated) calc_termweight();
00148 return (lenpart != 0);
00149 }
00150
00151 }