00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef OM_HGUARD_STATS_H
00024 #define OM_HGUARD_STATS_H
00025
00026 #include "xapian/types.h"
00027 #include "omassert.h"
00028 #include <string>
00029 #include <map>
00030
00031 #include "autoptr.h"
00032 #include "weightinternal.h"
00033 #include <list>
00034
00035 using namespace std;
00036
00038 class Stats {
00039 public:
00041 Xapian::doccount collection_size;
00042
00044 Xapian::doccount rset_size;
00045
00047 Xapian::doclength average_length;
00048
00050 std::map<string, Xapian::doccount> termfreq;
00051
00053 std::map<string, Xapian::doccount> reltermfreq;
00054
00055
00056 Stats() : collection_size(0),
00057 rset_size(0),
00058 average_length(1.0)
00059 {}
00060
00063 Stats & operator +=(const Stats & inc);
00064
00070 Xapian::doccount get_termfreq(const string & tname) const;
00071
00074 void set_termfreq(const string & tname, Xapian::doccount tfreq);
00075
00081 Xapian::doccount get_reltermfreq(const string & tname) const;
00082
00085 void set_reltermfreq(const string & tname, Xapian::doccount rtfreq);
00086
00088 string get_description() const;
00089
00090
00103 mutable list<Xapian::Weight::Internal *> weight_internals;
00104
00109 ~Stats() {
00110 list<Xapian::Weight::Internal *>::const_iterator i;
00111 for (i = weight_internals.begin(); i != weight_internals.end(); ++i)
00112 {
00113 delete *i;
00114 }
00115 }
00116
00128 Xapian::Weight::Internal * create_weight_internal() const
00129 {
00130 AutoPtr<Xapian::Weight::Internal> wti(new Xapian::Weight::Internal(*this));
00131 weight_internals.push_back(wti.get());
00132 return wti.release();
00133 }
00134
00146 Xapian::Weight::Internal * create_weight_internal(const string & tname) const
00147 {
00148 AutoPtr<Xapian::Weight::Internal> wti(new Xapian::Weight::Internal(*this, tname));
00149 weight_internals.push_back(wti.get());
00150 return wti.release();
00151 }
00152 };
00153
00155
00157
00158 inline Stats &
00159 Stats::operator +=(const Stats & inc)
00160 {
00161
00162 Xapian::doccount new_collection_size = collection_size + inc.collection_size;
00163 if (new_collection_size != 0) {
00164
00165
00166 average_length = (average_length * collection_size +
00167 inc.average_length * inc.collection_size) /
00168 new_collection_size;
00169 }
00170 collection_size = new_collection_size;
00171
00172
00173 rset_size += inc.rset_size;
00174
00175
00176 std::map<string, Xapian::doccount>::const_iterator i;
00177 for (i = inc.termfreq.begin(); i != inc.termfreq.end(); ++i) {
00178 termfreq[i->first] += i->second;
00179 }
00180 for (i = inc.reltermfreq.begin(); i != inc.reltermfreq.end(); ++i) {
00181 reltermfreq[i->first] += i->second;
00182 }
00183 return *this;
00184 }
00185
00186 inline Xapian::doccount
00187 Stats::get_termfreq(const string & tname) const
00188 {
00189
00190 if (tname.empty()) return 0;
00191
00192 std::map<string, Xapian::doccount>::const_iterator tfreq;
00193 tfreq = termfreq.find(tname);
00194 Assert(tfreq != termfreq.end());
00195 return tfreq->second;
00196 }
00197
00198 inline void
00199 Stats::set_termfreq(const string & tname, Xapian::doccount tfreq)
00200 {
00201
00202
00203 Assert(termfreq.find(tname) == termfreq.end() ||
00204 termfreq.find(tname)->second == tfreq);
00205 termfreq[tname] = tfreq;
00206 }
00207
00208 inline Xapian::doccount
00209 Stats::get_reltermfreq(const string & tname) const
00210 {
00211
00212 if (tname.empty()) return 0;
00213
00214 std::map<string, Xapian::doccount>::const_iterator rtfreq;
00215 rtfreq = reltermfreq.find(tname);
00216 Assert(rtfreq != reltermfreq.end());
00217 return rtfreq->second;
00218 }
00219
00220 inline void
00221 Stats::set_reltermfreq(const string & tname, Xapian::doccount rtfreq)
00222 {
00223
00224
00225 Assert(reltermfreq.find(tname) == reltermfreq.end() ||
00226 reltermfreq.find(tname)->second == rtfreq);
00227 reltermfreq[tname] = rtfreq;
00228 }
00229
00230 #endif