common/stats.h

Go to the documentation of this file.
00001 /* stats.h: Handling of statistics needed for the search.
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2005,2007 Olly Betts
00005  * Copyright 2007 Lemur Consulting Ltd
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #ifndef OM_HGUARD_STATS_H
00024 #define OM_HGUARD_STATS_H
00025 
00026 #include "xapian/types.h"
00027 #include "omassert.h"
00028 #include <string>
00029 #include <map>
00030 
00031 #include "autoptr.h" // FIXME:1.1: remove this
00032 #include "weightinternal.h" // FIXME:1.1: remove this
00033 #include <list> // FIXME:1.1: remove this
00034 
00035 using namespace std;
00036 
00038 class Stats {
00039     public:
00041         Xapian::doccount collection_size;
00042 
00044         Xapian::doccount rset_size;
00045 
00047         Xapian::doclength average_length;
00048 
00050         std::map<string, Xapian::doccount> termfreq;
00051 
00053         std::map<string, Xapian::doccount> reltermfreq;
00054 
00055 
00056         Stats() : collection_size(0),
00057                   rset_size(0),
00058                   average_length(1.0)
00059         {}
00060 
00063         Stats & operator +=(const Stats & inc);
00064 
00070         Xapian::doccount get_termfreq(const string & tname) const;
00071 
00074         void set_termfreq(const string & tname, Xapian::doccount tfreq);
00075 
00081         Xapian::doccount get_reltermfreq(const string & tname) const;
00082 
00085         void set_reltermfreq(const string & tname, Xapian::doccount rtfreq);
00086 
00088         string get_description() const;
00089 
00090 
00103         mutable list<Xapian::Weight::Internal *> weight_internals;
00104 
00109         ~Stats() {
00110             list<Xapian::Weight::Internal *>::const_iterator i;
00111             for (i = weight_internals.begin(); i != weight_internals.end(); ++i)
00112             {
00113                 delete *i;
00114             }
00115         }
00116 
00128         Xapian::Weight::Internal * create_weight_internal() const
00129         {
00130             AutoPtr<Xapian::Weight::Internal> wti(new Xapian::Weight::Internal(*this));
00131             weight_internals.push_back(wti.get());
00132             return wti.release();
00133         }
00134 
00146         Xapian::Weight::Internal * create_weight_internal(const string & tname) const
00147         {
00148             AutoPtr<Xapian::Weight::Internal> wti(new Xapian::Weight::Internal(*this, tname));
00149             weight_internals.push_back(wti.get());
00150             return wti.release();
00151         }
00152 };
00153 
00155 // Inline method definitions for Stats //
00157 
00158 inline Stats &
00159 Stats::operator +=(const Stats & inc)
00160 {
00161     // Set the new collection size and average length.
00162     Xapian::doccount new_collection_size = collection_size + inc.collection_size;
00163     if (new_collection_size != 0) {
00164         // Cope with adding in a collection of zero size at the beginning:
00165         // perhaps we have multiple databases, but some are not yet populated
00166         average_length = (average_length * collection_size +
00167                           inc.average_length * inc.collection_size) /
00168                          new_collection_size;
00169     }
00170     collection_size = new_collection_size;
00171 
00172     // Add the rset size.
00173     rset_size += inc.rset_size;
00174 
00175     // Add termfreqs and reltermfreqs
00176     std::map<string, Xapian::doccount>::const_iterator i;
00177     for (i = inc.termfreq.begin(); i != inc.termfreq.end(); ++i) {
00178         termfreq[i->first] += i->second;
00179     }
00180     for (i = inc.reltermfreq.begin(); i != inc.reltermfreq.end(); ++i) {
00181         reltermfreq[i->first] += i->second;
00182     }
00183     return *this;
00184 }
00185 
00186 inline Xapian::doccount
00187 Stats::get_termfreq(const string & tname) const
00188 {
00189     // We pass an empty string for tname when calculating the extra weight.
00190     if (tname.empty()) return 0;
00191 
00192     std::map<string, Xapian::doccount>::const_iterator tfreq;
00193     tfreq = termfreq.find(tname);
00194     Assert(tfreq != termfreq.end());
00195     return tfreq->second;
00196 }
00197 
00198 inline void
00199 Stats::set_termfreq(const string & tname, Xapian::doccount tfreq)
00200 {
00201     // Can be called a second time, if a term occurs multiple times in the
00202     // query; if this happens, the termfreq should be the same each time.
00203     Assert(termfreq.find(tname) == termfreq.end() ||
00204            termfreq.find(tname)->second == tfreq);
00205     termfreq[tname] = tfreq;
00206 }
00207 
00208 inline Xapian::doccount
00209 Stats::get_reltermfreq(const string & tname) const
00210 {
00211     // We pass an empty string for tname when calculating the extra weight.
00212     if (tname.empty()) return 0;
00213 
00214     std::map<string, Xapian::doccount>::const_iterator rtfreq;
00215     rtfreq = reltermfreq.find(tname);
00216     Assert(rtfreq != reltermfreq.end());
00217     return rtfreq->second;
00218 }
00219 
00220 inline void
00221 Stats::set_reltermfreq(const string & tname, Xapian::doccount rtfreq)
00222 {
00223     // Can be called a second time, if a term occurs multiple times in the
00224     // query; if this happens, the termfreq should be the same each time.
00225     Assert(reltermfreq.find(tname) == reltermfreq.end() ||
00226            reltermfreq.find(tname)->second == rtfreq);
00227     reltermfreq[tname] = rtfreq;
00228 }
00229 
00230 #endif /* OM_HGUARD_STATS_H */

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.