common/expandweight.h

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #ifndef XAPIAN_INCLUDED_EXPANDWEIGHT_H
00022 #define XAPIAN_INCLUDED_EXPANDWEIGHT_H
00023 
00024 #include <xapian/database.h>
00025 
00026 #include "termlist.h"
00027 
00028 #include <string>
00029 #include <vector>
00030 
00031 namespace Xapian {
00032 namespace Internal {
00033 
00035 class ExpandStats {
00036     // Which databases in a multidb are included in termfreq.
00037     std::vector<bool> dbs_seen;
00038 
00039     // Average document length in the whole database.
00040     Xapian::doclength avlen;
00041 
00043     double expand_k;
00044 
00045   public:
00047     Xapian::doccount dbsize;
00048 
00050     Xapian::doccount termfreq;
00051 
00053     Xapian::weight multiplier;
00054 
00056     Xapian::doccount rtermfreq;
00057 
00059     size_t db_index;
00060 
00061     ExpandStats(Xapian::doclength avlen_, double expand_k_)
00062         : avlen(avlen_), expand_k(expand_k_),
00063           dbsize(0), termfreq(0), multiplier(0), rtermfreq(0), db_index(0) {
00064     }
00065 
00066     void accumulate(Xapian::termcount wdf, Xapian::doclength doclen, Xapian::doccount subtf, Xapian::doccount subdbsize) {
00067         // Boolean terms may have wdf == 0, but treat that as 1 so such terms
00068         // get a non-zero weight.
00069         if (wdf == 0) wdf = 1;
00070 
00071         multiplier += (expand_k + 1) * wdf / (expand_k * doclen / avlen + wdf);
00072         ++rtermfreq;
00073 
00074         // If we've not seen this sub-database before, then update dbsize and
00075         // termfreq and note that we have seen it.
00076         if (db_index >= dbs_seen.size() || !dbs_seen[db_index]) {
00077             dbsize += subdbsize;
00078             termfreq += subtf;
00079             if (db_index >= dbs_seen.size()) dbs_seen.resize(db_index + 1);
00080             dbs_seen[db_index] = true;
00081         }
00082     }
00083 };
00084 
00086 class ExpandWeight {
00088     const Xapian::Database db;
00089 
00091     Xapian::doccount rsize;
00092 
00103     bool use_exact_termfreq;
00104 
00106     double expand_k;
00107 
00108 public:
00109     ExpandWeight(const Xapian::Database &db_,
00110                  Xapian::doccount rsize_,
00111                  bool use_exact_termfreq_,
00112                  double expand_k_);
00113 
00114     Xapian::weight get_weight(TermList * merger,
00115                               const std::string & tname) const;
00116 };
00117 
00118 }
00119 }
00120 
00121 #endif // XAPIAN_INCLUDED_EXPANDWEIGHT_H

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.