matcher/rset.cc

Go to the documentation of this file.
00001 /* rset.cc
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2003,2007 Olly Betts
00006  * Copyright 2007 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 
00026 #include "database.h"
00027 #include "rset.h"
00028 #include "stats.h"
00029 #include "omdebug.h"
00030 
00031 #include "autoptr.h"
00032 #include "termlist.h"
00033 
00034 void
00035 RSetI::calculate_stats()
00036 {
00037     DEBUGCALL(MATCH, void, "RSetI::calculate_stats", "");
00038     Assert(!calculated_reltermfreqs);
00039     std::set<Xapian::docid>::const_iterator doc;
00040     for (doc = documents.begin(); doc != documents.end(); doc++) {
00041         DEBUGLINE(WTCALC, "Counting reltermfreqs in document " << *doc << " [ ");
00042         if (dbroot) {
00043             AutoPtr<TermList> tl =
00044                 AutoPtr<TermList>(dbroot->open_term_list(*doc));
00045             tl->next();
00046             while (!tl->at_end()) {
00047                 // FIXME - can this lookup be done faster?
00048                 // Store termnames in a hash for each document, rather than
00049                 // a list?
00050                 string tname = tl->get_termname();
00051                 if (reltermfreqs.find(tname) != reltermfreqs.end()) {
00052                     reltermfreqs[tname] ++;
00053                     DEBUGLINE(WTCALC, tname << " now has reltermfreq of " << reltermfreqs[tname]);
00054                 }
00055                 tl->next();
00056             }
00057         } else {
00058             Xapian::TermIterator tl = root.termlist_begin(*doc);
00059             Xapian::TermIterator tlend = root.termlist_end(*doc);
00060             while (tl != tlend) {
00061                 // FIXME - can this lookup be done faster?
00062                 // Store termnames in a hash for each document, rather than
00063                 // a list?
00064                 string tname = *tl;
00065                 if (reltermfreqs.find(tname) != reltermfreqs.end()) {
00066                     reltermfreqs[tname] ++;
00067                     DEBUGLINE(WTCALC, tname << " now has reltermfreq of " << reltermfreqs[tname]);
00068                 }
00069                 tl++;
00070             }
00071         }
00072         DEBUGLINE(WTCALC, "] ");
00073     }
00074     calculated_reltermfreqs = true;
00075 }
00076 
00077 void
00078 RSetI::contribute_stats(Stats & stats)
00079 {
00080     DEBUGCALL(MATCH, void, "RSetI::contribute_stats", stats);
00081     calculate_stats();
00082 
00083     std::map<string, Xapian::doccount>::const_iterator i;
00084     for (i = reltermfreqs.begin(); i != reltermfreqs.end(); i++) {
00085         stats.set_reltermfreq(i->first, i->second);
00086     }
00087     stats.rset_size += get_rsize();
00088 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.