backends/multi/multi_alltermslist.cc

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007,2008 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include "multialltermslist.h"
00024 
00025 #include <xapian/error.h>
00026 
00027 #include "omassert.h"
00028 
00029 #include <algorithm>
00030 
00031 using namespace std;
00032 
00034 struct CompareTermListsByTerm {
00036     bool operator()(const TermList *a, const TermList *b) {
00037         return a->get_termname() > b->get_termname();
00038     }
00039 };
00040 
00041 template<class CLASS> struct delete_ptr {
00042     void operator()(CLASS *p) { delete p; }
00043 };
00044 
00045 MultiAllTermsList::MultiAllTermsList(const vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> > & dbs,
00046                                      const string & prefix)
00047 {
00048     // The 0 and 1 cases should be handled by our caller.
00049     AssertRel(dbs.size(), >=, 2);
00050     termlists.reserve(dbs.size());
00051     try {
00052         vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> >::const_iterator i;
00053         for (i = dbs.begin(); i != dbs.end(); ++i) {
00054             termlists.push_back((*i)->open_allterms(prefix));
00055         }
00056     } catch (...) {
00057         for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00058         throw;
00059     }
00060 }
00061 
00062 MultiAllTermsList::~MultiAllTermsList()
00063 {
00064     for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00065 }
00066 
00067 string
00068 MultiAllTermsList::get_termname() const
00069 {
00070     return current_term;
00071 }
00072 
00073 Xapian::doccount
00074 MultiAllTermsList::get_termfreq() const
00075 {
00076     if (termlists.empty()) return 0;
00077     vector<TermList *>::const_iterator i = termlists.begin();
00078     Xapian::doccount total_tf = (*i)->get_termfreq();
00079     while (++i != termlists.end()) {
00080         if ((*i)->get_termname() == current_term)
00081             total_tf += (*i)->get_termfreq();
00082     }
00083     return total_tf;
00084 }
00085 
00086 Xapian::termcount
00087 MultiAllTermsList::get_collection_freq() const
00088 {
00089     if (termlists.empty()) return 0;
00090     vector<TermList *>::const_iterator i = termlists.begin();
00091     Xapian::termcount total_cf = (*i)->get_collection_freq();
00092     while (++i != termlists.end()) {
00093         if ((*i)->get_termname() == current_term)
00094             total_cf += (*i)->get_collection_freq();
00095     }
00096     return total_cf;
00097 }
00098 
00099 TermList *
00100 MultiAllTermsList::next()
00101 {
00102     if (current_term.empty()) {
00103         // Make termlists into a heap so that the one (or one of the ones) with
00104         // earliest sorting term is at the top of the heap.
00105         vector<TermList*>::iterator i = termlists.begin();
00106         while (i != termlists.end()) {
00107             (*i)->next();
00108             if ((*i)->at_end()) {
00109                 i = termlists.erase(i);
00110             } else {
00111                 ++i;
00112             }
00113         }
00114         make_heap(termlists.begin(), termlists.end(),
00115                   CompareTermListsByTerm());
00116     } else {
00117         // Advance to the next termname.
00118         do {
00119             TermList * tl = termlists.front();
00120             pop_heap(termlists.begin(), termlists.end(),
00121                      CompareTermListsByTerm());
00122             tl->next();
00123             if (tl->at_end()) {
00124                 delete tl;
00125                 termlists.pop_back();
00126             } else {
00127                 termlists.back() = tl;
00128                 push_heap(termlists.begin(), termlists.end(),
00129                           CompareTermListsByTerm());
00130             }
00131         } while (!termlists.empty() &&
00132                  termlists.front()->get_termname() == current_term);
00133     }
00134 
00135     if (termlists.size() <= 1) {
00136         if (termlists.empty()) return NULL;
00137         TermList * tl = termlists[0];
00138         termlists.clear();
00139         return tl;
00140     }
00141 
00142     current_term = termlists.front()->get_termname();
00143     return NULL;
00144 }
00145 
00146 TermList *
00147 MultiAllTermsList::skip_to(const std::string &term)
00148 {
00149     // Assume the skip is likely to be a long distance, and rebuild the heap
00150     // from scratch.  FIXME: It would be useful to profile this against an
00151     // approach more like that next() uses if this ever gets heavy use.
00152     vector<TermList*>::iterator i = termlists.begin();
00153     while (i != termlists.end()) {
00154         (*i)->skip_to(term);
00155         if ((*i)->at_end()) {
00156             i = termlists.erase(i);
00157         } else {
00158             ++i;
00159         }
00160     }
00161 
00162     if (termlists.size() <= 1) {
00163         if (termlists.empty()) return NULL;
00164         TermList * tl = termlists[0];
00165         termlists.clear();
00166         return tl;
00167     }
00168 
00169     make_heap(termlists.begin(), termlists.end(), CompareTermListsByTerm());
00170     
00171     current_term = termlists.front()->get_termname();
00172     return NULL;
00173 }
00174 
00175 bool
00176 MultiAllTermsList::at_end() const
00177 {
00178     return termlists.empty();
00179 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.