00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include "multialltermslist.h"
00024
00025 #include <xapian/error.h>
00026
00027 #include "omassert.h"
00028
00029 #include <algorithm>
00030
00031 using namespace std;
00032
00034 struct CompareTermListsByTerm {
00036 bool operator()(const TermList *a, const TermList *b) {
00037 return a->get_termname() > b->get_termname();
00038 }
00039 };
00040
00041 template<class CLASS> struct delete_ptr {
00042 void operator()(CLASS *p) { delete p; }
00043 };
00044
00045 MultiAllTermsList::MultiAllTermsList(const vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> > & dbs,
00046 const string & prefix)
00047 {
00048
00049 AssertRel(dbs.size(), >=, 2);
00050 termlists.reserve(dbs.size());
00051 try {
00052 vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> >::const_iterator i;
00053 for (i = dbs.begin(); i != dbs.end(); ++i) {
00054 termlists.push_back((*i)->open_allterms(prefix));
00055 }
00056 } catch (...) {
00057 for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00058 throw;
00059 }
00060 }
00061
00062 MultiAllTermsList::~MultiAllTermsList()
00063 {
00064 for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00065 }
00066
00067 string
00068 MultiAllTermsList::get_termname() const
00069 {
00070 return current_term;
00071 }
00072
00073 Xapian::doccount
00074 MultiAllTermsList::get_termfreq() const
00075 {
00076 if (termlists.empty()) return 0;
00077 vector<TermList *>::const_iterator i = termlists.begin();
00078 Xapian::doccount total_tf = (*i)->get_termfreq();
00079 while (++i != termlists.end()) {
00080 if ((*i)->get_termname() == current_term)
00081 total_tf += (*i)->get_termfreq();
00082 }
00083 return total_tf;
00084 }
00085
00086 Xapian::termcount
00087 MultiAllTermsList::get_collection_freq() const
00088 {
00089 if (termlists.empty()) return 0;
00090 vector<TermList *>::const_iterator i = termlists.begin();
00091 Xapian::termcount total_cf = (*i)->get_collection_freq();
00092 while (++i != termlists.end()) {
00093 if ((*i)->get_termname() == current_term)
00094 total_cf += (*i)->get_collection_freq();
00095 }
00096 return total_cf;
00097 }
00098
00099 TermList *
00100 MultiAllTermsList::next()
00101 {
00102 if (current_term.empty()) {
00103
00104
00105 vector<TermList*>::iterator i = termlists.begin();
00106 while (i != termlists.end()) {
00107 (*i)->next();
00108 if ((*i)->at_end()) {
00109 i = termlists.erase(i);
00110 } else {
00111 ++i;
00112 }
00113 }
00114 make_heap(termlists.begin(), termlists.end(),
00115 CompareTermListsByTerm());
00116 } else {
00117
00118 do {
00119 TermList * tl = termlists.front();
00120 pop_heap(termlists.begin(), termlists.end(),
00121 CompareTermListsByTerm());
00122 tl->next();
00123 if (tl->at_end()) {
00124 delete tl;
00125 termlists.pop_back();
00126 } else {
00127 termlists.back() = tl;
00128 push_heap(termlists.begin(), termlists.end(),
00129 CompareTermListsByTerm());
00130 }
00131 } while (!termlists.empty() &&
00132 termlists.front()->get_termname() == current_term);
00133 }
00134
00135 if (termlists.size() <= 1) {
00136 if (termlists.empty()) return NULL;
00137 TermList * tl = termlists[0];
00138 termlists.clear();
00139 return tl;
00140 }
00141
00142 current_term = termlists.front()->get_termname();
00143 return NULL;
00144 }
00145
00146 TermList *
00147 MultiAllTermsList::skip_to(const std::string &term)
00148 {
00149
00150
00151
00152 vector<TermList*>::iterator i = termlists.begin();
00153 while (i != termlists.end()) {
00154 (*i)->skip_to(term);
00155 if ((*i)->at_end()) {
00156 i = termlists.erase(i);
00157 } else {
00158 ++i;
00159 }
00160 }
00161
00162 if (termlists.size() <= 1) {
00163 if (termlists.empty()) return NULL;
00164 TermList * tl = termlists[0];
00165 termlists.clear();
00166 return tl;
00167 }
00168
00169 make_heap(termlists.begin(), termlists.end(), CompareTermListsByTerm());
00170
00171 current_term = termlists.front()->get_termname();
00172 return NULL;
00173 }
00174
00175 bool
00176 MultiAllTermsList::at_end() const
00177 {
00178 return termlists.empty();
00179 }