00001
00021 #include <config.h>
00022
00023 #include "exactphrasepostlist.h"
00024 #include "positionlist.h"
00025 #include "omdebug.h"
00026
00027 #include <algorithm>
00028 #include <vector>
00029
00030 using namespace std;
00031
00032 ExactPhrasePostList::ExactPhrasePostList(PostList *source_,
00033 const vector<PostList*> &terms_)
00034 : SelectPostList(source_), terms(terms_)
00035 {
00036 size_t n = terms_.size();
00037 poslists = new PositionList*[n];
00038 try {
00039 order = new unsigned[n];
00040 } catch (...) {
00041 delete [] poslists;
00042 throw;
00043 }
00044 for (size_t i = 0; i < n; ++i) order[i] = i;
00045 }
00046
00047 ExactPhrasePostList::~ExactPhrasePostList()
00048 {
00049 delete [] poslists;
00050 delete [] order;
00051 }
00052
00053 void
00054 ExactPhrasePostList::start_position_list(unsigned i)
00055 {
00056 unsigned index = order[i];
00057 poslists[i] = terms[index]->read_position_list();
00058 poslists[i]->index = index;
00059 }
00060
00061 class TermCompare {
00062 vector<PostList *> & terms;
00063
00064 public:
00065 TermCompare(vector<PostList *> & terms_) : terms(terms_) { }
00066
00067 bool operator()(unsigned a, unsigned b) const {
00068 return terms[a]->get_wdf() < terms[b]->get_wdf();
00069 }
00070 };
00071
00072 bool
00073 ExactPhrasePostList::test_doc()
00074 {
00075 DEBUGCALL(MATCH, bool, "ExactPhrasePostList::test_doc", "");
00076
00077 if (terms.size() <= 1) RETURN(true);
00078
00079
00080
00081
00082
00083 sort(order, order + terms.size(), TermCompare(terms));
00084
00085
00086
00087
00088
00089 start_position_list(0);
00090 poslists[0]->skip_to(poslists[0]->index);
00091 if (poslists[0]->at_end()) RETURN(false);
00092
00093
00094
00095
00096 start_position_list(1);
00097 if (poslists[0]->get_size() < poslists[1]->get_size()) {
00098 poslists[1]->skip_to(poslists[1]->index);
00099 if (poslists[1]->at_end()) RETURN(false);
00100 swap(poslists[0], poslists[1]);
00101 }
00102
00103 unsigned read_hwm = 1;
00104 Xapian::termpos idx0 = poslists[0]->index;
00105 do {
00106 Xapian::termpos base = poslists[0]->get_position() - idx0;
00107 unsigned i = 1;
00108 while (true) {
00109 if (i > read_hwm) {
00110 read_hwm = i;
00111 start_position_list(i);
00112
00113
00114
00115 }
00116 Xapian::termpos required = base + poslists[i]->index;
00117 poslists[i]->skip_to(required);
00118 if (poslists[i]->at_end()) RETURN(false);
00119 if (poslists[i]->get_position() != required) break;
00120 if (++i == terms.size()) RETURN(true);
00121 }
00122 poslists[0]->next();
00123 } while (!poslists[0]->at_end());
00124 RETURN(false);
00125 }
00126
00127 Xapian::termcount
00128 ExactPhrasePostList::get_wdf() const
00129 {
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140 std::vector<PostList *>::const_iterator i = terms.begin();
00141 Xapian::termcount wdf = (*i)->get_wdf();
00142 for (; i != terms.end(); i++) {
00143 wdf = std::min(wdf, (*i)->get_wdf());
00144 }
00145 return wdf;
00146 }
00147
00148 Xapian::doccount
00149 ExactPhrasePostList::get_termfreq_est() const
00150 {
00151
00152
00153
00154 return source->get_termfreq_est() / 4;
00155 }
00156
00157 string
00158 ExactPhrasePostList::get_description() const
00159 {
00160 return "(ExactPhrase " + source->get_description() + ")";
00161 }