00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "phrasepostlist.h"
00026 #include "positionlist.h"
00027 #include "omassert.h"
00028 #include "omdebug.h"
00029 #include "utils.h"
00030
00031 #include <algorithm>
00032
00036 class PositionListCmpLt {
00037 public:
00040 bool operator()(const PositionList *a, const PositionList *b) {
00041 return a->get_size() < b->get_size();
00042 }
00043 };
00044
00045
00048 bool
00049 NearPostList::test_doc()
00050 {
00051 DEBUGCALL(MATCH, bool, "NearPostList::test_doc", "");
00052 std::vector<PositionList *> plists;
00053
00054 std::vector<PostList *>::iterator i;
00055 for (i = terms.begin(); i != terms.end(); i++) {
00056 PositionList * p = (*i)->read_position_list();
00057
00058 if (!p) return false;
00059 plists.push_back(p);
00060 }
00061
00062 std::sort(plists.begin(), plists.end(), PositionListCmpLt());
00063
00064 Xapian::termpos pos;
00065 do {
00066 plists[0]->next();
00067 if (plists[0]->at_end()) RETURN(false);
00068 pos = plists[0]->get_position();
00069 } while (!do_test(plists, 1, pos, pos));
00070
00071 RETURN(true);
00072 }
00073
00074 bool
00075 NearPostList::do_test(std::vector<PositionList *> &plists, Xapian::termcount i,
00076 Xapian::termcount min, Xapian::termcount max)
00077 {
00078 DEBUGCALL(MATCH, bool, "NearPostList::do_test", "[plists], " << i << ", " << min << ", " << max);
00079 DEBUGLINE(MATCH, "docid = " << get_docid() << ", window = " << window);
00080 Xapian::termcount tmp = max + 1;
00081
00082 if (window <= tmp) tmp -= window; else tmp = 0;
00083 plists[i]->skip_to(tmp);
00084 while (!plists[i]->at_end()) {
00085 Xapian::termpos pos = plists[i]->get_position();
00086 DEBUGLINE(MATCH, "[" << i << "]: " << max - window + 1 << " " << min
00087 << " " << pos << " " << max << " " << min + window - 1);
00088 if (pos > min + window - 1) RETURN(false);
00089 if (i + 1 == plists.size()) RETURN(true);
00090 if (pos < min) min = pos;
00091 else if (pos > max) max = pos;
00092 if (do_test(plists, i + 1, min, max)) RETURN(true);
00093 plists[i]->next();
00094 }
00095 RETURN(false);
00096 }
00097
00098 Xapian::termcount
00099 NearPostList::get_wdf() const
00100 {
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 std::vector<PostList *>::const_iterator i = terms.begin();
00142 Xapian::termcount wdf = (*i)->get_wdf();
00143 for (; i != terms.end(); i++) {
00144 wdf = std::min(wdf, (*i)->get_wdf());
00145 }
00146
00147
00148
00149 return std::max(wdf, 1u);
00150 }
00151
00152 std::string
00153 NearPostList::get_description() const
00154 {
00155 return "(Near " + om_tostring(window) + " " + source->get_description() + ")";
00156 }
00157
00158
00159
00162 bool
00163 PhrasePostList::test_doc()
00164 {
00165 DEBUGCALL(MATCH, bool, "PhrasePostList::test_doc", "");
00166 std::vector<PositionList *> plists;
00167
00168 std::vector<PostList *>::iterator i;
00169 for (i = terms.begin(); i != terms.end(); i++) {
00170 PositionList * p = (*i)->read_position_list();
00171
00172 if (!p) return false;
00173 p->index = i - terms.begin();
00174 plists.push_back(p);
00175 }
00176
00177 std::sort(plists.begin(), plists.end(), PositionListCmpLt());
00178
00179 Xapian::termpos pos;
00180 Xapian::termpos idx, min;
00181 do {
00182 plists[0]->next();
00183 if (plists[0]->at_end()) {
00184 DEBUGLINE(MATCH, "--MISS--");
00185 RETURN(false);
00186 }
00187 pos = plists[0]->get_position();
00188 idx = plists[0]->index;
00189 min = pos + plists.size() - idx;
00190 if (min > window) min -= window; else min = 0;
00191 } while (!do_test(plists, 1, min, pos + window - idx));
00192 DEBUGLINE(MATCH, "**HIT**");
00193 RETURN(true);
00194 }
00195
00196 bool
00197 PhrasePostList::do_test(std::vector<PositionList *> &plists, Xapian::termcount i,
00198 Xapian::termcount min, Xapian::termcount max)
00199 {
00200 DEBUGCALL(MATCH, bool, "PhrasePostList::do_test", "[plists], " << i << ", " << min << ", " << max);
00201 DEBUGLINE(MATCH, "docid = " << get_docid() << ", window = " << window);
00202 Xapian::termpos idxi = plists[i]->index;
00203 DEBUGLINE(MATCH, "my idx in phrase is " << idxi);
00204
00205 Xapian::termpos mymin = min + idxi;
00206 Xapian::termpos mymax = max - plists.size() + idxi;
00207 DEBUGLINE(MATCH, "MIN = " << mymin << " MAX = " << mymax);
00208
00209
00210 for (Xapian::termcount j = 0; j < i; j++) {
00211 Xapian::termpos idxj = plists[j]->index;
00212 if (idxj > idxi) {
00213 Xapian::termpos tmp = plists[j]->get_position() + idxj - idxi;
00214 DEBUGLINE(MATCH, "ABOVE " << tmp);
00215 if (tmp < mymax) mymax = tmp;
00216 } else {
00217 AssertRel(idxi, !=, idxj);
00218 Xapian::termpos tmp = plists[j]->get_position() + idxi - idxj;
00219 DEBUGLINE(MATCH, "BELOW " << tmp);
00220 if (tmp > mymin) mymin = tmp;
00221 }
00222 DEBUGLINE(MATCH, "min = " << mymin << " max = " << mymax);
00223 }
00224 plists[i]->skip_to(mymin);
00225
00226 while (!plists[i]->at_end()) {
00227 Xapian::termpos pos = plists[i]->get_position();
00228 DEBUGLINE(MATCH, " " << mymin << " " << pos << " " << mymax);
00229 if (pos > mymax) RETURN(false);
00230 if (i + 1 == plists.size()) RETURN(true);
00231 Xapian::termpos tmp = pos + window - idxi;
00232 if (tmp < max) max = tmp;
00233 tmp = pos + plists.size() - idxi;
00234 if (tmp > window) {
00235 tmp -= window;
00236 if (tmp > min) min = tmp;
00237 }
00238 if (do_test(plists, i + 1, min, max)) RETURN(true);
00239 plists[i]->next();
00240 }
00241 RETURN(false);
00242 }
00243
00244 Xapian::termcount
00245 PhrasePostList::get_wdf() const
00246 {
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256 std::vector<PostList *>::const_iterator i = terms.begin();
00257 Xapian::termcount wdf = (*i)->get_wdf();
00258 for (; i != terms.end(); i++) {
00259 wdf = std::min(wdf, (*i)->get_wdf());
00260 }
00261
00262
00263
00264 return std::max(wdf / 2, 1u);
00265 }
00266
00267 std::string
00268 PhrasePostList::get_description() const
00269 {
00270 return "(Phrase " + om_tostring(window) + " "
00271 + source->get_description() + ")";
00272 }