tests/api_anydb.cc

Go to the documentation of this file.
00001 /* api_anydb.cc: tests which work with any backend
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
00006  * Copyright 2006,2008 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 
00026 #include "api_anydb.h"
00027 
00028 #include <algorithm>
00029 #include <string>
00030 
00031 #include <xapian.h>
00032 #include "backendmanager_local.h"
00033 #include "testsuite.h"
00034 #include "testutils.h"
00035 #include "utils.h"
00036 
00037 #include "apitest.h"
00038 
00039 #include <list>
00040 
00041 using namespace std;
00042 
00043 static void
00044 print_mset_weights(const Xapian::MSet &mset)
00045 {
00046     Xapian::MSetIterator i = mset.begin();
00047     for ( ; i != mset.end(); ++i) {
00048         tout << " " << i.get_weight();
00049     }
00050 }
00051 
00052 static void
00053 print_mset_percentages(const Xapian::MSet &mset)
00054 {
00055     Xapian::MSetIterator i = mset.begin();
00056     for ( ; i != mset.end(); ++i) {
00057         tout << " " << mset.convert_to_percent(i);
00058     }
00059 }
00060 
00061 static Xapian::Query
00062 query(Xapian::Query::op op, string t1 = "", string t2 = "",
00063       string t3 = "", string t4 = "", string t5 = "",
00064       string t6 = "", string t7 = "", string t8 = "",
00065       string t9 = "", string t10 = "")
00066 {
00067     vector<string> v;
00068     Xapian::Stem stemmer("english");
00069     if (!t1.empty()) v.push_back(stemmer(t1));
00070     if (!t2.empty()) v.push_back(stemmer(t2));
00071     if (!t3.empty()) v.push_back(stemmer(t3));
00072     if (!t4.empty()) v.push_back(stemmer(t4));
00073     if (!t5.empty()) v.push_back(stemmer(t5));
00074     if (!t6.empty()) v.push_back(stemmer(t6));
00075     if (!t7.empty()) v.push_back(stemmer(t7));
00076     if (!t8.empty()) v.push_back(stemmer(t8));
00077     if (!t9.empty()) v.push_back(stemmer(t9));
00078     if (!t10.empty()) v.push_back(stemmer(t10));
00079     return Xapian::Query(op, v.begin(), v.end());
00080 }
00081 
00082 static Xapian::Query
00083 query(Xapian::Query::op op, Xapian::termcount parameter,
00084       string t1 = "", string t2 = "",
00085       string t3 = "", string t4 = "", string t5 = "",
00086       string t6 = "", string t7 = "", string t8 = "",
00087       string t9 = "", string t10 = "")
00088 {
00089     vector<string> v;
00090     Xapian::Stem stemmer("english");
00091     if (!t1.empty()) v.push_back(stemmer(t1));
00092     if (!t2.empty()) v.push_back(stemmer(t2));
00093     if (!t3.empty()) v.push_back(stemmer(t3));
00094     if (!t4.empty()) v.push_back(stemmer(t4));
00095     if (!t5.empty()) v.push_back(stemmer(t5));
00096     if (!t6.empty()) v.push_back(stemmer(t6));
00097     if (!t7.empty()) v.push_back(stemmer(t7));
00098     if (!t8.empty()) v.push_back(stemmer(t8));
00099     if (!t9.empty()) v.push_back(stemmer(t9));
00100     if (!t10.empty()) v.push_back(stemmer(t10));
00101     return Xapian::Query(op, v.begin(), v.end(), parameter);
00102 }
00103 
00104 static Xapian::Query
00105 query(const string &t)
00106 {
00107     return Xapian::Query(Xapian::Stem("english")(t));
00108 }
00109 
00110 // #######################################################################
00111 // # Tests start here
00112 
00113 // tests that the backend doesn't return zero docids
00114 DEFINE_TESTCASE(zerodocid1, backend) {
00115     // open the database (in this case a simple text file
00116     // we prepared earlier)
00117 
00118     Xapian::Database mydb(get_database("apitest_onedoc"));
00119 
00120     Xapian::Enquire enquire(mydb);
00121 
00122     // make a simple query, with one word in it - "word".
00123     enquire.set_query(Xapian::Query("word"));
00124 
00125     // retrieve the top ten results (we only expect one)
00126     Xapian::MSet mymset = enquire.get_mset(0, 10);
00127 
00128     // We've done the query, now check that the result is what
00129     // we expect (1 document, with non-zero docid)
00130     TEST_MSET_SIZE(mymset, 1);
00131 
00132     TEST_AND_EXPLAIN(*(mymset.begin()) != 0,
00133                      "A query on a database returned a zero docid");
00134 
00135     return true;
00136 }
00137 
00138 // tests that an empty query returns no matches
00139 DEFINE_TESTCASE(emptyquery1, backend) {
00140     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00141 
00142     enquire.set_query(Xapian::Query());
00143     Xapian::MSet mymset = enquire.get_mset(0, 10);
00144     TEST_MSET_SIZE(mymset, 0);
00145     TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
00146     TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
00147     TEST_EQUAL(mymset.get_matches_estimated(), 0);
00148 
00149     vector<Xapian::Query> v;
00150     enquire.set_query(Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()));
00151     mymset = enquire.get_mset(0, 10);
00152     TEST_MSET_SIZE(mymset, 0);
00153     TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
00154     TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
00155     TEST_EQUAL(mymset.get_matches_estimated(), 0);
00156 
00157     return true;
00158 }
00159 
00160 // tests the document count for a simple query
00161 DEFINE_TESTCASE(simplequery1, backend) {
00162     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00163     enquire.set_query(Xapian::Query("word"));
00164     Xapian::MSet mymset = enquire.get_mset(0, 10);
00165     TEST_MSET_SIZE(mymset, 2);
00166     return true;
00167 }
00168 
00169 // tests for the right documents and weights returned with simple query
00170 DEFINE_TESTCASE(simplequery2, backend) {
00171     // open the database (in this case a simple text file
00172     // we prepared earlier)
00173     Xapian::Database db = get_database("apitest_simpledata");
00174     Xapian::Enquire enquire(db);
00175     enquire.set_query(Xapian::Query("word"));
00176 
00177     // retrieve the top results
00178     Xapian::MSet mymset = enquire.get_mset(0, 10);
00179 
00180     // We've done the query, now check that the result is what
00181     // we expect (documents 2 and 4)
00182     mset_expect_order(mymset, 2, 4);
00183 
00184     // Check the weights
00185     Xapian::MSetIterator i = mymset.begin();
00186     // These weights are for BM25Weight(1,0,1,0.5,0.5)
00187     TEST_EQUAL_DOUBLE(i.get_weight(), 1.04648168717725);
00188     i++;
00189     TEST_EQUAL_DOUBLE(i.get_weight(), 0.640987686595914);
00190 
00191     return true;
00192 }
00193 
00194 // tests for the right document count for another simple query
00195 DEFINE_TESTCASE(simplequery3, backend) {
00196     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00197     enquire.set_query(query("this"));
00198     Xapian::MSet mymset = enquire.get_mset(0, 10);
00199 
00200     // Check that 6 documents were returned.
00201     TEST_MSET_SIZE(mymset, 6);
00202 
00203     return true;
00204 }
00205 
00206 // tests for the right document count for a wildcard query
00207 // FIXME: move this to querytest (and just use an InMemory DB).
00208 DEFINE_TESTCASE(wildquery1, backend) {
00209     Xapian::QueryParser queryparser;
00210     unsigned flags = Xapian::QueryParser::FLAG_WILDCARD |
00211                      Xapian::QueryParser::FLAG_LOVEHATE;
00212     queryparser.set_stemmer(Xapian::Stem("english"));
00213     queryparser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
00214     Xapian::Database db = get_database("apitest_simpledata");
00215     queryparser.set_database(db);
00216     Xapian::Enquire enquire(db);
00217 
00218     Xapian::Query qobj = queryparser.parse_query("th*", flags);
00219     tout << qobj.get_description() << endl;
00220     enquire.set_query(qobj);
00221     Xapian::MSet mymset = enquire.get_mset(0, 10);
00222     // Check that 6 documents were returned.
00223     TEST_MSET_SIZE(mymset, 6);
00224 
00225     qobj = queryparser.parse_query("notindb* \"this\"", flags);
00226     tout << qobj.get_description() << endl;
00227     enquire.set_query(qobj);
00228     mymset = enquire.get_mset(0, 10);
00229     // Check that 6 documents were returned.
00230     TEST_MSET_SIZE(mymset, 6);
00231 
00232     qobj = queryparser.parse_query("+notindb* \"this\"", flags);
00233     tout << qobj.get_description() << endl;
00234     enquire.set_query(qobj);
00235     mymset = enquire.get_mset(0, 10);
00236     // Check that 0 documents were returned.
00237     TEST_MSET_SIZE(mymset, 0);
00238 
00239     return true;
00240 }
00241 
00242 // tests a query across multiple databases
00243 DEFINE_TESTCASE(multidb1, backend) {
00244     Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
00245     Xapian::Enquire enquire1(mydb1);
00246 
00247     Xapian::Database mydb2(get_database("apitest_simpledata"));
00248     mydb2.add_database(get_database("apitest_simpledata2"));
00249     Xapian::Enquire enquire2(mydb2);
00250 
00251     // make a simple query, with one word in it - "word".
00252     Xapian::Query myquery("word");
00253     enquire1.set_query(myquery);
00254     enquire2.set_query(myquery);
00255 
00256     // retrieve the top ten results from each method of accessing
00257     // multiple text files
00258     Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
00259     Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
00260 
00261     TEST_EQUAL(mymset1.size(), mymset2.size());
00262     TEST(mset_range_is_same_weights(mymset1, 0, mymset2, 0, mymset1.size()));
00263     return true;
00264 }
00265 
00266 // tests a query across multiple databases with terms only
00267 // in one of the two databases
00268 DEFINE_TESTCASE(multidb2, backend && !multi) {
00269     Xapian::Database mydb1(get_database("apitest_simpledata",
00270                                   "apitest_simpledata2"));
00271     Xapian::Enquire enquire1(mydb1);
00272 
00273     Xapian::Database mydb2(get_database("apitest_simpledata"));
00274     mydb2.add_database(get_database("apitest_simpledata2"));
00275     Xapian::Enquire enquire2(mydb2);
00276 
00277     // make a simple query
00278     Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
00279     enquire1.set_query(myquery);
00280     enquire2.set_query(myquery);
00281 
00282     // retrieve the top ten results from each method of accessing
00283     // multiple text files
00284     Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
00285     Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
00286 
00287     TEST_EQUAL(mymset1.size(), mymset2.size());
00288     TEST(mset_range_is_same_weights(mymset1, 0, mymset2, 0, mymset1.size()));
00289     return true;
00290 }
00291 
00292 // test that a multidb with 2 dbs query returns correct docids
00293 DEFINE_TESTCASE(multidb3, backend && !multi) {
00294     Xapian::Database mydb2(get_database("apitest_simpledata"));
00295     mydb2.add_database(get_database("apitest_simpledata2"));
00296     Xapian::Enquire enquire(mydb2);
00297 
00298     // make a query
00299     Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
00300     enquire.set_weighting_scheme(Xapian::BoolWeight());
00301     enquire.set_query(myquery);
00302 
00303     // retrieve the top ten results
00304     Xapian::MSet mymset = enquire.get_mset(0, 10);
00305     mset_expect_order(mymset, 2, 3, 7);
00306 
00307     return true;
00308 }
00309 
00310 // test that a multidb with 3 dbs query returns correct docids
00311 DEFINE_TESTCASE(multidb4, backend && !multi) {
00312     Xapian::Database mydb2(get_database("apitest_simpledata"));
00313     mydb2.add_database(get_database("apitest_simpledata2"));
00314     mydb2.add_database(get_database("apitest_termorder"));
00315     Xapian::Enquire enquire(mydb2);
00316 
00317     // make a query
00318     Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
00319     enquire.set_weighting_scheme(Xapian::BoolWeight());
00320     enquire.set_query(myquery);
00321 
00322     // retrieve the top ten results
00323     Xapian::MSet mymset = enquire.get_mset(0, 10);
00324     mset_expect_order(mymset, 2, 3, 4, 10);
00325 
00326     return true;
00327 }
00328 
00329 // tests MultiPostList::skip_to().
00330 DEFINE_TESTCASE(multidb5, backend && !multi) {
00331     Xapian::Database mydb2(get_database("apitest_simpledata"));
00332     mydb2.add_database(get_database("apitest_simpledata2"));
00333     Xapian::Enquire enquire(mydb2);
00334 
00335     // make a query
00336     Xapian::Query myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
00337     enquire.set_weighting_scheme(Xapian::BoolWeight());
00338     enquire.set_query(myquery);
00339 
00340     // retrieve the top ten results
00341     Xapian::MSet mymset = enquire.get_mset(0, 10);
00342     mset_expect_order(mymset, 2);
00343 
00344     return true;
00345 }
00346 
00347 // tests that when specifying maxitems to get_mset, no more than
00348 // that are returned.
00349 DEFINE_TESTCASE(msetmaxitems1, backend) {
00350     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00351     enquire.set_query(query("this"));
00352     Xapian::MSet mymset = enquire.get_mset(0, 1);
00353     TEST_MSET_SIZE(mymset, 1);
00354 
00355     mymset = enquire.get_mset(0, 5);
00356     TEST_MSET_SIZE(mymset, 5);
00357 
00358     return true;
00359 }
00360 
00361 // tests the returned weights are as expected (regression test for remote
00362 // backend which was using the average weight rather than the actual document
00363 // weight for computing weights - fixed in 1.0.0).
00364 DEFINE_TESTCASE(expandweights1, backend) {
00365     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00366     enquire.set_query(Xapian::Query("this"));
00367 
00368     Xapian::MSet mymset = enquire.get_mset(0, 10);
00369 
00370     Xapian::RSet myrset;
00371     Xapian::MSetIterator i = mymset.begin();
00372     myrset.add_document(*i);
00373     myrset.add_document(*(++i));
00374 
00375     Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
00376     TEST_EQUAL(eset.size(), 3);
00377     TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
00378     TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
00379     TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
00380 
00381     return true;
00382 }
00383 
00384 // Just like test_expandweights1 but without USE_EXACT_TERMFREQ.
00385 DEFINE_TESTCASE(expandweights2, backend) {
00386     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00387     enquire.set_query(Xapian::Query("this"));
00388 
00389     Xapian::MSet mymset = enquire.get_mset(0, 10);
00390 
00391     Xapian::RSet myrset;
00392     Xapian::MSetIterator i = mymset.begin();
00393     myrset.add_document(*i);
00394     myrset.add_document(*(++i));
00395 
00396     Xapian::ESet eset = enquire.get_eset(3, myrset);
00397     TEST_EQUAL(eset.size(), 3);
00398     if (strcmp(get_dbtype(), "multi") != 0) {
00399         // For a single database, the weights should be the same with or
00400         // without USE_EXACT_TERMFREQ.
00401         TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
00402         TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
00403         TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
00404     } else {
00405         // For multiple databases, we expect that using USE_EXACT_TERMFREQ
00406         // will result in different weights in some cases.
00407         TEST_NOT_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
00408         TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
00409         TEST_NOT_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
00410     }
00411 
00412     return true;
00413 }
00414 
00415 // tests that when specifying maxitems to get_eset, no more than
00416 // that are returned.
00417 DEFINE_TESTCASE(expandmaxitems1, backend) {
00418     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00419     enquire.set_query(Xapian::Query("this"));
00420 
00421     Xapian::MSet mymset = enquire.get_mset(0, 10);
00422     tout << "mymset.size() = " << mymset.size() << endl;
00423     TEST(mymset.size() >= 2);
00424 
00425     Xapian::RSet myrset;
00426     Xapian::MSetIterator i = mymset.begin();
00427     myrset.add_document(*i);
00428     myrset.add_document(*(++i));
00429 
00430     Xapian::ESet myeset = enquire.get_eset(1, myrset);
00431     TEST_EQUAL(myeset.size(), 1);
00432 
00433     return true;
00434 }
00435 
00436 // tests that a pure boolean query has all weights set to 0
00437 DEFINE_TESTCASE(boolquery1, backend) {
00438     Xapian::Query myboolquery(query("this"));
00439 
00440     // open the database (in this case a simple text file
00441     // we prepared earlier)
00442     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00443     enquire.set_query(myboolquery);
00444     enquire.set_weighting_scheme(Xapian::BoolWeight());
00445 
00446     // retrieve the top results
00447     Xapian::MSet mymset = enquire.get_mset(0, 10);
00448 
00449     TEST_NOT_EQUAL(mymset.size(), 0);
00450     TEST_EQUAL(mymset.get_max_possible(), 0);
00451     for (Xapian::MSetIterator i = mymset.begin(); i != mymset.end(); ++i) {
00452         TEST_EQUAL(i.get_weight(), 0);
00453     }
00454     return true;
00455 }
00456 
00457 // tests that get_mset() specifying "this" works as expected
00458 DEFINE_TESTCASE(msetfirst1, backend) {
00459     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00460     enquire.set_query(query("this"));
00461     Xapian::MSet mymset1 = enquire.get_mset(0, 6);
00462     Xapian::MSet mymset2 = enquire.get_mset(3, 3);
00463     TEST(mset_range_is_same(mymset1, 3, mymset2, 0, 3));
00464 
00465     // Regression test - we weren't adjusting the index into items[] by
00466     // firstitem in api/omenquire.cc.
00467     TEST_EQUAL(mymset1[5].get_document().get_data(),
00468                mymset2[2].get_document().get_data());
00469     return true;
00470 }
00471 
00472 // tests the converting-to-percent functions
00473 DEFINE_TESTCASE(topercent1, backend) {
00474     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00475     enquire.set_query(query("this"));
00476     Xapian::MSet mymset = enquire.get_mset(0, 20);
00477 
00478     int last_pct = 100;
00479     Xapian::MSetIterator i = mymset.begin();
00480     for ( ; i != mymset.end(); ++i) {
00481         int pct = mymset.convert_to_percent(i);
00482         TEST_AND_EXPLAIN(pct == i.get_percent(),
00483                          "convert_to_%(msetitor) != convert_to_%(wt)");
00484         TEST_AND_EXPLAIN(pct == mymset.convert_to_percent(i.get_weight()),
00485                          "convert_to_%(msetitor) != convert_to_%(wt)");
00486         TEST_AND_EXPLAIN(pct >= 0 && pct <= 100,
00487                          "percentage out of range: " << pct);
00488         TEST_AND_EXPLAIN(pct <= last_pct, "percentage increased down mset");
00489         last_pct = pct;
00490     }
00491     return true;
00492 }
00493 
00494 // tests the percentage values returned
00495 DEFINE_TESTCASE(topercent2, backend) {
00496     BackendManagerLocal local_manager;
00497     local_manager.set_datadir(test_driver::get_srcdir() + "/testdata/");
00498     Xapian::Enquire localenq(local_manager.get_database("apitest_simpledata"));
00499     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00500 
00501     int pct;
00502 
00503     // First, test a search in which the top document scores 100%.
00504     enquire.set_query(query("this"));
00505     localenq.set_query(query("this"));
00506     Xapian::MSet mymset = enquire.get_mset(0, 20);
00507     Xapian::MSet localmset = localenq.get_mset(0, 20);
00508 
00509     Xapian::MSetIterator i = mymset.begin();
00510     TEST(i != mymset.end());
00511     pct = mymset.convert_to_percent(i);
00512     TEST_EQUAL(pct, 100);
00513 
00514     TEST_EQUAL(mymset, localmset);
00515     TEST(mset_range_is_same_percents(mymset, 0, localmset, 0, mymset.size()));
00516 
00517     // A search in which the top document doesn't have 100%
00518     Xapian::Query q = query(Xapian::Query::OP_OR,
00519                             "this", "line", "paragraph", "rubbish");
00520     enquire.set_query(q);
00521     localenq.set_query(q);
00522     mymset = enquire.get_mset(0, 20);
00523     localmset = localenq.get_mset(0, 20);
00524 
00525     i = mymset.begin();
00526     TEST(i != mymset.end());
00527     pct = mymset.convert_to_percent(i);
00528     TEST_GREATER(pct, 65);
00529     TEST_LESSER(pct, 75);
00530 
00531     ++i;
00532 
00533     TEST(i != mymset.end());
00534     pct = mymset.convert_to_percent(i);
00535     TEST_GREATER(pct, 40);
00536     TEST_LESSER(pct, 50);
00537 
00538     TEST_EQUAL(mymset, localmset);
00539     TEST(mset_range_is_same_percents(mymset, 0, localmset, 0, mymset.size()));
00540 
00541     return true;
00542 }
00543 
00544 class myExpandFunctor : public Xapian::ExpandDecider {
00545     public:
00546         bool operator()(const string & tname) const {
00547             unsigned long sum = 0;
00548             for (string::const_iterator i=tname.begin(); i!=tname.end(); ++i) {
00549                 sum += *i;
00550             }
00551 //          if (verbose) {
00552 //              tout << tname << "==> " << sum << "\n";
00553 //          }
00554             return (sum % 2) == 0;
00555         }
00556 };
00557 
00558 // tests the expand decision functor
00559 DEFINE_TESTCASE(expandfunctor1, backend) {
00560     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00561     enquire.set_query(Xapian::Query("this"));
00562 
00563     Xapian::MSet mymset = enquire.get_mset(0, 10);
00564     TEST(mymset.size() >= 2);
00565 
00566     Xapian::RSet myrset;
00567     Xapian::MSetIterator i = mymset.begin();
00568     myrset.add_document(*i);
00569     myrset.add_document(*(++i));
00570 
00571     myExpandFunctor myfunctor;
00572 
00573     Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
00574     unsigned int neweset_size = 0;
00575     Xapian::ESetIterator j = myeset_orig.begin();
00576     for ( ; j != myeset_orig.end(); ++j) {
00577         if (myfunctor(*j)) neweset_size++;
00578     }
00579     Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
00580 
00581 #if 0
00582     // Compare myeset with the hand-filtered version of myeset_orig.
00583     if (verbose) {
00584         tout << "orig_eset: ";
00585         copy(myeset_orig.begin(), myeset_orig.end(),
00586              ostream_iterator<Xapian::ESetItem>(tout, " "));
00587         tout << "\n";
00588 
00589         tout << "new_eset: ";
00590         copy(myeset.begin(), myeset.end(),
00591              ostream_iterator<Xapian::ESetItem>(tout, " "));
00592         tout << "\n";
00593     }
00594 #endif
00595     Xapian::ESetIterator orig = myeset_orig.begin();
00596     Xapian::ESetIterator filt = myeset.begin();
00597     for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
00598         // skip over items that shouldn't be in myeset
00599         while (orig != myeset_orig.end() && !myfunctor(*orig)) {
00600             ++orig;
00601         }
00602 
00603         TEST_AND_EXPLAIN(*orig == *filt &&
00604                          orig.get_weight() == filt.get_weight(),
00605                          "Mismatch in items " << *orig << " vs. " << *filt
00606                          << " after filtering");
00607     }
00608 
00609     while (orig != myeset_orig.end() && !myfunctor(*orig)) {
00610         ++orig;
00611     }
00612 
00613     TEST_EQUAL(orig, myeset_orig.end());
00614     TEST_AND_EXPLAIN(filt == myeset.end(),
00615                      "Extra items in the filtered eset.");
00616     return true;
00617 }
00618 
00619 // tests the percent cutoff option
00620 DEFINE_TESTCASE(pctcutoff1, backend) {
00621     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00622     enquire.set_query(query(Xapian::Query::OP_OR,
00623                             "this", "line", "paragraph", "rubbish"));
00624     Xapian::MSet mymset1 = enquire.get_mset(0, 100);
00625 
00626     if (verbose) {
00627         tout << "Original mset pcts:";
00628         print_mset_percentages(mymset1);
00629         tout << "\n";
00630     }
00631 
00632     unsigned int num_items = 0;
00633     int my_pct = 100;
00634     int changes = 0;
00635     Xapian::MSetIterator i = mymset1.begin();
00636     int c = 0;
00637     for ( ; i != mymset1.end(); ++i, ++c) {
00638         int new_pct = mymset1.convert_to_percent(i);
00639         if (new_pct != my_pct) {
00640             changes++;
00641             if (changes > 3) break;
00642             num_items = c;
00643             my_pct = new_pct;
00644         }
00645     }
00646 
00647     TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
00648     if (verbose) {
00649         tout << "Cutoff percent: " << my_pct << "\n";
00650     }
00651 
00652     enquire.set_cutoff(my_pct);
00653     Xapian::MSet mymset2 = enquire.get_mset(0, 100);
00654 
00655     if (verbose) {
00656         tout << "Percentages after cutoff:";
00657         print_mset_percentages(mymset2);
00658         tout << "\n";
00659     }
00660 
00661     TEST_AND_EXPLAIN(mymset2.size() >= num_items,
00662                      "Match with % cutoff lost too many items");
00663 
00664     TEST_AND_EXPLAIN(mymset2.size() == num_items ||
00665                      (mymset2.convert_to_percent(mymset2[num_items]) == my_pct &&
00666                       mymset2.convert_to_percent(mymset2.back()) == my_pct),
00667                      "Match with % cutoff returned too many items");
00668 
00669     return true;
00670 }
00671 
00672 // Tests the percent cutoff option combined with collapsing
00673 DEFINE_TESTCASE(pctcutoff2, backend) {
00674     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00675     enquire.set_query(Xapian::Query("this"));
00676     enquire.set_query(Xapian::Query(Xapian::Query::OP_AND_NOT, Xapian::Query("this"), Xapian::Query("banana")));
00677     Xapian::MSet mset = enquire.get_mset(0, 100);
00678 
00679     if (verbose) {
00680         tout << "Original mset pcts:";
00681         print_mset_percentages(mset);
00682         tout << "\n";
00683     }
00684 
00685     TEST(mset.size() >= 2);
00686     TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
00687 
00688     Xapian::percent cutoff = mset[0].get_percent() + mset[1].get_percent();
00689     cutoff /= 2;
00690 
00691     enquire.set_cutoff(cutoff);
00692     enquire.set_collapse_key(1234); // Value which is always empty.
00693 
00694     mset = enquire.get_mset(0, 1);
00695     TEST_EQUAL(mset.size(), 1);
00696     TEST_EQUAL(mset.get_matches_lower_bound(), 1);
00697 
00698     return true;
00699 }
00700 
00701 // tests the cutoff option
00702 DEFINE_TESTCASE(cutoff1, backend) {
00703     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00704     enquire.set_query(query(Xapian::Query::OP_OR,
00705                             "this", "line", "paragraph", "rubbish"));
00706     Xapian::MSet mymset1 = enquire.get_mset(0, 100);
00707 
00708     if (verbose) {
00709         tout << "Original mset weights:";
00710         print_mset_weights(mymset1);
00711         tout << "\n";
00712     }
00713 
00714     unsigned int num_items = 0;
00715     Xapian::weight my_wt = -100;
00716     int changes = 0;
00717     Xapian::MSetIterator i = mymset1.begin();
00718     int c = 0;
00719     for ( ; i != mymset1.end(); ++i, ++c) {
00720         Xapian::weight new_wt = i.get_weight();
00721         if (new_wt != my_wt) {
00722             changes++;
00723             if (changes > 3) break;
00724             num_items = c;
00725             my_wt = new_wt;
00726         }
00727     }
00728 
00729     TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
00730     if (verbose) {
00731         tout << "Cutoff weight: " << my_wt << "\n";
00732     }
00733 
00734     enquire.set_cutoff(0, my_wt);
00735     Xapian::MSet mymset2 = enquire.get_mset(0, 100);
00736 
00737     if (verbose) {
00738         tout << "Weights after cutoff:";
00739         print_mset_weights(mymset2);
00740         tout << "\n";
00741     }
00742 
00743     TEST_AND_EXPLAIN(mymset2.size() >= num_items,
00744                      "Match with cutoff lost too many items");
00745 
00746     TEST_AND_EXPLAIN(mymset2.size() == num_items ||
00747                      (mymset2[num_items].get_weight() == my_wt &&
00748                       mymset2.back().get_weight() == my_wt),
00749                      "Match with cutoff returned too many items");
00750 
00751     return true;
00752 }
00753 
00754 // tests the allow query terms expand option
00755 DEFINE_TESTCASE(allowqterms1, backend) {
00756     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00757     enquire.set_query(Xapian::Query("this"));
00758 
00759     Xapian::MSet mymset = enquire.get_mset(0, 10);
00760     TEST(mymset.size() >= 2);
00761 
00762     Xapian::RSet myrset;
00763     Xapian::MSetIterator i = mymset.begin();
00764     myrset.add_document(*i);
00765     myrset.add_document(*(++i));
00766 
00767     Xapian::ESet myeset = enquire.get_eset(1000, myrset);
00768     Xapian::ESetIterator j = myeset.begin();
00769     for ( ; j != myeset.end(); ++j) {
00770         TEST_NOT_EQUAL(*j, "this");
00771     }
00772 
00773     Xapian::ESet myeset2 = enquire.get_eset(1000, myrset, Xapian::Enquire::INCLUDE_QUERY_TERMS);
00774     j = myeset2.begin();
00775     for ( ; j != myeset2.end(); ++j) {
00776         if (*j == "this") break;
00777     }
00778     TEST(j != myeset2.end());
00779     return true;
00780 }
00781 
00782 // tests that the MSet max_attained works
00783 DEFINE_TESTCASE(maxattain1, backend) {
00784     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00785     enquire.set_query(query("this"));
00786     Xapian::MSet mymset = enquire.get_mset(0, 100);
00787 
00788     Xapian::weight mymax = 0;
00789     Xapian::MSetIterator i = mymset.begin();
00790     for ( ; i != mymset.end(); ++i) {
00791         if (i.get_weight() > mymax) mymax = i.get_weight();
00792     }
00793     TEST_EQUAL(mymax, mymset.get_max_attained());
00794 
00795     return true;
00796 }
00797 
00798 // tests a reversed boolean query
00799 DEFINE_TESTCASE(reversebool1, backend) {
00800     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00801     enquire.set_query(Xapian::Query("this"));
00802     enquire.set_weighting_scheme(Xapian::BoolWeight());
00803 
00804     Xapian::MSet mymset1 = enquire.get_mset(0, 100);
00805     TEST_AND_EXPLAIN(mymset1.size() > 1,
00806                      "Mset was too small to test properly");
00807 
00808     enquire.set_docid_order(Xapian::Enquire::ASCENDING);
00809     Xapian::MSet mymset2 = enquire.get_mset(0, 100);
00810     enquire.set_docid_order(Xapian::Enquire::DESCENDING);
00811     Xapian::MSet mymset3 = enquire.get_mset(0, 100);
00812 
00813     // mymset1 and mymset2 should be identical
00814     TEST_EQUAL(mymset1.size(), mymset2.size());
00815 
00816     {
00817         Xapian::MSetIterator i = mymset1.begin();
00818         Xapian::MSetIterator j = mymset2.begin();
00819         for ( ; i != mymset1.end(), j != mymset2.end(); ++i, j++) {
00820             // if this fails, then setting match_sort_forward=true was not
00821             // the same as the default.
00822             TEST_EQUAL(*i, *j);
00823         }
00824     }
00825 
00826     // mymset1 and mymset3 should be same but reversed
00827     TEST_EQUAL(mymset1.size(), mymset3.size());
00828 
00829     {
00830         Xapian::MSetIterator i = mymset1.begin();
00831         vector<Xapian::docid> rev(mymset3.begin(), mymset3.end());
00832         // Next iterator not const because of compiler brokenness (egcs 1.1.2)
00833         vector<Xapian::docid>::reverse_iterator j = rev.rbegin();
00834         for ( ; i != mymset1.end(); ++i, j++) {
00835             // if this fails, then setting match_sort_forward=false didn't
00836             // reverse the results.
00837             TEST_EQUAL(*i, *j);
00838         }
00839     }
00840 
00841     return true;
00842 }
00843 
00844 // tests a reversed boolean query, where the full mset isn't returned
00845 DEFINE_TESTCASE(reversebool2, backend) {
00846     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00847     enquire.set_query(Xapian::Query("this"));
00848     enquire.set_weighting_scheme(Xapian::BoolWeight());
00849 
00850     Xapian::MSet mymset1 = enquire.get_mset(0, 100);
00851 
00852     TEST_AND_EXPLAIN(mymset1.size() > 1,
00853                      "Mset was too small to test properly");
00854 
00855     enquire.set_docid_order(Xapian::Enquire::ASCENDING);
00856     Xapian::doccount msize = mymset1.size() / 2;
00857     Xapian::MSet mymset2 = enquire.get_mset(0, msize);
00858     enquire.set_docid_order(Xapian::Enquire::DESCENDING);
00859     Xapian::MSet mymset3 = enquire.get_mset(0, msize);
00860 
00861     // mymset2 should be first msize items of mymset1
00862     TEST_EQUAL(msize, mymset2.size());
00863     {
00864         Xapian::MSetIterator i = mymset1.begin();
00865         Xapian::MSetIterator j = mymset2.begin();
00866         for ( ; i != mymset1.end(), j != mymset2.end(); ++i, j++) {
00867             // if this fails, then setting match_sort_forward=true was not
00868             // the same as the default.
00869             TEST_EQUAL(*i, *j);
00870         }
00871     }
00872 
00873     // mymset3 should be last msize items of mymset1, in reverse order
00874     TEST_EQUAL(msize, mymset3.size());
00875     {
00876         vector<Xapian::docid> rev(mymset1.begin(), mymset1.end());
00877         // Next iterator not const because of compiler brokenness (egcs 1.1.2)
00878         vector<Xapian::docid>::reverse_iterator i = rev.rbegin();
00879         Xapian::MSetIterator j = mymset3.begin();
00880         for ( ; j != mymset3.end(); ++i, j++) {
00881             // if this fails, then setting match_sort_forward=false didn't
00882             // reverse the results.
00883             TEST_EQUAL(*i, *j);
00884         }
00885     }
00886 
00887     return true;
00888 }
00889 
00890 // tests that get_matching_terms() returns the terms in the right order
00891 DEFINE_TESTCASE(getmterms1, backend) {
00892     list<string> answers_list;
00893     answers_list.push_back("one");
00894     answers_list.push_back("two");
00895     answers_list.push_back("three");
00896     answers_list.push_back("four");
00897 
00898     Xapian::Database mydb(get_database("apitest_termorder"));
00899     Xapian::Enquire enquire(mydb);
00900 
00901     Xapian::Query myquery(Xapian::Query::OP_OR,
00902             Xapian::Query(Xapian::Query::OP_AND,
00903                     Xapian::Query("one", 1, 1),
00904                     Xapian::Query("three", 1, 3)),
00905             Xapian::Query(Xapian::Query::OP_OR,
00906                     Xapian::Query("four", 1, 4),
00907                     Xapian::Query("two", 1, 2)));
00908 
00909     enquire.set_query(myquery);
00910 
00911     Xapian::MSet mymset = enquire.get_mset(0, 10);
00912 
00913     TEST_MSET_SIZE(mymset, 1);
00914     list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
00915                           enquire.get_matching_terms_end(mymset.begin()));
00916     TEST(list == answers_list);
00917 
00918     return true;
00919 }
00920 
00921 // tests that get_matching_terms() returns the terms only once
00922 DEFINE_TESTCASE(getmterms2, backend) {
00923     list<string> answers_list;
00924     answers_list.push_back("one");
00925     answers_list.push_back("two");
00926     answers_list.push_back("three");
00927 
00928     Xapian::Database mydb(get_database("apitest_termorder"));
00929     Xapian::Enquire enquire(mydb);
00930 
00931     Xapian::Query myquery(Xapian::Query::OP_OR,
00932             Xapian::Query(Xapian::Query::OP_AND,
00933                     Xapian::Query("one", 1, 1),
00934                     Xapian::Query("three", 1, 3)),
00935             Xapian::Query(Xapian::Query::OP_OR,
00936                     Xapian::Query("one", 1, 4),
00937                     Xapian::Query("two", 1, 2)));
00938 
00939     enquire.set_query(myquery);
00940 
00941     Xapian::MSet mymset = enquire.get_mset(0, 10);
00942 
00943     TEST_MSET_SIZE(mymset, 1);
00944     list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
00945                           enquire.get_matching_terms_end(mymset.begin()));
00946     TEST(list == answers_list);
00947 
00948     return true;
00949 }
00950 
00951 // tests that the collapsing on termpos optimisation works
00952 DEFINE_TESTCASE(poscollapse1, backend) {
00953     Xapian::Query myquery1(Xapian::Query::OP_OR,
00954                      Xapian::Query("this", 1, 1),
00955                      Xapian::Query("this", 1, 1));
00956     Xapian::Query myquery2("this", 2, 1);
00957 
00958     if (verbose) {
00959         tout << myquery1.get_description() << "\n";
00960         tout << myquery2.get_description() << "\n";
00961     }
00962 
00963     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00964     enquire.set_query(myquery1);
00965     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
00966 
00967     enquire.set_query(myquery2);
00968     Xapian::MSet mymset2 = enquire.get_mset(0, 10);
00969 
00970     TEST_EQUAL(mymset1, mymset2);
00971 
00972     return true;
00973 }
00974 
00975 // test that running a query twice returns the same results
00976 DEFINE_TESTCASE(repeatquery1, backend) {
00977     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00978     enquire.set_query(Xapian::Query("this"));
00979 
00980     enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
00981 
00982     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
00983     Xapian::MSet mymset2 = enquire.get_mset(0, 10);
00984     TEST_EQUAL(mymset1, mymset2);
00985 
00986     return true;
00987 }
00988 
00989 // test that prefetching documents works (at least, gives same results)
00990 DEFINE_TESTCASE(fetchdocs1, backend) {
00991     Xapian::Enquire enquire(get_database("apitest_simpledata"));
00992     enquire.set_query(Xapian::Query("this"));
00993 
00994     enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
00995 
00996     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
00997     Xapian::MSet mymset2 = enquire.get_mset(0, 10);
00998     TEST_EQUAL(mymset1, mymset2);
00999     mymset2.fetch(mymset2[0], mymset2[mymset2.size() - 1]);
01000     mymset2.fetch(mymset2.begin(), mymset2.end());
01001     mymset2.fetch(mymset2.begin());
01002     mymset2.fetch();
01003 
01004     Xapian::MSetIterator it1 = mymset1.begin();
01005     Xapian::MSetIterator it2 = mymset2.begin();
01006 
01007     while (it1 != mymset1.end() && it2 != mymset2.end()) {
01008         TEST_EQUAL(it1.get_document().get_data(),
01009                    it2.get_document().get_data());
01010         TEST_NOT_EQUAL(it1.get_document().get_data(), "");
01011         TEST_NOT_EQUAL(it2.get_document().get_data(), "");
01012         it1++;
01013         it2++;
01014     }
01015     TEST_EQUAL(it1, mymset1.end());
01016     TEST_EQUAL(it1, mymset2.end());
01017 
01018     return true;
01019 }
01020 
01021 // test that searching for a term not in the database fails nicely
01022 DEFINE_TESTCASE(absentterm1, backend) {
01023     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01024     enquire.set_weighting_scheme(Xapian::BoolWeight());
01025     enquire.set_query(Xapian::Query("frink"));
01026 
01027     Xapian::MSet mymset = enquire.get_mset(0, 10);
01028     mset_expect_order(mymset);
01029 
01030     return true;
01031 }
01032 
01033 // as absentterm1, but setting query from a vector of terms
01034 DEFINE_TESTCASE(absentterm2, backend) {
01035     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01036     vector<string> terms;
01037     terms.push_back("frink");
01038 
01039     Xapian::Query query(Xapian::Query::OP_OR, terms.begin(), terms.end());
01040     enquire.set_query(query);
01041 
01042     Xapian::MSet mymset = enquire.get_mset(0, 10);
01043     mset_expect_order(mymset);
01044 
01045     return true;
01046 }
01047 
01048 // test that rsets do sensible things
01049 DEFINE_TESTCASE(rset1, backend) {
01050     Xapian::Database mydb(get_database("apitest_rset"));
01051     Xapian::Enquire enquire(mydb);
01052     Xapian::Query myquery = query(Xapian::Query::OP_OR, "giraffe", "tiger");
01053     enquire.set_query(myquery);
01054 
01055     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
01056 
01057     Xapian::RSet myrset;
01058     myrset.add_document(1);
01059 
01060     Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
01061 
01062     // We should have the same documents turn up, but 1 and 3 should
01063     // have higher weights with the RSet.
01064     TEST_MSET_SIZE(mymset1, 3);
01065     TEST_MSET_SIZE(mymset2, 3);
01066 
01067     return true;
01068 }
01069 
01070 // test that rsets do more sensible things
01071 DEFINE_TESTCASE(rset2, backend) {
01072     Xapian::Database mydb(get_database("apitest_rset"));
01073     Xapian::Enquire enquire(mydb);
01074     Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
01075     enquire.set_query(myquery);
01076 
01077     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
01078 
01079     Xapian::RSet myrset;
01080     myrset.add_document(2);
01081 
01082     Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
01083 
01084     mset_expect_order(mymset1, 1, 2);
01085     mset_expect_order(mymset2, 2, 1);
01086 
01087     return true;
01088 }
01089 
01090 // test that rsets behave correctly with multiDBs
01091 DEFINE_TESTCASE(rsetmultidb1, backend && !multi) {
01092     Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
01093     Xapian::Database mydb2(get_database("apitest_rset"));
01094     mydb2.add_database(get_database("apitest_simpledata2"));
01095 
01096     Xapian::Enquire enquire1(mydb1);
01097     Xapian::Enquire enquire2(mydb2);
01098 
01099     Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "multiple");
01100 
01101     enquire1.set_query(myquery);
01102     enquire2.set_query(myquery);
01103 
01104     Xapian::RSet myrset1;
01105     Xapian::RSet myrset2;
01106     myrset1.add_document(4);
01107     myrset2.add_document(2);
01108 
01109     Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
01110     Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
01111     Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
01112     Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
01113 
01114     mset_expect_order(mymset1a, 1, 4);
01115     mset_expect_order(mymset1b, 4, 1);
01116     mset_expect_order(mymset2a, 1, 2);
01117     mset_expect_order(mymset2b, 2, 1);
01118 
01119     TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
01120     TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
01121     TEST_NOT_EQUAL(mymset1a, mymset1b);
01122     TEST_NOT_EQUAL(mymset2a, mymset2b);
01123 
01124     return true;
01125 }
01126 
01127 // regression tests - used to cause assertion in stats.h to fail
01128 // Doesn't actually fail for multi but it doesn't make sense to run there.
01129 DEFINE_TESTCASE(rsetmultidb3, backend && !multi) {
01130     Xapian::Enquire enquire(get_database("apitest_simpledata2"));
01131     enquire.set_query(query(Xapian::Query::OP_OR, "cuddly", "people"));
01132     Xapian::MSet mset = enquire.get_mset(0, 10); // used to fail assertion
01133     return true;
01134 }
01135 
01137 DEFINE_TESTCASE(eliteset1, backend) {
01138     // FIXME: OP_ELITE_SET erroneously picks the best N terms separately in
01139     // each sub-database!
01140     SKIP_TEST_FOR_BACKEND("multi");
01141 
01142     Xapian::Database mydb(get_database("apitest_simpledata"));
01143     Xapian::Enquire enquire(mydb);
01144 
01145     Xapian::Query myquery1 = query(Xapian::Query::OP_OR, "word");
01146 
01147     Xapian::Query myquery2 = query(Xapian::Query::OP_ELITE_SET, 1,
01148                                    "simple", "word");
01149 
01150     enquire.set_query(myquery1, 2); // So the query lengths are the same.
01151     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
01152 
01153     enquire.set_query(myquery2);
01154     Xapian::MSet mymset2 = enquire.get_mset(0, 10);
01155 
01156     TEST_EQUAL(mymset1, mymset2);
01157     return true;
01158 }
01159 
01162 DEFINE_TESTCASE(eliteset2, backend) {
01163     // FIXME: OP_ELITE_SET erroneously picks the best N terms separately in
01164     // each sub-database!
01165     SKIP_TEST_FOR_BACKEND("multi");
01166 
01167     Xapian::Database mydb(get_database("apitest_simpledata"));
01168     Xapian::Enquire enquire(mydb);
01169 
01170     Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
01171 
01172     vector<Xapian::Query> qs;
01173     qs.push_back(query("this"));
01174     qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
01175     Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET,
01176                            qs.begin(), qs.end(), 1);
01177 
01178     enquire.set_query(myquery1);
01179     Xapian::MSet mymset1 = enquire.get_mset(0, 10);
01180 
01181     enquire.set_query(myquery2);
01182     Xapian::MSet mymset2 = enquire.get_mset(0, 10);
01183 
01184     TEST_EQUAL(mymset1, mymset2);
01185     // query lengths differ so mset weights not the same (with some weighting
01186     // parameters)
01187     //test_mset_order_equal(mymset1, mymset2);
01188 
01189     return true;
01190 }
01191 
01194 DEFINE_TESTCASE(eliteset3, backend) {
01195     Xapian::Database mydb1(get_database("apitest_simpledata"));
01196     Xapian::Enquire enquire1(mydb1);
01197 
01198     Xapian::Database mydb2(get_database("apitest_simpledata"));
01199     Xapian::Enquire enquire2(mydb2);
01200 
01201     // make a query
01202     Xapian::Stem stemmer("english");
01203 
01204     string term1 = stemmer("word");
01205     string term2 = stemmer("rubbish");
01206     string term3 = stemmer("banana");
01207 
01208     vector<string> terms;
01209     terms.push_back(term1);
01210     terms.push_back(term2);
01211     terms.push_back(term3);
01212 
01213     Xapian::Query myquery1(Xapian::Query::OP_OR, terms.begin(), terms.end());
01214     enquire1.set_query(myquery1);
01215 
01216     Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET, terms.begin(), terms.end(), 3);
01217     enquire2.set_query(myquery2);
01218 
01219     // retrieve the results
01220     Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
01221     Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
01222 
01223     TEST_EQUAL(mymset1.get_termfreq(term1),
01224                mymset2.get_termfreq(term1));
01225     TEST_EQUAL(mymset1.get_termweight(term1),
01226                mymset2.get_termweight(term1));
01227     TEST_EQUAL(mymset1.get_termfreq(term2),
01228                mymset2.get_termfreq(term2));
01229     TEST_EQUAL(mymset1.get_termweight(term2),
01230                mymset2.get_termweight(term2));
01231     TEST_EQUAL(mymset1.get_termfreq(term3),
01232                mymset2.get_termfreq(term3));
01233     TEST_EQUAL(mymset1.get_termweight(term3),
01234                mymset2.get_termweight(term3));
01235 //    TEST_EQUAL(mymset1, mymset2);
01236 
01237     return true;
01238 }
01239 
01241 DEFINE_TESTCASE(eliteset4, backend) {
01242     // FIXME: OP_ELITE_SET erroneously picks the best N terms separately in
01243     // each sub-database!
01244     SKIP_TEST_FOR_BACKEND("multi");
01245 
01246     Xapian::Database mydb1(get_database("apitest_simpledata"));
01247     Xapian::Enquire enquire1(mydb1);
01248 
01249     Xapian::Database mydb2(get_database("apitest_simpledata"));
01250     Xapian::Enquire enquire2(mydb2);
01251 
01252     Xapian::Query myquery1 = query("rubbish");
01253     Xapian::Query myquery2 = query(Xapian::Query::OP_ELITE_SET, 1,
01254                                    "word", "rubbish", "fibble");
01255     enquire1.set_query(myquery1);
01256     enquire2.set_query(myquery2);
01257 
01258     // retrieve the results
01259     Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
01260     Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
01261 
01262     TEST_NOT_EQUAL(mymset2.size(), 0);
01263     TEST_EQUAL(mymset1, mymset2);
01264 //    TEST_EQUAL(mymset1, mymset2);
01265 
01266     return true;
01267 }
01268 
01270 DEFINE_TESTCASE(eliteset5, backend) {
01271     SKIP_TEST_FOR_BACKEND("multi");
01272 
01273     Xapian::Database mydb1(get_database("apitest_simpledata"));
01274     Xapian::Enquire enquire1(mydb1);
01275 
01276     vector<string> v;
01277     for (int i = 0; i != 3; ++i) {
01278         v.push_back("simpl");
01279         v.push_back("queri");
01280 
01281         v.push_back("rubbish");
01282         v.push_back("rubbish");
01283         v.push_back("rubbish");
01284         v.push_back("word");
01285         v.push_back("word");
01286         v.push_back("word");
01287     }
01288 
01289     Xapian::Query myquery1 = Xapian::Query(Xapian::Query::OP_ELITE_SET,
01290                                            v.begin(), v.end(), 1);
01291     myquery1 = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT,
01292                              myquery1,
01293                              0.004);
01294 
01295     enquire1.set_query(myquery1);
01296     // On architectures with excess precision (or, at least, on x86), the
01297     // following call used to result in a segfault.
01298     enquire1.get_mset(0, 10);
01299 
01300     return true;
01301 }
01302 
01304 DEFINE_TESTCASE(termlisttermfreq1, backend) {
01305     Xapian::Database mydb(get_database("apitest_simpledata"));
01306     Xapian::Enquire enquire(mydb);
01307     Xapian::Stem stemmer("english");
01308     Xapian::RSet rset1;
01309     Xapian::RSet rset2;
01310     rset1.add_document(5);
01311     rset2.add_document(6);
01312 
01313     Xapian::ESet eset1 = enquire.get_eset(1000, rset1);
01314     Xapian::ESet eset2 = enquire.get_eset(1000, rset2);
01315 
01316     // search for weight of term 'another'
01317     string theterm = stemmer("another");
01318 
01319     Xapian::weight wt1 = 0;
01320     Xapian::weight wt2 = 0;
01321     {
01322         Xapian::ESetIterator i = eset1.begin();
01323         for ( ; i != eset1.end(); i++) {
01324             if (*i == theterm) {
01325                 wt1 = i.get_weight();
01326                 break;
01327             }
01328         }
01329     }
01330     {
01331         Xapian::ESetIterator i = eset2.begin();
01332         for ( ; i != eset2.end(); i++) {
01333             if (*i == theterm) {
01334                 wt2 = i.get_weight();
01335                 break;
01336             }
01337         }
01338     }
01339 
01340     TEST_NOT_EQUAL(wt1, 0);
01341     TEST_NOT_EQUAL(wt2, 0);
01342     TEST_EQUAL(wt1, wt2);
01343 
01344     return true;
01345 }
01346 
01348 DEFINE_TESTCASE(qterminfo1, backend) {
01349     Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
01350     Xapian::Enquire enquire1(mydb1);
01351 
01352     Xapian::Database mydb2(get_database("apitest_simpledata"));
01353     mydb2.add_database(get_database("apitest_simpledata2"));
01354     Xapian::Enquire enquire2(mydb2);
01355 
01356     // make a query
01357     Xapian::Stem stemmer("english");
01358 
01359     string term1 = stemmer("word");
01360     string term2 = stemmer("inmemory");
01361     string term3 = stemmer("flibble");
01362 
01363     Xapian::Query myquery(Xapian::Query::OP_OR,
01364                     Xapian::Query(term1),
01365                     Xapian::Query(Xapian::Query::OP_OR,
01366                             Xapian::Query(term2),
01367                             Xapian::Query(term3)));
01368     enquire1.set_query(myquery);
01369     enquire2.set_query(myquery);
01370 
01371     // retrieve the results
01372     Xapian::MSet mymset1a = enquire1.get_mset(0, 0);
01373     Xapian::MSet mymset2a = enquire2.get_mset(0, 0);
01374 
01375     TEST_EQUAL(mymset1a.get_termfreq(term1),
01376                mymset2a.get_termfreq(term1));
01377     TEST_EQUAL(mymset1a.get_termweight(term1),
01378                mymset2a.get_termweight(term1));
01379     TEST_EQUAL(mymset1a.get_termfreq(term2),
01380                mymset2a.get_termfreq(term2));
01381     TEST_EQUAL(mymset1a.get_termweight(term2),
01382                mymset2a.get_termweight(term2));
01383     TEST_EQUAL(mymset1a.get_termfreq(term3),
01384                mymset2a.get_termfreq(term3));
01385     TEST_EQUAL(mymset1a.get_termweight(term3),
01386                mymset2a.get_termweight(term3));
01387 
01388     TEST_EQUAL(mymset1a.get_termfreq(term1), 3);
01389     TEST_EQUAL(mymset1a.get_termfreq(term2), 1);
01390     TEST_EQUAL(mymset1a.get_termfreq(term3), 0);
01391 
01392     TEST_NOT_EQUAL(mymset1a.get_termweight(term1), 0);
01393     TEST_NOT_EQUAL(mymset1a.get_termweight(term2), 0);
01394     // non-existent terms still have weight
01395     TEST_NOT_EQUAL(mymset1a.get_termweight(term3), 0);
01396 
01397     TEST_EXCEPTION(Xapian::InvalidArgumentError,
01398                    mymset1a.get_termfreq("sponge"));
01399 
01400     return true;
01401 }
01402 
01404 DEFINE_TESTCASE(qterminfo2, backend) {
01405     Xapian::Database db(get_database("apitest_simpledata"));
01406     Xapian::Enquire enquire(db);
01407 
01408     // make a query
01409     Xapian::Stem stemmer("english");
01410 
01411     string term1 = stemmer("paragraph");
01412     string term2 = stemmer("another");
01413 
01414     Xapian::Query query(Xapian::Query::OP_AND_NOT, term1,
01415             Xapian::Query(Xapian::Query::OP_AND, term1, term2));
01416     enquire.set_query(query);
01417 
01418     // retrieve the results
01419     // Note: get_mset() used to throw "AssertionError" in debug builds
01420     Xapian::MSet mset = enquire.get_mset(0, 10);
01421 
01422     TEST_NOT_EQUAL(mset.get_termweight("paragraph"), 0);
01423 
01424     return true;
01425 }
01426 
01427 // tests that when specifying that no items are to be returned, those
01428 // statistics which should be the same are.
01429 DEFINE_TESTCASE(msetzeroitems1, backend) {
01430     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01431     enquire.set_query(query("this"));
01432     Xapian::MSet mymset1 = enquire.get_mset(0, 0);
01433 
01434     Xapian::MSet mymset2 = enquire.get_mset(0, 1);
01435 
01436     TEST_EQUAL(mymset1.get_max_possible(), mymset2.get_max_possible());
01437 
01438     return true;
01439 }
01440 
01441 // test that the matches_* of a simple query are as expected
01442 DEFINE_TESTCASE(matches1, backend) {
01443     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01444     Xapian::Query myquery;
01445     Xapian::MSet mymset;
01446 
01447     myquery = query("word");
01448     enquire.set_query(myquery);
01449     mymset = enquire.get_mset(0, 10);
01450     TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
01451     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01452     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01453 
01454     myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
01455     enquire.set_query(myquery);
01456     mymset = enquire.get_mset(0, 10);
01457     TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
01458     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01459     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01460 
01461     myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
01462     enquire.set_query(myquery);
01463     mymset = enquire.get_mset(0, 10);
01464     TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
01465     TEST_EQUAL(mymset.get_matches_estimated(), 0);
01466     TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
01467 
01468     myquery = query(Xapian::Query::OP_AND, "simple", "word");
01469     enquire.set_query(myquery);
01470     mymset = enquire.get_mset(0, 10);
01471     TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
01472     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01473     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01474 
01475     myquery = query(Xapian::Query::OP_AND, "simple", "word");
01476     enquire.set_query(myquery);
01477     mymset = enquire.get_mset(0, 0);
01478     // For a single database, this is true, but not for "multi" (since there
01479     // one sub-database has 3 documents and simple and word both have termfreq
01480     // of 2, so the matcher can tell at least one document must match!)
01481     // TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
01482     TEST(mymset.get_matches_lower_bound() <= mymset.get_matches_estimated());
01483     TEST_EQUAL(mymset.get_matches_estimated(), 1);
01484     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01485 
01486     mymset = enquire.get_mset(0, 1);
01487     TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
01488     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01489     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01490 
01491     mymset = enquire.get_mset(0, 2);
01492     TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
01493     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01494     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01495 
01496     myquery = query(Xapian::Query::OP_AND, "paragraph", "another");
01497     enquire.set_query(myquery);
01498     mymset = enquire.get_mset(0, 0);
01499     TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
01500     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01501     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01502 
01503     mymset = enquire.get_mset(0, 1);
01504     TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
01505     TEST_EQUAL(mymset.get_matches_estimated(), 2);
01506     TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
01507 
01508     mymset = enquire.get_mset(0, 2);
01509     TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
01510     TEST_EQUAL(mymset.get_matches_estimated(), 1);
01511     TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
01512 
01513     mymset = enquire.get_mset(1, 20);
01514     TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
01515     TEST_EQUAL(mymset.get_matches_estimated(), 1);
01516     TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
01517 
01518     return true;
01519 }
01520 
01521 // tests that wqf affects the document weights
01522 DEFINE_TESTCASE(wqf1, backend) {
01523     // Both queries have length 2; in q1 word has wqf=2, in q2 word has wqf=1
01524     Xapian::Query q1("word", 2);
01525     Xapian::Query q2("word");
01526     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01527     enquire.set_query(q1);
01528     Xapian::MSet mset1 = enquire.get_mset(0, 10);
01529     enquire.set_query(q2);
01530     Xapian::MSet mset2 = enquire.get_mset(0, 2);
01531     // Check the weights
01532     TEST(mset1.begin().get_weight() > mset2.begin().get_weight());
01533     return true;
01534 }
01535 
01536 // tests that query length affects the document weights
01537 DEFINE_TESTCASE(qlen1, backend) {
01538     Xapian::Query q1("word");
01539     Xapian::Query q2("word");
01540     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01541     enquire.set_query(q1);
01542     Xapian::MSet mset1 = enquire.get_mset(0, 10);
01543     enquire.set_query(q2);
01544     Xapian::MSet mset2 = enquire.get_mset(0, 2);
01545     // Check the weights
01546     //TEST(mset1.begin().get_weight() < mset2.begin().get_weight());
01547     TEST(mset1.begin().get_weight() == mset2.begin().get_weight());
01548     return true;
01549 }
01550 
01551 // tests that opening a non-existent termlist throws the correct exception
01552 DEFINE_TESTCASE(termlist1, backend) {
01553     Xapian::Database db(get_database("apitest_onedoc"));
01554     TEST_EXCEPTION(Xapian::InvalidArgumentError,
01555                    Xapian::TermIterator t = db.termlist_begin(0));
01556     TEST_EXCEPTION(Xapian::DocNotFoundError,
01557                    Xapian::TermIterator t = db.termlist_begin(2));
01558     /* Cause the database to be used properly, showing up problems
01559      * with the link being in a bad state.  CME */
01560     Xapian::TermIterator temp = db.termlist_begin(1);
01561     TEST_EXCEPTION(Xapian::DocNotFoundError,
01562                    Xapian::TermIterator t = db.termlist_begin(999999999));
01563     return true;
01564 }
01565 
01566 // tests that a Xapian::TermIterator works as an STL iterator
01567 DEFINE_TESTCASE(termlist2, backend) {
01568     Xapian::Database db(get_database("apitest_onedoc"));
01569     Xapian::TermIterator t = db.termlist_begin(1);
01570     Xapian::TermIterator tend = db.termlist_end(1);
01571 
01572     // test operator= creates a copy which compares equal
01573     Xapian::TermIterator t_copy = t;
01574     TEST_EQUAL(t, t_copy);
01575 
01576     // test copy constructor creates a copy which compares equal
01577     Xapian::TermIterator t_clone(t);
01578     TEST_EQUAL(t, t_clone);
01579 
01580     vector<string> v(t, tend);
01581 
01582     t = db.termlist_begin(1);
01583     tend = db.termlist_end(1);
01584     vector<string>::const_iterator i;
01585     for (i = v.begin(); i != v.end(); i++) {
01586         TEST_NOT_EQUAL(t, tend);
01587         TEST_EQUAL(*i, *t);
01588         t++;
01589     }
01590     TEST_EQUAL(t, tend);
01591     return true;
01592 }
01593 
01594 static Xapian::TermIterator
01595 test_termlist3_helper()
01596 {
01597     Xapian::Database db(get_database("apitest_onedoc"));
01598     return db.termlist_begin(1);
01599 }
01600 
01601 // tests that a Xapian::TermIterator still works when the DB is deleted
01602 DEFINE_TESTCASE(termlist3, backend) {
01603     Xapian::TermIterator u = test_termlist3_helper();
01604     Xapian::Database db(get_database("apitest_onedoc"));
01605     Xapian::TermIterator t = db.termlist_begin(1);
01606     Xapian::TermIterator tend = db.termlist_end(1);
01607 
01608     while (t != tend) {
01609         TEST_EQUAL(*t, *u);
01610         t++;
01611         u++;
01612     }
01613     return true;
01614 }
01615 
01616 // tests skip_to
01617 DEFINE_TESTCASE(termlist4, backend) {
01618     Xapian::Database db(get_database("apitest_onedoc"));
01619     Xapian::TermIterator i = db.termlist_begin(1);
01620     i.skip_to("");
01621     i.skip_to("\xff");
01622     return true;
01623 }
01624 
01625 // tests punctuation is OK in terms (particularly in remote queries)
01626 DEFINE_TESTCASE(puncterms1, backend) {
01627     Xapian::Database db(get_database("apitest_punc"));
01628     Xapian::Enquire enquire(db);
01629 
01630     Xapian::Query q1("semi;colon");
01631     enquire.set_query(q1);
01632     Xapian::MSet m1 = enquire.get_mset(0, 10);
01633 
01634     Xapian::Query q2("col:on");
01635     enquire.set_query(q2);
01636     Xapian::MSet m2 = enquire.get_mset(0, 10);
01637 
01638     Xapian::Query q3("com,ma");
01639     enquire.set_query(q3);
01640     Xapian::MSet m3 = enquire.get_mset(0, 10);
01641 
01642     return true;
01643 }
01644 
01645 // test that searching for a term with a space or backslash in it works
01646 DEFINE_TESTCASE(spaceterms1, backend) {
01647     Xapian::Enquire enquire(get_database("apitest_space"));
01648     Xapian::MSet mymset;
01649     Xapian::doccount count;
01650     Xapian::MSetIterator m;
01651     Xapian::Stem stemmer("english");
01652 
01653     enquire.set_query(stemmer("space man"));
01654     mymset = enquire.get_mset(0, 10);
01655     TEST_MSET_SIZE(mymset, 1);
01656     count = 0;
01657     for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
01658     TEST_EQUAL(count, 1);
01659 
01660     for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
01661         TEST_NOT_EQUAL(mymset.begin().get_document().get_data(), "");
01662         TEST_NOT_EQUAL(mymset.begin().get_document().get_value(value_no), "");
01663     }
01664 
01665     enquire.set_query(stemmer("tab\tby"));
01666     mymset = enquire.get_mset(0, 10);
01667     TEST_MSET_SIZE(mymset, 1);
01668     count = 0;
01669     for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
01670     TEST_EQUAL(count, 1);
01671 
01672     for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
01673         string value = mymset.begin().get_document().get_value(value_no);
01674         TEST_NOT_EQUAL(value, "");
01675         if (value_no == 0) {
01676             TEST(value.size() > 262);
01677             TEST_EQUAL(static_cast<unsigned char>(value[261]), 255);
01678         }
01679     }
01680 
01681     enquire.set_query(stemmer("back\\slash"));
01682     mymset = enquire.get_mset(0, 10);
01683     TEST_MSET_SIZE(mymset, 1);
01684     count = 0;
01685     for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
01686     TEST_EQUAL(count, 1);
01687 
01688     return true;
01689 }
01690 
01691 // test that XOR queries work
01692 DEFINE_TESTCASE(xor1, backend) {
01693     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01694     enquire.set_query(Xapian::Query("this"));
01695     Xapian::Stem stemmer("english");
01696 
01697     vector<string> terms;
01698     terms.push_back(stemmer("this"));
01699     terms.push_back(stemmer("word"));
01700     terms.push_back(stemmer("of"));
01701 
01702     Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
01703     enquire.set_weighting_scheme(Xapian::BoolWeight());
01704     enquire.set_query(query);
01705 
01706     Xapian::MSet mymset = enquire.get_mset(0, 10);
01707     mset_expect_order(mymset, 1, 2, 5, 6);
01708 
01709     return true;
01710 }
01711 
01712 // test Xapian::Database::get_document()
01713 DEFINE_TESTCASE(getdoc1, backend) {
01714     Xapian::Database db(get_database("apitest_onedoc"));
01715     Xapian::Document doc(db.get_document(1));
01716     TEST_EXCEPTION(Xapian::InvalidArgumentError, db.get_document(0));
01717     TEST_EXCEPTION(Xapian::DocNotFoundError, db.get_document(999999999));
01718     TEST_EXCEPTION(Xapian::DocNotFoundError, db.get_document(123456789));
01719     TEST_EXCEPTION(Xapian::DocNotFoundError, db.get_document(3));
01720     TEST_EXCEPTION(Xapian::DocNotFoundError, db.get_document(2));
01721     // Check that Document works as a handle on modification
01722     // (this was broken for the first try at Xapian::Document prior to 0.7).
01723     Xapian::Document doc2 = doc;
01724     doc.set_data("modified!");
01725     TEST_EQUAL(doc.get_data(), "modified!");
01726     TEST_EQUAL(doc.get_data(), doc2.get_data());
01727     return true;
01728 }
01729 
01730 // test whether operators with no elements work as a null query
01731 DEFINE_TESTCASE(emptyop1, backend) {
01732     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01733     vector<Xapian::Query> nullvec;
01734 
01735     Xapian::Query query1(Xapian::Query::OP_XOR, nullvec.begin(), nullvec.end());
01736 
01737     enquire.set_query(query1);
01738     Xapian::MSet mymset = enquire.get_mset(0, 10);
01739     TEST_MSET_SIZE(mymset, 0);
01740     TEST_EXCEPTION(Xapian::InvalidArgumentError, enquire.get_matching_terms_begin(1));
01741 
01742     return true;
01743 }
01744 
01745 // Regression test for check_at_least SEGV when there are no matches.
01746 DEFINE_TESTCASE(checkatleast1, backend) {
01747     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01748     enquire.set_query(Xapian::Query("thom"));
01749     Xapian::MSet mymset = enquire.get_mset(0, 10, 11);
01750     TEST_EQUAL(0, mymset.size());
01751 
01752     return true;
01753 }
01754 
01755 // Regression test - if check_at_least was set we returned (check_at_least - 1)
01756 // results, rather than the requested msize.  Fixed in 1.0.2.
01757 DEFINE_TESTCASE(checkatleast2, backend) {
01758     Xapian::Enquire enquire(get_database("apitest_simpledata"));
01759     enquire.set_query(Xapian::Query("paragraph"));
01760 
01761     Xapian::MSet mymset = enquire.get_mset(0, 3, 10);
01762     TEST_MSET_SIZE(mymset, 3);
01763     TEST_EQUAL(mymset.get_matches_lower_bound(), 5);
01764 
01765     mymset = enquire.get_mset(0, 2, 4);
01766     TEST_MSET_SIZE(mymset, 2);
01767     TEST_GREATER_OR_EQUAL(mymset.get_matches_lower_bound(), 4);
01768 
01769     return true;
01770 }
01771 
01772 // Feature tests - check_at_least with various sorting options.
01773 DEFINE_TESTCASE(checkatleast3, backend) {
01774     Xapian::Enquire enquire(get_database("etext"));
01775     enquire.set_query(Xapian::Query("prussian")); // 60 matches.
01776 
01777     for (int order = 0; order < 3; ++order) {
01778         switch (order) {
01779             case 0:
01780                 enquire.set_docid_order(Xapian::Enquire::ASCENDING);
01781                 break;
01782             case 1:
01783                 enquire.set_docid_order(Xapian::Enquire::DESCENDING);
01784                 break;
01785             case 2:
01786                 enquire.set_docid_order(Xapian::Enquire::DONT_CARE);
01787                 break;
01788         }
01789 
01790         for (int sort = 0; sort < 4; ++sort) {
01791             switch (sort) {
01792                 case 0:
01793                     enquire.set_sort_by_relevance();
01794                     break;
01795                 case 1:
01796                     enquire.set_sort_by_value(0);
01797                     break;
01798                 case 2:
01799                     enquire.set_sort_by_value_then_relevance(0);
01800                     break;
01801                 case 3:
01802                     enquire.set_sort_by_relevance_then_value(0);
01803                     break;
01804             }
01805 
01806             Xapian::MSet mset = enquire.get_mset(0, 100, 500);
01807             TEST_MSET_SIZE(mset, 60);
01808             TEST_EQUAL(mset.get_matches_lower_bound(), 60);
01809             TEST_EQUAL(mset.get_matches_estimated(), 60);
01810             TEST_EQUAL(mset.get_matches_upper_bound(), 60);
01811 
01812             mset = enquire.get_mset(0, 50, 100);
01813             TEST_MSET_SIZE(mset, 50);
01814             TEST_EQUAL(mset.get_matches_lower_bound(), 60);
01815             TEST_EQUAL(mset.get_matches_estimated(), 60);
01816             TEST_EQUAL(mset.get_matches_upper_bound(), 60);
01817 
01818             mset = enquire.get_mset(0, 10, 50);
01819             TEST_MSET_SIZE(mset, 10);
01820             TEST(mset.get_matches_lower_bound() >= 50);
01821         }
01822     }
01823 
01824     return true;
01825 }
01826 
01827 // tests all document postlists
01828 DEFINE_TESTCASE(allpostlist1, backend) {
01829     Xapian::Database db(get_database("apitest_manydocs"));
01830     Xapian::PostingIterator i = db.postlist_begin("");
01831     unsigned int j = 1;
01832     while (i != db.postlist_end("")) {
01833         TEST_EQUAL(*i, j);
01834         i++;
01835         j++;
01836     }
01837     TEST_EQUAL(j, 513);
01838 
01839     i = db.postlist_begin("");
01840     j = 1;
01841     while (i != db.postlist_end("")) {
01842         TEST_EQUAL(*i, j);
01843         i++;
01844         j++;
01845         if (j == 50) {
01846             j += 10;
01847             i.skip_to(j);
01848         }
01849     }
01850     TEST_EQUAL(j, 513);
01851 
01852     return true;
01853 }
01854 
01855 static void test_emptyterm1_helper(Xapian::Database & db)
01856 {
01857     // Don't bother with postlist_begin() because allpostlist tests cover that.
01858     TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, ""));
01859     TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
01860     TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
01861     TEST_EQUAL(db.get_doccount(), db.get_collection_freq(""));
01862 }
01863 
01864 // tests results of passing an empty term to various methods
01865 DEFINE_TESTCASE(emptyterm1, backend) {
01866     Xapian::Database db(get_database("apitest_manydocs"));
01867     TEST_EQUAL(db.get_doccount(), 512);
01868     test_emptyterm1_helper(db);
01869 
01870     db = get_database("apitest_onedoc");
01871     TEST_EQUAL(db.get_doccount(), 1);
01872     test_emptyterm1_helper(db);
01873 
01874     db = get_database("");
01875     TEST_EQUAL(db.get_doccount(), 0);
01876     test_emptyterm1_helper(db);
01877 
01878     return true;
01879 }
01880 
01881 // Feature test for Query::OP_VALUE_RANGE.
01882 DEFINE_TESTCASE(valuerange1, backend) {
01883     Xapian::Database db(get_database("apitest_phrase"));
01884     Xapian::Enquire enq(db);
01885     static const char * vals[] = {
01886         "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z", NULL
01887     };
01888     for (const char **start = vals; *start; ++start) {
01889         for (const char **end = vals; *end; ++end) {
01890             Xapian::Query query(Xapian::Query::OP_VALUE_RANGE, 1, *start, *end);
01891             enq.set_query(query);
01892             Xapian::MSet mset = enq.get_mset(0, 20);
01893             // Check that documents in the MSet match the value range filter.
01894             set<Xapian::docid> matched;
01895             Xapian::MSetIterator i;
01896             for (i = mset.begin(); i != mset.end(); ++i) {
01897                 matched.insert(*i);
01898                 string value = db.get_document(*i).get_value(1);
01899                 tout << "'" << *start << "' <= '" << value << "' <= '" << *end << "'" << endl;
01900                 TEST(value >= *start);
01901                 TEST(value <= *end);
01902             }
01903             // Check that documents not in the MSet don't match the value range filter.
01904             for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
01905                 if (matched.find(j) == matched.end()) {
01906                     string value = db.get_document(j).get_value(1);
01907                     tout << value << " < '" << *start << "' or > '" << *end << "'" << endl;
01908                     TEST(value < *start || value > *end);
01909                 }
01910             }
01911         }
01912     }
01913     return true;
01914 }
01915 
01916 // Regression test for Query::OP_VALUE_LE - used to return document IDs for
01917 // non-existent documents.
01918 DEFINE_TESTCASE(valuerange2, backend && writable) {
01919     Xapian::WritableDatabase db = get_writable_database();
01920     Xapian::Document doc;
01921     doc.set_data("5");
01922     doc.add_value(0, "5");
01923     db.replace_document(5, doc);
01924     Xapian::Enquire enq(db);
01925 
01926     Xapian::Query query(Xapian::Query::OP_VALUE_LE, 0, "6");
01927     enq.set_query(query);
01928     Xapian::MSet mset = enq.get_mset(0, 20);
01929 
01930     TEST_EQUAL(mset.size(), 1);
01931     TEST_EQUAL(*(mset[0]), 5);
01932     return true;
01933 }
01934 
01935 // Test for alldocs postlist with a sparse database.
01936 DEFINE_TESTCASE(alldocspl1, backend && writable) {
01937     Xapian::WritableDatabase db = get_writable_database();
01938     Xapian::Document doc;
01939     doc.set_data("5");
01940     doc.add_value(0, "5");
01941     db.replace_document(5, doc);
01942 
01943     Xapian::PostingIterator i = db.postlist_begin("");
01944     TEST(i != db.postlist_end(""));
01945     TEST_EQUAL(*i, 5);
01946     TEST_EQUAL(i.get_doclength(), 0);
01947     TEST_EQUAL(i.get_wdf(), 1);
01948     ++i;
01949     TEST(i == db.postlist_end(""));
01950 
01951     return true;
01952 }
01953 
01954 // Test reading and writing a modified alldocspostlist.
01955 DEFINE_TESTCASE(alldocspl2, backend && writable) {
01956     Xapian::PostingIterator i, end;
01957     {
01958         Xapian::WritableDatabase db = get_writable_database();
01959         Xapian::Document doc;
01960         doc.set_data("5");
01961         doc.add_value(0, "5");
01962         db.replace_document(5, doc);
01963 
01964         // Test iterating before flushing the changes.
01965         i = db.postlist_begin("");
01966         end = db.postlist_end("");
01967         TEST(i != end);
01968         TEST_EQUAL(*i, 5);
01969         TEST_EQUAL(i.get_doclength(), 0);
01970         TEST_EQUAL(i.get_wdf(), 1);
01971         ++i;
01972         TEST(i == end);
01973 
01974         db.flush();
01975 
01976         // Test iterating after flushing the changes.
01977         i = db.postlist_begin("");
01978         end = db.postlist_end("");
01979         TEST(i != end);
01980         TEST_EQUAL(*i, 5);
01981         TEST_EQUAL(i.get_doclength(), 0);
01982         TEST_EQUAL(i.get_wdf(), 1);
01983         ++i;
01984         TEST(i == end);
01985 
01986         // Add another document.
01987         doc = Xapian::Document();
01988         doc.set_data("5");
01989         doc.add_value(0, "7");
01990         db.replace_document(7, doc);
01991 
01992         // Test iterating through before flushing the changes.
01993         i = db.postlist_begin("");
01994         end = db.postlist_end("");
01995         TEST(i != end);
01996         TEST_EQUAL(*i, 5);
01997         TEST_EQUAL(i.get_doclength(), 0);
01998         TEST_EQUAL(i.get_wdf(), 1);
01999         ++i;
02000         TEST(i != end);
02001         TEST_EQUAL(*i, 7);
02002         TEST_EQUAL(i.get_doclength(), 0);
02003         TEST_EQUAL(i.get_wdf(), 1);
02004         ++i;
02005         TEST(i == end);
02006 
02007         // Delete the first document.
02008         db.delete_document(5);
02009 
02010         // Test iterating through before flushing the changes.
02011         i = db.postlist_begin("");
02012         end = db.postlist_end("");
02013         TEST(i != end);
02014         TEST_EQUAL(*i, 7);
02015         TEST_EQUAL(i.get_doclength(), 0);
02016         TEST_EQUAL(i.get_wdf(), 1);
02017         ++i;
02018         TEST(i == end);
02019 
02020         // Test iterating through after flushing the changes, and dropping the reference to the main DB.
02021         db.flush();
02022         i = db.postlist_begin("");
02023         end = db.postlist_end("");
02024     }
02025 
02026     TEST(i != end);
02027     TEST_EQUAL(*i, 7);
02028     TEST_EQUAL(i.get_doclength(), 0);
02029     TEST_EQUAL(i.get_wdf(), 1);
02030     ++i;
02031     TEST(i == end);
02032 
02033     return true;
02034 }
02035 
02036 // Feature test for Query::OP_VALUE_GE.
02037 DEFINE_TESTCASE(valuege1, backend) {
02038     Xapian::Database db(get_database("apitest_phrase"));
02039     Xapian::Enquire enq(db);
02040     static const char * vals[] = {
02041         "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z", NULL
02042     };
02043     for (const char **start = vals; *start; ++start) {
02044         Xapian::Query query(Xapian::Query::OP_VALUE_GE, 1, *start);
02045         enq.set_query(query);
02046         Xapian::MSet mset = enq.get_mset(0, 20);
02047         // Check that documents in the MSet match the value range filter.
02048         set<Xapian::docid> matched;
02049         Xapian::MSetIterator i;
02050         for (i = mset.begin(); i != mset.end(); ++i) {
02051             matched.insert(*i);
02052             string value = db.get_document(*i).get_value(1);
02053             tout << "'" << *start << "' <= '" << value << "'" << endl;
02054             TEST(value >= *start);
02055         }
02056         // Check that documents not in the MSet don't match the value range filter.
02057         for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
02058             if (matched.find(j) == matched.end()) {
02059                 string value = db.get_document(j).get_value(1);
02060                 tout << value << " < '" << *start << "'" << endl;
02061                 TEST(value < *start);
02062             }
02063         }
02064     }
02065     return true;
02066 }
02067 
02068 // Feature test for Query::OP_VALUE_LE.
02069 DEFINE_TESTCASE(valuele1, backend) {
02070     Xapian::Database db(get_database("apitest_phrase"));
02071     Xapian::Enquire enq(db);
02072     static const char * vals[] = {
02073         "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z", NULL
02074     };
02075     for (const char **end = vals; *end; ++end) {
02076         Xapian::Query query(Xapian::Query::OP_VALUE_LE, 1, *end);
02077         enq.set_query(query);
02078         Xapian::MSet mset = enq.get_mset(0, 20);
02079         // Check that documents in the MSet match the value range filter.
02080         set<Xapian::docid> matched;
02081         Xapian::MSetIterator i;
02082         for (i = mset.begin(); i != mset.end(); ++i) {
02083             matched.insert(*i);
02084             string value = db.get_document(*i).get_value(1);
02085             tout << "'" << *end << "' <= '" << value << "'" << endl;
02086             TEST(value <= *end);
02087         }
02088         // Check that documents not in the MSet don't match the value range filter.
02089         for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
02090             if (matched.find(j) == matched.end()) {
02091                 string value = db.get_document(j).get_value(1);
02092                 tout << value << " < '" << *end << "'" << endl;
02093                 TEST(value > *end);
02094             }
02095         }
02096     }
02097     return true;
02098 }
02099 
02100 // Feature test for Query::OP_SCALE_WEIGHT.
02101 DEFINE_TESTCASE(scaleweight1, backend) {
02102     Xapian::Database db(get_database("apitest_phrase"));
02103     Xapian::Enquire enq(db);
02104     Xapian::QueryParser qp;
02105 
02106     static const char * queries[] = {
02107         "pad",
02108         "milk fridge",
02109         "leave milk on fridge",
02110         "ordered milk operator",
02111         "ordered phrase operator",
02112         "leave \"milk on fridge\"",
02113         "notpresent",
02114         "leave \"milk notpresent\"",
02115         NULL
02116     };
02117     static double multipliers[] = {
02118         -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
02119         0, 0
02120     };
02121 
02122     for (const char **qstr = queries; *qstr; ++qstr) {
02123         Xapian::Query query1 = qp.parse_query(*qstr);
02124         tout << "query1: " << query1.get_description() << endl;
02125         for (double *multp = multipliers; multp[0] != multp[1]; ++multp) {
02126             double mult = *multp;
02127             if (mult < 0) {
02128                 TEST_EXCEPTION(Xapian::InvalidArgumentError,
02129                                Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT,
02130                                              query1, mult));
02131                 continue;
02132             }
02133             Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, mult);
02134             tout << "query2: " << query2.get_description() << endl;
02135 
02136             enq.set_query(query1);
02137             Xapian::MSet mset1 = enq.get_mset(0, 20);
02138             enq.set_query(query2);
02139             Xapian::MSet mset2 = enq.get_mset(0, 20);
02140 
02141             TEST_EQUAL(mset1.size(), mset2.size());
02142 
02143             Xapian::MSetIterator i1, i2;
02144             if (mult > 0) {
02145                 for (i1 = mset1.begin(), i2 = mset2.begin();
02146                      i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
02147                     TEST_EQUAL_DOUBLE(i1.get_weight() * mult, i2.get_weight());
02148                     TEST_EQUAL(*i1, *i2);
02149                 }
02150             } else {
02151                 // Weights in mset2 are 0; so it should be sorted by docid.
02152                 vector<Xapian::docid> ids1;
02153                 vector<Xapian::docid> ids2;
02154                 for (i1 = mset1.begin(), i2 = mset2.begin();
02155                      i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
02156                     TEST_NOT_EQUAL_DOUBLE(i1.get_weight(), 0);
02157                     TEST_EQUAL_DOUBLE(i2.get_weight(), 0);
02158                     ids1.push_back(*i1);
02159                     ids2.push_back(*i2);
02160                 }
02161                 sort(ids1.begin(), ids1.end());
02162                 TEST_EQUAL(ids1, ids2);
02163             }
02164         }
02165     }
02166     return true;
02167 }
02168 
02169 // Test Query::OP_SCALE_WEIGHT being used to multiply some of the weights of a
02170 // search by zero.
02171 DEFINE_TESTCASE(scaleweight2, backend) {
02172     Xapian::Database db(get_database("apitest_phrase"));
02173     Xapian::Enquire enq(db);
02174     Xapian::MSetIterator i;
02175 
02176     Xapian::Query query1("fridg");
02177     Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, 2.5);
02178     Xapian::Query query3("milk");
02179     Xapian::Query query4(Xapian::Query::OP_SCALE_WEIGHT, query3, 0);
02180     Xapian::Query query5(Xapian::Query::OP_OR, query2, query4);
02181 
02182     // query5 should first return the same results as query1, in the same
02183     // order, and then return the results of query3 which aren't also results
02184     // of query1, in ascending docid order.  We test that this happens.
02185 
02186     // First, build a vector of docids matching the first part of the query,
02187     // and append the non-duplicate docids matching the second part of the
02188     // query.
02189     vector<Xapian::docid> ids1;
02190     set<Xapian::docid> idsin1;
02191     vector<Xapian::docid> ids3;
02192 
02193     enq.set_query(query1);
02194     Xapian::MSet mset1 = enq.get_mset(0, 20);
02195     enq.set_query(query3);
02196     Xapian::MSet mset3 = enq.get_mset(0, 20);
02197     TEST_NOT_EQUAL(mset1.size(), 0);
02198     for (i = mset1.begin(); i != mset1.end(); ++i) {
02199         ids1.push_back(*i);
02200         idsin1.insert(*i);
02201     }
02202     TEST_NOT_EQUAL(mset3.size(), 0);
02203     for (i = mset3.begin(); i != mset3.end(); ++i) {
02204         if (idsin1.find(*i) != idsin1.end())
02205             continue;
02206         ids3.push_back(*i);
02207     }
02208     sort(ids3.begin(), ids3.end());
02209     ids1.insert(ids1.end(), ids3.begin(), ids3.end());
02210 
02211     // Now, run the combined query and build a vector of the matching docids.
02212     vector<Xapian::docid> ids5;
02213     enq.set_query(query5);
02214     Xapian::MSet mset5 = enq.get_mset(0, 20);
02215     for (i = mset5.begin(); i != mset5.end(); ++i) {
02216         ids5.push_back(*i);
02217     }
02218 
02219     TEST_EQUAL(ids1, ids5);
02220     return true;
02221 }
02222 
02223 // Regression test for bug fixed in 1.0.5 - this test would failed under
02224 // valgrind because it used an uninitialised value.
02225 DEFINE_TESTCASE(bm25weight1, backend) {
02226     Xapian::Enquire enquire(get_database("apitest_simpledata"));
02227     enquire.set_weighting_scheme(Xapian::BM25Weight(1, 25, 1, 0.01, 0.5));
02228     enquire.set_query(Xapian::Query("word") );
02229 
02230     Xapian::MSet mset = enquire.get_mset(0, 25);
02231 
02232     return true;
02233 }
02234 
02235 // Feature test for TradWeight.
02236 DEFINE_TESTCASE(tradweight1, backend) {
02237     Xapian::Enquire enquire(get_database("apitest_simpledata"));
02238     enquire.set_weighting_scheme(Xapian::TradWeight());
02239     enquire.set_query(Xapian::Query("word") );
02240 
02241     Xapian::MSet mset = enquire.get_mset(0, 25);
02242 
02243     enquire.set_weighting_scheme(Xapian::TradWeight(0));
02244     enquire.set_query(Xapian::Query("word") );
02245 
02246     mset = enquire.get_mset(0, 25);
02247     // FIXME: should check that TradWeight(0) means wdf and doc length really
02248     // don't affect the weights as stated in the documentation.
02249 
02250     return true;
02251 }

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.