00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include <xapian/document.h>
00024 #include <xapian/error.h>
00025 #include <xapian/termiterator.h>
00026
00027 #include "flint_termlisttable.h"
00028 #include "flint_utils.h"
00029 #include "omassert.h"
00030 #include "omdebug.h"
00031 #include "stringutils.h"
00032 #include "utils.h"
00033
00034 #include <string>
00035
00036 using namespace std;
00037
00038 void
00039 FlintTermListTable::set_termlist(Xapian::docid did,
00040 const Xapian::Document & doc,
00041 flint_doclen_t doclen)
00042 {
00043 DEBUGCALL(DB, void, "FlintTermListTable::set_termlist",
00044 did << ", " << doc << ", " << doclen);
00045
00046 string tag = pack_uint(doclen);
00047
00048 Xapian::doccount termlist_size = doc.termlist_count();
00049 if (termlist_size == 0) {
00050
00051 Assert(doclen == 0);
00052 Assert(doc.termlist_begin() == doc.termlist_end());
00053 add(flint_docid_to_key(did), string());
00054 return;
00055 }
00056
00057 Xapian::TermIterator t = doc.termlist_begin();
00058 if (t != doc.termlist_end()) {
00059 tag += pack_uint(termlist_size);
00060 string prev_term = *t;
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072 if (prev_term.size() == '0') tag += '0';
00073
00074 tag += prev_term.size();
00075 tag += prev_term;
00076 tag += pack_uint(t.get_wdf());
00077 --termlist_size;
00078
00079 while (++t != doc.termlist_end()) {
00080 const string & term = *t;
00081
00082
00083
00084 size_t reuse = common_prefix_length(prev_term, term);
00085
00086
00087
00088
00089
00090
00091
00092
00093 size_t packed = 0;
00094 Xapian::termcount wdf = t.get_wdf();
00095
00096
00097
00098 if (wdf < 127)
00099 packed = (wdf + 1) * (prev_term.size() + 1) + reuse;
00100
00101 if (packed && packed < 256) {
00102
00103 tag += char(packed);
00104 tag += char(term.size() - reuse);
00105 tag.append(term.data() + reuse, term.size() - reuse);
00106 } else {
00107 tag += char(reuse);
00108 tag += char(term.size() - reuse);
00109 tag.append(term.data() + reuse, term.size() - reuse);
00110
00111
00112 tag += pack_uint(wdf);
00113 }
00114
00115 prev_term = *t;
00116 --termlist_size;
00117 }
00118 }
00119 Assert(termlist_size == 0);
00120 add(flint_docid_to_key(did), tag);
00121 }
00122
00123 flint_doclen_t
00124 FlintTermListTable::get_doclength(Xapian::docid did) const
00125 {
00126 DEBUGCALL(DB, flint_doclen_t, "FlintTermListTable::get_doclength", did);
00127
00128 string tag;
00129 if (!get_exact_entry(flint_docid_to_key(did), tag))
00130 throw Xapian::DocNotFoundError("No termlist found for document " +
00131 om_tostring(did));
00132
00133 if (tag.empty()) RETURN(0);
00134
00135 const char * pos = tag.data();
00136 const char * end = pos + tag.size();
00137
00138 flint_doclen_t doclen;
00139 if (!unpack_uint(&pos, end, &doclen)) {
00140 const char *msg;
00141 if (pos == 0) {
00142 msg = "Too little data for doclen in termlist";
00143 } else {
00144 msg = "Overflowed value for doclen in termlist";
00145 }
00146 throw Xapian::DatabaseCorruptError(msg);
00147 }
00148
00149 RETURN(doclen);
00150 }