00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef OM_HGUARD_FLINT_UTILS_H
00024 #define OM_HGUARD_FLINT_UTILS_H
00025
00026 #include <xapian/types.h>
00027
00028 #include <string>
00029
00030 using namespace std;
00031
/** Compile-time assertion.
 *
 *  Expands to a braced block declaring a char array whose bound is 1 when
 *  the condition holds and -1 when it does not, so a false condition is
 *  rejected by the compiler.  The cast to void marks the array as used.
 *
 *  @param a  Constant expression which must be true.
 */
#define CASSERT(a) { char cassert_check_[(a) ? 1 : -1]; (void)cassert_check_; }

/** Compile-time assertion that T is an unsigned type.
 *
 *  Converting -1 to T only gives a value greater than zero when T is
 *  unsigned.
 */
#define CASSERT_TYPE_UNSIGNED(T) CASSERT(static_cast<T>(-1) > 0)

/// Unsigned single-byte type used when examining raw packed data.
typedef unsigned char om_byte;

/// Alias for unsigned int.
typedef unsigned int om_uint32;

/// Alias for int.
typedef int om_int32;
00041
00066 template<class T>
00067 bool
00068 unpack_uint(const char ** src,
00069 const char * src_end,
00070 T * resultptr)
00071 {
00072
00073 CASSERT_TYPE_UNSIGNED(T);
00074
00075
00076 CASSERT(sizeof(om_byte) == 1);
00077
00078 unsigned int shift = 0;
00079 T result = 0;
00080
00081 while (true) {
00082 if ((*src) == src_end) {
00083 *src = 0;
00084 return false;
00085 }
00086
00087 om_byte part = static_cast<om_byte>(**src);
00088 (*src)++;
00089
00090
00091 if (((shift > (sizeof(T) - 1) * 8 + 1) &&
00092 ((part & 0x7f) << (shift % 8)) >= 0x100) ||
00093 (shift >= sizeof(T) * 8)) {
00094
00095 while (true) {
00096 if ((part & 0x80) == 0) return false;
00097 if ((*src) == src_end) {
00098 *src = 0;
00099 return false;
00100 }
00101 part = static_cast<om_byte>(**src);
00102 (*src)++;
00103 }
00104 }
00105
00106 result += T(part & 0x7f) << shift;
00107 shift += 7;
00108
00109 if ((part & 0x80) == 0) {
00110 if (resultptr) *resultptr = result;
00111 return true;
00112 }
00113 }
00114 }
00115
00116
00123 template<class T>
00124 string
00125 pack_uint(T value)
00126 {
00127
00128 CASSERT_TYPE_UNSIGNED(T);
00129
00130 if (value == 0) return string("", 1u);
00131 string result;
00132
00133 while (value != 0) {
00134 om_byte part = static_cast<om_byte>(value & 0x7f);
00135 value = value >> 7;
00136 if (value) part |= 0x80;
00137 result.append(1u, char(part));
00138 }
00139
00140 return result;
00141 }
00142
00151 template<>
00152 inline string
00153 pack_uint<bool>(bool value)
00154 {
00155 return string(1, static_cast<char>(value));
00156 }
00157
00175 template<class T>
00176 bool
00177 unpack_uint_last(const char ** src, const char * src_end, T * resultptr)
00178 {
00179
00180 CASSERT_TYPE_UNSIGNED(T);
00181
00182 CASSERT(sizeof(om_byte) == 1);
00183
00184 if (src_end - *src > int(sizeof(T))) {
00185
00186 *src = src_end;
00187 return false;
00188 }
00189
00190 T result = 0;
00191 int shift = 0;
00192 while (*src != src_end) {
00193 result |= static_cast<T>(static_cast<om_byte>(**src)) << shift;
00194 ++(*src);
00195 shift += 8;
00196 }
00197 *resultptr = result;
00198 return true;
00199 }
00200
00210 template<class T>
00211 string
00212 pack_uint_last(T value)
00213 {
00214
00215 CASSERT_TYPE_UNSIGNED(T);
00216
00217 string result;
00218 while (value) {
00219 result += char(value);
00220 value >>= 8;
00221 }
00222 return result;
00223 }
00224
00235 template<class T>
00236 string
00237 pack_uint_preserving_sort(T value)
00238 {
00239
00240 CASSERT_TYPE_UNSIGNED(T);
00241
00242 string result;
00243 while (value != 0) {
00244 om_byte part = static_cast<om_byte>(value & 0xff);
00245 value = value >> 8;
00246 result.insert(string::size_type(0), 1u, char(part));
00247 }
00248 result.insert(string::size_type(0), 1u, char(result.size()));
00249 return result;
00250 }
00251
/** Decode an unsigned integer stored as a length byte followed by that
 *  many value bytes, most significant byte first.
 *
 *  @param src        Address of the read pointer.  On success it is left
 *                    just after the last value byte.  If the input is empty
 *                    or ends before the number does, it is set to 0.  If
 *                    the number is complete but has more bytes than T, it
 *                    is left just after the number.
 *  @param src_end    One past the last readable byte.
 *  @param resultptr  Where to store the decoded value; may be 0, in which
 *                    case the value is decoded and discarded.
 *
 *  @return true if a value which fits in T was decoded, false otherwise.
 */
template<class T>
bool
unpack_uint_preserving_sort(const char ** src,
                            const char * src_end,
                            T * resultptr)
{
    if (*src == src_end) {
        *src = 0;
        return false;
    }

    unsigned int length = static_cast<unsigned char>(**src);
    (*src)++;

    // Check the value bytes are all present before touching them.  The
    // length is at most 255 so the conversion to int is safe.
    if (src_end - *src < int(length)) {
        *src = 0;
        return false;
    }

    if (length > sizeof(T)) {
        // Too big for T: step over the number so the caller can carry on
        // with whatever follows it.
        *src += length;
        return false;
    }

    // The value is known to fit in T and to lie within the input.
    T result = 0;
    for (; length > 0; --length) {
        result = result << 8;
        result += static_cast<unsigned char>(**src);
        (*src)++;
    }
    if (resultptr) *resultptr = result;

    return true;
}
00305
00306 inline bool
00307 unpack_string(const char ** src,
00308 const char * src_end,
00309 string & result)
00310 {
00311 string::size_type length;
00312 if (!unpack_uint(src, src_end, &length)) {
00313 return false;
00314 }
00315
00316 if (src_end - *src < 0 ||
00317 string::size_type(src_end - *src) < length) {
00318 src = 0;
00319 return false;
00320 }
00321
00322 result = string(*src, length);
00323 *src += length;
00324 return true;
00325 }
00326
00327 inline string
00328 pack_string(string value)
00329 {
00330 return pack_uint(value.size()) + value;
00331 }
00332
/** Encode a string with an explicit terminator instead of a length.
 *
 *  Each zero byte in the input is written as the two bytes "\0\xff", and
 *  the encoded string is terminated by two zero bytes.
 *
 *  @param value  The string to encode; may contain zero bytes.
 *
 *  @return The encoded bytes.
 */
inline string
pack_string_preserving_sort(string value)
{
    string encoded;
    for (string::size_type k = 0; k != value.size(); ++k) {
        const char ch = value[k];
        encoded += ch;
        // Mark an embedded zero byte so it can't be taken for the terminator.
        if (ch == '\0') encoded += '\xff';
    }
    // Terminator.
    encoded.append(2u, '\0');
    return encoded;
}
00349
/** Decode a string written in the zero-terminated form where "\0\xff"
 *  stands for an embedded zero byte and a zero byte followed by any other
 *  byte ends the string.
 *
 *  @param src      Address of the read pointer.  On success it is left just
 *                  after the byte which follows the terminating zero.  It
 *                  is never set to 0 by this function.
 *  @param src_end  One past the last readable byte.
 *  @param result   Receives the decoded string.  It is emptied first, and
 *                  on failure holds whatever had been decoded so far.
 *
 *  @return true if a complete string was decoded, false if the input ran
 *          out first.
 */
inline bool
unpack_string_preserving_sort(const char ** src,
                              const char * src_end,
                              string & result)
{
    const char *& pos = *src;
    result = string();

    for (;;) {
        if (!(pos < src_end)) return false;

        // Gather the run of bytes up to the next zero byte.
        const char * chunk = pos;
        while (*pos != '\0') {
            ++pos;
            if (pos == src_end) return false;
        }
        result.append(chunk, pos - chunk);

        // Step over the zero byte; the byte after it says what it meant.
        ++pos;
        if (pos == src_end) return false;

        const bool embedded_zero = (*pos == '\xff');
        ++pos;
        if (!embedded_zero) return true;

        result += '\0';
    }
}
00374
/** Decode a boolean stored as the character '0' (false) or '1' (true).
 *
 *  @param src        Address of the read pointer.  On success it is left
 *                    just after the character read.  It is set to 0 if the
 *                    input is empty or the character is neither '0' nor
 *                    '1'.
 *  @param src_end    One past the last readable byte.
 *  @param resultptr  Where to store the decoded value; may be 0, in which
 *                    case the value is checked and discarded.
 *
 *  @return true on success, false otherwise.
 */
inline bool
unpack_bool(const char ** src,
            const char * src_end,
            bool * resultptr)
{
    if (*src == src_end) {
        *src = 0;
        return false;
    }

    const char flag = *(*src)++;
    if (flag != '0' && flag != '1') {
        *src = 0;
        return false;
    }

    if (resultptr) *resultptr = (flag == '1');
    return true;
}
00395
/** Encode a boolean as a single character.
 *
 *  @param value  The flag to encode.
 *
 *  @return "1" for true, "0" for false.
 */
inline string
pack_bool(bool value)
{
    return string(1u, value ? '1' : '0');
}
00401
00405 inline string
00406 flint_docid_to_key(Xapian::docid did)
00407 {
00408 return pack_uint_preserving_sort(did);
00409 }
00410
00411 #endif