00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
00022 #define XAPIAN_INCLUDED_STEMINTERNAL_H
00023
00024 #include <xapian/base.h>
00025 #include <xapian/stem.h>
00026
00027 #include <stdlib.h>
00028 #include <string>
00029
00030
00031
00032
/// Storage unit of the stemmer's string buffers: one unsigned byte.
typedef unsigned char symbol;

/** Size in bytes of the bookkeeping header placed in front of a symbol buffer.
 *
 *  The header is two ints (read by the CAPACITY() and SIZE() macros at
 *  indices -2 and -1 relative to the buffer pointer).
 *
 *  The expansion is parenthesized so that HEAD behaves as a single value in
 *  any expression (e.g. `x / HEAD` or `HEAD / HEAD`), not only in the
 *  additive contexts where the unparenthesized form happened to work.
 */
#define HEAD (2*sizeof(int))
00036
00037
00038
/* Accessors for the two ints stored immediately before a symbol buffer P:
 *
 *   index -1 : "size"      (read with SIZE, written with SET_SIZE)
 *   index -2 : "capacity"  (read with CAPACITY, written with SET_CAPACITY)
 *
 * P is cast via void* to an int pointer, so it must point just past a pair
 * of ints. NOTE(review): presumably size is the number of symbols in use and
 * capacity the number allocated -- confirm against the allocator.
 *
 * Every expansion is fully parenthesized, and the setters parenthesize N, so
 * each macro behaves as a single expression wherever it is used (for
 * example `SET_SIZE(p, n) + 1` stores n, not n + 1).
 */
#define SIZE(P) (((const int *)(const void *)(P))[-1])
#define SET_SIZE(P, N) (((int *)(void *)(P))[-1] = (N))
#define CAPACITY(P) (((const int *)(const void *)(P))[-2])
#define SET_CAPACITY(P, N) (((int *)(void *)(P))[-2] = (N))
00043
00044 typedef int (*among_function)(Xapian::Stem::Internal *);
00045
00046 struct among {
00047 int s_size;
00048 const symbol * s;
00049 int substring_i;
00050 int result;
00051 };
00052
00053 extern symbol * create_s();
00054
00055 inline void lose_s(symbol * p) {
00056 if (p) free((char *) p - HEAD);
00057 }
00058
00059 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
00060
00061 namespace Xapian {
00062
00063 class Stem::Internal : public Xapian::Internal::RefCntBase {
00064 int slice_check();
00065
00066 protected:
00067 symbol * p;
00068 int c, l, lb, bra, ket;
00069
00070 int get_utf8(int * slot);
00071 int get_b_utf8(int * slot);
00072
00073 int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
00074 int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00075 int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
00076 int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00077
00078 int eq_s(int s_size, const symbol * s);
00079 int eq_s_b(int s_size, const symbol * s);
00080 int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
00081 int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
00082
00083 int find_among(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f);
00084 int find_among_b(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f);
00085
00086 int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
00087 int slice_from_s(int s_size, const symbol * s);
00088 int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
00089
00090 int slice_del() { return slice_from_s(0, 0); }
00091
00092 void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
00093 void insert_v(int c_bra, int c_ket, const symbol * v) {
00094 insert_s(c_bra, c_ket, SIZE(v), v);
00095 }
00096
00097 symbol * slice_to(symbol * v);
00098 symbol * assign_to(symbol * v);
00099
00100 #if 0
00101 void debug(int number, int line_count);
00102 #endif
00103
00104 public:
00106 Internal();
00107
00109 virtual ~Internal();
00110
00112 std::string operator()(const std::string & word);
00113
00115 virtual int stem() = 0;
00116
00118 virtual const char * get_description() const = 0;
00119 };
00120
00121 }
00122
00123 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H