languages/steminternal.h

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
00022 #define XAPIAN_INCLUDED_STEMINTERNAL_H
00023 
00024 #include <xapian/base.h>
00025 #include <xapian/stem.h>
00026 
00027 #include <stdlib.h>
00028 #include <string>
00029 
00030 // FIXME: we might want to make Stem::Internal an abstract base class and have
00031 // a Stem::Internal::Snowball subclass, to allow for non-Snowball stemmers...
00032 
// Element type of the buffers declared in this header (search strings and
// the stemmer's working buffer): one unsigned byte.
typedef unsigned char symbol;

// Size in bytes of the two-int header which the accessor macros below expect
// to sit immediately before a symbol buffer.  The replacement list is
// parenthesised so that HEAD stays a single operand wherever it is used
// (e.g. `n / HEAD`, `n % HEAD`).
#define HEAD (2*sizeof(int))

// Cast via (void*) to avoid warnings about alignment (the pointers *are*
// appropriately aligned).
//
// SIZE / SET_SIZE read and write the int stored directly before P;
// CAPACITY / SET_CAPACITY read and write the int stored before that one.
// Each expansion and each N argument is parenthesised so the macros behave
// as a single expression whatever surrounds them or is passed in.
#define SIZE(P)        (((const int *)(const void *)(P))[-1])
#define SET_SIZE(P, N) (((int *)(void *)(P))[-1] = (N))
#define CAPACITY(P)    (((const int *)(const void *)(P))[-2])
#define SET_CAPACITY(P, N) (((int *)(void *)(P))[-2] = (N))
00043 
// Pointer to a function which takes the stemmer object and returns an int.
// find_among() and find_among_b() in Stem::Internal take a pointer to values
// of this type as their `f` argument.
00044 typedef int (*among_function)(Xapian::Stem::Internal *);
00045 
// One entry of the tables passed to find_among() / find_among_b() as `v`.
// NOTE(review): how substring_i and result are interpreted is decided by the
// find_among implementation, which is not part of this header - confirm there.
00046 struct among {
00047     int s_size;         /* length of search string (in symbols) */
00048     const symbol * s;   /* search string */
00049     int substring_i;    /* index to longest matching substring */
00050     int result;         /* result of the lookup */
00051 };
00052 
// Declared here, defined elsewhere.  NOTE(review): presumably allocates the
// kind of buffer that lose_s() releases - confirm against the definition.
00053 extern symbol * create_s();
00054 
// Release a symbol buffer.  A null pointer is ignored; otherwise the address
// HEAD bytes before p is what gets handed to free(), so p must point just
// past a HEAD-sized header at the start of the allocation.
00055 inline void lose_s(symbol * p) {
00056     if (!p) return;
          char * block_start = (char *) p - HEAD;
          free(block_start);
00057 }
00058 
// Declared here, defined elsewhere.
00059 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
00060 
00061 namespace Xapian {
00062 
00063 class Stem::Internal : public Xapian::Internal::RefCntBase {
          // Implementation class behind Xapian::Stem, derived from
          // Xapian::Internal::RefCntBase.  It is abstract: stem() and
          // get_description() are pure virtual, so a concrete stemmer derives
          // from this class and supplies both.  The protected members are the
          // toolkit such a subclass has available.

          // Private helper; only declared in this header.
00064     int slice_check();
00065 
00066   protected:
          // Working state available to derived stemmers.
          // NOTE(review): presumably p is the buffer holding the word being
          // stemmed and c / l / lb / bra / ket are the Snowball cursor, limit,
          // backward limit and slice start/end - confirm against the
          // implementation file.
00067     symbol * p;
00068     int c, l, lb, bra, ket;
00069 
          // UTF-8 character readers taking an int * output slot.
          // NOTE(review): the `_b` in names throughout this class presumably
          // marks the variant that works backwards - confirm.
00070     int get_utf8(int * slot);
00071     int get_b_utf8(int * slot);
00072 
          // Character-group tests: `s` is a byte table, with `min`, `max` and
          // `repeat` as further arguments.
          // NOTE(review): presumably `s` is a bitmap of the characters in
          // [min, max] that belong to the group - confirm.
00073     int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
00074     int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00075     int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
00076     int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00077 
          // String comparisons against `s_size` symbols starting at `s`.
00078     int eq_s(int s_size, const symbol * s);
00079     int eq_s_b(int s_size, const symbol * s);
          // As eq_s() / eq_s_b(), with the length read from the int stored just
          // before `v` (via SIZE), so `v` must be a buffer carrying that header.
00080     int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
00081     int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
00082 
          // Table lookups over `v_size` entries of `v`; `fnum` and `f` are
          // passed alongside.
          // NOTE(review): presumably `fnum` selects, per entry, which function
          // in `f` to call - confirm.
00083     int find_among(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f);
00084     int find_among_b(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f);
00085 
          // Buffer editing primitives.
00086     int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
00087     int slice_from_s(int s_size, const symbol * s);
          // As slice_from_s(), with the length read from v's header via SIZE.
00088     int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
00089 
          // slice_from_s() with an empty replacement: size 0 and a null pointer.
00090     int slice_del() { return slice_from_s(0, 0); }
00091 
00092     void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
          // As insert_s(), with the length read from v's header via SIZE.
00093     void insert_v(int c_bra, int c_ket, const symbol * v) {
00094         insert_s(c_bra, c_ket, SIZE(v), v);
00095     }
00096 
          // Both take a symbol buffer and return a symbol buffer pointer.
          // NOTE(review): whether the returned pointer can differ from `v`
          // (e.g. after reallocation) is not visible here - confirm.
00097     symbol * slice_to(symbol * v);
00098     symbol * assign_to(symbol * v);
00099 
          // Debug hook, compiled out.
00100 #if 0
00101     void debug(int number, int line_count);
00102 #endif
00103 
00104   public:
          // Constructor; body is not in this header.
00106     Internal();
00107 
          // Virtual destructor, so objects can be destroyed through a pointer
          // to this base class.
00109     virtual ~Internal();
00110 
          // Takes a word and returns a std::string.
          // NOTE(review): presumably the stemmed form, produced by calling
          // stem() - confirm.
00112     std::string operator()(const std::string & word);
00113 
          // Pure virtual: the stemming routine each concrete stemmer provides.
00115     virtual int stem() = 0;
00116 
          // Pure virtual: returns a C string describing the concrete stemmer.
00118     virtual const char * get_description() const = 0;
00119 };
00120 
00121 }
00122 
00123 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H

Documentation for Xapian (version 1.0.10).
Generated on 24 Dec 2008 by Doxygen 1.5.2.