00001
00002
00003 #include <limits.h>
00004 #include "swedish.h"
00005
00006 static const symbol s_0_0[1] = { 'a' };
00007 static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
00008 static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
00009 static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
00010 static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
00011 static const symbol s_0_5[2] = { 'a', 'd' };
00012 static const symbol s_0_6[1] = { 'e' };
00013 static const symbol s_0_7[3] = { 'a', 'd', 'e' };
00014 static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
00015 static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
00016 static const symbol s_0_10[3] = { 'a', 'r', 'e' };
00017 static const symbol s_0_11[4] = { 'a', 's', 't', 'e' };
00018 static const symbol s_0_12[2] = { 'e', 'n' };
00019 static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
00020 static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
00021 static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
00022 static const symbol s_0_16[3] = { 'e', 'r', 'n' };
00023 static const symbol s_0_17[2] = { 'a', 'r' };
00024 static const symbol s_0_18[2] = { 'e', 'r' };
00025 static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
00026 static const symbol s_0_20[2] = { 'o', 'r' };
00027 static const symbol s_0_21[1] = { 's' };
00028 static const symbol s_0_22[2] = { 'a', 's' };
00029 static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
00030 static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
00031 static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
00032 static const symbol s_0_26[2] = { 'e', 's' };
00033 static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
00034 static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
00035 static const symbol s_0_29[3] = { 'e', 'n', 's' };
00036 static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
00037 static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
00038 static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
00039 static const symbol s_0_33[2] = { 'a', 't' };
00040 static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
00041 static const symbol s_0_35[3] = { 'h', 'e', 't' };
00042 static const symbol s_0_36[3] = { 'a', 's', 't' };
00043
00044 static const struct among a_0[37] =
00045 {
00046 { 1, s_0_0, -1, 1},
00047 { 4, s_0_1, 0, 1},
00048 { 4, s_0_2, 0, 1},
00049 { 7, s_0_3, 2, 1},
00050 { 4, s_0_4, 0, 1},
00051 { 2, s_0_5, -1, 1},
00052 { 1, s_0_6, -1, 1},
00053 { 3, s_0_7, 6, 1},
00054 { 4, s_0_8, 6, 1},
00055 { 4, s_0_9, 6, 1},
00056 { 3, s_0_10, 6, 1},
00057 { 4, s_0_11, 6, 1},
00058 { 2, s_0_12, -1, 1},
00059 { 5, s_0_13, 12, 1},
00060 { 4, s_0_14, 12, 1},
00061 { 5, s_0_15, 12, 1},
00062 { 3, s_0_16, -1, 1},
00063 { 2, s_0_17, -1, 1},
00064 { 2, s_0_18, -1, 1},
00065 { 5, s_0_19, 18, 1},
00066 { 2, s_0_20, -1, 1},
00067 { 1, s_0_21, -1, 2},
00068 { 2, s_0_22, 21, 1},
00069 { 5, s_0_23, 22, 1},
00070 { 5, s_0_24, 22, 1},
00071 { 5, s_0_25, 22, 1},
00072 { 2, s_0_26, 21, 1},
00073 { 4, s_0_27, 26, 1},
00074 { 5, s_0_28, 26, 1},
00075 { 3, s_0_29, 21, 1},
00076 { 5, s_0_30, 29, 1},
00077 { 6, s_0_31, 29, 1},
00078 { 4, s_0_32, 21, 1},
00079 { 2, s_0_33, -1, 1},
00080 { 5, s_0_34, -1, 1},
00081 { 3, s_0_35, -1, 1},
00082 { 3, s_0_36, -1, 1}
00083 };
00084
00085 static const symbol s_1_0[2] = { 'd', 'd' };
00086 static const symbol s_1_1[2] = { 'g', 'd' };
00087 static const symbol s_1_2[2] = { 'n', 'n' };
00088 static const symbol s_1_3[2] = { 'd', 't' };
00089 static const symbol s_1_4[2] = { 'g', 't' };
00090 static const symbol s_1_5[2] = { 'k', 't' };
00091 static const symbol s_1_6[2] = { 't', 't' };
00092
00093 static const struct among a_1[7] =
00094 {
00095 { 2, s_1_0, -1, -1},
00096 { 2, s_1_1, -1, -1},
00097 { 2, s_1_2, -1, -1},
00098 { 2, s_1_3, -1, -1},
00099 { 2, s_1_4, -1, -1},
00100 { 2, s_1_5, -1, -1},
00101 { 2, s_1_6, -1, -1}
00102 };
00103
00104 static const symbol s_2_0[2] = { 'i', 'g' };
00105 static const symbol s_2_1[3] = { 'l', 'i', 'g' };
00106 static const symbol s_2_2[3] = { 'e', 'l', 's' };
00107 static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
00108 static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' };
00109
00110 static const struct among a_2[5] =
00111 {
00112 { 2, s_2_0, -1, 1},
00113 { 3, s_2_1, 0, 1},
00114 { 3, s_2_2, -1, 1},
00115 { 5, s_2_3, -1, 3},
00116 { 5, s_2_4, -1, 2}
00117 };
00118
00119 static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
00120
00121 static const unsigned char g_s_ending[] = { 119, 127, 149 };
00122
00123 static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' };
00124 static const symbol s_1[] = { 'f', 'u', 'l', 'l' };
00125
00126 int Xapian::InternalStemSwedish::r_mark_regions() {
00127 I_p1 = l;
00128 { int c_test1 = c;
00129 { int ret = skip_utf8(p, c, 0, l, + 3);
00130 if (ret < 0) return 0;
00131 c = ret;
00132 }
00133 I_x = c;
00134 c = c_test1;
00135 }
00136 if (out_grouping_U(g_v, 97, 246, 1) < 0) return 0;
00137 { int ret = in_grouping_U(g_v, 97, 246, 1);
00138 if (ret < 0) return 0;
00139 c += ret;
00140 }
00141 I_p1 = c;
00142
00143 if (!(I_p1 < I_x)) goto lab0;
00144 I_p1 = I_x;
00145 lab0:
00146 return 1;
00147 }
00148
00149 int Xapian::InternalStemSwedish::r_main_suffix() {
00150 int among_var;
00151 { int m1 = l - c; (void)m1;
00152 int mlimit1;
00153 if (c < I_p1) return 0;
00154 c = I_p1;
00155 mlimit1 = lb; lb = c;
00156 c = l - m1;
00157 ket = c;
00158 if (c <= lb || p[c - 1] >> 5 != 3 || !((1851442 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00159 among_var = find_among_b(a_0, 37, 0, 0);
00160 if (!(among_var)) { lb = mlimit1; return 0; }
00161 bra = c;
00162 lb = mlimit1;
00163 }
00164 switch(among_var) {
00165 case 0: return 0;
00166 case 1:
00167 if (slice_del() == -1) return -1;
00168 break;
00169 case 2:
00170 if (in_grouping_b_U(g_s_ending, 98, 121, 0)) return 0;
00171 if (slice_del() == -1) return -1;
00172 break;
00173 }
00174 return 1;
00175 }
00176
00177 int Xapian::InternalStemSwedish::r_consonant_pair() {
00178 { int m1 = l - c; (void)m1;
00179 int mlimit1;
00180 if (c < I_p1) return 0;
00181 c = I_p1;
00182 mlimit1 = lb; lb = c;
00183 c = l - m1;
00184 { int m2 = l - c; (void)m2;
00185 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1064976 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00186 if (!(find_among_b(a_1, 7, 0, 0))) { lb = mlimit1; return 0; }
00187 c = l - m2;
00188 ket = c;
00189 { int ret = skip_utf8(p, c, lb, 0, -1);
00190 if (ret < 0) { lb = mlimit1; return 0; }
00191 c = ret;
00192 }
00193 bra = c;
00194 if (slice_del() == -1) return -1;
00195 }
00196 lb = mlimit1;
00197 }
00198 return 1;
00199 }
00200
00201 int Xapian::InternalStemSwedish::r_other_suffix() {
00202 int among_var;
00203 { int m1 = l - c; (void)m1;
00204 int mlimit1;
00205 if (c < I_p1) return 0;
00206 c = I_p1;
00207 mlimit1 = lb; lb = c;
00208 c = l - m1;
00209 ket = c;
00210 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1572992 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00211 among_var = find_among_b(a_2, 5, 0, 0);
00212 if (!(among_var)) { lb = mlimit1; return 0; }
00213 bra = c;
00214 switch(among_var) {
00215 case 0: { lb = mlimit1; return 0; }
00216 case 1:
00217 if (slice_del() == -1) return -1;
00218 break;
00219 case 2:
00220 { int ret = slice_from_s(4, s_0);
00221 if (ret < 0) return ret;
00222 }
00223 break;
00224 case 3:
00225 { int ret = slice_from_s(4, s_1);
00226 if (ret < 0) return ret;
00227 }
00228 break;
00229 }
00230 lb = mlimit1;
00231 }
00232 return 1;
00233 }
00234
00235 int Xapian::InternalStemSwedish::stem() {
00236 { int c1 = c;
00237 { int ret = r_mark_regions();
00238 if (ret == 0) goto lab0;
00239 if (ret < 0) return ret;
00240 }
00241 lab0:
00242 c = c1;
00243 }
00244 lb = c; c = l;
00245
00246 { int m2 = l - c; (void)m2;
00247 { int ret = r_main_suffix();
00248 if (ret == 0) goto lab1;
00249 if (ret < 0) return ret;
00250 }
00251 lab1:
00252 c = l - m2;
00253 }
00254 { int m3 = l - c; (void)m3;
00255 { int ret = r_consonant_pair();
00256 if (ret == 0) goto lab2;
00257 if (ret < 0) return ret;
00258 }
00259 lab2:
00260 c = l - m3;
00261 }
00262 { int m4 = l - c; (void)m4;
00263 { int ret = r_other_suffix();
00264 if (ret == 0) goto lab3;
00265 if (ret < 0) return ret;
00266 }
00267 lab3:
00268 c = l - m4;
00269 }
00270 c = lb;
00271 return 1;
00272 }
00273
00274 Xapian::InternalStemSwedish::InternalStemSwedish()
00275 : I_x(0), I_p1(0)
00276 {
00277 }
00278
00279 Xapian::InternalStemSwedish::~InternalStemSwedish()
00280 {
00281 }
00282
00283 const char *
00284 Xapian::InternalStemSwedish::get_description() const
00285 {
00286 return "swedish";
00287 }