00001
00002
00003 #include <limits.h>
00004 #include "danish.h"
00005
00006 static const symbol s_0_0[3] = { 'h', 'e', 'd' };
00007 static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
00008 static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
00009 static const symbol s_0_3[1] = { 'e' };
00010 static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
00011 static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
00012 static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
00013 static const symbol s_0_7[3] = { 'e', 'n', 'e' };
00014 static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
00015 static const symbol s_0_9[3] = { 'e', 'r', 'e' };
00016 static const symbol s_0_10[2] = { 'e', 'n' };
00017 static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
00018 static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
00019 static const symbol s_0_13[2] = { 'e', 'r' };
00020 static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
00021 static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
00022 static const symbol s_0_16[1] = { 's' };
00023 static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
00024 static const symbol s_0_18[2] = { 'e', 's' };
00025 static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
00026 static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
00027 static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
00028 static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
00029 static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
00030 static const symbol s_0_24[3] = { 'e', 'n', 's' };
00031 static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
00032 static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
00033 static const symbol s_0_27[3] = { 'e', 'r', 's' };
00034 static const symbol s_0_28[3] = { 'e', 't', 's' };
00035 static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
00036 static const symbol s_0_30[2] = { 'e', 't' };
00037 static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
00038
00039 static const struct among a_0[32] =
00040 {
00041 { 3, s_0_0, -1, 1},
00042 { 5, s_0_1, 0, 1},
00043 { 4, s_0_2, -1, 1},
00044 { 1, s_0_3, -1, 1},
00045 { 5, s_0_4, 3, 1},
00046 { 4, s_0_5, 3, 1},
00047 { 6, s_0_6, 5, 1},
00048 { 3, s_0_7, 3, 1},
00049 { 4, s_0_8, 3, 1},
00050 { 3, s_0_9, 3, 1},
00051 { 2, s_0_10, -1, 1},
00052 { 5, s_0_11, 10, 1},
00053 { 4, s_0_12, 10, 1},
00054 { 2, s_0_13, -1, 1},
00055 { 5, s_0_14, 13, 1},
00056 { 4, s_0_15, 13, 1},
00057 { 1, s_0_16, -1, 2},
00058 { 4, s_0_17, 16, 1},
00059 { 2, s_0_18, 16, 1},
00060 { 5, s_0_19, 18, 1},
00061 { 7, s_0_20, 19, 1},
00062 { 4, s_0_21, 18, 1},
00063 { 5, s_0_22, 18, 1},
00064 { 4, s_0_23, 18, 1},
00065 { 3, s_0_24, 16, 1},
00066 { 6, s_0_25, 24, 1},
00067 { 5, s_0_26, 24, 1},
00068 { 3, s_0_27, 16, 1},
00069 { 3, s_0_28, 16, 1},
00070 { 5, s_0_29, 28, 1},
00071 { 2, s_0_30, -1, 1},
00072 { 4, s_0_31, 30, 1}
00073 };
00074
00075 static const symbol s_1_0[2] = { 'g', 'd' };
00076 static const symbol s_1_1[2] = { 'd', 't' };
00077 static const symbol s_1_2[2] = { 'g', 't' };
00078 static const symbol s_1_3[2] = { 'k', 't' };
00079
00080 static const struct among a_1[4] =
00081 {
00082 { 2, s_1_0, -1, -1},
00083 { 2, s_1_1, -1, -1},
00084 { 2, s_1_2, -1, -1},
00085 { 2, s_1_3, -1, -1}
00086 };
00087
00088 static const symbol s_2_0[2] = { 'i', 'g' };
00089 static const symbol s_2_1[3] = { 'l', 'i', 'g' };
00090 static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
00091 static const symbol s_2_3[3] = { 'e', 'l', 's' };
00092 static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' };
00093
00094 static const struct among a_2[5] =
00095 {
00096 { 2, s_2_0, -1, 1},
00097 { 3, s_2_1, 0, 1},
00098 { 4, s_2_2, 1, 1},
00099 { 3, s_2_3, -1, 1},
00100 { 5, s_2_4, -1, 2}
00101 };
00102
00103 static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
00104
00105 static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
00106
00107 static const symbol s_0[] = { 's', 't' };
00108 static const symbol s_1[] = { 'i', 'g' };
00109 static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
00110
00111 int Xapian::InternalStemDanish::r_mark_regions() {
00112 I_p1 = l;
00113 { int c_test1 = c;
00114 { int ret = skip_utf8(p, c, 0, l, + 3);
00115 if (ret < 0) return 0;
00116 c = ret;
00117 }
00118 I_x = c;
00119 c = c_test1;
00120 }
00121 if (out_grouping_U(g_v, 97, 248, 1) < 0) return 0;
00122 { int ret = in_grouping_U(g_v, 97, 248, 1);
00123 if (ret < 0) return 0;
00124 c += ret;
00125 }
00126 I_p1 = c;
00127
00128 if (!(I_p1 < I_x)) goto lab0;
00129 I_p1 = I_x;
00130 lab0:
00131 return 1;
00132 }
00133
00134 int Xapian::InternalStemDanish::r_main_suffix() {
00135 int among_var;
00136 { int m1 = l - c; (void)m1;
00137 int mlimit1;
00138 if (c < I_p1) return 0;
00139 c = I_p1;
00140 mlimit1 = lb; lb = c;
00141 c = l - m1;
00142 ket = c;
00143 if (c <= lb || p[c - 1] >> 5 != 3 || !((1851440 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00144 among_var = find_among_b(a_0, 32, 0, 0);
00145 if (!(among_var)) { lb = mlimit1; return 0; }
00146 bra = c;
00147 lb = mlimit1;
00148 }
00149 switch(among_var) {
00150 case 0: return 0;
00151 case 1:
00152 if (slice_del() == -1) return -1;
00153 break;
00154 case 2:
00155 if (in_grouping_b_U(g_s_ending, 97, 229, 0)) return 0;
00156 if (slice_del() == -1) return -1;
00157 break;
00158 }
00159 return 1;
00160 }
00161
00162 int Xapian::InternalStemDanish::r_consonant_pair() {
00163 { int m_test1 = l - c;
00164 { int m2 = l - c; (void)m2;
00165 int mlimit2;
00166 if (c < I_p1) return 0;
00167 c = I_p1;
00168 mlimit2 = lb; lb = c;
00169 c = l - m2;
00170 ket = c;
00171 if (c - 1 <= lb || (p[c - 1] != 100 && p[c - 1] != 116)) { lb = mlimit2; return 0; }
00172 if (!(find_among_b(a_1, 4, 0, 0))) { lb = mlimit2; return 0; }
00173 bra = c;
00174 lb = mlimit2;
00175 }
00176 c = l - m_test1;
00177 }
00178 { int ret = skip_utf8(p, c, lb, 0, -1);
00179 if (ret < 0) return 0;
00180 c = ret;
00181 }
00182 bra = c;
00183 if (slice_del() == -1) return -1;
00184 return 1;
00185 }
00186
00187 int Xapian::InternalStemDanish::r_other_suffix() {
00188 int among_var;
00189 { int m1 = l - c; (void)m1;
00190 ket = c;
00191 if (!(eq_s_b(2, s_0))) goto lab0;
00192 bra = c;
00193 if (!(eq_s_b(2, s_1))) goto lab0;
00194 if (slice_del() == -1) return -1;
00195 lab0:
00196 c = l - m1;
00197 }
00198 { int m2 = l - c; (void)m2;
00199 int mlimit2;
00200 if (c < I_p1) return 0;
00201 c = I_p1;
00202 mlimit2 = lb; lb = c;
00203 c = l - m2;
00204 ket = c;
00205 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1572992 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit2; return 0; }
00206 among_var = find_among_b(a_2, 5, 0, 0);
00207 if (!(among_var)) { lb = mlimit2; return 0; }
00208 bra = c;
00209 lb = mlimit2;
00210 }
00211 switch(among_var) {
00212 case 0: return 0;
00213 case 1:
00214 if (slice_del() == -1) return -1;
00215 { int m3 = l - c; (void)m3;
00216 { int ret = r_consonant_pair();
00217 if (ret == 0) goto lab1;
00218 if (ret < 0) return ret;
00219 }
00220 lab1:
00221 c = l - m3;
00222 }
00223 break;
00224 case 2:
00225 { int ret = slice_from_s(4, s_2);
00226 if (ret < 0) return ret;
00227 }
00228 break;
00229 }
00230 return 1;
00231 }
00232
00233 int Xapian::InternalStemDanish::r_undouble() {
00234 { int m1 = l - c; (void)m1;
00235 int mlimit1;
00236 if (c < I_p1) return 0;
00237 c = I_p1;
00238 mlimit1 = lb; lb = c;
00239 c = l - m1;
00240 ket = c;
00241 if (out_grouping_b_U(g_v, 97, 248, 0)) { lb = mlimit1; return 0; }
00242 bra = c;
00243 { symbol * ret = slice_to(S_ch);
00244 if (ret == 0) return -1;
00245 S_ch = ret;
00246 }
00247 lb = mlimit1;
00248 }
00249 if (!(eq_v_b(S_ch))) return 0;
00250 if (slice_del() == -1) return -1;
00251 return 1;
00252 }
00253
00254 int Xapian::InternalStemDanish::stem() {
00255 { int c1 = c;
00256 { int ret = r_mark_regions();
00257 if (ret == 0) goto lab0;
00258 if (ret < 0) return ret;
00259 }
00260 lab0:
00261 c = c1;
00262 }
00263 lb = c; c = l;
00264
00265 { int m2 = l - c; (void)m2;
00266 { int ret = r_main_suffix();
00267 if (ret == 0) goto lab1;
00268 if (ret < 0) return ret;
00269 }
00270 lab1:
00271 c = l - m2;
00272 }
00273 { int m3 = l - c; (void)m3;
00274 { int ret = r_consonant_pair();
00275 if (ret == 0) goto lab2;
00276 if (ret < 0) return ret;
00277 }
00278 lab2:
00279 c = l - m3;
00280 }
00281 { int m4 = l - c; (void)m4;
00282 { int ret = r_other_suffix();
00283 if (ret == 0) goto lab3;
00284 if (ret < 0) return ret;
00285 }
00286 lab3:
00287 c = l - m4;
00288 }
00289 { int m5 = l - c; (void)m5;
00290 { int ret = r_undouble();
00291 if (ret == 0) goto lab4;
00292 if (ret < 0) return ret;
00293 }
00294 lab4:
00295 c = l - m5;
00296 }
00297 c = lb;
00298 return 1;
00299 }
00300
00301 Xapian::InternalStemDanish::InternalStemDanish()
00302 : I_x(0), I_p1(0), S_ch(0)
00303 {
00304 S_ch = create_s();
00305 }
00306
00307 Xapian::InternalStemDanish::~InternalStemDanish()
00308 {
00309 lose_s(S_ch);
00310 }
00311
00312 const char *
00313 Xapian::InternalStemDanish::get_description() const
00314 {
00315 return "danish";
00316 }