#include <limits.h>
#include "norwegian.h"

00006 static const symbol s_0_0[1] = { 'a' };
00007 static const symbol s_0_1[1] = { 'e' };
00008 static const symbol s_0_2[3] = { 'e', 'd', 'e' };
00009 static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' };
00010 static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' };
00011 static const symbol s_0_5[3] = { 'a', 'n', 'e' };
00012 static const symbol s_0_6[3] = { 'e', 'n', 'e' };
00013 static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' };
00014 static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' };
00015 static const symbol s_0_9[2] = { 'e', 'n' };
00016 static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' };
00017 static const symbol s_0_11[2] = { 'a', 'r' };
00018 static const symbol s_0_12[2] = { 'e', 'r' };
00019 static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' };
00020 static const symbol s_0_14[1] = { 's' };
00021 static const symbol s_0_15[2] = { 'a', 's' };
00022 static const symbol s_0_16[2] = { 'e', 's' };
00023 static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' };
00024 static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' };
00025 static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' };
00026 static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' };
00027 static const symbol s_0_21[3] = { 'e', 'n', 's' };
00028 static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' };
00029 static const symbol s_0_23[3] = { 'e', 'r', 's' };
00030 static const symbol s_0_24[3] = { 'e', 't', 's' };
00031 static const symbol s_0_25[2] = { 'e', 't' };
00032 static const symbol s_0_26[3] = { 'h', 'e', 't' };
00033 static const symbol s_0_27[3] = { 'e', 'r', 't' };
00034 static const symbol s_0_28[3] = { 'a', 's', 't' };
00035
00036 static const struct among a_0[29] =
00037 {
00038 { 1, s_0_0, -1, 1},
00039 { 1, s_0_1, -1, 1},
00040 { 3, s_0_2, 1, 1},
00041 { 4, s_0_3, 1, 1},
00042 { 4, s_0_4, 1, 1},
00043 { 3, s_0_5, 1, 1},
00044 { 3, s_0_6, 1, 1},
00045 { 6, s_0_7, 6, 1},
00046 { 4, s_0_8, 1, 3},
00047 { 2, s_0_9, -1, 1},
00048 { 5, s_0_10, 9, 1},
00049 { 2, s_0_11, -1, 1},
00050 { 2, s_0_12, -1, 1},
00051 { 5, s_0_13, 12, 1},
00052 { 1, s_0_14, -1, 2},
00053 { 2, s_0_15, 14, 1},
00054 { 2, s_0_16, 14, 1},
00055 { 4, s_0_17, 16, 1},
00056 { 5, s_0_18, 16, 1},
00057 { 4, s_0_19, 16, 1},
00058 { 7, s_0_20, 19, 1},
00059 { 3, s_0_21, 14, 1},
00060 { 6, s_0_22, 21, 1},
00061 { 3, s_0_23, 14, 1},
00062 { 3, s_0_24, 14, 1},
00063 { 2, s_0_25, -1, 1},
00064 { 3, s_0_26, 25, 1},
00065 { 3, s_0_27, -1, 3},
00066 { 3, s_0_28, -1, 1}
00067 };
00068
00069 static const symbol s_1_0[2] = { 'd', 't' };
00070 static const symbol s_1_1[2] = { 'v', 't' };
00071
00072 static const struct among a_1[2] =
00073 {
00074 { 2, s_1_0, -1, -1},
00075 { 2, s_1_1, -1, -1}
00076 };
00077
00078 static const symbol s_2_0[3] = { 'l', 'e', 'g' };
00079 static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' };
00080 static const symbol s_2_2[2] = { 'i', 'g' };
00081 static const symbol s_2_3[3] = { 'e', 'i', 'g' };
00082 static const symbol s_2_4[3] = { 'l', 'i', 'g' };
00083 static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' };
00084 static const symbol s_2_6[3] = { 'e', 'l', 's' };
00085 static const symbol s_2_7[3] = { 'l', 'o', 'v' };
00086 static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' };
00087 static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' };
00088 static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' };
00089
00090 static const struct among a_2[11] =
00091 {
00092 { 3, s_2_0, -1, 1},
00093 { 4, s_2_1, 0, 1},
00094 { 2, s_2_2, -1, 1},
00095 { 3, s_2_3, 2, 1},
00096 { 3, s_2_4, 2, 1},
00097 { 4, s_2_5, 4, 1},
00098 { 3, s_2_6, -1, 1},
00099 { 3, s_2_7, -1, 1},
00100 { 4, s_2_8, 7, 1},
00101 { 4, s_2_9, 7, 1},
00102 { 7, s_2_10, 9, 1}
00103 };
00104
/* Grouping table passed to the *_grouping*_U helpers together with the
 * code-point range 97..248.
 * NOTE(review): presumably a bitmap in which bit i (LSB first) stands for
 * code point 97 + i; read that way the set is a e i o u y (U+00E5)
 * (U+00E6) (U+00F8), i.e. the Norwegian vowels. Confirm against the
 * grouping helpers, which are not visible here. */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };

/* Grouping table passed to in_grouping_b_U() by r_main_suffix() together
 * with the code-point range 98..122.
 * NOTE(review): presumably a bitmap in which bit i (LSB first) stands for
 * code point 98 + i; read that way the set is
 * b c d f g h j l m n o p r t v y z. Confirm against the grouping helper,
 * which is not visible here. */
static const unsigned char g_s_ending[] = { 119, 125, 149, 1 };

00109 static const symbol s_0[] = { 'e', 'r' };
00110
00111 int Xapian::InternalStemNorwegian::r_mark_regions() {
00112 I_p1 = l;
00113 { int c_test1 = c;
00114 { int ret = skip_utf8(p, c, 0, l, + 3);
00115 if (ret < 0) return 0;
00116 c = ret;
00117 }
00118 I_x = c;
00119 c = c_test1;
00120 }
00121 if (out_grouping_U(g_v, 97, 248, 1) < 0) return 0;
00122 { int ret = in_grouping_U(g_v, 97, 248, 1);
00123 if (ret < 0) return 0;
00124 c += ret;
00125 }
00126 I_p1 = c;
00127
00128 if (!(I_p1 < I_x)) goto lab0;
00129 I_p1 = I_x;
00130 lab0:
00131 return 1;
00132 }
00133
00134 int Xapian::InternalStemNorwegian::r_main_suffix() {
00135 int among_var;
00136 { int m1 = l - c; (void)m1;
00137 int mlimit1;
00138 if (c < I_p1) return 0;
00139 c = I_p1;
00140 mlimit1 = lb; lb = c;
00141 c = l - m1;
00142 ket = c;
00143 if (c <= lb || p[c - 1] >> 5 != 3 || !((1851426 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00144 among_var = find_among_b(a_0, 29, 0, 0);
00145 if (!(among_var)) { lb = mlimit1; return 0; }
00146 bra = c;
00147 lb = mlimit1;
00148 }
00149 switch(among_var) {
00150 case 0: return 0;
00151 case 1:
00152 if (slice_del() == -1) return -1;
00153 break;
00154 case 2:
00155 { int m2 = l - c; (void)m2;
00156 if (in_grouping_b_U(g_s_ending, 98, 122, 0)) goto lab1;
00157 goto lab0;
00158 lab1:
00159 c = l - m2;
00160 if (c <= lb || p[c - 1] != 'k') return 0;
00161 c--;
00162 if (out_grouping_b_U(g_v, 97, 248, 0)) return 0;
00163 }
00164 lab0:
00165 if (slice_del() == -1) return -1;
00166 break;
00167 case 3:
00168 { int ret = slice_from_s(2, s_0);
00169 if (ret < 0) return ret;
00170 }
00171 break;
00172 }
00173 return 1;
00174 }
00175
00176 int Xapian::InternalStemNorwegian::r_consonant_pair() {
00177 { int m_test1 = l - c;
00178 { int m2 = l - c; (void)m2;
00179 int mlimit2;
00180 if (c < I_p1) return 0;
00181 c = I_p1;
00182 mlimit2 = lb; lb = c;
00183 c = l - m2;
00184 ket = c;
00185 if (c - 1 <= lb || p[c - 1] != 116) { lb = mlimit2; return 0; }
00186 if (!(find_among_b(a_1, 2, 0, 0))) { lb = mlimit2; return 0; }
00187 bra = c;
00188 lb = mlimit2;
00189 }
00190 c = l - m_test1;
00191 }
00192 { int ret = skip_utf8(p, c, lb, 0, -1);
00193 if (ret < 0) return 0;
00194 c = ret;
00195 }
00196 bra = c;
00197 if (slice_del() == -1) return -1;
00198 return 1;
00199 }
00200
00201 int Xapian::InternalStemNorwegian::r_other_suffix() {
00202 int among_var;
00203 { int m1 = l - c; (void)m1;
00204 int mlimit1;
00205 if (c < I_p1) return 0;
00206 c = I_p1;
00207 mlimit1 = lb; lb = c;
00208 c = l - m1;
00209 ket = c;
00210 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((4718720 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00211 among_var = find_among_b(a_2, 11, 0, 0);
00212 if (!(among_var)) { lb = mlimit1; return 0; }
00213 bra = c;
00214 lb = mlimit1;
00215 }
00216 switch(among_var) {
00217 case 0: return 0;
00218 case 1:
00219 if (slice_del() == -1) return -1;
00220 break;
00221 }
00222 return 1;
00223 }
00224
00225 int Xapian::InternalStemNorwegian::stem() {
00226 { int c1 = c;
00227 { int ret = r_mark_regions();
00228 if (ret == 0) goto lab0;
00229 if (ret < 0) return ret;
00230 }
00231 lab0:
00232 c = c1;
00233 }
00234 lb = c; c = l;
00235
00236 { int m2 = l - c; (void)m2;
00237 { int ret = r_main_suffix();
00238 if (ret == 0) goto lab1;
00239 if (ret < 0) return ret;
00240 }
00241 lab1:
00242 c = l - m2;
00243 }
00244 { int m3 = l - c; (void)m3;
00245 { int ret = r_consonant_pair();
00246 if (ret == 0) goto lab2;
00247 if (ret < 0) return ret;
00248 }
00249 lab2:
00250 c = l - m3;
00251 }
00252 { int m4 = l - c; (void)m4;
00253 { int ret = r_other_suffix();
00254 if (ret == 0) goto lab3;
00255 if (ret < 0) return ret;
00256 }
00257 lab3:
00258 c = l - m4;
00259 }
00260 c = lb;
00261 return 1;
00262 }
00263
00264 Xapian::InternalStemNorwegian::InternalStemNorwegian()
00265 : I_x(0), I_p1(0)
00266 {
00267 }
00268
00269 Xapian::InternalStemNorwegian::~InternalStemNorwegian()
00270 {
00271 }
00272
00273 const char *
00274 Xapian::InternalStemNorwegian::get_description() const
00275 {
00276 return "norwegian";
00277 }