00001
00002
00003 #include <limits.h>
00004 #include "dutch.h"
00005
// Search strings for table a_0. Each is a two-byte UTF-8 sequence with lead
// byte 0xC3, i.e. one accented Latin-1 vowel:
//   s_0_1  U+00E1 (a acute)      s_0_2  U+00E4 (a diaeresis)
//   s_0_3  U+00E9 (e acute)      s_0_4  U+00EB (e diaeresis)
//   s_0_5  U+00ED (i acute)      s_0_6  U+00EF (i diaeresis)
//   s_0_7  U+00F3 (o acute)      s_0_8  U+00F6 (o diaeresis)
//   s_0_9  U+00FA (u acute)      s_0_10 U+00FC (u diaeresis)
00006 static const symbol s_0_1[2] = { 0xC3, 0xA1 };
00007 static const symbol s_0_2[2] = { 0xC3, 0xA4 };
00008 static const symbol s_0_3[2] = { 0xC3, 0xA9 };
00009 static const symbol s_0_4[2] = { 0xC3, 0xAB };
00010 static const symbol s_0_5[2] = { 0xC3, 0xAD };
00011 static const symbol s_0_6[2] = { 0xC3, 0xAF };
00012 static const symbol s_0_7[2] = { 0xC3, 0xB3 };
00013 static const symbol s_0_8[2] = { 0xC3, 0xB6 };
00014 static const symbol s_0_9[2] = { 0xC3, 0xBA };
00015 static const symbol s_0_10[2] = { 0xC3, 0xBC };
00016
// Lookup table passed to find_among() by r_prelude().
// NOTE(review): struct among is declared outside this file; the columns look
// like { string length, string, index of a shorter entry or -1, result code }
// -- confirm against the declaration.
// The last column is the value r_prelude() switches on: 1..5 select the
// replacement vowel a/e/i/o/u, and 6 (the empty-string entry) means "no
// accented vowel here, skip one character".
00017 static const struct among a_0[11] =
00018 {
00019 { 0, 0, -1, 6},
00020 { 2, s_0_1, 0, 1},
00021 { 2, s_0_2, 0, 1},
00022 { 2, s_0_3, 0, 2},
00023 { 2, s_0_4, 0, 2},
00024 { 2, s_0_5, 0, 3},
00025 { 2, s_0_6, 0, 3},
00026 { 2, s_0_7, 0, 4},
00027 { 2, s_0_8, 0, 4},
00028 { 2, s_0_9, 0, 5},
00029 { 2, s_0_10, 0, 5}
00030 };
00031
// Search strings for table a_1: the upper-case marker letters 'I' and 'Y'.
00032 static const symbol s_1_1[1] = { 'I' };
00033 static const symbol s_1_2[1] = { 'Y' };
00034
// Lookup table passed to find_among() by r_postlude().
// The last column is the value r_postlude() switches on: 2 rewrites 'I' to
// 'i', 1 rewrites 'Y' to 'y', and 3 (the empty-string entry) skips one
// character.
// NOTE(review): remaining column meanings depend on struct among, which is
// declared outside this file.
00035 static const struct among a_1[3] =
00036 {
00037 { 0, 0, -1, 3},
00038 { 1, s_1_1, 0, 2},
00039 { 1, s_1_2, 0, 1}
00040 };
00041
// Search strings for table a_2: the doubled consonants "dd", "kk" and "tt".
00042 static const symbol s_2_0[2] = { 'd', 'd' };
00043 static const symbol s_2_1[2] = { 'k', 'k' };
00044 static const symbol s_2_2[2] = { 't', 't' };
00045
// Lookup table passed to find_among_b() by r_undouble(), which only tests
// whether any entry matches; the last column (-1 everywhere) is not switched
// on by the caller.
00046 static const struct among a_2[3] =
00047 {
00048 { 2, s_2_0, -1, -1},
00049 { 2, s_2_1, -1, -1},
00050 { 2, s_2_2, -1, -1}
00051 };
00052
// Search strings for table a_3: the endings "ene", "se", "en", "heden", "s".
00053 static const symbol s_3_0[3] = { 'e', 'n', 'e' };
00054 static const symbol s_3_1[2] = { 's', 'e' };
00055 static const symbol s_3_2[2] = { 'e', 'n' };
00056 static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' };
00057 static const symbol s_3_4[1] = { 's' };
00058
// Lookup table passed to find_among_b() by the first step of
// r_standard_suffix(). The last column is the case that step switches on:
//   1 ("heden")     -> replaced by "heid" when inside R1
//   2 ("ene", "en") -> handled by r_en_ending()
//   3 ("se", "s")   -> deleted when inside R1 and the g_v_j test passes
// NOTE(review): the third column of the "heden" row (2) appears to link to
// the shorter entry "en" at index 2 -- confirm against struct among.
00059 static const struct among a_3[5] =
00060 {
00061 { 3, s_3_0, -1, 2},
00062 { 2, s_3_1, -1, 3},
00063 { 2, s_3_2, -1, 2},
00064 { 5, s_3_3, 2, 1},
00065 { 1, s_3_4, -1, 3}
00066 };
00067
// Search strings for table a_4: the endings "end", "ig", "ing", "lijk",
// "baar" and "bar".
00068 static const symbol s_4_0[3] = { 'e', 'n', 'd' };
00069 static const symbol s_4_1[2] = { 'i', 'g' };
00070 static const symbol s_4_2[3] = { 'i', 'n', 'g' };
00071 static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' };
00072 static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' };
00073 static const symbol s_4_5[3] = { 'b', 'a', 'r' };
00074
// Lookup table passed to find_among_b() by the fourth step of
// r_standard_suffix(). The last column is the case that step switches on:
//   1 ("end", "ing") 2 ("ig") 3 ("lijk") 4 ("baar") 5 ("bar")
// Every case first requires the match to lie inside R2 (r_R2()).
00075 static const struct among a_4[6] =
00076 {
00077 { 3, s_4_0, -1, 1},
00078 { 2, s_4_1, -1, 2},
00079 { 3, s_4_2, -1, 1},
00080 { 4, s_4_3, -1, 3},
00081 { 4, s_4_4, -1, 4},
00082 { 3, s_4_5, -1, 5}
00083 };
00084
// Search strings for table a_5: the doubled vowels "aa", "ee", "oo", "uu".
00085 static const symbol s_5_0[2] = { 'a', 'a' };
00086 static const symbol s_5_1[2] = { 'e', 'e' };
00087 static const symbol s_5_2[2] = { 'o', 'o' };
00088 static const symbol s_5_3[2] = { 'u', 'u' };
00089
// Lookup table passed to find_among_b() by the last step of
// r_standard_suffix(), which only tests whether any entry matches; the last
// column (-1 everywhere) is not switched on by the caller.
00090 static const struct among a_5[4] =
00091 {
00092 { 2, s_5_0, -1, -1},
00093 { 2, s_5_1, -1, -1},
00094 { 2, s_5_2, -1, -1},
00095 { 2, s_5_3, -1, -1}
00096 };
00097
// Character-set bitmaps handed to the in_grouping / out_grouping helpers
// together with an inclusive code point range.
// NOTE(review): the decoding below assumes bit k (least significant bit first
// within each byte) stands for code point (range minimum + k) -- confirm
// against the grouping helpers, which are not defined in this file.

// g_v, used with range 97..232: a, e, i, o, u, y and code point 232 (U+00E8).
00098 static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
00099
// g_v_I, used with range 73..232: the g_v set plus upper-case 'I' (73).
00100 static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
00101
// g_v_j, used with range 97..232: the g_v set plus 'j' (106).
00102 static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
00103
// Literal strings used by the routines below.
// s_0..s_4: ASCII vowels that r_prelude() substitutes for a_0 cases 1..5.
00104 static const symbol s_0[] = { 'a' };
00105 static const symbol s_1[] = { 'e' };
00106 static const symbol s_2[] = { 'i' };
00107 static const symbol s_3[] = { 'o' };
00108 static const symbol s_4[] = { 'u' };
// s_5..s_7: upper-case markers written by r_prelude() ('y' -> 'Y', 'i' -> 'I').
00109 static const symbol s_5[] = { 'Y' };
00110 static const symbol s_6[] = { 'I' };
00111 static const symbol s_7[] = { 'Y' };
// s_8, s_9: lower-case letters that r_postlude() writes back in place of the
// 'Y' and 'I' markers.
00112 static const symbol s_8[] = { 'y' };
00113 static const symbol s_9[] = { 'i' };
// s_10: "gem", tested by r_en_ending(), which refuses to strip after it.
00114 static const symbol s_10[] = { 'g', 'e', 'm' };
// s_11: "heid", the replacement for "heden" in r_standard_suffix().
00115 static const symbol s_11[] = { 'h', 'e', 'i', 'd' };
// s_12..s_14: "heid", "en" and "ig", matched backwards by r_standard_suffix().
00116 static const symbol s_12[] = { 'h', 'e', 'i', 'd' };
00117 static const symbol s_13[] = { 'e', 'n' };
00118 static const symbol s_14[] = { 'i', 'g' };
00119
// Forward-mode normalisation pass, called first by stem().
//   1. Repeatedly looks up table a_0 at the cursor and rewrites each matched
//      two-byte sequence to a single ASCII vowel (s_0..s_4); anything else is
//      skipped one character at a time. The cursor is then put back where it
//      started.
//   2. If the byte at the cursor is 'y', it is rewritten to 'Y'.
//   3. Repeatedly searches forward for a position where
//      in_grouping_U(g_v, ...) returns 0 and rewrites the 'i' after it to 'I'
//      (when that 'i' is itself followed by another such position), or
//      otherwise the 'y' after it to 'Y'.
// Returns 1, or the negative value reported by slice_from_s() on failure.
00120 int Xapian::InternalStemDutch::r_prelude() {
00121 int among_var;
00122 { int c_test1 = c;  // cursor on entry; restored once step 1 is finished
00123 while(1) {
00124 int c2 = c;  // cursor at the start of this iteration
00125 bra = c;  // start of the slice that may be replaced
// Cheap pre-filter before the table search: 340306450 has one bit set for the
// low five bits of every second byte listed in a_0 (0xA1, 0xA4, ... 0xBC), and
// ">> 5 != 5" rejects bytes outside 0xA0..0xBF. When the byte after the cursor
// cannot be one of those, go straight to case 6 (skip).
00126 if (c + 1 >= l || p[c + 1] >> 5 != 5 || !((340306450 >> (p[c + 1] & 0x1f)) & 1)) among_var = 6; else
00127 among_var = find_among(a_0, 11, 0, 0);
00128 if (!(among_var)) goto lab0;
00129 ket = c;  // end of the slice
00130 switch(among_var) {
00131 case 0: goto lab0;  // not reachable: a zero among_var already left via the test above
00132 case 1:  // a acute / a diaeresis -> 'a'
00133 { int ret = slice_from_s(1, s_0);
00134 if (ret < 0) return ret;
00135 }
00136 break;
00137 case 2:  // e acute / e diaeresis -> 'e'
00138 { int ret = slice_from_s(1, s_1);
00139 if (ret < 0) return ret;
00140 }
00141 break;
00142 case 3:  // i acute / i diaeresis -> 'i'
00143 { int ret = slice_from_s(1, s_2);
00144 if (ret < 0) return ret;
00145 }
00146 break;
00147 case 4:  // o acute / o diaeresis -> 'o'
00148 { int ret = slice_from_s(1, s_3);
00149 if (ret < 0) return ret;
00150 }
00151 break;
00152 case 5:  // u acute / u diaeresis -> 'u'
00153 { int ret = slice_from_s(1, s_4);
00154 if (ret < 0) return ret;
00155 }
00156 break;
00157 case 6:  // nothing to replace: step forward (presumably one UTF-8 character -- helper not shown)
00158 { int ret = skip_utf8(p, c, 0, l, 1);
00159 if (ret < 0) goto lab0;  // cannot advance any further: leave the loop
00160 c = ret;
00161 }
00162 break;
00163 }
00164 continue;
00165 lab0:
00166 c = c2;  // undo whatever this last iteration moved
00167 break;
00168 }
00169 c = c_test1;  // step 1 leaves the cursor where it began
00170 }
// Step 2: a 'y' at the cursor becomes 'Y'; otherwise the cursor is left alone.
00171 { int c3 = c;
00172 bra = c;
00173 if (c == l || p[c] != 'y') { c = c3; goto lab1; }
00174 c++;
00175 ket = c;
00176 { int ret = slice_from_s(1, s_5);
00177 if (ret < 0) return ret;
00178 }
00179 lab1:
00180 ;
00181 }
// Step 3: the outer loop repeats the search; the inner loop walks forward
// until one rewrite has been made (lab4) or the text runs out (lab2).
00182 while(1) {
00183 int c4 = c;  // cursor at the start of this outer iteration
00184 while(1) {
00185 int c5 = c;  // candidate position; restored on both exits of the attempt
00186 if (in_grouping_U(g_v, 97, 232, 0)) goto lab3;  // non-zero: no g_v match here, try the next position
00187 bra = c;
00188 { int c6 = c;  // position just after the g_v match
00189 if (c == l || p[c] != 'i') goto lab5;
00190 c++;
00191 ket = c;
00192 if (in_grouping_U(g_v, 97, 232, 0)) goto lab5;  // the 'i' must be followed by a g_v match too
00193 { int ret = slice_from_s(1, s_6);  // 'i' -> 'I'
00194 if (ret < 0) return ret;
00195 }
00196 goto lab4;
00197 lab5:
00198 c = c6;  // alternative: a 'y' directly after the g_v match
00199 if (c == l || p[c] != 'y') goto lab3;
00200 c++;
00201 ket = c;
00202 { int ret = slice_from_s(1, s_7);  // 'y' -> 'Y'
00203 if (ret < 0) return ret;
00204 }
00205 }
00206 lab4:
00207 c = c5;  // a rewrite was made: resume the outer loop from the candidate position
00208 break;
00209 lab3:
00210 c = c5;
00211 { int ret = skip_utf8(p, c, 0, l, 1);
00212 if (ret < 0) goto lab2;  // nothing left to scan
00213 c = ret;
00214 }
00215 }
00216 continue;
00217 lab2:
00218 c = c4;
00219 break;
00220 }
00221 return 1;
00222 }
00223
00224 int Xapian::InternalStemDutch::r_mark_regions() {
00225 I_p1 = l;
00226 I_p2 = l;
00227 { int ret = out_grouping_U(g_v, 97, 232, 1);
00228 if (ret < 0) return 0;
00229 c += ret;
00230 }
00231 { int ret = in_grouping_U(g_v, 97, 232, 1);
00232 if (ret < 0) return 0;
00233 c += ret;
00234 }
00235 I_p1 = c;
00236
00237 if (!(I_p1 < 3)) goto lab0;
00238 I_p1 = 3;
00239 lab0:
00240 { int ret = out_grouping_U(g_v, 97, 232, 1);
00241 if (ret < 0) return 0;
00242 c += ret;
00243 }
00244 { int ret = in_grouping_U(g_v, 97, 232, 1);
00245 if (ret < 0) return 0;
00246 c += ret;
00247 }
00248 I_p2 = c;
00249 return 1;
00250 }
00251
00252 int Xapian::InternalStemDutch::r_postlude() {
00253 int among_var;
00254 while(1) {
00255 int c1 = c;
00256 bra = c;
00257 if (c >= l || (p[c + 0] != 73 && p[c + 0] != 89)) among_var = 3; else
00258 among_var = find_among(a_1, 3, 0, 0);
00259 if (!(among_var)) goto lab0;
00260 ket = c;
00261 switch(among_var) {
00262 case 0: goto lab0;
00263 case 1:
00264 { int ret = slice_from_s(1, s_8);
00265 if (ret < 0) return ret;
00266 }
00267 break;
00268 case 2:
00269 { int ret = slice_from_s(1, s_9);
00270 if (ret < 0) return ret;
00271 }
00272 break;
00273 case 3:
00274 { int ret = skip_utf8(p, c, 0, l, 1);
00275 if (ret < 0) goto lab0;
00276 c = ret;
00277 }
00278 break;
00279 }
00280 continue;
00281 lab0:
00282 c = c1;
00283 break;
00284 }
00285 return 1;
00286 }
00287
00288 int Xapian::InternalStemDutch::r_R1() {
00289 if (!(I_p1 <= c)) return 0;
00290 return 1;
00291 }
00292
00293 int Xapian::InternalStemDutch::r_R2() {
00294 if (!(I_p2 <= c)) return 0;
00295 return 1;
00296 }
00297
00298 int Xapian::InternalStemDutch::r_undouble() {
00299 { int m_test1 = l - c;
00300 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1050640 >> (p[c - 1] & 0x1f)) & 1)) return 0;
00301 if (!(find_among_b(a_2, 3, 0, 0))) return 0;
00302 c = l - m_test1;
00303 }
00304 ket = c;
00305 { int ret = skip_utf8(p, c, lb, 0, -1);
00306 if (ret < 0) return 0;
00307 c = ret;
00308 }
00309 bra = c;
00310 if (slice_del() == -1) return -1;
00311 return 1;
00312 }
00313
00314 int Xapian::InternalStemDutch::r_e_ending() {
00315 B_e_found = 0;
00316 ket = c;
00317 if (c <= lb || p[c - 1] != 'e') return 0;
00318 c--;
00319 bra = c;
00320 { int ret = r_R1();
00321 if (ret <= 0) return ret;
00322 }
00323 { int m_test1 = l - c;
00324 if (out_grouping_b_U(g_v, 97, 232, 0)) return 0;
00325 c = l - m_test1;
00326 }
00327 if (slice_del() == -1) return -1;
00328 B_e_found = 1;
00329 { int ret = r_undouble();
00330 if (ret <= 0) return ret;
00331 }
00332 return 1;
00333 }
00334
00335 int Xapian::InternalStemDutch::r_en_ending() {
00336 { int ret = r_R1();
00337 if (ret <= 0) return ret;
00338 }
00339 { int m1 = l - c; (void)m1;
00340 if (out_grouping_b_U(g_v, 97, 232, 0)) return 0;
00341 c = l - m1;
00342 { int m2 = l - c; (void)m2;
00343 if (!(eq_s_b(3, s_10))) goto lab0;
00344 return 0;
00345 lab0:
00346 c = l - m2;
00347 }
00348 }
00349 if (slice_del() == -1) return -1;
00350 { int ret = r_undouble();
00351 if (ret <= 0) return ret;
00352 }
00353 return 1;
00354 }
00355
// Backward-mode suffix stripping, called by stem() with the cursor at the
// limit l. Five independent steps run in sequence; each step saves the cursor
// as a distance from l, attempts its edit, and restores the cursor whether or
// not the edit happened:
//   1. a_3 endings ("heden", "ene"/"en", "se"/"s")
//   2. r_e_ending()
//   3. "heid" (optionally followed by the removal of a preceding "en")
//   4. a_4 endings ("end"/"ing", "ig", "lijk", "baar", "bar")
//   5. a_5 doubled vowels ("aa", "ee", "oo", "uu") reduced by one character
// Returns 1, or a negative value when a slice operation or a called routine
// reports failure.
00356 int Xapian::InternalStemDutch::r_standard_suffix() {
00357 int among_var;
// ---- Step 1: a_3 endings -------------------------------------------------
00358 { int m1 = l - c; (void)m1;
00359 ket = c;
// Cheap pre-filter: mask 540704 has bits 5, 14 and 19 set, so together with
// ">> 5 == 3" only a last byte of 'e', 'n' or 's' reaches the table search.
00360 if (c <= lb || p[c - 1] >> 5 != 3 || !((540704 >> (p[c - 1] & 0x1f)) & 1)) goto lab0;
00361 among_var = find_among_b(a_3, 5, 0, 0);
00362 if (!(among_var)) goto lab0;
00363 bra = c;
00364 switch(among_var) {
00365 case 0: goto lab0;  // not reachable: a zero among_var already left via the test above
00366 case 1:  // "heden": inside R1 it is replaced by "heid"
00367 { int ret = r_R1();
00368 if (ret == 0) goto lab0;
00369 if (ret < 0) return ret;
00370 }
00371 { int ret = slice_from_s(4, s_11);
00372 if (ret < 0) return ret;
00373 }
00374 break;
00375 case 2:  // "ene" / "en": delegated to r_en_ending()
00376 { int ret = r_en_ending();
00377 if (ret == 0) goto lab0;
00378 if (ret < 0) return ret;
00379 }
00380 break;
00381 case 3:  // "se" / "s": deleted inside R1 when the g_v_j test returns 0
00382 { int ret = r_R1();
00383 if (ret == 0) goto lab0;
00384 if (ret < 0) return ret;
00385 }
00386 if (out_grouping_b_U(g_v_j, 97, 232, 0)) goto lab0;
00387 if (slice_del() == -1) return -1;
00388 break;
00389 }
00390 lab0:
00391 c = l - m1;  // restore the cursor relative to the (possibly changed) limit
00392 }
// ---- Step 2: trailing 'e' ------------------------------------------------
00393 { int m2 = l - c; (void)m2;
00394 { int ret = r_e_ending();
00395 if (ret == 0) goto lab1;
00396 if (ret < 0) return ret;
00397 }
00398 lab1:
00399 c = l - m2;
00400 }
// ---- Step 3: "heid" ------------------------------------------------------
00401 { int m3 = l - c; (void)m3;
00402 ket = c;
00403 if (!(eq_s_b(4, s_12))) goto lab2;
00404 bra = c;
00405 { int ret = r_R2();
00406 if (ret == 0) goto lab2;
00407 if (ret < 0) return ret;
00408 }
00409 { int m4 = l - c; (void)m4;
00410 if (c <= lb || p[c - 1] != 'c') goto lab3;  // no 'c' before "heid": carry on
00411 c--;
00412 goto lab2;  // preceded by 'c': abandon this step
00413 lab3:
00414 c = l - m4;
00415 }
00416 if (slice_del() == -1) return -1;
// With "heid" gone, a preceding "en" is handed to r_en_ending().
00417 ket = c;
00418 if (!(eq_s_b(2, s_13))) goto lab2;
00419 bra = c;
00420 { int ret = r_en_ending();
00421 if (ret == 0) goto lab2;
00422 if (ret < 0) return ret;
00423 }
00424 lab2:
00425 c = l - m3;
00426 }
// ---- Step 4: a_4 endings -------------------------------------------------
00427 { int m5 = l - c; (void)m5;
00428 ket = c;
// Cheap pre-filter: mask 264336 has bits 4, 7, 11 and 18 set ('d', 'g', 'k',
// 'r'). "c - 1 <= lb" demands at least two bytes, the shortest a_4 entry.
00429 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((264336 >> (p[c - 1] & 0x1f)) & 1)) goto lab4;
00430 among_var = find_among_b(a_4, 6, 0, 0);
00431 if (!(among_var)) goto lab4;
00432 bra = c;
00433 switch(among_var) {
00434 case 0: goto lab4;  // not reachable: a zero among_var already left via the test above
00435 case 1:  // "end" / "ing": delete inside R2, then try "ig" or undoubling
00436 { int ret = r_R2();
00437 if (ret == 0) goto lab4;
00438 if (ret < 0) return ret;
00439 }
00440 if (slice_del() == -1) return -1;
00441 { int m6 = l - c; (void)m6;
00442 ket = c;
00443 if (!(eq_s_b(2, s_14))) goto lab6;  // no "ig" before the cursor: fall back to r_undouble()
00444 bra = c;
00445 { int ret = r_R2();
00446 if (ret == 0) goto lab6;
00447 if (ret < 0) return ret;
00448 }
00449 { int m7 = l - c; (void)m7;
00450 if (c <= lb || p[c - 1] != 'e') goto lab7;  // no 'e' before "ig": carry on
00451 c--;
00452 goto lab6;  // "ig" preceded by 'e': fall back to r_undouble()
00453 lab7:
00454 c = l - m7;
00455 }
00456 if (slice_del() == -1) return -1;
00457 goto lab5;
00458 lab6:
00459 c = l - m6;
00460 { int ret = r_undouble();
00461 if (ret == 0) goto lab4;
00462 if (ret < 0) return ret;
00463 }
00464 }
00465 lab5:
00466 break;
00467 case 2:  // "ig": delete inside R2 unless preceded by 'e'
00468 { int ret = r_R2();
00469 if (ret == 0) goto lab4;
00470 if (ret < 0) return ret;
00471 }
00472 { int m8 = l - c; (void)m8;
00473 if (c <= lb || p[c - 1] != 'e') goto lab8;
00474 c--;
00475 goto lab4;
00476 lab8:
00477 c = l - m8;
00478 }
00479 if (slice_del() == -1) return -1;
00480 break;
00481 case 3:  // "lijk": delete inside R2, then run r_e_ending()
00482 { int ret = r_R2();
00483 if (ret == 0) goto lab4;
00484 if (ret < 0) return ret;
00485 }
00486 if (slice_del() == -1) return -1;
00487 { int ret = r_e_ending();
00488 if (ret == 0) goto lab4;
00489 if (ret < 0) return ret;
00490 }
00491 break;
00492 case 4:  // "baar": delete inside R2
00493 { int ret = r_R2();
00494 if (ret == 0) goto lab4;
00495 if (ret < 0) return ret;
00496 }
00497 if (slice_del() == -1) return -1;
00498 break;
00499 case 5:  // "bar": delete inside R2, but only if B_e_found is set
00500 { int ret = r_R2();
00501 if (ret == 0) goto lab4;
00502 if (ret < 0) return ret;
00503 }
00504 if (!(B_e_found)) goto lab4;
00505 if (slice_del() == -1) return -1;
00506 break;
00507 }
00508 lab4:
00509 c = l - m5;
00510 }
// ---- Step 5: doubled vowels ----------------------------------------------
00511 { int m9 = l - c; (void)m9;
00512 if (out_grouping_b_U(g_v_I, 73, 232, 0)) goto lab9;
00513 { int m_test10 = l - c;  // the checks in this inner block are a test; the cursor is restored after them
// Cheap pre-filter: mask 2129954 has bits 1, 5, 15 and 21 set ('a', 'e', 'o',
// 'u'), the possible last bytes of an a_5 entry.
00514 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((2129954 >> (p[c - 1] & 0x1f)) & 1)) goto lab9;
00515 if (!(find_among_b(a_5, 4, 0, 0))) goto lab9;
00516 if (out_grouping_b_U(g_v, 97, 232, 0)) goto lab9;
00517 c = l - m_test10;
00518 }
// Delete the single character immediately before the restored cursor.
00519 ket = c;
00520 { int ret = skip_utf8(p, c, lb, 0, -1);
00521 if (ret < 0) goto lab9;
00522 c = ret;
00523 }
00524 bra = c;
00525 if (slice_del() == -1) return -1;
00526 lab9:
00527 c = l - m9;
00528 }
00529 return 1;
00530 }
00531
00532 int Xapian::InternalStemDutch::stem() {
00533 { int c1 = c;
00534 { int ret = r_prelude();
00535 if (ret == 0) goto lab0;
00536 if (ret < 0) return ret;
00537 }
00538 lab0:
00539 c = c1;
00540 }
00541 { int c2 = c;
00542 { int ret = r_mark_regions();
00543 if (ret == 0) goto lab1;
00544 if (ret < 0) return ret;
00545 }
00546 lab1:
00547 c = c2;
00548 }
00549 lb = c; c = l;
00550
00551 { int m3 = l - c; (void)m3;
00552 { int ret = r_standard_suffix();
00553 if (ret == 0) goto lab2;
00554 if (ret < 0) return ret;
00555 }
00556 lab2:
00557 c = l - m3;
00558 }
00559 c = lb;
00560 { int c4 = c;
00561 { int ret = r_postlude();
00562 if (ret == 0) goto lab3;
00563 if (ret < 0) return ret;
00564 }
00565 lab3:
00566 c = c4;
00567 }
00568 return 1;
00569 }
00570
00571 Xapian::InternalStemDutch::InternalStemDutch()
00572 : I_p2(0), I_p1(0), B_e_found(0)
00573 {
00574 }
00575
00576 Xapian::InternalStemDutch::~InternalStemDutch()
00577 {
00578 }
00579
00580 const char *
00581 Xapian::InternalStemDutch::get_description() const
00582 {
00583 return "dutch";
00584 }