00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 #ifndef DMETAPHONE_MAIN
00101
00102 #include "postgres.h"
00103
00104 #include "utils/builtins.h"
00105
00106
00107 #define NDEBUG
00108 #endif
00109
00110 #include <stdio.h>
00111 #include <ctype.h>
00112 #include <stdlib.h>
00113 #include <string.h>
00114 #include <stdarg.h>
00115 #include <assert.h>
00116
00117 extern Datum dmetaphone(PG_FUNCTION_ARGS);
00118 extern Datum dmetaphone_alt(PG_FUNCTION_ARGS);
00119
00120
00121 static void DoubleMetaphone(char *, char **);
00122
00123 #ifndef DMETAPHONE_MAIN
00124
00125
00126
00127
00128
00129 PG_FUNCTION_INFO_V1(dmetaphone);
00130
00131 Datum
00132 dmetaphone(PG_FUNCTION_ARGS)
00133 {
00134 text *arg;
00135 char *aptr,
00136 *codes[2],
00137 *code;
00138
00139 #ifdef DMETAPHONE_NOSTRICT
00140 if (PG_ARGISNULL(0))
00141 PG_RETURN_NULL();
00142 #endif
00143 arg = PG_GETARG_TEXT_P(0);
00144 aptr = text_to_cstring(arg);
00145
00146 DoubleMetaphone(aptr, codes);
00147 code = codes[0];
00148 if (!code)
00149 code = "";
00150
00151 PG_RETURN_TEXT_P(cstring_to_text(code));
00152 }
00153
00154
00155
00156
00157
00158 PG_FUNCTION_INFO_V1(dmetaphone_alt);
00159
00160 Datum
00161 dmetaphone_alt(PG_FUNCTION_ARGS)
00162 {
00163 text *arg;
00164 char *aptr,
00165 *codes[2],
00166 *code;
00167
00168 #ifdef DMETAPHONE_NOSTRICT
00169 if (PG_ARGISNULL(0))
00170 PG_RETURN_NULL();
00171 #endif
00172 arg = PG_GETARG_TEXT_P(0);
00173 aptr = text_to_cstring(arg);
00174
00175 DoubleMetaphone(aptr, codes);
00176 code = codes[1];
00177 if (!code)
00178 code = "";
00179
00180 PG_RETURN_TEXT_P(cstring_to_text(code));
00181 }
00182
00183
00184
00185
00186
00187 /* PostgreSQL build: META_MALLOC(v,n,t) assigns to v a palloc'd array of n elements of type t. */
00188 #define META_MALLOC(v,n,t) \
00189 (v = (t*)palloc(((n)*sizeof(t))))
00190 /* META_REALLOC(v,n,t) resizes v in place (via repalloc) to n elements of type t. */
00191 #define META_REALLOC(v,n,t) \
00192 (v = (t*)repalloc((v),((n)*sizeof(t))))
00193
00194
00195
00196
00197
00198
00199 /* META_FREE expands to nothing in this branch: memory obtained above is never released explicitly. */
00200 /* NOTE(review): presumably reclaimed by PostgreSQL memory-context cleanup - confirm. */
00201 #define META_FREE(x)
00202 #else
00203
00204
00205 /* Standalone build (DMETAPHONE_MAIN defined): same three macros on top of malloc/realloc/free. */
00206 #define META_MALLOC(v,n,t) \
00207 (v = (t*)malloc(((n)*sizeof(t))))
00208 /* Note: v is overwritten with realloc's result, so callers must check v afterwards. */
00209 #define META_REALLOC(v,n,t) \
00210 (v = (t*)realloc((v),((n)*sizeof(t))))
00211
00212 #define META_FREE(x) free((x))
00213 #endif
00214
00215
00216
00217
00218
/*
 * metastring
 *    Growable character buffer used for the working copy of the input word
 *    and for the two output codes.
 */
typedef struct
{
    char       *str;                    /* NUL-terminated text buffer */
    int         length;                 /* character count tracked by the
                                         * helpers (not recomputed from str) */
    int         bufsize;                /* number of chars allocated for str */
    int         free_string_on_destroy; /* nonzero: DestroyMetaString also
                                         * releases str */
} metastring;
00228
00229
00230
00231
00232
00233
00234
00235 static metastring *
00236 NewMetaString(char *init_str)
00237 {
00238 metastring *s;
00239 char empty_string[] = "";
00240
00241 META_MALLOC(s, 1, metastring);
00242 assert(s != NULL);
00243
00244 if (init_str == NULL)
00245 init_str = empty_string;
00246 s->length = strlen(init_str);
00247
00248 s->bufsize = s->length + 7;
00249
00250 META_MALLOC(s->str, s->bufsize, char);
00251 assert(s->str != NULL);
00252
00253 strncpy(s->str, init_str, s->length + 1);
00254 s->free_string_on_destroy = 1;
00255
00256 return s;
00257 }
00258
00259
00260 static void
00261 DestroyMetaString(metastring *s)
00262 {
00263 if (s == NULL)
00264 return;
00265
00266 if (s->free_string_on_destroy && (s->str != NULL))
00267 META_FREE(s->str);
00268
00269 META_FREE(s);
00270 }
00271
00272
00273 static void
00274 IncreaseBuffer(metastring *s, int chars_needed)
00275 {
00276 META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
00277 assert(s->str != NULL);
00278 s->bufsize = s->bufsize + chars_needed + 10;
00279 }
00280
00281
00282 static void
00283 MakeUpper(metastring *s)
00284 {
00285 char *i;
00286
00287 for (i = s->str; *i; i++)
00288 *i = toupper((unsigned char) *i);
00289 }
00290
00291
00292 static int
00293 IsVowel(metastring *s, int pos)
00294 {
00295 char c;
00296
00297 if ((pos < 0) || (pos >= s->length))
00298 return 0;
00299
00300 c = *(s->str + pos);
00301 if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
00302 (c == 'U') || (c == 'Y'))
00303 return 1;
00304
00305 return 0;
00306 }
00307
00308
00309 static int
00310 SlavoGermanic(metastring *s)
00311 {
00312 if ((char *) strstr(s->str, "W"))
00313 return 1;
00314 else if ((char *) strstr(s->str, "K"))
00315 return 1;
00316 else if ((char *) strstr(s->str, "CZ"))
00317 return 1;
00318 else if ((char *) strstr(s->str, "WITZ"))
00319 return 1;
00320 else
00321 return 0;
00322 }
00323
00324
00325 static char
00326 GetAt(metastring *s, int pos)
00327 {
00328 if ((pos < 0) || (pos >= s->length))
00329 return '\0';
00330
00331 return ((char) *(s->str + pos));
00332 }
00333
00334
00335 static void
00336 SetAt(metastring *s, int pos, char c)
00337 {
00338 if ((pos < 0) || (pos >= s->length))
00339 return;
00340
00341 *(s->str + pos) = c;
00342 }
00343
00344
00345
00346
00347
00348 static int
00349 StringAt(metastring *s, int start, int length,...)
00350 {
00351 char *test;
00352 char *pos;
00353 va_list ap;
00354
00355 if ((start < 0) || (start >= s->length))
00356 return 0;
00357
00358 pos = (s->str + start);
00359 va_start(ap, length);
00360
00361 do
00362 {
00363 test = va_arg(ap, char *);
00364 if (*test && (strncmp(pos, test, length) == 0))
00365 return 1;
00366 }
00367 while (strcmp(test, "") != 0);
00368
00369 va_end(ap);
00370
00371 return 0;
00372 }
00373
00374
00375 static void
00376 MetaphAdd(metastring *s, char *new_str)
00377 {
00378 int add_length;
00379
00380 if (new_str == NULL)
00381 return;
00382
00383 add_length = strlen(new_str);
00384 if ((s->length + add_length) > (s->bufsize - 1))
00385 IncreaseBuffer(s, add_length);
00386
00387 strcat(s->str, new_str);
00388 s->length += add_length;
00389 }
00390
00391
/*
 * DoubleMetaphone
 *    Compute the primary and secondary Double Metaphone codes of str.
 *
 * str   - NUL-terminated input word.  It is copied into a working buffer, so
 *         the caller's string is not modified.
 * codes - array with room for two pointers.  codes[0] receives the primary
 *         code and codes[1] the secondary code; both are NUL-terminated
 *         strings of at most 4 characters, obtained through the META_*
 *         allocation macros and not released by this function.
 *
 * The working copy is upper-cased and scanned left to right.  Each case of
 * the switch appends zero or more characters to either code and advances
 * `current` by the number of input characters it consumed.
 */
00392 static void
00393 DoubleMetaphone(char *str, char **codes)
00394 {
00395 int length;
00396 metastring *original;
00397 metastring *primary;
00398 metastring *secondary;
00399 int current;
00400 int last;
00401
00402 current = 0;
00403 /* length and last (index of the final character) describe the unpadded input */
00404 length = strlen(str);
00405 last = length - 1;
00406 original = NewMetaString(str);
00407 /* append blank padding to the working copy; length/last above are not adjusted */
00408 MetaphAdd(original, " ");
00409 /* result buffers: their str members are returned through codes[], so DestroyMetaString must not free them */
00410 primary = NewMetaString("");
00411 secondary = NewMetaString("");
00412 primary->free_string_on_destroy = 0;
00413 secondary->free_string_on_destroy = 0;
00414 /* every pattern literal below is upper case */
00415 MakeUpper(original);
00416
00417 /* a word starting with GN, KN, PN, WR or PS: skip its first letter */
00418 if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
00419 current += 1;
00420
00421 /* a leading 'X' is coded as "S" */
00422 if (GetAt(original, 0) == 'X')
00423 {
00424 MetaphAdd(primary, "S");
00425 MetaphAdd(secondary, "S");
00426 current += 1;
00427 }
00428
00429 /* main loop: runs until both codes hold at least 4 characters or the input is used up */
00430 while ((primary->length < 4) || (secondary->length < 4))
00431 {
00432 if (current >= length)
00433 break;
00434
00435 switch (GetAt(original, current))
00436 {
00437 case 'A':
00438 case 'E':
00439 case 'I':
00440 case 'O':
00441 case 'U':
00442 case 'Y':
00443 if (current == 0)
00444 {
00445 /* a vowel is coded (as "A") only when it is the first character */
00446 MetaphAdd(primary, "A");
00447 MetaphAdd(secondary, "A");
00448 }
00449 current += 1;
00450 break;
00451
00452 case 'B':
00453
00454 /* 'B' is coded "P"; a doubled "BB" is consumed as one */
00455 MetaphAdd(primary, "P");
00456 MetaphAdd(secondary, "P");
00457
00458 if (GetAt(original, current + 1) == 'B')
00459 current += 2;
00460 else
00461 current += 1;
00462 break;
00463 /* NOTE(review): 0xC7 is C-cedilla in ISO-8859-1; presumably assumes a single-byte Latin-1-like encoding - confirm */
00464 case '\xc7':
00465 MetaphAdd(primary, "S");
00466 MetaphAdd(secondary, "S");
00467 current += 1;
00468 break;
00469
00470 case 'C':
00471 /* non-vowel + "ACH", next-but-one char not 'I', and not 'E' unless in BACHER/MACHER: "K" */
00472 if ((current > 1)
00473 && !IsVowel(original, current - 2)
00474 && StringAt(original, (current - 1), 3, "ACH", "")
00475 && ((GetAt(original, current + 2) != 'I')
00476 && ((GetAt(original, current + 2) != 'E')
00477 || StringAt(original, (current - 2), 6, "BACHER",
00478 "MACHER", ""))))
00479 {
00480 MetaphAdd(primary, "K");
00481 MetaphAdd(secondary, "K");
00482 current += 2;
00483 break;
00484 }
00485
00486 /* word-initial "CAESAR": "S" */
00487 if ((current == 0)
00488 && StringAt(original, current, 6, "CAESAR", ""))
00489 {
00490 MetaphAdd(primary, "S");
00491 MetaphAdd(secondary, "S");
00492 current += 2;
00493 break;
00494 }
00495
00496 /* "CHIA": "K" */
00497 if (StringAt(original, current, 4, "CHIA", ""))
00498 {
00499 MetaphAdd(primary, "K");
00500 MetaphAdd(secondary, "K");
00501 current += 2;
00502 break;
00503 }
00504
00505 if (StringAt(original, current, 2, "CH", ""))
00506 {
00507 /* non-initial "CHAE": primary "K", secondary "X" */
00508 if ((current > 0)
00509 && StringAt(original, current, 4, "CHAE", ""))
00510 {
00511 MetaphAdd(primary, "K");
00512 MetaphAdd(secondary, "X");
00513 current += 2;
00514 break;
00515 }
00516
00517 /* word-initial CH + ARAC/ARIS or OR/YM/IA/EM, except words starting "CHORE": "K" */
00518 if ((current == 0)
00519 && (StringAt(original, (current + 1), 5,
00520 "HARAC", "HARIS", "")
00521 || StringAt(original, (current + 1), 3, "HOR",
00522 "HYM", "HIA", "HEM", ""))
00523 && !StringAt(original, 0, 5, "CHORE", ""))
00524 {
00525 MetaphAdd(primary, "K");
00526 MetaphAdd(secondary, "K");
00527 current += 2;
00528 break;
00529 }
00530 /* "K" if: word starts "VAN ", "VON " or "SCH"; or ORCHES/ARCHIT/ORCHID; or CH is followed by T or S; */
00531 /* or CH is (preceded by A, O, U, E or at word start) and followed by L R N M B H F V W or a blank */
00532 if (
00533 (StringAt(original, 0, 4, "VAN ", "VON ", "")
00534 || StringAt(original, 0, 3, "SCH", ""))
00535
00536 || StringAt(original, (current - 2), 6, "ORCHES",
00537 "ARCHIT", "ORCHID", "")
00538 || StringAt(original, (current + 2), 1, "T", "S",
00539 "")
00540 || ((StringAt(original, (current - 1), 1,
00541 "A", "O", "U", "E", "")
00542 || (current == 0))
00543
00544
00545
00546
00547 && StringAt(original, (current + 2), 1, "L", "R",
00548 "N", "M", "B", "H", "F", "V", "W",
00549 " ", "")))
00550 {
00551 MetaphAdd(primary, "K");
00552 MetaphAdd(secondary, "K");
00553 }
00554 else
00555 {
00556 if (current > 0)
00557 {
00558 if (StringAt(original, 0, 2, "MC", ""))
00559 {
00560 /* word starts with "MC" */
00561 MetaphAdd(primary, "K");
00562 MetaphAdd(secondary, "K");
00563 }
00564 else
00565 {
00566 MetaphAdd(primary, "X");
00567 MetaphAdd(secondary, "K");
00568 }
00569 }
00570 else
00571 {
00572 MetaphAdd(primary, "X");
00573 MetaphAdd(secondary, "X");
00574 }
00575 }
00576 current += 2;
00577 break;
00578 }
00579 /* "CZ" that is not the tail of "WICZ": "S" / "X" */
00580 if (StringAt(original, current, 2, "CZ", "")
00581 && !StringAt(original, (current - 2), 4, "WICZ", ""))
00582 {
00583 MetaphAdd(primary, "S");
00584 MetaphAdd(secondary, "X");
00585 current += 2;
00586 break;
00587 }
00588
00589 /* "CIA" starting at the next character: "X", consume 3 */
00590 if (StringAt(original, (current + 1), 3, "CIA", ""))
00591 {
00592 MetaphAdd(primary, "X");
00593 MetaphAdd(secondary, "X");
00594 current += 3;
00595 break;
00596 }
00597
00598 /* double 'C', except when the word starts with "MCC" */
00599 if (StringAt(original, current, 2, "CC", "")
00600 && !((current == 1) && (GetAt(original, 0) == 'M')))
00601 {
00602 /* "CC" followed by I, E or H, but not by "HU" */
00603 if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
00604 && !StringAt(original, (current + 2), 2, "HU", ""))
00605 {
00606 /* word-initial "ACC", or UCCEE / UCCES: "KS"; otherwise "X" */
00607 if (
00608 ((current == 1)
00609 && (GetAt(original, current - 1) == 'A'))
00610 || StringAt(original, (current - 1), 5, "UCCEE",
00611 "UCCES", ""))
00612 {
00613 MetaphAdd(primary, "KS");
00614 MetaphAdd(secondary, "KS");
00615
00616 }
00617 else
00618 {
00619 MetaphAdd(primary, "X");
00620 MetaphAdd(secondary, "X");
00621 }
00622 current += 3;
00623 break;
00624 }
00625 else
00626 {
00627 MetaphAdd(primary, "K");
00628 MetaphAdd(secondary, "K");
00629 current += 2;
00630 break;
00631 }
00632 }
00633
00634 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
00635 {
00636 MetaphAdd(primary, "K");
00637 MetaphAdd(secondary, "K");
00638 current += 2;
00639 break;
00640 }
00641
00642 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
00643 {
00644 /* CIO / CIE / CIA: secondary code gets "X" instead of "S" */
00645 if (StringAt
00646 (original, current, 3, "CIO", "CIE", "CIA", ""))
00647 {
00648 MetaphAdd(primary, "S");
00649 MetaphAdd(secondary, "X");
00650 }
00651 else
00652 {
00653 MetaphAdd(primary, "S");
00654 MetaphAdd(secondary, "S");
00655 }
00656 current += 2;
00657 break;
00658 }
00659
00660 /* every remaining 'C' is "K" */
00661 MetaphAdd(primary, "K");
00662 MetaphAdd(secondary, "K");
00663
00664 /* decide how many input characters the 'C' consumes */
00665 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
00666 current += 3;
00667 else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
00668 && !StringAt(original, (current + 1), 2,
00669 "CE", "CI", ""))
00670 current += 2;
00671 else
00672 current += 1;
00673 break;
00674
00675 case 'D':
00676 if (StringAt(original, current, 2, "DG", ""))
00677 {
00678 if (StringAt(original, (current + 2), 1,
00679 "I", "E", "Y", ""))
00680 {
00681 /* DGI / DGE / DGY: "J" */
00682 MetaphAdd(primary, "J");
00683 MetaphAdd(secondary, "J");
00684 current += 3;
00685 break;
00686 }
00687 else
00688 {
00689 /* any other "DG": "TK" */
00690 MetaphAdd(primary, "TK");
00691 MetaphAdd(secondary, "TK");
00692 current += 2;
00693 break;
00694 }
00695 }
00696
00697 if (StringAt(original, current, 2, "DT", "DD", ""))
00698 {
00699 MetaphAdd(primary, "T");
00700 MetaphAdd(secondary, "T");
00701 current += 2;
00702 break;
00703 }
00704
00705 /* plain 'D' */
00706 MetaphAdd(primary, "T");
00707 MetaphAdd(secondary, "T");
00708 current += 1;
00709 break;
00710
00711 case 'F':
00712 if (GetAt(original, current + 1) == 'F')
00713 current += 2;
00714 else
00715 current += 1;
00716 MetaphAdd(primary, "F");
00717 MetaphAdd(secondary, "F");
00718 break;
00719
00720 case 'G':
00721 if (GetAt(original, current + 1) == 'H')
00722 {
00723 if ((current > 0) && !IsVowel(original, current - 1))
00724 {
00725 MetaphAdd(primary, "K");
00726 MetaphAdd(secondary, "K");
00727 current += 2;
00728 break;
00729 }
00730
00731 if (current < 3)
00732 {
00733 /* word-initial "GH": "J" before 'I', otherwise "K" */
00734 if (current == 0)
00735 {
00736 if (GetAt(original, current + 2) == 'I')
00737 {
00738 MetaphAdd(primary, "J");
00739 MetaphAdd(secondary, "J");
00740 }
00741 else
00742 {
00743 MetaphAdd(primary, "K");
00744 MetaphAdd(secondary, "K");
00745 }
00746 current += 2;
00747 break;
00748 }
00749 }
00750
00751
00752
00753 /* "GH" adds nothing when B, H or D sits 2 or 3 characters back, */
00754 /* or B or H sits 4 characters back */
00755 if (
00756 ((current > 1)
00757 && StringAt(original, (current - 2), 1,
00758 "B", "H", "D", ""))
00759
00760 || ((current > 2)
00761 && StringAt(original, (current - 3), 1,
00762 "B", "H", "D", ""))
00763
00764 || ((current > 3)
00765 && StringAt(original, (current - 4), 1,
00766 "B", "H", "")))
00767 {
00768 current += 2;
00769 break;
00770 }
00771 else
00772 {
00773
00774
00775
00776 /* "F" when preceded by 'U' with C, G, L, R or T three characters back */
00777 if ((current > 2)
00778 && (GetAt(original, current - 1) == 'U')
00779 && StringAt(original, (current - 3), 1, "C",
00780 "G", "L", "R", "T", ""))
00781 {
00782 MetaphAdd(primary, "F");
00783 MetaphAdd(secondary, "F");
00784 }
00785 else if ((current > 0)
00786 && GetAt(original, current - 1) != 'I')
00787 {
00788
00789 /* otherwise "K", unless the preceding character is 'I' (then nothing is added) */
00790 MetaphAdd(primary, "K");
00791 MetaphAdd(secondary, "K");
00792 }
00793
00794 current += 2;
00795 break;
00796 }
00797 }
00798 /* "GN" */
00799 if (GetAt(original, current + 1) == 'N')
00800 {
00801 if ((current == 1) && IsVowel(original, 0)
00802 && !SlavoGermanic(original))
00803 {
00804 MetaphAdd(primary, "KN");
00805 MetaphAdd(secondary, "N");
00806 }
00807 else
00808 /* NOTE(review): the character at current + 1 is known to be 'N' here, so the != 'Y' test below is always true */
00809 if (!StringAt(original, (current + 2), 2, "EY", "")
00810 && (GetAt(original, current + 1) != 'Y')
00811 && !SlavoGermanic(original))
00812 {
00813 MetaphAdd(primary, "N");
00814 MetaphAdd(secondary, "KN");
00815 }
00816 else
00817 {
00818 MetaphAdd(primary, "KN");
00819 MetaphAdd(secondary, "KN");
00820 }
00821 current += 2;
00822 break;
00823 }
00824
00825 /* "GLI" in a word without Slavo-Germanic markers: "KL" / "L" */
00826 if (StringAt(original, (current + 1), 2, "LI", "")
00827 && !SlavoGermanic(original))
00828 {
00829 MetaphAdd(primary, "KL");
00830 MetaphAdd(secondary, "L");
00831 current += 2;
00832 break;
00833 }
00834
00835 /* word-initial 'G' followed by 'Y' or one of the listed two-letter groups: "K" / "J" */
00836 if ((current == 0)
00837 && ((GetAt(original, current + 1) == 'Y')
00838 || StringAt(original, (current + 1), 2, "ES", "EP",
00839 "EB", "EL", "EY", "IB", "IL", "IN", "IE",
00840 "EI", "ER", "")))
00841 {
00842 MetaphAdd(primary, "K");
00843 MetaphAdd(secondary, "J");
00844 current += 2;
00845 break;
00846 }
00847
00848 /* "GER" or "GY", excluding words starting DANGER/RANGER/MANGER, a preceding E or I, and RGY/OGY */
00849 if (
00850 (StringAt(original, (current + 1), 2, "ER", "")
00851 || (GetAt(original, current + 1) == 'Y'))
00852 && !StringAt(original, 0, 6,
00853 "DANGER", "RANGER", "MANGER", "")
00854 && !StringAt(original, (current - 1), 1, "E", "I", "")
00855 && !StringAt(original, (current - 1), 3, "RGY", "OGY",
00856 ""))
00857 {
00858 MetaphAdd(primary, "K");
00859 MetaphAdd(secondary, "J");
00860 current += 2;
00861 break;
00862 }
00863
00864 /* 'G' before E, I or Y, or inside AGGI / OGGI */
00865 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
00866 || StringAt(original, (current - 1), 4,
00867 "AGGI", "OGGI", ""))
00868 {
00869 /* words starting "VAN ", "VON " or "SCH", or "GET": "K" */
00870 if (
00871 (StringAt(original, 0, 4, "VAN ", "VON ", "")
00872 || StringAt(original, 0, 3, "SCH", ""))
00873 || StringAt(original, (current + 1), 2, "ET", ""))
00874 {
00875 MetaphAdd(primary, "K");
00876 MetaphAdd(secondary, "K");
00877 }
00878 else
00879 {
00880 /* "GIER" followed by a blank: "J" in both codes; otherwise "J" / "K" */
00881 if (StringAt
00882 (original, (current + 1), 4, "IER ", ""))
00883 {
00884 MetaphAdd(primary, "J");
00885 MetaphAdd(secondary, "J");
00886 }
00887 else
00888 {
00889 MetaphAdd(primary, "J");
00890 MetaphAdd(secondary, "K");
00891 }
00892 }
00893 current += 2;
00894 break;
00895 }
00896 /* any other 'G' is "K"; "GG" is consumed as one */
00897 if (GetAt(original, current + 1) == 'G')
00898 current += 2;
00899 else
00900 current += 1;
00901 MetaphAdd(primary, "K");
00902 MetaphAdd(secondary, "K");
00903 break;
00904
00905 case 'H':
00906 /* coded only when followed by a vowel and either first in the word or preceded by a vowel */
00907 if (((current == 0) || IsVowel(original, current - 1))
00908 && IsVowel(original, current + 1))
00909 {
00910 MetaphAdd(primary, "H");
00911 MetaphAdd(secondary, "H");
00912 current += 2;
00913 }
00914 else
00915 /* otherwise nothing is added */
00916 current += 1;
00917 break;
00918
00919 case 'J':
00920 /* "JOSE" at this position, or any word starting with "SAN " */
00921 if (StringAt(original, current, 4, "JOSE", "")
00922 || StringAt(original, 0, 4, "SAN ", ""))
00923 {
00924 if (((current == 0)
00925 && (GetAt(original, current + 4) == ' '))
00926 || StringAt(original, 0, 4, "SAN ", ""))
00927 {
00928 MetaphAdd(primary, "H");
00929 MetaphAdd(secondary, "H");
00930 }
00931 else
00932 {
00933 MetaphAdd(primary, "J");
00934 MetaphAdd(secondary, "H");
00935 }
00936 current += 1;
00937 break;
00938 }
00939 /* word-initial 'J' (the "JOSE" case was handled above): "J" / "A" */
00940 if ((current == 0)
00941 && !StringAt(original, current, 4, "JOSE", ""))
00942 {
00943 MetaphAdd(primary, "J");
00944 MetaphAdd(secondary, "A");
00945 }
00946 else
00947 {
00948 /* 'J' between a vowel and A or O, in a word without Slavo-Germanic markers: "J" / "H" */
00949 if (IsVowel(original, current - 1)
00950 && !SlavoGermanic(original)
00951 && ((GetAt(original, current + 1) == 'A')
00952 || (GetAt(original, current + 1) == 'O')))
00953 {
00954 MetaphAdd(primary, "J");
00955 MetaphAdd(secondary, "H");
00956 }
00957 else
00958 {
00959 if (current == last)
00960 {
00961 MetaphAdd(primary, "J");
00962 MetaphAdd(secondary, "");
00963 }
00964 else
00965 {
00966 if (!StringAt(original, (current + 1), 1, "L", "T",
00967 "K", "S", "N", "M", "B", "Z", "")
00968 && !StringAt(original, (current - 1), 1,
00969 "S", "K", "L", ""))
00970 {
00971 MetaphAdd(primary, "J");
00972 MetaphAdd(secondary, "J");
00973 }
00974 }
00975 }
00976 }
00977 /* "JJ" is consumed as one */
00978 if (GetAt(original, current + 1) == 'J')
00979 current += 2;
00980 else
00981 current += 1;
00982 break;
00983
00984 case 'K':
00985 if (GetAt(original, current + 1) == 'K')
00986 current += 2;
00987 else
00988 current += 1;
00989 MetaphAdd(primary, "K");
00990 MetaphAdd(secondary, "K");
00991 break;
00992
00993 case 'L':
00994 if (GetAt(original, current + 1) == 'L')
00995 {
00996 /* "LL" in the listed word endings: only the primary code receives "L" */
00997 if (((current == (length - 3))
00998 && StringAt(original, (current - 1), 4, "ILLO",
00999 "ILLA", "ALLE", ""))
01000 || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
01001 || StringAt(original, last, 1, "A", "O", ""))
01002 && StringAt(original, (current - 1), 4,
01003 "ALLE", "")))
01004 {
01005 MetaphAdd(primary, "L");
01006 MetaphAdd(secondary, "");
01007 current += 2;
01008 break;
01009 }
01010 current += 2;
01011 }
01012 else
01013 current += 1;
01014 MetaphAdd(primary, "L");
01015 MetaphAdd(secondary, "L");
01016 break;
01017 /* 'M': "MM", and the 'M' of "UMB" at the end of the word or before "ER", consume two characters */
01018 case 'M':
01019 if ((StringAt(original, (current - 1), 3, "UMB", "")
01020 && (((current + 1) == last)
01021 || StringAt(original, (current + 2), 2, "ER", "")))
01022
01023 || (GetAt(original, current + 1) == 'M'))
01024 current += 2;
01025 else
01026 current += 1;
01027 MetaphAdd(primary, "M");
01028 MetaphAdd(secondary, "M");
01029 break;
01030
01031 case 'N':
01032 if (GetAt(original, current + 1) == 'N')
01033 current += 2;
01034 else
01035 current += 1;
01036 MetaphAdd(primary, "N");
01037 MetaphAdd(secondary, "N");
01038 break;
01039 /* NOTE(review): 0xD1 is N-tilde in ISO-8859-1; presumably assumes a single-byte Latin-1-like encoding - confirm */
01040 case '\xd1':
01041 current += 1;
01042 MetaphAdd(primary, "N");
01043 MetaphAdd(secondary, "N");
01044 break;
01045
01046 case 'P':
01047 if (GetAt(original, current + 1) == 'H')
01048 {
01049 MetaphAdd(primary, "F");
01050 MetaphAdd(secondary, "F");
01051 current += 2;
01052 break;
01053 }
01054
01055 /* a 'P' also absorbs a following P or B */
01056 if (StringAt(original, (current + 1), 1, "P", "B", ""))
01057 current += 2;
01058 else
01059 current += 1;
01060 MetaphAdd(primary, "P");
01061 MetaphAdd(secondary, "P");
01062 break;
01063
01064 case 'Q':
01065 if (GetAt(original, current + 1) == 'Q')
01066 current += 2;
01067 else
01068 current += 1;
01069 MetaphAdd(primary, "K");
01070 MetaphAdd(secondary, "K");
01071 break;
01072
01073 case 'R':
01074 /* final 'R' after "IE" (not preceded by ME / MA), no Slavo-Germanic markers: secondary code only */
01075 if ((current == last)
01076 && !SlavoGermanic(original)
01077 && StringAt(original, (current - 2), 2, "IE", "")
01078 && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
01079 {
01080 MetaphAdd(primary, "");
01081 MetaphAdd(secondary, "R");
01082 }
01083 else
01084 {
01085 MetaphAdd(primary, "R");
01086 MetaphAdd(secondary, "R");
01087 }
01088
01089 if (GetAt(original, current + 1) == 'R')
01090 current += 2;
01091 else
01092 current += 1;
01093 break;
01094
01095 case 'S':
01096 /* the 'S' of "ISL" / "YSL" adds nothing */
01097 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
01098 {
01099 current += 1;
01100 break;
01101 }
01102
01103 /* word-initial "SUGAR": "X" / "S" */
01104 if ((current == 0)
01105 && StringAt(original, current, 5, "SUGAR", ""))
01106 {
01107 MetaphAdd(primary, "X");
01108 MetaphAdd(secondary, "S");
01109 current += 1;
01110 break;
01111 }
01112
01113 if (StringAt(original, current, 2, "SH", ""))
01114 {
01115 /* "SH" before EIM, OEK, OLM or OLZ stays "S"; otherwise "X" */
01116 if (StringAt
01117 (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
01118 "HOLZ", ""))
01119 {
01120 MetaphAdd(primary, "S");
01121 MetaphAdd(secondary, "S");
01122 }
01123 else
01124 {
01125 MetaphAdd(primary, "X");
01126 MetaphAdd(secondary, "X");
01127 }
01128 current += 2;
01129 break;
01130 }
01131 /* "SIO" / "SIA"; NOTE(review): the "SIAN" test can only be true when "SIA" already matched */
01132 /* secondary code gets "X" unless the word has Slavo-Germanic markers */
01133 if (StringAt(original, current, 3, "SIO", "SIA", "")
01134 || StringAt(original, current, 4, "SIAN", ""))
01135 {
01136 if (!SlavoGermanic(original))
01137 {
01138 MetaphAdd(primary, "S");
01139 MetaphAdd(secondary, "X");
01140 }
01141 else
01142 {
01143 MetaphAdd(primary, "S");
01144 MetaphAdd(secondary, "S");
01145 }
01146 current += 3;
01147 break;
01148 }
01149
01150
01151
01152
01153
01154 /* word-initial 'S' before M, N, L or W, or any 'S' before 'Z': "S" / "X" */
01155 if (((current == 0)
01156 && StringAt(original, (current + 1), 1,
01157 "M", "N", "L", "W", ""))
01158 || StringAt(original, (current + 1), 1, "Z", ""))
01159 {
01160 MetaphAdd(primary, "S");
01161 MetaphAdd(secondary, "X");
01162 if (StringAt(original, (current + 1), 1, "Z", ""))
01163 current += 2;
01164 else
01165 current += 1;
01166 break;
01167 }
01168
01169 if (StringAt(original, current, 2, "SC", ""))
01170 {
01171 /* "SCH" */
01172 if (GetAt(original, current + 2) == 'H')
01173 {
01174 /* followed by OO, ER, EN, UY, ED or EM */
01175 if (StringAt(original, (current + 3), 2,
01176 "OO", "ER", "EN",
01177 "UY", "ED", "EM", ""))
01178 {
01179 /* ER / EN: "X" / "SK"; the others: "SK" in both codes */
01180 if (StringAt(original, (current + 3), 2,
01181 "ER", "EN", ""))
01182 {
01183 MetaphAdd(primary, "X");
01184 MetaphAdd(secondary, "SK");
01185 }
01186 else
01187 {
01188 MetaphAdd(primary, "SK");
01189 MetaphAdd(secondary, "SK");
01190 }
01191 current += 3;
01192 break;
01193 }
01194 else
01195 {
01196 if ((current == 0) && !IsVowel(original, 3)
01197 && (GetAt(original, 3) != 'W'))
01198 {
01199 MetaphAdd(primary, "X");
01200 MetaphAdd(secondary, "S");
01201 }
01202 else
01203 {
01204 MetaphAdd(primary, "X");
01205 MetaphAdd(secondary, "X");
01206 }
01207 current += 3;
01208 break;
01209 }
01210 }
01211 /* "SC" before I, E or Y: "S" */
01212 if (StringAt(original, (current + 2), 1,
01213 "I", "E", "Y", ""))
01214 {
01215 MetaphAdd(primary, "S");
01216 MetaphAdd(secondary, "S");
01217 current += 3;
01218 break;
01219 }
01220 /* any other "SC": "SK" */
01221 MetaphAdd(primary, "SK");
01222 MetaphAdd(secondary, "SK");
01223 current += 3;
01224 break;
01225 }
01226
01227 /* final 'S' after "AI" / "OI" goes to the secondary code only */
01228 if ((current == last)
01229 && StringAt(original, (current - 2), 2, "AI", "OI", ""))
01230 {
01231 MetaphAdd(primary, "");
01232 MetaphAdd(secondary, "S");
01233 }
01234 else
01235 {
01236 MetaphAdd(primary, "S");
01237 MetaphAdd(secondary, "S");
01238 }
01239
01240 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
01241 current += 2;
01242 else
01243 current += 1;
01244 break;
01245 /* 'T': "TION", "TIA" and "TCH" are coded "X" */
01246 case 'T':
01247 if (StringAt(original, current, 4, "TION", ""))
01248 {
01249 MetaphAdd(primary, "X");
01250 MetaphAdd(secondary, "X");
01251 current += 3;
01252 break;
01253 }
01254
01255 if (StringAt(original, current, 3, "TIA", "TCH", ""))
01256 {
01257 MetaphAdd(primary, "X");
01258 MetaphAdd(secondary, "X");
01259 current += 3;
01260 break;
01261 }
01262
01263 if (StringAt(original, current, 2, "TH", "")
01264 || StringAt(original, current, 3, "TTH", ""))
01265 {
01266 /* before OM / AM, or in words starting "VAN ", "VON ", "SCH": "T"; otherwise "0" (digit zero) / "T" */
01267 if (StringAt(original, (current + 2), 2, "OM", "AM", "")
01268 || StringAt(original, 0, 4, "VAN ", "VON ", "")
01269 || StringAt(original, 0, 3, "SCH", ""))
01270 {
01271 MetaphAdd(primary, "T");
01272 MetaphAdd(secondary, "T");
01273 }
01274 else
01275 {
01276 MetaphAdd(primary, "0");
01277 MetaphAdd(secondary, "T");
01278 }
01279 current += 2;
01280 break;
01281 }
01282
01283 if (StringAt(original, (current + 1), 1, "T", "D", ""))
01284 current += 2;
01285 else
01286 current += 1;
01287 MetaphAdd(primary, "T");
01288 MetaphAdd(secondary, "T");
01289 break;
01290
01291 case 'V':
01292 if (GetAt(original, current + 1) == 'V')
01293 current += 2;
01294 else
01295 current += 1;
01296 MetaphAdd(primary, "F");
01297 MetaphAdd(secondary, "F");
01298 break;
01299
01300 case 'W':
01301 /* "WR": "R" */
01302 if (StringAt(original, current, 2, "WR", ""))
01303 {
01304 MetaphAdd(primary, "R");
01305 MetaphAdd(secondary, "R");
01306 current += 2;
01307 break;
01308 }
01309 /* no break in this branch: the checks further down still run afterwards */
01310 if ((current == 0)
01311 && (IsVowel(original, current + 1)
01312 || StringAt(original, current, 2, "WH", "")))
01313 {
01314 /* word-initial 'W' + vowel: "A" / "F" */
01315 if (IsVowel(original, current + 1))
01316 {
01317 MetaphAdd(primary, "A");
01318 MetaphAdd(secondary, "F");
01319 }
01320 else
01321 {
01322 /* word-initial "WH": "A" in both codes */
01323 MetaphAdd(primary, "A");
01324 MetaphAdd(secondary, "A");
01325 }
01326 }
01327
01328 /* final 'W' after a vowel, EWSKI/EWSKY/OWSKI/OWSKY, or a word starting "SCH": secondary "F" only */
01329 if (((current == last) && IsVowel(original, current - 1))
01330 || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
01331 "OWSKI", "OWSKY", "")
01332 || StringAt(original, 0, 3, "SCH", ""))
01333 {
01334 MetaphAdd(primary, "");
01335 MetaphAdd(secondary, "F");
01336 current += 1;
01337 break;
01338 }
01339
01340 /* WICZ / WITZ: "TS" / "FX", consume all 4 characters */
01341 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
01342 {
01343 MetaphAdd(primary, "TS");
01344 MetaphAdd(secondary, "FX");
01345 current += 4;
01346 break;
01347 }
01348
01349 /* any other 'W' adds nothing here */
01350 current += 1;
01351 break;
01352
01353 case 'X':
01354 /* "KS", except for a final 'X' after IAU, EAU, AU or OU */
01355 if (!((current == last)
01356 && (StringAt(original, (current - 3), 3,
01357 "IAU", "EAU", "")
01358 || StringAt(original, (current - 2), 2,
01359 "AU", "OU", ""))))
01360 {
01361 MetaphAdd(primary, "KS");
01362 MetaphAdd(secondary, "KS");
01363 }
01364
01365 /* an 'X' also absorbs a following C or X */
01366 if (StringAt(original, (current + 1), 1, "C", "X", ""))
01367 current += 2;
01368 else
01369 current += 1;
01370 break;
01371
01372 case 'Z':
01373 /* "ZH": "J" */
01374 if (GetAt(original, current + 1) == 'H')
01375 {
01376 MetaphAdd(primary, "J");
01377 MetaphAdd(secondary, "J");
01378 current += 2;
01379 break;
01380 }
01381 else if (StringAt(original, (current + 1), 2,
01382 "ZO", "ZI", "ZA", "")
01383 || (SlavoGermanic(original)
01384 && ((current > 0)
01385 && GetAt(original, current - 1) != 'T')))
01386 {
01387 MetaphAdd(primary, "S");
01388 MetaphAdd(secondary, "TS");
01389 }
01390 else
01391 {
01392 MetaphAdd(primary, "S");
01393 MetaphAdd(secondary, "S");
01394 }
01395
01396 if (GetAt(original, current + 1) == 'Z')
01397 current += 2;
01398 else
01399 current += 1;
01400 break;
01401 /* any other character is skipped without adding to either code */
01402 default:
01403 current += 1;
01404 }
01405
01406
01407
01408
01409
01410 }
01411
01412 /* cut both codes to at most 4 characters; SetAt only writes the terminator, the length fields stay as they were */
01413 if (primary->length > 4)
01414 SetAt(primary, 4, '\0');
01415
01416 if (secondary->length > 4)
01417 SetAt(secondary, 4, '\0');
01418 /* hand the two text buffers to the caller: codes[0] = primary, codes[1] = secondary */
01419 *codes = primary->str;
01420 *++codes = secondary->str;
01421 /* release the working copy and the wrappers; the returned buffers are kept because free_string_on_destroy is 0 */
01422 DestroyMetaString(original);
01423 DestroyMetaString(primary);
01424 DestroyMetaString(secondary);
01425 }
01426
01427 #ifdef DMETAPHONE_MAIN
01428
01429
01430
/*
 * main
 *    Standalone test driver (compiled only when DMETAPHONE_MAIN is defined).
 *
 * Encodes the first command-line argument and prints the two codes as
 * "primary|secondary" followed by a newline.  Without an argument nothing is
 * printed.  Always returns 0.
 *
 * The return type is spelled out because implicit int is not valid since
 * C99.
 */
int
main(int argc, char **argv)
{
    char       *codes[2];

    if (argc > 1)
    {
        DoubleMetaphone(argv[1], codes);
        printf("%s|%s\n", codes[0], codes[1]);
    }

    return 0;
}
01441
01442 #endif