00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "postgres.h"
00015 #include "fmgr.h"
00016 #include "mb/pg_wchar.h"
00017
00018
00019
00020
00021
00022 #define PGSJISALTCODE 0x81ac
00023 #define PGEUCALTCODE 0xa2ae
00024
00025
00026
00027
00028 #include "sjis.map"
00029
00030 PG_MODULE_MAGIC;
00031
00032 PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
00033 PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
00034 PG_FUNCTION_INFO_V1(euc_jp_to_mic);
00035 PG_FUNCTION_INFO_V1(mic_to_euc_jp);
00036 PG_FUNCTION_INFO_V1(sjis_to_mic);
00037 PG_FUNCTION_INFO_V1(mic_to_sjis);
00038
00039 extern Datum euc_jp_to_sjis(PG_FUNCTION_ARGS);
00040 extern Datum sjis_to_euc_jp(PG_FUNCTION_ARGS);
00041 extern Datum euc_jp_to_mic(PG_FUNCTION_ARGS);
00042 extern Datum mic_to_euc_jp(PG_FUNCTION_ARGS);
00043 extern Datum sjis_to_mic(PG_FUNCTION_ARGS);
00044 extern Datum mic_to_sjis(PG_FUNCTION_ARGS);
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
/*
 * Forward declarations of the file-local conversion routines.
 *
 * Every routine takes the source string, the destination buffer "p" and the
 * source length in bytes.  Parameter names match the definitions further
 * down in this file.
 */
static void sjis2mic(const unsigned char *sjis, unsigned char *p, int len);
static void mic2sjis(const unsigned char *mic, unsigned char *p, int len);
static void euc_jp2mic(const unsigned char *euc, unsigned char *p, int len);
static void mic2euc_jp(const unsigned char *mic, unsigned char *p, int len);
static void euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len);
static void sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len);
00063
00064 Datum
00065 euc_jp_to_sjis(PG_FUNCTION_ARGS)
00066 {
00067 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00068 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00069 int len = PG_GETARG_INT32(4);
00070
00071 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
00072
00073 euc_jp2sjis(src, dest, len);
00074
00075 PG_RETURN_VOID();
00076 }
00077
00078 Datum
00079 sjis_to_euc_jp(PG_FUNCTION_ARGS)
00080 {
00081 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00082 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00083 int len = PG_GETARG_INT32(4);
00084
00085 CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
00086
00087 sjis2euc_jp(src, dest, len);
00088
00089 PG_RETURN_VOID();
00090 }
00091
00092 Datum
00093 euc_jp_to_mic(PG_FUNCTION_ARGS)
00094 {
00095 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00096 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00097 int len = PG_GETARG_INT32(4);
00098
00099 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
00100
00101 euc_jp2mic(src, dest, len);
00102
00103 PG_RETURN_VOID();
00104 }
00105
00106 Datum
00107 mic_to_euc_jp(PG_FUNCTION_ARGS)
00108 {
00109 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00110 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00111 int len = PG_GETARG_INT32(4);
00112
00113 CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
00114
00115 mic2euc_jp(src, dest, len);
00116
00117 PG_RETURN_VOID();
00118 }
00119
00120 Datum
00121 sjis_to_mic(PG_FUNCTION_ARGS)
00122 {
00123 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00124 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00125 int len = PG_GETARG_INT32(4);
00126
00127 CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
00128
00129 sjis2mic(src, dest, len);
00130
00131 PG_RETURN_VOID();
00132 }
00133
00134 Datum
00135 mic_to_sjis(PG_FUNCTION_ARGS)
00136 {
00137 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00138 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00139 int len = PG_GETARG_INT32(4);
00140
00141 CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
00142
00143 mic2sjis(src, dest, len);
00144
00145 PG_RETURN_VOID();
00146 }
00147
00148
00149
00150
00151 static void
00152 sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
00153 {
00154 int c1,
00155 c2,
00156 i,
00157 k,
00158 k2;
00159
00160 while (len > 0)
00161 {
00162 c1 = *sjis;
00163 if (c1 >= 0xa1 && c1 <= 0xdf)
00164 {
00165
00166 *p++ = LC_JISX0201K;
00167 *p++ = c1;
00168 sjis++;
00169 len--;
00170 }
00171 else if (IS_HIGHBIT_SET(c1))
00172 {
00173
00174
00175
00176 if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
00177 report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
00178 c2 = sjis[1];
00179 k = (c1 << 8) + c2;
00180 if (k >= 0xed40 && k < 0xf040)
00181 {
00182
00183 for (i = 0;; i++)
00184 {
00185 k2 = ibmkanji[i].nec;
00186 if (k2 == 0xffff)
00187 break;
00188 if (k2 == k)
00189 {
00190 k = ibmkanji[i].sjis;
00191 c1 = (k >> 8) & 0xff;
00192 c2 = k & 0xff;
00193 }
00194 }
00195 }
00196
00197 if (k < 0xeb3f)
00198 {
00199
00200 *p++ = LC_JISX0208;
00201 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
00202 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
00203 }
00204 else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
00205 {
00206
00207 *p++ = LC_JISX0208;
00208 *p++ = PGEUCALTCODE >> 8;
00209 *p++ = PGEUCALTCODE & 0xff;
00210 }
00211 else if (k >= 0xf040 && k < 0xf540)
00212 {
00213
00214
00215
00216
00217 *p++ = LC_JISX0208;
00218 c1 -= 0x6f;
00219 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
00220 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
00221 }
00222 else if (k >= 0xf540 && k < 0xfa40)
00223 {
00224
00225
00226
00227
00228 *p++ = LC_JISX0212;
00229 c1 -= 0x74;
00230 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
00231 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
00232 }
00233 else if (k >= 0xfa40)
00234 {
00235
00236
00237
00238 for (i = 0;; i++)
00239 {
00240 k2 = ibmkanji[i].sjis;
00241 if (k2 == 0xffff)
00242 break;
00243 if (k2 == k)
00244 {
00245 k = ibmkanji[i].euc;
00246 if (k >= 0x8f0000)
00247 {
00248 *p++ = LC_JISX0212;
00249 *p++ = 0x80 | ((k & 0xff00) >> 8);
00250 *p++ = 0x80 | (k & 0xff);
00251 }
00252 else
00253 {
00254 *p++ = LC_JISX0208;
00255 *p++ = 0x80 | (k >> 8);
00256 *p++ = 0x80 | (k & 0xff);
00257 }
00258 }
00259 }
00260 }
00261 sjis += 2;
00262 len -= 2;
00263 }
00264 else
00265 {
00266 if (c1 == 0)
00267 report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
00268 *p++ = c1;
00269 sjis++;
00270 len--;
00271 }
00272 }
00273 *p = '\0';
00274 }
00275
00276
00277
00278
00279 static void
00280 mic2sjis(const unsigned char *mic, unsigned char *p, int len)
00281 {
00282 int c1,
00283 c2,
00284 k,
00285 l;
00286
00287 while (len > 0)
00288 {
00289 c1 = *mic;
00290 if (!IS_HIGHBIT_SET(c1))
00291 {
00292
00293 if (c1 == 0)
00294 report_invalid_encoding(PG_MULE_INTERNAL,
00295 (const char *) mic, len);
00296 *p++ = c1;
00297 mic++;
00298 len--;
00299 continue;
00300 }
00301 l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
00302 if (l < 0)
00303 report_invalid_encoding(PG_MULE_INTERNAL,
00304 (const char *) mic, len);
00305 if (c1 == LC_JISX0201K)
00306 *p++ = mic[1];
00307 else if (c1 == LC_JISX0208)
00308 {
00309 c1 = mic[1];
00310 c2 = mic[2];
00311 k = (c1 << 8) | (c2 & 0xff);
00312 if (k >= 0xf5a1)
00313 {
00314
00315 c1 -= 0x54;
00316 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
00317 }
00318 else
00319 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
00320 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
00321 }
00322 else if (c1 == LC_JISX0212)
00323 {
00324 int i,
00325 k2;
00326
00327 c1 = mic[1];
00328 c2 = mic[2];
00329 k = c1 << 8 | c2;
00330 if (k >= 0xf5a1)
00331 {
00332
00333 c1 -= 0x54;
00334 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
00335 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
00336 }
00337 else
00338 {
00339
00340 for (i = 0;; i++)
00341 {
00342 k2 = ibmkanji[i].euc & 0xffff;
00343 if (k2 == 0xffff)
00344 {
00345 *p++ = PGSJISALTCODE >> 8;
00346 *p++ = PGSJISALTCODE & 0xff;
00347 break;
00348 }
00349 if (k2 == k)
00350 {
00351 k = ibmkanji[i].sjis;
00352 *p++ = k >> 8;
00353 *p++ = k & 0xff;
00354 break;
00355 }
00356 }
00357 }
00358 }
00359 else
00360 report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
00361 (const char *) mic, len);
00362 mic += l;
00363 len -= l;
00364 }
00365 *p = '\0';
00366 }
00367
00368
00369
00370
00371 static void
00372 euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
00373 {
00374 int c1;
00375 int l;
00376
00377 while (len > 0)
00378 {
00379 c1 = *euc;
00380 if (!IS_HIGHBIT_SET(c1))
00381 {
00382
00383 if (c1 == 0)
00384 report_invalid_encoding(PG_EUC_JP,
00385 (const char *) euc, len);
00386 *p++ = c1;
00387 euc++;
00388 len--;
00389 continue;
00390 }
00391 l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
00392 if (l < 0)
00393 report_invalid_encoding(PG_EUC_JP,
00394 (const char *) euc, len);
00395 if (c1 == SS2)
00396 {
00397 *p++ = LC_JISX0201K;
00398 *p++ = euc[1];
00399 }
00400 else if (c1 == SS3)
00401 {
00402 *p++ = LC_JISX0212;
00403 *p++ = euc[1];
00404 *p++ = euc[2];
00405 }
00406 else
00407 {
00408 *p++ = LC_JISX0208;
00409 *p++ = c1;
00410 *p++ = euc[1];
00411 }
00412 euc += l;
00413 len -= l;
00414 }
00415 *p = '\0';
00416 }
00417
00418
00419
00420
00421 static void
00422 mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
00423 {
00424 int c1;
00425 int l;
00426
00427 while (len > 0)
00428 {
00429 c1 = *mic;
00430 if (!IS_HIGHBIT_SET(c1))
00431 {
00432
00433 if (c1 == 0)
00434 report_invalid_encoding(PG_MULE_INTERNAL,
00435 (const char *) mic, len);
00436 *p++ = c1;
00437 mic++;
00438 len--;
00439 continue;
00440 }
00441 l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
00442 if (l < 0)
00443 report_invalid_encoding(PG_MULE_INTERNAL,
00444 (const char *) mic, len);
00445 if (c1 == LC_JISX0201K)
00446 {
00447 *p++ = SS2;
00448 *p++ = mic[1];
00449 }
00450 else if (c1 == LC_JISX0212)
00451 {
00452 *p++ = SS3;
00453 *p++ = mic[1];
00454 *p++ = mic[2];
00455 }
00456 else if (c1 == LC_JISX0208)
00457 {
00458 *p++ = mic[1];
00459 *p++ = mic[2];
00460 }
00461 else
00462 report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
00463 (const char *) mic, len);
00464 mic += l;
00465 len -= l;
00466 }
00467 *p = '\0';
00468 }
00469
00470
00471
00472
00473 static void
00474 euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
00475 {
00476 int c1,
00477 c2,
00478 k;
00479 int l;
00480
00481 while (len > 0)
00482 {
00483 c1 = *euc;
00484 if (!IS_HIGHBIT_SET(c1))
00485 {
00486
00487 if (c1 == 0)
00488 report_invalid_encoding(PG_EUC_JP,
00489 (const char *) euc, len);
00490 *p++ = c1;
00491 euc++;
00492 len--;
00493 continue;
00494 }
00495 l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
00496 if (l < 0)
00497 report_invalid_encoding(PG_EUC_JP,
00498 (const char *) euc, len);
00499 if (c1 == SS2)
00500 {
00501
00502 *p++ = euc[1];
00503 }
00504 else if (c1 == SS3)
00505 {
00506
00507 c1 = euc[1];
00508 c2 = euc[2];
00509 k = c1 << 8 | c2;
00510 if (k >= 0xf5a1)
00511 {
00512
00513 c1 -= 0x54;
00514 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
00515 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
00516 }
00517 else
00518 {
00519 int i,
00520 k2;
00521
00522
00523 for (i = 0;; i++)
00524 {
00525 k2 = ibmkanji[i].euc & 0xffff;
00526 if (k2 == 0xffff)
00527 {
00528 *p++ = PGSJISALTCODE >> 8;
00529 *p++ = PGSJISALTCODE & 0xff;
00530 break;
00531 }
00532 if (k2 == k)
00533 {
00534 k = ibmkanji[i].sjis;
00535 *p++ = k >> 8;
00536 *p++ = k & 0xff;
00537 break;
00538 }
00539 }
00540 }
00541 }
00542 else
00543 {
00544
00545 c2 = euc[1];
00546 k = (c1 << 8) | (c2 & 0xff);
00547 if (k >= 0xf5a1)
00548 {
00549
00550 c1 -= 0x54;
00551 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
00552 }
00553 else
00554 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
00555 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
00556 }
00557 euc += l;
00558 len -= l;
00559 }
00560 *p = '\0';
00561 }
00562
00563
00564
00565
00566 static void
00567 sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
00568 {
00569 int c1,
00570 c2,
00571 i,
00572 k,
00573 k2;
00574 int l;
00575
00576 while (len > 0)
00577 {
00578 c1 = *sjis;
00579 if (!IS_HIGHBIT_SET(c1))
00580 {
00581
00582 if (c1 == 0)
00583 report_invalid_encoding(PG_SJIS,
00584 (const char *) sjis, len);
00585 *p++ = c1;
00586 sjis++;
00587 len--;
00588 continue;
00589 }
00590 l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len);
00591 if (l < 0)
00592 report_invalid_encoding(PG_SJIS,
00593 (const char *) sjis, len);
00594 if (c1 >= 0xa1 && c1 <= 0xdf)
00595 {
00596
00597 *p++ = SS2;
00598 *p++ = c1;
00599 }
00600 else
00601 {
00602
00603
00604
00605 c2 = sjis[1];
00606 k = (c1 << 8) + c2;
00607 if (k >= 0xed40 && k < 0xf040)
00608 {
00609
00610 for (i = 0;; i++)
00611 {
00612 k2 = ibmkanji[i].nec;
00613 if (k2 == 0xffff)
00614 break;
00615 if (k2 == k)
00616 {
00617 k = ibmkanji[i].sjis;
00618 c1 = (k >> 8) & 0xff;
00619 c2 = k & 0xff;
00620 }
00621 }
00622 }
00623
00624 if (k < 0xeb3f)
00625 {
00626
00627 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
00628 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
00629 }
00630 else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
00631 {
00632
00633 *p++ = PGEUCALTCODE >> 8;
00634 *p++ = PGEUCALTCODE & 0xff;
00635 }
00636 else if (k >= 0xf040 && k < 0xf540)
00637 {
00638
00639
00640
00641
00642 c1 -= 0x6f;
00643 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
00644 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
00645 }
00646 else if (k >= 0xf540 && k < 0xfa40)
00647 {
00648
00649
00650
00651
00652 *p++ = SS3;
00653 c1 -= 0x74;
00654 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
00655 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
00656 }
00657 else if (k >= 0xfa40)
00658 {
00659
00660
00661
00662
00663 for (i = 0;; i++)
00664 {
00665 k2 = ibmkanji[i].sjis;
00666 if (k2 == 0xffff)
00667 break;
00668 if (k2 == k)
00669 {
00670 k = ibmkanji[i].euc;
00671 if (k >= 0x8f0000)
00672 {
00673 *p++ = SS3;
00674 *p++ = 0x80 | ((k & 0xff00) >> 8);
00675 *p++ = 0x80 | (k & 0xff);
00676 }
00677 else
00678 {
00679 *p++ = 0x80 | (k >> 8);
00680 *p++ = 0x80 | (k & 0xff);
00681 }
00682 }
00683 }
00684 }
00685 }
00686 sjis += l;
00687 len -= l;
00688 }
00689 *p = '\0';
00690 }