00001
00002
00003
00004
00005
00006
00007
00008 #ifdef FRONTEND
00009 #include "postgres_fe.h"
00010 #else
00011 #include "postgres.h"
00012 #endif
00013
00014 #include "mb/pg_wchar.h"
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 static int
00040 pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00041 {
00042 int cnt = 0;
00043
00044 while (len > 0 && *from)
00045 {
00046 *to++ = *from++;
00047 len--;
00048 cnt++;
00049 }
00050 *to = 0;
00051 return cnt;
00052 }
00053
/*
 * Byte length of the character starting at "s": always one byte,
 * regardless of the byte's value.
 */
static int
pg_ascii_mblen(const unsigned char *s)
{
    (void) s;                   /* the byte value is irrelevant here */
    return 1;
}
00059
/*
 * Display length of the single-byte character at "s".
 *
 * Returns 0 for the zero byte, -1 for bytes below 0x20 and for 0x7f,
 * and 1 for every other byte value.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
    const unsigned char ch = *s;

    if (ch == '\0')
        return 0;

    return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}
00070
00071
00072
00073
00074 static int
00075 pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00076 {
00077 int cnt = 0;
00078
00079 while (len > 0 && *from)
00080 {
00081 if (*from == SS2 && len >= 2)
00082
00083 {
00084 from++;
00085 *to = (SS2 << 8) | *from++;
00086 len -= 2;
00087 }
00088 else if (*from == SS3 && len >= 3)
00089 {
00090 from++;
00091 *to = (SS3 << 16) | (*from++ << 8);
00092 *to |= *from++;
00093 len -= 3;
00094 }
00095 else if (IS_HIGHBIT_SET(*from) && len >= 2)
00096 {
00097 *to = *from++ << 8;
00098 *to |= *from++;
00099 len -= 2;
00100 }
00101 else
00102 {
00103 *to = *from++;
00104 len--;
00105 }
00106 to++;
00107 cnt++;
00108 }
00109 *to = 0;
00110 return cnt;
00111 }
00112
00113 static inline int
00114 pg_euc_mblen(const unsigned char *s)
00115 {
00116 int len;
00117
00118 if (*s == SS2)
00119 len = 2;
00120 else if (*s == SS3)
00121 len = 3;
00122 else if (IS_HIGHBIT_SET(*s))
00123 len = 2;
00124 else
00125 len = 1;
00126 return len;
00127 }
00128
00129 static inline int
00130 pg_euc_dsplen(const unsigned char *s)
00131 {
00132 int len;
00133
00134 if (*s == SS2)
00135 len = 2;
00136 else if (*s == SS3)
00137 len = 2;
00138 else if (IS_HIGHBIT_SET(*s))
00139 len = 2;
00140 else
00141 len = pg_ascii_dsplen(s);
00142 return len;
00143 }
00144
00145
00146
00147
00148 static int
00149 pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00150 {
00151 return pg_euc2wchar_with_len(from, to, len);
00152 }
00153
/*
 * Byte length of an EUC_JP character; identical to the generic EUC rule.
 */
static int
pg_eucjp_mblen(const unsigned char *s)
{
    const int   nbytes = pg_euc_mblen(s);

    return nbytes;
}
00159
00160 static int
00161 pg_eucjp_dsplen(const unsigned char *s)
00162 {
00163 int len;
00164
00165 if (*s == SS2)
00166 len = 1;
00167 else if (*s == SS3)
00168 len = 2;
00169 else if (IS_HIGHBIT_SET(*s))
00170 len = 2;
00171 else
00172 len = pg_ascii_dsplen(s);
00173 return len;
00174 }
00175
00176
00177
00178
00179 static int
00180 pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00181 {
00182 return pg_euc2wchar_with_len(from, to, len);
00183 }
00184
/*
 * Byte length of an EUC_KR character; identical to the generic EUC rule.
 */
static int
pg_euckr_mblen(const unsigned char *s)
{
    const int   nbytes = pg_euc_mblen(s);

    return nbytes;
}
00190
/*
 * Display length of an EUC_KR character; identical to the generic EUC
 * rule.
 */
static int
pg_euckr_dsplen(const unsigned char *s)
{
    const int   width = pg_euc_dsplen(s);

    return width;
}
00196
00197
00198
00199
00200
00201 static int
00202 pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00203 {
00204 int cnt = 0;
00205
00206 while (len > 0 && *from)
00207 {
00208 if (*from == SS2 && len >= 3)
00209 {
00210 from++;
00211 *to = (SS2 << 16) | (*from++ << 8);
00212 *to |= *from++;
00213 len -= 3;
00214 }
00215 else if (*from == SS3 && len >= 3)
00216 {
00217 from++;
00218 *to = (SS3 << 16) | (*from++ << 8);
00219 *to |= *from++;
00220 len -= 3;
00221 }
00222 else if (IS_HIGHBIT_SET(*from) && len >= 2)
00223 {
00224 *to = *from++ << 8;
00225 *to |= *from++;
00226 len -= 2;
00227 }
00228 else
00229 {
00230 *to = *from++;
00231 len--;
00232 }
00233 to++;
00234 cnt++;
00235 }
00236 *to = 0;
00237 return cnt;
00238 }
00239
/*
 * Byte length of an EUC_CN character: 2 when the first byte has its
 * high bit set, 1 otherwise.
 */
static int
pg_euccn_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
00251
/*
 * Display length of an EUC_CN character: 2 when the first byte has its
 * high bit set, otherwise the result of pg_ascii_dsplen().
 */
static int
pg_euccn_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
00263
00264
00265
00266
00267
00268 static int
00269 pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00270 {
00271 int cnt = 0;
00272
00273 while (len > 0 && *from)
00274 {
00275 if (*from == SS2 && len >= 4)
00276 {
00277 from++;
00278 *to = (((uint32) SS2) << 24) | (*from++ << 16);
00279 *to |= *from++ << 8;
00280 *to |= *from++;
00281 len -= 4;
00282 }
00283 else if (*from == SS3 && len >= 3)
00284 {
00285 from++;
00286 *to = (SS3 << 16) | (*from++ << 8);
00287 *to |= *from++;
00288 len -= 3;
00289 }
00290 else if (IS_HIGHBIT_SET(*from) && len >= 2)
00291 {
00292 *to = *from++ << 8;
00293 *to |= *from++;
00294 len -= 2;
00295 }
00296 else
00297 {
00298 *to = *from++;
00299 len--;
00300 }
00301 to++;
00302 cnt++;
00303 }
00304 *to = 0;
00305 return cnt;
00306 }
00307
00308 static int
00309 pg_euctw_mblen(const unsigned char *s)
00310 {
00311 int len;
00312
00313 if (*s == SS2)
00314 len = 4;
00315 else if (*s == SS3)
00316 len = 3;
00317 else if (IS_HIGHBIT_SET(*s))
00318 len = 2;
00319 else
00320 len = 1;
00321 return len;
00322 }
00323
00324 static int
00325 pg_euctw_dsplen(const unsigned char *s)
00326 {
00327 int len;
00328
00329 if (*s == SS2)
00330 len = 2;
00331 else if (*s == SS3)
00332 len = 2;
00333 else if (IS_HIGHBIT_SET(*s))
00334 len = 2;
00335 else
00336 len = pg_ascii_dsplen(s);
00337 return len;
00338 }
00339
00340
00341
00342
00343
00344
00345
00346 static int
00347 pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
00348 {
00349 int cnt = 0;
00350
00351 while (len > 0 && *from)
00352 {
00353 unsigned char c;
00354
00355 if ((c = (*from >> 24)))
00356 {
00357 *to++ = c;
00358 *to++ = (*from >> 16) & 0xff;
00359 *to++ = (*from >> 8) & 0xff;
00360 *to++ = *from & 0xff;
00361 cnt += 4;
00362 }
00363 else if ((c = (*from >> 16)))
00364 {
00365 *to++ = c;
00366 *to++ = (*from >> 8) & 0xff;
00367 *to++ = *from & 0xff;
00368 cnt += 3;
00369 }
00370 else if ((c = (*from >> 8)))
00371 {
00372 *to++ = c;
00373 *to++ = *from & 0xff;
00374 cnt += 2;
00375 }
00376 else
00377 {
00378 *to++ = *from;
00379 cnt++;
00380 }
00381 from++;
00382 len--;
00383 }
00384 *to = 0;
00385 return cnt;
00386 }
00387
00388
00389
00390
00391
/*
 * Byte length of a JOHAB character; uses the generic EUC rule.
 */
static int
pg_johab_mblen(const unsigned char *s)
{
    const int   nbytes = pg_euc_mblen(s);

    return nbytes;
}
00397
/*
 * Display length of a JOHAB character; uses the generic EUC rule.
 */
static int
pg_johab_dsplen(const unsigned char *s)
{
    const int   width = pg_euc_dsplen(s);

    return width;
}
00403
00404
00405
00406
00407
00408
00409
00410 static int
00411 pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00412 {
00413 int cnt = 0;
00414 uint32 c1,
00415 c2,
00416 c3,
00417 c4;
00418
00419 while (len > 0 && *from)
00420 {
00421 if ((*from & 0x80) == 0)
00422 {
00423 *to = *from++;
00424 len--;
00425 }
00426 else if ((*from & 0xe0) == 0xc0)
00427 {
00428 if (len < 2)
00429 break;
00430 c1 = *from++ & 0x1f;
00431 c2 = *from++ & 0x3f;
00432 *to = (c1 << 6) | c2;
00433 len -= 2;
00434 }
00435 else if ((*from & 0xf0) == 0xe0)
00436 {
00437 if (len < 3)
00438 break;
00439 c1 = *from++ & 0x0f;
00440 c2 = *from++ & 0x3f;
00441 c3 = *from++ & 0x3f;
00442 *to = (c1 << 12) | (c2 << 6) | c3;
00443 len -= 3;
00444 }
00445 else if ((*from & 0xf8) == 0xf0)
00446 {
00447 if (len < 4)
00448 break;
00449 c1 = *from++ & 0x07;
00450 c2 = *from++ & 0x3f;
00451 c3 = *from++ & 0x3f;
00452 c4 = *from++ & 0x3f;
00453 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
00454 len -= 4;
00455 }
00456 else
00457 {
00458
00459 *to = *from++;
00460 len--;
00461 }
00462 to++;
00463 cnt++;
00464 }
00465 *to = 0;
00466 return cnt;
00467 }
00468
00469
00470
00471
00472
00473
00474 unsigned char *
00475 unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
00476 {
00477 if (c <= 0x7F)
00478 {
00479 utf8string[0] = c;
00480 }
00481 else if (c <= 0x7FF)
00482 {
00483 utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
00484 utf8string[1] = 0x80 | (c & 0x3F);
00485 }
00486 else if (c <= 0xFFFF)
00487 {
00488 utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
00489 utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
00490 utf8string[2] = 0x80 | (c & 0x3F);
00491 }
00492 else
00493 {
00494 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
00495 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
00496 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
00497 utf8string[3] = 0x80 | (c & 0x3F);
00498 }
00499
00500 return utf8string;
00501 }
00502
00503
00504
00505
00506
00507
00508
00509 static int
00510 pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
00511 {
00512 int cnt = 0;
00513
00514 while (len > 0 && *from)
00515 {
00516 int char_len;
00517
00518 unicode_to_utf8(*from, to);
00519 char_len = pg_utf_mblen(to);
00520 cnt += char_len;
00521 to += char_len;
00522 from++;
00523 len--;
00524 }
00525 *to = 0;
00526 return cnt;
00527 }
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
/*
 * Byte length of the UTF-8 sequence introduced by the byte at "s".
 *
 * Only the first byte is examined.  Lead bytes of the forms 0xxxxxxx,
 * 110xxxxx, 1110xxxx and 11110xxx give 1, 2, 3 and 4 respectively.
 * The 5- and 6-byte forms are recognized only when NOT_USED is defined.
 * Any other byte is reported as length 1.
 */
int
pg_utf_mblen(const unsigned char *s)
{
    const unsigned char lead = *s;

    if ((lead & 0x80) == 0)
        return 1;
    if ((lead & 0xe0) == 0xc0)
        return 2;
    if ((lead & 0xf0) == 0xe0)
        return 3;
    if ((lead & 0xf8) == 0xf0)
        return 4;
#ifdef NOT_USED
    if ((lead & 0xfc) == 0xf8)
        return 5;
    if ((lead & 0xfe) == 0xfc)
        return 6;
#endif
    return 1;
}
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576 struct mbinterval
00577 {
00578 unsigned short first;
00579 unsigned short last;
00580 };
00581
00582
00583 static int
00584 mbbisearch(pg_wchar ucs, const struct mbinterval * table, int max)
00585 {
00586 int min = 0;
00587 int mid;
00588
00589 if (ucs < table[0].first || ucs > table[max].last)
00590 return 0;
00591 while (max >= min)
00592 {
00593 mid = (min + max) / 2;
00594 if (ucs > table[mid].last)
00595 min = mid + 1;
00596 else if (ucs < table[mid].first)
00597 max = mid - 1;
00598 else
00599 return 1;
00600 }
00601
00602 return 0;
00603 }
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636 static int
00637 ucs_wcwidth(pg_wchar ucs)
00638 {
00639
00640 static const struct mbinterval combining[] = {
00641 {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
00642 {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
00643 {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
00644 {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
00645 {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
00646 {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
00647 {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
00648 {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
00649 {0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
00650 {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
00651 {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
00652 {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
00653 {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
00654 {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
00655 {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
00656 {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
00657 {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
00658 {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
00659 {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
00660 {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
00661 {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
00662 {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
00663 {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
00664 {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
00665 {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
00666 {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
00667 {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
00668 {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
00669 {0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
00670 {0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
00671 {0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
00672 {0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
00673 {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},
00674 {0xFFF9, 0xFFFB}
00675 };
00676
00677
00678 if (ucs == 0)
00679 return 0;
00680
00681 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
00682 return -1;
00683
00684
00685 if (mbbisearch(ucs, combining,
00686 sizeof(combining) / sizeof(struct mbinterval) - 1))
00687 return 0;
00688
00689
00690
00691
00692
00693 return 1 +
00694 (ucs >= 0x1100 &&
00695 (ucs <= 0x115f ||
00696 (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
00697 ucs != 0x303f) ||
00698 (ucs >= 0xac00 && ucs <= 0xd7a3) ||
00699 (ucs >= 0xf900 && ucs <= 0xfaff) ||
00700
00701 (ucs >= 0xfe30 && ucs <= 0xfe6f) ||
00702 (ucs >= 0xff00 && ucs <= 0xff5f) ||
00703 (ucs >= 0xffe0 && ucs <= 0xffe6) ||
00704 (ucs >= 0x20000 && ucs <= 0x2ffff)));
00705 }
00706
00707
00708
00709
00710
00711
00712
00713 pg_wchar
00714 utf8_to_unicode(const unsigned char *c)
00715 {
00716 if ((*c & 0x80) == 0)
00717 return (pg_wchar) c[0];
00718 else if ((*c & 0xe0) == 0xc0)
00719 return (pg_wchar) (((c[0] & 0x1f) << 6) |
00720 (c[1] & 0x3f));
00721 else if ((*c & 0xf0) == 0xe0)
00722 return (pg_wchar) (((c[0] & 0x0f) << 12) |
00723 ((c[1] & 0x3f) << 6) |
00724 (c[2] & 0x3f));
00725 else if ((*c & 0xf8) == 0xf0)
00726 return (pg_wchar) (((c[0] & 0x07) << 18) |
00727 ((c[1] & 0x3f) << 12) |
00728 ((c[2] & 0x3f) << 6) |
00729 (c[3] & 0x3f));
00730 else
00731
00732 return 0xffffffff;
00733 }
00734
00735 static int
00736 pg_utf_dsplen(const unsigned char *s)
00737 {
00738 return ucs_wcwidth(utf8_to_unicode(s));
00739 }
00740
00741
00742
00743
00744
00745
00746
00747 static int
00748 pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00749 {
00750 int cnt = 0;
00751
00752 while (len > 0 && *from)
00753 {
00754 if (IS_LC1(*from) && len >= 2)
00755 {
00756 *to = *from++ << 16;
00757 *to |= *from++;
00758 len -= 2;
00759 }
00760 else if (IS_LCPRV1(*from) && len >= 3)
00761 {
00762 from++;
00763 *to = *from++ << 16;
00764 *to |= *from++;
00765 len -= 3;
00766 }
00767 else if (IS_LC2(*from) && len >= 3)
00768 {
00769 *to = *from++ << 16;
00770 *to |= *from++ << 8;
00771 *to |= *from++;
00772 len -= 3;
00773 }
00774 else if (IS_LCPRV2(*from) && len >= 4)
00775 {
00776 from++;
00777 *to = *from++ << 16;
00778 *to |= *from++ << 8;
00779 *to |= *from++;
00780 len -= 4;
00781 }
00782 else
00783 {
00784 *to = (unsigned char) *from++;
00785 len--;
00786 }
00787 to++;
00788 cnt++;
00789 }
00790 *to = 0;
00791 return cnt;
00792 }
00793
00794
00795
00796
00797
00798
00799
00800 static int
00801 pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
00802 {
00803 int cnt = 0;
00804
00805 while (len > 0 && *from)
00806 {
00807 unsigned char lb;
00808
00809 lb = (*from >> 16) & 0xff;
00810 if (IS_LC1(lb))
00811 {
00812 *to++ = lb;
00813 *to++ = *from & 0xff;
00814 cnt += 2;
00815 }
00816 else if (IS_LC2(lb))
00817 {
00818 *to++ = lb;
00819 *to++ = (*from >> 8) & 0xff;
00820 *to++ = *from & 0xff;
00821 cnt += 3;
00822 }
00823 else if (IS_LCPRV1_A_RANGE(lb))
00824 {
00825 *to++ = LCPRV1_A;
00826 *to++ = lb;
00827 *to++ = *from & 0xff;
00828 cnt += 3;
00829 }
00830 else if (IS_LCPRV1_B_RANGE(lb))
00831 {
00832 *to++ = LCPRV1_B;
00833 *to++ = lb;
00834 *to++ = *from & 0xff;
00835 cnt += 3;
00836 }
00837 else if (IS_LCPRV2_A_RANGE(lb))
00838 {
00839 *to++ = LCPRV2_A;
00840 *to++ = lb;
00841 *to++ = (*from >> 8) & 0xff;
00842 *to++ = *from & 0xff;
00843 cnt += 4;
00844 }
00845 else if (IS_LCPRV2_B_RANGE(lb))
00846 {
00847 *to++ = LCPRV2_B;
00848 *to++ = lb;
00849 *to++ = (*from >> 8) & 0xff;
00850 *to++ = *from & 0xff;
00851 cnt += 4;
00852 }
00853 else
00854 {
00855 *to++ = *from & 0xff;
00856 cnt += 1;
00857 }
00858 from++;
00859 len--;
00860 }
00861 *to = 0;
00862 return cnt;
00863 }
00864
/*
 * Byte length of a MULE internal code character, decided from its
 * first byte: 2 for IS_LC1, 3 for IS_LCPRV1, 3 for IS_LC2, 4 for
 * IS_LCPRV2, and 1 for anything else.  The tests are applied in that
 * order.
 */
int
pg_mule_mblen(const unsigned char *s)
{
    if (IS_LC1(*s))
        return 2;
    if (IS_LCPRV1(*s))
        return 3;
    if (IS_LC2(*s))
        return 3;
    if (IS_LCPRV2(*s))
        return 4;

    return 1;
}
00882
/*
 * Display length of a MULE internal code character, decided from its
 * first byte: 1 for IS_LC1 or IS_LCPRV1, 2 for IS_LC2 or IS_LCPRV2,
 * and 1 for anything else.  Unlike the other dsplen routines in this
 * file, the fallback does not go through pg_ascii_dsplen().
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
    if (IS_LC1(*s) || IS_LCPRV1(*s))
        return 1;
    if (IS_LC2(*s) || IS_LCPRV2(*s))
        return 2;

    return 1;
}
00907
00908
00909
00910
00911 static int
00912 pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
00913 {
00914 int cnt = 0;
00915
00916 while (len > 0 && *from)
00917 {
00918 *to++ = *from++;
00919 len--;
00920 cnt++;
00921 }
00922 *to = 0;
00923 return cnt;
00924 }
00925
00926
00927
00928
00929
00930
00931
00932
00933 static int
00934 pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
00935 {
00936 int cnt = 0;
00937
00938 while (len > 0 && *from)
00939 {
00940 *to++ = *from++;
00941 len--;
00942 cnt++;
00943 }
00944 *to = 0;
00945 return cnt;
00946 }
00947
/*
 * Byte length of the character starting at "s": always one byte for
 * the single-byte encodings.
 */
static int
pg_latin1_mblen(const unsigned char *s)
{
    (void) s;                   /* the byte value is irrelevant here */
    return 1;
}
00953
/*
 * Display length of a single-byte character; same rule as
 * pg_ascii_dsplen().
 */
static int
pg_latin1_dsplen(const unsigned char *s)
{
    const int   width = pg_ascii_dsplen(s);

    return width;
}
00959
00960
00961
00962
/*
 * Byte length of an SJIS character, decided from its first byte:
 * bytes in 0xa1..0xdf are single-byte, any other byte with the high
 * bit set starts a two-byte character, and everything else is
 * single-byte.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
    const int   single_byte_range = (*s >= 0xa1 && *s <= 0xdf);

    if (!single_byte_range && IS_HIGHBIT_SET(*s))
        return 2;

    return 1;
}
00976
/*
 * Display length of an SJIS character, decided from its first byte:
 * 1 for bytes in 0xa1..0xdf, 2 for any other byte with the high bit
 * set, otherwise the result of pg_ascii_dsplen().
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
00990
00991
00992
00993
/*
 * Byte length of a BIG5 character: 2 when the first byte has its high
 * bit set, 1 otherwise.
 */
static int
pg_big5_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
01005
/*
 * Display length of a BIG5 character: 2 when the first byte has its
 * high bit set, otherwise the result of pg_ascii_dsplen().
 */
static int
pg_big5_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
01017
01018
01019
01020
/*
 * Byte length of a GBK character: 2 when the first byte has its high
 * bit set, 1 otherwise.
 */
static int
pg_gbk_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
01032
/*
 * Display length of a GBK character: 2 when the first byte has its
 * high bit set, otherwise the result of pg_ascii_dsplen().
 */
static int
pg_gbk_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
01044
01045
01046
01047
/*
 * Byte length of a UHC character: 2 when the first byte has its high
 * bit set, 1 otherwise.
 */
static int
pg_uhc_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
01059
/*
 * Display length of a UHC character: 2 when the first byte has its
 * high bit set, otherwise the result of pg_ascii_dsplen().
 */
static int
pg_uhc_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
01071
01072
01073
01074
01075
/*
 * Byte length of a GB18030 character.
 *
 * A first byte without the high bit is a single byte.  Otherwise the
 * second byte decides: a value in 0x30..0x39 means a four-byte
 * character, and every other value (including ones outside the
 * 0x40..0x7e and 0x80..0xfe ranges) is treated as the second byte of a
 * two-byte character.  Note that the second byte is read whenever the
 * first byte has its high bit set.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
    unsigned char second;

    if (!IS_HIGHBIT_SET(*s))
        return 1;

    second = s[1];
    if (second >= 0x30 && second <= 0x39)
        return 4;

    return 2;
}
01094
/*
 * Display length of a GB18030 character: 2 when the first byte has its
 * high bit set, otherwise the result of pg_ascii_dsplen().
 */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
/*
 * Verifier for the ASCII encoding: accepts any byte and reports a
 * character length of 1.  Neither argument is inspected.
 */
static int
pg_ascii_verifier(const unsigned char *s, int len)
{
    (void) s;
    (void) len;
    return 1;
}
01129
01130 #define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
01131
01132 static int
01133 pg_eucjp_verifier(const unsigned char *s, int len)
01134 {
01135 int l;
01136 unsigned char c1,
01137 c2;
01138
01139 c1 = *s++;
01140
01141 switch (c1)
01142 {
01143 case SS2:
01144 l = 2;
01145 if (l > len)
01146 return -1;
01147 c2 = *s++;
01148 if (c2 < 0xa1 || c2 > 0xdf)
01149 return -1;
01150 break;
01151
01152 case SS3:
01153 l = 3;
01154 if (l > len)
01155 return -1;
01156 c2 = *s++;
01157 if (!IS_EUC_RANGE_VALID(c2))
01158 return -1;
01159 c2 = *s++;
01160 if (!IS_EUC_RANGE_VALID(c2))
01161 return -1;
01162 break;
01163
01164 default:
01165 if (IS_HIGHBIT_SET(c1))
01166 {
01167 l = 2;
01168 if (l > len)
01169 return -1;
01170 if (!IS_EUC_RANGE_VALID(c1))
01171 return -1;
01172 c2 = *s++;
01173 if (!IS_EUC_RANGE_VALID(c2))
01174 return -1;
01175 }
01176 else
01177
01178 {
01179 l = 1;
01180 }
01181 break;
01182 }
01183
01184 return l;
01185 }
01186
/*
 * Verify the EUC_KR character starting at "s", with "len" bytes
 * available.
 *
 * A first byte without the high bit is accepted as a 1-byte character.
 * Otherwise the character is 2 bytes long and both bytes must satisfy
 * IS_EUC_RANGE_VALID.  Returns the byte length, or -1 if the character
 * would run past "len" or a byte is out of range.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
    const unsigned char lead = s[0];

    /* no high bit: plain single byte */
    if (!IS_HIGHBIT_SET(lead))
        return 1;

    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(lead) || !IS_EUC_RANGE_VALID(s[1]))
        return -1;
    return 2;
}


/* EUC_CN is verified with exactly the same routine as EUC_KR. */
#define pg_euccn_verifier pg_euckr_verifier
01218
01219 static int
01220 pg_euctw_verifier(const unsigned char *s, int len)
01221 {
01222 int l;
01223 unsigned char c1,
01224 c2;
01225
01226 c1 = *s++;
01227
01228 switch (c1)
01229 {
01230 case SS2:
01231 l = 4;
01232 if (l > len)
01233 return -1;
01234 c2 = *s++;
01235 if (c2 < 0xa1 || c2 > 0xa7)
01236 return -1;
01237 c2 = *s++;
01238 if (!IS_EUC_RANGE_VALID(c2))
01239 return -1;
01240 c2 = *s++;
01241 if (!IS_EUC_RANGE_VALID(c2))
01242 return -1;
01243 break;
01244
01245 case SS3:
01246 return -1;
01247
01248 default:
01249 if (IS_HIGHBIT_SET(c1))
01250 {
01251 l = 2;
01252 if (l > len)
01253 return -1;
01254
01255 c2 = *s++;
01256 if (!IS_EUC_RANGE_VALID(c2))
01257 return -1;
01258 }
01259 else
01260
01261 {
01262 l = 1;
01263 }
01264 break;
01265 }
01266 return l;
01267 }
01268
/*
 * Verify the JOHAB character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_johab_mblen().  Returns -1 if that exceeds
 * "len".  A first byte without the high bit is accepted as is; for
 * other characters every byte after the first must satisfy
 * IS_EUC_RANGE_VALID.  Returns the byte length on success.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_johab_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    if (!IS_HIGHBIT_SET(*s))
        return mbl;

    for (i = 1; i < mbl; i++)
    {
        const unsigned char trail = s[i];

        if (!IS_EUC_RANGE_VALID(trail))
            return -1;
    }

    return mbl;
}
01292
/*
 * Verify the MULE internal code character starting at "s", with "len"
 * bytes available.
 *
 * The length comes from pg_mule_mblen().  Returns -1 if that exceeds
 * "len" or if any byte after the first lacks the high bit; otherwise
 * returns the byte length.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_mule_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        const unsigned char trail = s[i];

        if (!IS_HIGHBIT_SET(trail))
            return -1;
    }

    return mbl;
}
01313
/*
 * Verifier for the single-byte encodings: accepts any byte and reports
 * a character length of 1.  Neither argument is inspected.
 */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
    (void) s;
    (void) len;
    return 1;
}
01319
/*
 * Verify the SJIS character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_sjis_mblen().  Returns -1 if that exceeds
 * "len".  Single-byte characters are accepted as is; for two-byte
 * characters the first byte must satisfy ISSJISHEAD and the second
 * ISSJISTAIL.  Returns the byte length on success.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_sjis_mblen(s);
    unsigned char head;
    unsigned char tail;

    if (len < mbl)
        return -1;

    if (mbl == 1)
        return mbl;

    head = s[0];
    tail = s[1];
    if (ISSJISHEAD(head) && ISSJISTAIL(tail))
        return mbl;

    return -1;
}
01342
/*
 * Verify the BIG5 character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_big5_mblen().  Returns -1 if that exceeds
 * "len" or if any byte after the first is zero; otherwise returns the
 * byte length.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_big5_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
01362
/*
 * Verify the GBK character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_gbk_mblen().  Returns -1 if that exceeds
 * "len" or if any byte after the first is zero; otherwise returns the
 * byte length.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_gbk_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
01382
/*
 * Verify the UHC character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_uhc_mblen().  Returns -1 if that exceeds
 * "len" or if any byte after the first is zero; otherwise returns the
 * byte length.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_uhc_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
01402
/*
 * Verify the GB18030 character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_gb18030_mblen().  Returns -1 if that
 * exceeds "len" or if any byte after the first is zero; otherwise
 * returns the byte length.
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_gb18030_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
01422
/*
 * Verify the UTF-8 character starting at "s", with "len" bytes
 * available.
 *
 * The length comes from pg_utf_mblen().  Returns that length when it
 * fits within "len" and pg_utf8_islegal() accepts the sequence;
 * returns -1 otherwise.
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_utf_mblen(s);

    if (len < mbl || !pg_utf8_islegal(s, mbl))
        return -1;

    return mbl;
}
01436
01437
01438
01439
01440
01441
01442
01443
01444
01445
01446
01447
01448
01449
01450
/*
 * Check whether the "length" bytes at "source" form one legal UTF-8
 * sequence.
 *
 * Lengths other than 1..4 are rejected.  Bytes are examined from the
 * last one back to the first:
 *   - 4th and 3rd bytes (when present) must be in 0x80..0xBF;
 *   - the 2nd byte must be in 0x80..0xBF, narrowed for certain lead
 *     bytes: 0xE0 needs >= 0xA0, 0xED needs <= 0x9F, 0xF0 needs
 *     >= 0x90, 0xF4 needs <= 0x8F;
 *   - the lead byte must not be in 0x80..0xC1 and must not exceed 0xF4.
 * The caller is expected to pass the length implied by the lead byte;
 * this routine does not cross-check the two.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
    unsigned char lead;

    if (length < 1 || length > 4)
        return false;

    if (length >= 4 && (source[3] < 0x80 || source[3] > 0xBF))
        return false;

    if (length >= 3 && (source[2] < 0x80 || source[2] > 0xBF))
        return false;

    if (length >= 2)
    {
        const unsigned char second = source[1];
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        /* some lead bytes restrict the allowed range of the 2nd byte */
        switch (*source)
        {
            case 0xE0:
                lo = 0xA0;
                break;
            case 0xED:
                hi = 0x9F;
                break;
            case 0xF0:
                lo = 0x90;
                break;
            case 0xF4:
                hi = 0x8F;
                break;
            default:
                break;
        }

        if (second < lo || second > hi)
            return false;
    }

    lead = *source;
    if (lead >= 0x80 && lead < 0xC2)
        return false;
    if (lead > 0xF4)
        return false;

    return true;
}
01507
01508 #ifndef FRONTEND
01509
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519
01520 static bool
01521 pg_generic_charinc(unsigned char *charptr, int len)
01522 {
01523 unsigned char *lastbyte = charptr + len - 1;
01524 mbverifier mbverify;
01525
01526
01527 mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
01528
01529 while (*lastbyte < (unsigned char) 255)
01530 {
01531 (*lastbyte)++;
01532 if ((*mbverify) (charptr, len) == len)
01533 return true;
01534 }
01535
01536 return false;
01537 }
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
/*
 * Increment the UTF-8 character of "length" bytes at "charptr" in
 * place.
 *
 * Lengths other than 1..4 are rejected.  Working from the last byte
 * towards the first, the first byte that is still below its ceiling is
 * incremented and the function returns true.  The ceiling is 0xBF for
 * the 3rd and 4th bytes; for the 2nd byte it is 0x9F after lead 0xED,
 * 0x8F after lead 0xF4, and 0xBF otherwise.  If every trailing byte is
 * at its ceiling the lead byte is incremented instead, unless it is
 * 0x7F, 0xDF, 0xEF or 0xF4, in which case false is returned and
 * nothing is changed.  Trailing bytes are not reset when an earlier
 * byte is incremented.
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
    unsigned char lead;

    if (length < 1 || length > 4)
        return false;

    if (length >= 4 && charptr[3] < 0xBF)
    {
        charptr[3]++;
        return true;
    }

    if (length >= 3 && charptr[2] < 0xBF)
    {
        charptr[2]++;
        return true;
    }

    if (length >= 2)
    {
        unsigned char ceiling;

        if (*charptr == 0xED)
            ceiling = 0x9F;
        else if (*charptr == 0xF4)
            ceiling = 0x8F;
        else
            ceiling = 0xBF;

        if (charptr[1] < ceiling)
        {
            charptr[1]++;
            return true;
        }
    }

    /* all trailing bytes are maxed out: try the lead byte */
    lead = charptr[0];
    if (lead == 0x7F || lead == 0xDF || lead == 0xEF || lead == 0xF4)
        return false;

    charptr[0]++;
    return true;
}
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632 static bool
01633 pg_eucjp_increment(unsigned char *charptr, int length)
01634 {
01635 unsigned char c1,
01636 c2;
01637 int i;
01638
01639 c1 = *charptr;
01640
01641 switch (c1)
01642 {
01643 case SS2:
01644 if (length != 2)
01645 return false;
01646
01647 c2 = charptr[1];
01648
01649 if (c2 >= 0xdf)
01650 charptr[0] = charptr[1] = 0xa1;
01651 else if (c2 < 0xa1)
01652 charptr[1] = 0xa1;
01653 else
01654 charptr[1]++;
01655 break;
01656
01657 case SS3:
01658 if (length != 3)
01659 return false;
01660
01661 for (i = 2; i > 0; i--)
01662 {
01663 c2 = charptr[i];
01664 if (c2 < 0xa1)
01665 {
01666 charptr[i] = 0xa1;
01667 return true;
01668 }
01669 else if (c2 < 0xfe)
01670 {
01671 charptr[i]++;
01672 return true;
01673 }
01674 }
01675
01676
01677 return false;
01678
01679 default:
01680 if (IS_HIGHBIT_SET(c1))
01681 {
01682 if (length != 2)
01683 return false;
01684
01685 for (i = 1; i >= 0; i--)
01686 {
01687 c2 = charptr[i];
01688 if (c2 < 0xa1)
01689 {
01690 charptr[i] = 0xa1;
01691 return true;
01692 }
01693 else if (c2 < 0xfe)
01694 {
01695 charptr[i]++;
01696 return true;
01697 }
01698 }
01699
01700
01701 return false;
01702 }
01703 else
01704 {
01705 if (c1 > 0x7e)
01706 return false;
01707 (*charptr)++;
01708 }
01709 break;
01710 }
01711
01712 return true;
01713 }
01714 #endif
01715
01716
01717
01718
01719
01720
01721
01722
01723 pg_wchar_tbl pg_wchar_table[] = {
01724 {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1},
01725 {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},
01726 {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2},
01727 {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3},
01728 {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4},
01729 {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},
01730 {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4},
01731 {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4},
01732 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01733 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01734 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01735 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01736 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01737 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01738 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01739 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01740 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01741 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01742 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01743 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01744 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01745 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01746 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01747 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01748 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01749 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01750 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01751 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01752 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01753 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01754 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01755 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01756 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01757 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01758 {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},
01759 {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2},
01760 {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2},
01761 {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2},
01762 {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2},
01763 {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4},
01764 {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3},
01765 {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}
01766 };
01767
01768
/*
 * Length routine for MULE internal code.
 *
 * Simply hands "mbstr" to pg_mule_mblen() and returns whatever that
 * routine reports for the character starting there.
 */
int
pg_mic_mblen(const unsigned char *mbstr)
{
	int			nbytes = pg_mule_mblen(mbstr);

	return nbytes;
}
01774
01775
01776
01777
01778 int
01779 pg_encoding_mblen(int encoding, const char *mbstr)
01780 {
01781 Assert(PG_VALID_ENCODING(encoding));
01782
01783 return ((encoding >= 0 &&
01784 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
01785 ((*pg_wchar_table[encoding].mblen) ((const unsigned char *) mbstr)) :
01786 ((*pg_wchar_table[PG_SQL_ASCII].mblen) ((const unsigned char *) mbstr)));
01787 }
01788
01789
01790
01791
01792 int
01793 pg_encoding_dsplen(int encoding, const char *mbstr)
01794 {
01795 Assert(PG_VALID_ENCODING(encoding));
01796
01797 return ((encoding >= 0 &&
01798 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
01799 ((*pg_wchar_table[encoding].dsplen) ((const unsigned char *) mbstr)) :
01800 ((*pg_wchar_table[PG_SQL_ASCII].dsplen) ((const unsigned char *) mbstr)));
01801 }
01802
01803
01804
01805
01806
01807
01808 int
01809 pg_encoding_verifymb(int encoding, const char *mbstr, int len)
01810 {
01811 Assert(PG_VALID_ENCODING(encoding));
01812
01813 return ((encoding >= 0 &&
01814 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
01815 ((*pg_wchar_table[encoding].mbverify) ((const unsigned char *) mbstr, len)) :
01816 ((*pg_wchar_table[PG_SQL_ASCII].mbverify) ((const unsigned char *) mbstr, len)));
01817 }
01818
01819
01820
01821
01822 int
01823 pg_encoding_max_length(int encoding)
01824 {
01825 Assert(PG_VALID_ENCODING(encoding));
01826
01827 return pg_wchar_table[encoding].maxmblen;
01828 }
01829
01830 #ifndef FRONTEND
01831
01832
01833
01834
01835 int
01836 pg_database_encoding_max_length(void)
01837 {
01838 return pg_wchar_table[GetDatabaseEncoding()].maxmblen;
01839 }
01840
01841
01842
01843
01844 mbcharacter_incrementer
01845 pg_database_encoding_character_incrementer(void)
01846 {
01847
01848
01849
01850
01851 switch (GetDatabaseEncoding())
01852 {
01853 case PG_UTF8:
01854 return pg_utf8_increment;
01855
01856 case PG_EUC_JP:
01857 return pg_eucjp_increment;
01858
01859 default:
01860 return pg_generic_charinc;
01861 }
01862 }
01863
01864
01865
01866
01867
/*
 * Verify "len" bytes at "mbstr" against the encoding reported by
 * GetDatabaseEncoding().
 *
 * Returns true when pg_verify_mbstr_len() yields a non-negative count,
 * false when it yields a negative one.  "noError" is passed through
 * unchanged.
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
	int			nchars;

	nchars = pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError);

	return nchars >= 0;
}
01874
01875
01876
01877
01878
/*
 * Verify "len" bytes at "mbstr" against the caller-specified encoding.
 *
 * Returns true when pg_verify_mbstr_len() yields a non-negative count,
 * false when it yields a negative one.  "noError" is passed through
 * unchanged.
 */
bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{
	int			nchars;

	nchars = pg_verify_mbstr_len(encoding, mbstr, len, noError);

	return nchars >= 0;
}
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896 int
01897 pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
01898 {
01899 mbverifier mbverify;
01900 int mb_len;
01901
01902 Assert(PG_VALID_ENCODING(encoding));
01903
01904
01905
01906
01907 if (pg_encoding_max_length(encoding) <= 1)
01908 {
01909 const char *nullpos = memchr(mbstr, 0, len);
01910
01911 if (nullpos == NULL)
01912 return len;
01913 if (noError)
01914 return -1;
01915 report_invalid_encoding(encoding, nullpos, 1);
01916 }
01917
01918
01919 mbverify = pg_wchar_table[encoding].mbverify;
01920
01921 mb_len = 0;
01922
01923 while (len > 0)
01924 {
01925 int l;
01926
01927
01928 if (!IS_HIGHBIT_SET(*mbstr))
01929 {
01930 if (*mbstr != '\0')
01931 {
01932 mb_len++;
01933 mbstr++;
01934 len--;
01935 continue;
01936 }
01937 if (noError)
01938 return -1;
01939 report_invalid_encoding(encoding, mbstr, len);
01940 }
01941
01942 l = (*mbverify) ((const unsigned char *) mbstr, len);
01943
01944 if (l < 0)
01945 {
01946 if (noError)
01947 return -1;
01948 report_invalid_encoding(encoding, mbstr, len);
01949 }
01950
01951 mbstr += l;
01952 len -= l;
01953 mb_len++;
01954 }
01955 return mb_len;
01956 }
01957
01958
01959
01960
01961
01962
01963
01964
01965
01966
01967
01968 void
01969 check_encoding_conversion_args(int src_encoding,
01970 int dest_encoding,
01971 int len,
01972 int expected_src_encoding,
01973 int expected_dest_encoding)
01974 {
01975 if (!PG_VALID_ENCODING(src_encoding))
01976 elog(ERROR, "invalid source encoding ID: %d", src_encoding);
01977 if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
01978 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
01979 pg_enc2name_tbl[expected_src_encoding].name,
01980 pg_enc2name_tbl[src_encoding].name);
01981 if (!PG_VALID_ENCODING(dest_encoding))
01982 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
01983 if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
01984 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
01985 pg_enc2name_tbl[expected_dest_encoding].name,
01986 pg_enc2name_tbl[dest_encoding].name);
01987 if (len < 0)
01988 elog(ERROR, "encoding conversion length must not be negative");
01989 }
01990
01991
01992
01993
01994
01995
01996
01997 void
01998 report_invalid_encoding(int encoding, const char *mbstr, int len)
01999 {
02000 int l = pg_encoding_mblen(encoding, mbstr);
02001 char buf[8 * 5 + 1];
02002 char *p = buf;
02003 int j,
02004 jlimit;
02005
02006 jlimit = Min(l, len);
02007 jlimit = Min(jlimit, 8);
02008
02009 for (j = 0; j < jlimit; j++)
02010 {
02011 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
02012 if (j < jlimit - 1)
02013 p += sprintf(p, " ");
02014 }
02015
02016 ereport(ERROR,
02017 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
02018 errmsg("invalid byte sequence for encoding \"%s\": %s",
02019 pg_enc2name_tbl[encoding].name,
02020 buf)));
02021 }
02022
02023
02024
02025
02026
02027
02028
02029 void
02030 report_untranslatable_char(int src_encoding, int dest_encoding,
02031 const char *mbstr, int len)
02032 {
02033 int l = pg_encoding_mblen(src_encoding, mbstr);
02034 char buf[8 * 5 + 1];
02035 char *p = buf;
02036 int j,
02037 jlimit;
02038
02039 jlimit = Min(l, len);
02040 jlimit = Min(jlimit, 8);
02041
02042 for (j = 0; j < jlimit; j++)
02043 {
02044 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
02045 if (j < jlimit - 1)
02046 p += sprintf(p, " ");
02047 }
02048
02049 ereport(ERROR,
02050 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
02051 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
02052 buf,
02053 pg_enc2name_tbl[src_encoding].name,
02054 pg_enc2name_tbl[dest_encoding].name)));
02055 }
02056
02057 #endif