00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "postgres.h"
00014 #include "mb/pg_wchar.h"
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
/*
 * latin2mic
 *
 * Copy "len" bytes from "l" to "p".  Every byte that has its high bit set
 * is preceded in the output by the byte "lc"; all other bytes are copied
 * unchanged.  The output is terminated with a NUL byte.
 *
 * A zero byte in the input is reported through report_invalid_encoding()
 * using "encoding", the current input position and the remaining length.
 *
 * No bounds checking is done on "p": each input byte produces at most two
 * output bytes, plus one byte for the terminator.
 */
void
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding)
{
	for (; len > 0; l++, len--)
	{
		int			ch = *l;

		if (ch == 0)
			report_invalid_encoding(encoding, (const char *) l, len);

		/* high-bit bytes get the "lc" prefix byte in front of them */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}
	*p = '\0';
}
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 void
00054 mic2latin(const unsigned char *mic, unsigned char *p, int len,
00055 int lc, int encoding)
00056 {
00057 int c1;
00058
00059 while (len > 0)
00060 {
00061 c1 = *mic;
00062 if (c1 == 0)
00063 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
00064 if (!IS_HIGHBIT_SET(c1))
00065 {
00066
00067 *p++ = c1;
00068 mic++;
00069 len--;
00070 }
00071 else
00072 {
00073 int l = pg_mic_mblen(mic);
00074
00075 if (len < l)
00076 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
00077 len);
00078 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
00079 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
00080 (const char *) mic, len);
00081 *p++ = mic[1];
00082 mic += 2;
00083 len -= 2;
00084 }
00085 }
00086 *p = '\0';
00087 }
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097 void
00098 pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
00099 {
00100 int c1;
00101
00102 while (len > 0)
00103 {
00104 c1 = *l;
00105 if (c1 == 0 || IS_HIGHBIT_SET(c1))
00106 report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
00107 *p++ = c1;
00108 l++;
00109 len--;
00110 }
00111 *p = '\0';
00112 }
00113
00114
00115
00116
00117 void
00118 pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
00119 {
00120 int c1;
00121
00122 while (len > 0)
00123 {
00124 c1 = *mic;
00125 if (c1 == 0 || IS_HIGHBIT_SET(c1))
00126 report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
00127 (const char *) mic, len);
00128 *p++ = c1;
00129 mic++;
00130 len--;
00131 }
00132 *p = '\0';
00133 }
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147 void
00148 latin2mic_with_table(const unsigned char *l,
00149 unsigned char *p,
00150 int len,
00151 int lc,
00152 int encoding,
00153 const unsigned char *tab)
00154 {
00155 unsigned char c1,
00156 c2;
00157
00158 while (len > 0)
00159 {
00160 c1 = *l;
00161 if (c1 == 0)
00162 report_invalid_encoding(encoding, (const char *) l, len);
00163 if (!IS_HIGHBIT_SET(c1))
00164 *p++ = c1;
00165 else
00166 {
00167 c2 = tab[c1 - HIGHBIT];
00168 if (c2)
00169 {
00170 *p++ = lc;
00171 *p++ = c2;
00172 }
00173 else
00174 report_untranslatable_char(encoding, PG_MULE_INTERNAL,
00175 (const char *) l, len);
00176 }
00177 l++;
00178 len--;
00179 }
00180 *p = '\0';
00181 }
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195 void
00196 mic2latin_with_table(const unsigned char *mic,
00197 unsigned char *p,
00198 int len,
00199 int lc,
00200 int encoding,
00201 const unsigned char *tab)
00202 {
00203 unsigned char c1,
00204 c2;
00205
00206 while (len > 0)
00207 {
00208 c1 = *mic;
00209 if (c1 == 0)
00210 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
00211 if (!IS_HIGHBIT_SET(c1))
00212 {
00213
00214 *p++ = c1;
00215 mic++;
00216 len--;
00217 }
00218 else
00219 {
00220 int l = pg_mic_mblen(mic);
00221
00222 if (len < l)
00223 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
00224 len);
00225 if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
00226 (c2 = tab[mic[1] - HIGHBIT]) == 0)
00227 {
00228 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
00229 (const char *) mic, len);
00230 break;
00231 }
00232 *p++ = c2;
00233 mic += 2;
00234 len -= 2;
00235 }
00236 }
00237 *p = '\0';
00238 }
00239
00240
00241
00242
00243
00244 static int
00245 compare1(const void *p1, const void *p2)
00246 {
00247 uint32 v1,
00248 v2;
00249
00250 v1 = *(const uint32 *) p1;
00251 v2 = ((const pg_utf_to_local *) p2)->utf;
00252 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
00253 }
00254
00255
00256
00257
00258
00259 static int
00260 compare2(const void *p1, const void *p2)
00261 {
00262 uint32 v1,
00263 v2;
00264
00265 v1 = *(const uint32 *) p1;
00266 v2 = ((const pg_local_to_utf *) p2)->code;
00267 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
00268 }
00269
00270
00271
00272
00273
00274 static int
00275 compare3(const void *p1, const void *p2)
00276 {
00277 uint32 s1,
00278 s2,
00279 d1,
00280 d2;
00281
00282 s1 = *(const uint32 *) p1;
00283 s2 = *((const uint32 *) p1 + 1);
00284 d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
00285 d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
00286 return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
00287 }
00288
00289
00290
00291
00292
00293 static int
00294 compare4(const void *p1, const void *p2)
00295 {
00296 uint32 v1,
00297 v2;
00298
00299 v1 = *(const uint32 *) p1;
00300 v2 = ((const pg_local_to_utf_combined *) p2)->code;
00301 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
00302 }
00303
00304
00305
00306
00307 static unsigned char *
00308 set_iso_code(unsigned char *iso, uint32 code)
00309 {
00310 if (code & 0xff000000)
00311 *iso++ = code >> 24;
00312 if (code & 0x00ff0000)
00313 *iso++ = (code & 0x00ff0000) >> 16;
00314 if (code & 0x0000ff00)
00315 *iso++ = (code & 0x0000ff00) >> 8;
00316 if (code & 0x000000ff)
00317 *iso++ = code & 0x000000ff;
00318 return iso;
00319 }
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335 void
00336 UtfToLocal(const unsigned char *utf, unsigned char *iso,
00337 const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
00338 int size1, int size2, int encoding, int len)
00339 {
00340 uint32 iutf;
00341 uint32 cutf[2];
00342 uint32 code;
00343 pg_utf_to_local *p;
00344 pg_utf_to_local_combined *cp;
00345 int l;
00346
00347 for (; len > 0; len -= l)
00348 {
00349
00350 if (*utf == '\0')
00351 break;
00352
00353 l = pg_utf_mblen(utf);
00354
00355 if (len < l)
00356 break;
00357
00358 if (!pg_utf8_islegal(utf, l))
00359 break;
00360
00361 if (l == 1)
00362 {
00363
00364 *iso++ = *utf++;
00365 continue;
00366 }
00367 else if (l == 2)
00368 {
00369 iutf = *utf++ << 8;
00370 iutf |= *utf++;
00371 }
00372 else if (l == 3)
00373 {
00374 iutf = *utf++ << 16;
00375 iutf |= *utf++ << 8;
00376 iutf |= *utf++;
00377 }
00378 else if (l == 4)
00379 {
00380 iutf = *utf++ << 24;
00381 iutf |= *utf++ << 16;
00382 iutf |= *utf++ << 8;
00383 iutf |= *utf++;
00384 }
00385
00386
00387
00388
00389 if (cmap && len > l)
00390 {
00391 const unsigned char *utf_save = utf;
00392 int len_save = len;
00393 int l_save = l;
00394
00395 len -= l;
00396
00397 l = pg_utf_mblen(utf);
00398 if (len < l)
00399 break;
00400
00401 if (!pg_utf8_islegal(utf, l))
00402 break;
00403
00404 cutf[0] = iutf;
00405
00406 if (l == 1)
00407 {
00408 if (len_save > 1)
00409 {
00410 p = bsearch(&cutf[0], map, size1,
00411 sizeof(pg_utf_to_local), compare1);
00412 if (p == NULL)
00413 report_untranslatable_char(PG_UTF8, encoding,
00414 (const char *) (utf_save - l_save), len_save);
00415 iso = set_iso_code(iso, p->code);
00416 }
00417
00418
00419 *iso++ = *utf++;
00420 continue;
00421 }
00422 else if (l == 2)
00423 {
00424 iutf = *utf++ << 8;
00425 iutf |= *utf++;
00426 }
00427 else if (l == 3)
00428 {
00429 iutf = *utf++ << 16;
00430 iutf |= *utf++ << 8;
00431 iutf |= *utf++;
00432 }
00433 else if (l == 4)
00434 {
00435 iutf = *utf++ << 24;
00436 iutf |= *utf++ << 16;
00437 iutf |= *utf++ << 8;
00438 iutf |= *utf++;
00439 }
00440
00441 cutf[1] = iutf;
00442 cp = bsearch(cutf, cmap, size2,
00443 sizeof(pg_utf_to_local_combined), compare3);
00444 if (cp)
00445 code = cp->code;
00446 else
00447 {
00448
00449 p = bsearch(&cutf[0], map, size1,
00450 sizeof(pg_utf_to_local), compare1);
00451 if (p == NULL)
00452 report_untranslatable_char(PG_UTF8, encoding,
00453 (const char *) (utf_save - l_save), len_save);
00454 iso = set_iso_code(iso, p->code);
00455
00456 p = bsearch(&cutf[1], map, size1,
00457 sizeof(pg_utf_to_local), compare1);
00458 if (p == NULL)
00459 report_untranslatable_char(PG_UTF8, encoding,
00460 (const char *) (utf - l), len);
00461 code = p->code;
00462 }
00463 }
00464 else
00465 {
00466 p = bsearch(&iutf, map, size1,
00467 sizeof(pg_utf_to_local), compare1);
00468 if (p == NULL)
00469 report_untranslatable_char(PG_UTF8, encoding,
00470 (const char *) (utf - l), len);
00471 code = p->code;
00472 }
00473 iso = set_iso_code(iso, code);
00474 }
00475
00476 if (len > 0)
00477 report_invalid_encoding(PG_UTF8, (const char *) utf, len);
00478
00479 *iso = '\0';
00480 }
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496 void
00497 LocalToUtf(const unsigned char *iso, unsigned char *utf,
00498 const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
00499 int size1, int size2, int encoding, int len)
00500 {
00501 unsigned int iiso;
00502 int l;
00503 pg_local_to_utf *p;
00504 pg_local_to_utf_combined *cp;
00505
00506 if (!PG_VALID_ENCODING(encoding))
00507 ereport(ERROR,
00508 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00509 errmsg("invalid encoding number: %d", encoding)));
00510
00511 for (; len > 0; len -= l)
00512 {
00513
00514 if (*iso == '\0')
00515 break;
00516
00517 if (!IS_HIGHBIT_SET(*iso))
00518 {
00519
00520 *utf++ = *iso++;
00521 l = 1;
00522 continue;
00523 }
00524
00525 l = pg_encoding_verifymb(encoding, (const char *) iso, len);
00526 if (l < 0)
00527 break;
00528
00529 if (l == 1)
00530 iiso = *iso++;
00531 else if (l == 2)
00532 {
00533 iiso = *iso++ << 8;
00534 iiso |= *iso++;
00535 }
00536 else if (l == 3)
00537 {
00538 iiso = *iso++ << 16;
00539 iiso |= *iso++ << 8;
00540 iiso |= *iso++;
00541 }
00542 else if (l == 4)
00543 {
00544 iiso = *iso++ << 24;
00545 iiso |= *iso++ << 16;
00546 iiso |= *iso++ << 8;
00547 iiso |= *iso++;
00548 }
00549
00550 p = bsearch(&iiso, map, size1,
00551 sizeof(pg_local_to_utf), compare2);
00552
00553 if (p == NULL)
00554 {
00555
00556
00557
00558
00559 if (cmap)
00560 {
00561 cp = bsearch(&iiso, cmap, size2,
00562 sizeof(pg_local_to_utf_combined), compare4);
00563
00564 if (cp)
00565 {
00566 if (cp->utf1 & 0xff000000)
00567 *utf++ = cp->utf1 >> 24;
00568 if (cp->utf1 & 0x00ff0000)
00569 *utf++ = (cp->utf1 & 0x00ff0000) >> 16;
00570 if (cp->utf1 & 0x0000ff00)
00571 *utf++ = (cp->utf1 & 0x0000ff00) >> 8;
00572 if (cp->utf1 & 0x000000ff)
00573 *utf++ = cp->utf1 & 0x000000ff;
00574
00575 if (cp->utf2 & 0xff000000)
00576 *utf++ = cp->utf2 >> 24;
00577 if (cp->utf2 & 0x00ff0000)
00578 *utf++ = (cp->utf2 & 0x00ff0000) >> 16;
00579 if (cp->utf2 & 0x0000ff00)
00580 *utf++ = (cp->utf2 & 0x0000ff00) >> 8;
00581 if (cp->utf2 & 0x000000ff)
00582 *utf++ = cp->utf2 & 0x000000ff;
00583
00584 continue;
00585 }
00586 }
00587
00588 report_untranslatable_char(encoding, PG_UTF8,
00589 (const char *) (iso - l), len);
00590
00591 }
00592 else
00593 {
00594 if (p->utf & 0xff000000)
00595 *utf++ = p->utf >> 24;
00596 if (p->utf & 0x00ff0000)
00597 *utf++ = (p->utf & 0x00ff0000) >> 16;
00598 if (p->utf & 0x0000ff00)
00599 *utf++ = (p->utf & 0x0000ff00) >> 8;
00600 if (p->utf & 0x000000ff)
00601 *utf++ = p->utf & 0x000000ff;
00602 }
00603 }
00604
00605 if (len > 0)
00606 report_invalid_encoding(encoding, (const char *) iso, len);
00607
00608 *utf = '\0';
00609 }