#include "postgres.h"
#include "mb/pg_wchar.h"
Go to the source code of this file.
Functions | |
| void | latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding) |
| void | mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding) |
| void | pg_ascii2mic (const unsigned char *l, unsigned char *p, int len) |
| void | pg_mic2ascii (const unsigned char *mic, unsigned char *p, int len) |
| void | latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab) |
| void | mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab) |
| static int | compare1 (const void *p1, const void *p2) |
| static int | compare2 (const void *p1, const void *p2) |
| static int | compare3 (const void *p1, const void *p2) |
| static int | compare4 (const void *p1, const void *p2) |
| static unsigned char * | set_iso_code (unsigned char *iso, uint32 code) |
| void | UtfToLocal (const unsigned char *utf, unsigned char *iso, const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap, int size1, int size2, int encoding, int len) |
| void | LocalToUtf (const unsigned char *iso, unsigned char *utf, const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap, int size1, int size2, int encoding, int len) |
| static int compare1 | ( | const void * | p1, | |
| const void * | p2 | |||
| ) | [static] |
Definition at line 245 of file conv.c.
Referenced by UtfToLocal().
{
	/*
	 * Comparison callback used with bsearch() over a pg_utf_to_local table.
	 * p1 points at the uint32 search key; p2 points at a table entry, which
	 * is compared through its utf field.  Returns 1, 0 or -1 when the key is
	 * greater than, equal to, or less than the entry.
	 */
	const uint32 key = *(const uint32 *) p1;
	const uint32 entry = ((const pg_utf_to_local *) p2)->utf;

	if (key > entry)
		return 1;
	if (key < entry)
		return -1;
	return 0;
}
| static int compare2 | ( | const void * | p1, | |
| const void * | p2 | |||
| ) | [static] |
Definition at line 260 of file conv.c.
Referenced by LocalToUtf().
{
	/*
	 * Comparison callback used with bsearch() over a pg_local_to_utf table.
	 * p1 points at the uint32 search key; p2 points at a table entry, which
	 * is compared through its code field.  Returns 1, 0 or -1 when the key is
	 * greater than, equal to, or less than the entry.
	 */
	const uint32 key = *(const uint32 *) p1;
	const uint32 entry = ((const pg_local_to_utf *) p2)->code;

	if (key > entry)
		return 1;
	if (key < entry)
		return -1;
	return 0;
}
| static int compare3 | ( | const void * | p1, | |
| const void * | p2 | |||
| ) | [static] |
Definition at line 275 of file conv.c.
Referenced by UtfToLocal().
{
	/*
	 * Comparison callback used with bsearch() over a
	 * pg_utf_to_local_combined table.  p1 points at two consecutive uint32
	 * values (first and second key); p2 points at a table entry whose
	 * utf1/utf2 fields are compared against them.  Ordering is
	 * lexicographic: the first key decides, the second key breaks ties.
	 */
	const uint32 *key = (const uint32 *) p1;
	const pg_utf_to_local_combined *entry = (const pg_utf_to_local_combined *) p2;

	if (key[0] != entry->utf1)
		return (key[0] > entry->utf1) ? 1 : -1;
	if (key[1] != entry->utf2)
		return (key[1] > entry->utf2) ? 1 : -1;
	return 0;
}
| static int compare4 | ( | const void * | p1, | |
| const void * | p2 | |||
| ) | [static] |
Definition at line 294 of file conv.c.
Referenced by LocalToUtf().
{
	/*
	 * Comparison callback used with bsearch() over a
	 * pg_local_to_utf_combined table.  p1 points at the uint32 search key;
	 * p2 points at a table entry, which is compared through its code field.
	 * Returns 1, 0 or -1 when the key is greater than, equal to, or less
	 * than the entry.
	 */
	const uint32 key = *(const uint32 *) p1;
	const uint32 entry = ((const pg_local_to_utf_combined *) p2)->code;

	if (key > entry)
		return 1;
	if (key < entry)
		return -1;
	return 0;
}
| void latin2mic | ( | const unsigned char * | l, | |
| unsigned char * | p, | |||
| int | len, | |||
| int | lc, | |||
| int | encoding | |||
| ) |
Definition at line 26 of file conv.c.
References IS_HIGHBIT_SET, and report_invalid_encoding().
Referenced by koi8r2mic(), latin12mic(), latin22mic(), latin32mic(), and latin42mic().
{
	/*
	 * Copy len bytes from l to p.  A byte with its high bit set is emitted
	 * as the pair (lc, byte); any other byte is copied unchanged.  A zero
	 * byte in the input is handed to report_invalid_encoding().  The output
	 * is NUL-terminated.
	 */
	for (; len > 0; l++, len--)
	{
		int			ch = *l;

		if (ch == 0)
			report_invalid_encoding(encoding, (const char *) l, len);

		/* high-bit bytes get the lc prefix byte in front of them */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}
	*p = '\0';
}
| void latin2mic_with_table | ( | const unsigned char * | l, | |
| unsigned char * | p, | |||
| int | len, | |||
| int | lc, | |||
| int | encoding, | |||
| const unsigned char * | tab | |||
| ) |
Definition at line 148 of file conv.c.
References IS_HIGHBIT_SET, PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().
Referenced by iso2mic(), win12502mic(), win12512mic(), and win8662mic().
{
	/*
	 * Copy len bytes from l to p, translating high-bit bytes through tab.
	 * tab is indexed by (byte - HIGHBIT); a nonzero entry is emitted as the
	 * pair (lc, entry), while a zero entry is handed to
	 * report_untranslatable_char().  Bytes without the high bit are copied
	 * unchanged, and a zero input byte is handed to
	 * report_invalid_encoding().  The output is NUL-terminated.
	 */
	for (; len > 0; l++, len--)
	{
		unsigned char src = *l;
		unsigned char mapped;

		if (src == 0)
			report_invalid_encoding(encoding, (const char *) l, len);

		if (!IS_HIGHBIT_SET(src))
		{
			*p++ = src;
			continue;
		}

		mapped = tab[src - HIGHBIT];
		if (mapped == 0)
		{
			report_untranslatable_char(encoding, PG_MULE_INTERNAL,
									   (const char *) l, len);
			continue;
		}

		*p++ = lc;
		*p++ = mapped;
	}
	*p = '\0';
}
| void LocalToUtf | ( | const unsigned char * | iso, | |
| unsigned char * | utf, | |||
| const pg_local_to_utf * | map, | |||
| const pg_local_to_utf_combined * | cmap, | |||
| int | size1, | |||
| int | size2, | |||
| int | encoding, | |||
| int | len | |||
| ) |
Definition at line 497 of file conv.c.
References compare2(), compare4(), ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, NULL, pg_encoding_verifymb(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), pg_local_to_utf::utf, pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.
Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().
{
	/*
	 * Convert len bytes of text in the given local encoding (iso) to UTF-8
	 * (utf), NUL-terminating the result.
	 *
	 * map (size1 entries) is searched by the character's code; cmap (size2
	 * entries) may be NULL and, when present, is consulted for codes not
	 * found in map, each hit yielding two UTF-8 sequences (utf1 then utf2).
	 * Both tables are searched with bsearch() and so must be sorted
	 * consistently with compare2()/compare4().
	 *
	 * An invalid encoding number raises an ERROR.  A zero byte or a sequence
	 * rejected by pg_encoding_verifymb() is passed to
	 * report_invalid_encoding(); a character found in neither table is
	 * passed to report_untranslatable_char().
	 */
	unsigned int iiso;
	int			l;
	int			i;
	pg_local_to_utf *p;
	pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/*
		 * Pack the l bytes of the character big-endian into iiso.  The
		 * accumulation is done on the unsigned variable so that a lead byte
		 * >= 0x80 is never left-shifted as a signed int, and iiso is always
		 * initialized whatever length was returned.
		 */
		iiso = 0;
		for (i = 0; i < l; i++)
			iiso = (iiso << 8) | *iso++;

		p = bsearch(&iiso, map, size1,
					sizeof(pg_local_to_utf), compare2);

		if (p == NULL)
		{
			/*
			 * not found in the ordinary map. if there's a combined character
			 * map, try with it
			 */
			if (cmap)
			{
				cp = bsearch(&iiso, cmap, size2,
							 sizeof(pg_local_to_utf_combined), compare4);

				if (cp)
				{
					/*
					 * Emit both UTF-8 sequences, most significant byte
					 * first, skipping zero bytes.
					 */
					if (cp->utf1 & 0xff000000)
						*utf++ = cp->utf1 >> 24;
					if (cp->utf1 & 0x00ff0000)
						*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
					if (cp->utf1 & 0x0000ff00)
						*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
					if (cp->utf1 & 0x000000ff)
						*utf++ = cp->utf1 & 0x000000ff;

					if (cp->utf2 & 0xff000000)
						*utf++ = cp->utf2 >> 24;
					if (cp->utf2 & 0x00ff0000)
						*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
					if (cp->utf2 & 0x0000ff00)
						*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
					if (cp->utf2 & 0x000000ff)
						*utf++ = cp->utf2 & 0x000000ff;

					continue;
				}
			}

			/* iso was already advanced past the character; back up to it */
			report_untranslatable_char(encoding, PG_UTF8,
									   (const char *) (iso - l), len);
		}
		else
		{
			/* emit the UTF-8 bytes, most significant first, skipping zeros */
			if (p->utf & 0xff000000)
				*utf++ = p->utf >> 24;
			if (p->utf & 0x00ff0000)
				*utf++ = (p->utf & 0x00ff0000) >> 16;
			if (p->utf & 0x0000ff00)
				*utf++ = (p->utf & 0x0000ff00) >> 8;
			if (p->utf & 0x000000ff)
				*utf++ = p->utf & 0x000000ff;
		}
	}

	/* leaving the loop early means one of the error "break"s was taken */
	if (len > 0)
		report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
}
| void mic2latin | ( | const unsigned char * | mic, | |
| unsigned char * | p, | |||
| int | len, | |||
| int | lc, | |||
| int | encoding | |||
| ) |
Definition at line 54 of file conv.c.
References IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().
Referenced by mic2koi8r(), mic2latin1(), mic2latin2(), mic2latin3(), and mic2latin4().
{
	/*
	 * Convert len bytes of MULE internal code (mic) to a single-byte
	 * encoding, writing a NUL-terminated result to p.
	 *
	 * Bytes without the high bit are copied unchanged.  A high-bit lead byte
	 * must equal lc, start a 2-byte character according to pg_mic_mblen(),
	 * and be followed by a high-bit byte, which becomes the output byte;
	 * anything else is handed to report_untranslatable_char().  A zero byte
	 * or a character extending past len is handed to
	 * report_invalid_encoding().
	 */
	while (len > 0)
	{
		int			lead = *mic;
		int			nbytes;

		if (lead == 0)
			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);

		if (!IS_HIGHBIT_SET(lead))
		{
			/* easy for ASCII */
			*p++ = lead;
			mic++;
			len--;
			continue;
		}

		nbytes = pg_mic_mblen(mic);
		if (len < nbytes)
			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
									len);

		if (!(nbytes == 2 && lead == lc && IS_HIGHBIT_SET(mic[1])))
			report_untranslatable_char(PG_MULE_INTERNAL, encoding,
									   (const char *) mic, len);

		/* drop the lc prefix and keep only the trailing byte */
		*p++ = mic[1];
		mic += 2;
		len -= 2;
	}
	*p = '\0';
}
| void mic2latin_with_table | ( | const unsigned char * | mic, | |
| unsigned char * | p, | |||
| int | len, | |||
| int | lc, | |||
| int | encoding, | |||
| const unsigned char * | tab | |||
| ) |
Definition at line 196 of file conv.c.
References HIGHBIT, IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().
Referenced by mic2iso(), mic2win1250(), mic2win1251(), and mic2win866().
{
	/*
	 * Convert len bytes of MULE internal code (mic) to a single-byte
	 * encoding using a translation table, writing a NUL-terminated result
	 * to p.
	 *
	 * Bytes without the high bit are copied unchanged.  A high-bit lead byte
	 * must equal lc, start a 2-byte character according to pg_mic_mblen(),
	 * and be followed by a high-bit byte whose entry in tab (indexed by
	 * byte - HIGHBIT) is nonzero; that entry becomes the output byte.
	 * Anything else is handed to report_untranslatable_char().  A zero byte
	 * or a character extending past len is handed to
	 * report_invalid_encoding().
	 */
	while (len > 0)
	{
		unsigned char lead = *mic;
		unsigned char mapped = 0;
		int			nbytes;
		int			translatable;

		if (lead == 0)
			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);

		if (!IS_HIGHBIT_SET(lead))
		{
			/* easy for ASCII */
			*p++ = lead;
			mic++;
			len--;
			continue;
		}

		nbytes = pg_mic_mblen(mic);
		if (len < nbytes)
			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
									len);

		/* only consult the table once the character shape is acceptable */
		translatable = (nbytes == 2 && lead == lc && IS_HIGHBIT_SET(mic[1]));
		if (translatable)
		{
			mapped = tab[mic[1] - HIGHBIT];
			translatable = (mapped != 0);
		}

		if (!translatable)
		{
			report_untranslatable_char(PG_MULE_INTERNAL, encoding,
									   (const char *) mic, len);
			break;				/* keep compiler quiet */
		}

		*p++ = mapped;
		mic += 2;
		len -= 2;
	}
	*p = '\0';
}
| void pg_ascii2mic | ( | const unsigned char * | l, | |
| unsigned char * | p, | |||
| int | len | |||
| ) |
Definition at line 98 of file conv.c.
References IS_HIGHBIT_SET, PG_SQL_ASCII, and report_invalid_encoding().
Referenced by ascii_to_mic(), and ascii_to_utf8().
{
	/*
	 * Copy len bytes from l to p unchanged and NUL-terminate the result.
	 * A zero byte or a byte with the high bit set is handed to
	 * report_invalid_encoding() with PG_SQL_ASCII as the encoding.
	 */
	for (; len > 0; l++, len--)
	{
		int			ch = *l;

		if (ch == 0 || IS_HIGHBIT_SET(ch))
			report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);

		*p++ = ch;
	}
	*p = '\0';
}
| void pg_mic2ascii | ( | const unsigned char * | mic, | |
| unsigned char * | p, | |||
| int | len | |||
| ) |
Definition at line 118 of file conv.c.
References IS_HIGHBIT_SET, PG_MULE_INTERNAL, PG_SQL_ASCII, and report_untranslatable_char().
Referenced by mic_to_ascii(), and utf8_to_ascii().
{
	/*
	 * Copy len bytes from mic to p unchanged and NUL-terminate the result.
	 * A zero byte or a byte with the high bit set is handed to
	 * report_untranslatable_char() as a PG_MULE_INTERNAL -> PG_SQL_ASCII
	 * conversion failure.
	 */
	for (; len > 0; mic++, len--)
	{
		int			ch = *mic;

		if (ch == 0 || IS_HIGHBIT_SET(ch))
			report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
									   (const char *) mic, len);

		*p++ = ch;
	}
	*p = '\0';
}
| static unsigned char* set_iso_code | ( | unsigned char * | iso, | |
| uint32 | code | |||
| ) | [static] |
Definition at line 308 of file conv.c.
Referenced by UtfToLocal().
{
	/*
	 * Store the nonzero bytes of code at iso, most significant byte first,
	 * and return the position just past the last byte written.  Zero bytes
	 * of code are skipped, so between 0 and 4 bytes are written.
	 */
	int			shift;

	for (shift = 24; shift >= 0; shift -= 8)
	{
		unsigned char byte = (unsigned char) (code >> shift);

		if (byte != 0)
			*iso++ = byte;
	}
	return iso;
}
| void UtfToLocal | ( | const unsigned char * | utf, | |
| unsigned char * | iso, | |||
| const pg_utf_to_local * | map, | |||
| const pg_utf_to_local_combined * | cmap, | |||
| int | size1, | |||
| int | size2, | |||
| int | encoding, | |||
| int | len | |||
| ) |
Definition at line 336 of file conv.c.
References pg_utf_to_local_combined::code, pg_utf_to_local::code, compare1(), compare3(), NULL, PG_UTF8, pg_utf8_islegal(), pg_utf_mblen(), report_invalid_encoding(), report_untranslatable_char(), and set_iso_code().
Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().
{
	/*
	 * Convert len bytes of UTF-8 text (utf) to the given local encoding
	 * (iso), NUL-terminating the result.
	 *
	 * map (size1 entries) translates single UTF-8 characters; cmap (size2
	 * entries) may be NULL and, when present, translates pairs of
	 * consecutive UTF-8 characters to one local code.  Both tables are
	 * searched with bsearch() and so must be sorted consistently with
	 * compare1()/compare3().
	 *
	 * A zero byte, a truncated character, or a sequence rejected by
	 * pg_utf8_islegal() is passed to report_invalid_encoding(); a character
	 * found in no table is passed to report_untranslatable_char().
	 *
	 * NOTE(review): the code dereferences p right after calling
	 * report_untranslatable_char() on a NULL lookup, so that function is
	 * presumably expected not to return -- confirm against its definition.
	 */
	uint32		iutf;
	uint32		cutf[2];
	uint32		code;
	pg_utf_to_local *p;
	pg_utf_to_local_combined *cp;
	int			l;
	int			i;

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*utf == '\0')
			break;

		l = pg_utf_mblen(utf);
		if (len < l)
			break;

		if (!pg_utf8_islegal(utf, l))
			break;

		if (l == 1)
		{
			/* ASCII case is easy */
			*iso++ = *utf++;
			continue;
		}

		/*
		 * Pack the l bytes of the character big-endian into iutf.  The
		 * accumulation is done on the unsigned variable so that a lead byte
		 * >= 0x80 is never left-shifted as a signed int.
		 */
		iutf = 0;
		for (i = 0; i < l; i++)
			iutf = (iutf << 8) | *utf++;

		/*
		 * first, try with combined map if possible
		 */
		if (cmap && len > l)
		{
			const unsigned char *utf_save = utf;
			int			len_save = len;
			int			l_save = l;

			len -= l;

			/*
			 * A zero byte is as invalid here as at the top of the loop;
			 * without this check it would be copied to the output by the
			 * single-byte case below.
			 */
			if (*utf == '\0')
				break;

			l = pg_utf_mblen(utf);
			if (len < l)
				break;

			if (!pg_utf8_islegal(utf, l))
				break;

			cutf[0] = iutf;

			if (l == 1)
			{
				/*
				 * The next character is ASCII, so no combination is tried:
				 * translate the first character on its own ...
				 */
				p = bsearch(&cutf[0], map, size1,
							sizeof(pg_utf_to_local), compare1);
				if (p == NULL)
					report_untranslatable_char(PG_UTF8, encoding,
							   (const char *) (utf_save - l_save), len_save);
				iso = set_iso_code(iso, p->code);

				/* ... then copy the ASCII byte; ASCII case is easy */
				*iso++ = *utf++;
				continue;
			}

			/* pack the second character the same way as the first */
			iutf = 0;
			for (i = 0; i < l; i++)
				iutf = (iutf << 8) | *utf++;

			cutf[1] = iutf;
			cp = bsearch(cutf, cmap, size2,
						 sizeof(pg_utf_to_local_combined), compare3);
			if (cp)
				code = cp->code;
			else
			{
				/* not found in combined map. try with ordinary map */
				p = bsearch(&cutf[0], map, size1,
							sizeof(pg_utf_to_local), compare1);
				if (p == NULL)
					report_untranslatable_char(PG_UTF8, encoding,
							   (const char *) (utf_save - l_save), len_save);
				iso = set_iso_code(iso, p->code);

				p = bsearch(&cutf[1], map, size1,
							sizeof(pg_utf_to_local), compare1);
				if (p == NULL)
					report_untranslatable_char(PG_UTF8, encoding,
											   (const char *) (utf - l), len);
				code = p->code;
			}
		}
		else	/* no cmap or no remaining data */
		{
			p = bsearch(&iutf, map, size1,
						sizeof(pg_utf_to_local), compare1);
			if (p == NULL)
				report_untranslatable_char(PG_UTF8, encoding,
										   (const char *) (utf - l), len);
			code = p->code;
		}
		iso = set_iso_code(iso, code);
	}

	/* leaving the loop early means one of the error "break"s was taken */
	if (len > 0)
		report_invalid_encoding(PG_UTF8, (const char *) utf, len);

	*iso = '\0';
}
1.7.1