#include "postgres.h"
#include "mb/pg_wchar.h"
Go to the source code of this file.
Data Structures | |
| struct | mbinterval |
Defines | |
| #define | IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe) |
| #define | pg_euccn_verifier pg_euckr_verifier |
Functions | |
| static int | pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_ascii_mblen (const unsigned char *s) |
| static int | pg_ascii_dsplen (const unsigned char *s) |
| static int | pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_euc_mblen (const unsigned char *s) |
| static int | pg_euc_dsplen (const unsigned char *s) |
| static int | pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_eucjp_mblen (const unsigned char *s) |
| static int | pg_eucjp_dsplen (const unsigned char *s) |
| static int | pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_euckr_mblen (const unsigned char *s) |
| static int | pg_euckr_dsplen (const unsigned char *s) |
| static int | pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_euccn_mblen (const unsigned char *s) |
| static int | pg_euccn_dsplen (const unsigned char *s) |
| static int | pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_euctw_mblen (const unsigned char *s) |
| static int | pg_euctw_dsplen (const unsigned char *s) |
| static int | pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len) |
| static int | pg_johab_mblen (const unsigned char *s) |
| static int | pg_johab_dsplen (const unsigned char *s) |
| static int | pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| unsigned char * | unicode_to_utf8 (pg_wchar c, unsigned char *utf8string) |
| static int | pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len) |
| int | pg_utf_mblen (const unsigned char *s) |
| static int | mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max) |
| static int | ucs_wcwidth (pg_wchar ucs) |
| pg_wchar | utf8_to_unicode (const unsigned char *c) |
| static int | pg_utf_dsplen (const unsigned char *s) |
| static int | pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len) |
| int | pg_mule_mblen (const unsigned char *s) |
| static int | pg_mule_dsplen (const unsigned char *s) |
| static int | pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
| static int | pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len) |
| static int | pg_latin1_mblen (const unsigned char *s) |
| static int | pg_latin1_dsplen (const unsigned char *s) |
| static int | pg_sjis_mblen (const unsigned char *s) |
| static int | pg_sjis_dsplen (const unsigned char *s) |
| static int | pg_big5_mblen (const unsigned char *s) |
| static int | pg_big5_dsplen (const unsigned char *s) |
| static int | pg_gbk_mblen (const unsigned char *s) |
| static int | pg_gbk_dsplen (const unsigned char *s) |
| static int | pg_uhc_mblen (const unsigned char *s) |
| static int | pg_uhc_dsplen (const unsigned char *s) |
| static int | pg_gb18030_mblen (const unsigned char *s) |
| static int | pg_gb18030_dsplen (const unsigned char *s) |
| static int | pg_ascii_verifier (const unsigned char *s, int len) |
| static int | pg_eucjp_verifier (const unsigned char *s, int len) |
| static int | pg_euckr_verifier (const unsigned char *s, int len) |
| static int | pg_euctw_verifier (const unsigned char *s, int len) |
| static int | pg_johab_verifier (const unsigned char *s, int len) |
| static int | pg_mule_verifier (const unsigned char *s, int len) |
| static int | pg_latin1_verifier (const unsigned char *s, int len) |
| static int | pg_sjis_verifier (const unsigned char *s, int len) |
| static int | pg_big5_verifier (const unsigned char *s, int len) |
| static int | pg_gbk_verifier (const unsigned char *s, int len) |
| static int | pg_uhc_verifier (const unsigned char *s, int len) |
| static int | pg_gb18030_verifier (const unsigned char *s, int len) |
| static int | pg_utf8_verifier (const unsigned char *s, int len) |
| bool | pg_utf8_islegal (const unsigned char *source, int length) |
| static bool | pg_generic_charinc (unsigned char *charptr, int len) |
| static bool | pg_utf8_increment (unsigned char *charptr, int length) |
| static bool | pg_eucjp_increment (unsigned char *charptr, int length) |
| int | pg_mic_mblen (const unsigned char *mbstr) |
| int | pg_encoding_mblen (int encoding, const char *mbstr) |
| int | pg_encoding_dsplen (int encoding, const char *mbstr) |
| int | pg_encoding_verifymb (int encoding, const char *mbstr, int len) |
| int | pg_encoding_max_length (int encoding) |
| int | pg_database_encoding_max_length (void) |
| mbcharacter_incrementer | pg_database_encoding_character_incrementer (void) |
| bool | pg_verifymbstr (const char *mbstr, int len, bool noError) |
| bool | pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError) |
| int | pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError) |
| void | check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding) |
| void | report_invalid_encoding (int encoding, const char *mbstr, int len) |
| void | report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len) |
Variables | |
| pg_wchar_tbl | pg_wchar_table [] |
| #define IS_EUC_RANGE_VALID | ( | c | ) | ((c) >= 0xa1 && (c) <= 0xfe) |
Definition at line 1130 of file wchar.c.
Referenced by pg_eucjp_verifier(), pg_euckr_verifier(), pg_euctw_verifier(), and pg_johab_verifier().
| void check_encoding_conversion_args | ( | int | src_encoding, | |
| int | dest_encoding, | |||
| int | len, | |||
| int | expected_src_encoding, | |||
| int | expected_dest_encoding | |||
| ) |
Definition at line 1969 of file wchar.c.
References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.
{
    /*
     * Sanity-check the arguments of an encoding conversion call.  Each
     * failed check is reported through elog(ERROR).  An expected encoding
     * that is negative disables the corresponding "expected" comparison.
     */

    /* Source side: the ID must be valid before it is used as a table index. */
    if (!PG_VALID_ENCODING(src_encoding))
    {
        elog(ERROR, "invalid source encoding ID: %d", src_encoding);
    }
    if (expected_src_encoding >= 0 && src_encoding != expected_src_encoding)
    {
        elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
             pg_enc2name_tbl[expected_src_encoding].name,
             pg_enc2name_tbl[src_encoding].name);
    }

    /* Destination side: same two checks. */
    if (!PG_VALID_ENCODING(dest_encoding))
    {
        elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
    }
    if (expected_dest_encoding >= 0 && dest_encoding != expected_dest_encoding)
    {
        elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
             pg_enc2name_tbl[expected_dest_encoding].name,
             pg_enc2name_tbl[dest_encoding].name);
    }

    /* Finally, the byte count may not be negative. */
    if (len < 0)
    {
        elog(ERROR, "encoding conversion length must not be negative");
    }
}
| static int mbbisearch | ( | pg_wchar | ucs, | |
| const struct mbinterval * | table, | |||
| int | max | |||
| ) | [static] |
Definition at line 584 of file wchar.c.
Referenced by ucs_wcwidth().
{
    /*
     * Binary search over an array of [first, last] intervals; "max" is the
     * index of the final entry.  Returns 1 when ucs lies inside one of the
     * intervals and 0 otherwise.
     */
    int         lo = 0;

    /* Quick rejection: ucs lies outside the span covered by the table. */
    if (ucs < table[0].first || ucs > table[max].last)
        return 0;

    while (lo <= max)
    {
        int         probe = (lo + max) / 2;

        if (ucs > table[probe].last)
            lo = probe + 1;     /* continue in the upper half */
        else if (ucs < table[probe].first)
            max = probe - 1;    /* continue in the lower half */
        else
            return 1;           /* first <= ucs <= last */
    }

    return 0;
}
| static int pg_ascii2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
| static int pg_ascii_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 61 of file wchar.c.
Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().
{
    /*
     * Display width of a single-byte character: 0 for the terminating NUL,
     * -1 for bytes below 0x20 and for 0x7f, 1 for everything else.
     */
    const unsigned char ch = *s;

    if (ch == '\0')
        return 0;

    return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}
| static int pg_ascii_mblen | ( | const unsigned char * | s | ) | [static] |
| static int pg_ascii_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
| static int pg_big5_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1007 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
{
    /*
     * A lead byte with the high bit set is reported as two columns wide;
     * any other byte is handed to the ASCII width routine.
     */
    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
| static int pg_big5_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 995 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_big5_verifier().
{
    /* Two bytes when the lead byte has its high bit set, otherwise one. */
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
| static int pg_big5_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1344 of file wchar.c.
References pg_big5_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 when fewer than
     * that many bytes are available or a trailing byte is NUL.
     */
    const int   mbl = pg_big5_mblen(s);
    int         i;

    if (mbl > len)
        return -1;

    /* Only the bytes after the lead byte are inspected. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
| mbcharacter_incrementer pg_database_encoding_character_incrementer | ( | void | ) |
Definition at line 1845 of file wchar.c.
References GetDatabaseEncoding(), PG_EUC_JP, and PG_UTF8.
Referenced by make_greater_string().
{
    /*
     * Select the character-increment routine for the database encoding.
     * A dedicated field in pg_wchar_table[] might be nicer some day; for
     * the moment the choice is simply made here.
     */
    const int   encoding = GetDatabaseEncoding();

    if (encoding == PG_UTF8)
        return pg_utf8_increment;
    if (encoding == PG_EUC_JP)
        return pg_eucjp_increment;

    /* Every other encoding uses the generic routine. */
    return pg_generic_charinc;
}
| int pg_database_encoding_max_length | ( | void | ) |
Definition at line 1836 of file wchar.c.
References GetDatabaseEncoding(), and pg_wchar_tbl::maxmblen.
Referenced by appendStringInfoRegexpSubstr(), BeginCopy(), bpcharlen(), charlen_to_bytelen(), check_replace_text_has_escape_char(), dotrim(), gbt_bpchar_compress(), gbt_bpchar_consistent(), gbt_text_compress(), gbt_text_consistent(), Generic_Text_IC_like(), GenericMatchText(), infix(), init_tsvector_parser(), like_escape(), like_fixed_prefix(), lowerstr_with_len(), lpad(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), regexp_fixed_prefix(), rpad(), show_trgm(), str_initcap(), str_tolower(), str_toupper(), text_length(), text_position_setup(), text_reverse(), text_substring(), TParserInit(), translate(), tsvectorout(), and unaccent_lexize().
{
    /* Look up the maxmblen field of the database encoding's table entry. */
    const int   encoding = GetDatabaseEncoding();

    return pg_wchar_table[encoding].maxmblen;
}
| int pg_encoding_dsplen | ( | int | encoding, | |
| const char * | mbstr | |||
| ) |
Definition at line 1793 of file wchar.c.
References Assert, PG_SQL_ASCII, and PG_VALID_ENCODING.
Referenced by PQdsplen(), and reportErrorPosition().
{
    /*
     * Call the dsplen routine registered for "encoding".  An ID outside
     * the bounds of pg_wchar_table falls back to the PG_SQL_ASCII entry.
     */
    const unsigned char *bytes = (const unsigned char *) mbstr;
    int         slot = PG_SQL_ASCII;

    Assert(PG_VALID_ENCODING(encoding));

    if (encoding >= 0 &&
        encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
        slot = encoding;

    return (*pg_wchar_table[slot].dsplen) (bytes);
}
| int pg_encoding_max_length | ( | int | encoding | ) |
Definition at line 1823 of file wchar.c.
References Assert, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.
Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().
{
    /* Return the maxmblen field of the table entry for "encoding". */
    int         max_len;

    Assert(PG_VALID_ENCODING(encoding));

    max_len = pg_wchar_table[encoding].maxmblen;
    return max_len;
}
| int pg_encoding_mblen | ( | int | encoding, | |
| const char * | mbstr | |||
| ) |
Definition at line 1779 of file wchar.c.
References Assert, PG_SQL_ASCII, and PG_VALID_ENCODING.
Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyReadLineText(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), report_invalid_encoding(), report_untranslatable_char(), reportErrorPosition(), and unicode_to_sqlchar().
{
    /*
     * Call the mblen routine registered for "encoding".  An ID outside the
     * bounds of pg_wchar_table falls back to the PG_SQL_ASCII entry.
     */
    const unsigned char *bytes = (const unsigned char *) mbstr;
    int         slot = PG_SQL_ASCII;

    Assert(PG_VALID_ENCODING(encoding));

    if (encoding >= 0 &&
        encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
        slot = encoding;

    return (*pg_wchar_table[slot].mblen) (bytes);
}
| int pg_encoding_verifymb | ( | int | encoding, | |
| const char * | mbstr, | |||
| int | len | |||
| ) |
Definition at line 1809 of file wchar.c.
References Assert, PG_SQL_ASCII, and PG_VALID_ENCODING.
Referenced by big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().
{
    /*
     * Call the mbverify routine registered for "encoding" on the first
     * character of mbstr.  An ID outside the bounds of pg_wchar_table is
     * verified with the PG_SQL_ASCII routine instead.
     */
    const unsigned char *bytes = (const unsigned char *) mbstr;

    Assert(PG_VALID_ENCODING(encoding));

    if (encoding < 0 ||
        encoding >= sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
        return (*pg_wchar_table[PG_SQL_ASCII].mbverify) (bytes, len);

    return (*pg_wchar_table[encoding].mbverify) (bytes, len);
}
| static int pg_euc2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 75 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().
{
    /*
     * Convert at most len bytes of EUC text at "from" into pg_wchar values
     * at "to", stopping early at a NUL byte.  The output is terminated
     * with a zero entry; the return value is the number of characters
     * stored, not counting that terminator.
     */
    int         nchars = 0;

    while (len > 0 && *from)
    {
        if (*from == SS2 && len >= 2)
        {
            /* JIS X 0201 (so called "1 byte KANA"): SS2 plus one byte */
            *to = (SS2 << 8) | from[1];
            from += 2;
            len -= 2;
        }
        else if (*from == SS3 && len >= 3)
        {
            /* JIS X 0212 KANJI: SS3 plus two bytes */
            *to = (SS3 << 16) | (from[1] << 8) | from[2];
            from += 3;
            len -= 3;
        }
        else if (IS_HIGHBIT_SET(*from) && len >= 2)
        {
            /* JIS X 0208 KANJI: two bytes */
            *to = (from[0] << 8) | from[1];
            from += 2;
            len -= 2;
        }
        else
        {
            /* must be ASCII */
            *to = from[0];
            from += 1;
            len -= 1;
        }
        to++;
        nchars++;
    }

    *to = 0;
    return nchars;
}
| static int pg_euc_dsplen | ( | const unsigned char * | s | ) | [inline, static] |
Definition at line 130 of file wchar.c.
References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.
Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().
{
    /*
     * SS2, SS3 and any other lead byte with the high bit set are all
     * reported as two columns wide; the rest goes to the ASCII routine.
     */
    if (*s == SS2 || *s == SS3 || IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
| static int pg_euc_mblen | ( | const unsigned char * | s | ) | [inline, static] |
Definition at line 114 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().
{
    /* Byte length of an EUC character, chosen from its lead byte. */
    if (*s == SS2)
        return 2;
    if (*s == SS3)
        return 3;
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return 1;
}
| static int pg_euccn2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 202 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
{
    /*
     * Convert at most len bytes of EUC_CN text at "from" into pg_wchar
     * values at "to", stopping early at a NUL byte.  The output is
     * terminated with a zero entry; the return value is the number of
     * characters stored, not counting that terminator.
     */
    int         nchars = 0;

    while (len > 0 && *from)
    {
        if (*from == SS2 && len >= 3)
        {
            /* code set 2 (unused?): SS2 plus two bytes */
            *to = (SS2 << 16) | (from[1] << 8) | from[2];
            from += 3;
            len -= 3;
        }
        else if (*from == SS3 && len >= 3)
        {
            /* code set 3 (unused ?): SS3 plus two bytes */
            *to = (SS3 << 16) | (from[1] << 8) | from[2];
            from += 3;
            len -= 3;
        }
        else if (IS_HIGHBIT_SET(*from) && len >= 2)
        {
            /* code set 1: two bytes */
            *to = (from[0] << 8) | from[1];
            from += 2;
            len -= 2;
        }
        else
        {
            /* anything else is copied through as a single byte */
            *to = from[0];
            from += 1;
            len -= 1;
        }
        to++;
        nchars++;
    }

    *to = 0;
    return nchars;
}
| static int pg_euccn_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 253 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
{
    /* Width 2 for a high-bit lead byte, else whatever the ASCII routine says. */
    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
| static int pg_euccn_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 241 of file wchar.c.
References IS_HIGHBIT_SET.
{
    /* Two bytes when the lead byte has its high bit set, otherwise one. */
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
| static int pg_eucjp2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 149 of file wchar.c.
References pg_euc2wchar_with_len().
{
    /* EUC_JP conversion is delegated entirely to the shared EUC routine. */
    int         nchars = pg_euc2wchar_with_len(from, to, len);

    return nchars;
}
| static int pg_eucjp_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 161 of file wchar.c.
References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.
{
    /*
     * An SS2 lead byte is reported as one column; SS3 and any other
     * high-bit lead byte as two; the rest goes to the ASCII routine.
     */
    if (*s == SS2)
        return 1;
    if (*s == SS3 || IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
| static bool pg_eucjp_increment | ( | unsigned char * | charptr, | |
| int | length | |||
| ) | [static] |
Definition at line 1633 of file wchar.c.
References i, IS_HIGHBIT_SET, SS2, and SS3.
{
/*
 * Advance the character stored at charptr (length bytes) in place.
 * Returns true when a byte was changed.  Returns false when length does
 * not match what the lead byte requires, or when no byte could be raised
 * any further.
 */
unsigned char c1,
c2;
int i;
c1 = *charptr;
switch (c1)
{
case SS2: /* JIS X 0201 */
if (length != 2)
return false;
c2 = charptr[1];
/* second byte at or past 0xdf: overwrite both bytes with 0xa1, which replaces the SS2 lead byte */
if (c2 >= 0xdf)
charptr[0] = charptr[1] = 0xa1;
/* second byte below 0xa1: raise it to 0xa1 */
else if (c2 < 0xa1)
charptr[1] = 0xa1;
else
charptr[1]++;
break;
case SS3: /* JIS X 0212 */
if (length != 3)
return false;
/* work from the last byte back to byte 1; the lead byte at index 0 is never touched */
for (i = 2; i > 0; i--)
{
c2 = charptr[i];
if (c2 < 0xa1)
{
charptr[i] = 0xa1;
return true;
}
else if (c2 < 0xfe)
{
charptr[i]++;
return true;
}
/* a byte of 0xfe or more is left unchanged and the preceding byte is tried */
}
/* Out of 3-byte code region */
return false;
default:
if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
{
if (length != 2)
return false;
/* same scheme as above, but here the lead byte (index 0) may be raised too */
for (i = 1; i >= 0; i--)
{
c2 = charptr[i];
if (c2 < 0xa1)
{
charptr[i] = 0xa1;
return true;
}
else if (c2 < 0xfe)
{
charptr[i]++;
return true;
}
}
/* Out of 2 byte code region */
return false;
}
else
{ /* ASCII, single byte */
/* 0x7f cannot be raised further (larger values have the high bit set and never reach this branch) */
if (c1 > 0x7e)
return false;
(*charptr)++;
}
break;
}
return true;
}
| static int pg_eucjp_mblen | ( | const unsigned char * | s | ) | [static] |
| static int pg_eucjp_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1133 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.
{
    /*
     * Verify the single EUC_JP character starting at s, with len bytes
     * available.  Returns its byte length, or -1 if it is truncated or a
     * byte is outside the range allowed for its position.
     */
    const unsigned char lead = s[0];

    if (lead == SS2)
    {
        /* JIS X 0201: SS2 plus one byte in 0xa1..0xdf */
        if (len < 2)
            return -1;
        if (s[1] < 0xa1 || s[1] > 0xdf)
            return -1;
        return 2;
    }

    if (lead == SS3)
    {
        /* JIS X 0212: SS3 plus two bytes in the EUC range */
        if (len < 3)
            return -1;
        if (!IS_EUC_RANGE_VALID(s[1]) || !IS_EUC_RANGE_VALID(s[2]))
            return -1;
        return 3;
    }

    /* must be ASCII */
    if (!IS_HIGHBIT_SET(lead))
        return 1;

    /* JIS X 0208?  Both bytes must be in the EUC range. */
    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(lead) || !IS_EUC_RANGE_VALID(s[1]))
        return -1;
    return 2;
}
| static int pg_euckr2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 180 of file wchar.c.
References pg_euc2wchar_with_len().
{
    /* EUC_KR conversion is delegated entirely to the shared EUC routine. */
    int         nchars = pg_euc2wchar_with_len(from, to, len);

    return nchars;
}
| static int pg_euckr_dsplen | ( | const unsigned char * | s | ) | [static] |
| static int pg_euckr_mblen | ( | const unsigned char * | s | ) | [static] |
| static int pg_euckr_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1188 of file wchar.c.
References IS_EUC_RANGE_VALID, and IS_HIGHBIT_SET.
{
    /*
     * Verify the single character starting at s, with len bytes available.
     * Returns 1 for a byte without the high bit, 2 for a valid two-byte
     * character, and -1 if the character is truncated or either byte is
     * outside the EUC range.
     */
    const unsigned char lead = s[0];

    /* must be ASCII */
    if (!IS_HIGHBIT_SET(lead))
        return 1;

    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(lead) || !IS_EUC_RANGE_VALID(s[1]))
        return -1;

    return 2;
}
| static int pg_euctw2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 269 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
{
    /*
     * Convert at most len bytes of EUC_TW text at "from" into pg_wchar
     * values at "to", stopping early at a NUL byte.  The output is
     * terminated with a zero entry; the return value is the number of
     * characters stored, not counting that terminator.
     */
    int         nchars = 0;

    while (len > 0 && *from)
    {
        if (*from == SS2 && len >= 4)
        {
            /* code set 2: SS2 plus three bytes */
            *to = (((uint32) SS2) << 24) | (from[1] << 16) |
                (from[2] << 8) | from[3];
            from += 4;
            len -= 4;
        }
        else if (*from == SS3 && len >= 3)
        {
            /* code set 3 (unused?): SS3 plus two bytes */
            *to = (SS3 << 16) | (from[1] << 8) | from[2];
            from += 3;
            len -= 3;
        }
        else if (IS_HIGHBIT_SET(*from) && len >= 2)
        {
            /* plain two-byte character (code set 1, as in the EUC_CN routine) */
            *to = (from[0] << 8) | from[1];
            from += 2;
            len -= 2;
        }
        else
        {
            /* anything else is copied through as a single byte */
            *to = from[0];
            from += 1;
            len -= 1;
        }
        to++;
        nchars++;
    }

    *to = 0;
    return nchars;
}
| static int pg_euctw_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 325 of file wchar.c.
References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.
{
    /*
     * SS2, SS3 and any other lead byte with the high bit set are all
     * reported as two columns wide; the rest goes to the ASCII routine.
     */
    if (*s == SS2 || *s == SS3 || IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
| static int pg_euctw_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 309 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
{
    /* Byte length of an EUC_TW character, chosen from its lead byte. */
    if (*s == SS2)
        return 4;
    if (*s == SS3)
        return 3;
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return 1;
}
| static int pg_euctw_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1220 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.
{
    /*
     * Verify the single EUC_TW character starting at s, with len bytes
     * available.  Returns its byte length, or -1 if it is truncated, uses
     * the SS3 prefix, or has a byte outside the range allowed for its
     * position.
     */
    const unsigned char lead = s[0];

    if (lead == SS2)
    {
        /* CNS 11643 Plane 1-7: SS2, a plane byte 0xa1..0xa7, two EUC bytes */
        if (len < 4)
            return -1;
        if (s[1] < 0xa1 || s[1] > 0xa7)
            return -1;
        if (!IS_EUC_RANGE_VALID(s[2]) || !IS_EUC_RANGE_VALID(s[3]))
            return -1;
        return 4;
    }

    /* SS3 is unused and always rejected */
    if (lead == SS3)
        return -1;

    /* must be ASCII */
    if (!IS_HIGHBIT_SET(lead))
        return 1;

    /* CNS 11643 Plane 1; the lead byte gets no further range check */
    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(s[1]))
        return -1;
    return 2;
}
| static int pg_gb18030_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1096 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
{
    /* Width 2 for a high-bit lead byte, else whatever the ASCII routine says. */
    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
| static int pg_gb18030_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1077 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_gb18030_verifier().
{
    /*
     * Byte length of a GB18030 character.  A lead byte without the high
     * bit is one byte (ASCII).  Otherwise the second byte decides: a value
     * in 0x30..0x39 means four bytes, any other value means two.
     */
    unsigned char second;

    if (!IS_HIGHBIT_SET(*s))
        return 1;

    second = s[1];
    return (second >= 0x30 && second <= 0x39) ? 4 : 2;
}
| static int pg_gb18030_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1404 of file wchar.c.
References pg_gb18030_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 when fewer than
     * that many bytes are available or a trailing byte is NUL.
     */
    const int   mbl = pg_gb18030_mblen(s);
    int         i;

    if (mbl > len)
        return -1;

    /* Only the bytes after the lead byte are inspected. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
| static int pg_gbk_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1034 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
{
    /* Width 2 for a high-bit lead byte, else whatever the ASCII routine says. */
    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
| static int pg_gbk_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1022 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_gbk_verifier().
{
    /* Two bytes when the lead byte has its high bit set, otherwise one. */
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
| static int pg_gbk_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1364 of file wchar.c.
References pg_gbk_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 when fewer than
     * that many bytes are available or a trailing byte is NUL.
     */
    const int   mbl = pg_gbk_mblen(s);
    int         i;

    if (mbl > len)
        return -1;

    /* Only the bytes after the lead byte are inspected. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
| static bool pg_generic_charinc | ( | unsigned char * | charptr, | |
| int | len | |||
| ) | [static] |
Definition at line 1521 of file wchar.c.
References GetDatabaseEncoding(), pg_wchar_tbl::mbverify, and pg_wchar_table.
{
    /*
     * Generic increment: keep raising the last byte of the character until
     * the database encoding's verifier accepts all len bytes.  Returns
     * false once the last byte has reached 255 without success.
     */
    unsigned char *tail = charptr + len - 1;
    mbverifier  verify = pg_wchar_table[GetDatabaseEncoding()].mbverify;

    /* The encoding's verifier is called directly on each candidate. */
    for (;;)
    {
        if (!(*tail < (unsigned char) 255))
            return false;

        ++(*tail);
        if (verify(charptr, len) == len)
            return true;
    }
}
| static int pg_johab_dsplen | ( | const unsigned char * | s | ) | [static] |
| static int pg_johab_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 393 of file wchar.c.
References pg_euc_mblen().
Referenced by pg_johab_verifier().
{
    /* JOHAB reuses the generic EUC length rule. */
    int         nbytes = pg_euc_mblen(s);

    return nbytes;
}
| static int pg_johab_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1270 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and pg_johab_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 if it is
     * truncated or, for a high-bit lead byte, a trailing byte is outside
     * the EUC range.
     */
    const int   mbl = pg_johab_mblen(s);
    int         i;

    if (mbl > len)
        return -1;

    /* A lead byte without the high bit needs no further checking. */
    if (!IS_HIGHBIT_SET(*s))
        return mbl;

    for (i = 1; i < mbl; i++)
    {
        if (!IS_EUC_RANGE_VALID(s[i]))
            return -1;
    }

    return mbl;
}
| static int pg_latin12wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
| static int pg_latin1_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 955 of file wchar.c.
References pg_ascii_dsplen().
{
    /* Single-byte encodings share the ASCII display-width rule. */
    int         width = pg_ascii_dsplen(s);

    return width;
}
| static int pg_latin1_mblen | ( | const unsigned char * | s | ) | [static] |
| static int pg_latin1_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
| int pg_mic_mblen | ( | const unsigned char * | mbstr | ) |
Definition at line 1770 of file wchar.c.
References pg_mule_mblen().
Referenced by mic2latin(), and mic2latin_with_table().
{
    /* Thin wrapper around pg_mule_mblen(). */
    int         nbytes = pg_mule_mblen(mbstr);

    return nbytes;
}
| static int pg_mule2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 748 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.
{
    /*
     * Convert at most len bytes of MULE text at "from" into pg_wchar
     * values at "to", stopping early at a NUL byte.  The output is
     * terminated with a zero entry; the return value is the number of
     * characters stored, not counting that terminator.
     */
    int         nchars = 0;

    while (len > 0 && *from)
    {
        if (IS_LC1(*from) && len >= 2)
        {
            /* lead byte plus one byte */
            *to = (from[0] << 16) | from[1];
            from += 2;
            len -= 2;
        }
        else if (IS_LCPRV1(*from) && len >= 3)
        {
            /* the first byte is skipped; the next two are stored */
            *to = (from[1] << 16) | from[2];
            from += 3;
            len -= 3;
        }
        else if (IS_LC2(*from) && len >= 3)
        {
            /* lead byte plus two bytes */
            *to = (from[0] << 16) | (from[1] << 8) | from[2];
            from += 3;
            len -= 3;
        }
        else if (IS_LCPRV2(*from) && len >= 4)
        {
            /* the first byte is skipped; the next three are stored */
            *to = (from[1] << 16) | (from[2] << 8) | from[3];
            from += 4;
            len -= 4;
        }
        else
        {
            /* assume ASCII */
            *to = (unsigned char) from[0];
            from += 1;
            len -= 1;
        }
        to++;
        nchars++;
    }

    *to = 0;
    return nchars;
}
| static int pg_mule_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 884 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.
{
    /*
     * Treating every multibyte charset as double-wide on screen is not
     * strictly right, but it is taken as a reasonable approximation for
     * the MULE charsets supported at present.
     */
    if (IS_LC1(*s) || IS_LCPRV1(*s))
        return 1;
    if (IS_LC2(*s) || IS_LCPRV2(*s))
        return 2;

    /* assume ASCII */
    return 1;
}
| int pg_mule_mblen | ( | const unsigned char * | s | ) |
Definition at line 866 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.
Referenced by pg_mic_mblen(), and pg_mule_verifier().
| static int pg_mule_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1294 of file wchar.c.
References IS_HIGHBIT_SET, and pg_mule_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 if it is
     * truncated or a trailing byte lacks the high bit.
     */
    const int   mbl = pg_mule_mblen(s);
    int         i;

    if (mbl > len)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        if (!IS_HIGHBIT_SET(s[i]))
            return -1;
    }

    return mbl;
}
| static int pg_sjis_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 978 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
{
    /* 1 byte kana? (0xa1..0xdf) */
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;

    /* kanji when the high bit is set; otherwise it should be ASCII */
    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
| static int pg_sjis_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 964 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_sjis_verifier().
{
    /* 1 byte kana? (0xa1..0xdf) */
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;

    /* kanji when the high bit is set; otherwise it should be ASCII */
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
| static int pg_sjis_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1321 of file wchar.c.
References ISSJISHEAD, ISSJISTAIL, and pg_sjis_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 if it is
     * truncated or a two-byte character fails the head/tail byte checks.
     */
    const int   mbl = pg_sjis_mblen(s);

    if (mbl > len)
        return -1;

    /* single-byte characters were already settled by pg_sjis_mblen */
    if (mbl == 1)
        return mbl;

    if (ISSJISHEAD(s[0]) && ISSJISTAIL(s[1]))
        return mbl;

    return -1;
}
| static int pg_uhc_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1061 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
{
    /* Width 2 for a high-bit lead byte, else whatever the ASCII routine says. */
    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
}
| static int pg_uhc_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1049 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_uhc_verifier().
{
    /* Two bytes when the lead byte has its high bit set, otherwise one. */
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
| static int pg_uhc_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1384 of file wchar.c.
References pg_uhc_mblen().
{
    /*
     * Returns the byte length of the character at s, or -1 when fewer than
     * that many bytes are available or a trailing byte is NUL.
     */
    const int   mbl = pg_uhc_mblen(s);
    int         i;

    if (mbl > len)
        return -1;

    /* Only the bytes after the lead byte are inspected. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
| static int pg_utf2wchar_with_len | ( | const unsigned char * | from, | |
| pg_wchar * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 411 of file wchar.c.
{
    /*
     * Decode at most len bytes of UTF-8 at "from" into pg_wchar values at
     * "to", stopping early at a NUL byte.  A multibyte sequence that would
     * run past len ends the conversion without being stored.  The output
     * is terminated with a zero entry; the return value is the number of
     * characters stored, not counting that terminator.
     */
    int         nchars = 0;

    while (len > 0 && *from)
    {
        const unsigned char lead = *from;

        if ((lead & 0xe0) == 0xc0)
        {
            /* two-byte sequence */
            if (len < 2)
                break;          /* drop trailing incomplete char */
            *to = ((uint32) (lead & 0x1f) << 6) |
                (uint32) (from[1] & 0x3f);
            from += 2;
            len -= 2;
        }
        else if ((lead & 0xf0) == 0xe0)
        {
            /* three-byte sequence */
            if (len < 3)
                break;          /* drop trailing incomplete char */
            *to = ((uint32) (lead & 0x0f) << 12) |
                ((uint32) (from[1] & 0x3f) << 6) |
                (uint32) (from[2] & 0x3f);
            from += 3;
            len -= 3;
        }
        else if ((lead & 0xf8) == 0xf0)
        {
            /* four-byte sequence */
            if (len < 4)
                break;          /* drop trailing incomplete char */
            *to = ((uint32) (lead & 0x07) << 18) |
                ((uint32) (from[1] & 0x3f) << 12) |
                ((uint32) (from[2] & 0x3f) << 6) |
                (uint32) (from[3] & 0x3f);
            from += 4;
            len -= 4;
        }
        else
        {
            /*
             * Either plain ASCII (high bit clear) or a bogus lead byte.  A
             * bogus byte is passed through as a length-1 character; raising
             * an error is not this routine's job.
             */
            *to = lead;
            from += 1;
            len -= 1;
        }
        to++;
        nchars++;
    }

    *to = 0;
    return nchars;
}
| static bool pg_utf8_increment | ( | unsigned char * | charptr, | |
| int | length | |||
| ) | [static] |
Definition at line 1555 of file wchar.c.
{
/*
 * Advance the UTF-8 character stored at charptr (length bytes) in place.
 * The switch is entered at the case for the last byte and falls through
 * toward the first byte until one byte can be incremented.  Returns true
 * when a byte was incremented; false for lengths outside 1..4 or when the
 * first byte is one of the values that cannot be raised.
 */
unsigned char a;
unsigned char limit;
switch (length)
{
default:
/* reject lengths 5 and 6 for now */
return false;
case 4:
a = charptr[3];
/* fourth byte can still be raised while it is below 0xBF */
if (a < 0xBF)
{
charptr[3]++;
break;
}
/* FALL THRU */
case 3:
a = charptr[2];
/* same rule for the third byte; a byte that cannot be raised is left unchanged */
if (a < 0xBF)
{
charptr[2]++;
break;
}
/* FALL THRU */
case 2:
a = charptr[1];
/* the ceiling for the second byte depends on the first byte */
switch (*charptr)
{
case 0xED:
limit = 0x9F;
break;
case 0xF4:
limit = 0x8F;
break;
default:
limit = 0xBF;
break;
}
if (a < limit)
{
charptr[1]++;
break;
}
/* FALL THRU */
case 1:
a = *charptr;
/* first-byte values for which no increment is attempted */
if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
return false;
charptr[0]++;
break;
}
return true;
}
| bool pg_utf8_islegal | ( | const unsigned char * | source, | |
| int | length | |||
| ) |
Definition at line 1452 of file wchar.c.
Referenced by pg_utf8_verifier(), utf8_to_iso8859_1(), and UtfToLocal().
{
/*
 * Check whether the "length" bytes at source form an acceptable UTF-8
 * sequence.  The switch is entered at the case for the last byte and
 * falls through toward the first byte, so every byte of the sequence is
 * range-checked.  Returns false for lengths outside 1..4.
 */
unsigned char a;
switch (length)
{
default:
/* reject lengths 5 and 6 for now */
return false;
case 4:
a = source[3];
/* fourth byte must lie in 0x80..0xBF */
if (a < 0x80 || a > 0xBF)
return false;
/* FALL THRU */
case 3:
a = source[2];
/* third byte must lie in 0x80..0xBF */
if (a < 0x80 || a > 0xBF)
return false;
/* FALL THRU */
case 2:
a = source[1];
/*
 * The allowed range of the second byte is narrower for some first bytes.
 * NOTE(review): presumably this excludes overlong forms (0xE0, 0xF0),
 * surrogates (0xED) and values past U+10FFFF (0xF4) -- confirm against
 * the Unicode well-formedness table.
 */
switch (*source)
{
case 0xE0:
if (a < 0xA0 || a > 0xBF)
return false;
break;
case 0xED:
if (a < 0x80 || a > 0x9F)
return false;
break;
case 0xF0:
if (a < 0x90 || a > 0xBF)
return false;
break;
case 0xF4:
if (a < 0x80 || a > 0x8F)
return false;
break;
default:
if (a < 0x80 || a > 0xBF)
return false;
break;
}
/* FALL THRU */
case 1:
a = *source;
/* first byte may not be in 0x80..0xC1 and may not exceed 0xF4 */
if (a >= 0x80 && a < 0xC2)
return false;
if (a > 0xF4)
return false;
break;
}
return true;
}
| static int pg_utf8_verifier | ( | const unsigned char * | s, | |
| int | len | |||
| ) | [static] |
Definition at line 1424 of file wchar.c.
References pg_utf8_islegal(), and pg_utf_mblen().
{
    /*
     * Returns the byte length of the UTF-8 character at s, or -1 if it is
     * truncated or pg_utf8_islegal() rejects it.
     */
    const int   nbytes = pg_utf_mblen(s);

    if (len < nbytes || !pg_utf8_islegal(s, nbytes))
        return -1;

    return nbytes;
}
| static int pg_utf_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 736 of file wchar.c.
References ucs_wcwidth(), and utf8_to_unicode().
{
    /* Decode the character, then ask ucs_wcwidth() for its width. */
    pg_wchar    codepoint = utf8_to_unicode(s);

    return ucs_wcwidth(codepoint);
}
| int pg_utf_mblen | ( | const unsigned char * | s | ) |
Definition at line 541 of file wchar.c.
Referenced by json_lex_string(), pg_utf8_verifier(), pg_wchar2utf_with_len(), utf8_to_iso8859_1(), and UtfToLocal().
{
    /*
     * Byte length of a UTF-8 character, derived from the bit pattern of
     * its first byte.  A byte that matches none of the patterns counts as
     * length 1.
     */
    const unsigned char lead = *s;

    if ((lead & 0x80) == 0)
        return 1;
    if ((lead & 0xe0) == 0xc0)
        return 2;
    if ((lead & 0xf0) == 0xe0)
        return 3;
    if ((lead & 0xf8) == 0xf0)
        return 4;
#ifdef NOT_USED
    if ((lead & 0xfc) == 0xf8)
        return 5;
    if ((lead & 0xfe) == 0xfc)
        return 6;
#endif

    return 1;
}
Definition at line 1880 of file wchar.c.
References pg_verify_mbstr_len().
Referenced by pg_any_to_server(), and t_readline().
{
    /*
     * Boolean wrapper around pg_verify_mbstr_len(): the string is
     * considered valid exactly when that function's result is
     * non-negative.
     */
    const int   nchars = pg_verify_mbstr_len(encoding, mbstr, len, noError);

    return nchars >= 0;
}
| int pg_verify_mbstr_len | ( | int | encoding, | |
| const char * | mbstr, | |||
| int | len, | |||
| bool | noError | |||
| ) |
Definition at line 1897 of file wchar.c.
References Assert, IS_HIGHBIT_SET, pg_wchar_tbl::mbverify, NULL, pg_encoding_max_length(), PG_VALID_ENCODING, and report_invalid_encoding().
Referenced by length_in_encoding(), pg_convert(), pg_verify_mbstr(), pg_verifymbstr(), read_extension_script_file(), and utf_u2e().
{
    /*
     * Walk "len" bytes of "mbstr", verifying each character with the
     * encoding's mbverify callback, and return the number of characters
     * seen.  An embedded zero byte or a sequence rejected by the verifier
     * yields -1 when noError is set; otherwise report_invalid_encoding()
     * is called for it.
     */
    mbverifier  verify_fn;
    int         nchars = 0;

    Assert(PG_VALID_ENCODING(encoding));

    /*
     * Encodings whose maximum character length is one byte: the only
     * thing to reject is a zero byte, so one memchr() scan suffices.
     */
    if (pg_encoding_max_length(encoding) <= 1)
    {
        const char *zero_at = memchr(mbstr, 0, len);

        if (zero_at == NULL)
            return len;
        if (noError)
            return -1;
        report_invalid_encoding(encoding, zero_at, 1);
    }

    /* look the verifier up once, outside the loop */
    verify_fn = pg_wchar_table[encoding].mbverify;

    /* every completed iteration accounts for exactly one character */
    for (; len > 0; nchars++)
    {
        int         consumed;

        if (!IS_HIGHBIT_SET(*mbstr))
        {
            if (*mbstr == '\0')
            {
                /* embedded zero byte is never acceptable */
                if (noError)
                    return -1;
                report_invalid_encoding(encoding, mbstr, len);
            }
            else
            {
                /* shortcut: byte without the high bit is one character */
                mbstr++;
                len--;
                continue;
            }
        }

        consumed = (*verify_fn) ((const unsigned char *) mbstr, len);
        if (consumed < 0)
        {
            if (noError)
                return -1;
            report_invalid_encoding(encoding, mbstr, len);
        }

        mbstr += consumed;
        len -= consumed;
    }

    return nchars;
}
Definition at line 1869 of file wchar.c.
References GetDatabaseEncoding(), and pg_verify_mbstr_len().
Referenced by CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_ToDatum(), read_text_file(), and spg_text_leaf_consistent().
{
    /*
     * Same check as pg_verify_mbstr_len(), using the encoding returned by
     * GetDatabaseEncoding(); valid exactly when the result is
     * non-negative.
     */
    const int   nchars = pg_verify_mbstr_len(GetDatabaseEncoding(),
                                             mbstr, len, noError);

    return nchars >= 0;
}
| static int pg_wchar2euc_with_len | ( | const pg_wchar * | from, | |
| unsigned char * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 347 of file wchar.c.
{
    /*
     * Unpack up to "len" wide characters from "from" into bytes at "to",
     * stopping early at a zero wide character.  Each wide character is
     * written most-significant byte first, starting at its highest
     * non-zero byte (so 1 to 4 bytes per character).  The output is
     * zero-terminated; the return value is the number of bytes written,
     * not counting the terminator.
     */
    int         written = 0;

    for (; len > 0 && *from; from++, len--)
    {
        const pg_wchar wc = *from;
        const unsigned char b3 = (wc >> 24) & 0xff;
        const unsigned char b2 = (wc >> 16) & 0xff;
        const unsigned char b1 = (wc >> 8) & 0xff;
        const unsigned char b0 = wc & 0xff;

        if (b3)
        {
            to[0] = b3;
            to[1] = b2;
            to[2] = b1;
            to[3] = b0;
            to += 4;
            written += 4;
        }
        else if (b2)
        {
            to[0] = b2;
            to[1] = b1;
            to[2] = b0;
            to += 3;
            written += 3;
        }
        else if (b1)
        {
            to[0] = b1;
            to[1] = b0;
            to += 2;
            written += 2;
        }
        else
        {
            *to++ = b0;
            written += 1;
        }
    }

    *to = 0;
    return written;
}
| static int pg_wchar2mule_with_len | ( | const pg_wchar * | from, | |
| unsigned char * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 801 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, and IS_LCPRV2_B_RANGE.
{
    /*
     * Convert up to "len" wide characters from "from" into a byte string
     * at "to", stopping early at a zero wide character.  Bits 16..23 of
     * each wide character are classified with the IS_LC / IS_LCPRV
     * macros to choose the output layout (2, 3 or 4 bytes, with an
     * LCPRV prefix byte for the private ranges); anything unclassified
     * is emitted as its low byte alone.  The output is zero-terminated;
     * the return value is the byte count without the terminator.
     */
    int         nbytes = 0;

    for (; len > 0 && *from; from++, len--)
    {
        const pg_wchar wc = *from;
        const unsigned char lead = (wc >> 16) & 0xff;
        const unsigned char mid = (wc >> 8) & 0xff;
        const unsigned char low = wc & 0xff;

        if (IS_LC1(lead))
        {
            /* lead + 1 byte */
            to[0] = lead;
            to[1] = low;
            to += 2;
            nbytes += 2;
        }
        else if (IS_LC2(lead))
        {
            /* lead + 2 bytes */
            to[0] = lead;
            to[1] = mid;
            to[2] = low;
            to += 3;
            nbytes += 3;
        }
        else if (IS_LCPRV1_A_RANGE(lead))
        {
            /* prefix + lead + 1 byte */
            to[0] = LCPRV1_A;
            to[1] = lead;
            to[2] = low;
            to += 3;
            nbytes += 3;
        }
        else if (IS_LCPRV1_B_RANGE(lead))
        {
            to[0] = LCPRV1_B;
            to[1] = lead;
            to[2] = low;
            to += 3;
            nbytes += 3;
        }
        else if (IS_LCPRV2_A_RANGE(lead))
        {
            /* prefix + lead + 2 bytes */
            to[0] = LCPRV2_A;
            to[1] = lead;
            to[2] = mid;
            to[3] = low;
            to += 4;
            nbytes += 4;
        }
        else if (IS_LCPRV2_B_RANGE(lead))
        {
            to[0] = LCPRV2_B;
            to[1] = lead;
            to[2] = mid;
            to[3] = low;
            to += 4;
            nbytes += 4;
        }
        else
        {
            /* no recognized lead byte: low byte only */
            *to++ = low;
            nbytes += 1;
        }
    }

    *to = 0;
    return nbytes;
}
| static int pg_wchar2single_with_len | ( | const pg_wchar * | from, | |
| unsigned char * | to, | |||
| int | len | |||
| ) | [static] |
| static int pg_wchar2utf_with_len | ( | const pg_wchar * | from, | |
| unsigned char * | to, | |||
| int | len | |||
| ) | [static] |
Definition at line 510 of file wchar.c.
References pg_utf_mblen(), and unicode_to_utf8().
{
    /*
     * Encode up to "len" wide characters from "from" as UTF-8 at "to",
     * stopping early at a zero wide character.  The output is
     * zero-terminated; the return value is the number of bytes written,
     * excluding the terminator.
     */
    int         total = 0;

    for (; len > 0 && *from; from++, len--)
    {
        /*
         * unicode_to_utf8() hands back the buffer it just filled, so the
         * encoded length can be read straight from the new lead byte.
         */
        const int   nbytes = pg_utf_mblen(unicode_to_utf8(*from, to));

        to += nbytes;
        total += nbytes;
    }

    *to = 0;
    return total;
}
| void report_invalid_encoding | ( | int | encoding, | |
| const char * | mbstr, | |||
| int | len | |||
| ) |
Definition at line 1998 of file wchar.c.
References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().
Referenced by big52mic(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_ascii2mic(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), utf8_to_iso8859_1(), and UtfToLocal().
{
    /*
     * Report an invalid byte sequence for "encoding" via ereport(ERROR)
     * with ERRCODE_CHARACTER_NOT_IN_REPERTOIRE.  The message quotes, in
     * hex, the leading bytes of "mbstr": at most one character's worth
     * (per pg_encoding_mblen), at most "len" bytes, and never more than 8.
     *
     * buf holds up to 8 entries of "0xNN" plus a separating space
     * (5 chars each) and the terminating NUL.
     */
    int         l = pg_encoding_mblen(encoding, mbstr);
    char        buf[8 * 5 + 1];
    char       *p = buf;
    int         j,
                jlimit;

    /*
     * Start from an empty string: if jlimit turns out to be <= 0 (e.g. a
     * caller passes len <= 0) the loop below writes nothing, and buf
     * would otherwise be handed to "%s" uninitialized.
     */
    buf[0] = '\0';

    jlimit = Min(l, len);
    jlimit = Min(jlimit, 8);    /* prevent buffer overrun */

    for (j = 0; j < jlimit; j++)
    {
        p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
        /* separate entries with a space, but not after the last one */
        if (j < jlimit - 1)
            p += sprintf(p, " ");
    }

    ereport(ERROR,
            (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
             errmsg("invalid byte sequence for encoding \"%s\": %s",
                    pg_enc2name_tbl[encoding].name,
                    buf)));
}
| void report_untranslatable_char | ( | int | src_encoding, | |
| int | dest_encoding, | |||
| const char * | mbstr, | |||
| int | len | |||
| ) |
Definition at line 2030 of file wchar.c.
References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().
Referenced by big52mic(), latin2mic_with_table(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_mic2ascii(), utf8_to_iso8859_1(), and UtfToLocal().
{
    /*
     * Report, via ereport(ERROR) with ERRCODE_UNTRANSLATABLE_CHARACTER,
     * that the character at "mbstr" in src_encoding has no equivalent in
     * dest_encoding.  The message quotes, in hex, the leading bytes of
     * "mbstr": at most one character's worth (per pg_encoding_mblen for
     * src_encoding), at most "len" bytes, and never more than 8.
     *
     * buf holds up to 8 entries of "0xNN" plus a separating space
     * (5 chars each) and the terminating NUL.
     */
    int         l = pg_encoding_mblen(src_encoding, mbstr);
    char        buf[8 * 5 + 1];
    char       *p = buf;
    int         j,
                jlimit;

    /*
     * Start from an empty string: if jlimit turns out to be <= 0 (e.g. a
     * caller passes len <= 0) the loop below writes nothing, and buf
     * would otherwise be handed to "%s" uninitialized.
     */
    buf[0] = '\0';

    jlimit = Min(l, len);
    jlimit = Min(jlimit, 8);    /* prevent buffer overrun */

    for (j = 0; j < jlimit; j++)
    {
        p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
        /* separate entries with a space, but not after the last one */
        if (j < jlimit - 1)
            p += sprintf(p, " ");
    }

    ereport(ERROR,
            (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
             errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
                    buf,
                    pg_enc2name_tbl[src_encoding].name,
                    pg_enc2name_tbl[dest_encoding].name)));
}
| static int ucs_wcwidth | ( | pg_wchar | ucs | ) | [static] |
Definition at line 637 of file wchar.c.
References mbbisearch().
Referenced by pg_utf_dsplen().
{
/*
 * Compute a width value for the code point "ucs":
 *    0  if ucs is 0, or if mbbisearch() finds it in the "combining" table
 *   -1  if ucs is below 0x20, in 0x7f..0x9f, or above 0x10ffff
 *    2  if ucs falls in one of the ranges listed in the final expression
 *    1  otherwise
 */
/* sorted list of non-overlapping intervals of non-spacing characters */
static const struct mbinterval combining[] = {
{0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
{0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
{0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
{0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
{0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
{0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
{0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
{0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
{0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
{0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
{0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
{0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
{0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
{0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
{0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
{0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
{0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
{0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
{0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
{0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
{0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
{0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
{0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
{0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
{0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
{0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
{0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
{0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
{0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
{0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
{0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
{0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
{0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},
{0xFFF9, 0xFFFB}
};
/* test for 8-bit control characters */
if (ucs == 0)
return 0;
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
return -1;
/* binary search in table of non-spacing characters */
/* third argument is the index of the table's last entry (count - 1) */
if (mbbisearch(ucs, combining,
sizeof(combining) / sizeof(struct mbinterval) - 1))
return 0;
/*
 * if we arrive here, ucs is not a combining or C0/C1 control character
 */
/*
 * The parenthesized condition evaluates to 0 or 1, so the result is 1, or
 * 2 when ucs lies in one of the ranges below.  The mask test
 * (ucs & ~0x0011) != 0x300a excludes 0x300a, 0x300b, 0x301a and 0x301b
 * from the 0x2e80..0xa4cf range; 0x303f is excluded as well.
 */
return 1 +
(ucs >= 0x1100 &&
(ucs <= 0x115f || /* Hangul Jamo init. consonants */
(ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
ucs != 0x303f) || /* CJK ... Yi */
(ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
(ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
* Ideographs */
(ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
(ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
(ucs >= 0xffe0 && ucs <= 0xffe6) ||
(ucs >= 0x20000 && ucs <= 0x2ffff)));
}
| unsigned char* unicode_to_utf8 | ( | pg_wchar | c, | |
| unsigned char * | utf8string | |||
| ) |
Definition at line 475 of file wchar.c.
Referenced by json_lex_string(), pg_wchar2utf_with_len(), and unicode_to_sqlchar().
{
    /*
     * Encode code point "c" as UTF-8 into "utf8string" (1 to 4 bytes,
     * chosen by magnitude) and return "utf8string".  No terminator is
     * written.
     */
    if (c <= 0x7F)
    {
        /* one byte: the value itself */
        utf8string[0] = c;
        return utf8string;
    }

    if (c <= 0x7FF)
    {
        /* two bytes: 5 + 6 payload bits */
        utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
        utf8string[1] = 0x80 | (c & 0x3F);
        return utf8string;
    }

    if (c <= 0xFFFF)
    {
        /* three bytes: 4 + 6 + 6 payload bits */
        utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
        utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
        utf8string[2] = 0x80 | (c & 0x3F);
        return utf8string;
    }

    /* four bytes: 3 + 6 + 6 + 6 payload bits */
    utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
    utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
    utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
    utf8string[3] = 0x80 | (c & 0x3F);
    return utf8string;
}
| pg_wchar utf8_to_unicode | ( | const unsigned char * | c | ) |
Definition at line 714 of file wchar.c.
Referenced by pg_utf_dsplen(), and pg_wcsformat().
{
    /*
     * Decode the UTF-8 character starting at "c" into a code point.  The
     * lead byte's bit pattern selects how many continuation bytes are
     * read (0 to 3); continuation bytes are not validated here.  A lead
     * byte matching none of the patterns yields 0xffffffff.
     */
    const unsigned char lead = c[0];

    if ((lead & 0x80) == 0)
        return (pg_wchar) lead;

    if ((lead & 0xe0) == 0xc0)
    {
        const int   hi = (lead & 0x1f) << 6;
        const int   lo = c[1] & 0x3f;

        return (pg_wchar) (hi | lo);
    }

    if ((lead & 0xf0) == 0xe0)
    {
        const int   hi = (lead & 0x0f) << 12;
        const int   mid = (c[1] & 0x3f) << 6;
        const int   lo = c[2] & 0x3f;

        return (pg_wchar) (hi | mid | lo);
    }

    if ((lead & 0xf8) == 0xf0)
    {
        const int   top = (lead & 0x07) << 18;
        const int   hi = (c[1] & 0x3f) << 12;
        const int   mid = (c[2] & 0x3f) << 6;
        const int   lo = c[3] & 0x3f;

        return (pg_wchar) (top | hi | mid | lo);
    }

    /* that is an invalid code on purpose */
    return 0xffffffff;
}
Definition at line 1723 of file wchar.c.
Referenced by pg_dsplen(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_wchar2mb(), and pg_wchar2mb_with_len().
1.7.1