#include "postgres.h"
#include "mb/pg_wchar.h"
Go to the source code of this file.
Data Structures | |
struct | mbinterval |
Defines | |
#define | IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe) |
#define | pg_euccn_verifier pg_euckr_verifier |
Functions | |
static int | pg_ascii2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_ascii_mblen (const unsigned char *s) |
static int | pg_ascii_dsplen (const unsigned char *s) |
static int | pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euc_mblen (const unsigned char *s) |
static int | pg_euc_dsplen (const unsigned char *s) |
static int | pg_eucjp2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_eucjp_mblen (const unsigned char *s) |
static int | pg_eucjp_dsplen (const unsigned char *s) |
static int | pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euckr_mblen (const unsigned char *s) |
static int | pg_euckr_dsplen (const unsigned char *s) |
static int | pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euccn_mblen (const unsigned char *s) |
static int | pg_euccn_dsplen (const unsigned char *s) |
static int | pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_euctw_mblen (const unsigned char *s) |
static int | pg_euctw_dsplen (const unsigned char *s) |
static int | pg_wchar2euc_with_len (const pg_wchar *from, unsigned char *to, int len) |
static int | pg_johab_mblen (const unsigned char *s) |
static int | pg_johab_dsplen (const unsigned char *s) |
static int | pg_utf2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
unsigned char * | unicode_to_utf8 (pg_wchar c, unsigned char *utf8string) |
static int | pg_wchar2utf_with_len (const pg_wchar *from, unsigned char *to, int len) |
int | pg_utf_mblen (const unsigned char *s) |
static int | mbbisearch (pg_wchar ucs, const struct mbinterval *table, int max) |
static int | ucs_wcwidth (pg_wchar ucs) |
pg_wchar | utf8_to_unicode (const unsigned char *c) |
static int | pg_utf_dsplen (const unsigned char *s) |
static int | pg_mule2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2mule_with_len (const pg_wchar *from, unsigned char *to, int len) |
int | pg_mule_mblen (const unsigned char *s) |
static int | pg_mule_dsplen (const unsigned char *s) |
static int | pg_latin12wchar_with_len (const unsigned char *from, pg_wchar *to, int len) |
static int | pg_wchar2single_with_len (const pg_wchar *from, unsigned char *to, int len) |
static int | pg_latin1_mblen (const unsigned char *s) |
static int | pg_latin1_dsplen (const unsigned char *s) |
static int | pg_sjis_mblen (const unsigned char *s) |
static int | pg_sjis_dsplen (const unsigned char *s) |
static int | pg_big5_mblen (const unsigned char *s) |
static int | pg_big5_dsplen (const unsigned char *s) |
static int | pg_gbk_mblen (const unsigned char *s) |
static int | pg_gbk_dsplen (const unsigned char *s) |
static int | pg_uhc_mblen (const unsigned char *s) |
static int | pg_uhc_dsplen (const unsigned char *s) |
static int | pg_gb18030_mblen (const unsigned char *s) |
static int | pg_gb18030_dsplen (const unsigned char *s) |
static int | pg_ascii_verifier (const unsigned char *s, int len) |
static int | pg_eucjp_verifier (const unsigned char *s, int len) |
static int | pg_euckr_verifier (const unsigned char *s, int len) |
static int | pg_euctw_verifier (const unsigned char *s, int len) |
static int | pg_johab_verifier (const unsigned char *s, int len) |
static int | pg_mule_verifier (const unsigned char *s, int len) |
static int | pg_latin1_verifier (const unsigned char *s, int len) |
static int | pg_sjis_verifier (const unsigned char *s, int len) |
static int | pg_big5_verifier (const unsigned char *s, int len) |
static int | pg_gbk_verifier (const unsigned char *s, int len) |
static int | pg_uhc_verifier (const unsigned char *s, int len) |
static int | pg_gb18030_verifier (const unsigned char *s, int len) |
static int | pg_utf8_verifier (const unsigned char *s, int len) |
bool | pg_utf8_islegal (const unsigned char *source, int length) |
static bool | pg_generic_charinc (unsigned char *charptr, int len) |
static bool | pg_utf8_increment (unsigned char *charptr, int length) |
static bool | pg_eucjp_increment (unsigned char *charptr, int length) |
int | pg_mic_mblen (const unsigned char *mbstr) |
int | pg_encoding_mblen (int encoding, const char *mbstr) |
int | pg_encoding_dsplen (int encoding, const char *mbstr) |
int | pg_encoding_verifymb (int encoding, const char *mbstr, int len) |
int | pg_encoding_max_length (int encoding) |
int | pg_database_encoding_max_length (void) |
mbcharacter_incrementer | pg_database_encoding_character_incrementer (void) |
bool | pg_verifymbstr (const char *mbstr, int len, bool noError) |
bool | pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError) |
int | pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError) |
void | check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding) |
void | report_invalid_encoding (int encoding, const char *mbstr, int len) |
void | report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len) |
Variables | |
pg_wchar_tbl | pg_wchar_table [] |
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
Definition at line 1130 of file wchar.c.
Referenced by pg_eucjp_verifier(), pg_euckr_verifier(), pg_euctw_verifier(), and pg_johab_verifier().
void check_encoding_conversion_args | ( | int | src_encoding, | |
int | dest_encoding, | |||
int | len, | |||
int | expected_src_encoding, | |||
int | expected_dest_encoding | |||
) |
Definition at line 1969 of file wchar.c.
References elog, ERROR, name, pg_enc2name_tbl, and PG_VALID_ENCODING.
{ if (!PG_VALID_ENCODING(src_encoding)) elog(ERROR, "invalid source encoding ID: %d", src_encoding); if (src_encoding != expected_src_encoding && expected_src_encoding >= 0) elog(ERROR, "expected source encoding \"%s\", but got \"%s\"", pg_enc2name_tbl[expected_src_encoding].name, pg_enc2name_tbl[src_encoding].name); if (!PG_VALID_ENCODING(dest_encoding)) elog(ERROR, "invalid destination encoding ID: %d", dest_encoding); if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0) elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"", pg_enc2name_tbl[expected_dest_encoding].name, pg_enc2name_tbl[dest_encoding].name); if (len < 0) elog(ERROR, "encoding conversion length must not be negative"); }
static int mbbisearch | ( | pg_wchar | ucs, | |
const struct mbinterval * | table, | |||
int | max | |||
) | [static] |
Definition at line 584 of file wchar.c.
Referenced by ucs_wcwidth().
{ int min = 0; int mid; if (ucs < table[0].first || ucs > table[max].last) return 0; while (max >= min) { mid = (min + max) / 2; if (ucs > table[mid].last) min = mid + 1; else if (ucs < table[mid].first) max = mid - 1; else return 1; } return 0; }
static int pg_ascii2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
static int pg_ascii_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 61 of file wchar.c.
Referenced by pg_big5_dsplen(), pg_euc_dsplen(), pg_euccn_dsplen(), pg_eucjp_dsplen(), pg_euctw_dsplen(), pg_gb18030_dsplen(), pg_gbk_dsplen(), pg_latin1_dsplen(), pg_sjis_dsplen(), and pg_uhc_dsplen().
/*
 * pg_ascii_dsplen
 *
 * Display width of the single-byte character at s:
 *    0  for the NUL byte,
 *   -1  for bytes below 0x20 and for 0x7f,
 *    1  for every other byte.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
    const unsigned char ch = *s;

    if (ch == '\0')
        return 0;

    return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
}
static int pg_ascii_mblen | ( | const unsigned char * | s | ) | [static] |
static int pg_ascii_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
static int pg_big5_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1007 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
/*
 * pg_big5_dsplen
 *
 * Display width of the Big5 character starting at s: 2 when the lead byte
 * has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_big5_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;               /* kanji? */

    return pg_ascii_dsplen(s);  /* should be ASCII */
}
static int pg_big5_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 995 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_big5_verifier().
/*
 * pg_big5_mblen
 *
 * Byte length of the Big5 character starting at s: 2 when the lead byte has
 * its high bit set, 1 otherwise.
 */
static int
pg_big5_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2   /* kanji? */
        : 1;                        /* should be ASCII */
}
static int pg_big5_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1344 of file wchar.c.
References pg_big5_mblen().
/*
 * pg_big5_verifier
 *
 * Verify the Big5 character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if fewer than that many bytes are available
 * or any trailing byte is NUL.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_big5_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    /* Bytes after the lead byte must not be NUL. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
mbcharacter_incrementer pg_database_encoding_character_incrementer | ( | void | ) |
Definition at line 1845 of file wchar.c.
References GetDatabaseEncoding(), PG_EUC_JP, and PG_UTF8.
Referenced by make_greater_string().
{ /* * Eventually it might be best to add a field to pg_wchar_table[], but for * now we just use a switch. */ switch (GetDatabaseEncoding()) { case PG_UTF8: return pg_utf8_increment; case PG_EUC_JP: return pg_eucjp_increment; default: return pg_generic_charinc; } }
int pg_database_encoding_max_length | ( | void | ) |
Definition at line 1836 of file wchar.c.
References GetDatabaseEncoding(), and pg_wchar_tbl::maxmblen.
Referenced by appendStringInfoRegexpSubstr(), BeginCopy(), bpcharlen(), charlen_to_bytelen(), check_replace_text_has_escape_char(), dotrim(), gbt_bpchar_compress(), gbt_bpchar_consistent(), gbt_text_compress(), gbt_text_consistent(), Generic_Text_IC_like(), GenericMatchText(), infix(), init_tsvector_parser(), like_escape(), like_fixed_prefix(), lowerstr_with_len(), lpad(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), regexp_fixed_prefix(), rpad(), show_trgm(), str_initcap(), str_tolower(), str_toupper(), text_length(), text_position_setup(), text_reverse(), text_substring(), TParserInit(), translate(), tsvectorout(), and unaccent_lexize().
{ return pg_wchar_table[GetDatabaseEncoding()].maxmblen; }
int pg_encoding_dsplen | ( | int | encoding, | |
const char * | mbstr | |||
) |
Definition at line 1793 of file wchar.c.
References Assert, PG_SQL_ASCII, and PG_VALID_ENCODING.
Referenced by PQdsplen(), and reportErrorPosition().
{ Assert(PG_VALID_ENCODING(encoding)); return ((encoding >= 0 && encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ? ((*pg_wchar_table[encoding].dsplen) ((const unsigned char *) mbstr)) : ((*pg_wchar_table[PG_SQL_ASCII].dsplen) ((const unsigned char *) mbstr))); }
int pg_encoding_max_length | ( | int | encoding | ) |
Definition at line 1823 of file wchar.c.
References Assert, pg_wchar_tbl::maxmblen, and PG_VALID_ENCODING.
Referenced by ascii(), chr(), pg_encoding_mbcliplen(), pg_verify_mbstr_len(), reportErrorPosition(), and type_maximum_size().
{ Assert(PG_VALID_ENCODING(encoding)); return pg_wchar_table[encoding].maxmblen; }
int pg_encoding_mblen | ( | int | encoding, | |
const char * | mbstr | |||
) |
Definition at line 1779 of file wchar.c.
References Assert, PG_SQL_ASCII, and PG_VALID_ENCODING.
Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), CopyReadLineText(), PQescapeInternal(), PQescapeStringInternal(), PQmblen(), report_invalid_encoding(), report_untranslatable_char(), reportErrorPosition(), and unicode_to_sqlchar().
{ Assert(PG_VALID_ENCODING(encoding)); return ((encoding >= 0 && encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ? ((*pg_wchar_table[encoding].mblen) ((const unsigned char *) mbstr)) : ((*pg_wchar_table[PG_SQL_ASCII].mblen) ((const unsigned char *) mbstr))); }
int pg_encoding_verifymb | ( | int | encoding, | |
const char * | mbstr, | |||
int | len | |||
) |
Definition at line 1809 of file wchar.c.
References Assert, PG_SQL_ASCII, and PG_VALID_ENCODING.
Referenced by big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().
{ Assert(PG_VALID_ENCODING(encoding)); return ((encoding >= 0 && encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ? ((*pg_wchar_table[encoding].mbverify) ((const unsigned char *) mbstr, len)) : ((*pg_wchar_table[PG_SQL_ASCII].mbverify) ((const unsigned char *) mbstr, len))); }
static int pg_euc2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 75 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().
{ int cnt = 0; while (len > 0 && *from) { if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte * KANA") */ { from++; *to = (SS2 << 8) | *from++; len -= 2; } else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ { from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */ { *to = *from++ << 8; *to |= *from++; len -= 2; } else /* must be ASCII */ { *to = *from++; len--; } to++; cnt++; } *to = 0; return cnt; }
static int pg_euc_dsplen | ( | const unsigned char * | s | ) | [inline, static] |
Definition at line 130 of file wchar.c.
References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.
Referenced by pg_euckr_dsplen(), and pg_johab_dsplen().
{ int len; if (*s == SS2) len = 2; else if (*s == SS3) len = 2; else if (IS_HIGHBIT_SET(*s)) len = 2; else len = pg_ascii_dsplen(s); return len; }
static int pg_euc_mblen | ( | const unsigned char * | s | ) | [inline, static] |
Definition at line 114 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
Referenced by pg_eucjp_mblen(), pg_euckr_mblen(), and pg_johab_mblen().
{ int len; if (*s == SS2) len = 2; else if (*s == SS3) len = 3; else if (IS_HIGHBIT_SET(*s)) len = 2; else len = 1; return len; }
static int pg_euccn2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 202 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
{ int cnt = 0; while (len > 0 && *from) { if (*from == SS2 && len >= 3) /* code set 2 (unused?) */ { from++; *to = (SS2 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */ { from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */ { *to = *from++ << 8; *to |= *from++; len -= 2; } else { *to = *from++; len--; } to++; cnt++; } *to = 0; return cnt; }
static int pg_euccn_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 253 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
/*
 * pg_euccn_dsplen
 *
 * Display width of the EUC_CN character starting at s: 2 when the lead byte
 * has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_euccn_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);
}
static int pg_euccn_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 241 of file wchar.c.
References IS_HIGHBIT_SET.
/*
 * pg_euccn_mblen
 *
 * Byte length of the EUC_CN character starting at s: 2 when the lead byte
 * has its high bit set, 1 otherwise.
 */
static int
pg_euccn_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2 : 1;
}
static int pg_eucjp2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 149 of file wchar.c.
References pg_euc2wchar_with_len().
{ return pg_euc2wchar_with_len(from, to, len); }
static int pg_eucjp_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 161 of file wchar.c.
References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.
{ int len; if (*s == SS2) len = 1; else if (*s == SS3) len = 2; else if (IS_HIGHBIT_SET(*s)) len = 2; else len = pg_ascii_dsplen(s); return len; }
static bool pg_eucjp_increment | ( | unsigned char * | charptr, | |
int | length | |||
) | [static] |
Definition at line 1633 of file wchar.c.
References i, IS_HIGHBIT_SET, SS2, and SS3.
{ unsigned char c1, c2; int i; c1 = *charptr; switch (c1) { case SS2: /* JIS X 0201 */ if (length != 2) return false; c2 = charptr[1]; if (c2 >= 0xdf) charptr[0] = charptr[1] = 0xa1; else if (c2 < 0xa1) charptr[1] = 0xa1; else charptr[1]++; break; case SS3: /* JIS X 0212 */ if (length != 3) return false; for (i = 2; i > 0; i--) { c2 = charptr[i]; if (c2 < 0xa1) { charptr[i] = 0xa1; return true; } else if (c2 < 0xfe) { charptr[i]++; return true; } } /* Out of 3-byte code region */ return false; default: if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */ { if (length != 2) return false; for (i = 1; i >= 0; i--) { c2 = charptr[i]; if (c2 < 0xa1) { charptr[i] = 0xa1; return true; } else if (c2 < 0xfe) { charptr[i]++; return true; } } /* Out of 2 byte code region */ return false; } else { /* ASCII, single byte */ if (c1 > 0x7e) return false; (*charptr)++; } break; } return true; }
static int pg_eucjp_mblen | ( | const unsigned char * | s | ) | [static] |
static int pg_eucjp_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1133 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.
{ int l; unsigned char c1, c2; c1 = *s++; switch (c1) { case SS2: /* JIS X 0201 */ l = 2; if (l > len) return -1; c2 = *s++; if (c2 < 0xa1 || c2 > 0xdf) return -1; break; case SS3: /* JIS X 0212 */ l = 3; if (l > len) return -1; c2 = *s++; if (!IS_EUC_RANGE_VALID(c2)) return -1; c2 = *s++; if (!IS_EUC_RANGE_VALID(c2)) return -1; break; default: if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */ { l = 2; if (l > len) return -1; if (!IS_EUC_RANGE_VALID(c1)) return -1; c2 = *s++; if (!IS_EUC_RANGE_VALID(c2)) return -1; } else /* must be ASCII */ { l = 1; } break; } return l; }
static int pg_euckr2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 180 of file wchar.c.
References pg_euc2wchar_with_len().
{ return pg_euc2wchar_with_len(from, to, len); }
static int pg_euckr_dsplen | ( | const unsigned char * | s | ) | [static] |
static int pg_euckr_mblen | ( | const unsigned char * | s | ) | [static] |
static int pg_euckr_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1188 of file wchar.c.
References IS_EUC_RANGE_VALID, and IS_HIGHBIT_SET.
/*
 * pg_euckr_verifier
 *
 * Verify the EUC_KR character at s, with len bytes available.  Returns 1
 * when the lead byte's high bit is clear.  A high-bit lead byte starts a
 * 2-byte character: returns 2 if both bytes pass IS_EUC_RANGE_VALID, or -1
 * if the input is too short or either byte is out of range.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
    const unsigned char lead = s[0];

    if (!IS_HIGHBIT_SET(lead))
        return 1;               /* must be ASCII */

    if (len < 2)
        return -1;
    if (!IS_EUC_RANGE_VALID(lead))
        return -1;
    if (!IS_EUC_RANGE_VALID(s[1]))
        return -1;

    return 2;
}
static int pg_euctw2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 269 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
{ int cnt = 0; while (len > 0 && *from) { if (*from == SS2 && len >= 4) /* code set 2 */ { from++; *to = (((uint32) SS2) << 24) | (*from++ << 16); *to |= *from++ << 8; *to |= *from++; len -= 4; } else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ { from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */ { *to = *from++ << 8; *to |= *from++; len -= 2; } else { *to = *from++; len--; } to++; cnt++; } *to = 0; return cnt; }
static int pg_euctw_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 325 of file wchar.c.
References IS_HIGHBIT_SET, pg_ascii_dsplen(), SS2, and SS3.
{ int len; if (*s == SS2) len = 2; else if (*s == SS3) len = 2; else if (IS_HIGHBIT_SET(*s)) len = 2; else len = pg_ascii_dsplen(s); return len; }
static int pg_euctw_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 309 of file wchar.c.
References IS_HIGHBIT_SET, SS2, and SS3.
{ int len; if (*s == SS2) len = 4; else if (*s == SS3) len = 3; else if (IS_HIGHBIT_SET(*s)) len = 2; else len = 1; return len; }
static int pg_euctw_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1220 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, SS2, and SS3.
{ int l; unsigned char c1, c2; c1 = *s++; switch (c1) { case SS2: /* CNS 11643 Plane 1-7 */ l = 4; if (l > len) return -1; c2 = *s++; if (c2 < 0xa1 || c2 > 0xa7) return -1; c2 = *s++; if (!IS_EUC_RANGE_VALID(c2)) return -1; c2 = *s++; if (!IS_EUC_RANGE_VALID(c2)) return -1; break; case SS3: /* unused */ return -1; default: if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */ { l = 2; if (l > len) return -1; /* no further range check on c1? */ c2 = *s++; if (!IS_EUC_RANGE_VALID(c2)) return -1; } else /* must be ASCII */ { l = 1; } break; } return l; }
static int pg_gb18030_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1096 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
/*
 * pg_gb18030_dsplen
 *
 * Display width of the GB18030 character starting at s: 2 when the lead
 * byte has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;

    return pg_ascii_dsplen(s);  /* ASCII */
}
static int pg_gb18030_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1077 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_gb18030_verifier().
/*
 * pg_gb18030_mblen
 *
 * Byte length of the GB18030 character starting at s.  A lead byte without
 * the high bit is a 1-byte character.  Otherwise the second byte decides:
 * 0x30..0x39 marks a 4-byte character, and any other value (including the
 * 0x40..0x7e and 0x80..0xfe trail ranges) is treated as a 2-byte character.
 *
 * The second byte is examined only when the lead byte has its high bit set.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
    if (!IS_HIGHBIT_SET(*s))
        return 1;               /* ASCII */

    if (s[1] >= 0x30 && s[1] <= 0x39)
        return 4;

    return 2;
}
static int pg_gb18030_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1404 of file wchar.c.
References pg_gb18030_mblen().
/*
 * pg_gb18030_verifier
 *
 * Verify the GB18030 character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if fewer than that many bytes are available
 * or any trailing byte is NUL.
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_gb18030_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    /* Bytes after the lead byte must not be NUL. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
static int pg_gbk_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1034 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
/*
 * pg_gbk_dsplen
 *
 * Display width of the GBK character starting at s: 2 when the lead byte
 * has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_gbk_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;               /* kanji? */

    return pg_ascii_dsplen(s);  /* should be ASCII */
}
static int pg_gbk_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1022 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_gbk_verifier().
/*
 * pg_gbk_mblen
 *
 * Byte length of the GBK character starting at s: 2 when the lead byte has
 * its high bit set, 1 otherwise.
 */
static int
pg_gbk_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2   /* kanji? */
        : 1;                        /* should be ASCII */
}
static int pg_gbk_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1364 of file wchar.c.
References pg_gbk_mblen().
/*
 * pg_gbk_verifier
 *
 * Verify the GBK character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if fewer than that many bytes are available
 * or any trailing byte is NUL.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_gbk_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    /* Bytes after the lead byte must not be NUL. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
static bool pg_generic_charinc | ( | unsigned char * | charptr, | |
int | len | |||
) | [static] |
Definition at line 1521 of file wchar.c.
References GetDatabaseEncoding(), pg_wchar_tbl::mbverify, and pg_wchar_table.
{ unsigned char *lastbyte = charptr + len - 1; mbverifier mbverify; /* We can just invoke the character verifier directly. */ mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify; while (*lastbyte < (unsigned char) 255) { (*lastbyte)++; if ((*mbverify) (charptr, len) == len) return true; } return false; }
static int pg_johab_dsplen | ( | const unsigned char * | s | ) | [static] |
static int pg_johab_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 393 of file wchar.c.
References pg_euc_mblen().
Referenced by pg_johab_verifier().
/*
 * pg_johab_mblen
 *
 * Byte length of the JOHAB character starting at s; uses the same lead-byte
 * rules as pg_euc_mblen().
 */
static int
pg_johab_mblen(const unsigned char *s)
{
    const int   mbl = pg_euc_mblen(s);

    return mbl;
}
static int pg_johab_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1270 of file wchar.c.
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and pg_johab_mblen().
/*
 * pg_johab_verifier
 *
 * Verify the JOHAB character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if the input is too short or a trailing
 * byte of a multibyte character fails IS_EUC_RANGE_VALID.  A lead byte
 * without the high bit is accepted without further checks.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_johab_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    if (!IS_HIGHBIT_SET(*s))
        return mbl;

    for (i = 1; i < mbl; i++)
    {
        const unsigned char trail = s[i];

        if (!IS_EUC_RANGE_VALID(trail))
            return -1;
    }

    return mbl;
}
static int pg_latin12wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
static int pg_latin1_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 955 of file wchar.c.
References pg_ascii_dsplen().
/*
 * pg_latin1_dsplen
 *
 * Display width of the single-byte character at s; identical to
 * pg_ascii_dsplen().
 */
static int
pg_latin1_dsplen(const unsigned char *s)
{
    const int   width = pg_ascii_dsplen(s);

    return width;
}
static int pg_latin1_mblen | ( | const unsigned char * | s | ) | [static] |
static int pg_latin1_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
int pg_mic_mblen | ( | const unsigned char * | mbstr | ) |
Definition at line 1770 of file wchar.c.
References pg_mule_mblen().
Referenced by mic2latin(), and mic2latin_with_table().
/*
 * pg_mic_mblen
 *
 * Byte length of the character starting at mbstr; a thin wrapper around
 * pg_mule_mblen().
 */
int
pg_mic_mblen(const unsigned char *mbstr)
{
    const int   mbl = pg_mule_mblen(mbstr);

    return mbl;
}
static int pg_mule2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 748 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.
{ int cnt = 0; while (len > 0 && *from) { if (IS_LC1(*from) && len >= 2) { *to = *from++ << 16; *to |= *from++; len -= 2; } else if (IS_LCPRV1(*from) && len >= 3) { from++; *to = *from++ << 16; *to |= *from++; len -= 3; } else if (IS_LC2(*from) && len >= 3) { *to = *from++ << 16; *to |= *from++ << 8; *to |= *from++; len -= 3; } else if (IS_LCPRV2(*from) && len >= 4) { from++; *to = *from++ << 16; *to |= *from++ << 8; *to |= *from++; len -= 4; } else { /* assume ASCII */ *to = (unsigned char) *from++; len--; } to++; cnt++; } *to = 0; return cnt; }
static int pg_mule_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 884 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.
/*
 * pg_mule_dsplen
 *
 * Display width of the MULE character starting at s: 1 for IS_LC1 and
 * IS_LCPRV1 lead bytes, 2 for IS_LC2 and IS_LCPRV2 lead bytes, and 1 for
 * anything else (assumed ASCII).
 *
 * Note: it's not really appropriate to assume that all multibyte charsets
 * are double-wide on screen.  But this seems an okay approximation for the
 * MULE charsets we currently support.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
    if (IS_LC1(*s) || IS_LCPRV1(*s))
        return 1;
    if (IS_LC2(*s) || IS_LCPRV2(*s))
        return 2;

    return 1;                   /* assume ASCII */
}
int pg_mule_mblen | ( | const unsigned char * | s | ) |
Definition at line 866 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1, and IS_LCPRV2.
Referenced by pg_mic_mblen(), and pg_mule_verifier().
static int pg_mule_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1294 of file wchar.c.
References IS_HIGHBIT_SET, and pg_mule_mblen().
/*
 * pg_mule_verifier
 *
 * Verify the MULE character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if the input is too short or any byte
 * after the first lacks the high bit.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_mule_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    for (i = 1; i < mbl; i++)
    {
        const unsigned char trail = s[i];

        if (!IS_HIGHBIT_SET(trail))
            return -1;
    }

    return mbl;
}
static int pg_sjis_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 978 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
/*
 * pg_sjis_dsplen
 *
 * Display width of the SJIS character starting at s: 1 for a lead byte in
 * 0xa1..0xdf, 2 for any other high-bit lead byte, otherwise whatever
 * pg_ascii_dsplen() reports.
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;               /* 1 byte kana? */
    if (IS_HIGHBIT_SET(*s))
        return 2;               /* kanji? */

    return pg_ascii_dsplen(s);  /* should be ASCII */
}
static int pg_sjis_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 964 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_sjis_verifier().
/*
 * pg_sjis_mblen
 *
 * Byte length of the SJIS character starting at s: 1 for a lead byte in
 * 0xa1..0xdf, 2 for any other high-bit lead byte, 1 otherwise.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
    if (*s >= 0xa1 && *s <= 0xdf)
        return 1;               /* 1 byte kana? */
    if (IS_HIGHBIT_SET(*s))
        return 2;               /* kanji? */

    return 1;                   /* should be ASCII */
}
static int pg_sjis_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1321 of file wchar.c.
References ISSJISHEAD, ISSJISTAIL, and pg_sjis_mblen().
/*
 * pg_sjis_verifier
 *
 * Verify the SJIS character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if the input is too short or a 2-byte
 * character fails the ISSJISHEAD / ISSJISTAIL checks.  Single-byte
 * characters are accepted as classified by pg_sjis_mblen().
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_sjis_mblen(s);
    unsigned char head;
    unsigned char tail;

    if (len < mbl)
        return -1;

    if (mbl == 1)               /* pg_sjis_mblen already verified it */
        return mbl;

    head = s[0];
    tail = s[1];
    if (ISSJISHEAD(head) && ISSJISTAIL(tail))
        return mbl;

    return -1;
}
static int pg_uhc_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1061 of file wchar.c.
References IS_HIGHBIT_SET, and pg_ascii_dsplen().
/*
 * pg_uhc_dsplen
 *
 * Display width of the UHC character starting at s: 2 when the lead byte
 * has its high bit set, otherwise whatever pg_ascii_dsplen() reports.
 */
static int
pg_uhc_dsplen(const unsigned char *s)
{
    if (IS_HIGHBIT_SET(*s))
        return 2;               /* 2byte? */

    return pg_ascii_dsplen(s);  /* should be ASCII */
}
static int pg_uhc_mblen | ( | const unsigned char * | s | ) | [static] |
Definition at line 1049 of file wchar.c.
References IS_HIGHBIT_SET.
Referenced by pg_uhc_verifier().
/*
 * pg_uhc_mblen
 *
 * Byte length of the UHC character starting at s: 2 when the lead byte has
 * its high bit set, 1 otherwise.
 */
static int
pg_uhc_mblen(const unsigned char *s)
{
    return IS_HIGHBIT_SET(*s) ? 2   /* 2byte? */
        : 1;                        /* should be ASCII */
}
static int pg_uhc_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1384 of file wchar.c.
References pg_uhc_mblen().
/*
 * pg_uhc_verifier
 *
 * Verify the UHC character at s, with len bytes available.  Returns the
 * character's byte length, or -1 if fewer than that many bytes are available
 * or any trailing byte is NUL.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_uhc_mblen(s);
    int         i;

    if (len < mbl)
        return -1;

    /* Bytes after the lead byte must not be NUL. */
    for (i = 1; i < mbl; i++)
    {
        if (s[i] == '\0')
            return -1;
    }

    return mbl;
}
static int pg_utf2wchar_with_len | ( | const unsigned char * | from, | |
pg_wchar * | to, | |||
int | len | |||
) | [static] |
Definition at line 411 of file wchar.c.
{ int cnt = 0; uint32 c1, c2, c3, c4; while (len > 0 && *from) { if ((*from & 0x80) == 0) { *to = *from++; len--; } else if ((*from & 0xe0) == 0xc0) { if (len < 2) break; /* drop trailing incomplete char */ c1 = *from++ & 0x1f; c2 = *from++ & 0x3f; *to = (c1 << 6) | c2; len -= 2; } else if ((*from & 0xf0) == 0xe0) { if (len < 3) break; /* drop trailing incomplete char */ c1 = *from++ & 0x0f; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; *to = (c1 << 12) | (c2 << 6) | c3; len -= 3; } else if ((*from & 0xf8) == 0xf0) { if (len < 4) break; /* drop trailing incomplete char */ c1 = *from++ & 0x07; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; c4 = *from++ & 0x3f; *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4; len -= 4; } else { /* treat a bogus char as length 1; not ours to raise error */ *to = *from++; len--; } to++; cnt++; } *to = 0; return cnt; }
static bool pg_utf8_increment | ( | unsigned char * | charptr, | |
int | length | |||
) | [static] |
Definition at line 1555 of file wchar.c.
/*
 * pg_utf8_increment
 *
 * Modify the UTF-8 character at charptr (length bytes) in place so that it
 * becomes a larger byte sequence.  Returns true on success, false if length
 * is outside 1..4 or no byte can be raised.
 *
 * Bytes are tried from last to first.  The third and fourth bytes may rise
 * up to 0xBF.  The second byte may rise up to 0xBF as well, except after a
 * 0xED lead byte (limit 0x9F) or a 0xF4 lead byte (limit 0x8F).  The lead
 * byte itself is not raised when it is 0x7F, 0xDF, 0xEF or 0xF4.  Bytes
 * that could not be raised are left as they are when an earlier byte is
 * bumped instead.
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
    unsigned char lead;

    /* reject lengths 5 and 6 (and anything else unexpected) for now */
    if (length < 1 || length > 4)
        return false;

    if (length == 4 && charptr[3] < 0xBF)
    {
        charptr[3]++;
        return true;
    }

    if (length >= 3 && charptr[2] < 0xBF)
    {
        charptr[2]++;
        return true;
    }

    if (length >= 2)
    {
        unsigned char limit = 0xBF;

        if (charptr[0] == 0xED)
            limit = 0x9F;
        else if (charptr[0] == 0xF4)
            limit = 0x8F;

        if (charptr[1] < limit)
        {
            charptr[1]++;
            return true;
        }
    }

    /* Nothing after the lead byte could be raised; try the lead byte. */
    lead = charptr[0];
    if (lead == 0x7F || lead == 0xDF || lead == 0xEF || lead == 0xF4)
        return false;

    charptr[0]++;
    return true;
}
bool pg_utf8_islegal | ( | const unsigned char * | source, | |
int | length | |||
) |
Definition at line 1452 of file wchar.c.
Referenced by pg_utf8_verifier(), utf8_to_iso8859_1(), and UtfToLocal().
/*
 * pg_utf8_islegal
 *
 * Check whether the "length" bytes at "source" form one legal UTF-8
 * sequence.  Returns false if length is outside 1..4 or any byte is outside
 * the range allowed for its position:
 *   - third and fourth bytes must be in 0x80..0xBF;
 *   - the second byte must be in 0x80..0xBF, narrowed to 0xA0..0xBF after
 *     0xE0, 0x80..0x9F after 0xED, 0x90..0xBF after 0xF0 and 0x80..0x8F
 *     after 0xF4;
 *   - the lead byte must not be in 0x80..0xC1 nor above 0xF4.
 *
 * The caller supplies the length; it is not cross-checked against the lead
 * byte here.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
    unsigned char lead;

    /* reject lengths 5 and 6 (and anything else unexpected) for now */
    if (length < 1 || length > 4)
        return false;

    if (length == 4 && (source[3] < 0x80 || source[3] > 0xBF))
        return false;

    if (length >= 3 && (source[2] < 0x80 || source[2] > 0xBF))
        return false;

    if (length >= 2)
    {
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        switch (source[0])
        {
            case 0xE0:
                lo = 0xA0;
                break;
            case 0xED:
                hi = 0x9F;
                break;
            case 0xF0:
                lo = 0x90;
                break;
            case 0xF4:
                hi = 0x8F;
                break;
            default:
                break;
        }

        if (source[1] < lo || source[1] > hi)
            return false;
    }

    lead = source[0];
    if (lead >= 0x80 && lead < 0xC2)
        return false;
    if (lead > 0xF4)
        return false;

    return true;
}
static int pg_utf8_verifier | ( | const unsigned char * | s, | |
int | len | |||
) | [static] |
Definition at line 1424 of file wchar.c.
References pg_utf8_islegal(), and pg_utf_mblen().
/*
 * pg_utf8_verifier
 *
 * Verify the UTF-8 character at s, with len bytes available.  Returns the
 * byte length computed by pg_utf_mblen(), or -1 if fewer than that many
 * bytes are available or pg_utf8_islegal() rejects the sequence.
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
    const int   mbl = pg_utf_mblen(s);

    if (len < mbl || !pg_utf8_islegal(s, mbl))
        return -1;

    return mbl;
}
static int pg_utf_dsplen | ( | const unsigned char * | s | ) | [static] |
Definition at line 736 of file wchar.c.
References ucs_wcwidth(), and utf8_to_unicode().
{ return ucs_wcwidth(utf8_to_unicode(s)); }
int pg_utf_mblen | ( | const unsigned char * | s | ) |
Definition at line 541 of file wchar.c.
Referenced by json_lex_string(), pg_utf8_verifier(), pg_wchar2utf_with_len(), utf8_to_iso8859_1(), and UtfToLocal().
/*
 * pg_utf_mblen
 *
 * Byte length of the UTF-8 character starting at s, derived from the lead
 * byte alone:
 *   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 * Any other lead byte (a continuation byte, or a 5/6-byte lead when
 * NOT_USED is undefined) is reported as length 1.
 *
 * The preprocessor conditionals must each sit on a line of their own; when
 * they are folded into the middle of a statement line the function does not
 * compile.
 */
int
pg_utf_mblen(const unsigned char *s)
{
    int         len;

    if ((*s & 0x80) == 0)
        len = 1;
    else if ((*s & 0xe0) == 0xc0)
        len = 2;
    else if ((*s & 0xf0) == 0xe0)
        len = 3;
    else if ((*s & 0xf8) == 0xf0)
        len = 4;
#ifdef NOT_USED
    else if ((*s & 0xfc) == 0xf8)
        len = 5;
    else if ((*s & 0xfe) == 0xfc)
        len = 6;
#endif
    else
        len = 1;
    return len;
}
Definition at line 1880 of file wchar.c.
References pg_verify_mbstr_len().
Referenced by pg_any_to_server(), and t_readline().
/*
 * pg_verify_mbstr
 *
 * Verify that the len bytes at mbstr are valid in the given encoding.
 * Returns true when pg_verify_mbstr_len() yields a non-negative count,
 * false otherwise.  noError is passed through unchanged.
 */
bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{
    const int   nchars = pg_verify_mbstr_len(encoding, mbstr, len, noError);

    return nchars >= 0;
}
int pg_verify_mbstr_len | ( | int | encoding, | |
const char * | mbstr, | |||
int | len, | |||
bool | noError | |||
) |
Definition at line 1897 of file wchar.c.
References Assert, IS_HIGHBIT_SET, pg_wchar_tbl::mbverify, NULL, pg_encoding_max_length(), PG_VALID_ENCODING, and report_invalid_encoding().
Referenced by length_in_encoding(), pg_convert(), pg_verify_mbstr(), pg_verifymbstr(), read_extension_script_file(), and utf_u2e().
{ mbverifier mbverify; int mb_len; Assert(PG_VALID_ENCODING(encoding)); /* * In single-byte encodings, we need only reject nulls (\0). */ if (pg_encoding_max_length(encoding) <= 1) { const char *nullpos = memchr(mbstr, 0, len); if (nullpos == NULL) return len; if (noError) return -1; report_invalid_encoding(encoding, nullpos, 1); } /* fetch function pointer just once */ mbverify = pg_wchar_table[encoding].mbverify; mb_len = 0; while (len > 0) { int l; /* fast path for ASCII-subset characters */ if (!IS_HIGHBIT_SET(*mbstr)) { if (*mbstr != '\0') { mb_len++; mbstr++; len--; continue; } if (noError) return -1; report_invalid_encoding(encoding, mbstr, len); } l = (*mbverify) ((const unsigned char *) mbstr, len); if (l < 0) { if (noError) return -1; report_invalid_encoding(encoding, mbstr, len); } mbstr += l; len -= l; mb_len++; } return mb_len; }
/*
 * pg_verifymbstr
 *
 * Report whether mbstr is valid in the encoding returned by
 * GetDatabaseEncoding().  The work is done by pg_verify_mbstr_len(); this
 * wrapper returns true when that function yields a non-negative result and
 * false otherwise.
 *
 *  mbstr    - string to check
 *  len      - number of bytes at mbstr
 *  noError  - passed through to pg_verify_mbstr_len()
 *
 * NOTE(review): the declaration line for this member was missing from the
 * listing.  The name, parameter types and bool return type were
 * reconstructed from the body, from the signature of pg_verify_mbstr_len()
 * and from its "Referenced by" list -- confirm against wchar.c.
 *
 * Doxygen notes: definition at line 1869 of file wchar.c.
 * References GetDatabaseEncoding(), and pg_verify_mbstr_len().
 * Referenced by CopyReadAttributesText(), plperl_spi_exec(),
 * plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(),
 * PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(),
 * PLyObject_ToDatum(), read_text_file(), and spg_text_leaf_consistent().
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
    return pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
}
static int pg_wchar2euc_with_len | ( | const pg_wchar * | from, | |
unsigned char * | to, | |||
int | len | |||
) | [static] |
Definition at line 347 of file wchar.c.
{ int cnt = 0; while (len > 0 && *from) { unsigned char c; if ((c = (*from >> 24))) { *to++ = c; *to++ = (*from >> 16) & 0xff; *to++ = (*from >> 8) & 0xff; *to++ = *from & 0xff; cnt += 4; } else if ((c = (*from >> 16))) { *to++ = c; *to++ = (*from >> 8) & 0xff; *to++ = *from & 0xff; cnt += 3; } else if ((c = (*from >> 8))) { *to++ = c; *to++ = *from & 0xff; cnt += 2; } else { *to++ = *from; cnt++; } from++; len--; } *to = 0; return cnt; }
static int pg_wchar2mule_with_len | ( | const pg_wchar * | from, | |
unsigned char * | to, | |||
int | len | |||
) | [static] |
Definition at line 801 of file wchar.c.
References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, and IS_LCPRV2_B_RANGE.
{ int cnt = 0; while (len > 0 && *from) { unsigned char lb; lb = (*from >> 16) & 0xff; if (IS_LC1(lb)) { *to++ = lb; *to++ = *from & 0xff; cnt += 2; } else if (IS_LC2(lb)) { *to++ = lb; *to++ = (*from >> 8) & 0xff; *to++ = *from & 0xff; cnt += 3; } else if (IS_LCPRV1_A_RANGE(lb)) { *to++ = LCPRV1_A; *to++ = lb; *to++ = *from & 0xff; cnt += 3; } else if (IS_LCPRV1_B_RANGE(lb)) { *to++ = LCPRV1_B; *to++ = lb; *to++ = *from & 0xff; cnt += 3; } else if (IS_LCPRV2_A_RANGE(lb)) { *to++ = LCPRV2_A; *to++ = lb; *to++ = (*from >> 8) & 0xff; *to++ = *from & 0xff; cnt += 4; } else if (IS_LCPRV2_B_RANGE(lb)) { *to++ = LCPRV2_B; *to++ = lb; *to++ = (*from >> 8) & 0xff; *to++ = *from & 0xff; cnt += 4; } else { *to++ = *from & 0xff; cnt += 1; } from++; len--; } *to = 0; return cnt; }
static int pg_wchar2single_with_len | ( | const pg_wchar * | from, | |
unsigned char * | to, | |||
int | len | |||
) | [static] |
static int pg_wchar2utf_with_len | ( | const pg_wchar * | from, | |
unsigned char * | to, | |||
int | len | |||
) | [static] |
Definition at line 510 of file wchar.c.
References pg_utf_mblen(), and unicode_to_utf8().
{ int cnt = 0; while (len > 0 && *from) { int char_len; unicode_to_utf8(*from, to); char_len = pg_utf_mblen(to); cnt += char_len; to += char_len; from++; len--; } *to = 0; return cnt; }
void report_invalid_encoding | ( | int | encoding, | |
const char * | mbstr, | |||
int | len | |||
) |
Definition at line 1998 of file wchar.c.
References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().
Referenced by big52mic(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_ascii2mic(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), utf8_to_iso8859_1(), and UtfToLocal().
{ int l = pg_encoding_mblen(encoding, mbstr); char buf[8 * 5 + 1]; char *p = buf; int j, jlimit; jlimit = Min(l, len); jlimit = Min(jlimit, 8); /* prevent buffer overrun */ for (j = 0; j < jlimit; j++) { p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]); if (j < jlimit - 1) p += sprintf(p, " "); } ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("invalid byte sequence for encoding \"%s\": %s", pg_enc2name_tbl[encoding].name, buf))); }
void report_untranslatable_char | ( | int | src_encoding, | |
int | dest_encoding, | |||
const char * | mbstr, | |||
int | len | |||
) |
Definition at line 2030 of file wchar.c.
References buf, ereport, errcode(), errmsg(), ERROR, Min, name, pg_enc2name_tbl, and pg_encoding_mblen().
Referenced by big52mic(), latin2mic_with_table(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_mic2ascii(), utf8_to_iso8859_1(), and UtfToLocal().
{ int l = pg_encoding_mblen(src_encoding, mbstr); char buf[8 * 5 + 1]; char *p = buf; int j, jlimit; jlimit = Min(l, len); jlimit = Min(jlimit, 8); /* prevent buffer overrun */ for (j = 0; j < jlimit; j++) { p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]); if (j < jlimit - 1) p += sprintf(p, " "); } ereport(ERROR, (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"", buf, pg_enc2name_tbl[src_encoding].name, pg_enc2name_tbl[dest_encoding].name))); }
static int ucs_wcwidth | ( | pg_wchar | ucs | ) | [static] |
Definition at line 637 of file wchar.c.
References mbbisearch().
Referenced by pg_utf_dsplen().
{ /* sorted list of non-overlapping intervals of non-spacing characters */ static const struct mbinterval combining[] = { {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486}, {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670}, {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43}, {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD}, {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059}, {0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9}, {0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F}, {0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A}, {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB} }; /* test for 8-bit control characters */ if (ucs == 0) return 0; if (ucs < 
0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff) return -1; /* binary search in table of non-spacing characters */ if (mbbisearch(ucs, combining, sizeof(combining) / sizeof(struct mbinterval) - 1)) return 0; /* * if we arrive here, ucs is not a combining or C0/C1 control character */ return 1 + (ucs >= 0x1100 && (ucs <= 0x115f || /* Hangul Jamo init. consonants */ (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a && ucs != 0x303f) || /* CJK ... Yi */ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility * Ideographs */ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */ (ucs >= 0xffe0 && ucs <= 0xffe6) || (ucs >= 0x20000 && ucs <= 0x2ffff))); }
unsigned char* unicode_to_utf8 | ( | pg_wchar | c, | |
unsigned char * | utf8string | |||
) |
Definition at line 475 of file wchar.c.
Referenced by json_lex_string(), pg_wchar2utf_with_len(), and unicode_to_sqlchar().
{ if (c <= 0x7F) { utf8string[0] = c; } else if (c <= 0x7FF) { utf8string[0] = 0xC0 | ((c >> 6) & 0x1F); utf8string[1] = 0x80 | (c & 0x3F); } else if (c <= 0xFFFF) { utf8string[0] = 0xE0 | ((c >> 12) & 0x0F); utf8string[1] = 0x80 | ((c >> 6) & 0x3F); utf8string[2] = 0x80 | (c & 0x3F); } else { utf8string[0] = 0xF0 | ((c >> 18) & 0x07); utf8string[1] = 0x80 | ((c >> 12) & 0x3F); utf8string[2] = 0x80 | ((c >> 6) & 0x3F); utf8string[3] = 0x80 | (c & 0x3F); } return utf8string; }
pg_wchar utf8_to_unicode | ( | const unsigned char * | c | ) |
Definition at line 714 of file wchar.c.
Referenced by pg_utf_dsplen(), and pg_wcsformat().
{ if ((*c & 0x80) == 0) return (pg_wchar) c[0]; else if ((*c & 0xe0) == 0xc0) return (pg_wchar) (((c[0] & 0x1f) << 6) | (c[1] & 0x3f)); else if ((*c & 0xf0) == 0xe0) return (pg_wchar) (((c[0] & 0x0f) << 12) | ((c[1] & 0x3f) << 6) | (c[2] & 0x3f)); else if ((*c & 0xf8) == 0xf0) return (pg_wchar) (((c[0] & 0x07) << 18) | ((c[1] & 0x3f) << 12) | ((c[2] & 0x3f) << 6) | (c[3] & 0x3f)); else /* that is an invalid code on purpose */ return 0xffffffff; }
Definition at line 1723 of file wchar.c.
Referenced by pg_dsplen(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_wchar2mb(), and pg_wchar2mb_with_len().