00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef FRONTEND
00017 #include "postgres.h"
00018 #else
00019 #include "postgres_fe.h"
00020 #endif
00021
00022 #include <locale.h>
00023 #ifdef HAVE_LANGINFO_H
00024 #include <langinfo.h>
00025 #endif
00026
00027 #include "mb/pg_wchar.h"
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 struct encoding_match
00041 {
00042 enum pg_enc pg_enc_code;
00043 const char *system_enc_name;
00044 };
00045
00046 static const struct encoding_match encoding_match_list[] = {
00047 {PG_EUC_JP, "EUC-JP"},
00048 {PG_EUC_JP, "eucJP"},
00049 {PG_EUC_JP, "IBM-eucJP"},
00050 {PG_EUC_JP, "sdeckanji"},
00051 {PG_EUC_JP, "CP20932"},
00052
00053 {PG_EUC_CN, "EUC-CN"},
00054 {PG_EUC_CN, "eucCN"},
00055 {PG_EUC_CN, "IBM-eucCN"},
00056 {PG_EUC_CN, "GB2312"},
00057 {PG_EUC_CN, "dechanzi"},
00058 {PG_EUC_CN, "CP20936"},
00059
00060 {PG_EUC_KR, "EUC-KR"},
00061 {PG_EUC_KR, "eucKR"},
00062 {PG_EUC_KR, "IBM-eucKR"},
00063 {PG_EUC_KR, "deckorean"},
00064 {PG_EUC_KR, "5601"},
00065 {PG_EUC_KR, "CP51949"},
00066
00067 {PG_EUC_TW, "EUC-TW"},
00068 {PG_EUC_TW, "eucTW"},
00069 {PG_EUC_TW, "IBM-eucTW"},
00070 {PG_EUC_TW, "cns11643"},
00071
00072
00073 {PG_UTF8, "UTF-8"},
00074 {PG_UTF8, "utf8"},
00075 {PG_UTF8, "CP65001"},
00076
00077 {PG_LATIN1, "ISO-8859-1"},
00078 {PG_LATIN1, "ISO8859-1"},
00079 {PG_LATIN1, "iso88591"},
00080 {PG_LATIN1, "CP28591"},
00081
00082 {PG_LATIN2, "ISO-8859-2"},
00083 {PG_LATIN2, "ISO8859-2"},
00084 {PG_LATIN2, "iso88592"},
00085 {PG_LATIN2, "CP28592"},
00086
00087 {PG_LATIN3, "ISO-8859-3"},
00088 {PG_LATIN3, "ISO8859-3"},
00089 {PG_LATIN3, "iso88593"},
00090 {PG_LATIN3, "CP28593"},
00091
00092 {PG_LATIN4, "ISO-8859-4"},
00093 {PG_LATIN4, "ISO8859-4"},
00094 {PG_LATIN4, "iso88594"},
00095 {PG_LATIN4, "CP28594"},
00096
00097 {PG_LATIN5, "ISO-8859-9"},
00098 {PG_LATIN5, "ISO8859-9"},
00099 {PG_LATIN5, "iso88599"},
00100 {PG_LATIN5, "CP28599"},
00101
00102 {PG_LATIN6, "ISO-8859-10"},
00103 {PG_LATIN6, "ISO8859-10"},
00104 {PG_LATIN6, "iso885910"},
00105
00106 {PG_LATIN7, "ISO-8859-13"},
00107 {PG_LATIN7, "ISO8859-13"},
00108 {PG_LATIN7, "iso885913"},
00109
00110 {PG_LATIN8, "ISO-8859-14"},
00111 {PG_LATIN8, "ISO8859-14"},
00112 {PG_LATIN8, "iso885914"},
00113
00114 {PG_LATIN9, "ISO-8859-15"},
00115 {PG_LATIN9, "ISO8859-15"},
00116 {PG_LATIN9, "iso885915"},
00117 {PG_LATIN9, "CP28605"},
00118
00119 {PG_LATIN10, "ISO-8859-16"},
00120 {PG_LATIN10, "ISO8859-16"},
00121 {PG_LATIN10, "iso885916"},
00122
00123 {PG_KOI8R, "KOI8-R"},
00124 {PG_KOI8R, "CP20866"},
00125
00126 {PG_KOI8U, "KOI8-U"},
00127 {PG_KOI8U, "CP21866"},
00128
00129 {PG_WIN866, "CP866"},
00130 {PG_WIN874, "CP874"},
00131 {PG_WIN1250, "CP1250"},
00132 {PG_WIN1251, "CP1251"},
00133 {PG_WIN1251, "ansi-1251"},
00134 {PG_WIN1252, "CP1252"},
00135 {PG_WIN1253, "CP1253"},
00136 {PG_WIN1254, "CP1254"},
00137 {PG_WIN1255, "CP1255"},
00138 {PG_WIN1256, "CP1256"},
00139 {PG_WIN1257, "CP1257"},
00140 {PG_WIN1258, "CP1258"},
00141
00142 {PG_ISO_8859_5, "ISO-8859-5"},
00143 {PG_ISO_8859_5, "ISO8859-5"},
00144 {PG_ISO_8859_5, "iso88595"},
00145 {PG_ISO_8859_5, "CP28595"},
00146
00147 {PG_ISO_8859_6, "ISO-8859-6"},
00148 {PG_ISO_8859_6, "ISO8859-6"},
00149 {PG_ISO_8859_6, "iso88596"},
00150 {PG_ISO_8859_6, "CP28596"},
00151
00152 {PG_ISO_8859_7, "ISO-8859-7"},
00153 {PG_ISO_8859_7, "ISO8859-7"},
00154 {PG_ISO_8859_7, "iso88597"},
00155 {PG_ISO_8859_7, "CP28597"},
00156
00157 {PG_ISO_8859_8, "ISO-8859-8"},
00158 {PG_ISO_8859_8, "ISO8859-8"},
00159 {PG_ISO_8859_8, "iso88598"},
00160 {PG_ISO_8859_8, "CP28598"},
00161
00162 {PG_SJIS, "SJIS"},
00163 {PG_SJIS, "PCK"},
00164 {PG_SJIS, "CP932"},
00165
00166 {PG_BIG5, "BIG5"},
00167 {PG_BIG5, "BIG5HKSCS"},
00168 {PG_BIG5, "Big5-HKSCS"},
00169 {PG_BIG5, "CP950"},
00170
00171 {PG_GBK, "GBK"},
00172 {PG_GBK, "CP936"},
00173
00174 {PG_UHC, "UHC"},
00175 {PG_UHC, "CP949"},
00176
00177 {PG_JOHAB, "JOHAB"},
00178 {PG_JOHAB, "CP1361"},
00179
00180 {PG_GB18030, "GB18030"},
00181 {PG_GB18030, "CP54936"},
00182
00183 {PG_SHIFT_JIS_2004, "SJIS_2004"},
00184
00185 {PG_SQL_ASCII, "US-ASCII"},
00186
00187 {PG_SQL_ASCII, NULL}
00188 };
00189
00190 #ifdef WIN32
00191
00192
00193
00194
00195
00196
00197
00198
00199
/*
 * Produce a codeset name for the locale "ctype" on Windows, spelled the way
 * encoding_match_list expects.
 *
 * With MSVC 2012 or later (_MSC_VER >= 1700) the code page is read from a
 * locale object created for "ctype" and returned as "CP<number>".
 *
 * Otherwise the text after the last '.' in the locale name is used:
 *   - all digits, e.g. "English_United States.1252"  ->  "CP1252"
 *   - anything else, e.g. "en-US.utf8"               ->  "utf8"
 * A non-numeric suffix is already an encoding name, so prefixing it with
 * "CP" would yield a string (such as "CPutf8") that matches no table entry.
 *
 * Returns a malloc'd string that the caller must free, or NULL when the
 * locale object cannot be created, the name contains no '.', or memory
 * allocation fails.
 */
static char *
win32_langinfo(const char *ctype)
{
    char       *r = NULL;

#if (_MSC_VER >= 1700)
    /*
     * NOTE(review): this reaches into the CRT's locale structure
     * (locinfo->lc_codepage); confirm the targeted runtime still exposes it.
     */
    _locale_t   loct = _create_locale(LC_CTYPE, ctype);

    if (loct != NULL)
    {
        /* "CP" + up to 10 decimal digits of a 32-bit value + NUL fits in 16 */
        r = malloc(16);
        if (r != NULL)
            sprintf(r, "CP%u", loct->locinfo->lc_codepage);
        _free_locale(loct);
    }
#else
    const char *codepage = strrchr(ctype, '.');

    if (codepage != NULL)
    {
        size_t      ln;

        codepage++;             /* step past the '.' */
        ln = strlen(codepage);

        /* room for "CP" + suffix + NUL; also enough for the bare suffix */
        r = malloc(ln + 3);
        if (r != NULL)
        {
            if (strspn(codepage, "0123456789") == ln)
                sprintf(r, "CP%s", codepage);   /* numeric code page */
            else
                strcpy(r, codepage);            /* named codeset, keep as is */
        }
    }
#endif

    return r;
}
00238 #endif
00239
00240 #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252 int
00253 pg_get_encoding_from_locale(const char *ctype, bool write_message)
00254 {
00255 char *sys;
00256 int i;
00257
00258
00259 if (ctype)
00260 {
00261 char *save;
00262 char *name;
00263
00264
00265 if (pg_strcasecmp(ctype, "C") == 0 ||
00266 pg_strcasecmp(ctype, "POSIX") == 0)
00267 return PG_SQL_ASCII;
00268
00269 save = setlocale(LC_CTYPE, NULL);
00270 if (!save)
00271 return -1;
00272
00273 save = strdup(save);
00274 if (!save)
00275 return -1;
00276
00277 name = setlocale(LC_CTYPE, ctype);
00278 if (!name)
00279 {
00280 free(save);
00281 return -1;
00282 }
00283
00284 #ifndef WIN32
00285 sys = nl_langinfo(CODESET);
00286 if (sys)
00287 sys = strdup(sys);
00288 #else
00289 sys = win32_langinfo(name);
00290 #endif
00291
00292 setlocale(LC_CTYPE, save);
00293 free(save);
00294 }
00295 else
00296 {
00297
00298 ctype = setlocale(LC_CTYPE, NULL);
00299 if (!ctype)
00300 return -1;
00301
00302
00303 if (pg_strcasecmp(ctype, "C") == 0 ||
00304 pg_strcasecmp(ctype, "POSIX") == 0)
00305 return PG_SQL_ASCII;
00306
00307 #ifndef WIN32
00308 sys = nl_langinfo(CODESET);
00309 if (sys)
00310 sys = strdup(sys);
00311 #else
00312 sys = win32_langinfo(ctype);
00313 #endif
00314 }
00315
00316 if (!sys)
00317 return -1;
00318
00319
00320 for (i = 0; encoding_match_list[i].system_enc_name; i++)
00321 {
00322 if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
00323 {
00324 free(sys);
00325 return encoding_match_list[i].pg_enc_code;
00326 }
00327 }
00328
00329
00330
00331 #ifdef __darwin__
00332
00333
00334
00335
00336
00337 if (strlen(sys) == 0)
00338 {
00339 free(sys);
00340 return PG_UTF8;
00341 }
00342 #endif
00343
00344
00345
00346
00347
00348 if (write_message)
00349 {
00350 #ifdef FRONTEND
00351 fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
00352 ctype, sys);
00353
00354 fputc('\n', stderr);
00355 #else
00356 ereport(WARNING,
00357 (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
00358 ctype, sys),
00359 errdetail("Please report this to <[email protected]>.")));
00360 #endif
00361 }
00362
00363 free(sys);
00364 return -1;
00365 }
00366 #else
00367
00368
00369
00370
00371
00372
00373
00374
00375 int
00376 pg_get_encoding_from_locale(const char *ctype, bool write_message)
00377 {
00378 return PG_SQL_ASCII;
00379 }
00380
00381 #endif