Header And Logo

PostgreSQL
| The world's most advanced open source database.

pg_locale.c

Go to the documentation of this file.
00001 /*-----------------------------------------------------------------------
00002  *
00003  * PostgreSQL locale utilities
00004  *
00005  * Portions Copyright (c) 2002-2013, PostgreSQL Global Development Group
00006  *
00007  * src/backend/utils/adt/pg_locale.c
00008  *
00009  *-----------------------------------------------------------------------
00010  */
00011 
00012 /*----------
00013  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
00014  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
00015  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
00016  * toupper(), etc. are always in the same fixed locale.
00017  *
00018  * LC_MESSAGES is settable at run time and will take effect
00019  * immediately.
00020  *
00021  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
00022  * settable at run-time.  However, we don't actually set those locale
00023  * categories permanently.  This would have bizarre effects like no
00024  * longer accepting standard floating-point literals in some locales.
00025  * Instead, we only set the locales briefly when needed, cache the
00026  * required information obtained from localeconv(), and set them back.
00027  * The cached information is only used by the formatting functions
00028  * (to_char, etc.) and the money type.  For the user, this should all be
00029  * transparent.
00030  *
00031  * !!! NOW HEAR THIS !!!
00032  *
00033  * We've been bitten repeatedly by this bug, so let's try to keep it in
00034  * mind in future: on some platforms, the locale functions return pointers
00035  * to static data that will be overwritten by any later locale function.
00036  * Thus, for example, the obvious-looking sequence
00037  *          save = setlocale(category, NULL);
00038  *          if (!setlocale(category, value))
00039  *              fail = true;
00040  *          setlocale(category, save);
00041  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
00042  * will change the memory save is pointing at.  To do this sort of thing
00043  * safely, you *must* pstrdup what setlocale returns the first time.
00044  *
00045  * FYI, The Open Group locale standard is defined here:
00046  *
00047  *  http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
00048  *----------
00049  */
00050 
00051 
00052 #include "postgres.h"
00053 
00054 #include <locale.h>
00055 #include <time.h>
00056 
00057 #include "access/htup_details.h"
00058 #include "catalog/pg_collation.h"
00059 #include "catalog/pg_control.h"
00060 #include "mb/pg_wchar.h"
00061 #include "utils/hsearch.h"
00062 #include "utils/memutils.h"
00063 #include "utils/pg_locale.h"
00064 #include "utils/syscache.h"
00065 
#ifdef WIN32
/*
 * This Windows header defines StrNCpy.  We don't need its version here, so
 * we undefine the macro before the include to keep the compiler quiet, and
 * undefine it again after the header has been included, so we don't
 * accidentally use theirs.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
00078 
/*
 * Element count of the scratch buffers that receive strftime()/wcsftime()
 * output in this file (see strftime_win32() and cache_locale_time()).
 */
00079 #define     MAX_L10N_DATA       80
00080 
00081 
00082 /* GUC settings */
/*
 * locale_monetary, locale_numeric and locale_time are the names handed to
 * setlocale() by PGLC_localeconv() and cache_locale_time().
 */
00083 char       *locale_messages;
00084 char       *locale_monetary;
00085 char       *locale_numeric;
00086 char       *locale_time;
00087 
00088 /* lc_time localization cache */
/*
 * Filled in by cache_locale_time(): day arrays are indexed by tm_wday (0..6),
 * month arrays by tm_mon (0..11).  Entries are allocated in TopMemoryContext.
 */
00089 char       *localized_abbrev_days[7];
00090 char       *localized_full_days[7];
00091 char       *localized_abbrev_months[12];
00092 char       *localized_full_months[12];
00093 
00094 /* indicates whether locale information cache is valid */
/*
 * CurrentLocaleConvValid guards PGLC_localeconv()'s cache and is cleared by
 * the monetary/numeric assign hooks; CurrentLCTimeValid guards
 * cache_locale_time()'s cache and is cleared by the time assign hook.
 */
00095 static bool CurrentLocaleConvValid = false;
00096 static bool CurrentLCTimeValid = false;
00097 
00098 /* Environment variable storage area */
/*
 * One buffer per locale category; pg_perm_setlocale() formats "LC_XXX=value"
 * into the matching buffer and passes it to putenv().  They have static
 * storage presumably because putenv() keeps the pointer it is given rather
 * than copying the string.
 */
00099 
00100 #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
00101 
00102 static char lc_collate_envbuf[LC_ENV_BUFSIZE];
00103 static char lc_ctype_envbuf[LC_ENV_BUFSIZE];
00104 
00105 #ifdef LC_MESSAGES
00106 static char lc_messages_envbuf[LC_ENV_BUFSIZE];
00107 #endif
00108 static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
00109 static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
00110 static char lc_time_envbuf[LC_ENV_BUFSIZE];
00111 
00112 /* Cache for collation-related knowledge */
00113 
/*
 * Entry type of the collation cache hash table.  lookup_collation_cache()
 * initializes a newly created entry with flags_valid = false and locale = 0,
 * so the two "is C" flags must not be trusted until flags_valid is true.
 */
00114 typedef struct
00115 {
00116     Oid         collid;         /* hash key: pg_collation OID */
00117     bool        collate_is_c;   /* is collation's LC_COLLATE C? */
00118     bool        ctype_is_c;     /* is collation's LC_CTYPE C? */
00119     bool        flags_valid;    /* true if above flags are valid */
00120     pg_locale_t locale;         /* locale_t struct, or 0 if not valid */
00121 } collation_cache_entry;
00122 
/* Hash table of the entries above; created by lookup_collation_cache() on its first call. */
00123 static HTAB *collation_cache = NULL;
00124 
00125 
00126 #if defined(WIN32) && defined(LC_MESSAGES)
00127 static char *IsoLocaleName(const char *);       /* MSVC specific */
00128 #endif
00129 
00130 
00131 /*
00132  * pg_perm_setlocale
00133  *
00134  * This is identical to the libc function setlocale(), with the addition
00135  * that if the operation is successful, the corresponding LC_XXX environment
00136  * variable is set to match.  By setting the environment variable, we ensure
00137  * that any subsequent use of setlocale(..., "") will preserve the settings
00138  * made through this routine.  Of course, LC_ALL must also be unset to fully
00139  * ensure that, but that has to be done elsewhere after all the individual
00140  * LC_XXX variables have been set correctly.  (Thank you Perl for making this
00141  * kluge necessary.)
00142  */
00143 char *
00144 pg_perm_setlocale(int category, const char *locale)
00145 {
00146     char       *result;
00147     const char *envvar;
00148     char       *envbuf;
00149 
00150 #ifndef WIN32
00151     result = setlocale(category, locale);
00152 #else
00153 
00154     /*
00155      * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
00156      * the given value is good and set it in the environment variables. We
00157      * must ignore attempts to set to "", which means "keep using the old
00158      * environment value".
00159      */
00160 #ifdef LC_MESSAGES
00161     if (category == LC_MESSAGES)
00162     {
00163         result = (char *) locale;
00164         if (locale == NULL || locale[0] == '\0')
00165             return result;
00166     }
00167     else
00168 #endif
00169         result = setlocale(category, locale);
00170 #endif   /* WIN32 */
00171 
00172     if (result == NULL)
00173         return result;          /* fall out immediately on failure */
00174 
00175     switch (category)
00176     {
00177         case LC_COLLATE:
00178             envvar = "LC_COLLATE";
00179             envbuf = lc_collate_envbuf;
00180             break;
00181         case LC_CTYPE:
00182             envvar = "LC_CTYPE";
00183             envbuf = lc_ctype_envbuf;
00184             break;
00185 #ifdef LC_MESSAGES
00186         case LC_MESSAGES:
00187             envvar = "LC_MESSAGES";
00188             envbuf = lc_messages_envbuf;
00189 #ifdef WIN32
00190             result = IsoLocaleName(locale);
00191             if (result == NULL)
00192                 result = (char *) locale;
00193 #endif   /* WIN32 */
00194             break;
00195 #endif   /* LC_MESSAGES */
00196         case LC_MONETARY:
00197             envvar = "LC_MONETARY";
00198             envbuf = lc_monetary_envbuf;
00199             break;
00200         case LC_NUMERIC:
00201             envvar = "LC_NUMERIC";
00202             envbuf = lc_numeric_envbuf;
00203             break;
00204         case LC_TIME:
00205             envvar = "LC_TIME";
00206             envbuf = lc_time_envbuf;
00207             break;
00208         default:
00209             elog(FATAL, "unrecognized LC category: %d", category);
00210             envvar = NULL;      /* keep compiler quiet */
00211             envbuf = NULL;
00212             return NULL;
00213     }
00214 
00215     snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
00216 
00217     if (putenv(envbuf))
00218         return NULL;
00219 
00220     return result;
00221 }
00222 
00223 
00224 /*
00225  * Is the locale name valid for the locale category?
00226  *
00227  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
00228  * canonical name is stored there.  This is especially useful for figuring out
00229  * what locale name "" means (ie, the server environment value).  (Actually,
00230  * it seems that on most implementations that's the only thing it's good for;
00231  * we could wish that setlocale gave back a canonically spelled version of
00232  * the locale name, but typically it doesn't.)
00233  */
00234 bool
00235 check_locale(int category, const char *locale, char **canonname)
00236 {
00237     char       *save;
00238     char       *res;
00239 
00240     if (canonname)
00241         *canonname = NULL;      /* in case of failure */
00242 
00243     save = setlocale(category, NULL);
00244     if (!save)
00245         return false;           /* won't happen, we hope */
00246 
00247     /* save may be pointing at a modifiable scratch variable, see above. */
00248     save = pstrdup(save);
00249 
00250     /* set the locale with setlocale, to see if it accepts it. */
00251     res = setlocale(category, locale);
00252 
00253     /* save canonical name if requested. */
00254     if (res && canonname)
00255         *canonname = pstrdup(res);
00256 
00257     /* restore old value. */
00258     if (!setlocale(category, save))
00259         elog(WARNING, "failed to restore old locale \"%s\"", save);
00260     pfree(save);
00261 
00262     return (res != NULL);
00263 }
00264 
00265 
00266 /*
00267  * GUC check/assign hooks
00268  *
00269  * For most locale categories, the assign hook doesn't actually set the locale
00270  * permanently, just reset flags so that the next use will cache the
00271  * appropriate values.  (See explanation at the top of this file.)
00272  *
00273  * Note: we accept value = "" as selecting the postmaster's environment
00274  * value, whatever it was (so long as the environment setting is legal).
00275  * This will have been locked down by an earlier call to pg_perm_setlocale.
00276  */
00277 bool
00278 check_locale_monetary(char **newval, void **extra, GucSource source)
00279 {
00280     return check_locale(LC_MONETARY, *newval, NULL);
00281 }
00282 
00283 void
00284 assign_locale_monetary(const char *newval, void *extra)
00285 {
00286     CurrentLocaleConvValid = false;
00287 }
00288 
00289 bool
00290 check_locale_numeric(char **newval, void **extra, GucSource source)
00291 {
00292     return check_locale(LC_NUMERIC, *newval, NULL);
00293 }
00294 
00295 void
00296 assign_locale_numeric(const char *newval, void *extra)
00297 {
00298     CurrentLocaleConvValid = false;
00299 }
00300 
00301 bool
00302 check_locale_time(char **newval, void **extra, GucSource source)
00303 {
00304     return check_locale(LC_TIME, *newval, NULL);
00305 }
00306 
00307 void
00308 assign_locale_time(const char *newval, void *extra)
00309 {
00310     CurrentLCTimeValid = false;
00311 }
00312 
00313 /*
00314  * We allow LC_MESSAGES to actually be set globally.
00315  *
00316  * Note: we normally disallow value = "" because it wouldn't have consistent
00317  * semantics (it'd effectively just use the previous value).  However, this
00318  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
00319  * not even if the attempted setting fails due to invalid environment value.
00320  * The idea there is just to accept the environment setting *if possible*
00321  * during startup, until we can read the proper value from postgresql.conf.
00322  */
00323 bool
00324 check_locale_messages(char **newval, void **extra, GucSource source)
00325 {
00326     if (**newval == '\0')
00327     {
00328         if (source == PGC_S_DEFAULT)
00329             return true;
00330         else
00331             return false;
00332     }
00333 
00334     /*
00335      * LC_MESSAGES category does not exist everywhere, but accept it anyway
00336      *
00337      * On Windows, we can't even check the value, so accept blindly
00338      */
00339 #if defined(LC_MESSAGES) && !defined(WIN32)
00340     return check_locale(LC_MESSAGES, *newval, NULL);
00341 #else
00342     return true;
00343 #endif
00344 }
00345 
/*
 * Assign hook for the messages locale setting.  Unlike the other categories,
 * this one is applied to the process right away via pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
#ifdef LC_MESSAGES

    /*
     * The result is deliberately discarded: a failed attempt is not treated
     * as an error here.  Where LC_MESSAGES does not exist at all, the
     * setting is accepted and this hook does nothing.
     */
    (void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
00357 
00358 
/*
 * Frees the malloced content of a struct lconv.  (But not the struct
 * itself.)
 *
 * Only the ten string members this file fills with malloc'd copies are
 * touched.  Each one is reset to NULL after being freed, so the struct never
 * holds dangling pointers and calling this function again on the same struct
 * is harmless.  A NULL "s" is accepted and ignored.
 */
static void
free_struct_lconv(struct lconv * s)
{
    if (s == NULL)
        return;

    /* free(NULL) is a no-op, so no per-member guard is needed */
    free(s->currency_symbol);
    s->currency_symbol = NULL;
    free(s->decimal_point);
    s->decimal_point = NULL;
    free(s->grouping);
    s->grouping = NULL;
    free(s->thousands_sep);
    s->thousands_sep = NULL;
    free(s->int_curr_symbol);
    s->int_curr_symbol = NULL;
    free(s->mon_decimal_point);
    s->mon_decimal_point = NULL;
    free(s->mon_grouping);
    s->mon_grouping = NULL;
    free(s->mon_thousands_sep);
    s->mon_thousands_sep = NULL;
    free(s->negative_sign);
    s->negative_sign = NULL;
    free(s->positive_sign);
    s->positive_sign = NULL;
}
00390 
00391 
/*
 * Convert "str" from the given encoding to the database encoding and hand
 * back the result as a strdup'ed (malloc-allocated) string, which the caller
 * is responsible for free()ing.
 *
 * The strdup() result is returned unchecked, so the return value is NULL if
 * that allocation fails.
 */
static char *
db_encoding_strdup(int encoding, const char *str)
{
    char       *converted;
    char       *copy;

    converted = (char *) pg_do_encoding_conversion((unsigned char *) str,
                                                   strlen(str),
                                                   encoding,
                                                   GetDatabaseEncoding());

    /*
     * The conversion routine may return the input pointer itself; in that
     * case there is no intermediate buffer to release.
     */
    if (converted == str)
        return strdup(str);

    copy = strdup(converted);
    pfree(converted);

    return copy;
}
00412 
00413 
00414 /*
00415  * Return the POSIX lconv struct (contains number/money formatting
00416  * information) with locale information for all categories.
00417  */
00418 struct lconv *
00419 PGLC_localeconv(void)
00420 {
00421     static struct lconv CurrentLocaleConv;
00422     struct lconv *extlconv;
00423     char       *save_lc_monetary;
00424     char       *save_lc_numeric;
00425     char       *decimal_point;
00426     char       *grouping;
00427     char       *thousands_sep;
00428     int         encoding;
00429 
00430 #ifdef WIN32
00431     char       *save_lc_ctype;
00432 #endif
00433 
00434     /* Did we do it already? */
00435     if (CurrentLocaleConvValid)
00436         return &CurrentLocaleConv;
00437 
00438     free_struct_lconv(&CurrentLocaleConv);
00439 
00440     /* Save user's values of monetary and numeric locales */
00441     save_lc_monetary = setlocale(LC_MONETARY, NULL);
00442     if (save_lc_monetary)
00443         save_lc_monetary = pstrdup(save_lc_monetary);
00444 
00445     save_lc_numeric = setlocale(LC_NUMERIC, NULL);
00446     if (save_lc_numeric)
00447         save_lc_numeric = pstrdup(save_lc_numeric);
00448 
00449 #ifdef WIN32
00450 
00451     /*
00452      * Ideally, monetary and numeric local symbols could be returned in any
00453      * server encoding.  Unfortunately, the WIN32 API does not allow
00454      * setlocale() to return values in a codepage/CTYPE that uses more than
00455      * two bytes per character, like UTF-8:
00456      *
00457      * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
00458      *
00459      * Evidently, LC_CTYPE allows us to control the encoding used for strings
00460      * returned by localeconv().  The Open Group standard, mentioned at the
00461      * top of this C file, doesn't explicitly state this.
00462      *
00463      * Therefore, we set LC_CTYPE to match LC_NUMERIC or LC_MONETARY (which
00464      * cannot be UTF8), call localeconv(), and then convert from the
00465      * numeric/monitary LC_CTYPE to the server encoding.  One example use of
00466      * this is for the Euro symbol.
00467      *
00468      * Perhaps someday we will use GetLocaleInfoW() which returns values in
00469      * UTF16 and convert from that.
00470      */
00471 
00472     /* save user's value of ctype locale */
00473     save_lc_ctype = setlocale(LC_CTYPE, NULL);
00474     if (save_lc_ctype)
00475         save_lc_ctype = pstrdup(save_lc_ctype);
00476 
00477     /* use numeric to set the ctype */
00478     setlocale(LC_CTYPE, locale_numeric);
00479 #endif
00480 
00481     /* Get formatting information for numeric */
00482     setlocale(LC_NUMERIC, locale_numeric);
00483     extlconv = localeconv();
00484     encoding = pg_get_encoding_from_locale(locale_numeric, true);
00485 
00486     decimal_point = db_encoding_strdup(encoding, extlconv->decimal_point);
00487     thousands_sep = db_encoding_strdup(encoding, extlconv->thousands_sep);
00488     grouping = strdup(extlconv->grouping);
00489 
00490 #ifdef WIN32
00491     /* use monetary to set the ctype */
00492     setlocale(LC_CTYPE, locale_monetary);
00493 #endif
00494 
00495     /* Get formatting information for monetary */
00496     setlocale(LC_MONETARY, locale_monetary);
00497     extlconv = localeconv();
00498     encoding = pg_get_encoding_from_locale(locale_monetary, true);
00499 
00500     /*
00501      * Must copy all values since restoring internal settings may overwrite
00502      * localeconv()'s results.
00503      */
00504     CurrentLocaleConv = *extlconv;
00505     CurrentLocaleConv.decimal_point = decimal_point;
00506     CurrentLocaleConv.grouping = grouping;
00507     CurrentLocaleConv.thousands_sep = thousands_sep;
00508     CurrentLocaleConv.int_curr_symbol = db_encoding_strdup(encoding, extlconv->int_curr_symbol);
00509     CurrentLocaleConv.currency_symbol = db_encoding_strdup(encoding, extlconv->currency_symbol);
00510     CurrentLocaleConv.mon_decimal_point = db_encoding_strdup(encoding, extlconv->mon_decimal_point);
00511     CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
00512     CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup(encoding, extlconv->mon_thousands_sep);
00513     CurrentLocaleConv.negative_sign = db_encoding_strdup(encoding, extlconv->negative_sign);
00514     CurrentLocaleConv.positive_sign = db_encoding_strdup(encoding, extlconv->positive_sign);
00515 
00516     /* Try to restore internal settings */
00517     if (save_lc_monetary)
00518     {
00519         if (!setlocale(LC_MONETARY, save_lc_monetary))
00520             elog(WARNING, "failed to restore old locale");
00521         pfree(save_lc_monetary);
00522     }
00523 
00524     if (save_lc_numeric)
00525     {
00526         if (!setlocale(LC_NUMERIC, save_lc_numeric))
00527             elog(WARNING, "failed to restore old locale");
00528         pfree(save_lc_numeric);
00529     }
00530 
00531 #ifdef WIN32
00532     /* Try to restore internal ctype settings */
00533     if (save_lc_ctype)
00534     {
00535         if (!setlocale(LC_CTYPE, save_lc_ctype))
00536             elog(WARNING, "failed to restore old locale");
00537         pfree(save_lc_ctype);
00538     }
00539 #endif
00540 
00541     CurrentLocaleConvValid = true;
00542     return &CurrentLocaleConv;
00543 }
00544 
00545 #ifdef WIN32
00546 /*
00547  * On WIN32, strftime() returns the encoding in CP_ACP (the default
00548  * operating system codpage for that computer), which is likely different
00549  * from SERVER_ENCODING.  This is especially important in Japanese versions
00550  * of Windows which will use SJIS encoding, which we don't support as a
00551  * server encoding.
00552  *
00553  * So, instead of using strftime(), use wcsftime() to return the value in
00554  * wide characters (internally UTF16) and then convert it to the appropriate
00555  * database encoding.
00556  *
00557  * Note that this only affects the calls to strftime() in this file, which are
00558  * used to get the locale-aware strings. Other parts of the backend use
00559  * pg_strftime(), which isn't locale-aware and does not need to be replaced.
00560  */
00561 static size_t
00562 strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm * tm)
00563 {
00564     size_t      len;
00565     wchar_t     wbuf[MAX_L10N_DATA];
00566     int         encoding;
00567 
00568     encoding = GetDatabaseEncoding();
00569 
00570     len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
00571     if (len == 0)
00572 
00573         /*
00574          * strftime call failed - return 0 with the contents of dst
00575          * unspecified
00576          */
00577         return 0;
00578 
00579     len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
00580     if (len == 0)
00581         elog(ERROR,
00582         "could not convert string to UTF-8: error code %lu", GetLastError());
00583 
00584     dst[len] = '\0';
00585     if (encoding != PG_UTF8)
00586     {
00587         char       *convstr =
00588         (char *) pg_do_encoding_conversion((unsigned char *) dst,
00589                                            len, PG_UTF8, encoding);
00590 
00591         if (dst != convstr)
00592         {
00593             strlcpy(dst, convstr, dstlen);
00594             len = strlen(dst);
00595         }
00596     }
00597 
00598     return len;
00599 }
00600 
00601 /* redefine strftime() */
00602 #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
00603 #endif   /* WIN32 */
00604 
00605 
00606 /*
00607  * Update the lc_time localization cache variables if needed.
00608  */
00609 void
00610 cache_locale_time(void)
00611 {
00612     char       *save_lc_time;
00613     time_t      timenow;
00614     struct tm  *timeinfo;
00615     char        buf[MAX_L10N_DATA];
00616     char       *ptr;
00617     int         i;
00618 
00619 #ifdef WIN32
00620     char       *save_lc_ctype;
00621 #endif
00622 
00623     /* did we do this already? */
00624     if (CurrentLCTimeValid)
00625         return;
00626 
00627     elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
00628 
00629     /* save user's value of time locale */
00630     save_lc_time = setlocale(LC_TIME, NULL);
00631     if (save_lc_time)
00632         save_lc_time = pstrdup(save_lc_time);
00633 
00634 #ifdef WIN32
00635 
00636     /*
00637      * On WIN32, there is no way to get locale-specific time values in a
00638      * specified locale, like we do for monetary/numeric.  We can only get
00639      * CP_ACP (see strftime_win32) or UTF16.  Therefore, we get UTF16 and
00640      * convert it to the database locale.  However, wcsftime() internally uses
00641      * LC_CTYPE, so we set it here.  See the WIN32 comment near the top of
00642      * PGLC_localeconv().
00643      */
00644 
00645     /* save user's value of ctype locale */
00646     save_lc_ctype = setlocale(LC_CTYPE, NULL);
00647     if (save_lc_ctype)
00648         save_lc_ctype = pstrdup(save_lc_ctype);
00649 
00650     /* use lc_time to set the ctype */
00651     setlocale(LC_CTYPE, locale_time);
00652 #endif
00653 
00654     setlocale(LC_TIME, locale_time);
00655 
00656     timenow = time(NULL);
00657     timeinfo = localtime(&timenow);
00658 
00659     /* localized days */
00660     for (i = 0; i < 7; i++)
00661     {
00662         timeinfo->tm_wday = i;
00663         strftime(buf, MAX_L10N_DATA, "%a", timeinfo);
00664         ptr = MemoryContextStrdup(TopMemoryContext, buf);
00665         if (localized_abbrev_days[i])
00666             pfree(localized_abbrev_days[i]);
00667         localized_abbrev_days[i] = ptr;
00668 
00669         strftime(buf, MAX_L10N_DATA, "%A", timeinfo);
00670         ptr = MemoryContextStrdup(TopMemoryContext, buf);
00671         if (localized_full_days[i])
00672             pfree(localized_full_days[i]);
00673         localized_full_days[i] = ptr;
00674     }
00675 
00676     /* localized months */
00677     for (i = 0; i < 12; i++)
00678     {
00679         timeinfo->tm_mon = i;
00680         timeinfo->tm_mday = 1;  /* make sure we don't have invalid date */
00681         strftime(buf, MAX_L10N_DATA, "%b", timeinfo);
00682         ptr = MemoryContextStrdup(TopMemoryContext, buf);
00683         if (localized_abbrev_months[i])
00684             pfree(localized_abbrev_months[i]);
00685         localized_abbrev_months[i] = ptr;
00686 
00687         strftime(buf, MAX_L10N_DATA, "%B", timeinfo);
00688         ptr = MemoryContextStrdup(TopMemoryContext, buf);
00689         if (localized_full_months[i])
00690             pfree(localized_full_months[i]);
00691         localized_full_months[i] = ptr;
00692     }
00693 
00694     /* try to restore internal settings */
00695     if (save_lc_time)
00696     {
00697         if (!setlocale(LC_TIME, save_lc_time))
00698             elog(WARNING, "failed to restore old locale");
00699         pfree(save_lc_time);
00700     }
00701 
00702 #ifdef WIN32
00703     /* try to restore internal ctype settings */
00704     if (save_lc_ctype)
00705     {
00706         if (!setlocale(LC_CTYPE, save_lc_ctype))
00707             elog(WARNING, "failed to restore old locale");
00708         pfree(save_lc_ctype);
00709     }
00710 #endif
00711 
00712     CurrentLCTimeValid = true;
00713 }
00714 
00715 
00716 #if defined(WIN32) && defined(LC_MESSAGES)
00717 /*
00718  * Convert a Windows setlocale() argument to a Unix-style one.
00719  *
00720  * Regardless of platform, we install message catalogs under a Unix-style
00721  * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
00722  * following that style will elicit localized interface strings.
00723  *
00724  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
00725  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
00726  * case-insensitive.  setlocale() returns the fully-qualified form; for
00727  * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
00728  * setlocale() and _create_locale() select a "locale identifier"[1] and store
00729  * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
00730  * ISO 639 language and the ISO 3166 country.  Character encoding does not
00731  * matter, because the server and client encodings govern that.
00732  *
00733  * Windows Vista introduced the "locale name" concept[2], closely following
00734  * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
00735  * Studio 2012, setlocale() accepts locale names in addition to the strings it
00736  * accepted historically.  It does not standardize them; setlocale("Th-tH")
00737  * returns "Th-tH".  setlocale(category, "") still returns a traditional
00738  * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
00739  * content to carry locale names instead of locale identifiers.
00740  *
00741  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
00742  * IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
00743  * Unix-style values of the lc_messages GUC can elicit localized messages.  In
00744  * particular, every lc_messages setting that initdb can select automatically
00745  * will yield only C-locale messages.  XXX This could be fixed by running the
00746  * fully-qualified locale name through a lookup table.
00747  *
00748  * This function returns a pointer to a static buffer bearing the converted
00749  * name or NULL if conversion fails.
00750  *
00751  * [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
00752  * [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
00753  */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)          /* VC8.0 or later */
    static char iso_lc_messages[32];
    _locale_t   loct;

    /* Locals are declared up front; which set is needed depends on the CRT. */
#if (_MSC_VER >= 1700)          /* Visual Studio 2012 or later */
    size_t      rc;
    char       *hyphen;
#else
    char        isolang[32];
    char        isocrty[32];
    LCID        lcid;
#endif

    /* "C" and "POSIX", in any letter case, translate directly to "C". */
    if (pg_strcasecmp("c", winlocname) == 0 ||
        pg_strcasecmp("posix", winlocname) == 0)
    {
        strcpy(iso_lc_messages, "C");
        return iso_lc_messages;
    }

    /* Let the C runtime resolve the name; give up if it cannot. */
    loct = _create_locale(LC_CTYPE, winlocname);
    if (loct == NULL)
        return NULL;

#if (_MSC_VER >= 1700)          /* Visual Studio 2012 or later */

    /* Locale names use only ASCII, any conversion locale suffices. */
    rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
                    sizeof(iso_lc_messages), NULL);
    _free_locale(loct);
    if (rc == -1 || rc == sizeof(iso_lc_messages))
        return NULL;

    /*
     * Since the message catalogs sit on a case-insensitive filesystem, we
     * need not standardize letter case here.  So long as we do not ship
     * message catalogs for which it would matter, we also need not
     * translate the script/variant portion, e.g. uz-Cyrl-UZ to
     * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
     *
     * Note that the locale name can be less-specific than the value we
     * would derive under earlier Visual Studio releases.  For example,
     * French_France.1252 yields just "fr".  This does not affect any of
     * the country-specific message catalogs available as of this writing
     * (pt_BR, zh_CN, zh_TW).
     */
    hyphen = strchr(iso_lc_messages, '-');
    if (hyphen != NULL)
        *hyphen = '_';
#else

    /* Older runtimes carry an LCID; an LCID of 0 is treated as US English. */
    lcid = loct->locinfo->lc_handle[LC_CTYPE];
    if (lcid == 0)
        lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
    _free_locale(loct);

    /* Look up the ISO 639 language and ISO 3166 country for that LCID. */
    if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
        return NULL;
    if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
        return NULL;
    snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
#endif

    return iso_lc_messages;
#else
    return NULL;                /* Not supported on this version of msvc/mingw */
#endif   /* _MSC_VER >= 1400 */
}
00821 #endif   /* WIN32 && LC_MESSAGES */
00822 
00823 
00824 /*
00825  * Cache mechanism for collation information.
00826  *
00827  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
00828  * (or POSIX), so we can optimize a few code paths in various places.
00829  * For the built-in C and POSIX collations, we can know that without even
00830  * doing a cache lookup, but we want to support aliases for C/POSIX too.
00831  * For the "default" collation, there are separate static cache variables,
00832  * since consulting the pg_collation catalog doesn't tell us what we need.
00833  *
00834  * Also, if a pg_locale_t has been requested for a collation, we cache that
00835  * for the life of a backend.
00836  *
00837  * Note that some code relies on the flags not reporting false negatives
00838  * (that is, saying it's not C when it is).  For example, char2wchar()
00839  * could fail if the locale is C, so str_tolower() shouldn't call it
00840  * in that case.
00841  *
00842  * Note that we currently lack any way to flush the cache.  Since we don't
00843  * support ALTER COLLATION, this is OK.  The worst case is that someone
00844  * drops a collation, and a useless cache entry hangs around in existing
00845  * backends.
00846  */
00847 
00848 static collation_cache_entry *
00849 lookup_collation_cache(Oid collation, bool set_flags)
00850 {
00851     collation_cache_entry *cache_entry;
00852     bool        found;
00853 
00854     Assert(OidIsValid(collation));
00855     Assert(collation != DEFAULT_COLLATION_OID);
00856 
00857     if (collation_cache == NULL)
00858     {
00859         /* First time through, initialize the hash table */
00860         HASHCTL     ctl;
00861 
00862         memset(&ctl, 0, sizeof(ctl));
00863         ctl.keysize = sizeof(Oid);
00864         ctl.entrysize = sizeof(collation_cache_entry);
00865         ctl.hash = oid_hash;
00866         collation_cache = hash_create("Collation cache", 100, &ctl,
00867                                       HASH_ELEM | HASH_FUNCTION);
00868     }
00869 
00870     cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
00871     if (!found)
00872     {
00873         /*
00874          * Make sure cache entry is marked invalid, in case we fail before
00875          * setting things.
00876          */
00877         cache_entry->flags_valid = false;
00878         cache_entry->locale = 0;
00879     }
00880 
00881     if (set_flags && !cache_entry->flags_valid)
00882     {
00883         /* Attempt to set the flags */
00884         HeapTuple   tp;
00885         Form_pg_collation collform;
00886         const char *collcollate;
00887         const char *collctype;
00888 
00889         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
00890         if (!HeapTupleIsValid(tp))
00891             elog(ERROR, "cache lookup failed for collation %u", collation);
00892         collform = (Form_pg_collation) GETSTRUCT(tp);
00893 
00894         collcollate = NameStr(collform->collcollate);
00895         collctype = NameStr(collform->collctype);
00896 
00897         cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
00898                                      (strcmp(collcollate, "POSIX") == 0));
00899         cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
00900                                    (strcmp(collctype, "POSIX") == 0));
00901 
00902         cache_entry->flags_valid = true;
00903 
00904         ReleaseSysCache(tp);
00905     }
00906 
00907     return cache_entry;
00908 }
00909 
00910 
00911 /*
00912  * Detect whether collation's LC_COLLATE property is C
00913  */
00914 bool
00915 lc_collate_is_c(Oid collation)
00916 {
00917     /*
00918      * If we're asked about "collation 0", return false, so that the code will
00919      * go into the non-C path and report that the collation is bogus.
00920      */
00921     if (!OidIsValid(collation))
00922         return false;
00923 
00924     /*
00925      * If we're asked about the default collation, we have to inquire of the C
00926      * library.  Cache the result so we only have to compute it once.
00927      */
00928     if (collation == DEFAULT_COLLATION_OID)
00929     {
00930         static int  result = -1;
00931         char       *localeptr;
00932 
00933         if (result >= 0)
00934             return (bool) result;
00935         localeptr = setlocale(LC_COLLATE, NULL);
00936         if (!localeptr)
00937             elog(ERROR, "invalid LC_COLLATE setting");
00938 
00939         if (strcmp(localeptr, "C") == 0)
00940             result = true;
00941         else if (strcmp(localeptr, "POSIX") == 0)
00942             result = true;
00943         else
00944             result = false;
00945         return (bool) result;
00946     }
00947 
00948     /*
00949      * If we're asked about the built-in C/POSIX collations, we know that.
00950      */
00951     if (collation == C_COLLATION_OID ||
00952         collation == POSIX_COLLATION_OID)
00953         return true;
00954 
00955     /*
00956      * Otherwise, we have to consult pg_collation, but we cache that.
00957      */
00958     return (lookup_collation_cache(collation, true))->collate_is_c;
00959 }
00960 
00961 /*
00962  * Detect whether collation's LC_CTYPE property is C
00963  */
00964 bool
00965 lc_ctype_is_c(Oid collation)
00966 {
00967     /*
00968      * If we're asked about "collation 0", return false, so that the code will
00969      * go into the non-C path and report that the collation is bogus.
00970      */
00971     if (!OidIsValid(collation))
00972         return false;
00973 
00974     /*
00975      * If we're asked about the default collation, we have to inquire of the C
00976      * library.  Cache the result so we only have to compute it once.
00977      */
00978     if (collation == DEFAULT_COLLATION_OID)
00979     {
00980         static int  result = -1;
00981         char       *localeptr;
00982 
00983         if (result >= 0)
00984             return (bool) result;
00985         localeptr = setlocale(LC_CTYPE, NULL);
00986         if (!localeptr)
00987             elog(ERROR, "invalid LC_CTYPE setting");
00988 
00989         if (strcmp(localeptr, "C") == 0)
00990             result = true;
00991         else if (strcmp(localeptr, "POSIX") == 0)
00992             result = true;
00993         else
00994             result = false;
00995         return (bool) result;
00996     }
00997 
00998     /*
00999      * If we're asked about the built-in C/POSIX collations, we know that.
01000      */
01001     if (collation == C_COLLATION_OID ||
01002         collation == POSIX_COLLATION_OID)
01003         return true;
01004 
01005     /*
01006      * Otherwise, we have to consult pg_collation, but we cache that.
01007      */
01008     return (lookup_collation_cache(collation, true))->ctype_is_c;
01009 }
01010 
01011 
/*
 * report_newlocale_failure --- report a failed attempt to create a locale
 *
 * localename is the locale name that was handed to newlocale() (or to
 * _create_locale() on Windows).  The failure is reported at ERROR level,
 * with errno supplying the reason via %m.
 */
#ifdef HAVE_LOCALE_T
static void
report_newlocale_failure(const char *localename)
{
    int         save_errno;

    /*
     * Not every locale-creation call can be relied on to set errno when it
     * fails (the _create_locale() documentation, for one, promises only a
     * NULL result).  A zero errno would make %m print a "success"-style
     * message and would skip the errdetail below, so treat "errno not set"
     * as ENOENT.
     */
    if (errno == 0)
        errno = ENOENT;

    /* copy errno in case one of the ereport auxiliary functions changes it */
    save_errno = errno;

    /*
     * ENOENT means "no such locale", not "no such file", so clarify that
     * errno with an errdetail message.
     */
    ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
             errmsg("could not create locale \"%s\": %m",
                    localename),
             (save_errno == ENOENT ?
              errdetail("The operating system could not find any locale data for the locale name \"%s\".",
                        localename) : 0)));
}
#endif   /* HAVE_LOCALE_T */
01033 
01034 
01035 /*
01036  * Create a locale_t from a collation OID.  Results are cached for the
01037  * lifetime of the backend.  Thus, do not free the result with freelocale().
01038  *
01039  * As a special optimization, the default/database collation returns 0.
01040  * Callers should then revert to the non-locale_t-enabled code path.
01041  * In fact, they shouldn't call this function at all when they are dealing
01042  * with the default locale.  That can save quite a bit in hotspots.
01043  * Also, callers should avoid calling this before going down a C/POSIX
01044  * fastpath, because such a fastpath should work even on platforms without
01045  * locale_t support in the C library.
01046  *
01047  * For simplicity, we always generate COLLATE + CTYPE even though we
01048  * might only need one of them.  Since this is called only once per session,
01049  * it shouldn't cost much.
01050  */
01051 pg_locale_t
01052 pg_newlocale_from_collation(Oid collid)
01053 {
01054     collation_cache_entry *cache_entry;
01055 
01056     /* Callers must pass a valid OID */
01057     Assert(OidIsValid(collid));
01058 
01059     /* Return 0 for "default" collation, just in case caller forgets */
01060     if (collid == DEFAULT_COLLATION_OID)
01061         return (pg_locale_t) 0;
01062 
01063     cache_entry = lookup_collation_cache(collid, false);
01064 
01065     if (cache_entry->locale == 0)
01066     {
01067         /* We haven't computed this yet in this session, so do it */
01068 #ifdef HAVE_LOCALE_T
01069         HeapTuple   tp;
01070         Form_pg_collation collform;
01071         const char *collcollate;
01072         const char *collctype;
01073         locale_t    result;
01074 
01075         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
01076         if (!HeapTupleIsValid(tp))
01077             elog(ERROR, "cache lookup failed for collation %u", collid);
01078         collform = (Form_pg_collation) GETSTRUCT(tp);
01079 
01080         collcollate = NameStr(collform->collcollate);
01081         collctype = NameStr(collform->collctype);
01082 
01083         if (strcmp(collcollate, collctype) == 0)
01084         {
01085             /* Normal case where they're the same */
01086 #ifndef WIN32
01087             result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
01088                                NULL);
01089 #else
01090             result = _create_locale(LC_ALL, collcollate);
01091 #endif
01092             if (!result)
01093                 report_newlocale_failure(collcollate);
01094         }
01095         else
01096         {
01097 #ifndef WIN32
01098             /* We need two newlocale() steps */
01099             locale_t    loc1;
01100 
01101             loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
01102             if (!loc1)
01103                 report_newlocale_failure(collcollate);
01104             result = newlocale(LC_CTYPE_MASK, collctype, loc1);
01105             if (!result)
01106                 report_newlocale_failure(collctype);
01107 #else
01108 
01109             /*
01110              * XXX The _create_locale() API doesn't appear to support this.
01111              * Could perhaps be worked around by changing pg_locale_t to
01112              * contain two separate fields.
01113              */
01114             ereport(ERROR,
01115                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01116                      errmsg("collations with different collate and ctype values are not supported on this platform")));
01117 #endif
01118         }
01119 
01120         cache_entry->locale = result;
01121 
01122         ReleaseSysCache(tp);
01123 #else                           /* not HAVE_LOCALE_T */
01124 
01125         /*
01126          * For platforms that don't support locale_t, we can't do anything
01127          * with non-default collations.
01128          */
01129         ereport(ERROR,
01130                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01131         errmsg("nondefault collations are not supported on this platform")));
01132 #endif   /* not HAVE_LOCALE_T */
01133     }
01134 
01135     return cache_entry->locale;
01136 }
01137 
01138 
01139 /*
01140  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
01141  * Therefore we keep them here rather than with the mbutils code.
01142  */
01143 
01144 #ifdef USE_WIDE_UPPER_LOWER
01145 
01146 /*
01147  * wchar2char --- convert wide characters to multibyte format
01148  *
01149  * This has the same API as the standard wcstombs_l() function; in particular,
01150  * tolen is the maximum number of bytes to store at *to, and *from must be
01151  * zero-terminated.  The output will be zero-terminated iff there is room.
01152  */
01153 size_t
01154 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
01155 {
01156     size_t      result;
01157 
01158     if (tolen == 0)
01159         return 0;
01160 
01161 #ifdef WIN32
01162 
01163     /*
01164      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
01165      * for some reason mbstowcs and wcstombs won't do this for us, so we use
01166      * MultiByteToWideChar().
01167      */
01168     if (GetDatabaseEncoding() == PG_UTF8)
01169     {
01170         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
01171                                      NULL, NULL);
01172         /* A zero return is failure */
01173         if (result <= 0)
01174             result = -1;
01175         else
01176         {
01177             Assert(result <= tolen);
01178             /* Microsoft counts the zero terminator in the result */
01179             result--;
01180         }
01181     }
01182     else
01183 #endif   /* WIN32 */
01184     if (locale == (pg_locale_t) 0)
01185     {
01186         /* Use wcstombs directly for the default locale */
01187         result = wcstombs(to, from, tolen);
01188     }
01189     else
01190     {
01191 #ifdef HAVE_LOCALE_T
01192 #ifdef HAVE_WCSTOMBS_L
01193         /* Use wcstombs_l for nondefault locales */
01194         result = wcstombs_l(to, from, tolen, locale);
01195 #else                           /* !HAVE_WCSTOMBS_L */
01196         /* We have to temporarily set the locale as current ... ugh */
01197         locale_t    save_locale = uselocale(locale);
01198 
01199         result = wcstombs(to, from, tolen);
01200 
01201         uselocale(save_locale);
01202 #endif   /* HAVE_WCSTOMBS_L */
01203 #else                           /* !HAVE_LOCALE_T */
01204         /* Can't have locale != 0 without HAVE_LOCALE_T */
01205         elog(ERROR, "wcstombs_l is not available");
01206         result = 0;             /* keep compiler quiet */
01207 #endif   /* HAVE_LOCALE_T */
01208     }
01209 
01210     return result;
01211 }
01212 
01213 /*
01214  * char2wchar --- convert multibyte characters to wide characters
01215  *
01216  * This has almost the API of mbstowcs_l(), except that *from need not be
01217  * null-terminated; instead, the number of input bytes is specified as
01218  * fromlen.  Also, we ereport() rather than returning -1 for invalid
01219  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
01220  * The output will be zero-terminated iff there is room.
01221  */
01222 size_t
01223 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
01224            pg_locale_t locale)
01225 {
01226     size_t      result;
01227 
01228     if (tolen == 0)
01229         return 0;
01230 
01231 #ifdef WIN32
01232     /* See WIN32 "Unicode" comment above */
01233     if (GetDatabaseEncoding() == PG_UTF8)
01234     {
01235         /* Win32 API does not work for zero-length input */
01236         if (fromlen == 0)
01237             result = 0;
01238         else
01239         {
01240             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
01241             /* A zero return is failure */
01242             if (result == 0)
01243                 result = -1;
01244         }
01245 
01246         if (result != -1)
01247         {
01248             Assert(result < tolen);
01249             /* Append trailing null wchar (MultiByteToWideChar() does not) */
01250             to[result] = 0;
01251         }
01252     }
01253     else
01254 #endif   /* WIN32 */
01255     {
01256         /* mbstowcs requires ending '\0' */
01257         char       *str = pnstrdup(from, fromlen);
01258 
01259         if (locale == (pg_locale_t) 0)
01260         {
01261             /* Use mbstowcs directly for the default locale */
01262             result = mbstowcs(to, str, tolen);
01263         }
01264         else
01265         {
01266 #ifdef HAVE_LOCALE_T
01267 #ifdef HAVE_MBSTOWCS_L
01268             /* Use mbstowcs_l for nondefault locales */
01269             result = mbstowcs_l(to, str, tolen, locale);
01270 #else                           /* !HAVE_MBSTOWCS_L */
01271             /* We have to temporarily set the locale as current ... ugh */
01272             locale_t    save_locale = uselocale(locale);
01273 
01274             result = mbstowcs(to, str, tolen);
01275 
01276             uselocale(save_locale);
01277 #endif   /* HAVE_MBSTOWCS_L */
01278 #else                           /* !HAVE_LOCALE_T */
01279             /* Can't have locale != 0 without HAVE_LOCALE_T */
01280             elog(ERROR, "mbstowcs_l is not available");
01281             result = 0;         /* keep compiler quiet */
01282 #endif   /* HAVE_LOCALE_T */
01283         }
01284 
01285         pfree(str);
01286     }
01287 
01288     if (result == -1)
01289     {
01290         /*
01291          * Invalid multibyte character encountered.  We try to give a useful
01292          * error message by letting pg_verifymbstr check the string.  But it's
01293          * possible that the string is OK to us, and not OK to mbstowcs ---
01294          * this suggests that the LC_CTYPE locale is different from the
01295          * database encoding.  Give a generic error message if verifymbstr
01296          * can't find anything wrong.
01297          */
01298         pg_verifymbstr(from, fromlen, false);   /* might not return */
01299         /* but if it does ... */
01300         ereport(ERROR,
01301                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
01302                  errmsg("invalid multibyte character for locale"),
01303                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
01304     }
01305 
01306     return result;
01307 }
01308 
01309 #endif   /* USE_WIDE_UPPER_LOWER */