Header And Logo

PostgreSQL
| The world's most advanced open source database.

tzparser.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * tzparser.c
00004  *    Functions for parsing timezone offset files
00005  *
00006  * Note: this code is invoked from the check_hook for the GUC variable
00007  * timezone_abbreviations.  Therefore, it should report problems using
00008  * GUC_check_errmsg() and related functions, and try to avoid throwing
00009  * elog(ERROR).  This is not completely bulletproof at present --- in
00010  * particular out-of-memory will throw an error.  Could probably fix with
00011  * PG_TRY if necessary.
00012  *
00013  *
00014  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00015  * Portions Copyright (c) 1994, Regents of the University of California
00016  *
00017  * IDENTIFICATION
00018  *    src/backend/utils/misc/tzparser.c
00019  *
00020  *-------------------------------------------------------------------------
00021  */
00022 
00023 #include "postgres.h"
00024 
00025 #include <ctype.h>
00026 
00027 #include "miscadmin.h"
00028 #include "storage/fd.h"
00029 #include "utils/guc.h"
00030 #include "utils/memutils.h"
00031 #include "utils/tzparser.h"
00032 
00033 
00034 #define WHITESPACE " \t\n\r"
00035 
00036 static bool validateTzEntry(tzEntry *tzentry);
00037 static bool splitTzLine(const char *filename, int lineno,
00038             char *line, tzEntry *tzentry);
00039 static int addToArray(tzEntry **base, int *arraysize, int n,
00040            tzEntry *entry, bool override);
00041 static int ParseTzFile(const char *filename, int depth,
00042             tzEntry **base, int *arraysize, int n);
00043 
00044 
00045 /*
00046  * Apply additional validation checks to a tzEntry
00047  *
00048  * Returns TRUE if OK, else false
00049  */
00050 static bool
00051 validateTzEntry(tzEntry *tzentry)
00052 {
00053     unsigned char *p;
00054 
00055     /*
00056      * Check restrictions imposed by datetkntbl storage format (see
00057      * datetime.c)
00058      */
00059     if (strlen(tzentry->abbrev) > TOKMAXLEN)
00060     {
00061         GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
00062                          tzentry->abbrev, TOKMAXLEN,
00063                          tzentry->filename, tzentry->lineno);
00064         return false;
00065     }
00066     if (tzentry->offset % 900 != 0)
00067     {
00068         GUC_check_errmsg("time zone offset %d is not a multiple of 900 sec (15 min) in time zone file \"%s\", line %d",
00069                          tzentry->offset,
00070                          tzentry->filename, tzentry->lineno);
00071         return false;
00072     }
00073 
00074     /*
00075      * Sanity-check the offset: shouldn't exceed 14 hours
00076      */
00077     if (tzentry->offset > 14 * 60 * 60 ||
00078         tzentry->offset < -14 * 60 * 60)
00079     {
00080         GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
00081                          tzentry->offset,
00082                          tzentry->filename, tzentry->lineno);
00083         return false;
00084     }
00085 
00086     /*
00087      * Convert abbrev to lowercase (must match datetime.c's conversion)
00088      */
00089     for (p = (unsigned char *) tzentry->abbrev; *p; p++)
00090         *p = pg_tolower(*p);
00091 
00092     return true;
00093 }
00094 
00095 /*
00096  * Attempt to parse the line as a timezone abbrev spec (name, offset, dst)
00097  *
00098  * Returns TRUE if OK, else false; data is stored in *tzentry
00099  */
00100 static bool
00101 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
00102 {
00103     char       *abbrev;
00104     char       *offset;
00105     char       *offset_endptr;
00106     char       *remain;
00107     char       *is_dst;
00108 
00109     tzentry->lineno = lineno;
00110     tzentry->filename = filename;
00111 
00112     abbrev = strtok(line, WHITESPACE);
00113     if (!abbrev)
00114     {
00115         GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
00116                          filename, lineno);
00117         return false;
00118     }
00119     tzentry->abbrev = abbrev;
00120 
00121     offset = strtok(NULL, WHITESPACE);
00122     if (!offset)
00123     {
00124         GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
00125                          filename, lineno);
00126         return false;
00127     }
00128     tzentry->offset = strtol(offset, &offset_endptr, 10);
00129     if (offset_endptr == offset || *offset_endptr != '\0')
00130     {
00131         GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
00132                          filename, lineno);
00133         return false;
00134     }
00135 
00136     is_dst = strtok(NULL, WHITESPACE);
00137     if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
00138     {
00139         tzentry->is_dst = true;
00140         remain = strtok(NULL, WHITESPACE);
00141     }
00142     else
00143     {
00144         /* there was no 'D' dst specifier */
00145         tzentry->is_dst = false;
00146         remain = is_dst;
00147     }
00148 
00149     if (!remain)                /* no more non-whitespace chars */
00150         return true;
00151 
00152     if (remain[0] != '#')       /* must be a comment */
00153     {
00154         GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
00155                          filename, lineno);
00156         return false;
00157     }
00158     return true;
00159 }
00160 
00161 /*
00162  * Insert entry into sorted array
00163  *
00164  * *base: base address of array (changeable if must enlarge array)
00165  * *arraysize: allocated length of array (changeable if must enlarge array)
00166  * n: current number of valid elements in array
00167  * entry: new data to insert
00168  * override: TRUE if OK to override
00169  *
00170  * Returns the new array length (new value for n), or -1 if error
00171  */
00172 static int
00173 addToArray(tzEntry **base, int *arraysize, int n,
00174            tzEntry *entry, bool override)
00175 {
00176     tzEntry    *arrayptr;
00177     int         low;
00178     int         high;
00179 
00180     /*
00181      * Search the array for a duplicate; as a useful side effect, the array is
00182      * maintained in sorted order.  We use strcmp() to ensure we match the
00183      * sort order datetime.c expects.
00184      */
00185     arrayptr = *base;
00186     low = 0;
00187     high = n - 1;
00188     while (low <= high)
00189     {
00190         int         mid = (low + high) >> 1;
00191         tzEntry    *midptr = arrayptr + mid;
00192         int         cmp;
00193 
00194         cmp = strcmp(entry->abbrev, midptr->abbrev);
00195         if (cmp < 0)
00196             high = mid - 1;
00197         else if (cmp > 0)
00198             low = mid + 1;
00199         else
00200         {
00201             /*
00202              * Found a duplicate entry; complain unless it's the same.
00203              */
00204             if (midptr->offset == entry->offset &&
00205                 midptr->is_dst == entry->is_dst)
00206             {
00207                 /* return unchanged array */
00208                 return n;
00209             }
00210             if (override)
00211             {
00212                 /* same abbrev but something is different, override */
00213                 midptr->offset = entry->offset;
00214                 midptr->is_dst = entry->is_dst;
00215                 return n;
00216             }
00217             /* same abbrev but something is different, complain */
00218             GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
00219                              entry->abbrev);
00220             GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
00221                                 midptr->filename, midptr->lineno,
00222                                 entry->filename, entry->lineno);
00223             return -1;
00224         }
00225     }
00226 
00227     /*
00228      * No match, insert at position "low".
00229      */
00230     if (n >= *arraysize)
00231     {
00232         *arraysize *= 2;
00233         *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
00234     }
00235 
00236     arrayptr = *base + low;
00237 
00238     memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
00239 
00240     memcpy(arrayptr, entry, sizeof(tzEntry));
00241 
00242     /* Must dup the abbrev to ensure it survives */
00243     arrayptr->abbrev = pstrdup(entry->abbrev);
00244 
00245     return n + 1;
00246 }
00247 
00248 /*
00249  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
00250  *
00251  * filename: user-specified file name (does not include path)
00252  * depth: current recursion depth
00253  * *base: array for results (changeable if must enlarge array)
00254  * *arraysize: allocated length of array (changeable if must enlarge array)
00255  * n: current number of valid elements in array
00256  *
00257  * Returns the new array length (new value for n), or -1 if error
00258  */
00259 static int
00260 ParseTzFile(const char *filename, int depth,
00261             tzEntry **base, int *arraysize, int n)
00262 {
00263     char        share_path[MAXPGPATH];
00264     char        file_path[MAXPGPATH];
00265     FILE       *tzFile;
00266     char        tzbuf[1024];
00267     char       *line;
00268     tzEntry     tzentry;
00269     int         lineno = 0;
00270     bool        override = false;
00271     const char *p;
00272 
00273     /*
00274      * We enforce that the filename is all alpha characters.  This may be
00275      * overly restrictive, but we don't want to allow access to anything
00276      * outside the timezonesets directory, so for instance '/' *must* be
00277      * rejected.
00278      */
00279     for (p = filename; *p; p++)
00280     {
00281         if (!isalpha((unsigned char) *p))
00282         {
00283             /* at level 0, just use guc.c's regular "invalid value" message */
00284             if (depth > 0)
00285                 GUC_check_errmsg("invalid time zone file name \"%s\"",
00286                                  filename);
00287             return -1;
00288         }
00289     }
00290 
00291     /*
00292      * The maximal recursion depth is a pretty arbitrary setting. It is hard
00293      * to imagine that someone needs more than 3 levels so stick with this
00294      * conservative setting until someone complains.
00295      */
00296     if (depth > 3)
00297     {
00298         GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
00299                          filename);
00300         return -1;
00301     }
00302 
00303     get_share_path(my_exec_path, share_path);
00304     snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
00305              share_path, filename);
00306     tzFile = AllocateFile(file_path, "r");
00307     if (!tzFile)
00308     {
00309         /*
00310          * Check to see if the problem is not the filename but the directory.
00311          * This is worth troubling over because if the installation share/
00312          * directory is missing or unreadable, this is likely to be the first
00313          * place we notice a problem during postmaster startup.
00314          */
00315         int         save_errno = errno;
00316         DIR        *tzdir;
00317 
00318         snprintf(file_path, sizeof(file_path), "%s/timezonesets",
00319                  share_path);
00320         tzdir = AllocateDir(file_path);
00321         if (tzdir == NULL)
00322         {
00323             GUC_check_errmsg("could not open directory \"%s\": %m",
00324                              file_path);
00325             GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
00326                               my_exec_path);
00327             return -1;
00328         }
00329         FreeDir(tzdir);
00330         errno = save_errno;
00331 
00332         /*
00333          * otherwise, if file doesn't exist and it's level 0, guc.c's
00334          * complaint is enough
00335          */
00336         if (errno != ENOENT || depth > 0)
00337             GUC_check_errmsg("could not read time zone file \"%s\": %m",
00338                              filename);
00339 
00340         return -1;
00341     }
00342 
00343     while (!feof(tzFile))
00344     {
00345         lineno++;
00346         if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
00347         {
00348             if (ferror(tzFile))
00349             {
00350                 GUC_check_errmsg("could not read time zone file \"%s\": %m",
00351                                  filename);
00352                 return -1;
00353             }
00354             /* else we're at EOF after all */
00355             break;
00356         }
00357         if (strlen(tzbuf) == sizeof(tzbuf) - 1)
00358         {
00359             /* the line is too long for tzbuf */
00360             GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
00361                              filename, lineno);
00362             return -1;
00363         }
00364 
00365         /* skip over whitespace */
00366         line = tzbuf;
00367         while (*line && isspace((unsigned char) *line))
00368             line++;
00369 
00370         if (*line == '\0')      /* empty line */
00371             continue;
00372         if (*line == '#')       /* comment line */
00373             continue;
00374 
00375         if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
00376         {
00377             /* pstrdup so we can use filename in result data structure */
00378             char       *includeFile = pstrdup(line + strlen("@INCLUDE"));
00379 
00380             includeFile = strtok(includeFile, WHITESPACE);
00381             if (!includeFile || !*includeFile)
00382             {
00383                 GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
00384                                  filename, lineno);
00385                 return -1;
00386             }
00387             n = ParseTzFile(includeFile, depth + 1,
00388                             base, arraysize, n);
00389             if (n < 0)
00390                 return -1;
00391             continue;
00392         }
00393 
00394         if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
00395         {
00396             override = true;
00397             continue;
00398         }
00399 
00400         if (!splitTzLine(filename, lineno, line, &tzentry))
00401             return -1;
00402         if (!validateTzEntry(&tzentry))
00403             return -1;
00404         n = addToArray(base, arraysize, n, &tzentry, override);
00405         if (n < 0)
00406             return -1;
00407     }
00408 
00409     FreeFile(tzFile);
00410 
00411     return n;
00412 }
00413 
00414 /*
00415  * load_tzoffsets --- read and parse the specified timezone offset file
00416  *
00417  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
00418  * malloc'd not palloc'd.  On failure, return NULL, using GUC_check_errmsg
00419  * and friends to give details of the problem.
00420  */
00421 TimeZoneAbbrevTable *
00422 load_tzoffsets(const char *filename)
00423 {
00424     TimeZoneAbbrevTable *result = NULL;
00425     MemoryContext tmpContext;
00426     MemoryContext oldContext;
00427     tzEntry    *array;
00428     int         arraysize;
00429     int         n;
00430 
00431     /*
00432      * Create a temp memory context to work in.  This makes it easy to clean
00433      * up afterwards.
00434      */
00435     tmpContext = AllocSetContextCreate(CurrentMemoryContext,
00436                                        "TZParserMemory",
00437                                        ALLOCSET_SMALL_MINSIZE,
00438                                        ALLOCSET_SMALL_INITSIZE,
00439                                        ALLOCSET_SMALL_MAXSIZE);
00440     oldContext = MemoryContextSwitchTo(tmpContext);
00441 
00442     /* Initialize array at a reasonable size */
00443     arraysize = 128;
00444     array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
00445 
00446     /* Parse the file(s) */
00447     n = ParseTzFile(filename, 0, &array, &arraysize, 0);
00448 
00449     /* If no errors so far, allocate result and let datetime.c convert data */
00450     if (n >= 0)
00451     {
00452         result = malloc(offsetof(TimeZoneAbbrevTable, abbrevs) +
00453                         n * sizeof(datetkn));
00454         if (!result)
00455             GUC_check_errmsg("out of memory");
00456         else
00457             ConvertTimeZoneAbbrevs(result, array, n);
00458     }
00459 
00460     /* Clean up */
00461     MemoryContextSwitchTo(oldContext);
00462     MemoryContextDelete(tmpContext);
00463 
00464     return result;
00465 }