Header And Logo

PostgreSQL
| The world's most advanced open source database.

scansup.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * scansup.c
00004  *    support routines for the lex/flex scanner, used by both the normal
00005  * backend as well as the bootstrap backend
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  *
00011  * IDENTIFICATION
00012  *    src/backend/parser/scansup.c
00013  *
00014  *-------------------------------------------------------------------------
00015  */
00016 #include "postgres.h"
00017 
00018 #include <ctype.h>
00019 
00020 #include "parser/scansup.h"
00021 #include "mb/pg_wchar.h"
00022 
00023 
/* ----------------
 *      scanstr
 *
 * Produce a copy of the input string in which escape sequences have been
 * replaced by the characters they stand for:
 *
 *  - a doubled single quote ('') becomes one single quote
 *  - \b \f \n \r \t become the corresponding control characters
 *  - a backslash followed by one to three octal digits becomes the byte
 *    with that value (cast to char)
 *  - a backslash followed by anything else becomes that following character
 *
 * A NULL or empty input yields a freshly pstrdup'd empty string.
 *
 * The result is palloc'd; the caller is responsible for eventually
 * pfree'ing it.
 * ----------------
 */

char *
scanstr(const char *s)
{
    char       *buf;
    int         len,
                in,
                out;

    if (s == NULL || s[0] == '\0')
        return pstrdup("");

    len = strlen(s);

    /* Unescaping never lengthens the string, so len + 1 bytes suffice. */
    buf = palloc(len + 1);

    in = 0;
    out = 0;
    while (in < len)
    {
        char        c = s[in++];

        if (c == '\'')
        {
            /*
             * A correctly working scanner only lets unescaped quotes through
             * in pairs, so a second quote is expected right here.  The
             * bootstrap parser is less careful, hence the assertion.
             */
            Assert(s[in] == '\'');
            buf[out++] = s[in++];
        }
        else if (c == '\\')
        {
            /* Consume the character that follows the backslash. */
            char        esc = s[in++];

            switch (esc)
            {
                case 'b':
                    buf[out++] = '\b';
                    break;
                case 'f':
                    buf[out++] = '\f';
                    break;
                case 'n':
                    buf[out++] = '\n';
                    break;
                case 'r':
                    buf[out++] = '\r';
                    break;
                case 't':
                    buf[out++] = '\t';
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    {
                        /* esc is the first octal digit; take up to two more */
                        long        octVal = esc - '0';
                        int         ndigits = 1;

                        while (ndigits < 3 &&
                               s[in] >= '0' && s[in] <= '7')
                        {
                            octVal = (octVal << 3) + (s[in] - '0');
                            in++;
                            ndigits++;
                        }
                        buf[out++] = ((char) octVal);
                    }
                    break;
                default:
                    /* unrecognized escape: keep the escaped character as-is */
                    buf[out++] = esc;
                    break;
            }                   /* switch */
        }                       /* c == '\\' */
        else
            buf[out++] = c;
    }
    buf[out] = '\0';
    return buf;
}
00115 
00116 
00117 /*
00118  * downcase_truncate_identifier() --- do appropriate downcasing and
00119  * truncation of an unquoted identifier.  Optionally warn of truncation.
00120  *
00121  * Returns a palloc'd string containing the adjusted identifier.
00122  *
00123  * Note: in some usages the passed string is not null-terminated.
00124  *
00125  * Note: the API of this function is designed to allow for downcasing
00126  * transformations that increase the string length, but we don't yet
00127  * support that.  If you want to implement it, you'll need to fix
00128  * SplitIdentifierString() in utils/adt/varlena.c.
00129  */
00130 char *
00131 downcase_truncate_identifier(const char *ident, int len, bool warn)
00132 {
00133     char       *result;
00134     int         i;
00135 
00136     result = palloc(len + 1);
00137 
00138     /*
00139      * SQL99 specifies Unicode-aware case normalization, which we don't yet
00140      * have the infrastructure for.  Instead we use tolower() to provide a
00141      * locale-aware translation.  However, there are some locales where this
00142      * is not right either (eg, Turkish may do strange things with 'i' and
00143      * 'I').  Our current compromise is to use tolower() for characters with
00144      * the high bit set, and use an ASCII-only downcasing for 7-bit
00145      * characters.
00146      */
00147     for (i = 0; i < len; i++)
00148     {
00149         unsigned char ch = (unsigned char) ident[i];
00150 
00151         if (ch >= 'A' && ch <= 'Z')
00152             ch += 'a' - 'A';
00153         else if (IS_HIGHBIT_SET(ch) && isupper(ch))
00154             ch = tolower(ch);
00155         result[i] = (char) ch;
00156     }
00157     result[i] = '\0';
00158 
00159     if (i >= NAMEDATALEN)
00160         truncate_identifier(result, i, warn);
00161 
00162     return result;
00163 }
00164 
00165 /*
00166  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
00167  *
00168  * The given string is modified in-place, if necessary.  A warning is
00169  * issued if requested.
00170  *
00171  * We require the caller to pass in the string length since this saves a
00172  * strlen() call in some common usages.
00173  */
00174 void
00175 truncate_identifier(char *ident, int len, bool warn)
00176 {
00177     if (len >= NAMEDATALEN)
00178     {
00179         len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
00180         if (warn)
00181         {
00182             /*
00183              * We avoid using %.*s here because it can misbehave if the data
00184              * is not valid in what libc thinks is the prevailing encoding.
00185              */
00186             char        buf[NAMEDATALEN];
00187 
00188             memcpy(buf, ident, len);
00189             buf[len] = '\0';
00190             ereport(NOTICE,
00191                     (errcode(ERRCODE_NAME_TOO_LONG),
00192                      errmsg("identifier \"%s\" will be truncated to \"%s\"",
00193                             ident, buf)));
00194         }
00195         ident[len] = '\0';
00196     }
00197 }
00198 
/*
 * scanner_isspace() --- return TRUE if flex scanner considers char whitespace
 *
 * Use this in place of isspace(), whose result may depend on the locale,
 * whenever the answer has to agree with what the lexer does.
 *
 * Only an isspace substitute exists for now; in principle isalnum and
 * friends might need the same treatment.
 */
bool
scanner_isspace(char ch)
{
    /* This must match scan.l's list of {space} characters */
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            return true;
        default:
            return false;
    }
}