Header And Logo

PostgreSQL
| The world's most advanced open source database.

read.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * read.c
00004  *    routines to convert a string (legal ascii representation of node) back
00005  *    to nodes
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  *
00011  * IDENTIFICATION
00012  *    src/backend/nodes/read.c
00013  *
00014  * HISTORY
00015  *    AUTHOR            DATE            MAJOR EVENT
00016  *    Andrew Yu         Nov 2, 1994     file creation
00017  *
00018  *-------------------------------------------------------------------------
00019  */
00020 #include "postgres.h"
00021 
00022 #include <ctype.h>
00023 
00024 #include "nodes/pg_list.h"
00025 #include "nodes/readfuncs.h"
00026 #include "nodes/value.h"
00027 
00028 
/* Scan position shared between stringToNode and pg_strtok */
static char *pg_strtok_ptr = NULL;


/*
 * stringToNode -
 *    build and return the Node described by the legal ASCII string "str"
 *
 * The scan position is kept in the file-level variable pg_strtok_ptr instead
 * of being threaded through every routine in readfuncs.c.  So that a nested
 * (re-entrant) call cannot disturb an outer one, the position found on entry
 * is remembered here and put back just before returning.
 */
void *
stringToNode(char *str)
{
    char       *const outer_ptr = pg_strtok_ptr;    /* caller's position */
    void       *node;

    /* aim pg_strtok at the new string, then read one node from it */
    pg_strtok_ptr = str;
    node = nodeRead(NULL, 0);

    /* resume whatever scan was in progress when we were called */
    pg_strtok_ptr = outer_ptr;

    return node;
}
00059 
00060 /*****************************************************************************
00061  *
00062  * the lisp token parser
00063  *
00064  *****************************************************************************/
00065 
00066 /*
00067  * pg_strtok --- retrieve next "token" from a string.
00068  *
00069  * Works kinda like strtok, except it never modifies the source string.
00070  * (Instead of storing nulls into the string, the length of the token
00071  * is returned to the caller.)
00072  * Also, the rules about what is a token are hard-wired rather than being
00073  * configured by passing a set of terminating characters.
00074  *
00075  * The string is assumed to have been initialized already by stringToNode.
00076  *
00077  * The rules for tokens are:
00078  *  * Whitespace (space, tab, newline) always separates tokens.
00079  *  * The characters '(', ')', '{', '}' form individual tokens even
00080  *    without any whitespace around them.
00081  *  * Otherwise, a token is all the characters up to the next whitespace
00082  *    or occurrence of one of the four special characters.
00083  *  * A backslash '\' can be used to quote whitespace or one of the four
00084  *    special characters, so that it is treated as a plain token character.
00085  *    Backslashes themselves must also be backslashed for consistency.
00086  *    Any other character can be, but need not be, backslashed as well.
00087  *  * If the resulting token is '<>' (with no backslash), it is returned
00088  *    as a non-NULL pointer to the token but with length == 0.  Note that
00089  *    there is no other way to get a zero-length token.
00090  *
00091  * Returns a pointer to the start of the next token, and the length of the
00092  * token (including any embedded backslashes!) in *length.  If there are
00093  * no more tokens, NULL and 0 are returned.
00094  *
00095  * NOTE: this routine doesn't remove backslashes; the caller must do so
00096  * if necessary (see "debackslash").
00097  *
00098  * NOTE: prior to release 7.0, this routine also had a special case to treat
00099  * a token starting with '"' as extending to the next '"'.  This code was
00100  * broken, however, since it would fail to cope with a string containing an
00101  * embedded '"'.  I have therefore removed this special case, and instead
00102  * introduced rules for using backslashes to quote characters.  Higher-level
00103  * code should add backslashes to a string constant to ensure it is treated
00104  * as a single token.
00105  */
00106 char *
00107 pg_strtok(int *length)
00108 {
00109     char       *local_str;      /* working pointer to string */
00110     char       *ret_str;        /* start of token to return */
00111 
00112     local_str = pg_strtok_ptr;
00113 
00114     while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
00115         local_str++;
00116 
00117     if (*local_str == '\0')
00118     {
00119         *length = 0;
00120         pg_strtok_ptr = local_str;
00121         return NULL;            /* no more tokens */
00122     }
00123 
00124     /*
00125      * Now pointing at start of next token.
00126      */
00127     ret_str = local_str;
00128 
00129     if (*local_str == '(' || *local_str == ')' ||
00130         *local_str == '{' || *local_str == '}')
00131     {
00132         /* special 1-character token */
00133         local_str++;
00134     }
00135     else
00136     {
00137         /* Normal token, possibly containing backslashes */
00138         while (*local_str != '\0' &&
00139                *local_str != ' ' && *local_str != '\n' &&
00140                *local_str != '\t' &&
00141                *local_str != '(' && *local_str != ')' &&
00142                *local_str != '{' && *local_str != '}')
00143         {
00144             if (*local_str == '\\' && local_str[1] != '\0')
00145                 local_str += 2;
00146             else
00147                 local_str++;
00148         }
00149     }
00150 
00151     *length = local_str - ret_str;
00152 
00153     /* Recognize special case for "empty" token */
00154     if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
00155         *length = 0;
00156 
00157     pg_strtok_ptr = local_str;
00158 
00159     return ret_str;
00160 }
00161 
/*
 * debackslash -
 *    return a freshly palloc'd, null-terminated copy of the first "length"
 *    bytes of "token", with protective backslashes stripped out.
 *
 * A backslash is dropped and the byte after it is copied literally.  The
 * one exception is a backslash that is the very last byte of the token:
 * having nothing to protect, it is copied through unchanged.
 */
char *
debackslash(char *token, int length)
{
    char       *copy = palloc(length + 1);
    int         in = 0;         /* next byte to examine in token */
    int         out = 0;        /* next free slot in copy */

    while (in < length)
    {
        /* skip an escaping backslash, provided a byte follows it */
        if (token[in] == '\\' && in + 1 < length)
            in++;
        copy[out++] = token[in++];
    }
    copy[out] = '\0';

    return copy;
}
00183 
00184 #define RIGHT_PAREN (1000000 + 1)
00185 #define LEFT_PAREN  (1000000 + 2)
00186 #define LEFT_BRACE  (1000000 + 3)
00187 #define OTHER_TOKEN (1000000 + 4)
00188 
00189 /*
00190  * nodeTokenType -
00191  *    returns the type of the node token contained in token.
00192  *    It returns one of the following valid NodeTags:
00193  *      T_Integer, T_Float, T_String, T_BitString
00194  *    and some of its own:
00195  *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
00196  *
00197  *    Assumption: the ascii representation is legal
00198  */
00199 static NodeTag
00200 nodeTokenType(char *token, int length)
00201 {
00202     NodeTag     retval;
00203     char       *numptr;
00204     int         numlen;
00205 
00206     /*
00207      * Check if the token is a number
00208      */
00209     numptr = token;
00210     numlen = length;
00211     if (*numptr == '+' || *numptr == '-')
00212         numptr++, numlen--;
00213     if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
00214         (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
00215     {
00216         /*
00217          * Yes.  Figure out whether it is integral or float; this requires
00218          * both a syntax check and a range check. strtol() can do both for us.
00219          * We know the token will end at a character that strtol will stop at,
00220          * so we do not need to modify the string.
00221          */
00222         long        val;
00223         char       *endptr;
00224 
00225         errno = 0;
00226         val = strtol(token, &endptr, 10);
00227         (void) val;             /* avoid compiler warning if unused */
00228         if (endptr != token + length || errno == ERANGE
00229 #ifdef HAVE_LONG_INT_64
00230         /* if long > 32 bits, check for overflow of int4 */
00231             || val != (long) ((int32) val)
00232 #endif
00233             )
00234             return T_Float;
00235         return T_Integer;
00236     }
00237 
00238     /*
00239      * these three cases do not need length checks, since pg_strtok() will
00240      * always treat them as single-byte tokens
00241      */
00242     else if (*token == '(')
00243         retval = LEFT_PAREN;
00244     else if (*token == ')')
00245         retval = RIGHT_PAREN;
00246     else if (*token == '{')
00247         retval = LEFT_BRACE;
00248     else if (*token == '\"' && length > 1 && token[length - 1] == '\"')
00249         retval = T_String;
00250     else if (*token == 'b')
00251         retval = T_BitString;
00252     else
00253         retval = OTHER_TOKEN;
00254     return retval;
00255 }
00256 
00257 /*
00258  * nodeRead -
00259  *    Slightly higher-level reader.
00260  *
00261  * This routine applies some semantic knowledge on top of the purely
00262  * lexical tokenizer pg_strtok().   It can read
00263  *  * Value token nodes (integers, floats, or strings);
00264  *  * General nodes (via parseNodeString() from readfuncs.c);
00265  *  * Lists of the above;
00266  *  * Lists of integers or OIDs.
00267  * The return value is declared void *, not Node *, to avoid having to
00268  * cast it explicitly in callers that assign to fields of different types.
00269  *
00270  * External callers should always pass NULL/0 for the arguments.  Internally
00271  * a non-NULL token may be passed when the upper recursion level has already
00272  * scanned the first token of a node's representation.
00273  *
00274  * We assume pg_strtok is already initialized with a string to read (hence
00275  * this should only be invoked from within a stringToNode operation).
00276  */
00277 void *
00278 nodeRead(char *token, int tok_len)
00279 {
00280     Node       *result;
00281     NodeTag     type;
00282 
00283     if (token == NULL)          /* need to read a token? */
00284     {
00285         token = pg_strtok(&tok_len);
00286 
00287         if (token == NULL)      /* end of input */
00288             return NULL;
00289     }
00290 
00291     type = nodeTokenType(token, tok_len);
00292 
00293     switch ((int) type)
00294     {
00295         case LEFT_BRACE:
00296             result = parseNodeString();
00297             token = pg_strtok(&tok_len);
00298             if (token == NULL || token[0] != '}')
00299                 elog(ERROR, "did not find '}' at end of input node");
00300             break;
00301         case LEFT_PAREN:
00302             {
00303                 List       *l = NIL;
00304 
00305                 /*----------
00306                  * Could be an integer list:    (i int int ...)
00307                  * or an OID list:              (o int int ...)
00308                  * or a list of nodes/values:   (node node ...)
00309                  *----------
00310                  */
00311                 token = pg_strtok(&tok_len);
00312                 if (token == NULL)
00313                     elog(ERROR, "unterminated List structure");
00314                 if (tok_len == 1 && token[0] == 'i')
00315                 {
00316                     /* List of integers */
00317                     for (;;)
00318                     {
00319                         int         val;
00320                         char       *endptr;
00321 
00322                         token = pg_strtok(&tok_len);
00323                         if (token == NULL)
00324                             elog(ERROR, "unterminated List structure");
00325                         if (token[0] == ')')
00326                             break;
00327                         val = (int) strtol(token, &endptr, 10);
00328                         if (endptr != token + tok_len)
00329                             elog(ERROR, "unrecognized integer: \"%.*s\"",
00330                                  tok_len, token);
00331                         l = lappend_int(l, val);
00332                     }
00333                 }
00334                 else if (tok_len == 1 && token[0] == 'o')
00335                 {
00336                     /* List of OIDs */
00337                     for (;;)
00338                     {
00339                         Oid         val;
00340                         char       *endptr;
00341 
00342                         token = pg_strtok(&tok_len);
00343                         if (token == NULL)
00344                             elog(ERROR, "unterminated List structure");
00345                         if (token[0] == ')')
00346                             break;
00347                         val = (Oid) strtoul(token, &endptr, 10);
00348                         if (endptr != token + tok_len)
00349                             elog(ERROR, "unrecognized OID: \"%.*s\"",
00350                                  tok_len, token);
00351                         l = lappend_oid(l, val);
00352                     }
00353                 }
00354                 else
00355                 {
00356                     /* List of other node types */
00357                     for (;;)
00358                     {
00359                         /* We have already scanned next token... */
00360                         if (token[0] == ')')
00361                             break;
00362                         l = lappend(l, nodeRead(token, tok_len));
00363                         token = pg_strtok(&tok_len);
00364                         if (token == NULL)
00365                             elog(ERROR, "unterminated List structure");
00366                     }
00367                 }
00368                 result = (Node *) l;
00369                 break;
00370             }
00371         case RIGHT_PAREN:
00372             elog(ERROR, "unexpected right parenthesis");
00373             result = NULL;      /* keep compiler happy */
00374             break;
00375         case OTHER_TOKEN:
00376             if (tok_len == 0)
00377             {
00378                 /* must be "<>" --- represents a null pointer */
00379                 result = NULL;
00380             }
00381             else
00382             {
00383                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
00384                 result = NULL;  /* keep compiler happy */
00385             }
00386             break;
00387         case T_Integer:
00388 
00389             /*
00390              * we know that the token terminates on a char atol will stop at
00391              */
00392             result = (Node *) makeInteger(atol(token));
00393             break;
00394         case T_Float:
00395             {
00396                 char       *fval = (char *) palloc(tok_len + 1);
00397 
00398                 memcpy(fval, token, tok_len);
00399                 fval[tok_len] = '\0';
00400                 result = (Node *) makeFloat(fval);
00401             }
00402             break;
00403         case T_String:
00404             /* need to remove leading and trailing quotes, and backslashes */
00405             result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
00406             break;
00407         case T_BitString:
00408             {
00409                 char       *val = palloc(tok_len);
00410 
00411                 /* skip leading 'b' */
00412                 memcpy(val, token + 1, tok_len - 1);
00413                 val[tok_len - 1] = '\0';
00414                 result = (Node *) makeBitString(val);
00415                 break;
00416             }
00417         default:
00418             elog(ERROR, "unrecognized node type: %d", (int) type);
00419             result = NULL;      /* keep compiler happy */
00420             break;
00421     }
00422 
00423     return (void *) result;
00424 }