Header And Logo

PostgreSQL
| The world's most advanced open source database.

parse_node.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * parse_node.c
00004  *    various routines that make nodes for querytrees
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/parser/parse_node.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include "access/heapam.h"
00018 #include "access/htup_details.h"
00019 #include "catalog/pg_type.h"
00020 #include "mb/pg_wchar.h"
00021 #include "nodes/makefuncs.h"
00022 #include "nodes/nodeFuncs.h"
00023 #include "parser/parsetree.h"
00024 #include "parser/parse_coerce.h"
00025 #include "parser/parse_expr.h"
00026 #include "parser/parse_relation.h"
00027 #include "utils/builtins.h"
00028 #include "utils/int8.h"
00029 #include "utils/lsyscache.h"
00030 #include "utils/syscache.h"
00031 #include "utils/varbit.h"
00032 
00033 
00034 static void pcb_error_callback(void *arg);
00035 
00036 
00037 /*
00038  * make_parsestate
00039  *      Allocate and initialize a new ParseState.
00040  *
00041  * Caller should eventually release the ParseState via free_parsestate().
00042  */
00043 ParseState *
00044 make_parsestate(ParseState *parentParseState)
00045 {
00046     ParseState *pstate;
00047 
00048     pstate = palloc0(sizeof(ParseState));
00049 
00050     pstate->parentParseState = parentParseState;
00051 
00052     /* Fill in fields that don't start at null/false/zero */
00053     pstate->p_next_resno = 1;
00054 
00055     if (parentParseState)
00056     {
00057         pstate->p_sourcetext = parentParseState->p_sourcetext;
00058         /* all hooks are copied from parent */
00059         pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
00060         pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
00061         pstate->p_paramref_hook = parentParseState->p_paramref_hook;
00062         pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
00063         pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
00064     }
00065 
00066     return pstate;
00067 }
00068 
00069 /*
00070  * free_parsestate
00071  *      Release a ParseState and any subsidiary resources.
00072  */
00073 void
00074 free_parsestate(ParseState *pstate)
00075 {
00076     /*
00077      * Check that we did not produce too many resnos; at the very least we
00078      * cannot allow more than 2^16, since that would exceed the range of a
00079      * AttrNumber. It seems safest to use MaxTupleAttributeNumber.
00080      */
00081     if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
00082         ereport(ERROR,
00083                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00084                  errmsg("target lists can have at most %d entries",
00085                         MaxTupleAttributeNumber)));
00086 
00087     if (pstate->p_target_relation != NULL)
00088         heap_close(pstate->p_target_relation, NoLock);
00089 
00090     pfree(pstate);
00091 }
00092 
00093 
00094 /*
00095  * parser_errposition
00096  *      Report a parse-analysis-time cursor position, if possible.
00097  *
00098  * This is expected to be used within an ereport() call.  The return value
00099  * is a dummy (always 0, in fact).
00100  *
00101  * The locations stored in raw parsetrees are byte offsets into the source
00102  * string.  We have to convert them to 1-based character indexes for reporting
00103  * to clients.  (We do things this way to avoid unnecessary overhead in the
00104  * normal non-error case: computing character indexes would be much more
00105  * expensive than storing token offsets.)
00106  */
00107 int
00108 parser_errposition(ParseState *pstate, int location)
00109 {
00110     int         pos;
00111 
00112     /* No-op if location was not provided */
00113     if (location < 0)
00114         return 0;
00115     /* Can't do anything if source text is not available */
00116     if (pstate == NULL || pstate->p_sourcetext == NULL)
00117         return 0;
00118     /* Convert offset to character number */
00119     pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
00120     /* And pass it to the ereport mechanism */
00121     return errposition(pos);
00122 }
00123 
00124 
00125 /*
00126  * setup_parser_errposition_callback
00127  *      Arrange for non-parser errors to report an error position
00128  *
00129  * Sometimes the parser calls functions that aren't part of the parser
00130  * subsystem and can't reasonably be passed a ParseState; yet we would
00131  * like any errors thrown in those functions to be tagged with a parse
00132  * error location.  Use this function to set up an error context stack
00133  * entry that will accomplish that.  Usage pattern:
00134  *
00135  *      declare a local variable "ParseCallbackState pcbstate"
00136  *      ...
00137  *      setup_parser_errposition_callback(&pcbstate, pstate, location);
00138  *      call function that might throw error;
00139  *      cancel_parser_errposition_callback(&pcbstate);
00140  */
00141 void
00142 setup_parser_errposition_callback(ParseCallbackState *pcbstate,
00143                                   ParseState *pstate, int location)
00144 {
00145     /* Setup error traceback support for ereport() */
00146     pcbstate->pstate = pstate;
00147     pcbstate->location = location;
00148     pcbstate->errcallback.callback = pcb_error_callback;
00149     pcbstate->errcallback.arg = (void *) pcbstate;
00150     pcbstate->errcallback.previous = error_context_stack;
00151     error_context_stack = &pcbstate->errcallback;
00152 }
00153 
00154 /*
00155  * Cancel a previously-set-up errposition callback.
00156  */
00157 void
00158 cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
00159 {
00160     /* Pop the error context stack */
00161     error_context_stack = pcbstate->errcallback.previous;
00162 }
00163 
00164 /*
00165  * Error context callback for inserting parser error location.
00166  *
00167  * Note that this will be called for *any* error occurring while the
00168  * callback is installed.  We avoid inserting an irrelevant error location
00169  * if the error is a query cancel --- are there any other important cases?
00170  */
00171 static void
00172 pcb_error_callback(void *arg)
00173 {
00174     ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
00175 
00176     if (geterrcode() != ERRCODE_QUERY_CANCELED)
00177         (void) parser_errposition(pcbstate->pstate, pcbstate->location);
00178 }
00179 
00180 
00181 /*
00182  * make_var
00183  *      Build a Var node for an attribute identified by RTE and attrno
00184  */
00185 Var *
00186 make_var(ParseState *pstate, RangeTblEntry *rte, int attrno, int location)
00187 {
00188     Var        *result;
00189     int         vnum,
00190                 sublevels_up;
00191     Oid         vartypeid;
00192     int32       type_mod;
00193     Oid         varcollid;
00194 
00195     vnum = RTERangeTablePosn(pstate, rte, &sublevels_up);
00196     get_rte_attribute_type(rte, attrno, &vartypeid, &type_mod, &varcollid);
00197     result = makeVar(vnum, attrno, vartypeid, type_mod, varcollid, sublevels_up);
00198     result->location = location;
00199     return result;
00200 }
00201 
00202 /*
00203  * transformArrayType()
00204  *      Identify the types involved in a subscripting operation
00205  *
00206  * On entry, arrayType/arrayTypmod identify the type of the input value
00207  * to be subscripted (which could be a domain type).  These are modified
00208  * if necessary to identify the actual array type and typmod, and the
00209  * array's element type is returned.  An error is thrown if the input isn't
00210  * an array type.
00211  */
00212 Oid
00213 transformArrayType(Oid *arrayType, int32 *arrayTypmod)
00214 {
00215     Oid         origArrayType = *arrayType;
00216     Oid         elementType;
00217     HeapTuple   type_tuple_array;
00218     Form_pg_type type_struct_array;
00219 
00220     /*
00221      * If the input is a domain, smash to base type, and extract the actual
00222      * typmod to be applied to the base type.  Subscripting a domain is an
00223      * operation that necessarily works on the base array type, not the domain
00224      * itself.  (Note that we provide no method whereby the creator of a
00225      * domain over an array type could hide its ability to be subscripted.)
00226      */
00227     *arrayType = getBaseTypeAndTypmod(*arrayType, arrayTypmod);
00228 
00229     /* Get the type tuple for the array */
00230     type_tuple_array = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*arrayType));
00231     if (!HeapTupleIsValid(type_tuple_array))
00232         elog(ERROR, "cache lookup failed for type %u", *arrayType);
00233     type_struct_array = (Form_pg_type) GETSTRUCT(type_tuple_array);
00234 
00235     /* needn't check typisdefined since this will fail anyway */
00236 
00237     elementType = type_struct_array->typelem;
00238     if (elementType == InvalidOid)
00239         ereport(ERROR,
00240                 (errcode(ERRCODE_DATATYPE_MISMATCH),
00241                  errmsg("cannot subscript type %s because it is not an array",
00242                         format_type_be(origArrayType))));
00243 
00244     ReleaseSysCache(type_tuple_array);
00245 
00246     return elementType;
00247 }
00248 
00249 /*
00250  * transformArraySubscripts()
00251  *      Transform array subscripting.  This is used for both
00252  *      array fetch and array assignment.
00253  *
00254  * In an array fetch, we are given a source array value and we produce an
00255  * expression that represents the result of extracting a single array element
00256  * or an array slice.
00257  *
00258  * In an array assignment, we are given a destination array value plus a
00259  * source value that is to be assigned to a single element or a slice of
00260  * that array.  We produce an expression that represents the new array value
00261  * with the source data inserted into the right part of the array.
00262  *
00263  * For both cases, if the source array is of a domain-over-array type,
00264  * the result is of the base array type or its element type; essentially,
00265  * we must fold a domain to its base type before applying subscripting.
00266  *
00267  * pstate       Parse state
00268  * arrayBase    Already-transformed expression for the array as a whole
00269  * arrayType    OID of array's datatype (should match type of arrayBase,
00270  *              or be the base type of arrayBase's domain type)
00271  * elementType  OID of array's element type (fetch with transformArrayType,
00272  *              or pass InvalidOid to do it here)
00273  * arrayTypMod  typmod for the array (which is also typmod for the elements)
00274  * indirection  Untransformed list of subscripts (must not be NIL)
00275  * assignFrom   NULL for array fetch, else transformed expression for source.
00276  */
00277 ArrayRef *
00278 transformArraySubscripts(ParseState *pstate,
00279                          Node *arrayBase,
00280                          Oid arrayType,
00281                          Oid elementType,
00282                          int32 arrayTypMod,
00283                          List *indirection,
00284                          Node *assignFrom)
00285 {
00286     bool        isSlice = false;
00287     List       *upperIndexpr = NIL;
00288     List       *lowerIndexpr = NIL;
00289     ListCell   *idx;
00290     ArrayRef   *aref;
00291 
00292     /*
00293      * Caller may or may not have bothered to determine elementType.  Note
00294      * that if the caller did do so, arrayType/arrayTypMod must be as modified
00295      * by transformArrayType, ie, smash domain to base type.
00296      */
00297     if (!OidIsValid(elementType))
00298         elementType = transformArrayType(&arrayType, &arrayTypMod);
00299 
00300     /*
00301      * A list containing only single subscripts refers to a single array
00302      * element.  If any of the items are double subscripts (lower:upper), then
00303      * the subscript expression means an array slice operation. In this case,
00304      * we supply a default lower bound of 1 for any items that contain only a
00305      * single subscript.  We have to prescan the indirection list to see if
00306      * there are any double subscripts.
00307      */
00308     foreach(idx, indirection)
00309     {
00310         A_Indices  *ai = (A_Indices *) lfirst(idx);
00311 
00312         if (ai->lidx != NULL)
00313         {
00314             isSlice = true;
00315             break;
00316         }
00317     }
00318 
00319     /*
00320      * Transform the subscript expressions.
00321      */
00322     foreach(idx, indirection)
00323     {
00324         A_Indices  *ai = (A_Indices *) lfirst(idx);
00325         Node       *subexpr;
00326 
00327         Assert(IsA(ai, A_Indices));
00328         if (isSlice)
00329         {
00330             if (ai->lidx)
00331             {
00332                 subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind);
00333                 /* If it's not int4 already, try to coerce */
00334                 subexpr = coerce_to_target_type(pstate,
00335                                                 subexpr, exprType(subexpr),
00336                                                 INT4OID, -1,
00337                                                 COERCION_ASSIGNMENT,
00338                                                 COERCE_IMPLICIT_CAST,
00339                                                 -1);
00340                 if (subexpr == NULL)
00341                     ereport(ERROR,
00342                             (errcode(ERRCODE_DATATYPE_MISMATCH),
00343                              errmsg("array subscript must have type integer"),
00344                         parser_errposition(pstate, exprLocation(ai->lidx))));
00345             }
00346             else
00347             {
00348                 /* Make a constant 1 */
00349                 subexpr = (Node *) makeConst(INT4OID,
00350                                              -1,
00351                                              InvalidOid,
00352                                              sizeof(int32),
00353                                              Int32GetDatum(1),
00354                                              false,
00355                                              true);     /* pass by value */
00356             }
00357             lowerIndexpr = lappend(lowerIndexpr, subexpr);
00358         }
00359         subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
00360         /* If it's not int4 already, try to coerce */
00361         subexpr = coerce_to_target_type(pstate,
00362                                         subexpr, exprType(subexpr),
00363                                         INT4OID, -1,
00364                                         COERCION_ASSIGNMENT,
00365                                         COERCE_IMPLICIT_CAST,
00366                                         -1);
00367         if (subexpr == NULL)
00368             ereport(ERROR,
00369                     (errcode(ERRCODE_DATATYPE_MISMATCH),
00370                      errmsg("array subscript must have type integer"),
00371                      parser_errposition(pstate, exprLocation(ai->uidx))));
00372         upperIndexpr = lappend(upperIndexpr, subexpr);
00373     }
00374 
00375     /*
00376      * If doing an array store, coerce the source value to the right type.
00377      * (This should agree with the coercion done by transformAssignedExpr.)
00378      */
00379     if (assignFrom != NULL)
00380     {
00381         Oid         typesource = exprType(assignFrom);
00382         Oid         typeneeded = isSlice ? arrayType : elementType;
00383         Node       *newFrom;
00384 
00385         newFrom = coerce_to_target_type(pstate,
00386                                         assignFrom, typesource,
00387                                         typeneeded, arrayTypMod,
00388                                         COERCION_ASSIGNMENT,
00389                                         COERCE_IMPLICIT_CAST,
00390                                         -1);
00391         if (newFrom == NULL)
00392             ereport(ERROR,
00393                     (errcode(ERRCODE_DATATYPE_MISMATCH),
00394                      errmsg("array assignment requires type %s"
00395                             " but expression is of type %s",
00396                             format_type_be(typeneeded),
00397                             format_type_be(typesource)),
00398                  errhint("You will need to rewrite or cast the expression."),
00399                      parser_errposition(pstate, exprLocation(assignFrom))));
00400         assignFrom = newFrom;
00401     }
00402 
00403     /*
00404      * Ready to build the ArrayRef node.
00405      */
00406     aref = makeNode(ArrayRef);
00407     aref->refarraytype = arrayType;
00408     aref->refelemtype = elementType;
00409     aref->reftypmod = arrayTypMod;
00410     /* refcollid will be set by parse_collate.c */
00411     aref->refupperindexpr = upperIndexpr;
00412     aref->reflowerindexpr = lowerIndexpr;
00413     aref->refexpr = (Expr *) arrayBase;
00414     aref->refassgnexpr = (Expr *) assignFrom;
00415 
00416     return aref;
00417 }
00418 
00419 /*
00420  * make_const
00421  *
00422  *  Convert a Value node (as returned by the grammar) to a Const node
00423  *  of the "natural" type for the constant.  Note that this routine is
00424  *  only used when there is no explicit cast for the constant, so we
00425  *  have to guess what type is wanted.
00426  *
00427  *  For string literals we produce a constant of type UNKNOWN ---- whose
00428  *  representation is the same as cstring, but it indicates to later type
00429  *  resolution that we're not sure yet what type it should be considered.
00430  *  Explicit "NULL" constants are also typed as UNKNOWN.
00431  *
00432  *  For integers and floats we produce int4, int8, or numeric depending
00433  *  on the value of the number.  XXX We should produce int2 as well,
00434  *  but additional cleanup is needed before we can do that; there are
00435  *  too many examples that fail if we try.
00436  */
00437 Const *
00438 make_const(ParseState *pstate, Value *value, int location)
00439 {
00440     Const      *con;
00441     Datum       val;
00442     int64       val64;
00443     Oid         typeid;
00444     int         typelen;
00445     bool        typebyval;
00446     ParseCallbackState pcbstate;
00447 
00448     switch (nodeTag(value))
00449     {
00450         case T_Integer:
00451             val = Int32GetDatum(intVal(value));
00452 
00453             typeid = INT4OID;
00454             typelen = sizeof(int32);
00455             typebyval = true;
00456             break;
00457 
00458         case T_Float:
00459             /* could be an oversize integer as well as a float ... */
00460             if (scanint8(strVal(value), true, &val64))
00461             {
00462                 /*
00463                  * It might actually fit in int32. Probably only INT_MIN can
00464                  * occur, but we'll code the test generally just to be sure.
00465                  */
00466                 int32       val32 = (int32) val64;
00467 
00468                 if (val64 == (int64) val32)
00469                 {
00470                     val = Int32GetDatum(val32);
00471 
00472                     typeid = INT4OID;
00473                     typelen = sizeof(int32);
00474                     typebyval = true;
00475                 }
00476                 else
00477                 {
00478                     val = Int64GetDatum(val64);
00479 
00480                     typeid = INT8OID;
00481                     typelen = sizeof(int64);
00482                     typebyval = FLOAT8PASSBYVAL;        /* int8 and float8 alike */
00483                 }
00484             }
00485             else
00486             {
00487                 /* arrange to report location if numeric_in() fails */
00488                 setup_parser_errposition_callback(&pcbstate, pstate, location);
00489                 val = DirectFunctionCall3(numeric_in,
00490                                           CStringGetDatum(strVal(value)),
00491                                           ObjectIdGetDatum(InvalidOid),
00492                                           Int32GetDatum(-1));
00493                 cancel_parser_errposition_callback(&pcbstate);
00494 
00495                 typeid = NUMERICOID;
00496                 typelen = -1;   /* variable len */
00497                 typebyval = false;
00498             }
00499             break;
00500 
00501         case T_String:
00502 
00503             /*
00504              * We assume here that UNKNOWN's internal representation is the
00505              * same as CSTRING
00506              */
00507             val = CStringGetDatum(strVal(value));
00508 
00509             typeid = UNKNOWNOID;    /* will be coerced later */
00510             typelen = -2;       /* cstring-style varwidth type */
00511             typebyval = false;
00512             break;
00513 
00514         case T_BitString:
00515             /* arrange to report location if bit_in() fails */
00516             setup_parser_errposition_callback(&pcbstate, pstate, location);
00517             val = DirectFunctionCall3(bit_in,
00518                                       CStringGetDatum(strVal(value)),
00519                                       ObjectIdGetDatum(InvalidOid),
00520                                       Int32GetDatum(-1));
00521             cancel_parser_errposition_callback(&pcbstate);
00522             typeid = BITOID;
00523             typelen = -1;
00524             typebyval = false;
00525             break;
00526 
00527         case T_Null:
00528             /* return a null const */
00529             con = makeConst(UNKNOWNOID,
00530                             -1,
00531                             InvalidOid,
00532                             -2,
00533                             (Datum) 0,
00534                             true,
00535                             false);
00536             con->location = location;
00537             return con;
00538 
00539         default:
00540             elog(ERROR, "unrecognized node type: %d", (int) nodeTag(value));
00541             return NULL;        /* keep compiler quiet */
00542     }
00543 
00544     con = makeConst(typeid,
00545                     -1,         /* typmod -1 is OK for all cases */
00546                     InvalidOid, /* all cases are uncollatable types */
00547                     typelen,
00548                     val,
00549                     false,
00550                     typebyval);
00551     con->location = location;
00552 
00553     return con;
00554 }