Header And Logo

PostgreSQL
| The world's most advanced open source database.

jsonfuncs.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * jsonfuncs.c
00004  *      Functions to process JSON data type.
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/utils/adt/jsonfuncs.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include <limits.h>
00018 
00019 #include "fmgr.h"
00020 #include "funcapi.h"
00021 #include "miscadmin.h"
00022 #include "access/htup_details.h"
00023 #include "catalog/pg_type.h"
00024 #include "lib/stringinfo.h"
00025 #include "mb/pg_wchar.h"
00026 #include "utils/array.h"
00027 #include "utils/builtins.h"
00028 #include "utils/hsearch.h"
00029 #include "utils/json.h"
00030 #include "utils/jsonapi.h"
00031 #include "utils/lsyscache.h"
00032 #include "utils/memutils.h"
00033 #include "utils/typcache.h"
00034 
00035 /* semantic action functions for json_object_keys */
00036 static void okeys_object_field_start(void *state, char *fname, bool isnull);
00037 static void okeys_array_start(void *state);
00038 static void okeys_scalar(void *state, char *token, JsonTokenType tokentype);
00039 
00040 /* semantic action functions for json_get* functions */
00041 static void get_object_start(void *state);
00042 static void get_object_field_start(void *state, char *fname, bool isnull);
00043 static void get_object_field_end(void *state, char *fname, bool isnull);
00044 static void get_array_start(void *state);
00045 static void get_array_element_start(void *state, bool isnull);
00046 static void get_array_element_end(void *state, bool isnull);
00047 static void get_scalar(void *state, char *token, JsonTokenType tokentype);
00048 
00049 /* common worker function for json getter functions */
00050 static inline Datum get_path_all(PG_FUNCTION_ARGS, bool as_text);
00051 static inline text *get_worker(text *json, char *field, int elem_index,
00052            char **tpath, int *ipath, int npath,
00053            bool normalize_results);
00054 
00055 /* semantic action functions for json_array_length */
00056 static void alen_object_start(void *state);
00057 static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
00058 static void alen_array_element_start(void *state, bool isnull);
00059 
00060 /* common worker for json_each* functions */
00061 static inline Datum each_worker(PG_FUNCTION_ARGS, bool as_text);
00062 
00063 /* semantic action functions for json_each */
00064 static void each_object_field_start(void *state, char *fname, bool isnull);
00065 static void each_object_field_end(void *state, char *fname, bool isnull);
00066 static void each_array_start(void *state);
00067 static void each_scalar(void *state, char *token, JsonTokenType tokentype);
00068 
00069 /* semantic action functions for json_array_elements */
00070 static void elements_object_start(void *state);
00071 static void elements_array_element_start(void *state, bool isnull);
00072 static void elements_array_element_end(void *state, bool isnull);
00073 static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
00074 
00075 /* turn a json object into a hash table */
00076 static HTAB *get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text);
00077 
00078 /* semantic action functions for get_json_object_as_hash */
00079 static void hash_object_field_start(void *state, char *fname, bool isnull);
00080 static void hash_object_field_end(void *state, char *fname, bool isnull);
00081 static void hash_array_start(void *state);
00082 static void hash_scalar(void *state, char *token, JsonTokenType tokentype);
00083 
00084 /* semantic action functions for populate_recordset */
00085 static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
00086 static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
00087 static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
00088 static void populate_recordset_object_start(void *state);
00089 static void populate_recordset_object_end(void *state);
00090 static void populate_recordset_array_start(void *state);
00091 static void populate_recordset_array_element_start(void *state, bool isnull);
00092 
/*
 * Kind of lookup performed by the json_get* worker: a single object field
 * name, a single array element index, or a multi-level path.
 */
typedef enum
{
    JSON_SEARCH_OBJECT = 1,
    JSON_SEARCH_ARRAY = 2,
    JSON_SEARCH_PATH = 3
} JsonSearch;
00100 
00101 /* state for json_object_keys */
00102 typedef struct okeysState
00103 {
00104     JsonLexContext *lex;
00105     char      **result;
00106     int         result_size;
00107     int         result_count;
00108     int         sent_count;
00109 }   okeysState, *OkeysState;
00110 
00111 /* state for json_get* functions */
00112 typedef struct getState
00113 {
00114     JsonLexContext *lex;
00115     JsonSearch  search_type;
00116     int         search_index;
00117     int         array_index;
00118     char       *search_term;
00119     char       *result_start;
00120     text       *tresult;
00121     bool        result_is_null;
00122     bool        normalize_results;
00123     bool        next_scalar;
00124     char      **path;
00125     int         npath;
00126     char      **current_path;
00127     bool       *pathok;
00128     int        *array_level_index;
00129     int        *path_level_index;
00130 }   getState, *GetState;
00131 
00132 /* state for json_array_length */
00133 typedef struct alenState
00134 {
00135     JsonLexContext *lex;
00136     int         count;
00137 }   alenState, *AlenState;
00138 
00139 /* state for json_each */
00140 typedef struct eachState
00141 {
00142     JsonLexContext *lex;
00143     Tuplestorestate *tuple_store;
00144     TupleDesc   ret_tdesc;
00145     MemoryContext tmp_cxt;
00146     char       *result_start;
00147     bool        normalize_results;
00148     bool        next_scalar;
00149     char       *normalized_scalar;
00150 }   eachState, *EachState;
00151 
00152 /* state for json_array_elements */
00153 typedef struct elementsState
00154 {
00155     JsonLexContext *lex;
00156     Tuplestorestate *tuple_store;
00157     TupleDesc   ret_tdesc;
00158     MemoryContext tmp_cxt;
00159     char       *result_start;
00160 }   elementsState, *ElementsState;
00161 
00162 /* state for get_json_object_as_hash */
00163 typedef struct jhashState
00164 {
00165     JsonLexContext *lex;
00166     HTAB       *hash;
00167     char       *saved_scalar;
00168     char       *save_json_start;
00169     bool        use_json_as_text;
00170     char       *function_name;
00171 }   jhashState, *JHashState;
00172 
00173 /* used to build the hashtable */
00174 typedef struct jsonHashEntry
00175 {
00176     char        fname[NAMEDATALEN];
00177     char       *val;
00178     char       *json;
00179     bool        isnull;
00180 }   jsonHashEntry, *JsonHashEntry;
00181 
00182 /* these two are stolen from hstore / record_out, used in populate_record* */
00183 typedef struct ColumnIOData
00184 {
00185     Oid         column_type;
00186     Oid         typiofunc;
00187     Oid         typioparam;
00188     FmgrInfo    proc;
00189 } ColumnIOData;
00190 
00191 typedef struct RecordIOData
00192 {
00193     Oid         record_type;
00194     int32       record_typmod;
00195     int         ncolumns;
00196     ColumnIOData columns[1];    /* VARIABLE LENGTH ARRAY */
00197 } RecordIOData;
00198 
00199 /* state for populate_recordset */
00200 typedef struct populateRecordsetState
00201 {
00202     JsonLexContext *lex;
00203     HTAB       *json_hash;
00204     char       *saved_scalar;
00205     char       *save_json_start;
00206     bool        use_json_as_text;
00207     Tuplestorestate *tuple_store;
00208     TupleDesc   ret_tdesc;
00209     HeapTupleHeader rec;
00210     RecordIOData *my_extra;
00211     MemoryContext fn_mcxt;      /* used to stash IO funcs */
00212 }   populateRecordsetState, *PopulateRecordsetState;
00213 
00214 /*
00215  * SQL function json_object-keys
00216  *
00217  * Returns the set of keys for the object argument.
00218  *
00219  * This SRF operates in value-per-call mode. It processes the
00220  * object during the first call, and the keys are simply stashed
00221  * in an array, whise size is expanded as necessary. This is probably
00222  * safe enough for a list of keys of a single object, since they are
00223  * limited in size to NAMEDATALEN and the number of keys is unlikely to
00224  * be so huge that it has major memory implications.
00225  */
00226 
00227 
00228 Datum
00229 json_object_keys(PG_FUNCTION_ARGS)
00230 {
00231     FuncCallContext *funcctx;
00232     OkeysState  state;
00233     int         i;
00234 
00235     if (SRF_IS_FIRSTCALL())
00236     {
00237         text       *json = PG_GETARG_TEXT_P(0);
00238         JsonLexContext *lex = makeJsonLexContext(json, true);
00239         JsonSemAction sem;
00240 
00241         MemoryContext oldcontext;
00242 
00243         funcctx = SRF_FIRSTCALL_INIT();
00244         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
00245 
00246         state = palloc(sizeof(okeysState));
00247         sem = palloc0(sizeof(jsonSemAction));
00248 
00249         state->lex = lex;
00250         state->result_size = 256;
00251         state->result_count = 0;
00252         state->sent_count = 0;
00253         state->result = palloc(256 * sizeof(char *));
00254 
00255         sem->semstate = (void *) state;
00256         sem->array_start = okeys_array_start;
00257         sem->scalar = okeys_scalar;
00258         sem->object_field_start = okeys_object_field_start;
00259         /* remainder are all NULL, courtesy of palloc0 above */
00260 
00261         pg_parse_json(lex, sem);
00262         /* keys are now in state->result */
00263 
00264         pfree(lex->strval->data);
00265         pfree(lex->strval);
00266         pfree(lex);
00267         pfree(sem);
00268 
00269         MemoryContextSwitchTo(oldcontext);
00270         funcctx->user_fctx = (void *) state;
00271 
00272     }
00273 
00274     funcctx = SRF_PERCALL_SETUP();
00275     state = (OkeysState) funcctx->user_fctx;
00276 
00277     if (state->sent_count < state->result_count)
00278     {
00279         char       *nxt = state->result[state->sent_count++];
00280 
00281         SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
00282     }
00283 
00284     /* cleanup to reduce or eliminate memory leaks */
00285     for (i = 0; i < state->result_count; i++)
00286         pfree(state->result[i]);
00287     pfree(state->result);
00288     pfree(state);
00289 
00290     SRF_RETURN_DONE(funcctx);
00291 }
00292 
00293 static void
00294 okeys_object_field_start(void *state, char *fname, bool isnull)
00295 {
00296     OkeysState  _state = (OkeysState) state;
00297 
00298     /* only collecting keys for the top level object */
00299     if (_state->lex->lex_level != 1)
00300         return;
00301 
00302     /* enlarge result array if necessary */
00303     if (_state->result_count >= _state->result_size)
00304     {
00305         _state->result_size *= 2;
00306         _state->result =
00307             repalloc(_state->result, sizeof(char *) * _state->result_size);
00308     }
00309 
00310     /* save a copy of the field name */
00311     _state->result[_state->result_count++] = pstrdup(fname);
00312 }
00313 
00314 static void
00315 okeys_array_start(void *state)
00316 {
00317     OkeysState  _state = (OkeysState) state;
00318 
00319     /* top level must be a json object */
00320     if (_state->lex->lex_level == 0)
00321         ereport(ERROR,
00322                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00323                  errmsg("cannot call json_object_keys on an array")));
00324 }
00325 
00326 static void
00327 okeys_scalar(void *state, char *token, JsonTokenType tokentype)
00328 {
00329     OkeysState  _state = (OkeysState) state;
00330 
00331     /* top level must be a json object */
00332     if (_state->lex->lex_level == 0)
00333         ereport(ERROR,
00334                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00335                  errmsg("cannot call json_object_keys on a scalar")));
00336 }
00337 
00338 /*
00339  * json getter functions
00340  * these implement the -> ->> #> and #>> operators
00341  * and the json_extract_path*(json, text, ...) functions
00342  */
00343 
00344 
00345 Datum
00346 json_object_field(PG_FUNCTION_ARGS)
00347 {
00348     text       *json = PG_GETARG_TEXT_P(0);
00349     text       *result;
00350     text       *fname = PG_GETARG_TEXT_P(1);
00351     char       *fnamestr = text_to_cstring(fname);
00352 
00353     result = get_worker(json, fnamestr, -1, NULL, NULL, -1, false);
00354 
00355     if (result != NULL)
00356         PG_RETURN_TEXT_P(result);
00357     else
00358         PG_RETURN_NULL();
00359 }
00360 
00361 Datum
00362 json_object_field_text(PG_FUNCTION_ARGS)
00363 {
00364     text       *json = PG_GETARG_TEXT_P(0);
00365     text       *result;
00366     text       *fname = PG_GETARG_TEXT_P(1);
00367     char       *fnamestr = text_to_cstring(fname);
00368 
00369     result = get_worker(json, fnamestr, -1, NULL, NULL, -1, true);
00370 
00371     if (result != NULL)
00372         PG_RETURN_TEXT_P(result);
00373     else
00374         PG_RETURN_NULL();
00375 }
00376 
00377 Datum
00378 json_array_element(PG_FUNCTION_ARGS)
00379 {
00380     text       *json = PG_GETARG_TEXT_P(0);
00381     text       *result;
00382     int         element = PG_GETARG_INT32(1);
00383 
00384     result = get_worker(json, NULL, element, NULL, NULL, -1, false);
00385 
00386     if (result != NULL)
00387         PG_RETURN_TEXT_P(result);
00388     else
00389         PG_RETURN_NULL();
00390 }
00391 
00392 Datum
00393 json_array_element_text(PG_FUNCTION_ARGS)
00394 {
00395     text       *json = PG_GETARG_TEXT_P(0);
00396     text       *result;
00397     int         element = PG_GETARG_INT32(1);
00398 
00399     result = get_worker(json, NULL, element, NULL, NULL, -1, true);
00400 
00401     if (result != NULL)
00402         PG_RETURN_TEXT_P(result);
00403     else
00404         PG_RETURN_NULL();
00405 }
00406 
00407 Datum
00408 json_extract_path(PG_FUNCTION_ARGS)
00409 {
00410     return get_path_all(fcinfo, false);
00411 }
00412 
00413 Datum
00414 json_extract_path_text(PG_FUNCTION_ARGS)
00415 {
00416     return get_path_all(fcinfo, true);
00417 }
00418 
00419 /*
00420  * common routine for extract_path functions
00421  */
00422 static inline Datum
00423 get_path_all(PG_FUNCTION_ARGS, bool as_text)
00424 {
00425     text       *json = PG_GETARG_TEXT_P(0);
00426     ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
00427     text       *result;
00428     Datum      *pathtext;
00429     bool       *pathnulls;
00430     int         npath;
00431     char      **tpath;
00432     int        *ipath;
00433     int         i;
00434     long        ind;
00435     char       *endptr;
00436 
00437     if (array_contains_nulls(path))
00438         ereport(ERROR,
00439                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00440                  errmsg("cannot call function with null path elements")));
00441 
00442 
00443     deconstruct_array(path, TEXTOID, -1, false, 'i',
00444                       &pathtext, &pathnulls, &npath);
00445 
00446     tpath = palloc(npath * sizeof(char *));
00447     ipath = palloc(npath * sizeof(int));
00448 
00449 
00450     for (i = 0; i < npath; i++)
00451     {
00452         tpath[i] = TextDatumGetCString(pathtext[i]);
00453         if (*tpath[i] == '\0')
00454             ereport(
00455                     ERROR,
00456                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00457                    errmsg("cannot call function with empty path elements")));
00458 
00459         /*
00460          * we have no idea at this stage what structure the document is so
00461          * just convert anything in the path that we can to an integer and set
00462          * all the other integers to -1 which will never match.
00463          */
00464         ind = strtol(tpath[i], &endptr, 10);
00465         if (*endptr == '\0' && ind <= INT_MAX && ind >= 0)
00466             ipath[i] = (int) ind;
00467         else
00468             ipath[i] = -1;
00469     }
00470 
00471 
00472     result = get_worker(json, NULL, -1, tpath, ipath, npath, as_text);
00473 
00474     if (result != NULL)
00475         PG_RETURN_TEXT_P(result);
00476     else
00477         PG_RETURN_NULL();
00478 }
00479 
00480 /*
00481  * get_worker
00482  *
00483  * common worker for all the json getter functions
00484  */
00485 static inline text *
00486 get_worker(text *json,
00487            char *field,
00488            int elem_index,
00489            char **tpath,
00490            int *ipath,
00491            int npath,
00492            bool normalize_results)
00493 {
00494     GetState    state;
00495     JsonLexContext *lex = makeJsonLexContext(json, true);
00496     JsonSemAction sem;
00497 
00498     /* only allowed to use one of these */
00499     Assert(elem_index < 0 || (tpath == NULL && ipath == NULL && field == NULL));
00500     Assert(tpath == NULL || field == NULL);
00501 
00502     state = palloc0(sizeof(getState));
00503     sem = palloc0(sizeof(jsonSemAction));
00504 
00505     state->lex = lex;
00506     /* is it "_as_text" variant? */
00507     state->normalize_results = normalize_results;
00508     if (field != NULL)
00509     {
00510         /* single text argument */
00511         state->search_type = JSON_SEARCH_OBJECT;
00512         state->search_term = field;
00513     }
00514     else if (tpath != NULL)
00515     {
00516         /* path array argument */
00517         state->search_type = JSON_SEARCH_PATH;
00518         state->path = tpath;
00519         state->npath = npath;
00520         state->current_path = palloc(sizeof(char *) * npath);
00521         state->pathok = palloc0(sizeof(bool) * npath);
00522         state->pathok[0] = true;
00523         state->array_level_index = palloc(sizeof(int) * npath);
00524         state->path_level_index = ipath;
00525 
00526     }
00527     else
00528     {
00529         /* single integer argument */
00530         state->search_type = JSON_SEARCH_ARRAY;
00531         state->search_index = elem_index;
00532         state->array_index = -1;
00533     }
00534 
00535     sem->semstate = (void *) state;
00536 
00537     /*
00538      * Not all  variants need all the semantic routines. only set the ones
00539      * that are actually needed for maximum efficiency.
00540      */
00541     sem->object_start = get_object_start;
00542     sem->array_start = get_array_start;
00543     sem->scalar = get_scalar;
00544     if (field != NULL || tpath != NULL)
00545     {
00546         sem->object_field_start = get_object_field_start;
00547         sem->object_field_end = get_object_field_end;
00548     }
00549     if (field == NULL)
00550     {
00551         sem->array_element_start = get_array_element_start;
00552         sem->array_element_end = get_array_element_end;
00553     }
00554 
00555     pg_parse_json(lex, sem);
00556 
00557     return state->tresult;
00558 }
00559 
00560 static void
00561 get_object_start(void *state)
00562 {
00563     GetState    _state = (GetState) state;
00564 
00565     /* json structure check */
00566     if (_state->lex->lex_level == 0 && _state->search_type == JSON_SEARCH_ARRAY)
00567         ereport(ERROR,
00568                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00569                  errmsg("cannot extract array element from a non-array")));
00570 }
00571 
00572 static void
00573 get_object_field_start(void *state, char *fname, bool isnull)
00574 {
00575     GetState    _state = (GetState) state;
00576     bool        get_next = false;
00577     int         lex_level = _state->lex->lex_level;
00578 
00579     if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
00580         strcmp(fname, _state->search_term) == 0)
00581     {
00582 
00583         _state->tresult = NULL;
00584         _state->result_start = NULL;
00585         get_next = true;
00586     }
00587     else if (_state->search_type == JSON_SEARCH_PATH &&
00588              lex_level <= _state->npath &&
00589              _state->pathok[_state->lex->lex_level - 1] &&
00590              strcmp(fname, _state->path[lex_level - 1]) == 0)
00591     {
00592         /* path search, path so far is ok,  and we have a match */
00593 
00594         /* this object overrides any previous matching object */
00595 
00596         _state->tresult = NULL;
00597         _state->result_start = NULL;
00598 
00599         /* if not at end of path just mark path ok */
00600         if (lex_level < _state->npath)
00601             _state->pathok[lex_level] = true;
00602 
00603         /* end of path, so we want this value */
00604         if (lex_level == _state->npath)
00605             get_next = true;
00606     }
00607 
00608     if (get_next)
00609     {
00610         if (_state->normalize_results &&
00611             _state->lex->token_type == JSON_TOKEN_STRING)
00612         {
00613             /* for as_text variants, tell get_scalar to set it for us */
00614             _state->next_scalar = true;
00615         }
00616         else
00617         {
00618             /* for non-as_text variants, just note the json starting point */
00619             _state->result_start = _state->lex->token_start;
00620         }
00621     }
00622 }
00623 
00624 static void
00625 get_object_field_end(void *state, char *fname, bool isnull)
00626 {
00627     GetState    _state = (GetState) state;
00628     bool        get_last = false;
00629     int         lex_level = _state->lex->lex_level;
00630 
00631 
00632     /* same tests as in get_object_field_start, mutatis mutandis */
00633     if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
00634         strcmp(fname, _state->search_term) == 0)
00635     {
00636         get_last = true;
00637     }
00638     else if (_state->search_type == JSON_SEARCH_PATH &&
00639              lex_level <= _state->npath &&
00640              _state->pathok[lex_level - 1] &&
00641              strcmp(fname, _state->path[lex_level - 1]) == 0)
00642     {
00643         /* done with this field so reset pathok */
00644         if (lex_level < _state->npath)
00645             _state->pathok[lex_level] = false;
00646 
00647         if (lex_level == _state->npath)
00648             get_last = true;
00649     }
00650 
00651     /* for as_test variants our work is already done */
00652     if (get_last && _state->result_start != NULL)
00653     {
00654         /*
00655          * make a text object from the string from the prevously noted json
00656          * start up to the end of the previous token (the lexer is by now
00657          * ahead of us on whatevere came after what we're interested in).
00658          */
00659         int         len = _state->lex->prev_token_terminator - _state->result_start;
00660 
00661         if (isnull && _state->normalize_results)
00662             _state->tresult = (text *) NULL;
00663         else
00664             _state->tresult = cstring_to_text_with_len(_state->result_start, len);
00665     }
00666 
00667     /*
00668      * don't need to reset _state->result_start b/c we're only returning one
00669      * datum, the conditions should not occur more than once, and this lets us
00670      * check cheaply that they don't (see object_field_start() )
00671      */
00672 }
00673 
00674 static void
00675 get_array_start(void *state)
00676 {
00677     GetState    _state = (GetState) state;
00678     int         lex_level = _state->lex->lex_level;
00679 
00680     /* json structure check */
00681     if (lex_level == 0 && _state->search_type == JSON_SEARCH_OBJECT)
00682         ereport(ERROR,
00683                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00684                  errmsg("cannot extract field from a non-object")));
00685     /* 
00686      * initialize array count for this nesting level 
00687      * Note: the lex_level seen by array_start is one less than that seen by
00688      * the elements of the array.
00689      */
00690     if (_state->search_type == JSON_SEARCH_PATH &&
00691         lex_level < _state->npath)
00692         _state->array_level_index[lex_level] = -1;
00693 }
00694 
00695 static void
00696 get_array_element_start(void *state, bool isnull)
00697 {
00698     GetState    _state = (GetState) state;
00699     bool        get_next = false;
00700     int         lex_level = _state->lex->lex_level;
00701 
00702     if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY)
00703     {
00704         /* single integer search */
00705         _state->array_index++;
00706         if (_state->array_index == _state->search_index)
00707             get_next = true;
00708     }
00709     else if (_state->search_type == JSON_SEARCH_PATH &&
00710              lex_level <= _state->npath &&
00711              _state->pathok[lex_level - 1])
00712     {
00713         /*
00714          * path search, path so far is ok
00715          *
00716          * increment the array counter. no point doing this if we already know
00717          * the path is bad.
00718          *
00719          * then check if we have a match.
00720          */
00721 
00722         if (++_state->array_level_index[lex_level - 1] ==
00723             _state->path_level_index[lex_level - 1])
00724         {
00725             if (lex_level == _state->npath)
00726             {
00727                 /* match and at end of path, so get value */
00728                 get_next = true;
00729             }
00730             else
00731             {
00732                 /* not at end of path just mark path ok */
00733                 _state->pathok[lex_level] = true;
00734             }
00735         }
00736 
00737     }
00738 
00739     /* same logic as for objects */
00740     if (get_next)
00741     {
00742         if (_state->normalize_results &&
00743             _state->lex->token_type == JSON_TOKEN_STRING)
00744         {
00745             _state->next_scalar = true;
00746         }
00747         else
00748         {
00749             _state->result_start = _state->lex->token_start;
00750         }
00751     }
00752 }
00753 
00754 static void
00755 get_array_element_end(void *state, bool isnull)
00756 {
00757     GetState    _state = (GetState) state;
00758     bool        get_last = false;
00759     int         lex_level = _state->lex->lex_level;
00760 
00761     /* same logic as in get_object_end, modified for arrays */
00762 
00763     if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY &&
00764         _state->array_index == _state->search_index)
00765     {
00766         get_last = true;
00767     }
00768     else if (_state->search_type == JSON_SEARCH_PATH &&
00769              lex_level <= _state->npath &&
00770              _state->pathok[lex_level - 1] &&
00771              _state->array_level_index[lex_level - 1] ==
00772              _state->path_level_index[lex_level - 1])
00773     {
00774         /* done with this element so reset pathok */
00775         if (lex_level < _state->npath)
00776             _state->pathok[lex_level] = false;
00777 
00778         if (lex_level == _state->npath)
00779             get_last = true;
00780     }
00781     if (get_last && _state->result_start != NULL)
00782     {
00783         int         len = _state->lex->prev_token_terminator - _state->result_start;
00784 
00785         if (isnull && _state->normalize_results)
00786             _state->tresult = (text *) NULL;
00787         else
00788             _state->tresult = cstring_to_text_with_len(_state->result_start, len);
00789     }
00790 }
00791 
00792 static void
00793 get_scalar(void *state, char *token, JsonTokenType tokentype)
00794 {
00795     GetState    _state = (GetState) state;
00796 
00797     if (_state->lex->lex_level == 0 && _state->search_type != JSON_SEARCH_PATH)
00798         ereport(ERROR,
00799                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00800                  errmsg("cannot extract element from a scalar")));
00801     if (_state->next_scalar)
00802     {
00803         /* a de-escaped text value is wanted, so supply it */
00804         _state->tresult = cstring_to_text(token);
00805         /* make sure the next call to get_scalar doesn't overwrite it */
00806         _state->next_scalar = false;
00807     }
00808 
00809 }
00810 
00811 /*
00812  * SQL function json_array_length(json) -> int
00813  */
00814 Datum
00815 json_array_length(PG_FUNCTION_ARGS)
00816 {
00817     text       *json = PG_GETARG_TEXT_P(0);
00818 
00819     AlenState   state;
00820     JsonLexContext *lex = makeJsonLexContext(json, false);
00821     JsonSemAction sem;
00822 
00823     state = palloc0(sizeof(alenState));
00824     sem = palloc0(sizeof(jsonSemAction));
00825 
00826     /* palloc0 does this for us */
00827 #if 0
00828     state->count = 0;
00829 #endif
00830     state->lex = lex;
00831 
00832     sem->semstate = (void *) state;
00833     sem->object_start = alen_object_start;
00834     sem->scalar = alen_scalar;
00835     sem->array_element_start = alen_array_element_start;
00836 
00837     pg_parse_json(lex, sem);
00838 
00839     PG_RETURN_INT32(state->count);
00840 }
00841 
00842 /*
00843  * These next two checks ensure that the json is an array (i.e. that it is
00844  * neither a scalar nor an object).
00845  */
00846 
00847 static void
00848 alen_object_start(void *state)
00849 {
00850     AlenState   _state = (AlenState) state;
00851 
00852     /* json structure check */
00853     if (_state->lex->lex_level == 0)
00854         ereport(ERROR,
00855                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00856                  errmsg("cannot get array length of a non-array")));
00857 }
00858 
00859 static void
00860 alen_scalar(void *state, char *token, JsonTokenType tokentype)
00861 {
00862     AlenState   _state = (AlenState) state;
00863 
00864     /* json structure check */
00865     if (_state->lex->lex_level == 0)
00866         ereport(ERROR,
00867                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00868                  errmsg("cannot get array length of a scalar")));
00869 }
00870 
00871 static void
00872 alen_array_element_start(void *state, bool isnull)
00873 {
00874     AlenState   _state = (AlenState) state;
00875 
00876     /* just count up all the level 1 elements */
00877     if (_state->lex->lex_level == 1)
00878         _state->count++;
00879 }
00880 
00881 /*
00882  * SQL function json_each and json_each_text
00883  *
00884  * decompose a json object into key value pairs.
00885  *
00886  * Unlike json_object_keys() these SRFs operate in materialize mode,
00887  * stashing results into a Tuplestore object as they go.
00888  * The construction of tuples is done using a temporary memory context
00889  * that is cleared out after each tuple is built.
00890  */
00891 Datum
00892 json_each(PG_FUNCTION_ARGS)
00893 {
00894     return each_worker(fcinfo, false);
00895 }
00896 
00897 Datum
00898 json_each_text(PG_FUNCTION_ARGS)
00899 {
00900     return each_worker(fcinfo, true);
00901 }
00902 
00903 static inline Datum
00904 each_worker(PG_FUNCTION_ARGS, bool as_text)
00905 {
00906     text       *json = PG_GETARG_TEXT_P(0);
00907     JsonLexContext *lex = makeJsonLexContext(json, true);
00908     JsonSemAction sem;
00909     ReturnSetInfo *rsi;
00910     MemoryContext old_cxt;
00911     TupleDesc   tupdesc;
00912     EachState   state;
00913 
00914     state = palloc0(sizeof(eachState));
00915     sem = palloc0(sizeof(jsonSemAction));
00916 
00917     rsi = (ReturnSetInfo *) fcinfo->resultinfo;
00918 
00919     if (!rsi || !IsA(rsi, ReturnSetInfo) ||
00920         (rsi->allowedModes & SFRM_Materialize) == 0 ||
00921         rsi->expectedDesc == NULL)
00922         ereport(ERROR,
00923                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00924                  errmsg("set-valued function called in context that "
00925                         "cannot accept a set")));
00926 
00927 
00928     rsi->returnMode = SFRM_Materialize;
00929 
00930     (void) get_call_result_type(fcinfo, NULL, &tupdesc);
00931 
00932     /* make these in a sufficiently long-lived memory context */
00933     old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
00934 
00935     state->ret_tdesc = CreateTupleDescCopy(tupdesc);
00936     BlessTupleDesc(state->ret_tdesc);
00937     state->tuple_store =
00938         tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
00939                               false, work_mem);
00940 
00941     MemoryContextSwitchTo(old_cxt);
00942 
00943     sem->semstate = (void *) state;
00944     sem->array_start = each_array_start;
00945     sem->scalar = each_scalar;
00946     sem->object_field_start = each_object_field_start;
00947     sem->object_field_end = each_object_field_end;
00948 
00949     state->normalize_results = as_text;
00950     state->next_scalar = false;
00951 
00952     state->lex = lex;
00953     state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
00954                                            "json_each temporary cxt",
00955                                            ALLOCSET_DEFAULT_MINSIZE,
00956                                            ALLOCSET_DEFAULT_INITSIZE,
00957                                            ALLOCSET_DEFAULT_MAXSIZE);
00958 
00959     pg_parse_json(lex, sem);
00960 
00961     rsi->setResult = state->tuple_store;
00962     rsi->setDesc = state->ret_tdesc;
00963 
00964     PG_RETURN_NULL();
00965 }
00966 
00967 
00968 static void
00969 each_object_field_start(void *state, char *fname, bool isnull)
00970 {
00971     EachState   _state = (EachState) state;
00972 
00973     /* save a pointer to where the value starts */
00974     if (_state->lex->lex_level == 1)
00975     {
00976         /*
00977          * next_scalar will be reset in the object_field_end handler, and
00978          * since we know the value is a scalar there is no danger of it being
00979          * on while recursing down the tree.
00980          */
00981         if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
00982             _state->next_scalar = true;
00983         else
00984             _state->result_start = _state->lex->token_start;
00985     }
00986 }
00987 
00988 static void
00989 each_object_field_end(void *state, char *fname, bool isnull)
00990 {
00991     EachState   _state = (EachState) state;
00992     MemoryContext old_cxt;
00993     int         len;
00994     text       *val;
00995     HeapTuple   tuple;
00996     Datum       values[2];
00997     bool        nulls[2] = {false, false};
00998 
00999     /* skip over nested objects */
01000     if (_state->lex->lex_level != 1)
01001         return;
01002 
01003     /* use the tmp context so we can clean up after each tuple is done */
01004     old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
01005 
01006     values[0] = CStringGetTextDatum(fname);
01007 
01008     if (isnull && _state->normalize_results)
01009     {
01010         nulls[1] = true;
01011         values[1] = (Datum) NULL;
01012     }
01013     else if (_state->next_scalar)
01014     {
01015         values[1] = CStringGetTextDatum(_state->normalized_scalar);
01016         _state->next_scalar = false;
01017     }
01018     else
01019     {
01020         len = _state->lex->prev_token_terminator - _state->result_start;
01021         val = cstring_to_text_with_len(_state->result_start, len);
01022         values[1] = PointerGetDatum(val);
01023     }
01024 
01025 
01026     tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
01027 
01028     tuplestore_puttuple(_state->tuple_store, tuple);
01029 
01030     /* clean up and switch back */
01031     MemoryContextSwitchTo(old_cxt);
01032     MemoryContextReset(_state->tmp_cxt);
01033 }
01034 
01035 static void
01036 each_array_start(void *state)
01037 {
01038     EachState   _state = (EachState) state;
01039 
01040     /* json structure check */
01041     if (_state->lex->lex_level == 0)
01042         ereport(ERROR,
01043                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01044                  errmsg("cannot deconstruct an array as an object")));
01045 }
01046 
01047 static void
01048 each_scalar(void *state, char *token, JsonTokenType tokentype)
01049 {
01050     EachState   _state = (EachState) state;
01051 
01052     /* json structure check */
01053     if (_state->lex->lex_level == 0)
01054         ereport(ERROR,
01055                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01056                  errmsg("cannot deconstruct a scalar")));
01057 
01058     /* supply de-escaped value if required */
01059     if (_state->next_scalar)
01060         _state->normalized_scalar = token;
01061 }
01062 
01063 /*
01064  * SQL function json_array_elements
01065  *
01066  * get the elements from a json array
01067  *
01068  * a lot of this processing is similar to the json_each* functions
01069  */
01070 Datum
01071 json_array_elements(PG_FUNCTION_ARGS)
01072 {
01073     text       *json = PG_GETARG_TEXT_P(0);
01074 
01075     /* elements doesn't need any escaped strings, so use false here */
01076     JsonLexContext *lex = makeJsonLexContext(json, false);
01077     JsonSemAction sem;
01078     ReturnSetInfo *rsi;
01079     MemoryContext old_cxt;
01080     TupleDesc   tupdesc;
01081     ElementsState state;
01082 
01083     state = palloc0(sizeof(elementsState));
01084     sem = palloc0(sizeof(jsonSemAction));
01085 
01086     rsi = (ReturnSetInfo *) fcinfo->resultinfo;
01087 
01088     if (!rsi || !IsA(rsi, ReturnSetInfo) ||
01089         (rsi->allowedModes & SFRM_Materialize) == 0 ||
01090         rsi->expectedDesc == NULL)
01091         ereport(ERROR,
01092                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01093                  errmsg("set-valued function called in context that "
01094                         "cannot accept a set")));
01095 
01096 
01097     rsi->returnMode = SFRM_Materialize;
01098 
01099     /* it's a simple type, so don't use get_call_result_type() */
01100     tupdesc = rsi->expectedDesc;
01101 
01102     /* make these in a sufficiently long-lived memory context */
01103     old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
01104 
01105     state->ret_tdesc = CreateTupleDescCopy(tupdesc);
01106     BlessTupleDesc(state->ret_tdesc);
01107     state->tuple_store =
01108         tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
01109                               false, work_mem);
01110 
01111     MemoryContextSwitchTo(old_cxt);
01112 
01113     sem->semstate = (void *) state;
01114     sem->object_start = elements_object_start;
01115     sem->scalar = elements_scalar;
01116     sem->array_element_start = elements_array_element_start;
01117     sem->array_element_end = elements_array_element_end;
01118 
01119     state->lex = lex;
01120     state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
01121                                          "json_array_elements temporary cxt",
01122                                            ALLOCSET_DEFAULT_MINSIZE,
01123                                            ALLOCSET_DEFAULT_INITSIZE,
01124                                            ALLOCSET_DEFAULT_MAXSIZE);
01125 
01126     pg_parse_json(lex, sem);
01127 
01128     rsi->setResult = state->tuple_store;
01129     rsi->setDesc = state->ret_tdesc;
01130 
01131     PG_RETURN_NULL();
01132 }
01133 
01134 static void
01135 elements_array_element_start(void *state, bool isnull)
01136 {
01137     ElementsState _state = (ElementsState) state;
01138 
01139     /* save a pointer to where the value starts */
01140     if (_state->lex->lex_level == 1)
01141         _state->result_start = _state->lex->token_start;
01142 }
01143 
01144 static void
01145 elements_array_element_end(void *state, bool isnull)
01146 {
01147     ElementsState _state = (ElementsState) state;
01148     MemoryContext old_cxt;
01149     int         len;
01150     text       *val;
01151     HeapTuple   tuple;
01152     Datum       values[1];
01153     static bool nulls[1] = {false};
01154 
01155     /* skip over nested objects */
01156     if (_state->lex->lex_level != 1)
01157         return;
01158 
01159     /* use the tmp context so we can clean up after each tuple is done */
01160     old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
01161 
01162     len = _state->lex->prev_token_terminator - _state->result_start;
01163     val = cstring_to_text_with_len(_state->result_start, len);
01164 
01165     values[0] = PointerGetDatum(val);
01166 
01167     tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
01168 
01169     tuplestore_puttuple(_state->tuple_store, tuple);
01170 
01171     /* clean up and switch back */
01172     MemoryContextSwitchTo(old_cxt);
01173     MemoryContextReset(_state->tmp_cxt);
01174 }
01175 
01176 static void
01177 elements_object_start(void *state)
01178 {
01179     ElementsState _state = (ElementsState) state;
01180 
01181     /* json structure check */
01182     if (_state->lex->lex_level == 0)
01183         ereport(ERROR,
01184                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01185                  errmsg("cannot call json_array_elements on a non-array")));
01186 }
01187 
01188 static void
01189 elements_scalar(void *state, char *token, JsonTokenType tokentype)
01190 {
01191     ElementsState _state = (ElementsState) state;
01192 
01193     /* json structure check */
01194     if (_state->lex->lex_level == 0)
01195         ereport(ERROR,
01196                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01197                  errmsg("cannot call json_array_elements on a scalar")));
01198 
01199     /*
01200      * json_array_elements always returns json, so there's no need to think
01201      * about de-escaped values here.
01202      */
01203 }
01204 
01205 /*
01206  * SQL function json_populate_record
01207  *
01208  * set fields in a record from the argument json
01209  *
01210  * Code adapted shamelessly from hstore's populate_record
01211  * which is in turn partly adapted from record_out.
01212  *
01213  * The json is decomposed into a hash table, in which each
01214  * field in the record is then looked up by name.
01215  */
01216 Datum
01217 json_populate_record(PG_FUNCTION_ARGS)
01218 {
01219     Oid         argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
01220     text       *json;
01221     bool        use_json_as_text;
01222     HTAB       *json_hash;
01223     HeapTupleHeader rec;
01224     Oid         tupType;
01225     int32       tupTypmod;
01226     TupleDesc   tupdesc;
01227     HeapTupleData tuple;
01228     HeapTuple   rettuple;
01229     RecordIOData *my_extra;
01230     int         ncolumns;
01231     int         i;
01232     Datum      *values;
01233     bool       *nulls;
01234     char        fname[NAMEDATALEN];
01235     JsonHashEntry hashentry;
01236 
01237     use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
01238 
01239     if (!type_is_rowtype(argtype))
01240         ereport(ERROR,
01241                 (errcode(ERRCODE_DATATYPE_MISMATCH),
01242                  errmsg("first argument must be a rowtype")));
01243 
01244     if (PG_ARGISNULL(0))
01245     {
01246         if (PG_ARGISNULL(1))
01247             PG_RETURN_NULL();
01248 
01249         rec = NULL;
01250 
01251         /*
01252          * have no tuple to look at, so the only source of type info is the
01253          * argtype. The lookup_rowtype_tupdesc call below will error out if we
01254          * don't have a known composite type oid here.
01255          */
01256         tupType = argtype;
01257         tupTypmod = -1;
01258     }
01259     else
01260     {
01261         rec = PG_GETARG_HEAPTUPLEHEADER(0);
01262 
01263         if (PG_ARGISNULL(1))
01264             PG_RETURN_POINTER(rec);
01265 
01266         /* Extract type info from the tuple itself */
01267         tupType = HeapTupleHeaderGetTypeId(rec);
01268         tupTypmod = HeapTupleHeaderGetTypMod(rec);
01269     }
01270 
01271     json = PG_GETARG_TEXT_P(1);
01272 
01273     json_hash = get_json_object_as_hash(json, "json_populate_record", use_json_as_text);
01274 
01275     /*
01276      * if the input json is empty, we can only skip the rest if we were passed
01277      * in a non-null record, since otherwise there may be issues with domain
01278      * nulls.
01279      */
01280     if (hash_get_num_entries(json_hash) == 0 && rec)
01281         PG_RETURN_POINTER(rec);
01282 
01283 
01284     tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
01285     ncolumns = tupdesc->natts;
01286 
01287     if (rec)
01288     {
01289         /* Build a temporary HeapTuple control structure */
01290         tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
01291         ItemPointerSetInvalid(&(tuple.t_self));
01292         tuple.t_tableOid = InvalidOid;
01293         tuple.t_data = rec;
01294     }
01295 
01296     /*
01297      * We arrange to look up the needed I/O info just once per series of
01298      * calls, assuming the record type doesn't change underneath us.
01299      */
01300     my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
01301     if (my_extra == NULL ||
01302         my_extra->ncolumns != ncolumns)
01303     {
01304         fcinfo->flinfo->fn_extra =
01305             MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
01306                                sizeof(RecordIOData) - sizeof(ColumnIOData)
01307                                + ncolumns * sizeof(ColumnIOData));
01308         my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
01309         my_extra->record_type = InvalidOid;
01310         my_extra->record_typmod = 0;
01311     }
01312 
01313     if (my_extra->record_type != tupType ||
01314         my_extra->record_typmod != tupTypmod)
01315     {
01316         MemSet(my_extra, 0,
01317                sizeof(RecordIOData) - sizeof(ColumnIOData)
01318                + ncolumns * sizeof(ColumnIOData));
01319         my_extra->record_type = tupType;
01320         my_extra->record_typmod = tupTypmod;
01321         my_extra->ncolumns = ncolumns;
01322     }
01323 
01324     values = (Datum *) palloc(ncolumns * sizeof(Datum));
01325     nulls = (bool *) palloc(ncolumns * sizeof(bool));
01326 
01327     if (rec)
01328     {
01329         /* Break down the tuple into fields */
01330         heap_deform_tuple(&tuple, tupdesc, values, nulls);
01331     }
01332     else
01333     {
01334         for (i = 0; i < ncolumns; ++i)
01335         {
01336             values[i] = (Datum) 0;
01337             nulls[i] = true;
01338         }
01339     }
01340 
01341     for (i = 0; i < ncolumns; ++i)
01342     {
01343         ColumnIOData *column_info = &my_extra->columns[i];
01344         Oid         column_type = tupdesc->attrs[i]->atttypid;
01345         char       *value;
01346 
01347         /* Ignore dropped columns in datatype */
01348         if (tupdesc->attrs[i]->attisdropped)
01349         {
01350             nulls[i] = true;
01351             continue;
01352         }
01353 
01354         memset(fname, 0, NAMEDATALEN);
01355         strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
01356         hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
01357 
01358         /*
01359          * we can't just skip here if the key wasn't found since we might have
01360          * a domain to deal with. If we were passed in a non-null record
01361          * datum, we assume that the existing values are valid (if they're
01362          * not, then it's not our fault), but if we were passed in a null,
01363          * then every field which we don't populate needs to be run through
01364          * the input function just in case it's a domain type.
01365          */
01366         if (hashentry == NULL && rec)
01367             continue;
01368 
01369         /*
01370          * Prepare to convert the column value from text
01371          */
01372         if (column_info->column_type != column_type)
01373         {
01374             getTypeInputInfo(column_type,
01375                              &column_info->typiofunc,
01376                              &column_info->typioparam);
01377             fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
01378                           fcinfo->flinfo->fn_mcxt);
01379             column_info->column_type = column_type;
01380         }
01381         if (hashentry == NULL || hashentry->isnull)
01382         {
01383             /*
01384              * need InputFunctionCall to happen even for nulls, so that domain
01385              * checks are done
01386              */
01387             values[i] = InputFunctionCall(&column_info->proc, NULL,
01388                                           column_info->typioparam,
01389                                           tupdesc->attrs[i]->atttypmod);
01390             nulls[i] = true;
01391         }
01392         else
01393         {
01394             value = hashentry->val;
01395 
01396             values[i] = InputFunctionCall(&column_info->proc, value,
01397                                           column_info->typioparam,
01398                                           tupdesc->attrs[i]->atttypmod);
01399             nulls[i] = false;
01400         }
01401     }
01402 
01403     rettuple = heap_form_tuple(tupdesc, values, nulls);
01404 
01405     ReleaseTupleDesc(tupdesc);
01406 
01407     PG_RETURN_DATUM(HeapTupleGetDatum(rettuple));
01408 }
01409 
01410 /*
01411  * get_json_object_as_hash
01412  *
01413  * decompose a json object into a hash table.
01414  *
01415  * Currently doesn't allow anything but a flat object. Should this
01416  * change?
01417  *
01418  * funcname argument allows caller to pass in its name for use in
01419  * error messages.
01420  */
01421 static HTAB *
01422 get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text)
01423 {
01424     HASHCTL     ctl;
01425     HTAB       *tab;
01426     JHashState  state;
01427     JsonLexContext *lex = makeJsonLexContext(json, true);
01428     JsonSemAction sem;
01429 
01430     memset(&ctl, 0, sizeof(ctl));
01431     ctl.keysize = NAMEDATALEN;
01432     ctl.entrysize = sizeof(jsonHashEntry);
01433     ctl.hcxt = CurrentMemoryContext;
01434     tab = hash_create("json object hashtable",
01435                       100,
01436                       &ctl,
01437                       HASH_ELEM | HASH_CONTEXT);
01438 
01439     state = palloc0(sizeof(jhashState));
01440     sem = palloc0(sizeof(jsonSemAction));
01441 
01442     state->function_name = funcname;
01443     state->hash = tab;
01444     state->lex = lex;
01445     state->use_json_as_text = use_json_as_text;
01446 
01447     sem->semstate = (void *) state;
01448     sem->array_start = hash_array_start;
01449     sem->scalar = hash_scalar;
01450     sem->object_field_start = hash_object_field_start;
01451     sem->object_field_end = hash_object_field_end;
01452 
01453     pg_parse_json(lex, sem);
01454 
01455     return tab;
01456 }
01457 
01458 static void
01459 hash_object_field_start(void *state, char *fname, bool isnull)
01460 {
01461     JHashState  _state = (JHashState) state;
01462 
01463     if (_state->lex->lex_level > 1)
01464         return;
01465 
01466     if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
01467         _state->lex->token_type == JSON_TOKEN_OBJECT_START)
01468     {
01469         if (!_state->use_json_as_text)
01470             ereport(ERROR,
01471                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01472                      errmsg("cannot call %s on a nested object",
01473                             _state->function_name)));
01474         _state->save_json_start = _state->lex->token_start;
01475     }
01476     else
01477     {
01478         /* must be a scalar */
01479         _state->save_json_start = NULL;
01480     }
01481 }
01482 
01483 static void
01484 hash_object_field_end(void *state, char *fname, bool isnull)
01485 {
01486     JHashState  _state = (JHashState) state;
01487     JsonHashEntry hashentry;
01488     bool        found;
01489     char        name[NAMEDATALEN];
01490 
01491     /*
01492      * ignore field names >= NAMEDATALEN - they can't match a record field
01493      * ignore nested fields.
01494      */
01495     if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
01496         return;
01497 
01498     memset(name, 0, NAMEDATALEN);
01499     strncpy(name, fname, NAMEDATALEN);
01500 
01501     hashentry = hash_search(_state->hash, name, HASH_ENTER, &found);
01502 
01503     /*
01504      * found being true indicates a duplicate. We don't do anything about
01505      * that, a later field with the same name overrides the earlier field.
01506      */
01507 
01508     hashentry->isnull = isnull;
01509     if (_state->save_json_start != NULL)
01510     {
01511         int         len = _state->lex->prev_token_terminator - _state->save_json_start;
01512         char       *val = palloc((len + 1) * sizeof(char));
01513 
01514         memcpy(val, _state->save_json_start, len);
01515         val[len] = '\0';
01516         hashentry->val = val;
01517     }
01518     else
01519     {
01520         /* must have had a scalar instead */
01521         hashentry->val = _state->saved_scalar;
01522     }
01523 }
01524 
01525 static void
01526 hash_array_start(void *state)
01527 {
01528     JHashState  _state = (JHashState) state;
01529 
01530     if (_state->lex->lex_level == 0)
01531         ereport(ERROR,
01532                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01533                errmsg("cannot call %s on an array", _state->function_name)));
01534 }
01535 
01536 static void
01537 hash_scalar(void *state, char *token, JsonTokenType tokentype)
01538 {
01539     JHashState  _state = (JHashState) state;
01540 
01541     if (_state->lex->lex_level == 0)
01542         ereport(ERROR,
01543                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01544                errmsg("cannot call %s on a scalar", _state->function_name)));
01545 
01546     if (_state->lex->lex_level == 1)
01547         _state->saved_scalar = token;
01548 }
01549 
01550 
01551 /*
01552  * SQL function json_populate_recordset
01553  *
01554  * set fields in a set of records from the argument json,
01555  * which must be an array of objects.
01556  *
01557  * similar to json_populate_record, but the tuple-building code
01558  * is pushed down into the semantic action handlers so it's done
01559  * per object in the array.
01560  */
01561 Datum
01562 json_populate_recordset(PG_FUNCTION_ARGS)
01563 {
01564     Oid         argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
01565     text       *json;
01566     bool        use_json_as_text;
01567     ReturnSetInfo *rsi;
01568     MemoryContext old_cxt;
01569     Oid         tupType;
01570     int32       tupTypmod;
01571     HeapTupleHeader rec;
01572     TupleDesc   tupdesc;
01573     RecordIOData *my_extra;
01574     int         ncolumns;
01575     JsonLexContext *lex;
01576     JsonSemAction sem;
01577     PopulateRecordsetState state;
01578 
01579     use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
01580 
01581     if (!type_is_rowtype(argtype))
01582         ereport(ERROR,
01583                 (errcode(ERRCODE_DATATYPE_MISMATCH),
01584                  errmsg("first argument must be a rowtype")));
01585 
01586     rsi = (ReturnSetInfo *) fcinfo->resultinfo;
01587 
01588     if (!rsi || !IsA(rsi, ReturnSetInfo) ||
01589         (rsi->allowedModes & SFRM_Materialize) == 0 ||
01590         rsi->expectedDesc == NULL)
01591         ereport(ERROR,
01592                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01593                  errmsg("set-valued function called in context that "
01594                         "cannot accept a set")));
01595 
01596 
01597     rsi->returnMode = SFRM_Materialize;
01598 
01599     /*
01600      * get the tupdesc from the result set info - it must be a record type
01601      * because we already checked that arg1 is a record type.
01602      */
01603     (void) get_call_result_type(fcinfo, NULL, &tupdesc);
01604 
01605     state = palloc0(sizeof(populateRecordsetState));
01606     sem = palloc0(sizeof(jsonSemAction));
01607 
01608 
01609     /* make these in a sufficiently long-lived memory context */
01610     old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
01611 
01612     state->ret_tdesc = CreateTupleDescCopy(tupdesc);
01613     BlessTupleDesc(state->ret_tdesc);
01614     state->tuple_store =
01615         tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
01616                               false, work_mem);
01617 
01618     MemoryContextSwitchTo(old_cxt);
01619 
01620     /* if the json is null send back an empty set */
01621     if (PG_ARGISNULL(1))
01622         PG_RETURN_NULL();
01623 
01624     json = PG_GETARG_TEXT_P(1);
01625 
01626     if (PG_ARGISNULL(0))
01627         rec = NULL;
01628     else
01629         rec = PG_GETARG_HEAPTUPLEHEADER(0);
01630 
01631     tupType = tupdesc->tdtypeid;
01632     tupTypmod = tupdesc->tdtypmod;
01633     ncolumns = tupdesc->natts;
01634 
01635     lex = makeJsonLexContext(json, true);
01636 
01637     /*
01638      * We arrange to look up the needed I/O info just once per series of
01639      * calls, assuming the record type doesn't change underneath us.
01640      */
01641     my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
01642     if (my_extra == NULL ||
01643         my_extra->ncolumns != ncolumns)
01644     {
01645         fcinfo->flinfo->fn_extra =
01646             MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
01647                                sizeof(RecordIOData) - sizeof(ColumnIOData)
01648                                + ncolumns * sizeof(ColumnIOData));
01649         my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
01650         my_extra->record_type = InvalidOid;
01651         my_extra->record_typmod = 0;
01652     }
01653 
01654     if (my_extra->record_type != tupType ||
01655         my_extra->record_typmod != tupTypmod)
01656     {
01657         MemSet(my_extra, 0,
01658                sizeof(RecordIOData) - sizeof(ColumnIOData)
01659                + ncolumns * sizeof(ColumnIOData));
01660         my_extra->record_type = tupType;
01661         my_extra->record_typmod = tupTypmod;
01662         my_extra->ncolumns = ncolumns;
01663     }
01664 
01665     sem->semstate = (void *) state;
01666     sem->array_start = populate_recordset_array_start;
01667     sem->array_element_start = populate_recordset_array_element_start;
01668     sem->scalar = populate_recordset_scalar;
01669     sem->object_field_start = populate_recordset_object_field_start;
01670     sem->object_field_end = populate_recordset_object_field_end;
01671     sem->object_start = populate_recordset_object_start;
01672     sem->object_end = populate_recordset_object_end;
01673 
01674     state->lex = lex;
01675 
01676     state->my_extra = my_extra;
01677     state->rec = rec;
01678     state->use_json_as_text = use_json_as_text;
01679     state->fn_mcxt = fcinfo->flinfo->fn_mcxt;
01680 
01681     pg_parse_json(lex, sem);
01682 
01683     rsi->setResult = state->tuple_store;
01684     rsi->setDesc = state->ret_tdesc;
01685 
01686     PG_RETURN_NULL();
01687 
01688 }
01689 
01690 static void
01691 populate_recordset_object_start(void *state)
01692 {
01693     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01694     int         lex_level = _state->lex->lex_level;
01695     HASHCTL     ctl;
01696 
01697     if (lex_level == 0)
01698         ereport(ERROR,
01699                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01700                  errmsg("cannot call json_populate_recordset on an object")));
01701     else if (lex_level > 1 && !_state->use_json_as_text)
01702         ereport(ERROR,
01703                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01704          errmsg("cannot call json_populate_recordset with nested objects")));
01705 
01706     /* set up a new hash for this entry */
01707     memset(&ctl, 0, sizeof(ctl));
01708     ctl.keysize = NAMEDATALEN;
01709     ctl.entrysize = sizeof(jsonHashEntry);
01710     ctl.hcxt = CurrentMemoryContext;
01711     _state->json_hash = hash_create("json object hashtable",
01712                                     100,
01713                                     &ctl,
01714                                     HASH_ELEM | HASH_CONTEXT);
01715 }
01716 
01717 static void
01718 populate_recordset_object_end(void *state)
01719 {
01720     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01721     HTAB       *json_hash = _state->json_hash;
01722     Datum      *values;
01723     bool       *nulls;
01724     char        fname[NAMEDATALEN];
01725     int         i;
01726     RecordIOData *my_extra = _state->my_extra;
01727     int         ncolumns = my_extra->ncolumns;
01728     TupleDesc   tupdesc = _state->ret_tdesc;
01729     JsonHashEntry hashentry;
01730     HeapTupleHeader rec = _state->rec;
01731     HeapTuple   rettuple;
01732 
01733     if (_state->lex->lex_level > 1)
01734         return;
01735 
01736     values = (Datum *) palloc(ncolumns * sizeof(Datum));
01737     nulls = (bool *) palloc(ncolumns * sizeof(bool));
01738 
01739     if (_state->rec)
01740     {
01741         HeapTupleData tuple;
01742 
01743         /* Build a temporary HeapTuple control structure */
01744         tuple.t_len = HeapTupleHeaderGetDatumLength(_state->rec);
01745         ItemPointerSetInvalid(&(tuple.t_self));
01746         tuple.t_tableOid = InvalidOid;
01747         tuple.t_data = _state->rec;
01748 
01749         /* Break down the tuple into fields */
01750         heap_deform_tuple(&tuple, tupdesc, values, nulls);
01751     }
01752     else
01753     {
01754         for (i = 0; i < ncolumns; ++i)
01755         {
01756             values[i] = (Datum) 0;
01757             nulls[i] = true;
01758         }
01759     }
01760 
01761     for (i = 0; i < ncolumns; ++i)
01762     {
01763         ColumnIOData *column_info = &my_extra->columns[i];
01764         Oid         column_type = tupdesc->attrs[i]->atttypid;
01765         char       *value;
01766 
01767         /* Ignore dropped columns in datatype */
01768         if (tupdesc->attrs[i]->attisdropped)
01769         {
01770             nulls[i] = true;
01771             continue;
01772         }
01773 
01774         memset(fname, 0, NAMEDATALEN);
01775         strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
01776         hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
01777 
01778         /*
01779          * we can't just skip here if the key wasn't found since we might have
01780          * a domain to deal with. If we were passed in a non-null record
01781          * datum, we assume that the existing values are valid (if they're
01782          * not, then it's not our fault), but if we were passed in a null,
01783          * then every field which we don't populate needs to be run through
01784          * the input function just in case it's a domain type.
01785          */
01786         if (hashentry == NULL && rec)
01787             continue;
01788 
01789         /*
01790          * Prepare to convert the column value from text
01791          */
01792         if (column_info->column_type != column_type)
01793         {
01794             getTypeInputInfo(column_type,
01795                              &column_info->typiofunc,
01796                              &column_info->typioparam);
01797             fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
01798                           _state->fn_mcxt);
01799             column_info->column_type = column_type;
01800         }
01801         if (hashentry == NULL || hashentry->isnull)
01802         {
01803             /*
01804              * need InputFunctionCall to happen even for nulls, so that domain
01805              * checks are done
01806              */
01807             values[i] = InputFunctionCall(&column_info->proc, NULL,
01808                                           column_info->typioparam,
01809                                           tupdesc->attrs[i]->atttypmod);
01810             nulls[i] = true;
01811         }
01812         else
01813         {
01814             value = hashentry->val;
01815 
01816             values[i] = InputFunctionCall(&column_info->proc, value,
01817                                           column_info->typioparam,
01818                                           tupdesc->attrs[i]->atttypmod);
01819             nulls[i] = false;
01820         }
01821     }
01822 
01823     rettuple = heap_form_tuple(tupdesc, values, nulls);
01824 
01825     tuplestore_puttuple(_state->tuple_store, rettuple);
01826 
01827     hash_destroy(json_hash);
01828 }
01829 
01830 static void
01831 populate_recordset_array_element_start(void *state, bool isnull)
01832 {
01833     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01834 
01835     if (_state->lex->lex_level == 1 &&
01836         _state->lex->token_type != JSON_TOKEN_OBJECT_START)
01837         ereport(ERROR,
01838                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01839              errmsg("must call populate_recordset on an array of objects")));
01840 }
01841 
01842 static void
01843 populate_recordset_array_start(void *state)
01844 {
01845     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01846 
01847     if (_state->lex->lex_level != 0 && !_state->use_json_as_text)
01848         ereport(ERROR,
01849                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01850           errmsg("cannot call json_populate_recordset with nested arrays")));
01851 }
01852 
01853 static void
01854 populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
01855 {
01856     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01857 
01858     if (_state->lex->lex_level == 0)
01859         ereport(ERROR,
01860                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01861                  errmsg("cannot call json_populate_recordset on a scalar")));
01862 
01863     if (_state->lex->lex_level == 2)
01864         _state->saved_scalar = token;
01865 }
01866 
01867 static void
01868 populate_recordset_object_field_start(void *state, char *fname, bool isnull)
01869 {
01870     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01871 
01872     if (_state->lex->lex_level > 2)
01873         return;
01874 
01875     if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
01876         _state->lex->token_type == JSON_TOKEN_OBJECT_START)
01877     {
01878         if (!_state->use_json_as_text)
01879             ereport(ERROR,
01880                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01881                      errmsg("cannot call json_populate_recordset on a nested object")));
01882         _state->save_json_start = _state->lex->token_start;
01883     }
01884     else
01885     {
01886         _state->save_json_start = NULL;
01887     }
01888 }
01889 
01890 static void
01891 populate_recordset_object_field_end(void *state, char *fname, bool isnull)
01892 {
01893     PopulateRecordsetState _state = (PopulateRecordsetState) state;
01894     JsonHashEntry hashentry;
01895     bool        found;
01896     char        name[NAMEDATALEN];
01897 
01898     /*
01899      * ignore field names >= NAMEDATALEN - they can't match a record field
01900      * ignore nested fields.
01901      */
01902     if (_state->lex->lex_level > 2 || strlen(fname) >= NAMEDATALEN)
01903         return;
01904 
01905     memset(name, 0, NAMEDATALEN);
01906     strncpy(name, fname, NAMEDATALEN);
01907 
01908     hashentry = hash_search(_state->json_hash, name, HASH_ENTER, &found);
01909 
01910     /*
01911      * found being true indicates a duplicate. We don't do anything about
01912      * that, a later field with the same name overrides the earlier field.
01913      */
01914 
01915     hashentry->isnull = isnull;
01916     if (_state->save_json_start != NULL)
01917     {
01918         int         len = _state->lex->prev_token_terminator - _state->save_json_start;
01919         char       *val = palloc((len + 1) * sizeof(char));
01920 
01921         memcpy(val, _state->save_json_start, len);
01922         val[len] = '\0';
01923         hashentry->val = val;
01924     }
01925     else
01926     {
01927         /* must have had a scalar instead */
01928         hashentry->val = _state->saved_scalar;
01929     }
01930 }