Header And Logo

PostgreSQL
| The world's most advanced open source database.

json.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * json.c
00004  *      JSON data type support.
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/utils/adt/json.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 #include "postgres.h"
00015 
00016 #include "access/htup_details.h"
00017 #include "access/transam.h"
00018 #include "catalog/pg_cast.h"
00019 #include "catalog/pg_type.h"
00020 #include "executor/spi.h"
00021 #include "lib/stringinfo.h"
00022 #include "libpq/pqformat.h"
00023 #include "mb/pg_wchar.h"
00024 #include "parser/parse_coerce.h"
00025 #include "utils/array.h"
00026 #include "utils/builtins.h"
00027 #include "utils/lsyscache.h"
00028 #include "utils/json.h"
00029 #include "utils/jsonapi.h"
00030 #include "utils/typcache.h"
00031 #include "utils/syscache.h"
00032 
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.  Each value names the grammar position we
 * were at when the unexpected token appeared, so the error message can
 * say what was expected there.
 */
typedef enum                    /* contexts of JSON parser */
{
    JSON_PARSE_VALUE,           /* expecting a value */
    JSON_PARSE_STRING,          /* expecting a string (for a field name) */
    JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
    JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
    JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
    JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
    JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
    JSON_PARSE_OBJECT_COMMA,    /* saw object ',', expecting next label */
    JSON_PARSE_END              /* saw the end of a document, expect nothing */
}   JsonParseContext;
00050 
00051 static inline void json_lex(JsonLexContext *lex);
00052 static inline void json_lex_string(JsonLexContext *lex);
00053 static inline void json_lex_number(JsonLexContext *lex, char *s);
00054 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction sem);
00055 static void parse_object_field(JsonLexContext *lex, JsonSemAction sem);
00056 static void parse_object(JsonLexContext *lex, JsonSemAction sem);
00057 static void parse_array_element(JsonLexContext *lex, JsonSemAction sem);
00058 static void parse_array(JsonLexContext *lex, JsonSemAction sem);
00059 static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
00060 static void report_invalid_token(JsonLexContext *lex);
00061 static int  report_json_context(JsonLexContext *lex);
00062 static char *extract_mb_char(char *s);
00063 static void composite_to_json(Datum composite, StringInfo result,
00064                   bool use_line_feeds);
00065 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
00066                   Datum *vals, bool *nulls, int *valcount,
00067                   TYPCATEGORY tcategory, Oid typoutputfunc,
00068                   bool use_line_feeds);
00069 static void array_to_json_internal(Datum array, StringInfo result,
00070                        bool use_line_feeds);
00071 
/*
 * The null action object used for pure validation: every semantic-action
 * callback is NULL, so parsing with it checks syntax and nothing else.
 */
static jsonSemAction nullSemAction =
{
    NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL
};
/* convenience pointer handed to pg_parse_json for validation-only parses */
static JsonSemAction NullSemAction = &nullSemAction;
00079 
00080 /* Recursive Descent parser support routines */
00081 
/*
 * lex_peek
 *
 * Return the current look_ahead token without consuming it.
 */
static inline JsonTokenType
lex_peek(JsonLexContext *lex)
{
    return lex->token_type;
}
00092 
00093 /*
00094  * lex_accept
00095  *
00096  * accept the look_ahead token and move the lexer to the next token if the
00097  * look_ahead token matches the token parameter. In that case, and if required,
00098  * also hand back the de-escaped lexeme.
00099  *
00100  * returns true if the token matched, false otherwise.
00101  */
00102 static inline bool
00103 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
00104 {
00105     if (lex->token_type == token)
00106     {
00107         if (lexeme != NULL)
00108         {
00109             if (lex->token_type == JSON_TOKEN_STRING)
00110             {
00111                 if (lex->strval != NULL)
00112                     *lexeme = pstrdup(lex->strval->data);
00113             }
00114             else
00115             {
00116                 int         len = (lex->token_terminator - lex->token_start);
00117                 char       *tokstr = palloc(len + 1);
00118 
00119                 memcpy(tokstr, lex->token_start, len);
00120                 tokstr[len] = '\0';
00121                 *lexeme = tokstr;
00122             }
00123         }
00124         json_lex(lex);
00125         return true;
00126     }
00127     return false;
00128 }
00129 
00130 /*
00131  * lex_accept
00132  *
00133  * move the lexer to the next token if the current look_ahead token matches
00134  * the parameter token. Otherwise, report an error.
00135  */
00136 static inline void
00137 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
00138 {
00139     if (!lex_accept(lex, token, NULL))
00140         report_parse_error(ctx, lex);;
00141 }
00142 
/*
 * All the defined type categories are upper case, so use lower case here
 * to avoid any possible clash.
 */
00147 /* fake type category for JSON so we can distinguish it in datum_to_json */
00148 #define TYPCATEGORY_JSON 'j'
00149 /* fake category for types that have a cast to json */
00150 #define TYPCATEGORY_JSON_CAST 'c'
00151 /* letters appearing in numeric output that aren't valid in a JSON number */
00152 #define NON_NUMERIC_LETTER "NnAaIiFfTtYy"
00153 /* chars to consider as part of an alphanumeric token */
00154 #define JSON_ALPHANUMERIC_CHAR(c)  \
00155     (((c) >= 'a' && (c) <= 'z') || \
00156      ((c) >= 'A' && (c) <= 'Z') || \
00157      ((c) >= '0' && (c) <= '9') || \
00158      (c) == '_' || \
00159      IS_HIGHBIT_SET(c))
00160 
00161 /*
00162  * Input.
00163  */
00164 Datum
00165 json_in(PG_FUNCTION_ARGS)
00166 {
00167     char       *json = PG_GETARG_CSTRING(0);
00168     text       *result = cstring_to_text(json);
00169     JsonLexContext *lex;
00170 
00171     /* validate it */
00172     lex = makeJsonLexContext(result, false);
00173     pg_parse_json(lex, NullSemAction);
00174 
00175     /* Internal representation is the same as text, for now */
00176     PG_RETURN_TEXT_P(result);
00177 }
00178 
00179 /*
00180  * Output.
00181  */
00182 Datum
00183 json_out(PG_FUNCTION_ARGS)
00184 {
00185     /* we needn't detoast because text_to_cstring will handle that */
00186     Datum       txt = PG_GETARG_DATUM(0);
00187 
00188     PG_RETURN_CSTRING(TextDatumGetCString(txt));
00189 }
00190 
00191 /*
00192  * Binary send.
00193  */
00194 Datum
00195 json_send(PG_FUNCTION_ARGS)
00196 {
00197     text       *t = PG_GETARG_TEXT_PP(0);
00198     StringInfoData buf;
00199 
00200     pq_begintypsend(&buf);
00201     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
00202     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
00203 }
00204 
00205 /*
00206  * Binary receive.
00207  */
00208 Datum
00209 json_recv(PG_FUNCTION_ARGS)
00210 {
00211     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
00212     text       *result;
00213     char       *str;
00214     int         nbytes;
00215     JsonLexContext *lex;
00216 
00217     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
00218 
00219     result = palloc(nbytes + VARHDRSZ);
00220     SET_VARSIZE(result, nbytes + VARHDRSZ);
00221     memcpy(VARDATA(result), str, nbytes);
00222 
00223     /* Validate it. */
00224     lex = makeJsonLexContext(result, false);
00225     pg_parse_json(lex, NullSemAction);
00226 
00227     PG_RETURN_TEXT_P(result);
00228 }
00229 
00230 /*
00231  * makeJsonLexContext
00232  *
00233  * lex constructor, with or without StringInfo object
00234  * for de-escaped lexemes.
00235  *
00236  * Without is better as it makes the processing faster, so only make one
00237  * if really required.
00238  */
00239 JsonLexContext *
00240 makeJsonLexContext(text *json, bool need_escapes)
00241 {
00242     JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
00243 
00244     lex->input = lex->token_terminator = lex->line_start = VARDATA(json);
00245     lex->line_number = 1;
00246     lex->input_length = VARSIZE(json) - VARHDRSZ;
00247     if (need_escapes)
00248         lex->strval = makeStringInfo();
00249     return lex;
00250 }
00251 
00252 /*
00253  * pg_parse_json
00254  *
00255  * Publicly visible entry point for the JSON parser.
00256  *
00257  * lex is a lexing context, set up for the json to be processed by calling
00258  * makeJsonLexContext(). sem is a strucure of function pointers to semantic
00259  * action routines to be called at appropriate spots during parsing, and a
00260  * pointer to a state object to be passed to those routines.
00261  */
00262 void
00263 pg_parse_json(JsonLexContext *lex, JsonSemAction sem)
00264 {
00265     JsonTokenType tok;
00266 
00267     /* get the initial token */
00268     json_lex(lex);
00269 
00270     tok = lex_peek(lex);
00271 
00272     /* parse by recursive descent */
00273     switch (tok)
00274     {
00275         case JSON_TOKEN_OBJECT_START:
00276             parse_object(lex, sem);
00277             break;
00278         case JSON_TOKEN_ARRAY_START:
00279             parse_array(lex, sem);
00280             break;
00281         default:
00282             parse_scalar(lex, sem);     /* json can be a bare scalar */
00283     }
00284 
00285     lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
00286 
00287 }
00288 
00289 /*
00290  *  Recursive Descent parse routines. There is one for each structural
00291  *  element in a json document:
00292  *    - scalar (string, number, true, false, null)
00293  *    - array  ( [ ] )
00294  *    - array element
00295  *    - object ( { } )
00296  *    - object field
00297  */
00298 static inline void
00299 parse_scalar(JsonLexContext *lex, JsonSemAction sem)
00300 {
00301     char       *val = NULL;
00302     json_scalar_action sfunc = sem->scalar;
00303     char      **valaddr;
00304     JsonTokenType tok = lex_peek(lex);
00305 
00306     valaddr = sfunc == NULL ? NULL : &val;
00307 
00308     /* a scalar must be a string, a number, true, false, or null */
00309     switch (tok)
00310     {
00311         case JSON_TOKEN_TRUE:
00312             lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
00313             break;
00314         case JSON_TOKEN_FALSE:
00315             lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
00316             break;
00317         case JSON_TOKEN_NULL:
00318             lex_accept(lex, JSON_TOKEN_NULL, valaddr);
00319             break;
00320         case JSON_TOKEN_NUMBER:
00321             lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
00322             break;
00323         case JSON_TOKEN_STRING:
00324             lex_accept(lex, JSON_TOKEN_STRING, valaddr);
00325             break;
00326         default:
00327             report_parse_error(JSON_PARSE_VALUE, lex);
00328     }
00329 
00330     if (sfunc != NULL)
00331         (*sfunc) (sem->semstate, val, tok);
00332 }
00333 
static void
parse_object_field(JsonLexContext *lex, JsonSemAction sem)
{
    /*
     * an object field is "fieldname" : value where value can be a scalar,
     * object or array
     */

    char       *fname = NULL;   /* keep compiler quiet */
    json_ofield_action ostart = sem->object_field_start;
    json_ofield_action oend = sem->object_field_end;
    bool        isnull;
    char      **fnameaddr = NULL;
    JsonTokenType tok;

    /* only capture the field name if some callback will want it */
    if (ostart != NULL || oend != NULL)
        fnameaddr = &fname;

    if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
        report_parse_error(JSON_PARSE_STRING, lex);

    lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);

    /* peek at the value so isnull can be passed to both callbacks */
    tok = lex_peek(lex);
    isnull = tok == JSON_TOKEN_NULL;

    /* field-start callback fires before the value is parsed ... */
    if (ostart != NULL)
        (*ostart) (sem->semstate, fname, isnull);

    switch (tok)
    {
        case JSON_TOKEN_OBJECT_START:
            parse_object(lex, sem);
            break;
        case JSON_TOKEN_ARRAY_START:
            parse_array(lex, sem);
            break;
        default:
            parse_scalar(lex, sem);
    }

    /* ... and field-end after, with the same name and null flag */
    if (oend != NULL)
        (*oend) (sem->semstate, fname, isnull);

    /* fname was palloc'd by lex_accept on our behalf */
    if (fname != NULL)
        pfree(fname);
}
00381 
static void
parse_object(JsonLexContext *lex, JsonSemAction sem)
{
    /*
     * an object is a possibly empty sequence of object fields, separated by
     * commas and surrounded by curly braces.
     */
    json_struct_action ostart = sem->object_start;
    json_struct_action oend = sem->object_end;
    JsonTokenType tok;

    if (ostart != NULL)
        (*ostart) (sem->semstate);

    /*
     * Data inside an object is at a higher nesting level than the object
     * itself. Note that we increment this after we call the semantic routine
     * for the object start and restore it before we call the routine for the
     * object end.
     */
    lex->lex_level++;

    /* we know this will succeed, just clearing the token */
    lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);

    tok = lex_peek(lex);
    switch (tok)
    {
        case JSON_TOKEN_STRING:
            /* at least one field: parse it, then any comma-separated rest */
            parse_object_field(lex, sem);
            while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
                parse_object_field(lex, sem);
            break;
        case JSON_TOKEN_OBJECT_END:
            /* empty object: fall through to consume the '}' below */
            break;
        default:
            /* case of an invalid initial token inside the object */
            report_parse_error(JSON_PARSE_OBJECT_START, lex);
    }

    lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);

    lex->lex_level--;

    if (oend != NULL)
        (*oend) (sem->semstate);
}
00429 
00430 static void
00431 parse_array_element(JsonLexContext *lex, JsonSemAction sem)
00432 {
00433     json_aelem_action astart = sem->array_element_start;
00434     json_aelem_action aend = sem->array_element_end;
00435     JsonTokenType tok = lex_peek(lex);
00436 
00437     bool        isnull;
00438 
00439     isnull = tok == JSON_TOKEN_NULL;
00440 
00441     if (astart != NULL)
00442         (*astart) (sem->semstate, isnull);
00443 
00444     /* an array element is any object, array or scalar */
00445     switch (tok)
00446     {
00447         case JSON_TOKEN_OBJECT_START:
00448             parse_object(lex, sem);
00449             break;
00450         case JSON_TOKEN_ARRAY_START:
00451             parse_array(lex, sem);
00452             break;
00453         default:
00454             parse_scalar(lex, sem);
00455     }
00456 
00457     if (aend != NULL)
00458         (*aend) (sem->semstate, isnull);
00459 }
00460 
static void
parse_array(JsonLexContext *lex, JsonSemAction sem)
{
    /*
     * an array is a possibly empty sequence of array elements, separated by
     * commas and surrounded by square brackets.
     */
    json_struct_action astart = sem->array_start;
    json_struct_action aend = sem->array_end;

    if (astart != NULL)
        (*astart) (sem->semstate);

    /*
     * Data inside an array is at a higher nesting level than the array
     * itself. Note that we increment this after we call the semantic routine
     * for the array start and restore it before we call the routine for the
     * array end.
     */
    lex->lex_level++;

    lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
    if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
    {
        /* at least one element: parse it, then any comma-separated rest */
        parse_array_element(lex, sem);

        while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
            parse_array_element(lex, sem);
    }

    lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);

    lex->lex_level--;

    if (aend != NULL)
        (*aend) (sem->semstate);
}
00499 
/*
 * Lex one token from the input stream.
 *
 * On exit, lex->token_type identifies the token, token_start and
 * token_terminator bracket its text, and prev_token_terminator points just
 * past the previous token (used when reporting errors in context).
 */
static inline void
json_lex(JsonLexContext *lex)
{
    char       *s;
    int         len;

    /* Skip leading whitespace. */
    s = lex->token_terminator;
    len = s - lex->input;
    while (len < lex->input_length &&
           (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
    {
        if (*s == '\n')
            ++lex->line_number;
        ++s;
        ++len;
    }
    lex->token_start = s;

    /* Determine token type. */
    if (len >= lex->input_length)
    {
        /* input exhausted: report an end-of-input token */
        lex->token_start = NULL;
        lex->prev_token_terminator = lex->token_terminator;
        lex->token_terminator = s;
        lex->token_type = JSON_TOKEN_END;
    }
    else
        switch (*s)
        {
                /* Single-character token, some kind of punctuation mark. */
            case '{':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_OBJECT_START;
                break;
            case '}':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_OBJECT_END;
                break;
            case '[':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_ARRAY_START;
                break;
            case ']':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_ARRAY_END;
                break;
            case ',':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_COMMA;
                break;
            case ':':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_COLON;
                break;
            case '"':
                /* string; json_lex_string sets the terminator fields */
                json_lex_string(lex);
                lex->token_type = JSON_TOKEN_STRING;
                break;
            case '-':
                /* Negative number. */
                json_lex_number(lex, s + 1);
                lex->token_type = JSON_TOKEN_NUMBER;
                break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                /* Positive number. */
                json_lex_number(lex, s);
                lex->token_type = JSON_TOKEN_NUMBER;
                break;
            default:
                {
                    char       *p;

                    /*
                     * We're not dealing with a string, number, legal
                     * punctuation mark, or end of string.  The only legal
                     * tokens we might find here are true, false, and null,
                     * but for error reporting purposes we scan until we see a
                     * non-alphanumeric character.  That way, we can report
                     * the whole word as an unexpected token, rather than just
                     * some unintuitive prefix thereof.
                     */
                    for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++)
                         /* skip */ ;

                    /*
                     * We got some sort of unexpected punctuation or an
                     * otherwise unexpected character, so just complain about
                     * that one character.
                     */
                    if (p == s)
                    {
                        lex->prev_token_terminator = lex->token_terminator;
                        lex->token_terminator = s + 1;
                        report_invalid_token(lex);
                    }

                    /*
                     * We've got a real alphanumeric token here.  If it
                     * happens to be true, false, or null, all is well.  If
                     * not, error out.
                     */
                    lex->prev_token_terminator = lex->token_terminator;
                    lex->token_terminator = p;
                    if (p - s == 4)
                    {
                        if (memcmp(s, "true", 4) == 0)
                            lex->token_type = JSON_TOKEN_TRUE;
                        else if (memcmp(s, "null", 4) == 0)
                            lex->token_type = JSON_TOKEN_NULL;
                        else
                            report_invalid_token(lex);
                    }
                    else if (p - s == 5 && memcmp(s, "false", 5) == 0)
                        lex->token_type = JSON_TOKEN_FALSE;
                    else
                        report_invalid_token(lex);

                }
        }                       /* end of switch */
}
00640 
/*
 * The next token in the input stream is known to be a string; lex it.
 *
 * If lex->strval is non-NULL, the de-escaped string value is accumulated
 * there as a side effect; otherwise escapes are merely validated.
 */
static inline void
json_lex_string(JsonLexContext *lex)
{
    char       *s;
    int         len;

    if (lex->strval != NULL)
        resetStringInfo(lex->strval);

    /* len tracks the offset of s into the input; start past the '"' */
    len = lex->token_start - lex->input;
    len++;
    for (s = lex->token_start + 1; *s != '"'; s++, len++)
    {
        /* Premature end of the string. */
        if (len >= lex->input_length)
        {
            lex->token_terminator = s;
            report_invalid_token(lex);
        }
        else if ((unsigned char) *s < 32)
        {
            /* Per RFC4627, these characters MUST be escaped. */
            /* Since *s isn't printable, exclude it from the context string */
            lex->token_terminator = s;
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                     errmsg("invalid input syntax for type json"),
                     errdetail("Character with value 0x%02x must be escaped.",
                               (unsigned char) *s),
                     report_json_context(lex)));
        }
        else if (*s == '\\')
        {
            /* OK, we have an escape character. */
            s++;
            len++;
            if (len >= lex->input_length)
            {
                lex->token_terminator = s;
                report_invalid_token(lex);
            }
            else if (*s == 'u')
            {
                int         i;
                int         ch = 0;

                /* accumulate exactly four hex digits into ch */
                for (i = 1; i <= 4; i++)
                {
                    s++;
                    len++;
                    if (len >= lex->input_length)
                    {
                        lex->token_terminator = s;
                        report_invalid_token(lex);
                    }
                    else if (*s >= '0' && *s <= '9')
                        ch = (ch * 16) + (*s - '0');
                    else if (*s >= 'a' && *s <= 'f')
                        ch = (ch * 16) + (*s - 'a') + 10;
                    else if (*s >= 'A' && *s <= 'F')
                        ch = (ch * 16) + (*s - 'A') + 10;
                    else
                    {
                        lex->token_terminator = s + pg_mblen(s);
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("invalid input syntax for type json"),
                                 errdetail("\"\\u\" must be followed by four hexadecimal digits."),
                                 report_json_context(lex)));
                    }
                }
                if (lex->strval != NULL)
                {
                    /* de-escaping: encode code point, convert to server encoding */
                    char        utf8str[5];
                    int         utf8len;
                    char       *converted;

                    unicode_to_utf8(ch, (unsigned char *) utf8str);
                    utf8len = pg_utf_mblen((unsigned char *) utf8str);
                    utf8str[utf8len] = '\0';
                    converted = pg_any_to_server(utf8str, utf8len, PG_UTF8);
                    appendStringInfoString(lex->strval, converted);
                    /* pg_any_to_server may return its input unchanged */
                    if (converted != utf8str)
                        pfree(converted);

                }
            }
            else if (lex->strval != NULL)
            {
                /* de-escaping requested: translate two-character escapes */
                switch (*s)
                {
                    case '"':
                    case '\\':
                    case '/':
                        appendStringInfoChar(lex->strval, *s);
                        break;
                    case 'b':
                        appendStringInfoChar(lex->strval, '\b');
                        break;
                    case 'f':
                        appendStringInfoChar(lex->strval, '\f');
                        break;
                    case 'n':
                        appendStringInfoChar(lex->strval, '\n');
                        break;
                    case 'r':
                        appendStringInfoChar(lex->strval, '\r');
                        break;
                    case 't':
                        appendStringInfoChar(lex->strval, '\t');
                        break;
                    default:
                        /* Not a valid string escape, so error out. */
                        lex->token_terminator = s + pg_mblen(s);
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("invalid input syntax for type json"),
                            errdetail("Escape sequence \"\\%s\" is invalid.",
                                      extract_mb_char(s)),
                                 report_json_context(lex)));
                }
            }
            else if (strchr("\"\\/bfnrt", *s) == NULL)
            {
                /*
                 * Simpler processing if we're not bothered about de-escaping
                 *
                 * It's very tempting to remove the strchr() call here and
                 * replace it with a switch statement, but testing so far has
                 * shown it's not a performance win.
                 */
                lex->token_terminator = s + pg_mblen(s);
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type json"),
                         errdetail("Escape sequence \"\\%s\" is invalid.",
                                   extract_mb_char(s)),
                         report_json_context(lex)));
            }

        }
        else if (lex->strval != NULL)
        {
            /* ordinary character: copy it into the de-escaped value */
            appendStringInfoChar(lex->strval, *s);
        }

    }

    /* Hooray, we found the end of the string! */
    lex->prev_token_terminator = lex->token_terminator;
    lex->token_terminator = s + 1;
}
00796 
/*-------------------------------------------------------------------------
 * The next token in the input stream is known to be a number; lex it.
 *
 * In JSON, a number consists of four parts:
 *
 * (1) An optional minus sign ('-').
 *
 * (2) Either a single '0', or a string of one or more digits that does not
 *     begin with a '0'.
 *
 * (3) An optional decimal part, consisting of a period ('.') followed by
 *     one or more digits.  (Note: While this part can be omitted
 *     completely, it's not OK to have only the decimal point without
 *     any digits afterwards.)
 *
 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 *     followed by '+' or '-', followed by one or more digits.  (Note:
 *     As with the decimal part, if 'e' or 'E' is present, it must be
 *     followed by at least one digit.)
 *
 * The 's' argument to this function points to the ostensible beginning
 * of part 2 - i.e. the character after any optional minus sign, and the
 * first character of the string if there is none.
 *
 *-------------------------------------------------------------------------
 */
static inline void
json_lex_number(JsonLexContext *lex, char *s)
{
    bool        error = false;
    char       *p;
    int         len;

    /* len tracks the offset of s into the input, for end-of-input checks */
    len = s - lex->input;
    /* Part (1): leading sign indicator. */
    /* Caller already did this for us; so do nothing. */

    /* Part (2): parse main digit string. */
    if (*s == '0')
    {
        s++;
        len++;
    }
    else if (*s >= '1' && *s <= '9')
    {
        do
        {
            s++;
            len++;
        } while (*s >= '0' && *s <= '9' && len < lex->input_length);
    }
    else
        error = true;           /* no digits at all */

    /* Part (3): parse optional decimal portion. */
    if (len < lex->input_length && *s == '.')
    {
        s++;
        len++;
        /* at least one digit must follow the decimal point */
        if (len == lex->input_length || *s < '0' || *s > '9')
            error = true;
        else
        {
            do
            {
                s++;
                len++;
            } while (*s >= '0' && *s <= '9' && len < lex->input_length);
        }
    }

    /* Part (4): parse optional exponent. */
    if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    {
        s++;
        len++;
        if (len < lex->input_length && (*s == '+' || *s == '-'))
        {
            s++;
            len++;
        }
        /* at least one digit must follow the exponent marker/sign */
        if (len == lex->input_length || *s < '0' || *s > '9')
            error = true;
        else
        {
            do
            {
                s++;
                len++;
            } while (len < lex->input_length && *s >= '0' && *s <= '9');
        }
    }

    /*
     * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
     * here should be considered part of the token for error-reporting
     * purposes.
     */
    for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && len < lex->input_length; p++, len++)
        error = true;
    lex->prev_token_terminator = lex->token_terminator;
    lex->token_terminator = p;
    if (error)
        report_invalid_token(lex);
}
00902 
00903 /*
00904  * Report a parse error.
00905  *
00906  * lex->token_start and lex->token_terminator must identify the current token.
00907  */
00908 static void
00909 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
00910 {
00911     char       *token;
00912     int         toklen;
00913 
00914     /* Handle case where the input ended prematurely. */
00915     if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
00916         ereport(ERROR,
00917                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00918                  errmsg("invalid input syntax for type json"),
00919                  errdetail("The input string ended unexpectedly."),
00920                  report_json_context(lex)));
00921 
00922     /* Separate out the current token. */
00923     toklen = lex->token_terminator - lex->token_start;
00924     token = palloc(toklen + 1);
00925     memcpy(token, lex->token_start, toklen);
00926     token[toklen] = '\0';
00927 
00928     /* Complain, with the appropriate detail message. */
00929     if (ctx == JSON_PARSE_END)
00930         ereport(ERROR,
00931                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00932                  errmsg("invalid input syntax for type json"),
00933                  errdetail("Expected end of input, but found \"%s\".",
00934                            token),
00935                  report_json_context(lex)));
00936     else
00937     {
00938         switch (ctx)
00939         {
00940             case JSON_PARSE_VALUE:
00941                 ereport(ERROR,
00942                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00943                          errmsg("invalid input syntax for type json"),
00944                          errdetail("Expected JSON value, but found \"%s\".",
00945                                    token),
00946                          report_json_context(lex)));
00947                 break;
00948             case JSON_PARSE_STRING:
00949                 ereport(ERROR,
00950                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00951                          errmsg("invalid input syntax for type json"),
00952                          errdetail("Expected string, but found \"%s\".",
00953                                    token),
00954                          report_json_context(lex)));
00955                 break;
00956             case JSON_PARSE_ARRAY_START:
00957                 ereport(ERROR,
00958                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00959                          errmsg("invalid input syntax for type json"),
00960                          errdetail("Expected array element or \"]\", but found \"%s\".",
00961                                    token),
00962                          report_json_context(lex)));
00963                 break;
00964             case JSON_PARSE_ARRAY_NEXT:
00965                 ereport(ERROR,
00966                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00967                          errmsg("invalid input syntax for type json"),
00968                       errdetail("Expected \",\" or \"]\", but found \"%s\".",
00969                                 token),
00970                          report_json_context(lex)));
00971                 break;
00972             case JSON_PARSE_OBJECT_START:
00973                 ereport(ERROR,
00974                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00975                          errmsg("invalid input syntax for type json"),
00976                      errdetail("Expected string or \"}\", but found \"%s\".",
00977                                token),
00978                          report_json_context(lex)));
00979                 break;
00980             case JSON_PARSE_OBJECT_LABEL:
00981                 ereport(ERROR,
00982                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00983                          errmsg("invalid input syntax for type json"),
00984                          errdetail("Expected \":\", but found \"%s\".",
00985                                    token),
00986                          report_json_context(lex)));
00987                 break;
00988             case JSON_PARSE_OBJECT_NEXT:
00989                 ereport(ERROR,
00990                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00991                          errmsg("invalid input syntax for type json"),
00992                       errdetail("Expected \",\" or \"}\", but found \"%s\".",
00993                                 token),
00994                          report_json_context(lex)));
00995                 break;
00996             case JSON_PARSE_OBJECT_COMMA:
00997                 ereport(ERROR,
00998                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00999                          errmsg("invalid input syntax for type json"),
01000                          errdetail("Expected string, but found \"%s\".",
01001                                    token),
01002                          report_json_context(lex)));
01003                 break;
01004             default:
01005                 elog(ERROR, "unexpected json parse state: %d", ctx);
01006         }
01007     }
01008 }
01009 
01010 /*
01011  * Report an invalid input token.
01012  *
01013  * lex->token_start and lex->token_terminator must identify the token.
01014  */
01015 static void
01016 report_invalid_token(JsonLexContext *lex)
01017 {
01018     char       *token;
01019     int         toklen;
01020 
01021     /* Separate out the offending token. */
01022     toklen = lex->token_terminator - lex->token_start;
01023     token = palloc(toklen + 1);
01024     memcpy(token, lex->token_start, toklen);
01025     token[toklen] = '\0';
01026 
01027     ereport(ERROR,
01028             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
01029              errmsg("invalid input syntax for type json"),
01030              errdetail("Token \"%s\" is invalid.", token),
01031              report_json_context(lex)));
01032 }
01033 
01034 /*
01035  * Report a CONTEXT line for bogus JSON input.
01036  *
01037  * lex->token_terminator must be set to identify the spot where we detected
01038  * the error.  Note that lex->token_start might be NULL, in case we recognized
01039  * error at EOF.
01040  *
01041  * The return value isn't meaningful, but we make it non-void so that this
01042  * can be invoked inside ereport().
01043  */
01044 static int
01045 report_json_context(JsonLexContext *lex)
01046 {
01047     const char *context_start;
01048     const char *context_end;
01049     const char *line_start;
01050     int         line_number;
01051     char       *ctxt;
01052     int         ctxtlen;
01053     const char *prefix;
01054     const char *suffix;
01055 
01056     /* Choose boundaries for the part of the input we will display */
01057     context_start = lex->input;
01058     context_end = lex->token_terminator;
01059     line_start = context_start;
01060     line_number = 1;
01061     for (;;)
01062     {
01063         /* Always advance over newlines (context_end test is just paranoia) */
01064         if (*context_start == '\n' && context_start < context_end)
01065         {
01066             context_start++;
01067             line_start = context_start;
01068             line_number++;
01069             continue;
01070         }
01071         /* Otherwise, done as soon as we are close enough to context_end */
01072         if (context_end - context_start < 50)
01073             break;
01074         /* Advance to next multibyte character */
01075         if (IS_HIGHBIT_SET(*context_start))
01076             context_start += pg_mblen(context_start);
01077         else
01078             context_start++;
01079     }
01080 
01081     /*
01082      * We add "..." to indicate that the excerpt doesn't start at the
01083      * beginning of the line ... but if we're within 3 characters of the
01084      * beginning of the line, we might as well just show the whole line.
01085      */
01086     if (context_start - line_start <= 3)
01087         context_start = line_start;
01088 
01089     /* Get a null-terminated copy of the data to present */
01090     ctxtlen = context_end - context_start;
01091     ctxt = palloc(ctxtlen + 1);
01092     memcpy(ctxt, context_start, ctxtlen);
01093     ctxt[ctxtlen] = '\0';
01094 
01095     /*
01096      * Show the context, prefixing "..." if not starting at start of line, and
01097      * suffixing "..." if not ending at end of line.
01098      */
01099     prefix = (context_start > line_start) ? "..." : "";
01100     suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
01101 
01102     return errcontext("JSON data, line %d: %s%s%s",
01103                       line_number, prefix, ctxt, suffix);
01104 }
01105 
01106 /*
01107  * Extract a single, possibly multi-byte char from the input string.
01108  */
01109 static char *
01110 extract_mb_char(char *s)
01111 {
01112     char       *res;
01113     int         len;
01114 
01115     len = pg_mblen(s);
01116     res = palloc(len + 1);
01117     memcpy(res, s, len);
01118     res[len] = '\0';
01119 
01120     return res;
01121 }
01122 
01123 /*
01124  * Turn a scalar Datum into JSON, appending the string to "result".
01125  *
01126  * Hand off a non-scalar datum to composite_to_json or array_to_json_internal
01127  * as appropriate.
01128  */
01129 static void
01130 datum_to_json(Datum val, bool is_null, StringInfo result,
01131               TYPCATEGORY tcategory, Oid typoutputfunc)
01132 {
01133     char       *outputstr;
01134     text       *jsontext;
01135 
01136     if (is_null)
01137     {
01138         appendStringInfoString(result, "null");
01139         return;
01140     }
01141 
01142     switch (tcategory)
01143     {
01144         case TYPCATEGORY_ARRAY:
01145             array_to_json_internal(val, result, false);
01146             break;
01147         case TYPCATEGORY_COMPOSITE:
01148             composite_to_json(val, result, false);
01149             break;
01150         case TYPCATEGORY_BOOLEAN:
01151             if (DatumGetBool(val))
01152                 appendStringInfoString(result, "true");
01153             else
01154                 appendStringInfoString(result, "false");
01155             break;
01156         case TYPCATEGORY_NUMERIC:
01157             outputstr = OidOutputFunctionCall(typoutputfunc, val);
01158 
01159             /*
01160              * Don't call escape_json here if it's a valid JSON number.
01161              * Numeric output should usually be a valid JSON number and JSON
01162              * numbers shouldn't be quoted. Quote cases like "Nan" and
01163              * "Infinity", however.
01164              */
01165             if (strpbrk(outputstr, NON_NUMERIC_LETTER) == NULL)
01166                 appendStringInfoString(result, outputstr);
01167             else
01168                 escape_json(result, outputstr);
01169             pfree(outputstr);
01170             break;
01171         case TYPCATEGORY_JSON:
01172             /* JSON will already be escaped */
01173             outputstr = OidOutputFunctionCall(typoutputfunc, val);
01174             appendStringInfoString(result, outputstr);
01175             pfree(outputstr);
01176             break;
01177         case TYPCATEGORY_JSON_CAST:
01178             jsontext = DatumGetTextP(OidFunctionCall1(typoutputfunc, val));
01179             outputstr = text_to_cstring(jsontext);
01180             appendStringInfoString(result, outputstr);
01181             pfree(outputstr);
01182             pfree(jsontext);
01183             break;
01184         default:
01185             outputstr = OidOutputFunctionCall(typoutputfunc, val);
01186             escape_json(result, outputstr);
01187             pfree(outputstr);
01188             break;
01189     }
01190 }
01191 
01192 /*
01193  * Process a single dimension of an array.
01194  * If it's the innermost dimension, output the values, otherwise call
01195  * ourselves recursively to process the next dimension.
01196  */
01197 static void
01198 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
01199                   bool *nulls, int *valcount, TYPCATEGORY tcategory,
01200                   Oid typoutputfunc, bool use_line_feeds)
01201 {
01202     int         i;
01203     const char *sep;
01204 
01205     Assert(dim < ndims);
01206 
01207     sep = use_line_feeds ? ",\n " : ",";
01208 
01209     appendStringInfoChar(result, '[');
01210 
01211     for (i = 1; i <= dims[dim]; i++)
01212     {
01213         if (i > 1)
01214             appendStringInfoString(result, sep);
01215 
01216         if (dim + 1 == ndims)
01217         {
01218             datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
01219                           typoutputfunc);
01220             (*valcount)++;
01221         }
01222         else
01223         {
01224             /*
01225              * Do we want line feeds on inner dimensions of arrays? For now
01226              * we'll say no.
01227              */
01228             array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
01229                               valcount, tcategory, typoutputfunc, false);
01230         }
01231     }
01232 
01233     appendStringInfoChar(result, ']');
01234 }
01235 
01236 /*
01237  * Turn an array into JSON.
01238  */
01239 static void
01240 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
01241 {
01242     ArrayType  *v = DatumGetArrayTypeP(array);
01243     Oid         element_type = ARR_ELEMTYPE(v);
01244     int        *dim;
01245     int         ndim;
01246     int         nitems;
01247     int         count = 0;
01248     Datum      *elements;
01249     bool       *nulls;
01250     int16       typlen;
01251     bool        typbyval;
01252     char        typalign,
01253                 typdelim;
01254     Oid         typioparam;
01255     Oid         typoutputfunc;
01256     TYPCATEGORY tcategory;
01257     Oid         castfunc = InvalidOid;
01258 
01259     ndim = ARR_NDIM(v);
01260     dim = ARR_DIMS(v);
01261     nitems = ArrayGetNItems(ndim, dim);
01262 
01263     if (nitems <= 0)
01264     {
01265         appendStringInfoString(result, "[]");
01266         return;
01267     }
01268 
01269     get_type_io_data(element_type, IOFunc_output,
01270                      &typlen, &typbyval, &typalign,
01271                      &typdelim, &typioparam, &typoutputfunc);
01272 
01273     if (element_type > FirstNormalObjectId)
01274     {
01275         HeapTuple   tuple;
01276         Form_pg_cast castForm;
01277 
01278         tuple = SearchSysCache2(CASTSOURCETARGET,
01279                                 ObjectIdGetDatum(element_type),
01280                                 ObjectIdGetDatum(JSONOID));
01281         if (HeapTupleIsValid(tuple))
01282         {
01283             castForm = (Form_pg_cast) GETSTRUCT(tuple);
01284 
01285             if (castForm->castmethod == COERCION_METHOD_FUNCTION)
01286                 castfunc = typoutputfunc = castForm->castfunc;
01287 
01288             ReleaseSysCache(tuple);
01289         }
01290     }
01291 
01292     deconstruct_array(v, element_type, typlen, typbyval,
01293                       typalign, &elements, &nulls,
01294                       &nitems);
01295 
01296     if (castfunc != InvalidOid)
01297         tcategory = TYPCATEGORY_JSON_CAST;
01298     else if (element_type == RECORDOID)
01299         tcategory = TYPCATEGORY_COMPOSITE;
01300     else if (element_type == JSONOID)
01301         tcategory = TYPCATEGORY_JSON;
01302     else
01303         tcategory = TypeCategory(element_type);
01304 
01305     array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
01306                       typoutputfunc, use_line_feeds);
01307 
01308     pfree(elements);
01309     pfree(nulls);
01310 }
01311 
01312 /*
01313  * Turn a composite / record into JSON.
01314  */
01315 static void
01316 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
01317 {
01318     HeapTupleHeader td;
01319     Oid         tupType;
01320     int32       tupTypmod;
01321     TupleDesc   tupdesc;
01322     HeapTupleData tmptup,
01323                *tuple;
01324     int         i;
01325     bool        needsep = false;
01326     const char *sep;
01327 
01328     sep = use_line_feeds ? ",\n " : ",";
01329 
01330     td = DatumGetHeapTupleHeader(composite);
01331 
01332     /* Extract rowtype info and find a tupdesc */
01333     tupType = HeapTupleHeaderGetTypeId(td);
01334     tupTypmod = HeapTupleHeaderGetTypMod(td);
01335     tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
01336 
01337     /* Build a temporary HeapTuple control structure */
01338     tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
01339     tmptup.t_data = td;
01340     tuple = &tmptup;
01341 
01342     appendStringInfoChar(result, '{');
01343 
01344     for (i = 0; i < tupdesc->natts; i++)
01345     {
01346         Datum       val,
01347                     origval;
01348         bool        isnull;
01349         char       *attname;
01350         TYPCATEGORY tcategory;
01351         Oid         typoutput;
01352         bool        typisvarlena;
01353         Oid         castfunc = InvalidOid;
01354 
01355         if (tupdesc->attrs[i]->attisdropped)
01356             continue;
01357 
01358         if (needsep)
01359             appendStringInfoString(result, sep);
01360         needsep = true;
01361 
01362         attname = NameStr(tupdesc->attrs[i]->attname);
01363         escape_json(result, attname);
01364         appendStringInfoChar(result, ':');
01365 
01366         origval = heap_getattr(tuple, i + 1, tupdesc, &isnull);
01367 
01368         getTypeOutputInfo(tupdesc->attrs[i]->atttypid,
01369                           &typoutput, &typisvarlena);
01370 
01371         if (tupdesc->attrs[i]->atttypid > FirstNormalObjectId)
01372         {
01373             HeapTuple   cast_tuple;
01374             Form_pg_cast castForm;
01375 
01376             cast_tuple = SearchSysCache2(CASTSOURCETARGET,
01377                                ObjectIdGetDatum(tupdesc->attrs[i]->atttypid),
01378                                          ObjectIdGetDatum(JSONOID));
01379             if (HeapTupleIsValid(cast_tuple))
01380             {
01381                 castForm = (Form_pg_cast) GETSTRUCT(cast_tuple);
01382 
01383                 if (castForm->castmethod == COERCION_METHOD_FUNCTION)
01384                     castfunc = typoutput = castForm->castfunc;
01385 
01386                 ReleaseSysCache(cast_tuple);
01387             }
01388         }
01389 
01390         if (castfunc != InvalidOid)
01391             tcategory = TYPCATEGORY_JSON_CAST;
01392         else if (tupdesc->attrs[i]->atttypid == RECORDARRAYOID)
01393             tcategory = TYPCATEGORY_ARRAY;
01394         else if (tupdesc->attrs[i]->atttypid == RECORDOID)
01395             tcategory = TYPCATEGORY_COMPOSITE;
01396         else if (tupdesc->attrs[i]->atttypid == JSONOID)
01397             tcategory = TYPCATEGORY_JSON;
01398         else
01399             tcategory = TypeCategory(tupdesc->attrs[i]->atttypid);
01400 
01401         /*
01402          * If we have a toasted datum, forcibly detoast it here to avoid
01403          * memory leakage inside the type's output routine.
01404          */
01405         if (typisvarlena && !isnull)
01406             val = PointerGetDatum(PG_DETOAST_DATUM(origval));
01407         else
01408             val = origval;
01409 
01410         datum_to_json(val, isnull, result, tcategory, typoutput);
01411 
01412         /* Clean up detoasted copy, if any */
01413         if (val != origval)
01414             pfree(DatumGetPointer(val));
01415     }
01416 
01417     appendStringInfoChar(result, '}');
01418     ReleaseTupleDesc(tupdesc);
01419 }
01420 
01421 /*
01422  * SQL function array_to_json(row)
01423  */
01424 extern Datum
01425 array_to_json(PG_FUNCTION_ARGS)
01426 {
01427     Datum       array = PG_GETARG_DATUM(0);
01428     StringInfo  result;
01429 
01430     result = makeStringInfo();
01431 
01432     array_to_json_internal(array, result, false);
01433 
01434     PG_RETURN_TEXT_P(cstring_to_text(result->data));
01435 }
01436 
01437 /*
01438  * SQL function array_to_json(row, prettybool)
01439  */
01440 extern Datum
01441 array_to_json_pretty(PG_FUNCTION_ARGS)
01442 {
01443     Datum       array = PG_GETARG_DATUM(0);
01444     bool        use_line_feeds = PG_GETARG_BOOL(1);
01445     StringInfo  result;
01446 
01447     result = makeStringInfo();
01448 
01449     array_to_json_internal(array, result, use_line_feeds);
01450 
01451     PG_RETURN_TEXT_P(cstring_to_text(result->data));
01452 }
01453 
01454 /*
01455  * SQL function row_to_json(row)
01456  */
01457 extern Datum
01458 row_to_json(PG_FUNCTION_ARGS)
01459 {
01460     Datum       array = PG_GETARG_DATUM(0);
01461     StringInfo  result;
01462 
01463     result = makeStringInfo();
01464 
01465     composite_to_json(array, result, false);
01466 
01467     PG_RETURN_TEXT_P(cstring_to_text(result->data));
01468 }
01469 
01470 /*
01471  * SQL function row_to_json(row, prettybool)
01472  */
01473 extern Datum
01474 row_to_json_pretty(PG_FUNCTION_ARGS)
01475 {
01476     Datum       array = PG_GETARG_DATUM(0);
01477     bool        use_line_feeds = PG_GETARG_BOOL(1);
01478     StringInfo  result;
01479 
01480     result = makeStringInfo();
01481 
01482     composite_to_json(array, result, use_line_feeds);
01483 
01484     PG_RETURN_TEXT_P(cstring_to_text(result->data));
01485 }
01486 
01487 /*
01488  * SQL function to_json(anyvalue)
01489  */
01490 Datum
01491 to_json(PG_FUNCTION_ARGS)
01492 {
01493     Oid         val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
01494     StringInfo  result;
01495     Datum       orig_val,
01496                 val;
01497     TYPCATEGORY tcategory;
01498     Oid         typoutput;
01499     bool        typisvarlena;
01500     Oid         castfunc = InvalidOid;
01501 
01502     if (val_type == InvalidOid)
01503         ereport(ERROR,
01504                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01505                  errmsg("could not determine input data type")));
01506 
01507 
01508     result = makeStringInfo();
01509 
01510     orig_val = PG_ARGISNULL(0) ? (Datum) 0 : PG_GETARG_DATUM(0);
01511 
01512     getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
01513 
01514     if (val_type > FirstNormalObjectId)
01515     {
01516         HeapTuple   tuple;
01517         Form_pg_cast castForm;
01518 
01519         tuple = SearchSysCache2(CASTSOURCETARGET,
01520                                 ObjectIdGetDatum(val_type),
01521                                 ObjectIdGetDatum(JSONOID));
01522         if (HeapTupleIsValid(tuple))
01523         {
01524             castForm = (Form_pg_cast) GETSTRUCT(tuple);
01525 
01526             if (castForm->castmethod == COERCION_METHOD_FUNCTION)
01527                 castfunc = typoutput = castForm->castfunc;
01528 
01529             ReleaseSysCache(tuple);
01530         }
01531     }
01532 
01533     if (castfunc != InvalidOid)
01534         tcategory = TYPCATEGORY_JSON_CAST;
01535     else if (val_type == RECORDARRAYOID)
01536         tcategory = TYPCATEGORY_ARRAY;
01537     else if (val_type == RECORDOID)
01538         tcategory = TYPCATEGORY_COMPOSITE;
01539     else if (val_type == JSONOID)
01540         tcategory = TYPCATEGORY_JSON;
01541     else
01542         tcategory = TypeCategory(val_type);
01543 
01544     /*
01545      * If we have a toasted datum, forcibly detoast it here to avoid memory
01546      * leakage inside the type's output routine.
01547      */
01548     if (typisvarlena && orig_val != (Datum) 0)
01549         val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
01550     else
01551         val = orig_val;
01552 
01553     datum_to_json(val, false, result, tcategory, typoutput);
01554 
01555     /* Clean up detoasted copy, if any */
01556     if (val != orig_val)
01557         pfree(DatumGetPointer(val));
01558 
01559     PG_RETURN_TEXT_P(cstring_to_text(result->data));
01560 }
01561 
01562 /*
01563  * json_agg transition function
01564  */
01565 Datum
01566 json_agg_transfn(PG_FUNCTION_ARGS)
01567 {
01568     Oid         val_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
01569     MemoryContext aggcontext,
01570                 oldcontext;
01571     StringInfo  state;
01572     Datum       orig_val,
01573                 val;
01574     TYPCATEGORY tcategory;
01575     Oid         typoutput;
01576     bool        typisvarlena;
01577     Oid         castfunc = InvalidOid;
01578 
01579     if (val_type == InvalidOid)
01580         ereport(ERROR,
01581                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
01582                  errmsg("could not determine input data type")));
01583 
01584     if (!AggCheckCallContext(fcinfo, &aggcontext))
01585     {
01586         /* cannot be called directly because of internal-type argument */
01587         elog(ERROR, "json_agg_transfn called in non-aggregate context");
01588     }
01589 
01590     if (PG_ARGISNULL(0))
01591     {
01592         /*
01593          * Make this StringInfo in a context where it will persist for the
01594          * duration off the aggregate call. It's only needed for this initial
01595          * piece, as the StringInfo routines make sure they use the right
01596          * context to enlarge the object if necessary.
01597          */
01598         oldcontext = MemoryContextSwitchTo(aggcontext);
01599         state = makeStringInfo();
01600         MemoryContextSwitchTo(oldcontext);
01601 
01602         appendStringInfoChar(state, '[');
01603     }
01604     else
01605     {
01606         state = (StringInfo) PG_GETARG_POINTER(0);
01607         appendStringInfoString(state, ", ");
01608     }
01609 
01610     /* fast path for NULLs */
01611     if (PG_ARGISNULL(1))
01612     {
01613         orig_val = (Datum) 0;
01614         datum_to_json(orig_val, true, state, 0, InvalidOid);
01615         PG_RETURN_POINTER(state);
01616     }
01617 
01618 
01619     orig_val = PG_GETARG_DATUM(1);
01620 
01621     getTypeOutputInfo(val_type, &typoutput, &typisvarlena);
01622 
01623     if (val_type > FirstNormalObjectId)
01624     {
01625         HeapTuple   tuple;
01626         Form_pg_cast castForm;
01627 
01628         tuple = SearchSysCache2(CASTSOURCETARGET,
01629                                 ObjectIdGetDatum(val_type),
01630                                 ObjectIdGetDatum(JSONOID));
01631         if (HeapTupleIsValid(tuple))
01632         {
01633             castForm = (Form_pg_cast) GETSTRUCT(tuple);
01634 
01635             if (castForm->castmethod == COERCION_METHOD_FUNCTION)
01636                 castfunc = typoutput = castForm->castfunc;
01637 
01638             ReleaseSysCache(tuple);
01639         }
01640     }
01641 
01642     if (castfunc != InvalidOid)
01643         tcategory = TYPCATEGORY_JSON_CAST;
01644     else if (val_type == RECORDARRAYOID)
01645         tcategory = TYPCATEGORY_ARRAY;
01646     else if (val_type == RECORDOID)
01647         tcategory = TYPCATEGORY_COMPOSITE;
01648     else if (val_type == JSONOID)
01649         tcategory = TYPCATEGORY_JSON;
01650     else
01651         tcategory = TypeCategory(val_type);
01652 
01653     /*
01654      * If we have a toasted datum, forcibly detoast it here to avoid memory
01655      * leakage inside the type's output routine.
01656      */
01657     if (typisvarlena)
01658         val = PointerGetDatum(PG_DETOAST_DATUM(orig_val));
01659     else
01660         val = orig_val;
01661 
01662     if (!PG_ARGISNULL(0) &&
01663       (tcategory == TYPCATEGORY_ARRAY || tcategory == TYPCATEGORY_COMPOSITE))
01664     {
01665         appendStringInfoString(state, "\n ");
01666     }
01667 
01668     datum_to_json(val, false, state, tcategory, typoutput);
01669 
01670     /* Clean up detoasted copy, if any */
01671     if (val != orig_val)
01672         pfree(DatumGetPointer(val));
01673 
01674     /*
01675      * The transition type for array_agg() is declared to be "internal", which
01676      * is a pass-by-value type the same size as a pointer.  So we can safely
01677      * pass the ArrayBuildState pointer through nodeAgg.c's machinations.
01678      */
01679     PG_RETURN_POINTER(state);
01680 }
01681 
01682 /*
01683  * json_agg final function
01684  */
01685 Datum
01686 json_agg_finalfn(PG_FUNCTION_ARGS)
01687 {
01688     StringInfo  state;
01689 
01690     /* cannot be called directly because of internal-type argument */
01691     Assert(AggCheckCallContext(fcinfo, NULL));
01692 
01693     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
01694 
01695     if (state == NULL)
01696         PG_RETURN_NULL();
01697 
01698     appendStringInfoChar(state, ']');
01699 
01700     PG_RETURN_TEXT_P(cstring_to_text(state->data));
01701 }
01702 
01703 /*
01704  * Produce a JSON string literal, properly escaping characters in the text.
01705  */
01706 void
01707 escape_json(StringInfo buf, const char *str)
01708 {
01709     const char *p;
01710 
01711     appendStringInfoCharMacro(buf, '\"');
01712     for (p = str; *p; p++)
01713     {
01714         switch (*p)
01715         {
01716             case '\b':
01717                 appendStringInfoString(buf, "\\b");
01718                 break;
01719             case '\f':
01720                 appendStringInfoString(buf, "\\f");
01721                 break;
01722             case '\n':
01723                 appendStringInfoString(buf, "\\n");
01724                 break;
01725             case '\r':
01726                 appendStringInfoString(buf, "\\r");
01727                 break;
01728             case '\t':
01729                 appendStringInfoString(buf, "\\t");
01730                 break;
01731             case '"':
01732                 appendStringInfoString(buf, "\\\"");
01733                 break;
01734             case '\\':
01735                 appendStringInfoString(buf, "\\\\");
01736                 break;
01737             default:
01738                 if ((unsigned char) *p < ' ')
01739                     appendStringInfo(buf, "\\u%04x", (int) *p);
01740                 else
01741                     appendStringInfoCharMacro(buf, *p);
01742                 break;
01743         }
01744     }
01745     appendStringInfoCharMacro(buf, '\"');
01746 }