PostgreSQL Source Code: src/backend/utils/adt/varlena.c Source File

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * varlena.c
00004  *    Functions for the variable-length built-in types.
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/utils/adt/varlena.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include <ctype.h>
00018 #include <limits.h>
00019 
00020 #include "access/tuptoaster.h"
00021 #include "catalog/pg_collation.h"
00022 #include "catalog/pg_type.h"
00023 #include "libpq/md5.h"
00024 #include "libpq/pqformat.h"
00025 #include "miscadmin.h"
00026 #include "parser/scansup.h"
00027 #include "regex/regex.h"
00028 #include "utils/builtins.h"
00029 #include "utils/bytea.h"
00030 #include "utils/lsyscache.h"
00031 #include "utils/pg_locale.h"
00032 
00033 
00034 /* GUC variable: selects which textual format byteaout() emits; starts as hex */
00035 int         bytea_output = BYTEA_OUTPUT_HEX;
00036 /* "unknown" is a typedef alias for struct varlena, used by the DatumGetUnknownP-style macros in this file */
00037 typedef struct varlena unknown;
00038 /* Working state passed between text_position_setup(), text_position_next() and text_position_cleanup() */
00039 typedef struct
00040 {
00041     bool        use_wchar;      /* T if multibyte encoding */
00042     char       *str1;           /* use these if not use_wchar */
00043     char       *str2;           /* note: these point to original texts */
00044     pg_wchar   *wstr1;          /* use these if use_wchar */
00045     pg_wchar   *wstr2;          /* note: these are palloc'd */
00046     int         len1;           /* string lengths in logical characters */
00047     int         len2;           /* (len1 goes with str1/wstr1, len2 with str2/wstr2) */
00048     /* Skip table for Boyer-Moore-Horspool search algorithm: */
00049     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
00050     int         skiptable[256]; /* skip distance for given mismatched char */
00051 } TextPositionState;
00052 /* Datum, function-argument and return accessors for type "unknown", layered on PG_DETOAST_DATUM / PG_DETOAST_DATUM_COPY */
00053 #define DatumGetUnknownP(X)         ((unknown *) PG_DETOAST_DATUM(X))
00054 #define DatumGetUnknownPCopy(X)     ((unknown *) PG_DETOAST_DATUM_COPY(X))
00055 #define PG_GETARG_UNKNOWN_P(n)      DatumGetUnknownP(PG_GETARG_DATUM(n))
00056 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
00057 #define PG_RETURN_UNKNOWN_P(x)      PG_RETURN_POINTER(x)
00058 /* Forward declarations of the file-local (static) helper functions */
00059 static int32 text_length(Datum str);
00060 static text *text_catenate(text *t1, text *t2);
00061 static text *text_substring(Datum str,
00062                int32 start,
00063                int32 length,
00064                bool length_not_specified);
00065 static text *text_overlay(text *t1, text *t2, int sp, int sl);
00066 static int  text_position(text *t1, text *t2);
00067 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
00068 static int  text_position_next(int start_pos, TextPositionState *state);
00069 static void text_position_cleanup(TextPositionState *state);
00070 static int  text_cmp(text *arg1, text *arg2, Oid collid);
00071 static bytea *bytea_catenate(bytea *t1, bytea *t2);
00072 static bytea *bytea_substring(Datum str,
00073                 int S,
00074                 int L,
00075                 bool length_not_specified);
00076 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
00077 static void appendStringInfoText(StringInfo str, const text *t);
00078 static Datum text_to_array_internal(PG_FUNCTION_ARGS);
00079 static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
00080                        const char *fldsep, const char *null_string);
00081 static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
00082 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
00083                          int *value);
00084 static const char *text_format_parse_format(const char *start_ptr,
00085                          const char *end_ptr,
00086                          int *argpos, int *widthpos,
00087                          int *flags, int *width);
00088 static void text_format_string_conversion(StringInfo buf, char conversion,
00089                               FmgrInfo *typOutputInfo,
00090                               Datum value, bool isNull,
00091                               int flags, int width);
00092 static void text_format_append_string(StringInfo buf, const char *str,
00093                           int flags, int width);
00094 
00095 
00096 /*****************************************************************************
00097  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
00098  *****************************************************************************/
00099 
00100 /*
00101  * cstring_to_text
00102  *
00103  * Create a text value from a null-terminated C string.
00104  *
00105  * The new text value is freshly palloc'd with a full-size VARHDR.
00106  */
00107 text *
00108 cstring_to_text(const char *s)
00109 {
00110     return cstring_to_text_with_len(s, strlen(s));
00111 }
00112 
00113 /*
00114  * cstring_to_text_with_len
00115  *
00116  * Same as cstring_to_text except the caller specifies the string length;
00117  * the string need not be null_terminated.
00118  */
00119 text *
00120 cstring_to_text_with_len(const char *s, int len)
00121 {
00122     text       *result = (text *) palloc(len + VARHDRSZ);
00123 
00124     SET_VARSIZE(result, len + VARHDRSZ);
00125     memcpy(VARDATA(result), s, len);
00126 
00127     return result;
00128 }
00129 
00130 /*
00131  * text_to_cstring
00132  *
00133  * Create a palloc'd, null-terminated C string from a text value.
00134  *
00135  * We support being passed a compressed or toasted text value.
00136  * This is a bit bogus since such values shouldn't really be referred to as
00137  * "text *", but it seems useful for robustness.  If we didn't handle that
00138  * case here, we'd need another routine that did, anyway.
00139  */
00140 char *
00141 text_to_cstring(const text *t)
00142 {
00143     /* must cast away the const, unfortunately */
00144     text       *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
00145     int         len = VARSIZE_ANY_EXHDR(tunpacked);
00146     char       *result;
00147 
00148     result = (char *) palloc(len + 1);
00149     memcpy(result, VARDATA_ANY(tunpacked), len);
00150     result[len] = '\0';
00151 
00152     if (tunpacked != t)
00153         pfree(tunpacked);
00154 
00155     return result;
00156 }
00157 
00158 /*
00159  * text_to_cstring_buffer
00160  *
00161  * Copy a text value into a caller-supplied buffer of size dst_len.
00162  *
00163  * The text string is truncated if necessary to fit.  The result is
00164  * guaranteed null-terminated (unless dst_len == 0).
00165  *
00166  * We support being passed a compressed or toasted text value.
00167  * This is a bit bogus since such values shouldn't really be referred to as
00168  * "text *", but it seems useful for robustness.  If we didn't handle that
00169  * case here, we'd need another routine that did, anyway.
00170  */
00171 void
00172 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
00173 {
00174     /* must cast away the const, unfortunately */
00175     text       *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
00176     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
00177 
00178     if (dst_len > 0)
00179     {
00180         dst_len--;
00181         if (dst_len >= src_len)
00182             dst_len = src_len;
00183         else    /* ensure truncation is encoding-safe */
00184             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
00185         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
00186         dst[dst_len] = '\0';
00187     }
00188 
00189     if (srcunpacked != src)
00190         pfree(srcunpacked);
00191 }
00192 
00193 
00194 /*****************************************************************************
00195  *   USER I/O ROUTINES                                                       *
00196  *****************************************************************************/
00197 
00198 /* VAL: numeric value of an ASCII digit character; DIG: ASCII digit character for a small numeric value */
00199 #define VAL(CH)         ((CH) - '0')
00200 #define DIG(VAL)        ((VAL) + '0')
00201 
00202 /*
00203  *      byteain         - converts from printable representation of byte array
00204  *
00205  *      Non-printable characters must be passed as '\nnn' (octal) and are
00206  *      converted to internal form.  '\' must be passed as '\\'.
00207  *      ereport(ERROR, ...) if bad form.
00208  *
00209  *      BUGS:
00210  *              The input is scanned twice.
00211  *              The error checking of input is minimal.
00212  */
00213 Datum
00214 byteain(PG_FUNCTION_ARGS)
00215 {
00216     char       *inputText = PG_GETARG_CSTRING(0);
00217     char       *tp;
00218     char       *rp;
00219     int         bc;
00220     bytea      *result;
00221 
00222     /* Recognize hex input */
00223     if (inputText[0] == '\\' && inputText[1] == 'x')
00224     {
00225         size_t      len = strlen(inputText);
00226 
00227         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
00228         result = palloc(bc);
00229         bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
00230         SET_VARSIZE(result, bc + VARHDRSZ);     /* actual length */
00231 
00232         PG_RETURN_BYTEA_P(result);
00233     }
00234 
00235     /* Else, it's the traditional escaped style */
00236     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
00237     {
00238         if (tp[0] != '\\')
00239             tp++;
00240         else if ((tp[0] == '\\') &&
00241                  (tp[1] >= '0' && tp[1] <= '3') &&
00242                  (tp[2] >= '0' && tp[2] <= '7') &&
00243                  (tp[3] >= '0' && tp[3] <= '7'))
00244             tp += 4;
00245         else if ((tp[0] == '\\') &&
00246                  (tp[1] == '\\'))
00247             tp += 2;
00248         else
00249         {
00250             /*
00251              * one backslash, not followed by another or ### valid octal
00252              */
00253             ereport(ERROR,
00254                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00255                      errmsg("invalid input syntax for type bytea")));
00256         }
00257     }
00258 
00259     bc += VARHDRSZ;
00260 
00261     result = (bytea *) palloc(bc);
00262     SET_VARSIZE(result, bc);
00263 
00264     tp = inputText;
00265     rp = VARDATA(result);
00266     while (*tp != '\0')
00267     {
00268         if (tp[0] != '\\')
00269             *rp++ = *tp++;
00270         else if ((tp[0] == '\\') &&
00271                  (tp[1] >= '0' && tp[1] <= '3') &&
00272                  (tp[2] >= '0' && tp[2] <= '7') &&
00273                  (tp[3] >= '0' && tp[3] <= '7'))
00274         {
00275             bc = VAL(tp[1]);
00276             bc <<= 3;
00277             bc += VAL(tp[2]);
00278             bc <<= 3;
00279             *rp++ = bc + VAL(tp[3]);
00280 
00281             tp += 4;
00282         }
00283         else if ((tp[0] == '\\') &&
00284                  (tp[1] == '\\'))
00285         {
00286             *rp++ = '\\';
00287             tp += 2;
00288         }
00289         else
00290         {
00291             /*
00292              * We should never get here. The first pass should not allow it.
00293              */
00294             ereport(ERROR,
00295                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00296                      errmsg("invalid input syntax for type bytea")));
00297         }
00298     }
00299 
00300     PG_RETURN_BYTEA_P(result);
00301 }
00302 
00303 /*
00304  *      byteaout        - converts to printable representation of byte array
00305  *
00306  *      In the traditional escaped format, non-printable characters are
00307  *      printed as '\nnn' (octal) and '\' as '\\'.
00308  */
00309 Datum
00310 byteaout(PG_FUNCTION_ARGS)
00311 {
00312     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
00313     char       *result;
00314     char       *rp;
00315 
00316     if (bytea_output == BYTEA_OUTPUT_HEX)
00317     {
00318         /* Print hex format */
00319         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
00320         *rp++ = '\\';
00321         *rp++ = 'x';
00322         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
00323     }
00324     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
00325     {
00326         /* Print traditional escaped format */
00327         char       *vp;
00328         int         len;
00329         int         i;
00330 
00331         len = 1;                /* empty string has 1 char */
00332         vp = VARDATA_ANY(vlena);
00333         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
00334         {
00335             if (*vp == '\\')
00336                 len += 2;
00337             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
00338                 len += 4;
00339             else
00340                 len++;
00341         }
00342         rp = result = (char *) palloc(len);
00343         vp = VARDATA_ANY(vlena);
00344         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
00345         {
00346             if (*vp == '\\')
00347             {
00348                 *rp++ = '\\';
00349                 *rp++ = '\\';
00350             }
00351             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
00352             {
00353                 int         val;    /* holds unprintable chars */
00354 
00355                 val = *vp;
00356                 rp[0] = '\\';
00357                 rp[3] = DIG(val & 07);
00358                 val >>= 3;
00359                 rp[2] = DIG(val & 07);
00360                 val >>= 3;
00361                 rp[1] = DIG(val & 03);
00362                 rp += 4;
00363             }
00364             else
00365                 *rp++ = *vp;
00366         }
00367     }
00368     else
00369     {
00370         elog(ERROR, "unrecognized bytea_output setting: %d",
00371              bytea_output);
00372         rp = result = NULL;     /* keep compiler quiet */
00373     }
00374     *rp = '\0';
00375     PG_RETURN_CSTRING(result);
00376 }
00377 
00378 /*
00379  *      bytearecv           - converts external binary format to bytea
00380  */
00381 Datum
00382 bytearecv(PG_FUNCTION_ARGS)
00383 {
00384     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
00385     bytea      *result;
00386     int         nbytes;
00387 
00388     nbytes = buf->len - buf->cursor;
00389     result = (bytea *) palloc(nbytes + VARHDRSZ);
00390     SET_VARSIZE(result, nbytes + VARHDRSZ);
00391     pq_copymsgbytes(buf, VARDATA(result), nbytes);
00392     PG_RETURN_BYTEA_P(result);
00393 }
00394 
00395 /*
00396  *      byteasend           - converts bytea to binary format
00397  *
00398  * This is a special case: just copy the input...
00399  */
00400 Datum
00401 byteasend(PG_FUNCTION_ARGS)
00402 {
00403     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
00404 
00405     PG_RETURN_BYTEA_P(vlena);
00406 }
00407 
00408 Datum
00409 bytea_string_agg_transfn(PG_FUNCTION_ARGS)
00410 {
00411     StringInfo  state;
00412 
00413     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
00414 
00415     /* Append the value unless null. */
00416     if (!PG_ARGISNULL(1))
00417     {
00418         bytea      *value = PG_GETARG_BYTEA_PP(1);
00419 
00420         /* On the first time through, we ignore the delimiter. */
00421         if (state == NULL)
00422             state = makeStringAggState(fcinfo);
00423         else if (!PG_ARGISNULL(2))
00424         {
00425             bytea      *delim = PG_GETARG_BYTEA_PP(2);
00426 
00427             appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
00428         }
00429 
00430         appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
00431     }
00432 
00433     /*
00434      * The transition type for string_agg() is declared to be "internal",
00435      * which is a pass-by-value type the same size as a pointer.
00436      */
00437     PG_RETURN_POINTER(state);
00438 }
00439 
00440 Datum
00441 bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
00442 {
00443     StringInfo  state;
00444 
00445     /* cannot be called directly because of internal-type argument */
00446     Assert(AggCheckCallContext(fcinfo, NULL));
00447 
00448     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
00449 
00450     if (state != NULL)
00451     {
00452         bytea      *result;
00453 
00454         result = (bytea *) palloc(state->len + VARHDRSZ);
00455         SET_VARSIZE(result, state->len + VARHDRSZ);
00456         memcpy(VARDATA(result), state->data, state->len);
00457         PG_RETURN_BYTEA_P(result);
00458     }
00459     else
00460         PG_RETURN_NULL();
00461 }
00462 
00463 /*
00464  *      textin          - converts "..." to internal representation
00465  */
00466 Datum
00467 textin(PG_FUNCTION_ARGS)
00468 {
00469     char       *inputText = PG_GETARG_CSTRING(0);
00470 
00471     PG_RETURN_TEXT_P(cstring_to_text(inputText));
00472 }
00473 
00474 /*
00475  *      textout         - converts internal representation to "..."
00476  */
00477 Datum
00478 textout(PG_FUNCTION_ARGS)
00479 {
00480     Datum       txt = PG_GETARG_DATUM(0);
00481 
00482     PG_RETURN_CSTRING(TextDatumGetCString(txt));
00483 }
00484 
00485 /*
00486  *      textrecv            - converts external binary format to text
00487  */
00488 Datum
00489 textrecv(PG_FUNCTION_ARGS)
00490 {
00491     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
00492     text       *result;
00493     char       *str;
00494     int         nbytes;
00495 
00496     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
00497 
00498     result = cstring_to_text_with_len(str, nbytes);
00499     pfree(str);
00500     PG_RETURN_TEXT_P(result);
00501 }
00502 
00503 /*
00504  *      textsend            - converts text to binary format
00505  */
00506 Datum
00507 textsend(PG_FUNCTION_ARGS)
00508 {
00509     text       *t = PG_GETARG_TEXT_PP(0);
00510     StringInfoData buf;
00511 
00512     pq_begintypsend(&buf);
00513     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
00514     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
00515 }
00516 
00517 
00518 /*
00519  *      unknownin           - converts "..." to internal representation
00520  */
00521 Datum
00522 unknownin(PG_FUNCTION_ARGS)
00523 {
00524     char       *str = PG_GETARG_CSTRING(0);
00525 
00526     /* representation is same as cstring */
00527     PG_RETURN_CSTRING(pstrdup(str));
00528 }
00529 
00530 /*
00531  *      unknownout          - converts internal representation to "..."
00532  */
00533 Datum
00534 unknownout(PG_FUNCTION_ARGS)
00535 {
00536     /* representation is same as cstring */
00537     char       *str = PG_GETARG_CSTRING(0);
00538 
00539     PG_RETURN_CSTRING(pstrdup(str));
00540 }
00541 
00542 /*
00543  *      unknownrecv         - converts external binary format to unknown
00544  */
00545 Datum
00546 unknownrecv(PG_FUNCTION_ARGS)
00547 {
00548     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
00549     char       *str;
00550     int         nbytes;
00551 
00552     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
00553     /* representation is same as cstring */
00554     PG_RETURN_CSTRING(str);
00555 }
00556 
00557 /*
00558  *      unknownsend         - converts unknown to binary format
00559  */
00560 Datum
00561 unknownsend(PG_FUNCTION_ARGS)
00562 {
00563     /* representation is same as cstring */
00564     char       *str = PG_GETARG_CSTRING(0);
00565     StringInfoData buf;
00566 
00567     pq_begintypsend(&buf);
00568     pq_sendtext(&buf, str, strlen(str));
00569     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
00570 }
00571 
00572 
00573 /* ========== PUBLIC ROUTINES ========== */
00574 
00575 /*
00576  * textlen -
00577  *    returns the logical length of a text*
00578  *     (which is less than the VARSIZE of the text*)
00579  */
00580 Datum
00581 textlen(PG_FUNCTION_ARGS)
00582 {
00583     Datum       str = PG_GETARG_DATUM(0);
00584 
00585     /* try to avoid decompressing argument */
00586     PG_RETURN_INT32(text_length(str));
00587 }
00588 
00589 /*
00590  * text_length -
00591  *  Does the real work for textlen()
00592  *
00593  *  This is broken out so it can be called directly by other string processing
00594  *  functions.  Note that the argument is passed as a Datum, to indicate that
00595  *  it may still be in compressed form.  We can avoid decompressing it at all
00596  *  in some cases.
00597  */
00598 static int32
00599 text_length(Datum str)
00600 {
00601     /* fastpath when max encoding length is one */
00602     if (pg_database_encoding_max_length() == 1)
00603         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
00604     else
00605     {
00606         text       *t = DatumGetTextPP(str);
00607 
00608         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
00609                                              VARSIZE_ANY_EXHDR(t)));
00610     }
00611 }
00612 
00613 /*
00614  * textoctetlen -
00615  *    returns the physical length of a text*
00616  *     (which is less than the VARSIZE of the text*)
00617  */
00618 Datum
00619 textoctetlen(PG_FUNCTION_ARGS)
00620 {
00621     Datum       str = PG_GETARG_DATUM(0);
00622 
00623     /* We need not detoast the input at all */
00624     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
00625 }
00626 
00627 /*
00628  * textcat -
00629  *    takes two text* and returns a text* that is the concatenation of
00630  *    the two.
00631  *
00632  * Rewritten by Sapa, [email protected]. 8-Jul-96.
00633  * Updated by Thomas, [email protected] 1997-07-10.
00634  * Allocate space for output in all cases.
00635  * XXX - thomas 1997-07-10
00636  */
00637 Datum
00638 textcat(PG_FUNCTION_ARGS)
00639 {
00640     text       *t1 = PG_GETARG_TEXT_PP(0);
00641     text       *t2 = PG_GETARG_TEXT_PP(1);
00642 
00643     PG_RETURN_TEXT_P(text_catenate(t1, t2));
00644 }
00645 
00646 /*
00647  * text_catenate
00648  *  Guts of textcat(), broken out so it can be used by other functions
00649  *
00650  * Arguments can be in short-header form, but not compressed or out-of-line
00651  */
00652 static text *
00653 text_catenate(text *t1, text *t2)
00654 {
00655     text       *result;
00656     int         len1,
00657                 len2,
00658                 len;
00659     char       *ptr;
00660 
00661     len1 = VARSIZE_ANY_EXHDR(t1);
00662     len2 = VARSIZE_ANY_EXHDR(t2);
00663 
00664     /* paranoia ... probably should throw error instead? */
00665     if (len1 < 0)
00666         len1 = 0;
00667     if (len2 < 0)
00668         len2 = 0;
00669 
00670     len = len1 + len2 + VARHDRSZ;
00671     result = (text *) palloc(len);
00672 
00673     /* Set size of result string... */
00674     SET_VARSIZE(result, len);
00675 
00676     /* Fill data field of result string... */
00677     ptr = VARDATA(result);
00678     if (len1 > 0)
00679         memcpy(ptr, VARDATA_ANY(t1), len1);
00680     if (len2 > 0)
00681         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
00682 
00683     return result;
00684 }
00685 
/*
 * charlen_to_bytelen()
 *  Return how many bytes the n characters starting at *p occupy
 *
 * The caller must guarantee that n characters are really present; the
 * string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
    const char *cur = p;

    /* Single-byte encodings: characters and bytes coincide */
    if (pg_database_encoding_max_length() == 1)
        return n;

    /* Otherwise step over the characters one at a time */
    while (n > 0)
    {
        cur += pg_mblen(cur);
        n--;
    }

    return cur - p;
}
00711 
00712 /*
00713  * text_substr()
00714  * Return a substring starting at the specified position.
00715  * - thomas 1997-12-31
00716  *
00717  * Input:
00718  *  - string
00719  *  - starting position (is one-based)
00720  *  - string length
00721  *
00722  * If the starting position is zero or less, then return from the start of the string
00723  *  adjusting the length to be consistent with the "negative start" per SQL.
00724  * If the length is less than zero, return the remaining string.
00725  *
00726  * Added multibyte support.
00727  * - Tatsuo Ishii 1998-4-21
00728  * Changed behavior if starting position is less than one to conform to SQL behavior.
00729  * Formerly returned the entire string; now returns a portion.
00730  * - Thomas Lockhart 1998-12-10
00731  * Now uses faster TOAST-slicing interface
00732  * - John Gray 2002-02-22
00733  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
00734  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
00735  * error; if E < 1, return '', not entire string). Fixed MB related bug when
00736  * S > LC and < LC + 4 sometimes garbage characters are returned.
00737  * - Joe Conway 2002-08-10
00738  */
00739 Datum
00740 text_substr(PG_FUNCTION_ARGS)
00741 {
00742     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
00743                                     PG_GETARG_INT32(1),
00744                                     PG_GETARG_INT32(2),
00745                                     false));
00746 }
00747 
00748 /*
00749  * text_substr_no_len -
00750  *    Wrapper to avoid opr_sanity failure due to
00751  *    one function accepting a different number of args.
00752  */
00753 Datum
00754 text_substr_no_len(PG_FUNCTION_ARGS)
00755 {
00756     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
00757                                     PG_GETARG_INT32(1),
00758                                     -1, true));
00759 }
00760 
00761 /*
00762  * text_substring -
00763  *  Does the real work for text_substr() and text_substr_no_len()
00764  *
00765  *  This is broken out so it can be called directly by other string processing
00766  *  functions.  Note that the argument is passed as a Datum, to indicate that
00767  *  it may still be in compressed/toasted form.  We can avoid detoasting all
00768  *  of it in some cases.
00769  *
00770  *  The result is always a freshly palloc'd datum.
00771  */
00772 static text *
00773 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
00774 {
00775     int32       eml = pg_database_encoding_max_length();
00776     int32       S = start;      /* start position */
00777     int32       S1;             /* adjusted start position */
00778     int32       L1;             /* adjusted substring length */
00779 
00780     /* life is easy if the encoding max length is 1 */
00781     if (eml == 1)
00782     {
00783         S1 = Max(S, 1);
00784 
00785         if (length_not_specified)       /* special case - get length to end of
00786                                          * string */
00787             L1 = -1;
00788         else
00789         {
00790             /* end position */
00791             int         E = S + length;
00792 
00793             /*
00794              * A negative value for L is the only way for the end position to
00795              * be before the start. SQL99 says to throw an error.
00796              */
00797             if (E < S)
00798                 ereport(ERROR,
00799                         (errcode(ERRCODE_SUBSTRING_ERROR),
00800                          errmsg("negative substring length not allowed")));
00801 
00802             /*
00803              * A zero or negative value for the end position can happen if the
00804              * start was negative or one. SQL99 says to return a zero-length
00805              * string.
00806              */
00807             if (E < 1)
00808                 return cstring_to_text("");
00809 
00810             L1 = E - S1;
00811         }
00812 
00813         /*
00814          * If the start position is past the end of the string, SQL99 says to
00815          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
00816          * that for us. Convert to zero-based starting position
00817          */
00818         return DatumGetTextPSlice(str, S1 - 1, L1);
00819     }
00820     else if (eml > 1)
00821     {
00822         /*
00823          * When encoding max length is > 1, we can't get LC without
00824          * detoasting, so we'll grab a conservatively large slice now and go
00825          * back later to do the right thing
00826          */
00827         int32       slice_start;
00828         int32       slice_size;
00829         int32       slice_strlen;
00830         text       *slice;
00831         int32       E1;
00832         int32       i;
00833         char       *p;
00834         char       *s;
00835         text       *ret;
00836 
00837         /*
00838          * if S is past the end of the string, the tuple toaster will return a
00839          * zero-length string to us
00840          */
00841         S1 = Max(S, 1);
00842 
00843         /*
00844          * We need to start at position zero because there is no way to know
00845          * in advance which byte offset corresponds to the supplied start
00846          * position.
00847          */
00848         slice_start = 0;
00849 
00850         if (length_not_specified)       /* special case - get length to end of
00851                                          * string */
00852             slice_size = L1 = -1;
00853         else
00854         {
00855             int         E = S + length;
00856 
00857             /*
00858              * A negative value for L is the only way for the end position to
00859              * be before the start. SQL99 says to throw an error.
00860              */
00861             if (E < S)
00862                 ereport(ERROR,
00863                         (errcode(ERRCODE_SUBSTRING_ERROR),
00864                          errmsg("negative substring length not allowed")));
00865 
00866             /*
00867              * A zero or negative value for the end position can happen if the
00868              * start was negative or one. SQL99 says to return a zero-length
00869              * string.
00870              */
00871             if (E < 1)
00872                 return cstring_to_text("");
00873 
00874             /*
00875              * if E is past the end of the string, the tuple toaster will
00876              * truncate the length for us
00877              */
00878             L1 = E - S1;
00879 
00880             /*
00881              * Total slice size in bytes can't be any longer than the start
00882              * position plus substring length times the encoding max length.
00883              */
00884             slice_size = (S1 + L1) * eml;
00885         }
00886 
00887         /*
00888          * If we're working with an untoasted source, no need to do an extra
00889          * copying step.
00890          */
00891         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
00892             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
00893             slice = DatumGetTextPSlice(str, slice_start, slice_size);
00894         else
00895             slice = (text *) DatumGetPointer(str);
00896 
00897         /* see if we got back an empty string */
00898         if (VARSIZE_ANY_EXHDR(slice) == 0)
00899         {
00900             if (slice != (text *) DatumGetPointer(str))
00901                 pfree(slice);
00902             return cstring_to_text("");
00903         }
00904 
00905         /* Now we can get the actual length of the slice in MB characters */
00906         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
00907                                             VARSIZE_ANY_EXHDR(slice));
00908 
00909         /*
00910          * Check that the start position wasn't > slice_strlen. If so, SQL99
00911          * says to return a zero-length string.
00912          */
00913         if (S1 > slice_strlen)
00914         {
00915             if (slice != (text *) DatumGetPointer(str))
00916                 pfree(slice);
00917             return cstring_to_text("");
00918         }
00919 
00920         /*
00921          * Adjust L1 and E1 now that we know the slice string length. Again
00922          * remember that S1 is one based, and slice_start is zero based.
00923          */
00924         if (L1 > -1)
00925             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
00926         else
00927             E1 = slice_start + 1 + slice_strlen;
00928 
00929         /*
00930          * Find the start position in the slice; remember S1 is not zero based
00931          */
00932         p = VARDATA_ANY(slice);
00933         for (i = 0; i < S1 - 1; i++)
00934             p += pg_mblen(p);
00935 
00936         /* hang onto a pointer to our start position */
00937         s = p;
00938 
00939         /*
00940          * Count the actual bytes used by the substring of the requested
00941          * length.
00942          */
00943         for (i = S1; i < E1; i++)
00944             p += pg_mblen(p);
00945 
00946         ret = (text *) palloc(VARHDRSZ + (p - s));
00947         SET_VARSIZE(ret, VARHDRSZ + (p - s));
00948         memcpy(VARDATA(ret), s, (p - s));
00949 
00950         if (slice != (text *) DatumGetPointer(str))
00951             pfree(slice);
00952 
00953         return ret;
00954     }
00955     else
00956         elog(ERROR, "invalid backend encoding: encoding max length < 1");
00957 
00958     /* not reached: suppress compiler warning */
00959     return NULL;
00960 }
00961 
00962 /*
00963  * textoverlay
00964  *  Replace specified substring of first string with second
00965  *
00966  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
00967  * This code is a direct implementation of what the standard says.
00968  */
00969 Datum
00970 textoverlay(PG_FUNCTION_ARGS)
00971 {
00972     text       *t1 = PG_GETARG_TEXT_PP(0);
00973     text       *t2 = PG_GETARG_TEXT_PP(1);
00974     int         sp = PG_GETARG_INT32(2);        /* substring start position */
00975     int         sl = PG_GETARG_INT32(3);        /* substring length */
00976 
00977     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
00978 }
00979 
00980 Datum
00981 textoverlay_no_len(PG_FUNCTION_ARGS)
00982 {
00983     text       *t1 = PG_GETARG_TEXT_PP(0);
00984     text       *t2 = PG_GETARG_TEXT_PP(1);
00985     int         sp = PG_GETARG_INT32(2);        /* substring start position */
00986     int         sl;
00987 
00988     sl = text_length(PointerGetDatum(t2));      /* defaults to length(t2) */
00989     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
00990 }
00991 
00992 static text *
00993 text_overlay(text *t1, text *t2, int sp, int sl)
00994 {
00995     text       *result;
00996     text       *s1;
00997     text       *s2;
00998     int         sp_pl_sl;
00999 
01000     /*
01001      * Check for possible integer-overflow cases.  For negative sp, throw a
01002      * "substring length" error because that's what should be expected
01003      * according to the spec's definition of OVERLAY().
01004      */
01005     if (sp <= 0)
01006         ereport(ERROR,
01007                 (errcode(ERRCODE_SUBSTRING_ERROR),
01008                  errmsg("negative substring length not allowed")));
01009     sp_pl_sl = sp + sl;
01010     if (sp_pl_sl <= sl)
01011         ereport(ERROR,
01012                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
01013                  errmsg("integer out of range")));
01014 
01015     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
01016     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
01017     result = text_catenate(s1, t2);
01018     result = text_catenate(result, s2);
01019 
01020     return result;
01021 }
01022 
01023 /*
01024  * textpos -
01025  *    Return the position of the specified substring.
01026  *    Implements the SQL POSITION() function.
01027  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
01028  * - thomas 1997-07-27
01029  */
01030 Datum
01031 textpos(PG_FUNCTION_ARGS)
01032 {
01033     text       *str = PG_GETARG_TEXT_PP(0);
01034     text       *search_str = PG_GETARG_TEXT_PP(1);
01035 
01036     PG_RETURN_INT32((int32) text_position(str, search_str));
01037 }
01038 
01039 /*
01040  * text_position -
01041  *  Does the real work for textpos()
01042  *
01043  * Inputs:
01044  *      t1 - string to be searched
01045  *      t2 - pattern to match within t1
01046  * Result:
01047  *      Character index of the first matched char, starting from 1,
01048  *      or 0 if no match.
01049  *
01050  *  This is broken out so it can be called directly by other string processing
01051  *  functions.
01052  */
01053 static int
01054 text_position(text *t1, text *t2)
01055 {
01056     TextPositionState state;
01057     int         result;
01058 
01059     text_position_setup(t1, t2, &state);
01060     result = text_position_next(1, &state);
01061     text_position_cleanup(&state);
01062     return result;
01063 }
01064 
01065 
01066 /*
01067  * text_position_setup, text_position_next, text_position_cleanup -
01068  *  Component steps of text_position()
01069  *
01070  * These are broken out so that a string can be efficiently searched for
01071  * multiple occurrences of the same pattern.  text_position_next may be
01072  * called multiple times with increasing values of start_pos, which is
01073  * the 1-based character position to start the search from.  The "state"
01074  * variable is normally just a local variable in the caller.
01075  */
01076 
01077 static void
01078 text_position_setup(text *t1, text *t2, TextPositionState *state)
01079 {
01080     int         len1 = VARSIZE_ANY_EXHDR(t1);
01081     int         len2 = VARSIZE_ANY_EXHDR(t2);
01082 
01083     if (pg_database_encoding_max_length() == 1)
01084     {
01085         /* simple case - single byte encoding */
01086         state->use_wchar = false;
01087         state->str1 = VARDATA_ANY(t1);
01088         state->str2 = VARDATA_ANY(t2);
01089         state->len1 = len1;
01090         state->len2 = len2;
01091     }
01092     else
01093     {
01094         /* not as simple - multibyte encoding */
01095         pg_wchar   *p1,
01096                    *p2;
01097 
01098         p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
01099         len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
01100         p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
01101         len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
01102 
01103         state->use_wchar = true;
01104         state->wstr1 = p1;
01105         state->wstr2 = p2;
01106         state->len1 = len1;
01107         state->len2 = len2;
01108     }
01109 
01110     /*
01111      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
01112      * notes we use the terminology that the "haystack" is the string to be
01113      * searched (t1) and the "needle" is the pattern being sought (t2).
01114      *
01115      * If the needle is empty or bigger than the haystack then there is no
01116      * point in wasting cycles initializing the table.  We also choose not to
01117      * use B-M-H for needles of length 1, since the skip table can't possibly
01118      * save anything in that case.
01119      */
01120     if (len1 >= len2 && len2 > 1)
01121     {
01122         int         searchlength = len1 - len2;
01123         int         skiptablemask;
01124         int         last;
01125         int         i;
01126 
01127         /*
01128          * First we must determine how much of the skip table to use.  The
01129          * declaration of TextPositionState allows up to 256 elements, but for
01130          * short search problems we don't really want to have to initialize so
01131          * many elements --- it would take too long in comparison to the
01132          * actual search time.  So we choose a useful skip table size based on
01133          * the haystack length minus the needle length.  The closer the needle
01134          * length is to the haystack length the less useful skipping becomes.
01135          *
01136          * Note: since we use bit-masking to select table elements, the skip
01137          * table size MUST be a power of 2, and so the mask must be 2^N-1.
01138          */
01139         if (searchlength < 16)
01140             skiptablemask = 3;
01141         else if (searchlength < 64)
01142             skiptablemask = 7;
01143         else if (searchlength < 128)
01144             skiptablemask = 15;
01145         else if (searchlength < 512)
01146             skiptablemask = 31;
01147         else if (searchlength < 2048)
01148             skiptablemask = 63;
01149         else if (searchlength < 4096)
01150             skiptablemask = 127;
01151         else
01152             skiptablemask = 255;
01153         state->skiptablemask = skiptablemask;
01154 
01155         /*
01156          * Initialize the skip table.  We set all elements to the needle
01157          * length, since this is the correct skip distance for any character
01158          * not found in the needle.
01159          */
01160         for (i = 0; i <= skiptablemask; i++)
01161             state->skiptable[i] = len2;
01162 
01163         /*
01164          * Now examine the needle.  For each character except the last one,
01165          * set the corresponding table element to the appropriate skip
01166          * distance.  Note that when two characters share the same skip table
01167          * entry, the one later in the needle must determine the skip
01168          * distance.
01169          */
01170         last = len2 - 1;
01171 
01172         if (!state->use_wchar)
01173         {
01174             const char *str2 = state->str2;
01175 
01176             for (i = 0; i < last; i++)
01177                 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
01178         }
01179         else
01180         {
01181             const pg_wchar *wstr2 = state->wstr2;
01182 
01183             for (i = 0; i < last; i++)
01184                 state->skiptable[wstr2[i] & skiptablemask] = last - i;
01185         }
01186     }
01187 }
01188 
01189 static int
01190 text_position_next(int start_pos, TextPositionState *state)
01191 {
01192     int         haystack_len = state->len1;
01193     int         needle_len = state->len2;
01194     int         skiptablemask = state->skiptablemask;
01195 
01196     Assert(start_pos > 0);      /* else caller error */
01197 
01198     if (needle_len <= 0)
01199         return start_pos;       /* result for empty pattern */
01200 
01201     start_pos--;                /* adjust for zero based arrays */
01202 
01203     /* Done if the needle can't possibly fit */
01204     if (haystack_len < start_pos + needle_len)
01205         return 0;
01206 
01207     if (!state->use_wchar)
01208     {
01209         /* simple case - single byte encoding */
01210         const char *haystack = state->str1;
01211         const char *needle = state->str2;
01212         const char *haystack_end = &haystack[haystack_len];
01213         const char *hptr;
01214 
01215         if (needle_len == 1)
01216         {
01217             /* No point in using B-M-H for a one-character needle */
01218             char        nchar = *needle;
01219 
01220             hptr = &haystack[start_pos];
01221             while (hptr < haystack_end)
01222             {
01223                 if (*hptr == nchar)
01224                     return hptr - haystack + 1;
01225                 hptr++;
01226             }
01227         }
01228         else
01229         {
01230             const char *needle_last = &needle[needle_len - 1];
01231 
01232             /* Start at startpos plus the length of the needle */
01233             hptr = &haystack[start_pos + needle_len - 1];
01234             while (hptr < haystack_end)
01235             {
01236                 /* Match the needle scanning *backward* */
01237                 const char *nptr;
01238                 const char *p;
01239 
01240                 nptr = needle_last;
01241                 p = hptr;
01242                 while (*nptr == *p)
01243                 {
01244                     /* Matched it all?  If so, return 1-based position */
01245                     if (nptr == needle)
01246                         return p - haystack + 1;
01247                     nptr--, p--;
01248                 }
01249 
01250                 /*
01251                  * No match, so use the haystack char at hptr to decide how
01252                  * far to advance.  If the needle had any occurrence of that
01253                  * character (or more precisely, one sharing the same
01254                  * skiptable entry) before its last character, then we advance
01255                  * far enough to align the last such needle character with
01256                  * that haystack position.  Otherwise we can advance by the
01257                  * whole needle length.
01258                  */
01259                 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
01260             }
01261         }
01262     }
01263     else
01264     {
01265         /* The multibyte char version. This works exactly the same way. */
01266         const pg_wchar *haystack = state->wstr1;
01267         const pg_wchar *needle = state->wstr2;
01268         const pg_wchar *haystack_end = &haystack[haystack_len];
01269         const pg_wchar *hptr;
01270 
01271         if (needle_len == 1)
01272         {
01273             /* No point in using B-M-H for a one-character needle */
01274             pg_wchar    nchar = *needle;
01275 
01276             hptr = &haystack[start_pos];
01277             while (hptr < haystack_end)
01278             {
01279                 if (*hptr == nchar)
01280                     return hptr - haystack + 1;
01281                 hptr++;
01282             }
01283         }
01284         else
01285         {
01286             const pg_wchar *needle_last = &needle[needle_len - 1];
01287 
01288             /* Start at startpos plus the length of the needle */
01289             hptr = &haystack[start_pos + needle_len - 1];
01290             while (hptr < haystack_end)
01291             {
01292                 /* Match the needle scanning *backward* */
01293                 const pg_wchar *nptr;
01294                 const pg_wchar *p;
01295 
01296                 nptr = needle_last;
01297                 p = hptr;
01298                 while (*nptr == *p)
01299                 {
01300                     /* Matched it all?  If so, return 1-based position */
01301                     if (nptr == needle)
01302                         return p - haystack + 1;
01303                     nptr--, p--;
01304                 }
01305 
01306                 /*
01307                  * No match, so use the haystack char at hptr to decide how
01308                  * far to advance.  If the needle had any occurrence of that
01309                  * character (or more precisely, one sharing the same
01310                  * skiptable entry) before its last character, then we advance
01311                  * far enough to align the last such needle character with
01312                  * that haystack position.  Otherwise we can advance by the
01313                  * whole needle length.
01314                  */
01315                 hptr += state->skiptable[*hptr & skiptablemask];
01316             }
01317         }
01318     }
01319 
01320     return 0;                   /* not found */
01321 }
01322 
01323 static void
01324 text_position_cleanup(TextPositionState *state)
01325 {
01326     if (state->use_wchar)
01327     {
01328         pfree(state->wstr1);
01329         pfree(state->wstr2);
01330     }
01331 }
01332 
01333 /* varstr_cmp()
01334  * Comparison function for text strings with given lengths.
01335  * Includes locale support, but must copy strings to temporary memory
01336  *  to allow null-termination for inputs to strcoll().
01337  * Returns an integer less than, equal to, or greater than zero, indicating
01338  * whether arg1 is less than, equal to, or greater than arg2.
01339  */
01340 int
01341 varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
01342 {
01343     int         result;
01344 
01345     /*
01346      * Unfortunately, there is no strncoll(), so in the non-C locale case we
01347      * have to do some memory copying.  This turns out to be significantly
01348      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
01349      * optimize relatively-short strings by avoiding palloc/pfree overhead.
01350      */
01351     if (lc_collate_is_c(collid))
01352     {
01353         result = memcmp(arg1, arg2, Min(len1, len2));
01354         if ((result == 0) && (len1 != len2))
01355             result = (len1 < len2) ? -1 : 1;
01356     }
01357     else
01358     {
01359 #define STACKBUFLEN     1024
01360 
01361         char        a1buf[STACKBUFLEN];
01362         char        a2buf[STACKBUFLEN];
01363         char       *a1p,
01364                    *a2p;
01365 
01366 #ifdef HAVE_LOCALE_T
01367         pg_locale_t mylocale = 0;
01368 #endif
01369 
01370         if (collid != DEFAULT_COLLATION_OID)
01371         {
01372             if (!OidIsValid(collid))
01373             {
01374                 /*
01375                  * This typically means that the parser could not resolve a
01376                  * conflict of implicit collations, so report it that way.
01377                  */
01378                 ereport(ERROR,
01379                         (errcode(ERRCODE_INDETERMINATE_COLLATION),
01380                          errmsg("could not determine which collation to use for string comparison"),
01381                          errhint("Use the COLLATE clause to set the collation explicitly.")));
01382             }
01383 #ifdef HAVE_LOCALE_T
01384             mylocale = pg_newlocale_from_collation(collid);
01385 #endif
01386         }
01387 
01388 #ifdef WIN32
01389         /* Win32 does not have UTF-8, so we need to map to UTF-16 */
01390         if (GetDatabaseEncoding() == PG_UTF8)
01391         {
01392             int         a1len;
01393             int         a2len;
01394             int         r;
01395 
01396             if (len1 >= STACKBUFLEN / 2)
01397             {
01398                 a1len = len1 * 2 + 2;
01399                 a1p = palloc(a1len);
01400             }
01401             else
01402             {
01403                 a1len = STACKBUFLEN;
01404                 a1p = a1buf;
01405             }
01406             if (len2 >= STACKBUFLEN / 2)
01407             {
01408                 a2len = len2 * 2 + 2;
01409                 a2p = palloc(a2len);
01410             }
01411             else
01412             {
01413                 a2len = STACKBUFLEN;
01414                 a2p = a2buf;
01415             }
01416 
01417             /* stupid Microsloth API does not work for zero-length input */
01418             if (len1 == 0)
01419                 r = 0;
01420             else
01421             {
01422                 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
01423                                         (LPWSTR) a1p, a1len / 2);
01424                 if (!r)
01425                     ereport(ERROR,
01426                             (errmsg("could not convert string to UTF-16: error code %lu",
01427                                     GetLastError())));
01428             }
01429             ((LPWSTR) a1p)[r] = 0;
01430 
01431             if (len2 == 0)
01432                 r = 0;
01433             else
01434             {
01435                 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
01436                                         (LPWSTR) a2p, a2len / 2);
01437                 if (!r)
01438                     ereport(ERROR,
01439                             (errmsg("could not convert string to UTF-16: error code %lu",
01440                                     GetLastError())));
01441             }
01442             ((LPWSTR) a2p)[r] = 0;
01443 
01444             errno = 0;
01445 #ifdef HAVE_LOCALE_T
01446             if (mylocale)
01447                 result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
01448             else
01449 #endif
01450                 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
01451             if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw
01452                                          * headers */
01453                 ereport(ERROR,
01454                         (errmsg("could not compare Unicode strings: %m")));
01455 
01456             /*
01457              * In some locales wcscoll() can claim that nonidentical strings
01458              * are equal.  Believing that would be bad news for a number of
01459              * reasons, so we follow Perl's lead and sort "equal" strings
01460              * according to strcmp (on the UTF-8 representation).
01461              */
01462             if (result == 0)
01463             {
01464                 result = memcmp(arg1, arg2, Min(len1, len2));
01465                 if ((result == 0) && (len1 != len2))
01466                     result = (len1 < len2) ? -1 : 1;
01467             }
01468 
01469             if (a1p != a1buf)
01470                 pfree(a1p);
01471             if (a2p != a2buf)
01472                 pfree(a2p);
01473 
01474             return result;
01475         }
01476 #endif   /* WIN32 */
01477 
01478         if (len1 >= STACKBUFLEN)
01479             a1p = (char *) palloc(len1 + 1);
01480         else
01481             a1p = a1buf;
01482         if (len2 >= STACKBUFLEN)
01483             a2p = (char *) palloc(len2 + 1);
01484         else
01485             a2p = a2buf;
01486 
01487         memcpy(a1p, arg1, len1);
01488         a1p[len1] = '\0';
01489         memcpy(a2p, arg2, len2);
01490         a2p[len2] = '\0';
01491 
01492 #ifdef HAVE_LOCALE_T
01493         if (mylocale)
01494             result = strcoll_l(a1p, a2p, mylocale);
01495         else
01496 #endif
01497             result = strcoll(a1p, a2p);
01498 
01499         /*
01500          * In some locales strcoll() can claim that nonidentical strings are
01501          * equal.  Believing that would be bad news for a number of reasons,
01502          * so we follow Perl's lead and sort "equal" strings according to
01503          * strcmp().
01504          */
01505         if (result == 0)
01506             result = strcmp(a1p, a2p);
01507 
01508         if (a1p != a1buf)
01509             pfree(a1p);
01510         if (a2p != a2buf)
01511             pfree(a2p);
01512     }
01513 
01514     return result;
01515 }
01516 
01517 
01518 /* text_cmp()
01519  * Internal comparison function for text strings.
01520  * Returns -1, 0 or 1
01521  */
01522 static int
01523 text_cmp(text *arg1, text *arg2, Oid collid)
01524 {
01525     char       *a1p,
01526                *a2p;
01527     int         len1,
01528                 len2;
01529 
01530     a1p = VARDATA_ANY(arg1);
01531     a2p = VARDATA_ANY(arg2);
01532 
01533     len1 = VARSIZE_ANY_EXHDR(arg1);
01534     len2 = VARSIZE_ANY_EXHDR(arg2);
01535 
01536     return varstr_cmp(a1p, len1, a2p, len2, collid);
01537 }
01538 
01539 /*
01540  * Comparison functions for text strings.
01541  *
01542  * Note: btree indexes need these routines not to leak memory; therefore,
01543  * be careful to free working copies of toasted datums.  Most places don't
01544  * need to be so careful.
01545  */
01546 
01547 Datum
01548 texteq(PG_FUNCTION_ARGS)
01549 {
01550     Datum       arg1 = PG_GETARG_DATUM(0);
01551     Datum       arg2 = PG_GETARG_DATUM(1);
01552     bool        result;
01553     Size        len1,
01554                 len2;
01555 
01556     /*
01557      * Since we only care about equality or not-equality, we can avoid all the
01558      * expense of strcoll() here, and just do bitwise comparison.  In fact, we
01559      * don't even have to do a bitwise comparison if we can show the lengths
01560      * of the strings are unequal; which might save us from having to detoast
01561      * one or both values.
01562      */
01563     len1 = toast_raw_datum_size(arg1);
01564     len2 = toast_raw_datum_size(arg2);
01565     if (len1 != len2)
01566         result = false;
01567     else
01568     {
01569         text       *targ1 = DatumGetTextPP(arg1);
01570         text       *targ2 = DatumGetTextPP(arg2);
01571 
01572         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
01573                          len1 - VARHDRSZ) == 0);
01574 
01575         PG_FREE_IF_COPY(targ1, 0);
01576         PG_FREE_IF_COPY(targ2, 1);
01577     }
01578 
01579     PG_RETURN_BOOL(result);
01580 }
01581 
01582 Datum
01583 textne(PG_FUNCTION_ARGS)
01584 {
01585     Datum       arg1 = PG_GETARG_DATUM(0);
01586     Datum       arg2 = PG_GETARG_DATUM(1);
01587     bool        result;
01588     Size        len1,
01589                 len2;
01590 
01591     /* See comment in texteq() */
01592     len1 = toast_raw_datum_size(arg1);
01593     len2 = toast_raw_datum_size(arg2);
01594     if (len1 != len2)
01595         result = true;
01596     else
01597     {
01598         text       *targ1 = DatumGetTextPP(arg1);
01599         text       *targ2 = DatumGetTextPP(arg2);
01600 
01601         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
01602                          len1 - VARHDRSZ) != 0);
01603 
01604         PG_FREE_IF_COPY(targ1, 0);
01605         PG_FREE_IF_COPY(targ2, 1);
01606     }
01607 
01608     PG_RETURN_BOOL(result);
01609 }
01610 
01611 Datum
01612 text_lt(PG_FUNCTION_ARGS)
01613 {
01614     text       *arg1 = PG_GETARG_TEXT_PP(0);
01615     text       *arg2 = PG_GETARG_TEXT_PP(1);
01616     bool        result;
01617 
01618     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
01619 
01620     PG_FREE_IF_COPY(arg1, 0);
01621     PG_FREE_IF_COPY(arg2, 1);
01622 
01623     PG_RETURN_BOOL(result);
01624 }
01625 
01626 Datum
01627 text_le(PG_FUNCTION_ARGS)
01628 {
01629     text       *arg1 = PG_GETARG_TEXT_PP(0);
01630     text       *arg2 = PG_GETARG_TEXT_PP(1);
01631     bool        result;
01632 
01633     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
01634 
01635     PG_FREE_IF_COPY(arg1, 0);
01636     PG_FREE_IF_COPY(arg2, 1);
01637 
01638     PG_RETURN_BOOL(result);
01639 }
01640 
01641 Datum
01642 text_gt(PG_FUNCTION_ARGS)
01643 {
01644     text       *arg1 = PG_GETARG_TEXT_PP(0);
01645     text       *arg2 = PG_GETARG_TEXT_PP(1);
01646     bool        result;
01647 
01648     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
01649 
01650     PG_FREE_IF_COPY(arg1, 0);
01651     PG_FREE_IF_COPY(arg2, 1);
01652 
01653     PG_RETURN_BOOL(result);
01654 }
01655 
01656 Datum
01657 text_ge(PG_FUNCTION_ARGS)
01658 {
01659     text       *arg1 = PG_GETARG_TEXT_PP(0);
01660     text       *arg2 = PG_GETARG_TEXT_PP(1);
01661     bool        result;
01662 
01663     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
01664 
01665     PG_FREE_IF_COPY(arg1, 0);
01666     PG_FREE_IF_COPY(arg2, 1);
01667 
01668     PG_RETURN_BOOL(result);
01669 }
01670 
01671 Datum
01672 bttextcmp(PG_FUNCTION_ARGS)
01673 {
01674     text       *arg1 = PG_GETARG_TEXT_PP(0);
01675     text       *arg2 = PG_GETARG_TEXT_PP(1);
01676     int32       result;
01677 
01678     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
01679 
01680     PG_FREE_IF_COPY(arg1, 0);
01681     PG_FREE_IF_COPY(arg2, 1);
01682 
01683     PG_RETURN_INT32(result);
01684 }
01685 
01686 
01687 Datum
01688 text_larger(PG_FUNCTION_ARGS)
01689 {
01690     text       *arg1 = PG_GETARG_TEXT_PP(0);
01691     text       *arg2 = PG_GETARG_TEXT_PP(1);
01692     text       *result;
01693 
01694     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
01695 
01696     PG_RETURN_TEXT_P(result);
01697 }
01698 
01699 Datum
01700 text_smaller(PG_FUNCTION_ARGS)
01701 {
01702     text       *arg1 = PG_GETARG_TEXT_PP(0);
01703     text       *arg2 = PG_GETARG_TEXT_PP(1);
01704     text       *result;
01705 
01706     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
01707 
01708     PG_RETURN_TEXT_P(result);
01709 }
01710 
01711 
01712 /*
01713  * The following operators support character-by-character comparison
01714  * of text datums, to allow building indexes suitable for LIKE clauses.
01715  * Note that the regular texteq/textne comparison operators are assumed
01716  * to be compatible with these!
01717  */
01718 
01719 static int
01720 internal_text_pattern_compare(text *arg1, text *arg2)
01721 {
01722     int         result;
01723     int         len1,
01724                 len2;
01725 
01726     len1 = VARSIZE_ANY_EXHDR(arg1);
01727     len2 = VARSIZE_ANY_EXHDR(arg2);
01728 
01729     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
01730     if (result != 0)
01731         return result;
01732     else if (len1 < len2)
01733         return -1;
01734     else if (len1 > len2)
01735         return 1;
01736     else
01737         return 0;
01738 }
01739 
01740 
01741 Datum
01742 text_pattern_lt(PG_FUNCTION_ARGS)
01743 {
01744     text       *arg1 = PG_GETARG_TEXT_PP(0);
01745     text       *arg2 = PG_GETARG_TEXT_PP(1);
01746     int         result;
01747 
01748     result = internal_text_pattern_compare(arg1, arg2);
01749 
01750     PG_FREE_IF_COPY(arg1, 0);
01751     PG_FREE_IF_COPY(arg2, 1);
01752 
01753     PG_RETURN_BOOL(result < 0);
01754 }
01755 
01756 
01757 Datum
01758 text_pattern_le(PG_FUNCTION_ARGS)
01759 {
01760     text       *arg1 = PG_GETARG_TEXT_PP(0);
01761     text       *arg2 = PG_GETARG_TEXT_PP(1);
01762     int         result;
01763 
01764     result = internal_text_pattern_compare(arg1, arg2);
01765 
01766     PG_FREE_IF_COPY(arg1, 0);
01767     PG_FREE_IF_COPY(arg2, 1);
01768 
01769     PG_RETURN_BOOL(result <= 0);
01770 }
01771 
01772 
01773 Datum
01774 text_pattern_ge(PG_FUNCTION_ARGS)
01775 {
01776     text       *arg1 = PG_GETARG_TEXT_PP(0);
01777     text       *arg2 = PG_GETARG_TEXT_PP(1);
01778     int         result;
01779 
01780     result = internal_text_pattern_compare(arg1, arg2);
01781 
01782     PG_FREE_IF_COPY(arg1, 0);
01783     PG_FREE_IF_COPY(arg2, 1);
01784 
01785     PG_RETURN_BOOL(result >= 0);
01786 }
01787 
01788 
01789 Datum
01790 text_pattern_gt(PG_FUNCTION_ARGS)
01791 {
01792     text       *arg1 = PG_GETARG_TEXT_PP(0);
01793     text       *arg2 = PG_GETARG_TEXT_PP(1);
01794     int         result;
01795 
01796     result = internal_text_pattern_compare(arg1, arg2);
01797 
01798     PG_FREE_IF_COPY(arg1, 0);
01799     PG_FREE_IF_COPY(arg2, 1);
01800 
01801     PG_RETURN_BOOL(result > 0);
01802 }
01803 
01804 
01805 Datum
01806 bttext_pattern_cmp(PG_FUNCTION_ARGS)
01807 {
01808     text       *arg1 = PG_GETARG_TEXT_PP(0);
01809     text       *arg2 = PG_GETARG_TEXT_PP(1);
01810     int         result;
01811 
01812     result = internal_text_pattern_compare(arg1, arg2);
01813 
01814     PG_FREE_IF_COPY(arg1, 0);
01815     PG_FREE_IF_COPY(arg2, 1);
01816 
01817     PG_RETURN_INT32(result);
01818 }
01819 
01820 
01821 /*-------------------------------------------------------------
01822  * byteaoctetlen
01823  *
01824  * get the number of bytes contained in an instance of type 'bytea'
01825  *-------------------------------------------------------------
01826  */
01827 Datum
01828 byteaoctetlen(PG_FUNCTION_ARGS)
01829 {
01830     Datum       str = PG_GETARG_DATUM(0);
01831 
01832     /* We need not detoast the input at all */
01833     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
01834 }
01835 
01836 /*
01837  * byteacat -
01838  *    takes two bytea* and returns a bytea* that is the concatenation of
01839  *    the two.
01840  *
01841  * Cloned from textcat and modified as required.
01842  */
01843 Datum
01844 byteacat(PG_FUNCTION_ARGS)
01845 {
01846     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
01847     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
01848 
01849     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
01850 }
01851 
01852 /*
01853  * bytea_catenate
01854  *  Guts of byteacat(), broken out so it can be used by other functions
01855  *
01856  * Arguments can be in short-header form, but not compressed or out-of-line
01857  */
01858 static bytea *
01859 bytea_catenate(bytea *t1, bytea *t2)
01860 {
01861     bytea      *result;
01862     int         len1,
01863                 len2,
01864                 len;
01865     char       *ptr;
01866 
01867     len1 = VARSIZE_ANY_EXHDR(t1);
01868     len2 = VARSIZE_ANY_EXHDR(t2);
01869 
01870     /* paranoia ... probably should throw error instead? */
01871     if (len1 < 0)
01872         len1 = 0;
01873     if (len2 < 0)
01874         len2 = 0;
01875 
01876     len = len1 + len2 + VARHDRSZ;
01877     result = (bytea *) palloc(len);
01878 
01879     /* Set size of result string... */
01880     SET_VARSIZE(result, len);
01881 
01882     /* Fill data field of result string... */
01883     ptr = VARDATA(result);
01884     if (len1 > 0)
01885         memcpy(ptr, VARDATA_ANY(t1), len1);
01886     if (len2 > 0)
01887         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
01888 
01889     return result;
01890 }
01891 
/*
 * PG_STR_GET_BYTEA
 *    Turn the C string str_ into a bytea pointer by passing it through the
 *    byteain function via DirectFunctionCall1().
 */
#define PG_STR_GET_BYTEA(str_) \
    DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
01894 
01895 /*
01896  * bytea_substr()
01897  * Return a substring starting at the specified position.
01898  * Cloned from text_substr and modified as required.
01899  *
01900  * Input:
01901  *  - string
01902  *  - starting position (is one-based)
01903  *  - string length (optional)
01904  *
01905  * If the starting position is zero or less, then return from the start of the string
01906  * adjusting the length to be consistent with the "negative start" per SQL.
01907  * If the length is less than zero, an ERROR is thrown. If no third argument
01908  * (length) is provided, the length to the end of the string is assumed.
01909  */
01910 Datum
01911 bytea_substr(PG_FUNCTION_ARGS)
01912 {
01913     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
01914                                       PG_GETARG_INT32(1),
01915                                       PG_GETARG_INT32(2),
01916                                       false));
01917 }
01918 
01919 /*
01920  * bytea_substr_no_len -
01921  *    Wrapper to avoid opr_sanity failure due to
01922  *    one function accepting a different number of args.
01923  */
01924 Datum
01925 bytea_substr_no_len(PG_FUNCTION_ARGS)
01926 {
01927     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
01928                                       PG_GETARG_INT32(1),
01929                                       -1,
01930                                       true));
01931 }
01932 
01933 static bytea *
01934 bytea_substring(Datum str,
01935                 int S,
01936                 int L,
01937                 bool length_not_specified)
01938 {
01939     int         S1;             /* adjusted start position */
01940     int         L1;             /* adjusted substring length */
01941 
01942     S1 = Max(S, 1);
01943 
01944     if (length_not_specified)
01945     {
01946         /*
01947          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
01948          * end of the string if we pass it a negative value for length.
01949          */
01950         L1 = -1;
01951     }
01952     else
01953     {
01954         /* end position */
01955         int         E = S + L;
01956 
01957         /*
01958          * A negative value for L is the only way for the end position to be
01959          * before the start. SQL99 says to throw an error.
01960          */
01961         if (E < S)
01962             ereport(ERROR,
01963                     (errcode(ERRCODE_SUBSTRING_ERROR),
01964                      errmsg("negative substring length not allowed")));
01965 
01966         /*
01967          * A zero or negative value for the end position can happen if the
01968          * start was negative or one. SQL99 says to return a zero-length
01969          * string.
01970          */
01971         if (E < 1)
01972             return PG_STR_GET_BYTEA("");
01973 
01974         L1 = E - S1;
01975     }
01976 
01977     /*
01978      * If the start position is past the end of the string, SQL99 says to
01979      * return a zero-length string -- DatumGetByteaPSlice() will do that for
01980      * us. Convert to zero-based starting position
01981      */
01982     return DatumGetByteaPSlice(str, S1 - 1, L1);
01983 }
01984 
01985 /*
01986  * byteaoverlay
01987  *  Replace specified substring of first string with second
01988  *
01989  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
01990  * This code is a direct implementation of what the standard says.
01991  */
01992 Datum
01993 byteaoverlay(PG_FUNCTION_ARGS)
01994 {
01995     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
01996     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
01997     int         sp = PG_GETARG_INT32(2);        /* substring start position */
01998     int         sl = PG_GETARG_INT32(3);        /* substring length */
01999 
02000     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
02001 }
02002 
02003 Datum
02004 byteaoverlay_no_len(PG_FUNCTION_ARGS)
02005 {
02006     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
02007     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
02008     int         sp = PG_GETARG_INT32(2);        /* substring start position */
02009     int         sl;
02010 
02011     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
02012     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
02013 }
02014 
02015 static bytea *
02016 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
02017 {
02018     bytea      *result;
02019     bytea      *s1;
02020     bytea      *s2;
02021     int         sp_pl_sl;
02022 
02023     /*
02024      * Check for possible integer-overflow cases.  For negative sp, throw a
02025      * "substring length" error because that's what should be expected
02026      * according to the spec's definition of OVERLAY().
02027      */
02028     if (sp <= 0)
02029         ereport(ERROR,
02030                 (errcode(ERRCODE_SUBSTRING_ERROR),
02031                  errmsg("negative substring length not allowed")));
02032     sp_pl_sl = sp + sl;
02033     if (sp_pl_sl <= sl)
02034         ereport(ERROR,
02035                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
02036                  errmsg("integer out of range")));
02037 
02038     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
02039     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
02040     result = bytea_catenate(s1, t2);
02041     result = bytea_catenate(result, s2);
02042 
02043     return result;
02044 }
02045 
02046 /*
02047  * byteapos -
02048  *    Return the position of the specified substring.
02049  *    Implements the SQL POSITION() function.
02050  * Cloned from textpos and modified as required.
02051  */
02052 Datum
02053 byteapos(PG_FUNCTION_ARGS)
02054 {
02055     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
02056     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
02057     int         pos;
02058     int         px,
02059                 p;
02060     int         len1,
02061                 len2;
02062     char       *p1,
02063                *p2;
02064 
02065     len1 = VARSIZE_ANY_EXHDR(t1);
02066     len2 = VARSIZE_ANY_EXHDR(t2);
02067 
02068     if (len2 <= 0)
02069         PG_RETURN_INT32(1);     /* result for empty pattern */
02070 
02071     p1 = VARDATA_ANY(t1);
02072     p2 = VARDATA_ANY(t2);
02073 
02074     pos = 0;
02075     px = (len1 - len2);
02076     for (p = 0; p <= px; p++)
02077     {
02078         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
02079         {
02080             pos = p + 1;
02081             break;
02082         };
02083         p1++;
02084     };
02085 
02086     PG_RETURN_INT32(pos);
02087 }
02088 
02089 /*-------------------------------------------------------------
02090  * byteaGetByte
02091  *
02092  * this routine treats "bytea" as an array of bytes.
02093  * It returns the Nth byte (a number between 0 and 255).
02094  *-------------------------------------------------------------
02095  */
02096 Datum
02097 byteaGetByte(PG_FUNCTION_ARGS)
02098 {
02099     bytea      *v = PG_GETARG_BYTEA_PP(0);
02100     int32       n = PG_GETARG_INT32(1);
02101     int         len;
02102     int         byte;
02103 
02104     len = VARSIZE_ANY_EXHDR(v);
02105 
02106     if (n < 0 || n >= len)
02107         ereport(ERROR,
02108                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
02109                  errmsg("index %d out of valid range, 0..%d",
02110                         n, len - 1)));
02111 
02112     byte = ((unsigned char *) VARDATA_ANY(v))[n];
02113 
02114     PG_RETURN_INT32(byte);
02115 }
02116 
02117 /*-------------------------------------------------------------
02118  * byteaGetBit
02119  *
02120  * This routine treats a "bytea" type like an array of bits.
02121  * It returns the value of the Nth bit (0 or 1).
02122  *
02123  *-------------------------------------------------------------
02124  */
02125 Datum
02126 byteaGetBit(PG_FUNCTION_ARGS)
02127 {
02128     bytea      *v = PG_GETARG_BYTEA_PP(0);
02129     int32       n = PG_GETARG_INT32(1);
02130     int         byteNo,
02131                 bitNo;
02132     int         len;
02133     int         byte;
02134 
02135     len = VARSIZE_ANY_EXHDR(v);
02136 
02137     if (n < 0 || n >= len * 8)
02138         ereport(ERROR,
02139                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
02140                  errmsg("index %d out of valid range, 0..%d",
02141                         n, len * 8 - 1)));
02142 
02143     byteNo = n / 8;
02144     bitNo = n % 8;
02145 
02146     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
02147 
02148     if (byte & (1 << bitNo))
02149         PG_RETURN_INT32(1);
02150     else
02151         PG_RETURN_INT32(0);
02152 }
02153 
02154 /*-------------------------------------------------------------
02155  * byteaSetByte
02156  *
02157  * Given an instance of type 'bytea' creates a new one with
02158  * the Nth byte set to the given value.
02159  *
02160  *-------------------------------------------------------------
02161  */
02162 Datum
02163 byteaSetByte(PG_FUNCTION_ARGS)
02164 {
02165     bytea      *v = PG_GETARG_BYTEA_P(0);
02166     int32       n = PG_GETARG_INT32(1);
02167     int32       newByte = PG_GETARG_INT32(2);
02168     int         len;
02169     bytea      *res;
02170 
02171     len = VARSIZE(v) - VARHDRSZ;
02172 
02173     if (n < 0 || n >= len)
02174         ereport(ERROR,
02175                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
02176                  errmsg("index %d out of valid range, 0..%d",
02177                         n, len - 1)));
02178 
02179     /*
02180      * Make a copy of the original varlena.
02181      */
02182     res = (bytea *) palloc(VARSIZE(v));
02183     memcpy((char *) res, (char *) v, VARSIZE(v));
02184 
02185     /*
02186      * Now set the byte.
02187      */
02188     ((unsigned char *) VARDATA(res))[n] = newByte;
02189 
02190     PG_RETURN_BYTEA_P(res);
02191 }
02192 
02193 /*-------------------------------------------------------------
02194  * byteaSetBit
02195  *
02196  * Given an instance of type 'bytea' creates a new one with
02197  * the Nth bit set to the given value.
02198  *
02199  *-------------------------------------------------------------
02200  */
02201 Datum
02202 byteaSetBit(PG_FUNCTION_ARGS)
02203 {
02204     bytea      *v = PG_GETARG_BYTEA_P(0);
02205     int32       n = PG_GETARG_INT32(1);
02206     int32       newBit = PG_GETARG_INT32(2);
02207     bytea      *res;
02208     int         len;
02209     int         oldByte,
02210                 newByte;
02211     int         byteNo,
02212                 bitNo;
02213 
02214     len = VARSIZE(v) - VARHDRSZ;
02215 
02216     if (n < 0 || n >= len * 8)
02217         ereport(ERROR,
02218                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
02219                  errmsg("index %d out of valid range, 0..%d",
02220                         n, len * 8 - 1)));
02221 
02222     byteNo = n / 8;
02223     bitNo = n % 8;
02224 
02225     /*
02226      * sanity check!
02227      */
02228     if (newBit != 0 && newBit != 1)
02229         ereport(ERROR,
02230                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
02231                  errmsg("new bit must be 0 or 1")));
02232 
02233     /*
02234      * Make a copy of the original varlena.
02235      */
02236     res = (bytea *) palloc(VARSIZE(v));
02237     memcpy((char *) res, (char *) v, VARSIZE(v));
02238 
02239     /*
02240      * Update the byte.
02241      */
02242     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
02243 
02244     if (newBit == 0)
02245         newByte = oldByte & (~(1 << bitNo));
02246     else
02247         newByte = oldByte | (1 << bitNo);
02248 
02249     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
02250 
02251     PG_RETURN_BYTEA_P(res);
02252 }
02253 
02254 
02255 /* text_name()
02256  * Converts a text type to a Name type.
02257  */
02258 Datum
02259 text_name(PG_FUNCTION_ARGS)
02260 {
02261     text       *s = PG_GETARG_TEXT_PP(0);
02262     Name        result;
02263     int         len;
02264 
02265     len = VARSIZE_ANY_EXHDR(s);
02266 
02267     /* Truncate oversize input */
02268     if (len >= NAMEDATALEN)
02269         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
02270 
02271     /* We use palloc0 here to ensure result is zero-padded */
02272     result = (Name) palloc0(NAMEDATALEN);
02273     memcpy(NameStr(*result), VARDATA_ANY(s), len);
02274 
02275     PG_RETURN_NAME(result);
02276 }
02277 
02278 /* name_text()
02279  * Converts a Name type to a text type.
02280  */
02281 Datum
02282 name_text(PG_FUNCTION_ARGS)
02283 {
02284     Name        s = PG_GETARG_NAME(0);
02285 
02286     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
02287 }
02288 
02289 
02290 /*
02291  * textToQualifiedNameList - convert a text object to list of names
02292  *
02293  * This implements the input parsing needed by nextval() and other
02294  * functions that take a text parameter representing a qualified name.
02295  * We split the name at dots, downcase if not double-quoted, and
02296  * truncate names if they're too long.
02297  */
02298 List *
02299 textToQualifiedNameList(text *textval)
02300 {
02301     char       *rawname;
02302     List       *result = NIL;
02303     List       *namelist;
02304     ListCell   *l;
02305 
02306     /* Convert to C string (handles possible detoasting). */
02307     /* Note we rely on being able to modify rawname below. */
02308     rawname = text_to_cstring(textval);
02309 
02310     if (!SplitIdentifierString(rawname, '.', &namelist))
02311         ereport(ERROR,
02312                 (errcode(ERRCODE_INVALID_NAME),
02313                  errmsg("invalid name syntax")));
02314 
02315     if (namelist == NIL)
02316         ereport(ERROR,
02317                 (errcode(ERRCODE_INVALID_NAME),
02318                  errmsg("invalid name syntax")));
02319 
02320     foreach(l, namelist)
02321     {
02322         char       *curname = (char *) lfirst(l);
02323 
02324         result = lappend(result, makeString(pstrdup(curname)));
02325     }
02326 
02327     pfree(rawname);
02328     list_free(namelist);
02329 
02330     return result;
02331 }
02332 
/*
 * SplitIdentifierString --- parse a string containing identifiers
 *
 * This is the guts of textToQualifiedNameList, and is exported for use in
 * other situations such as parsing GUC variables.  In the GUC case, it's
 * important to avoid memory leaks, so the API is designed to minimize the
 * amount of stuff that needs to be allocated and freed.
 *
 * Inputs:
 *  rawstring: the input string; must be overwritable!  On return, it's
 *             been modified to contain the separated identifiers.
 *  separator: the separator punctuation expected between identifiers
 *             (typically '.' or ',').  Whitespace may also appear around
 *             identifiers.
 * Outputs:
 *  namelist: filled with a palloc'd list of pointers to identifiers within
 *            rawstring.  Caller should list_free() this even on error return.
 *
 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
 *
 * Note that an empty string is considered okay here, though not in
 * textToQualifiedNameList.
 *
 * Because the list cells point into rawstring rather than at copies, the
 * identifiers are only valid for as long as rawstring itself is.
 */
bool
SplitIdentifierString(char *rawstring, char separator,
                      List **namelist)
{
    char       *nextp = rawstring;  /* current scan position */
    bool        done = false;       /* set once end of string is reached */

    /* Initialize the output first, so it is valid on every return path */
    *namelist = NIL;

    while (isspace((unsigned char) *nextp))
        nextp++;                /* skip leading whitespace */

    if (*nextp == '\0')
        return true;            /* allow empty string */

    /* At the top of the loop, we are at start of a new identifier. */
    do
    {
        char       *curname;    /* start of the identifier being isolated */
        char       *endp;       /* where its terminating null will be written */

        if (*nextp == '\"')
        {
            /* Quoted name --- collapse quote-quote pairs, no downcasing */
            curname = nextp + 1;
            for (;;)
            {
                endp = strchr(nextp + 1, '\"');
                if (endp == NULL)
                    return false;       /* mismatched quotes */
                if (endp[1] != '\"')
                    break;      /* found end of quoted name */
                /* Collapse adjacent quotes into one quote, and look again */
                /*
                 * strlen(endp) bytes starting at endp + 1 cover the rest of
                 * the string including its terminating null, so this shifts
                 * everything after the first quote left by one byte.
                 */
                memmove(endp, endp + 1, strlen(endp));
                /* resume the search just past the quote that was kept */
                nextp = endp;
            }
            /* endp now points at the terminating quote */
            nextp = endp + 1;
        }
        else
        {
            /* Unquoted name --- extends to separator or whitespace */
            char       *downname;
            int         len;

            curname = nextp;
            while (*nextp && *nextp != separator &&
                   !isspace((unsigned char) *nextp))
                nextp++;
            endp = nextp;
            if (curname == nextp)
                return false;   /* empty unquoted name not allowed */

            /*
             * Downcase the identifier, using same code as main lexer does.
             *
             * XXX because we want to overwrite the input in-place, we cannot
             * support a downcasing transformation that increases the string
             * length.  This is not a problem given the current implementation
             * of downcase_truncate_identifier, but we'll probably have to do
             * something about this someday.
             */
            len = endp - curname;
            downname = downcase_truncate_identifier(curname, len, false);
            Assert(strlen(downname) <= len);
            /*
             * strncpy writes exactly len bytes: it never touches anything
             * past the identifier, and zero-pads if downname is shorter.
             */
            strncpy(curname, downname, len);
            pfree(downname);
        }

        while (isspace((unsigned char) *nextp))
            nextp++;            /* skip trailing whitespace */

        if (*nextp == separator)
        {
            nextp++;
            while (isspace((unsigned char) *nextp))
                nextp++;        /* skip leading whitespace for next */
            /* we expect another name, so done remains false */
        }
        else if (*nextp == '\0')
            done = true;
        else
            return false;       /* invalid syntax */

        /* Now safe to overwrite separator with a null */
        /*
         * (For a quoted name endp is the closing quote; for an unquoted one
         * it is the first character after the name.  nextp has already moved
         * past it, so the scan position is unaffected.)
         */
        *endp = '\0';

        /* Truncate name if it's overlength */
        truncate_identifier(curname, strlen(curname), false);

        /*
         * Finished isolating current name --- add it to list
         */
        *namelist = lappend(*namelist, curname);

        /* Loop back if we didn't reach end of string */
    } while (!done);

    return true;
}
02456 
02457 
/*
 * SplitDirectoriesString --- parse a string containing directory names
 *
 * This is similar to SplitIdentifierString, except that the parsing
 * rules are meant to handle pathnames instead of identifiers: there is
 * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
 * and we apply canonicalize_path() to each extracted string.  Because of the
 * last, the returned strings are separately palloc'd rather than being
 * pointers into rawstring --- but we still scribble on rawstring.
 *
 * Inputs:
 *  rawstring: the input string; must be modifiable!
 *  separator: the separator punctuation expected between directories
 *             (typically ',' or ';').  Whitespace may also appear around
 *             directories.
 * Outputs:
 *  namelist: filled with a palloc'd list of directory names.
 *            Caller should list_free_deep() this even on error return.
 *
 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
 *
 * Note that an empty string is considered okay here.
 */
bool
SplitDirectoriesString(char *rawstring, char separator,
                       List **namelist)
{
    char       *nextp = rawstring;  /* current scan position */
    bool        done = false;       /* set once end of string is reached */

    /* Initialize the output first, so it is valid on every return path */
    *namelist = NIL;

    while (isspace((unsigned char) *nextp))
        nextp++;                /* skip leading whitespace */

    if (*nextp == '\0')
        return true;            /* allow empty string */

    /* At the top of the loop, we are at start of a new directory. */
    do
    {
        char       *curname;    /* start of the directory name being isolated */
        char       *endp;       /* where its terminating null will be written */

        if (*nextp == '\"')
        {
            /* Quoted name --- collapse quote-quote pairs */
            curname = nextp + 1;
            for (;;)
            {
                endp = strchr(nextp + 1, '\"');
                if (endp == NULL)
                    return false;       /* mismatched quotes */
                if (endp[1] != '\"')
                    break;      /* found end of quoted name */
                /* Collapse adjacent quotes into one quote, and look again */
                /*
                 * strlen(endp) bytes starting at endp + 1 cover the rest of
                 * the string including its terminating null, so this shifts
                 * everything after the first quote left by one byte.
                 */
                memmove(endp, endp + 1, strlen(endp));
                /* resume the search just past the quote that was kept */
                nextp = endp;
            }
            /* endp now points at the terminating quote */
            nextp = endp + 1;
        }
        else
        {
            /* Unquoted name --- extends to separator or end of string */
            curname = endp = nextp;
            while (*nextp && *nextp != separator)
            {
                /* trailing whitespace should not be included in name */
                /*
                 * endp trails just past the last non-space character seen,
                 * so embedded spaces are kept but trailing ones are dropped.
                 */
                if (!isspace((unsigned char) *nextp))
                    endp = nextp + 1;
                nextp++;
            }
            if (curname == endp)
                return false;   /* empty unquoted name not allowed */
        }

        while (isspace((unsigned char) *nextp))
            nextp++;            /* skip trailing whitespace */

        if (*nextp == separator)
        {
            nextp++;
            while (isspace((unsigned char) *nextp))
                nextp++;        /* skip leading whitespace for next */
            /* we expect another name, so done remains false */
        }
        else if (*nextp == '\0')
            done = true;
        else
            return false;       /* invalid syntax */

        /* Now safe to overwrite separator with a null */
        /* (nextp has already moved past endp, so the scan is unaffected) */
        *endp = '\0';

        /* Truncate path if it's overlength */
        /* (the cut is made in rawstring, within the current name) */
        if (strlen(curname) >= MAXPGPATH)
            curname[MAXPGPATH - 1] = '\0';

        /*
         * Finished isolating current name --- add it to list
         */
        /* Copy first: canonicalize_path() is applied to the copy */
        curname = pstrdup(curname);
        canonicalize_path(curname);
        *namelist = lappend(*namelist, curname);

        /* Loop back if we didn't reach end of string */
    } while (!done);

    return true;
}
02569 
02570 
02571 /*****************************************************************************
02572  *  Comparison Functions used for bytea
02573  *
02574  * Note: btree indexes need these routines not to leak memory; therefore,
02575  * be careful to free working copies of toasted datums.  Most places don't
02576  * need to be so careful.
02577  *****************************************************************************/
02578 
02579 Datum
02580 byteaeq(PG_FUNCTION_ARGS)
02581 {
02582     Datum       arg1 = PG_GETARG_DATUM(0);
02583     Datum       arg2 = PG_GETARG_DATUM(1);
02584     bool        result;
02585     Size        len1,
02586                 len2;
02587 
02588     /*
02589      * We can use a fast path for unequal lengths, which might save us from
02590      * having to detoast one or both values.
02591      */
02592     len1 = toast_raw_datum_size(arg1);
02593     len2 = toast_raw_datum_size(arg2);
02594     if (len1 != len2)
02595         result = false;
02596     else
02597     {
02598         bytea      *barg1 = DatumGetByteaPP(arg1);
02599         bytea      *barg2 = DatumGetByteaPP(arg2);
02600 
02601         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
02602                          len1 - VARHDRSZ) == 0);
02603 
02604         PG_FREE_IF_COPY(barg1, 0);
02605         PG_FREE_IF_COPY(barg2, 1);
02606     }
02607 
02608     PG_RETURN_BOOL(result);
02609 }
02610 
02611 Datum
02612 byteane(PG_FUNCTION_ARGS)
02613 {
02614     Datum       arg1 = PG_GETARG_DATUM(0);
02615     Datum       arg2 = PG_GETARG_DATUM(1);
02616     bool        result;
02617     Size        len1,
02618                 len2;
02619 
02620     /*
02621      * We can use a fast path for unequal lengths, which might save us from
02622      * having to detoast one or both values.
02623      */
02624     len1 = toast_raw_datum_size(arg1);
02625     len2 = toast_raw_datum_size(arg2);
02626     if (len1 != len2)
02627         result = true;
02628     else
02629     {
02630         bytea      *barg1 = DatumGetByteaPP(arg1);
02631         bytea      *barg2 = DatumGetByteaPP(arg2);
02632 
02633         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
02634                          len1 - VARHDRSZ) != 0);
02635 
02636         PG_FREE_IF_COPY(barg1, 0);
02637         PG_FREE_IF_COPY(barg2, 1);
02638     }
02639 
02640     PG_RETURN_BOOL(result);
02641 }
02642 
02643 Datum
02644 bytealt(PG_FUNCTION_ARGS)
02645 {
02646     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
02647     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
02648     int         len1,
02649                 len2;
02650     int         cmp;
02651 
02652     len1 = VARSIZE_ANY_EXHDR(arg1);
02653     len2 = VARSIZE_ANY_EXHDR(arg2);
02654 
02655     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
02656 
02657     PG_FREE_IF_COPY(arg1, 0);
02658     PG_FREE_IF_COPY(arg2, 1);
02659 
02660     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
02661 }
02662 
02663 Datum
02664 byteale(PG_FUNCTION_ARGS)
02665 {
02666     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
02667     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
02668     int         len1,
02669                 len2;
02670     int         cmp;
02671 
02672     len1 = VARSIZE_ANY_EXHDR(arg1);
02673     len2 = VARSIZE_ANY_EXHDR(arg2);
02674 
02675     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
02676 
02677     PG_FREE_IF_COPY(arg1, 0);
02678     PG_FREE_IF_COPY(arg2, 1);
02679 
02680     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
02681 }
02682 
02683 Datum
02684 byteagt(PG_FUNCTION_ARGS)
02685 {
02686     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
02687     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
02688     int         len1,
02689                 len2;
02690     int         cmp;
02691 
02692     len1 = VARSIZE_ANY_EXHDR(arg1);
02693     len2 = VARSIZE_ANY_EXHDR(arg2);
02694 
02695     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
02696 
02697     PG_FREE_IF_COPY(arg1, 0);
02698     PG_FREE_IF_COPY(arg2, 1);
02699 
02700     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
02701 }
02702 
02703 Datum
02704 byteage(PG_FUNCTION_ARGS)
02705 {
02706     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
02707     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
02708     int         len1,
02709                 len2;
02710     int         cmp;
02711 
02712     len1 = VARSIZE_ANY_EXHDR(arg1);
02713     len2 = VARSIZE_ANY_EXHDR(arg2);
02714 
02715     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
02716 
02717     PG_FREE_IF_COPY(arg1, 0);
02718     PG_FREE_IF_COPY(arg2, 1);
02719 
02720     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
02721 }
02722 
02723 Datum
02724 byteacmp(PG_FUNCTION_ARGS)
02725 {
02726     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
02727     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
02728     int         len1,
02729                 len2;
02730     int         cmp;
02731 
02732     len1 = VARSIZE_ANY_EXHDR(arg1);
02733     len2 = VARSIZE_ANY_EXHDR(arg2);
02734 
02735     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
02736     if ((cmp == 0) && (len1 != len2))
02737         cmp = (len1 < len2) ? -1 : 1;
02738 
02739     PG_FREE_IF_COPY(arg1, 0);
02740     PG_FREE_IF_COPY(arg2, 1);
02741 
02742     PG_RETURN_INT32(cmp);
02743 }
02744 
02745 /*
02746  * appendStringInfoText
02747  *
02748  * Append a text to str.
02749  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
02750  */
02751 static void
02752 appendStringInfoText(StringInfo str, const text *t)
02753 {
02754     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
02755 }
02756 
02757 /*
02758  * replace_text
02759  * replace all occurrences of 'old_sub_str' in 'orig_str'
02760  * with 'new_sub_str' to form 'new_str'
02761  *
02762  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
02763  * otherwise returns 'new_str'
02764  */
02765 Datum
02766 replace_text(PG_FUNCTION_ARGS)
02767 {
02768     text       *src_text = PG_GETARG_TEXT_PP(0);
02769     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
02770     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
02771     int         src_text_len;
02772     int         from_sub_text_len;
02773     TextPositionState state;
02774     text       *ret_text;
02775     int         start_posn;
02776     int         curr_posn;
02777     int         chunk_len;
02778     char       *start_ptr;
02779     StringInfoData str;
02780 
02781     text_position_setup(src_text, from_sub_text, &state);
02782 
02783     /*
02784      * Note: we check the converted string length, not the original, because
02785      * they could be different if the input contained invalid encoding.
02786      */
02787     src_text_len = state.len1;
02788     from_sub_text_len = state.len2;
02789 
02790     /* Return unmodified source string if empty source or pattern */
02791     if (src_text_len < 1 || from_sub_text_len < 1)
02792     {
02793         text_position_cleanup(&state);
02794         PG_RETURN_TEXT_P(src_text);
02795     }
02796 
02797     start_posn = 1;
02798     curr_posn = text_position_next(1, &state);
02799 
02800     /* When the from_sub_text is not found, there is nothing to do. */
02801     if (curr_posn == 0)
02802     {
02803         text_position_cleanup(&state);
02804         PG_RETURN_TEXT_P(src_text);
02805     }
02806 
02807     /* start_ptr points to the start_posn'th character of src_text */
02808     start_ptr = VARDATA_ANY(src_text);
02809 
02810     initStringInfo(&str);
02811 
02812     do
02813     {
02814         CHECK_FOR_INTERRUPTS();
02815 
02816         /* copy the data skipped over by last text_position_next() */
02817         chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
02818         appendBinaryStringInfo(&str, start_ptr, chunk_len);
02819 
02820         appendStringInfoText(&str, to_sub_text);
02821 
02822         start_posn = curr_posn;
02823         start_ptr += chunk_len;
02824         start_posn += from_sub_text_len;
02825         start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
02826 
02827         curr_posn = text_position_next(start_posn, &state);
02828     }
02829     while (curr_posn > 0);
02830 
02831     /* copy trailing data */
02832     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
02833     appendBinaryStringInfo(&str, start_ptr, chunk_len);
02834 
02835     text_position_cleanup(&state);
02836 
02837     ret_text = cstring_to_text_with_len(str.data, str.len);
02838     pfree(str.data);
02839 
02840     PG_RETURN_TEXT_P(ret_text);
02841 }
02842 
02843 /*
02844  * check_replace_text_has_escape_char
02845  *
02846  * check whether replace_text contains escape char.
02847  */
02848 static bool
02849 check_replace_text_has_escape_char(const text *replace_text)
02850 {
02851     const char *p = VARDATA_ANY(replace_text);
02852     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
02853 
02854     if (pg_database_encoding_max_length() == 1)
02855     {
02856         for (; p < p_end; p++)
02857         {
02858             if (*p == '\\')
02859                 return true;
02860         }
02861     }
02862     else
02863     {
02864         for (; p < p_end; p += pg_mblen(p))
02865         {
02866             if (*p == '\\')
02867                 return true;
02868         }
02869     }
02870 
02871     return false;
02872 }
02873 
02874 /*
02875  * appendStringInfoRegexpSubstr
02876  *
02877  * Append replace_text to str, substituting regexp back references for
02878  * \n escapes.  start_ptr is the start of the match in the source string,
02879  * at logical character position data_pos.
02880  */
02881 static void
02882 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
02883                              regmatch_t *pmatch,
02884                              char *start_ptr, int data_pos)
02885 {
02886     const char *p = VARDATA_ANY(replace_text);
02887     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
02888     int         eml = pg_database_encoding_max_length();
02889 
02890     for (;;)
02891     {
02892         const char *chunk_start = p;
02893         int         so;
02894         int         eo;
02895 
02896         /* Find next escape char. */
02897         if (eml == 1)
02898         {
02899             for (; p < p_end && *p != '\\'; p++)
02900                  /* nothing */ ;
02901         }
02902         else
02903         {
02904             for (; p < p_end && *p != '\\'; p += pg_mblen(p))
02905                  /* nothing */ ;
02906         }
02907 
02908         /* Copy the text we just scanned over, if any. */
02909         if (p > chunk_start)
02910             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
02911 
02912         /* Done if at end of string, else advance over escape char. */
02913         if (p >= p_end)
02914             break;
02915         p++;
02916 
02917         if (p >= p_end)
02918         {
02919             /* Escape at very end of input.  Treat same as unexpected char */
02920             appendStringInfoChar(str, '\\');
02921             break;
02922         }
02923 
02924         if (*p >= '1' && *p <= '9')
02925         {
02926             /* Use the back reference of regexp. */
02927             int         idx = *p - '0';
02928 
02929             so = pmatch[idx].rm_so;
02930             eo = pmatch[idx].rm_eo;
02931             p++;
02932         }
02933         else if (*p == '&')
02934         {
02935             /* Use the entire matched string. */
02936             so = pmatch[0].rm_so;
02937             eo = pmatch[0].rm_eo;
02938             p++;
02939         }
02940         else if (*p == '\\')
02941         {
02942             /* \\ means transfer one \ to output. */
02943             appendStringInfoChar(str, '\\');
02944             p++;
02945             continue;
02946         }
02947         else
02948         {
02949             /*
02950              * If escape char is not followed by any expected char, just treat
02951              * it as ordinary data to copy.  (XXX would it be better to throw
02952              * an error?)
02953              */
02954             appendStringInfoChar(str, '\\');
02955             continue;
02956         }
02957 
02958         if (so != -1 && eo != -1)
02959         {
02960             /*
02961              * Copy the text that is back reference of regexp.  Note so and eo
02962              * are counted in characters not bytes.
02963              */
02964             char       *chunk_start;
02965             int         chunk_len;
02966 
02967             Assert(so >= data_pos);
02968             chunk_start = start_ptr;
02969             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
02970             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
02971             appendBinaryStringInfo(str, chunk_start, chunk_len);
02972         }
02973     }
02974 }
02975 
02976 #define REGEXP_REPLACE_BACKREF_CNT      10
02977 
02978 /*
02979  * replace_text_regexp
02980  *
02981  * replace text that matches to regexp in src_text to replace_text.
02982  *
02983  * Note: to avoid having to include regex.h in builtins.h, we declare
02984  * the regexp argument as void *, but really it's regex_t *.
02985  */
02986 text *
02987 replace_text_regexp(text *src_text, void *regexp,
02988                     text *replace_text, bool glob)
02989 {
02990     text       *ret_text;
02991     regex_t    *re = (regex_t *) regexp;
02992     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
02993     StringInfoData buf;
02994     regmatch_t  pmatch[REGEXP_REPLACE_BACKREF_CNT];
02995     pg_wchar   *data;
02996     size_t      data_len;
02997     int         search_start;
02998     int         data_pos;
02999     char       *start_ptr;
03000     bool        have_escape;
03001 
03002     initStringInfo(&buf);
03003 
03004     /* Convert data string to wide characters. */
03005     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
03006     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
03007 
03008     /* Check whether replace_text has escape char. */
03009     have_escape = check_replace_text_has_escape_char(replace_text);
03010 
03011     /* start_ptr points to the data_pos'th character of src_text */
03012     start_ptr = (char *) VARDATA_ANY(src_text);
03013     data_pos = 0;
03014 
03015     search_start = 0;
03016     while (search_start <= data_len)
03017     {
03018         int         regexec_result;
03019 
03020         CHECK_FOR_INTERRUPTS();
03021 
03022         regexec_result = pg_regexec(re,
03023                                     data,
03024                                     data_len,
03025                                     search_start,
03026                                     NULL,       /* no details */
03027                                     REGEXP_REPLACE_BACKREF_CNT,
03028                                     pmatch,
03029                                     0);
03030 
03031         if (regexec_result == REG_NOMATCH)
03032             break;
03033 
03034         if (regexec_result != REG_OKAY)
03035         {
03036             char        errMsg[100];
03037 
03038             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
03039             ereport(ERROR,
03040                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
03041                      errmsg("regular expression failed: %s", errMsg)));
03042         }
03043 
03044         /*
03045          * Copy the text to the left of the match position.  Note we are given
03046          * character not byte indexes.
03047          */
03048         if (pmatch[0].rm_so - data_pos > 0)
03049         {
03050             int         chunk_len;
03051 
03052             chunk_len = charlen_to_bytelen(start_ptr,
03053                                            pmatch[0].rm_so - data_pos);
03054             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
03055 
03056             /*
03057              * Advance start_ptr over that text, to avoid multiple rescans of
03058              * it if the replace_text contains multiple back-references.
03059              */
03060             start_ptr += chunk_len;
03061             data_pos = pmatch[0].rm_so;
03062         }
03063 
03064         /*
03065          * Copy the replace_text. Process back references when the
03066          * replace_text has escape characters.
03067          */
03068         if (have_escape)
03069             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
03070                                          start_ptr, data_pos);
03071         else
03072             appendStringInfoText(&buf, replace_text);
03073 
03074         /* Advance start_ptr and data_pos over the matched text. */
03075         start_ptr += charlen_to_bytelen(start_ptr,
03076                                         pmatch[0].rm_eo - data_pos);
03077         data_pos = pmatch[0].rm_eo;
03078 
03079         /*
03080          * When global option is off, replace the first instance only.
03081          */
03082         if (!glob)
03083             break;
03084 
03085         /*
03086          * Search from next character when the matching text is zero width.
03087          */
03088         search_start = data_pos;
03089         if (pmatch[0].rm_so == pmatch[0].rm_eo)
03090             search_start++;
03091     }
03092 
03093     /*
03094      * Copy the text to the right of the last match.
03095      */
03096     if (data_pos < data_len)
03097     {
03098         int         chunk_len;
03099 
03100         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
03101         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
03102     }
03103 
03104     ret_text = cstring_to_text_with_len(buf.data, buf.len);
03105     pfree(buf.data);
03106     pfree(data);
03107 
03108     return ret_text;
03109 }
03110 
03111 /*
03112  * split_text
03113  * parse input string
03114  * return ord item (1 based)
03115  * based on provided field separator
03116  */
03117 Datum
03118 split_text(PG_FUNCTION_ARGS)
03119 {
03120     text       *inputstring = PG_GETARG_TEXT_PP(0);
03121     text       *fldsep = PG_GETARG_TEXT_PP(1);
03122     int         fldnum = PG_GETARG_INT32(2);
03123     int         inputstring_len;
03124     int         fldsep_len;
03125     TextPositionState state;
03126     int         start_posn;
03127     int         end_posn;
03128     text       *result_text;
03129 
03130     /* field number is 1 based */
03131     if (fldnum < 1)
03132         ereport(ERROR,
03133                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
03134                  errmsg("field position must be greater than zero")));
03135 
03136     text_position_setup(inputstring, fldsep, &state);
03137 
03138     /*
03139      * Note: we check the converted string length, not the original, because
03140      * they could be different if the input contained invalid encoding.
03141      */
03142     inputstring_len = state.len1;
03143     fldsep_len = state.len2;
03144 
03145     /* return empty string for empty input string */
03146     if (inputstring_len < 1)
03147     {
03148         text_position_cleanup(&state);
03149         PG_RETURN_TEXT_P(cstring_to_text(""));
03150     }
03151 
03152     /* empty field separator */
03153     if (fldsep_len < 1)
03154     {
03155         text_position_cleanup(&state);
03156         /* if first field, return input string, else empty string */
03157         if (fldnum == 1)
03158             PG_RETURN_TEXT_P(inputstring);
03159         else
03160             PG_RETURN_TEXT_P(cstring_to_text(""));
03161     }
03162 
03163     /* identify bounds of first field */
03164     start_posn = 1;
03165     end_posn = text_position_next(1, &state);
03166 
03167     /* special case if fldsep not found at all */
03168     if (end_posn == 0)
03169     {
03170         text_position_cleanup(&state);
03171         /* if field 1 requested, return input string, else empty string */
03172         if (fldnum == 1)
03173             PG_RETURN_TEXT_P(inputstring);
03174         else
03175             PG_RETURN_TEXT_P(cstring_to_text(""));
03176     }
03177 
03178     while (end_posn > 0 && --fldnum > 0)
03179     {
03180         /* identify bounds of next field */
03181         start_posn = end_posn + fldsep_len;
03182         end_posn = text_position_next(start_posn, &state);
03183     }
03184 
03185     text_position_cleanup(&state);
03186 
03187     if (fldnum > 0)
03188     {
03189         /* N'th field separator not found */
03190         /* if last field requested, return it, else empty string */
03191         if (fldnum == 1)
03192             result_text = text_substring(PointerGetDatum(inputstring),
03193                                          start_posn,
03194                                          -1,
03195                                          true);
03196         else
03197             result_text = cstring_to_text("");
03198     }
03199     else
03200     {
03201         /* non-last field requested */
03202         result_text = text_substring(PointerGetDatum(inputstring),
03203                                      start_posn,
03204                                      end_posn - start_posn,
03205                                      false);
03206     }
03207 
03208     PG_RETURN_TEXT_P(result_text);
03209 }
03210 
03211 /*
03212  * Convenience function to return true when two text params are equal.
03213  */
03214 static bool
03215 text_isequal(text *txt1, text *txt2)
03216 {
03217     return DatumGetBool(DirectFunctionCall2(texteq,
03218                                             PointerGetDatum(txt1),
03219                                             PointerGetDatum(txt2)));
03220 }
03221 
03222 /*
03223  * text_to_array
03224  * parse input string and return text array of elements,
03225  * based on provided field separator
03226  */
03227 Datum
03228 text_to_array(PG_FUNCTION_ARGS)
03229 {
03230     return text_to_array_internal(fcinfo);
03231 }
03232 
03233 /*
03234  * text_to_array_null
03235  * parse input string and return text array of elements,
03236  * based on provided field separator and null string
03237  *
03238  * This is a separate entry point only to prevent the regression tests from
03239  * complaining about different argument sets for the same internal function.
03240  */
03241 Datum
03242 text_to_array_null(PG_FUNCTION_ARGS)
03243 {
03244     return text_to_array_internal(fcinfo);
03245 }
03246 
03247 /*
03248  * common code for text_to_array and text_to_array_null functions
03249  *
03250  * These are not strict so we have to test for null inputs explicitly.
03251  */
03252 static Datum
03253 text_to_array_internal(PG_FUNCTION_ARGS)
03254 {
03255     text       *inputstring;
03256     text       *fldsep;
03257     text       *null_string;
03258     int         inputstring_len;
03259     int         fldsep_len;
03260     char       *start_ptr;
03261     text       *result_text;
03262     bool        is_null;
03263     ArrayBuildState *astate = NULL;
03264 
03265     /* when input string is NULL, then result is NULL too */
03266     if (PG_ARGISNULL(0))
03267         PG_RETURN_NULL();
03268 
03269     inputstring = PG_GETARG_TEXT_PP(0);
03270 
03271     /* fldsep can be NULL */
03272     if (!PG_ARGISNULL(1))
03273         fldsep = PG_GETARG_TEXT_PP(1);
03274     else
03275         fldsep = NULL;
03276 
03277     /* null_string can be NULL or omitted */
03278     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
03279         null_string = PG_GETARG_TEXT_PP(2);
03280     else
03281         null_string = NULL;
03282 
03283     if (fldsep != NULL)
03284     {
03285         /*
03286          * Normal case with non-null fldsep.  Use the text_position machinery
03287          * to search for occurrences of fldsep.
03288          */
03289         TextPositionState state;
03290         int         fldnum;
03291         int         start_posn;
03292         int         end_posn;
03293         int         chunk_len;
03294 
03295         text_position_setup(inputstring, fldsep, &state);
03296 
03297         /*
03298          * Note: we check the converted string length, not the original,
03299          * because they could be different if the input contained invalid
03300          * encoding.
03301          */
03302         inputstring_len = state.len1;
03303         fldsep_len = state.len2;
03304 
03305         /* return empty array for empty input string */
03306         if (inputstring_len < 1)
03307         {
03308             text_position_cleanup(&state);
03309             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
03310         }
03311 
03312         /*
03313          * empty field separator: return the input string as a one-element
03314          * array
03315          */
03316         if (fldsep_len < 1)
03317         {
03318             text_position_cleanup(&state);
03319             /* single element can be a NULL too */
03320             is_null = null_string ? text_isequal(inputstring, null_string) : false;
03321             PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
03322                                                 PointerGetDatum(inputstring),
03323                                                          is_null, 1));
03324         }
03325 
03326         start_posn = 1;
03327         /* start_ptr points to the start_posn'th character of inputstring */
03328         start_ptr = VARDATA_ANY(inputstring);
03329 
03330         for (fldnum = 1;; fldnum++)     /* field number is 1 based */
03331         {
03332             CHECK_FOR_INTERRUPTS();
03333 
03334             end_posn = text_position_next(start_posn, &state);
03335 
03336             if (end_posn == 0)
03337             {
03338                 /* fetch last field */
03339                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
03340             }
03341             else
03342             {
03343                 /* fetch non-last field */
03344                 chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
03345             }
03346 
03347             /* must build a temp text datum to pass to accumArrayResult */
03348             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
03349             is_null = null_string ? text_isequal(result_text, null_string) : false;
03350 
03351             /* stash away this field */
03352             astate = accumArrayResult(astate,
03353                                       PointerGetDatum(result_text),
03354                                       is_null,
03355                                       TEXTOID,
03356                                       CurrentMemoryContext);
03357 
03358             pfree(result_text);
03359 
03360             if (end_posn == 0)
03361                 break;
03362 
03363             start_posn = end_posn;
03364             start_ptr += chunk_len;
03365             start_posn += fldsep_len;
03366             start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
03367         }
03368 
03369         text_position_cleanup(&state);
03370     }
03371     else
03372     {
03373         /*
03374          * When fldsep is NULL, each character in the inputstring becomes an
03375          * element in the result array.  The separator is effectively the
03376          * space between characters.
03377          */
03378         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
03379 
03380         /* return empty array for empty input string */
03381         if (inputstring_len < 1)
03382             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
03383 
03384         start_ptr = VARDATA_ANY(inputstring);
03385 
03386         while (inputstring_len > 0)
03387         {
03388             int         chunk_len = pg_mblen(start_ptr);
03389 
03390             CHECK_FOR_INTERRUPTS();
03391 
03392             /* must build a temp text datum to pass to accumArrayResult */
03393             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
03394             is_null = null_string ? text_isequal(result_text, null_string) : false;
03395 
03396             /* stash away this field */
03397             astate = accumArrayResult(astate,
03398                                       PointerGetDatum(result_text),
03399                                       is_null,
03400                                       TEXTOID,
03401                                       CurrentMemoryContext);
03402 
03403             pfree(result_text);
03404 
03405             start_ptr += chunk_len;
03406             inputstring_len -= chunk_len;
03407         }
03408     }
03409 
03410     PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
03411                                           CurrentMemoryContext));
03412 }
03413 
03414 /*
03415  * array_to_text
03416  * concatenate Cstring representation of input array elements
03417  * using provided field separator
03418  */
03419 Datum
03420 array_to_text(PG_FUNCTION_ARGS)
03421 {
03422     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
03423     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
03424 
03425     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
03426 }
03427 
03428 /*
03429  * array_to_text_null
03430  * concatenate Cstring representation of input array elements
03431  * using provided field separator and null string
03432  *
03433  * This version is not strict so we have to test for null inputs explicitly.
03434  */
03435 Datum
03436 array_to_text_null(PG_FUNCTION_ARGS)
03437 {
03438     ArrayType  *v;
03439     char       *fldsep;
03440     char       *null_string;
03441 
03442     /* returns NULL when first or second parameter is NULL */
03443     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
03444         PG_RETURN_NULL();
03445 
03446     v = PG_GETARG_ARRAYTYPE_P(0);
03447     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
03448 
03449     /* NULL null string is passed through as a null pointer */
03450     if (!PG_ARGISNULL(2))
03451         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
03452     else
03453         null_string = NULL;
03454 
03455     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
03456 }
03457 
03458 /*
03459  * common code for array_to_text and array_to_text_null functions
03460  */
03461 static text *
03462 array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
03463                        const char *fldsep, const char *null_string)
03464 {
03465     text       *result;
03466     int         nitems,
03467                *dims,
03468                 ndims;
03469     Oid         element_type;
03470     int         typlen;
03471     bool        typbyval;
03472     char        typalign;
03473     StringInfoData buf;
03474     bool        printed = false;
03475     char       *p;
03476     bits8      *bitmap;
03477     int         bitmask;
03478     int         i;
03479     ArrayMetaState *my_extra;
03480 
03481     ndims = ARR_NDIM(v);
03482     dims = ARR_DIMS(v);
03483     nitems = ArrayGetNItems(ndims, dims);
03484 
03485     /* if there are no elements, return an empty string */
03486     if (nitems == 0)
03487         return cstring_to_text_with_len("", 0);
03488 
03489     element_type = ARR_ELEMTYPE(v);
03490     initStringInfo(&buf);
03491 
03492     /*
03493      * We arrange to look up info about element type, including its output
03494      * conversion proc, only once per series of calls, assuming the element
03495      * type doesn't change underneath us.
03496      */
03497     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
03498     if (my_extra == NULL)
03499     {
03500         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
03501                                                       sizeof(ArrayMetaState));
03502         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
03503         my_extra->element_type = ~element_type;
03504     }
03505 
03506     if (my_extra->element_type != element_type)
03507     {
03508         /*
03509          * Get info about element type, including its output conversion proc
03510          */
03511         get_type_io_data(element_type, IOFunc_output,
03512                          &my_extra->typlen, &my_extra->typbyval,
03513                          &my_extra->typalign, &my_extra->typdelim,
03514                          &my_extra->typioparam, &my_extra->typiofunc);
03515         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
03516                       fcinfo->flinfo->fn_mcxt);
03517         my_extra->element_type = element_type;
03518     }
03519     typlen = my_extra->typlen;
03520     typbyval = my_extra->typbyval;
03521     typalign = my_extra->typalign;
03522 
03523     p = ARR_DATA_PTR(v);
03524     bitmap = ARR_NULLBITMAP(v);
03525     bitmask = 1;
03526 
03527     for (i = 0; i < nitems; i++)
03528     {
03529         Datum       itemvalue;
03530         char       *value;
03531 
03532         /* Get source element, checking for NULL */
03533         if (bitmap && (*bitmap & bitmask) == 0)
03534         {
03535             /* if null_string is NULL, we just ignore null elements */
03536             if (null_string != NULL)
03537             {
03538                 if (printed)
03539                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
03540                 else
03541                     appendStringInfoString(&buf, null_string);
03542                 printed = true;
03543             }
03544         }
03545         else
03546         {
03547             itemvalue = fetch_att(p, typbyval, typlen);
03548 
03549             value = OutputFunctionCall(&my_extra->proc, itemvalue);
03550 
03551             if (printed)
03552                 appendStringInfo(&buf, "%s%s", fldsep, value);
03553             else
03554                 appendStringInfoString(&buf, value);
03555             printed = true;
03556 
03557             p = att_addlength_pointer(p, typlen, p);
03558             p = (char *) att_align_nominal(p, typalign);
03559         }
03560 
03561         /* advance bitmap pointer if any */
03562         if (bitmap)
03563         {
03564             bitmask <<= 1;
03565             if (bitmask == 0x100)
03566             {
03567                 bitmap++;
03568                 bitmask = 1;
03569             }
03570         }
03571     }
03572 
03573     result = cstring_to_text_with_len(buf.data, buf.len);
03574     pfree(buf.data);
03575 
03576     return result;
03577 }
03578 
03579 #define HEXBASE 16
03580 /*
03581  * Convert a int32 to a string containing a base 16 (hex) representation of
03582  * the number.
03583  */
03584 Datum
03585 to_hex32(PG_FUNCTION_ARGS)
03586 {
03587     uint32      value = (uint32) PG_GETARG_INT32(0);
03588     char       *ptr;
03589     const char *digits = "0123456789abcdef";
03590     char        buf[32];        /* bigger than needed, but reasonable */
03591 
03592     ptr = buf + sizeof(buf) - 1;
03593     *ptr = '\0';
03594 
03595     do
03596     {
03597         *--ptr = digits[value % HEXBASE];
03598         value /= HEXBASE;
03599     } while (ptr > buf && value);
03600 
03601     PG_RETURN_TEXT_P(cstring_to_text(ptr));
03602 }
03603 
03604 /*
03605  * Convert a int64 to a string containing a base 16 (hex) representation of
03606  * the number.
03607  */
03608 Datum
03609 to_hex64(PG_FUNCTION_ARGS)
03610 {
03611     uint64      value = (uint64) PG_GETARG_INT64(0);
03612     char       *ptr;
03613     const char *digits = "0123456789abcdef";
03614     char        buf[32];        /* bigger than needed, but reasonable */
03615 
03616     ptr = buf + sizeof(buf) - 1;
03617     *ptr = '\0';
03618 
03619     do
03620     {
03621         *--ptr = digits[value % HEXBASE];
03622         value /= HEXBASE;
03623     } while (ptr > buf && value);
03624 
03625     PG_RETURN_TEXT_P(cstring_to_text(ptr));
03626 }
03627 
03628 /*
03629  * Create an md5 hash of a text string and return it as hex
03630  *
03631  * md5 produces a 16 byte (128 bit) hash; double it for hex
03632  */
03633 #define MD5_HASH_LEN  32
03634 
03635 Datum
03636 md5_text(PG_FUNCTION_ARGS)
03637 {
03638     text       *in_text = PG_GETARG_TEXT_PP(0);
03639     size_t      len;
03640     char        hexsum[MD5_HASH_LEN + 1];
03641 
03642     /* Calculate the length of the buffer using varlena metadata */
03643     len = VARSIZE_ANY_EXHDR(in_text);
03644 
03645     /* get the hash result */
03646     if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
03647         ereport(ERROR,
03648                 (errcode(ERRCODE_OUT_OF_MEMORY),
03649                  errmsg("out of memory")));
03650 
03651     /* convert to text and return it */
03652     PG_RETURN_TEXT_P(cstring_to_text(hexsum));
03653 }
03654 
03655 /*
03656  * Create an md5 hash of a bytea field and return it as a hex string:
03657  * 16-byte md5 digest is represented in 32 hex characters.
03658  */
03659 Datum
03660 md5_bytea(PG_FUNCTION_ARGS)
03661 {
03662     bytea      *in = PG_GETARG_BYTEA_PP(0);
03663     size_t      len;
03664     char        hexsum[MD5_HASH_LEN + 1];
03665 
03666     len = VARSIZE_ANY_EXHDR(in);
03667     if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
03668         ereport(ERROR,
03669                 (errcode(ERRCODE_OUT_OF_MEMORY),
03670                  errmsg("out of memory")));
03671 
03672     PG_RETURN_TEXT_P(cstring_to_text(hexsum));
03673 }
03674 
03675 /*
03676  * Return the size of a datum, possibly compressed
03677  *
03678  * Works on any data type
03679  */
03680 Datum
03681 pg_column_size(PG_FUNCTION_ARGS)
03682 {
03683     Datum       value = PG_GETARG_DATUM(0);
03684     int32       result;
03685     int         typlen;
03686 
03687     /* On first call, get the input type's typlen, and save at *fn_extra */
03688     if (fcinfo->flinfo->fn_extra == NULL)
03689     {
03690         /* Lookup the datatype of the supplied argument */
03691         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
03692 
03693         typlen = get_typlen(argtypeid);
03694         if (typlen == 0)        /* should not happen */
03695             elog(ERROR, "cache lookup failed for type %u", argtypeid);
03696 
03697         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
03698                                                       sizeof(int));
03699         *((int *) fcinfo->flinfo->fn_extra) = typlen;
03700     }
03701     else
03702         typlen = *((int *) fcinfo->flinfo->fn_extra);
03703 
03704     if (typlen == -1)
03705     {
03706         /* varlena type, possibly toasted */
03707         result = toast_datum_size(value);
03708     }
03709     else if (typlen == -2)
03710     {
03711         /* cstring */
03712         result = strlen(DatumGetCString(value)) + 1;
03713     }
03714     else
03715     {
03716         /* ordinary fixed-width type */
03717         result = typlen;
03718     }
03719 
03720     PG_RETURN_INT32(result);
03721 }
03722 
03723 /*
03724  * string_agg - Concatenates values and returns string.
03725  *
03726  * Syntax: string_agg(value text, delimiter text) RETURNS text
03727  *
03728  * Note: Any NULL values are ignored. The first-call delimiter isn't
03729  * actually used at all, and on subsequent calls the delimiter precedes
03730  * the associated value.
03731  */
03732 
03733 /* subroutine to initialize state */
03734 static StringInfo
03735 makeStringAggState(FunctionCallInfo fcinfo)
03736 {
03737     StringInfo  state;
03738     MemoryContext aggcontext;
03739     MemoryContext oldcontext;
03740 
03741     if (!AggCheckCallContext(fcinfo, &aggcontext))
03742     {
03743         /* cannot be called directly because of internal-type argument */
03744         elog(ERROR, "string_agg_transfn called in non-aggregate context");
03745     }
03746 
03747     /*
03748      * Create state in aggregate context.  It'll stay there across subsequent
03749      * calls.
03750      */
03751     oldcontext = MemoryContextSwitchTo(aggcontext);
03752     state = makeStringInfo();
03753     MemoryContextSwitchTo(oldcontext);
03754 
03755     return state;
03756 }
03757 
03758 Datum
03759 string_agg_transfn(PG_FUNCTION_ARGS)
03760 {
03761     StringInfo  state;
03762 
03763     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
03764 
03765     /* Append the value unless null. */
03766     if (!PG_ARGISNULL(1))
03767     {
03768         /* On the first time through, we ignore the delimiter. */
03769         if (state == NULL)
03770             state = makeStringAggState(fcinfo);
03771         else if (!PG_ARGISNULL(2))
03772             appendStringInfoText(state, PG_GETARG_TEXT_PP(2));  /* delimiter */
03773 
03774         appendStringInfoText(state, PG_GETARG_TEXT_PP(1));      /* value */
03775     }
03776 
03777     /*
03778      * The transition type for string_agg() is declared to be "internal",
03779      * which is a pass-by-value type the same size as a pointer.
03780      */
03781     PG_RETURN_POINTER(state);
03782 }
03783 
03784 Datum
03785 string_agg_finalfn(PG_FUNCTION_ARGS)
03786 {
03787     StringInfo  state;
03788 
03789     /* cannot be called directly because of internal-type argument */
03790     Assert(AggCheckCallContext(fcinfo, NULL));
03791 
03792     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
03793 
03794     if (state != NULL)
03795         PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
03796     else
03797         PG_RETURN_NULL();
03798 }
03799 
03800 /*
03801  * Implementation of both concat() and concat_ws().
03802  *
03803  * sepstr is the separator string to place between values.
03804  * argidx identifies the first argument to concatenate (counting from zero).
03805  * Returns NULL if result should be NULL, else text value.
03806  */
03807 static text *
03808 concat_internal(const char *sepstr, int argidx,
03809                 FunctionCallInfo fcinfo)
03810 {
03811     text       *result;
03812     StringInfoData str;
03813     bool        first_arg = true;
03814     int         i;
03815 
03816     /*
03817      * concat(VARIADIC some-array) is essentially equivalent to
03818      * array_to_text(), ie concat the array elements with the given separator.
03819      * So we just pass the case off to that code.
03820      */
03821     if (get_fn_expr_variadic(fcinfo->flinfo))
03822     {
03823         Oid         arr_typid;
03824         ArrayType  *arr;
03825 
03826         /* Should have just the one argument */
03827         Assert(argidx == PG_NARGS() - 1);
03828 
03829         /* concat(VARIADIC NULL) is defined as NULL */
03830         if (PG_ARGISNULL(argidx))
03831             return NULL;
03832 
03833         /*
03834          * Non-null argument had better be an array.  The parser doesn't
03835          * enforce this for VARIADIC ANY functions (maybe it should?), so that
03836          * check uses ereport not just elog.
03837          */
03838         arr_typid = get_fn_expr_argtype(fcinfo->flinfo, argidx);
03839         if (!OidIsValid(arr_typid))
03840             elog(ERROR, "could not determine data type of concat() input");
03841 
03842         if (!OidIsValid(get_element_type(arr_typid)))
03843             ereport(ERROR,
03844                     (errcode(ERRCODE_DATATYPE_MISMATCH),
03845                      errmsg("VARIADIC argument must be an array")));
03846 
03847         /* OK, safe to fetch the array value */
03848         arr = PG_GETARG_ARRAYTYPE_P(argidx);
03849 
03850         /*
03851          * And serialize the array.  We tell array_to_text to ignore null
03852          * elements, which matches the behavior of the loop below.
03853          */
03854         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
03855     }
03856 
03857     /* Normal case without explicit VARIADIC marker */
03858     initStringInfo(&str);
03859 
03860     for (i = argidx; i < PG_NARGS(); i++)
03861     {
03862         if (!PG_ARGISNULL(i))
03863         {
03864             Datum       value = PG_GETARG_DATUM(i);
03865             Oid         valtype;
03866             Oid         typOutput;
03867             bool        typIsVarlena;
03868 
03869             /* add separator if appropriate */
03870             if (first_arg)
03871                 first_arg = false;
03872             else
03873                 appendStringInfoString(&str, sepstr);
03874 
03875             /* call the appropriate type output function, append the result */
03876             valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
03877             if (!OidIsValid(valtype))
03878                 elog(ERROR, "could not determine data type of concat() input");
03879             getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
03880             appendStringInfoString(&str,
03881                                    OidOutputFunctionCall(typOutput, value));
03882         }
03883     }
03884 
03885     result = cstring_to_text_with_len(str.data, str.len);
03886     pfree(str.data);
03887 
03888     return result;
03889 }
03890 
03891 /*
03892  * Concatenate all arguments. NULL arguments are ignored.
03893  */
03894 Datum
03895 text_concat(PG_FUNCTION_ARGS)
03896 {
03897     text       *result;
03898 
03899     result = concat_internal("", 0, fcinfo);
03900     if (result == NULL)
03901         PG_RETURN_NULL();
03902     PG_RETURN_TEXT_P(result);
03903 }
03904 
03905 /*
03906  * Concatenate all but first argument value with separators. The first
03907  * parameter is used as the separator. NULL arguments are ignored.
03908  */
03909 Datum
03910 text_concat_ws(PG_FUNCTION_ARGS)
03911 {
03912     char       *sep;
03913     text       *result;
03914 
03915     /* return NULL when separator is NULL */
03916     if (PG_ARGISNULL(0))
03917         PG_RETURN_NULL();
03918     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
03919 
03920     result = concat_internal(sep, 1, fcinfo);
03921     if (result == NULL)
03922         PG_RETURN_NULL();
03923     PG_RETURN_TEXT_P(result);
03924 }
03925 
03926 /*
03927  * Return first n characters in the string. When n is negative,
03928  * return all but last |n| characters.
03929  */
03930 Datum
03931 text_left(PG_FUNCTION_ARGS)
03932 {
03933     text       *str = PG_GETARG_TEXT_PP(0);
03934     const char *p = VARDATA_ANY(str);
03935     int         len = VARSIZE_ANY_EXHDR(str);
03936     int         n = PG_GETARG_INT32(1);
03937     int         rlen;
03938 
03939     if (n < 0)
03940         n = pg_mbstrlen_with_len(p, len) + n;
03941     rlen = pg_mbcharcliplen(p, len, n);
03942 
03943     PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
03944 }
03945 
03946 /*
03947  * Return last n characters in the string. When n is negative,
03948  * return all but first |n| characters.
03949  */
03950 Datum
03951 text_right(PG_FUNCTION_ARGS)
03952 {
03953     text       *str = PG_GETARG_TEXT_PP(0);
03954     const char *p = VARDATA_ANY(str);
03955     int         len = VARSIZE_ANY_EXHDR(str);
03956     int         n = PG_GETARG_INT32(1);
03957     int         off;
03958 
03959     if (n < 0)
03960         n = -n;
03961     else
03962         n = pg_mbstrlen_with_len(p, len) - n;
03963     off = pg_mbcharcliplen(p, len, n);
03964 
03965     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
03966 }
03967 
03968 /*
03969  * Return reversed string
03970  */
03971 Datum
03972 text_reverse(PG_FUNCTION_ARGS)
03973 {
03974     text       *str = PG_GETARG_TEXT_PP(0);
03975     const char *p = VARDATA_ANY(str);
03976     int         len = VARSIZE_ANY_EXHDR(str);
03977     const char *endp = p + len;
03978     text       *result;
03979     char       *dst;
03980 
03981     result = palloc(len + VARHDRSZ);
03982     dst = (char *) VARDATA(result) + len;
03983     SET_VARSIZE(result, len + VARHDRSZ);
03984 
03985     if (pg_database_encoding_max_length() > 1)
03986     {
03987         /* multibyte version */
03988         while (p < endp)
03989         {
03990             int         sz;
03991 
03992             sz = pg_mblen(p);
03993             dst -= sz;
03994             memcpy(dst, p, sz);
03995             p += sz;
03996         }
03997     }
03998     else
03999     {
04000         /* single byte version */
04001         while (p < endp)
04002             *(--dst) = *p++;
04003     }
04004 
04005     PG_RETURN_TEXT_P(result);
04006 }
04007 
04008 
/*
 * Support macros for text_format()
 */

/* Bit set in the parsed "flags" mask when a '-' flag was seen */
#define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */

/*
 * ADVANCE_PARSE_POINTER - move ptr forward one position in the format string.
 *
 * ptr is pre-incremented (so it must be a modifiable lvalue); if that puts
 * it at or beyond end_ptr, an "unterminated format specifier" ERROR is
 * raised.  The do { ... } while (0) wrapper lets the macro be used as a
 * single statement.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    do { \
        if (++(ptr) >= (end_ptr)) \
            ereport(ERROR, \
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
                     errmsg("unterminated format specifier"))); \
    } while (0)
04021 
04022 /*
04023  * Returns a formatted string
04024  */
04025 Datum
04026 text_format(PG_FUNCTION_ARGS)
04027 {
04028     text       *fmt;
04029     StringInfoData str;
04030     const char *cp;
04031     const char *start_ptr;
04032     const char *end_ptr;
04033     text       *result;
04034     int         arg;
04035     bool        funcvariadic;
04036     int         nargs;
04037     Datum      *elements = NULL;
04038     bool       *nulls = NULL;
04039     Oid         element_type = InvalidOid;
04040     Oid         prev_type = InvalidOid;
04041     Oid         prev_width_type = InvalidOid;
04042     FmgrInfo    typoutputfinfo;
04043     FmgrInfo    typoutputinfo_width;
04044 
04045     /* When format string is null, immediately return null */
04046     if (PG_ARGISNULL(0))
04047         PG_RETURN_NULL();
04048 
04049     /* If argument is marked VARIADIC, expand array into elements */
04050     if (get_fn_expr_variadic(fcinfo->flinfo))
04051     {
04052         Oid         arr_typid;
04053         ArrayType  *arr;
04054         int16       elmlen;
04055         bool        elmbyval;
04056         char        elmalign;
04057         int         nitems;
04058 
04059         /* Should have just the one argument */
04060         Assert(PG_NARGS() == 2);
04061 
04062         /* If argument is NULL, we treat it as zero-length array */
04063         if (PG_ARGISNULL(1))
04064             nitems = 0;
04065         else
04066         {
04067             /*
04068              * Non-null argument had better be an array.  The parser doesn't
04069              * enforce this for VARIADIC ANY functions (maybe it should?), so
04070              * that check uses ereport not just elog.
04071              */
04072             arr_typid = get_fn_expr_argtype(fcinfo->flinfo, 1);
04073             if (!OidIsValid(arr_typid))
04074                 elog(ERROR, "could not determine data type of format() input");
04075 
04076             if (!OidIsValid(get_element_type(arr_typid)))
04077                 ereport(ERROR,
04078                         (errcode(ERRCODE_DATATYPE_MISMATCH),
04079                          errmsg("VARIADIC argument must be an array")));
04080 
04081             /* OK, safe to fetch the array value */
04082             arr = PG_GETARG_ARRAYTYPE_P(1);
04083 
04084             /* Get info about array element type */
04085             element_type = ARR_ELEMTYPE(arr);
04086             get_typlenbyvalalign(element_type,
04087                                  &elmlen, &elmbyval, &elmalign);
04088 
04089             /* Extract all array elements */
04090             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
04091                               &elements, &nulls, &nitems);
04092         }
04093 
04094         nargs = nitems + 1;
04095         funcvariadic = true;
04096     }
04097     else
04098     {
04099         /* Non-variadic case, we'll process the arguments individually */
04100         nargs = PG_NARGS();
04101         funcvariadic = false;
04102     }
04103 
04104     /* Setup for main loop. */
04105     fmt = PG_GETARG_TEXT_PP(0);
04106     start_ptr = VARDATA_ANY(fmt);
04107     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
04108     initStringInfo(&str);
04109     arg = 1;                    /* next argument position to print */
04110 
04111     /* Scan format string, looking for conversion specifiers. */
04112     for (cp = start_ptr; cp < end_ptr; cp++)
04113     {
04114         int         argpos;
04115         int         widthpos;
04116         int         flags;
04117         int         width;
04118         Datum       value;
04119         bool        isNull;
04120         Oid         typid;
04121 
04122         /*
04123          * If it's not the start of a conversion specifier, just copy it to
04124          * the output buffer.
04125          */
04126         if (*cp != '%')
04127         {
04128             appendStringInfoCharMacro(&str, *cp);
04129             continue;
04130         }
04131 
04132         ADVANCE_PARSE_POINTER(cp, end_ptr);
04133 
04134         /* Easy case: %% outputs a single % */
04135         if (*cp == '%')
04136         {
04137             appendStringInfoCharMacro(&str, *cp);
04138             continue;
04139         }
04140 
04141         /* Parse the optional portions of the format specifier */
04142         cp = text_format_parse_format(cp, end_ptr,
04143                                       &argpos, &widthpos,
04144                                       &flags, &width);
04145 
04146         /*
04147          * Next we should see the main conversion specifier.  Whether or not
04148          * an argument position was present, it's known that at least one
04149          * character remains in the string at this point.  Experience suggests
04150          * that it's worth checking that that character is one of the expected
04151          * ones before we try to fetch arguments, so as to produce the least
04152          * confusing response to a mis-formatted specifier.
04153          */
04154         if (strchr("sIL", *cp) == NULL)
04155             ereport(ERROR,
04156                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04157                      errmsg("unrecognized conversion type specifier \"%c\"",
04158                             *cp)));
04159 
04160         /* If indirect width was specified, get its value */
04161         if (widthpos >= 0)
04162         {
04163             /* Collect the specified or next argument position */
04164             if (widthpos > 0)
04165                 arg = widthpos;
04166             if (arg >= nargs)
04167                 ereport(ERROR,
04168                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04169                          errmsg("too few arguments for format")));
04170 
04171             /* Get the value and type of the selected argument */
04172             if (!funcvariadic)
04173             {
04174                 value = PG_GETARG_DATUM(arg);
04175                 isNull = PG_ARGISNULL(arg);
04176                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
04177             }
04178             else
04179             {
04180                 value = elements[arg - 1];
04181                 isNull = nulls[arg - 1];
04182                 typid = element_type;
04183             }
04184             if (!OidIsValid(typid))
04185                 elog(ERROR, "could not determine data type of format() input");
04186 
04187             arg++;
04188 
04189             /* We can treat NULL width the same as zero */
04190             if (isNull)
04191                 width = 0;
04192             else if (typid == INT4OID)
04193                 width = DatumGetInt32(value);
04194             else if (typid == INT2OID)
04195                 width = DatumGetInt16(value);
04196             else
04197             {
04198                 /* For less-usual datatypes, convert to text then to int */
04199                 char       *str;
04200 
04201                 if (typid != prev_width_type)
04202                 {
04203                     Oid         typoutputfunc;
04204                     bool        typIsVarlena;
04205 
04206                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
04207                     fmgr_info(typoutputfunc, &typoutputinfo_width);
04208                     prev_width_type = typid;
04209                 }
04210 
04211                 str = OutputFunctionCall(&typoutputinfo_width, value);
04212 
04213                 /* pg_atoi will complain about bad data or overflow */
04214                 width = pg_atoi(str, sizeof(int), '\0');
04215 
04216                 pfree(str);
04217             }
04218         }
04219 
04220         /* Collect the specified or next argument position */
04221         if (argpos > 0)
04222             arg = argpos;
04223         if (arg >= nargs)
04224             ereport(ERROR,
04225                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04226                      errmsg("too few arguments for format")));
04227 
04228         /* Get the value and type of the selected argument */
04229         if (!funcvariadic)
04230         {
04231             value = PG_GETARG_DATUM(arg);
04232             isNull = PG_ARGISNULL(arg);
04233             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
04234         }
04235         else
04236         {
04237             value = elements[arg - 1];
04238             isNull = nulls[arg - 1];
04239             typid = element_type;
04240         }
04241         if (!OidIsValid(typid))
04242             elog(ERROR, "could not determine data type of format() input");
04243 
04244         arg++;
04245 
04246         /*
04247          * Get the appropriate typOutput function, reusing previous one if
04248          * same type as previous argument.  That's particularly useful in the
04249          * variadic-array case, but often saves work even for ordinary calls.
04250          */
04251         if (typid != prev_type)
04252         {
04253             Oid         typoutputfunc;
04254             bool        typIsVarlena;
04255 
04256             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
04257             fmgr_info(typoutputfunc, &typoutputfinfo);
04258             prev_type = typid;
04259         }
04260 
04261         /*
04262          * And now we can format the value.
04263          */
04264         switch (*cp)
04265         {
04266             case 's':
04267             case 'I':
04268             case 'L':
04269                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
04270                                               value, isNull,
04271                                               flags, width);
04272                 break;
04273             default:
04274                 /* should not get here, because of previous check */
04275                 ereport(ERROR,
04276                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04277                          errmsg("unrecognized conversion type specifier \"%c\"",
04278                                 *cp)));
04279                 break;
04280         }
04281     }
04282 
04283     /* Don't need deconstruct_array results anymore. */
04284     if (elements != NULL)
04285         pfree(elements);
04286     if (nulls != NULL)
04287         pfree(nulls);
04288 
04289     /* Generate results. */
04290     result = cstring_to_text_with_len(str.data, str.len);
04291     pfree(str.data);
04292 
04293     PG_RETURN_TEXT_P(result);
04294 }
04295 
04296 /*
04297  * Parse contiguous digits as a decimal number.
04298  *
04299  * Returns true if some digits could be parsed.
04300  * The value is returned into *value, and *ptr is advanced to the next
04301  * character to be parsed.
04302  *
04303  * Note parsing invariant: at least one character is known available before
04304  * string end (end_ptr) at entry, and this is still true at exit.
04305  */
04306 static bool
04307 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
04308 {
04309     bool        found = false;
04310     const char *cp = *ptr;
04311     int         val = 0;
04312 
04313     while (*cp >= '0' && *cp <= '9')
04314     {
04315         int         newval = val * 10 + (*cp - '0');
04316 
04317         if (newval / 10 != val) /* overflow? */
04318             ereport(ERROR,
04319                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
04320                      errmsg("number is out of range")));
04321         val = newval;
04322         ADVANCE_PARSE_POINTER(cp, end_ptr);
04323         found = true;
04324     }
04325 
04326     *ptr = cp;
04327     *value = val;
04328 
04329     return found;
04330 }
04331 
04332 /*
04333  * Parse a format specifier (generally following the SUS printf spec).
04334  *
04335  * We have already advanced over the initial '%', and we are looking for
04336  * [argpos][flags][width]type (but the type character is not consumed here).
04337  *
04338  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
04339  * Output parameters:
04340  *  argpos: argument position for value to be printed.  -1 means unspecified.
04341  *  widthpos: argument position for width.  Zero means the argument position
04342  *          was unspecified (ie, take the next arg) and -1 means no width
04343  *          argument (width was omitted or specified as a constant).
04344  *  flags: bitmask of flags.
04345  *  width: directly-specified width value.  Zero means the width was omitted
04346  *          (note it's not necessary to distinguish this case from an explicit
04347  *          zero width value).
04348  *
04349  * The function result is the next character position to be parsed, ie, the
04350  * location where the type character is/should be.
04351  *
04352  * Note parsing invariant: at least one character is known available before
04353  * string end (end_ptr) at entry, and this is still true at exit.
04354  */
04355 static const char *
04356 text_format_parse_format(const char *start_ptr, const char *end_ptr,
04357                          int *argpos, int *widthpos,
04358                          int *flags, int *width)
04359 {
04360     const char *cp = start_ptr;
04361     int         n;
04362 
04363     /* set defaults for output parameters */
04364     *argpos = -1;
04365     *widthpos = -1;
04366     *flags = 0;
04367     *width = 0;
04368 
04369     /* try to identify first number */
04370     if (text_format_parse_digits(&cp, end_ptr, &n))
04371     {
04372         if (*cp != '$')
04373         {
04374             /* Must be just a width and a type, so we're done */
04375             *width = n;
04376             return cp;
04377         }
04378         /* The number was argument position */
04379         *argpos = n;
04380         /* Explicit 0 for argument index is immediately refused */
04381         if (n == 0)
04382             ereport(ERROR,
04383                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04384                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
04385         ADVANCE_PARSE_POINTER(cp, end_ptr);
04386     }
04387 
04388     /* Handle flags (only minus is supported now) */
04389     while (*cp == '-')
04390     {
04391         *flags |= TEXT_FORMAT_FLAG_MINUS;
04392         ADVANCE_PARSE_POINTER(cp, end_ptr);
04393     }
04394 
04395     if (*cp == '*')
04396     {
04397         /* Handle indirect width */
04398         ADVANCE_PARSE_POINTER(cp, end_ptr);
04399         if (text_format_parse_digits(&cp, end_ptr, &n))
04400         {
04401             /* number in this position must be closed by $ */
04402             if (*cp != '$')
04403                 ereport(ERROR,
04404                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04405                   errmsg("width argument position must be ended by \"$\"")));
04406             /* The number was width argument position */
04407             *widthpos = n;
04408             /* Explicit 0 for argument index is immediately refused */
04409             if (n == 0)
04410                 ereport(ERROR,
04411                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04412                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
04413             ADVANCE_PARSE_POINTER(cp, end_ptr);
04414         }
04415         else
04416             *widthpos = 0;      /* width's argument position is unspecified */
04417     }
04418     else
04419     {
04420         /* Check for direct width specification */
04421         if (text_format_parse_digits(&cp, end_ptr, &n))
04422             *width = n;
04423     }
04424 
04425     /* cp should now be pointing at type character */
04426     return cp;
04427 }
04428 
04429 /*
04430  * Format a %s, %I, or %L conversion
04431  */
04432 static void
04433 text_format_string_conversion(StringInfo buf, char conversion,
04434                               FmgrInfo *typOutputInfo,
04435                               Datum value, bool isNull,
04436                               int flags, int width)
04437 {
04438     char       *str;
04439 
04440     /* Handle NULL arguments before trying to stringify the value. */
04441     if (isNull)
04442     {
04443         if (conversion == 's')
04444             text_format_append_string(buf, "", flags, width);
04445         else if (conversion == 'L')
04446             text_format_append_string(buf, "NULL", flags, width);
04447         else if (conversion == 'I')
04448             ereport(ERROR,
04449                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
04450             errmsg("null values cannot be formatted as an SQL identifier")));
04451         return;
04452     }
04453 
04454     /* Stringify. */
04455     str = OutputFunctionCall(typOutputInfo, value);
04456 
04457     /* Escape. */
04458     if (conversion == 'I')
04459     {
04460         /* quote_identifier may or may not allocate a new string. */
04461         text_format_append_string(buf, quote_identifier(str), flags, width);
04462     }
04463     else if (conversion == 'L')
04464     {
04465         char       *qstr = quote_literal_cstr(str);
04466 
04467         text_format_append_string(buf, qstr, flags, width);
04468         /* quote_literal_cstr() always allocates a new string */
04469         pfree(qstr);
04470     }
04471     else
04472         text_format_append_string(buf, str, flags, width);
04473 
04474     /* Cleanup. */
04475     pfree(str);
04476 }
04477 
04478 /*
04479  * Append str to buf, padding as directed by flags/width
04480  */
04481 static void
04482 text_format_append_string(StringInfo buf, const char *str,
04483                           int flags, int width)
04484 {
04485     bool        align_to_left = false;
04486     int         len;
04487 
04488     /* fast path for typical easy case */
04489     if (width == 0)
04490     {
04491         appendStringInfoString(buf, str);
04492         return;
04493     }
04494 
04495     if (width < 0)
04496     {
04497         /* Negative width: implicit '-' flag, then take absolute value */
04498         align_to_left = true;
04499         /* -INT_MIN is undefined */
04500         if (width <= INT_MIN)
04501             ereport(ERROR,
04502                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
04503                      errmsg("number is out of range")));
04504         width = -width;
04505     }
04506     else if (flags & TEXT_FORMAT_FLAG_MINUS)
04507         align_to_left = true;
04508 
04509     len = pg_mbstrlen(str);
04510     if (align_to_left)
04511     {
04512         /* left justify */
04513         appendStringInfoString(buf, str);
04514         if (len < width)
04515             appendStringInfoSpaces(buf, width - len);
04516     }
04517     else
04518     {
04519         /* right justify */
04520         if (len < width)
04521             appendStringInfoSpaces(buf, width - len);
04522         appendStringInfoString(buf, str);
04523     }
04524 }
04525 
04526 /*
04527  * text_format_nv - nonvariadic wrapper for text_format function.
04528  *
04529  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
04530  * which checks that all built-in functions that share the implementing C
04531  * function take the same number of arguments.
04532  */
04533 Datum
04534 text_format_nv(PG_FUNCTION_ARGS)
04535 {
04536     return text_format(fcinfo);
04537 }
Header And Logo

varlena.c