Header And Logo

PostgreSQL
| The world's most advanced open source database.

oracle_compat.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  * oracle_compat.c
00003  *  Oracle compatible functions.
00004  *
00005  * Copyright (c) 1996-2013, PostgreSQL Global Development Group
00006  *
00007  *  Author: Edmund Mergl <[email protected]>
00008  *  Multibyte enhancement: Tatsuo Ishii <[email protected]>
00009  *
00010  *
00011  * IDENTIFICATION
00012  *  src/backend/utils/adt/oracle_compat.c
00013  *
00014  *-------------------------------------------------------------------------
00015  */
00016 #include "postgres.h"
00017 
00018 #include "utils/builtins.h"
00019 #include "utils/formatting.h"
00020 #include "mb/pg_wchar.h"
00021 
00022 
00023 static text *dotrim(const char *string, int stringlen,
00024        const char *set, int setlen,
00025        bool doltrim, bool dortrim);
00026 
00027 
00028 /********************************************************************
00029  *
00030  * lower
00031  *
00032  * Syntax:
00033  *
00034  *   text lower(text string)
00035  *
00036  * Purpose:
00037  *
00038  *   Returns string, with all letters forced to lowercase.
00039  *
00040  ********************************************************************/
00041 
00042 Datum
00043 lower(PG_FUNCTION_ARGS)
00044 {
00045     text       *in_string = PG_GETARG_TEXT_PP(0);
00046     char       *out_string;
00047     text       *result;
00048 
00049     out_string = str_tolower(VARDATA_ANY(in_string),
00050                              VARSIZE_ANY_EXHDR(in_string),
00051                              PG_GET_COLLATION());
00052     result = cstring_to_text(out_string);
00053     pfree(out_string);
00054 
00055     PG_RETURN_TEXT_P(result);
00056 }
00057 
00058 
00059 /********************************************************************
00060  *
00061  * upper
00062  *
00063  * Syntax:
00064  *
00065  *   text upper(text string)
00066  *
00067  * Purpose:
00068  *
00069  *   Returns string, with all letters forced to uppercase.
00070  *
00071  ********************************************************************/
00072 
00073 Datum
00074 upper(PG_FUNCTION_ARGS)
00075 {
00076     text       *in_string = PG_GETARG_TEXT_PP(0);
00077     char       *out_string;
00078     text       *result;
00079 
00080     out_string = str_toupper(VARDATA_ANY(in_string),
00081                              VARSIZE_ANY_EXHDR(in_string),
00082                              PG_GET_COLLATION());
00083     result = cstring_to_text(out_string);
00084     pfree(out_string);
00085 
00086     PG_RETURN_TEXT_P(result);
00087 }
00088 
00089 
00090 /********************************************************************
00091  *
00092  * initcap
00093  *
00094  * Syntax:
00095  *
00096  *   text initcap(text string)
00097  *
00098  * Purpose:
00099  *
00100  *   Returns string, with first letter of each word in uppercase, all
00101  *   other letters in lowercase. A word is defined as a sequence of
00102  *   alphanumeric characters, delimited by non-alphanumeric
00103  *   characters.
00104  *
00105  ********************************************************************/
00106 
00107 Datum
00108 initcap(PG_FUNCTION_ARGS)
00109 {
00110     text       *in_string = PG_GETARG_TEXT_PP(0);
00111     char       *out_string;
00112     text       *result;
00113 
00114     out_string = str_initcap(VARDATA_ANY(in_string),
00115                              VARSIZE_ANY_EXHDR(in_string),
00116                              PG_GET_COLLATION());
00117     result = cstring_to_text(out_string);
00118     pfree(out_string);
00119 
00120     PG_RETURN_TEXT_P(result);
00121 }
00122 
00123 
00124 /********************************************************************
00125  *
00126  * lpad
00127  *
00128  * Syntax:
00129  *
00130  *   text lpad(text string1, int4 len, text string2)
00131  *
00132  * Purpose:
00133  *
00134  *   Returns string1, left-padded to length len with the sequence of
00135  *   characters in string2.  If len is less than the length of string1,
00136  *   instead truncate (on the right) to len.
00137  *
00138  ********************************************************************/
00139 
00140 Datum
00141 lpad(PG_FUNCTION_ARGS)
00142 {
00143     text       *string1 = PG_GETARG_TEXT_PP(0);
00144     int32       len = PG_GETARG_INT32(1);
00145     text       *string2 = PG_GETARG_TEXT_PP(2);
00146     text       *ret;
00147     char       *ptr1,
00148                *ptr2,
00149                *ptr2start,
00150                *ptr2end,
00151                *ptr_ret;
00152     int         m,
00153                 s1len,
00154                 s2len;
00155 
00156     int         bytelen;
00157 
00158     /* Negative len is silently taken as zero */
00159     if (len < 0)
00160         len = 0;
00161 
00162     s1len = VARSIZE_ANY_EXHDR(string1);
00163     if (s1len < 0)
00164         s1len = 0;              /* shouldn't happen */
00165 
00166     s2len = VARSIZE_ANY_EXHDR(string2);
00167     if (s2len < 0)
00168         s2len = 0;              /* shouldn't happen */
00169 
00170     s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
00171 
00172     if (s1len > len)
00173         s1len = len;            /* truncate string1 to len chars */
00174 
00175     if (s2len <= 0)
00176         len = s1len;            /* nothing to pad with, so don't pad */
00177 
00178     bytelen = pg_database_encoding_max_length() * len;
00179 
00180     /* check for integer overflow */
00181     if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
00182         ereport(ERROR,
00183                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00184                  errmsg("requested length too large")));
00185 
00186     ret = (text *) palloc(VARHDRSZ + bytelen);
00187 
00188     m = len - s1len;
00189 
00190     ptr2 = ptr2start = VARDATA_ANY(string2);
00191     ptr2end = ptr2 + s2len;
00192     ptr_ret = VARDATA(ret);
00193 
00194     while (m--)
00195     {
00196         int         mlen = pg_mblen(ptr2);
00197 
00198         memcpy(ptr_ret, ptr2, mlen);
00199         ptr_ret += mlen;
00200         ptr2 += mlen;
00201         if (ptr2 == ptr2end)    /* wrap around at end of s2 */
00202             ptr2 = ptr2start;
00203     }
00204 
00205     ptr1 = VARDATA_ANY(string1);
00206 
00207     while (s1len--)
00208     {
00209         int         mlen = pg_mblen(ptr1);
00210 
00211         memcpy(ptr_ret, ptr1, mlen);
00212         ptr_ret += mlen;
00213         ptr1 += mlen;
00214     }
00215 
00216     SET_VARSIZE(ret, ptr_ret - (char *) ret);
00217 
00218     PG_RETURN_TEXT_P(ret);
00219 }
00220 
00221 
00222 /********************************************************************
00223  *
00224  * rpad
00225  *
00226  * Syntax:
00227  *
00228  *   text rpad(text string1, int4 len, text string2)
00229  *
00230  * Purpose:
00231  *
00232  *   Returns string1, right-padded to length len with the sequence of
00233  *   characters in string2.  If len is less than the length of string1,
00234  *   instead truncate (on the right) to len.
00235  *
00236  ********************************************************************/
00237 
00238 Datum
00239 rpad(PG_FUNCTION_ARGS)
00240 {
00241     text       *string1 = PG_GETARG_TEXT_PP(0);
00242     int32       len = PG_GETARG_INT32(1);
00243     text       *string2 = PG_GETARG_TEXT_PP(2);
00244     text       *ret;
00245     char       *ptr1,
00246                *ptr2,
00247                *ptr2start,
00248                *ptr2end,
00249                *ptr_ret;
00250     int         m,
00251                 s1len,
00252                 s2len;
00253 
00254     int         bytelen;
00255 
00256     /* Negative len is silently taken as zero */
00257     if (len < 0)
00258         len = 0;
00259 
00260     s1len = VARSIZE_ANY_EXHDR(string1);
00261     if (s1len < 0)
00262         s1len = 0;              /* shouldn't happen */
00263 
00264     s2len = VARSIZE_ANY_EXHDR(string2);
00265     if (s2len < 0)
00266         s2len = 0;              /* shouldn't happen */
00267 
00268     s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
00269 
00270     if (s1len > len)
00271         s1len = len;            /* truncate string1 to len chars */
00272 
00273     if (s2len <= 0)
00274         len = s1len;            /* nothing to pad with, so don't pad */
00275 
00276     bytelen = pg_database_encoding_max_length() * len;
00277 
00278     /* Check for integer overflow */
00279     if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
00280         ereport(ERROR,
00281                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00282                  errmsg("requested length too large")));
00283 
00284     ret = (text *) palloc(VARHDRSZ + bytelen);
00285     m = len - s1len;
00286 
00287     ptr1 = VARDATA_ANY(string1);
00288     ptr_ret = VARDATA(ret);
00289 
00290     while (s1len--)
00291     {
00292         int         mlen = pg_mblen(ptr1);
00293 
00294         memcpy(ptr_ret, ptr1, mlen);
00295         ptr_ret += mlen;
00296         ptr1 += mlen;
00297     }
00298 
00299     ptr2 = ptr2start = VARDATA_ANY(string2);
00300     ptr2end = ptr2 + s2len;
00301 
00302     while (m--)
00303     {
00304         int         mlen = pg_mblen(ptr2);
00305 
00306         memcpy(ptr_ret, ptr2, mlen);
00307         ptr_ret += mlen;
00308         ptr2 += mlen;
00309         if (ptr2 == ptr2end)    /* wrap around at end of s2 */
00310             ptr2 = ptr2start;
00311     }
00312 
00313     SET_VARSIZE(ret, ptr_ret - (char *) ret);
00314 
00315     PG_RETURN_TEXT_P(ret);
00316 }
00317 
00318 
00319 /********************************************************************
00320  *
00321  * btrim
00322  *
00323  * Syntax:
00324  *
00325  *   text btrim(text string, text set)
00326  *
00327  * Purpose:
00328  *
00329  *   Returns string with characters removed from the front and back
00330  *   up to the first character not in set.
00331  *
00332  ********************************************************************/
00333 
00334 Datum
00335 btrim(PG_FUNCTION_ARGS)
00336 {
00337     text       *string = PG_GETARG_TEXT_PP(0);
00338     text       *set = PG_GETARG_TEXT_PP(1);
00339     text       *ret;
00340 
00341     ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
00342                  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
00343                  true, true);
00344 
00345     PG_RETURN_TEXT_P(ret);
00346 }
00347 
00348 /********************************************************************
00349  *
00350  * btrim1 --- btrim with set fixed as ' '
00351  *
00352  ********************************************************************/
00353 
00354 Datum
00355 btrim1(PG_FUNCTION_ARGS)
00356 {
00357     text       *string = PG_GETARG_TEXT_PP(0);
00358     text       *ret;
00359 
00360     ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
00361                  " ", 1,
00362                  true, true);
00363 
00364     PG_RETURN_TEXT_P(ret);
00365 }
00366 
00367 /*
00368  * Common implementation for btrim, ltrim, rtrim
00369  */
00370 static text *
00371 dotrim(const char *string, int stringlen,
00372        const char *set, int setlen,
00373        bool doltrim, bool dortrim)
00374 {
00375     int         i;
00376 
00377     /* Nothing to do if either string or set is empty */
00378     if (stringlen > 0 && setlen > 0)
00379     {
00380         if (pg_database_encoding_max_length() > 1)
00381         {
00382             /*
00383              * In the multibyte-encoding case, build arrays of pointers to
00384              * character starts, so that we can avoid inefficient checks in
00385              * the inner loops.
00386              */
00387             const char **stringchars;
00388             const char **setchars;
00389             int        *stringmblen;
00390             int        *setmblen;
00391             int         stringnchars;
00392             int         setnchars;
00393             int         resultndx;
00394             int         resultnchars;
00395             const char *p;
00396             int         len;
00397             int         mblen;
00398             const char *str_pos;
00399             int         str_len;
00400 
00401             stringchars = (const char **) palloc(stringlen * sizeof(char *));
00402             stringmblen = (int *) palloc(stringlen * sizeof(int));
00403             stringnchars = 0;
00404             p = string;
00405             len = stringlen;
00406             while (len > 0)
00407             {
00408                 stringchars[stringnchars] = p;
00409                 stringmblen[stringnchars] = mblen = pg_mblen(p);
00410                 stringnchars++;
00411                 p += mblen;
00412                 len -= mblen;
00413             }
00414 
00415             setchars = (const char **) palloc(setlen * sizeof(char *));
00416             setmblen = (int *) palloc(setlen * sizeof(int));
00417             setnchars = 0;
00418             p = set;
00419             len = setlen;
00420             while (len > 0)
00421             {
00422                 setchars[setnchars] = p;
00423                 setmblen[setnchars] = mblen = pg_mblen(p);
00424                 setnchars++;
00425                 p += mblen;
00426                 len -= mblen;
00427             }
00428 
00429             resultndx = 0;      /* index in stringchars[] */
00430             resultnchars = stringnchars;
00431 
00432             if (doltrim)
00433             {
00434                 while (resultnchars > 0)
00435                 {
00436                     str_pos = stringchars[resultndx];
00437                     str_len = stringmblen[resultndx];
00438                     for (i = 0; i < setnchars; i++)
00439                     {
00440                         if (str_len == setmblen[i] &&
00441                             memcmp(str_pos, setchars[i], str_len) == 0)
00442                             break;
00443                     }
00444                     if (i >= setnchars)
00445                         break;  /* no match here */
00446                     string += str_len;
00447                     stringlen -= str_len;
00448                     resultndx++;
00449                     resultnchars--;
00450                 }
00451             }
00452 
00453             if (dortrim)
00454             {
00455                 while (resultnchars > 0)
00456                 {
00457                     str_pos = stringchars[resultndx + resultnchars - 1];
00458                     str_len = stringmblen[resultndx + resultnchars - 1];
00459                     for (i = 0; i < setnchars; i++)
00460                     {
00461                         if (str_len == setmblen[i] &&
00462                             memcmp(str_pos, setchars[i], str_len) == 0)
00463                             break;
00464                     }
00465                     if (i >= setnchars)
00466                         break;  /* no match here */
00467                     stringlen -= str_len;
00468                     resultnchars--;
00469                 }
00470             }
00471 
00472             pfree(stringchars);
00473             pfree(stringmblen);
00474             pfree(setchars);
00475             pfree(setmblen);
00476         }
00477         else
00478         {
00479             /*
00480              * In the single-byte-encoding case, we don't need such overhead.
00481              */
00482             if (doltrim)
00483             {
00484                 while (stringlen > 0)
00485                 {
00486                     char        str_ch = *string;
00487 
00488                     for (i = 0; i < setlen; i++)
00489                     {
00490                         if (str_ch == set[i])
00491                             break;
00492                     }
00493                     if (i >= setlen)
00494                         break;  /* no match here */
00495                     string++;
00496                     stringlen--;
00497                 }
00498             }
00499 
00500             if (dortrim)
00501             {
00502                 while (stringlen > 0)
00503                 {
00504                     char        str_ch = string[stringlen - 1];
00505 
00506                     for (i = 0; i < setlen; i++)
00507                     {
00508                         if (str_ch == set[i])
00509                             break;
00510                     }
00511                     if (i >= setlen)
00512                         break;  /* no match here */
00513                     stringlen--;
00514                 }
00515             }
00516         }
00517     }
00518 
00519     /* Return selected portion of string */
00520     return cstring_to_text_with_len(string, stringlen);
00521 }
00522 
00523 /********************************************************************
00524  *
00525  * byteatrim
00526  *
00527  * Syntax:
00528  *
00529  *   bytea byteatrim(byta string, bytea set)
00530  *
00531  * Purpose:
00532  *
00533  *   Returns string with characters removed from the front and back
00534  *   up to the first character not in set.
00535  *
00536  * Cloned from btrim and modified as required.
00537  ********************************************************************/
00538 
00539 Datum
00540 byteatrim(PG_FUNCTION_ARGS)
00541 {
00542     bytea      *string = PG_GETARG_BYTEA_PP(0);
00543     bytea      *set = PG_GETARG_BYTEA_PP(1);
00544     bytea      *ret;
00545     char       *ptr,
00546                *end,
00547                *ptr2,
00548                *ptr2start,
00549                *end2;
00550     int         m,
00551                 stringlen,
00552                 setlen;
00553 
00554     stringlen = VARSIZE_ANY_EXHDR(string);
00555     setlen = VARSIZE_ANY_EXHDR(set);
00556 
00557     if (stringlen <= 0 || setlen <= 0)
00558         PG_RETURN_BYTEA_P(string);
00559 
00560     m = stringlen;
00561     ptr = VARDATA_ANY(string);
00562     end = ptr + stringlen - 1;
00563     ptr2start = VARDATA_ANY(set);
00564     end2 = ptr2start + setlen - 1;
00565 
00566     while (m > 0)
00567     {
00568         ptr2 = ptr2start;
00569         while (ptr2 <= end2)
00570         {
00571             if (*ptr == *ptr2)
00572                 break;
00573             ++ptr2;
00574         }
00575         if (ptr2 > end2)
00576             break;
00577         ptr++;
00578         m--;
00579     }
00580 
00581     while (m > 0)
00582     {
00583         ptr2 = ptr2start;
00584         while (ptr2 <= end2)
00585         {
00586             if (*end == *ptr2)
00587                 break;
00588             ++ptr2;
00589         }
00590         if (ptr2 > end2)
00591             break;
00592         end--;
00593         m--;
00594     }
00595 
00596     ret = (bytea *) palloc(VARHDRSZ + m);
00597     SET_VARSIZE(ret, VARHDRSZ + m);
00598     memcpy(VARDATA(ret), ptr, m);
00599 
00600     PG_RETURN_BYTEA_P(ret);
00601 }
00602 
00603 /********************************************************************
00604  *
00605  * ltrim
00606  *
00607  * Syntax:
00608  *
00609  *   text ltrim(text string, text set)
00610  *
00611  * Purpose:
00612  *
00613  *   Returns string with initial characters removed up to the first
00614  *   character not in set.
00615  *
00616  ********************************************************************/
00617 
00618 Datum
00619 ltrim(PG_FUNCTION_ARGS)
00620 {
00621     text       *string = PG_GETARG_TEXT_PP(0);
00622     text       *set = PG_GETARG_TEXT_PP(1);
00623     text       *ret;
00624 
00625     ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
00626                  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
00627                  true, false);
00628 
00629     PG_RETURN_TEXT_P(ret);
00630 }
00631 
00632 /********************************************************************
00633  *
00634  * ltrim1 --- ltrim with set fixed as ' '
00635  *
00636  ********************************************************************/
00637 
00638 Datum
00639 ltrim1(PG_FUNCTION_ARGS)
00640 {
00641     text       *string = PG_GETARG_TEXT_PP(0);
00642     text       *ret;
00643 
00644     ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
00645                  " ", 1,
00646                  true, false);
00647 
00648     PG_RETURN_TEXT_P(ret);
00649 }
00650 
00651 /********************************************************************
00652  *
00653  * rtrim
00654  *
00655  * Syntax:
00656  *
00657  *   text rtrim(text string, text set)
00658  *
00659  * Purpose:
00660  *
00661  *   Returns string with final characters removed after the last
00662  *   character not in set.
00663  *
00664  ********************************************************************/
00665 
00666 Datum
00667 rtrim(PG_FUNCTION_ARGS)
00668 {
00669     text       *string = PG_GETARG_TEXT_PP(0);
00670     text       *set = PG_GETARG_TEXT_PP(1);
00671     text       *ret;
00672 
00673     ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
00674                  VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
00675                  false, true);
00676 
00677     PG_RETURN_TEXT_P(ret);
00678 }
00679 
00680 /********************************************************************
00681  *
00682  * rtrim1 --- rtrim with set fixed as ' '
00683  *
00684  ********************************************************************/
00685 
00686 Datum
00687 rtrim1(PG_FUNCTION_ARGS)
00688 {
00689     text       *string = PG_GETARG_TEXT_PP(0);
00690     text       *ret;
00691 
00692     ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
00693                  " ", 1,
00694                  false, true);
00695 
00696     PG_RETURN_TEXT_P(ret);
00697 }
00698 
00699 
00700 /********************************************************************
00701  *
00702  * translate
00703  *
00704  * Syntax:
00705  *
00706  *   text translate(text string, text from, text to)
00707  *
00708  * Purpose:
00709  *
00710  *   Returns string after replacing all occurrences of characters in from
00711  *   with the corresponding character in to.  If from is longer than to,
00712  *   occurrences of the extra characters in from are deleted.
00713  *   Improved by Edwin Ramirez <[email protected]>.
00714  *
00715  ********************************************************************/
00716 
00717 Datum
00718 translate(PG_FUNCTION_ARGS)
00719 {
00720     text       *string = PG_GETARG_TEXT_PP(0);
00721     text       *from = PG_GETARG_TEXT_PP(1);
00722     text       *to = PG_GETARG_TEXT_PP(2);
00723     text       *result;
00724     char       *from_ptr,
00725                *to_ptr;
00726     char       *source,
00727                *target;
00728     int         m,
00729                 fromlen,
00730                 tolen,
00731                 retlen,
00732                 i;
00733     int         worst_len;
00734     int         len;
00735     int         source_len;
00736     int         from_index;
00737 
00738     m = VARSIZE_ANY_EXHDR(string);
00739     if (m <= 0)
00740         PG_RETURN_TEXT_P(string);
00741     source = VARDATA_ANY(string);
00742 
00743     fromlen = VARSIZE_ANY_EXHDR(from);
00744     from_ptr = VARDATA_ANY(from);
00745     tolen = VARSIZE_ANY_EXHDR(to);
00746     to_ptr = VARDATA_ANY(to);
00747 
00748     /*
00749      * The worst-case expansion is to substitute a max-length character for a
00750      * single-byte character at each position of the string.
00751      */
00752     worst_len = pg_database_encoding_max_length() * m;
00753 
00754     /* check for integer overflow */
00755     if (worst_len / pg_database_encoding_max_length() != m)
00756         ereport(ERROR,
00757                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00758                  errmsg("requested length too large")));
00759 
00760     result = (text *) palloc(worst_len + VARHDRSZ);
00761     target = VARDATA(result);
00762     retlen = 0;
00763 
00764     while (m > 0)
00765     {
00766         source_len = pg_mblen(source);
00767         from_index = 0;
00768 
00769         for (i = 0; i < fromlen; i += len)
00770         {
00771             len = pg_mblen(&from_ptr[i]);
00772             if (len == source_len &&
00773                 memcmp(source, &from_ptr[i], len) == 0)
00774                 break;
00775 
00776             from_index++;
00777         }
00778         if (i < fromlen)
00779         {
00780             /* substitute */
00781             char       *p = to_ptr;
00782 
00783             for (i = 0; i < from_index; i++)
00784             {
00785                 p += pg_mblen(p);
00786                 if (p >= (to_ptr + tolen))
00787                     break;
00788             }
00789             if (p < (to_ptr + tolen))
00790             {
00791                 len = pg_mblen(p);
00792                 memcpy(target, p, len);
00793                 target += len;
00794                 retlen += len;
00795             }
00796 
00797         }
00798         else
00799         {
00800             /* no match, so copy */
00801             memcpy(target, source, source_len);
00802             target += source_len;
00803             retlen += source_len;
00804         }
00805 
00806         source += source_len;
00807         m -= source_len;
00808     }
00809 
00810     SET_VARSIZE(result, retlen + VARHDRSZ);
00811 
00812     /*
00813      * The function result is probably much bigger than needed, if we're using
00814      * a multibyte encoding, but it's not worth reallocating it; the result
00815      * probably won't live long anyway.
00816      */
00817 
00818     PG_RETURN_TEXT_P(result);
00819 }
00820 
00821 /********************************************************************
00822  *
00823  * ascii
00824  *
00825  * Syntax:
00826  *
00827  *   int ascii(text string)
00828  *
00829  * Purpose:
00830  *
00831  *   Returns the decimal representation of the first character from
00832  *   string.
00833  *   If the string is empty we return 0.
00834  *   If the database encoding is UTF8, we return the Unicode codepoint.
00835  *   If the database encoding is any other multi-byte encoding, we
00836  *   return the value of the first byte if it is an ASCII character
00837  *   (range 1 .. 127), or raise an error.
00838  *   For all other encodings we return the value of the first byte,
00839  *   (range 1..255).
00840  *
00841  ********************************************************************/
00842 
00843 Datum
00844 ascii(PG_FUNCTION_ARGS)
00845 {
00846     text       *string = PG_GETARG_TEXT_PP(0);
00847     int         encoding = GetDatabaseEncoding();
00848     unsigned char *data;
00849 
00850     if (VARSIZE_ANY_EXHDR(string) <= 0)
00851         PG_RETURN_INT32(0);
00852 
00853     data = (unsigned char *) VARDATA_ANY(string);
00854 
00855     if (encoding == PG_UTF8 && *data > 127)
00856     {
00857         /* return the code point for Unicode */
00858 
00859         int         result = 0,
00860                     tbytes = 0,
00861                     i;
00862 
00863         if (*data >= 0xF0)
00864         {
00865             result = *data & 0x07;
00866             tbytes = 3;
00867         }
00868         else if (*data >= 0xE0)
00869         {
00870             result = *data & 0x0F;
00871             tbytes = 2;
00872         }
00873         else
00874         {
00875             Assert(*data > 0xC0);
00876             result = *data & 0x1f;
00877             tbytes = 1;
00878         }
00879 
00880         Assert(tbytes > 0);
00881 
00882         for (i = 1; i <= tbytes; i++)
00883         {
00884             Assert((data[i] & 0xC0) == 0x80);
00885             result = (result << 6) + (data[i] & 0x3f);
00886         }
00887 
00888         PG_RETURN_INT32(result);
00889     }
00890     else
00891     {
00892         if (pg_encoding_max_length(encoding) > 1 && *data > 127)
00893             ereport(ERROR,
00894                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00895                      errmsg("requested character too large")));
00896 
00897 
00898         PG_RETURN_INT32((int32) *data);
00899     }
00900 }
00901 
00902 /********************************************************************
00903  *
00904  * chr
00905  *
00906  * Syntax:
00907  *
00908  *   text chr(int val)
00909  *
00910  * Purpose:
00911  *
00912  *  Returns the character having the binary equivalent to val.
00913  *
00914  * For UTF8 we treat the argumwent as a Unicode code point.
00915  * For other multi-byte encodings we raise an error for arguments
00916  * outside the strict ASCII range (1..127).
00917  *
00918  * It's important that we don't ever return a value that is not valid
00919  * in the database encoding, so that this doesn't become a way for
00920  * invalid data to enter the database.
00921  *
00922  ********************************************************************/
00923 
00924 Datum
00925 chr         (PG_FUNCTION_ARGS)
00926 {
00927     uint32      cvalue = PG_GETARG_UINT32(0);
00928     text       *result;
00929     int         encoding = GetDatabaseEncoding();
00930 
00931     if (encoding == PG_UTF8 && cvalue > 127)
00932     {
00933         /* for Unicode we treat the argument as a code point */
00934         int         bytes;
00935         char       *wch;
00936 
00937         /* We only allow valid Unicode code points */
00938         if (cvalue > 0x001fffff)
00939             ereport(ERROR,
00940                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00941                      errmsg("requested character too large for encoding: %d",
00942                             cvalue)));
00943 
00944         if (cvalue > 0xffff)
00945             bytes = 4;
00946         else if (cvalue > 0x07ff)
00947             bytes = 3;
00948         else
00949             bytes = 2;
00950 
00951         result = (text *) palloc(VARHDRSZ + bytes);
00952         SET_VARSIZE(result, VARHDRSZ + bytes);
00953         wch = VARDATA(result);
00954 
00955         if (bytes == 2)
00956         {
00957             wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
00958             wch[1] = 0x80 | (cvalue & 0x3F);;
00959         }
00960         else if (bytes == 3)
00961         {
00962             wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
00963             wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
00964             wch[2] = 0x80 | (cvalue & 0x3F);
00965         }
00966         else
00967         {
00968             wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
00969             wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
00970             wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
00971             wch[3] = 0x80 | (cvalue & 0x3F);
00972         }
00973 
00974     }
00975 
00976     else
00977     {
00978         bool        is_mb;
00979 
00980         /*
00981          * Error out on arguments that make no sense or that we can't validly
00982          * represent in the encoding.
00983          */
00984 
00985         if (cvalue == 0)
00986             ereport(ERROR,
00987                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00988                      errmsg("null character not permitted")));
00989 
00990         is_mb = pg_encoding_max_length(encoding) > 1;
00991 
00992         if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
00993             ereport(ERROR,
00994                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00995                      errmsg("requested character too large for encoding: %d",
00996                             cvalue)));
00997 
00998 
00999         result = (text *) palloc(VARHDRSZ + 1);
01000         SET_VARSIZE(result, VARHDRSZ + 1);
01001         *VARDATA(result) = (char) cvalue;
01002     }
01003 
01004     PG_RETURN_TEXT_P(result);
01005 }
01006 
01007 /********************************************************************
01008  *
01009  * repeat
01010  *
01011  * Syntax:
01012  *
01013  *   text repeat(text string, int val)
01014  *
01015  * Purpose:
01016  *
01017  *  Repeat string by val.
01018  *
01019  ********************************************************************/
01020 
01021 Datum
01022 repeat(PG_FUNCTION_ARGS)
01023 {
01024     text       *string = PG_GETARG_TEXT_PP(0);
01025     int32       count = PG_GETARG_INT32(1);
01026     text       *result;
01027     int         slen,
01028                 tlen;
01029     int         i;
01030     char       *cp,
01031                *sp;
01032 
01033     if (count < 0)
01034         count = 0;
01035 
01036     slen = VARSIZE_ANY_EXHDR(string);
01037     tlen = VARHDRSZ + (count * slen);
01038 
01039     /* Check for integer overflow */
01040     if (slen != 0 && count != 0)
01041     {
01042         int         check = count * slen;
01043         int         check2 = check + VARHDRSZ;
01044 
01045         if ((check / slen) != count || check2 <= check)
01046             ereport(ERROR,
01047                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
01048                      errmsg("requested length too large")));
01049     }
01050 
01051     result = (text *) palloc(tlen);
01052 
01053     SET_VARSIZE(result, tlen);
01054     cp = VARDATA(result);
01055     sp = VARDATA_ANY(string);
01056     for (i = 0; i < count; i++)
01057     {
01058         memcpy(cp, sp, slen);
01059         cp += slen;
01060     }
01061 
01062     PG_RETURN_TEXT_P(result);
01063 }