Header And Logo

PostgreSQL
| The world's most advanced open source database.

ascii.c

Go to the documentation of this file.
00001 /*-----------------------------------------------------------------------
00002  * ascii.c
00003  *   The PostgreSQL routine for string to ascii conversion.
00004  *
00005  *   Portions Copyright (c) 1999-2013, PostgreSQL Global Development Group
00006  *
00007  * IDENTIFICATION
00008  *    src/backend/utils/adt/ascii.c
00009  *
00010  *-----------------------------------------------------------------------
00011  */
00012 #include "postgres.h"
00013 
00014 #include "mb/pg_wchar.h"
00015 #include "utils/ascii.h"
00016 
00017 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
00018             unsigned char *dest, int enc);
00019 static text *encode_to_ascii(text *data, int enc);
00020 
00021 
00022 /* ----------
00023  * to_ascii
00024  * ----------
00025  */
00026 static void
00027 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
00028 {
00029     unsigned char *x;
00030     const unsigned char *ascii;
00031     int         range;
00032 
00033     /*
00034      * relevant start for an encoding
00035      */
00036 #define RANGE_128   128
00037 #define RANGE_160   160
00038 
00039     if (enc == PG_LATIN1)
00040     {
00041         /*
00042          * ISO-8859-1 <range: 160 -- 255>
00043          */
00044         ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
00045         range = RANGE_160;
00046     }
00047     else if (enc == PG_LATIN2)
00048     {
00049         /*
00050          * ISO-8859-2 <range: 160 -- 255>
00051          */
00052         ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
00053         range = RANGE_160;
00054     }
00055     else if (enc == PG_LATIN9)
00056     {
00057         /*
00058          * ISO-8859-15 <range: 160 -- 255>
00059          */
00060         ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
00061         range = RANGE_160;
00062     }
00063     else if (enc == PG_WIN1250)
00064     {
00065         /*
00066          * Window CP1250 <range: 128 -- 255>
00067          */
00068         ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
00069         range = RANGE_128;
00070     }
00071     else
00072     {
00073         ereport(ERROR,
00074                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00075                  errmsg("encoding conversion from %s to ASCII not supported",
00076                         pg_encoding_to_char(enc))));
00077         return;                 /* keep compiler quiet */
00078     }
00079 
00080     /*
00081      * Encode
00082      */
00083     for (x = src; x < src_end; x++)
00084     {
00085         if (*x < 128)
00086             *dest++ = *x;
00087         else if (*x < range)
00088             *dest++ = ' ';      /* bogus 128 to 'range' */
00089         else
00090             *dest++ = ascii[*x - range];
00091     }
00092 }
00093 
00094 /* ----------
00095  * encode text
00096  *
00097  * The text datum is overwritten in-place, therefore this coding method
00098  * cannot support conversions that change the string length!
00099  * ----------
00100  */
00101 static text *
00102 encode_to_ascii(text *data, int enc)
00103 {
00104     pg_to_ascii((unsigned char *) VARDATA(data),        /* src */
00105                 (unsigned char *) (data) + VARSIZE(data),       /* src end */
00106                 (unsigned char *) VARDATA(data),        /* dest */
00107                 enc);           /* encoding */
00108 
00109     return data;
00110 }
00111 
00112 /* ----------
00113  * convert to ASCII - enc is set as 'name' arg.
00114  * ----------
00115  */
00116 Datum
00117 to_ascii_encname(PG_FUNCTION_ARGS)
00118 {
00119     text       *data = PG_GETARG_TEXT_P_COPY(0);
00120     char       *encname = NameStr(*PG_GETARG_NAME(1));
00121     int         enc = pg_char_to_encoding(encname);
00122 
00123     if (enc < 0)
00124         ereport(ERROR,
00125                 (errcode(ERRCODE_UNDEFINED_OBJECT),
00126                  errmsg("%s is not a valid encoding name", encname)));
00127 
00128     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
00129 }
00130 
00131 /* ----------
00132  * convert to ASCII - enc is set as int4
00133  * ----------
00134  */
00135 Datum
00136 to_ascii_enc(PG_FUNCTION_ARGS)
00137 {
00138     text       *data = PG_GETARG_TEXT_P_COPY(0);
00139     int         enc = PG_GETARG_INT32(1);
00140 
00141     if (!PG_VALID_ENCODING(enc))
00142         ereport(ERROR,
00143                 (errcode(ERRCODE_UNDEFINED_OBJECT),
00144                  errmsg("%d is not a valid encoding code", enc)));
00145 
00146     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
00147 }
00148 
00149 /* ----------
00150  * convert to ASCII - current enc is DatabaseEncoding
00151  * ----------
00152  */
00153 Datum
00154 to_ascii_default(PG_FUNCTION_ARGS)
00155 {
00156     text       *data = PG_GETARG_TEXT_P_COPY(0);
00157     int         enc = GetDatabaseEncoding();
00158 
00159     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
00160 }
00161 
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * printable ASCII string: non-ASCII bytes and ASCII control characters
 * (including DEL, 0x7F) are replaced with '?'.  Newline, carriage return
 * and tab are kept as-is.  Otherwise the behavior is identical to
 * strlcpy(), except that we don't bother with a return value.
 *
 * dest		output buffer of at least destsiz bytes
 * src		NUL-terminated input string
 * destsiz	size of dest; at most destsiz - 1 bytes are copied and the
 *			result is always NUL-terminated unless destsiz is 0, in which
 *			case dest is not touched at all
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
    if (destsiz == 0)           /* corner case: no room for trailing nul */
        return;

    /* pre-decrement reserves one byte for the terminating nul */
    while (--destsiz > 0)
    {
        /* use unsigned char here to avoid compiler warning */
        unsigned char ch = *src++;

        if (ch == '\0')
            break;
        /* Keep printable ASCII characters: space (32) through '~' (126) */
        if (32 <= ch && ch <= 126)
            *dest = ch;
        /* White-space is also OK */
        else if (ch == '\n' || ch == '\r' || ch == '\t')
            *dest = ch;
        /* Everything else (controls, DEL, high-bit bytes) becomes '?' */
        else
            *dest = '?';
        dest++;
    }

    *dest = '\0';
}
00190         /* Everything else is replaced with '?' */
00191         else
00192             *dest = '?';
00193         dest++;
00194     }
00195 
00196     *dest = '\0';
00197 }