00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "postgres.h"
00015
00016 #include <ctype.h>
00017
00018 #include "utils/builtins.h"
00019
00020
/*
 * Method table describing one binary <-> text encoding.
 *
 * The two *_len callbacks are asked for a result size before any buffer is
 * allocated; the two conversion callbacks then write into the caller's
 * buffer and return the number of bytes they produced.  Callers compare
 * that count against the earlier estimate.
 */
struct pg_encoding
{
	unsigned	(*encode_len) (const char *src, unsigned srclen);
	unsigned	(*decode_len) (const char *src, unsigned srclen);
	unsigned	(*encode) (const char *src, unsigned srclen, char *dst);
	unsigned	(*decode) (const char *src, unsigned srclen, char *dst);
};

/* Case-insensitive lookup of an encoding by name; returns NULL if unknown. */
static const struct pg_encoding *pg_find_encoding(const char *name);
00030
00031
00032
00033
00034
00035 Datum
00036 binary_encode(PG_FUNCTION_ARGS)
00037 {
00038 bytea *data = PG_GETARG_BYTEA_P(0);
00039 Datum name = PG_GETARG_DATUM(1);
00040 text *result;
00041 char *namebuf;
00042 int datalen,
00043 resultlen,
00044 res;
00045 const struct pg_encoding *enc;
00046
00047 datalen = VARSIZE(data) - VARHDRSZ;
00048
00049 namebuf = TextDatumGetCString(name);
00050
00051 enc = pg_find_encoding(namebuf);
00052 if (enc == NULL)
00053 ereport(ERROR,
00054 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00055 errmsg("unrecognized encoding: \"%s\"", namebuf)));
00056
00057 resultlen = enc->encode_len(VARDATA(data), datalen);
00058 result = palloc(VARHDRSZ + resultlen);
00059
00060 res = enc->encode(VARDATA(data), datalen, VARDATA(result));
00061
00062
00063 if (res > resultlen)
00064 elog(FATAL, "overflow - encode estimate too small");
00065
00066 SET_VARSIZE(result, VARHDRSZ + res);
00067
00068 PG_RETURN_TEXT_P(result);
00069 }
00070
00071 Datum
00072 binary_decode(PG_FUNCTION_ARGS)
00073 {
00074 text *data = PG_GETARG_TEXT_P(0);
00075 Datum name = PG_GETARG_DATUM(1);
00076 bytea *result;
00077 char *namebuf;
00078 int datalen,
00079 resultlen,
00080 res;
00081 const struct pg_encoding *enc;
00082
00083 datalen = VARSIZE(data) - VARHDRSZ;
00084
00085 namebuf = TextDatumGetCString(name);
00086
00087 enc = pg_find_encoding(namebuf);
00088 if (enc == NULL)
00089 ereport(ERROR,
00090 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00091 errmsg("unrecognized encoding: \"%s\"", namebuf)));
00092
00093 resultlen = enc->decode_len(VARDATA(data), datalen);
00094 result = palloc(VARHDRSZ + resultlen);
00095
00096 res = enc->decode(VARDATA(data), datalen, VARDATA(result));
00097
00098
00099 if (res > resultlen)
00100 elog(FATAL, "overflow - decode estimate too small");
00101
00102 SET_VARSIZE(result, VARHDRSZ + res);
00103
00104 PG_RETURN_BYTEA_P(result);
00105 }
00106
00107
00108
00109
00110
00111
00112 static const char hextbl[] = "0123456789abcdef";
00113
00114 static const int8 hexlookup[128] = {
00115 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00118 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
00119 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00121 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00123 };
00124
00125 unsigned
00126 hex_encode(const char *src, unsigned len, char *dst)
00127 {
00128 const char *end = src + len;
00129
00130 while (src < end)
00131 {
00132 *dst++ = hextbl[(*src >> 4) & 0xF];
00133 *dst++ = hextbl[*src & 0xF];
00134 src++;
00135 }
00136 return len * 2;
00137 }
00138
00139 static inline char
00140 get_hex(char c)
00141 {
00142 int res = -1;
00143
00144 if (c > 0 && c < 127)
00145 res = hexlookup[(unsigned char) c];
00146
00147 if (res < 0)
00148 ereport(ERROR,
00149 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00150 errmsg("invalid hexadecimal digit: \"%c\"", c)));
00151
00152 return (char) res;
00153 }
00154
00155 unsigned
00156 hex_decode(const char *src, unsigned len, char *dst)
00157 {
00158 const char *s,
00159 *srcend;
00160 char v1,
00161 v2,
00162 *p;
00163
00164 srcend = src + len;
00165 s = src;
00166 p = dst;
00167 while (s < srcend)
00168 {
00169 if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
00170 {
00171 s++;
00172 continue;
00173 }
00174 v1 = get_hex(*s++) << 4;
00175 if (s >= srcend)
00176 ereport(ERROR,
00177 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00178 errmsg("invalid hexadecimal data: odd number of digits")));
00179
00180 v2 = get_hex(*s++);
00181 *p++ = v1 | v2;
00182 }
00183
00184 return p - dst;
00185 }
00186
/*
 * hex_enc_len
 *
 * Size of the hex encoding of srclen bytes: two characters per byte.
 * The data itself is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
	return srclen * 2;
}
00192
/*
 * hex_dec_len
 *
 * Buffer size needed to decode srclen characters of hex text: one byte per
 * two characters, rounded down.  The data itself is not examined.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
	return srclen / 2;
}
00198
00199
00200
00201
00202
00203 static const char _base64[] =
00204 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
00205
00206 static const int8 b64lookup[128] = {
00207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00208 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
00209 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
00210 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
00211 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
00212 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
00213 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
00214 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
00215 };
00216
00217 static unsigned
00218 b64_encode(const char *src, unsigned len, char *dst)
00219 {
00220 char *p,
00221 *lend = dst + 76;
00222 const char *s,
00223 *end = src + len;
00224 int pos = 2;
00225 uint32 buf = 0;
00226
00227 s = src;
00228 p = dst;
00229
00230 while (s < end)
00231 {
00232 buf |= (unsigned char) *s << (pos << 3);
00233 pos--;
00234 s++;
00235
00236
00237 if (pos < 0)
00238 {
00239 *p++ = _base64[(buf >> 18) & 0x3f];
00240 *p++ = _base64[(buf >> 12) & 0x3f];
00241 *p++ = _base64[(buf >> 6) & 0x3f];
00242 *p++ = _base64[buf & 0x3f];
00243
00244 pos = 2;
00245 buf = 0;
00246 }
00247 if (p >= lend)
00248 {
00249 *p++ = '\n';
00250 lend = p + 76;
00251 }
00252 }
00253 if (pos != 2)
00254 {
00255 *p++ = _base64[(buf >> 18) & 0x3f];
00256 *p++ = _base64[(buf >> 12) & 0x3f];
00257 *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
00258 *p++ = '=';
00259 }
00260
00261 return p - dst;
00262 }
00263
00264 static unsigned
00265 b64_decode(const char *src, unsigned len, char *dst)
00266 {
00267 const char *srcend = src + len,
00268 *s = src;
00269 char *p = dst;
00270 char c;
00271 int b = 0;
00272 uint32 buf = 0;
00273 int pos = 0,
00274 end = 0;
00275
00276 while (s < srcend)
00277 {
00278 c = *s++;
00279
00280 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
00281 continue;
00282
00283 if (c == '=')
00284 {
00285
00286 if (!end)
00287 {
00288 if (pos == 2)
00289 end = 1;
00290 else if (pos == 3)
00291 end = 2;
00292 else
00293 ereport(ERROR,
00294 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00295 errmsg("unexpected \"=\"")));
00296 }
00297 b = 0;
00298 }
00299 else
00300 {
00301 b = -1;
00302 if (c > 0 && c < 127)
00303 b = b64lookup[(unsigned char) c];
00304 if (b < 0)
00305 ereport(ERROR,
00306 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00307 errmsg("invalid symbol")));
00308 }
00309
00310 buf = (buf << 6) + b;
00311 pos++;
00312 if (pos == 4)
00313 {
00314 *p++ = (buf >> 16) & 255;
00315 if (end == 0 || end > 1)
00316 *p++ = (buf >> 8) & 255;
00317 if (end == 0 || end > 2)
00318 *p++ = buf & 255;
00319 buf = 0;
00320 pos = 0;
00321 }
00322 }
00323
00324 if (pos != 0)
00325 ereport(ERROR,
00326 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00327 errmsg("invalid end sequence")));
00328
00329 return p - dst;
00330 }
00331
00332
/*
 * b64_enc_len
 *
 * Buffer size estimate for base64-encoding srclen bytes: four characters
 * per three bytes (with slack for a padded final group) plus one newline
 * for every 57 input bytes, i.e. every 76 output characters.  The data
 * itself is not examined.
 */
static unsigned
b64_enc_len(const char *src, unsigned srclen)
{
	unsigned	symbols = (srclen + 2) * 4 / 3;
	unsigned	newlines = srclen / (76 * 3 / 4);

	return symbols + newlines;
}
00339
/*
 * b64_dec_len
 *
 * Buffer size needed to decode srclen characters of base64 text: three
 * bytes per four characters, rounded down.  The data itself is not
 * examined.
 */
static unsigned
b64_dec_len(const char *src, unsigned srclen)
{
	return (srclen * 3) / 4;
}
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
/* Numeric value of a digit character, and the digit character for a value. */
#define VAL(ch)		((ch) - '0')
#define DIG(v)		((v) + '0')
00362
/*
 * esc_encode
 *
 * Write the "escape" text form of srclen bytes from src into dst:
 *   - a backslash becomes two backslashes;
 *   - a zero byte or a byte with the high bit set becomes a backslash
 *     followed by three octal digits;
 *   - every other byte is copied unchanged.
 *
 * Returns the number of characters written.
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
	const char *const stop = src + srclen;
	char	   *out = dst;
	int			written = 0;

	for (; src < stop; src++)
	{
		unsigned char ch = (unsigned char) *src;

		if (ch == '\\')
		{
			*out++ = '\\';
			*out++ = '\\';
			written += 2;
		}
		else if (ch != '\0' && !IS_HIGHBIT_SET(ch))
		{
			*out++ = ch;
			written++;
		}
		else
		{
			/* \ooo with the most significant octal digit first */
			*out++ = '\\';
			*out++ = DIG(ch >> 6);
			*out++ = DIG((ch >> 3) & 7);
			*out++ = DIG(ch & 7);
			written += 4;
		}
	}

	return written;
}
00401
00402 static unsigned
00403 esc_decode(const char *src, unsigned srclen, char *dst)
00404 {
00405 const char *end = src + srclen;
00406 char *rp = dst;
00407 int len = 0;
00408
00409 while (src < end)
00410 {
00411 if (src[0] != '\\')
00412 *rp++ = *src++;
00413 else if (src + 3 < end &&
00414 (src[1] >= '0' && src[1] <= '3') &&
00415 (src[2] >= '0' && src[2] <= '7') &&
00416 (src[3] >= '0' && src[3] <= '7'))
00417 {
00418 int val;
00419
00420 val = VAL(src[1]);
00421 val <<= 3;
00422 val += VAL(src[2]);
00423 val <<= 3;
00424 *rp++ = val + VAL(src[3]);
00425 src += 4;
00426 }
00427 else if (src + 1 < end &&
00428 (src[1] == '\\'))
00429 {
00430 *rp++ = '\\';
00431 src += 2;
00432 }
00433 else
00434 {
00435
00436
00437
00438
00439 ereport(ERROR,
00440 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00441 errmsg("invalid input syntax for type bytea")));
00442 }
00443
00444 len++;
00445 }
00446
00447 return len;
00448 }
00449
/*
 * esc_enc_len
 *
 * Count the characters the "escape" text form of srclen bytes at src
 * occupies: 2 for a backslash, 4 for a zero byte or a byte with the high
 * bit set, 1 for anything else.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
	const char *const stop = src + srclen;
	int			total = 0;

	for (; src < stop; src++)
	{
		if (*src == '\\')
			total += 2;
		else if (*src == '\0' || IS_HIGHBIT_SET(*src))
			total += 4;
		else
			total += 1;
	}

	return total;
}
00470
00471 static unsigned
00472 esc_dec_len(const char *src, unsigned srclen)
00473 {
00474 const char *end = src + srclen;
00475 int len = 0;
00476
00477 while (src < end)
00478 {
00479 if (src[0] != '\\')
00480 src++;
00481 else if (src + 3 < end &&
00482 (src[1] >= '0' && src[1] <= '3') &&
00483 (src[2] >= '0' && src[2] <= '7') &&
00484 (src[3] >= '0' && src[3] <= '7'))
00485 {
00486
00487
00488
00489 src += 4;
00490 }
00491 else if (src + 1 < end &&
00492 (src[1] == '\\'))
00493 {
00494
00495
00496
00497 src += 2;
00498 }
00499 else
00500 {
00501
00502
00503
00504 ereport(ERROR,
00505 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
00506 errmsg("invalid input syntax for type bytea")));
00507 }
00508
00509 len++;
00510 }
00511 return len;
00512 }
00513
00514
00515
00516
00517
00518 static const struct
00519 {
00520 const char *name;
00521 struct pg_encoding enc;
00522 } enclist[] =
00523
00524 {
00525 {
00526 "hex",
00527 {
00528 hex_enc_len, hex_dec_len, hex_encode, hex_decode
00529 }
00530 },
00531 {
00532 "base64",
00533 {
00534 b64_enc_len, b64_dec_len, b64_encode, b64_decode
00535 }
00536 },
00537 {
00538 "escape",
00539 {
00540 esc_enc_len, esc_dec_len, esc_encode, esc_decode
00541 }
00542 },
00543 {
00544 NULL,
00545 {
00546 NULL, NULL, NULL, NULL
00547 }
00548 }
00549 };
00550
00551 static const struct pg_encoding *
00552 pg_find_encoding(const char *name)
00553 {
00554 int i;
00555
00556 for (i = 0; enclist[i].name; i++)
00557 if (pg_strcasecmp(enclist[i].name, name) == 0)
00558 return &enclist[i].enc;
00559
00560 return NULL;
00561 }