Header And Logo

PostgreSQL
| The world's most advanced open source database.

euc_tw_and_big5.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  *    EUC_TW, BIG5 and MULE_INTERNAL
00004  *
00005  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00006  * Portions Copyright (c) 1994, Regents of the University of California
00007  *
00008  * IDENTIFICATION
00009  *    src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 
00014 #include "postgres.h"
00015 #include "fmgr.h"
00016 #include "mb/pg_wchar.h"
00017 
00018 #define ENCODING_GROWTH_RATE 4
00019 
00020 PG_MODULE_MAGIC;
00021 
00022 PG_FUNCTION_INFO_V1(euc_tw_to_big5);
00023 PG_FUNCTION_INFO_V1(big5_to_euc_tw);
00024 PG_FUNCTION_INFO_V1(euc_tw_to_mic);
00025 PG_FUNCTION_INFO_V1(mic_to_euc_tw);
00026 PG_FUNCTION_INFO_V1(big5_to_mic);
00027 PG_FUNCTION_INFO_V1(mic_to_big5);
00028 
00029 extern Datum euc_tw_to_big5(PG_FUNCTION_ARGS);
00030 extern Datum big5_to_euc_tw(PG_FUNCTION_ARGS);
00031 extern Datum euc_tw_to_mic(PG_FUNCTION_ARGS);
00032 extern Datum mic_to_euc_tw(PG_FUNCTION_ARGS);
00033 extern Datum big5_to_mic(PG_FUNCTION_ARGS);
00034 extern Datum mic_to_big5(PG_FUNCTION_ARGS);
00035 
00036 /* ----------
00037  * conv_proc(
00038  *      INTEGER,    -- source encoding id
00039  *      INTEGER,    -- destination encoding id
00040  *      CSTRING,    -- source string (null terminated C string)
00041  *      CSTRING,    -- destination string (null terminated C string)
00042  *      INTEGER     -- source string length
00043  * ) returns VOID;
00044  * ----------
00045  */
00046 
00047 static void big52mic(const unsigned char *big5, unsigned char *p, int len);
00048 static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
00049 static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
00050 static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
00051 
00052 Datum
00053 euc_tw_to_big5(PG_FUNCTION_ARGS)
00054 {
00055     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00056     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00057     int         len = PG_GETARG_INT32(4);
00058     unsigned char *buf;
00059 
00060     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
00061 
00062     buf = palloc(len * ENCODING_GROWTH_RATE + 1);
00063     euc_tw2mic(src, buf, len);
00064     mic2big5(buf, dest, strlen((char *) buf));
00065     pfree(buf);
00066 
00067     PG_RETURN_VOID();
00068 }
00069 
00070 Datum
00071 big5_to_euc_tw(PG_FUNCTION_ARGS)
00072 {
00073     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00074     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00075     int         len = PG_GETARG_INT32(4);
00076     unsigned char *buf;
00077 
00078     CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
00079 
00080     buf = palloc(len * ENCODING_GROWTH_RATE + 1);
00081     big52mic(src, buf, len);
00082     mic2euc_tw(buf, dest, strlen((char *) buf));
00083     pfree(buf);
00084 
00085     PG_RETURN_VOID();
00086 }
00087 
00088 Datum
00089 euc_tw_to_mic(PG_FUNCTION_ARGS)
00090 {
00091     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00092     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00093     int         len = PG_GETARG_INT32(4);
00094 
00095     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
00096 
00097     euc_tw2mic(src, dest, len);
00098 
00099     PG_RETURN_VOID();
00100 }
00101 
00102 Datum
00103 mic_to_euc_tw(PG_FUNCTION_ARGS)
00104 {
00105     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00106     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00107     int         len = PG_GETARG_INT32(4);
00108 
00109     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
00110 
00111     mic2euc_tw(src, dest, len);
00112 
00113     PG_RETURN_VOID();
00114 }
00115 
00116 Datum
00117 big5_to_mic(PG_FUNCTION_ARGS)
00118 {
00119     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00120     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00121     int         len = PG_GETARG_INT32(4);
00122 
00123     CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
00124 
00125     big52mic(src, dest, len);
00126 
00127     PG_RETURN_VOID();
00128 }
00129 
00130 Datum
00131 mic_to_big5(PG_FUNCTION_ARGS)
00132 {
00133     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00134     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00135     int         len = PG_GETARG_INT32(4);
00136 
00137     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
00138 
00139     mic2big5(src, dest, len);
00140 
00141     PG_RETURN_VOID();
00142 }
00143 
00144 /*
00145  * EUC_TW ---> MIC
00146  */
00147 static void
00148 euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
00149 {
00150     int         c1;
00151     int         l;
00152 
00153     while (len > 0)
00154     {
00155         c1 = *euc;
00156         if (IS_HIGHBIT_SET(c1))
00157         {
00158             l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
00159             if (l < 0)
00160                 report_invalid_encoding(PG_EUC_TW,
00161                                         (const char *) euc, len);
00162             if (c1 == SS2)
00163             {
00164                 c1 = euc[1];    /* plane No. */
00165                 if (c1 == 0xa1)
00166                     *p++ = LC_CNS11643_1;
00167                 else if (c1 == 0xa2)
00168                     *p++ = LC_CNS11643_2;
00169                 else
00170                 {
00171                     /* other planes are MULE private charsets */
00172                     *p++ = LCPRV2_B;
00173                     *p++ = c1 - 0xa3 + LC_CNS11643_3;
00174                 }
00175                 *p++ = euc[2];
00176                 *p++ = euc[3];
00177             }
00178             else
00179             {                   /* CNS11643-1 */
00180                 *p++ = LC_CNS11643_1;
00181                 *p++ = c1;
00182                 *p++ = euc[1];
00183             }
00184             euc += l;
00185             len -= l;
00186         }
00187         else
00188         {                       /* should be ASCII */
00189             if (c1 == 0)
00190                 report_invalid_encoding(PG_EUC_TW,
00191                                         (const char *) euc, len);
00192             *p++ = c1;
00193             euc++;
00194             len--;
00195         }
00196     }
00197     *p = '\0';
00198 }
00199 
00200 /*
00201  * MIC ---> EUC_TW
00202  */
00203 static void
00204 mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
00205 {
00206     int         c1;
00207     int         l;
00208 
00209     while (len > 0)
00210     {
00211         c1 = *mic;
00212         if (!IS_HIGHBIT_SET(c1))
00213         {
00214             /* ASCII */
00215             if (c1 == 0)
00216                 report_invalid_encoding(PG_MULE_INTERNAL,
00217                                         (const char *) mic, len);
00218             *p++ = c1;
00219             mic++;
00220             len--;
00221             continue;
00222         }
00223         l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
00224         if (l < 0)
00225             report_invalid_encoding(PG_MULE_INTERNAL,
00226                                     (const char *) mic, len);
00227         if (c1 == LC_CNS11643_1)
00228         {
00229             *p++ = mic[1];
00230             *p++ = mic[2];
00231         }
00232         else if (c1 == LC_CNS11643_2)
00233         {
00234             *p++ = SS2;
00235             *p++ = 0xa2;
00236             *p++ = mic[1];
00237             *p++ = mic[2];
00238         }
00239         else if (c1 == LCPRV2_B &&
00240                  mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
00241         {
00242             *p++ = SS2;
00243             *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
00244             *p++ = mic[2];
00245             *p++ = mic[3];
00246         }
00247         else
00248             report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
00249                                        (const char *) mic, len);
00250         mic += l;
00251         len -= l;
00252     }
00253     *p = '\0';
00254 }
00255 
00256 /*
00257  * Big5 ---> MIC
00258  */
00259 static void
00260 big52mic(const unsigned char *big5, unsigned char *p, int len)
00261 {
00262     unsigned short c1;
00263     unsigned short big5buf,
00264                 cnsBuf;
00265     unsigned char lc;
00266     int         l;
00267 
00268     while (len > 0)
00269     {
00270         c1 = *big5;
00271         if (!IS_HIGHBIT_SET(c1))
00272         {
00273             /* ASCII */
00274             if (c1 == 0)
00275                 report_invalid_encoding(PG_BIG5,
00276                                         (const char *) big5, len);
00277             *p++ = c1;
00278             big5++;
00279             len--;
00280             continue;
00281         }
00282         l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
00283         if (l < 0)
00284             report_invalid_encoding(PG_BIG5,
00285                                     (const char *) big5, len);
00286         big5buf = (c1 << 8) | big5[1];
00287         cnsBuf = BIG5toCNS(big5buf, &lc);
00288         if (lc != 0)
00289         {
00290             /* Planes 3 and 4 are MULE private charsets */
00291             if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
00292                 *p++ = LCPRV2_B;
00293             *p++ = lc;          /* Plane No. */
00294             *p++ = (cnsBuf >> 8) & 0x00ff;
00295             *p++ = cnsBuf & 0x00ff;
00296         }
00297         else
00298             report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
00299                                        (const char *) big5, len);
00300         big5 += l;
00301         len -= l;
00302     }
00303     *p = '\0';
00304 }
00305 
00306 /*
00307  * MIC ---> Big5
00308  */
00309 static void
00310 mic2big5(const unsigned char *mic, unsigned char *p, int len)
00311 {
00312     unsigned short c1;
00313     unsigned short big5buf,
00314                 cnsBuf;
00315     int         l;
00316 
00317     while (len > 0)
00318     {
00319         c1 = *mic;
00320         if (!IS_HIGHBIT_SET(c1))
00321         {
00322             /* ASCII */
00323             if (c1 == 0)
00324                 report_invalid_encoding(PG_MULE_INTERNAL,
00325                                         (const char *) mic, len);
00326             *p++ = c1;
00327             mic++;
00328             len--;
00329             continue;
00330         }
00331         l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
00332         if (l < 0)
00333             report_invalid_encoding(PG_MULE_INTERNAL,
00334                                     (const char *) mic, len);
00335         if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
00336         {
00337             if (c1 == LCPRV2_B)
00338             {
00339                 c1 = mic[1];    /* get plane no. */
00340                 cnsBuf = (mic[2] << 8) | mic[3];
00341             }
00342             else
00343             {
00344                 cnsBuf = (mic[1] << 8) | mic[2];
00345             }
00346             big5buf = CNStoBIG5(cnsBuf, c1);
00347             if (big5buf == 0)
00348                 report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
00349                                            (const char *) mic, len);
00350             *p++ = (big5buf >> 8) & 0x00ff;
00351             *p++ = big5buf & 0x00ff;
00352         }
00353         else
00354             report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
00355                                        (const char *) mic, len);
00356         mic += l;
00357         len -= l;
00358     }
00359     *p = '\0';
00360 }