Header And Logo

PostgreSQL
| The world's most advanced open source database.

euc2004_sjis2004.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  *    EUC_JIS_2004, SHIFT_JIS_2004
00004  *
00005  * Copyright (c) 2007-2013, PostgreSQL Global Development Group
00006  *
00007  * IDENTIFICATION
00008  *    src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
00009  *
00010  *-------------------------------------------------------------------------
00011  */
00012 
00013 #include "postgres.h"
00014 #include "fmgr.h"
00015 #include "mb/pg_wchar.h"
00016 
00017 PG_MODULE_MAGIC;
00018 
00019 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
00020 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
00021 
00022 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
00023 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
00024 
00025 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
00026 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
00027 
00028 /* ----------
00029  * conv_proc(
00030  *      INTEGER,    -- source encoding id
00031  *      INTEGER,    -- destination encoding id
00032  *      CSTRING,    -- source string (null terminated C string)
00033  *      CSTRING,    -- destination string (null terminated C string)
00034  *      INTEGER     -- source string length
00035  * ) returns VOID;
00036  * ----------
00037  */
00038 
00039 Datum
00040 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
00041 {
00042     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00043     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00044     int         len = PG_GETARG_INT32(4);
00045 
00046     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
00047 
00048     euc_jis_20042shift_jis_2004(src, dest, len);
00049 
00050     PG_RETURN_VOID();
00051 }
00052 
00053 Datum
00054 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
00055 {
00056     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00057     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00058     int         len = PG_GETARG_INT32(4);
00059 
00060     CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
00061 
00062     shift_jis_20042euc_jis_2004(src, dest, len);
00063 
00064     PG_RETURN_VOID();
00065 }
00066 
00067 /*
00068  * EUC_JIS_2004 -> SHIFT_JIS_2004
00069  */
00070 static void
00071 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
00072 {
00073     int         c1,
00074                 ku,
00075                 ten;
00076     int         l;
00077 
00078     while (len > 0)
00079     {
00080         c1 = *euc;
00081         if (!IS_HIGHBIT_SET(c1))
00082         {
00083             /* ASCII */
00084             if (c1 == 0)
00085                 report_invalid_encoding(PG_EUC_JIS_2004,
00086                                         (const char *) euc, len);
00087             *p++ = c1;
00088             euc++;
00089             len--;
00090             continue;
00091         }
00092 
00093         l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
00094 
00095         if (l < 0)
00096             report_invalid_encoding(PG_EUC_JIS_2004,
00097                                     (const char *) euc, len);
00098 
00099         if (c1 == SS2 && l == 2)    /* JIS X 0201 kana? */
00100         {
00101             *p++ = euc[1];
00102         }
00103         else if (c1 == SS3 && l == 3)   /* JIS X 0213 plane 2? */
00104         {
00105             ku = euc[1] - 0xa0;
00106             ten = euc[2] - 0xa0;
00107 
00108             switch (ku)
00109             {
00110                 case 1:
00111                 case 3:
00112                 case 4:
00113                 case 5:
00114                 case 8:
00115                 case 12:
00116                 case 13:
00117                 case 14:
00118                 case 15:
00119                     *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
00120                     break;
00121                 default:
00122                     if (ku >= 78 && ku <= 94)
00123                     {
00124                         *p++ = (ku + 0x19b) >> 1;
00125                     }
00126                     else
00127                         report_invalid_encoding(PG_EUC_JIS_2004,
00128                                                 (const char *) euc, len);
00129             }
00130 
00131             if (ku % 2)
00132             {
00133                 if (ten >= 1 && ten <= 63)
00134                     *p++ = ten + 0x3f;
00135                 else if (ten >= 64 && ten <= 94)
00136                     *p++ = ten + 0x40;
00137                 else
00138                     report_invalid_encoding(PG_EUC_JIS_2004,
00139                                             (const char *) euc, len);
00140             }
00141             else
00142                 *p++ = ten + 0x9e;
00143         }
00144 
00145         else if (l == 2)        /* JIS X 0213 plane 1? */
00146         {
00147             ku = c1 - 0xa0;
00148             ten = euc[1] - 0xa0;
00149 
00150             if (ku >= 1 && ku <= 62)
00151                 *p++ = (ku + 0x101) >> 1;
00152             else if (ku >= 63 && ku <= 94)
00153                 *p++ = (ku + 0x181) >> 1;
00154             else
00155                 report_invalid_encoding(PG_EUC_JIS_2004,
00156                                         (const char *) euc, len);
00157 
00158             if (ku % 2)
00159             {
00160                 if (ten >= 1 && ten <= 63)
00161                     *p++ = ten + 0x3f;
00162                 else if (ten >= 64 && ten <= 94)
00163                     *p++ = ten + 0x40;
00164                 else
00165                     report_invalid_encoding(PG_EUC_JIS_2004,
00166                                             (const char *) euc, len);
00167             }
00168             else
00169                 *p++ = ten + 0x9e;
00170         }
00171         else
00172             report_invalid_encoding(PG_EUC_JIS_2004,
00173                                     (const char *) euc, len);
00174 
00175         euc += l;
00176         len -= l;
00177     }
00178     *p = '\0';
00179 }
00180 
/*
 * Decode the second byte "b" of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" code (1..94) selected by the byte, or -1 if the byte
 * is not a valid second byte.  The parity of the character's "ku" is
 * stored through "ku":
 *   *ku = 0: "ku" is even (also stored when -1 is returned)
 *   *ku = 1: "ku" is odd
 */
static int
get_ten(int b, int *ku)
{
    /* covers both the even-ku range and the error case */
    *ku = 0;

    /* valid second bytes are 0x40..0x7e and 0x80..0xfc */
    if (b < 0x40 || b == 0x7f || b > 0xfc)
        return -1;

    /* 0x9f..0xfc: even ku */
    if (b >= 0x9f)
        return b - 0x9e;

    /* 0x40..0x7e and 0x80..0x9e: odd ku; the offsets skip over 0x7f */
    *ku = 1;
    return (b <= 0x7e) ? b - 0x3f : b - 0x40;
}
00213 
00214 /*
00215  * SHIFT_JIS_2004 ---> EUC_JIS_2004
00216  */
00217 
00218 static void
00219 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
00220 {
00221     int         c1;
00222     int         ku,
00223                 ten,
00224                 kubun;
00225     int         plane;
00226     int         l;
00227 
00228     while (len > 0)
00229     {
00230         c1 = *sjis;
00231 
00232         if (!IS_HIGHBIT_SET(c1))
00233         {
00234             /* ASCII */
00235             if (c1 == 0)
00236                 report_invalid_encoding(PG_SHIFT_JIS_2004,
00237                                         (const char *) sjis, len);
00238             *p++ = c1;
00239             sjis++;
00240             len--;
00241             continue;
00242         }
00243 
00244         l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
00245 
00246         if (l < 0 || l > len)
00247             report_invalid_encoding(PG_SHIFT_JIS_2004,
00248                                     (const char *) sjis, len);
00249 
00250         if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
00251         {
00252             /* JIS X0201 (1 byte kana) */
00253             *p++ = SS2;
00254             *p++ = c1;
00255         }
00256         else if (l == 2)
00257         {
00258             int         c2 = sjis[1];
00259 
00260             plane = 1;
00261             ku = 1;
00262             ten = 1;
00263 
00264             /*
00265              * JIS X 0213
00266              */
00267             if (c1 >= 0x81 && c1 <= 0x9f)       /* plane 1 1ku-62ku */
00268             {
00269                 ku = (c1 << 1) - 0x100;
00270                 ten = get_ten(c2, &kubun);
00271                 if (ten < 0)
00272                     report_invalid_encoding(PG_SHIFT_JIS_2004,
00273                                             (const char *) sjis, len);
00274                 ku -= kubun;
00275             }
00276             else if (c1 >= 0xe0 && c1 <= 0xef)  /* plane 1 62ku-94ku */
00277             {
00278                 ku = (c1 << 1) - 0x180;
00279                 ten = get_ten(c2, &kubun);
00280                 if (ten < 0)
00281                     report_invalid_encoding(PG_SHIFT_JIS_2004,
00282 
00283                                             (const char *) sjis, len);
00284                 ku -= kubun;
00285             }
00286             else if (c1 >= 0xf0 && c1 <= 0xf3)  /* plane 2
00287                                                  * 1,3,4,5,8,12,13,14,15 ku */
00288             {
00289                 plane = 2;
00290                 ten = get_ten(c2, &kubun);
00291                 if (ten < 0)
00292                     report_invalid_encoding(PG_SHIFT_JIS_2004,
00293                                             (const char *) sjis, len);
00294                 switch (c1)
00295                 {
00296                     case 0xf0:
00297                         ku = kubun == 0 ? 8 : 1;
00298                         break;
00299                     case 0xf1:
00300                         ku = kubun == 0 ? 4 : 3;
00301                         break;
00302                     case 0xf2:
00303                         ku = kubun == 0 ? 12 : 5;
00304                         break;
00305                     default:
00306                         ku = kubun == 0 ? 14 : 13;
00307                         break;
00308                 }
00309             }
00310             else if (c1 >= 0xf4 && c1 <= 0xfc)  /* plane 2 78-94ku */
00311             {
00312                 plane = 2;
00313                 ten = get_ten(c2, &kubun);
00314                 if (ten < 0)
00315                     report_invalid_encoding(PG_SHIFT_JIS_2004,
00316                                             (const char *) sjis, len);
00317                 if (c1 == 0xf4 && kubun == 1)
00318                     ku = 15;
00319                 else
00320                     ku = (c1 << 1) - 0x19a - kubun;
00321             }
00322             else
00323                 report_invalid_encoding(PG_SHIFT_JIS_2004,
00324                                         (const char *) sjis, len);
00325 
00326             if (plane == 2)
00327                 *p++ = SS3;
00328 
00329             *p++ = ku + 0xa0;
00330             *p++ = ten + 0xa0;
00331         }
00332         sjis += l;
00333         len -= l;
00334     }
00335     *p = '\0';
00336 }