00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "postgres.h"
00014 #include "fmgr.h"
00015 #include "mb/pg_wchar.h"
00016
00017 PG_MODULE_MAGIC;
00018
00019 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
00020 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
00021
00022 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
00023 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
00024
00025 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
00026 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 Datum
00040 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
00041 {
00042 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00043 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00044 int len = PG_GETARG_INT32(4);
00045
00046 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
00047
00048 euc_jis_20042shift_jis_2004(src, dest, len);
00049
00050 PG_RETURN_VOID();
00051 }
00052
00053 Datum
00054 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
00055 {
00056 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00057 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00058 int len = PG_GETARG_INT32(4);
00059
00060 CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
00061
00062 shift_jis_20042euc_jis_2004(src, dest, len);
00063
00064 PG_RETURN_VOID();
00065 }
00066
00067
00068
00069
00070 static void
00071 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
00072 {
00073 int c1,
00074 ku,
00075 ten;
00076 int l;
00077
00078 while (len > 0)
00079 {
00080 c1 = *euc;
00081 if (!IS_HIGHBIT_SET(c1))
00082 {
00083
00084 if (c1 == 0)
00085 report_invalid_encoding(PG_EUC_JIS_2004,
00086 (const char *) euc, len);
00087 *p++ = c1;
00088 euc++;
00089 len--;
00090 continue;
00091 }
00092
00093 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
00094
00095 if (l < 0)
00096 report_invalid_encoding(PG_EUC_JIS_2004,
00097 (const char *) euc, len);
00098
00099 if (c1 == SS2 && l == 2)
00100 {
00101 *p++ = euc[1];
00102 }
00103 else if (c1 == SS3 && l == 3)
00104 {
00105 ku = euc[1] - 0xa0;
00106 ten = euc[2] - 0xa0;
00107
00108 switch (ku)
00109 {
00110 case 1:
00111 case 3:
00112 case 4:
00113 case 5:
00114 case 8:
00115 case 12:
00116 case 13:
00117 case 14:
00118 case 15:
00119 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
00120 break;
00121 default:
00122 if (ku >= 78 && ku <= 94)
00123 {
00124 *p++ = (ku + 0x19b) >> 1;
00125 }
00126 else
00127 report_invalid_encoding(PG_EUC_JIS_2004,
00128 (const char *) euc, len);
00129 }
00130
00131 if (ku % 2)
00132 {
00133 if (ten >= 1 && ten <= 63)
00134 *p++ = ten + 0x3f;
00135 else if (ten >= 64 && ten <= 94)
00136 *p++ = ten + 0x40;
00137 else
00138 report_invalid_encoding(PG_EUC_JIS_2004,
00139 (const char *) euc, len);
00140 }
00141 else
00142 *p++ = ten + 0x9e;
00143 }
00144
00145 else if (l == 2)
00146 {
00147 ku = c1 - 0xa0;
00148 ten = euc[1] - 0xa0;
00149
00150 if (ku >= 1 && ku <= 62)
00151 *p++ = (ku + 0x101) >> 1;
00152 else if (ku >= 63 && ku <= 94)
00153 *p++ = (ku + 0x181) >> 1;
00154 else
00155 report_invalid_encoding(PG_EUC_JIS_2004,
00156 (const char *) euc, len);
00157
00158 if (ku % 2)
00159 {
00160 if (ten >= 1 && ten <= 63)
00161 *p++ = ten + 0x3f;
00162 else if (ten >= 64 && ten <= 94)
00163 *p++ = ten + 0x40;
00164 else
00165 report_invalid_encoding(PG_EUC_JIS_2004,
00166 (const char *) euc, len);
00167 }
00168 else
00169 *p++ = ten + 0x9e;
00170 }
00171 else
00172 report_invalid_encoding(PG_EUC_JIS_2004,
00173 (const char *) euc, len);
00174
00175 euc += l;
00176 len -= l;
00177 }
00178 *p = '\0';
00179 }
00180
00181
00182
00183
00184
00185
/*
 * Decode a Shift_JIS-2004 trail byte.
 *
 * b  - the trail byte value
 * ku - output flag: set to 1 when b lies in 0x40..0x7e or 0x80..0x9e,
 *		and to 0 when b lies in 0x9f..0xfc or is not a valid trail byte
 *
 * Returns the cell number ("ten", 1..94) encoded by b, or -1 if b is not in
 * any of the accepted ranges (0x7f, for instance, is rejected).
 */
static int
get_ten(int b, int *ku)
{
	/* lower part of the first half: 0x40..0x7e -> 1..63 */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* upper part of the first half: 0x80..0x9e -> 64..94 */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* second half: 0x9f..0xfc -> 1..94 */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* anything else is not a valid trail byte */
	*ku = 0;
	return -1;
}
00213
00214
00215
00216
00217
00218 static void
00219 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
00220 {
00221 int c1;
00222 int ku,
00223 ten,
00224 kubun;
00225 int plane;
00226 int l;
00227
00228 while (len > 0)
00229 {
00230 c1 = *sjis;
00231
00232 if (!IS_HIGHBIT_SET(c1))
00233 {
00234
00235 if (c1 == 0)
00236 report_invalid_encoding(PG_SHIFT_JIS_2004,
00237 (const char *) sjis, len);
00238 *p++ = c1;
00239 sjis++;
00240 len--;
00241 continue;
00242 }
00243
00244 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
00245
00246 if (l < 0 || l > len)
00247 report_invalid_encoding(PG_SHIFT_JIS_2004,
00248 (const char *) sjis, len);
00249
00250 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
00251 {
00252
00253 *p++ = SS2;
00254 *p++ = c1;
00255 }
00256 else if (l == 2)
00257 {
00258 int c2 = sjis[1];
00259
00260 plane = 1;
00261 ku = 1;
00262 ten = 1;
00263
00264
00265
00266
00267 if (c1 >= 0x81 && c1 <= 0x9f)
00268 {
00269 ku = (c1 << 1) - 0x100;
00270 ten = get_ten(c2, &kubun);
00271 if (ten < 0)
00272 report_invalid_encoding(PG_SHIFT_JIS_2004,
00273 (const char *) sjis, len);
00274 ku -= kubun;
00275 }
00276 else if (c1 >= 0xe0 && c1 <= 0xef)
00277 {
00278 ku = (c1 << 1) - 0x180;
00279 ten = get_ten(c2, &kubun);
00280 if (ten < 0)
00281 report_invalid_encoding(PG_SHIFT_JIS_2004,
00282
00283 (const char *) sjis, len);
00284 ku -= kubun;
00285 }
00286 else if (c1 >= 0xf0 && c1 <= 0xf3)
00287
00288 {
00289 plane = 2;
00290 ten = get_ten(c2, &kubun);
00291 if (ten < 0)
00292 report_invalid_encoding(PG_SHIFT_JIS_2004,
00293 (const char *) sjis, len);
00294 switch (c1)
00295 {
00296 case 0xf0:
00297 ku = kubun == 0 ? 8 : 1;
00298 break;
00299 case 0xf1:
00300 ku = kubun == 0 ? 4 : 3;
00301 break;
00302 case 0xf2:
00303 ku = kubun == 0 ? 12 : 5;
00304 break;
00305 default:
00306 ku = kubun == 0 ? 14 : 13;
00307 break;
00308 }
00309 }
00310 else if (c1 >= 0xf4 && c1 <= 0xfc)
00311 {
00312 plane = 2;
00313 ten = get_ten(c2, &kubun);
00314 if (ten < 0)
00315 report_invalid_encoding(PG_SHIFT_JIS_2004,
00316 (const char *) sjis, len);
00317 if (c1 == 0xf4 && kubun == 1)
00318 ku = 15;
00319 else
00320 ku = (c1 << 1) - 0x19a - kubun;
00321 }
00322 else
00323 report_invalid_encoding(PG_SHIFT_JIS_2004,
00324 (const char *) sjis, len);
00325
00326 if (plane == 2)
00327 *p++ = SS3;
00328
00329 *p++ = ku + 0xa0;
00330 *p++ = ten + 0xa0;
00331 }
00332 sjis += l;
00333 len -= l;
00334 }
00335 *p = '\0';
00336 }