00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "postgres.h"
00015 #include "fmgr.h"
00016 #include "mb/pg_wchar.h"
00017
00018 #define ENCODING_GROWTH_RATE 4
00019
00020 PG_MODULE_MAGIC;
00021
00022 PG_FUNCTION_INFO_V1(euc_tw_to_big5);
00023 PG_FUNCTION_INFO_V1(big5_to_euc_tw);
00024 PG_FUNCTION_INFO_V1(euc_tw_to_mic);
00025 PG_FUNCTION_INFO_V1(mic_to_euc_tw);
00026 PG_FUNCTION_INFO_V1(big5_to_mic);
00027 PG_FUNCTION_INFO_V1(mic_to_big5);
00028
00029 extern Datum euc_tw_to_big5(PG_FUNCTION_ARGS);
00030 extern Datum big5_to_euc_tw(PG_FUNCTION_ARGS);
00031 extern Datum euc_tw_to_mic(PG_FUNCTION_ARGS);
00032 extern Datum mic_to_euc_tw(PG_FUNCTION_ARGS);
00033 extern Datum big5_to_mic(PG_FUNCTION_ARGS);
00034 extern Datum mic_to_big5(PG_FUNCTION_ARGS);
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
/*
 * File-local workhorses, listed in the order they are defined below.
 * Each reads "len" bytes from its first argument and writes the converted,
 * NUL-terminated result through "p".
 */
static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
static void big52mic(const unsigned char *big5, unsigned char *p, int len);
static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
00051
00052 Datum
00053 euc_tw_to_big5(PG_FUNCTION_ARGS)
00054 {
00055 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00056 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00057 int len = PG_GETARG_INT32(4);
00058 unsigned char *buf;
00059
00060 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
00061
00062 buf = palloc(len * ENCODING_GROWTH_RATE + 1);
00063 euc_tw2mic(src, buf, len);
00064 mic2big5(buf, dest, strlen((char *) buf));
00065 pfree(buf);
00066
00067 PG_RETURN_VOID();
00068 }
00069
00070 Datum
00071 big5_to_euc_tw(PG_FUNCTION_ARGS)
00072 {
00073 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00074 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00075 int len = PG_GETARG_INT32(4);
00076 unsigned char *buf;
00077
00078 CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
00079
00080 buf = palloc(len * ENCODING_GROWTH_RATE + 1);
00081 big52mic(src, buf, len);
00082 mic2euc_tw(buf, dest, strlen((char *) buf));
00083 pfree(buf);
00084
00085 PG_RETURN_VOID();
00086 }
00087
00088 Datum
00089 euc_tw_to_mic(PG_FUNCTION_ARGS)
00090 {
00091 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00092 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00093 int len = PG_GETARG_INT32(4);
00094
00095 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
00096
00097 euc_tw2mic(src, dest, len);
00098
00099 PG_RETURN_VOID();
00100 }
00101
00102 Datum
00103 mic_to_euc_tw(PG_FUNCTION_ARGS)
00104 {
00105 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00106 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00107 int len = PG_GETARG_INT32(4);
00108
00109 CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
00110
00111 mic2euc_tw(src, dest, len);
00112
00113 PG_RETURN_VOID();
00114 }
00115
00116 Datum
00117 big5_to_mic(PG_FUNCTION_ARGS)
00118 {
00119 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00120 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00121 int len = PG_GETARG_INT32(4);
00122
00123 CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
00124
00125 big52mic(src, dest, len);
00126
00127 PG_RETURN_VOID();
00128 }
00129
00130 Datum
00131 mic_to_big5(PG_FUNCTION_ARGS)
00132 {
00133 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00134 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00135 int len = PG_GETARG_INT32(4);
00136
00137 CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
00138
00139 mic2big5(src, dest, len);
00140
00141 PG_RETURN_VOID();
00142 }
00143
00144
00145
00146
00147 static void
00148 euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
00149 {
00150 int c1;
00151 int l;
00152
00153 while (len > 0)
00154 {
00155 c1 = *euc;
00156 if (IS_HIGHBIT_SET(c1))
00157 {
00158 l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
00159 if (l < 0)
00160 report_invalid_encoding(PG_EUC_TW,
00161 (const char *) euc, len);
00162 if (c1 == SS2)
00163 {
00164 c1 = euc[1];
00165 if (c1 == 0xa1)
00166 *p++ = LC_CNS11643_1;
00167 else if (c1 == 0xa2)
00168 *p++ = LC_CNS11643_2;
00169 else
00170 {
00171
00172 *p++ = LCPRV2_B;
00173 *p++ = c1 - 0xa3 + LC_CNS11643_3;
00174 }
00175 *p++ = euc[2];
00176 *p++ = euc[3];
00177 }
00178 else
00179 {
00180 *p++ = LC_CNS11643_1;
00181 *p++ = c1;
00182 *p++ = euc[1];
00183 }
00184 euc += l;
00185 len -= l;
00186 }
00187 else
00188 {
00189 if (c1 == 0)
00190 report_invalid_encoding(PG_EUC_TW,
00191 (const char *) euc, len);
00192 *p++ = c1;
00193 euc++;
00194 len--;
00195 }
00196 }
00197 *p = '\0';
00198 }
00199
00200
00201
00202
00203 static void
00204 mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
00205 {
00206 int c1;
00207 int l;
00208
00209 while (len > 0)
00210 {
00211 c1 = *mic;
00212 if (!IS_HIGHBIT_SET(c1))
00213 {
00214
00215 if (c1 == 0)
00216 report_invalid_encoding(PG_MULE_INTERNAL,
00217 (const char *) mic, len);
00218 *p++ = c1;
00219 mic++;
00220 len--;
00221 continue;
00222 }
00223 l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
00224 if (l < 0)
00225 report_invalid_encoding(PG_MULE_INTERNAL,
00226 (const char *) mic, len);
00227 if (c1 == LC_CNS11643_1)
00228 {
00229 *p++ = mic[1];
00230 *p++ = mic[2];
00231 }
00232 else if (c1 == LC_CNS11643_2)
00233 {
00234 *p++ = SS2;
00235 *p++ = 0xa2;
00236 *p++ = mic[1];
00237 *p++ = mic[2];
00238 }
00239 else if (c1 == LCPRV2_B &&
00240 mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
00241 {
00242 *p++ = SS2;
00243 *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
00244 *p++ = mic[2];
00245 *p++ = mic[3];
00246 }
00247 else
00248 report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
00249 (const char *) mic, len);
00250 mic += l;
00251 len -= l;
00252 }
00253 *p = '\0';
00254 }
00255
00256
00257
00258
00259 static void
00260 big52mic(const unsigned char *big5, unsigned char *p, int len)
00261 {
00262 unsigned short c1;
00263 unsigned short big5buf,
00264 cnsBuf;
00265 unsigned char lc;
00266 int l;
00267
00268 while (len > 0)
00269 {
00270 c1 = *big5;
00271 if (!IS_HIGHBIT_SET(c1))
00272 {
00273
00274 if (c1 == 0)
00275 report_invalid_encoding(PG_BIG5,
00276 (const char *) big5, len);
00277 *p++ = c1;
00278 big5++;
00279 len--;
00280 continue;
00281 }
00282 l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
00283 if (l < 0)
00284 report_invalid_encoding(PG_BIG5,
00285 (const char *) big5, len);
00286 big5buf = (c1 << 8) | big5[1];
00287 cnsBuf = BIG5toCNS(big5buf, &lc);
00288 if (lc != 0)
00289 {
00290
00291 if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
00292 *p++ = LCPRV2_B;
00293 *p++ = lc;
00294 *p++ = (cnsBuf >> 8) & 0x00ff;
00295 *p++ = cnsBuf & 0x00ff;
00296 }
00297 else
00298 report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
00299 (const char *) big5, len);
00300 big5 += l;
00301 len -= l;
00302 }
00303 *p = '\0';
00304 }
00305
00306
00307
00308
00309 static void
00310 mic2big5(const unsigned char *mic, unsigned char *p, int len)
00311 {
00312 unsigned short c1;
00313 unsigned short big5buf,
00314 cnsBuf;
00315 int l;
00316
00317 while (len > 0)
00318 {
00319 c1 = *mic;
00320 if (!IS_HIGHBIT_SET(c1))
00321 {
00322
00323 if (c1 == 0)
00324 report_invalid_encoding(PG_MULE_INTERNAL,
00325 (const char *) mic, len);
00326 *p++ = c1;
00327 mic++;
00328 len--;
00329 continue;
00330 }
00331 l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
00332 if (l < 0)
00333 report_invalid_encoding(PG_MULE_INTERNAL,
00334 (const char *) mic, len);
00335 if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
00336 {
00337 if (c1 == LCPRV2_B)
00338 {
00339 c1 = mic[1];
00340 cnsBuf = (mic[2] << 8) | mic[3];
00341 }
00342 else
00343 {
00344 cnsBuf = (mic[1] << 8) | mic[2];
00345 }
00346 big5buf = CNStoBIG5(cnsBuf, c1);
00347 if (big5buf == 0)
00348 report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
00349 (const char *) mic, len);
00350 *p++ = (big5buf >> 8) & 0x00ff;
00351 *p++ = big5buf & 0x00ff;
00352 }
00353 else
00354 report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
00355 (const char *) mic, len);
00356 mic += l;
00357 len -= l;
00358 }
00359 *p = '\0';
00360 }