Header And Logo

PostgreSQL
| The world's most advanced open source database.

utf8_and_iso8859.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  *    ISO 8859 2-16 <--> UTF8
00004  *
00005  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00006  * Portions Copyright (c) 1994, Regents of the University of California
00007  *
00008  * IDENTIFICATION
00009  *    src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 
00014 #include "postgres.h"
00015 #include "fmgr.h"
00016 #include "mb/pg_wchar.h"
00017 #include "../../Unicode/iso8859_10_to_utf8.map"
00018 #include "../../Unicode/iso8859_13_to_utf8.map"
00019 #include "../../Unicode/iso8859_14_to_utf8.map"
00020 #include "../../Unicode/iso8859_15_to_utf8.map"
00021 #include "../../Unicode/iso8859_2_to_utf8.map"
00022 #include "../../Unicode/iso8859_3_to_utf8.map"
00023 #include "../../Unicode/iso8859_4_to_utf8.map"
00024 #include "../../Unicode/iso8859_5_to_utf8.map"
00025 #include "../../Unicode/iso8859_6_to_utf8.map"
00026 #include "../../Unicode/iso8859_7_to_utf8.map"
00027 #include "../../Unicode/iso8859_8_to_utf8.map"
00028 #include "../../Unicode/iso8859_9_to_utf8.map"
00029 #include "../../Unicode/utf8_to_iso8859_10.map"
00030 #include "../../Unicode/utf8_to_iso8859_13.map"
00031 #include "../../Unicode/utf8_to_iso8859_14.map"
00032 #include "../../Unicode/utf8_to_iso8859_15.map"
00033 #include "../../Unicode/utf8_to_iso8859_16.map"
00034 #include "../../Unicode/utf8_to_iso8859_2.map"
00035 #include "../../Unicode/utf8_to_iso8859_3.map"
00036 #include "../../Unicode/utf8_to_iso8859_4.map"
00037 #include "../../Unicode/utf8_to_iso8859_5.map"
00038 #include "../../Unicode/utf8_to_iso8859_6.map"
00039 #include "../../Unicode/utf8_to_iso8859_7.map"
00040 #include "../../Unicode/utf8_to_iso8859_8.map"
00041 #include "../../Unicode/utf8_to_iso8859_9.map"
00042 #include "../../Unicode/iso8859_16_to_utf8.map"
00043 
00044 PG_MODULE_MAGIC;
00045 
00046 PG_FUNCTION_INFO_V1(iso8859_to_utf8);
00047 PG_FUNCTION_INFO_V1(utf8_to_iso8859);
00048 
00049 extern Datum iso8859_to_utf8(PG_FUNCTION_ARGS);
00050 extern Datum utf8_to_iso8859(PG_FUNCTION_ARGS);
00051 
00052 /* ----------
00053  * conv_proc(
00054  *      INTEGER,    -- source encoding id
00055  *      INTEGER,    -- destination encoding id
00056  *      CSTRING,    -- source string (null terminated C string)
00057  *      CSTRING,    -- destination string (null terminated C string)
00058  *      INTEGER     -- source string length
00059  * ) returns VOID;
00060  * ----------
00061  */
00062 
00063 typedef struct
00064 {
00065     pg_enc      encoding;
00066     pg_local_to_utf *map1;      /* to UTF8 map name */
00067     pg_utf_to_local *map2;      /* from UTF8 map name */
00068     int         size1;          /* size of map1 */
00069     int         size2;          /* size of map2 */
00070 } pg_conv_map;
00071 
00072 static pg_conv_map maps[] = {
00073     {PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
00074         sizeof(LUmapISO8859_2) / sizeof(pg_local_to_utf),
00075     sizeof(ULmapISO8859_2) / sizeof(pg_utf_to_local)},  /* ISO-8859-2 Latin 2 */
00076     {PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
00077         sizeof(LUmapISO8859_3) / sizeof(pg_local_to_utf),
00078     sizeof(ULmapISO8859_3) / sizeof(pg_utf_to_local)},  /* ISO-8859-3 Latin 3 */
00079     {PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
00080         sizeof(LUmapISO8859_4) / sizeof(pg_local_to_utf),
00081     sizeof(ULmapISO8859_4) / sizeof(pg_utf_to_local)},  /* ISO-8859-4 Latin 4 */
00082     {PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
00083         sizeof(LUmapISO8859_9) / sizeof(pg_local_to_utf),
00084     sizeof(ULmapISO8859_9) / sizeof(pg_utf_to_local)},  /* ISO-8859-9 Latin 5 */
00085     {PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
00086         sizeof(LUmapISO8859_10) / sizeof(pg_local_to_utf),
00087     sizeof(ULmapISO8859_10) / sizeof(pg_utf_to_local)}, /* ISO-8859-10 Latin 6 */
00088     {PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
00089         sizeof(LUmapISO8859_13) / sizeof(pg_local_to_utf),
00090     sizeof(ULmapISO8859_13) / sizeof(pg_utf_to_local)}, /* ISO-8859-13 Latin 7 */
00091     {PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
00092         sizeof(LUmapISO8859_14) / sizeof(pg_local_to_utf),
00093     sizeof(ULmapISO8859_14) / sizeof(pg_utf_to_local)}, /* ISO-8859-14 Latin 8 */
00094     {PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
00095         sizeof(LUmapISO8859_15) / sizeof(pg_local_to_utf),
00096     sizeof(ULmapISO8859_15) / sizeof(pg_utf_to_local)}, /* ISO-8859-15 Latin 9 */
00097     {PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
00098         sizeof(LUmapISO8859_16) / sizeof(pg_local_to_utf),
00099     sizeof(ULmapISO8859_16) / sizeof(pg_utf_to_local)}, /* ISO-8859-16 Latin 10 */
00100     {PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
00101         sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf),
00102     sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local)},  /* ISO-8859-5 */
00103     {PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
00104         sizeof(LUmapISO8859_6) / sizeof(pg_local_to_utf),
00105     sizeof(ULmapISO8859_6) / sizeof(pg_utf_to_local)},  /* ISO-8859-6 */
00106     {PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
00107         sizeof(LUmapISO8859_7) / sizeof(pg_local_to_utf),
00108     sizeof(ULmapISO8859_7) / sizeof(pg_utf_to_local)},  /* ISO-8859-7 */
00109     {PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
00110         sizeof(LUmapISO8859_8) / sizeof(pg_local_to_utf),
00111     sizeof(ULmapISO8859_8) / sizeof(pg_utf_to_local)},  /* ISO-8859-8 */
00112 };
00113 
00114 Datum
00115 iso8859_to_utf8(PG_FUNCTION_ARGS)
00116 {
00117     int         encoding = PG_GETARG_INT32(0);
00118     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00119     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00120     int         len = PG_GETARG_INT32(4);
00121     int         i;
00122 
00123     CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
00124 
00125     for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
00126     {
00127         if (encoding == maps[i].encoding)
00128         {
00129             LocalToUtf(src, dest, maps[i].map1, NULL, maps[i].size1, 0, encoding, len);
00130             PG_RETURN_VOID();
00131         }
00132     }
00133 
00134     ereport(ERROR,
00135             (errcode(ERRCODE_INTERNAL_ERROR),
00136              errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding)));
00137 
00138     PG_RETURN_VOID();
00139 }
00140 
00141 Datum
00142 utf8_to_iso8859(PG_FUNCTION_ARGS)
00143 {
00144     int         encoding = PG_GETARG_INT32(1);
00145     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
00146     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
00147     int         len = PG_GETARG_INT32(4);
00148     int         i;
00149 
00150     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
00151 
00152     for (i = 0; i < sizeof(maps) / sizeof(pg_conv_map); i++)
00153     {
00154         if (encoding == maps[i].encoding)
00155         {
00156             UtfToLocal(src, dest, maps[i].map2, NULL, maps[i].size2, 0, encoding, len);
00157             PG_RETURN_VOID();
00158         }
00159     }
00160 
00161     ereport(ERROR,
00162             (errcode(ERRCODE_INTERNAL_ERROR),
00163              errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding)));
00164 
00165     PG_RETURN_VOID();
00166 }