Header And Logo

PostgreSQL
| The world's most advanced open source database.

Functions

conv.c File Reference

#include "postgres.h"
#include "mb/pg_wchar.h"
Include dependency graph for conv.c:

Go to the source code of this file.

Functions

void latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding)
void mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding)
void pg_ascii2mic (const unsigned char *l, unsigned char *p, int len)
void pg_mic2ascii (const unsigned char *mic, unsigned char *p, int len)
void latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
void mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab)
static int compare1 (const void *p1, const void *p2)
static int compare2 (const void *p1, const void *p2)
static int compare3 (const void *p1, const void *p2)
static int compare4 (const void *p1, const void *p2)
static unsigned char * set_iso_code (unsigned char *iso, uint32 code)
void UtfToLocal (const unsigned char *utf, unsigned char *iso, const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap, int size1, int size2, int encoding, int len)
void LocalToUtf (const unsigned char *iso, unsigned char *utf, const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap, int size1, int size2, int encoding, int len)

Function Documentation

static int compare1 ( const void *  p1,
const void *  p2 
) [static]

Definition at line 245 of file conv.c.

Referenced by UtfToLocal().

{
    /*
     * Three-way comparison callback: p1 is read as a bare uint32 search key,
     * p2 as a pg_utf_to_local entry whose "utf" field is compared against it.
     * Yields 1, 0 or -1 for key greater than, equal to, or less than the entry.
     */
    const pg_utf_to_local *entry = (const pg_utf_to_local *) p2;
    uint32      key = *(const uint32 *) p1;
    uint32      target = entry->utf;

    if (key > target)
        return 1;
    if (key == target)
        return 0;
    return -1;
}

static int compare2 ( const void *  p1,
const void *  p2 
) [static]

Definition at line 260 of file conv.c.

Referenced by LocalToUtf().

{
    /*
     * Three-way comparison callback: p1 is read as a bare uint32 search key,
     * p2 as a pg_local_to_utf entry whose "code" field is compared against it.
     */
    const pg_local_to_utf *entry = (const pg_local_to_utf *) p2;
    uint32      key = *(const uint32 *) p1;
    uint32      target = entry->code;

    if (key == target)
        return 0;

    return (key > target) ? 1 : -1;
}

static int compare3 ( const void *  p1,
const void *  p2 
) [static]

Definition at line 275 of file conv.c.

References s1, and s2.

Referenced by UtfToLocal().

{
    /*
     * Three-way comparison on a pair of values: p1 is read as two consecutive
     * uint32s, p2 as a pg_utf_to_local_combined entry.  The pair is ordered
     * first by (key[0], utf1) and, when those tie, by (key[1], utf2).
     */
    const uint32 *key = (const uint32 *) p1;
    const pg_utf_to_local_combined *entry = (const pg_utf_to_local_combined *) p2;
    uint32      key_first = key[0];
    uint32      key_second = key[1];
    uint32      ent_first = entry->utf1;
    uint32      ent_second = entry->utf2;

    /* The first component decides unless it ties. */
    if (key_first != ent_first)
        return (key_first > ent_first) ? 1 : -1;

    /* First components equal: fall back to the second one. */
    if (key_second != ent_second)
        return (key_second > ent_second) ? 1 : -1;

    return 0;
}

static int compare4 ( const void *  p1,
const void *  p2 
) [static]

Definition at line 294 of file conv.c.

Referenced by LocalToUtf().

{
    /*
     * Three-way comparison callback: p1 is read as a bare uint32 search key,
     * p2 as a pg_local_to_utf_combined entry whose "code" field is compared.
     */
    const pg_local_to_utf_combined *entry = (const pg_local_to_utf_combined *) p2;
    uint32      key = *(const uint32 *) p1;
    uint32      target = entry->code;

    /* (a > b) - (a < b) evaluates to 1, 0 or -1. */
    return (key > target) - (key < target);
}

void latin2mic ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 26 of file conv.c.

References IS_HIGHBIT_SET, and report_invalid_encoding().

Referenced by koi8r2mic(), latin12mic(), latin22mic(), latin32mic(), and latin42mic().

{
    /*
     * Copy len bytes from l to p.  Every byte with the high bit set is
     * preceded in the output by the byte lc; other bytes are copied alone.
     * A zero byte in the input is reported via report_invalid_encoding().
     * The output is NUL-terminated.
     */
    for (; len > 0; l++, len--)
    {
        int         ch = *l;

        if (ch == 0)
            report_invalid_encoding(encoding, (const char *) l, len);

        /* high-bit bytes get the lc prefix */
        if (IS_HIGHBIT_SET(ch))
            *p++ = lc;
        *p++ = ch;
    }

    *p = '\0';
}

void latin2mic_with_table ( const unsigned char *  l,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 148 of file conv.c.

References IS_HIGHBIT_SET, PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by iso2mic(), win12502mic(), win12512mic(), and win8662mic().

{
    /*
     * Copy len bytes from l to p, translating high-bit bytes through tab.
     *
     * Bytes without the high bit are copied unchanged.  For a high-bit byte,
     * tab[byte - HIGHBIT] is looked up: a nonzero result is emitted preceded
     * by lc, a zero result is reported via report_untranslatable_char().
     * A zero input byte is reported via report_invalid_encoding().
     * The output is NUL-terminated.
     */
    for (; len > 0; l++, len--)
    {
        unsigned char src = *l;
        unsigned char mapped;

        if (src == 0)
            report_invalid_encoding(encoding, (const char *) l, len);

        if (!IS_HIGHBIT_SET(src))
        {
            /* plain byte: pass through */
            *p++ = src;
            continue;
        }

        mapped = tab[src - HIGHBIT];
        if (mapped == 0)
            report_untranslatable_char(encoding, PG_MULE_INTERNAL,
                                       (const char *) l, len);
        else
        {
            *p++ = lc;
            *p++ = mapped;
        }
    }

    *p = '\0';
}

void LocalToUtf ( const unsigned char *  iso,
unsigned char *  utf,
const pg_local_to_utf *  map,
const pg_local_to_utf_combined *  cmap,
int  size1,
int  size2,
int  encoding,
int  len 
)

Definition at line 497 of file conv.c.

References compare2(), compare4(), ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, NULL, pg_encoding_verifymb(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), pg_local_to_utf::utf, pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

{
    /*
     * Convert len bytes at iso (in the given encoding) to the buffer at utf,
     * using the sorted lookup table map (size1 entries) and, if cmap is
     * non-NULL, the combined-character table cmap (size2 entries).
     *
     * Each input character is packed into a single integer key, looked up
     * with bsearch(), and the resulting value(s) are written out one byte at
     * a time, most significant byte first, skipping zero bytes.  The output
     * is NUL-terminated.
     */
    unsigned int iiso;          /* input character packed big-endian */
    int         l;              /* byte length of the current input character */
    pg_local_to_utf *p;
    pg_local_to_utf_combined *cp;

    if (!PG_VALID_ENCODING(encoding))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("invalid encoding number: %d", encoding)));

    /* len is reduced by l at the end of every iteration, including "continue" */
    for (; len > 0; len -= l)
    {
        /* "break" cases all represent errors */
        /* (they leave len > 0, which triggers the report after the loop) */
        if (*iso == '\0')
            break;

        if (!IS_HIGHBIT_SET(*iso))
        {
            /* ASCII case is easy */
            *utf++ = *iso++;
            l = 1;
            continue;
        }

        l = pg_encoding_verifymb(encoding, (const char *) iso, len);
        if (l < 0)
            break;

        /*
         * Pack the l input bytes into iiso, first byte most significant,
         * advancing iso past the character.
         *
         * NOTE(review): iiso is not assigned if l is outside 1..4 — confirm
         * pg_encoding_verifymb() cannot return such a value here.
         */
        if (l == 1)
            iiso = *iso++;
        else if (l == 2)
        {
            iiso = *iso++ << 8;
            iiso |= *iso++;
        }
        else if (l == 3)
        {
            iiso = *iso++ << 16;
            iiso |= *iso++ << 8;
            iiso |= *iso++;
        }
        else if (l == 4)
        {
            iiso = *iso++ << 24;
            iiso |= *iso++ << 16;
            iiso |= *iso++ << 8;
            iiso |= *iso++;
        }

        p = bsearch(&iiso, map, size1,
                    sizeof(pg_local_to_utf), compare2);

        if (p == NULL)
        {
            /*
             * not found in the ordinary map. if there's a combined character
             * map, try with it
             */
            if (cmap)
            {
                cp = bsearch(&iiso, cmap, size2,
                             sizeof(pg_local_to_utf_combined), compare4);

                if (cp)
                {
                    /* emit utf1, high byte first, skipping zero bytes */
                    if (cp->utf1 & 0xff000000)
                        *utf++ = cp->utf1 >> 24;
                    if (cp->utf1 & 0x00ff0000)
                        *utf++ = (cp->utf1 & 0x00ff0000) >> 16;
                    if (cp->utf1 & 0x0000ff00)
                        *utf++ = (cp->utf1 & 0x0000ff00) >> 8;
                    if (cp->utf1 & 0x000000ff)
                        *utf++ = cp->utf1 & 0x000000ff;

                    /* then utf2, the same way */
                    if (cp->utf2 & 0xff000000)
                        *utf++ = cp->utf2 >> 24;
                    if (cp->utf2 & 0x00ff0000)
                        *utf++ = (cp->utf2 & 0x00ff0000) >> 16;
                    if (cp->utf2 & 0x0000ff00)
                        *utf++ = (cp->utf2 & 0x0000ff00) >> 8;
                    if (cp->utf2 & 0x000000ff)
                        *utf++ = cp->utf2 & 0x000000ff;

                    continue;
                }
            }

            /* iso was already advanced past the character, so back up by l */
            report_untranslatable_char(encoding, PG_UTF8,
                                       (const char *) (iso - l), len);

        }
        else
        {
            /* emit the mapped value, high byte first, skipping zero bytes */
            if (p->utf & 0xff000000)
                *utf++ = p->utf >> 24;
            if (p->utf & 0x00ff0000)
                *utf++ = (p->utf & 0x00ff0000) >> 16;
            if (p->utf & 0x0000ff00)
                *utf++ = (p->utf & 0x0000ff00) >> 8;
            if (p->utf & 0x000000ff)
                *utf++ = p->utf & 0x000000ff;
        }
    }

    /* leftover input means the loop was left through one of the "break"s */
    if (len > 0)
        report_invalid_encoding(encoding, (const char *) iso, len);

    *utf = '\0';
}

void mic2latin ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding 
)

Definition at line 54 of file conv.c.

References IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic2koi8r(), mic2latin1(), mic2latin2(), mic2latin3(), and mic2latin4().

{
    /*
     * Copy len bytes from mic to p, stripping the lc prefix byte from
     * two-byte characters.
     *
     * Bytes without the high bit are copied unchanged.  Any other character
     * must be exactly two bytes long (per pg_mic_mblen()), start with lc and
     * have a high-bit second byte; only that second byte is emitted.
     * Anything else is reported via report_untranslatable_char().  A zero
     * byte, or a character longer than the remaining input, is reported via
     * report_invalid_encoding().  The output is NUL-terminated.
     */
    while (len > 0)
    {
        int         lead = *mic;
        int         mblen;

        if (lead == 0)
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);

        if (!IS_HIGHBIT_SET(lead))
        {
            /* easy for ASCII */
            *p++ = lead;
            mic++;
            len--;
            continue;
        }

        mblen = pg_mic_mblen(mic);

        if (len < mblen)
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
                                    len);

        /* accept only <lc, high-bit byte> pairs */
        if (!(mblen == 2 && lead == lc && IS_HIGHBIT_SET(mic[1])))
            report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                       (const char *) mic, len);

        *p++ = mic[1];
        mic += 2;
        len -= 2;
    }

    *p = '\0';
}

void mic2latin_with_table ( const unsigned char *  mic,
unsigned char *  p,
int  len,
int  lc,
int  encoding,
const unsigned char *  tab 
)

Definition at line 196 of file conv.c.

References HIGHBIT, IS_HIGHBIT_SET, pg_mic_mblen(), PG_MULE_INTERNAL, report_invalid_encoding(), and report_untranslatable_char().

Referenced by mic2iso(), mic2win1250(), mic2win1251(), and mic2win866().

{
    /*
     * Copy len bytes from mic to p, translating two-byte characters through
     * tab.
     *
     * Bytes without the high bit are copied unchanged.  Any other character
     * must be exactly two bytes long (per pg_mic_mblen()), start with lc and
     * have a high-bit second byte whose entry tab[byte - HIGHBIT] is nonzero;
     * that table entry is emitted.  Anything else is reported via
     * report_untranslatable_char().  A zero byte, or a character longer than
     * the remaining input, is reported via report_invalid_encoding().
     * The output is NUL-terminated.
     */
    while (len > 0)
    {
        unsigned char lead = *mic;
        unsigned char mapped;
        int         mblen;

        if (lead == 0)
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);

        if (!IS_HIGHBIT_SET(lead))
        {
            /* easy for ASCII */
            *p++ = lead;
            mic++;
            len--;
            continue;
        }

        mblen = pg_mic_mblen(mic);

        if (len < mblen)
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
                                    len);

        /* consult the table only for well-formed <lc, high-bit byte> pairs */
        mapped = 0;
        if (mblen == 2 && lead == lc && IS_HIGHBIT_SET(mic[1]))
            mapped = tab[mic[1] - HIGHBIT];

        if (mapped == 0)
        {
            report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                       (const char *) mic, len);
            break;              /* keep compiler quiet */
        }

        *p++ = mapped;
        mic += 2;
        len -= 2;
    }

    *p = '\0';
}

void pg_ascii2mic ( const unsigned char *  l,
unsigned char *  p,
int  len 
)

Definition at line 98 of file conv.c.

References IS_HIGHBIT_SET, PG_SQL_ASCII, and report_invalid_encoding().

Referenced by ascii_to_mic(), and ascii_to_utf8().

{
    /*
     * Copy len bytes from l to p unchanged.  A zero byte or a byte with the
     * high bit set is reported via report_invalid_encoding() with
     * PG_SQL_ASCII as the encoding.  The output is NUL-terminated.
     */
    for (; len > 0; l++, len--)
    {
        int         ch = *l;

        if (ch == 0 || IS_HIGHBIT_SET(ch))
            report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);

        *p++ = ch;
    }

    *p = '\0';
}

void pg_mic2ascii ( const unsigned char *  mic,
unsigned char *  p,
int  len 
)

Definition at line 118 of file conv.c.

References IS_HIGHBIT_SET, PG_MULE_INTERNAL, PG_SQL_ASCII, and report_untranslatable_char().

Referenced by mic_to_ascii(), and utf8_to_ascii().

{
    /*
     * Copy len bytes from mic to p unchanged.  A zero byte or a byte with the
     * high bit set is reported via report_untranslatable_char() as a
     * PG_MULE_INTERNAL to PG_SQL_ASCII failure.  The output is
     * NUL-terminated.
     */
    for (; len > 0; mic++, len--)
    {
        int         ch = *mic;

        if (ch == 0 || IS_HIGHBIT_SET(ch))
            report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
                                       (const char *) mic, len);

        *p++ = ch;
    }

    *p = '\0';
}

static unsigned char* set_iso_code ( unsigned char *  iso,
uint32  code 
) [static]

Definition at line 308 of file conv.c.

Referenced by UtfToLocal().

{
    /*
     * Write the nonzero bytes of code to iso, most significant byte first,
     * and return the position just past the last byte written.  Zero bytes
     * are skipped wherever they occur in the 32-bit value.
     */
    int         shift;

    for (shift = 24; shift >= 0; shift -= 8)
    {
        unsigned char octet = (unsigned char) ((code >> shift) & 0xff);

        if (octet != 0)
            *iso++ = octet;
    }

    return iso;
}

void UtfToLocal ( const unsigned char *  utf,
unsigned char *  iso,
const pg_utf_to_local *  map,
const pg_utf_to_local_combined *  cmap,
int  size1,
int  size2,
int  encoding,
int  len 
)

Definition at line 336 of file conv.c.

References pg_utf_to_local_combined::code, pg_utf_to_local::code, compare1(), compare3(), NULL, PG_UTF8, pg_utf8_islegal(), pg_utf_mblen(), report_invalid_encoding(), report_untranslatable_char(), and set_iso_code().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

{
    /*
     * Convert len bytes of UTF-8 at utf to the buffer at iso, using the
     * sorted lookup table map (size1 entries) and, if cmap is non-NULL, the
     * combined-character table cmap (size2 entries) that maps a pair of
     * consecutive UTF-8 characters to one code.
     *
     * Each multibyte character is packed into a uint32 key, looked up with
     * bsearch(), and the resulting code is written with set_iso_code().
     * The output is NUL-terminated.
     */
    uint32      iutf;           /* current character packed big-endian */
    uint32      cutf[2];        /* pair of packed characters for cmap lookup */
    uint32      code;           /* code emitted at the bottom of the loop */
    pg_utf_to_local *p;
    pg_utf_to_local_combined *cp;
    int         l;              /* byte length of the character just read */

    /* len is reduced by l at the end of every iteration, including "continue" */
    for (; len > 0; len -= l)
    {
        /* "break" cases all represent errors */
        /* (they leave len > 0, which triggers the report after the loop) */
        if (*utf == '\0')
            break;

        l = pg_utf_mblen(utf);

        if (len < l)
            break;

        if (!pg_utf8_islegal(utf, l))
            break;

        if (l == 1)
        {
            /* ASCII case is easy */
            *iso++ = *utf++;
            continue;
        }
        else if (l == 2)
        {
            iutf = *utf++ << 8;
            iutf |= *utf++;
        }
        else if (l == 3)
        {
            iutf = *utf++ << 16;
            iutf |= *utf++ << 8;
            iutf |= *utf++;
        }
        else if (l == 4)
        {
            iutf = *utf++ << 24;
            iutf |= *utf++ << 16;
            iutf |= *utf++ << 8;
            iutf |= *utf++;
        }

        /*
         * first, try with combined map if possible
         */
        if (cmap && len > l)
        {
            /*
             * Remember the state just after the first character; utf_save
             * minus l_save is the start of that character, used for error
             * reports below.
             */
            const unsigned char *utf_save = utf;
            int         len_save = len;
            int         l_save = l;

            /* account for the first character now; l becomes the second's length */
            len -= l;

            /*
             * NOTE(review): the '\0' check done at the top of the loop is not
             * repeated for this look-ahead character — confirm whether a NUL
             * here is rejected elsewhere.
             */
            l = pg_utf_mblen(utf);
            if (len < l)
                break;

            if (!pg_utf8_islegal(utf, l))
                break;

            cutf[0] = iutf;

            if (l == 1)
            {
                /*
                 * The second character is a single byte, so no pair lookup
                 * is made: translate the first character through the
                 * ordinary map, then copy the single byte.
                 */
                if (len_save > 1)
                {
                    p = bsearch(&cutf[0], map, size1,
                                sizeof(pg_utf_to_local), compare1);
                    /*
                     * NOTE(review): p is dereferenced right after the report
                     * call, so this relies on report_untranslatable_char()
                     * not returning — confirm.
                     */
                    if (p == NULL)
                        report_untranslatable_char(PG_UTF8, encoding,
                               (const char *) (utf_save - l_save), len_save);
                    iso = set_iso_code(iso, p->code);
                }

                /* ASCII case is easy */
                *iso++ = *utf++;
                continue;
            }
            else if (l == 2)
            {
                iutf = *utf++ << 8;
                iutf |= *utf++;
            }
            else if (l == 3)
            {
                iutf = *utf++ << 16;
                iutf |= *utf++ << 8;
                iutf |= *utf++;
            }
            else if (l == 4)
            {
                iutf = *utf++ << 24;
                iutf |= *utf++ << 16;
                iutf |= *utf++ << 8;
                iutf |= *utf++;
            }

            /* look the two characters up as a pair */
            cutf[1] = iutf;
            cp = bsearch(cutf, cmap, size2,
                         sizeof(pg_utf_to_local_combined), compare3);
            if (cp)
                code = cp->code;
            else
            {
                /* not found in combined map. try with ordinary map */
                /* first character: emitted immediately */
                p = bsearch(&cutf[0], map, size1,
                            sizeof(pg_utf_to_local), compare1);
                if (p == NULL)
                    report_untranslatable_char(PG_UTF8, encoding,
                               (const char *) (utf_save - l_save), len_save);
                iso = set_iso_code(iso, p->code);

                /* second character: emitted by the common code below */
                p = bsearch(&cutf[1], map, size1,
                            sizeof(pg_utf_to_local), compare1);
                if (p == NULL)
                    report_untranslatable_char(PG_UTF8, encoding,
                                               (const char *) (utf - l), len);
                code = p->code;
            }
        }
        else    /* no cmap or no remaining data */
        {
            p = bsearch(&iutf, map, size1,
                        sizeof(pg_utf_to_local), compare1);
            if (p == NULL)
                report_untranslatable_char(PG_UTF8, encoding,
                                           (const char *) (utf - l), len);
            code = p->code;
        }
        iso = set_iso_code(iso, code);
    }

    /* leftover input means the loop was left through one of the "break"s */
    if (len > 0)
        report_invalid_encoding(PG_UTF8, (const char *) utf, len);

    *iso = '\0';
}