00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include <stdlib.h>
00031 #include <stdio.h>
00032 #include <vlc/vlc.h>
00033
00034 #if !defined WIN32
00035 # if HAVE_LANGINFO_CODESET
00036 # include <langinfo.h>
00037 # else
00038 # if HAVE_SETLOCALE
00039 # include <locale.h>
00040 # endif
00041 # endif
00042 #elif defined WIN32
00043 # include <windows.h>
00044 #endif
00045
00046 #ifdef SYS_DARWIN
00047 # include <errno.h>
00048 # include <string.h>
00049 #endif
00050
00051 #include "charset.h"
00052
/**
 * One entry of a charset alias table: a platform-specific character set
 * name and the name it should be replaced with.
 *
 * Both members are initialised from string literals in the alias tables,
 * so they are const-qualified to prevent writes through them.
 */
typedef struct VLCCharsetAlias
{
    const char *psz_alias; /* name as reported by the platform */
    const char *psz_name;  /* replacement name handed back to the caller */
} VLCCharsetAlias;
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
#if defined WIN32 || defined OS2 || !HAVE_LANGINFO_CODESET
/**
 * Guesses a default character set from a locale/language string.
 *
 * The string is searched for known language codes (substring match, in
 * table order); the first code found decides the result.
 *
 * \param l locale or language string, e.g. "zh_TW"; may be NULL
 * \return a static charset name; "ISO-8859-1" when nothing matches or
 *         when l is NULL. The returned string must not be freed.
 */
static const char* vlc_encoding_from_language( const char *l )
{
    /* Order matters: the first matching entry wins, so the specific
     * "zh_TW"/"zh_HK" entries must precede the generic "zh" one. */
    static const struct
    {
        const char *psz_lang;
        const char *psz_charset;
    } p_guess[] =
    {
        { "zh_TW", "Big5" },
        { "zh_HK", "Big5HKSCS" },
        { "zh",    "GB2312" },
        { "th",    "TIS-620" },
        { "ja",    "EUC-JP" },
        { "ko",    "EUC-KR" },
        { "ru",    "KOI8-R" },
        { "uk",    "KOI8-U" },
        { "pl",    "ISO-8859-2" },
        { "hr",    "ISO-8859-2" },
        { "hu",    "ISO-8859-2" },
        { "cs",    "ISO-8859-2" },
        { "sk",    "ISO-8859-2" },
        { "sl",    "ISO-8859-2" },
        { "eo",    "ISO-8859-3" },
        { "mt",    "ISO-8859-3" },
        { "lt",    "ISO-8859-4" },
        { "la",    "ISO-8859-4" },
        { "bg",    "ISO-8859-5" },
        { "be",    "ISO-8859-5" },
        { "mk",    "ISO-8859-5" },
        { "ar",    "ISO-8859-6" },
        { "el",    "ISO-8859-7" },
        { "he",    "ISO-8859-8" },
        { "iw",    "ISO-8859-8" },
        { "tr",    "ISO-8859-9" },
        { "lv",    "ISO-8859-13" },
        { "cy",    "ISO-8859-14" },
        { "et",    "ISO-8859-15" },
        { "ro",    "ISO-8859-2" },
        { "am",    "UTF-8" },
        { "vi",    "UTF-8" },
    };
    size_t i;

    /* No locale information at all: use the generic default. */
    if( l == NULL )
        return "ISO-8859-1";

    for( i = 0; i < sizeof( p_guess ) / sizeof( p_guess[0] ); i++ )
    {
        if( strstr( l, p_guess[i].psz_lang ) != NULL )
            return p_guess[i].psz_charset;
    }

    return "ISO-8859-1";
}
#endif
00100
00101 static const char* vlc_charset_aliases( const char *psz_name )
00102 {
00103 VLCCharsetAlias *a;
00104
00105 #if defined WIN32
00106 VLCCharsetAlias aliases[] =
00107 {
00108 { "CP936", "GBK" },
00109 { "CP1361", "JOHAB" },
00110 { "CP20127", "ASCII" },
00111 { "CP20866", "KOI8-R" },
00112 { "CP21866", "KOI8-RU" },
00113 { "CP28591", "ISO-8859-1" },
00114 { "CP28592", "ISO-8859-2" },
00115 { "CP28593", "ISO-8859-3" },
00116 { "CP28594", "ISO-8859-4" },
00117 { "CP28595", "ISO-8859-5" },
00118 { "CP28596", "ISO-8859-6" },
00119 { "CP28597", "ISO-8859-7" },
00120 { "CP28598", "ISO-8859-8" },
00121 { "CP28599", "ISO-8859-9" },
00122 { "CP28605", "ISO-8859-15" },
00123 { NULL, NULL }
00124 };
00125 #elif SYS_AIX
00126 VLCCharsetAlias aliases[] =
00127 {
00128 { "IBM-850", "CP850" },
00129 { "IBM-856", "CP856" },
00130 { "IBM-921", "ISO-8859-13" },
00131 { "IBM-922", "CP922" },
00132 { "IBM-932", "CP932" },
00133 { "IBM-943", "CP943" },
00134 { "IBM-1046", "CP1046" },
00135 { "IBM-1124", "CP1124" },
00136 { "IBM-1129", "CP1129" },
00137 { "IBM-1252", "CP1252" },
00138 { "IBM-EUCCN", "GB2312" },
00139 { "IBM-EUCJP", "EUC-JP" },
00140 { "IBM-EUCKR", "EUC-KR" },
00141 { "IBM-EUCTW", "EUC-TW" },
00142 { NULL, NULL }
00143 };
00144 #elif SYS_HPUX
00145 VLCCharsetAlias aliases[] =
00146 {
00147 { "ROMAN8", "HP-ROMAN8" },
00148 { "ARABIC8", "HP-ARABIC8" },
00149 { "GREEK8", "HP-GREEK8" },
00150 { "HEBREW8", "HP-HEBREW8" },
00151 { "TURKISH8", "HP-TURKISH8" },
00152 { "KANA8", "HP-KANA8" },
00153 { "HP15CN", "GB2312" },
00154 { NULL, NULL }
00155 };
00156 #elif SYS_IRIX
00157 VLCCharsetAlias aliases[] =
00158 {
00159 { "EUCCN", "GB2312" },
00160 { NULL, NULL }
00161 };
00162 #elif SYS_OSF
00163 VLCCharsetAlias aliases[] =
00164 {
00165 { "KSC5601", "CP949" },
00166 { "SDECKANJI", "EUC-JP" },
00167 { "TACTIS", "TIS-620" },
00168 { NULL, NULL }
00169 };
00170 #elif SYS_SOLARIS
00171 VLCCharsetAlias aliases[] =
00172 {
00173 { "646", "ASCII" },
00174 { "CNS11643", "EUC-TW" },
00175 { "5601", "EUC-KR" },
00176 { "JOHAP92", "JOHAB" },
00177 { "PCK", "SHIFT_JIS" },
00178 { "2533", "TIS-620" },
00179 { NULL, NULL }
00180 };
00181 #elif SYS_BSD
00182 VLCCharsetAlias aliases[] =
00183 {
00184 { "646", " ASCII" },
00185 { "EUCCN", "GB2312" },
00186 { NULL, NULL }
00187 };
00188 #else
00189 VLCCharsetAlias aliases[] = {{NULL, NULL}};
00190 #endif
00191
00192 if( aliases )
00193 {
00194 for (a = aliases; a->psz_alias; a++)
00195 if (strcasecmp (a->psz_alias, psz_name) == 0)
00196 return a->psz_name;
00197 }
00198
00199
00200
00201 return psz_name;
00202 }
00203
00204
#if defined WIN32 || defined OS2 || !HAVE_LANGINFO_CODESET
/**
 * Extracts the character set from a locale string of the form
 * language[_territory][.charset][@modifier].
 *
 * When the locale carries a ".charset" part that fits the output buffer,
 * that part (without any "@modifier") is copied. Otherwise the charset is
 * guessed from the language via vlc_encoding_from_language().
 *
 * \param psz_locale locale string; NULL is treated as an empty string
 * \param psz_charset output buffer of at least 2 + 10 + 1 bytes (the size
 *                    the callers in this file allocate); always
 *                    NUL-terminated on return
 */
static void vlc_encoding_from_locale( const char *psz_locale, char *psz_charset )
{
    /* Capacity of the callers' buffers, terminator included. */
    enum { i_charset_size = 2 + 10 + 1 };
    const char *psz_dot;

    if( psz_locale == NULL )
        psz_locale = "";

    psz_dot = strchr( psz_locale, '.' );
    if( psz_dot != NULL )
    {
        const char *psz_modifier;
        size_t i_len;

        psz_dot++;

        /* The charset ends at the "@modifier", if there is one. */
        psz_modifier = strchr( psz_dot, '@' );
        if( psz_modifier == NULL )
            i_len = strlen( psz_dot );
        else
            i_len = (size_t)( psz_modifier - psz_dot );

        /* Copy only what fits the buffer: the former unbounded strcpy()
         * overflowed it on long charset names. An empty name in front of
         * a modifier carries no information and falls through to the
         * language-based guess, as before. */
        if( i_len < (size_t)i_charset_size
         && ( i_len > 0 || psz_modifier == NULL ) )
        {
            memcpy( psz_charset, psz_dot, i_len );
            psz_charset[ i_len ] = '\0';
            return;
        }
    }

    /* No usable charset in the locale: guess from the language. The
     * longest name that guess returns (11 characters) fits the buffer. */
    strcpy( psz_charset, vlc_encoding_from_language( psz_locale ) );
}
#endif
00236
00237 vlc_bool_t vlc_current_charset( char **psz_charset )
00238 {
00239 const char *psz_codeset;
00240
00241 #if !(defined WIN32 || defined OS2 || defined SYS_DARWIN)
00242
00243 # if HAVE_LANGINFO_CODESET
00244
00245 psz_codeset = nl_langinfo( CODESET );
00246 if( !strcmp( psz_codeset, "ANSI_X3.4-1968" ) )
00247 psz_codeset = "ASCII";
00248 # else
00249
00250 const char *psz_locale = NULL;
00251 char buf[2 + 10 + 1];
00252
00253
00254
00255
00256
00257 # if HAVE_SETLOCALE && !SYS_DARWIN
00258 psz_locale = setlocale( LC_ALL, NULL );
00259 # endif
00260 if( psz_locale == NULL || psz_locale[0] == '\0' )
00261 {
00262 psz_locale = getenv( "LC_ALL" );
00263 if( psz_locale == NULL || psz_locale[0] == '\0' )
00264 {
00265 psz_locale = getenv( "LC_CTYPE" );
00266 if( psz_locale == NULL || psz_locale[0] == '\0')
00267 psz_locale = getenv( "LANG" );
00268 }
00269 }
00270
00271
00272
00273 vlc_encoding_from_locale( (char *)psz_locale, buf );
00274 psz_codeset = buf;
00275 # endif
00276
00277 #elif defined SYS_DARWIN
00278
00279
00280 psz_codeset = "UTF-8";
00281
00282 #elif defined WIN32
00283
00284 char buf[2 + 10 + 1];
00285
00286
00287 sprintf( buf, "CP%u", GetACP() );
00288 psz_codeset = buf;
00289
00290 #elif defined OS2
00291
00292 const char *psz_locale;
00293 char buf[2 + 10 + 1];
00294 ULONG cp[3];
00295 ULONG cplen;
00296
00297
00298
00299 psz_locale = getenv( "LC_ALL" );
00300 if( psz_locale == NULL || psz_locale[0] == '\0' )
00301 {
00302 psz+locale = getenv( "LC_CTYPE" );
00303 if( psz_locale == NULL || locale[0] == '\0' )
00304 locale = getenv( "LANG" );
00305 }
00306 if( psz_locale != NULL && psz_locale[0] != '\0' )
00307 vlc_encoding_from_locale( psz_locale, buf );
00308 psz_codeset = buf;
00309 else
00310 {
00311
00312 if( DosQueryCp( sizeof( cp ), cp, &cplen ) )
00313 psz_codeset = "";
00314 else
00315 {
00316 sprintf( buf, "CP%u", cp[0] );
00317 psz_codeset = buf;
00318 }
00319 }
00320 #endif
00321 if( psz_codeset == NULL )
00322
00323 psz_codeset = "";
00324 else
00325 psz_codeset = vlc_charset_aliases( psz_codeset );
00326
00327
00328
00329
00330 if( psz_codeset[0] == '\0' )
00331 {
00332
00333 if( !( psz_codeset = getenv( "CHARSET" ) ) )
00334 psz_codeset = "ISO-8859-1";
00335 }
00336
00337 if( psz_charset )
00338 *psz_charset = strdup(psz_codeset);
00339
00340 if( !strcasecmp(psz_codeset, "UTF8") || !strcasecmp(psz_codeset, "UTF-8") )
00341 return VLC_TRUE;
00342
00343 return VLC_FALSE;
00344 }
00345
00346 char *__vlc_fix_readdir_charset( vlc_object_t *p_this, const char *psz_string )
00347 {
00348 #ifdef SYS_DARWIN
00349 if ( p_this->p_libvlc->iconv_macosx != (vlc_iconv_t)-1 )
00350 {
00351 const char *psz_in = psz_string;
00352 size_t i_in = strlen(psz_in);
00353 size_t i_out = i_in * 2;
00354 char *psz_utf8 = malloc(i_out + 1);
00355 char *psz_out = psz_utf8;
00356
00357 vlc_mutex_lock( &p_this->p_libvlc->iconv_lock );
00358 size_t i_ret = vlc_iconv( p_this->p_libvlc->iconv_macosx,
00359 &psz_in, &i_in, &psz_out, &i_out );
00360 vlc_mutex_unlock( &p_this->p_libvlc->iconv_lock );
00361 if( i_ret == (size_t)-1 || i_in )
00362 {
00363 msg_Warn( p_this,
00364 "failed to convert \"%s\" from HFS+ charset (%s)",
00365 psz_string, strerror(errno) );
00366 free( psz_utf8 );
00367 return strdup( psz_string );
00368 }
00369
00370 *psz_out = '\0';
00371 return psz_utf8;
00372 }
00373 #endif
00374
00375 return strdup( psz_string );
00376 }