00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include <vlc/vlc.h>
00029 #include <vlc/vout.h>
00030 #include <vlc/decoder.h>
00031
00032 #include "vlc_osd.h"
00033 #include "vlc_filter.h"
00034
00035 #include "charset.h"
00036
00037
00038
00039
00040 struct decoder_sys_t
00041 {
00042 int i_align;
00043 vlc_iconv_t iconv_handle;
00044 };
00045
00046
00047
00048
00049 static int OpenDecoder ( vlc_object_t * );
00050 static void CloseDecoder ( vlc_object_t * );
00051
00052 static subpicture_t *DecodeBlock ( decoder_t *, block_t ** );
00053 static subpicture_t *ParseText ( decoder_t *, block_t * );
00054 static void StripTags ( char * );
00055
/* Pseudo-encoding name meaning "use the charset of the running system" */
#define DEFAULT_NAME "System Default"

/*
 * Choice list for the "subsdec-encoding" option.
 *
 * The empty-string entries sit between groups of related encodings;
 * presumably they are rendered as separators by the preferences UI
 * (NOTE(review): confirm against change_string_list).
 * The order of the entries is significant and must be preserved.
 */
static char *ppsz_encodings[] =
{
    DEFAULT_NAME, "ASCII", "UTF-8", "",

    "ISO-8859-1", "CP1252", "MacRoman", "MacIceland", "ISO-8859-15", "",

    "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian",
    "MacRomania", "",

    "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine",
    "KOI8-R", "KOI8-U", "KOI8-RU", "",

    "ISO-8859-6", "CP1256", "MacArabic", "",

    "ISO-8859-7", "CP1253", "MacGreek", "",

    "ISO-8859-8", "CP1255", "MacHebrew", "",

    "ISO-8859-9", "CP1254", "MacTurkish", "",

    "ISO-8859-13", "CP1257", "",

    "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP",
    "SHIFT_JIS", "",

    "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW",
    "BIG5", "BIG5-HKSCS", "",

    "ISO-2022-KR", "EUC-KR", "",

    "MacThai", "KOI8-T", "",

    "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14",
    "ISO-8859-16", "",

    "CP850", "CP862", "CP866", "CP874", "CP932",
    "CP949", "CP950", "CP1133", "CP1258", "",

    "Macintosh", "",

    "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE",
    "UTF-32", "UTF-32BE", "UTF-32LE",
    "C99", "JAVA",
    "UCS-2", "UCS-2BE", "UCS-2LE",
    "UCS-4", "UCS-4BE", "UCS-4LE", "",

    "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
    "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1",
    "VISCII", "TCVN",
    "HPROMAN8", "NEXTSTEP"
};
00082
00083 static int pi_justification[] = { 0, 1, 2 };
00084 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
00085
00086 #define ENCODING_TEXT N_("Subtitles text encoding")
00087 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
00088 #define ALIGN_TEXT N_("Subtitles justification")
00089 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
00090
00091 vlc_module_begin();
00092 set_shortname( _("Subtitles"));
00093 set_description( _("Text subtitles decoder") );
00094 set_capability( "decoder", 50 );
00095 set_callbacks( OpenDecoder, CloseDecoder );
00096 set_category( CAT_INPUT );
00097 set_subcategory( SUBCAT_INPUT_SCODEC );
00098
00099 add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
00100 VLC_FALSE );
00101 change_integer_list( pi_justification, ppsz_justification_text, 0 );
00102 add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
00103 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
00104 change_string_list( ppsz_encodings, 0, 0 );
00105 vlc_module_end();
00106
00107
00108
00109
00110
00111
00112
00113 static int OpenDecoder( vlc_object_t *p_this )
00114 {
00115 decoder_t *p_dec = (decoder_t*)p_this;
00116 decoder_sys_t *p_sys;
00117 vlc_value_t val;
00118
00119 if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
00120 p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
00121 {
00122 return VLC_EGENERIC;
00123 }
00124
00125 p_dec->pf_decode_sub = DecodeBlock;
00126
00127
00128 if( ( p_dec->p_sys = p_sys =
00129 (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
00130 {
00131 msg_Err( p_dec, "out of memory" );
00132 return VLC_EGENERIC;
00133 }
00134
00135 var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
00136 var_Get( p_dec, "subsdec-align", &val );
00137 p_sys->i_align = val.i_int;
00138
00139 if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
00140 {
00141 msg_Dbg( p_dec, "using character encoding: %s",
00142 p_dec->fmt_in.subs.psz_encoding );
00143 p_sys->iconv_handle =
00144 vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
00145 }
00146 else
00147 {
00148 var_Create( p_dec, "subsdec-encoding",
00149 VLC_VAR_STRING | VLC_VAR_DOINHERIT );
00150 var_Get( p_dec, "subsdec-encoding", &val );
00151 if( !strcmp( val.psz_string, DEFAULT_NAME ) )
00152 {
00153 char *psz_charset =(char*)malloc( 100 );
00154 vlc_current_charset( &psz_charset );
00155 p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
00156 msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
00157 free( psz_charset );
00158 }
00159 else if( val.psz_string )
00160 {
00161 msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
00162 p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
00163 }
00164
00165 if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
00166 {
00167 msg_Warn( p_dec, "unable to do requested conversion" );
00168 }
00169
00170 if( val.psz_string ) free( val.psz_string );
00171 }
00172
00173 return VLC_SUCCESS;
00174 }
00175
00176
00177
00178
00179
00180
00181 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
00182 {
00183 subpicture_t *p_spu;
00184
00185 if( !pp_block || *pp_block == NULL ) return NULL;
00186
00187 p_spu = ParseText( p_dec, *pp_block );
00188
00189 block_Release( *pp_block );
00190 *pp_block = NULL;
00191
00192 return p_spu;
00193 }
00194
00195
00196
00197
00198 static void CloseDecoder( vlc_object_t *p_this )
00199 {
00200 decoder_t *p_dec = (decoder_t *)p_this;
00201 decoder_sys_t *p_sys = p_dec->p_sys;
00202
00203 if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
00204 {
00205 vlc_iconv_close( p_sys->iconv_handle );
00206 }
00207
00208 free( p_sys );
00209 }
00210
00211
00212
00213
00214 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
00215 {
00216 decoder_sys_t *p_sys = p_dec->p_sys;
00217 subpicture_t *p_spu = 0;
00218 char *psz_subtitle;
00219 int i_align_h, i_align_v;
00220 video_format_t fmt;
00221
00222
00223 if( p_block->i_pts == 0 )
00224 {
00225 msg_Warn( p_dec, "subtitle without a date" );
00226 return NULL;
00227 }
00228
00229
00230 if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
00231 {
00232 msg_Warn( p_dec, "empty subtitle" );
00233 return NULL;
00234 }
00235
00236
00237 psz_subtitle = strndup( (const char *)p_block->p_buffer,
00238 p_block->i_buffer );
00239
00240 i_align_h = p_sys->i_align ? 20 : 0;
00241 i_align_v = 10;
00242
00243 if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
00244 {
00245 char *psz_new_subtitle;
00246 char *psz_convert_buffer_out;
00247 char *psz_convert_buffer_in;
00248 size_t ret, inbytes_left, outbytes_left;
00249
00250 psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
00251 psz_convert_buffer_out = psz_new_subtitle;
00252 psz_convert_buffer_in = psz_subtitle;
00253 inbytes_left = strlen( psz_subtitle );
00254 outbytes_left = 6 * inbytes_left;
00255 ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
00256 &inbytes_left, &psz_convert_buffer_out,
00257 &outbytes_left );
00258 *psz_convert_buffer_out = '\0';
00259
00260 if( inbytes_left )
00261 {
00262 msg_Warn( p_dec, "Failed to convert subtitle encoding, "
00263 "dropping subtitle.\nTry setting a different "
00264 "character-encoding for the subtitle." );
00265 free( psz_subtitle );
00266 return NULL;
00267 }
00268 else
00269 {
00270 free( psz_subtitle );
00271 psz_subtitle = psz_new_subtitle;
00272 }
00273 }
00274
00275 if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
00276 {
00277
00278
00279
00280 char *psz_new_subtitle;
00281 char *psz_buffer_sub;
00282 int i_comma;
00283 int i_text;
00284
00285 psz_buffer_sub = psz_subtitle;
00286 for( ;; )
00287 {
00288 i_comma = 0;
00289 while( i_comma < 8 &&
00290 *psz_buffer_sub != '\0' )
00291 {
00292 if( *psz_buffer_sub == ',' )
00293 {
00294 i_comma++;
00295 }
00296 psz_buffer_sub++;
00297 }
00298 psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
00299 i_text = 0;
00300 while( psz_buffer_sub[0] != '\0' )
00301 {
00302 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
00303 psz_buffer_sub[1] == 'N' ) )
00304 {
00305 psz_new_subtitle[i_text] = '\n';
00306 i_text++;
00307 psz_buffer_sub += 2;
00308 }
00309 else if( psz_buffer_sub[0] == '{' &&
00310 psz_buffer_sub[1] == '\\' )
00311 {
00312
00313 while( psz_buffer_sub[0] != '\0' &&
00314 psz_buffer_sub[0] != '}' )
00315 {
00316 psz_buffer_sub++;
00317 }
00318 psz_buffer_sub++;
00319 }
00320 else
00321 {
00322 psz_new_subtitle[i_text] = psz_buffer_sub[0];
00323 i_text++;
00324 psz_buffer_sub++;
00325 }
00326 }
00327 psz_new_subtitle[i_text] = '\0';
00328 free( psz_subtitle );
00329 psz_subtitle = psz_new_subtitle;
00330 break;
00331 }
00332 }
00333
00334 StripTags( psz_subtitle );
00335
00336 p_spu = p_dec->pf_spu_buffer_new( p_dec );
00337 if( !p_spu )
00338 {
00339 msg_Warn( p_dec, "can't get spu buffer" );
00340 free( psz_subtitle );
00341 return 0;
00342 }
00343
00344
00345 memset( &fmt, 0, sizeof(video_format_t) );
00346 fmt.i_chroma = VLC_FOURCC('T','E','X','T');
00347 fmt.i_aspect = 0;
00348 fmt.i_width = fmt.i_height = 0;
00349 fmt.i_x_offset = fmt.i_y_offset = 0;
00350 p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
00351 if( !p_spu->p_region )
00352 {
00353 msg_Err( p_dec, "cannot allocate SPU region" );
00354 free( psz_subtitle );
00355 p_dec->pf_spu_buffer_del( p_dec, p_spu );
00356 return 0;
00357 }
00358
00359 p_spu->p_region->psz_text = psz_subtitle;
00360 p_spu->i_start = p_block->i_pts;
00361 p_spu->i_stop = p_block->i_pts + p_block->i_length;
00362 p_spu->b_ephemer = (p_block->i_length == 0);
00363 p_spu->b_absolute = VLC_FALSE;
00364
00365 p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
00366 p_spu->i_x = i_align_h;
00367 p_spu->i_y = i_align_v;
00368
00369 return p_spu;
00370 }
00371
/*****************************************************************************
 * StripTags: remove <...> tags from a string, in place
 *****************************************************************************
 * A tag starts at '<' and ends at the next '>' provided that no blank
 * (space, tab, CR or LF) appears in between; the whole span, brackets
 * included, is removed. A '<' followed by a blank before any '>' is plain
 * text and is kept, as is a '<' that is never closed.
 *
 * The string is compacted with a read cursor and a write cursor, so every
 * kept character - including the blank that cancels a pending '<' - is
 * copied to its final position. (The blank used to be skipped by the copy,
 * which left a stale character behind once an earlier tag had been removed:
 * "<b>a < b" became "a <ab".)
 *
 * \param psz_text NUL-terminated string to clean up; must not be NULL
 *****************************************************************************/
static void StripTags( char *psz_text )
{
    const char *psz_in;             /* read cursor */
    char *psz_out = psz_text;       /* write cursor, never ahead of psz_in */
    char *psz_tag = NULL;           /* where the pending '<' was written,
                                     * NULL when not inside a candidate tag */

    for( psz_in = psz_text; *psz_in != '\0'; psz_in++ )
    {
        const char c = *psz_in;

        if( psz_tag == NULL )
        {
            if( c == '<' )
            {
                /* Candidate tag: remember where to rewind to */
                psz_tag = psz_out;
            }
            *psz_out++ = c;
        }
        else if( c == '>' )
        {
            /* Complete tag: drop everything written since the '<' */
            psz_out = psz_tag;
            psz_tag = NULL;
        }
        else
        {
            if( c == ' ' || c == '\t' || c == '\n' || c == '\r' )
            {
                /* Not a tag after all: keep what was written so far */
                psz_tag = NULL;
            }
            *psz_out++ = c;
        }
    }

    *psz_out = '\0';
}