Main Page | Modules | Class Hierarchy | Class List | Directories | File List | Class Members | File Members | Related Pages

subsdec.c

00001 /*****************************************************************************
00002  * subsdec.c : text subtitles decoder
00003  *****************************************************************************
00004  * Copyright (C) 2000-2001 the VideoLAN team
00005  * $Id: subsdec.c 12746 2005-10-02 13:15:22Z dionoea $
00006  *
00007  * Authors: Gildas Bazin <[email protected]>
00008  *          Samuel Hocevar <[email protected]>
00009  *
00010  * This program is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2 of the License, or
00013  * (at your option) any later version.
00014  *
00015  * This program is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  * GNU General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU General Public License
00021  * along with this program; if not, write to the Free Software
00022  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
00023  *****************************************************************************/
00024 
00025 /*****************************************************************************
00026  * Preamble
00027  *****************************************************************************/
00028 #include <vlc/vlc.h>
00029 #include <vlc/vout.h>
00030 #include <vlc/decoder.h>
00031 
00032 #include "vlc_osd.h"
00033 #include "vlc_filter.h"
00034 
00035 #include "charset.h"
00036 
00037 /*****************************************************************************
00038  * decoder_sys_t : decoder descriptor
00039  *****************************************************************************/
00040 struct decoder_sys_t
00041 {
00042     int                 i_align;          /* Subtitles alignment on the vout */
00043     vlc_iconv_t         iconv_handle;            /* handle to iconv instance */
00044 };
00045 
00046 /*****************************************************************************
00047  * Local prototypes
00048  *****************************************************************************/
00049 static int  OpenDecoder   ( vlc_object_t * );
00050 static void CloseDecoder  ( vlc_object_t * );
00051 
00052 static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
00053 static subpicture_t *ParseText     ( decoder_t *, block_t * );
00054 static void         StripTags      ( char * );
00055 
/* Name of the pseudo-encoding that selects the current system charset */
#define DEFAULT_NAME "System Default"

/*****************************************************************************
 * Module descriptor.
 *****************************************************************************/
/* Choices offered for the "subsdec-encoding" option. The empty strings split
 * the list into groups; how they are rendered is decided by the code that
 * consumes the list, which is not part of this file. */
static char *ppsz_encodings[] = {
    DEFAULT_NAME, "ASCII", "UTF-8", "",

    "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",

    "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian",
    "MacRomania", "",

    "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R",
    "KOI8-U", "KOI8-RU", "",

    "ISO-8859-6", "CP1256", "MacArabic", "",

    "ISO-8859-7", "CP1253", "MacGreek", "",

    "ISO-8859-8", "CP1255", "MacHebrew", "",

    "ISO-8859-9", "CP1254", "MacTurkish", "",

    "ISO-8859-13", "CP1257", "",

    "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP",
    "SHIFT_JIS", "",

    "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5",
    "BIG5-HKSCS", "",

    "ISO-2022-KR", "EUC-KR", "",

    "MacThai", "KOI8-T", "",

    "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14",
    "ISO-8859-16", "",

    "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950",
    "CP1133", "CP1258", "",

    "Macintosh", "",

    "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE",
    "UTF-32LE", "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4",
    "UCS-4BE", "UCS-4LE", "",

    "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8", "Georgian-Academy",
    "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN", "HPROMAN8",
    "NEXTSTEP"
};
00082 
00083 static int  pi_justification[] = { 0, 1, 2 };
00084 static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
00085 
00086 #define ENCODING_TEXT N_("Subtitles text encoding")
00087 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
00088 #define ALIGN_TEXT N_("Subtitles justification")
00089 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
00090 
00091 vlc_module_begin();
00092     set_shortname( _("Subtitles"));
00093     set_description( _("Text subtitles decoder") );
00094     set_capability( "decoder", 50 );
00095     set_callbacks( OpenDecoder, CloseDecoder );
00096     set_category( CAT_INPUT );
00097     set_subcategory( SUBCAT_INPUT_SCODEC );
00098 
00099     add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
00100                  VLC_FALSE );
00101         change_integer_list( pi_justification, ppsz_justification_text, 0 );
00102     add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
00103                 ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
00104         change_string_list( ppsz_encodings, 0, 0 );
00105 vlc_module_end();
00106 
00107 /*****************************************************************************
00108  * OpenDecoder: probe the decoder and return score
00109  *****************************************************************************
00110  * Tries to launch a decoder and return score so that the interface is able
00111  * to chose.
00112  *****************************************************************************/
00113 static int OpenDecoder( vlc_object_t *p_this )
00114 {
00115     decoder_t     *p_dec = (decoder_t*)p_this;
00116     decoder_sys_t *p_sys;
00117     vlc_value_t val;
00118 
00119     if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
00120         p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
00121     {
00122         return VLC_EGENERIC;
00123     }
00124 
00125     p_dec->pf_decode_sub = DecodeBlock;
00126 
00127     /* Allocate the memory needed to store the decoder's structure */
00128     if( ( p_dec->p_sys = p_sys =
00129           (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
00130     {
00131         msg_Err( p_dec, "out of memory" );
00132         return VLC_EGENERIC;
00133     }
00134 
00135     var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
00136     var_Get( p_dec, "subsdec-align", &val );
00137     p_sys->i_align = val.i_int;
00138 
00139     if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
00140     {
00141         msg_Dbg( p_dec, "using character encoding: %s",
00142                  p_dec->fmt_in.subs.psz_encoding );
00143         p_sys->iconv_handle =
00144             vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
00145     }
00146     else
00147     {
00148         var_Create( p_dec, "subsdec-encoding",
00149                     VLC_VAR_STRING | VLC_VAR_DOINHERIT );
00150         var_Get( p_dec, "subsdec-encoding", &val );
00151         if( !strcmp( val.psz_string, DEFAULT_NAME ) )
00152         {
00153             char *psz_charset =(char*)malloc( 100 );
00154             vlc_current_charset( &psz_charset );
00155             p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
00156             msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
00157             free( psz_charset );
00158         }
00159         else if( val.psz_string )
00160         {
00161             msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
00162             p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
00163         }
00164 
00165         if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
00166         {
00167             msg_Warn( p_dec, "unable to do requested conversion" );
00168         }
00169 
00170         if( val.psz_string ) free( val.psz_string );
00171     }
00172 
00173     return VLC_SUCCESS;
00174 }
00175 
00176 /****************************************************************************
00177  * DecodeBlock: the whole thing
00178  ****************************************************************************
00179  * This function must be fed with complete subtitles units.
00180  ****************************************************************************/
00181 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
00182 {
00183     subpicture_t *p_spu;
00184 
00185     if( !pp_block || *pp_block == NULL ) return NULL;
00186 
00187     p_spu = ParseText( p_dec, *pp_block );
00188 
00189     block_Release( *pp_block );
00190     *pp_block = NULL;
00191 
00192     return p_spu;
00193 }
00194 
00195 /*****************************************************************************
00196  * CloseDecoder: clean up the decoder
00197  *****************************************************************************/
00198 static void CloseDecoder( vlc_object_t *p_this )
00199 {
00200     decoder_t *p_dec = (decoder_t *)p_this;
00201     decoder_sys_t *p_sys = p_dec->p_sys;
00202 
00203     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
00204     {
00205         vlc_iconv_close( p_sys->iconv_handle );
00206     }
00207 
00208     free( p_sys );
00209 }
00210 
00211 /*****************************************************************************
00212  * ParseText: parse an text subtitle packet and send it to the video output
00213  *****************************************************************************/
00214 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
00215 {
00216     decoder_sys_t *p_sys = p_dec->p_sys;
00217     subpicture_t *p_spu = 0;
00218     char *psz_subtitle;
00219     int i_align_h, i_align_v;
00220     video_format_t fmt;
00221 
00222     /* We cannot display a subpicture with no date */
00223     if( p_block->i_pts == 0 )
00224     {
00225         msg_Warn( p_dec, "subtitle without a date" );
00226         return NULL;
00227     }
00228 
00229     /* Check validity of packet data */
00230     if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
00231     {
00232         msg_Warn( p_dec, "empty subtitle" );
00233         return NULL;
00234     }
00235 
00236     /* Should be resiliant against bad subtitles */
00237     psz_subtitle = strndup( (const char *)p_block->p_buffer,
00238                             p_block->i_buffer );
00239 
00240     i_align_h = p_sys->i_align ? 20 : 0;
00241     i_align_v = 10;
00242 
00243     if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
00244     {
00245         char *psz_new_subtitle;
00246         char *psz_convert_buffer_out;
00247         char *psz_convert_buffer_in;
00248         size_t ret, inbytes_left, outbytes_left;
00249 
00250         psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
00251         psz_convert_buffer_out = psz_new_subtitle;
00252         psz_convert_buffer_in = psz_subtitle;
00253         inbytes_left = strlen( psz_subtitle );
00254         outbytes_left = 6 * inbytes_left;
00255         ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
00256                          &inbytes_left, &psz_convert_buffer_out,
00257                          &outbytes_left );
00258         *psz_convert_buffer_out = '\0';
00259 
00260         if( inbytes_left )
00261         {
00262             msg_Warn( p_dec, "Failed to convert subtitle encoding, "
00263                       "dropping subtitle.\nTry setting a different "
00264                       "character-encoding for the subtitle." );
00265             free( psz_subtitle );
00266             return NULL;
00267         }
00268         else
00269         {
00270             free( psz_subtitle );
00271             psz_subtitle = psz_new_subtitle;
00272         }
00273     }
00274 
00275     if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
00276     {
00277         /* Decode SSA strings */
00278         /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR,
00279          * MarginV, Effect, Text */
00280         char *psz_new_subtitle;
00281         char *psz_buffer_sub;
00282         int         i_comma;
00283         int         i_text;
00284 
00285         psz_buffer_sub = psz_subtitle;
00286         for( ;; )
00287         {
00288             i_comma = 0;
00289             while( i_comma < 8 &&
00290                 *psz_buffer_sub != '\0' )
00291             {
00292                 if( *psz_buffer_sub == ',' )
00293                 {
00294                     i_comma++;
00295                 }
00296                 psz_buffer_sub++;
00297             }
00298             psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
00299             i_text = 0;
00300             while( psz_buffer_sub[0] != '\0' )
00301             {
00302                 if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
00303                     psz_buffer_sub[1] == 'N' ) )
00304                 {
00305                     psz_new_subtitle[i_text] = '\n';
00306                     i_text++;
00307                     psz_buffer_sub += 2;
00308                 }
00309                 else if( psz_buffer_sub[0] == '{' &&
00310                          psz_buffer_sub[1] == '\\' )
00311                 {
00312                     /* SSA control code */
00313                     while( psz_buffer_sub[0] != '\0' &&
00314                            psz_buffer_sub[0] != '}' )
00315                     {
00316                         psz_buffer_sub++;
00317                     }
00318                     psz_buffer_sub++;
00319                 }
00320                 else
00321                 {
00322                     psz_new_subtitle[i_text] = psz_buffer_sub[0];
00323                     i_text++;
00324                     psz_buffer_sub++;
00325                 }
00326             }
00327             psz_new_subtitle[i_text] = '\0';
00328             free( psz_subtitle );
00329             psz_subtitle = psz_new_subtitle;
00330             break;
00331         }
00332     }
00333 
00334     StripTags( psz_subtitle );
00335 
00336     p_spu = p_dec->pf_spu_buffer_new( p_dec );
00337     if( !p_spu )
00338     {
00339         msg_Warn( p_dec, "can't get spu buffer" );
00340         free( psz_subtitle );
00341         return 0;
00342     }
00343 
00344     /* Create a new subpicture region */
00345     memset( &fmt, 0, sizeof(video_format_t) );
00346     fmt.i_chroma = VLC_FOURCC('T','E','X','T');
00347     fmt.i_aspect = 0;
00348     fmt.i_width = fmt.i_height = 0;
00349     fmt.i_x_offset = fmt.i_y_offset = 0;
00350     p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
00351     if( !p_spu->p_region )
00352     {
00353         msg_Err( p_dec, "cannot allocate SPU region" );
00354         free( psz_subtitle );
00355         p_dec->pf_spu_buffer_del( p_dec, p_spu );
00356         return 0;
00357     }
00358 
00359     p_spu->p_region->psz_text = psz_subtitle;
00360     p_spu->i_start = p_block->i_pts;
00361     p_spu->i_stop = p_block->i_pts + p_block->i_length;
00362     p_spu->b_ephemer = (p_block->i_length == 0);
00363     p_spu->b_absolute = VLC_FALSE;
00364 
00365     p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
00366     p_spu->i_x = i_align_h;
00367     p_spu->i_y = i_align_v;
00368 
00369     return p_spu;
00370 }
00371 
/*****************************************************************************
 * StripTags: remove markup tags from a string, in place
 *****************************************************************************
 * A tag is a '<', any run of characters containing no space, tab, line feed
 * or carriage return, and a closing '>'. Tags are deleted and the text that
 * follows is shifted left. A '<' that meets whitespace or the end of the
 * string before any '>' is kept as ordinary text, whitespace included.
 * The result is never longer than the input.
 *****************************************************************************/
static void StripTags( char *psz_text )
{
    size_t i_read;
    size_t i_write = 0;       /* next output position */
    size_t i_tag_out = 0;     /* output position of the pending '<' */
    int    b_inside_tag = 0;

    for( i_read = 0; psz_text[ i_read ] != '\0'; i_read++ )
    {
        const char c = psz_text[ i_read ];

        if( b_inside_tag && c == '>' )
        {
            /* Complete tag: rewind over what was tentatively copied */
            i_write = i_tag_out;
            b_inside_tag = 0;
            continue;
        }

        if( !b_inside_tag )
        {
            if( c == '<' )
            {
                /* Possible tag: copy it for now, remember where it went */
                b_inside_tag = 1;
                i_tag_out = i_write;
            }
        }
        else if( c == ' ' || c == '\t' || c == '\n' || c == '\r' )
        {
            /* Not a tag after all: keep everything copied so far */
            b_inside_tag = 0;
        }

        /* Every character other than a tag-closing '>' is copied, including
         * the whitespace that cancels a pending tag */
        psz_text[ i_write++ ] = c;
    }
    psz_text[ i_write ] = '\0';
}

Generated on Tue Dec 20 10:14:30 2005 for vlc-0.8.4a by  doxygen 1.4.2