Main Page | Modules | Class Hierarchy | Class List | Directories | File List | Class Members | File Members | Related Pages

subtitles.c

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * subtitles.c
00003  *****************************************************************************
00004  * Copyright (C) 2003-2004 the VideoLAN team
00005  * $Id: subtitles.c 12881 2005-10-18 21:53:42Z hartman $
00006  *
00007  * Authors: Derk-Jan Hartman <hartman at videolan.org>
00008  * This is adapted code from the GPL'ed MPlayer (http://mplayerhq.hu)
00009  *
00010  * This program is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2 of the License, or
00013  * (at your option) any later version.
00014  *
00015  * This program is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  * GNU General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU General Public License
00021  * along with this program; if not, write to the Free Software
00022  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
00023  *****************************************************************************/
00024 
00030 #include <stdlib.h>
00031 #include <vlc/vlc.h>
00032 #include <vlc/input.h>
00033 #include "charset.h"
00034 
00035 #ifdef HAVE_DIRENT_H
00036 #   include <dirent.h>
00037 #endif
00038 
00039 #ifdef HAVE_LIMITS_H  
00040 #   include <limits.h>  
00041 #endif
00042 
00043 #include <ctype.h>
00044 
/**
 * Character that separates the components of a file path on the
 * platform being built for.
 */
#ifdef WIN32
#   define DIRECTORY_SEPARATOR '\\'
#else
#   define DIRECTORY_SEPARATOR '/'
#endif

/**
 * Number of slots in the candidate table that subtitles_Detect() fills
 * while scanning directories.
 */
#define MAX_SUBTITLE_FILES 128
00058 
00059 
/**
 * File extensions that identify a subtitle file. The table is terminated
 * by a NULL entry.
 *
 * Extensions from unsupported types (deliberately not listed):
 * rt, aqt, jss, js, ass
 */
static const char * sub_exts[] = {
    "utf", "utf8", "utf-8",
    "sub", "srt", "smi", "txt", "ssa", "idx",
    NULL
};
00066 
/**
 * Copy s into d as a normalised list of words.
 *
 * Every run of alphanumeric characters is copied in lower case; every run
 * of other characters between two words is replaced by a single space.
 * Non-alphanumeric characters at the start and at the end are dropped.
 *
 * \param d destination buffer; the result is never longer than s, so a
 *          buffer of strlen(s) + 1 bytes is sufficient
 * \param s NUL-terminated source string (not modified)
 */
static void strcpy_trim( char *d, char *s )
{
    /* <ctype.h> functions are only defined for EOF and values representable
     * as unsigned char, so the bytes are read as unsigned char: file names
     * may contain bytes >= 0x80, which are negative as plain char. */
    const unsigned char *src = (const unsigned char *)s;

    /* skip leading separators */
    while( *src && !isalnum( *src ) )
    {
        src++;
    }

    for( ;; )
    {
        /* copy one word, lower-cased */
        while( *src && isalnum( *src ) )
        {
            *d++ = (char)tolower( *src++ );
        }
        if( *src == 0 ) break;

        /* collapse the separators that follow the word */
        while( *src && !isalnum( *src ) )
        {
            src++;
        }
        /* nothing but separators left: do not emit a trailing space */
        if( *src == 0 ) break;

        *d++ = ' ';
    }
    *d = 0;
}
00093 
/**
 * Copy s into d without its extension.
 *
 * When s contains a '.', everything before the last '.' is copied in
 * lower case. When s contains no '.', s is copied verbatim (its case is
 * left untouched).
 *
 * \param d destination buffer; strlen(s) + 1 bytes are always sufficient
 * \param s NUL-terminated source string (not modified)
 */
static void strcpy_strip_ext( char *d, char *s )
{
    const char *dot = strrchr( s, '.' );
    size_t len, k;

    if( !dot )
    {
        strcpy( d, s );
        return;
    }

    len = (size_t)( dot - s );
    for( k = 0; k < len; k++ )
    {
        /* tolower() is only defined for unsigned char values and EOF */
        d[k] = (char)tolower( (unsigned char)s[k] );
    }
    d[len] = 0;
}
00113 
/**
 * Copy the extension of s into d.
 *
 * The extension is whatever follows the last '.' in s; its case is kept.
 * d receives an empty string when s contains no '.'.
 *
 * \param d destination buffer; strlen(s) + 1 bytes are always sufficient
 * \param s NUL-terminated source string
 */
static void strcpy_get_ext( char *d, char *s )
{
    char *dot = strrchr( s, '.' );

    strcpy( d, dot ? dot + 1 : "" );
}
00123 
/**
 * Tell whether a string is free of alphanumeric characters.
 *
 * \param s NUL-terminated string (not modified)
 * \return 1 when s is empty or holds no alphanumeric character, 0 otherwise
 */
static int whiteonly( char *s )
{
    for( ; *s; s++ )
    {
        /* isalnum() is only defined for unsigned char values and EOF;
         * plain char may be negative for bytes >= 0x80 */
        if( isalnum( (unsigned char)*s ) ) return 0;
    }
    return 1;
}
00133 
/**
 * One subtitle candidate collected while scanning a directory.
 */
struct _subfn
{
    int priority;     /* match score; higher values sort first */
    char *psz_fname;  /* path of the candidate file */
    char *psz_ext;    /* extension of the candidate file */
};

typedef struct _subfn subfn;
00140 
00141 static int compare_sub_priority( const void *a, const void *b )
00142 {
00143     if (((subfn*)a)->priority > ((subfn*)b)->priority)
00144     {
00145         return -1;
00146     }
00147 
00148     if (((subfn*)a)->priority < ((subfn*)b)->priority)
00149     {
00150         return 1;
00151     }
00152 
00153 #ifndef UNDER_CE
00154     return strcoll(((subfn*)a)->psz_fname, ((subfn*)b)->psz_fname);
00155 #else
00156     return strcmp(((subfn*)a)->psz_fname, ((subfn*)b)->psz_fname);
00157 #endif
00158 }
00159 
00160 /* Utility function for scandir */  
00161 static int Filter( const struct dirent *p_dir_content )
00162 {
00163     int i;
00164     char *tmp = NULL;
00165 
00166     if( p_dir_content == NULL || p_dir_content->d_name == NULL ) return VLC_FALSE;
00167     /* does it end with a subtitle extension? */
00168     tmp = strrchr( p_dir_content->d_name, '.');
00169     if( !tmp )
00170     {
00171         return VLC_FALSE;
00172     }
00173     else
00174     {
00175         for( i = 0; sub_exts[i]; i++ )
00176         {
00177             if( strcmp( sub_exts[i], tmp+1 ) == 0 )
00178             {
00179                 return VLC_TRUE;
00180             }
00181         }
00182     }
00183     return VLC_FALSE;
00184 }
00185 
00186 
00190 static char **paths_to_list( char *psz_dir, char *psz_path )
00191 {
00192     unsigned int i, k, i_nb_subdirs;
00193     char **subdirs; /* list of subdirectories to look in */
00194 
00195     if( !psz_dir ) return NULL;
00196     if( !psz_path ) return NULL;
00197 
00198     i_nb_subdirs = 1;
00199     for( k = 0; k < strlen( psz_path ); k++ )
00200     {
00201         if( psz_path[k] == ',' )
00202         {
00203             i_nb_subdirs++;
00204         }
00205     }
00206 
00207     if( i_nb_subdirs > 0 )
00208     {
00209         char *psz_parser = NULL, *psz_temp = NULL;
00210 
00211         subdirs = (char**)malloc( sizeof(char*) * ( i_nb_subdirs + 1 ) );
00212         memset( subdirs, 0, sizeof(char*) * ( i_nb_subdirs + 1 ) );
00213         i = 0;
00214         psz_parser = psz_path;
00215         while( psz_parser && *psz_parser )
00216         {
00217             char *psz_subdir;
00218             psz_subdir = psz_parser;
00219             psz_parser = strchr( psz_subdir, ',' );
00220             if( psz_parser )
00221             {
00222                 *psz_parser = '\0';
00223                 psz_parser++;
00224                 while( *psz_parser == ' ' )
00225                 {
00226                     psz_parser++;
00227                 }
00228             }
00229             if( strlen( psz_subdir ) > 0 )
00230             {
00231                 psz_temp = (char *)malloc( strlen(psz_dir)
00232                                            + strlen(psz_subdir) + 2 );
00233                 if( psz_temp )
00234                 {
00235                     sprintf( psz_temp, "%s%s%c", 
00236                              psz_subdir[0] == '.' ? psz_dir : "",
00237                              psz_subdir,
00238                              psz_subdir[strlen(psz_subdir) - 1] ==
00239                               DIRECTORY_SEPARATOR ? '\0' : DIRECTORY_SEPARATOR );
00240                     subdirs[i] = psz_temp;
00241                     i++;
00242                 }
00243             }
00244         }
00245         subdirs[i] = NULL;
00246     }
00247     else
00248     {
00249         subdirs = NULL;
00250     }
00251     return subdirs;
00252 }
00253 
00254 
00270 char **subtitles_Detect( input_thread_t *p_this, char *psz_path,
00271                          char *psz_name )
00272 {
00273     vlc_value_t fuzzy;
00274     int j, i_result2, i_dir_content, i_sub_count = 0, i_fname_len = 0;
00275     char *f_dir = NULL, *f_fname = NULL, *f_fname_noext = NULL, *f_fname_trim = NULL;
00276     char *tmp = NULL;
00277 
00278     char tmp_fname_noext[PATH_MAX];
00279     char tmp_fname_trim[PATH_MAX];
00280     char tmp_fname_ext[PATH_MAX];
00281 
00282     struct dirent **pp_dir_content;
00283     char **tmp_subdirs, **subdirs; /* list of subdirectories to look in */
00284 
00285     subfn *result = NULL; /* unsorted results */
00286     char **result2; /* sorted results */
00287 
00288     char *psz_fname_original = strdup( psz_name );
00289     char *psz_fname = psz_fname_original;
00290 
00291     if( psz_fname == NULL ) return NULL;
00292 
00293     if( !strncmp( psz_fname, "file://", 7 ) )
00294     {
00295         psz_fname += 7;
00296     }
00297 
00298     /* extract filename & dirname from psz_fname */
00299     tmp = strrchr( psz_fname, DIRECTORY_SEPARATOR );
00300     if( tmp )
00301     {
00302         int dirlen = 0;
00303 
00304         f_fname = malloc( strlen(tmp) );
00305         if( f_fname )
00306             strcpy( f_fname, tmp+1 ); // we skip the seperator, so it will still fit in the allocated space
00307         dirlen = strlen(psz_fname) - strlen(tmp) + 1; // add the seperator
00308         f_dir = malloc( dirlen + 1 );
00309         if( f_dir )
00310         {
00311             strncpy( f_dir, psz_fname, dirlen );
00312             f_dir[dirlen] = 0;
00313         }
00314     }
00315     else
00316     {
00317         /* FIXME: we should check the CWD here */
00318         /* f_fname = strdup( psz_fname ); */
00319         if( psz_fname_original ) free( psz_fname_original );
00320         return NULL;
00321     }
00322 
00323     i_fname_len = strlen( f_fname );
00324     f_fname_noext = malloc(i_fname_len + 1);
00325     f_fname_trim = malloc(i_fname_len + 1 );
00326 
00327     strcpy_strip_ext( f_fname_noext, f_fname );
00328     strcpy_trim( f_fname_trim, f_fname_noext );
00329 
00330     result = (subfn*)malloc( sizeof(subfn) * MAX_SUBTITLE_FILES );
00331     if( result )
00332         memset( result, 0, sizeof(subfn) * MAX_SUBTITLE_FILES );
00333 
00334     var_Get( p_this, "sub-autodetect-fuzzy", &fuzzy );
00335 
00336     tmp_subdirs = paths_to_list( f_dir, psz_path );
00337     subdirs = tmp_subdirs;
00338 
00339     for( j = -1; (j == -1) || ( (j >= 0) && (subdirs != NULL) && (*subdirs != NULL) );
00340          j++)
00341     {
00342         pp_dir_content = NULL;
00343         i_dir_content = 0;
00344 
00345         if( j < 0 && f_dir == NULL )
00346             continue;
00347 
00348         /* parse psz_src dir */  
00349         if( ( i_dir_content = scandir( j < 0 ? f_dir : *subdirs, &pp_dir_content, Filter,
00350                                 NULL ) ) != -1 )
00351         {
00352             int a;
00353 
00354             msg_Dbg( p_this, "looking for a subtitle file in %s", j < 0 ? f_dir : *subdirs );
00355             for( a = 0; a < i_dir_content; a++ )
00356             {
00357                 int i_prio = 0;
00358                 struct dirent *p_dir_content = pp_dir_content[a];
00359                 char *psz_inUTF8 = FromLocale( p_dir_content->d_name );
00360                 char *p_fixed_name = vlc_fix_readdir_charset( p_this, psz_inUTF8 );
00361 
00362                 LocaleFree( psz_inUTF8 );
00363 
00364                 /* retrieve various parts of the filename */
00365                 strcpy_strip_ext( tmp_fname_noext, p_fixed_name );
00366                 strcpy_get_ext( tmp_fname_ext, p_fixed_name );
00367                 strcpy_trim( tmp_fname_trim, tmp_fname_noext );
00368 
00369                 if( !i_prio && !strcmp( tmp_fname_trim, f_fname_trim ) )
00370                 {
00371                     /* matches the movie name exactly */
00372                     i_prio = 4;
00373                 }
00374                 if( !i_prio &&
00375                     ( tmp = strstr( tmp_fname_trim, f_fname_trim ) ) )
00376                 {
00377                     /* contains the movie name */
00378                     tmp += strlen( f_fname_trim );
00379                     if( whiteonly( tmp ) )
00380                     {
00381                         /* chars in front of the movie name */
00382                         i_prio = 2;
00383                     }
00384                     else
00385                     {
00386                         /* chars after (and possibly in front of)
00387                          * the movie name */
00388                         i_prio = 3;
00389                     }
00390                 }
00391                 if( !i_prio )
00392                 {
00393                     /* doesn't contain the movie name */
00394                     if( j == 0 ) i_prio = 1;
00395                 }
00396                 if( i_prio >= fuzzy.i_int )
00397                 {
00398                     FILE *f;
00399                     char *tmpresult;
00400 
00401                     asprintf( &tmpresult, "%s%s", j < 0 ? f_dir : *subdirs, p_fixed_name );
00402                     msg_Dbg( p_this, "autodetected subtitle: %s with priority %d", p_fixed_name, i_prio );
00403                     if( ( f = fopen( tmpresult, "rt" ) ) )
00404                     {
00405                         fclose( f );
00406                         result[i_sub_count].priority = i_prio;
00407                         result[i_sub_count].psz_fname = tmpresult;
00408                         result[i_sub_count].psz_ext = strdup(tmp_fname_ext);
00409                         i_sub_count++;
00410                     } else free( tmpresult );
00411                 }
00412                 if( i_sub_count >= MAX_SUBTITLE_FILES ) break;
00413                 free( p_fixed_name );
00414             }
00415         }
00416         if( j >= 0 ) free( *subdirs++ );
00417     }
00418 
00419     if( tmp_subdirs )   free( tmp_subdirs );
00420     if( f_fname_trim )  free( f_fname_trim );
00421     if( f_fname_noext ) free( f_fname_noext );
00422     if( f_fname ) free( f_fname );
00423     if( f_dir )   free( f_dir );
00424 
00425     qsort( result, i_sub_count, sizeof( subfn ), compare_sub_priority );
00426 
00427     result2 = (char**)malloc( sizeof(char*) * ( i_sub_count + 1 ) );
00428     if( result2 )
00429         memset( result2, 0, sizeof(char*) * ( i_sub_count + 1 ) );
00430     i_result2 = 0;
00431 
00432     for( j = 0; j < i_sub_count; j++ )
00433     {
00434         if( result[j].psz_ext && !strcasecmp( result[j].psz_ext, "sub" ) )
00435         {
00436             int i;
00437             for( i = 0; i < i_sub_count; i++ )
00438             {
00439                 if( result[i].psz_fname && result[j].psz_fname &&
00440                     !strncasecmp( result[j].psz_fname, result[i].psz_fname, sizeof( result[j].psz_fname) - 4 ) && 
00441                     !strcasecmp( result[i].psz_ext, "idx" ) )
00442                     break;
00443             }
00444             if( i >= i_sub_count )
00445             {
00446                 result2[i_result2] = result[j].psz_fname;
00447                 i_result2++;
00448             }
00449         }
00450         else
00451         {
00452             result2[i_result2] = result[j].psz_fname;
00453             i_result2++;
00454         }
00455     }
00456 
00457     if( psz_fname_original ) free( psz_fname_original );
00458     if( result ) free( result );
00459     return result2;
00460 }

Generated on Tue Dec 20 10:15:00 2005 for vlc-0.8.4a by  doxygen 1.4.2