vlc-0.8.4a: /home/huihoo/src/vlc/vlc-0.8.4a/modules/video

00001 /*****************************************************************************
00002  * deinterlace.c : deinterlacer plugin for vlc
00003  *****************************************************************************
00004  * Copyright (C) 2000, 2001, 2002, 2003 the VideoLAN team
00005  * $Id: deinterlace.c 13364 2005-11-24 08:10:13Z md $
00006  *
00007  * Author: Sam Hocevar <sam@zoy.org>
00008  *
00009  * This program is free software; you can redistribute it and/or modify
00010  * it under the terms of the GNU General Public License as published by
00011  * the Free Software Foundation; either version 2 of the License, or
00012  * (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
00022  *****************************************************************************/
00023 
00024 /*****************************************************************************
00025  * Preamble
00026  *****************************************************************************/
00027 #include <errno.h>
00028 #include <stdlib.h>                                      /* malloc(), free() */
00029 #include <string.h>
00030 
00031 #include <vlc/vlc.h>
00032 #include <vlc/vout.h>
00033 #include <vlc/sout.h>
00034 #include "vlc_filter.h"
00035 
00036 #ifdef HAVE_ALTIVEC_H
00037 #   include <altivec.h>
00038 #endif
00039 
00040 #ifdef CAN_COMPILE_MMXEXT
00041 #   include "mmx.h"
00042 #endif
00043 
00044 #include "filter_common.h"
00045 
/* Deinterlace mode identifiers: the values SetFilterMethod() stores in
 * vout_sys_t.i_mode and that Render() and SpawnRealVout() switch on. */
00046 #define DEINTERLACE_DISCARD 1
00047 #define DEINTERLACE_MEAN    2
00048 #define DEINTERLACE_BLEND   3
00049 #define DEINTERLACE_BOB     4
00050 #define DEINTERLACE_LINEAR  5
00051 #define DEINTERLACE_X       6
00052 
00053 /*****************************************************************************
00054  * Local prototypes
00055  *****************************************************************************/
/* Module activation / deactivation, registered with set_callbacks() */
00056 static int  Create    ( vlc_object_t * );
00057 static void Destroy   ( vlc_object_t * );
00058 
/* Video output methods, installed on the vout by Create() */
00059 static int  Init      ( vout_thread_t * );
00060 static void End       ( vout_thread_t * );
00061 static void Render    ( vout_thread_t *, picture_t * );
00062 
/* One renderer per deinterlace mode, called from Render() as
 * ( vout, output picture, input picture [, field index 0 or 1] ) */
00063 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
00064 static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
00065 static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
00066 static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
00067 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
00068 static void RenderX      ( vout_thread_t *, picture_t *, picture_t * );
00069 
/* Line-merge implementations; Create() stores one of them in
 * vout_sys_t.pf_merge according to the CPU capabilities */
00070 static void MergeGeneric ( void *, const void *, const void *, size_t );
00071 #if defined(CAN_COMPILE_C_ALTIVEC)
00072 static void MergeAltivec ( void *, const void *, const void *, size_t );
00073 #endif
00074 #if defined(CAN_COMPILE_MMXEXT)
00075 static void MergeMMX     ( void *, const void *, const void *, size_t );
00076 #endif
00077 #if defined(CAN_COMPILE_SSE)
00078 static void MergeSSE2    ( void *, const void *, const void *, size_t );
00079 #endif
00080 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* Stored in vout_sys_t.pf_end_merge alongside MergeMMX / MergeSSE2 */
00081 static void EndMMX       ( void );
00082 #endif
00083 
/* Variable callback attached to the child vout by Init() via ADD_CALLBACKS */
00084 static int  SendEvents   ( vlc_object_t *, char const *,
00085                            vlc_value_t, vlc_value_t, void * );
00086 
00087 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
00088 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
00089 
/* Entry points of the "video filter2" submodule */
00090 static int OpenFilter( vlc_object_t *p_this );
00091 static void CloseFilter( vlc_object_t *p_this );
00092 
00093 /*****************************************************************************
00094  * Callback prototypes
00095  *****************************************************************************/
/* Registered on the "deinterlace-mode" variable by Init() */
00096 static int FilterCallback ( vlc_object_t *, char const *,
00097                             vlc_value_t, vlc_value_t, void * );
00098 
00099 /*****************************************************************************
00100  * Module descriptor
00101  *****************************************************************************/
/* Label and help text of the "deinterlace-mode" option (display section) */
00102 #define MODE_TEXT N_("Deinterlace mode")
00103 #define MODE_LONGTEXT N_("Default deinterlace method to use for local playback")
00104 
/* Label and help text of the streaming submodule's "mode" option */
00105 #define SOUT_MODE_TEXT N_("Deinterlace mode")
00106 #define SOUT_MODE_LONGTEXT N_("Default deinterlace methode to use for streaming")
00107 
/* Prefix of the streaming submodule's option names */
00108 #define FILTER_CFG_PREFIX "sout-deinterlace-"
00109 
/* Accepted mode strings and their display labels, index for index; both
 * arrays are handed together to change_string_list() below. */
00110 static char *mode_list[] = { "discard", "blend", "mean", "bob", "linear", "x" };
00111 static char *mode_list_text[] = { N_("Discard"), N_("Blend"), N_("Mean"),
00112                                   N_("Bob"), N_("Linear"), "X" };
00113 
00114 vlc_module_begin();
00115     set_description( _("Deinterlacing video filter") );
00116     set_shortname( N_("Deinterlace" ));
00117     set_capability( "video filter", 0 );
00118     set_category( CAT_VIDEO );
00119     set_subcategory( SUBCAT_VIDEO_VFILTER );
00120 
    /* Display side: "deinterlace-mode", default "discard" */
00121     set_section( N_("Display"),NULL);
00122     add_string( "deinterlace-mode", "discard", NULL, MODE_TEXT,
00123                 MODE_LONGTEXT, VLC_FALSE );
00124         change_string_list( mode_list, mode_list_text, 0 );
00125 
00126     add_shortcut( "deinterlace" );
00127     set_callbacks( Create, Destroy );
00128 
    /* Streaming side: a "video filter2" submodule opened by OpenFilter(),
     * with its own mode option (FILTER_CFG_PREFIX "mode", default "blend") */
00129     add_submodule();
00130     set_capability( "video filter2", 0 );
00131     set_section( N_("Streaming"),NULL);
00132     add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
00133                 SOUT_MODE_LONGTEXT, VLC_FALSE );
00134         change_string_list( mode_list, mode_list_text, 0 );
00135     set_callbacks( OpenFilter, CloseFilter );
00136 vlc_module_end();
00137 
/* NULL-terminated list of option names.
 * NOTE(review): presumably the options parsed under FILTER_CFG_PREFIX by
 * OpenFilter(); it is not referenced in this part of the file - confirm. */
00138 static const char *ppsz_filter_options[] = {
00139     "mode", NULL
00140 };
00141 
00142 /*****************************************************************************
00143  * vout_sys_t: Deinterlace video output method descriptor
00144  *****************************************************************************
00145  * This structure is part of the video output thread descriptor.
00146  * It describes the Deinterlace specific properties of an output thread.
00147  *****************************************************************************/
00148 struct vout_sys_t
00149 {
00150     int        i_mode;        /* Deinterlace mode (a DEINTERLACE_* value) */
00151     vlc_bool_t b_double_rate; /* Shall we double the framerate? */
00152 
00153     mtime_t    last_date;     /* Date of the previous input picture seen by
                                 * Render() in double-rate mode; 0 until then */
00154     mtime_t    next_date;     /* NOTE(review): not used in this part of the
                                 * file - confirm whether it is still needed */
00155 
00156     vout_thread_t *p_vout;    /* Real (child) video output, spawned by
                                 * SpawnRealVout(); NULL until Init() */
00157 
00158     vlc_mutex_t filter_lock;  /* Held by Render() while it creates, fills and
                                 * displays pictures on the child vout */
00159 
    /* Line-merge routine and optional clean-up routine (NULL when none is
     * needed), both selected by Create() from the CPU capabilities. */
00160     void (*pf_merge) ( void *, const void *, const void *, size_t );
00161     void (*pf_end_merge) ( void );
00162 };
00163 
00164 /*****************************************************************************
00165  * Control: control facility for the vout (forwards to child vout)
00166  *****************************************************************************/
00167 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
00168 {
00169     if( i_query == VOUT_SET_ZOOM )
00170     {
00171         p_vout->p_sys->p_vout->i_window_width = p_vout->i_window_width;
00172         p_vout->p_sys->p_vout->i_window_height = p_vout->i_window_height;
00173     }
00174     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
00175 }
00176 
00177 /*****************************************************************************
00178  * Create: allocates Deinterlace video thread output method
00179  *****************************************************************************
00180  * This function allocates and initializes a Deinterlace vout method.
00181  *****************************************************************************/
00182 static int Create( vlc_object_t *p_this )
00183 {
00184     vout_thread_t *p_vout = (vout_thread_t *)p_this;
00185     vlc_value_t val;
00186 
00187     /* Allocate structure */
00188     p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
00189     if( p_vout->p_sys == NULL )
00190     {
00191         msg_Err( p_vout, "out of memory" );
00192         return VLC_ENOMEM;
00193     }
00194 
00195     p_vout->pf_init = Init;
00196     p_vout->pf_end = End;
00197     p_vout->pf_manage = NULL;
00198     p_vout->pf_render = Render;
00199     p_vout->pf_display = NULL;
00200     p_vout->pf_control = Control;
00201 
00202     p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
00203     p_vout->p_sys->b_double_rate = VLC_FALSE;
00204     p_vout->p_sys->last_date = 0;
00205     p_vout->p_sys->p_vout = 0;
00206     vlc_mutex_init( p_vout, &p_vout->p_sys->filter_lock );
00207 
00208 #if defined(CAN_COMPILE_C_ALTIVEC)
00209     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_ALTIVEC )
00210     {
00211         p_vout->p_sys->pf_merge = MergeAltivec;
00212         p_vout->p_sys->pf_end_merge = NULL;
00213     }
00214     else
00215 #endif
00216 #if defined(CAN_COMPILE_SSE)
00217     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_SSE2 )
00218     {
00219         p_vout->p_sys->pf_merge = MergeSSE2;
00220         p_vout->p_sys->pf_end_merge = EndMMX;
00221     }
00222     else
00223 #endif
00224 #if defined(CAN_COMPILE_MMXEXT)
00225     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMX )
00226     {
00227         p_vout->p_sys->pf_merge = MergeMMX;
00228         p_vout->p_sys->pf_end_merge = EndMMX;
00229     }
00230     else
00231 #endif
00232     {
00233         p_vout->p_sys->pf_merge = MergeGeneric;
00234         p_vout->p_sys->pf_end_merge = NULL;
00235     }
00236 
00237     /* Look what method was requested */
00238     var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
00239     var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
00240 
00241     if( val.psz_string == NULL )
00242     {
00243         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
00244         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
00245 
00246         val.psz_string = strdup( "discard" );
00247     }
00248 
00249     msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
00250 
00251     SetFilterMethod( p_vout, val.psz_string );
00252 
00253     free( val.psz_string );
00254 
00255     return VLC_SUCCESS;
00256 }
00257 
00258 /*****************************************************************************
00259  * SetFilterMethod: setup the deinterlace method to use.
00260  *****************************************************************************/
00261 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
00262 {
00263     if( !strcmp( psz_method, "discard" ) )
00264     {
00265         p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
00266         p_vout->p_sys->b_double_rate = VLC_FALSE;
00267     }
00268     else if( !strcmp( psz_method, "mean" ) )
00269     {
00270         p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
00271         p_vout->p_sys->b_double_rate = VLC_FALSE;
00272     }
00273     else if( !strcmp( psz_method, "blend" )
00274              || !strcmp( psz_method, "average" )
00275              || !strcmp( psz_method, "combine-fields" ) )
00276     {
00277         p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
00278         p_vout->p_sys->b_double_rate = VLC_FALSE;
00279     }
00280     else if( !strcmp( psz_method, "bob" )
00281              || !strcmp( psz_method, "progressive-scan" ) )
00282     {
00283         p_vout->p_sys->i_mode = DEINTERLACE_BOB;
00284         p_vout->p_sys->b_double_rate = VLC_TRUE;
00285     }
00286     else if( !strcmp( psz_method, "linear" ) )
00287     {
00288         p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
00289         p_vout->p_sys->b_double_rate = VLC_TRUE;
00290     }
00291     else if( !strcmp( psz_method, "x" ) )
00292     {
00293         p_vout->p_sys->i_mode = DEINTERLACE_X;
00294         p_vout->p_sys->b_double_rate = VLC_FALSE;
00295     }
00296     else
00297     {
00298         msg_Err( p_vout, "no valid deinterlace mode provided, "
00299                  "using \"discard\"" );
00300     }
00301 
00302     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
00303 }
00304 
00305 /*****************************************************************************
00306  * Init: initialize Deinterlace video thread output method
00307  *****************************************************************************/
00308 static int Init( vout_thread_t *p_vout )
00309 {
00310     int i_index;
00311     picture_t *p_pic;
00312 
00313     I_OUTPUTPICTURES = 0;
00314 
00315     /* Initialize the output structure, full of directbuffers since we want
00316      * the decoder to output directly to our structures. */
00317     switch( p_vout->render.i_chroma )
00318     {
00319         case VLC_FOURCC('I','4','2','0'):
00320         case VLC_FOURCC('I','Y','U','V'):
00321         case VLC_FOURCC('Y','V','1','2'):
00322         case VLC_FOURCC('I','4','2','2'):
00323             p_vout->output.i_chroma = p_vout->render.i_chroma;
00324             p_vout->output.i_width  = p_vout->render.i_width;
00325             p_vout->output.i_height = p_vout->render.i_height;
00326             p_vout->output.i_aspect = p_vout->render.i_aspect;
00327             p_vout->fmt_out = p_vout->fmt_in;
00328             break;
00329 
00330         default:
00331             return VLC_EGENERIC; /* unknown chroma */
00332             break;
00333     }
00334 
00335     /* Try to open the real video output */
00336     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
00337 
00338     if( p_vout->p_sys->p_vout == NULL )
00339     {
00340         /* Everything failed */
00341         msg_Err( p_vout, "cannot open vout, aborting" );
00342 
00343         return VLC_EGENERIC;
00344     }
00345 
00346     var_AddCallback( p_vout, "deinterlace-mode", FilterCallback, NULL );
00347 
00348     ALLOCATE_DIRECTBUFFERS( VOUT_MAX_PICTURES );
00349 
00350     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
00351 
00352     ADD_PARENT_CALLBACKS( SendEventsToChild );
00353 
00354     return VLC_SUCCESS;
00355 }
00356 
00357 /*****************************************************************************
00358  * SpawnRealVout: spawn the real video output.
00359  *****************************************************************************/
00360 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
00361 {
00362     vout_thread_t *p_real_vout = NULL;
00363     video_format_t fmt = {0};
00364 
00365     msg_Dbg( p_vout, "spawning the real video output" );
00366 
00367     fmt = p_vout->fmt_out;
00368 
00369     switch( p_vout->render.i_chroma )
00370     {
00371     case VLC_FOURCC('I','4','2','0'):
00372     case VLC_FOURCC('I','Y','U','V'):
00373     case VLC_FOURCC('Y','V','1','2'):
00374         switch( p_vout->p_sys->i_mode )
00375         {
00376         case DEINTERLACE_MEAN:
00377         case DEINTERLACE_DISCARD:
00378             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
00379             fmt.i_sar_den *= 2;
00380             p_real_vout = vout_Create( p_vout, &fmt );
00381             break;
00382 
00383         case DEINTERLACE_BOB:
00384         case DEINTERLACE_BLEND:
00385         case DEINTERLACE_LINEAR:
00386         case DEINTERLACE_X:
00387             p_real_vout = vout_Create( p_vout, &fmt );
00388             break;
00389         }
00390         break;
00391 
00392     case VLC_FOURCC('I','4','2','2'):
00393         fmt.i_chroma = VLC_FOURCC('I','4','2','0');
00394         p_real_vout = vout_Create( p_vout, &fmt );
00395         break;
00396 
00397     default:
00398         break;
00399     }
00400 
00401     return p_real_vout;
00402 }
00403 
00404 /*****************************************************************************
00405  * End: terminate Deinterlace video thread output method
00406  *****************************************************************************/
00407 static void End( vout_thread_t *p_vout )
00408 {
00409     int i_index;
00410 
00411     /* Free the fake output buffers we allocated */
00412     for( i_index = I_OUTPUTPICTURES ; i_index ; )
00413     {
00414         i_index--;
00415         free( PP_OUTPUTPICTURE[ i_index ]->p_data_orig );
00416     }
00417 
00418     if( p_vout->p_sys->p_vout )
00419     {
00420         DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
00421         vlc_object_detach( p_vout->p_sys->p_vout );
00422         vout_Destroy( p_vout->p_sys->p_vout );
00423     }
00424 
00425     DEL_PARENT_CALLBACKS( SendEventsToChild );
00426 }
00427 
00428 /*****************************************************************************
00429  * Destroy: destroy Deinterlace video thread output method
00430  *****************************************************************************
00431  * Terminate an output method created by DeinterlaceCreateOutputMethod
00432  *****************************************************************************/
00433 static void Destroy( vlc_object_t *p_this )
00434 {
00435     vout_thread_t *p_vout = (vout_thread_t *)p_this;
00436     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
00437     free( p_vout->p_sys );
00438 }
00439 
00440 /*****************************************************************************
00441  * Render: displays previously rendered output
00442  *****************************************************************************
00443  * This function send the currently rendered image to Deinterlace image,
00444  * waits until it is displayed and switch the two rendering buffers, preparing
00445  * next frame.
00446  *****************************************************************************/
00447 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
00448 {
00449     vout_sys_t *p_sys = p_vout->p_sys;
00450     picture_t *pp_outpic[2];
00451 
00452     p_vout->fmt_out.i_x_offset = p_sys->p_vout->fmt_in.i_x_offset =
00453         p_vout->fmt_in.i_x_offset;
00454     p_vout->fmt_out.i_y_offset = p_sys->p_vout->fmt_in.i_y_offset =
00455         p_vout->fmt_in.i_y_offset;
00456     p_vout->fmt_out.i_visible_width = p_sys->p_vout->fmt_in.i_visible_width =
00457         p_vout->fmt_in.i_visible_width;
00458     p_vout->fmt_out.i_visible_height = p_sys->p_vout->fmt_in.i_visible_height =
00459         p_vout->fmt_in.i_visible_height;
00460     if( p_vout->p_sys->i_mode == DEINTERLACE_MEAN ||
00461         p_vout->p_sys->i_mode == DEINTERLACE_DISCARD )
00462     {
00463         p_vout->fmt_out.i_y_offset /= 2; p_sys->p_vout->fmt_in.i_y_offset /= 2;
00464         p_vout->fmt_out.i_visible_height /= 2;
00465         p_sys->p_vout->fmt_in.i_visible_height /= 2;
00466     }
00467  
00468     pp_outpic[0] = pp_outpic[1] = NULL;
00469 
00470     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
00471 
00472     /* Get a new picture */
00473     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
00474                                                 0, 0, 0 ) )
00475               == NULL )
00476     {
00477         if( p_vout->b_die || p_vout->b_error )
00478         {
00479             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
00480             return;
00481         }
00482         msleep( VOUT_OUTMEM_SLEEP );
00483     }
00484 
00485     vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[0], p_pic->date );
00486 
00487     /* If we are using double rate, get an additional new picture */
00488     if( p_vout->p_sys->b_double_rate )
00489     {
00490         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
00491                                                  0, 0, 0 ) )
00492                   == NULL )
00493         {
00494             if( p_vout->b_die || p_vout->b_error )
00495             {
00496                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00497                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
00498                 return;
00499             }
00500             msleep( VOUT_OUTMEM_SLEEP );
00501         }
00502 
00503         /* 20ms is a bit arbitrary, but it's only for the first image we get */
00504         if( !p_vout->p_sys->last_date )
00505         {
00506             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
00507                               p_pic->date + 20000 );
00508         }
00509         else
00510         {
00511             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
00512                       (3 * p_pic->date - p_vout->p_sys->last_date) / 2 );
00513         }
00514         p_vout->p_sys->last_date = p_pic->date;
00515     }
00516 
00517     switch( p_vout->p_sys->i_mode )
00518     {
00519         case DEINTERLACE_DISCARD:
00520             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
00521             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00522             break;
00523 
00524         case DEINTERLACE_BOB:
00525             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
00526             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00527             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
00528             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
00529             break;
00530 
00531         case DEINTERLACE_LINEAR:
00532             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
00533             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00534             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
00535             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
00536             break;
00537 
00538         case DEINTERLACE_MEAN:
00539             RenderMean( p_vout, pp_outpic[0], p_pic );
00540             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00541             break;
00542 
00543         case DEINTERLACE_BLEND:
00544             RenderBlend( p_vout, pp_outpic[0], p_pic );
00545             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00546             break;
00547 
00548         case DEINTERLACE_X:
00549             RenderX( p_vout, pp_outpic[0], p_pic );
00550             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
00551             break;
00552     }
00553     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
00554 }
00555 
00556 /*****************************************************************************
00557  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
00558  *****************************************************************************/
00559 static void RenderDiscard( vout_thread_t *p_vout,
00560                            picture_t *p_outpic, picture_t *p_pic, int i_field )
00561 {
00562     int i_plane;
00563 
00564     /* Copy image and skip lines */
00565     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
00566     {
00567         uint8_t *p_in, *p_out_end, *p_out;
00568         int i_increment;
00569 
00570         p_in = p_pic->p[i_plane].p_pixels
00571                    + i_field * p_pic->p[i_plane].i_pitch;
00572 
00573         p_out = p_outpic->p[i_plane].p_pixels;
00574         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
00575                              * p_outpic->p[i_plane].i_visible_lines;
00576 
00577         switch( p_vout->render.i_chroma )
00578         {
00579         case VLC_FOURCC('I','4','2','0'):
00580         case VLC_FOURCC('I','Y','U','V'):
00581         case VLC_FOURCC('Y','V','1','2'):
00582 
00583             for( ; p_out < p_out_end ; )
00584             {
00585                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
00586                                           p_pic->p[i_plane].i_pitch );
00587 
00588                 p_out += p_pic->p[i_plane].i_pitch;
00589                 p_in += 2 * p_pic->p[i_plane].i_pitch;
00590             }
00591             break;
00592 
00593         case VLC_FOURCC('I','4','2','2'):
00594 
00595             i_increment = 2 * p_pic->p[i_plane].i_pitch;
00596 
00597             if( i_plane == Y_PLANE )
00598             {
00599                 for( ; p_out < p_out_end ; )
00600                 {
00601                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00602                                               p_pic->p[i_plane].i_pitch );
00603                     p_out += p_pic->p[i_plane].i_pitch;
00604                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00605                                               p_pic->p[i_plane].i_pitch );
00606                     p_out += p_pic->p[i_plane].i_pitch;
00607                     p_in += i_increment;
00608                 }
00609             }
00610             else
00611             {
00612                 for( ; p_out < p_out_end ; )
00613                 {
00614                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00615                                               p_pic->p[i_plane].i_pitch );
00616                     p_out += p_pic->p[i_plane].i_pitch;
00617                     p_in += i_increment;
00618                 }
00619             }
00620             break;
00621 
00622         default:
00623             break;
00624         }
00625     }
00626 }
00627 
00628 /*****************************************************************************
00629  * RenderBob: renders a BOB picture - simple copy
00630  *****************************************************************************/
00631 static void RenderBob( vout_thread_t *p_vout,
00632                        picture_t *p_outpic, picture_t *p_pic, int i_field )
00633 {
00634     int i_plane;
00635 
00636     /* Copy image and skip lines */
00637     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
00638     {
00639         uint8_t *p_in, *p_out_end, *p_out;
00640 
00641         p_in = p_pic->p[i_plane].p_pixels;
00642         p_out = p_outpic->p[i_plane].p_pixels;
00643         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
00644                              * p_outpic->p[i_plane].i_visible_lines;
00645 
00646         switch( p_vout->render.i_chroma )
00647         {
00648             case VLC_FOURCC('I','4','2','0'):
00649             case VLC_FOURCC('I','Y','U','V'):
00650             case VLC_FOURCC('Y','V','1','2'):
00651                 /* For BOTTOM field we need to add the first line */
00652                 if( i_field == 1 )
00653                 {
00654                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00655                                               p_pic->p[i_plane].i_pitch );
00656                     p_in += p_pic->p[i_plane].i_pitch;
00657                     p_out += p_pic->p[i_plane].i_pitch;
00658                 }
00659 
00660                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
00661 
00662                 for( ; p_out < p_out_end ; )
00663                 {
00664                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00665                                               p_pic->p[i_plane].i_pitch );
00666 
00667                     p_out += p_pic->p[i_plane].i_pitch;
00668 
00669                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00670                                               p_pic->p[i_plane].i_pitch );
00671 
00672                     p_in += 2 * p_pic->p[i_plane].i_pitch;
00673                     p_out += p_pic->p[i_plane].i_pitch;
00674                 }
00675 
00676                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
00677                                           p_pic->p[i_plane].i_pitch );
00678 
00679                 /* For TOP field we need to add the last line */
00680                 if( i_field == 0 )
00681                 {
00682                     p_in += p_pic->p[i_plane].i_pitch;
00683                     p_out += p_pic->p[i_plane].i_pitch;
00684                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00685                                               p_pic->p[i_plane].i_pitch );
00686                 }
00687                 break;
00688 
00689             case VLC_FOURCC('I','4','2','2'):
00690                 /* For BOTTOM field we need to add the first line */
00691                 if( i_field == 1 )
00692                 {
00693                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00694                                               p_pic->p[i_plane].i_pitch );
00695                     p_in += p_pic->p[i_plane].i_pitch;
00696                     p_out += p_pic->p[i_plane].i_pitch;
00697                 }
00698 
00699                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
00700 
00701                 if( i_plane == Y_PLANE )
00702                 {
00703                     for( ; p_out < p_out_end ; )
00704                     {
00705                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
00706                                                   p_pic->p[i_plane].i_pitch );
00707 
00708                         p_out += p_pic->p[i_plane].i_pitch;
00709 
00710                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
00711                                                   p_pic->p[i_plane].i_pitch );
00712 
00713                         p_in += 2 * p_pic->p[i_plane].i_pitch;
00714                         p_out += p_pic->p[i_plane].i_pitch;
00715                     }
00716                 }
00717                 else
00718                 {
00719                     for( ; p_out < p_out_end ; )
00720                     {
00721                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
00722                                                   p_pic->p[i_plane].i_pitch );
00723 
00724                         p_out += p_pic->p[i_plane].i_pitch;
00725                         p_in += 2 * p_pic->p[i_plane].i_pitch;
00726                     }
00727                 }
00728 
00729                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
00730                                           p_pic->p[i_plane].i_pitch );
00731 
00732                 /* For TOP field we need to add the last line */
00733                 if( i_field == 0 )
00734                 {
00735                     p_in += p_pic->p[i_plane].i_pitch;
00736                     p_out += p_pic->p[i_plane].i_pitch;
00737                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
00738                                               p_pic->p[i_plane].i_pitch );
00739                 }
00740                 break;
00741         }
00742     }
00743 }
00744 
/* Shorthands for the merge routines chosen in Create(); both expect a
 * vout_thread_t *p_vout to be in scope.
 * EndMerge expands to an unbraced "if" followed by the function pointer, so
 * it must be written as the complete statement "EndMerge();" and is a no-op
 * when pf_end_merge is NULL. */
00745 #define Merge p_vout->p_sys->pf_merge
00746 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
00747 
00748 /*****************************************************************************
00749  * RenderLinear: BOB with linear interpolation
00750  *****************************************************************************/
00751 static void RenderLinear( vout_thread_t *p_vout,
00752                           picture_t *p_outpic, picture_t *p_pic, int i_field )
00753 {
00754     int i_plane;
00755 
00756     /* Copy image and skip lines */
00757     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
00758     {
00759         uint8_t *p_in, *p_out_end, *p_out;
00760 
00761         p_in = p_pic->p[i_plane].p_pixels;
00762         p_out = p_outpic->p[i_plane].p_pixels;
00763         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
00764                              * p_outpic->p[i_plane].i_visible_lines;
00765 
00766         /* For BOTTOM field we need to add the first line */
00767         if( i_field == 1 )
00768         {
00769             p_vout->p_vlc->pf_memcpy( p_out, p_in,
00770                                       p_pic->p[i_plane].i_pitch );
00771             p_in += p_pic->p[i_plane].i_pitch;
00772             p_out += p_pic->p[i_plane].i_pitch;
00773         }
00774 
00775         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
00776 
00777         for( ; p_out < p_out_end ; )
00778         {
00779             p_vout->p_vlc->pf_memcpy( p_out, p_in,
00780                                       p_pic->p[i_plane].i_pitch );
00781 
00782             p_out += p_pic->p[i_plane].i_pitch;
00783 
00784             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
00785                    p_pic->p[i_plane].i_pitch );
00786 
00787             p_in += 2 * p_pic->p[i_plane].i_pitch;
00788             p_out += p_pic->p[i_plane].i_pitch;
00789         }
00790 
00791         p_vout->p_vlc->pf_memcpy( p_out, p_in,
00792                                   p_pic->p[i_plane].i_pitch );
00793 
00794         /* For TOP field we need to add the last line */
00795         if( i_field == 0 )
00796         {
00797             p_in += p_pic->p[i_plane].i_pitch;
00798             p_out += p_pic->p[i_plane].i_pitch;
00799             p_vout->p_vlc->pf_memcpy( p_out, p_in,
00800                                       p_pic->p[i_plane].i_pitch );
00801         }
00802     }
00803     EndMerge();
00804 }
00805 
00806 static void RenderMean( vout_thread_t *p_vout,
00807                         picture_t *p_outpic, picture_t *p_pic )
00808 {
00809     int i_plane;
00810 
00811     /* Copy image and skip lines */
00812     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
00813     {
00814         uint8_t *p_in, *p_out_end, *p_out;
00815 
00816         p_in = p_pic->p[i_plane].p_pixels;
00817 
00818         p_out = p_outpic->p[i_plane].p_pixels;
00819         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
00820                              * p_outpic->p[i_plane].i_visible_lines;
00821 
00822         /* All lines: mean value */
00823         for( ; p_out < p_out_end ; )
00824         {
00825             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
00826                    p_pic->p[i_plane].i_pitch );
00827 
00828             p_out += p_pic->p[i_plane].i_pitch;
00829             p_in += 2 * p_pic->p[i_plane].i_pitch;
00830         }
00831     }
00832     EndMerge();
00833 }
00834 
00835 static void RenderBlend( vout_thread_t *p_vout,
00836                          picture_t *p_outpic, picture_t *p_pic )
00837 {
00838     int i_plane;
00839 
00840     /* Copy image and skip lines */
00841     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
00842     {
00843         uint8_t *p_in, *p_out_end, *p_out;
00844 
00845         p_in = p_pic->p[i_plane].p_pixels;
00846 
00847         p_out = p_outpic->p[i_plane].p_pixels;
00848         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
00849                              * p_outpic->p[i_plane].i_visible_lines;
00850 
00851         switch( p_vout->render.i_chroma )
00852         {
00853             case VLC_FOURCC('I','4','2','0'):
00854             case VLC_FOURCC('I','Y','U','V'):
00855             case VLC_FOURCC('Y','V','1','2'):
00856                 /* First line: simple copy */
00857                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
00858                                           p_pic->p[i_plane].i_pitch );
00859                 p_out += p_pic->p[i_plane].i_pitch;
00860 
00861                 /* Remaining lines: mean value */
00862                 for( ; p_out < p_out_end ; )
00863                 {
00864                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
00865                            p_pic->p[i_plane].i_pitch );
00866 
00867                     p_out += p_pic->p[i_plane].i_pitch;
00868                     p_in += p_pic->p[i_plane].i_pitch;
00869                 }
00870                 break;
00871 
00872             case VLC_FOURCC('I','4','2','2'):
00873                 /* First line: simple copy */
00874                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
00875                                           p_pic->p[i_plane].i_pitch );
00876                 p_out += p_pic->p[i_plane].i_pitch;
00877 
00878                 /* Remaining lines: mean value */
00879                 if( i_plane == Y_PLANE )
00880                 {
00881                     for( ; p_out < p_out_end ; )
00882                     {
00883                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
00884                                p_pic->p[i_plane].i_pitch );
00885 
00886                         p_out += p_pic->p[i_plane].i_pitch;
00887                         p_in += p_pic->p[i_plane].i_pitch;
00888                     }
00889                 }
00890 
00891                 else
00892                 {
00893                     for( ; p_out < p_out_end ; )
00894                     {
00895                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
00896                                p_pic->p[i_plane].i_pitch );
00897 
00898                         p_out += p_pic->p[i_plane].i_pitch;
00899                         p_in += 2*p_pic->p[i_plane].i_pitch;
00900                     }
00901                 }
00902                 break;
00903         }
00904     }
00905     EndMerge();
00906 }
00907 
00908 #undef Merge
00909 
/*****************************************************************************
 * MergeGeneric: portable C merge routine
 *****************************************************************************
 * Writes to _p_dest the byte-wise truncated mean ((a + b) >> 1) of the
 * i_bytes bytes found at _p_s1 and _p_s2.
 *
 * The loops are driven by a byte counter instead of an end pointer computed
 * as "dest + i_bytes - 8": that expression pointed in front of the buffer
 * whenever i_bytes was smaller than 8, which is undefined behaviour.
 *****************************************************************************/
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    size_t i_left = i_bytes;

    /* Main part, unrolled 8 times as in the historical implementation */
    while( i_left >= 8 )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_left -= 8;
    }

    /* Tail: fewer than 8 bytes remain */
    while( i_left > 0 )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_left--;
    }
}
00937 
#if defined(CAN_COMPILE_MMXEXT)
/*****************************************************************************
 * MergeMMX: merge routine using the pavgb instruction on MMX registers
 *****************************************************************************
 * Processes 8 bytes per iteration with inline assembly while more than
 * 8 bytes remain, then finishes byte by byte in C with (a + b) >> 1.
 * The mm1 register is used and no emms is issued here (see EndMMX).
 *
 * NOTE(review): p_end is "dest + i_bytes - 8", which points in front of the
 * buffer when i_bytes < 8 - confirm callers never pass such sizes.
 * NOTE(review): the asm memory operands are typed as single bytes although
 * movq moves 8 bytes, and pavgb presumably rounds up while the C tail
 * truncates - confirm against the Intel instruction reference.
 *****************************************************************************/
static void MergeMMX( void *_p_dest, const void *_p_s1, const void *_p_s2,
                      size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    uint8_t* p_end = p_dest + i_bytes - 8;
    /* Vector part: runs while strictly more than 8 bytes are left */
    while( p_dest < p_end )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2) );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Restore the real end of the destination for the scalar tail */
    p_end += 8;

    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
00966 
#if defined(CAN_COMPILE_SSE)
/*****************************************************************************
 * MergeSSE2: merge routine using the pavgb instruction on SSE2 registers
 *****************************************************************************
 * Writes to _p_dest the byte-wise mean of the i_bytes bytes at _p_s1 and
 * _p_s2. Three phases:
 *  - a scalar prologue until p_s1 is 16-byte aligned (pavgb takes p_s1 as a
 *    memory operand; p_s2 and p_dest go through unaligned movdqu),
 *  - a 16-bytes-per-iteration vector loop,
 *  - a scalar tail.
 *
 * The end of the destination is computed once from the untouched start
 * pointer. It used to be derived from p_dest *after* the prologue had
 * advanced it, so up to 15 bytes were read and written past the end of the
 * buffers whenever p_s1 was not aligned; the prologue also ignored i_bytes.
 *
 * The asm statement carries a "memory" clobber because its operands are
 * typed as single bytes while 16 bytes are actually accessed.
 *****************************************************************************/
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    uint8_t *const p_end = p_dest + i_bytes;

    /* Use C until the first 16-bytes aligned source pixel (or the end) */
    while( p_dest < p_end && ( (ptrdiff_t)p_s1 % 16 ) )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* Vector part: as before, runs while strictly more than 16 bytes are
     * left, so the split between vector and scalar bytes is unchanged for
     * aligned input */
    while( p_end - p_dest > 16 )
    {
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2) : "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Scalar tail */
    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
01000 
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: executes the emms instruction. Compiled for both the MMXEXT and
 * the SSE builds; presumably installed as the pf_end_merge callback so that
 * EndMerge() runs it once the merge calls of a picture are done - the
 * assignment is not visible in this part of the file. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
#endif
01007 
#ifdef CAN_COMPILE_C_ALTIVEC
/*****************************************************************************
 * MergeAltivec: merge routine using AltiVec vec_avg
 *****************************************************************************
 * Scalar C is used until the destination is 16-byte aligned, then 16 bytes
 * are produced per iteration with vec_avg, then the remaining bytes are
 * finished in scalar C. Two vector loops exist: one that rebuilds unaligned
 * sources with vec_lvsl/vec_perm and one for sources that are both aligned.
 *
 * NOTE(review): the alignment tests cast pointers to int; only the low four
 * bits are used, but the cast presumably truncates on 64-bit targets and
 * should probably be uintptr_t - confirm.
 * NOTE(review): the alignment prologue is not bounded by i_bytes and p_end
 * is "dest + i_bytes - 15", which lies in front of the buffer for
 * i_bytes < 15 - confirm callers always pass full lines.
 *****************************************************************************/
static void MergeAltivec( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;
    /* Last position from which a 16-byte store still fits, plus one */
    uint8_t *p_end  = p_dest + i_bytes - 15;

    /* Use C until the first 16-bytes aligned destination pixel */
    while( (int)p_dest & 0xF )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
    {
        /* Unaligned source */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        /* Permutation vectors and first blocks, loaded once before the loop */
        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        while( p_dest < p_end )
        {
            /* Load the following block and combine it with the previous
             * one through vec_perm to obtain the 16 source bytes */
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }
    else
    {
        /* Aligned source */
        vector unsigned char s1v, s2v, destv;

        while( p_dest < p_end )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }

    /* Restore the real end of the destination for the scalar tail */
    p_end += 15;

    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
01077 
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis; it copies the top
 * field and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a ME between the 2 fields; if there is a
 *    good match we use MC to recreate the bottom field (with a small
 *    blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge oriented
 *    interpolation.
 *****************************************************************************/
01090 
/* XDeint8x8Detect: detect if an 8x8 block is interlaced.
 * XXX: It needs access to 8x10 pixels
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9)
 * XXX: detection in smooth/uniform areas with noise doesn't work well,
 * but it's not really a problem because they don't have much detail anyway
 */
/* ssd: returns the square of a (one term of a sum of squared differences) */
static inline int ssd( int a )
{
    const int i_square = a * a;

    return i_square;
}
01099 static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
01100 {
01101     int y, x;
01102     int ff, fr;
01103     int fc;
01104 
01105     /* Detect interlacing */
01106     fc = 0;
01107     for( y = 0; y < 7; y += 2 )
01108     {
01109         ff = fr = 0;
01110         for( x = 0; x < 8; x++ )
01111         {
01112             fr += ssd(src[      x] - src[1*i_src+x]) +
01113                   ssd(src[i_src+x] - src[2*i_src+x]);
01114             ff += ssd(src[      x] - src[2*i_src+x]) +
01115                   ssd(src[i_src+x] - src[3*i_src+x]);
01116         }
01117         if( ff < 6*fr/8 && fr > 32 )
01118             fc++;
01119 
01120         src += 2*i_src;
01121     }
01122 
01123     return fc < 1 ? VLC_FALSE : VLC_TRUE;
01124 }
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8DetectMMXEXT: MMX version of the interlacing detector.
 * Uses the same per-group test as XDeint8x8DetectC (ff < 6*fr/8 && fr > 32)
 * with the sums of squared differences accumulated in mm5 (fr) and mm6 (ff).
 * Register comments below assume mmx.h's (source, destination) operand order.
 *
 * NOTE(review): the outer loop runs for y = 0,2,4,6,8 (5 groups) and thus
 * reads lines 0..11, whereas the C version runs 4 groups and reads lines
 * 0..9 - confirm which bound is intended.
 * NOTE(review): returns the raw group count (0..5) instead of
 * VLC_TRUE/VLC_FALSE; equivalent only when the caller tests for non-zero.
 * No emms is executed here.
 */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{

    int y, x;
    int32_t ff, fr;
    int fc;

    /* Detect interlacing */
    fc = 0;
    pxor_r2r( mm7, mm7 );                 /* mm7 = 0, used to widen bytes */
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        pxor_r2r( mm5, mm5 );             /* fr accumulator */
        pxor_r2r( mm6, mm6 );             /* ff accumulator */
        for( x = 0; x < 8; x+=4 )
        {
            /* 4 pixels from each of 4 consecutive lines */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            /* Widen to 16 bits */
            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            movq_r2r( mm0, mm4 );

            psubw_r2r( mm1, mm0 );        /* line0 - line1 */
            psubw_r2r( mm2, mm4 );        /* line0 - line2 */

            psubw_r2r( mm1, mm2 );        /* line2 - line1 */
            psubw_r2r( mm1, mm3 );        /* line3 - line1 */

            /* Square and pairwise add */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Horizontal add of the two 32-bit halves, then store to fr */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        /* Same for ff */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
#endif
01189 
/* XDeint8x8Frame: apply a small blend between fields (1,6,1).
 * This won't destroy details, and helps if there is a bit of interlacing.
 * (It helps with panning to avoid flicker)
 * (Uses 8x9 pixels)
 * The C implementation below is disabled by "#if 0".
 */
#if 0
static inline void XDeint8x8FrameC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < 8; y += 2 )
    {
        /* Even line: copied */
        memcpy( dst, src, 8 );
        dst += i_dst;

        /* Odd line: (1,6,1) blend of the lines above, at and below it */
        for( x = 0; x < 8; x++ )
            dst[x] = (src[x] + 6*src[1*i_src+x] + src[2*i_src+x] + 4 ) >> 3;
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
/* XDeint8x8MergeC: builds an 8x8 block out of two line sources.
 * For each of the 4 line pairs of dst:
 *  - the first line is a copy of the current src1 line,
 *  - the second line is the (1,6,1) blend, rounded, of the current src1
 *    line, the current src2 line and the next src1 line.
 * src1 advances by i_src1 and src2 by i_src2 per pair, so 5 lines of src1
 * and 4 lines of src2 are read.
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    int i_pair;

    for( i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_upper = src1 + i_pair * i_src1;
        const uint8_t *p_lower = p_upper + i_src1;
        const uint8_t *p_center = src2 + i_pair * i_src2;
        uint8_t *p_copy = dst + ( 2 * i_pair ) * i_dst;
        uint8_t *p_blend = p_copy + i_dst;
        int i_col;

        memcpy( p_copy, p_upper, 8 );

        for( i_col = 0; i_col < 8; i_col++ )
        {
            p_blend[i_col] = ( p_upper[i_col] + 6 * p_center[i_col] +
                               p_lower[i_col] + 4 ) >> 3;
        }
    }
}
01234 
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8MergeMMXEXT: MMX counterpart of XDeint8x8MergeC.
 * Per line pair and per group of 4 pixels: the src1 pixels are stored to the
 * first dst line, and (src1 + 6*src2 + next src1 line + 4) >> 3 is stored to
 * the second dst line. Register comments assume mmx.h's
 * (source, destination) operand order. No emms is executed here.
 */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding term, 4 in each of the four 16-bit lanes */
    static const uint64_t m_4 = I64C(0x0004000400040004);
    int y, x;

    /* Progressive */
    pxor_r2r( mm7, mm7 );                  /* mm7 = 0, used to widen bytes */
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* First line of the pair: straight copy of src1 */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            paddw_r2r( mm1, mm1 );         /* mm1 = 2*src2 */
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );         /* mm3 = 4*src2 */
            paddw_r2r( mm2, mm0 );         /* mm0 = src1 + next src1 line */
            paddw_r2r( mm3, mm1 );         /* mm1 = 6*src2 */
            paddw_m2r( m_4, mm1 );         /* + rounding */
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            /* Second line of the pair: blended result */
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}

#endif
01276 
/* XDeint8x8Set: debugging helper, fills the 8x8 block at dst with value v.
 * i_dst is the distance in bytes between two lines of dst. */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    int i_line = 8;

    /* The lines are independent, so the fill order does not matter */
    while( i_line-- > 0 )
    {
        memset( dst + i_line * i_dst, v, 8 );
    }
}
01284 
/* XDeint8x8FieldE: simple (1,0,1) deinterlacing for blocks that miss a
 * neighbour.
 * Even dst lines are copies of src lines 0, 2, 4 and 6; each odd dst line is
 * the truncated mean of the src lines two apart that surround it.
 * (Uses 8x9 pixels)
 * TODO: a better one for the inner part.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    int i_pair;

    for( i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_above = src + ( 2 * i_pair ) * i_src;
        const uint8_t *p_below = p_above + 2 * i_src;
        uint8_t *p_kept = dst + ( 2 * i_pair ) * i_dst;
        uint8_t *p_rebuilt = p_kept + i_dst;
        int i_col;

        memcpy( p_kept, p_above, 8 );

        for( i_col = 0; i_col < 8; i_col++ )
        {
            p_rebuilt[i_col] = ( p_above[i_col] + p_below[i_col] ) >> 1;
        }
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldEMMXEXT: MMX counterpart of XDeint8x8FieldEC.
 * Each iteration copies one 8-pixel src line to dst, then stores the pavgb
 * of that line and the src line two below it as the next dst line.
 * NOTE(review): pavgb presumably rounds up whereas the C version truncates
 * with ">> 1" - confirm against the Intel instruction reference.
 * No emms is executed here.
 */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Rebuilt line: average with the line two below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
01330 
/* XDeint8x8Field: Edge oriented interpolation
 * (Needs -4 and +5 pixels horizontally, +1 line)
 *
 * Even dst lines are copies of src lines 0, 2, 4 and 6. For each pixel of
 * an odd dst line three 8-tap sums of absolute differences are taken
 * between the src line above and the src line below:
 *  - c0: the line below shifted by +2 pixels relative to the line above,
 *  - c1: no shift,
 *  - c2: the line below shifted by -2 pixels.
 * When c0 < c1 <= c2 the pixel is the mean of above[x-1] and below[x+1],
 * when c2 < c1 <= c0 the mean of above[x+1] and below[x-1], otherwise the
 * mean of above[x] and below[x].
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    int i_pair;

    for( i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_above = src + ( 2 * i_pair ) * i_src;
        const uint8_t *p_below = p_above + 2 * i_src;
        uint8_t *p_kept = dst + ( 2 * i_pair ) * i_dst;
        uint8_t *p_rebuilt = p_kept + i_dst;
        int x;

        memcpy( p_kept, p_above, 8 );

        for( x = 0; x < 8; x++ )
        {
            /* 8 taps just to match the MMX version, it's overkill:
             * 5 would be enough (less isn't good) */
            int c0 = 0;
            int c1 = 0;
            int c2 = 0;
            int i_tap;

            for( i_tap = 0; i_tap < 8; i_tap++ )
            {
                c0 += abs( p_above[x - 4 + i_tap] - p_below[x - 2 + i_tap] );
                c1 += abs( p_above[x - 3 + i_tap] - p_below[x - 3 + i_tap] );
                c2 += abs( p_above[x - 2 + i_tap] - p_below[x - 4 + i_tap] );
            }

            if( c0 < c1 && c1 <= c2 )
            {
                p_rebuilt[x] = ( p_above[x - 1] + p_below[x + 1] ) >> 1;
            }
            else if( c2 < c1 && c1 <= c0 )
            {
                p_rebuilt[x] = ( p_above[x + 1] + p_below[x - 1] ) >> 1;
            }
            else
            {
                p_rebuilt[x] = ( p_above[x] + p_below[x] ) >> 1;
            }
        }
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldMMXEXT: edge oriented interpolation with the three 8-pixel
 * sums of absolute differences computed by psadbw. c0, c1 and c2 cover the
 * same pixel ranges as in XDeint8x8FieldC and the selection of the
 * interpolation direction is identical. Register comments assume mmx.h's
 * (source, destination) operand order. No emms is executed here.
 */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            uint8_t *src2 = &src[2*i_src];   /* line two below src */
            int32_t c0, c1, c2;

            /* 8 pixels of the upper line at offsets -2, -3 and -4 */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* SAD against the lower line at offsets -4, -3 and -2 */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
01419 
/* Disabled code: sum of squared differences between two 8x8 blocks, in C
 * and in MMX. Nothing below is compiled. */
#if 0
/* XDeint8x8SsdC: returns the sum over the 8x8 block of the squared
 * differences between pix1 and pix2 (line strides i_pix1 and i_pix2). */
static inline int XDeint8x8SsdC( uint8_t *pix1, int i_pix1,
                                 uint8_t *pix2, int i_pix2 )
{
    int y, x;
    int s = 0;

    for( y = 0; y < 8; y++ )
        for( x = 0; x < 8; x++ )
            s += ssd( pix1[y*i_pix1+x] - pix2[y*i_pix2+x] );
    return s;
}

#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8SsdMMXEXT: MMX version of XDeint8x8SsdC; the running sum is kept
 * in mm6 as two 32-bit halves that are added together at the end. */
static inline int XDeint8x8SsdMMXEXT( uint8_t *pix1, int i_pix1,
                                      uint8_t *pix2, int i_pix2 )
{
    int y;
    int32_t s;

    pxor_r2r( mm7, mm7 );
    pxor_r2r( mm6, mm6 );

    for( y = 0; y < 8; y++ )
    {
        movq_m2r( pix1[0], mm0 );
        movq_m2r( pix2[0], mm1 );

        movq_r2r( mm0, mm2 );
        movq_r2r( mm1, mm3 );

        /* Widen the low and high 4 pixels of both lines to 16 bits */
        punpcklbw_r2r( mm7, mm0 );
        punpckhbw_r2r( mm7, mm2 );
        punpcklbw_r2r( mm7, mm1 );
        punpckhbw_r2r( mm7, mm3 );

        psubw_r2r( mm1, mm0 );
        psubw_r2r( mm3, mm2 );

        /* Square and pairwise add */
        pmaddwd_r2r( mm0, mm0 );
        pmaddwd_r2r( mm2, mm2 );

        paddd_r2r( mm2, mm0 );
        paddd_r2r( mm0, mm6 );

        pix1 += i_pix1;
        pix2 += i_pix2;
    }

    /* Horizontal add of the two halves of the accumulator */
    movq_r2r( mm6, mm7 );
    psrlq_i2r( 32, mm7 );
    paddd_r2r( mm6, mm7 );
    movd_r2m( mm7, s );

    return s;
}
#endif
#endif
01478 
/* Disabled code: motion estimation / compensation experiment.
 * Nothing below is compiled. */
#if 0
/* A little try with motion, but it doesn't work better than pure intra (and is slow) */
#ifdef CAN_COMPILE_MMXEXT
/* XDeintMC:
 *  Bilinear MC QPel
 *  mvx/mvy are in quarter pixels: the two low bits select the bilinear
 *  weights, the remaining bits the integer displacement.
 *  TODO: mmx version (easier in sse2)
 */
static inline void XDeintMC( uint8_t *dst, int i_dst,
                             uint8_t *src, int i_src,
                             int mvx, int mvy,
                             int i_width, int i_height )
{
    const int d4x = mvx&0x03;
    const int d4y = mvy&0x03;

    /* Weights of the four neighbours, they add up to 16 */
    const int cA = (4-d4x)*(4-d4y);
    const int cB = d4x    *(4-d4y);
    const int cC = (4-d4x)*d4y;
    const int cD = d4x    *d4y;

    int y, x;
    uint8_t *srcp;


    src  += (mvy >> 2) * i_src + (mvx >> 2);
    srcp = &src[i_src];

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = ( cA*src[x]  + cB*src[x+1] +
                       cC*srcp[x] + cD*srcp[x+1] + 8 ) >> 4;
        }
        dst  += i_dst;

        src   = srcp;
        srcp += i_src;
    }
}
/* XDeint8x4SadMMXEXT: sum of absolute differences between two blocks of
 * 4 lines of 8 pixels, computed with psadbw. */
static int XDeint8x4SadMMXEXT( uint8_t *pix1, int i_pix1,
                               uint8_t *pix2, int i_pix2 )
{
    int32_t s;

    movq_m2r( pix1[0*i_pix1], mm0 );
    movq_m2r( pix1[1*i_pix1], mm1 );

    psadbw_m2r( pix2[0*i_pix2], mm0 );
    psadbw_m2r( pix2[1*i_pix2], mm1 );

    movq_m2r( pix1[2*i_pix1], mm2 );
    movq_m2r( pix1[3*i_pix1], mm3 );
    psadbw_m2r( pix2[2*i_pix2], mm2 );
    psadbw_m2r( pix2[3*i_pix2], mm3 );

    paddd_r2r( mm1, mm0 );
    paddd_r2r( mm3, mm2 );
    paddd_r2r( mm2, mm0 );
    movd_r2m( mm0, s );

    return s;
}

/* XDeint8x4TestQpel: cost of the quarter-pel vector (mx,my); vectors whose
 * magnitude reaches 4*xmax or 4*ymax get the constant 255*255*255. */
static inline int XDeint8x4TestQpel( uint8_t *src, int i_src,
                                     uint8_t *ref, int i_stride,
                                     int mx, int my,
                                     int xmax, int ymax )
{
    uint8_t buffer[8*4];

    if( abs(mx) >= 4*xmax || abs(my) >= 4*ymax )
        return 255*255*255;

    XDeintMC( buffer, 8, ref, i_stride, mx, my, 8, 4 );
    return XDeint8x4SadMMXEXT( src, i_src, buffer, 8 );
}
/* XDeint8x4TestInt: cost of the integer-pel vector (mx,my); vectors whose
 * magnitude reaches xmax or ymax get the constant 255*255*255. */
static inline int XDeint8x4TestInt( uint8_t *src, int i_src,
                                    uint8_t *ref, int i_stride,
                                    int mx, int my,
                                    int xmax, int ymax )
{
    if( abs(mx) >= xmax || abs(my) >= ymax )
        return 255*255*255;

    return XDeint8x4SadMMXEXT( src, i_src, &ref[my*i_stride+mx], i_stride );
}

/* XDeint8x8FieldMotion: rebuilds the missing field with the intra method,
 * then searches a small motion vector against the other field and, when the
 * match is good enough (cost < 128), replaces the block by a motion
 * compensated (1,6,1) merge. *mpx/*mpy carry the predicted vector in and
 * the integer vector found out.
 * NOTE(review): the inner "int s" declarations shadow the outer one. */
static inline void XDeint8x8FieldMotion( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src,
                                         int *mpx, int *mpy,
                                         int xmax, int ymax )
{
    /* Search pattern: the 4 direct neighbours first, then the diagonals */
    static const int dx[8] = { 0,  0, -1, 1, -1, -1,  1, 1 };
    static const int dy[8] = {-1,  1,  0, 0, -1,  1, -1, 1 };
    uint8_t *next = &src[i_src];
    const int i_src2 = 2*i_src;
    int mvx, mvy;
    int mvs, s;
    int i_step;

    uint8_t *rec = &dst[i_dst];

    /* We construct with intra method the missing field */
    XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );

    /* Now we will try to find a match with ME with the other field */

    /* ME: A small/partial EPZS
     * We search only for small MV (with high motion intra will be perfect) */
    if( xmax > 4 ) xmax = 4;
    if( ymax > 4 ) ymax = 4;

    /* Init with NULL Mv */
    mvx = mvy = 0;
    mvs = XDeint8x4SadMMXEXT( rec, i_src2, next, i_src2 );

    /* Try predicted Mv */
    if( (s=XDeint8x4TestInt( rec, i_src2, next, i_src2, *mpx, *mpy, xmax, ymax)) < mvs )
    {
        mvs = s;
        mvx = *mpx;
        mvy = *mpy;
    }
    /* Search integer pel (small mv) */
    for( i_step = 0; i_step < 4; i_step++ )
    {
        int c = 4;
        int s;
        int i;

        for( i = 0; i < 4; i++ )
        {
            s = XDeint8x4TestInt( rec, i_src2,
                                  next, i_src2, mvx+dx[i], mvy+dy[i],
                                  xmax, ymax );
            if( s < mvs )
            {
                mvs = s;
                c = i;
            }
        }
        /* No neighbour improved the cost: stop */
        if( c == 4 )
            break;

        mvx += dx[c];
        mvy += dy[c];
    }
    *mpx = mvx;
    *mpy = mvy;

    /* Switch to quarter-pel units */
    mvx <<= 2;
    mvy <<= 2;

    if( mvs > 4 && mvs < 256 )
    {
        /* Search Qpel */
        /* XXX: for now only HPEL (too slow) */
        for( i_step = 0; i_step < 4; i_step++ )
        {
            int c = 8;
            int s;
            int i;

            for( i = 0; i < 8; i++ )
            {
                s = XDeint8x4TestQpel( rec, i_src2, next, i_src2,
                                       mvx+dx[i], mvy+dy[i],
                                       xmax, ymax );
                if( s < mvs )
                {
                    mvs = s;
                    c = i;
                }
            }
            if( c == 8 )
                break;

            mvx += dx[c];
            mvy += dy[c];
        }
    }

    if( mvs < 128 )
    {
        uint8_t buffer[8*4];
        XDeintMC( buffer, 8, next, i_src2, mvx, mvy, 8, 4 );
        XDeint8x8MergeMMXEXT( dst, i_dst, src, 2*i_src, buffer, 8 );

        //XDeint8x8Set( dst, i_dst, 0 );
    }
}
#endif
#endif
01673 
/* Disabled code: alternative field interpolation. Nothing below is
 * compiled. */
#if 0
/* Kernel interpolation (1,-5,20,20,-5,1)
 * Loses a bit more detail and adds more aliasing than edge interpolation,
 * but avoids more artifacts
 */
/* clip1: clamps a to the 0..255 range */
static inline uint8_t clip1( int a )
{
    if( a <= 0 )
        return 0;
    else if( a >= 255 )
        return 255;
    else
        return a;
}
/* XDeint8x8Field: even dst lines are copies of src lines; odd dst lines are
 * the 6-tap vertical filter applied to src lines taken every 2*i_src
 * (offsets -2..+3), rounded and clamped by clip1. */
static inline void XDeint8x8Field( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        const int i_src2 = i_src*2;

        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            int pix;

            pix =   1*(src[-2*i_src2+x]+src[3*i_src2+x]) +
                   -5*(src[-1*i_src2+x]+src[2*i_src2+x])
                  +20*(src[ 0*i_src2+x]+src[1*i_src2+x]);

            dst[x] = clip1( ( pix + 16 ) >> 5 );
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}

#endif
01718 
01719 /* NxN arbitray size (and then only use pixel in the NxN block)
01720  */
01721 static inline int XDeintNxNDetect( uint8_t *src, int i_src,
01722                                    int i_height, int i_width )
01723 {
01724     int y, x;
01725     int ff, fr;
01726     int fc;
01727 
01728 
01729     /* Detect interlacing */
01730     /* FIXME way too simple, need to be more like XDeint8x8Detect */
01731     ff = fr = 0;
01732     fc = 0;
01733     for( y = 0; y < i_height - 2; y += 2 )
01734     {
01735         const uint8_t *s = &src[y*i_src];
01736         for( x = 0; x < i_width; x++ )
01737         {
01738             fr += ssd(s[      x] - s[1*i_src+x]);
01739             ff += ssd(s[      x] - s[2*i_src+x]);
01740         }
01741         if( ff < fr && fr > i_width / 2 )
01742             fc++;
01743     }
01744 
01745     return fc < 2 ? VLC_FALSE : VLC_TRUE;
01746 }
01747 
/* XDeintNxNFrame: process an i_width x i_height block treated as progressive.
 *
 * Even lines are copied verbatim.  Each odd line is replaced by a vertical
 * (1,2,1)/4 low-pass of the source lines around it; the last odd line, which
 * has no line below it inside the block, is the average of the two lines
 * above/at its position.
 *
 * dst/i_dst: output block and its pitch
 * src/i_src: input block and its pitch
 * i_width, i_height: block size in pixels / lines
 *
 * When i_height is odd, the trailing even line is only copied: no line past
 * the block is read or written.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );

        /* Odd height: line y+1 lies outside the block, so there is nothing
         * left to generate (and touching it would overrun both buffers). */
        if( y + 1 >= i_height )
            break;

        dst += i_dst;

        if( y < i_height - 2 )
        {
            /* +2 rounds before the division by 4 */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
01775 
/* XDeintNxNField: process an i_width x i_height block treated as interlaced.
 *
 * Even lines are copied verbatim.  Each odd line is rebuilt as the average of
 * the even lines just above and below it (the original odd line is ignored);
 * the last odd line, which has no even line below it inside the block, is the
 * average of the even line above it and the original odd line.
 *
 * dst/i_dst: output block and its pitch
 * src/i_src: input block and its pitch
 * i_width, i_height: block size in pixels / lines
 *
 * When i_height is odd, the trailing even line is only copied: no line past
 * the block is read or written.
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );

        /* Odd height: line y+1 lies outside the block, so there is nothing
         * left to generate (and touching it would overrun both buffers). */
        if( y + 1 >= i_height )
            break;

        dst += i_dst;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
01803 
/* XDeintNxN: deinterlace an arbitrary i_width x i_height block.
 *
 * Runs XDeintNxNDetect() on the block and dispatches to XDeintNxNField()
 * when it reports interlacing, to XDeintNxNFrame() otherwise.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect() takes the height BEFORE the width, unlike the other
     * NxN helpers: passing them in (width, height) order made the detector
     * scan a transposed rectangle, possibly outside the block. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
01812 
01813 
/* Return the median of three integers, computed as the sum of the three
 * values minus the smallest and the largest. */
static inline int median( int a, int b, int c )
{
    /* Order a and b first */
    int i_low  = ( b < a ) ? b : a;
    int i_high = ( b < a ) ? a : b;

    /* Then let c widen the range if it falls outside of it */
    if( c < i_low )
        i_low = c;
    else if( c > i_high )
        i_high = c;

    return a + b + c - i_low - i_high;
}
01829 
01830 
/* XDeintBand8x8C: process one 8-line band of a plane, 8x8 block by 8x8 block
 * (plain C version).
 *
 * i_mbx is the number of full 8-pixel-wide blocks in the band and i_modx the
 * width of the remaining partial block (0 if none).
 *
 * For each full block, XDeint8x8DetectC() decides how it is rendered:
 *   - non-zero: XDeint8x8FieldEC() for the first and last block of the band,
 *     XDeint8x8FieldC() for the others;
 *   - zero: XDeint8x8MergeC() of the even and odd lines of the block.
 * The partial block, if any, goes through XDeintNxN().
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;
    int i_mb;

    for( i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Merge the two fields: even lines with odd lines */
            XDeint8x8MergeC( dst, i_dst,
                             src, 2*i_src,
                             src + i_src, 2*i_src );
            continue;
        }

        if( i_mb != 0 && i_mb != i_last )
            XDeint8x8FieldC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
    }

    /* Columns left over after the last full block */
    if( i_modx != 0 )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
01863 #ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT: same as XDeintBand8x8C, but the full 8x8 blocks go
 * through the MMXEXT variants of the detect/field/merge helpers.
 *
 * For each of the i_mbx full blocks, XDeint8x8DetectMMXEXT() decides how it
 * is rendered:
 *   - non-zero: XDeint8x8FieldEMMXEXT() for the first and last block of the
 *     band, XDeint8x8FieldMMXEXT() for the others;
 *   - zero: XDeint8x8MergeMMXEXT() of the even and odd lines of the block.
 * The trailing partial block of width i_modx, if any, goes through the
 * generic XDeintNxN().
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;
    int i_mb;

    for( i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Merge the two fields: even lines with odd lines */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  src, 2*i_src,
                                  src + i_src, 2*i_src );
            continue;
        }

        if( i_mb != 0 && i_mb != i_last )
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
    }

    /* Columns left over after the last full block */
    if( i_modx != 0 )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
01895 #endif
01896 
01897 static void RenderX( vout_thread_t *p_vout,
01898                      picture_t *p_outpic, picture_t *p_pic )
01899 {
01900     int i_plane;
01901 
01902     /* Copy image and skip lines */
01903     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
01904     {
01905         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
01906         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
01907 
01908         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
01909         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
01910 
01911         const int i_dst = p_outpic->p[i_plane].i_pitch;
01912         const int i_src = p_pic->p[i_plane].i_pitch;
01913 
01914         int y, x;
01915 
01916         for( y = 0; y < i_mby; y++ )
01917         {
01918             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
01919             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
01920 
01921 #ifdef CAN_COMPILE_MMXEXT
01922             if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMXEXT )
01923                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
01924             else
01925 #endif
01926                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
01927         }
01928 
01929         /* Last line (C only)*/
01930         if( i_mody )
01931         {
01932             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
01933             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
01934 
01935             for( x = 0; x < i_mbx; x++ )
01936             {
01937                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
01938 
01939                 dst += 8;
01940                 src += 8;
01941             }
01942 
01943             if( i_modx )
01944                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
01945         }
01946     }
01947 
01948 #ifdef CAN_COMPILE_MMXEXT
01949     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMXEXT )
01950         emms();
01951 #endif
01952 }
01953 
01954 /*****************************************************************************
01955  * SendEvents: forward mouse and keyboard events to the parent p_vout
01956  *****************************************************************************/
01957 static int SendEvents( vlc_object_t *p_this, char const *psz_var,
01958                        vlc_value_t oldval, vlc_value_t newval, void *_p_vout )
01959 {
01960     vout_thread_t *p_vout = (vout_thread_t *)_p_vout;
01961     vlc_value_t sentval = newval;
01962 
01963     if( !strcmp( psz_var, "mouse-y" ) )
01964     {
01965         switch( p_vout->p_sys->i_mode )
01966         {
01967             case DEINTERLACE_MEAN:
01968             case DEINTERLACE_DISCARD:
01969                 sentval.i_int *= 2;
01970                 break;
01971         }
01972     }
01973 
01974     var_Set( p_vout, psz_var, sentval );
01975 
01976     return VLC_SUCCESS;
01977 }
01978 
01979 /*****************************************************************************
01980  * FilterCallback: called when changing the deinterlace method on the fly.
01981  *****************************************************************************/
01982 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
01983                            vlc_value_t oldval, vlc_value_t newval,
01984                            void *p_data )
01985 {
01986     vout_thread_t * p_vout = (vout_thread_t *)p_this;
01987     int i_old_mode = p_vout->p_sys->i_mode;
01988 
01989     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
01990 
01991     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
01992 
01993     SetFilterMethod( p_vout, newval.psz_string );
01994 
01995     switch( p_vout->render.i_chroma )
01996     {
01997     case VLC_FOURCC('I','4','2','2'):
01998         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
01999         return VLC_SUCCESS;
02000         break;
02001 
02002     case VLC_FOURCC('I','4','2','0'):
02003     case VLC_FOURCC('I','Y','U','V'):
02004     case VLC_FOURCC('Y','V','1','2'):
02005         switch( p_vout->p_sys->i_mode )
02006         {
02007         case DEINTERLACE_MEAN:
02008         case DEINTERLACE_DISCARD:
02009             if( ( i_old_mode == DEINTERLACE_MEAN )
02010                 || ( i_old_mode == DEINTERLACE_DISCARD ) )
02011             {
02012                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
02013                 return VLC_SUCCESS;
02014             }
02015             break;
02016 
02017         case DEINTERLACE_BOB:
02018         case DEINTERLACE_BLEND:
02019         case DEINTERLACE_LINEAR:
02020             if( ( i_old_mode == DEINTERLACE_BOB )
02021                 || ( i_old_mode == DEINTERLACE_BLEND )
02022                 || ( i_old_mode == DEINTERLACE_LINEAR ) )
02023             {
02024                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
02025                 return VLC_SUCCESS;
02026             }
02027             break;
02028         }
02029         break;
02030 
02031     default:
02032         break;
02033     }
02034 
02035     /* We need to kill the old vout */
02036 
02037     DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
02038 
02039     vlc_object_detach( p_vout->p_sys->p_vout );
02040     vout_Destroy( p_vout->p_sys->p_vout );
02041 
02042     /* Try to open a new video output */
02043     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
02044 
02045     if( p_vout->p_sys->p_vout == NULL )
02046     {
02047         /* Everything failed */
02048         msg_Err( p_vout, "cannot open vout, aborting" );
02049 
02050         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
02051         return VLC_EGENERIC;
02052     }
02053 
02054     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
02055 
02056     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
02057     return VLC_SUCCESS;
02058 }
02059 
02060 /*****************************************************************************
02061  * SendEventsToChild: forward events to the child/children vout
02062  *****************************************************************************/
02063 static int SendEventsToChild( vlc_object_t *p_this, char const *psz_var,
02064                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
02065 {
02066     vout_thread_t *p_vout = (vout_thread_t *)p_this;
02067     var_Set( p_vout->p_sys->p_vout, psz_var, newval );
02068     return VLC_SUCCESS;
02069 }
02070 
02071 
02072 /*****************************************************************************
02073  * video filter2 functions
02074  *****************************************************************************/
02075 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
02076 {
02077     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
02078     picture_t *p_pic_dst;
02079 
02080     /* Request output picture */
02081     p_pic_dst = p_filter->pf_vout_buffer_new( p_filter );
02082     if( p_pic_dst == NULL )
02083     {
02084         msg_Warn( p_filter, "can't get output picture" );
02085         return NULL;
02086     }
02087 
02088     switch( p_vout->p_sys->i_mode )
02089     {
02090         case DEINTERLACE_DISCARD:
02091 #if 0
02092             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
02093 #endif
02094             msg_Err( p_vout, "discarding lines is not supported yet" );
02095             p_pic_dst->pf_release( p_pic_dst );
02096             return p_pic;
02097             break;
02098 
02099         case DEINTERLACE_BOB:
02100 #if 0
02101             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
02102             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
02103             break;
02104 #endif
02105 
02106         case DEINTERLACE_LINEAR:
02107 #if 0
02108             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
02109             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
02110 #endif
02111             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
02112             p_pic_dst->pf_release( p_pic_dst );
02113             return p_pic;
02114             break;
02115 
02116         case DEINTERLACE_MEAN:
02117             RenderMean( p_vout, p_pic_dst, p_pic );
02118             break;
02119 
02120         case DEINTERLACE_BLEND:
02121             RenderBlend( p_vout, p_pic_dst, p_pic );
02122             break;
02123 
02124         case DEINTERLACE_X:
02125             RenderX( p_vout, p_pic_dst, p_pic );
02126             break;
02127     }
02128 
02129     p_pic_dst->date = p_pic->date;
02130     p_pic_dst->b_force = p_pic->b_force;
02131     p_pic_dst->i_nb_fields = p_pic->i_nb_fields;
02132     p_pic_dst->b_progressive = VLC_TRUE;
02133     p_pic_dst->b_top_field_first = p_pic->b_top_field_first;
02134 
02135     p_pic->pf_release( p_pic );
02136     return p_pic_dst;
02137 }
02138 
02139 /*****************************************************************************
02140  * OpenFilter:
02141  *****************************************************************************/
02142 static int OpenFilter( vlc_object_t *p_this )
02143 {
02144     filter_t *p_filter = (filter_t*)p_this;
02145     vout_thread_t *p_vout;
02146     vlc_value_t val;
02147 
02148     if( ( p_filter->fmt_in.video.i_chroma != VLC_FOURCC('I','4','2','0') &&
02149           p_filter->fmt_in.video.i_chroma != VLC_FOURCC('I','Y','U','V') &&
02150           p_filter->fmt_in.video.i_chroma != VLC_FOURCC('Y','V','1','2') ) ||
02151         p_filter->fmt_in.video.i_chroma != p_filter->fmt_out.video.i_chroma )
02152     {
02153         return VLC_EGENERIC;
02154     }
02155 
02156     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
02157      * by spu filters */
02158     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
02159     vlc_object_attach( p_vout, p_filter );
02160     p_filter->p_sys = (filter_sys_t *)p_vout;
02161     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
02162 
02163     sout_CfgParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
02164                    p_filter->p_cfg );
02165     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
02166     var_Create( p_filter, "deinterlace-mode", VLC_VAR_STRING );
02167     var_Set( p_filter, "deinterlace-mode", val );
02168 
02169     if ( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
02170     {
02171         vlc_object_detach( p_vout );
02172         vlc_object_release( p_vout );
02173         return VLC_EGENERIC;
02174     }
02175 
02176     p_filter->pf_video_filter = Deinterlace;
02177 
02178     msg_Dbg( p_filter, "deinterlacing" );
02179 
02180     return VLC_SUCCESS;
02181 }
02182 
02183 /*****************************************************************************
02184  * CloseFilter: clean up the filter
02185  *****************************************************************************/
02186 static void CloseFilter( vlc_object_t *p_this )
02187 {
02188     filter_t *p_filter = (filter_t*)p_this;
02189     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
02190 
02191     Destroy( VLC_OBJECT(p_vout) );
02192     vlc_object_detach( p_vout );
02193     vlc_object_release( p_vout );
02194 }
02195