00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052 #include <inttypes.h>
00053 #include <string.h>
00054 #include <math.h>
00055 #include <stdio.h>
00056 #include "config.h"
00057 #include <assert.h>
00058 #ifdef HAVE_MALLOC_H
00059 #include <malloc.h>
00060 #else
00061 #include <stdlib.h>
00062 #endif
00063 #include "swscale.h"
00064 #include "swscale_internal.h"
00065 #include "common.h"
00066 #include "rgb2rgb.h"
00067 #define RUNTIME_CPUDETECT 1
00068
/* Build-time switches and small helper macros for this file. */

/* Drop any earlier definition of these two helper macros.
   NOTE(review): presumably they are (re)defined per CPU variant by the
   included template file - not visible in this chunk, confirm. */
00069 #undef MOVNTQ
00070 #undef PAVGB
00071
00072
00073
00074
00075
00076
/* NOTE(review): no use of DITHER1XBPP is visible in this chunk; presumably it
   enables dithering in the low-bit-depth output code - confirm. */
00077 #define DITHER1XBPP
00078
/* When defined, the x86 bgr2{Y,U,V}Coeff constants of this file use the
   7-bit coefficient set instead of the 15-bit one. */
00079 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
00080
00081 #define RET 0xC3 //near return opcode for X86
00082
/* ASSERT(x) already contains its terminating ';' (assert(x); with MP_DEBUG,
   an empty statement otherwise), so call sites are written WITHOUT a
   trailing semicolon. */
00083 #ifdef MP_DEBUG
00084 #define ASSERT(x) assert(x);
00085 #else
00086 #define ASSERT(x) ;
00087 #endif
00088
/* PI: use M_PI when it is defined, otherwise fall back to a literal. */
00089 #ifdef M_PI
00090 #define PI M_PI
00091 #else
00092 #define PI 3.14159265358979323846
00093 #endif
00094
00095
/* Pixel-format classification helpers. x is an IMGFMT_* value.
   Every macro expands its argument several times, so x must be free of
   side effects.
     isPlanarYUV   - one of the planar YUV layouts listed below
     isYUV         - packed UYVY/YUY2 or any planar YUV layout
     isGray        - 8-bit gray (Y800)
     isRGB / isBGR - format matches IMGFMT_RGB / IMGFMT_BGR under the
                     corresponding mask
     isSupportedIn / isSupportedOut - formats accepted as source / destination
     isPacked      - YUY2, UYVY or any RGB/BGR format */
00096 #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
00097 || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
00098 #define isYUV(x) ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
00099 #define isGray(x) ((x)==IMGFMT_Y800)
00100 #define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
00101 #define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
00102 #define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
00103 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
00104 || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
00105 || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
00106 || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
00107 #define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
00108 || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
00109 || isRGB(x) || isBGR(x)\
00110 || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
00111 #define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))
00112
/* RGB -> YUV conversion coefficients as integers scaled by
   (1 << RGB2YUV_SHIFT). 0.5 is added before the conversion to int; the
   conversion itself truncates toward zero.
   Naming: first letter = source channel (R/G/B), second letter = target
   component (Y/U/V).
   NOTE(review): the values look like the ITU-R BT.601 studio-range matrix -
   confirm before relying on that. */
00113 #define RGB2YUV_SHIFT 16
00114 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
00115 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
00116 #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
00117 #define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
00118 #define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
00119 #define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
00120 #define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
00121 #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
00122 #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
00123
/* Declared here, defined in another translation unit. */
00124 extern const int32_t Inverse_Table_6_9[8][4];
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
/* Small arithmetic helpers.
   Each argument can be expanded (and therefore evaluated) more than once,
   so never pass expressions with side effects. */
#define ABS(v)      (0 < (v) ? (v) : (-(v)))
#define MIN(x,y)    ((y) < (x) ? (y) : (x))
#define MAX(x,y)    ((y) > (x) ? (y) : (x))
00144
/* 64-bit, 8-byte aligned constants and scratch variables, compiled only for
   x86. The only C-level reference to most of them in this file is the dummy
   sum in in_asm_used_var_warning_killer().
   NOTE(review): presumably they are consumed by the inline-asm code paths,
   and attribute_used (defined outside this chunk) presumably keeps the
   compiler from discarding them - confirm. */
00145 #ifdef ARCH_X86
/* byte / word patterns and byte-select masks */
00146 static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
00147 static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
00148 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
00149 static uint64_t attribute_used __attribute__((aligned(8))) w02= 0x0002000200020002LL;
00150 static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
00151 static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
00152 static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
00153 static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
00154
/* Zero-initialised, volatile scratch values; nothing in this chunk writes
   them. NOTE(review): presumably loaded with the current dither pattern
   before the asm runs - confirm. */
00155 static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
00156 static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
00157 static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
00158 static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
00159
/* two-entry dither pattern tables, four 16-bit values per entry */
00160 static uint64_t __attribute__((aligned(8))) dither4[2]={
00161 0x0103010301030103LL,
00162 0x0200020002000200LL,};
00163
00164 static uint64_t __attribute__((aligned(8))) dither8[2]={
00165 0x0602060206020602LL,
00166 0x0004000400040004LL,};
00167
/* per-channel bit masks for 16-bit (5-6-5) and 15-bit (5-5-5) pixels,
   replicated over four 16-bit lanes */
00168 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
00169 static uint64_t attribute_used __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
00170 static uint64_t attribute_used __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
00171 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
00172 static uint64_t attribute_used __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
00173 static uint64_t attribute_used __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
00174
/* masks selecting every third byte, at three different phases */
00175 static uint64_t attribute_used __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
00176 static uint64_t attribute_used __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
00177 static uint64_t attribute_used __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
00178
/* BGR -> Y/U/V coefficients packed as four signed 16-bit words.
   FAST_BGR2YV12 selects the set scaled by 2^7, otherwise the set scaled by
   2^15 is used. */
00179 #ifdef FAST_BGR2YV12
00180 static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
00181 static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
00182 static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
00183 #else
00184 static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
00185 static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
00186 static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
00187 #endif
/* offsets replicated over all eight bytes (0x10 and 0x80) and a word of ones */
00188 static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
00189 static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
00190 static const uint64_t w1111 attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
00191 #endif
00192
00193
/* 768-entry byte table; nothing in this chunk fills or reads it.
   NOTE(review): presumably a clipping lookup table - confirm. */
00194 static unsigned char clip_table[768];
00195
/* Forward declaration; initFilter() calls it with (srcFilter, &scaleFilter)
   and releases the returned vector with sws_freeVec(). */
00196 static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
00197
/* Ordered-dither matrices, defined in another translation unit. They are
   indexed by (y & 1) or (y & 7) for the row and by the pixel position for
   the column in the packed-RGB output code of this file. */
00198 extern const uint8_t dither_2x2_4[2][8];
00199 extern const uint8_t dither_2x2_8[2][8];
00200 extern const uint8_t dither_8x8_32[8][8];
00201 extern const uint8_t dither_8x8_73[8][8];
00202 extern const uint8_t dither_8x8_220[8][8];
00203
#ifdef ARCH_X86
/**
 * Dummy function that references the file-scope x86 constants from C code.
 *
 * It adds the constants together into a volatile local and then clears that
 * local; no result is returned and no other state is written. As the name
 * says, its purpose is to silence "unused variable" diagnostics.
 *
 * The definition uses a real prototype, (void), instead of an old-style
 * empty parameter list.
 */
void in_asm_used_var_warning_killer(void)
{
    /* volatile so the sum (and with it the references) cannot be optimised away */
    volatile int i= bF8+bFC+w10+
        bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
        M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
    if(i) i=0;
}
#endif
00213
/**
 * Plain-C multi-tap filter producing 8-bit planar output.
 *
 * Every output sample is the sum over the filter taps of
 * src[tap][x] * filter[tap], biased by 1<<18 (half of 1<<19, for rounding),
 * shifted right by 19 and clamped to 0..255.
 *
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize luma input rows
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize chroma input rows; the U samples start
 *                      at index 0 and the V samples at index 2048 of each row
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes
 * @param uDest         U output, chrDstW bytes; when NULL the chroma planes
 *                      are skipped entirely
 * @param vDest         V output, chrDstW bytes (written whenever uDest is
 *                      non-NULL)
 * @param dstW          number of luma samples to produce
 * @param chrDstW       number of chroma samples to produce
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    int x;
    int tap;

    /* luma plane */
    for (x = 0; x < dstW; x++) {
        int acc = 1 << 18;

        for (tap = 0; tap < lumFilterSize; tap++)
            acc += lumSrc[tap][x] * lumFilter[tap];

        acc >>= 19;
        if (acc < 0)
            acc = 0;
        if (acc > 255)
            acc = 255;
        dest[x] = acc;
    }

    /* no chroma output requested */
    if (uDest == NULL)
        return;

    /* chroma planes */
    for (x = 0; x < chrDstW; x++) {
        int accU = 1 << 18;
        int accV = 1 << 18;

        for (tap = 0; tap < chrFilterSize; tap++) {
            accU += chrSrc[tap][x] * chrFilter[tap];
            accV += chrSrc[tap][x + 2048] * chrFilter[tap];
        }

        accU >>= 19;
        if (accU < 0)
            accU = 0;
        if (accU > 255)
            accU = 255;

        accV >>= 19;
        if (accV < 0)
            accV = 0;
        if (accV > 255)
            accV = 255;

        uDest[x] = accU;
        vDest[x] = accV;
    }
}
00246
00247
/* YSCALE_YUV_2_PACKEDX_C(type)
   Loop header shared by the multi-tap packed-output code.

   Opens `for(i=0; i<(dstW>>1); i++){` and, for each pair of output pixels,
   computes:
     Y1, Y2 - luma of pixel 2*i and 2*i+1, filtered over lumFilterSize rows
     U, V   - chroma of the pair, filtered over chrFilterSize rows (V is read
              2048 entries after U in each chroma row)
   Each accumulator starts at 1<<18 (rounding) and is shifted right by 19.
   The values are clamped to 0..255, but only when bit 8 is set in at least
   one of them.
   NOTE(review): a value that lies outside 0..255 with bit 8 clear (e.g. 512)
   would skip the clamp; presumably the filter gain rules that out - confirm.

   It also declares `type *r, *b, *g;` and `i2 = 2*i` for the code that
   follows the macro.

   The macro leaves the for-body OPEN: the code after it must supply the
   closing `}`. It expects i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc,
   chrFilter and chrFilterSize to be in scope. */
00248 #define YSCALE_YUV_2_PACKEDX_C(type) \
00249 for(i=0; i<(dstW>>1); i++){\
00250 int j;\
00251 int Y1=1<<18;\
00252 int Y2=1<<18;\
00253 int U=1<<18;\
00254 int V=1<<18;\
00255 type *r, *b, *g;\
00256 const int i2= 2*i;\
00257 \
00258 for(j=0; j<lumFilterSize; j++)\
00259 {\
00260 Y1 += lumSrc[j][i2] * lumFilter[j];\
00261 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
00262 }\
00263 for(j=0; j<chrFilterSize; j++)\
00264 {\
00265 U += chrSrc[j][i] * chrFilter[j];\
00266 V += chrSrc[j][i+2048] * chrFilter[j];\
00267 }\
00268 Y1>>=19;\
00269 Y2>>=19;\
00270 U >>=19;\
00271 V >>=19;\
00272 if((Y1|Y2|U|V)&256)\
00273 {\
00274 if(Y1>255) Y1=255;\
00275 else if(Y1<0)Y1=0;\
00276 if(Y2>255) Y2=255;\
00277 else if(Y2<0)Y2=0;\
00278 if(U>255) U=255;\
00279 else if(U<0) U=0;\
00280 if(V>255) V=255;\
00281 else if(V<0) V=0;\
00282 }
00283
/* Loop-header macros for the packed output writers. Like
   YSCALE_YUV_2_PACKEDX_C, every macro below opens a
   `for(i=0; i<(dstW>>1); i++){` loop and leaves it open; the code that
   follows the macro has to close it.

   The *_RGB*_C variants additionally set up the lookup pointers
     r = c->table_rV[V],  g = c->table_gU[U] + c->table_gV[V],
     b = c->table_bU[U]
   which the writers then index with the luma value. */

/* multi-tap filter (see YSCALE_YUV_2_PACKEDX_C) + RGB table lookup */
00284 #define YSCALE_YUV_2_RGBX_C(type) \
00285 YSCALE_YUV_2_PACKEDX_C(type)\
00286 r = c->table_rV[V];\
00287 g = c->table_gU[U] + c->table_gV[V];\
00288 b = c->table_bU[U];\
00289
/* two-row blend: Y from buf0/buf1 weighted by yalpha1/yalpha, U/V from
   uvbuf0/uvbuf1 weighted by uvalpha1/uvalpha, each shifted right by 19.
   No clamping is done here. */
00290 #define YSCALE_YUV_2_PACKED2_C \
00291 for(i=0; i<(dstW>>1); i++){\
00292 const int i2= 2*i;\
00293 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19;\
00294 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
00295 int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19;\
00296 int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;\
00297
/* two-row blend + RGB table lookup */
00298 #define YSCALE_YUV_2_RGB2_C(type) \
00299 YSCALE_YUV_2_PACKED2_C\
00300 type *r, *b, *g;\
00301 r = c->table_rV[V];\
00302 g = c->table_gU[U] + c->table_gV[V];\
00303 b = c->table_bU[U];\
00304
/* single row: Y from buf0 and U/V from uvbuf1, each shifted right by 7 */
00305 #define YSCALE_YUV_2_PACKED1_C \
00306 for(i=0; i<(dstW>>1); i++){\
00307 const int i2= 2*i;\
00308 int Y1= buf0[i2 ]>>7;\
00309 int Y2= buf0[i2+1]>>7;\
00310 int U= (uvbuf1[i ])>>7;\
00311 int V= (uvbuf1[i+2048])>>7;\
00312
/* single row + RGB table lookup */
00313 #define YSCALE_YUV_2_RGB1_C(type) \
00314 YSCALE_YUV_2_PACKED1_C\
00315 type *r, *b, *g;\
00316 r = c->table_rV[V];\
00317 g = c->table_gU[U] + c->table_gV[V];\
00318 b = c->table_bU[U];\
00319
/* single luma row (buf0>>7); chroma is the sum of uvbuf0 and uvbuf1 shifted
   right by 8, i.e. the >>7 value averaged over the two chroma rows */
00320 #define YSCALE_YUV_2_PACKED1B_C \
00321 for(i=0; i<(dstW>>1); i++){\
00322 const int i2= 2*i;\
00323 int Y1= buf0[i2 ]>>7;\
00324 int Y2= buf0[i2+1]>>7;\
00325 int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\
00326 int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\
00327
/* single luma row, averaged chroma + RGB table lookup */
00328 #define YSCALE_YUV_2_RGB1B_C(type) \
00329 YSCALE_YUV_2_PACKED1B_C\
00330 type *r, *b, *g;\
00331 r = c->table_rV[V];\
00332 g = c->table_gU[U] + c->table_gV[V];\
00333 b = c->table_bU[U];\
00334
/* YSCALE_YUV_2_ANYRGB_C(func, func2)
   Expands to a complete `switch(c->dstFormat)` that writes one output line
   in any of the packed destination formats.

     func(type) - loop-header macro for the RGB/BGR cases; it must open the
                  per-pixel-pair loop and provide i2, Y1, Y2 and the r/g/b
                  lookup pointers (e.g. YSCALE_YUV_2_RGB2_C).
     func2      - loop-header macro for the YUY2/UYVY cases; it must open the
                  loop and provide i2, Y1, Y2, U and V.

   Each case supplies the `}` that closes the loop opened by func/func2.
   The 24-bit cases advance dest by 6 bytes per pixel pair; the other cases
   index dest by i2 (or by i for the two-pixels-per-byte RGB4/BGR4 case).
   The 15/16/8/4-bit cases add a per-line (y) and per-pixel dither value to
   the luma index before the table lookup.

   The RGB1/BGR1 case does not use func: it reads buf0, buf1, yalpha1 and
   yalpha directly and packs 8 pixels into each output byte.

   Expects c, dest, dstW, y and i in scope.

   NOTE(review): in this listing the lines between the RGB1 loop and the
   closing `}` of that case are empty and carry no continuation backslash
   (their original content is not visible here); verify against the real
   file that the macro continues across them. */
00335 #define YSCALE_YUV_2_ANYRGB_C(func, func2)\
00336 switch(c->dstFormat)\
00337 {\
00338 case IMGFMT_BGR32:\
00339 case IMGFMT_RGB32:\
00340 func(uint32_t)\
00341 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
00342 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
00343 } \
00344 break;\
00345 case IMGFMT_RGB24:\
00346 func(uint8_t)\
00347 ((uint8_t*)dest)[0]= r[Y1];\
00348 ((uint8_t*)dest)[1]= g[Y1];\
00349 ((uint8_t*)dest)[2]= b[Y1];\
00350 ((uint8_t*)dest)[3]= r[Y2];\
00351 ((uint8_t*)dest)[4]= g[Y2];\
00352 ((uint8_t*)dest)[5]= b[Y2];\
00353 dest+=6;\
00354 }\
00355 break;\
00356 case IMGFMT_BGR24:\
00357 func(uint8_t)\
00358 ((uint8_t*)dest)[0]= b[Y1];\
00359 ((uint8_t*)dest)[1]= g[Y1];\
00360 ((uint8_t*)dest)[2]= r[Y1];\
00361 ((uint8_t*)dest)[3]= b[Y2];\
00362 ((uint8_t*)dest)[4]= g[Y2];\
00363 ((uint8_t*)dest)[5]= r[Y2];\
00364 dest+=6;\
00365 }\
00366 break;\
00367 case IMGFMT_RGB16:\
00368 case IMGFMT_BGR16:\
00369 {\
00370 const int dr1= dither_2x2_8[y&1 ][0];\
00371 const int dg1= dither_2x2_4[y&1 ][0];\
00372 const int db1= dither_2x2_8[(y&1)^1][0];\
00373 const int dr2= dither_2x2_8[y&1 ][1];\
00374 const int dg2= dither_2x2_4[y&1 ][1];\
00375 const int db2= dither_2x2_8[(y&1)^1][1];\
00376 func(uint16_t)\
00377 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
00378 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
00379 }\
00380 }\
00381 break;\
00382 case IMGFMT_RGB15:\
00383 case IMGFMT_BGR15:\
00384 {\
00385 const int dr1= dither_2x2_8[y&1 ][0];\
00386 const int dg1= dither_2x2_8[y&1 ][1];\
00387 const int db1= dither_2x2_8[(y&1)^1][0];\
00388 const int dr2= dither_2x2_8[y&1 ][1];\
00389 const int dg2= dither_2x2_8[y&1 ][0];\
00390 const int db2= dither_2x2_8[(y&1)^1][1];\
00391 func(uint16_t)\
00392 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
00393 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
00394 }\
00395 }\
00396 break;\
00397 case IMGFMT_RGB8:\
00398 case IMGFMT_BGR8:\
00399 {\
00400 const uint8_t * const d64= dither_8x8_73[y&7];\
00401 const uint8_t * const d32= dither_8x8_32[y&7];\
00402 func(uint8_t)\
00403 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
00404 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
00405 }\
00406 }\
00407 break;\
00408 case IMGFMT_RGB4:\
00409 case IMGFMT_BGR4:\
00410 {\
00411 const uint8_t * const d64= dither_8x8_73 [y&7];\
00412 const uint8_t * const d128=dither_8x8_220[y&7];\
00413 func(uint8_t)\
00414 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
00415 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
00416 }\
00417 }\
00418 break;\
00419 case IMGFMT_RG4B:\
00420 case IMGFMT_BG4B:\
00421 {\
00422 const uint8_t * const d64= dither_8x8_73 [y&7];\
00423 const uint8_t * const d128=dither_8x8_220[y&7];\
00424 func(uint8_t)\
00425 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
00426 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
00427 }\
00428 }\
00429 break;\
00430 case IMGFMT_RGB1:\
00431 case IMGFMT_BGR1:\
00432 {\
00433 const uint8_t * const d128=dither_8x8_220[y&7];\
00434 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
00435 for(i=0; i<dstW-7; i+=8){\
00436 int acc;\
00437 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
00438 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
00439 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
00440 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
00441 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
00442 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
00443 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
00444 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
00445 ((uint8_t*)dest)[0]= acc;\
00446 dest++;\
00447 }\
00448 \
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492 \
00493 }\
00494 break;\
00495 case IMGFMT_YUY2:\
00496 func2\
00497 ((uint8_t*)dest)[2*i2+0]= Y1;\
00498 ((uint8_t*)dest)[2*i2+1]= U;\
00499 ((uint8_t*)dest)[2*i2+2]= Y2;\
00500 ((uint8_t*)dest)[2*i2+3]= V;\
00501 } \
00502 break;\
00503 case IMGFMT_UYVY:\
00504 func2\
00505 ((uint8_t*)dest)[2*i2+0]= U;\
00506 ((uint8_t*)dest)[2*i2+1]= Y1;\
00507 ((uint8_t*)dest)[2*i2+2]= V;\
00508 ((uint8_t*)dest)[2*i2+3]= Y2;\
00509 } \
00510 break;\
00511 }\
00512
00513
/**
 * Plain-C multi-tap filter writing one line of packed output.
 *
 * Selects the writer by c->dstFormat. Except for the 1-bit case, every case
 * starts with YSCALE_YUV_2_RGBX_C / YSCALE_YUV_2_PACKEDX_C, which opens the
 * loop over pixel pairs and computes Y1, Y2, U, V (and the r/g/b lookup
 * pointers for RGB output); the `}` that follows the stores closes that loop.
 *
 * @param c             scaler context; dstFormat and the table_rV /
 *                      table_gU / table_gV / table_bU lookup tables are read
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize luma input rows
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize chroma input rows (V is read 2048
 *                      entries after U)
 * @param chrFilterSize number of chroma taps
 * @param dest          output line
 * @param dstW          output width in pixels; pixels are processed in pairs
 * @param y             output line number, used only to pick the dither row
 *
 * A dstFormat that matches none of the cases writes nothing.
 */
00514 static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00515 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00516 uint8_t *dest, int dstW, int y)
00517 {
00518 int i;
00519 switch(c->dstFormat)
00520 {
/* 32 bit: the three table entries are simply added into one pixel value */
00521 case IMGFMT_RGB32:
00522 case IMGFMT_BGR32:
00523 YSCALE_YUV_2_RGBX_C(uint32_t)
00524 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
00525 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
00526 }
00527 break;
/* 24 bit: one byte per component, dest advances 6 bytes per pixel pair */
00528 case IMGFMT_RGB24:
00529 YSCALE_YUV_2_RGBX_C(uint8_t)
00530 ((uint8_t*)dest)[0]= r[Y1];
00531 ((uint8_t*)dest)[1]= g[Y1];
00532 ((uint8_t*)dest)[2]= b[Y1];
00533 ((uint8_t*)dest)[3]= r[Y2];
00534 ((uint8_t*)dest)[4]= g[Y2];
00535 ((uint8_t*)dest)[5]= b[Y2];
00536 dest+=6;
00537 }
00538 break;
00539 case IMGFMT_BGR24:
00540 YSCALE_YUV_2_RGBX_C(uint8_t)
00541 ((uint8_t*)dest)[0]= b[Y1];
00542 ((uint8_t*)dest)[1]= g[Y1];
00543 ((uint8_t*)dest)[2]= r[Y1];
00544 ((uint8_t*)dest)[3]= b[Y2];
00545 ((uint8_t*)dest)[4]= g[Y2];
00546 ((uint8_t*)dest)[5]= r[Y2];
00547 dest+=6;
00548 }
00549 break;
/* 16 bit: 2x2 ordered dither, the row is chosen by y&1 (blue uses the other row) */
00550 case IMGFMT_RGB16:
00551 case IMGFMT_BGR16:
00552 {
00553 const int dr1= dither_2x2_8[y&1 ][0];
00554 const int dg1= dither_2x2_4[y&1 ][0];
00555 const int db1= dither_2x2_8[(y&1)^1][0];
00556 const int dr2= dither_2x2_8[y&1 ][1];
00557 const int dg2= dither_2x2_4[y&1 ][1];
00558 const int db2= dither_2x2_8[(y&1)^1][1];
00559 YSCALE_YUV_2_RGBX_C(uint16_t)
00560 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
00561 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
00562 }
00563 }
00564 break;
/* 15 bit: like 16 bit, but all three channels use the dither_2x2_8 matrix */
00565 case IMGFMT_RGB15:
00566 case IMGFMT_BGR15:
00567 {
00568 const int dr1= dither_2x2_8[y&1 ][0];
00569 const int dg1= dither_2x2_8[y&1 ][1];
00570 const int db1= dither_2x2_8[(y&1)^1][0];
00571 const int dr2= dither_2x2_8[y&1 ][1];
00572 const int dg2= dither_2x2_8[y&1 ][0];
00573 const int db2= dither_2x2_8[(y&1)^1][1];
00574 YSCALE_YUV_2_RGBX_C(uint16_t)
00575 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
00576 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
00577 }
00578 }
00579 break;
/* 8 bit: 8x8 ordered dither, row y&7, column = pixel position & 7 */
00580 case IMGFMT_RGB8:
00581 case IMGFMT_BGR8:
00582 {
00583 const uint8_t * const d64= dither_8x8_73[y&7];
00584 const uint8_t * const d32= dither_8x8_32[y&7];
00585 YSCALE_YUV_2_RGBX_C(uint8_t)
00586 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
00587 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
00588 }
00589 }
00590 break;
/* 4 bit, two pixels per byte: the second pixel goes into the high nibble */
00591 case IMGFMT_RGB4:
00592 case IMGFMT_BGR4:
00593 {
00594 const uint8_t * const d64= dither_8x8_73 [y&7];
00595 const uint8_t * const d128=dither_8x8_220[y&7];
00596 YSCALE_YUV_2_RGBX_C(uint8_t)
00597 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
00598 +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
00599 }
00600 }
00601 break;
/* 4 bit, one pixel per byte */
00602 case IMGFMT_RG4B:
00603 case IMGFMT_BG4B:
00604 {
00605 const uint8_t * const d64= dither_8x8_73 [y&7];
00606 const uint8_t * const d128=dither_8x8_220[y&7];
00607 YSCALE_YUV_2_RGBX_C(uint8_t)
00608 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
00609 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
00610 }
00611 }
00612 break;
/* 1 bit: luma only, looked up through the table for U=V=128. acc is shifted
   left one bit per pixel (acc += acc + bit) and a byte is stored after every
   8th pixel, i.e. when (i&7)==6 in this two-pixels-per-iteration loop. */
00613 case IMGFMT_RGB1:
00614 case IMGFMT_BGR1:
00615 {
00616 const uint8_t * const d128=dither_8x8_220[y&7];
00617 uint8_t *g= c->table_gU[128] + c->table_gV[128];
00618 int acc=0;
00619 for(i=0; i<dstW-1; i+=2){
00620 int j;
00621 int Y1=1<<18;
00622 int Y2=1<<18;
00623
00624 for(j=0; j<lumFilterSize; j++)
00625 {
00626 Y1 += lumSrc[j][i] * lumFilter[j];
00627 Y2 += lumSrc[j][i+1] * lumFilter[j];
00628 }
00629 Y1>>=19;
00630 Y2>>=19;
00631 if((Y1|Y2)&256)
00632 {
00633 if(Y1>255) Y1=255;
00634 else if(Y1<0)Y1=0;
00635 if(Y2>255) Y2=255;
00636 else if(Y2<0)Y2=0;
00637 }
00638 acc+= acc + g[Y1+d128[(i+0)&7]];
00639 acc+= acc + g[Y2+d128[(i+1)&7]];
00640 if((i&7)==6){
00641 ((uint8_t*)dest)[0]= acc;
00642 dest++;
00643 }
00644 }
00645 }
00646 break;
/* packed YUV: 4 bytes per pixel pair; the r/g/b pointers declared by the
   macro (as void*) are not used here */
00647 case IMGFMT_YUY2:
00648 YSCALE_YUV_2_PACKEDX_C(void)
00649 ((uint8_t*)dest)[2*i2+0]= Y1;
00650 ((uint8_t*)dest)[2*i2+1]= U;
00651 ((uint8_t*)dest)[2*i2+2]= Y2;
00652 ((uint8_t*)dest)[2*i2+3]= V;
00653 }
00654 break;
00655 case IMGFMT_UYVY:
00656 YSCALE_YUV_2_PACKEDX_C(void)
00657 ((uint8_t*)dest)[2*i2+0]= U;
00658 ((uint8_t*)dest)[2*i2+1]= Y1;
00659 ((uint8_t*)dest)[2*i2+2]= V;
00660 ((uint8_t*)dest)[2*i2+3]= Y2;
00661 }
00662 break;
00663 }
00664 }
00665
00666
00667
00668
/* Multi-versioning of the scaler core.
   swscale_template.c is included once per CPU variant; before each inclusion
   the HAVE_* macros select the instruction set and RENAME() appends a
   per-variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow) to a name.

   Step 1: decide which variants to build (COMPILE_*). */

/* The C version is built when MMX is not guaranteed at compile time or when
   the CPU is detected at run time. */
00669 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
00670 #define COMPILE_C
00671 #endif
00672
00673 #ifdef ARCH_POWERPC
00674 #ifdef HAVE_ALTIVEC
00675 #define COMPILE_ALTIVEC
00676 #endif //HAVE_ALTIVEC
00677 #endif //ARCH_POWERPC
00678
00679 #ifdef ARCH_X86
00680
/* Without run-time detection only the variant matching the configured CPU
   is built (MMX2 takes precedence over 3DNow, both over plain MMX); with
   RUNTIME_CPUDETECT all three are built. */
00681 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
00682 #define COMPILE_MMX
00683 #endif
00684
00685 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
00686 #define COMPILE_MMX2
00687 #endif
00688
00689 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
00690 #define COMPILE_3DNOW
00691 #endif
00692 #endif //ARCH_X86
00693
/* Step 2: forget the configured CPU features; from here on the HAVE_*
   macros only describe the variant currently being instantiated. */
00694 #undef HAVE_MMX
00695 #undef HAVE_MMX2
00696 #undef HAVE_3DNOW
00697
/* plain C variant: no SIMD feature macro defined */
00698 #ifdef COMPILE_C
00699 #undef HAVE_MMX
00700 #undef HAVE_MMX2
00701 #undef HAVE_3DNOW
00702 #undef HAVE_ALTIVEC
00703 #define RENAME(a) a ## _C
00704 #include "swscale_template.c"
00705 #endif
00706
/* AltiVec variant */
00707 #ifdef ARCH_POWERPC
00708 #ifdef COMPILE_ALTIVEC
00709 #undef RENAME
00710 #define HAVE_ALTIVEC
00711 #define RENAME(a) a ## _altivec
00712 #include "swscale_template.c"
00713 #endif
00714 #endif //ARCH_POWERPC
00715
00716 #ifdef ARCH_X86
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
/* MMX only */
00729 #ifdef COMPILE_MMX
00730 #undef RENAME
00731 #define HAVE_MMX
00732 #undef HAVE_MMX2
00733 #undef HAVE_3DNOW
00734 #define RENAME(a) a ## _MMX
00735 #include "swscale_template.c"
00736 #endif
00737
00738
/* MMX + MMX2 */
00739 #ifdef COMPILE_MMX2
00740 #undef RENAME
00741 #define HAVE_MMX
00742 #define HAVE_MMX2
00743 #undef HAVE_3DNOW
00744 #define RENAME(a) a ## _MMX2
00745 #include "swscale_template.c"
00746 #endif
00747
00748
/* MMX + 3DNow */
00749 #ifdef COMPILE_3DNOW
00750 #undef RENAME
00751 #define HAVE_MMX
00752 #undef HAVE_MMX2
00753 #define HAVE_3DNOW
00754 #define RENAME(a) a ## _3DNow
00755 #include "swscale_template.c"
00756 #endif
00757
00758 #endif //ARCH_X86
00759
00760
00761
/**
 * Evaluates a piecewise cubic at distance dist.
 *
 * On the first segment (dist <= 1.0) the result is the cubic
 * a + b*dist + c*dist^2 + d*dist^3. For larger distances the coefficients
 * are advanced to the next segment - a becomes 0 and b, c, d are recomputed
 * from the previous b, c, d - and dist is reduced by 1.0, until dist falls
 * into the first segment.
 *
 * @param a,b,c,d coefficients of the first segment
 * @param dist    distance at which to evaluate
 * @return        value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    /* step segment by segment until dist lies within the current one */
    while (!(dist <= 1.0)) {
        /* all three new coefficients are derived from the OLD b, c, d */
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =  c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a = 0.0;
        b = nextB;
        c = nextC;
        d = nextD;
        dist = dist - 1.0;
    }

    /* Horner evaluation of the cubic */
    return ((d*dist + c)*dist + b)*dist + a;
}
00772
00773 static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
00774 int srcW, int dstW, int filterAlign, int one, int flags,
00775 SwsVector *srcFilter, SwsVector *dstFilter)
00776 {
00777 int i;
00778 int filterSize;
00779 int filter2Size;
00780 int minFilterSize;
00781 double *filter=NULL;
00782 double *filter2=NULL;
00783 #ifdef ARCH_X86
00784 if(flags & SWS_CPU_CAPS_MMX)
00785 asm volatile("emms\n\t"::: "memory");
00786 #endif
00787
00788
00789 *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));
00790
00791 if(ABS(xInc - 0x10000) <10)
00792 {
00793 int i;
00794 filterSize= 1;
00795 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
00796 for(i=0; i<dstW*filterSize; i++) filter[i]=0;
00797
00798 for(i=0; i<dstW; i++)
00799 {
00800 filter[i*filterSize]=1;
00801 (*filterPos)[i]=i;
00802 }
00803
00804 }
00805 else if(flags&SWS_POINT)
00806 {
00807 int i;
00808 int xDstInSrc;
00809 filterSize= 1;
00810 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
00811
00812 xDstInSrc= xInc/2 - 0x8000;
00813 for(i=0; i<dstW; i++)
00814 {
00815 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
00816
00817 (*filterPos)[i]= xx;
00818 filter[i]= 1.0;
00819 xDstInSrc+= xInc;
00820 }
00821 }
00822 else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR))
00823 {
00824 int i;
00825 int xDstInSrc;
00826 if (flags&SWS_BICUBIC) filterSize= 4;
00827 else if(flags&SWS_X ) filterSize= 4;
00828 else filterSize= 2;
00829 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
00830
00831 xDstInSrc= xInc/2 - 0x8000;
00832 for(i=0; i<dstW; i++)
00833 {
00834 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
00835 int j;
00836
00837 (*filterPos)[i]= xx;
00838
00839 for(j=0; j<filterSize; j++)
00840 {
00841 double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
00842 double coeff= 1.0 - d;
00843 if(coeff<0) coeff=0;
00844 filter[i*filterSize + j]= coeff;
00845 xx++;
00846 }
00847 xDstInSrc+= xInc;
00848 }
00849 }
00850 else
00851 {
00852 double xDstInSrc;
00853 double sizeFactor, filterSizeInSrc;
00854 const double xInc1= (double)xInc / (double)(1<<16);
00855 int param= (flags&SWS_PARAM_MASK)>>SWS_PARAM_SHIFT;
00856
00857 if (flags&SWS_BICUBIC) sizeFactor= 4.0;
00858 else if(flags&SWS_X) sizeFactor= 8.0;
00859 else if(flags&SWS_AREA) sizeFactor= 1.0;
00860 else if(flags&SWS_GAUSS) sizeFactor= 8.0;
00861 else if(flags&SWS_LANCZOS) sizeFactor= param ? 2.0*param : 6.0;
00862 else if(flags&SWS_SINC) sizeFactor= 20.0;
00863 else if(flags&SWS_SPLINE) sizeFactor= 20.0;
00864 else if(flags&SWS_BILINEAR) sizeFactor= 2.0;
00865 else {
00866 sizeFactor= 0.0;
00867 ASSERT(0)
00868 }
00869
00870 if(xInc1 <= 1.0) filterSizeInSrc= sizeFactor;
00871 else filterSizeInSrc= sizeFactor*srcW / (double)dstW;
00872
00873 filterSize= (int)ceil(1 + filterSizeInSrc);
00874 if(filterSize > srcW-2) filterSize=srcW-2;
00875
00876 filter= (double*)memalign(16, dstW*sizeof(double)*filterSize);
00877
00878 xDstInSrc= xInc1 / 2.0 - 0.5;
00879 for(i=0; i<dstW; i++)
00880 {
00881 int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
00882 int j;
00883 (*filterPos)[i]= xx;
00884 for(j=0; j<filterSize; j++)
00885 {
00886 double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
00887 double coeff;
00888 if(flags & SWS_BICUBIC)
00889 {
00890 double A= param ? -param*0.01 : -0.60;
00891
00892
00893 if(d<1.0)
00894 coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
00895 else if(d<2.0)
00896 coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d);
00897 else
00898 coeff=0.0;
00899 }
00900
00901
00902
00903
00904
00905
00906 else if(flags & SWS_X)
00907 {
00908 double A= param ? param*0.1 : 1.0;
00909
00910 if(d<1.0)
00911 coeff = cos(d*PI);
00912 else
00913 coeff=-1.0;
00914 if(coeff<0.0) coeff= -pow(-coeff, A);
00915 else coeff= pow( coeff, A);
00916 coeff= coeff*0.5 + 0.5;
00917 }
00918 else if(flags & SWS_AREA)
00919 {
00920 double srcPixelSize= 1.0/xInc1;
00921 if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
00922 else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
00923 else coeff=0.0;
00924 }
00925 else if(flags & SWS_GAUSS)
00926 {
00927 double p= param ? param*0.1 : 3.0;
00928 coeff = pow(2.0, - p*d*d);
00929 }
00930 else if(flags & SWS_SINC)
00931 {
00932 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
00933 }
00934 else if(flags & SWS_LANCZOS)
00935 {
00936 double p= param ? param : 3.0;
00937 coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
00938 if(d>p) coeff=0;
00939 }
00940 else if(flags & SWS_BILINEAR)
00941 {
00942 coeff= 1.0 - d;
00943 if(coeff<0) coeff=0;
00944 }
00945 else if(flags & SWS_SPLINE)
00946 {
00947 double p=-2.196152422706632;
00948 coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
00949 }
00950 else {
00951 coeff= 0.0;
00952 ASSERT(0)
00953 }
00954
00955 filter[i*filterSize + j]= coeff;
00956 xx++;
00957 }
00958 xDstInSrc+= xInc1;
00959 }
00960 }
00961
00962
00963
00964
00965 ASSERT(filterSize>0)
00966 filter2Size= filterSize;
00967 if(srcFilter) filter2Size+= srcFilter->length - 1;
00968 if(dstFilter) filter2Size+= dstFilter->length - 1;
00969 ASSERT(filter2Size>0)
00970 filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));
00971
00972 for(i=0; i<dstW; i++)
00973 {
00974 int j;
00975 SwsVector scaleFilter;
00976 SwsVector *outVec;
00977
00978 scaleFilter.coeff= filter + i*filterSize;
00979 scaleFilter.length= filterSize;
00980
00981 if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
00982 else outVec= &scaleFilter;
00983
00984 ASSERT(outVec->length == filter2Size)
00985
00986
00987 for(j=0; j<outVec->length; j++)
00988 {
00989 filter2[i*filter2Size + j]= outVec->coeff[j];
00990 }
00991
00992 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
00993
00994 if(outVec != &scaleFilter) sws_freeVec(outVec);
00995 }
00996 free(filter); filter=NULL;
00997
00998
00999
01000 minFilterSize= 0;
01001 for(i=dstW-1; i>=0; i--)
01002 {
01003 int min= filter2Size;
01004 int j;
01005 double cutOff=0.0;
01006
01007
01008 for(j=0; j<filter2Size; j++)
01009 {
01010 int k;
01011 cutOff += ABS(filter2[i*filter2Size]);
01012
01013 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
01014
01015
01016 if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
01017
01018
01019 for(k=1; k<filter2Size; k++)
01020 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
01021 filter2[i*filter2Size + k - 1]= 0.0;
01022 (*filterPos)[i]++;
01023 }
01024
01025 cutOff=0.0;
01026
01027 for(j=filter2Size-1; j>0; j--)
01028 {
01029 cutOff += ABS(filter2[i*filter2Size + j]);
01030
01031 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
01032 min--;
01033 }
01034
01035 if(min>minFilterSize) minFilterSize= min;
01036 }
01037
01038 if (flags & SWS_CPU_CAPS_ALTIVEC) {
01039
01040
01041 if (minFilterSize < 5)
01042 filterAlign = 4;
01043
01044
01045
01046
01047
01048
01049 if (minFilterSize < 3)
01050 filterAlign = 1;
01051 }
01052
01053 ASSERT(minFilterSize > 0)
01054 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
01055 ASSERT(filterSize > 0)
01056 filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
01057 *outFilterSize= filterSize;
01058
01059 if(flags&SWS_PRINT_INFO)
01060 MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
01061
01062 for(i=0; i<dstW; i++)
01063 {
01064 int j;
01065
01066 for(j=0; j<filterSize; j++)
01067 {
01068 if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
01069 else filter[i*filterSize + j]= filter2[i*filter2Size + j];
01070 }
01071 }
01072 free(filter2); filter2=NULL;
01073
01074
01075
01076
01077
01078 for(i=0; i<dstW; i++)
01079 {
01080 int j;
01081 if((*filterPos)[i] < 0)
01082 {
01083
01084 for(j=1; j<filterSize; j++)
01085 {
01086 int left= MAX(j + (*filterPos)[i], 0);
01087 filter[i*filterSize + left] += filter[i*filterSize + j];
01088 filter[i*filterSize + j]=0;
01089 }
01090 (*filterPos)[i]= 0;
01091 }
01092
01093 if((*filterPos)[i] + filterSize > srcW)
01094 {
01095 int shift= (*filterPos)[i] + filterSize - srcW;
01096
01097 for(j=filterSize-2; j>=0; j--)
01098 {
01099 int right= MIN(j + shift, filterSize-1);
01100 filter[i*filterSize +right] += filter[i*filterSize +j];
01101 filter[i*filterSize +j]=0;
01102 }
01103 (*filterPos)[i]= srcW - filterSize;
01104 }
01105 }
01106
01107
01108 *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
01109 memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
01110
01111
01112 for(i=0; i<dstW; i++)
01113 {
01114 int j;
01115 double error=0;
01116 double sum=0;
01117 double scale= one;
01118
01119 for(j=0; j<filterSize; j++)
01120 {
01121 sum+= filter[i*filterSize + j];
01122 }
01123 scale/= sum;
01124 for(j=0; j<*outFilterSize; j++)
01125 {
01126 double v= filter[i*filterSize + j]*scale + error;
01127 int intV= floor(v + 0.5);
01128 (*outFilter)[i*(*outFilterSize) + j]= intV;
01129 error = v - intV;
01130 }
01131 }
01132
01133 (*filterPos)[dstW]= (*filterPos)[dstW-1];
01134 for(i=0; i<*outFilterSize; i++)
01135 {
01136 int j= dstW*(*outFilterSize);
01137 (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
01138 }
01139
01140 free(filter);
01141 }
01142
01143 #ifdef ARCH_X86
/*
 * Builds the run-time generated MMX2 horizontal scaler ("funnyCode") together with
 * the coefficient and source-position tables it consumes.
 *
 * dstW      - number of output pixels; the loop below covers dstW/numSplits of them
 * xInc      - source step per output pixel in 16.16 fixed point (xpos>>16 is the
 *             source pixel index, xpos&0xFFFF the fraction)
 * funnyCode - output: receives copies of the code fragments with patched pshufw
 *             immediates; a RET opcode is stored after the last fragment
 * filter    - output: one coefficient per output pixel (inverted fraction >> 9)
 * filterPos - output: source x position for each group of 4 output pixels, stored at
 *             index i/2; one extra entry is written after the loop
 * numSplits - divisor applied to dstW
 *             NOTE(review): presumably the generated code is run numSplits times per
 *             line - confirm against the caller.
 *
 * NOTE(review): nothing here bounds fragmentPos against the size of funnyCode; the
 * buffer size is defined outside this block.
 */
01144 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
01145 {
01146 uint8_t *fragmentA;
01147 int imm8OfPShufW1A;
01148 int imm8OfPShufW2A;
01149 int fragmentLengthA;
01150 uint8_t *fragmentB;
01151 int imm8OfPShufW1B;
01152 int imm8OfPShufW2B;
01153 int fragmentLengthB;
01154 int fragmentPos;
01155
01156 int xpos, i;
01157
01158
01159
01160
01161
/*
 * Fragment A template. The fragment (labels 0..9) is never executed here: the leading
 * "jmp 9f" skips it. The code after label 9 yields
 *   %0 = address of the fragment (label 0)
 *   %1 = offset of the byte just before label 1 (imm8 of the first pshufw)
 *   %2 = offset of the byte just before label 2 (imm8 of the second pshufw)
 *   %3 = fragment length (label 9 - label 0)
 * Fragment A loads source bytes twice: from (%ecx,%esi) and from 1(%ecx,%esi).
 */
01162 asm volatile(
01163 "jmp 9f \n\t"
01164
01165 "0: \n\t"
01166 "movq (%%edx, %%eax), %%mm3 \n\t"
01167 "movd (%%ecx, %%esi), %%mm0 \n\t"
01168 "movd 1(%%ecx, %%esi), %%mm1 \n\t"
01169 "punpcklbw %%mm7, %%mm1 \n\t"
01170 "punpcklbw %%mm7, %%mm0 \n\t"
01171 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
01172 "1: \n\t"
01173 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
01174 "2: \n\t"
01175 "psubw %%mm1, %%mm0 \n\t"
01176 "movl 8(%%ebx, %%eax), %%esi \n\t"
01177 "pmullw %%mm3, %%mm0 \n\t"
01178 "psllw $7, %%mm1 \n\t"
01179 "paddw %%mm1, %%mm0 \n\t"
01180
01181 "movq %%mm0, (%%edi, %%eax) \n\t"
01182
01183 "addl $8, %%eax \n\t"
01184
01185 "9: \n\t"
01186
01187 "leal 0b, %0 \n\t"
01188 "leal 1b, %1 \n\t"
01189 "leal 2b, %2 \n\t"
01190 "decl %1 \n\t"
01191 "decl %2 \n\t"
01192 "subl %0, %1 \n\t"
01193 "subl %0, %2 \n\t"
01194 "leal 9b, %3 \n\t"
01195 "subl %0, %3 \n\t"
01196
01197
01198 :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
01199 "=r" (fragmentLengthA)
01200 );
01201
/*
 * Fragment B template: same layout and outputs as fragment A, but it performs a single
 * load from (%ecx,%esi) and derives mm1 from mm0 with the first pshufw.
 */
01202 asm volatile(
01203 "jmp 9f \n\t"
01204
01205 "0: \n\t"
01206 "movq (%%edx, %%eax), %%mm3 \n\t"
01207 "movd (%%ecx, %%esi), %%mm0 \n\t"
01208 "punpcklbw %%mm7, %%mm0 \n\t"
01209 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
01210 "1: \n\t"
01211 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
01212 "2: \n\t"
01213 "psubw %%mm1, %%mm0 \n\t"
01214 "movl 8(%%ebx, %%eax), %%esi \n\t"
01215 "pmullw %%mm3, %%mm0 \n\t"
01216 "psllw $7, %%mm1 \n\t"
01217 "paddw %%mm1, %%mm0 \n\t"
01218
01219 "movq %%mm0, (%%edi, %%eax) \n\t"
01220
01221 "addl $8, %%eax \n\t"
01222
01223 "9: \n\t"
01224
01225 "leal 0b, %0 \n\t"
01226 "leal 1b, %1 \n\t"
01227 "leal 2b, %2 \n\t"
01228 "decl %1 \n\t"
01229 "decl %2 \n\t"
01230 "subl %0, %1 \n\t"
01231 "subl %0, %2 \n\t"
01232 "leal 9b, %3 \n\t"
01233 "subl %0, %3 \n\t"
01234
01235
01236 :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
01237 "=r" (fragmentLengthB)
01238 );
01239
01240 xpos= 0;
01241 fragmentPos=0;
01242
/* One fragment is emitted per group of 4 output pixels (every i with (i&3)==0). */
01243 for(i=0; i<dstW/numSplits; i++)
01244 {
01245 int xx=xpos>>16;
01246
01247 if((i&3) == 0)
01248 {
/* a..d: source pixel index of each of the 4 output pixels, relative to xx */
01249 int a=0;
01250 int b=((xpos+xInc)>>16) - xx;
01251 int c=((xpos+xInc*2)>>16) - xx;
01252 int d=((xpos+xInc*3)>>16) - xx;
01253
/* coefficient = bitwise-inverted 16-bit fraction of the position, reduced to 7 bits */
01254 filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
01255 filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
01256 filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
01257 filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
01258 filterPos[i/2]= xx;
01259
/* d+1 < 4: the right-hand neighbour of the last pixel still has an index below 4,
   so the single-load fragment B is used */
01260 if(d+1<4)
01261 {
01262 int maxShift= 3-(d+1);
01263 int shift=0;
01264
01265 memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
01266
/* patch the pshufw immediates: 2 bits per output pixel; the first selects index+1,
   the second selects index */
01267 funnyCode[fragmentPos + imm8OfPShufW1B]=
01268 (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
01269 funnyCode[fragmentPos + imm8OfPShufW2B]=
01270 a | (b<<2) | (c<<4) | (d<<6);
01271
01272 if(i+3>=dstW) shift=maxShift;
01273 else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3;
01274
/* move the load position left by `shift` pixels and compensate by adding `shift` to
   every 2-bit selector (0x55 == 01010101b) */
01275 if(shift && i>=shift)
01276 {
01277 funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
01278 funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
01279 filterPos[i/2]-=shift;
01280 }
01281
01282 fragmentPos+= fragmentLengthB;
01283 }
01284 else
01285 {
/* fragment A: both immediates get the same selectors because the +1 neighbour comes
   from the second load at offset 1 */
01286 int maxShift= 3-d;
01287 int shift=0;
01288
01289 memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
01290
01291 funnyCode[fragmentPos + imm8OfPShufW1A]=
01292 funnyCode[fragmentPos + imm8OfPShufW2A]=
01293 a | (b<<2) | (c<<4) | (d<<6);
01294
01295 if(i+4>=dstW) shift=maxShift;
01296 else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3;
01297
01298 if(shift && i>=shift)
01299 {
01300 funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
01301 funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
01302 filterPos[i/2]-=shift;
01303 }
01304
01305 fragmentPos+= fragmentLengthA;
01306 }
01307
/* provisional terminator; overwritten by the next fragment if there is one */
01308 funnyCode[fragmentPos]= RET;
01309 }
01310 xpos+=xInc;
01311 }
/* extra table entry holding the position after the last processed pixel */
01312 filterPos[i/2]= xpos>>16;
01313 }
01314 #endif // ARCH_X86
01315
01316 static void globalInit(){
01317
01318 int i;
01319 for(i=0; i<768; i++){
01320 int c= MIN(MAX(i-256, 0), 255);
01321 clip_table[i]=c;
01322 }
01323 }
01324
01325 static SwsFunc getSwsFunc(int flags){
01326
01327 #ifdef RUNTIME_CPUDETECT
01328 #ifdef ARCH_X86
01329
01330 if(flags & SWS_CPU_CAPS_MMX2)
01331 return swScale_MMX2;
01332 else if(flags & SWS_CPU_CAPS_3DNOW)
01333 return swScale_3DNow;
01334 else if(flags & SWS_CPU_CAPS_MMX)
01335 return swScale_MMX;
01336 else
01337 return swScale_C;
01338
01339 #else
01340 #ifdef ARCH_POWERPC
01341 if(flags & SWS_CPU_CAPS_ALTIVEC)
01342 return swScale_altivec;
01343 else
01344 return swScale_C;
01345 #endif
01346 return swScale_C;
01347 #endif
01348 #else //RUNTIME_CPUDETECT
01349 #ifdef HAVE_MMX2
01350 return swScale_MMX2;
01351 #elif defined (HAVE_3DNOW)
01352 return swScale_3DNow;
01353 #elif defined (HAVE_MMX)
01354 return swScale_MMX;
01355 #elif defined (HAVE_ALTIVEC)
01356 return swScale_altivec;
01357 #else
01358 return swScale_C;
01359 #endif
01360 #endif
01361 }
01362
01363 static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01364 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
01365 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
01366
01367 if(dstStride[0]==srcStride[0])
01368 memcpy(dst, src[0], srcSliceH*dstStride[0]);
01369 else
01370 {
01371 int i;
01372 uint8_t *srcPtr= src[0];
01373 uint8_t *dstPtr= dst;
01374 for(i=0; i<srcSliceH; i++)
01375 {
01376 memcpy(dstPtr, srcPtr, srcStride[0]);
01377 srcPtr+= srcStride[0];
01378 dstPtr+= dstStride[0];
01379 }
01380 }
01381 dst = dstParam[1] + dstStride[1]*srcSliceY;
01382 interleaveBytes( src[1],src[2],dst,c->srcW,srcSliceH,srcStride[1],srcStride[2],dstStride[0] );
01383
01384 return srcSliceH;
01385 }
01386
01387 static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01388 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
01389 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
01390
01391 yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
01392
01393 return srcSliceH;
01394 }
01395
01396 static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01397 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
01398 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
01399
01400 yv12touyvy( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
01401
01402 return srcSliceH;
01403 }
01404
01405
01406 static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01407 int srcSliceH, uint8_t* dst[], int dstStride[]){
01408 const int srcFormat= c->srcFormat;
01409 const int dstFormat= c->dstFormat;
01410 const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
01411 const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
01412 const int srcId= (srcFormat&0xFF)>>2;
01413 const int dstId= (dstFormat&0xFF)>>2;
01414 void (*conv)(const uint8_t *src, uint8_t *dst, unsigned src_size)=NULL;
01415
01416
01417 if( (isBGR(srcFormat) && isBGR(dstFormat))
01418 || (isRGB(srcFormat) && isRGB(dstFormat))){
01419 switch(srcId | (dstId<<4)){
01420 case 0x34: conv= rgb16to15; break;
01421 case 0x36: conv= rgb24to15; break;
01422 case 0x38: conv= rgb32to15; break;
01423 case 0x43: conv= rgb15to16; break;
01424 case 0x46: conv= rgb24to16; break;
01425 case 0x48: conv= rgb32to16; break;
01426 case 0x63: conv= rgb15to24; break;
01427 case 0x64: conv= rgb16to24; break;
01428 case 0x68: conv= rgb32to24; break;
01429 case 0x83: conv= rgb15to32; break;
01430 case 0x84: conv= rgb16to32; break;
01431 case 0x86: conv= rgb24to32; break;
01432 default: MSG_ERR("swScaler: internal error %s -> %s converter\n",
01433 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
01434 }
01435 }else if( (isBGR(srcFormat) && isRGB(dstFormat))
01436 || (isRGB(srcFormat) && isBGR(dstFormat))){
01437 switch(srcId | (dstId<<4)){
01438 case 0x33: conv= rgb15tobgr15; break;
01439 case 0x34: conv= rgb16tobgr15; break;
01440 case 0x36: conv= rgb24tobgr15; break;
01441 case 0x38: conv= rgb32tobgr15; break;
01442 case 0x43: conv= rgb15tobgr16; break;
01443 case 0x44: conv= rgb16tobgr16; break;
01444 case 0x46: conv= rgb24tobgr16; break;
01445 case 0x48: conv= rgb32tobgr16; break;
01446 case 0x63: conv= rgb15tobgr24; break;
01447 case 0x64: conv= rgb16tobgr24; break;
01448 case 0x66: conv= rgb24tobgr24; break;
01449 case 0x68: conv= rgb32tobgr24; break;
01450 case 0x83: conv= rgb15tobgr32; break;
01451 case 0x84: conv= rgb16tobgr32; break;
01452 case 0x86: conv= rgb24tobgr32; break;
01453 case 0x88: conv= rgb32tobgr32; break;
01454 default: MSG_ERR("swScaler: internal error %s -> %s converter\n",
01455 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
01456 }
01457 }else{
01458 MSG_ERR("swScaler: internal error %s -> %s converter\n",
01459 vo_format_name(srcFormat), vo_format_name(dstFormat));
01460 }
01461
01462 if(dstStride[0]*srcBpp == srcStride[0]*dstBpp)
01463 conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
01464 else
01465 {
01466 int i;
01467 uint8_t *srcPtr= src[0];
01468 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
01469
01470 for(i=0; i<srcSliceH; i++)
01471 {
01472 conv(srcPtr, dstPtr, c->srcW*srcBpp);
01473 srcPtr+= srcStride[0];
01474 dstPtr+= dstStride[0];
01475 }
01476 }
01477 return srcSliceH;
01478 }
01479
01480 static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01481 int srcSliceH, uint8_t* dst[], int dstStride[]){
01482
01483 rgb24toyv12(
01484 src[0],
01485 dst[0]+ srcSliceY *dstStride[0],
01486 dst[1]+(srcSliceY>>1)*dstStride[1],
01487 dst[2]+(srcSliceY>>1)*dstStride[2],
01488 c->srcW, srcSliceH,
01489 dstStride[0], dstStride[1], srcStride[0]);
01490 return srcSliceH;
01491 }
01492
01493 static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01494 int srcSliceH, uint8_t* dst[], int dstStride[]){
01495 int i;
01496
01497
01498 if(srcStride[0]==dstStride[0])
01499 memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
01500 else{
01501 uint8_t *srcPtr= src[0];
01502 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
01503
01504 for(i=0; i<srcSliceH; i++)
01505 {
01506 memcpy(dstPtr, srcPtr, c->srcW);
01507 srcPtr+= srcStride[0];
01508 dstPtr+= dstStride[0];
01509 }
01510 }
01511
01512 if(c->dstFormat==IMGFMT_YV12){
01513 planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
01514 planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
01515 }else{
01516 planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
01517 planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
01518 }
01519 return srcSliceH;
01520 }
01521
01525 static inline void sws_orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
01526 if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
01527 || format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
01528 sortedP[0]= p[0];
01529 sortedP[1]= p[2];
01530 sortedP[2]= p[1];
01531 sortedStride[0]= stride[0];
01532 sortedStride[1]= stride[2];
01533 sortedStride[2]= stride[1];
01534 }
01535 else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
01536 {
01537 sortedP[0]= p[0];
01538 sortedP[1]=
01539 sortedP[2]= NULL;
01540 sortedStride[0]= stride[0];
01541 sortedStride[1]=
01542 sortedStride[2]= 0;
01543 }
01544 else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
01545 {
01546 sortedP[0]= p[0];
01547 sortedP[1]= p[1];
01548 sortedP[2]= p[2];
01549 sortedStride[0]= stride[0];
01550 sortedStride[1]= stride[1];
01551 sortedStride[2]= stride[2];
01552 }else{
01553 MSG_ERR("internal error in orderYUV\n");
01554 }
01555 }
01556
01557
01558 static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
01559 int srcSliceH, uint8_t* dst[], int dstStride[]){
01560
01561 if(isPacked(c->srcFormat))
01562 {
01563 if(dstStride[0]==srcStride[0])
01564 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
01565 else
01566 {
01567 int i;
01568 uint8_t *srcPtr= src[0];
01569 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
01570 int length=0;
01571
01572
01573 while(length+c->srcW <= ABS(dstStride[0])
01574 && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
01575 ASSERT(length!=0);
01576
01577 for(i=0; i<srcSliceH; i++)
01578 {
01579 memcpy(dstPtr, srcPtr, length);
01580 srcPtr+= srcStride[0];
01581 dstPtr+= dstStride[0];
01582 }
01583 }
01584 }
01585 else
01586 {
01587 int plane;
01588 for(plane=0; plane<3; plane++)
01589 {
01590 int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
01591 int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
01592 int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
01593
01594 if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
01595 {
01596 if(!isGray(c->dstFormat))
01597 memset(dst[plane], 128, dstStride[plane]*height);
01598 }
01599 else
01600 {
01601 if(dstStride[plane]==srcStride[plane])
01602 memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
01603 else
01604 {
01605 int i;
01606 uint8_t *srcPtr= src[plane];
01607 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
01608 for(i=0; i<height; i++)
01609 {
01610 memcpy(dstPtr, srcPtr, length);
01611 srcPtr+= srcStride[plane];
01612 dstPtr+= dstStride[plane];
01613 }
01614 }
01615 }
01616 }
01617 }
01618 return srcSliceH;
01619 }
01620
01621 static int remove_dup_fourcc(int fourcc)
01622 {
01623 switch(fourcc)
01624 {
01625 case IMGFMT_I420:
01626 case IMGFMT_IYUV: return IMGFMT_YV12;
01627 case IMGFMT_Y8 : return IMGFMT_Y800;
01628 case IMGFMT_IF09: return IMGFMT_YVU9;
01629 default: return fourcc;
01630 }
01631 }
01632
01633 static void getSubSampleFactors(int *h, int *v, int format){
01634 switch(format){
01635 case IMGFMT_UYVY:
01636 case IMGFMT_YUY2:
01637 *h=1;
01638 *v=0;
01639 break;
01640 case IMGFMT_YV12:
01641 case IMGFMT_Y800:
01642 *h=1;
01643 *v=1;
01644 break;
01645 case IMGFMT_YVU9:
01646 *h=2;
01647 *v=2;
01648 break;
01649 case IMGFMT_444P:
01650 *h=0;
01651 *v=0;
01652 break;
01653 case IMGFMT_422P:
01654 *h=1;
01655 *v=0;
01656 break;
01657 case IMGFMT_411P:
01658 *h=2;
01659 *v=0;
01660 break;
01661 default:
01662 *h=0;
01663 *v=0;
01664 break;
01665 }
01666 }
01667
/*
 * Converts a 48.16 fixed-point value to a rounded, saturated 16-bit integer.
 *
 * f - value scaled by 1<<16
 *
 * Returns the rounded value as the bit pattern of a signed 16-bit number:
 * results above 0x7FFF saturate to 0x7FFF, results below -0x7FFF yield 0x8000.
 *
 * The intermediate is kept in 64 bits. Previously it was narrowed to int before the
 * range check, so inputs of large magnitude wrapped around instead of saturating
 * (e.g. 1<<48 produced 0).
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r= (f + (1<<15))>>16;

    if(r < -0x7FFF) return 0x8000;
    if(r >  0x7FFF) return 0x7FFF;
    return (uint16_t)r;
}
01674
01680 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
01681 int64_t crv = inv_table[0];
01682 int64_t cbu = inv_table[1];
01683 int64_t cgu = -inv_table[2];
01684 int64_t cgv = -inv_table[3];
01685 int64_t cy = 1<<16;
01686 int64_t oy = 0;
01687
01688 if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
01689 memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
01690 memcpy(c->dstColorspaceTable, table, sizeof(int)*4);
01691
01692 c->brightness= brightness;
01693 c->contrast = contrast;
01694 c->saturation= saturation;
01695 c->srcRange = srcRange;
01696 c->dstRange = dstRange;
01697
01698 c->uOffset= 0x0400040004000400LL;
01699 c->vOffset= 0x0400040004000400LL;
01700
01701 if(!srcRange){
01702 cy= (cy*255) / 219;
01703 oy= 16<<16;
01704 }
01705
01706 cy = (cy *contrast )>>16;
01707 crv= (crv*contrast * saturation)>>32;
01708 cbu= (cbu*contrast * saturation)>>32;
01709 cgu= (cgu*contrast * saturation)>>32;
01710 cgv= (cgv*contrast * saturation)>>32;
01711
01712 oy -= 256*brightness;
01713
01714 c->yCoeff= roundToInt16(cy *8192) * 0x0001000100010001ULL;
01715 c->vrCoeff= roundToInt16(crv*8192) * 0x0001000100010001ULL;
01716 c->ubCoeff= roundToInt16(cbu*8192) * 0x0001000100010001ULL;
01717 c->vgCoeff= roundToInt16(cgv*8192) * 0x0001000100010001ULL;
01718 c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL;
01719 c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL;
01720
01721 yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
01722
01723
01724 #ifdef HAVE_ALTIVEC
01725 yuv2rgb_altivec_init_tables (c, inv_table);
01726 #endif
01727 return 0;
01728 }
01729
01733 int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
01734 if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
01735
01736 *inv_table = c->srcColorspaceTable;
01737 *table = c->dstColorspaceTable;
01738 *srcRange = c->srcRange;
01739 *dstRange = c->dstRange;
01740 *brightness= c->brightness;
01741 *contrast = c->contrast;
01742 *saturation= c->saturation;
01743
01744 return 0;
01745 }
01746
/*
 * Allocates and initialises a scaling / format conversion context.
 *
 * srcW, srcH     - source dimensions
 * origSrcFormat  - source pixel format as given by the caller
 * dstW, dstH     - destination dimensions
 * origDstFormat  - destination pixel format as given by the caller
 * flags          - SWS_* scaler selection, option and CPU capability bits
 * srcFilter, dstFilter - optional additional filters; NULL selects an empty filter
 *
 * Returns the new context, or NULL if a format is unsupported or the dimensions are
 * rejected (srcW<4, srcH<1, dstW<8 or dstH<1).
 *
 * If source and destination have the same size, no filter is in use and a special
 * converter matches the format pair, the context is returned early with only that
 * converter installed; the filter tables and line buffers below are not set up.
 *
 * NOTE(review): none of the memalign() results in this function are checked.
 */
01747 SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int dstH, int origDstFormat, int flags,
01748 SwsFilter *srcFilter, SwsFilter *dstFilter){
01749
01750 SwsContext *c;
01751 int i;
01752 int usesVFilter, usesHFilter;
01753 int unscaled, needsDither;
01754 int srcFormat, dstFormat;
01755 SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
01756 #ifdef ARCH_X86
01757 if(flags & SWS_CPU_CAPS_MMX)
01758 asm volatile("emms\n\t"::: "memory");
01759 #endif
01760
01761 #ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
01762 flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
01763 #ifdef HAVE_MMX2
01764 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
01765 #elif defined (HAVE_3DNOW)
01766 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
01767 #elif defined (HAVE_MMX)
01768 flags |= SWS_CPU_CAPS_MMX;
01769 #elif defined (HAVE_ALTIVEC)
01770 flags |= SWS_CPU_CAPS_ALTIVEC;
01771 #endif
01772 #endif
/* lazy one-time global setup: globalInit() makes clip_table[512] equal 255 */
01773 if(clip_table[512] != 255) globalInit();
01774 if(rgb15to16 == NULL) sws_rgb2rgb_init(flags);
01775
01776
/* fold alias format codes onto their canonical equivalents */
01777 srcFormat = remove_dup_fourcc(origSrcFormat);
01778 dstFormat = remove_dup_fourcc(origDstFormat);
01779
01780 unscaled = (srcW == dstW && srcH == dstH);
/* dithering: RGB/BGR output below 24 bits that is shallower than the source, or
   whose source is not RGB/BGR at all */
01781 needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
01782 && (dstFormat&0xFF)<24
01783 && ((dstFormat&0xFF)<(srcFormat&0xFF) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
01784
01785 if(!isSupportedIn(srcFormat))
01786 {
01787 MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
01788 return NULL;
01789 }
01790 if(!isSupportedOut(dstFormat))
01791 {
01792 MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
01793 return NULL;
01794 }
01795
01796
/* sanity check of the dimensions */
01797 if(srcW<4 || srcH<1 || dstW<8 || dstH<1)
01798 {
01799 MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
01800 srcW, srcH, dstW, dstH);
01801 return NULL;
01802 }
01803
01804 if(!dstFilter) dstFilter= &dummyFilter;
01805 if(!srcFilter) srcFilter= &dummyFilter;
01806
01807 c= memalign(64, sizeof(SwsContext));
01808 memset(c, 0, sizeof(SwsContext));
01809
01810 c->srcW= srcW;
01811 c->srcH= srcH;
01812 c->dstW= dstW;
01813 c->dstH= dstH;
/* source step per destination pixel/line in 16.16 fixed point, rounded to nearest */
01814 c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
01815 c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
01816 c->flags= flags;
01817 c->dstFormat= dstFormat;
01818 c->srcFormat= srcFormat;
01819 c->origDstFormat= origDstFormat;
01820 c->origSrcFormat= origSrcFormat;
01821 c->vRounder= 4* 0x0001000100010001ULL;
01822
/* a filter only counts as "used" if it has more than one coefficient */
01823 usesHFilter= usesVFilter= 0;
01824 if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
01825 if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
01826 if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
01827 if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
01828 if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
01829 if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
01830 if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
01831 if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
01832
01833 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
01834 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
01835
01836
/* RGB/BGR output: chroma is kept at half horizontal resolution unless
   SWS_FULL_CHR_H_INT is requested */
01837 if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
01838
01839
/* optionally drop source chroma lines by increasing the vertical subsampling shift */
01840 c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
01841 c->chrSrcVSubSample+= c->vChrDrop;
01842
01843
/* RGB/BGR input: chroma is taken at half horizontal resolution unless
   SWS_FULL_CHR_H_INP is requested */
01844 if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
01845 c->chrSrcHSubSample=1;
01846
01847 c->chrIntHSubSample= c->chrDstHSubSample;
01848 c->chrIntVSubSample= c->chrSrcVSubSample;
01849
01850
/* chroma plane sizes: -((-x) >> s) divides by 2^s rounding up */
01851 c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
01852 c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
01853 c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
01854 c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
01855
/* default colourspace; the return value is ignored (the call returns -1 for YUV and
   gray destinations) */
01856 sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] , 0, 0, 1<<16, 1<<16);
01857
01858
/* Unscaled special converters. The tests are independent ifs, so when several match
   the last assignment to c->swScale wins. */
01859 if(unscaled && !usesHFilter && !usesVFilter)
01860 {
01861
01862 if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
01863 {
01864 c->swScale= PlanarToNV12Wrapper;
01865 }
01866
01867 if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
01868 {
01869 c->swScale= yuv2rgb_get_func_ptr(c);
01870 }
01871
01872 if( srcFormat==IMGFMT_YVU9 && dstFormat==IMGFMT_YV12 )
01873 {
01874 c->swScale= yvu9toyv12Wrapper;
01875 }
01876
01877
01878 if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
01879 c->swScale= bgr24toyv12Wrapper;
01880
01881
/* RGB <-> RGB/BGR without loss of depth */
01882 if( (isBGR(srcFormat) || isRGB(srcFormat))
01883 && (isBGR(dstFormat) || isRGB(dstFormat))
01884 && !needsDither)
01885 c->swScale= rgb2rgbWrapper;
01886
01887
/* converters that are only chosen when a low-quality scaler was requested */
01888 if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
01889
01890 if( (isBGR(srcFormat) || isRGB(srcFormat))
01891 && (isBGR(dstFormat) || isRGB(dstFormat))
01892 && needsDither)
01893 c->swScale= rgb2rgbWrapper;
01894
01895
01896 if(srcFormat == IMGFMT_YV12 &&
01897 (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY))
01898 {
01899 if (dstFormat == IMGFMT_YUY2)
01900 c->swScale= PlanarToYuy2Wrapper;
01901 else
01902 c->swScale= PlanarToUyvyWrapper;
01903 }
01904 }
01905
01906 #ifdef HAVE_ALTIVEC
01907 if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
01908 ((srcFormat == IMGFMT_YV12 &&
01909 (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY)))) {
01910
01911 if (dstFormat == IMGFMT_YUY2)
01912 c->swScale= yv12toyuy2_unscaled_altivec;
01913 else
01914 c->swScale= yv12touyvy_unscaled_altivec;
01915 }
01916 #endif
01917
01918
/* plain copy: identical formats, or planar YUV <-> gray */
01919 if( srcFormat == dstFormat
01920 || (isPlanarYUV(srcFormat) && isGray(dstFormat))
01921 || (isPlanarYUV(dstFormat) && isGray(srcFormat))
01922 )
01923 {
01924 c->swScale= simpleCopy;
01925 }
01926
01927 if(c->swScale){
01928 if(flags&SWS_PRINT_INFO)
01929 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
01930 vo_format_name(srcFormat), vo_format_name(dstFormat));
01931 return c;
01932 }
01933 }
01934
/* The MMX2 horizontal scaler is only considered when not shrinking horizontally, with
   dstW a multiple of 32, srcW a multiple of 16 and no horizontal filter in use. */
01935 if(flags & SWS_CPU_CAPS_MMX2)
01936 {
01937 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
01938 if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
01939 {
01940 if(flags&SWS_PRINT_INFO)
01941 MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
01942 }
01943 if(usesHFilter) c->canMMX2BeUsed=0;
01944 }
01945 else
01946 c->canMMX2BeUsed=0;
01947
01948 c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
01949 c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
01950
01951
01952
01953
01954
01955
01956
/* Adjust the horizontal increments for the fast bilinear scalers.
   NOTE(review): the +20 / -20 corrections and the (w-2)/(w-2) ratio are not explained
   by anything visible here - presumably they keep the asm scalers from reading past
   the last source pixel; confirm against the scaler implementations. */
01957 if(flags&SWS_FAST_BILINEAR)
01958 {
01959 if(c->canMMX2BeUsed)
01960 {
01961 c->lumXInc+= 20;
01962 c->chrXInc+= 20;
01963 }
01964
01965 else if(flags & SWS_CPU_CAPS_MMX)
01966 {
01967 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
01968 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
01969 }
01970 }
01971
01972
/* horizontal filter tables; SWS_BICUBLIN means bicubic for luma, bilinear for chroma */
01973 {
01974 const int filterAlign=
01975 (flags & SWS_CPU_CAPS_MMX) ? 4 :
01976 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
01977 1;
01978
01979 initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
01980 srcW , dstW, filterAlign, 1<<14,
01981 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
01982 srcFilter->lumH, dstFilter->lumH);
01983 initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
01984 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
01985 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
01986 srcFilter->chrH, dstFilter->chrH);
01987
01988 #ifdef ARCH_X86
01989
/* generate the MMX2 fast-bilinear code and its tables (luma: 8 splits, chroma: 4) */
01990 if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
01991 {
01992 c->lumMmx2Filter = (int16_t*)memalign(8, (dstW /8+8)*sizeof(int16_t));
01993 c->chrMmx2Filter = (int16_t*)memalign(8, (c->chrDstW /4+8)*sizeof(int16_t));
01994 c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW /2/8+8)*sizeof(int32_t));
01995 c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));
01996
01997 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
01998 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
01999 }
02000 #endif
02001 }
02002
02003
02004
02005
/* vertical filter tables */
02006 {
02007 const int filterAlign=
02008 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
02009 1;
02010
02011 initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
02012 srcH , dstH, filterAlign, (1<<12)-4,
02013 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
02014 srcFilter->lumV, dstFilter->lumV);
02015 initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
02016 c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
02017 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
02018 srcFilter->chrV, dstFilter->chrV);
02019 }
02020
02021
/* Number of buffered lines: at least the vertical filter size, enlarged so that for
   every output line the span from its first needed source line up to nextSlice (last
   needed line, rounded down to a multiple of the chroma subsampling) fits. */
02022 c->vLumBufSize= c->vLumFilterSize;
02023 c->vChrBufSize= c->vChrFilterSize;
02024 for(i=0; i<dstH; i++)
02025 {
02026 int chrI= i*c->chrDstH / dstH;
02027 int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
02028 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
02029
02030 nextSlice>>= c->chrSrcVSubSample;
02031 nextSlice<<= c->chrSrcVSubSample;
02032 if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
02033 c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
02034 if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
02035 c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
02036 }
02037
02038
/* Line buffers. Each pointer array has 2*size entries and entries i and i+size refer
   to the same line.
   NOTE(review): lines have fixed sizes of 4000 / 8000 bytes regardless of dstW. */
02039 c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
02040 c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
02041
02042 for(i=0; i<c->vLumBufSize; i++)
02043 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
02044 for(i=0; i<c->vChrBufSize; i++)
02045 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
02046
02047
/* initial contents: luma bytes 0, chroma bytes 64 */
02048 for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
02049 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
02050
02051 ASSERT(c->chrDstH <= dstH)
02052
/* everything below up to the final assignment is diagnostic output only */
02053 if(flags&SWS_PRINT_INFO)
02054 {
02055 #ifdef DITHER1XBPP
02056 char *dither= " dithered";
02057 #else
02058 char *dither= "";
02059 #endif
02060 if(flags&SWS_FAST_BILINEAR)
02061 MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
02062 else if(flags&SWS_BILINEAR)
02063 MSG_INFO("\nSwScaler: BILINEAR scaler, ");
02064 else if(flags&SWS_BICUBIC)
02065 MSG_INFO("\nSwScaler: BICUBIC scaler, ");
02066 else if(flags&SWS_X)
02067 MSG_INFO("\nSwScaler: Experimental scaler, ");
02068 else if(flags&SWS_POINT)
02069 MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
02070 else if(flags&SWS_AREA)
02071 MSG_INFO("\nSwScaler: Area Averageing scaler, ");
02072 else if(flags&SWS_BICUBLIN)
02073 MSG_INFO("\nSwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
02074 else if(flags&SWS_GAUSS)
02075 MSG_INFO("\nSwScaler: Gaussian scaler, ");
02076 else if(flags&SWS_SINC)
02077 MSG_INFO("\nSwScaler: Sinc scaler, ");
02078 else if(flags&SWS_LANCZOS)
02079 MSG_INFO("\nSwScaler: Lanczos scaler, ");
02080 else if(flags&SWS_SPLINE)
02081 MSG_INFO("\nSwScaler: Bicubic spline scaler, ");
02082 else
02083 MSG_INFO("\nSwScaler: ehh flags invalid?! ");
02084
02085 if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
02086 MSG_INFO("from %s to%s %s ",
02087 vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
02088 else
02089 MSG_INFO("from %s to %s ",
02090 vo_format_name(srcFormat), vo_format_name(dstFormat));
02091
02092 if(flags & SWS_CPU_CAPS_MMX2)
02093 MSG_INFO("using MMX2\n");
02094 else if(flags & SWS_CPU_CAPS_3DNOW)
02095 MSG_INFO("using 3DNOW\n");
02096 else if(flags & SWS_CPU_CAPS_MMX)
02097 MSG_INFO("using MMX\n");
02098 else if(flags & SWS_CPU_CAPS_ALTIVEC)
02099 MSG_INFO("using AltiVec\n");
02100 else
02101 MSG_INFO("using C\n");
02102 }
02103
02104 if(flags & SWS_PRINT_INFO)
02105 {
02106 if(flags & SWS_CPU_CAPS_MMX)
02107 {
02108 if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
02109 MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
02110 else
02111 {
02112 if(c->hLumFilterSize==4)
02113 MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
02114 else if(c->hLumFilterSize==8)
02115 MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
02116 else
02117 MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
02118
02119 if(c->hChrFilterSize==4)
02120 MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
02121 else if(c->hChrFilterSize==8)
02122 MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
02123 else
02124 MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
02125 }
02126 }
02127 else
02128 {
02129 #ifdef ARCH_X86
02130 MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
02131 #else
02132 if(flags & SWS_FAST_BILINEAR)
02133 MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
02134 else
02135 MSG_V("SwScaler: using C scaler for horizontal scaling\n");
02136 #endif
02137 }
02138 if(isPlanarYUV(dstFormat))
02139 {
02140 if(c->vLumFilterSize==1)
02141 MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02142 else
02143 MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02144 }
02145 else
02146 {
02147 if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
02148 MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
02149 "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02150 else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
02151 MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02152 else
02153 MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02154 }
02155
02156 if(dstFormat==IMGFMT_BGR24)
02157 MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
02158 (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
02159 else if(dstFormat==IMGFMT_BGR32)
02160 MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02161 else if(dstFormat==IMGFMT_BGR16)
02162 MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02163 else if(dstFormat==IMGFMT_BGR15)
02164 MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
02165
02166 MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
02167 }
02168 if(flags & SWS_PRINT_INFO)
02169 {
02170 MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
02171 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
02172 MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
02173 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
02174 }
02175
/* general case: install the scaler matching the CPU capabilities */
02176 c->swScale= getSwsFunc(flags);
02177 return c;
02178 }
02179
02184 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
02185 int srcSliceH, uint8_t* dst[], int dstStride[]){
02186
02187 int srcStride2[3]= {srcStride[0], srcStride[1], srcStride[2]};
02188 int dstStride2[3]= {dstStride[0], dstStride[1], dstStride[2]};
02189 return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
02190 }
02191
02195 int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
02196 int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
02197 int srcStride[3];
02198 int dstStride[3];
02199 uint8_t *src[3];
02200 uint8_t *dst[3];
02201 sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
02202 sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
02203
02204
02205 return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
02206 }
02207
02208 SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
02209 float lumaSharpen, float chromaSharpen,
02210 float chromaHShift, float chromaVShift,
02211 int verbose)
02212 {
02213 SwsFilter *filter= malloc(sizeof(SwsFilter));
02214
02215 if(lumaGBlur!=0.0){
02216 filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
02217 filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
02218 }else{
02219 filter->lumH= sws_getIdentityVec();
02220 filter->lumV= sws_getIdentityVec();
02221 }
02222
02223 if(chromaGBlur!=0.0){
02224 filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
02225 filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
02226 }else{
02227 filter->chrH= sws_getIdentityVec();
02228 filter->chrV= sws_getIdentityVec();
02229 }
02230
02231 if(chromaSharpen!=0.0){
02232 SwsVector *g= sws_getConstVec(-1.0, 3);
02233 SwsVector *id= sws_getConstVec(10.0/chromaSharpen, 1);
02234 g->coeff[1]=2.0;
02235 sws_addVec(id, g);
02236 sws_convVec(filter->chrH, id);
02237 sws_convVec(filter->chrV, id);
02238 sws_freeVec(g);
02239 sws_freeVec(id);
02240 }
02241
02242 if(lumaSharpen!=0.0){
02243 SwsVector *g= sws_getConstVec(-1.0, 3);
02244 SwsVector *id= sws_getConstVec(10.0/lumaSharpen, 1);
02245 g->coeff[1]=2.0;
02246 sws_addVec(id, g);
02247 sws_convVec(filter->lumH, id);
02248 sws_convVec(filter->lumV, id);
02249 sws_freeVec(g);
02250 sws_freeVec(id);
02251 }
02252
02253 if(chromaHShift != 0.0)
02254 sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
02255
02256 if(chromaVShift != 0.0)
02257 sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
02258
02259 sws_normalizeVec(filter->chrH, 1.0);
02260 sws_normalizeVec(filter->chrV, 1.0);
02261 sws_normalizeVec(filter->lumH, 1.0);
02262 sws_normalizeVec(filter->lumV, 1.0);
02263
02264 if(verbose) sws_printVec(filter->chrH);
02265 if(verbose) sws_printVec(filter->lumH);
02266
02267 return filter;
02268 }
02269
02274 SwsVector *sws_getGaussianVec(double variance, double quality){
02275 const int length= (int)(variance*quality + 0.5) | 1;
02276 int i;
02277 double *coeff= memalign(sizeof(double), length*sizeof(double));
02278 double middle= (length-1)*0.5;
02279 SwsVector *vec= malloc(sizeof(SwsVector));
02280
02281 vec->coeff= coeff;
02282 vec->length= length;
02283
02284 for(i=0; i<length; i++)
02285 {
02286 double dist= i-middle;
02287 coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
02288 }
02289
02290 sws_normalizeVec(vec, 1.0);
02291
02292 return vec;
02293 }
02294
02295 SwsVector *sws_getConstVec(double c, int length){
02296 int i;
02297 double *coeff= memalign(sizeof(double), length*sizeof(double));
02298 SwsVector *vec= malloc(sizeof(SwsVector));
02299
02300 vec->coeff= coeff;
02301 vec->length= length;
02302
02303 for(i=0; i<length; i++)
02304 coeff[i]= c;
02305
02306 return vec;
02307 }
02308
02309
02310 SwsVector *sws_getIdentityVec(void){
02311 double *coeff= memalign(sizeof(double), sizeof(double));
02312 SwsVector *vec= malloc(sizeof(SwsVector));
02313 coeff[0]= 1.0;
02314
02315 vec->coeff= coeff;
02316 vec->length= 1;
02317
02318 return vec;
02319 }
02320
02321 void sws_normalizeVec(SwsVector *a, double height){
02322 int i;
02323 double sum=0;
02324 double inv;
02325
02326 for(i=0; i<a->length; i++)
02327 sum+= a->coeff[i];
02328
02329 inv= height/sum;
02330
02331 for(i=0; i<a->length; i++)
02332 a->coeff[i]*= inv;
02333 }
02334
02335 void sws_scaleVec(SwsVector *a, double scalar){
02336 int i;
02337
02338 for(i=0; i<a->length; i++)
02339 a->coeff[i]*= scalar;
02340 }
02341
02342 static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
02343 int length= a->length + b->length - 1;
02344 double *coeff= memalign(sizeof(double), length*sizeof(double));
02345 int i, j;
02346 SwsVector *vec= malloc(sizeof(SwsVector));
02347
02348 vec->coeff= coeff;
02349 vec->length= length;
02350
02351 for(i=0; i<length; i++) coeff[i]= 0.0;
02352
02353 for(i=0; i<a->length; i++)
02354 {
02355 for(j=0; j<b->length; j++)
02356 {
02357 coeff[i+j]+= a->coeff[i]*b->coeff[j];
02358 }
02359 }
02360
02361 return vec;
02362 }
02363
02364 static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
02365 int length= MAX(a->length, b->length);
02366 double *coeff= memalign(sizeof(double), length*sizeof(double));
02367 int i;
02368 SwsVector *vec= malloc(sizeof(SwsVector));
02369
02370 vec->coeff= coeff;
02371 vec->length= length;
02372
02373 for(i=0; i<length; i++) coeff[i]= 0.0;
02374
02375 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
02376 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
02377
02378 return vec;
02379 }
02380
02381 static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
02382 int length= MAX(a->length, b->length);
02383 double *coeff= memalign(sizeof(double), length*sizeof(double));
02384 int i;
02385 SwsVector *vec= malloc(sizeof(SwsVector));
02386
02387 vec->coeff= coeff;
02388 vec->length= length;
02389
02390 for(i=0; i<length; i++) coeff[i]= 0.0;
02391
02392 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
02393 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
02394
02395 return vec;
02396 }
02397
02398
02399 static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
02400 int length= a->length + ABS(shift)*2;
02401 double *coeff= memalign(sizeof(double), length*sizeof(double));
02402 int i;
02403 SwsVector *vec= malloc(sizeof(SwsVector));
02404
02405 vec->coeff= coeff;
02406 vec->length= length;
02407
02408 for(i=0; i<length; i++) coeff[i]= 0.0;
02409
02410 for(i=0; i<a->length; i++)
02411 {
02412 coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
02413 }
02414
02415 return vec;
02416 }
02417
02418 void sws_shiftVec(SwsVector *a, int shift){
02419 SwsVector *shifted= sws_getShiftedVec(a, shift);
02420 free(a->coeff);
02421 a->coeff= shifted->coeff;
02422 a->length= shifted->length;
02423 free(shifted);
02424 }
02425
02426 void sws_addVec(SwsVector *a, SwsVector *b){
02427 SwsVector *sum= sws_sumVec(a, b);
02428 free(a->coeff);
02429 a->coeff= sum->coeff;
02430 a->length= sum->length;
02431 free(sum);
02432 }
02433
02434 void sws_subVec(SwsVector *a, SwsVector *b){
02435 SwsVector *diff= sws_diffVec(a, b);
02436 free(a->coeff);
02437 a->coeff= diff->coeff;
02438 a->length= diff->length;
02439 free(diff);
02440 }
02441
02442 void sws_convVec(SwsVector *a, SwsVector *b){
02443 SwsVector *conv= sws_getConvVec(a, b);
02444 free(a->coeff);
02445 a->coeff= conv->coeff;
02446 a->length= conv->length;
02447 free(conv);
02448 }
02449
02450 SwsVector *sws_cloneVec(SwsVector *a){
02451 double *coeff= memalign(sizeof(double), a->length*sizeof(double));
02452 int i;
02453 SwsVector *vec= malloc(sizeof(SwsVector));
02454
02455 vec->coeff= coeff;
02456 vec->length= a->length;
02457
02458 for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];
02459
02460 return vec;
02461 }
02462
02463 void sws_printVec(SwsVector *a){
02464 int i;
02465 double max=0;
02466 double min=0;
02467 double range;
02468
02469 for(i=0; i<a->length; i++)
02470 if(a->coeff[i]>max) max= a->coeff[i];
02471
02472 for(i=0; i<a->length; i++)
02473 if(a->coeff[i]<min) min= a->coeff[i];
02474
02475 range= max - min;
02476
02477 for(i=0; i<a->length; i++)
02478 {
02479 int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
02480 MSG_DBG2("%1.3f ", a->coeff[i]);
02481 for(;x>0; x--) MSG_DBG2(" ");
02482 MSG_DBG2("|\n");
02483 }
02484 }
02485
02486 void sws_freeVec(SwsVector *a){
02487 if(!a) return;
02488 if(a->coeff) free(a->coeff);
02489 a->coeff=NULL;
02490 a->length=0;
02491 free(a);
02492 }
02493
02494 void sws_freeFilter(SwsFilter *filter){
02495 if(!filter) return;
02496
02497 if(filter->lumH) sws_freeVec(filter->lumH);
02498 if(filter->lumV) sws_freeVec(filter->lumV);
02499 if(filter->chrH) sws_freeVec(filter->chrH);
02500 if(filter->chrV) sws_freeVec(filter->chrV);
02501 free(filter);
02502 }
02503
02504
02505 void sws_freeContext(SwsContext *c){
02506 int i;
02507 if(!c) return;
02508
02509 if(c->lumPixBuf)
02510 {
02511 for(i=0; i<c->vLumBufSize; i++)
02512 {
02513 if(c->lumPixBuf[i]) free(c->lumPixBuf[i]);
02514 c->lumPixBuf[i]=NULL;
02515 }
02516 free(c->lumPixBuf);
02517 c->lumPixBuf=NULL;
02518 }
02519
02520 if(c->chrPixBuf)
02521 {
02522 for(i=0; i<c->vChrBufSize; i++)
02523 {
02524 if(c->chrPixBuf[i]) free(c->chrPixBuf[i]);
02525 c->chrPixBuf[i]=NULL;
02526 }
02527 free(c->chrPixBuf);
02528 c->chrPixBuf=NULL;
02529 }
02530
02531 if(c->vLumFilter) free(c->vLumFilter);
02532 c->vLumFilter = NULL;
02533 if(c->vChrFilter) free(c->vChrFilter);
02534 c->vChrFilter = NULL;
02535 if(c->hLumFilter) free(c->hLumFilter);
02536 c->hLumFilter = NULL;
02537 if(c->hChrFilter) free(c->hChrFilter);
02538 c->hChrFilter = NULL;
02539
02540 if(c->vLumFilterPos) free(c->vLumFilterPos);
02541 c->vLumFilterPos = NULL;
02542 if(c->vChrFilterPos) free(c->vChrFilterPos);
02543 c->vChrFilterPos = NULL;
02544 if(c->hLumFilterPos) free(c->hLumFilterPos);
02545 c->hLumFilterPos = NULL;
02546 if(c->hChrFilterPos) free(c->hChrFilterPos);
02547 c->hChrFilterPos = NULL;
02548
02549 if(c->lumMmx2Filter) free(c->lumMmx2Filter);
02550 c->lumMmx2Filter=NULL;
02551 if(c->chrMmx2Filter) free(c->chrMmx2Filter);
02552 c->chrMmx2Filter=NULL;
02553 if(c->lumMmx2FilterPos) free(c->lumMmx2FilterPos);
02554 c->lumMmx2FilterPos=NULL;
02555 if(c->chrMmx2FilterPos) free(c->chrMmx2FilterPos);
02556 c->chrMmx2FilterPos=NULL;
02557 if(c->yuvTable) free(c->yuvTable);
02558 c->yuvTable=NULL;
02559
02560 free(c);
02561 }
02562