00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
/*
 * Instruction-name strings spliced into the inline assembly below.
 *
 * Any previous definitions are dropped first so the selection always reflects
 * the HAVE_* configuration in effect at this point (presumably because this
 * template is compiled more than once with different settings -- confirm
 * against the including file).
 */
#undef MOVNTQ
#undef EMMS
#undef SFENCE

/* Instruction used to leave MMX state: "femms" with 3DNow!, "emms" otherwise. */
#ifdef HAVE_3DNOW
#define EMMS "femms"
#else
#define EMMS "emms"
#endif

/*
 * Store and fence instructions: with MMX2 the 64-bit stores use "movntq" and
 * the fence is "sfence"; without it they fall back to "movq" and "/nop".
 */
#ifdef HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE "/nop"
#endif
00049
/*
 * YUV2RGB: inline-asm fragment converting one group of 8 pixels.
 *
 * Expected on entry (as set up by the functions using this macro):
 *   mm0 = 4 U bytes, mm1 = 4 V bytes (low dword), mm6 = 8 Y bytes, mm4 = 0,
 *   %4  = base address for the *_OFFSET / *_COEFF displacements.
 * On exit mm0, mm1 and mm2 each hold 8 saturated bytes in pixel order; going
 * by the coefficient names they are blue (UB), red (VR) and green (UG + VG).
 * mm3, mm4, mm5, mm6 and mm7 are overwritten, so mm4 has to be cleared again
 * before the fragment is reused.
 *
 * Every line, comments included, must end in a backslash so the whole
 * fragment stays inside the macro.
 */
#define YUV2RGB \
    /* chroma: widen the U and V bytes to 16-bit words (mm4 is zero) */ \
    "punpcklbw %%mm4, %%mm0;" \
    "punpcklbw %%mm4, %%mm1;" \
    \
    /* scale by 8, then remove the chroma offsets */ \
    "psllw $3, %%mm0;" \
    "psllw $3, %%mm1;" \
    \
    "psubsw " U_OFFSET "(%4), %%mm0;" \
    "psubsw " V_OFFSET "(%4), %%mm1;" \
    \
    /* keep copies of U and V for the two green terms */ \
    "movq %%mm0, %%mm2;" \
    "movq %%mm1, %%mm3;" \
    \
    "pmulhw " UG_COEFF "(%4), %%mm2;" \
    "pmulhw " VG_COEFF "(%4), %%mm3;" \
    \
    "pmulhw " UB_COEFF "(%4), %%mm0;" \
    "pmulhw " VR_COEFF "(%4), %%mm1;" \
    \
    /* mm2 = U*UG + V*VG */ \
    "paddsw %%mm3, %%mm2;" \
    \
    /* luma: split the 8 Y bytes into even samples (mm6, masked with */ \
    /* mmx_00ffw -- presumably 0x00ff per word) and odd samples (mm7) */ \
    "movq %%mm6, %%mm7;" \
    "pand " MANGLE(mmx_00ffw) ", %%mm6;" \
    \
    "psrlw $8, %%mm7;" \
    \
    /* scale by 8, remove the luma offset, apply the luma coefficient */ \
    "psllw $3, %%mm6;" \
    "psllw $3, %%mm7;" \
    \
    "psubw " Y_OFFSET "(%4), %%mm6;" \
    "psubw " Y_OFFSET "(%4), %%mm7;" \
    \
    "pmulhw " Y_COEFF "(%4), %%mm6;" \
    "pmulhw " Y_COEFF "(%4), %%mm7;" \
    \
    /* duplicate the chroma terms: mm0/mm1/mm2 serve the even pixels, */ \
    /* mm3/mm4/mm5 the odd pixels (each chroma sample covers 2 pixels) */ \
    "movq %%mm0, %%mm3;" \
    "movq %%mm1, %%mm4;" \
    "movq %%mm2, %%mm5;" \
    \
    /* add luma to every chroma term */ \
    "paddsw %%mm6, %%mm0;" \
    "paddsw %%mm7, %%mm3;" \
    \
    "paddsw %%mm6, %%mm1;" \
    "paddsw %%mm7, %%mm4;" \
    \
    "paddsw %%mm6, %%mm2;" \
    "paddsw %%mm7, %%mm5;" \
    \
    /* saturate the even-pixel words to unsigned bytes */ \
    "packuswb %%mm0, %%mm0;" \
    "packuswb %%mm1, %%mm1;" \
    "packuswb %%mm2, %%mm2;" \
    \
    /* saturate the odd-pixel words to unsigned bytes */ \
    "packuswb %%mm3, %%mm3;" \
    "packuswb %%mm4, %%mm4;" \
    "packuswb %%mm5, %%mm5;" \
    \
    /* interleave even and odd bytes back into pixel order */ \
    "punpcklbw %%mm3, %%mm0;" \
    "punpcklbw %%mm4, %%mm1;" \
    "punpcklbw %%mm5, %%mm2;"
00124
00125
00126 static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00127 int srcSliceH, uint8_t* dst[], int dstStride[]){
00128 int y, h_size;
00129
00130 if(c->srcFormat == IMGFMT_422P){
00131 srcStride[1] *= 2;
00132 srcStride[2] *= 2;
00133 }
00134
00135 h_size= (c->dstW+7)&~7;
00136 if(h_size*2 > dstStride[0]) h_size-=8;
00137
00138 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00139
00140
00141 for (y= 0; y<srcSliceH; y++ ) {
00142 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
00143 uint8_t *_py = src[0] + y*srcStride[0];
00144 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
00145 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
00146 int index= -h_size/2;
00147
00148 b5Dither= dither8[y&1];
00149 g6Dither= dither4[y&1];
00150 g5Dither= dither8[y&1];
00151 r5Dither= dither8[(y+1)&1];
00152
00153
00154 __asm__ __volatile__ (
00155
00156 "movd (%2, %0), %%mm0;"
00157 "movd (%3, %0), %%mm1;"
00158 "movq (%5, %0, 2), %%mm6;"
00159
00160 "1: \n\t"
00161
00162
00163
00164
00165
00166
00167 YUV2RGB
00168
00169 #ifdef DITHER1XBPP
00170 "paddusb "MANGLE(b5Dither)", %%mm0;"
00171 "paddusb "MANGLE(g6Dither)", %%mm2;"
00172 "paddusb "MANGLE(r5Dither)", %%mm1;"
00173 #endif
00174
00175 "pand "MANGLE(mmx_redmask)", %%mm0;"
00176 "pand "MANGLE(mmx_grnmask)", %%mm2;"
00177 "pand "MANGLE(mmx_redmask)", %%mm1;"
00178
00179 "psrlw $3,%%mm0;"
00180 "pxor %%mm4, %%mm4;"
00181
00182 "movq %%mm0, %%mm5;"
00183 "movq %%mm2, %%mm7;"
00184
00185
00186 "punpcklbw %%mm4, %%mm2;"
00187 "punpcklbw %%mm1, %%mm0;"
00188
00189 "psllw $3, %%mm2;"
00190 "por %%mm2, %%mm0;"
00191
00192 "movq 8 (%5, %0, 2), %%mm6;"
00193 MOVNTQ " %%mm0, (%1);"
00194
00195
00196 "punpckhbw %%mm4, %%mm7;"
00197 "punpckhbw %%mm1, %%mm5;"
00198
00199 "psllw $3, %%mm7;"
00200 "movd 4 (%2, %0), %%mm0;"
00201
00202 "por %%mm7, %%mm5;"
00203 "movd 4 (%3, %0), %%mm1;"
00204
00205 MOVNTQ " %%mm5, 8 (%1);"
00206
00207 "addl $16, %1 \n\t"
00208 "addl $4, %0 \n\t"
00209 " js 1b \n\t"
00210
00211 : "+r" (index), "+r" (_image)
00212 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
00213 );
00214 }
00215
00216 __asm__ __volatile__ (EMMS);
00217
00218 return srcSliceH;
00219 }
00220
00221 static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00222 int srcSliceH, uint8_t* dst[], int dstStride[]){
00223 int y, h_size;
00224
00225 if(c->srcFormat == IMGFMT_422P){
00226 srcStride[1] *= 2;
00227 srcStride[2] *= 2;
00228 }
00229
00230 h_size= (c->dstW+7)&~7;
00231 if(h_size*2 > dstStride[0]) h_size-=8;
00232
00233 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00234
00235
00236 for (y= 0; y<srcSliceH; y++ ) {
00237 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
00238 uint8_t *_py = src[0] + y*srcStride[0];
00239 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
00240 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
00241 int index= -h_size/2;
00242
00243 b5Dither= dither8[y&1];
00244 g6Dither= dither4[y&1];
00245 g5Dither= dither8[y&1];
00246 r5Dither= dither8[(y+1)&1];
00247
00248
00249 __asm__ __volatile__ (
00250
00251 "movd (%2, %0), %%mm0;"
00252 "movd (%3, %0), %%mm1;"
00253 "movq (%5, %0, 2), %%mm6;"
00254
00255 "1: \n\t"
00256 YUV2RGB
00257
00258 #ifdef DITHER1XBPP
00259 "paddusb "MANGLE(b5Dither)", %%mm0 \n\t"
00260 "paddusb "MANGLE(g5Dither)", %%mm2 \n\t"
00261 "paddusb "MANGLE(r5Dither)", %%mm1 \n\t"
00262 #endif
00263
00264
00265 "pand "MANGLE(mmx_redmask)", %%mm0;"
00266 "pand "MANGLE(mmx_redmask)", %%mm2;"
00267 "pand "MANGLE(mmx_redmask)", %%mm1;"
00268
00269 "psrlw $3,%%mm0;"
00270 "psrlw $1,%%mm1;"
00271 "pxor %%mm4, %%mm4;"
00272
00273 "movq %%mm0, %%mm5;"
00274 "movq %%mm2, %%mm7;"
00275
00276
00277 "punpcklbw %%mm4, %%mm2;"
00278 "punpcklbw %%mm1, %%mm0;"
00279
00280 "psllw $2, %%mm2;"
00281 "por %%mm2, %%mm0;"
00282
00283 "movq 8 (%5, %0, 2), %%mm6;"
00284 MOVNTQ " %%mm0, (%1);"
00285
00286
00287 "punpckhbw %%mm4, %%mm7;"
00288 "punpckhbw %%mm1, %%mm5;"
00289
00290 "psllw $2, %%mm7;"
00291 "movd 4 (%2, %0), %%mm0;"
00292
00293 "por %%mm7, %%mm5;"
00294 "movd 4 (%3, %0), %%mm1;"
00295
00296 MOVNTQ " %%mm5, 8 (%1);"
00297
00298 "addl $16, %1 \n\t"
00299 "addl $4, %0 \n\t"
00300 " js 1b \n\t"
00301 : "+r" (index), "+r" (_image)
00302 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
00303 );
00304 }
00305
00306 __asm__ __volatile__ (EMMS);
00307 return srcSliceH;
00308 }
00309
00310 static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00311 int srcSliceH, uint8_t* dst[], int dstStride[]){
00312 int y, h_size;
00313
00314 if(c->srcFormat == IMGFMT_422P){
00315 srcStride[1] *= 2;
00316 srcStride[2] *= 2;
00317 }
00318
00319 h_size= (c->dstW+7)&~7;
00320 if(h_size*3 > dstStride[0]) h_size-=8;
00321
00322 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00323
00324 for (y= 0; y<srcSliceH; y++ ) {
00325 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
00326 uint8_t *_py = src[0] + y*srcStride[0];
00327 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
00328 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
00329 int index= -h_size/2;
00330
00331
00332
00333 __asm__ __volatile__ (
00334
00335 "movd (%2, %0), %%mm0;"
00336 "movd (%3, %0), %%mm1;"
00337 "movq (%5, %0, 2), %%mm6;"
00338
00339 "1: \n\t"
00340 YUV2RGB
00341
00342 #ifdef HAVE_MMX2
00343 "movq "MANGLE(M24A)", %%mm4 \n\t"
00344 "movq "MANGLE(M24C)", %%mm7 \n\t"
00345 "pshufw $0x50, %%mm0, %%mm5 \n\t"
00346 "pshufw $0x50, %%mm2, %%mm3 \n\t"
00347 "pshufw $0x00, %%mm1, %%mm6 \n\t"
00348
00349 "pand %%mm4, %%mm5 \n\t"
00350 "pand %%mm4, %%mm3 \n\t"
00351 "pand %%mm7, %%mm6 \n\t"
00352
00353 "psllq $8, %%mm3 \n\t"
00354 "por %%mm5, %%mm6 \n\t"
00355 "por %%mm3, %%mm6 \n\t"
00356 MOVNTQ" %%mm6, (%1) \n\t"
00357
00358 "psrlq $8, %%mm2 \n\t"
00359 "pshufw $0xA5, %%mm0, %%mm5 \n\t"
00360 "pshufw $0x55, %%mm2, %%mm3 \n\t"
00361 "pshufw $0xA5, %%mm1, %%mm6 \n\t"
00362
00363 "pand "MANGLE(M24B)", %%mm5 \n\t"
00364 "pand %%mm7, %%mm3 \n\t"
00365 "pand %%mm4, %%mm6 \n\t"
00366
00367 "por %%mm5, %%mm3 \n\t"
00368 "por %%mm3, %%mm6 \n\t"
00369 MOVNTQ" %%mm6, 8(%1) \n\t"
00370
00371 "pshufw $0xFF, %%mm0, %%mm5 \n\t"
00372 "pshufw $0xFA, %%mm2, %%mm3 \n\t"
00373 "pshufw $0xFA, %%mm1, %%mm6 \n\t"
00374 "movd 4 (%2, %0), %%mm0;"
00375
00376 "pand %%mm7, %%mm5 \n\t"
00377 "pand %%mm4, %%mm3 \n\t"
00378 "pand "MANGLE(M24B)", %%mm6 \n\t"
00379 "movd 4 (%3, %0), %%mm1;"
00380 \
00381 "por %%mm5, %%mm3 \n\t"
00382 "por %%mm3, %%mm6 \n\t"
00383 MOVNTQ" %%mm6, 16(%1) \n\t"
00384 "movq 8 (%5, %0, 2), %%mm6;"
00385 "pxor %%mm4, %%mm4 \n\t"
00386
00387 #else
00388
00389 "pxor %%mm4, %%mm4 \n\t"
00390 "movq %%mm0, %%mm5 \n\t"
00391 "movq %%mm1, %%mm6 \n\t"
00392 "punpcklbw %%mm2, %%mm0 \n\t"
00393 "punpcklbw %%mm4, %%mm1 \n\t"
00394 "punpckhbw %%mm2, %%mm5 \n\t"
00395 "punpckhbw %%mm4, %%mm6 \n\t"
00396 "movq %%mm0, %%mm7 \n\t"
00397 "movq %%mm5, %%mm3 \n\t"
00398 "punpcklwd %%mm1, %%mm7 \n\t"
00399 "punpckhwd %%mm1, %%mm0 \n\t"
00400 "punpcklwd %%mm6, %%mm5 \n\t"
00401 "punpckhwd %%mm6, %%mm3 \n\t"
00402
00403 "movq %%mm7, %%mm2 \n\t"
00404 "movq %%mm0, %%mm6 \n\t"
00405 "movq %%mm5, %%mm1 \n\t"
00406 "movq %%mm3, %%mm4 \n\t"
00407
00408 "psllq $40, %%mm7 \n\t"
00409 "psllq $40, %%mm0 \n\t"
00410 "psllq $40, %%mm5 \n\t"
00411 "psllq $40, %%mm3 \n\t"
00412
00413 "punpckhdq %%mm2, %%mm7 \n\t"
00414 "punpckhdq %%mm6, %%mm0 \n\t"
00415 "punpckhdq %%mm1, %%mm5 \n\t"
00416 "punpckhdq %%mm4, %%mm3 \n\t"
00417
00418 "psrlq $8, %%mm7 \n\t"
00419 "movq %%mm0, %%mm6 \n\t"
00420 "psllq $40, %%mm0 \n\t"
00421 "por %%mm0, %%mm7 \n\t"
00422 MOVNTQ" %%mm7, (%1) \n\t"
00423
00424 "movd 4 (%2, %0), %%mm0;"
00425
00426 "psrlq $24, %%mm6 \n\t"
00427 "movq %%mm5, %%mm1 \n\t"
00428 "psllq $24, %%mm5 \n\t"
00429 "por %%mm5, %%mm6 \n\t"
00430 MOVNTQ" %%mm6, 8(%1) \n\t"
00431
00432 "movq 8 (%5, %0, 2), %%mm6;"
00433
00434 "psrlq $40, %%mm1 \n\t"
00435 "psllq $8, %%mm3 \n\t"
00436 "por %%mm3, %%mm1 \n\t"
00437 MOVNTQ" %%mm1, 16(%1) \n\t"
00438
00439 "movd 4 (%3, %0), %%mm1;"
00440 "pxor %%mm4, %%mm4 \n\t"
00441 #endif
00442
00443 "addl $24, %1 \n\t"
00444 "addl $4, %0 \n\t"
00445 " js 1b \n\t"
00446
00447 : "+r" (index), "+r" (_image)
00448 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
00449 );
00450 }
00451
00452 __asm__ __volatile__ (EMMS);
00453 return srcSliceH;
00454 }
00455
00456 static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
00457 int srcSliceH, uint8_t* dst[], int dstStride[]){
00458 int y, h_size;
00459
00460 if(c->srcFormat == IMGFMT_422P){
00461 srcStride[1] *= 2;
00462 srcStride[2] *= 2;
00463 }
00464
00465 h_size= (c->dstW+7)&~7;
00466 if(h_size*4 > dstStride[0]) h_size-=8;
00467
00468 __asm__ __volatile__ ("pxor %mm4, %mm4;" );
00469
00470 for (y= 0; y<srcSliceH; y++ ) {
00471 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
00472 uint8_t *_py = src[0] + y*srcStride[0];
00473 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
00474 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
00475 int index= -h_size/2;
00476
00477
00478
00479 __asm__ __volatile__ (
00480
00481 "movd (%2, %0), %%mm0;"
00482 "movd (%3, %0), %%mm1;"
00483 "movq (%5, %0, 2), %%mm6;"
00484
00485 "1: \n\t"
00486 YUV2RGB
00487
00488
00489
00490
00491 "pxor %%mm3, %%mm3;"
00492
00493 "movq %%mm0, %%mm6;"
00494 "movq %%mm1, %%mm7;"
00495
00496 "movq %%mm0, %%mm4;"
00497 "movq %%mm1, %%mm5;"
00498
00499 "punpcklbw %%mm2, %%mm6;"
00500 "punpcklbw %%mm3, %%mm7;"
00501
00502 "punpcklwd %%mm7, %%mm6;"
00503 MOVNTQ " %%mm6, (%1);"
00504
00505 "movq %%mm0, %%mm6;"
00506 "punpcklbw %%mm2, %%mm6;"
00507
00508 "punpckhwd %%mm7, %%mm6;"
00509 MOVNTQ " %%mm6, 8 (%1);"
00510
00511 "punpckhbw %%mm2, %%mm4;"
00512 "punpckhbw %%mm3, %%mm5;"
00513
00514 "punpcklwd %%mm5, %%mm4;"
00515 MOVNTQ " %%mm4, 16 (%1);"
00516
00517 "movq %%mm0, %%mm4;"
00518 "punpckhbw %%mm2, %%mm4;"
00519
00520 "punpckhwd %%mm5, %%mm4;"
00521 MOVNTQ " %%mm4, 24 (%1);"
00522
00523 "movd 4 (%2, %0), %%mm0;"
00524 "movd 4 (%3, %0), %%mm1;"
00525
00526 "pxor %%mm4, %%mm4;"
00527 "movq 8 (%5, %0, 2), %%mm6;"
00528
00529 "addl $32, %1 \n\t"
00530 "addl $4, %0 \n\t"
00531 " js 1b \n\t"
00532
00533 : "+r" (index), "+r" (_image)
00534 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
00535 );
00536 }
00537
00538 __asm__ __volatile__ (EMMS);
00539 return srcSliceH;
00540 }