00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
/* This file is a template that is #included multiple times with different
 * HAVE_* CPU-feature macros set (RENAME() template pattern).  Undefine all
 * per-CPU asm helper macros so each inclusion redefines them cleanly. */
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW
#undef EMMS
#undef SFENCE

#ifdef HAVE_3DNOW
/* On 3DNow! CPUs "femms" is the fast variant of "emms" for leaving MMX state. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
00032
/* Select prefetch instructions by CPU capability; "/nop" expands to an
 * assembler comment (no-op) when no prefetch instruction is available. */
#ifdef HAVE_3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
#define PREFETCH "/nop"
#define PREFETCHW "/nop"
#endif

/* sfence is only needed/available when non-temporal stores (movntq) are used. */
#ifdef HAVE_MMX2
#define SFENCE "sfence"
#else
#define SFENCE "/nop"
#endif
00049
/* Packed byte average: pavgb on MMX2, pavgusb on 3DNow! (same semantics);
 * deliberately undefined when neither extension is present. */
#ifdef HAVE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif defined (HAVE_3DNOW)
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif

/* 64-bit store: non-temporal (cache-bypassing) movntq on MMX2, plain movq otherwise. */
#ifdef HAVE_MMX2
#define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif
00061
00062 #ifdef HAVE_ALTIVEC
00063 #include "swscale_altivec_template.c"
00064 #endif
00065
/* Vertical multi-tap filter to planar YV12 output (one plane).
 * Walks a NULL-terminated (filter coefficient, source line pointer) list
 * starting at 'offset'(%0), accumulating pmulhw products into mm3/mm4,
 * then shifts, packs to bytes and stores 8 pixels per outer iteration.
 * x       : byte offset into each source line (used for the V half of chroma)
 * offset  : offset of the filter table within the context (%0)
 * Inputs: %0 = context base, %1 = dest, %2 = width; clobbers eax/edx/esi.
 * NOTE(review): filter entries appear to be {ptr, coeff} pairs 16 bytes
 * apart with a NULL pointer terminator — confirm against the filter setup code. */
#define YSCALEYUV2YV12X(x, offset) \
"xorl %%eax, %%eax \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
"leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
".balign 16 \n\t" \
"1: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" \
"movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" \
"movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" \
"addl $16, %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"testl %%esi, %%esi \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
" jnz 1b \n\t"\
"psraw $3, %%mm3 \n\t"\
"psraw $3, %%mm4 \n\t"\
"packuswb %%mm4, %%mm3 \n\t"\
MOVNTQ(%%mm3, (%1, %%eax))\
"addl $8, %%eax \n\t"\
"cmpl %2, %%eax \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
"leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"jb 1b \n\t"
00096
/* 1-tap (unscaled) vertical pass: convert one line of 16-bit intermediate
 * samples to 8-bit by >>7 with unsigned saturation, 8 pixels per iteration.
 * Inputs: %0 = src end, %1 = dest end, %2 = -width (counts up to 0);
 * the loop exits when "addl $8" no longer borrows (jnc). Clobbers eax. */
#define YSCALEYUV2YV121 \
"movl %2, %%eax \n\t"\
".balign 16 \n\t" \
"1: \n\t"\
"movq (%0, %%eax, 2), %%mm0 \n\t"\
"movq 8(%0, %%eax, 2), %%mm1 \n\t"\
"psraw $7, %%mm0 \n\t"\
"psraw $7, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
MOVNTQ(%%mm0, (%1, %%eax))\
"addl $8, %%eax \n\t"\
"jnc 1b \n\t"
00109
00110
00111
00112
00113
00114
00115
00116
/* Vertical multi-tap filtering for packed output: first inner loop (2:)
 * accumulates the chroma filter taps into mm3 (U) / mm4 (V), second inner
 * loop accumulates the luma taps into mm1 / mm7 (two groups of 4 pixels).
 * Both walk NULL-terminated filter lists as in YSCALEYUV2YV12X.
 * Leaves the results unshifted for the caller to post-process.
 * Inputs: %0 = context base; clobbers eax/edx/esi.
 * NOTE(review): 4096(%%esi,...) is the V-plane offset within the chroma
 * buffers (2048 int16 samples) — confirm buffer layout. */
#define YSCALEYUV2PACKEDX \
"xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\
"nop \n\t"\
"1: \n\t"\
"leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
".balign 16 \n\t"\
"2: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" \
"movq (%%esi, %%eax), %%mm2 \n\t" \
"movq 4096(%%esi, %%eax), %%mm5 \n\t" \
"addl $16, %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"testl %%esi, %%esi \n\t"\
" jnz 2b \n\t"\
\
"leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
"movq %%mm1, %%mm7 \n\t"\
".balign 16 \n\t"\
"2: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" \
"movq (%%esi, %%eax, 2), %%mm2 \n\t" \
"movq 8(%%esi, %%eax, 2), %%mm5 \n\t" \
"addl $16, %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm1 \n\t"\
"paddw %%mm5, %%mm7 \n\t"\
"testl %%esi, %%esi \n\t"\
" jnz 2b \n\t"\
00158
/* Multi-tap vertical filter + YUV->RGB matrix for the X (variable filter
 * size) path.  Runs YSCALEYUV2PACKEDX, then applies the per-context
 * offsets/coefficients (U/V/Y_OFFSET, *_COEFF) and interleaves the low and
 * high 4 pixels so that on exit:
 *   mm2 = B, mm4 = G, mm5 = R (8 packed unsigned bytes each), mm7 = 0.
 * Register choreography (punpckl/hwd + paddw pairs) is order-sensitive. */
#define YSCALEYUV2RGBX \
YSCALEYUV2PACKEDX\
"psubw "U_OFFSET"(%0), %%mm3 \n\t" \
"psubw "V_OFFSET"(%0), %%mm4 \n\t" \
"movq %%mm3, %%mm2 \n\t" \
"movq %%mm4, %%mm5 \n\t" \
"pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
"pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
\
"pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
"pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
"psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
"psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
"pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
"pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"movq %%mm5, %%mm6 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
"punpcklwd %%mm2, %%mm2 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm4, %%mm4 \n\t"\
"paddw %%mm1, %%mm2 \n\t"\
"paddw %%mm1, %%mm5 \n\t"\
"paddw %%mm1, %%mm4 \n\t"\
"punpckhwd %%mm0, %%mm0 \n\t"\
"punpckhwd %%mm6, %%mm6 \n\t"\
"punpckhwd %%mm3, %%mm3 \n\t"\
"paddw %%mm7, %%mm0 \n\t"\
"paddw %%mm7, %%mm6 \n\t"\
"paddw %%mm7, %%mm3 \n\t"\
\
"packuswb %%mm0, %%mm2 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"
#if 0
/* DISABLED legacy full-chroma-interpolation YUV->RGB kernel (kept for
 * reference only; compiled out by the #if 0).  Interpolates both luma and
 * chroma between two lines using the %6/%7 alpha factors, then converts
 * through the MANGLE()d global coefficient tables. */
#define FULL_YSCALEYUV2RGB \
"pxor %%mm7, %%mm7 \n\t"\
"movd %6, %%mm6 \n\t" \
"punpcklwd %%mm6, %%mm6 \n\t"\
"punpcklwd %%mm6, %%mm6 \n\t"\
"movd %7, %%mm5 \n\t" \
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%0, %%eax, 2), %%mm0 \n\t" \
"movq (%1, %%eax, 2), %%mm1 \n\t" \
"movq (%2, %%eax,2), %%mm2 \n\t" \
"movq (%3, %%eax,2), %%mm3 \n\t" \
"psubw %%mm1, %%mm0 \n\t" \
"psubw %%mm3, %%mm2 \n\t" \
"pmulhw %%mm6, %%mm0 \n\t" \
"pmulhw %%mm5, %%mm2 \n\t" \
"psraw $4, %%mm1 \n\t" \
"movq 4096(%2, %%eax,2), %%mm4 \n\t" \
"psraw $4, %%mm3 \n\t" \
"paddw %%mm0, %%mm1 \n\t" \
"movq 4096(%3, %%eax,2), %%mm0 \n\t" \
"paddw %%mm2, %%mm3 \n\t" \
"psubw %%mm0, %%mm4 \n\t" \
"psubw "MANGLE(w80)", %%mm1 \n\t" \
"psubw "MANGLE(w400)", %%mm3 \n\t" \
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
\
\
"pmulhw %%mm5, %%mm4 \n\t" \
"movq %%mm3, %%mm2 \n\t" \
"pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
"psraw $4, %%mm0 \n\t" \
"pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
"paddw %%mm4, %%mm0 \n\t" \
"psubw "MANGLE(w400)", %%mm0 \n\t" \
\
\
"movq %%mm0, %%mm4 \n\t" \
"pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
"paddw %%mm1, %%mm3 \n\t" \
"paddw %%mm1, %%mm0 \n\t" \
"packuswb %%mm3, %%mm3 \n\t"\
\
"packuswb %%mm0, %%mm0 \n\t"\
"paddw %%mm4, %%mm2 \n\t"\
"paddw %%mm2, %%mm1 \n\t" \
\
"packuswb %%mm1, %%mm1 \n\t"
#endif
00250
/* 2-tap vertical interpolation for packed (e.g. YUY2) output.
 * Pre-scales the stored chroma/luma filter coefficients by >>3 (written
 * back into the context), then per iteration linearly interpolates between
 * two chroma lines (%2/%3, V at +4096) into mm3/mm4 and two luma lines
 * (%0/%1) into mm1/mm7.  index = loop register, c = context pointer. */
#define YSCALEYUV2PACKED(index, c) \
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
"movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
"psraw $3, %%mm0 \n\t"\
"psraw $3, %%mm1 \n\t"\
"movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
"movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
"xorl "#index", "#index" \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" \
"movq (%3, "#index"), %%mm3 \n\t" \
"movq 4096(%2, "#index"), %%mm5 \n\t" \
"movq 4096(%3, "#index"), %%mm4 \n\t" \
"psubw %%mm3, %%mm2 \n\t" \
"psubw %%mm4, %%mm5 \n\t" \
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
"pmulhw %%mm0, %%mm2 \n\t" \
"pmulhw %%mm0, %%mm5 \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \
"paddw %%mm2, %%mm3 \n\t" \
"paddw %%mm5, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm0 \n\t" \
"movq (%1, "#index", 2), %%mm1 \n\t" \
"movq 8(%0, "#index", 2), %%mm6 \n\t" \
"movq 8(%1, "#index", 2), %%mm7 \n\t" \
"psubw %%mm1, %%mm0 \n\t" \
"psubw %%mm7, %%mm6 \n\t" \
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" \
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" \
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t" \
"paddw %%mm0, %%mm1 \n\t" \
"paddw %%mm6, %%mm7 \n\t" \
00286
/* 2-tap vertical interpolation + YUV->RGB conversion (bilinear path).
 * Interpolates chroma (%2/%3) and luma (%0/%1) between two lines using the
 * coefficients stored in the context, applies the colorspace matrix, and
 * exits with mm2 = B, mm4 = G, mm5 = R packed bytes and mm7 = 0, mirroring
 * YSCALEYUV2RGBX.  index = loop register, c = context pointer. */
#define YSCALEYUV2RGB(index, c) \
"xorl "#index", "#index" \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" \
"movq (%3, "#index"), %%mm3 \n\t" \
"movq 4096(%2, "#index"), %%mm5\n\t" \
"movq 4096(%3, "#index"), %%mm4\n\t" \
"psubw %%mm3, %%mm2 \n\t" \
"psubw %%mm4, %%mm5 \n\t" \
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
"pmulhw %%mm0, %%mm2 \n\t" \
"pmulhw %%mm0, %%mm5 \n\t" \
"psraw $4, %%mm3 \n\t" \
"psraw $4, %%mm4 \n\t" \
"paddw %%mm2, %%mm3 \n\t" \
"paddw %%mm5, %%mm4 \n\t" \
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
"psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
"movq %%mm3, %%mm2 \n\t" \
"movq %%mm4, %%mm5 \n\t" \
"pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
"pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
\
"movq (%0, "#index", 2), %%mm0 \n\t" \
"movq (%1, "#index", 2), %%mm1 \n\t" \
"movq 8(%0, "#index", 2), %%mm6\n\t" \
"movq 8(%1, "#index", 2), %%mm7\n\t" \
"psubw %%mm1, %%mm0 \n\t" \
"psubw %%mm7, %%mm6 \n\t" \
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" \
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" \
"psraw $4, %%mm1 \n\t" \
"psraw $4, %%mm7 \n\t" \
"paddw %%mm0, %%mm1 \n\t" \
"paddw %%mm6, %%mm7 \n\t" \
"pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
"pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
"psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
"psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
"pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
"pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"movq %%mm5, %%mm6 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
"punpcklwd %%mm2, %%mm2 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm4, %%mm4 \n\t"\
"paddw %%mm1, %%mm2 \n\t"\
"paddw %%mm1, %%mm5 \n\t"\
"paddw %%mm1, %%mm4 \n\t"\
"punpckhwd %%mm0, %%mm0 \n\t"\
"punpckhwd %%mm6, %%mm6 \n\t"\
"punpckhwd %%mm3, %%mm3 \n\t"\
"paddw %%mm7, %%mm0 \n\t"\
"paddw %%mm7, %%mm6 \n\t"\
"paddw %%mm7, %%mm3 \n\t"\
\
"packuswb %%mm0, %%mm2 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"
00351
/* 1-tap (single source line) load for packed output: chroma from %2
 * (V at +4096) into mm3/mm4 and luma from %0 into mm1/mm7, each >>7.
 * 'c' is unused here but kept for signature parity with the 2-tap macro. */
#define YSCALEYUV2PACKED1(index, c) \
"xorl "#index", "#index" \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" \
"movq 4096(%2, "#index"), %%mm4 \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t" \
"movq 8(%0, "#index", 2), %%mm7 \n\t" \
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t" \
00364
/* 1-tap (single source line) YUV->RGB conversion; same colorspace-matrix
 * tail as YSCALEYUV2RGB but without vertical interpolation (inputs >>4).
 * Exits with mm2 = B, mm4 = G, mm5 = R packed bytes, mm7 = 0. */
#define YSCALEYUV2RGB1(index, c) \
"xorl "#index", "#index" \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" \
"movq 4096(%2, "#index"), %%mm4 \n\t" \
"psraw $4, %%mm3 \n\t" \
"psraw $4, %%mm4 \n\t" \
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
"psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
"movq %%mm3, %%mm2 \n\t" \
"movq %%mm4, %%mm5 \n\t" \
"pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
"pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
\
"movq (%0, "#index", 2), %%mm1 \n\t" \
"movq 8(%0, "#index", 2), %%mm7 \n\t" \
"psraw $4, %%mm1 \n\t" \
"psraw $4, %%mm7 \n\t" \
"pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
"pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
"psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
"psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
"pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
"pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"movq %%mm5, %%mm6 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
"punpcklwd %%mm2, %%mm2 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm4, %%mm4 \n\t"\
"paddw %%mm1, %%mm2 \n\t"\
"paddw %%mm1, %%mm5 \n\t"\
"paddw %%mm1, %%mm4 \n\t"\
"punpckhwd %%mm0, %%mm0 \n\t"\
"punpckhwd %%mm6, %%mm6 \n\t"\
"punpckhwd %%mm3, %%mm3 \n\t"\
"paddw %%mm7, %%mm0 \n\t"\
"paddw %%mm7, %%mm6 \n\t"\
"paddw %%mm7, %%mm3 \n\t"\
\
"packuswb %%mm0, %%mm2 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"
00412
/* 1-tap packed-output load averaging two chroma lines (%2 and %3):
 * (a+b)>>8 for chroma into mm3/mm4, luma from %0 >>7 into mm1/mm7.
 * Used when the two chroma source lines should be blended equally. */
#define YSCALEYUV2PACKED1b(index, c) \
"xorl "#index", "#index" \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" \
"movq (%3, "#index"), %%mm3 \n\t" \
"movq 4096(%2, "#index"), %%mm5 \n\t" \
"movq 4096(%3, "#index"), %%mm4 \n\t" \
"paddw %%mm2, %%mm3 \n\t" \
"paddw %%mm5, %%mm4 \n\t" \
"psrlw $8, %%mm3 \n\t" \
"psrlw $8, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t" \
"movq 8(%0, "#index", 2), %%mm7 \n\t" \
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t"
00429
00430
/* 1-tap YUV->RGB with equal blending of two chroma lines ((a+b)>>5 keeps
 * the extra precision used by the matrix stage); luma from a single line.
 * Same matrix tail as YSCALEYUV2RGB1; exits with mm2 = B, mm4 = G,
 * mm5 = R packed bytes, mm7 = 0. */
#define YSCALEYUV2RGB1b(index, c) \
"xorl "#index", "#index" \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" \
"movq (%3, "#index"), %%mm3 \n\t" \
"movq 4096(%2, "#index"), %%mm5 \n\t" \
"movq 4096(%3, "#index"), %%mm4 \n\t" \
"paddw %%mm2, %%mm3 \n\t" \
"paddw %%mm5, %%mm4 \n\t" \
"psrlw $5, %%mm3 \n\t" \
"psrlw $5, %%mm4 \n\t" \
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
"psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
"movq %%mm3, %%mm2 \n\t" \
"movq %%mm4, %%mm5 \n\t" \
"pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
"pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
\
"movq (%0, "#index", 2), %%mm1 \n\t" \
"movq 8(%0, "#index", 2), %%mm7 \n\t" \
"psraw $4, %%mm1 \n\t" \
"psraw $4, %%mm7 \n\t" \
"pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
"pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
"psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
"psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
"pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
"pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"movq %%mm5, %%mm6 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
"punpcklwd %%mm2, %%mm2 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm4, %%mm4 \n\t"\
"paddw %%mm1, %%mm2 \n\t"\
"paddw %%mm1, %%mm5 \n\t"\
"paddw %%mm1, %%mm4 \n\t"\
"punpckhwd %%mm0, %%mm0 \n\t"\
"punpckhwd %%mm6, %%mm6 \n\t"\
"punpckhwd %%mm3, %%mm3 \n\t"\
"paddw %%mm7, %%mm0 \n\t"\
"paddw %%mm7, %%mm6 \n\t"\
"paddw %%mm7, %%mm3 \n\t"\
\
"packuswb %%mm0, %%mm2 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"
00482
/* Store 8 pixels as 32-bit BGR(0): interleave B (mm2), G (mm4), R (mm5)
 * and the zero register mm7 into 4 quadwords and write them, then advance
 * 'index' by 8 and loop back to label 1 while index < dstw. */
#define WRITEBGR32(dst, dstw, index) \
\
"movq %%mm2, %%mm1 \n\t" \
"movq %%mm5, %%mm6 \n\t" \
"punpcklbw %%mm4, %%mm2 \n\t" \
"punpcklbw %%mm7, %%mm5 \n\t" \
"punpckhbw %%mm4, %%mm1 \n\t" \
"punpckhbw %%mm7, %%mm6 \n\t" \
"movq %%mm2, %%mm0 \n\t" \
"movq %%mm1, %%mm3 \n\t" \
"punpcklwd %%mm5, %%mm0 \n\t" \
"punpckhwd %%mm5, %%mm2 \n\t" \
"punpcklwd %%mm6, %%mm1 \n\t" \
"punpckhwd %%mm6, %%mm3 \n\t" \
\
MOVNTQ(%%mm0, (dst, index, 4))\
MOVNTQ(%%mm2, 8(dst, index, 4))\
MOVNTQ(%%mm1, 16(dst, index, 4))\
MOVNTQ(%%mm3, 24(dst, index, 4))\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00506
/* Store 8 pixels as RGB565: mask B/G/R to 5/6/5 significant bits, shift
 * and OR them into 16-bit words, write two quadwords, advance the loop. */
#define WRITEBGR16(dst, dstw, index) \
"pand "MANGLE(bF8)", %%mm2 \n\t" \
"pand "MANGLE(bFC)", %%mm4 \n\t" \
"pand "MANGLE(bF8)", %%mm5 \n\t" \
"psrlq $3, %%mm2 \n\t"\
\
"movq %%mm2, %%mm1 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm5, %%mm2 \n\t"\
"punpckhbw %%mm7, %%mm4 \n\t"\
"punpckhbw %%mm5, %%mm1 \n\t"\
\
"psllq $3, %%mm3 \n\t"\
"psllq $3, %%mm4 \n\t"\
\
"por %%mm3, %%mm2 \n\t"\
"por %%mm4, %%mm1 \n\t"\
\
MOVNTQ(%%mm2, (dst, index, 2))\
MOVNTQ(%%mm1, 8(dst, index, 2))\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00533
/* Store 8 pixels as RGB555: like WRITEBGR16 but all three channels are
 * masked to 5 bits (R additionally >>1 to drop into the 5-bit field). */
#define WRITEBGR15(dst, dstw, index) \
"pand "MANGLE(bF8)", %%mm2 \n\t" \
"pand "MANGLE(bF8)", %%mm4 \n\t" \
"pand "MANGLE(bF8)", %%mm5 \n\t" \
"psrlq $3, %%mm2 \n\t"\
"psrlq $1, %%mm5 \n\t"\
\
"movq %%mm2, %%mm1 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm5, %%mm2 \n\t"\
"punpckhbw %%mm7, %%mm4 \n\t"\
"punpckhbw %%mm5, %%mm1 \n\t"\
\
"psllq $2, %%mm3 \n\t"\
"psllq $2, %%mm4 \n\t"\
\
"por %%mm3, %%mm2 \n\t"\
"por %%mm4, %%mm1 \n\t"\
\
MOVNTQ(%%mm2, (dst, index, 2))\
MOVNTQ(%%mm1, 8(dst, index, 2))\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00561
/* Legacy 24-bit BGR packer (superseded by WRITEBGR24MMX/MMX2 below, but
 * kept in the file): expands B/G/R to BGR0 dwords, then shuffles the
 * 32-bit pixels down to 24 bytes with mask/shift/or sequences.  Note that
 * unlike WRITEBGR32 this advances 'dst' itself by 24 each iteration. */
#define WRITEBGR24OLD(dst, dstw, index) \
\
"movq %%mm2, %%mm1 \n\t" \
"movq %%mm5, %%mm6 \n\t" \
"punpcklbw %%mm4, %%mm2 \n\t" \
"punpcklbw %%mm7, %%mm5 \n\t" \
"punpckhbw %%mm4, %%mm1 \n\t" \
"punpckhbw %%mm7, %%mm6 \n\t" \
"movq %%mm2, %%mm0 \n\t" \
"movq %%mm1, %%mm3 \n\t" \
"punpcklwd %%mm5, %%mm0 \n\t" \
"punpckhwd %%mm5, %%mm2 \n\t" \
"punpcklwd %%mm6, %%mm1 \n\t" \
"punpckhwd %%mm6, %%mm3 \n\t" \
\
"movq %%mm0, %%mm4 \n\t" \
"psrlq $8, %%mm0 \n\t" \
"pand "MANGLE(bm00000111)", %%mm4\n\t" \
"pand "MANGLE(bm11111000)", %%mm0\n\t" \
"por %%mm4, %%mm0 \n\t" \
"movq %%mm2, %%mm4 \n\t" \
"psllq $48, %%mm2 \n\t" \
"por %%mm2, %%mm0 \n\t" \
\
"movq %%mm4, %%mm2 \n\t" \
"psrld $16, %%mm4 \n\t" \
"psrlq $24, %%mm2 \n\t" \
"por %%mm4, %%mm2 \n\t" \
"pand "MANGLE(bm00001111)", %%mm2\n\t" \
"movq %%mm1, %%mm4 \n\t" \
"psrlq $8, %%mm1 \n\t" \
"pand "MANGLE(bm00000111)", %%mm4\n\t" \
"pand "MANGLE(bm11111000)", %%mm1\n\t" \
"por %%mm4, %%mm1 \n\t" \
"movq %%mm1, %%mm4 \n\t" \
"psllq $32, %%mm1 \n\t" \
"por %%mm1, %%mm2 \n\t" \
\
"psrlq $32, %%mm4 \n\t" \
"movq %%mm3, %%mm5 \n\t" \
"psrlq $8, %%mm3 \n\t" \
"pand "MANGLE(bm00000111)", %%mm5\n\t" \
"pand "MANGLE(bm11111000)", %%mm3\n\t" \
"por %%mm5, %%mm3 \n\t" \
"psllq $16, %%mm3 \n\t" \
"por %%mm4, %%mm3 \n\t" \
\
MOVNTQ(%%mm0, (dst))\
MOVNTQ(%%mm2, 8(dst))\
MOVNTQ(%%mm3, 16(dst))\
"addl $24, "#dst" \n\t"\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00617
/* 24-bit BGR packer for plain MMX: builds BGR0 dwords, then uses
 * shift/punpckhdq/por sequences to squeeze four 32-bit pixels into three
 * 24-bit stores.  Advances 'dst' by 24 per 8-pixel iteration; clobbers
 * all mm registers including mm7 (caller re-zeroes it if needed). */
#define WRITEBGR24MMX(dst, dstw, index) \
\
"movq %%mm2, %%mm1 \n\t" \
"movq %%mm5, %%mm6 \n\t" \
"punpcklbw %%mm4, %%mm2 \n\t" \
"punpcklbw %%mm7, %%mm5 \n\t" \
"punpckhbw %%mm4, %%mm1 \n\t" \
"punpckhbw %%mm7, %%mm6 \n\t" \
"movq %%mm2, %%mm0 \n\t" \
"movq %%mm1, %%mm3 \n\t" \
"punpcklwd %%mm5, %%mm0 \n\t" \
"punpckhwd %%mm5, %%mm2 \n\t" \
"punpcklwd %%mm6, %%mm1 \n\t" \
"punpckhwd %%mm6, %%mm3 \n\t" \
\
"movq %%mm0, %%mm4 \n\t" \
"movq %%mm2, %%mm6 \n\t" \
"movq %%mm1, %%mm5 \n\t" \
"movq %%mm3, %%mm7 \n\t" \
\
"psllq $40, %%mm0 \n\t" \
"psllq $40, %%mm2 \n\t" \
"psllq $40, %%mm1 \n\t" \
"psllq $40, %%mm3 \n\t" \
\
"punpckhdq %%mm4, %%mm0 \n\t" \
"punpckhdq %%mm6, %%mm2 \n\t" \
"punpckhdq %%mm5, %%mm1 \n\t" \
"punpckhdq %%mm7, %%mm3 \n\t" \
\
"psrlq $8, %%mm0 \n\t" \
"movq %%mm2, %%mm6 \n\t" \
"psllq $40, %%mm2 \n\t" \
"por %%mm2, %%mm0 \n\t" \
MOVNTQ(%%mm0, (dst))\
\
"psrlq $24, %%mm6 \n\t" \
"movq %%mm1, %%mm5 \n\t" \
"psllq $24, %%mm1 \n\t" \
"por %%mm1, %%mm6 \n\t" \
MOVNTQ(%%mm6, 8(dst))\
\
"psrlq $40, %%mm5 \n\t" \
"psllq $8, %%mm3 \n\t" \
"por %%mm3, %%mm5 \n\t" \
MOVNTQ(%%mm5, 16(dst))\
\
"addl $24, "#dst" \n\t"\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00670
/* 24-bit BGR packer for MMX2: uses pshufw plus the M24A/M24B/M24C byte
 * masks to assemble three output quadwords directly — fewer instructions
 * than the plain-MMX variant.  Advances 'dst' by 24 per 8-pixel iteration. */
#define WRITEBGR24MMX2(dst, dstw, index) \
\
"movq "MANGLE(M24A)", %%mm0 \n\t"\
"movq "MANGLE(M24C)", %%mm7 \n\t"\
"pshufw $0x50, %%mm2, %%mm1 \n\t" \
"pshufw $0x50, %%mm4, %%mm3 \n\t" \
"pshufw $0x00, %%mm5, %%mm6 \n\t" \
\
"pand %%mm0, %%mm1 \n\t" \
"pand %%mm0, %%mm3 \n\t" \
"pand %%mm7, %%mm6 \n\t" \
\
"psllq $8, %%mm3 \n\t" \
"por %%mm1, %%mm6 \n\t"\
"por %%mm3, %%mm6 \n\t"\
MOVNTQ(%%mm6, (dst))\
\
"psrlq $8, %%mm4 \n\t" \
"pshufw $0xA5, %%mm2, %%mm1 \n\t" \
"pshufw $0x55, %%mm4, %%mm3 \n\t" \
"pshufw $0xA5, %%mm5, %%mm6 \n\t" \
\
"pand "MANGLE(M24B)", %%mm1 \n\t" \
"pand %%mm7, %%mm3 \n\t" \
"pand %%mm0, %%mm6 \n\t" \
\
"por %%mm1, %%mm3 \n\t" \
"por %%mm3, %%mm6 \n\t"\
MOVNTQ(%%mm6, 8(dst))\
\
"pshufw $0xFF, %%mm2, %%mm1 \n\t" \
"pshufw $0xFA, %%mm4, %%mm3 \n\t" \
"pshufw $0xFA, %%mm5, %%mm6 \n\t" \
\
"pand %%mm7, %%mm1 \n\t" \
"pand %%mm0, %%mm3 \n\t" \
"pand "MANGLE(M24B)", %%mm6 \n\t" \
\
"por %%mm1, %%mm3 \n\t"\
"por %%mm3, %%mm6 \n\t"\
MOVNTQ(%%mm6, 16(dst))\
\
"addl $24, "#dst" \n\t"\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00718
/* Pick the fastest available 24-bit packer for this template instantiation. */
#ifdef HAVE_MMX2
#undef WRITEBGR24
#define WRITEBGR24 WRITEBGR24MMX2
#else
#undef WRITEBGR24
#define WRITEBGR24 WRITEBGR24MMX
#endif
00726
/* Store 8 pixels as YUY2 (Y0 U Y1 V ...): pack Y (mm1/mm7), U (mm3) and
 * V (mm4) to bytes, interleave into the 4:2:2 byte order and write two
 * quadwords per iteration. */
#define WRITEYUY2(dst, dstw, index) \
"packuswb %%mm3, %%mm3 \n\t"\
"packuswb %%mm4, %%mm4 \n\t"\
"packuswb %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm4, %%mm3 \n\t"\
"movq %%mm1, %%mm7 \n\t"\
"punpcklbw %%mm3, %%mm1 \n\t"\
"punpckhbw %%mm3, %%mm7 \n\t"\
\
MOVNTQ(%%mm1, (dst, index, 2))\
MOVNTQ(%%mm7, 8(dst, index, 2))\
\
"addl $8, "#index" \n\t"\
"cmpl "#dstw", "#index" \n\t"\
" jb 1b \n\t"
00742
00743
/**
 * Vertical filtering to planar YUV (YV12-style) output.
 *
 * Applies the vertical filter taps to the 16-bit intermediate lum/chr
 * buffers and writes 8-bit luma to 'dest' and chroma to 'uDest'/'vDest'.
 * On MMX the filter lists are read from the context (the lumFilter/chrSrc
 * parameters are unused on that path); otherwise it dispatches to the
 * AltiVec or plain-C implementation.
 *
 * uDest == NULL means "luma only": the chroma planes are skipped entirely.
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
#ifdef HAVE_MMX
 if(uDest != NULL)
 {
 /* U plane: byte offset 0 into each chroma line. */
 asm volatile(
 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET)
 :: "r" (&c->redDither),
 "r" (uDest), "m" (chrDstW)
 : "%eax", "%edx", "%esi"
 );

 /* V plane: stored 4096 bytes (2048 int16 samples) into the same lines. */
 asm volatile(
 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET)
 :: "r" (&c->redDither),
 "r" (vDest), "m" (chrDstW)
 : "%eax", "%edx", "%esi"
 );
 }

 asm volatile(
 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET)
 :: "r" (&c->redDither),
 "r" (dest), "m" (dstW)
 : "%eax", "%edx", "%esi"
 );
#else
#ifdef HAVE_ALTIVEC
 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
 chrFilter, chrSrc, chrFilterSize,
 dest, uDest, vDest, dstW, chrDstW);
#else //HAVE_ALTIVEC
 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
 chrFilter, chrSrc, chrFilterSize,
 dest, uDest, vDest, dstW, chrDstW);
#endif
#endif
}
00784
/**
 * Unscaled (1-tap) vertical pass to planar YUV: converts the 16-bit
 * intermediate lines to 8-bit output by >>7 with clipping to [0,255].
 * chrSrc holds U at [0..] and V at [2048..]; uDest == NULL skips chroma.
 * The MMX path feeds end-of-buffer pointers plus a negative count to
 * YSCALEYUV2YV121 so the index counts up to zero.
 */
static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
#ifdef HAVE_MMX
 if(uDest != NULL)
 {
 asm volatile(
 YSCALEYUV2YV121
 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
 "g" (-chrDstW)
 : "%eax"
 );

 asm volatile(
 YSCALEYUV2YV121
 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
 "g" (-chrDstW)
 : "%eax"
 );
 }

 asm volatile(
 YSCALEYUV2YV121
 :: "r" (lumSrc + dstW), "r" (dest + dstW),
 "g" (-dstW)
 : "%eax"
 );
#else
 int i;
 for(i=0; i<dstW; i++)
 {
 int val= lumSrc[i]>>7;

 /* val is in [-256,255] here, so bit 8 set means out of range:
 * negative values clamp to 0, the rest to 255. */
 if(val&256){
 if(val<0) val=0;
 else val=255;
 }

 dest[i]= val;
 }

 if(uDest != NULL)
 for(i=0; i<chrDstW; i++)
 {
 int u=chrSrc[i]>>7;
 int v=chrSrc[i + 2048]>>7;

 /* Cheap combined range test; clamp each channel only when needed. */
 if((u|v)&256){
 if(u<0) u=0;
 else if (u>255) u=255;
 if(v<0) v=0;
 else if (v>255) v=255;
 }

 uDest[i]= u;
 vDest[i]= v;
 }
#endif
}
00844
00845
/**
 * Vertical filtering directly to a packed pixel format (BGR32/24/16/15,
 * YUY2) for arbitrary filter sizes.  The MMX paths combine the vertical
 * filter (YSCALEYUV2RGBX / YSCALEYUV2PACKEDX) with the per-format store
 * macro; any format without an MMX kernel falls through to the AltiVec or
 * plain-C implementation.
 *
 * The three "m"(dummy) operands only pad the operand numbering so that
 * dest is %4 and dstW is %5, matching the store macros' expectations.
 */
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
 uint8_t *dest, int dstW, int dstY)
{
 int dummy=0;
 switch(c->dstFormat)
 {
#ifdef HAVE_MMX
 case IMGFMT_BGR32:
 {
 asm volatile(
 YSCALEYUV2RGBX
 WRITEBGR32(%4, %5, %%eax)

 :: "r" (&c->redDither),
 "m" (dummy), "m" (dummy), "m" (dummy),
 "r" (dest), "m" (dstW)
 : "%eax", "%edx", "%esi"
 );
 }
 break;
 case IMGFMT_BGR24:
 {
 asm volatile(
 YSCALEYUV2RGBX
 /* ebx = dest + 3*eax: WRITEBGR24 advances the pointer itself. */
 "leal (%%eax, %%eax, 2), %%ebx \n\t"
 "addl %4, %%ebx \n\t"
 WRITEBGR24(%%ebx, %5, %%eax)

 :: "r" (&c->redDither),
 "m" (dummy), "m" (dummy), "m" (dummy),
 "r" (dest), "m" (dstW)
 : "%eax", "%ebx", "%edx", "%esi"
 );
 }
 break;
 case IMGFMT_BGR15:
 {
 asm volatile(
 YSCALEYUV2RGBX
 /* Optional ordered dither before truncating to 5/5/5 bits. */
#ifdef DITHER1XBPP
 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

 WRITEBGR15(%4, %5, %%eax)

 :: "r" (&c->redDither),
 "m" (dummy), "m" (dummy), "m" (dummy),
 "r" (dest), "m" (dstW)
 : "%eax", "%edx", "%esi"
 );
 }
 break;
 case IMGFMT_BGR16:
 {
 asm volatile(
 YSCALEYUV2RGBX
 /* Optional ordered dither before truncating to 5/6/5 bits. */
#ifdef DITHER1XBPP
 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

 WRITEBGR16(%4, %5, %%eax)

 :: "r" (&c->redDither),
 "m" (dummy), "m" (dummy), "m" (dummy),
 "r" (dest), "m" (dstW)
 : "%eax", "%edx", "%esi"
 );
 }
 break;
 case IMGFMT_YUY2:
 {
 asm volatile(
 YSCALEYUV2PACKEDX

 /* No RGB conversion needed; just scale the filtered YUV down. */
 "psraw $3, %%mm3 \n\t"
 "psraw $3, %%mm4 \n\t"
 "psraw $3, %%mm1 \n\t"
 "psraw $3, %%mm7 \n\t"
 WRITEYUY2(%4, %5, %%eax)

 :: "r" (&c->redDither),
 "m" (dummy), "m" (dummy), "m" (dummy),
 "r" (dest), "m" (dstW)
 : "%eax", "%edx", "%esi"
 );
 }
 break;
#endif
 default:
#ifdef HAVE_ALTIVEC
 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
 chrFilter, chrSrc, chrFilterSize,
 dest, dstW, dstY);
#else
 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
 chrFilter, chrSrc, chrFilterSize,
 dest, dstW, dstY);
#endif
 break;
 }
}
00958
00962 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
00963 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
00964 {
00965 int yalpha1=yalpha^4095;
00966 int uvalpha1=uvalpha^4095;
00967 int i;
00968
00969 #if 0 //isn't used
00970 if(flags&SWS_FULL_CHR_H_INT)
00971 {
00972 switch(dstFormat)
00973 {
00974 #ifdef HAVE_MMX
00975 case IMGFMT_BGR32:
00976 asm volatile(
00977
00978
00979 FULL_YSCALEYUV2RGB
00980 "punpcklbw %%mm1, %%mm3 \n\t"
00981 "punpcklbw %%mm7, %%mm0 \n\t"
00982
00983 "movq %%mm3, %%mm1 \n\t"
00984 "punpcklwd %%mm0, %%mm3 \n\t"
00985 "punpckhwd %%mm0, %%mm1 \n\t"
00986
00987 MOVNTQ(%%mm3, (%4, %%eax, 4))
00988 MOVNTQ(%%mm1, 8(%4, %%eax, 4))
00989
00990 "addl $4, %%eax \n\t"
00991 "cmpl %5, %%eax \n\t"
00992 " jb 1b \n\t"
00993
00994
00995 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
00996 "m" (yalpha1), "m" (uvalpha1)
00997 : "%eax"
00998 );
00999 break;
01000 case IMGFMT_BGR24:
01001 asm volatile(
01002
01003 FULL_YSCALEYUV2RGB
01004
01005
01006 "punpcklbw %%mm1, %%mm3 \n\t"
01007 "punpcklbw %%mm7, %%mm0 \n\t"
01008
01009 "movq %%mm3, %%mm1 \n\t"
01010 "punpcklwd %%mm0, %%mm3 \n\t"
01011 "punpckhwd %%mm0, %%mm1 \n\t"
01012
01013 "movq %%mm3, %%mm2 \n\t"
01014 "psrlq $8, %%mm3 \n\t"
01015 "pand "MANGLE(bm00000111)", %%mm2\n\t"
01016 "pand "MANGLE(bm11111000)", %%mm3\n\t"
01017 "por %%mm2, %%mm3 \n\t"
01018 "movq %%mm1, %%mm2 \n\t"
01019 "psllq $48, %%mm1 \n\t"
01020 "por %%mm1, %%mm3 \n\t"
01021
01022 "movq %%mm2, %%mm1 \n\t"
01023 "psrld $16, %%mm2 \n\t"
01024 "psrlq $24, %%mm1 \n\t"
01025 "por %%mm2, %%mm1 \n\t"
01026
01027 "movl %4, %%ebx \n\t"
01028 "addl %%eax, %%ebx \n\t"
01029
01030 #ifdef HAVE_MMX2
01031
01032 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t"
01033 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t"
01034 #else
01035 "movd %%mm3, (%%ebx, %%eax, 2) \n\t"
01036 "psrlq $32, %%mm3 \n\t"
01037 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t"
01038 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t"
01039 #endif
01040 "addl $4, %%eax \n\t"
01041 "cmpl %5, %%eax \n\t"
01042 " jb 1b \n\t"
01043
01044 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
01045 "m" (yalpha1), "m" (uvalpha1)
01046 : "%eax", "%ebx"
01047 );
01048 break;
01049 case IMGFMT_BGR15:
01050 asm volatile(
01051
01052 FULL_YSCALEYUV2RGB
01053 #ifdef DITHER1XBPP
01054 "paddusb "MANGLE(g5Dither)", %%mm1\n\t"
01055 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
01056 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
01057 #endif
01058 "punpcklbw %%mm7, %%mm1 \n\t"
01059 "punpcklbw %%mm7, %%mm3 \n\t"
01060 "punpcklbw %%mm7, %%mm0 \n\t"
01061
01062 "psrlw $3, %%mm3 \n\t"
01063 "psllw $2, %%mm1 \n\t"
01064 "psllw $7, %%mm0 \n\t"
01065 "pand "MANGLE(g15Mask)", %%mm1 \n\t"
01066 "pand "MANGLE(r15Mask)", %%mm0 \n\t"
01067
01068 "por %%mm3, %%mm1 \n\t"
01069 "por %%mm1, %%mm0 \n\t"
01070
01071 MOVNTQ(%%mm0, (%4, %%eax, 2))
01072
01073 "addl $4, %%eax \n\t"
01074 "cmpl %5, %%eax \n\t"
01075 " jb 1b \n\t"
01076
01077 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
01078 "m" (yalpha1), "m" (uvalpha1)
01079 : "%eax"
01080 );
01081 break;
01082 case IMGFMT_BGR16:
01083 asm volatile(
01084
01085 FULL_YSCALEYUV2RGB
01086 #ifdef DITHER1XBPP
01087 "paddusb "MANGLE(g6Dither)", %%mm1\n\t"
01088 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
01089 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
01090 #endif
01091 "punpcklbw %%mm7, %%mm1 \n\t"
01092 "punpcklbw %%mm7, %%mm3 \n\t"
01093 "punpcklbw %%mm7, %%mm0 \n\t"
01094
01095 "psrlw $3, %%mm3 \n\t"
01096 "psllw $3, %%mm1 \n\t"
01097 "psllw $8, %%mm0 \n\t"
01098 "pand "MANGLE(g16Mask)", %%mm1 \n\t"
01099 "pand "MANGLE(r16Mask)", %%mm0 \n\t"
01100
01101 "por %%mm3, %%mm1 \n\t"
01102 "por %%mm1, %%mm0 \n\t"
01103
01104 MOVNTQ(%%mm0, (%4, %%eax, 2))
01105
01106 "addl $4, %%eax \n\t"
01107 "cmpl %5, %%eax \n\t"
01108 " jb 1b \n\t"
01109
01110 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
01111 "m" (yalpha1), "m" (uvalpha1)
01112 : "%eax"
01113 );
01114 break;
01115 #endif
01116 case IMGFMT_RGB32:
01117 #ifndef HAVE_MMX
01118 case IMGFMT_BGR32:
01119 #endif
01120 if(dstFormat==IMGFMT_BGR32)
01121 {
01122 int i;
01123 #ifdef WORDS_BIGENDIAN
01124 dest++;
01125 #endif
01126 for(i=0;i<dstW;i++){
01127
01128 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01129 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01130 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01131 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
01132 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
01133 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
01134 dest+= 4;
01135 }
01136 }
01137 else if(dstFormat==IMGFMT_BGR24)
01138 {
01139 int i;
01140 for(i=0;i<dstW;i++){
01141
01142 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01143 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01144 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01145 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
01146 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
01147 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
01148 dest+= 3;
01149 }
01150 }
01151 else if(dstFormat==IMGFMT_BGR16)
01152 {
01153 int i;
01154 for(i=0;i<dstW;i++){
01155
01156 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01157 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01158 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01159
01160 ((uint16_t*)dest)[i] =
01161 clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
01162 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
01163 clip_table16r[(Y + yuvtab_3343[V]) >>13];
01164 }
01165 }
01166 else if(dstFormat==IMGFMT_BGR15)
01167 {
01168 int i;
01169 for(i=0;i<dstW;i++){
01170
01171 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
01172 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
01173 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
01174
01175 ((uint16_t*)dest)[i] =
01176 clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
01177 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
01178 clip_table15r[(Y + yuvtab_3343[V]) >>13];
01179 }
01180 }
01181 }
01182 else
01183 {
01184 #endif // if 0
01185 #ifdef HAVE_MMX
01186 switch(c->dstFormat)
01187 {
01188
01189 case IMGFMT_BGR32:
01190 asm volatile(
01191 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
01192 "movl %4, %%esp \n\t"
01193 YSCALEYUV2RGB(%%eax, %5)
01194 WRITEBGR32(%%esp, 8280(%5), %%eax)
01195 "movl "ESP_OFFSET"(%5), %%esp \n\t"
01196
01197 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
01198 "r" (&c->redDither)
01199 : "%eax"
01200 );
01201 return;
01202 case IMGFMT_BGR24:
01203 asm volatile(
01204 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
01205 "movl %4, %%esp \n\t"
01206 YSCALEYUV2RGB(%%eax, %5)
01207 WRITEBGR24(%%esp, 8280(%5), %%eax)
01208 "movl "ESP_OFFSET"(%5), %%esp \n\t"
01209 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
01210 "r" (&c->redDither)
01211 : "%eax"
01212 );
01213 return;
01214 case IMGFMT_BGR15:
01215 asm volatile(
01216 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
01217 "movl %4, %%esp \n\t"
01218 YSCALEYUV2RGB(%%eax, %5)
01219
01220 #ifdef DITHER1XBPP
01221 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
01222 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
01223 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
01224 #endif
01225
01226 WRITEBGR15(%%esp, 8280(%5), %%eax)
01227 "movl "ESP_OFFSET"(%5), %%esp \n\t"
01228
01229 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
01230 "r" (&c->redDither)
01231 : "%eax"
01232 );
01233 return;
01234 case IMGFMT_BGR16:
01235 asm volatile(
01236 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
01237 "movl %4, %%esp \n\t"
01238 YSCALEYUV2RGB(%%eax, %5)
01239
01240 #ifdef DITHER1XBPP
01241 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
01242 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
01243 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
01244 #endif
01245
01246 WRITEBGR16(%%esp, 8280(%5), %%eax)
01247 "movl "ESP_OFFSET"(%5), %%esp \n\t"
01248 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
01249 "r" (&c->redDither)
01250 : "%eax"
01251 );
01252 return;
01253 case IMGFMT_YUY2:
01254 asm volatile(
01255 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
01256 "movl %4, %%esp \n\t"
01257 YSCALEYUV2PACKED(%%eax, %5)
01258 WRITEYUY2(%%esp, 8280(%5), %%eax)
01259 "movl "ESP_OFFSET"(%5), %%esp \n\t"
01260 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
01261 "r" (&c->redDither)
01262 : "%eax"
01263 );
01264 return;
01265 default: break;
01266 }
01267 #endif
01268 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
01269 }
01270
/* Scale one planar YUV line to a packed pixel format (BGR32/24/16/15 or YUY2)
 * using a SINGLE luma source line (the "unscaled vertically" special case).
 * buf0 holds luma; uvbuf0/uvbuf1 are the two nearest chroma lines, blended by
 * uvalpha. Falls back to the generic C macro when no MMX path matches.
 * NOTE(review): the asm blocks repurpose %esp as a data pointer (saved to and
 * restored from ESP_OFFSET inside the context) — nothing may touch the stack
 * between the two "movl ... %%esp" bookends. */
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
const int yalpha1=0;
int i;

/* single luma line: both blend inputs alias buf0, with yalpha fixed below */
uint16_t *buf1= buf0;
const int yalpha= 4096;

if(flags&SWS_FULL_CHR_H_INT)
{
/* full-chroma horizontal interpolation requested: reuse the 2-line path */
RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
return;
}

#ifdef HAVE_MMX
/* uvalpha < 2048: chroma taken from the nearer line only (YSCALEYUV2*1);
 * otherwise both chroma lines are averaged (YSCALEYUV2*1b variants). */
if( uvalpha < 2048 )
{
switch(dstFormat)
{
case IMGFMT_BGR32:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1(%%eax, %5)
WRITEBGR32(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_BGR24:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1(%%eax, %5)
WRITEBGR24(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_BGR15:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1(%%eax, %5)

/* optional ordered dither before packing down to 5/5/5 */
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_BGR16:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1(%%eax, %5)

/* optional ordered dither before packing down to 5/6/5 */
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

WRITEBGR16(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_YUY2:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2PACKED1(%%eax, %5)
WRITEYUY2(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
}
}
else
{
/* chroma halfway between the two source lines: use the averaging (1b) kernels */
switch(dstFormat)
{
case IMGFMT_BGR32:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1b(%%eax, %5)
WRITEBGR32(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_BGR24:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1b(%%eax, %5)
WRITEBGR24(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_BGR15:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1b(%%eax, %5)

#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_BGR16:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2RGB1b(%%eax, %5)

#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

WRITEBGR16(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
case IMGFMT_YUY2:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
"movl %4, %%esp \n\t"
YSCALEYUV2PACKED1b(%%eax, %5)
WRITEYUY2(%%esp, 8280(%5), %%eax)
"movl "ESP_OFFSET"(%5), %%esp \n\t"

:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
"r" (&c->redDither)
: "%eax"
);
return;
}
}
#endif
/* portable C fallback — mirrors the nearest/averaged chroma split above */
if( uvalpha < 2048 )
{
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
}else{
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
}
}
01466
01467
01468
/* Extract the luma plane from a packed YUY2 line: dst[i] = src[2*i].
 * The MMX path walks backwards-from-end via a negative index in %eax,
 * masking the even (Y) bytes with bm01010101 and repacking 16 at a time. */
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width)
{
#ifdef HAVE_MMX
asm volatile(
"movq "MANGLE(bm01010101)", %%mm2\n\t"
"movl %0, %%eax \n\t"
"1: \n\t"
"movq (%1, %%eax,2), %%mm0 \n\t"
"movq 8(%1, %%eax,2), %%mm1 \n\t"
/* keep only the Y bytes, then pack the two halves into 8 output bytes */
"pand %%mm2, %%mm0 \n\t"
"pand %%mm2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%eax) \n\t"
"addl $8, %%eax \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src+width*2), "r" (dst+width)
: "%eax"
);
#else
int i;
for(i=0; i<width; i++)
dst[i]= src[2*i];
#endif
}
01493
/* Extract and vertically average the chroma of two YUY2 lines:
 * dstU[i] = avg(src1,src2)[4*i+1], dstV[i] = avg(src1,src2)[4*i+3].
 * MMX2/3DNow path uses PAVGB for the two-line average (rounds up, while the
 * C fallback truncates — a known, accepted off-by-one between the paths). */
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile(
"movq "MANGLE(bm01010101)", %%mm4\n\t"
"movl %0, %%eax \n\t"
"1: \n\t"
"movq (%1, %%eax,4), %%mm0 \n\t"
"movq 8(%1, %%eax,4), %%mm1 \n\t"
"movq (%2, %%eax,4), %%mm2 \n\t"
"movq 8(%2, %%eax,4), %%mm3 \n\t"
/* average the two source lines byte-wise */
PAVGB(%%mm2, %%mm0)
PAVGB(%%mm3, %%mm1)
/* drop the Y bytes, leaving interleaved U/V words */
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
/* split: high bytes -> V (mm0), masked low bytes -> U (mm1) */
"psrlw $8, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"packuswb %%mm0, %%mm0 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
"movd %%mm0, (%4, %%eax) \n\t"
"movd %%mm1, (%3, %%eax) \n\t"
"addl $4, %%eax \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
: "%eax"
);
#else
int i;
for(i=0; i<width; i++)
{
dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1;
dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1;
}
#endif
}
01531
01532
/* Extract the luma plane from a packed UYVY line: dst[i] = src[2*i+1].
 * In UYVY the Y bytes sit in the ODD positions, so the MMX path shifts
 * each word right by 8 instead of masking as in the YUY2 variant. */
static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, int width)
{
#ifdef HAVE_MMX
asm volatile(
"movl %0, %%eax \n\t"
"1: \n\t"
"movq (%1, %%eax,2), %%mm0 \n\t"
"movq 8(%1, %%eax,2), %%mm1 \n\t"
/* move the odd (Y) bytes down, then pack 16 -> 8 bytes */
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%eax) \n\t"
"addl $8, %%eax \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src+width*2), "r" (dst+width)
: "%eax"
);
#else
int i;
for(i=0; i<width; i++)
dst[i]= src[2*i+1];
#endif
}
01556
/* Extract and vertically average the chroma of two UYVY lines:
 * dstU[i] = avg[4*i+0], dstV[i] = avg[4*i+2] (chroma in the EVEN bytes).
 * Same PAVGB rounding difference vs. the C fallback as in yuy2ToUV. */
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile(
"movq "MANGLE(bm01010101)", %%mm4\n\t"
"movl %0, %%eax \n\t"
"1: \n\t"
"movq (%1, %%eax,4), %%mm0 \n\t"
"movq 8(%1, %%eax,4), %%mm1 \n\t"
"movq (%2, %%eax,4), %%mm2 \n\t"
"movq 8(%2, %%eax,4), %%mm3 \n\t"
/* average the two source lines byte-wise */
PAVGB(%%mm2, %%mm0)
PAVGB(%%mm3, %%mm1)
/* keep the even (chroma) bytes — mask instead of shift (cf. yuy2ToUV) */
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
/* split interleaved U/V: high bytes -> V (mm0), low bytes -> U (mm1) */
"psrlw $8, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"packuswb %%mm0, %%mm0 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
"movd %%mm0, (%4, %%eax) \n\t"
"movd %%mm1, (%3, %%eax) \n\t"
"addl $4, %%eax \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
: "%eax"
);
#else
int i;
for(i=0; i<width; i++)
{
dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1;
dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1;
}
#endif
}
01594
01595 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
01596 {
01597 #ifdef HAVE_MMXFIXME
01598 #else
01599 int i;
01600 for(i=0; i<width; i++)
01601 {
01602 int b= ((uint32_t*)src)[i]&0xFF;
01603 int g= (((uint32_t*)src)[i]>>8)&0xFF;
01604 int r= (((uint32_t*)src)[i]>>16)&0xFF;
01605
01606 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
01607 }
01608 #endif
01609 }
01610
01611 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
01612 {
01613 #ifdef HAVE_MMXFIXME
01614 #else
01615 int i;
01616 for(i=0; i<width; i++)
01617 {
01618 const int a= ((uint32_t*)src1)[2*i+0];
01619 const int e= ((uint32_t*)src1)[2*i+1];
01620 const int c= ((uint32_t*)src2)[2*i+0];
01621 const int d= ((uint32_t*)src2)[2*i+1];
01622 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
01623 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
01624 const int b= l&0x3FF;
01625 const int g= h>>8;
01626 const int r= l>>16;
01627
01628 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
01629 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
01630 }
01631 #endif
01632 }
01633
/* Convert one BGR24 line to 8-bit luma. The MMX path processes 8 output
 * pixels (24 input bytes) per iteration: each pixel is widened to words,
 * multiplied against the packed bgr2YCoeff weights with pmaddwd, the four
 * partial sums are reduced via w1111, and the Y offset is added at the end.
 * %eax counts output bytes (negative, toward 0), %ebx = 3*%eax indexes input. */
static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width)
{
#ifdef HAVE_MMX
asm volatile(
"movl %2, %%eax \n\t"
"movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
"movq "MANGLE(w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"leal (%%eax, %%eax, 2), %%ebx \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%ebx) \n\t"
/* pixels 0..3: load 4 overlapping dwords (3-byte stride), widen to words */
"movd (%0, %%ebx), %%mm0 \n\t"
"movd 3(%0, %%ebx), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"movd 6(%0, %%ebx), %%mm2 \n\t"
"movd 9(%0, %%ebx), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
#ifndef FAST_BGR2YV12
/* extra precision reduction in the accurate (non-fast) build */
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
#endif
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
"pmaddwd %%mm5, %%mm2 \n\t"
"packssdw %%mm2, %%mm0 \n\t"
"psraw $7, %%mm0 \n\t"

/* pixels 4..7: same scheme at byte offsets 12/15/18/21 */
"movd 12(%0, %%ebx), %%mm4 \n\t"
"movd 15(%0, %%ebx), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"movd 18(%0, %%ebx), %%mm2 \n\t"
"movd 21(%0, %%ebx), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t"
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
#endif
"packssdw %%mm1, %%mm4 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
"pmaddwd %%mm5, %%mm2 \n\t"
"addl $24, %%ebx \n\t"
"packssdw %%mm2, %%mm4 \n\t"
"psraw $7, %%mm4 \n\t"

/* combine both halves, add the luma offset, store 8 bytes */
"packuswb %%mm4, %%mm0 \n\t"
"paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"

"movq %%mm0, (%1, %%eax) \n\t"
"addl $8, %%eax \n\t"
" js 1b \n\t"
: : "r" (src+width*3), "r" (dst+width), "g" (-width)
: "%eax", "%ebx"
);
#else
int i;
for(i=0; i<width; i++)
{
int b= src[i*3+0];
int g= src[i*3+1];
int r= src[i*3+2];

dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
}
#endif
}
01718
/* Convert two BGR24 lines to one 2x2-subsampled U/V line. The MMX path
 * produces 4 U and 4 V samples per iteration. On MMX2/3DNow the 2x2 block
 * average is done with PAVGB (including a shifted self-average for the
 * horizontal pair); the plain-MMX fallback sums the four pixels as words
 * and shifts by 2. Both halves are then weighted with bgr2UCoeff (mm6)
 * and bgr2VCoeff, reduced via w1111, offset by bgr2UVOffset, and the
 * packed result is split into the U and V planes.
 * %eax counts output bytes (negative); %ebx = 6*%eax indexes the input. */
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
#ifdef HAVE_MMX
asm volatile(
"movl %4, %%eax \n\t"
"movq "MANGLE(w1111)", %%mm5 \n\t"
"movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"leal (%%eax, %%eax, 2), %%ebx \n\t"
"addl %%ebx, %%ebx \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%ebx) \n\t"
PREFETCH" 64(%1, %%ebx) \n\t"
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
/* fast 2x2 average via PAVGB: vertical pair first, then the horizontal
 * pair by averaging against the same data shifted one pixel (24 bits) */
"movq (%0, %%ebx), %%mm0 \n\t"
"movq (%1, %%ebx), %%mm1 \n\t"
"movq 6(%0, %%ebx), %%mm2 \n\t"
"movq 6(%1, %%ebx), %%mm3 \n\t"
PAVGB(%%mm1, %%mm0)
PAVGB(%%mm3, %%mm2)
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"psrlq $24, %%mm0 \n\t"
"psrlq $24, %%mm2 \n\t"
PAVGB(%%mm1, %%mm0)
PAVGB(%%mm3, %%mm2)
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
#else
/* plain MMX: widen each of the 4 pixels and sum them as words */
"movd (%0, %%ebx), %%mm0 \n\t"
"movd (%1, %%ebx), %%mm1 \n\t"
"movd 3(%0, %%ebx), %%mm2 \n\t"
"movd 3(%1, %%ebx), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"movd 6(%0, %%ebx), %%mm4 \n\t"
"movd 6(%1, %%ebx), %%mm1 \n\t"
"movd 9(%0, %%ebx), %%mm2 \n\t"
"movd 9(%1, %%ebx), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"paddw %%mm1, %%mm4 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm4, %%mm2 \n\t"
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
/* apply V weights (mm1/mm3) and U weights (mm6) to the averaged pixels */
"movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
"movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"

"pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
#endif
"packssdw %%mm2, %%mm0 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
"pmaddwd %%mm5, %%mm1 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"psraw $7, %%mm0 \n\t"

/* second group of two output samples (input bytes 12..23) */
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
"movq 12(%0, %%ebx), %%mm4 \n\t"
"movq 12(%1, %%ebx), %%mm1 \n\t"
"movq 18(%0, %%ebx), %%mm2 \n\t"
"movq 18(%1, %%ebx), %%mm3 \n\t"
PAVGB(%%mm1, %%mm4)
PAVGB(%%mm3, %%mm2)
"movq %%mm4, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"psrlq $24, %%mm4 \n\t"
"psrlq $24, %%mm2 \n\t"
PAVGB(%%mm1, %%mm4)
PAVGB(%%mm3, %%mm2)
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
#else
"movd 12(%0, %%ebx), %%mm4 \n\t"
"movd 12(%1, %%ebx), %%mm1 \n\t"
"movd 15(%0, %%ebx), %%mm2 \n\t"
"movd 15(%1, %%ebx), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"paddw %%mm1, %%mm4 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm4 \n\t"
"movd 18(%0, %%ebx), %%mm5 \n\t"
"movd 18(%1, %%ebx), %%mm1 \n\t"
"movd 21(%0, %%ebx), %%mm2 \n\t"
"movd 21(%1, %%ebx), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"paddw %%mm1, %%mm5 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm5, %%mm2 \n\t"
/* mm5 was clobbered as a scratch register above — reload w1111 */
"movq "MANGLE(w1111)", %%mm5 \n\t"
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
"movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
"movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"

"pmaddwd %%mm4, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
#endif
"packssdw %%mm2, %%mm4 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
"pmaddwd %%mm5, %%mm1 \n\t"
"addl $24, %%ebx \n\t"
"packssdw %%mm1, %%mm4 \n\t"
"psraw $7, %%mm4 \n\t"

/* interleave U (low dwords) and V (high dwords), add the 128 offset */
"movq %%mm0, %%mm1 \n\t"
"punpckldq %%mm4, %%mm0 \n\t"
"punpckhdq %%mm4, %%mm1 \n\t"
"packsswb %%mm1, %%mm0 \n\t"
"paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t"

"movd %%mm0, (%2, %%eax) \n\t"
"punpckhdq %%mm0, %%mm0 \n\t"
"movd %%mm0, (%3, %%eax) \n\t"
"addl $4, %%eax \n\t"
" js 1b \n\t"
: : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
: "%eax", "%ebx"
);
#else
int i;
for(i=0; i<width; i++)
{
int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5];

dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
}
#endif
}
01884
01885 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
01886 {
01887 int i;
01888 for(i=0; i<width; i++)
01889 {
01890 int d= ((uint16_t*)src)[i];
01891 int b= d&0x1F;
01892 int g= (d>>5)&0x3F;
01893 int r= (d>>11)&0x1F;
01894
01895 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
01896 }
01897 }
01898
01899 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
01900 {
01901 int i;
01902 for(i=0; i<width; i++)
01903 {
01904 int d0= ((uint32_t*)src1)[i];
01905 int d1= ((uint32_t*)src2)[i];
01906
01907 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F);
01908 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F);
01909
01910 int dh2= (dh>>11) + (dh<<21);
01911 int d= dh2 + dl;
01912
01913 int b= d&0x7F;
01914 int r= (d>>11)&0x7F;
01915 int g= d>>21;
01916 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
01917 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
01918 }
01919 }
01920
01921 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
01922 {
01923 int i;
01924 for(i=0; i<width; i++)
01925 {
01926 int d= ((uint16_t*)src)[i];
01927 int b= d&0x1F;
01928 int g= (d>>5)&0x1F;
01929 int r= (d>>10)&0x1F;
01930
01931 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
01932 }
01933 }
01934
01935 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
01936 {
01937 int i;
01938 for(i=0; i<width; i++)
01939 {
01940 int d0= ((uint32_t*)src1)[i];
01941 int d1= ((uint32_t*)src2)[i];
01942
01943 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F);
01944 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F);
01945
01946 int dh2= (dh>>11) + (dh<<21);
01947 int d= dh2 + dl;
01948
01949 int b= d&0x7F;
01950 int r= (d>>10)&0x7F;
01951 int g= d>>21;
01952 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
01953 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
01954 }
01955 }
01956
01957
01958 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
01959 {
01960 int i;
01961 for(i=0; i<width; i++)
01962 {
01963 int r= ((uint32_t*)src)[i]&0xFF;
01964 int g= (((uint32_t*)src)[i]>>8)&0xFF;
01965 int b= (((uint32_t*)src)[i]>>16)&0xFF;
01966
01967 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
01968 }
01969 }
01970
01971 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
01972 {
01973 int i;
01974 for(i=0; i<width; i++)
01975 {
01976 const int a= ((uint32_t*)src1)[2*i+0];
01977 const int e= ((uint32_t*)src1)[2*i+1];
01978 const int c= ((uint32_t*)src2)[2*i+0];
01979 const int d= ((uint32_t*)src2)[2*i+1];
01980 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
01981 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
01982 const int r= l&0x3FF;
01983 const int g= h>>8;
01984 const int b= l>>16;
01985
01986 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
01987 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
01988 }
01989 }
01990
01991 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
01992 {
01993 int i;
01994 for(i=0; i<width; i++)
01995 {
01996 int r= src[i*3+0];
01997 int g= src[i*3+1];
01998 int b= src[i*3+2];
01999
02000 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
02001 }
02002 }
02003
02004 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
02005 {
02006 int i;
02007 for(i=0; i<width; i++)
02008 {
02009 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3];
02010 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4];
02011 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5];
02012
02013 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
02014 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
02015 }
02016 }
02017
02018
02019
/* Horizontal scaling core: for each output sample, dot-product filterSize
 * source pixels (starting at filterPos[i]) with the 16-bit filter
 * coefficients, producing clipped 15-bit output. MMX specializations exist
 * for filterSize 4 and 8; the generic MMX loop handles any multiple of 4.
 * All MMX paths bias the pointers by a negative counter so the loop ends
 * when the index reaches 0 (jnc). The filterSize==4/8 paths temporarily
 * borrow %ebp as the counter (pushed/popped around the loop). */
static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
int16_t *filter, int16_t *filterPos, int filterSize)
{
#ifdef HAVE_MMX
assert(filterSize % 4 == 0 && filterSize>0);
if(filterSize==4)
{
int counter= -2*dstW;
/* bias pointers so indexing by the negative counter lands at element 0 */
filter-= counter*2;
filterPos-= counter/2;
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq "MANGLE(w02)", %%mm6 \n\t"
"pushl %%ebp \n\t"
"movl %%eax, %%ebp \n\t"
".balign 16 \n\t"
"1: \n\t"
/* two output samples per iteration: fetch their source offsets */
"movzwl (%2, %%ebp), %%eax \n\t"
"movzwl 2(%2, %%ebp), %%ebx \n\t"
"movq (%1, %%ebp, 4), %%mm1 \n\t"
"movq 8(%1, %%ebp, 4), %%mm3 \n\t"
"movd (%3, %%eax), %%mm0 \n\t"
"movd (%3, %%ebx), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm3 \n\t"
"packssdw %%mm3, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%ebp) \n\t"
"addl $4, %%ebp \n\t"
" jnc 1b \n\t"

"popl %%ebp \n\t"
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
: "%ebx"
);
}
else if(filterSize==8)
{
int counter= -2*dstW;
filter-= counter*4;
filterPos-= counter/2;
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq "MANGLE(w02)", %%mm6 \n\t"
"pushl %%ebp \n\t"
"movl %%eax, %%ebp \n\t"
".balign 16 \n\t"
"1: \n\t"
"movzwl (%2, %%ebp), %%eax \n\t"
"movzwl 2(%2, %%ebp), %%ebx \n\t"
/* first 4 taps of each of the two output samples */
"movq (%1, %%ebp, 8), %%mm1 \n\t"
"movq 16(%1, %%ebp, 8), %%mm3 \n\t"
"movd (%3, %%eax), %%mm0 \n\t"
"movd (%3, %%ebx), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"

/* remaining 4 taps, accumulated into the first partial sums */
"movq 8(%1, %%ebp, 8), %%mm1 \n\t"
"movq 24(%1, %%ebp, 8), %%mm5 \n\t"
"movd 4(%3, %%eax), %%mm4 \n\t"
"movd 4(%3, %%ebx), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t"

"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm3 \n\t"
"packssdw %%mm3, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%ebp) \n\t"
"addl $4, %%ebp \n\t"
" jnc 1b \n\t"

"popl %%ebp \n\t"
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
: "%ebx"
);
}
else
{
/* generic filter size: inner loop (label 2) walks the taps 4 at a time */
int counter= -2*dstW;

filterPos-= counter/2;
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq "MANGLE(w02)", %%mm6 \n\t"
".balign 16 \n\t"
"1: \n\t"
"movl %2, %%ecx \n\t"
"movzwl (%%ecx, %0), %%eax \n\t"
"movzwl 2(%%ecx, %0), %%ebx \n\t"
"movl %5, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t"
"2: \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1, %6), %%mm3 \n\t"
"movd (%%ecx, %%eax), %%mm0 \n\t"
"movd (%%ecx, %%ebx), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"paddd %%mm3, %%mm5 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"addl $8, %1 \n\t"
"addl $4, %%ecx \n\t"
"cmpl %4, %%ecx \n\t"
" jb 2b \n\t"
/* skip the second sample's coefficients already consumed via (%1, %6) */
"addl %6, %1 \n\t"
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm5 \n\t"
"packssdw %%mm5, %%mm4 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t"
"packssdw %%mm4, %%mm4 \n\t"
"movl %3, %%eax \n\t"
"movd %%mm4, (%%eax, %0) \n\t"
"addl $4, %0 \n\t"
" jnc 1b \n\t"

: "+r" (counter), "+r" (filter)
: "m" (filterPos), "m" (dst), "m"(src+filterSize),
"m" (src), "r" (filterSize*2)
: "%ebx", "%eax", "%ecx"
);
}
#else
#ifdef HAVE_ALTIVEC
hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
#else
/* plain C reference: dot product per output sample, clipped to 15 bits */
int i;
for(i=0; i<dstW; i++)
{
int j;
int srcPos= filterPos[i];
int val=0;

for(j=0; j<filterSize; j++)
{

val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}

dst[i] = MIN(MAX(0, val>>7), (1<<15)-1);

}
#endif
#endif
}
02185
/*
 * Horizontally scale one line of luma (Y) input.
 *
 * dst      : output line, dstWidth samples, 15-bit unsigned fixed point
 *            (each 8-bit source sample contributes scaled by 128)
 * src      : input line (8-bit); replaced by formatConvBuffer if a format
 *            conversion is performed first
 * srcW     : number of input pixels
 * xInc     : 16.16 fixed-point horizontal increment (srcW<<16 / dstWidth-ish)
 * flags / canMMX2BeUsed : select between the generic filter path and the
 *            fast-bilinear paths below
 * hLumFilter / hLumFilterPos / hLumFilterSize : generic horizontal filter
 * funnyYCode : run-time-generated MMX2 scaler code, entered via "call *%4"
 * mmx2Filter / mmx2FilterPos : tables consumed by funnyYCode
 */
static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc,
				   int flags, int canMMX2BeUsed, int16_t *hLumFilter,
				   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
				   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
				   int32_t *mmx2FilterPos)
{
    /* Packed-YUV and RGB inputs are first converted to a plain 8-bit luma
       line in formatConvBuffer; scaling then proceeds from that buffer. */
    if(srcFormat==IMGFMT_YUY2)
    {
	RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_UYVY)
    {
	RENAME(uyvyToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_BGR32)
    {
	RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_BGR24)
    {
	RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_BGR16)
    {
	RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_BGR15)
    {
	RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_RGB32)
    {
	RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }
    else if(srcFormat==IMGFMT_RGB24)
    {
	RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
	src= formatConvBuffer;
    }

#ifdef HAVE_MMX
    /* Use the generic (full-precision) filter unless fast-bilinear was
       requested AND the MMX2 code can actually be used. */
    if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
    if(!(flags&SWS_FAST_BILINEAR))
#endif
    {
	RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    }
    else /* fast bilinear scaling */
    {
#ifdef ARCH_X86
#ifdef HAVE_MMX2
	int i;
	if(canMMX2BeUsed)
	{
		/* Run the self-modifying ("funny") MMX2 code.  The generated
		   code scales a chunk per call; mmx2FilterPos (in %%ebx)
		   supplies per-chunk source advances. */
		asm volatile(
			"pxor %%mm7, %%mm7		\n\t"
			"movl %0, %%ecx			\n\t"
			"movl %1, %%edi			\n\t"
			"movl %2, %%edx			\n\t"
			"movl %3, %%ebx			\n\t"
			"xorl %%eax, %%eax		\n\t" // i
			PREFETCH" (%%ecx)		\n\t"
			PREFETCH" 32(%%ecx)		\n\t"
			PREFETCH" 64(%%ecx)		\n\t"

/* One FUNNY_Y_CODE invocation: call the generated scaler, then advance the
   source (%%ecx) and destination (%%edi) pointers by the amounts it left
   behind, and reset %%eax for the next chunk. */
#define FUNNY_Y_CODE \
			"movl (%%ebx), %%esi		\n\t"\
			"call *%4			\n\t"\
			"addl (%%ebx, %%eax), %%ecx	\n\t"\
			"addl %%eax, %%edi		\n\t"\
			"xorl %%eax, %%eax		\n\t"\

FUNNY_Y_CODE
FUNNY_Y_CODE
FUNNY_Y_CODE
FUNNY_Y_CODE
FUNNY_Y_CODE
FUNNY_Y_CODE
FUNNY_Y_CODE
FUNNY_Y_CODE

			:: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
			"m" (funnyYCode)
			: "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
		);
		/* The generated code may read past srcW-1 at the right edge;
		   patch the affected output samples with the last source pixel
		   (scaled by 128 to match the 15-bit output format). */
		for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
	}
	else
	{
#endif
	/* Generic x86 fast-bilinear: 16.16 fixed point position kept as
	   %%ebx (integer part) + %%ecx (fractional part, low 16 bits used).
	   Two output samples per iteration; result is (src[xx]<<7)
	   interpolated toward src[xx+1], i.e. >>9 of the 16.16 blend. */
	asm volatile(
		"xorl %%eax, %%eax		\n\t" // i
		"xorl %%ebx, %%ebx		\n\t" // xx
		"xorl %%ecx, %%ecx		\n\t" // 2*xalpha
		".balign 16			\n\t"
		"1:				\n\t"
		"movzbl  (%0, %%ebx), %%edi	\n\t" //src[xx]
		"movzbl 1(%0, %%ebx), %%esi	\n\t" //src[xx+1]
		"subl %%edi, %%esi		\n\t" //src[xx+1] - src[xx]
		"imull %%ecx, %%esi		\n\t" //(src[xx+1] - src[xx])*2*xalpha
		"shll $16, %%edi		\n\t"
		"addl %%edi, %%esi		\n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
		"movl %1, %%edi			\n\t"
		"shrl $9, %%esi			\n\t"
		"movw %%si, (%%edi, %%eax, 2)	\n\t"
		"addw %4, %%cx			\n\t" //2*xalpha += xInc&0xFF
		"adcl %3, %%ebx			\n\t" //xx+= xInc>>8 + carry

		"movzbl (%0, %%ebx), %%edi	\n\t" //src[xx]
		"movzbl 1(%0, %%ebx), %%esi	\n\t" //src[xx+1]
		"subl %%edi, %%esi		\n\t" //src[xx+1] - src[xx]
		"imull %%ecx, %%esi		\n\t" //(src[xx+1] - src[xx])*2*xalpha
		"shll $16, %%edi		\n\t"
		"addl %%edi, %%esi		\n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
		"movl %1, %%edi			\n\t"
		"shrl $9, %%esi			\n\t"
		"movw %%si, 2(%%edi, %%eax, 2)	\n\t"
		"addw %4, %%cx			\n\t" //2*xalpha += xInc&0xFF
		"adcl %3, %%ebx			\n\t" //xx+= xInc>>8 + carry


		"addl $2, %%eax			\n\t"
		"cmpl %2, %%eax			\n\t"
		" jb 1b				\n\t"


		:: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF)
		: "%eax", "%ebx", "%ecx", "%edi", "%esi"
		);
#ifdef HAVE_MMX2
	} //if MMX2 can't be used
#endif
#else
	/* Portable C fast-bilinear fallback (non-x86). */
	int i;
	unsigned int xpos=0;
	for(i=0;i<dstWidth;i++)
	{
		register unsigned int xx=xpos>>16;
		register unsigned int xalpha=(xpos&0xFFFF)>>9;
		dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
		xpos+=xInc;
	}
#endif
    }
}
02341
/*
 * Horizontally scale one line of chroma: src1 (U) and src2 (V) are scaled
 * into dst and dst+2048 respectively (the two chroma planes are stored
 * 2048 samples apart in the destination buffer).
 *
 * Otherwise mirrors hyscale(): packed/RGB inputs are first converted into
 * formatConvBuffer (U) and formatConvBuffer+2048 (V); scaling then uses
 * the generic filter, the generated MMX2 "funny" code, or a fast-bilinear
 * asm/C path.  Grayscale input has no chroma, so it returns early.
 */
inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2,
				   int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
				   int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
				   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
				   int32_t *mmx2FilterPos)
{
    if(srcFormat==IMGFMT_YUY2)
    {
	RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_UYVY)
    {
	RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR32)
    {
	RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR24)
    {
	RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR16)
    {
	RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR15)
    {
	RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_RGB32)
    {
	RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_RGB24)
    {
	RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(isGray(srcFormat))
    {
	/* no chroma in grayscale input */
	return;
    }

#ifdef HAVE_MMX
    /* Use the generic filter unless fast-bilinear was requested AND the
       MMX2 code can actually be used. */
    if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
    if(!(flags&SWS_FAST_BILINEAR))
#endif
    {
	RENAME(hScale)(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
	RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    }
    else /* fast bilinear scaling */
    {
#ifdef ARCH_X86
#ifdef HAVE_MMX2
	int i;
	if(canMMX2BeUsed)
	{
		/* Run the generated MMX2 scaler: 4 chunks for the U plane,
		   then reset the position registers, point the destination at
		   dst+2048 (note: %%edi advance is in bytes, hence $4096 for
		   2048 uint16_t samples) and run 4 chunks for the V plane. */
		asm volatile(
			"pxor %%mm7, %%mm7		\n\t"
			"movl %0, %%ecx			\n\t"
			"movl %1, %%edi			\n\t"
			"movl %2, %%edx			\n\t"
			"movl %3, %%ebx			\n\t"
			"xorl %%eax, %%eax		\n\t" // i
			PREFETCH" (%%ecx)		\n\t"
			PREFETCH" 32(%%ecx)		\n\t"
			PREFETCH" 64(%%ecx)		\n\t"

/* One chunk of the generated scaler; see FUNNY_Y_CODE in hyscale(). */
#define FUNNY_UV_CODE \
			"movl (%%ebx), %%esi		\n\t"\
			"call *%4			\n\t"\
			"addl (%%ebx, %%eax), %%ecx	\n\t"\
			"addl %%eax, %%edi		\n\t"\
			"xorl %%eax, %%eax		\n\t"\

FUNNY_UV_CODE
FUNNY_UV_CODE
FUNNY_UV_CODE
FUNNY_UV_CODE
			"xorl %%eax, %%eax		\n\t" // i
			"movl %5, %%ecx			\n\t" // src2
			"movl %1, %%edi			\n\t"
			"addl $4096, %%edi		\n\t" // dst + 2048 samples
			PREFETCH" (%%ecx)		\n\t"
			PREFETCH" 32(%%ecx)		\n\t"
			PREFETCH" 64(%%ecx)		\n\t"

FUNNY_UV_CODE
FUNNY_UV_CODE
FUNNY_UV_CODE
FUNNY_UV_CODE

			:: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
			"m" (funnyUVCode), "m" (src2)
			: "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
		);
		/* Patch right-edge samples the generated code may have read
		   past srcW-1 for; same fix-up as in hyscale(). */
		for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
		{
			dst[i] = src1[srcW-1]*128;
			dst[i+2048] = src2[srcW-1]*128;
		}
	}
	else
	{
#endif
	/* Generic x86 fast-bilinear for both chroma planes in one loop:
	   position is %%ebx (integer) + %%ecx (16-bit fraction); one U and
	   one V output sample per iteration. */
	asm volatile(
		"xorl %%eax, %%eax		\n\t" // i
		"xorl %%ebx, %%ebx		\n\t" // xx
		"xorl %%ecx, %%ecx		\n\t" // 2*xalpha
		".balign 16			\n\t"
		"1:				\n\t"
		"movl %0, %%esi			\n\t"
		"movzbl  (%%esi, %%ebx), %%edi	\n\t" //src[xx]
		"movzbl 1(%%esi, %%ebx), %%esi	\n\t" //src[xx+1]
		"subl %%edi, %%esi		\n\t" //src[xx+1] - src[xx]
		"imull %%ecx, %%esi		\n\t" //(src[xx+1] - src[xx])*2*xalpha
		"shll $16, %%edi		\n\t"
		"addl %%edi, %%esi		\n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
		"movl %1, %%edi			\n\t"
		"shrl $9, %%esi			\n\t"
		"movw %%si, (%%edi, %%eax, 2)	\n\t"

		"movzbl  (%5, %%ebx), %%edi	\n\t" //src[xx]
		"movzbl 1(%5, %%ebx), %%esi	\n\t" //src[xx+1]
		"subl %%edi, %%esi		\n\t" //src[xx+1] - src[xx]
		"imull %%ecx, %%esi		\n\t" //(src[xx+1] - src[xx])*2*xalpha
		"shll $16, %%edi		\n\t"
		"addl %%edi, %%esi		\n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
		"movl %1, %%edi			\n\t"
		"shrl $9, %%esi			\n\t"
		"movw %%si, 4096(%%edi, %%eax, 2)\n\t" // V plane lives 4096 bytes after U

		"addw %4, %%cx			\n\t" //2*xalpha += xInc&0xFF
		"adcl %3, %%ebx			\n\t" //xx+= xInc>>8 + carry
		"addl $1, %%eax			\n\t"
		"cmpl %2, %%eax			\n\t"
		" jb 1b				\n\t"

		:: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF),
		"r" (src2)
		: "%eax", "%ebx", "%ecx", "%edi", "%esi"
		);
#ifdef HAVE_MMX2
	} //if MMX2 can't be used
#endif
#else
	/* Portable C fast-bilinear fallback (non-x86). */
	int i;
	unsigned int xpos=0;
	for(i=0;i<dstWidth;i++)
	{
		register unsigned int xx=xpos>>16;
		register unsigned int xalpha=(xpos&0xFFFF)>>9;
		dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
		dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
		xpos+=xInc;
	}
#endif
    }
}
02525
02526 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
02527 int srcSliceH, uint8_t* dst[], int dstStride[]){
02528
02529
02530 const int srcW= c->srcW;
02531 const int dstW= c->dstW;
02532 const int dstH= c->dstH;
02533 const int chrDstW= c->chrDstW;
02534 const int chrSrcW= c->chrSrcW;
02535 const int lumXInc= c->lumXInc;
02536 const int chrXInc= c->chrXInc;
02537 const int dstFormat= c->dstFormat;
02538 const int srcFormat= c->srcFormat;
02539 const int flags= c->flags;
02540 const int canMMX2BeUsed= c->canMMX2BeUsed;
02541 int16_t *vLumFilterPos= c->vLumFilterPos;
02542 int16_t *vChrFilterPos= c->vChrFilterPos;
02543 int16_t *hLumFilterPos= c->hLumFilterPos;
02544 int16_t *hChrFilterPos= c->hChrFilterPos;
02545 int16_t *vLumFilter= c->vLumFilter;
02546 int16_t *vChrFilter= c->vChrFilter;
02547 int16_t *hLumFilter= c->hLumFilter;
02548 int16_t *hChrFilter= c->hChrFilter;
02549 int32_t *lumMmxFilter= c->lumMmxFilter;
02550 int32_t *chrMmxFilter= c->chrMmxFilter;
02551 const int vLumFilterSize= c->vLumFilterSize;
02552 const int vChrFilterSize= c->vChrFilterSize;
02553 const int hLumFilterSize= c->hLumFilterSize;
02554 const int hChrFilterSize= c->hChrFilterSize;
02555 int16_t **lumPixBuf= c->lumPixBuf;
02556 int16_t **chrPixBuf= c->chrPixBuf;
02557 const int vLumBufSize= c->vLumBufSize;
02558 const int vChrBufSize= c->vChrBufSize;
02559 uint8_t *funnyYCode= c->funnyYCode;
02560 uint8_t *funnyUVCode= c->funnyUVCode;
02561 uint8_t *formatConvBuffer= c->formatConvBuffer;
02562 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02563 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02564 int lastDstY;
02565
02566
02567 int dstY= c->dstY;
02568 int lumBufIndex= c->lumBufIndex;
02569 int chrBufIndex= c->chrBufIndex;
02570 int lastInLumBuf= c->lastInLumBuf;
02571 int lastInChrBuf= c->lastInChrBuf;
02572
02573 if(isPacked(c->srcFormat)){
02574 src[0]=
02575 src[1]=
02576 src[2]= src[0];
02577 srcStride[0]=
02578 srcStride[1]=
02579 srcStride[2]= srcStride[0];
02580 }
02581 srcStride[1]<<= c->vChrDrop;
02582 srcStride[2]<<= c->vChrDrop;
02583
02584
02585
02586
02587 #if 0 //self test FIXME move to a vfilter or something
02588 {
02589 static volatile int i=0;
02590 i++;
02591 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH)
02592 selfTest(src, srcStride, c->srcW, c->srcH);
02593 i--;
02594 }
02595 #endif
02596
02597
02598
02599
02600 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
02601 {
02602 static int firstTime=1;
02603 if(flags & SWS_PRINT_INFO && firstTime)
02604 {
02605 MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n"
02606 "SwScaler: ->cannot do aligned memory acesses anymore\n");
02607 firstTime=0;
02608 }
02609 }
02610
02611
02612
02613 if(srcSliceY ==0){
02614 lumBufIndex=0;
02615 chrBufIndex=0;
02616 dstY=0;
02617 lastInLumBuf= -1;
02618 lastInChrBuf= -1;
02619 }
02620
02621 lastDstY= dstY;
02622
02623 for(;dstY < dstH; dstY++){
02624 unsigned char *dest =dst[0]+dstStride[0]*dstY;
02625 const int chrDstY= dstY>>c->chrDstVSubSample;
02626 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
02627 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
02628
02629 const int firstLumSrcY= vLumFilterPos[dstY];
02630 const int firstChrSrcY= vChrFilterPos[chrDstY];
02631 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1;
02632 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1;
02633
02634
02635
02636
02637 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02638 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02639
02640 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
02641 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)
02642
02643
02644 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
02645 {
02646
02647 while(lastInLumBuf < lastLumSrcY)
02648 {
02649 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
02650 lumBufIndex++;
02651
02652 ASSERT(lumBufIndex < 2*vLumBufSize)
02653 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
02654 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
02655
02656 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
02657 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
02658 funnyYCode, c->srcFormat, formatConvBuffer,
02659 c->lumMmx2Filter, c->lumMmx2FilterPos);
02660 lastInLumBuf++;
02661 }
02662 while(lastInChrBuf < lastChrSrcY)
02663 {
02664 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
02665 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
02666 chrBufIndex++;
02667 ASSERT(chrBufIndex < 2*vChrBufSize)
02668 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
02669 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
02670
02671
02672 if(!(isGray(srcFormat) || isGray(dstFormat)))
02673 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
02674 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
02675 funnyUVCode, c->srcFormat, formatConvBuffer,
02676 c->chrMmx2Filter, c->chrMmx2FilterPos);
02677 lastInChrBuf++;
02678 }
02679
02680 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
02681 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
02682 }
02683 else
02684 {
02685
02686
02687
02688
02689
02690
02691 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
02692 {
02693 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
02694 lumBufIndex++;
02695 ASSERT(lumBufIndex < 2*vLumBufSize)
02696 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
02697 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
02698 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
02699 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
02700 funnyYCode, c->srcFormat, formatConvBuffer,
02701 c->lumMmx2Filter, c->lumMmx2FilterPos);
02702 lastInLumBuf++;
02703 }
02704 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
02705 {
02706 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
02707 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
02708 chrBufIndex++;
02709 ASSERT(chrBufIndex < 2*vChrBufSize)
02710 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
02711 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
02712
02713 if(!(isGray(srcFormat) || isGray(dstFormat)))
02714 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
02715 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
02716 funnyUVCode, c->srcFormat, formatConvBuffer,
02717 c->chrMmx2Filter, c->chrMmx2FilterPos);
02718 lastInChrBuf++;
02719 }
02720
02721 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
02722 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
02723 break;
02724 }
02725
02726 #ifdef HAVE_MMX
02727 b5Dither= dither8[dstY&1];
02728 g6Dither= dither4[dstY&1];
02729 g5Dither= dither8[dstY&1];
02730 r5Dither= dither8[(dstY+1)&1];
02731 #endif
02732 if(dstY < dstH-2)
02733 {
02734 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02735 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02736 #ifdef HAVE_MMX
02737 int i;
02738 for(i=0; i<vLumFilterSize; i++)
02739 {
02740 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
02741 lumMmxFilter[4*i+2]=
02742 lumMmxFilter[4*i+3]=
02743 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
02744 }
02745 for(i=0; i<vChrFilterSize; i++)
02746 {
02747 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
02748 chrMmxFilter[4*i+2]=
02749 chrMmxFilter[4*i+3]=
02750 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
02751 }
02752 #endif
02753 if(isPlanarYUV(dstFormat) || isGray(dstFormat))
02754 {
02755 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02756 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
02757 if(vLumFilterSize == 1 && vChrFilterSize == 1)
02758 {
02759 int16_t *lumBuf = lumPixBuf[0];
02760 int16_t *chrBuf= chrPixBuf[0];
02761 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
02762 }
02763 else
02764 {
02765 RENAME(yuv2yuvX)(c,
02766 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
02767 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02768 dest, uDest, vDest, dstW, chrDstW);
02769 }
02770 }
02771 else
02772 {
02773 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
02774 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
02775 if(vLumFilterSize == 1 && vChrFilterSize == 2)
02776 {
02777 int chrAlpha= vChrFilter[2*dstY+1];
02778 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
02779 dest, dstW, chrAlpha, dstFormat, flags, dstY);
02780 }
02781 else if(vLumFilterSize == 2 && vChrFilterSize == 2)
02782 {
02783 int lumAlpha= vLumFilter[2*dstY+1];
02784 int chrAlpha= vChrFilter[2*dstY+1];
02785 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
02786 dest, dstW, lumAlpha, chrAlpha, dstY);
02787 }
02788 else
02789 {
02790 RENAME(yuv2packedX)(c,
02791 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02792 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02793 dest, dstW, dstY);
02794 }
02795 }
02796 }
02797 else
02798 {
02799 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02800 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02801 if(isPlanarYUV(dstFormat) || isGray(dstFormat))
02802 {
02803 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02804 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
02805 yuv2yuvXinC(
02806 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
02807 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02808 dest, uDest, vDest, dstW, chrDstW);
02809 }
02810 else
02811 {
02812 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
02813 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
02814 yuv2packedXinC(c,
02815 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02816 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
02817 dest, dstW, dstY);
02818 }
02819 }
02820 }
02821
02822 #ifdef HAVE_MMX
02823 __asm __volatile(SFENCE:::"memory");
02824 __asm __volatile(EMMS:::"memory");
02825 #endif
02826
02827 c->dstY= dstY;
02828 c->lumBufIndex= lumBufIndex;
02829 c->chrBufIndex= chrBufIndex;
02830 c->lastInLumBuf= lastInLumBuf;
02831 c->lastInChrBuf= lastInChrBuf;
02832
02833 return dstY - lastDstY;
02834 }