00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065 #include <stdio.h>
00066 #include <stdlib.h>
00067 #include <string.h>
00068 #include <inttypes.h>
00069 #include <assert.h>
00070 #include "config.h"
00071 #include "rgb2rgb.h"
00072 #include "swscale.h"
00073 #include "swscale_internal.h"
00074 #include "mangle.h"
00075 #include "img_format.h"
00076
/* Optional build-time features; both are switched off in this file. */
#undef PROFILE_THE_BEAST
#undef INC_SCALING

/* Short aliases for the 8-bit sample types used by the converters below. */
typedef unsigned char ubyte;
typedef signed char   sbyte;
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120 static
00121 const vector unsigned char
00122 perm_rgb_0 = (vector unsigned char)(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00123 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a),
00124 perm_rgb_1 = (vector unsigned char)(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00125 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f),
00126 perm_rgb_2 = (vector unsigned char)(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00127 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05),
00128 perm_rgb_3 = (vector unsigned char)(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00129 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f);
00130
/*
 * vec_merge3(x2,x1,x0, y0,y1,y2)
 *
 * Interleaves three 16-byte vectors into three output vectors holding
 * 16 packed triplets.  Each triplet is emitted in the order x0, x1, x2;
 * since the parameter list is declared as (x2,x1,x0), the FIRST macro
 * argument ends up LAST in every triplet.
 *
 * y0, y1 and y2 receive bytes 0-15, 16-31 and 32-47 of the result.
 */
#define vec_merge3(x2,x1,x0,y0,y1,y2)                       \
do {                                                        \
    typeof(x0) m3_pairs_hi, m3_carry, m3_pairs_lo;          \
    /* (x0,x1) byte pairs for elements 0-7 */               \
    m3_pairs_hi = vec_mergeh (x0, x1);                      \
    y0          = vec_perm (m3_pairs_hi, x2, perm_rgb_0);   \
    m3_carry    = vec_perm (m3_pairs_hi, x2, perm_rgb_1);   \
    /* (x0,x1) byte pairs for elements 8-15 */              \
    m3_pairs_lo = vec_mergel (x0, x1);                      \
    y1          = vec_perm (m3_pairs_lo, m3_carry, perm_rgb_2); \
    y2          = vec_perm (m3_pairs_lo, m3_carry, perm_rgb_3); \
} while(0)
00141
/*
 * vec_mstrgb24(x0,x1,x2,ptr)
 *
 * Interleaves three byte vectors with vec_merge3 (x0,x1,x2) and stores
 * the resulting 48 bytes through ptr, advancing ptr by three vectors.
 * ptr must be a modifiable pointer to a 16-byte vector type.
 *
 * The expansion deliberately has no trailing ';' so that the macro
 * behaves like a single statement (safe inside unbraced if/else); the
 * caller supplies the semicolon.
 */
#define vec_mstrgb24(x0,x1,x2,ptr)          \
do {                                        \
    typeof(x0) _0,_1,_2;                    \
    vec_merge3 (x0,x1,x2,_0,_1,_2);         \
    vec_st (_0, 0, (ptr)++);                \
    vec_st (_1, 0, (ptr)++);                \
    vec_st (_2, 0, (ptr)++);                \
} while (0)
00150
/*
 * vec_mstbgr24(x0,x1,x2,ptr)
 *
 * Same as vec_mstrgb24 but hands the components to vec_merge3 in the
 * reverse order (x2,x1,x0), so each stored triplet has its first and
 * last component swapped.  Stores 48 bytes through ptr and advances ptr
 * by three vectors.
 *
 * No trailing ';' in the expansion: the caller supplies it, which keeps
 * the macro usable as a single statement.
 */
#define vec_mstbgr24(x0,x1,x2,ptr)          \
do {                                        \
    typeof(x0) _0,_1,_2;                    \
    vec_merge3 (x2,x1,x0,_0,_1,_2);         \
    vec_st (_0, 0, (ptr)++);                \
    vec_st (_1, 0, (ptr)++);                \
    vec_st (_2, 0, (ptr)++);                \
} while (0)
00159
00160
00161
00162
00163
/*
 * vec_mstrgb32(T,x0,x1,x2,x3,ptr)
 *
 * Interleaves four byte vectors of type T into 16 four-byte pixels laid
 * out as x0,x1,x2,x3 and stores the 64 resulting bytes at ptr, then
 * advances ptr by 4 (ptr is expected to point to 16-byte vectors).
 *
 * Byte pairs (x0,x1) and (x2,x3) are built first, then merged as 16-bit
 * units to form complete pixels: the "high" merges yield pixels 0-7,
 * the "low" merges pixels 8-15.
 *
 * No trailing ';' in the expansion: the caller supplies it, which keeps
 * the macro usable as a single statement.
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                         \
do {                                                                            \
    T _0,_1,_2,_3;                                                              \
    _0 = vec_mergeh (x0,x1);                                                    \
    _1 = vec_mergeh (x2,x3);                                                    \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1);   \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1);   \
    vec_st (_2, 0*16, (T *)(ptr));                                              \
    vec_st (_3, 1*16, (T *)(ptr));                                              \
    _0 = vec_mergel (x0,x1);                                                    \
    _1 = vec_mergel (x2,x3);                                                    \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1);   \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1);   \
    vec_st (_2, 2*16, (T *)(ptr));                                              \
    vec_st (_3, 3*16, (T *)(ptr));                                              \
    (ptr) += 4;                                                                 \
} while (0)
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
/*
 * vec_unh(x) / vec_unl(x)
 *
 * Widen the first (vec_unh) or last (vec_unl) eight bytes of x to
 * 16-bit lanes by pairing each byte with a byte taken from an all-zero
 * vector (mask index 0x10), i.e. without sign extension.
 */
#define vec_unh(x)                                                          \
    ((vector signed short)                                                  \
     vec_perm (x, (typeof(x))(0),                                           \
               (vector unsigned char)(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03, \
                                      0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07)))

#define vec_unl(x)                                                          \
    ((vector signed short)                                                  \
     vec_perm (x, (typeof(x))(0),                                           \
               (vector unsigned char)(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B, \
                                      0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F)))
00209
/* Clamp every lane of x to the range [0, 255]. */
#define vec_clip(x)                                                 \
    vec_max (vec_min (x, (typeof(x))(255)), (typeof(x))(0))

/* Clamp x and y to [0, 255] with vec_clip, then narrow both into one byte vector. */
#define vec_packclp_a(x,y)                                          \
    (vector unsigned char)vec_pack (vec_clip (x), vec_clip (y))

/*
 * Narrow two signed-short vectors into one byte vector: negative lanes
 * are first raised to 0 with vec_max, then vec_packs packs the values
 * reinterpreted as unsigned shorts.
 */
#define vec_packclp(x,y)                                            \
    (vector unsigned char)vec_packs                                 \
        ((vector unsigned short)vec_max (x,(vector signed short) (0)), \
         (vector unsigned short)vec_max (y,(vector signed short) (0)))
00220
00221
00222
00223
00224 static inline void cvtyuvtoRGB (SwsContext *c,
00225 vector signed short Y, vector signed short U, vector signed short V,
00226 vector signed short *R, vector signed short *G, vector signed short *B)
00227 {
00228 vector signed short vx,ux,uvx;
00229
00230 Y = vec_mradds (Y, c->CY, c->OY);
00231
00232 U = vec_sub (U,(vector signed short)(128));
00233 V = vec_sub (V,(vector signed short)(128));
00234
00235
00236 ux = vec_sl (U, c->CSHIFT);
00237 *B = vec_mradds (ux, c->CBU, Y);
00238
00239
00240 vx = vec_sl (V, c->CSHIFT);
00241 *R = vec_mradds (vx, c->CRV, Y);
00242
00243
00244 uvx = vec_mradds (U, c->CGU, Y);
00245 *G = vec_mradds (V, c->CGV, uvx);
00246 }
00247
00248
00249
00250
00251
00252
00253
00254
00255
/*
 * DEFCSP420_CVT(name, out_pixels)
 *
 * Defines   static int altivec_<name> (SwsContext *c, in, instrides,
 *                                      srcSliceY, srcSliceH,
 *                                      oplanes, outstrides)
 * which converts a slice with separate Y (in[0]), U (in[1]) and V
 * (in[2]) planes to packed output in oplanes[0], starting at output row
 * srcSliceY.  out_pixels(R,G,B,ptr) is the store macro that writes 16
 * pixels and advances ptr.  The generated function returns srcSliceH.
 *
 * Two luma rows are converted per outer iteration and share one row of
 * chroma; each inner iteration handles 16 pixels using 8 U and 8 V
 * samples, every chroma sample being duplicated horizontally.
 *
 * The second luma row is located with instrides[0] and the luma
 * pointers advance by two full strides per iteration, so source buffers
 * whose luma stride is larger than the width are handled correctly
 * (the result is unchanged when the stride equals the width).
 *
 * Assumptions visible in the code: the width is expected to be a
 * multiple of 16 (only w/16 chunks are converted and the row advance
 * subtracts w), luma rows are read with aligned vector loads, and after
 * each pair of rows the output pointers advance by outstrides[0]/16
 * vectors on top of what the inner loop already wrote.
 */
#define DEFCSP420_CVT(name,out_pixels)                                        \
static int altivec_##name (SwsContext *c,                                     \
                           unsigned char **in, int *instrides,                \
                           int srcSliceY, int srcSliceH,                      \
                           unsigned char **oplanes, int *outstrides)          \
{                                                                             \
    int w = c->srcW;                                                          \
    int h = srcSliceH;                                                        \
    int i,j;                                                                  \
    int instrides_scl[3];                                                     \
    vector unsigned char y0,y1;                                               \
                                                                              \
    vector signed char u,v;                                                   \
                                                                              \
    vector signed short Y0,Y1,Y2,Y3;                                          \
    vector signed short U,V;                                                  \
    vector signed short vx,ux,uvx;                                            \
    vector signed short vx0,ux0,uvx0;                                         \
    vector signed short vx1,ux1,uvx1;                                         \
    vector signed short R0,G0,B0;                                             \
    vector signed short R1,G1,B1;                                             \
    vector unsigned char R,G,B;                                               \
                                                                              \
    vector unsigned char *uivP, *vivP;                                        \
    vector unsigned char align_perm;                                          \
                                                                              \
    /* local copies of the conversion coefficients */                         \
    vector signed short                                                       \
        lCY  = c->CY,                                                         \
        lOY  = c->OY,                                                         \
        lCRV = c->CRV,                                                        \
        lCBU = c->CBU,                                                        \
        lCGU = c->CGU,                                                        \
        lCGV = c->CGV;                                                        \
                                                                              \
    vector unsigned short lCSHIFT = c->CSHIFT;                                \
                                                                              \
    /* y1i walks the even luma row, y2i the odd row one stride below */       \
    ubyte *y1i = in[0];                                                       \
    ubyte *y2i = in[0]+instrides[0];                                          \
    ubyte *ui  = in[1];                                                       \
    ubyte *vi  = in[2];                                                       \
                                                                              \
    vector unsigned char *oute                                                \
        = (vector unsigned char *)                                            \
          (oplanes[0]+srcSliceY*outstrides[0]);                               \
    vector unsigned char *outo                                                \
        = (vector unsigned char *)                                            \
          (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);                 \
                                                                              \
    /* per-iteration pointer corrections: the inner loop has already */       \
    /* advanced luma by w and chroma by w/2; luma skips two rows     */       \
    instrides_scl[0] = instrides[0]*2-w;                                      \
    instrides_scl[1] = instrides[1]-w/2;                                      \
    instrides_scl[2] = instrides[2]-w/2;                                      \
                                                                              \
    for (i=0;i<h/2;i++) {                                                     \
        /* data-stream hints for the two output rows about to be stored */    \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);                \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);                \
                                                                              \
        for (j=0;j<w/16;j++) {                                                \
                                                                              \
            y0 = vec_ldl (0,y1i);                                             \
            y1 = vec_ldl (0,y2i);                                             \
            uivP = (vector unsigned char *)ui;                                \
            vivP = (vector unsigned char *)vi;                                \
                                                                              \
            /* chroma pointers may be unaligned: load two vectors and */      \
            /* permute the wanted bytes into place                    */      \
            align_perm = vec_lvsl (0, ui);                                    \
            u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);  \
                                                                              \
            align_perm = vec_lvsl (0, vi);                                    \
            v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);  \
                                                                              \
            /* remove the chroma bias, then widen the first 8 samples */      \
            u = (vector signed char)vec_sub (u, (vector signed char)(128));   \
            v = (vector signed char)vec_sub (v, (vector signed char)(128));   \
            U = vec_unpackh (u);                                              \
            V = vec_unpackh (v);                                              \
                                                                              \
            /* widen luma: Y0/Y1 from the even row, Y2/Y3 from the odd */     \
            Y0 = vec_unh (y0);                                                \
            Y1 = vec_unl (y0);                                                \
            Y2 = vec_unh (y1);                                                \
            Y3 = vec_unl (y1);                                                \
                                                                              \
            Y0 = vec_mradds (Y0, lCY, lOY);                                   \
            Y1 = vec_mradds (Y1, lCY, lOY);                                   \
            Y2 = vec_mradds (Y2, lCY, lOY);                                   \
            Y3 = vec_mradds (Y3, lCY, lOY);                                   \
                                                                              \
            /* blue chroma term, each sample duplicated for two pixels */     \
            ux = vec_sl (U, lCSHIFT);                                         \
            ux = vec_mradds (ux, lCBU, (vector signed short)(0));             \
            ux0  = vec_mergeh (ux,ux);                                        \
            ux1  = vec_mergel (ux,ux);                                        \
                                                                              \
            /* red chroma term */                                             \
            vx = vec_sl (V, lCSHIFT);                                         \
            vx = vec_mradds (vx, lCRV, (vector signed short)(0));             \
            vx0  = vec_mergeh (vx,vx);                                        \
            vx1  = vec_mergel (vx,vx);                                        \
                                                                              \
            /* green chroma term (U and V contributions combined) */          \
            uvx = vec_mradds (U, lCGU, (vector signed short)(0));             \
            uvx = vec_mradds (V, lCGV, uvx);                                  \
            uvx0 = vec_mergeh (uvx,uvx);                                      \
            uvx1 = vec_mergel (uvx,uvx);                                      \
                                                                              \
            /* even row */                                                    \
            R0 = vec_add (Y0,vx0);                                            \
            G0 = vec_add (Y0,uvx0);                                           \
            B0 = vec_add (Y0,ux0);                                            \
            R1 = vec_add (Y1,vx1);                                            \
            G1 = vec_add (Y1,uvx1);                                           \
            B1 = vec_add (Y1,ux1);                                            \
                                                                              \
            R  = vec_packclp (R0,R1);                                         \
            G  = vec_packclp (G0,G1);                                         \
            B  = vec_packclp (B0,B1);                                         \
                                                                              \
            out_pixels(R,G,B,oute);                                           \
                                                                              \
            /* odd row, reusing the same chroma terms */                      \
            R0 = vec_add (Y2,vx0);                                            \
            G0 = vec_add (Y2,uvx0);                                           \
            B0 = vec_add (Y2,ux0);                                            \
            R1 = vec_add (Y3,vx1);                                            \
            G1 = vec_add (Y3,uvx1);                                           \
            B1 = vec_add (Y3,ux1);                                            \
            R  = vec_packclp (R0,R1);                                         \
            G  = vec_packclp (G0,G1);                                         \
            B  = vec_packclp (B0,B1);                                         \
                                                                              \
            out_pixels(R,G,B,outo);                                           \
                                                                              \
            y1i  += 16;                                                       \
            y2i  += 16;                                                       \
            ui   += 8;                                                        \
            vi   += 8;                                                        \
                                                                              \
        }                                                                     \
                                                                              \
        outo += (outstrides[0])>>4;                                           \
        oute += (outstrides[0])>>4;                                           \
                                                                              \
        ui  += instrides_scl[1];                                              \
        vi  += instrides_scl[2];                                              \
        y1i += instrides_scl[0];                                              \
        y2i += instrides_scl[0];                                              \
    }                                                                         \
    return srcSliceH;                                                         \
}
00404
00405
00406 #define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),c,b,a,ptr)
00407 #define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a))(0)),ptr)
00408 #define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a))(0)),ptr)
00409 #define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),a,b,c,ptr)
00410 #define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
00411 #define out_bgr24(a,b,c,ptr) vec_mstrgb24(c,b,a,ptr)
00412
00413 DEFCSP420_CVT (yuv2_abgr32, out_abgr)
00414 DEFCSP420_CVT (yuv2_bgra32, out_argb)
00415 DEFCSP420_CVT (yuv2_rgba32, out_rgba)
00416 DEFCSP420_CVT (yuv2_argb32, out_argb)
00417 DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
00418 DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
00419
00420
00421
00422
00423 static
00424 const vector unsigned char
00425 demux_u = (vector unsigned char)(0x10,0x00,0x10,0x00,
00426 0x10,0x04,0x10,0x04,
00427 0x10,0x08,0x10,0x08,
00428 0x10,0x0c,0x10,0x0c),
00429 demux_v = (vector unsigned char)(0x10,0x02,0x10,0x02,
00430 0x10,0x06,0x10,0x06,
00431 0x10,0x0A,0x10,0x0A,
00432 0x10,0x0E,0x10,0x0E),
00433 demux_y = (vector unsigned char)(0x10,0x01,0x10,0x03,
00434 0x10,0x05,0x10,0x07,
00435 0x10,0x09,0x10,0x0B,
00436 0x10,0x0D,0x10,0x0F);
00437
00438
00439
00440
00441 static int altivec_uyvy_rgb32 (SwsContext *c,
00442 unsigned char **in, int *instrides,
00443 int srcSliceY, int srcSliceH,
00444 unsigned char **oplanes, int *outstrides)
00445 {
00446 int w = c->srcW;
00447 int h = srcSliceH;
00448 int i,j;
00449 vector unsigned char uyvy;
00450 vector signed short Y,U,V;
00451 vector signed short vx,ux,uvx;
00452 vector signed short R0,G0,B0,R1,G1,B1;
00453 vector unsigned char R,G,B;
00454 vector unsigned char *out;
00455 ubyte *img;
00456
00457 img = in[0];
00458 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00459
00460 for (i=0;i<h;i++) {
00461 for (j=0;j<w/16;j++) {
00462 uyvy = vec_ld (0, img);
00463 U = (vector signed short)
00464 vec_perm (uyvy, (vector unsigned char)(0), demux_u);
00465
00466 V = (vector signed short)
00467 vec_perm (uyvy, (vector unsigned char)(0), demux_v);
00468
00469 Y = (vector signed short)
00470 vec_perm (uyvy, (vector unsigned char)(0), demux_y);
00471
00472 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00473
00474 uyvy = vec_ld (16, img);
00475 U = (vector signed short)
00476 vec_perm (uyvy, (vector unsigned char)(0), demux_u);
00477
00478 V = (vector signed short)
00479 vec_perm (uyvy, (vector unsigned char)(0), demux_v);
00480
00481 Y = (vector signed short)
00482 vec_perm (uyvy, (vector unsigned char)(0), demux_y);
00483
00484 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00485
00486 R = vec_packclp (R0,R1);
00487 G = vec_packclp (G0,G1);
00488 B = vec_packclp (B0,B1);
00489
00490
00491 out_rgba (R,G,B,out);
00492
00493 img += 32;
00494 }
00495 }
00496 return srcSliceH;
00497 }
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507 SwsFunc yuv2rgb_init_altivec (SwsContext *c)
00508 {
00509 if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
00510 return NULL;
00511
00512
00513
00514
00515
00516
00517
00518
00519 if ((c->srcW & 0xf) != 0) return NULL;
00520
00521 switch (c->srcFormat) {
00522 case IMGFMT_YVU9:
00523 case IMGFMT_IF09:
00524 case IMGFMT_YV12:
00525 case IMGFMT_I420:
00526 case IMGFMT_IYUV:
00527 case IMGFMT_CLPL:
00528 case IMGFMT_Y800:
00529 case IMGFMT_Y8:
00530 case IMGFMT_NV12:
00531 case IMGFMT_NV21:
00532 if ((c->srcH & 0x1) != 0)
00533 return NULL;
00534
00535 switch(c->dstFormat){
00536 case IMGFMT_RGB24:
00537 MSG_WARN("ALTIVEC: Color Space RGB24\n");
00538 return altivec_yuv2_rgb24;
00539 case IMGFMT_BGR24:
00540 MSG_WARN("ALTIVEC: Color Space BGR24\n");
00541 return altivec_yuv2_bgr24;
00542 case IMGFMT_RGB32:
00543 MSG_WARN("ALTIVEC: Color Space ARGB32\n");
00544 return altivec_yuv2_argb32;
00545 case IMGFMT_BGR32:
00546 MSG_WARN("ALTIVEC: Color Space BGRA32\n");
00547
00548
00549 return altivec_yuv2_bgra32;
00550 default: return NULL;
00551 }
00552 break;
00553
00554 case IMGFMT_UYVY:
00555 switch(c->dstFormat){
00556 case IMGFMT_RGB32:
00557 MSG_WARN("ALTIVEC: Color Space UYVY -> RGB32\n");
00558 return altivec_uyvy_rgb32;
00559 default: return NULL;
00560 }
00561 break;
00562
00563 }
00564 return NULL;
00565 }
00566
00567 void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4])
00568 {
00569 vector signed short CY, CRV, CBU, CGU, CGV, OY, Y0;
00570 int64_t crv __attribute__ ((aligned(16))) = inv_table[0];
00571 int64_t cbu __attribute__ ((aligned(16))) = inv_table[1];
00572 int64_t cgu __attribute__ ((aligned(16))) = inv_table[2];
00573 int64_t cgv __attribute__ ((aligned(16))) = inv_table[3];
00574 int64_t cy = (1<<16)-1;
00575 int64_t oy = 0;
00576 short tmp __attribute__ ((aligned(16)));
00577
00578 if ((c->flags & SWS_CPU_CAPS_ALTIVEC) == 0)
00579 return;
00580
00581 cy = (cy *c->contrast )>>17;
00582 crv= (crv*c->contrast * c->saturation)>>32;
00583 cbu= (cbu*c->contrast * c->saturation)>>32;
00584 cgu= (cgu*c->contrast * c->saturation)>>32;
00585 cgv= (cgv*c->contrast * c->saturation)>>32;
00586
00587 oy -= 256*c->brightness;
00588
00589 tmp = cy;
00590 CY = vec_lde (0, &tmp);
00591 CY = vec_splat (CY, 0);
00592
00593 tmp = oy;
00594 OY = vec_lde (0, &tmp);
00595 OY = vec_splat (OY, 0);
00596
00597 tmp = crv>>3;
00598 CRV = vec_lde (0, &tmp);
00599 CRV = vec_splat (CRV, 0);
00600 tmp = cbu>>3;
00601 CBU = vec_lde (0, &tmp);
00602 CBU = vec_splat (CBU, 0);
00603
00604 tmp = -(cgu>>1);
00605 CGU = vec_lde (0, &tmp);
00606 CGU = vec_splat (CGU, 0);
00607 tmp = -(cgv>>1);
00608 CGV = vec_lde (0, &tmp);
00609 CGV = vec_splat (CGV, 0);
00610
00611 c->CSHIFT = (vector unsigned short)(2);
00612 c->CY = CY;
00613 c->OY = OY;
00614 c->CRV = CRV;
00615 c->CBU = CBU;
00616 c->CGU = CGU;
00617 c->CGV = CGV;
00618
00619 #if 0
00620 printf ("cy: %hvx\n", CY);
00621 printf ("oy: %hvx\n", OY);
00622 printf ("crv: %hvx\n", CRV);
00623 printf ("cbu: %hvx\n", CBU);
00624 printf ("cgv: %hvx\n", CGV);
00625 printf ("cgu: %hvx\n", CGU);
00626 #endif
00627
00628 return;
00629 }
00630
00631
00632 void
00633 altivec_yuv2packedX (SwsContext *c,
00634 int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00635 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00636 uint8_t *dest, int dstW, int dstY)
00637 {
00638 int i,j;
00639 short tmp __attribute__((aligned (16)));
00640 short *p;
00641 short *f;
00642 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00643 vector signed short R0,G0,B0,R1,G1,B1;
00644
00645 vector unsigned char R,G,B,pels[3];
00646 vector unsigned char *out,*nout;
00647 vector signed short RND = (vector signed short)(1<<3);
00648 vector unsigned short SCL = (vector unsigned short)(4);
00649 unsigned long scratch[16] __attribute__ ((aligned (16)));
00650
00651 vector signed short *vYCoeffsBank, *vCCoeffsBank;
00652
00653 vector signed short *YCoeffs, *CCoeffs;
00654
00655 vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*dstW);
00656 vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*dstW);
00657
00658 for (i=0;i<lumFilterSize*dstW;i++) {
00659 tmp = c->vLumFilter[i];
00660 p = &vYCoeffsBank[i];
00661 for (j=0;j<8;j++)
00662 p[j] = tmp;
00663 }
00664
00665 for (i=0;i<chrFilterSize*dstW;i++) {
00666 tmp = c->vChrFilter[i];
00667 p = &vCCoeffsBank[i];
00668 for (j=0;j<8;j++)
00669 p[j] = tmp;
00670 }
00671
00672 YCoeffs = vYCoeffsBank+dstY*lumFilterSize;
00673 CCoeffs = vCCoeffsBank+dstY*chrFilterSize;
00674
00675 out = (vector unsigned char *)dest;
00676
00677 for(i=0; i<dstW; i+=16){
00678 Y0 = RND;
00679 Y1 = RND;
00680
00681 for(j=0; j<lumFilterSize; j++) {
00682 X0 = vec_ld (0, &lumSrc[j][i]);
00683 X1 = vec_ld (16, &lumSrc[j][i]);
00684 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00685 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00686 }
00687
00688 U = RND;
00689 V = RND;
00690
00691 for(j=0; j<chrFilterSize; j++) {
00692 X = vec_ld (0, &chrSrc[j][i/2]);
00693 U = vec_mradds (X, CCoeffs[j], U);
00694 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00695 V = vec_mradds (X, CCoeffs[j], V);
00696 }
00697
00698
00699 Y0 = vec_sra (Y0, SCL);
00700 Y1 = vec_sra (Y1, SCL);
00701 U = vec_sra (U, SCL);
00702 V = vec_sra (V, SCL);
00703
00704 Y0 = vec_clip (Y0);
00705 Y1 = vec_clip (Y1);
00706 U = vec_clip (U);
00707 V = vec_clip (V);
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718 U0 = vec_mergeh (U,U);
00719 V0 = vec_mergeh (V,V);
00720
00721 U1 = vec_mergel (U,U);
00722 V1 = vec_mergel (V,V);
00723
00724 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00725 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00726
00727 R = vec_packclp (R0,R1);
00728 G = vec_packclp (G0,G1);
00729 B = vec_packclp (B0,B1);
00730
00731 out_rgba (R,G,B,out);
00732 }
00733
00734 if (i < dstW) {
00735 i -= 16;
00736
00737 Y0 = RND;
00738 Y1 = RND;
00739
00740 for(j=0; j<lumFilterSize; j++) {
00741 X0 = vec_ld (0, &lumSrc[j][i]);
00742 X1 = vec_ld (16, &lumSrc[j][i]);
00743 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00744 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00745 }
00746
00747 U = RND;
00748 V = RND;
00749
00750 for(j=0; j<chrFilterSize; j++) {
00751 X = vec_ld (0, &chrSrc[j][i/2]);
00752 U = vec_mradds (X, CCoeffs[j], U);
00753 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00754 V = vec_mradds (X, CCoeffs[j], V);
00755 }
00756
00757
00758 Y0 = vec_sra (Y0, SCL);
00759 Y1 = vec_sra (Y1, SCL);
00760 U = vec_sra (U, SCL);
00761 V = vec_sra (V, SCL);
00762
00763 Y0 = vec_clip (Y0);
00764 Y1 = vec_clip (Y1);
00765 U = vec_clip (U);
00766 V = vec_clip (V);
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777 U0 = vec_mergeh (U,U);
00778 V0 = vec_mergeh (V,V);
00779
00780 U1 = vec_mergel (U,U);
00781 V1 = vec_mergel (V,V);
00782
00783 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00784 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00785
00786 R = vec_packclp (R0,R1);
00787 G = vec_packclp (G0,G1);
00788 B = vec_packclp (B0,B1);
00789
00790 nout = (vector unsigned char *)scratch;
00791 out_rgba (R,G,B,nout);
00792
00793 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00794 }
00795
00796 if (vYCoeffsBank) free (vYCoeffsBank);
00797 if (vCCoeffsBank) free (vCCoeffsBank);
00798
00799 }