vlc-0.8.4a: /home/huihoo/src/vlc/vlc-0.8.4a/modules/video

00001 /*
00002  * yuv2rgb.c, Software YUV to RGB converter
00003  *
00004  *  Copyright (C) 1999, Aaron Holtzman <[email protected]>
00005  *  All Rights Reserved.
00006  *
00007  *  Functions broken out from display_x11.c and several new modes
00008  *  added by Håkan Hjort <[email protected]>
00009  *
00010  *  15 & 16 bpp support by Franck Sicard <[email protected]>
00011  *
00012  *  This file is part of mpeg2dec, a free MPEG-2 video decoder
00013  *
00014  *  mpeg2dec is free software; you can redistribute it and/or modify
00015  *  it under the terms of the GNU General Public License as published by
00016  *  the Free Software Foundation; either version 2, or (at your option)
00017  *  any later version.
00018  *
00019  *  mpeg2dec is distributed in the hope that it will be useful,
00020  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00021  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00022  *  GNU General Public License for more details.
00023  *
00024  *  You should have received a copy of the GNU General Public License
00025  *  along with GNU Make; see the file COPYING.  If not, write to
00026  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
00027  *
00028  * MMX/MMX2 Template stuff from Michael Niedermayer ([email protected]) (needed for fast movntq support)
00029  * 1,4,8bpp support by Michael Niedermayer ([email protected])
00030  * context / deglobalize stuff by Michael Niedermayer
00031  */
00032 
00033 #include <stdio.h>
00034 #include <stdlib.h>
00035 #include <inttypes.h>
00036 #include <assert.h>
00037 
00038 #include "config.h"
00039 //#include "video_out.h"
00040 #include "rgb2rgb.h"
00041 #include "swscale.h"
00042 #include "swscale_internal.h"
00043 #include "common.h"
00044 
00045 #ifdef HAVE_MLIB
00046 #include "yuv2rgb_mlib.c"
00047 #endif
00048 
00049 #define DITHER1XBPP // only for mmx
00050 
00051 const uint8_t  __attribute__((aligned(8))) dither_2x2_4[2][8]={
00052 {  1,   3,   1,   3,   1,   3,   1,   3, },
00053 {  2,   0,   2,   0,   2,   0,   2,   0, },
00054 };
00055 
// 2-row dither pattern, 8 bytes per row: the pair {6,2} / {0,4} repeated four
// times, so values span 0..6 in steps of 2. Declared 8-byte aligned.
// NOTE(review): not referenced by the C code in this listing; presumably
// consumed elsewhere in the project - confirm before removing.
00056 const uint8_t  __attribute__((aligned(8))) dither_2x2_8[2][8]={
00057 {  6,   2,   6,   2,   6,   2,   6,   2, },
00058 {  0,   4,   0,   4,   0,   4,   0,   4, },
00059 };
00060 
// 8x8 ordered-dither matrix with offsets in the range 0..31.
// yuv2rgb_c_8_ordered_dither indexes it by (y & 7) and adds the offsets to the
// luma value before the red and green table lookups.
00061 const uint8_t  __attribute__((aligned(8))) dither_8x8_32[8][8]={
00062 { 17,   9,  23,  15,  16,   8,  22,  14, },
00063 {  5,  29,   3,  27,   4,  28,   2,  26, },
00064 { 21,  13,  19,  11,  20,  12,  18,  10, },
00065 {  0,  24,   6,  30,   1,  25,   7,  31, },
00066 { 16,   8,  22,  14,  17,   9,  23,  15, },
00067 {  4,  28,   2,  26,   5,  29,   3,  27, },
00068 { 20,  12,  18,  10,  21,  13,  19,  11, },
00069 {  1,  25,   7,  31,   0,  24,   6,  30, },
00070 };
00071 
// Disabled (#if 0): 8x8 ordered-dither matrix with offsets 0..63.
// Never compiled; kept only as a reference alternative.
00072 #if 0
00073 const uint8_t  __attribute__((aligned(8))) dither_8x8_64[8][8]={
00074 {  0,  48,  12,  60,   3,  51,  15,  63, },
00075 { 32,  16,  44,  28,  35,  19,  47,  31, },
00076 {  8,  56,   4,  52,  11,  59,   7,  55, },
00077 { 40,  24,  36,  20,  43,  27,  39,  23, },
00078 {  2,  50,  14,  62,   1,  49,  13,  61, },
00079 { 34,  18,  46,  30,  33,  17,  45,  29, },
00080 { 10,  58,   6,  54,   9,  57,   5,  53, },
00081 { 42,  26,  38,  22,  41,  25,  37,  21, },
00082 };
00083 #endif
00084 
// 8x8 ordered-dither matrix with offsets in the range 0..72.
// The converters below bind it to a local named d64: the 8bpp ordered-dither
// routine uses it for the blue lookup, the 4bpp / 4b ordered-dither routines
// use it for the green lookup.
00085 const uint8_t  __attribute__((aligned(8))) dither_8x8_73[8][8]={
00086 {  0,  55,  14,  68,   3,  58,  17,  72, },
00087 { 37,  18,  50,  32,  40,  22,  54,  35, },
00088 {  9,  64,   5,  59,  13,  67,   8,  63, },
00089 { 46,  27,  41,  23,  49,  31,  44,  26, },
00090 {  2,  57,  16,  71,   1,  56,  15,  70, },
00091 { 39,  21,  52,  34,  38,  19,  51,  33, },
00092 { 11,  66,   7,  62,  10,  65,   6,  60, },
00093 { 48,  30,  43,  25,  47,  29,  42,  24, },
00094 };
00095 
// Disabled (#if 0): 8x8 ordered-dither matrix with offsets 0..126.
// Never compiled; kept only as a reference alternative.
00096 #if 0
00097 const uint8_t  __attribute__((aligned(8))) dither_8x8_128[8][8]={
00098 { 68,  36,  92,  60,  66,  34,  90,  58, },
00099 { 20, 116,  12, 108,  18, 114,  10, 106, },
00100 { 84,  52,  76,  44,  82,  50,  74,  42, },
00101 {  0,  96,  24, 120,   6, 102,  30, 126, },
00102 { 64,  32,  88,  56,  70,  38,  94,  62, },
00103 { 16, 112,   8, 104,  22, 118,  14, 110, },
00104 { 80,  48,  72,  40,  86,  54,  78,  46, },
00105 {  4, 100,  28, 124,   2,  98,  26, 122, },
00106 };
00107 #endif
00108 
// 8x8 ordered-dither matrix bound to the local d128 by the 4bpp, 4b and 1bpp
// ordered-dither converters below.
// Four alternative definitions are provided; because the first guard is
// "#if 1", only the first variant (offsets 0..217) is ever compiled. The
// remaining variants are described by their own comments as attempts to
// compensate for display gammas of 1.5, 2.0 and 2.5; to try one, change the
// guards so that exactly one branch is selected.
00109 #if 1
00110 const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
00111 {117,  62, 158, 103, 113,  58, 155, 100, },
00112 { 34, 199,  21, 186,  31, 196,  17, 182, },
00113 {144,  89, 131,  76, 141,  86, 127,  72, },
00114 {  0, 165,  41, 206,  10, 175,  52, 217, },
00115 {110,  55, 151,  96, 120,  65, 162, 107, },
00116 { 28, 193,  14, 179,  38, 203,  24, 189, },
00117 {138,  83, 124,  69, 148,  93, 134,  79, },
00118 {  7, 172,  48, 213,   3, 168,  45, 210, },
00119 };
00120 #elif 1
00121 // tries to correct a gamma of 1.5
00122 const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
00123 {  0, 143,  18, 200,   2, 156,  25, 215, },
00124 { 78,  28, 125,  64,  89,  36, 138,  74, },
00125 { 10, 180,   3, 161,  16, 195,   8, 175, },
00126 {109,  51,  93,  38, 121,  60, 105,  47, },
00127 {  1, 152,  23, 210,   0, 147,  20, 205, },
00128 { 85,  33, 134,  71,  81,  30, 130,  67, },
00129 { 14, 190,   6, 171,  12, 185,   5, 166, },
00130 {117,  57, 101,  44, 113,  54,  97,  41, },
00131 };
00132 #elif 1
00133 // tries to correct a gamma of 2.0
00134 const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
00135 {  0, 124,   8, 193,   0, 140,  12, 213, },
00136 { 55,  14, 104,  42,  66,  19, 119,  52, },
00137 {  3, 168,   1, 145,   6, 187,   3, 162, },
00138 { 86,  31,  70,  21,  99,  39,  82,  28, },
00139 {  0, 134,  11, 206,   0, 129,   9, 200, },
00140 { 62,  17, 114,  48,  58,  16, 109,  45, },
00141 {  5, 181,   2, 157,   4, 175,   1, 151, },
00142 { 95,  36,  78,  26,  90,  34,  74,  24, },
00143 };
00144 #else
00145 // tries to correct a gamma of 2.5
00146 const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
00147 {  0, 107,   3, 187,   0, 125,   6, 212, },
00148 { 39,   7,  86,  28,  49,  11, 102,  36, },
00149 {  1, 158,   0, 131,   3, 180,   1, 151, },
00150 { 68,  19,  52,  12,  81,  25,  64,  17, },
00151 {  0, 119,   5, 203,   0, 113,   4, 195, },
00152 { 45,   9,  96,  33,  42,   8,  91,  30, },
00153 {  2, 172,   1, 144,   2, 165,   0, 137, },
00154 { 77,  23,  60,  15,  72,  21,  56,  14, },
00155 };
00156 #endif
00157 
// x86-only section: 64-bit constants and scratch variables, followed by two
// inclusions of yuv2rgb_template.c that generate the _MMX and _MMX2 variants
// of the routines returned by yuv2rgb_get_func_ptr.
// NOTE(review): the attribute_used markers and the volatile comment below
// indicate these globals are read from inline asm in the template - the
// template itself is not visible here, confirm there.
00158 #ifdef ARCH_X86
00159 
00160 /* hope these constant values are cache line aligned */
00161 uint64_t attribute_used __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ffULL;
00162 uint64_t attribute_used __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8ULL;
00163 uint64_t attribute_used __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfcULL;
00164 
00165 uint64_t attribute_used __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFULL;
00166 uint64_t attribute_used __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00ULL;
00167 uint64_t attribute_used __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000ULL;
00168 
00169 // the volatile is required because gcc otherwise optimizes some writes away not knowing that these
00170 // are read in the asm block
00171 volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
00172 volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
00173 volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
00174 volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
00175 
// Two-entry tables of 64-bit dither words, one entry per row parity.
00176 uint64_t __attribute__((aligned(8))) dither4[2]={
00177         0x0103010301030103LL,
00178         0x0200020002000200LL,};
00179 
00180 uint64_t __attribute__((aligned(8))) dither8[2]={
00181         0x0602060206020602LL,
00182         0x0004000400040004LL,};
00183 
// Reset the feature macros so each template inclusion below sees exactly the
// configuration it is meant to be compiled for. ARCH_X86 is re-defined before
// each include, so it is still defined after this section.
00184 #undef HAVE_MMX
00185 #undef ARCH_X86
00186 
// First pass: RENAME() appends _MMX, HAVE_MMX2 and HAVE_3DNOW are undefined.
00187 //MMX versions
00188 #undef RENAME
00189 #define HAVE_MMX
00190 #undef HAVE_MMX2
00191 #undef HAVE_3DNOW
00192 #define ARCH_X86
00193 #define RENAME(a) a ## _MMX
00194 #include "yuv2rgb_template.c"
00195 
// Second pass: RENAME() appends _MMX2 and HAVE_MMX2 is defined as well.
00196 //MMX2 versions
00197 #undef RENAME
00198 #define HAVE_MMX
00199 #define HAVE_MMX2
00200 #undef HAVE_3DNOW
00201 #define ARCH_X86
00202 #define RENAME(a) a ## _MMX2
00203 #include "yuv2rgb_template.c"
00204 
00205 #endif // ARCH_X86
00206 
// Eight rows of four fixed-point conversion coefficients, one row per colour
// matrix named in the trailing comments.
// NOTE(review): the row layout matches what yuv2rgb_c_init_tables expects of
// its inv_table argument (index 0 -> crv, 1 -> cbu, 2 -> -cgu, 3 -> -cgv), but
// no call site passing a row of this table is visible here - confirm.
// NOTE(review): the name suggests the row index is the MPEG-2
// matrix_coefficients code (ITU-T H.262 Table 6-9) - confirm against callers.
00207 const int32_t Inverse_Table_6_9[8][4] = {
00208     {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
00209     {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
00210     {104597, 132201, 25675, 53279}, /* unspecified */
00211     {104597, 132201, 25675, 53279}, /* reserved */
00212     {104448, 132798, 24759, 53109}, /* FCC */
00213     {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
00214     {104597, 132201, 25675, 53279}, /* SMPTE 170M */
00215     {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
00216 };
00217 
// Helper macros for the converter bodies generated with PROLOG/EPILOG.
// They are not self-contained: each one reads and writes locals that PROLOG
// declares (c, pu, pv, py_1, py_2, dst_1, dst_2, r, g, b, U, V, Y). The
// argument i is pasted unparenthesised, so pass a plain constant or name.
//
// RGB(i): fetch chroma sample i (U from pu, V from pv) and point r, g and b at
// the per-chroma lookup tables stored in the context: r from table_rV[V],
// g from table_gU[U] offset by table_gV[V], b from table_bU[U].
00218 #define RGB(i)                                  \
00219         U = pu[i];                              \
00220         V = pv[i];                              \
00221         r = c->table_rV[V];                     \
00222         g = c->table_gU[U] + c->table_gV[V];            \
00223         b = c->table_bU[U];
00224 
// DST1(i) / DST2(i): for the two luma samples 2*i and 2*i+1 of the first
// (py_1) or second (py_2) row, store r[Y] + g[Y] + b[Y] as one destination
// element each.
00225 #define DST1(i)                                 \
00226         Y = py_1[2*i];                          \
00227         dst_1[2*i] = r[Y] + g[Y] + b[Y];        \
00228         Y = py_1[2*i+1];                        \
00229         dst_1[2*i+1] = r[Y] + g[Y] + b[Y];
00230 
00231 #define DST2(i)                                 \
00232         Y = py_2[2*i];                          \
00233         dst_2[2*i] = r[Y] + g[Y] + b[Y];        \
00234         Y = py_2[2*i+1];                        \
00235         dst_2[2*i+1] = r[Y] + g[Y] + b[Y];
00236 
// DST1RGB(i) / DST2RGB(i): same two pixels, but written as three separate
// destination elements per pixel in r, g, b order (6 elements per call).
00237 #define DST1RGB(i)                                                      \
00238         Y = py_1[2*i];                                                  \
00239         dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y];    \
00240         Y = py_1[2*i+1];                                                \
00241         dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y];
00242 
00243 #define DST2RGB(i)                                                      \
00244         Y = py_2[2*i];                                                  \
00245         dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y];    \
00246         Y = py_2[2*i+1];                                                \
00247         dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y];
00248 
// DST1BGR(i) / DST2BGR(i): as the RGB variants, with the component order
// reversed to b, g, r.
00249 #define DST1BGR(i)                                                      \
00250         Y = py_1[2*i];                                                  \
00251         dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y];    \
00252         Y = py_1[2*i+1];                                                \
00253         dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y];
00254 
00255 #define DST2BGR(i)                                                      \
00256         Y = py_2[2*i];                                                  \
00257         dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y];    \
00258         Y = py_2[2*i+1];                                                \
00259         dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y];
00260 
// PROLOG(func_name, dst_type) opens the definition of a static converter
//   int func_name(SwsContext *c, uint8_t *src[], int srcStride[],
//                 int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
// and leaves the caller inside its innermost loop; EPILOG closes everything.
// The statements written between the two macros form the per-group body.
//
// What the generated function does:
//  - If c->srcFormat is IMGFMT_422P, srcStride[1] and srcStride[2] are doubled
//    IN PLACE, i.e. the caller's array is modified on every call.
//  - Rows are processed two at a time (y += 2); both rows share one chroma row
//    at index y>>1.
//  - Each inner iteration handles 8 luma samples and 4 chroma samples per row.
//    The iteration count is c->dstW >> 3, so any trailing pixels beyond a
//    multiple of 8 are not converted.
//  - dst_1 / dst_2 point at destination rows y+srcSliceY and y+srcSliceY+1 of
//    plane 0, typed as dst_type; r, g, b are dst_type lookup-table pointers
//    that the RGB() macro sets.
//  - The return value is srcSliceH.
00261 #define PROLOG(func_name, dst_type) \
00262 static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
00263              int srcSliceH, uint8_t* dst[], int dstStride[]){\
00264     int y;\
00265 \
00266     if(c->srcFormat == IMGFMT_422P){\
00267         srcStride[1] *= 2;\
00268         srcStride[2] *= 2;\
00269     }\
00270     for(y=0; y<srcSliceH; y+=2){\
00271         dst_type *dst_1= (dst_type*)(dst[0] + (y+srcSliceY  )*dstStride[0]);\
00272         dst_type *dst_2= (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
00273         dst_type *r, *g, *b;\
00274         uint8_t *py_1= src[0] + y*srcStride[0];\
00275         uint8_t *py_2= py_1 + srcStride[0];\
00276         uint8_t *pu= src[1] + (y>>1)*srcStride[1];\
00277         uint8_t *pv= src[2] + (y>>1)*srcStride[2];\
00278         unsigned int h_size= c->dstW>>3;\
00279         while (h_size--) {\
00280             int U, V, Y;\
00281 
// EPILOG(dst_delta) advances the source pointers by one 8-pixel group, advances
// both destination pointers by dst_delta elements of dst_type, then closes the
// inner loop, the row loop and the function.
00282 #define EPILOG(dst_delta)\
00283             pu += 4;\
00284             pv += 4;\
00285             py_1 += 8;\
00286             py_2 += 8;\
00287             dst_1 += dst_delta;\
00288             dst_2 += dst_delta;\
00289         }\
00290     }\
00291     return srcSliceH;\
00292 }
00293 
// Converter writing one uint32_t per pixel: each destination element is the
// sum of the three table entries selected by RGB() and the luma value.
// One group = 4 chroma samples -> 8 pixels on each of the two rows, hence
// EPILOG(8). The DST1/DST2 order alternates between chroma samples; both
// orders write the same elements.
00294 PROLOG(yuv2rgb_c_32, uint32_t)
00295         RGB(0);
00296         DST1(0);
00297         DST2(0);
00298 
00299         RGB(1);
00300         DST2(1);
00301         DST1(1);
00302 
00303         RGB(2);
00304         DST1(2);
00305         DST2(2);
00306 
00307         RGB(3);
00308         DST2(3);
00309         DST1(3);
00310 EPILOG(8)
00311 
// Converter writing three bytes per pixel in r, g, b order.
// 8 pixels per group on each row -> 24 destination bytes, hence EPILOG(24).
00312 PROLOG(yuv2rgb_c_24_rgb, uint8_t)
00313         RGB(0);
00314         DST1RGB(0);
00315         DST2RGB(0);
00316 
00317         RGB(1);
00318         DST2RGB(1);
00319         DST1RGB(1);
00320 
00321         RGB(2);
00322         DST1RGB(2);
00323         DST2RGB(2);
00324 
00325         RGB(3);
00326         DST2RGB(3);
00327         DST1RGB(3);
00328 EPILOG(24)
00329 
00330 // Same as yuv2rgb_c_24_rgb, but the three bytes of each pixel are stored in b, g, r order
00331 PROLOG(yuv2rgb_c_24_bgr, uint8_t)
00332         RGB(0);
00333         DST1BGR(0);
00334         DST2BGR(0);
00335 
00336         RGB(1);
00337         DST2BGR(1);
00338         DST1BGR(1);
00339 
00340         RGB(2);
00341         DST1BGR(2);
00342         DST2BGR(2);
00343 
00344         RGB(3);
00345         DST2BGR(3);
00346         DST1BGR(3);
00347 EPILOG(24)
00348 
00349 // Same statement sequence as yuv2rgb_c_32; only the element type of
00350 // r, g, b, dst_1, dst_2 differs (uint16_t, one element per pixel)
00351 PROLOG(yuv2rgb_c_16, uint16_t)
00352         RGB(0);
00353         DST1(0);
00354         DST2(0);
00355 
00356         RGB(1);
00357         DST2(1);
00358         DST1(1);
00359 
00360         RGB(2);
00361         DST1(2);
00362         DST2(2);
00363 
00364         RGB(3);
00365         DST2(3);
00366         DST1(3);
00367 EPILOG(8)
00368 
00369 // Same statement sequence as yuv2rgb_c_32; only the element type of
00370 // r, g, b, dst_1, dst_2 differs (uint8_t, one byte per pixel, no dithering)
// NOTE(review): yuv2rgb_get_func_ptr in this file returns the ordered-dither
// variant for the 8bpp formats, so this routine appears to be unreferenced
// here - confirm before removing.
00371 PROLOG(yuv2rgb_c_8, uint8_t)
00372         RGB(0);
00373         DST1(0);
00374         DST2(0);
00375 
00376         RGB(1);
00377         DST2(1);
00378         DST1(1);
00379 
00380         RGB(2);
00381         DST1(2);
00382         DST2(2);
00383 
00384         RGB(3);
00385         DST2(3);
00386         DST1(3);
00387 EPILOG(8)
00388 
00389 // 8bpp converter with ordered dithering: one byte per pixel; r, g, b, dst_1, dst_2 are uint8_t
// Before each table lookup a position-dependent offset is added to the luma
// index: red and green use dither_8x8_32, blue uses dither_8x8_73 (bound to
// the local named d64).
// d32/d64 point at matrix row (y & 7). Indices 0..7 address that row for the
// first image row; indices 8..15 run into the next matrix row and are used for
// the second image row. y is always even here, so the next row exists.
// The second macro argument o is the column offset of the pixel pair (2*i).
00390 PROLOG(yuv2rgb_c_8_ordered_dither, uint8_t)
00391         const uint8_t *d32= dither_8x8_32[y&7];
00392         const uint8_t *d64= dither_8x8_73[y&7];
00393 #define DST1bpp8(i,o)                                   \
00394         Y = py_1[2*i];                          \
00395         dst_1[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]];     \
00396         Y = py_1[2*i+1];                        \
00397         dst_1[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
00398 
00399 #define DST2bpp8(i,o)                                   \
00400         Y = py_2[2*i];                          \
00401         dst_2[2*i] =  r[Y+d32[8+o]] + g[Y+d32[8+o]] + b[Y+d64[8+o]];    \
00402         Y = py_2[2*i+1];                        \
00403         dst_2[2*i+1] =  r[Y+d32[9+o]] + g[Y+d32[9+o]] + b[Y+d64[9+o]];
00404 
00405 
00406         RGB(0);
00407         DST1bpp8(0,0);
00408         DST2bpp8(0,0);
00409 
00410         RGB(1);
00411         DST2bpp8(1,2);
00412         DST1bpp8(1,2);
00413 
00414         RGB(2);
00415         DST1bpp8(2,4);
00416         DST2bpp8(2,4);
00417 
00418         RGB(3);
00419         DST2bpp8(3,6);
00420         DST1bpp8(3,6);
00421 EPILOG(8)
00422 
00423 
00424 // 4bpp converter without dithering, two pixels packed into each output byte:
00425 // the even pixel goes into the low nibble, the odd pixel is shifted left by 4
// Each DST*_4(i) call therefore writes a single byte dst[i], and one 8-pixel
// group advances the destination by 4 bytes (EPILOG(4)).
// NOTE(review): yuv2rgb_get_func_ptr in this file returns the ordered-dither
// variant for the 4bpp formats, so this routine appears to be unreferenced
// here - confirm before removing.
00426 PROLOG(yuv2rgb_c_4, uint8_t)
00427         int acc;
00428 #define DST1_4(i)                                       \
00429         Y = py_1[2*i];                          \
00430         acc = r[Y] + g[Y] + b[Y];       \
00431         Y = py_1[2*i+1];                        \
00432         acc |= (r[Y] + g[Y] + b[Y])<<4;\
00433         dst_1[i] = acc; 
00434 
00435 #define DST2_4(i)                                       \
00436         Y = py_2[2*i];                          \
00437         acc = r[Y] + g[Y] + b[Y];       \
00438         Y = py_2[2*i+1];                        \
00439         acc |= (r[Y] + g[Y] + b[Y])<<4;\
00440         dst_2[i] = acc; 
00441         
00442         RGB(0);
00443         DST1_4(0);
00444         DST2_4(0);
00445 
00446         RGB(1);
00447         DST2_4(1);
00448         DST1_4(1);
00449 
00450         RGB(2);
00451         DST1_4(2);
00452         DST2_4(2);
00453 
00454         RGB(3);
00455         DST2_4(3);
00456         DST1_4(3);
00457 EPILOG(4)
00458 
// 4bpp converter with ordered dithering, two pixels packed per output byte
// (even pixel in the low nibble, odd pixel shifted left by 4).
// Red and blue lookups are offset with dither_8x8_220 (local d128), green with
// dither_8x8_73 (local d64). Indices 8.. reach into the next matrix row and
// serve the second image row; y is always even, so that row exists.
// One 8-pixel group produces 4 bytes per row, hence EPILOG(4).
00459 PROLOG(yuv2rgb_c_4_ordered_dither, uint8_t)
00460         const uint8_t *d64= dither_8x8_73[y&7];
00461         const uint8_t *d128=dither_8x8_220[y&7];
00462         int acc;
00463 
00464 #define DST1bpp4(i,o)                                   \
00465         Y = py_1[2*i];                          \
00466         acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]];  \
00467         Y = py_1[2*i+1];                        \
00468         acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4;\
00469         dst_1[i]= acc;
00470 
00471 #define DST2bpp4(i,o)                                   \
00472         Y = py_2[2*i];                          \
00473         acc =  r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \
00474         Y = py_2[2*i+1];                        \
00475         acc |=  (r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]])<<4;\
00476         dst_2[i]= acc;
00477 
00478 
00479         RGB(0);
00480         DST1bpp4(0,0);
00481         DST2bpp4(0,0);
00482 
00483         RGB(1);
00484         DST2bpp4(1,2);
00485         DST1bpp4(1,2);
00486 
00487         RGB(2);
00488         DST1bpp4(2,4);
00489         DST2bpp4(2,4);
00490 
00491         RGB(3);
00492         DST2bpp4(3,6);
00493         DST1bpp4(3,6);
00494 EPILOG(4)
00495 
00496 // Same statement sequence as yuv2rgb_c_32; only the element type of
00497 // r, g, b, dst_1, dst_2 differs (uint8_t: one pixel per byte, no dithering)
// NOTE(review): yuv2rgb_get_func_ptr in this file returns the ordered-dither
// variant for the RG4B/BG4B formats, so this routine appears to be
// unreferenced here - confirm before removing.
00498 PROLOG(yuv2rgb_c_4b, uint8_t)
00499         RGB(0);
00500         DST1(0);
00501         DST2(0);
00502 
00503         RGB(1);
00504         DST2(1);
00505         DST1(1);
00506 
00507         RGB(2);
00508         DST1(2);
00509         DST2(2);
00510 
00511         RGB(3);
00512         DST2(3);
00513         DST1(3);
00514 EPILOG(8)
00515 
// Ordered-dither converter storing one pixel per output byte (unlike
// yuv2rgb_c_4_ordered_dither, nothing is packed).
// Red and blue lookups are offset with dither_8x8_220 (local d128), green with
// dither_8x8_73 (local d64). Indices 8.. reach into the next matrix row and
// serve the second image row; y is always even, so that row exists.
00516 PROLOG(yuv2rgb_c_4b_ordered_dither, uint8_t)
00517         const uint8_t *d64= dither_8x8_73[y&7];
00518         const uint8_t *d128=dither_8x8_220[y&7];
00519 
00520 #define DST1bpp4b(i,o)                                  \
00521         Y = py_1[2*i];                          \
00522         dst_1[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]];   \
00523         Y = py_1[2*i+1];                        \
00524         dst_1[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
00525 
00526 #define DST2bpp4b(i,o)                                  \
00527         Y = py_2[2*i];                          \
00528         dst_2[2*i] =  r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]];  \
00529         Y = py_2[2*i+1];                        \
00530         dst_2[2*i+1] =  r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]];
00531 
00532 
00533         RGB(0);
00534         DST1bpp4b(0,0);
00535         DST2bpp4b(0,0);
00536 
00537         RGB(1);
00538         DST2bpp4b(1,2);
00539         DST1bpp4b(1,2);
00540 
00541         RGB(2);
00542         DST1bpp4b(2,4);
00543         DST2bpp4b(2,4);
00544 
00545         RGB(3);
00546         DST2bpp4b(3,6);
00547         DST1bpp4b(3,6);
00548 EPILOG(8)
00549 
// 1bpp converter with ordered dithering: 8 pixels are packed into one byte.
// Chroma is ignored - the lookup table g is always the one for U = V = 128 -
// and only luma plus the dither_8x8_220 offset selects the table entry.
// out_1 / out_2 are declared inside the per-group loop, so they restart at 0
// for every group. "out += out + bit" doubles the accumulator (a left shift
// by one) and appends the new table value, so the first pixel of the group
// ends up in the most significant position.
// One byte is written per row and group, hence EPILOG(1).
00550 PROLOG(yuv2rgb_c_1_ordered_dither, uint8_t)
00551         const uint8_t *d128=dither_8x8_220[y&7];
00552         char out_1=0, out_2=0;
00553         g= c->table_gU[128] + c->table_gV[128];
00554 
00555 #define DST1bpp1(i,o)                                   \
00556         Y = py_1[2*i];                          \
00557         out_1+= out_1 + g[Y+d128[0+o]]; \
00558         Y = py_1[2*i+1];                        \
00559         out_1+= out_1 + g[Y+d128[1+o]];
00560 
00561 #define DST2bpp1(i,o)                                   \
00562         Y = py_2[2*i];                          \
00563         out_2+= out_2 + g[Y+d128[8+o]]; \
00564         Y = py_2[2*i+1];                        \
00565         out_2+= out_2 + g[Y+d128[9+o]];
00566 
00567         DST1bpp1(0,0);
00568         DST2bpp1(0,0);
00569 
00570         DST2bpp1(1,2);
00571         DST1bpp1(1,2);
00572 
00573         DST1bpp1(2,4);
00574         DST2bpp1(2,4);
00575 
00576         DST2bpp1(3,6);
00577         DST1bpp1(3,6);
00578         
00579         dst_1[0]= out_1;
00580         dst_2[0]= out_2;
00581 EPILOG(1)
00582 
00583 SwsFunc yuv2rgb_get_func_ptr (SwsContext *c)
00584 {
00585 #ifdef ARCH_X86
00586     if(c->flags & SWS_CPU_CAPS_MMX2){
00587         switch(c->dstFormat){
00588         case IMGFMT_BGR32: return yuv420_rgb32_MMX2;
00589         case IMGFMT_BGR24: return yuv420_rgb24_MMX2;
00590         case IMGFMT_BGR16: return yuv420_rgb16_MMX2;
00591         case IMGFMT_BGR15: return yuv420_rgb15_MMX2;
00592         }
00593     }
00594     if(c->flags & SWS_CPU_CAPS_MMX){
00595         switch(c->dstFormat){
00596         case IMGFMT_BGR32: return yuv420_rgb32_MMX;
00597         case IMGFMT_BGR24: return yuv420_rgb24_MMX;
00598         case IMGFMT_BGR16: return yuv420_rgb16_MMX;
00599         case IMGFMT_BGR15: return yuv420_rgb15_MMX;
00600         }
00601     }
00602 #endif
00603 #ifdef HAVE_MLIB
00604     {
00605         SwsFunc t= yuv2rgb_init_mlib(c);
00606         if(t) return t;
00607     }
00608 #endif
00609 #ifdef HAVE_ALTIVEC
00610     if (c->flags & SWS_CPU_CAPS_ALTIVEC)
00611     {
00612         SwsFunc t = yuv2rgb_init_altivec(c);
00613         if(t) return t;
00614     }
00615 #endif
00616 
00617     MSG_WARN("No accelerated colorspace conversion found\n");
00618 
00619     switch(c->dstFormat){
00620     case IMGFMT_RGB32:
00621     case IMGFMT_BGR32: return yuv2rgb_c_32;
00622     case IMGFMT_RGB24: return yuv2rgb_c_24_rgb;
00623     case IMGFMT_BGR24: return yuv2rgb_c_24_bgr;
00624     case IMGFMT_RGB16:
00625     case IMGFMT_BGR16:
00626     case IMGFMT_RGB15:
00627     case IMGFMT_BGR15: return yuv2rgb_c_16;
00628     case IMGFMT_RGB8:
00629     case IMGFMT_BGR8:  return yuv2rgb_c_8_ordered_dither;
00630     case IMGFMT_RGB4:
00631     case IMGFMT_BGR4:  return yuv2rgb_c_4_ordered_dither;
00632     case IMGFMT_RG4B:
00633     case IMGFMT_BG4B:  return yuv2rgb_c_4b_ordered_dither;
00634     case IMGFMT_RGB1:
00635     case IMGFMT_BGR1:  return yuv2rgb_c_1_ordered_dither;
00636     default:
00637         assert(0);
00638     }
00639     return NULL;
00640 }
00641 
/*
 * Integer division rounded to the nearest quotient.
 *
 * Half of the divisor is added to the magnitude of the dividend before the
 * truncating division, and the sign of the dividend is re-applied afterwards,
 * so rounding is symmetric around zero.
 *
 * dividend: value to divide; its sign decides the sign of the result.
 * divisor:  value to divide by; the visible caller passes a positive constant.
 */
static int div_round (int dividend, int divisor)
{
    const int half = divisor >> 1;
    const int negate = !(dividend > 0);
    const int magnitude = negate ? -dividend : dividend;
    const int quotient = (magnitude + half) / divisor;

    return negate ? -quotient : quotient;
}
00649 
00650 int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation)
00651 {  
00652     const int isRgb = IMGFMT_IS_BGR(c->dstFormat);
00653     const int bpp = isRgb?IMGFMT_RGB_DEPTH(c->dstFormat):IMGFMT_BGR_DEPTH(c->dstFormat);
00654     int i;
00655     uint8_t table_Y[1024];
00656     uint32_t *table_32 = 0;
00657     uint16_t *table_16 = 0;
00658     uint8_t *table_8 = 0;
00659     uint8_t *table_332 = 0;
00660     uint8_t *table_121 = 0;
00661     uint8_t *table_1 = 0;
00662     int entry_size = 0;
00663     void *table_r = 0, *table_g = 0, *table_b = 0;
00664     void *table_start;
00665 
00666     int64_t crv =  inv_table[0];
00667     int64_t cbu =  inv_table[1];
00668     int64_t cgu = -inv_table[2];
00669     int64_t cgv = -inv_table[3];
00670     int64_t cy  = 1<<16;
00671     int64_t oy  = 0;
00672 
00673 //printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv);
00674     if(!fullRange){
00675         cy= (cy*255) / 219;
00676         oy= 16<<16;
00677     }
00678         
00679     cy = (cy *contrast             )>>16;
00680     crv= (crv*contrast * saturation)>>32;
00681     cbu= (cbu*contrast * saturation)>>32;
00682     cgu= (cgu*contrast * saturation)>>32;
00683     cgv= (cgv*contrast * saturation)>>32;
00684 //printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv);
00685     oy -= 256*brightness;
00686 
00687     for (i = 0; i < 1024; i++) {
00688         int j;
00689 
00690         j= (cy*(((i - 384)<<16) - oy) + (1<<31))>>32;
00691         j = (j < 0) ? 0 : ((j > 255) ? 255 : j);
00692         table_Y[i] = j;
00693     }
00694 
00695     switch (bpp) {
00696     case 32:
00697         table_start= table_32 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t));
00698 
00699         entry_size = sizeof (uint32_t);
00700         table_r = table_32 + 197;
00701         table_b = table_32 + 197 + 685;
00702         table_g = table_32 + 197 + 2*682;
00703 
00704         for (i = -197; i < 256+197; i++)
00705             ((uint32_t *)table_r)[i] = table_Y[i+384] << (isRgb ? 16 : 0);
00706         for (i = -132; i < 256+132; i++)
00707             ((uint32_t *)table_g)[i] = table_Y[i+384] << 8;
00708         for (i = -232; i < 256+232; i++)
00709             ((uint32_t *)table_b)[i] = table_Y[i+384] << (isRgb ? 0 : 16);
00710         break;
00711 
00712     case 24:
00713         table_start= table_8 = malloc ((256 + 2*232) * sizeof (uint8_t));
00714 
00715         entry_size = sizeof (uint8_t);
00716         table_r = table_g = table_b = table_8 + 232;
00717 
00718         for (i = -232; i < 256+232; i++)
00719             ((uint8_t * )table_b)[i] = table_Y[i+384];
00720         break;
00721 
00722     case 15:
00723     case 16:
00724         table_start= table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t));
00725 
00726         entry_size = sizeof (uint16_t);
00727         table_r = table_16 + 197;
00728         table_b = table_16 + 197 + 685;
00729         table_g = table_16 + 197 + 2*682;
00730 
00731         for (i = -197; i < 256+197; i++) {
00732             int j = table_Y[i+384] >> 3;
00733 
00734             if (isRgb)
00735                 j <<= ((bpp==16) ? 11 : 10);
00736 
00737             ((uint16_t *)table_r)[i] = j;
00738         }
00739         for (i = -132; i < 256+132; i++) {
00740             int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3);
00741 
00742             ((uint16_t *)table_g)[i] = j << 5;
00743         }
00744         for (i = -232; i < 256+232; i++) {
00745             int j = table_Y[i+384] >> 3;
00746 
00747             if (!isRgb)
00748                 j <<= ((bpp==16) ? 11 : 10);
00749 
00750             ((uint16_t *)table_b)[i] = j;
00751         }
00752         break;
00753 
00754     case 8:
00755         table_start= table_332 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t));
00756 
00757         entry_size = sizeof (uint8_t);
00758         table_r = table_332 + 197;
00759         table_b = table_332 + 197 + 685;
00760         table_g = table_332 + 197 + 2*682;
00761 
00762         for (i = -197; i < 256+197; i++) {
00763             int j = (table_Y[i+384 - 16] + 18)/36;
00764 
00765             if (isRgb)
00766                 j <<= 5;
00767 
00768             ((uint8_t *)table_r)[i] = j;
00769         }
00770         for (i = -132; i < 256+132; i++) {
00771             int j = (table_Y[i+384 - 16] + 18)/36;
00772 
00773             if (!isRgb)
00774                 j <<= 1;
00775 
00776             ((uint8_t *)table_g)[i] = j << 2;
00777         }
00778         for (i = -232; i < 256+232; i++) {
00779             int j = (table_Y[i+384 - 37] + 43)/85;
00780 
00781             if (!isRgb)
00782                 j <<= 6;
00783 
00784             ((uint8_t *)table_b)[i] = j;
00785         }
00786         break;
00787     case 4:
00788     case 4|128:
00789         table_start= table_121 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t));
00790 
00791         entry_size = sizeof (uint8_t);
00792         table_r = table_121 + 197;
00793         table_b = table_121 + 197 + 685;
00794         table_g = table_121 + 197 + 2*682;
00795 
00796         for (i = -197; i < 256+197; i++) {
00797             int j = table_Y[i+384 - 110] >> 7;
00798 
00799             if (isRgb)
00800                 j <<= 3;
00801 
00802             ((uint8_t *)table_r)[i] = j;
00803         }
00804         for (i = -132; i < 256+132; i++) {
00805             int j = (table_Y[i+384 - 37]+ 43)/85;
00806 
00807             ((uint8_t *)table_g)[i] = j << 1;
00808         }
00809         for (i = -232; i < 256+232; i++) {
00810             int j =table_Y[i+384 - 110] >> 7;
00811 
00812             if (!isRgb)
00813                 j <<= 3;
00814 
00815             ((uint8_t *)table_b)[i] = j;
00816         }
00817         break;
00818 
00819     case 1:
00820         table_start= table_1 = malloc (256*2 * sizeof (uint8_t));
00821 
00822         entry_size = sizeof (uint8_t);
00823         table_g = table_1;
00824         table_r = table_b = NULL;
00825 
00826         for (i = 0; i < 256+256; i++) {
00827             int j = table_Y[i + 384 - 110]>>7;
00828 
00829             ((uint8_t *)table_g)[i] = j;
00830         }
00831         break;
00832 
00833     default:
00834         table_start= NULL;
00835         MSG_ERR("%ibpp not supported by yuv2rgb\n", bpp);
00836         //free mem?
00837         return -1;
00838     }
00839 
00840     for (i = 0; i < 256; i++) {
00841         c->table_rV[i] = table_r + entry_size * div_round (crv * (i-128), 76309);
00842         c->table_gU[i] = table_g + entry_size * div_round (cgu * (i-128), 76309);
00843         c->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
00844         c->table_bU[i] = table_b + entry_size * div_round (cbu * (i-128), 76309);
00845     }
00846 
00847     if(c->yuvTable) free(c->yuvTable);
00848     c->yuvTable= table_start;
00849     return 0;
00850 }