00001 #include "stdafx.h"
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
// In the code visible here, ModelX is only tested with #ifdef (inside
// Initialize_REF_IDCT, where it enables the clip-table setup); the
// value 123 itself is not read.
#define ModelX 123 // enable C-level optimizations by Miha Peternel
00024
00025
00026
00027
00028 #include <math.h>
00029
// Pi. Defined only when <math.h> has not already supplied it, so that
// builds where the header provides M_PI do not hit a macro redefinition.
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

// Rounding bias added to every result of the second IDCT pass
// (loaded by the inline assembly in REF_IDCT).
static const double HALF = 0.5;

// Clipping table storage and a pointer into its middle.
// Initialize_REF_IDCT sets iclp = iclip + 1024 so that iclp[] can be
// indexed with values in -1024..1023.
static short iclip[1024+1024];
static short *iclp;

// Cosine transform matrix, indexed c[freq][time]; filled in by
// Initialize_REF_IDCT.
static double c[8][8];
00039
00040
00041 void Initialize_REF_IDCT()
00042 {
00043 int freq, time, i;
00044 double scale;
00045
00046 for (freq=0; freq < 8; freq++)
00047 {
00048 scale = (freq == 0) ? sqrt(0.125) : 0.5;
00049 for (time=0; time<8; time++)
00050 c[freq][time] = scale*cos((M_PI/8.0)*freq*(time + 0.5));
00051 }
00052
00053 #ifdef ModelX
00054 iclp = iclip+1024;
00055 for (i= -1024; i<1024; i++)
00056 iclp[i] = (i<-256) ? -256 : ((i>255) ? 255 : i);
00057 #endif
00058 }
00059
00060 void REF_IDCT(short *block)
00061 {
00062 double tmp[64];
00063 double rnd[64];
00064 int int0, int1, int2, int3, int4, int5, int6, int7;
00065 unsigned short fpold;
00066 unsigned short fpnew;
00067
00068 int *b = (int *) block;
00069
00070 if( !(b[0]|(b[31]&~0x10000)) )
00071 {
00072 if( b[ 1]|b[ 2]|b[ 3]|b[ 4]|b[ 5]|b[ 6] )
00073 goto normal;
00074 if( b[ 7]|b[ 8]|b[ 9]|b[10]|b[11]|b[12] )
00075 goto normal;
00076 if( b[13]|b[14]|b[15]|b[16]|b[17]|b[18] )
00077 goto normal;
00078 if( b[19]|b[20]|b[21]|b[22]|b[23]|b[24] )
00079 goto normal;
00080 if( b[25]|b[26]|b[27]|b[28]|b[29]|b[30] )
00081 goto normal;
00082 b[31]=0;
00083 return;
00084 }
00085 normal:
00086
00087 __asm
00088 {
00089
00090 mov esi,[block]
00091 lea eax,[c]
00092 lea edi,[tmp]
00093
00094 mov ebx,8
00095 align 16
00096 __col1:
00097 movzx edx,[esi+1*2]
00098 mov ecx,[esi+2*2]
00099 or edx,[esi+4*2]
00100 or ecx,[esi+6*2]
00101 or edx,ecx
00102
00103 mov ecx,8/2
00104
00105 jnz __row1
00106 fild word ptr [esi+0*2]
00107 fmul qword ptr [eax+0*8*8]
00108 fst qword ptr [edi+0*8]
00109 fst qword ptr [edi+1*8]
00110 fst qword ptr [edi+2*8]
00111 fst qword ptr [edi+3*8]
00112 fst qword ptr [edi+4*8]
00113 fst qword ptr [edi+5*8]
00114 fst qword ptr [edi+6*8]
00115 fstp qword ptr [edi+7*8]
00116 add edi,8*8
00117 jmp __next1
00118 align 16
00119 __row1:
00120 fild word ptr [esi+0*2]
00121 fmul qword ptr [eax+0*8*8]
00122 fild word ptr [esi+1*2]
00123 fmul qword ptr [eax+1*8*8]
00124 fadd
00125 fild word ptr [esi+2*2]
00126 fmul qword ptr [eax+2*8*8]
00127 fadd
00128 fild word ptr [esi+3*2]
00129 fmul qword ptr [eax+3*8*8]
00130 fadd
00131 fild word ptr [esi+4*2]
00132 fmul qword ptr [eax+4*8*8]
00133 fadd
00134 fild word ptr [esi+5*2]
00135 fmul qword ptr [eax+5*8*8]
00136 fadd
00137 fild word ptr [esi+6*2]
00138 fmul qword ptr [eax+6*8*8]
00139 fadd
00140 fild word ptr [esi+7*2]
00141 fmul qword ptr [eax+7*8*8]
00142 fadd
00143
00144 fild word ptr [esi+0*2]
00145 fmul qword ptr [eax+0*8*8+8]
00146 fild word ptr [esi+1*2]
00147 fmul qword ptr [eax+1*8*8+8]
00148 fadd
00149 fild word ptr [esi+2*2]
00150 fmul qword ptr [eax+2*8*8+8]
00151 fadd
00152 fild word ptr [esi+3*2]
00153 fmul qword ptr [eax+3*8*8+8]
00154 fadd
00155 fild word ptr [esi+4*2]
00156 fmul qword ptr [eax+4*8*8+8]
00157 fadd
00158 fild word ptr [esi+5*2]
00159 fmul qword ptr [eax+5*8*8+8]
00160 fadd
00161 fild word ptr [esi+6*2]
00162 fmul qword ptr [eax+6*8*8+8]
00163 fadd
00164 fild word ptr [esi+7*2]
00165 fmul qword ptr [eax+7*8*8+8]
00166 fadd
00167 add eax,8*2
00168 fxch st(1)
00169 fstp qword ptr [edi]
00170 fstp qword ptr [edi+8]
00171 add edi,8*2
00172 dec ecx
00173
00174 jnz __row1
00175 add eax,-8*8
00176
00177 __next1:
00178 add esi,+8*2
00179
00180 sub ebx,0x80000001
00181 js __col1
00182
00183 test ebx,ebx
00184 jnz __col1
00185
00186 lea esi,[tmp]
00187 lea eax,[c]
00188 lea edi,[rnd]
00189
00190 fld qword ptr [HALF]
00191 mov ebx,8
00192 __row2:
00193 mov ecx,8/2
00194 align 16
00195 __col2:
00196 fld qword ptr [esi+0*8*8]
00197 fmul qword ptr [eax+0*8*8]
00198 fld qword ptr [esi+1*8*8]
00199 fmul qword ptr [eax+1*8*8]
00200 fadd
00201 fld qword ptr [esi+2*8*8]
00202 fmul qword ptr [eax+2*8*8]
00203 fadd
00204 fld qword ptr [esi+3*8*8]
00205 fmul qword ptr [eax+3*8*8]
00206 fadd
00207 fld qword ptr [esi+4*8*8]
00208 fmul qword ptr [eax+4*8*8]
00209 fadd
00210 fld qword ptr [esi+5*8*8]
00211 fmul qword ptr [eax+5*8*8]
00212 fadd
00213 fld qword ptr [esi+6*8*8]
00214 fmul qword ptr [eax+6*8*8]
00215 fadd
00216 fld qword ptr [esi+7*8*8]
00217 fmul qword ptr [eax+7*8*8]
00218 fadd
00219 fadd st(0),st(1)
00220
00221 fxch st(1)
00222
00223 fld qword ptr [esi+0*8*8]
00224 fmul qword ptr [eax+0*8*8+8]
00225 fld qword ptr [esi+1*8*8]
00226 fmul qword ptr [eax+1*8*8+8]
00227 fadd
00228 fld qword ptr [esi+2*8*8]
00229 fmul qword ptr [eax+2*8*8+8]
00230 fadd
00231 fld qword ptr [esi+3*8*8]
00232 fmul qword ptr [eax+3*8*8+8]
00233 fadd
00234 fld qword ptr [esi+4*8*8]
00235 fmul qword ptr [eax+4*8*8+8]
00236 fadd
00237 fld qword ptr [esi+5*8*8]
00238 fmul qword ptr [eax+5*8*8+8]
00239 fadd
00240 fld qword ptr [esi+6*8*8]
00241 fmul qword ptr [eax+6*8*8+8]
00242 fadd
00243 fld qword ptr [esi+7*8*8]
00244 fmul qword ptr [eax+7*8*8+8]
00245 fadd
00246 fadd st(0),st(1)
00247 add eax,8*2
00248
00249 fxch st(2)
00250 fstp qword ptr [edi]
00251 fxch st(1)
00252 fstp qword ptr [edi+8*8]
00253 add edi,8*8*2
00254
00255 dec ecx
00256
00257 jnz __col2
00258 add eax,-8*8
00259 add esi,+8
00260 add edi,8-8*8*8
00261
00262 sub ebx,0x80000001
00263 js __row2
00264
00265 test ebx,ebx
00266 jnz __row2
00267 ffree st(0)
00268
00269
00270 fstcw [fpold]
00271 movzx eax, [fpold]
00272
00273 or eax, 0x0400
00274 mov [fpnew], ax
00275 fldcw [fpnew]
00276
00277
00278 lea esi, [rnd]
00279 mov edi, [block]
00280 mov ebx, -256
00281 mov edx, +255
00282 mov ecx, 8
00283 align 16
00284 __floor:
00285 fld qword ptr [esi+0*8]
00286 fistp dword ptr [int0]
00287 mov eax,[int0]
00288 cmp eax,ebx
00289 cmovl eax,ebx
00290 cmp eax,edx
00291 cmovg eax,edx
00292 fld qword ptr [esi+1*8]
00293 fistp dword ptr [int1]
00294 mov word ptr [edi+0*2],ax
00295 mov eax,[int1]
00296 cmp eax,ebx
00297 cmovl eax,ebx
00298 cmp eax,edx
00299 cmovg eax,edx
00300 fld qword ptr [esi+2*8]
00301 fistp dword ptr [int2]
00302 mov word ptr [edi+1*2],ax
00303 mov eax,[int2]
00304 cmp eax,ebx
00305 cmovl eax,ebx
00306 cmp eax,edx
00307 cmovg eax,edx
00308 fld qword ptr [esi+3*8]
00309 fistp dword ptr [int3]
00310 mov word ptr [edi+2*2],ax
00311 mov eax,[int3]
00312 cmp eax,ebx
00313 cmovl eax,ebx
00314 cmp eax,edx
00315 cmovg eax,edx
00316 fld qword ptr [esi+4*8]
00317 fistp dword ptr [int4]
00318 mov word ptr [edi+3*2],ax
00319 mov eax,[int4]
00320 cmp eax,ebx
00321 cmovl eax,ebx
00322 cmp eax,edx
00323 cmovg eax,edx
00324 fld qword ptr [esi+5*8]
00325 fistp dword ptr [int5]
00326 mov word ptr [edi+4*2],ax
00327 mov eax,[int5]
00328 cmp eax,ebx
00329 cmovl eax,ebx
00330 cmp eax,edx
00331 cmovg eax,edx
00332 fld qword ptr [esi+6*8]
00333 fistp dword ptr [int6]
00334 mov word ptr [edi+5*2],ax
00335 mov eax,[int6]
00336 cmp eax,ebx
00337 cmovl eax,ebx
00338 cmp eax,edx
00339 cmovg eax,edx
00340 fld qword ptr [esi+7*8]
00341 fistp dword ptr [int7]
00342 mov word ptr [edi+6*2],ax
00343 mov eax,[int7]
00344 cmp eax,ebx
00345 cmovl eax,ebx
00346 cmp eax,edx
00347 cmovg eax,edx
00348 mov word ptr [edi+7*2],ax
00349
00350 add esi, 8*8
00351 add edi, 8*2
00352
00353 sub ecx,0x80000001
00354 js __floor
00355
00356 test ecx,ecx
00357 jnz __floor
00358
00359
00360 fldcw [fpold]
00361 };
00362 }