00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "config.h"
00028
00029 #include <math.h>
00030 #include <stdio.h>
00031 #ifdef LIBA52_DJBFFT
00032 #include <fftc4.h>
00033 #endif
00034 #ifndef M_PI
00035 #define M_PI 3.1415926535897932384626433832795029
00036 #endif
00037 #include <inttypes.h>
00038
00039 #include "a52.h"
00040 #include "a52_internal.h"
00041
00042
00043 typedef struct complex_s {
00044 sample_t real;
00045 sample_t imag;
00046 } complex_t;
00047
00048 static uint8_t fftorder[] = {
00049 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
00050 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
00051 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
00052 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
00053 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
00054 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
00055 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
00056 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
00057 };
00058
00059
00060 static sample_t roots16[3];
00061 static sample_t roots32[7];
00062 static sample_t roots64[15];
00063 static sample_t roots128[31];
00064
00065
00066 static complex_t pre1[128];
00067 static complex_t post1[64];
00068 static complex_t pre2[64];
00069 static complex_t post2[32];
00070
00071 static sample_t a52_imdct_window[256];
00072
00073 static void (* ifft128) (complex_t * buf);
00074 static void (* ifft64) (complex_t * buf);
00075
00076 static inline void ifft2 (complex_t * buf)
00077 {
00078 double r, i;
00079
00080 r = buf[0].real;
00081 i = buf[0].imag;
00082 buf[0].real += buf[1].real;
00083 buf[0].imag += buf[1].imag;
00084 buf[1].real = r - buf[1].real;
00085 buf[1].imag = i - buf[1].imag;
00086 }
00087
00088 static inline void ifft4 (complex_t * buf)
00089 {
00090 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00091
00092 tmp1 = buf[0].real + buf[1].real;
00093 tmp2 = buf[3].real + buf[2].real;
00094 tmp3 = buf[0].imag + buf[1].imag;
00095 tmp4 = buf[2].imag + buf[3].imag;
00096 tmp5 = buf[0].real - buf[1].real;
00097 tmp6 = buf[0].imag - buf[1].imag;
00098 tmp7 = buf[2].imag - buf[3].imag;
00099 tmp8 = buf[3].real - buf[2].real;
00100
00101 buf[0].real = tmp1 + tmp2;
00102 buf[0].imag = tmp3 + tmp4;
00103 buf[2].real = tmp1 - tmp2;
00104 buf[2].imag = tmp3 - tmp4;
00105 buf[1].real = tmp5 + tmp7;
00106 buf[1].imag = tmp6 + tmp8;
00107 buf[3].real = tmp5 - tmp7;
00108 buf[3].imag = tmp6 - tmp8;
00109 }
00110
00111
00112
00113 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
00114 tmp5 = a2.real * wr + a2.imag * wi; \
00115 tmp6 = a2.imag * wr - a2.real * wi; \
00116 tmp7 = a3.real * wr - a3.imag * wi; \
00117 tmp8 = a3.imag * wr + a3.real * wi; \
00118 tmp1 = tmp5 + tmp7; \
00119 tmp2 = tmp6 + tmp8; \
00120 tmp3 = tmp6 - tmp8; \
00121 tmp4 = tmp7 - tmp5; \
00122 a2.real = a0.real - tmp1; \
00123 a2.imag = a0.imag - tmp2; \
00124 a3.real = a1.real - tmp3; \
00125 a3.imag = a1.imag - tmp4; \
00126 a0.real += tmp1; \
00127 a0.imag += tmp2; \
00128 a1.real += tmp3; \
00129 a1.imag += tmp4; \
00130 } while (0)
00131
00132
00133
00134 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
00135 tmp1 = a2.real + a3.real; \
00136 tmp2 = a2.imag + a3.imag; \
00137 tmp3 = a2.imag - a3.imag; \
00138 tmp4 = a3.real - a2.real; \
00139 a2.real = a0.real - tmp1; \
00140 a2.imag = a0.imag - tmp2; \
00141 a3.real = a1.real - tmp3; \
00142 a3.imag = a1.imag - tmp4; \
00143 a0.real += tmp1; \
00144 a0.imag += tmp2; \
00145 a1.real += tmp3; \
00146 a1.imag += tmp4; \
00147 } while (0)
00148
00149
00150
00151 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
00152 tmp5 = (a2.real + a2.imag) * w; \
00153 tmp6 = (a2.imag - a2.real) * w; \
00154 tmp7 = (a3.real - a3.imag) * w; \
00155 tmp8 = (a3.imag + a3.real) * w; \
00156 tmp1 = tmp5 + tmp7; \
00157 tmp2 = tmp6 + tmp8; \
00158 tmp3 = tmp6 - tmp8; \
00159 tmp4 = tmp7 - tmp5; \
00160 a2.real = a0.real - tmp1; \
00161 a2.imag = a0.imag - tmp2; \
00162 a3.real = a1.real - tmp3; \
00163 a3.imag = a1.imag - tmp4; \
00164 a0.real += tmp1; \
00165 a0.imag += tmp2; \
00166 a1.real += tmp3; \
00167 a1.imag += tmp4; \
00168 } while (0)
00169
00170 static inline void ifft8 (complex_t * buf)
00171 {
00172 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00173
00174 ifft4 (buf);
00175 ifft2 (buf + 4);
00176 ifft2 (buf + 6);
00177 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
00178 BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
00179 }
00180
00181 static void ifft_pass (complex_t * buf, sample_t * weight, int n)
00182 {
00183 complex_t * buf1;
00184 complex_t * buf2;
00185 complex_t * buf3;
00186 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00187 int i;
00188
00189 buf++;
00190 buf1 = buf + n;
00191 buf2 = buf + 2 * n;
00192 buf3 = buf + 3 * n;
00193
00194 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
00195
00196 i = n - 1;
00197
00198 do {
00199 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], weight[n], weight[2*i]);
00200 buf++;
00201 buf1++;
00202 buf2++;
00203 buf3++;
00204 weight++;
00205 } while (--i);
00206 }
00207
00208 static void ifft16 (complex_t * buf)
00209 {
00210 ifft8 (buf);
00211 ifft4 (buf + 8);
00212 ifft4 (buf + 12);
00213 ifft_pass (buf, roots16 - 4, 4);
00214 }
00215
00216 static void ifft32 (complex_t * buf)
00217 {
00218 ifft16 (buf);
00219 ifft8 (buf + 16);
00220 ifft8 (buf + 24);
00221 ifft_pass (buf, roots32 - 8, 8);
00222 }
00223
00224 static void ifft64_c (complex_t * buf)
00225 {
00226 ifft32 (buf);
00227 ifft16 (buf + 32);
00228 ifft16 (buf + 48);
00229 ifft_pass (buf, roots64 - 16, 16);
00230 }
00231
00232 static void ifft128_c (complex_t * buf)
00233 {
00234 ifft32 (buf);
00235 ifft16 (buf + 32);
00236 ifft16 (buf + 48);
00237 ifft_pass (buf, roots64 - 16, 16);
00238
00239 ifft32 (buf + 64);
00240 ifft32 (buf + 96);
00241 ifft_pass (buf, roots128 - 32, 32);
00242 }
00243
00244 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
00245 {
00246 int i, k;
00247 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
00248 const sample_t * window = a52_imdct_window;
00249 complex_t buf[128];
00250
00251 for (i = 0; i < 128; i++) {
00252 k = fftorder[i];
00253 t_r = pre1[i].real;
00254 t_i = pre1[i].imag;
00255
00256 buf[i].real = t_i * data[255-k] + t_r * data[k];
00257 buf[i].imag = t_r * data[255-k] - t_i * data[k];
00258 }
00259
00260 ifft128 (buf);
00261
00262
00263
00264 for (i = 0; i < 64; i++) {
00265
00266 t_r = post1[i].real;
00267 t_i = post1[i].imag;
00268
00269 a_r = t_r * buf[i].real + t_i * buf[i].imag;
00270 a_i = t_i * buf[i].real - t_r * buf[i].imag;
00271 b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag;
00272 b_i = t_r * buf[127-i].real - t_i * buf[127-i].imag;
00273
00274 w_1 = window[2*i];
00275 w_2 = window[255-2*i];
00276 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias;
00277 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
00278 delay[2*i] = a_i;
00279
00280 w_1 = window[2*i+1];
00281 w_2 = window[254-2*i];
00282 data[2*i+1] = delay[2*i+1] * w_2 + b_r * w_1 + bias;
00283 data[254-2*i] = delay[2*i+1] * w_1 - b_r * w_2 + bias;
00284 delay[2*i+1] = b_i;
00285 }
00286 }
00287
00288 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
00289 {
00290 int i, k;
00291 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
00292 const sample_t * window = a52_imdct_window;
00293 complex_t buf1[64], buf2[64];
00294
00295
00296 for (i = 0; i < 64; i++) {
00297 k = fftorder[i];
00298 t_r = pre2[i].real;
00299 t_i = pre2[i].imag;
00300
00301 buf1[i].real = t_i * data[254-k] + t_r * data[k];
00302 buf1[i].imag = t_r * data[254-k] - t_i * data[k];
00303
00304 buf2[i].real = t_i * data[255-k] + t_r * data[k+1];
00305 buf2[i].imag = t_r * data[255-k] - t_i * data[k+1];
00306 }
00307
00308 ifft64 (buf1);
00309 ifft64 (buf2);
00310
00311
00312
00313 for (i = 0; i < 32; i++) {
00314
00315 t_r = post2[i].real;
00316 t_i = post2[i].imag;
00317
00318 a_r = t_r * buf1[i].real + t_i * buf1[i].imag;
00319 a_i = t_i * buf1[i].real - t_r * buf1[i].imag;
00320 b_r = t_i * buf1[63-i].real + t_r * buf1[63-i].imag;
00321 b_i = t_r * buf1[63-i].real - t_i * buf1[63-i].imag;
00322
00323 c_r = t_r * buf2[i].real + t_i * buf2[i].imag;
00324 c_i = t_i * buf2[i].real - t_r * buf2[i].imag;
00325 d_r = t_i * buf2[63-i].real + t_r * buf2[63-i].imag;
00326 d_i = t_r * buf2[63-i].real - t_i * buf2[63-i].imag;
00327
00328 w_1 = window[2*i];
00329 w_2 = window[255-2*i];
00330 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias;
00331 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
00332 delay[2*i] = c_i;
00333
00334 w_1 = window[128+2*i];
00335 w_2 = window[127-2*i];
00336 data[128+2*i] = delay[127-2*i] * w_2 + a_i * w_1 + bias;
00337 data[127-2*i] = delay[127-2*i] * w_1 - a_i * w_2 + bias;
00338 delay[127-2*i] = c_r;
00339
00340 w_1 = window[2*i+1];
00341 w_2 = window[254-2*i];
00342 data[2*i+1] = delay[2*i+1] * w_2 - b_i * w_1 + bias;
00343 data[254-2*i] = delay[2*i+1] * w_1 + b_i * w_2 + bias;
00344 delay[2*i+1] = d_r;
00345
00346 w_1 = window[129+2*i];
00347 w_2 = window[126-2*i];
00348 data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias;
00349 data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias;
00350 delay[126-2*i] = d_i;
00351 }
00352 }
00353
00354 static double besselI0 (double x)
00355 {
00356 double bessel = 1;
00357 int i = 100;
00358
00359 do
00360 bessel = bessel * x / (i * i) + 1;
00361 while (--i);
00362 return bessel;
00363 }
00364
00365 void a52_imdct_init (uint32_t mm_accel)
00366 {
00367 int i, k;
00368 double sum;
00369
00370
00371 sum = 0;
00372 for (i = 0; i < 256; i++) {
00373 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
00374 a52_imdct_window[i] = sum;
00375 }
00376 sum++;
00377 for (i = 0; i < 256; i++)
00378 a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum);
00379
00380 for (i = 0; i < 3; i++)
00381 roots16[i] = cos ((M_PI / 8) * (i + 1));
00382
00383 for (i = 0; i < 7; i++)
00384 roots32[i] = cos ((M_PI / 16) * (i + 1));
00385
00386 for (i = 0; i < 15; i++)
00387 roots64[i] = cos ((M_PI / 32) * (i + 1));
00388
00389 for (i = 0; i < 31; i++)
00390 roots128[i] = cos ((M_PI / 64) * (i + 1));
00391
00392 for (i = 0; i < 64; i++) {
00393 k = fftorder[i] / 2 + 64;
00394 pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
00395 pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
00396 }
00397
00398 for (i = 64; i < 128; i++) {
00399 k = fftorder[i] / 2 + 64;
00400 pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
00401 pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
00402 }
00403
00404 for (i = 0; i < 64; i++) {
00405 post1[i].real = cos ((M_PI / 256) * (i + 0.5));
00406 post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
00407 }
00408
00409 for (i = 0; i < 64; i++) {
00410 k = fftorder[i] / 4;
00411 pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
00412 pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
00413 }
00414
00415 for (i = 0; i < 32; i++) {
00416 post2[i].real = cos ((M_PI / 128) * (i + 0.5));
00417 post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
00418 }
00419
00420 #ifdef LIBA52_DJBFFT
00421 if (mm_accel & MM_ACCEL_DJBFFT) {
00422 fprintf (stderr, "Using djbfft for IMDCT transform\n");
00423 ifft128 = (void (*) (complex_t *)) fftc4_un128;
00424 ifft64 = (void (*) (complex_t *)) fftc4_un64;
00425 } else
00426 #endif
00427 {
00428 fprintf (stderr, "No accelerated IMDCT transform found\n");
00429 ifft128 = ifft128_c;
00430 ifft64 = ifft64_c;
00431 }
00432 }