guliverkli: C:/guliverkli/src/filters/transform/mpadecfilter/a52dec-0.7.4/liba52/imdct.c Source File

00001 /*
00002  * imdct.c
00003  * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]>
00004  * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]>
00005  *
00006  * The ifft algorithms in this file have been largely inspired by Dan
00007  * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html
00008  *
00009  * This file is part of a52dec, a free ATSC A-52 stream decoder.
00010  * See http://liba52.sourceforge.net/ for updates.
00011  *
00012  * a52dec is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU General Public License as published by
00014  * the Free Software Foundation; either version 2 of the License, or
00015  * (at your option) any later version.
00016  *
00017  * a52dec is distributed in the hope that it will be useful,
00018  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  * GNU General Public License for more details.
00021  *
00022  * You should have received a copy of the GNU General Public License
00023  * along with this program; if not, write to the Free Software
00024  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00025  */
00026 
00027 #include "config.h"
00028 
00029 #include <math.h>
00030 #include <stdio.h>
00031 #ifdef LIBA52_DJBFFT
00032 #include <fftc4.h>
00033 #endif
00034 #ifndef M_PI
00035 #define M_PI 3.1415926535897932384626433832795029
00036 #endif
00037 #include <inttypes.h>
00038 
00039 #include "a52.h"
00040 #include "a52_internal.h"
00041 //#include "mm_accel.h"
00042 
00043 typedef struct complex_s {
00044     sample_t real;
00045     sample_t imag;
00046 } complex_t;
00047 
00048 static uint8_t fftorder[] = {
00049       0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
00050       8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
00051       4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
00052     252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
00053       2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
00054      10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
00055     254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
00056       6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
00057 };
00058 
00059 /* Root values for IFFT */
00060 static sample_t roots16[3];
00061 static sample_t roots32[7];
00062 static sample_t roots64[15];
00063 static sample_t roots128[31];
00064 
00065 /* Twiddle factors for IMDCT */
00066 static complex_t pre1[128];
00067 static complex_t post1[64];
00068 static complex_t pre2[64];
00069 static complex_t post2[32];
00070 
00071 static sample_t a52_imdct_window[256];
00072 
00073 static void (* ifft128) (complex_t * buf);
00074 static void (* ifft64) (complex_t * buf);
00075 
00076 static inline void ifft2 (complex_t * buf)
00077 {
00078     double r, i;
00079 
00080     r = buf[0].real;
00081     i = buf[0].imag;
00082     buf[0].real += buf[1].real;
00083     buf[0].imag += buf[1].imag;
00084     buf[1].real = r - buf[1].real;
00085     buf[1].imag = i - buf[1].imag;
00086 }
00087 
00088 static inline void ifft4 (complex_t * buf)
00089 {
00090     double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00091 
00092     tmp1 = buf[0].real + buf[1].real;
00093     tmp2 = buf[3].real + buf[2].real;
00094     tmp3 = buf[0].imag + buf[1].imag;
00095     tmp4 = buf[2].imag + buf[3].imag;
00096     tmp5 = buf[0].real - buf[1].real;
00097     tmp6 = buf[0].imag - buf[1].imag;
00098     tmp7 = buf[2].imag - buf[3].imag;
00099     tmp8 = buf[3].real - buf[2].real;
00100 
00101     buf[0].real = tmp1 + tmp2;
00102     buf[0].imag = tmp3 + tmp4;
00103     buf[2].real = tmp1 - tmp2;
00104     buf[2].imag = tmp3 - tmp4;
00105     buf[1].real = tmp5 + tmp7;
00106     buf[1].imag = tmp6 + tmp8;
00107     buf[3].real = tmp5 - tmp7;
00108     buf[3].imag = tmp6 - tmp8;
00109 }
00110 
00111 /* the basic split-radix ifft butterfly */
00112 
00113 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do {       \
00114     tmp5 = a2.real * wr + a2.imag * wi;         \
00115     tmp6 = a2.imag * wr - a2.real * wi;         \
00116     tmp7 = a3.real * wr - a3.imag * wi;         \
00117     tmp8 = a3.imag * wr + a3.real * wi;         \
00118     tmp1 = tmp5 + tmp7;                         \
00119     tmp2 = tmp6 + tmp8;                         \
00120     tmp3 = tmp6 - tmp8;                         \
00121     tmp4 = tmp7 - tmp5;                         \
00122     a2.real = a0.real - tmp1;                   \
00123     a2.imag = a0.imag - tmp2;                   \
00124     a3.real = a1.real - tmp3;                   \
00125     a3.imag = a1.imag - tmp4;                   \
00126     a0.real += tmp1;                            \
00127     a0.imag += tmp2;                            \
00128     a1.real += tmp3;                            \
00129     a1.imag += tmp4;                            \
00130 } while (0)
00131 
00132 /* split-radix ifft butterfly, specialized for wr=1 wi=0 */
00133 
00134 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do {        \
00135     tmp1 = a2.real + a3.real;                   \
00136     tmp2 = a2.imag + a3.imag;                   \
00137     tmp3 = a2.imag - a3.imag;                   \
00138     tmp4 = a3.real - a2.real;                   \
00139     a2.real = a0.real - tmp1;                   \
00140     a2.imag = a0.imag - tmp2;                   \
00141     a3.real = a1.real - tmp3;                   \
00142     a3.imag = a1.imag - tmp4;                   \
00143     a0.real += tmp1;                            \
00144     a0.imag += tmp2;                            \
00145     a1.real += tmp3;                            \
00146     a1.imag += tmp4;                            \
00147 } while (0)
00148 
00149 /* split-radix ifft butterfly, specialized for wr=wi */
00150 
00151 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do {      \
00152     tmp5 = (a2.real + a2.imag) * w;             \
00153     tmp6 = (a2.imag - a2.real) * w;             \
00154     tmp7 = (a3.real - a3.imag) * w;             \
00155     tmp8 = (a3.imag + a3.real) * w;             \
00156     tmp1 = tmp5 + tmp7;                         \
00157     tmp2 = tmp6 + tmp8;                         \
00158     tmp3 = tmp6 - tmp8;                         \
00159     tmp4 = tmp7 - tmp5;                         \
00160     a2.real = a0.real - tmp1;                   \
00161     a2.imag = a0.imag - tmp2;                   \
00162     a3.real = a1.real - tmp3;                   \
00163     a3.imag = a1.imag - tmp4;                   \
00164     a0.real += tmp1;                            \
00165     a0.imag += tmp2;                            \
00166     a1.real += tmp3;                            \
00167     a1.imag += tmp4;                            \
00168 } while (0)
00169 
00170 static inline void ifft8 (complex_t * buf)
00171 {
00172     double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00173 
00174     ifft4 (buf);
00175     ifft2 (buf + 4);
00176     ifft2 (buf + 6);
00177     BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
00178     BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
00179 }
00180 
00181 static void ifft_pass (complex_t * buf, sample_t * weight, int n)
00182 {
00183     complex_t * buf1;
00184     complex_t * buf2;
00185     complex_t * buf3;
00186     double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00187     int i;
00188 
00189     buf++;
00190     buf1 = buf + n;
00191     buf2 = buf + 2 * n;
00192     buf3 = buf + 3 * n;
00193 
00194     BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
00195 
00196     i = n - 1;
00197 
00198     do {
00199         BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], weight[n], weight[2*i]);
00200         buf++;
00201         buf1++;
00202         buf2++;
00203         buf3++;
00204         weight++;
00205     } while (--i);
00206 }
00207 
00208 static void ifft16 (complex_t * buf)
00209 {
00210     ifft8 (buf);
00211     ifft4 (buf + 8);
00212     ifft4 (buf + 12);
00213     ifft_pass (buf, roots16 - 4, 4);
00214 }
00215 
00216 static void ifft32 (complex_t * buf)
00217 {
00218     ifft16 (buf);
00219     ifft8 (buf + 16);
00220     ifft8 (buf + 24);
00221     ifft_pass (buf, roots32 - 8, 8);
00222 }
00223 
00224 static void ifft64_c (complex_t * buf)
00225 {
00226     ifft32 (buf);
00227     ifft16 (buf + 32);
00228     ifft16 (buf + 48);
00229     ifft_pass (buf, roots64 - 16, 16);
00230 }
00231 
00232 static void ifft128_c (complex_t * buf)
00233 {
00234     ifft32 (buf);
00235     ifft16 (buf + 32);
00236     ifft16 (buf + 48);
00237     ifft_pass (buf, roots64 - 16, 16);
00238 
00239     ifft32 (buf + 64);
00240     ifft32 (buf + 96);
00241     ifft_pass (buf, roots128 - 32, 32);
00242 }
00243 
00244 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
00245 {
00246     int i, k;
00247     sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
00248     const sample_t * window = a52_imdct_window;
00249     complex_t buf[128];
00250         
00251     for (i = 0; i < 128; i++) {
00252         k = fftorder[i];
00253         t_r = pre1[i].real;
00254         t_i = pre1[i].imag;
00255 
00256         buf[i].real = t_i * data[255-k] + t_r * data[k];
00257         buf[i].imag = t_r * data[255-k] - t_i * data[k];
00258     }
00259 
00260     ifft128 (buf);
00261 
00262     /* Post IFFT complex multiply plus IFFT complex conjugate*/
00263     /* Window and convert to real valued signal */
00264     for (i = 0; i < 64; i++) {
00265         /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
00266         t_r = post1[i].real;
00267         t_i = post1[i].imag;
00268 
00269         a_r = t_r * buf[i].real     + t_i * buf[i].imag;
00270         a_i = t_i * buf[i].real     - t_r * buf[i].imag;
00271         b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag;
00272         b_i = t_r * buf[127-i].real - t_i * buf[127-i].imag;
00273 
00274         w_1 = window[2*i];
00275         w_2 = window[255-2*i];
00276         data[2*i]     = delay[2*i] * w_2 - a_r * w_1 + bias;
00277         data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
00278         delay[2*i] = a_i;
00279 
00280         w_1 = window[2*i+1];
00281         w_2 = window[254-2*i];
00282         data[2*i+1]   = delay[2*i+1] * w_2 + b_r * w_1 + bias;
00283         data[254-2*i] = delay[2*i+1] * w_1 - b_r * w_2 + bias;
00284         delay[2*i+1] = b_i;
00285     }
00286 }
00287 
00288 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
00289 {
00290     int i, k;
00291     sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
00292     const sample_t * window = a52_imdct_window;
00293     complex_t buf1[64], buf2[64];
00294 
00295     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
00296     for (i = 0; i < 64; i++) {
00297         k = fftorder[i];
00298         t_r = pre2[i].real;
00299         t_i = pre2[i].imag;
00300 
00301         buf1[i].real = t_i * data[254-k] + t_r * data[k];
00302         buf1[i].imag = t_r * data[254-k] - t_i * data[k];
00303 
00304         buf2[i].real = t_i * data[255-k] + t_r * data[k+1];
00305         buf2[i].imag = t_r * data[255-k] - t_i * data[k+1];
00306     }
00307 
00308     ifft64 (buf1);
00309     ifft64 (buf2);
00310 
00311     /* Post IFFT complex multiply */
00312     /* Window and convert to real valued signal */
00313     for (i = 0; i < 32; i++) {
00314         /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ 
00315         t_r = post2[i].real;
00316         t_i = post2[i].imag;
00317 
00318         a_r = t_r * buf1[i].real    + t_i * buf1[i].imag;
00319         a_i = t_i * buf1[i].real    - t_r * buf1[i].imag;
00320         b_r = t_i * buf1[63-i].real + t_r * buf1[63-i].imag;
00321         b_i = t_r * buf1[63-i].real - t_i * buf1[63-i].imag;
00322 
00323         c_r = t_r * buf2[i].real    + t_i * buf2[i].imag;
00324         c_i = t_i * buf2[i].real    - t_r * buf2[i].imag;
00325         d_r = t_i * buf2[63-i].real + t_r * buf2[63-i].imag;
00326         d_i = t_r * buf2[63-i].real - t_i * buf2[63-i].imag;
00327 
00328         w_1 = window[2*i];
00329         w_2 = window[255-2*i];
00330         data[2*i]     = delay[2*i] * w_2 - a_r * w_1 + bias;
00331         data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
00332         delay[2*i] = c_i;
00333 
00334         w_1 = window[128+2*i];
00335         w_2 = window[127-2*i];
00336         data[128+2*i] = delay[127-2*i] * w_2 + a_i * w_1 + bias;
00337         data[127-2*i] = delay[127-2*i] * w_1 - a_i * w_2 + bias;
00338         delay[127-2*i] = c_r;
00339 
00340         w_1 = window[2*i+1];
00341         w_2 = window[254-2*i];
00342         data[2*i+1]   = delay[2*i+1] * w_2 - b_i * w_1 + bias;
00343         data[254-2*i] = delay[2*i+1] * w_1 + b_i * w_2 + bias;
00344         delay[2*i+1] = d_r;
00345 
00346         w_1 = window[129+2*i];
00347         w_2 = window[126-2*i];
00348         data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias;
00349         data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias;
00350         delay[126-2*i] = d_i;
00351     }
00352 }
00353 
00354 static double besselI0 (double x)
00355 {
00356     double bessel = 1;
00357     int i = 100;
00358 
00359     do
00360         bessel = bessel * x / (i * i) + 1;
00361     while (--i);
00362     return bessel;
00363 }
00364 
00365 void a52_imdct_init (uint32_t mm_accel)
00366 {
00367     int i, k;
00368     double sum;
00369 
00370     /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
00371     sum = 0;
00372     for (i = 0; i < 256; i++) {
00373         sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
00374         a52_imdct_window[i] = sum;
00375     }
00376     sum++;
00377     for (i = 0; i < 256; i++)
00378         a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum);
00379 
00380     for (i = 0; i < 3; i++)
00381         roots16[i] = cos ((M_PI / 8) * (i + 1));
00382 
00383     for (i = 0; i < 7; i++)
00384         roots32[i] = cos ((M_PI / 16) * (i + 1));
00385 
00386     for (i = 0; i < 15; i++)
00387         roots64[i] = cos ((M_PI / 32) * (i + 1));
00388 
00389     for (i = 0; i < 31; i++)
00390         roots128[i] = cos ((M_PI / 64) * (i + 1));
00391 
00392     for (i = 0; i < 64; i++) {
00393         k = fftorder[i] / 2 + 64;
00394         pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
00395         pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
00396     }
00397 
00398     for (i = 64; i < 128; i++) {
00399         k = fftorder[i] / 2 + 64;
00400         pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
00401         pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
00402     }
00403 
00404     for (i = 0; i < 64; i++) {
00405         post1[i].real = cos ((M_PI / 256) * (i + 0.5));
00406         post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
00407     }
00408 
00409     for (i = 0; i < 64; i++) {
00410         k = fftorder[i] / 4;
00411         pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
00412         pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
00413     }
00414 
00415     for (i = 0; i < 32; i++) {
00416         post2[i].real = cos ((M_PI / 128) * (i + 0.5));
00417         post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
00418     }
00419 
00420 #ifdef LIBA52_DJBFFT
00421     if (mm_accel & MM_ACCEL_DJBFFT) {
00422         fprintf (stderr, "Using djbfft for IMDCT transform\n");
00423         ifft128 = (void (*) (complex_t *)) fftc4_un128;
00424         ifft64 = (void (*) (complex_t *)) fftc4_un64;
00425     } else
00426 #endif
00427     {
00428         fprintf (stderr, "No accelerated IMDCT transform found\n");
00429         ifft128 = ifft128_c;
00430         ifft64 = ifft64_c;
00431     }
00432 }