LCOV - code coverage report
Current view: top level - media/libopus/celt - mdct.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 122 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 2 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright (c) 2007-2008 CSIRO
       2             :    Copyright (c) 2007-2008 Xiph.Org Foundation
       3             :    Written by Jean-Marc Valin */
       4             : /*
       5             :    Redistribution and use in source and binary forms, with or without
       6             :    modification, are permitted provided that the following conditions
       7             :    are met:
       8             : 
       9             :    - Redistributions of source code must retain the above copyright
      10             :    notice, this list of conditions and the following disclaimer.
      11             : 
      12             :    - Redistributions in binary form must reproduce the above copyright
      13             :    notice, this list of conditions and the following disclaimer in the
      14             :    documentation and/or other materials provided with the distribution.
      15             : 
      16             :    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      17             :    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      18             :    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      19             :    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
      20             :    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      21             :    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      22             :    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
      23             :    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
      24             :    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
      25             :    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
      26             :    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      27             : */
      28             : 
      29             : /* This is a simple MDCT implementation that uses a N/4 complex FFT
      30             :    to do most of the work. It should be relatively straightforward to
      31             :    plug in pretty much and FFT here.
      32             : 
      33             :    This replaces the Vorbis FFT (and uses the exact same API), which
      34             :    was a bit too messy and that was ending up duplicating code
      35             :    (might as well use the same FFT everywhere).
      36             : 
      37             :    The algorithm is similar to (and inspired from) Fabrice Bellard's
      38             :    MDCT implementation in FFMPEG, but has differences in signs, ordering
      39             :    and scaling in many places.
      40             : */
      41             : 
      42             : #ifndef SKIP_CONFIG_H
      43             : #ifdef HAVE_CONFIG_H
      44             : #include "config.h"
      45             : #endif
      46             : #endif
      47             : 
      48             : #include "mdct.h"
      49             : #include "kiss_fft.h"
      50             : #include "_kiss_fft_guts.h"
      51             : #include <math.h>
      52             : #include "os_support.h"
      53             : #include "mathops.h"
      54             : #include "stack_alloc.h"
      55             : 
      56             : #if defined(MIPSr1_ASM)
      57             : #include "mips/mdct_mipsr1.h"
      58             : #endif
      59             : 
      60             : 
      61             : #ifdef CUSTOM_MODES
      62             : 
      63             : int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
      64             : {
      65             :    int i;
      66             :    kiss_twiddle_scalar *trig;
      67             :    int shift;
      68             :    int N2=N>>1;
      69             :    l->n = N;
      70             :    l->maxshift = maxshift;
      71             :    for (i=0;i<=maxshift;i++)
      72             :    {
      73             :       if (i==0)
      74             :          l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
      75             :       else
      76             :          l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
      77             : #ifndef ENABLE_TI_DSPLIB55
      78             :       if (l->kfft[i]==NULL)
      79             :          return 0;
      80             : #endif
      81             :    }
      82             :    l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
      83             :    if (l->trig==NULL)
      84             :      return 0;
      85             :    for (shift=0;shift<=maxshift;shift++)
      86             :    {
      87             :       /* We have enough points that sine isn't necessary */
      88             : #if defined(FIXED_POINT)
      89             : #if 1
      90             :       for (i=0;i<N2;i++)
      91             :          trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
      92             : #else
      93             :       for (i=0;i<N2;i++)
      94             :          trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
      95             : #endif
      96             : #else
      97             :       for (i=0;i<N2;i++)
      98             :          trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
      99             : #endif
     100             :       trig += N2;
     101             :       N2 >>= 1;
     102             :       N >>= 1;
     103             :    }
     104             :    return 1;
     105             : }
     106             : 
     107             : void clt_mdct_clear(mdct_lookup *l, int arch)
     108             : {
     109             :    int i;
     110             :    for (i=0;i<=l->maxshift;i++)
     111             :       opus_fft_free(l->kfft[i], arch);
     112             :    opus_free((kiss_twiddle_scalar*)l->trig);
     113             : }
     114             : 
     115             : #endif /* CUSTOM_MODES */
     116             : 
     117             : /* Forward MDCT trashes the input array */
     118             : #ifndef OVERRIDE_clt_mdct_forward
     119           0 : void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
     120             :       const opus_val16 *window, int overlap, int shift, int stride, int arch)
     121             : {
     122             :    int i;
     123             :    int N, N2, N4;
     124             :    VARDECL(kiss_fft_scalar, f);
     125             :    VARDECL(kiss_fft_cpx, f2);
     126           0 :    const kiss_fft_state *st = l->kfft[shift];
     127             :    const kiss_twiddle_scalar *trig;
     128             :    opus_val16 scale;
     129             : #ifdef FIXED_POINT
     130             :    /* Allows us to scale with MULT16_32_Q16(), which is faster than
     131             :       MULT16_32_Q15() on ARM. */
     132             :    int scale_shift = st->scale_shift-1;
     133             : #endif
     134             :    SAVE_STACK;
     135             :    (void)arch;
     136           0 :    scale = st->scale;
     137             : 
     138           0 :    N = l->n;
     139           0 :    trig = l->trig;
     140           0 :    for (i=0;i<shift;i++)
     141             :    {
     142           0 :       N >>= 1;
     143           0 :       trig += N;
     144             :    }
     145           0 :    N2 = N>>1;
     146           0 :    N4 = N>>2;
     147             : 
     148           0 :    ALLOC(f, N2, kiss_fft_scalar);
     149           0 :    ALLOC(f2, N4, kiss_fft_cpx);
     150             : 
     151             :    /* Consider the input to be composed of four blocks: [a, b, c, d] */
     152             :    /* Window, shuffle, fold */
     153             :    {
     154             :       /* Temp pointers to make it really clear to the compiler what we're doing */
     155           0 :       const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
     156           0 :       const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
     157           0 :       kiss_fft_scalar * OPUS_RESTRICT yp = f;
     158           0 :       const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
     159           0 :       const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
     160           0 :       for(i=0;i<((overlap+3)>>2);i++)
     161             :       {
     162             :          /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
     163           0 :          *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
     164           0 :          *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
     165           0 :          xp1+=2;
     166           0 :          xp2-=2;
     167           0 :          wp1+=2;
     168           0 :          wp2-=2;
     169             :       }
     170           0 :       wp1 = window;
     171           0 :       wp2 = window+overlap-1;
     172           0 :       for(;i<N4-((overlap+3)>>2);i++)
     173             :       {
     174             :          /* Real part arranged as a-bR, Imag part arranged as -c-dR */
     175           0 :          *yp++ = *xp2;
     176           0 :          *yp++ = *xp1;
     177           0 :          xp1+=2;
     178           0 :          xp2-=2;
     179             :       }
     180           0 :       for(;i<N4;i++)
     181             :       {
     182             :          /* Real part arranged as a-bR, Imag part arranged as -c-dR */
     183           0 :          *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
     184           0 :          *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
     185           0 :          xp1+=2;
     186           0 :          xp2-=2;
     187           0 :          wp1+=2;
     188           0 :          wp2-=2;
     189             :       }
     190             :    }
     191             :    /* Pre-rotation */
     192             :    {
     193           0 :       kiss_fft_scalar * OPUS_RESTRICT yp = f;
     194           0 :       const kiss_twiddle_scalar *t = &trig[0];
     195           0 :       for(i=0;i<N4;i++)
     196             :       {
     197             :          kiss_fft_cpx yc;
     198             :          kiss_twiddle_scalar t0, t1;
     199             :          kiss_fft_scalar re, im, yr, yi;
     200           0 :          t0 = t[i];
     201           0 :          t1 = t[N4+i];
     202           0 :          re = *yp++;
     203           0 :          im = *yp++;
     204           0 :          yr = S_MUL(re,t0)  -  S_MUL(im,t1);
     205           0 :          yi = S_MUL(im,t0)  +  S_MUL(re,t1);
     206           0 :          yc.r = yr;
     207           0 :          yc.i = yi;
     208           0 :          yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
     209           0 :          yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
     210           0 :          f2[st->bitrev[i]] = yc;
     211             :       }
     212             :    }
     213             : 
     214             :    /* N/4 complex FFT, does not downscale anymore */
     215           0 :    opus_fft_impl(st, f2);
     216             : 
     217             :    /* Post-rotate */
     218             :    {
     219             :       /* Temp pointers to make it really clear to the compiler what we're doing */
     220           0 :       const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
     221           0 :       kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
     222           0 :       kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
     223           0 :       const kiss_twiddle_scalar *t = &trig[0];
     224             :       /* Temp pointers to make it really clear to the compiler what we're doing */
     225           0 :       for(i=0;i<N4;i++)
     226             :       {
     227             :          kiss_fft_scalar yr, yi;
     228           0 :          yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
     229           0 :          yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
     230           0 :          *yp1 = yr;
     231           0 :          *yp2 = yi;
     232           0 :          fp++;
     233           0 :          yp1 += 2*stride;
     234           0 :          yp2 -= 2*stride;
     235             :       }
     236             :    }
     237             :    RESTORE_STACK;
     238           0 : }
     239             : #endif /* OVERRIDE_clt_mdct_forward */
     240             : 
     241             : #ifndef OVERRIDE_clt_mdct_backward
     242           0 : void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
     243             :       const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
     244             : {
     245             :    int i;
     246             :    int N, N2, N4;
     247             :    const kiss_twiddle_scalar *trig;
     248             :    (void) arch;
     249             : 
     250           0 :    N = l->n;
     251           0 :    trig = l->trig;
     252           0 :    for (i=0;i<shift;i++)
     253             :    {
     254           0 :       N >>= 1;
     255           0 :       trig += N;
     256             :    }
     257           0 :    N2 = N>>1;
     258           0 :    N4 = N>>2;
     259             : 
     260             :    /* Pre-rotate */
     261             :    {
     262             :       /* Temp pointers to make it really clear to the compiler what we're doing */
     263           0 :       const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
     264           0 :       const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
     265           0 :       kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
     266           0 :       const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
     267           0 :       const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
     268           0 :       for(i=0;i<N4;i++)
     269             :       {
     270             :          int rev;
     271             :          kiss_fft_scalar yr, yi;
     272           0 :          rev = *bitrev++;
     273           0 :          yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
     274           0 :          yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
     275             :          /* We swap real and imag because we use an FFT instead of an IFFT. */
     276           0 :          yp[2*rev+1] = yr;
     277           0 :          yp[2*rev] = yi;
     278             :          /* Storing the pre-rotation directly in the bitrev order. */
     279           0 :          xp1+=2*stride;
     280           0 :          xp2-=2*stride;
     281             :       }
     282             :    }
     283             : 
     284           0 :    opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
     285             : 
     286             :    /* Post-rotate and de-shuffle from both ends of the buffer at once to make
     287             :       it in-place. */
     288             :    {
     289           0 :       kiss_fft_scalar * yp0 = out+(overlap>>1);
     290           0 :       kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
     291           0 :       const kiss_twiddle_scalar *t = &trig[0];
     292             :       /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
     293             :          middle pair will be computed twice. */
     294           0 :       for(i=0;i<(N4+1)>>1;i++)
     295             :       {
     296             :          kiss_fft_scalar re, im, yr, yi;
     297             :          kiss_twiddle_scalar t0, t1;
     298             :          /* We swap real and imag because we're using an FFT instead of an IFFT. */
     299           0 :          re = yp0[1];
     300           0 :          im = yp0[0];
     301           0 :          t0 = t[i];
     302           0 :          t1 = t[N4+i];
     303             :          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
     304           0 :          yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
     305           0 :          yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
     306             :          /* We swap real and imag because we're using an FFT instead of an IFFT. */
     307           0 :          re = yp1[1];
     308           0 :          im = yp1[0];
     309           0 :          yp0[0] = yr;
     310           0 :          yp1[1] = yi;
     311             : 
     312           0 :          t0 = t[(N4-i-1)];
     313           0 :          t1 = t[(N2-i-1)];
     314             :          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
     315           0 :          yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
     316           0 :          yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
     317           0 :          yp1[0] = yr;
     318           0 :          yp0[1] = yi;
     319           0 :          yp0 += 2;
     320           0 :          yp1 -= 2;
     321             :       }
     322             :    }
     323             : 
     324             :    /* Mirror on both sides for TDAC */
     325             :    {
     326           0 :       kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
     327           0 :       kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
     328           0 :       const opus_val16 * OPUS_RESTRICT wp1 = window;
     329           0 :       const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
     330             : 
     331           0 :       for(i = 0; i < overlap/2; i++)
     332             :       {
     333             :          kiss_fft_scalar x1, x2;
     334           0 :          x1 = *xp1;
     335           0 :          x2 = *yp1;
     336           0 :          *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
     337           0 :          *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
     338           0 :          wp1++;
     339           0 :          wp2--;
     340             :       }
     341             :    }
     342           0 : }
     343             : #endif /* OVERRIDE_clt_mdct_backward */

Generated by: LCOV version 1.13