LCOV - code coverage report
Current view: top level - media/webrtc/trunk/webrtc/modules/audio_processing/utility - ooura_fft.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 0 392 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 14 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
       3             :  * Copyright Takuya OOURA, 1996-2001
       4             :  *
       5             :  * You may use, copy, modify and distribute this code for any purpose (include
       6             :  * commercial use) and without fee. Please refer to this package when you modify
       7             :  * this code.
       8             :  *
       9             :  * Changes by the WebRTC authors:
      10             :  *    - Trivial type modifications.
      11             :  *    - Minimal code subset to do rdft of length 128.
      12             :  *    - Optimizations because of known length.
      13             :  *    - Removed the global variables by moving the code into a class in order
      14             :  *      to make it thread safe.
      15             :  *
      16             :  *  All changes are covered by the WebRTC license and IP grant:
      17             :  *  Use of this source code is governed by a BSD-style license
      18             :  *  that can be found in the LICENSE file in the root of the source
      19             :  *  tree. An additional intellectual property rights grant can be found
      20             :  *  in the file PATENTS.  All contributing project authors may
      21             :  *  be found in the AUTHORS file in the root of the source tree.
      22             :  */
      23             : 
      24             : #include "webrtc/modules/audio_processing/utility/ooura_fft.h"
      25             : 
      26             : #include <math.h>
      27             : 
      28             : #include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"
      29             : #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
      30             : #include "webrtc/typedefs.h"
      31             : 
      32             : namespace webrtc {
      33             : 
      34             : namespace {
      35             : 
      36             : #if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
      37           0 : static void cft1st_128_C(float* a) {
                         // Plain-C kernel for the first butterfly pass over the 128-float
                         // buffer `a`. Works in place on consecutive groups of 16 floats.
                         // Judging by the r/i suffixes, `a` holds interleaved (real, imag)
                         // pairs -- TODO confirm against the class header.
                         // Weights are read from rdft_w, rdft_wk3ri_first and
                         // rdft_wk3ri_second, which are not defined in this file (presumably
                         // they come from ooura_fft_tables_common.h).
      38           0 :   const int n = 128;
      39             :   int j, k1, k2;
      40             :   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
      41             :   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
      42             : 
      43             :   // The processing of the first set of elements was simplified in C to avoid
      44             :   // some operations (multiplication by zero or one, addition of two elements
      45             :   // multiplied by the same weight, ...).
      46           0 :   x0r = a[0] + a[2];  // a[0..7]: sums and differences only, no weights.
      47           0 :   x0i = a[1] + a[3];
      48           0 :   x1r = a[0] - a[2];
      49           0 :   x1i = a[1] - a[3];
      50           0 :   x2r = a[4] + a[6];
      51           0 :   x2i = a[5] + a[7];
      52           0 :   x3r = a[4] - a[6];
      53           0 :   x3i = a[5] - a[7];
      54           0 :   a[0] = x0r + x2r;
      55           0 :   a[1] = x0i + x2i;
      56           0 :   a[4] = x0r - x2r;
      57           0 :   a[5] = x0i - x2i;
      58           0 :   a[2] = x1r - x3i;
      59           0 :   a[3] = x1i + x3r;
      60           0 :   a[6] = x1r + x3i;
      61           0 :   a[7] = x1i - x3r;
      62           0 :   wk1r = rdft_w[2];  // The only weight used for a[8..15].
      63           0 :   x0r = a[8] + a[10];
      64           0 :   x0i = a[9] + a[11];
      65           0 :   x1r = a[8] - a[10];
      66           0 :   x1i = a[9] - a[11];
      67           0 :   x2r = a[12] + a[14];
      68           0 :   x2i = a[13] + a[15];
      69           0 :   x3r = a[12] - a[14];
      70           0 :   x3i = a[13] - a[15];
      71           0 :   a[8] = x0r + x2r;
      72           0 :   a[9] = x0i + x2i;
      73           0 :   a[12] = x2i - x0i;
      74           0 :   a[13] = x0r - x2r;
      75           0 :   x0r = x1r - x3i;
      76           0 :   x0i = x1i + x3r;
      77           0 :   a[10] = wk1r * (x0r - x0i);
      78           0 :   a[11] = wk1r * (x0r + x0i);
      79           0 :   x0r = x3i + x1r;
      80           0 :   x0i = x3r - x1i;
      81           0 :   a[14] = wk1r * (x0i - x0r);
      82           0 :   a[15] = wk1r * (x0i + x0r);
      83           0 :   k1 = 0;
      84           0 :   for (j = 16; j < n; j += 16) {  // Groups starting at 16, 32, ..., 112.
      85           0 :     k1 += 2;  // Table index advances by one (re, im) pair per group.
      86           0 :     k2 = 2 * k1;
      87           0 :     wk2r = rdft_w[k1 + 0];
      88           0 :     wk2i = rdft_w[k1 + 1];
      89           0 :     wk1r = rdft_w[k2 + 0];
      90           0 :     wk1i = rdft_w[k2 + 1];
      91           0 :     wk3r = rdft_wk3ri_first[k1 + 0];
      92           0 :     wk3i = rdft_wk3ri_first[k1 + 1];
      93           0 :     x0r = a[j + 0] + a[j + 2];  // First half of the group: a[j+0 .. j+7].
      94           0 :     x0i = a[j + 1] + a[j + 3];
      95           0 :     x1r = a[j + 0] - a[j + 2];
      96           0 :     x1i = a[j + 1] - a[j + 3];
      97           0 :     x2r = a[j + 4] + a[j + 6];
      98           0 :     x2i = a[j + 5] + a[j + 7];
      99           0 :     x3r = a[j + 4] - a[j + 6];
     100           0 :     x3i = a[j + 5] - a[j + 7];
     101           0 :     a[j + 0] = x0r + x2r;
     102           0 :     a[j + 1] = x0i + x2i;
     103           0 :     x0r -= x2r;
     104           0 :     x0i -= x2i;
     105           0 :     a[j + 4] = wk2r * x0r - wk2i * x0i;
     106           0 :     a[j + 5] = wk2r * x0i + wk2i * x0r;
     107           0 :     x0r = x1r - x3i;
     108           0 :     x0i = x1i + x3r;
     109           0 :     a[j + 2] = wk1r * x0r - wk1i * x0i;
     110           0 :     a[j + 3] = wk1r * x0i + wk1i * x0r;
     111           0 :     x0r = x1r + x3i;
     112           0 :     x0i = x1i - x3r;
     113           0 :     a[j + 6] = wk3r * x0r - wk3i * x0i;
     114           0 :     a[j + 7] = wk3r * x0i + wk3i * x0r;
     115           0 :     wk1r = rdft_w[k2 + 2];  // Second half uses the next rdft_w pair...
     116           0 :     wk1i = rdft_w[k2 + 3];
     117           0 :     wk3r = rdft_wk3ri_second[k1 + 0];  // ...and the "second" wk3 table.
     118           0 :     wk3i = rdft_wk3ri_second[k1 + 1];
     119           0 :     x0r = a[j + 8] + a[j + 10];  // Second half of the group: a[j+8 .. j+15].
     120           0 :     x0i = a[j + 9] + a[j + 11];
     121           0 :     x1r = a[j + 8] - a[j + 10];
     122           0 :     x1i = a[j + 9] - a[j + 11];
     123           0 :     x2r = a[j + 12] + a[j + 14];
     124           0 :     x2i = a[j + 13] + a[j + 15];
     125           0 :     x3r = a[j + 12] - a[j + 14];
     126           0 :     x3i = a[j + 13] - a[j + 15];
     127           0 :     a[j + 8] = x0r + x2r;
     128           0 :     a[j + 9] = x0i + x2i;
     129           0 :     x0r -= x2r;
     130           0 :     x0i -= x2i;
     131           0 :     a[j + 12] = -wk2i * x0r - wk2r * x0i;  // wk2r/wk2i reused, roles swapped.
     132           0 :     a[j + 13] = -wk2i * x0i + wk2r * x0r;
     133           0 :     x0r = x1r - x3i;
     134           0 :     x0i = x1i + x3r;
     135           0 :     a[j + 10] = wk1r * x0r - wk1i * x0i;
     136           0 :     a[j + 11] = wk1r * x0i + wk1i * x0r;
     137           0 :     x0r = x1r + x3i;
     138           0 :     x0i = x1i - x3r;
     139           0 :     a[j + 14] = wk3r * x0r - wk3i * x0i;
     140           0 :     a[j + 15] = wk3r * x0i + wk3i * x0r;
     141             :   }
     142           0 : }
     143             : 
     144           0 : static void cftmdl_128_C(float* a) {
                         // Plain-C kernel for the middle butterfly pass over the 128-float
                         // buffer `a`, in place. Every butterfly combines four 2-float entries
                         // spaced 8 floats apart (j0, j0 + 8, j0 + 16, j0 + 24), so each inner
                         // loop below covers one 32-float span of `a`.
                         // Weights come from rdft_w, rdft_wk3ri_first and rdft_wk3ri_second,
                         // which are defined outside this file.
     145           0 :   const int l = 8;
     146           0 :   const int n = 128;
     147           0 :   const int m = 32;
     148             :   int j0, j1, j2, j3, k, k1, k2, m2;
     149             :   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
     150             :   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     151             : 
     152           0 :   for (j0 = 0; j0 < l; j0 += 2) {  // Span a[0..31]: no weights applied.
     153           0 :     j1 = j0 + 8;
     154           0 :     j2 = j0 + 16;
     155           0 :     j3 = j0 + 24;
     156           0 :     x0r = a[j0 + 0] + a[j1 + 0];
     157           0 :     x0i = a[j0 + 1] + a[j1 + 1];
     158           0 :     x1r = a[j0 + 0] - a[j1 + 0];
     159           0 :     x1i = a[j0 + 1] - a[j1 + 1];
     160           0 :     x2r = a[j2 + 0] + a[j3 + 0];
     161           0 :     x2i = a[j2 + 1] + a[j3 + 1];
     162           0 :     x3r = a[j2 + 0] - a[j3 + 0];
     163           0 :     x3i = a[j2 + 1] - a[j3 + 1];
     164           0 :     a[j0 + 0] = x0r + x2r;
     165           0 :     a[j0 + 1] = x0i + x2i;
     166           0 :     a[j2 + 0] = x0r - x2r;
     167           0 :     a[j2 + 1] = x0i - x2i;
     168           0 :     a[j1 + 0] = x1r - x3i;
     169           0 :     a[j1 + 1] = x1i + x3r;
     170           0 :     a[j3 + 0] = x1r + x3i;
     171           0 :     a[j3 + 1] = x1i - x3r;
     172             :   }
     173           0 :   wk1r = rdft_w[2];  // The only weight used for the next span.
     174           0 :   for (j0 = m; j0 < l + m; j0 += 2) {  // Span a[32..63].
     175           0 :     j1 = j0 + 8;
     176           0 :     j2 = j0 + 16;
     177           0 :     j3 = j0 + 24;
     178           0 :     x0r = a[j0 + 0] + a[j1 + 0];
     179           0 :     x0i = a[j0 + 1] + a[j1 + 1];
     180           0 :     x1r = a[j0 + 0] - a[j1 + 0];
     181           0 :     x1i = a[j0 + 1] - a[j1 + 1];
     182           0 :     x2r = a[j2 + 0] + a[j3 + 0];
     183           0 :     x2i = a[j2 + 1] + a[j3 + 1];
     184           0 :     x3r = a[j2 + 0] - a[j3 + 0];
     185           0 :     x3i = a[j2 + 1] - a[j3 + 1];
     186           0 :     a[j0 + 0] = x0r + x2r;
     187           0 :     a[j0 + 1] = x0i + x2i;
     188           0 :     a[j2 + 0] = x2i - x0i;
     189           0 :     a[j2 + 1] = x0r - x2r;
     190           0 :     x0r = x1r - x3i;
     191           0 :     x0i = x1i + x3r;
     192           0 :     a[j1 + 0] = wk1r * (x0r - x0i);
     193           0 :     a[j1 + 1] = wk1r * (x0r + x0i);
     194           0 :     x0r = x3i + x1r;
     195           0 :     x0i = x3r - x1i;
     196           0 :     a[j3 + 0] = wk1r * (x0i - x0r);
     197           0 :     a[j3 + 1] = wk1r * (x0i + x0r);
     198             :   }
     199           0 :   k1 = 0;
     200           0 :   m2 = 2 * m;
     201           0 :   for (k = m2; k < n; k += m2) {  // m2 == 64, n == 128: runs once, k == 64.
     202           0 :     k1 += 2;
     203           0 :     k2 = 2 * k1;
     204           0 :     wk2r = rdft_w[k1 + 0];
     205           0 :     wk2i = rdft_w[k1 + 1];
     206           0 :     wk1r = rdft_w[k2 + 0];
     207           0 :     wk1i = rdft_w[k2 + 1];
     208           0 :     wk3r = rdft_wk3ri_first[k1 + 0];
     209           0 :     wk3i = rdft_wk3ri_first[k1 + 1];
     210           0 :     for (j0 = k; j0 < l + k; j0 += 2) {  // Span a[k .. k+31].
     211           0 :       j1 = j0 + 8;
     212           0 :       j2 = j0 + 16;
     213           0 :       j3 = j0 + 24;
     214           0 :       x0r = a[j0 + 0] + a[j1 + 0];
     215           0 :       x0i = a[j0 + 1] + a[j1 + 1];
     216           0 :       x1r = a[j0 + 0] - a[j1 + 0];
     217           0 :       x1i = a[j0 + 1] - a[j1 + 1];
     218           0 :       x2r = a[j2 + 0] + a[j3 + 0];
     219           0 :       x2i = a[j2 + 1] + a[j3 + 1];
     220           0 :       x3r = a[j2 + 0] - a[j3 + 0];
     221           0 :       x3i = a[j2 + 1] - a[j3 + 1];
     222           0 :       a[j0 + 0] = x0r + x2r;
     223           0 :       a[j0 + 1] = x0i + x2i;
     224           0 :       x0r -= x2r;
     225           0 :       x0i -= x2i;
     226           0 :       a[j2 + 0] = wk2r * x0r - wk2i * x0i;
     227           0 :       a[j2 + 1] = wk2r * x0i + wk2i * x0r;
     228           0 :       x0r = x1r - x3i;
     229           0 :       x0i = x1i + x3r;
     230           0 :       a[j1 + 0] = wk1r * x0r - wk1i * x0i;
     231           0 :       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
     232           0 :       x0r = x1r + x3i;
     233           0 :       x0i = x1i - x3r;
     234           0 :       a[j3 + 0] = wk3r * x0r - wk3i * x0i;
     235           0 :       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
     236             :     }
     237           0 :     wk1r = rdft_w[k2 + 2];  // Next rdft_w pair for the following span.
     238           0 :     wk1i = rdft_w[k2 + 3];
     239           0 :     wk3r = rdft_wk3ri_second[k1 + 0];
     240           0 :     wk3i = rdft_wk3ri_second[k1 + 1];
     241           0 :     for (j0 = k + m; j0 < l + (k + m); j0 += 2) {  // Span a[k+32 .. k+63].
     242           0 :       j1 = j0 + 8;
     243           0 :       j2 = j0 + 16;
     244           0 :       j3 = j0 + 24;
     245           0 :       x0r = a[j0 + 0] + a[j1 + 0];
     246           0 :       x0i = a[j0 + 1] + a[j1 + 1];
     247           0 :       x1r = a[j0 + 0] - a[j1 + 0];
     248           0 :       x1i = a[j0 + 1] - a[j1 + 1];
     249           0 :       x2r = a[j2 + 0] + a[j3 + 0];
     250           0 :       x2i = a[j2 + 1] + a[j3 + 1];
     251           0 :       x3r = a[j2 + 0] - a[j3 + 0];
     252           0 :       x3i = a[j2 + 1] - a[j3 + 1];
     253           0 :       a[j0 + 0] = x0r + x2r;
     254           0 :       a[j0 + 1] = x0i + x2i;
     255           0 :       x0r -= x2r;
     256           0 :       x0i -= x2i;
     257           0 :       a[j2 + 0] = -wk2i * x0r - wk2r * x0i;  // wk2r/wk2i reused, roles swapped.
     258           0 :       a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
     259           0 :       x0r = x1r - x3i;
     260           0 :       x0i = x1i + x3r;
     261           0 :       a[j1 + 0] = wk1r * x0r - wk1i * x0i;
     262           0 :       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
     263           0 :       x0r = x1r + x3i;
     264           0 :       x0i = x1i - x3r;
     265           0 :       a[j3 + 0] = wk3r * x0r - wk3i * x0i;
     266           0 :       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
     267             :     }
     268             :   }
     269           0 : }
     270             : 
     271           0 : static void rftfsub_128_C(float* a) {
                         // Plain-C kernel behind OouraFft::rftfsub_128, which Fft() runs after
                         // the butterfly passes. Each iteration pairs the 2-float entry at j2
                         // (2, 4, ..., 62) with its mirror at k2 = 128 - j2 and updates both in
                         // place. a[0], a[1], a[64] and a[65] are not touched here.
     272           0 :   const float* c = rdft_w + 32;  // Coefficients read below are c[1..31].
     273             :   int j1, j2, k1, k2;
     274             :   float wkr, wki, xr, xi, yr, yi;
     275             : 
     276           0 :   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
     277           0 :     k2 = 128 - j2;
     278           0 :     k1 = 32 - j1;  // c is indexed from both ends: c[k1] and c[j1].
     279           0 :     wkr = 0.5f - c[k1];
     280           0 :     wki = c[j1];
     281           0 :     xr = a[j2 + 0] - a[k2 + 0];
     282           0 :     xi = a[j2 + 1] + a[k2 + 1];
     283           0 :     yr = wkr * xr - wki * xi;
     284           0 :     yi = wkr * xi + wki * xr;
     285           0 :     a[j2 + 0] -= yr;
     286           0 :     a[j2 + 1] -= yi;
     287           0 :     a[k2 + 0] += yr;
     288           0 :     a[k2 + 1] -= yi;
     289             :   }
     290           0 : }
     291             : 
     292           0 : static void rftbsub_128_C(float* a) {
                         // Plain-C kernel behind OouraFft::rftbsub_128, which InverseFft() runs
                         // before the reordering and butterfly passes. Same pairing as
                         // rftfsub_128_C (entry j2 with its mirror k2 = 128 - j2) but with
                         // different signs; it also negates a[1] before the loop and a[65]
                         // after it. a[0] and a[64] are not touched here.
     293           0 :   const float* c = rdft_w + 32;  // Coefficients read below are c[1..31].
     294             :   int j1, j2, k1, k2;
     295             :   float wkr, wki, xr, xi, yr, yi;
     296             : 
     297           0 :   a[1] = -a[1];
     298           0 :   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
     299           0 :     k2 = 128 - j2;
     300           0 :     k1 = 32 - j1;  // c is indexed from both ends: c[k1] and c[j1].
     301           0 :     wkr = 0.5f - c[k1];
     302           0 :     wki = c[j1];
     303           0 :     xr = a[j2 + 0] - a[k2 + 0];
     304           0 :     xi = a[j2 + 1] + a[k2 + 1];
     305           0 :     yr = wkr * xr + wki * xi;
     306           0 :     yi = wkr * xi - wki * xr;
     307           0 :     a[j2 + 0] = a[j2 + 0] - yr;
     308           0 :     a[j2 + 1] = yi - a[j2 + 1];  // Odd-index entries end up sign-flipped.
     309           0 :     a[k2 + 0] = yr + a[k2 + 0];
     310           0 :     a[k2 + 1] = yi - a[k2 + 1];
     311             :   }
     312           0 :   a[65] = -a[65];
     313           0 : }
     314             : #endif
     315             : 
     316             : 
     317             : }  // namespace
     318             : 
     319           0 : OouraFft::OouraFft() {
                         // Decides once, at construction, whether the SSE2 kernels are used.
                         // x86-family builds query WebRtc_GetCPUInfo(kSSE2) at run time; all
                         // other builds set the flag to false.
     320             : #if defined(WEBRTC_ARCH_X86_FAMILY)
     321           0 :   use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0);
     322             : #else
     323             :   use_sse2_ = false;
     324             : #endif
     325           0 : }
     326             : 
     327             : OouraFft::~OouraFft() = default;  // Compiler-generated destructor.
     328             : 
     329           0 : void OouraFft::Fft(float* a) const {
                         // Forward transform of the buffer `a`, in place. The kernels called
                         // here index a[0..127], so `a` must hold at least 128 floats.
                         // Order: bitrv2_128 (entry swaps), cftfsub_128 (butterfly passes),
                         // rftfsub_128, then the a[0]/a[1] fix-up below.
     330             :   float xi;
     331           0 :   bitrv2_128(a);
     332           0 :   cftfsub_128(a);
     333           0 :   rftfsub_128(a);
     334           0 :   xi = a[0] - a[1];  // Keep the difference before a[0] is overwritten.
     335           0 :   a[0] += a[1];      // a[0] <- old a[0] + old a[1]
     336           0 :   a[1] = xi;         // a[1] <- old a[0] - old a[1]
     337           0 : }
     338           0 : void OouraFft::InverseFft(float* a) const {
                         // Backward pipeline on the buffer `a`, in place: undo the a[0]/a[1]
                         // fix-up done at the end of Fft(), then rftbsub_128, bitrv2_128 and
                         // cftbsub_128. This function itself applies no scaling.
                         // NOTE(review): confirm whether callers are expected to normalise.
     339           0 :   a[1] = 0.5f * (a[0] - a[1]);  // a[1] <- (old a[0] - old a[1]) / 2
     340           0 :   a[0] -= a[1];                 // a[0] <- (old a[0] + old a[1]) / 2
     341           0 :   rftbsub_128(a);
     342           0 :   bitrv2_128(a);
     343           0 :   cftbsub_128(a);
     344           0 : }
     345             : 
     346           0 : void OouraFft::cft1st_128(float* a) const {
                         // Forwards `a` to one cft1st_128 kernel. MIPS (MIPS_FPU_LE) and NEON
                         // builds are fixed at compile time; x86-family builds choose between
                         // the SSE2 and C kernels at run time via use_sse2_; every other build
                         // uses the C kernel.
     347             : #if defined(MIPS_FPU_LE)
     348             :   cft1st_128_mips(a);
     349             : #elif defined(WEBRTC_HAS_NEON)
     350             :   cft1st_128_neon(a);
     351             : #elif defined(WEBRTC_ARCH_X86_FAMILY)
     352           0 :   if (use_sse2_) {
     353           0 :     cft1st_128_SSE2(a);
     354             :   } else {
     355           0 :     cft1st_128_C(a);
     356             :   }
     357             : #else
     358             :   cft1st_128_C(a);
     359             : #endif
     360           0 : }
     361           0 : void OouraFft::cftmdl_128(float* a) const {
                         // Forwards `a` to one cftmdl_128 kernel. MIPS (MIPS_FPU_LE) and NEON
                         // builds are fixed at compile time; x86-family builds choose between
                         // the SSE2 and C kernels at run time via use_sse2_; every other build
                         // uses the C kernel.
     362             : #if defined(MIPS_FPU_LE)
     363             :   cftmdl_128_mips(a);
     364             : #elif defined(WEBRTC_HAS_NEON)
     365             :   cftmdl_128_neon(a);
     366             : #elif defined(WEBRTC_ARCH_X86_FAMILY)
     367           0 :   if (use_sse2_) {
     368           0 :     cftmdl_128_SSE2(a);
     369             :   } else {
     370           0 :     cftmdl_128_C(a);
     371             :   }
     372             : #else
     373             :   cftmdl_128_C(a);
     374             : #endif
     375           0 : }
     376           0 : void OouraFft::rftfsub_128(float* a) const {
                         // Forwards `a` to one rftfsub_128 kernel. MIPS (MIPS_FPU_LE) and NEON
                         // builds are fixed at compile time; x86-family builds choose between
                         // the SSE2 and C kernels at run time via use_sse2_; every other build
                         // uses the C kernel.
     377             : #if defined(MIPS_FPU_LE)
     378             :   rftfsub_128_mips(a);
     379             : #elif defined(WEBRTC_HAS_NEON)
     380             :   rftfsub_128_neon(a);
     381             : #elif defined(WEBRTC_ARCH_X86_FAMILY)
     382           0 :   if (use_sse2_) {
     383           0 :     rftfsub_128_SSE2(a);
     384             :   } else {
     385           0 :     rftfsub_128_C(a);
     386             :   }
     387             : #else
     388             :   rftfsub_128_C(a);
     389             : #endif
     390           0 : }
     391             : 
     392           0 : void OouraFft::rftbsub_128(float* a) const {
                         // Forwards `a` to one rftbsub_128 kernel. MIPS (MIPS_FPU_LE) and NEON
                         // builds are fixed at compile time; x86-family builds choose between
                         // the SSE2 and C kernels at run time via use_sse2_; every other build
                         // uses the C kernel.
     393             : #if defined(MIPS_FPU_LE)
     394             :   rftbsub_128_mips(a);
     395             : #elif defined(WEBRTC_HAS_NEON)
     396             :   rftbsub_128_neon(a);
     397             : #elif defined(WEBRTC_ARCH_X86_FAMILY)
     398           0 :   if (use_sse2_) {
     399           0 :     rftbsub_128_SSE2(a);
     400             :   } else {
     401           0 :     rftbsub_128_C(a);
     402             :   }
     403             : #else
     404             :   rftbsub_128_C(a);
     405             : #endif
     406           0 : }
     407             : 
     408           0 : void OouraFft::cftbsub_128(float* a) const {
                         // Butterfly stage of the backward pipeline, in place: cft1st_128,
                         // cftmdl_128, then a final pass that combines the four 32-float
                         // quarters of `a`. Compared with cftfsub_128, only the handling of the
                         // odd-index terms differs (x0i, x1i and the stores to a[... + 1]).
     409             :   int j, j1, j2, j3, l;
     410             :   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     411             : 
     412           0 :   cft1st_128(a);
     413           0 :   cftmdl_128(a);
     414           0 :   l = 32;  // Distance, in floats, between the four entries of a butterfly.
     415             : 
     416           0 :   for (j = 0; j < l; j += 2) {
     417           0 :     j1 = j + l;
     418           0 :     j2 = j1 + l;
     419           0 :     j3 = j2 + l;
     420           0 :     x0r = a[j] + a[j1];
     421           0 :     x0i = -a[j + 1] - a[j1 + 1];  // Negated relative to cftfsub_128.
     422           0 :     x1r = a[j] - a[j1];
     423           0 :     x1i = -a[j + 1] + a[j1 + 1];  // Negated relative to cftfsub_128.
     424           0 :     x2r = a[j2] + a[j3];
     425           0 :     x2i = a[j2 + 1] + a[j3 + 1];
     426           0 :     x3r = a[j2] - a[j3];
     427           0 :     x3i = a[j2 + 1] - a[j3 + 1];
     428           0 :     a[j] = x0r + x2r;
     429           0 :     a[j + 1] = x0i - x2i;
     430           0 :     a[j2] = x0r - x2r;
     431           0 :     a[j2 + 1] = x0i + x2i;
     432           0 :     a[j1] = x1r - x3i;
     433           0 :     a[j1 + 1] = x1i - x3r;
     434           0 :     a[j3] = x1r + x3i;
     435           0 :     a[j3 + 1] = x1i + x3r;
     436             :   }
     437           0 : }
     438             : 
     439           0 : void OouraFft::cftfsub_128(float* a) const {
                         // Butterfly stage of the forward pipeline, in place: cft1st_128,
                         // cftmdl_128, then a final pass that combines the four 32-float
                         // quarters of `a` using sums and differences only (no weights).
     440             :   int j, j1, j2, j3, l;
     441             :   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     442             : 
     443           0 :   cft1st_128(a);
     444           0 :   cftmdl_128(a);
     445           0 :   l = 32;  // Distance, in floats, between the four entries of a butterfly.
     446           0 :   for (j = 0; j < l; j += 2) {
     447           0 :     j1 = j + l;
     448           0 :     j2 = j1 + l;
     449           0 :     j3 = j2 + l;
     450           0 :     x0r = a[j] + a[j1];
     451           0 :     x0i = a[j + 1] + a[j1 + 1];
     452           0 :     x1r = a[j] - a[j1];
     453           0 :     x1i = a[j + 1] - a[j1 + 1];
     454           0 :     x2r = a[j2] + a[j3];
     455           0 :     x2i = a[j2 + 1] + a[j3 + 1];
     456           0 :     x3r = a[j2] - a[j3];
     457           0 :     x3i = a[j2 + 1] - a[j3 + 1];
     458           0 :     a[j] = x0r + x2r;
     459           0 :     a[j + 1] = x0i + x2i;
     460           0 :     a[j2] = x0r - x2r;
     461           0 :     a[j2 + 1] = x0i - x2i;
     462           0 :     a[j1] = x1r - x3i;
     463           0 :     a[j1 + 1] = x1i + x3r;
     464           0 :     a[j3] = x1r + x3i;
     465           0 :     a[j3 + 1] = x1i - x3r;
     466             :   }
     467           0 : }
     468             : 
     469           0 : void OouraFft::bitrv2_128(float* a) const {
                         // Reorders `a` in place by swapping pairs of 2-float entries. Swap
                         // positions are built from the loop indices and the four-entry table
                         // ip[]. Each (j, k) pair with j < k performs four swaps; each k then
                         // performs one more swap that does not depend on j.
     470             :   /*
     471             :       Following things have been attempted but are no faster:
     472             :       (a) Storing the swap indexes in a LUT (index calculations are done
     473             :           for 'free' while waiting on memory/L1).
     474             :       (b) Consolidate the load/store of two consecutive floats by a 64 bit
     475             :           integer (execution is memory/L1 bound).
     476             :       (c) Do a mix of floats and 64 bit integer to maximize register
     477             :           utilization (execution is memory/L1 bound).
     478             :       (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
     479             :       (e) Hard-coding of the offsets to completely eliminate index
     480             :           calculations.
     481             :   */
     482             : 
     483             :   unsigned int j, j1, k, k1;
     484             :   float xr, xi, yr, yi;
     485             : 
     486           0 :   const int ip[4] = {0, 64, 32, 96};
     487           0 :   for (k = 0; k < 4; k++) {
     488           0 :     for (j = 0; j < k; j++) {
     489           0 :       j1 = 2 * j + ip[k];
     490           0 :       k1 = 2 * k + ip[j];
     491           0 :       xr = a[j1 + 0];  // Swap 1 of 4: entries at j1 and k1.
     492           0 :       xi = a[j1 + 1];
     493           0 :       yr = a[k1 + 0];
     494           0 :       yi = a[k1 + 1];
     495           0 :       a[j1 + 0] = yr;
     496           0 :       a[j1 + 1] = yi;
     497           0 :       a[k1 + 0] = xr;
     498           0 :       a[k1 + 1] = xi;
     499           0 :       j1 += 8;
     500           0 :       k1 += 16;
     501           0 :       xr = a[j1 + 0];  // Swap 2 of 4.
     502           0 :       xi = a[j1 + 1];
     503           0 :       yr = a[k1 + 0];
     504           0 :       yi = a[k1 + 1];
     505           0 :       a[j1 + 0] = yr;
     506           0 :       a[j1 + 1] = yi;
     507           0 :       a[k1 + 0] = xr;
     508           0 :       a[k1 + 1] = xi;
     509           0 :       j1 += 8;
     510           0 :       k1 -= 8;
     511           0 :       xr = a[j1 + 0];  // Swap 3 of 4.
     512           0 :       xi = a[j1 + 1];
     513           0 :       yr = a[k1 + 0];
     514           0 :       yi = a[k1 + 1];
     515           0 :       a[j1 + 0] = yr;
     516           0 :       a[j1 + 1] = yi;
     517           0 :       a[k1 + 0] = xr;
     518           0 :       a[k1 + 1] = xi;
     519           0 :       j1 += 8;
     520           0 :       k1 += 16;
     521           0 :       xr = a[j1 + 0];  // Swap 4 of 4.
     522           0 :       xi = a[j1 + 1];
     523           0 :       yr = a[k1 + 0];
     524           0 :       yi = a[k1 + 1];
     525           0 :       a[j1 + 0] = yr;
     526           0 :       a[j1 + 1] = yi;
     527           0 :       a[k1 + 0] = xr;
     528           0 :       a[k1 + 1] = xi;
     529             :     }
     530           0 :     j1 = 2 * k + 8 + ip[k];  // Per-k swap: entry at j1 with the one 8 floats on.
     531           0 :     k1 = j1 + 8;
     532           0 :     xr = a[j1 + 0];
     533           0 :     xi = a[j1 + 1];
     534           0 :     yr = a[k1 + 0];
     535           0 :     yi = a[k1 + 1];
     536           0 :     a[j1 + 0] = yr;
     537           0 :     a[j1 + 1] = yi;
     538           0 :     a[k1 + 0] = xr;
     539           0 :     a[k1 + 1] = xi;
     540             :   }
     541           0 : }
     542             : 
     543             : }  // namespace webrtc

Generated by: LCOV version 1.13