LCOV - code coverage report
Current view: top level - dom/media/webaudio - AudioNodeEngineSSE2.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 187 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 7 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
       4             :  * You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "AudioNodeEngineSSE2.h"
       7             : #include "AlignmentUtils.h"
       8             : #include <emmintrin.h>
       9             : 
      10             : 
      11             : namespace mozilla {
      12             : void
      13           0 : AudioBufferAddWithScale_SSE(const float* aInput,
      14             :                             float aScale,
      15             :                             float* aOutput,
      16             :                             uint32_t aSize)
      17             : {
      18             :   __m128 vin0, vin1, vin2, vin3,
      19             :          vscaled0, vscaled1, vscaled2, vscaled3,
      20             :          vout0, vout1, vout2, vout3,
      21             :          vgain;
      22             : 
      23           0 :   ASSERT_ALIGNED16(aInput);
      24           0 :   ASSERT_ALIGNED16(aOutput);
      25           0 :   ASSERT_MULTIPLE16(aSize);
      26             : 
      27           0 :   vgain = _mm_load1_ps(&aScale);
      28             : 
      29           0 :   for (unsigned i = 0; i < aSize; i+=16) {
      30           0 :     vin0 = _mm_load_ps(&aInput[i]);
      31           0 :     vin1 = _mm_load_ps(&aInput[i + 4]);
      32           0 :     vin2 = _mm_load_ps(&aInput[i + 8]);
      33           0 :     vin3 = _mm_load_ps(&aInput[i + 12]);
      34             : 
      35           0 :     vscaled0 = _mm_mul_ps(vin0, vgain);
      36           0 :     vscaled1 = _mm_mul_ps(vin1, vgain);
      37           0 :     vscaled2 = _mm_mul_ps(vin2, vgain);
      38           0 :     vscaled3 = _mm_mul_ps(vin3, vgain);
      39             : 
      40           0 :     vin0 = _mm_load_ps(&aOutput[i]);
      41           0 :     vin1 = _mm_load_ps(&aOutput[i + 4]);
      42           0 :     vin2 = _mm_load_ps(&aOutput[i + 8]);
      43           0 :     vin3 = _mm_load_ps(&aOutput[i + 12]);
      44             : 
      45           0 :     vout0 = _mm_add_ps(vin0, vscaled0);
      46           0 :     vout1 = _mm_add_ps(vin1, vscaled1);
      47           0 :     vout2 = _mm_add_ps(vin2, vscaled2);
      48           0 :     vout3 = _mm_add_ps(vin3, vscaled3);
      49             : 
      50           0 :     _mm_store_ps(&aOutput[i], vout0);
      51           0 :     _mm_store_ps(&aOutput[i + 4], vout1);
      52           0 :     _mm_store_ps(&aOutput[i + 8], vout2);
      53           0 :     _mm_store_ps(&aOutput[i + 12], vout3);
      54             :   }
      55           0 : }
      56             : 
      57             : void
      58           0 : AudioBlockCopyChannelWithScale_SSE(const float* aInput,
      59             :                                    float aScale,
      60             :                                    float* aOutput)
      61             : {
      62             :   __m128 vin0, vin1, vin2, vin3,
      63             :          vout0, vout1, vout2, vout3;
      64             : 
      65           0 :   ASSERT_ALIGNED16(aInput);
      66           0 :   ASSERT_ALIGNED16(aOutput);
      67             : 
      68           0 :   __m128 vgain = _mm_load1_ps(&aScale);
      69             : 
      70           0 :   for (unsigned i = 0 ; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
      71           0 :     vin0 = _mm_load_ps(&aInput[i]);
      72           0 :     vin1 = _mm_load_ps(&aInput[i + 4]);
      73           0 :     vin2 = _mm_load_ps(&aInput[i + 8]);
      74           0 :     vin3 = _mm_load_ps(&aInput[i + 12]);
      75           0 :     vout0 = _mm_mul_ps(vin0, vgain);
      76           0 :     vout1 = _mm_mul_ps(vin1, vgain);
      77           0 :     vout2 = _mm_mul_ps(vin2, vgain);
      78           0 :     vout3 = _mm_mul_ps(vin3, vgain);
      79           0 :     _mm_store_ps(&aOutput[i], vout0);
      80           0 :     _mm_store_ps(&aOutput[i + 4], vout1);
      81           0 :     _mm_store_ps(&aOutput[i + 8], vout2);
      82           0 :     _mm_store_ps(&aOutput[i + 12], vout3);
      83             :   }
      84           0 : }
      85             : 
      86             : void
      87           0 : AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
      88             :                                    const float aScale[WEBAUDIO_BLOCK_SIZE],
      89             :                                    float aOutput[WEBAUDIO_BLOCK_SIZE])
      90             : {
      91             :   __m128 vin0, vin1, vin2, vin3,
      92             :          vscaled0, vscaled1, vscaled2, vscaled3,
      93             :          vout0, vout1, vout2, vout3;
      94             : 
      95           0 :   ASSERT_ALIGNED16(aInput);
      96           0 :   ASSERT_ALIGNED16(aScale);
      97           0 :   ASSERT_ALIGNED16(aOutput);
      98             : 
      99           0 :   for (unsigned i = 0 ; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
     100           0 :     vscaled0 = _mm_load_ps(&aScale[i]);
     101           0 :     vscaled1 = _mm_load_ps(&aScale[i+4]);
     102           0 :     vscaled2 = _mm_load_ps(&aScale[i+8]);
     103           0 :     vscaled3 = _mm_load_ps(&aScale[i+12]);
     104             : 
     105           0 :     vin0 = _mm_load_ps(&aInput[i]);
     106           0 :     vin1 = _mm_load_ps(&aInput[i + 4]);
     107           0 :     vin2 = _mm_load_ps(&aInput[i + 8]);
     108           0 :     vin3 = _mm_load_ps(&aInput[i + 12]);
     109             : 
     110           0 :     vout0 = _mm_mul_ps(vin0, vscaled0);
     111           0 :     vout1 = _mm_mul_ps(vin1, vscaled1);
     112           0 :     vout2 = _mm_mul_ps(vin2, vscaled2);
     113           0 :     vout3 = _mm_mul_ps(vin3, vscaled3);
     114             : 
     115           0 :     _mm_store_ps(&aOutput[i], vout0);
     116           0 :     _mm_store_ps(&aOutput[i + 4], vout1);
     117           0 :     _mm_store_ps(&aOutput[i + 8], vout2);
     118           0 :     _mm_store_ps(&aOutput[i + 12], vout3);
     119             :   }
     120           0 : }
     121             : 
     122             : void
     123           0 : AudioBufferInPlaceScale_SSE(float* aBlock,
     124             :                             float aScale,
     125             :                             uint32_t aSize)
     126             : {
     127             :   __m128 vout0, vout1, vout2, vout3,
     128             :          vin0, vin1, vin2, vin3;
     129             : 
     130           0 :   ASSERT_ALIGNED16(aBlock);
     131           0 :   ASSERT_MULTIPLE16(aSize);
     132             : 
     133           0 :   __m128 vgain = _mm_load1_ps(&aScale);
     134             : 
     135           0 :   for (unsigned i = 0; i < aSize; i+=16) {
     136           0 :     vin0 = _mm_load_ps(&aBlock[i]);
     137           0 :     vin1 = _mm_load_ps(&aBlock[i + 4]);
     138           0 :     vin2 = _mm_load_ps(&aBlock[i + 8]);
     139           0 :     vin3 = _mm_load_ps(&aBlock[i + 12]);
     140           0 :     vout0 = _mm_mul_ps(vin0, vgain);
     141           0 :     vout1 = _mm_mul_ps(vin1, vgain);
     142           0 :     vout2 = _mm_mul_ps(vin2, vgain);
     143           0 :     vout3 = _mm_mul_ps(vin3, vgain);
     144           0 :     _mm_store_ps(&aBlock[i], vout0);
     145           0 :     _mm_store_ps(&aBlock[i + 4], vout1);
     146           0 :     _mm_store_ps(&aBlock[i + 8], vout2);
     147           0 :     _mm_store_ps(&aBlock[i + 12], vout3);
     148             :   }
     149           0 : }
     150             : 
     151             : void
     152           0 : AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
     153             :                                 const float aInputR[WEBAUDIO_BLOCK_SIZE],
     154             :                                 float aGainL, float aGainR, bool aIsOnTheLeft,
     155             :                                 float aOutputL[WEBAUDIO_BLOCK_SIZE],
     156             :                                 float aOutputR[WEBAUDIO_BLOCK_SIZE])
     157             : {
     158             :   __m128 vinl0, vinr0, vinl1, vinr1,
     159             :          vout0, vout1,
     160             :          vscaled0, vscaled1,
     161             :          vgainl, vgainr;
     162             : 
     163           0 :   ASSERT_ALIGNED16(aInputL);
     164           0 :   ASSERT_ALIGNED16(aInputR);
     165           0 :   ASSERT_ALIGNED16(aOutputL);
     166           0 :   ASSERT_ALIGNED16(aOutputR);
     167             : 
     168           0 :   vgainl = _mm_load1_ps(&aGainL);
     169           0 :   vgainr = _mm_load1_ps(&aGainR);
     170             : 
     171           0 :   if (aIsOnTheLeft) {
     172           0 :     for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
     173           0 :       vinl0 = _mm_load_ps(&aInputL[i]);
     174           0 :       vinr0 = _mm_load_ps(&aInputR[i]);
     175           0 :       vinl1 = _mm_load_ps(&aInputL[i+4]);
     176           0 :       vinr1 = _mm_load_ps(&aInputR[i+4]);
     177             : 
     178             :       /* left channel : aOutputL  = aInputL + aInputR * gainL */
     179           0 :       vscaled0 = _mm_mul_ps(vinr0, vgainl);
     180           0 :       vscaled1 = _mm_mul_ps(vinr1, vgainl);
     181           0 :       vout0 = _mm_add_ps(vscaled0, vinl0);
     182           0 :       vout1 = _mm_add_ps(vscaled1, vinl1);
     183           0 :       _mm_store_ps(&aOutputL[i], vout0);
     184           0 :       _mm_store_ps(&aOutputL[i+4], vout1);
     185             : 
     186             :       /* right channel : aOutputR = aInputR * gainR */
     187           0 :       vscaled0 = _mm_mul_ps(vinr0, vgainr);
     188           0 :       vscaled1 = _mm_mul_ps(vinr1, vgainr);
     189           0 :       _mm_store_ps(&aOutputR[i], vscaled0);
     190           0 :       _mm_store_ps(&aOutputR[i+4], vscaled1);
     191             :     }
     192             :   } else {
     193           0 :     for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
     194           0 :       vinl0 = _mm_load_ps(&aInputL[i]);
     195           0 :       vinr0 = _mm_load_ps(&aInputR[i]);
     196           0 :       vinl1 = _mm_load_ps(&aInputL[i+4]);
     197           0 :       vinr1 = _mm_load_ps(&aInputR[i+4]);
     198             : 
     199             :       /* left channel : aInputL * gainL */
     200           0 :       vscaled0 = _mm_mul_ps(vinl0, vgainl);
     201           0 :       vscaled1 = _mm_mul_ps(vinl1, vgainl);
     202           0 :       _mm_store_ps(&aOutputL[i], vscaled0);
     203           0 :       _mm_store_ps(&aOutputL[i+4], vscaled1);
     204             : 
     205             :       /* right channel: aOutputR = aInputR + aInputL * gainR */
     206           0 :       vscaled0 = _mm_mul_ps(vinl0, vgainr);
     207           0 :       vscaled1 = _mm_mul_ps(vinl1, vgainr);
     208           0 :       vout0 = _mm_add_ps(vscaled0, vinr0);
     209           0 :       vout1 = _mm_add_ps(vscaled1, vinr1);
     210           0 :       _mm_store_ps(&aOutputR[i], vout0);
     211           0 :       _mm_store_ps(&aOutputR[i+4], vout1);
     212             :     }
     213             :   }
     214           0 : }
     215             : 
     216           0 : void BufferComplexMultiply_SSE(const float* aInput,
     217             :                                const float* aScale,
     218             :                                float* aOutput,
     219             :                                uint32_t aSize)
     220             : {
     221             :   unsigned i;
     222             :   __m128 in0, in1, in2, in3,
     223             :          outreal0, outreal1, outreal2, outreal3,
     224             :          outimag0, outimag1, outimag2, outimag3;
     225             : 
     226           0 :   ASSERT_ALIGNED16(aInput);
     227           0 :   ASSERT_ALIGNED16(aScale);
     228           0 :   ASSERT_ALIGNED16(aOutput);
     229           0 :   ASSERT_MULTIPLE16(aSize);
     230             : 
     231           0 :   for (i = 0; i < aSize * 2; i += 16) {
     232           0 :     in0 = _mm_load_ps(&aInput[i]);
     233           0 :     in1 = _mm_load_ps(&aInput[i + 4]);
     234           0 :     in2 = _mm_load_ps(&aInput[i + 8]);
     235           0 :     in3 = _mm_load_ps(&aInput[i + 12]);
     236             : 
     237           0 :     outreal0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
     238           0 :     outimag0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
     239           0 :     outreal2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
     240           0 :     outimag2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
     241             : 
     242           0 :     in0 = _mm_load_ps(&aScale[i]);
     243           0 :     in1 = _mm_load_ps(&aScale[i + 4]);
     244           0 :     in2 = _mm_load_ps(&aScale[i + 8]);
     245           0 :     in3 = _mm_load_ps(&aScale[i + 12]);
     246             : 
     247           0 :     outreal1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
     248           0 :     outimag1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
     249           0 :     outreal3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
     250           0 :     outimag3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
     251             : 
     252           0 :     in0 = _mm_sub_ps(_mm_mul_ps(outreal0, outreal1),
     253           0 :                      _mm_mul_ps(outimag0, outimag1));
     254           0 :     in1 = _mm_add_ps(_mm_mul_ps(outreal0, outimag1),
     255           0 :                      _mm_mul_ps(outimag0, outreal1));
     256           0 :     in2 = _mm_sub_ps(_mm_mul_ps(outreal2, outreal3),
     257           0 :                      _mm_mul_ps(outimag2, outimag3));
     258           0 :     in3 = _mm_add_ps(_mm_mul_ps(outreal2, outimag3),
     259           0 :                      _mm_mul_ps(outimag2, outreal3));
     260             : 
     261           0 :     outreal0 = _mm_unpacklo_ps(in0, in1);
     262           0 :     outreal1 = _mm_unpackhi_ps(in0, in1);
     263           0 :     outreal2 = _mm_unpacklo_ps(in2, in3);
     264           0 :     outreal3 = _mm_unpackhi_ps(in2, in3);
     265             : 
     266           0 :     _mm_store_ps(&aOutput[i], outreal0);
     267           0 :     _mm_store_ps(&aOutput[i + 4], outreal1);
     268           0 :     _mm_store_ps(&aOutput[i + 8], outreal2);
     269           0 :     _mm_store_ps(&aOutput[i + 12], outreal3);
     270             :   }
     271           0 : }
     272             : 
     273             : float
     274           0 : AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength)
     275             : {
     276             :   unsigned i;
     277             :   __m128 in0, in1, in2, in3,
     278             :          acc0, acc1, acc2, acc3;
     279             :   float out[4];
     280             : 
     281           0 :   ASSERT_ALIGNED16(aInput);
     282           0 :   ASSERT_MULTIPLE16(aLength);
     283             : 
     284           0 :   acc0 = _mm_setzero_ps();
     285           0 :   acc1 = _mm_setzero_ps();
     286           0 :   acc2 = _mm_setzero_ps();
     287           0 :   acc3 = _mm_setzero_ps();
     288             : 
     289           0 :   for (i = 0; i < aLength; i+=16) {
     290           0 :     in0 = _mm_load_ps(&aInput[i]);
     291           0 :     in1 = _mm_load_ps(&aInput[i + 4]);
     292           0 :     in2 = _mm_load_ps(&aInput[i + 8]);
     293           0 :     in3 = _mm_load_ps(&aInput[i + 12]);
     294             : 
     295           0 :     in0 = _mm_mul_ps(in0, in0);
     296           0 :     in1 = _mm_mul_ps(in1, in1);
     297           0 :     in2 = _mm_mul_ps(in2, in2);
     298           0 :     in3 = _mm_mul_ps(in3, in3);
     299             : 
     300           0 :     acc0 = _mm_add_ps(acc0, in0);
     301           0 :     acc1 = _mm_add_ps(acc1, in1);
     302           0 :     acc2 = _mm_add_ps(acc2, in2);
     303           0 :     acc3 = _mm_add_ps(acc3, in3);
     304             :   }
     305             : 
     306           0 :   acc0 = _mm_add_ps(acc0, acc1);
     307           0 :   acc0 = _mm_add_ps(acc0, acc2);
     308           0 :   acc0 = _mm_add_ps(acc0, acc3);
     309             : 
     310             :   _mm_store_ps(out, acc0);
     311             : 
     312           0 :   return out[0] + out[1] + out[2] + out[3];
     313             : }
     314             : 
     315             : }

Generated by: LCOV version 1.13