LCOV - code coverage report
Current view: top level - media/libopus/celt - mathops.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 12 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 1 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright (c) 2002-2008 Jean-Marc Valin
       2             :    Copyright (c) 2007-2008 CSIRO
       3             :    Copyright (c) 2007-2009 Xiph.Org Foundation
       4             :    Written by Jean-Marc Valin */
       5             : /**
       6             :    @file mathops.h
       7             :    @brief Various math functions
       8             : */
       9             : /*
      10             :    Redistribution and use in source and binary forms, with or without
      11             :    modification, are permitted provided that the following conditions
      12             :    are met:
      13             : 
      14             :    - Redistributions of source code must retain the above copyright
      15             :    notice, this list of conditions and the following disclaimer.
      16             : 
      17             :    - Redistributions in binary form must reproduce the above copyright
      18             :    notice, this list of conditions and the following disclaimer in the
      19             :    documentation and/or other materials provided with the distribution.
      20             : 
      21             :    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      22             :    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      23             :    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      24             :    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
      25             :    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      26             :    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      27             :    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
      28             :    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
      29             :    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
      30             :    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
      31             :    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      32             : */
      33             : 
      34             : #ifdef HAVE_CONFIG_H
      35             : #include "config.h"
      36             : #endif
      37             : 
      38             : #include "mathops.h"
      39             : 
      40             : /*Compute floor(sqrt(_val)) with exact arithmetic.
      41             :   This has been tested on all possible 32-bit inputs.*/
      42           0 : unsigned isqrt32(opus_uint32 _val){
      43             :   unsigned b;
      44             :   unsigned g;
      45             :   int      bshift;
      46             :   /*Uses the second method from
      47             :      http://www.azillionmonkeys.com/qed/sqroot.html
      48             :     The main idea is to search for the largest binary digit b such that
      49             :      (g+b)*(g+b) <= _val, and add it to the solution g.*/
      50           0 :   g=0;
      51           0 :   bshift=(EC_ILOG(_val)-1)>>1;
      52           0 :   b=1U<<bshift;
      53             :   do{
      54             :     opus_uint32 t;
      55           0 :     t=(((opus_uint32)g<<1)+b)<<bshift;
      56           0 :     if(t<=_val){
      57           0 :       g+=b;
      58           0 :       _val-=t;
      59             :     }
      60           0 :     b>>=1;
      61           0 :     bshift--;
      62             :   }
      63           0 :   while(bshift>=0);
      64           0 :   return g;
      65             : }
      66             : 
      67             : #ifdef FIXED_POINT
      68             : 
      69             : opus_val32 frac_div32(opus_val32 a, opus_val32 b)
      70             : {
      71             :    opus_val16 rcp;
      72             :    opus_val32 result, rem;
      73             :    int shift = celt_ilog2(b)-29;
      74             :    a = VSHR32(a,shift);
      75             :    b = VSHR32(b,shift);
      76             :    /* 16-bit reciprocal */
      77             :    rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
      78             :    result = MULT16_32_Q15(rcp, a);
      79             :    rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
      80             :    result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
      81             :    if (result >= 536870912)       /*  2^29 */
      82             :       return 2147483647;          /*  2^31 - 1 */
      83             :    else if (result <= -536870912) /* -2^29 */
      84             :       return -2147483647;         /* -2^31 */
      85             :    else
      86             :       return SHL32(result, 2);
      87             : }
      88             : 
      89             : /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
      90             : opus_val16 celt_rsqrt_norm(opus_val32 x)
      91             : {
      92             :    opus_val16 n;
      93             :    opus_val16 r;
      94             :    opus_val16 r2;
      95             :    opus_val16 y;
      96             :    /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
      97             :    n = x-32768;
      98             :    /* Get a rough initial guess for the root.
      99             :       The optimal minimax quadratic approximation (using relative error) is
     100             :        r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
     101             :       Coefficients here, and the final result r, are Q14.*/
     102             :    r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
     103             :    /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
     104             :       We can compute the result from n and r using Q15 multiplies with some
     105             :        adjustment, carefully done to avoid overflow.
     106             :       Range of y is [-1564,1594]. */
     107             :    r2 = MULT16_16_Q15(r, r);
     108             :    y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
     109             :    /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
     110             :       This yields the Q14 reciprocal square root of the Q16 x, with a maximum
     111             :        relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
     112             :        peak absolute error of 2.26591/16384. */
     113             :    return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
     114             :               SUB16(MULT16_16_Q15(y, 12288), 16384))));
     115             : }
     116             : 
     117             : /** Sqrt approximation (QX input, QX/2 output) */
     118             : opus_val32 celt_sqrt(opus_val32 x)
     119             : {
     120             :    int k;
     121             :    opus_val16 n;
     122             :    opus_val32 rt;
     123             :    static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
     124             :    if (x==0)
     125             :       return 0;
     126             :    else if (x>=1073741824)
     127             :       return 32767;
     128             :    k = (celt_ilog2(x)>>1)-7;
     129             :    x = VSHR32(x, 2*k);
     130             :    n = x-32768;
     131             :    rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
     132             :               MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
     133             :    rt = VSHR32(rt,7-k);
     134             :    return rt;
     135             : }
     136             : 
     137             : #define L1 32767
     138             : #define L2 -7651
     139             : #define L3 8277
     140             : #define L4 -626
     141             : 
     142             : static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
     143             : {
     144             :    opus_val16 x2;
     145             : 
     146             :    x2 = MULT16_16_P15(x,x);
     147             :    return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
     148             :                                                                                 ))))))));
     149             : }
     150             : 
     151             : #undef L1
     152             : #undef L2
     153             : #undef L3
     154             : #undef L4
     155             : 
     156             : opus_val16 celt_cos_norm(opus_val32 x)
     157             : {
     158             :    x = x&0x0001ffff;
     159             :    if (x>SHL32(EXTEND32(1), 16))
     160             :       x = SUB32(SHL32(EXTEND32(1), 17),x);
     161             :    if (x&0x00007fff)
     162             :    {
     163             :       if (x<SHL32(EXTEND32(1), 15))
     164             :       {
     165             :          return _celt_cos_pi_2(EXTRACT16(x));
     166             :       } else {
     167             :          return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
     168             :       }
     169             :    } else {
     170             :       if (x&0x0000ffff)
     171             :          return 0;
     172             :       else if (x&0x0001ffff)
     173             :          return -32767;
     174             :       else
     175             :          return 32767;
     176             :    }
     177             : }
     178             : 
     179             : /** Reciprocal approximation (Q15 input, Q16 output) */
     180             : opus_val32 celt_rcp(opus_val32 x)
     181             : {
     182             :    int i;
     183             :    opus_val16 n;
     184             :    opus_val16 r;
     185             :    celt_assert2(x>0, "celt_rcp() only defined for positive values");
     186             :    i = celt_ilog2(x);
     187             :    /* n is Q15 with range [0,1). */
     188             :    n = VSHR32(x,i-15)-32768;
     189             :    /* Start with a linear approximation:
     190             :       r = 1.8823529411764706-0.9411764705882353*n.
     191             :       The coefficients and the result are Q14 in the range [15420,30840].*/
     192             :    r = ADD16(30840, MULT16_16_Q15(-15420, n));
     193             :    /* Perform two Newton iterations:
     194             :       r -= r*((r*n)-1.Q15)
     195             :          = r*((r*n)+(r-1.Q15)). */
     196             :    r = SUB16(r, MULT16_16_Q15(r,
     197             :              ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
     198             :    /* We subtract an extra 1 in the second iteration to avoid overflow; it also
     199             :        neatly compensates for truncation error in the rest of the process. */
     200             :    r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
     201             :              ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
     202             :    /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
     203             :        of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
     204             :        error of 1.24665/32768. */
     205             :    return VSHR32(EXTEND32(r),i-16);
     206             : }
     207             : 
     208             : #endif

Generated by: LCOV version 1.13