Line data Source code
1 : /* Copyright (c) 2002-2008 Jean-Marc Valin
2 : Copyright (c) 2007-2008 CSIRO
3 : Copyright (c) 2007-2009 Xiph.Org Foundation
4 : Written by Jean-Marc Valin */
5 : /**
6 : @file mathops.h
7 : @brief Various math functions
8 : */
9 : /*
10 : Redistribution and use in source and binary forms, with or without
11 : modification, are permitted provided that the following conditions
12 : are met:
13 :
14 : - Redistributions of source code must retain the above copyright
15 : notice, this list of conditions and the following disclaimer.
16 :
17 : - Redistributions in binary form must reproduce the above copyright
18 : notice, this list of conditions and the following disclaimer in the
19 : documentation and/or other materials provided with the distribution.
20 :
21 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 : ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 : LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 : A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 : OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 : EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 : PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 : PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 : LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 : NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 : SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 : */
33 :
34 : #ifdef HAVE_CONFIG_H
35 : #include "config.h"
36 : #endif
37 :
38 : #include "mathops.h"
39 :
40 : /*Compute floor(sqrt(_val)) with exact arithmetic.
41 : This has been tested on all possible 32-bit inputs.*/
42 0 : unsigned isqrt32(opus_uint32 _val){
43 : unsigned b;
44 : unsigned g;
45 : int bshift;
46 : /*Uses the second method from
47 : http://www.azillionmonkeys.com/qed/sqroot.html
48 : The main idea is to search for the largest binary digit b such that
49 : (g+b)*(g+b) <= _val, and add it to the solution g.*/
50 0 : g=0;
51 0 : bshift=(EC_ILOG(_val)-1)>>1;
52 0 : b=1U<<bshift;
53 : do{
54 : opus_uint32 t;
55 0 : t=(((opus_uint32)g<<1)+b)<<bshift;
56 0 : if(t<=_val){
57 0 : g+=b;
58 0 : _val-=t;
59 : }
60 0 : b>>=1;
61 0 : bshift--;
62 : }
63 0 : while(bshift>=0);
64 0 : return g;
65 : }
66 :
67 : #ifdef FIXED_POINT
68 :
69 : opus_val32 frac_div32(opus_val32 a, opus_val32 b)
70 : {
71 : opus_val16 rcp;
72 : opus_val32 result, rem;
73 : int shift = celt_ilog2(b)-29;
74 : a = VSHR32(a,shift);
75 : b = VSHR32(b,shift);
76 : /* 16-bit reciprocal */
77 : rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
78 : result = MULT16_32_Q15(rcp, a);
79 : rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
80 : result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
81 : if (result >= 536870912) /* 2^29 */
82 : return 2147483647; /* 2^31 - 1 */
83 : else if (result <= -536870912) /* -2^29 */
84 : return -2147483647; /* -2^31 */
85 : else
86 : return SHL32(result, 2);
87 : }
88 :
89 : /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
90 : opus_val16 celt_rsqrt_norm(opus_val32 x)
91 : {
92 : opus_val16 n;
93 : opus_val16 r;
94 : opus_val16 r2;
95 : opus_val16 y;
96 : /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
97 : n = x-32768;
98 : /* Get a rough initial guess for the root.
99 : The optimal minimax quadratic approximation (using relative error) is
100 : r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
101 : Coefficients here, and the final result r, are Q14.*/
102 : r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
103 : /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
104 : We can compute the result from n and r using Q15 multiplies with some
105 : adjustment, carefully done to avoid overflow.
106 : Range of y is [-1564,1594]. */
107 : r2 = MULT16_16_Q15(r, r);
108 : y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
109 : /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
110 : This yields the Q14 reciprocal square root of the Q16 x, with a maximum
111 : relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
112 : peak absolute error of 2.26591/16384. */
113 : return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
114 : SUB16(MULT16_16_Q15(y, 12288), 16384))));
115 : }
116 :
117 : /** Sqrt approximation (QX input, QX/2 output) */
118 : opus_val32 celt_sqrt(opus_val32 x)
119 : {
120 : int k;
121 : opus_val16 n;
122 : opus_val32 rt;
123 : static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
124 : if (x==0)
125 : return 0;
126 : else if (x>=1073741824)
127 : return 32767;
128 : k = (celt_ilog2(x)>>1)-7;
129 : x = VSHR32(x, 2*k);
130 : n = x-32768;
131 : rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
132 : MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
133 : rt = VSHR32(rt,7-k);
134 : return rt;
135 : }
136 :
137 : #define L1 32767
138 : #define L2 -7651
139 : #define L3 8277
140 : #define L4 -626
141 :
142 : static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
143 : {
144 : opus_val16 x2;
145 :
146 : x2 = MULT16_16_P15(x,x);
147 : return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
148 : ))))))));
149 : }
150 :
151 : #undef L1
152 : #undef L2
153 : #undef L3
154 : #undef L4
155 :
156 : opus_val16 celt_cos_norm(opus_val32 x)
157 : {
158 : x = x&0x0001ffff;
159 : if (x>SHL32(EXTEND32(1), 16))
160 : x = SUB32(SHL32(EXTEND32(1), 17),x);
161 : if (x&0x00007fff)
162 : {
163 : if (x<SHL32(EXTEND32(1), 15))
164 : {
165 : return _celt_cos_pi_2(EXTRACT16(x));
166 : } else {
167 : return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
168 : }
169 : } else {
170 : if (x&0x0000ffff)
171 : return 0;
172 : else if (x&0x0001ffff)
173 : return -32767;
174 : else
175 : return 32767;
176 : }
177 : }
178 :
179 : /** Reciprocal approximation (Q15 input, Q16 output) */
180 : opus_val32 celt_rcp(opus_val32 x)
181 : {
182 : int i;
183 : opus_val16 n;
184 : opus_val16 r;
185 : celt_assert2(x>0, "celt_rcp() only defined for positive values");
186 : i = celt_ilog2(x);
187 : /* n is Q15 with range [0,1). */
188 : n = VSHR32(x,i-15)-32768;
189 : /* Start with a linear approximation:
190 : r = 1.8823529411764706-0.9411764705882353*n.
191 : The coefficients and the result are Q14 in the range [15420,30840].*/
192 : r = ADD16(30840, MULT16_16_Q15(-15420, n));
193 : /* Perform two Newton iterations:
194 : r -= r*((r*n)-1.Q15)
195 : = r*((r*n)+(r-1.Q15)). */
196 : r = SUB16(r, MULT16_16_Q15(r,
197 : ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
198 : /* We subtract an extra 1 in the second iteration to avoid overflow; it also
199 : neatly compensates for truncation error in the rest of the process. */
200 : r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
201 : ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
202 : /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
203 : of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
204 : error of 1.24665/32768. */
205 : return VSHR32(EXTEND32(r),i-16);
206 : }
207 :
208 : #endif
|