/* Copyright (C) 2007-2008 Jean-Marc Valin
 * Copyright (C) 2008 Thorvald Natvig
 */
/**
   @file resample_sse.h
   @brief Resampler functions (SSE version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "simd_detect.h"

#include <xmmintrin.h>

#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Dot product of two float vectors: returns sum(a[i]*b[i]) for i = 0..len-1.
   The loop consumes 8 samples per iteration, so len is expected to be a
   multiple of 8.  The final _mm_movehl_ps/_mm_shuffle_ps steps fold the 4
   partial sums held in the SSE register into a single scalar. */
float inner_product_single(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   float ret;
   __m128 sum = _mm_setzero_ps();
   for (i=0;i<len;i+=8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
   }
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);
   return ret;
}
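
/* Illustrative sketch (not part of the original file): the vector loop above
 * is equivalent to the plain scalar dot product below, assuming len is a
 * multiple of 8 as required by the 8-samples-per-iteration loop.
 * inner_product_single_ref is a hypothetical name used only for this sketch;
 * results can differ from the SSE version in the last bits because the
 * partial sums are accumulated in a different order. */
static inline float inner_product_single_ref(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   float ret = 0.f;
   for (i=0;i<len;i++)
      ret += a[i]*b[i];   /* same sum, accumulated one term at a time */
   return ret;
}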

#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
/* Interpolated dot product: each a[i] is broadcast across a 4-wide register
   and multiplied by 4 consecutive values of b, with b advancing by
   `oversample` floats per step.  Each SSE lane therefore accumulates one of 4
   neighbouring filter phases; the phase sums are weighted by the 4 values in
   frac and folded into a single scalar. */
float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   unsigned int i;
   float ret;
   __m128 sum = _mm_setzero_ps();
   __m128 f = _mm_loadu_ps(frac);
   for(i=0;i<len;i+=2)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
   }
   sum = _mm_mul_ps(f, sum);
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);
   return ret;
}
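
/* Illustrative sketch (not part of the original file): a hypothetical scalar
 * equivalent of the SSE loop above.  interpolate_product_single_ref is a name
 * used only for this sketch; len is assumed to be even, as required by the
 * 2-taps-per-iteration loop, and frac is assumed to point to 4 weights. */
static inline float interpolate_product_single_ref(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac)
{
   unsigned int i, k;
   float ret = 0.f;
   for (k=0;k<4;k++)           /* 4 phases, one per SSE lane */
   {
      float phase_sum = 0.f;
      for (i=0;i<len;i++)      /* dot product against the k-th phase of b */
         phase_sum += a[i]*b[i*oversample+k];
      ret += frac[k]*phase_sum; /* weight each phase sum by its frac value */
   }
   return ret;
}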

#ifdef _USE_SSE2
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE

/* Same dot product as inner_product_single, but each 4-float product is
   widened to double before being added, so accumulation happens in double
   precision.  len is again consumed 8 samples per iteration. */
double inner_product_double(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   double ret;
   __m128d sum = _mm_setzero_pd();
   __m128 t;
   for (i=0;i<len;i+=8)
   {
      t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
}

#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
/* Double-precision variant of interpolate_product_single: the 4 phase sums
   are kept in two __m128d accumulators (lanes 0-1 in sum1, lanes 2-3 in sum2),
   weighted by the corresponding halves of frac, and folded to one double. */
double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   unsigned int i;
   double ret;
   __m128d sum;
   __m128d sum1 = _mm_setzero_pd();
   __m128d sum2 = _mm_setzero_pd();
   __m128 f = _mm_loadu_ps(frac);
   __m128d f1 = _mm_cvtps_pd(f);
   __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f));
   __m128 t;
   for(i=0;i<len;i+=2)
   {
      t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }
   sum1 = _mm_mul_pd(f1, sum1);
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
}

#endif