Line data Source code
1 : ////////////////////////////////////////////////////////////////////////////////
2 : ///
3 : /// General FIR digital filter routines with MMX optimization.
4 : ///
5 : /// Note : MMX optimized functions reside in a separate, platform-specific file,
6 : /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
7 : ///
8 : /// Author : Copyright (c) Olli Parviainen
9 : /// Author e-mail : oparviai 'at' iki.fi
10 : /// SoundTouch WWW: http://www.surina.net/soundtouch
11 : ///
12 : ////////////////////////////////////////////////////////////////////////////////
13 : //
14 : // Last changed : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $
15 : // File revision : $Revision: 4 $
16 : //
17 : // $Id: FIRFilter.cpp 202 2015-02-21 21:24:29Z oparviai $
18 : //
19 : ////////////////////////////////////////////////////////////////////////////////
20 : //
21 : // License :
22 : //
23 : // SoundTouch audio processing library
24 : // Copyright (c) Olli Parviainen
25 : //
26 : // This library is free software; you can redistribute it and/or
27 : // modify it under the terms of the GNU Lesser General Public
28 : // License as published by the Free Software Foundation; either
29 : // version 2.1 of the License, or (at your option) any later version.
30 : //
31 : // This library is distributed in the hope that it will be useful,
32 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
33 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
34 : // Lesser General Public License for more details.
35 : //
36 : // You should have received a copy of the GNU Lesser General Public
37 : // License along with this library; if not, write to the Free Software
38 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 : //
40 : ////////////////////////////////////////////////////////////////////////////////
41 :
42 : #include <memory.h>
43 : #include <assert.h>
44 : #include <math.h>
45 : #include <stdlib.h>
46 : #include "FIRFilter.h"
47 : #include "cpu_detect.h"
48 :
49 : using namespace soundtouch;
50 :
51 : /*****************************************************************************
52 : *
53 : * Implementation of the class 'FIRFilter'
54 : *
55 : *****************************************************************************/
56 :
57 0 : FIRFilter::FIRFilter()
58 : {
59 0 : resultDivFactor = 0;
60 0 : resultDivider = 0;
61 0 : length = 0;
62 0 : lengthDiv8 = 0;
63 0 : filterCoeffs = NULL;
64 0 : }
65 :
66 :
67 0 : FIRFilter::~FIRFilter()
68 : {
69 0 : delete[] filterCoeffs;
70 0 : }
71 :
72 : // Usual C-version of the filter routine for stereo sound
73 0 : uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
74 : {
75 : int j, end;
76 : #ifdef SOUNDTOUCH_FLOAT_SAMPLES
77 : // when using floating point samples, use a scaler instead of a divider
78 : // because division is much slower operation than multiplying.
79 0 : double dScaler = 1.0 / (double)resultDivider;
80 : #endif
81 :
82 0 : assert(length != 0);
83 0 : assert(src != NULL);
84 0 : assert(dest != NULL);
85 0 : assert(filterCoeffs != NULL);
86 :
87 0 : end = 2 * (numSamples - length);
88 :
89 : #pragma omp parallel for
90 0 : for (j = 0; j < end; j += 2)
91 : {
92 : const SAMPLETYPE *ptr;
93 : LONG_SAMPLETYPE suml, sumr;
94 : uint i;
95 :
96 0 : suml = sumr = 0;
97 0 : ptr = src + j;
98 :
99 0 : for (i = 0; i < length; i += 4)
100 : {
101 : // loop is unrolled by factor of 4 here for efficiency
102 0 : suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
103 0 : ptr[2 * i + 2] * filterCoeffs[i + 1] +
104 0 : ptr[2 * i + 4] * filterCoeffs[i + 2] +
105 0 : ptr[2 * i + 6] * filterCoeffs[i + 3];
106 0 : sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
107 0 : ptr[2 * i + 3] * filterCoeffs[i + 1] +
108 0 : ptr[2 * i + 5] * filterCoeffs[i + 2] +
109 0 : ptr[2 * i + 7] * filterCoeffs[i + 3];
110 : }
111 :
112 : #ifdef SOUNDTOUCH_INTEGER_SAMPLES
113 : suml >>= resultDivFactor;
114 : sumr >>= resultDivFactor;
115 : // saturate to 16 bit integer limits
116 : suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
117 : // saturate to 16 bit integer limits
118 : sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
119 : #else
120 0 : suml *= dScaler;
121 0 : sumr *= dScaler;
122 : #endif // SOUNDTOUCH_INTEGER_SAMPLES
123 0 : dest[j] = (SAMPLETYPE)suml;
124 0 : dest[j + 1] = (SAMPLETYPE)sumr;
125 : }
126 0 : return numSamples - length;
127 : }
128 :
129 :
130 :
131 :
132 : // Usual C-version of the filter routine for mono sound
133 0 : uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
134 : {
135 : int j, end;
136 : #ifdef SOUNDTOUCH_FLOAT_SAMPLES
137 : // when using floating point samples, use a scaler instead of a divider
138 : // because division is much slower operation than multiplying.
139 0 : double dScaler = 1.0 / (double)resultDivider;
140 : #endif
141 :
142 0 : assert(length != 0);
143 :
144 0 : end = numSamples - length;
145 : #pragma omp parallel for
146 0 : for (j = 0; j < end; j ++)
147 : {
148 0 : const SAMPLETYPE *pSrc = src + j;
149 : LONG_SAMPLETYPE sum;
150 : uint i;
151 :
152 0 : sum = 0;
153 0 : for (i = 0; i < length; i += 4)
154 : {
155 : // loop is unrolled by factor of 4 here for efficiency
156 0 : sum += pSrc[i + 0] * filterCoeffs[i + 0] +
157 0 : pSrc[i + 1] * filterCoeffs[i + 1] +
158 0 : pSrc[i + 2] * filterCoeffs[i + 2] +
159 0 : pSrc[i + 3] * filterCoeffs[i + 3];
160 : }
161 : #ifdef SOUNDTOUCH_INTEGER_SAMPLES
162 : sum >>= resultDivFactor;
163 : // saturate to 16 bit integer limits
164 : sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
165 : #else
166 0 : sum *= dScaler;
167 : #endif // SOUNDTOUCH_INTEGER_SAMPLES
168 0 : dest[j] = (SAMPLETYPE)sum;
169 : }
170 0 : return end;
171 : }
172 :
173 :
174 0 : uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
175 : {
176 : int j, end;
177 :
178 : #ifdef SOUNDTOUCH_FLOAT_SAMPLES
179 : // when using floating point samples, use a scaler instead of a divider
180 : // because division is much slower operation than multiplying.
181 0 : double dScaler = 1.0 / (double)resultDivider;
182 : #endif
183 :
184 0 : assert(length != 0);
185 0 : assert(src != NULL);
186 0 : assert(dest != NULL);
187 0 : assert(filterCoeffs != NULL);
188 0 : assert(numChannels < 16);
189 :
190 0 : end = numChannels * (numSamples - length);
191 :
192 : #pragma omp parallel for
193 0 : for (j = 0; j < end; j += numChannels)
194 : {
195 : const SAMPLETYPE *ptr;
196 : LONG_SAMPLETYPE sums[16];
197 : uint c, i;
198 :
199 0 : for (c = 0; c < numChannels; c ++)
200 : {
201 0 : sums[c] = 0;
202 : }
203 :
204 0 : ptr = src + j;
205 :
206 0 : for (i = 0; i < length; i ++)
207 : {
208 0 : SAMPLETYPE coef=filterCoeffs[i];
209 0 : for (c = 0; c < numChannels; c ++)
210 : {
211 0 : sums[c] += ptr[0] * coef;
212 0 : ptr ++;
213 : }
214 : }
215 :
216 0 : for (c = 0; c < numChannels; c ++)
217 : {
218 : #ifdef SOUNDTOUCH_INTEGER_SAMPLES
219 : sums[c] >>= resultDivFactor;
220 : #else
221 0 : sums[c] *= dScaler;
222 : #endif // SOUNDTOUCH_INTEGER_SAMPLES
223 0 : dest[j+c] = (SAMPLETYPE)sums[c];
224 : }
225 : }
226 0 : return numSamples - length;
227 : }
228 :
229 :
230 : // Set filter coeffiecients and length.
231 : //
232 : // Throws an exception if filter length isn't divisible by 8
233 0 : void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
234 : {
235 0 : assert(newLength > 0);
236 0 : if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
237 :
238 0 : lengthDiv8 = newLength / 8;
239 0 : length = lengthDiv8 * 8;
240 0 : assert(length == newLength);
241 :
242 0 : resultDivFactor = uResultDivFactor;
243 0 : resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);
244 :
245 0 : delete[] filterCoeffs;
246 0 : filterCoeffs = new SAMPLETYPE[length];
247 0 : memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
248 0 : }
249 :
250 :
251 0 : uint FIRFilter::getLength() const
252 : {
253 0 : return length;
254 : }
255 :
256 :
257 :
258 : // Applies the filter to the given sequence of samples.
259 : //
260 : // Note : The amount of outputted samples is by value of 'filter_length'
261 : // smaller than the amount of input samples.
262 0 : uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
263 : {
264 0 : assert(length > 0);
265 0 : assert(lengthDiv8 * 8 == length);
266 :
267 0 : if (numSamples < length) return 0;
268 :
269 : #ifndef USE_MULTICH_ALWAYS
270 0 : if (numChannels == 1)
271 : {
272 0 : return evaluateFilterMono(dest, src, numSamples);
273 : }
274 0 : else if (numChannels == 2)
275 : {
276 0 : return evaluateFilterStereo(dest, src, numSamples);
277 : }
278 : else
279 : #endif // USE_MULTICH_ALWAYS
280 : {
281 0 : assert(numChannels > 0);
282 0 : return evaluateFilterMulti(dest, src, numSamples, numChannels);
283 : }
284 : }
285 :
286 :
287 :
288 : // Operator 'new' is overloaded so that it automatically creates a suitable instance
289 : // depending on if we've a MMX-capable CPU available or not.
290 0 : void * FIRFilter::operator new(size_t s)
291 : {
292 : // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
293 : ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
294 0 : return newInstance();
295 : }
296 :
297 :
298 0 : FIRFilter * FIRFilter::newInstance()
299 : {
300 : #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
301 : uint uExtensions;
302 :
303 0 : uExtensions = detectCPUextensions();
304 : #endif
305 :
306 : // Check if MMX/SSE instruction set extensions supported by CPU
307 :
308 : #ifdef SOUNDTOUCH_ALLOW_MMX
309 : // MMX routines available only with integer sample types
310 : if (uExtensions & SUPPORT_MMX)
311 : {
312 : return ::new FIRFilterMMX;
313 : }
314 : else
315 : #endif // SOUNDTOUCH_ALLOW_MMX
316 :
317 : #ifdef SOUNDTOUCH_ALLOW_SSE
318 0 : if (uExtensions & SUPPORT_SSE)
319 : {
320 : // SSE support
321 0 : return ::new FIRFilterSSE;
322 : }
323 : else
324 : #endif // SOUNDTOUCH_ALLOW_SSE
325 :
326 : {
327 : // ISA optimizations not supported, use plain C version
328 0 : return ::new FIRFilter;
329 : }
330 : }
|