Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "pitch_estimator.h"
12 :
13 : #include <math.h>
14 : #include <memory.h>
15 : #include <stdlib.h>
16 :
17 : #include "os_specific_inline.h"
18 :
19 : #include "webrtc/system_wrappers/include/compile_assert_c.h"
20 :
21 : /*
22 : * We are implementing the following filters;
23 : *
24 : * Pre-filtering:
25 : * y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
26 : *
27 : * Post-filtering:
28 : * y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
29 : *
30 : * Note that |lag| is a floating number so we perform an interpolation to
31 : * obtain the correct |lag|.
32 : *
33 : */
34 :
35 : static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
36 : -0.07};
37 :
38 : /* interpolation coefficients; generated by design_pitch_filter.m */
39 : static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
40 : {-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125,
41 : 0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109,
42 : 0.01754159521746},
43 : {-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643,
44 : 0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084,
45 : 0.01654127246314},
46 : {-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787,
47 : 0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311,
48 : 0.01295781500709},
49 : {-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393,
50 : 0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166,
51 : 0.00719783432422},
52 : {-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001,
53 : 0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000,
54 : -0.00000000000000},
55 : {0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562,
56 : 0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377,
57 : -0.00764851320885},
58 : {0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650,
59 : 0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059,
60 : -0.01463300534216},
61 : {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
62 : 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
63 : -0.01985640750433}
64 : };
65 :
66 : /*
67 : * Enumerating the operation of the filter.
68 : * iSAC has 4 different pitch-filter which are very similar in their structure.
69 : *
70 : * kPitchFilterPre : In this mode the filter is operating as pitch
71 : * pre-filter. This is used at the encoder.
72 : * kPitchFilterPost : In this mode the filter is operating as pitch
73 : * post-filter. This is the inverse of pre-filter and used
74 : * in the decoder.
75 : * kPitchFilterPreLa : This is, in structure, similar to pre-filtering but
76 : * utilizing 3 millisecond lookahead. It is used to
77 : * obtain the signal for LPC analysis.
78 : * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
79 : * differential changes in gain is considered. This is
80 : * used to find the optimal gain.
81 : */
82 : typedef enum {
83 : kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
84 : } PitchFilterOperation;
85 :
86 : /*
87 : * Structure with parameters used for pitch-filtering.
88 : * buffer : a buffer where the sum of previous inputs and outputs
89 : * are stored.
90 : * damper_state : the state of the damping filter. The filter is defined by
91 : * |kDampFilter|.
92 : * interpol_coeff : pointer to a set of coefficient which are used to utilize
93 : * fractional pitch by interpolation.
94 : * gain : pitch-gain to be applied to the current segment of input.
95 : * lag : pitch-lag for the current segment of input.
96 : * lag_offset : the offset of lag w.r.t. current sample.
97 : * sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC
98 : * frame.
99 : * This specifies the usage of the filter. See
100 : * 'PitchFilterOperation' for operational modes.
101 : * num_samples : number of samples to be processed in each segment.
102 : * index : index of the input and output sample.
103 : * damper_state_dg : state of damping filter for different trial gains.
104 : * gain_mult : differential changes to gain.
105 : */
106 : typedef struct {
107 : double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
108 : double damper_state[PITCH_DAMPORDER];
109 : const double *interpol_coeff;
110 : double gain;
111 : double lag;
112 : int lag_offset;
113 :
114 : int sub_frame;
115 : PitchFilterOperation mode;
116 : int num_samples;
117 : int index;
118 :
119 : double damper_state_dg[4][PITCH_DAMPORDER];
120 : double gain_mult[4];
121 : } PitchFilterParam;
122 :
123 : /**********************************************************************
124 : * FilterSegment()
125 : * Filter one segment, a quarter of a frame.
126 : *
127 : * Inputs
128 : * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
129 : * filter_param : pitch filter parameters.
130 : *
131 : * Outputs
132 : * out_data : pointer to a buffer where the filtered signal is written to.
133 : * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
134 : * where the output of different gain values (differential
135 : * change to gain) is written.
136 : */
137 0 : static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
138 : double* out_data,
139 : double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
140 : int n;
141 : int m;
142 : int j;
143 : double sum;
144 : double sum2;
145 : /* Index of |parameters->buffer| where the output is written to. */
146 0 : int pos = parameters->index + PITCH_BUFFSIZE;
147 : /* Index of |parameters->buffer| where samples are read for fractional-lag
148 : * computation. */
149 0 : int pos_lag = pos - parameters->lag_offset;
150 :
151 0 : for (n = 0; n < parameters->num_samples; ++n) {
152 : /* Shift low pass filter states. */
153 0 : for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
154 0 : parameters->damper_state[m] = parameters->damper_state[m - 1];
155 : }
156 : /* Filter to get fractional pitch. */
157 0 : sum = 0.0;
158 0 : for (m = 0; m < PITCH_FRACORDER; ++m) {
159 0 : sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
160 : }
161 : /* Multiply with gain. */
162 0 : parameters->damper_state[0] = parameters->gain * sum;
163 :
164 0 : if (parameters->mode == kPitchFilterPreGain) {
165 0 : int lag_index = parameters->index - parameters->lag_offset;
166 0 : int m_tmp = (lag_index < 0) ? -lag_index : 0;
167 : /* Update the damper state for the new sample. */
168 0 : for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
169 0 : for (j = 0; j < 4; ++j) {
170 0 : parameters->damper_state_dg[j][m] =
171 0 : parameters->damper_state_dg[j][m - 1];
172 : }
173 : }
174 :
175 0 : for (j = 0; j < parameters->sub_frame + 1; ++j) {
176 : /* Filter for fractional pitch. */
177 0 : sum2 = 0.0;
178 0 : for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
179 : /* |lag_index + m| is always larger than or equal to zero, see how
180 : * m_tmp is computed. This is equivalent to assume samples outside
181 : * |out_dg[j]| are zero. */
182 0 : sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
183 : }
184 : /* Add the contribution of differential gain change. */
185 0 : parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
186 0 : parameters->gain * sum2;
187 : }
188 :
189 : /* Filter with damping filter, and store the results. */
190 0 : for (j = 0; j < parameters->sub_frame + 1; ++j) {
191 0 : sum = 0.0;
192 0 : for (m = 0; m < PITCH_DAMPORDER; ++m) {
193 0 : sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
194 : }
195 0 : out_dg[j][parameters->index] = sum;
196 : }
197 : }
198 : /* Filter with damping filter. */
199 0 : sum = 0.0;
200 0 : for (m = 0; m < PITCH_DAMPORDER; ++m) {
201 0 : sum += parameters->damper_state[m] * kDampFilter[m];
202 : }
203 :
204 : /* Subtract from input and update buffer. */
205 0 : out_data[parameters->index] = in_data[parameters->index] - sum;
206 0 : parameters->buffer[pos] = in_data[parameters->index] +
207 0 : out_data[parameters->index];
208 :
209 0 : ++parameters->index;
210 0 : ++pos;
211 0 : ++pos_lag;
212 : }
213 0 : return;
214 : }
215 :
216 : /* Update filter parameters based on the pitch-gains and pitch-lags. */
217 0 : static void Update(PitchFilterParam* parameters) {
218 : double fraction;
219 : int fraction_index;
220 : /* Compute integer lag-offset. */
221 0 : parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
222 : 0.5);
223 : /* Find correct set of coefficients for computing fractional pitch. */
224 0 : fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
225 0 : fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
226 0 : parameters->interpol_coeff = kIntrpCoef[fraction_index];
227 :
228 0 : if (parameters->mode == kPitchFilterPreGain) {
229 : /* If in this mode make a differential change to pitch gain. */
230 0 : parameters->gain_mult[parameters->sub_frame] += 0.2;
231 0 : if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
232 0 : parameters->gain_mult[parameters->sub_frame] = 1.0;
233 : }
234 0 : if (parameters->sub_frame > 0) {
235 0 : parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
236 : }
237 : }
238 0 : }
239 :
240 : /******************************************************************************
241 : * FilterFrame()
242 : * Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
243 : *
244 : * Inputs
245 : * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
246 : * lags : pointer to pitch-lags, 4 lags per frame.
247 : * gains : pointer to pitch-gians, 4 gains per frame.
248 : * mode : defining the functionality of the filter. It takes the
249 : * following values.
250 : * kPitchFilterPre: Pitch pre-filter, used at encoder.
251 : * kPitchFilterPost: Pitch post-filter, used at decoder.
252 : * kPitchFilterPreLa: Pitch pre-filter with lookahead.
253 : * kPitchFilterPreGain: Pitch pre-filter used to otain optimal
254 : * pitch-gains.
255 : *
256 : * Outputs
257 : * out_data : pointer to a buffer where the filtered signal is written to.
258 : * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
259 : * where the output of different gain values (differential
260 : * change to gain) is written.
261 : */
262 0 : static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
263 : double* lags, double* gains, PitchFilterOperation mode,
264 : double* out_data,
265 : double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
266 : PitchFilterParam filter_parameters;
267 : double gain_delta, lag_delta;
268 : double old_lag, old_gain;
269 : int n;
270 : int m;
271 0 : const double kEnhancer = 1.3;
272 :
273 : /* Set up buffer and states. */
274 0 : filter_parameters.index = 0;
275 0 : filter_parameters.lag_offset = 0;
276 0 : filter_parameters.mode = mode;
277 : /* Copy states to local variables. */
278 0 : memcpy(filter_parameters.buffer, filter_state->ubuf,
279 : sizeof(filter_state->ubuf));
280 : COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
281 : sizeof(filter_state->ubuf));
282 0 : memset(filter_parameters.buffer +
283 : sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
284 : 0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
285 0 : memcpy(filter_parameters.damper_state, filter_state->ystate,
286 : sizeof(filter_state->ystate));
287 :
288 0 : if (mode == kPitchFilterPreGain) {
289 : /* Clear buffers. */
290 0 : memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
291 0 : memset(filter_parameters.damper_state_dg, 0,
292 : sizeof(filter_parameters.damper_state_dg));
293 0 : for (n = 0; n < PITCH_SUBFRAMES; ++n) {
294 : //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
295 0 : memset(out_dg[n], 0, sizeof(out_dg[n]));
296 : }
297 0 : } else if (mode == kPitchFilterPost) {
298 : /* Make output more periodic. Negative sign is to change the structure
299 : * of the filter. */
300 0 : for (n = 0; n < PITCH_SUBFRAMES; ++n) {
301 0 : gains[n] *= -kEnhancer;
302 : }
303 : }
304 :
305 0 : old_lag = *filter_state->oldlagp;
306 0 : old_gain = *filter_state->oldgainp;
307 :
308 : /* No interpolation if pitch lag step is big. */
309 0 : if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
310 0 : (lags[0] < (PITCH_DOWNSTEP * old_lag))) {
311 0 : old_lag = lags[0];
312 0 : old_gain = gains[0];
313 :
314 0 : if (mode == kPitchFilterPreGain) {
315 0 : filter_parameters.gain_mult[0] = 1.0;
316 : }
317 : }
318 :
319 0 : filter_parameters.num_samples = PITCH_UPDATE;
320 0 : for (m = 0; m < PITCH_SUBFRAMES; ++m) {
321 : /* Set the sub-frame value. */
322 0 : filter_parameters.sub_frame = m;
323 : /* Calculate interpolation steps for pitch-lag and pitch-gain. */
324 0 : lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
325 0 : filter_parameters.lag = old_lag;
326 0 : gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
327 0 : filter_parameters.gain = old_gain;
328 : /* Store for the next sub-frame. */
329 0 : old_lag = lags[m];
330 0 : old_gain = gains[m];
331 :
332 0 : for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
333 : /* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
334 : * some parameters of filter need to be update. */
335 0 : filter_parameters.gain += gain_delta;
336 0 : filter_parameters.lag += lag_delta;
337 : /* Update parameters according to new lag value. */
338 0 : Update(&filter_parameters);
339 : /* Filter a segment of input. */
340 0 : FilterSegment(in_data, &filter_parameters, out_data, out_dg);
341 : }
342 : }
343 :
344 0 : if (mode != kPitchFilterPreGain) {
345 : /* Export buffer and states. */
346 0 : memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
347 : sizeof(filter_state->ubuf));
348 0 : memcpy(filter_state->ystate, filter_parameters.damper_state,
349 : sizeof(filter_state->ystate));
350 :
351 : /* Store for the next frame. */
352 0 : *filter_state->oldlagp = old_lag;
353 0 : *filter_state->oldgainp = old_gain;
354 : }
355 :
356 0 : if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
357 : /* Filter the lookahead segment, this is treated as the last sub-frame. So
358 : * set |pf_param| to last sub-frame. */
359 0 : filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
360 0 : filter_parameters.num_samples = QLOOKAHEAD;
361 0 : FilterSegment(in_data, &filter_parameters, out_data, out_dg);
362 : }
363 0 : }
364 :
365 0 : void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
366 : PitchFiltstr* pf_state, double* lags,
367 : double* gains) {
368 0 : FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
369 0 : }
370 :
371 0 : void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
372 : PitchFiltstr* pf_state, double* lags,
373 : double* gains) {
374 0 : FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
375 : NULL);
376 0 : }
377 :
378 0 : void WebRtcIsac_PitchfilterPre_gains(
379 : double* in_data, double* out_data,
380 : double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
381 : double* lags, double* gains) {
382 0 : FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
383 : out_dg);
384 0 : }
385 :
386 0 : void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
387 : PitchFiltstr* pf_state, double* lags,
388 : double* gains) {
389 0 : FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
390 0 : }
|