Line data Source code
1 : /*
2 : * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "pitch_estimator.h"
12 :
13 : #include <math.h>
14 : #include <memory.h>
15 : #include <string.h>
16 : #ifdef WEBRTC_ANDROID
17 : #include <stdlib.h>
18 : #endif
19 :
20 : static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
21 : 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
22 :
23 : /* interpolation filter */
24 0 : __inline static void IntrepolFilter(double *data_ptr, double *intrp)
25 : {
26 0 : *intrp = kInterpolWin[0] * data_ptr[-3];
27 0 : *intrp += kInterpolWin[1] * data_ptr[-2];
28 0 : *intrp += kInterpolWin[2] * data_ptr[-1];
29 0 : *intrp += kInterpolWin[3] * data_ptr[0];
30 0 : *intrp += kInterpolWin[4] * data_ptr[1];
31 0 : *intrp += kInterpolWin[5] * data_ptr[2];
32 0 : *intrp += kInterpolWin[6] * data_ptr[3];
33 0 : *intrp += kInterpolWin[7] * data_ptr[4];
34 0 : }
35 :
36 :
37 : /* 2D parabolic interpolation */
38 : /* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
39 0 : __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
40 : {
41 : double c, b[2], A[2][2];
42 : double t1, t2, d;
43 : double delta1, delta2;
44 :
45 :
46 : // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
47 : // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
48 :
49 0 : c = T[1][1];
50 0 : b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
51 0 : b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
52 0 : A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
53 0 : t1 = 0.5 * (T[0][0] + T[2][2]) - c;
54 0 : t2 = 0.5 * (T[2][0] + T[0][2]) - c;
55 0 : d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
56 0 : A[0][0] = -t1 - 0.5 * d;
57 0 : A[1][1] = -t2 - 0.5 * d;
58 :
59 : /* deal with singularities or ill-conditioned cases */
60 0 : if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
61 0 : *peak_val = T[1][1];
62 0 : return;
63 : }
64 :
65 : /* Cholesky decomposition: replace A by upper-triangular factor */
66 0 : A[0][0] = sqrt(A[0][0]);
67 0 : A[0][1] = A[0][1] / A[0][0];
68 0 : A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
69 :
70 : /* compute [x; y] = -0.5 * inv(A) * b */
71 0 : t1 = b[0] / A[0][0];
72 0 : t2 = (b[1] - t1 * A[0][1]) / A[1][1];
73 0 : delta2 = t2 / A[1][1];
74 0 : delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
75 0 : delta2 *= 0.5;
76 :
77 : /* limit norm */
78 0 : t1 = delta1 * delta1 + delta2 * delta2;
79 0 : if (t1 > 1.0) {
80 0 : delta1 /= t1;
81 0 : delta2 /= t1;
82 : }
83 :
84 0 : *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
85 :
86 0 : *x += delta1;
87 0 : *y += delta2;
88 : }
89 :
90 :
91 0 : static void PCorr(const double *in, double *outcorr)
92 : {
93 : double sum, ysum, prod;
94 : const double *x, *inptr;
95 : int k, n;
96 :
97 : //ysum = 1e-6; /* use this with float (i.s.o. double)! */
98 0 : ysum = 1e-13;
99 0 : sum = 0.0;
100 0 : x = in + PITCH_MAX_LAG/2 + 2;
101 0 : for (n = 0; n < PITCH_CORR_LEN2; n++) {
102 0 : ysum += in[n] * in[n];
103 0 : sum += x[n] * in[n];
104 : }
105 :
106 0 : outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
107 0 : *outcorr = sum / sqrt(ysum);
108 :
109 0 : for (k = 1; k < PITCH_LAG_SPAN2; k++) {
110 0 : ysum -= in[k-1] * in[k-1];
111 0 : ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
112 0 : sum = 0.0;
113 0 : inptr = &in[k];
114 0 : prod = x[0] * inptr[0];
115 0 : for (n = 1; n < PITCH_CORR_LEN2; n++) {
116 0 : sum += prod;
117 0 : prod = x[n] * inptr[n];
118 : }
119 0 : sum += prod;
120 0 : outcorr--;
121 0 : *outcorr = sum / sqrt(ysum);
122 : }
123 0 : }
124 :
125 :
126 0 : void WebRtcIsac_InitializePitch(const double *in,
127 : const double old_lag,
128 : const double old_gain,
129 : PitchAnalysisStruct *State,
130 : double *lags)
131 : {
132 : double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
133 : double ratio, log_lag, gain_bias;
134 : double bias;
135 : double corrvec1[PITCH_LAG_SPAN2];
136 : double corrvec2[PITCH_LAG_SPAN2];
137 : int m, k;
138 : // Allocating 10 extra entries at the begining of the CorrSurf
139 : double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
140 : double* CorrSurf[2*PITCH_BW+3];
141 : double *CorrSurfPtr1, *CorrSurfPtr2;
142 0 : double LagWin[3] = {0.2, 0.5, 0.98};
143 : int ind1, ind2, peaks_ind, peak, max_ind;
144 : int peaks[PITCH_MAX_NUM_PEAKS];
145 : double adj, gain_tmp;
146 : double corr, corr_max;
147 : double intrp_a, intrp_b, intrp_c, intrp_d;
148 : double peak_vals[PITCH_MAX_NUM_PEAKS];
149 : double lags1[PITCH_MAX_NUM_PEAKS];
150 : double lags2[PITCH_MAX_NUM_PEAKS];
151 : double T[3][3];
152 : int row;
153 :
154 0 : for(k = 0; k < 2*PITCH_BW+3; k++)
155 : {
156 0 : CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
157 : }
158 : /* reset CorrSurf matrix */
159 0 : memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
160 :
161 : //warnings -DH
162 0 : max_ind = 0;
163 0 : peak = 0;
164 :
165 : /* copy old values from state buffer */
166 0 : memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
167 :
168 : /* decimation; put result after the old values */
169 0 : WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
170 : &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
171 :
172 : /* low-pass filtering */
173 0 : for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
174 0 : buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
175 :
176 : /* copy end part back into state buffer */
177 0 : memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
178 :
179 : /* compute correlation for first and second half of the frame */
180 0 : PCorr(buf_dec, corrvec1);
181 0 : PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
182 :
183 : /* bias towards pitch lag of previous frame */
184 0 : log_lag = log(0.5 * old_lag);
185 0 : gain_bias = 4.0 * old_gain * old_gain;
186 0 : if (gain_bias > 0.8) gain_bias = 0.8;
187 0 : for (k = 0; k < PITCH_LAG_SPAN2; k++)
188 : {
189 0 : ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
190 0 : bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
191 0 : corrvec1[k] *= bias;
192 : }
193 :
194 : /* taper correlation functions */
195 0 : for (k = 0; k < 3; k++) {
196 0 : gain_tmp = LagWin[k];
197 0 : corrvec1[k] *= gain_tmp;
198 0 : corrvec2[k] *= gain_tmp;
199 0 : corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
200 0 : corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
201 : }
202 :
203 0 : corr_max = 0.0;
204 : /* fill middle row of correlation surface */
205 0 : ind1 = 0;
206 0 : ind2 = 0;
207 0 : CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
208 0 : for (k = 0; k < PITCH_LAG_SPAN2; k++) {
209 0 : corr = corrvec1[ind1++] + corrvec2[ind2++];
210 0 : CorrSurfPtr1[k] = corr;
211 0 : if (corr > corr_max) {
212 0 : corr_max = corr; /* update maximum */
213 0 : max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
214 : }
215 : }
216 : /* fill first and last rows of correlation surface */
217 0 : ind1 = 0;
218 0 : ind2 = PITCH_BW;
219 0 : CorrSurfPtr1 = &CorrSurf[0][2];
220 0 : CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
221 0 : for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
222 0 : ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
223 0 : adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
224 0 : corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
225 0 : CorrSurfPtr1[k] = corr;
226 0 : if (corr > corr_max) {
227 0 : corr_max = corr; /* update maximum */
228 0 : max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
229 : }
230 0 : corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
231 0 : CorrSurfPtr2[k] = corr;
232 0 : if (corr > corr_max) {
233 0 : corr_max = corr; /* update maximum */
234 0 : max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
235 : }
236 : }
237 : /* fill second and next to last rows of correlation surface */
238 0 : ind1 = 0;
239 0 : ind2 = PITCH_BW-1;
240 0 : CorrSurfPtr1 = &CorrSurf[1][2];
241 0 : CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
242 0 : for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
243 0 : ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
244 0 : adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
245 0 : corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
246 0 : CorrSurfPtr1[k] = corr;
247 0 : if (corr > corr_max) {
248 0 : corr_max = corr; /* update maximum */
249 0 : max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
250 : }
251 0 : corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
252 0 : CorrSurfPtr2[k] = corr;
253 0 : if (corr > corr_max) {
254 0 : corr_max = corr; /* update maximum */
255 0 : max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
256 : }
257 : }
258 : /* fill remainder of correlation surface */
259 0 : for (m = 2; m < PITCH_BW; m++) {
260 0 : ind1 = 0;
261 0 : ind2 = PITCH_BW - m; /* always larger than ind1 */
262 0 : CorrSurfPtr1 = &CorrSurf[m][2];
263 0 : CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
264 0 : for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
265 0 : ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
266 0 : adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
267 0 : corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
268 0 : CorrSurfPtr1[k] = corr;
269 0 : if (corr > corr_max) {
270 0 : corr_max = corr; /* update maximum */
271 0 : max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
272 : }
273 0 : corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
274 0 : CorrSurfPtr2[k] = corr;
275 0 : if (corr > corr_max) {
276 0 : corr_max = corr; /* update maximum */
277 0 : max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
278 : }
279 : }
280 : }
281 :
282 : /* threshold value to qualify as a peak */
283 0 : corr_max *= 0.6;
284 :
285 0 : peaks_ind = 0;
286 : /* find peaks */
287 0 : for (m = 1; m < PITCH_BW+1; m++) {
288 0 : if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
289 0 : CorrSurfPtr1 = &CorrSurf[m][2];
290 0 : for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
291 0 : corr = CorrSurfPtr1[k];
292 0 : if (corr > corr_max) {
293 0 : if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
294 0 : if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
295 : /* found a peak; store index into matrix */
296 0 : peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
297 0 : if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
298 : }
299 : }
300 : }
301 : }
302 : }
303 0 : for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
304 0 : if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
305 0 : CorrSurfPtr1 = &CorrSurf[m][2];
306 0 : for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
307 0 : corr = CorrSurfPtr1[k];
308 0 : if (corr > corr_max) {
309 0 : if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
310 0 : if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
311 : /* found a peak; store index into matrix */
312 0 : peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
313 0 : if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
314 : }
315 : }
316 : }
317 : }
318 : }
319 :
320 0 : if (peaks_ind > 0) {
321 : /* examine each peak */
322 0 : CorrSurfPtr1 = &CorrSurf[0][0];
323 0 : for (k = 0; k < peaks_ind; k++) {
324 0 : peak = peaks[k];
325 :
326 : /* compute four interpolated values around current peak */
327 0 : IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
328 0 : IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
329 0 : IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
330 0 : IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
331 :
332 : /* determine maximum of the interpolated values */
333 0 : corr = CorrSurfPtr1[peak];
334 0 : corr_max = intrp_a;
335 0 : if (intrp_b > corr_max) corr_max = intrp_b;
336 0 : if (intrp_c > corr_max) corr_max = intrp_c;
337 0 : if (intrp_d > corr_max) corr_max = intrp_d;
338 :
339 : /* determine where the peak sits and fill a 3x3 matrix around it */
340 0 : row = peak / (PITCH_LAG_SPAN2+4);
341 0 : lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
342 0 : lags2[k] = (double) (lags1[k] + PITCH_BW - row);
343 0 : if ( corr > corr_max ) {
344 0 : T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
345 0 : T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
346 0 : T[1][1] = corr;
347 0 : T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
348 0 : T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
349 0 : T[1][0] = intrp_a;
350 0 : T[0][1] = intrp_b;
351 0 : T[2][1] = intrp_c;
352 0 : T[1][2] = intrp_d;
353 : } else {
354 0 : if (intrp_a == corr_max) {
355 0 : lags1[k] -= 0.5;
356 0 : lags2[k] += 0.5;
357 0 : IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
358 0 : IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
359 0 : T[1][1] = intrp_a;
360 0 : T[0][2] = intrp_b;
361 0 : T[2][2] = intrp_c;
362 0 : T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
363 0 : T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
364 0 : T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
365 0 : T[1][2] = corr;
366 0 : } else if (intrp_b == corr_max) {
367 0 : lags1[k] -= 0.5;
368 0 : lags2[k] -= 0.5;
369 0 : IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
370 0 : T[2][0] = intrp_a;
371 0 : T[1][1] = intrp_b;
372 0 : IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
373 0 : T[2][2] = intrp_d;
374 0 : T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
375 0 : T[0][1] = CorrSurfPtr1[peak - 1];
376 0 : T[2][1] = corr;
377 0 : T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
378 0 : } else if (intrp_c == corr_max) {
379 0 : lags1[k] += 0.5;
380 0 : lags2[k] += 0.5;
381 0 : T[0][0] = intrp_a;
382 0 : IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
383 0 : T[1][1] = intrp_c;
384 0 : T[0][2] = intrp_d;
385 0 : IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
386 0 : T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
387 0 : T[0][1] = corr;
388 0 : T[2][1] = CorrSurfPtr1[peak + 1];
389 0 : T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
390 : } else {
391 0 : lags1[k] += 0.5;
392 0 : lags2[k] -= 0.5;
393 0 : T[0][0] = intrp_b;
394 0 : T[2][0] = intrp_c;
395 0 : T[1][1] = intrp_d;
396 0 : IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
397 0 : IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
398 0 : T[1][0] = corr;
399 0 : T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
400 0 : T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
401 0 : T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
402 : }
403 : }
404 :
405 : /* 2D parabolic interpolation gives more accurate lags and peak value */
406 0 : Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
407 : }
408 :
409 : /* determine the highest peak, after applying a bias towards short lags */
410 0 : corr_max = 0.0;
411 0 : for (k = 0; k < peaks_ind; k++) {
412 0 : corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
413 0 : if (corr > corr_max) {
414 0 : corr_max = corr;
415 0 : peak = k;
416 : }
417 : }
418 :
419 0 : lags1[peak] *= 2.0;
420 0 : lags2[peak] *= 2.0;
421 :
422 0 : if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
423 0 : if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
424 0 : if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
425 0 : if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
426 :
427 : /* store lags of highest peak in output array */
428 0 : lags[0] = lags1[peak];
429 0 : lags[1] = lags1[peak];
430 0 : lags[2] = lags2[peak];
431 0 : lags[3] = lags2[peak];
432 : }
433 : else
434 : {
435 0 : row = max_ind / (PITCH_LAG_SPAN2+4);
436 0 : lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
437 0 : lags2[0] = (double) (lags1[0] + PITCH_BW - row);
438 :
439 0 : if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
440 0 : if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
441 0 : if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
442 0 : if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
443 :
444 : /* store lags of highest peak in output array */
445 0 : lags[0] = lags1[0];
446 0 : lags[1] = lags1[0];
447 0 : lags[2] = lags2[0];
448 0 : lags[3] = lags2[0];
449 : }
450 0 : }
451 :
452 :
453 :
454 : /* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
455 : * t = (0:4)';
456 : * A = [t.^0, t.^1, t.^2, t.^3, t.^4];
457 : * [Q, dummy] = qr(A);
458 : * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
459 : static const double kWeight[5][5] = {
460 : { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
461 : {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
462 : {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
463 : { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
464 : { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
465 : };
466 :
467 :
468 0 : void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
469 : double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
470 : PitchAnalysisStruct *State,
471 : double *lags,
472 : double *gains)
473 : {
474 : double HPin[PITCH_FRAME_LEN];
475 : double Weighted[PITCH_FRAME_LEN];
476 : double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
477 : double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
478 : double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
479 : double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
480 : double old_lag, old_gain;
481 : double nrg_wht, tmp;
482 : double Wnrg, Wfluct, Wgain;
483 : double H[4][4];
484 : double grad[4];
485 : double dG[4];
486 : int k, m, n, iter;
487 :
488 : /* high pass filtering using second order pole-zero filter */
489 0 : WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
490 :
491 : /* copy from state into buffer */
492 0 : memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
493 :
494 : /* compute weighted and whitened signals */
495 0 : WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
496 :
497 : /* copy from buffer into state */
498 0 : memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
499 :
500 0 : old_lag = State->PFstr_wght.oldlagp[0];
501 0 : old_gain = State->PFstr_wght.oldgainp[0];
502 :
503 : /* inital pitch estimate */
504 0 : WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
505 :
506 :
507 : /* Iterative optimization of lags - to be done */
508 :
509 : /* compute energy of whitened signal */
510 0 : nrg_wht = 0.0;
511 0 : for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
512 0 : nrg_wht += Whitened[k] * Whitened[k];
513 :
514 :
515 : /* Iterative optimization of gains */
516 :
517 : /* set weights for energy, gain fluctiation, and spectral gain penalty functions */
518 0 : Wnrg = 1.0 / nrg_wht;
519 0 : Wgain = 0.005;
520 0 : Wfluct = 3.0;
521 :
522 : /* set initial gains */
523 0 : for (k = 0; k < 4; k++)
524 0 : gains[k] = PITCH_MAX_GAIN_06;
525 :
526 : /* two iterations should be enough */
527 0 : for (iter = 0; iter < 2; iter++) {
528 : /* compute Jacobian of pre-filter output towards gains */
529 0 : WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
530 :
531 : /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
532 0 : for (k = 0; k < 4; k++) {
533 0 : tmp = 0.0;
534 0 : for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
535 0 : tmp += out_G[n] * out_dG[k][n];
536 0 : grad[k] = tmp * Wnrg;
537 : }
538 0 : for (k = 0; k < 4; k++) {
539 0 : for (m = 0; m <= k; m++) {
540 0 : tmp = 0.0;
541 0 : for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
542 0 : tmp += out_dG[m][n] * out_dG[k][n];
543 0 : H[k][m] = tmp * Wnrg;
544 : }
545 : }
546 :
547 : /* add gradient and Hessian (lower triangle) for dampening fast gain changes */
548 0 : for (k = 0; k < 4; k++) {
549 0 : tmp = kWeight[k+1][0] * old_gain;
550 0 : for (m = 0; m < 4; m++)
551 0 : tmp += kWeight[k+1][m+1] * gains[m];
552 0 : grad[k] += tmp * Wfluct;
553 : }
554 0 : for (k = 0; k < 4; k++) {
555 0 : for (m = 0; m <= k; m++) {
556 0 : H[k][m] += kWeight[k+1][m+1] * Wfluct;
557 : }
558 : }
559 :
560 : /* add gradient and Hessian for dampening gain */
561 0 : for (k = 0; k < 3; k++) {
562 0 : tmp = 1.0 / (1 - gains[k]);
563 0 : grad[k] += tmp * tmp * Wgain;
564 0 : H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
565 : }
566 0 : tmp = 1.0 / (1 - gains[3]);
567 0 : grad[3] += 1.33 * (tmp * tmp * Wgain);
568 0 : H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
569 :
570 :
571 : /* compute Cholesky factorization of Hessian
572 : * by overwritting the upper triangle; scale factors on diagonal
573 : * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */
574 0 : H[0][1] = H[1][0] / H[0][0];
575 0 : H[0][2] = H[2][0] / H[0][0];
576 0 : H[0][3] = H[3][0] / H[0][0];
577 0 : H[1][1] -= H[0][0] * H[0][1] * H[0][1];
578 0 : H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
579 0 : H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
580 0 : H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
581 0 : H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
582 0 : H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
583 :
584 : /* Compute update as delta_gains = -inv(H) * grad */
585 : /* copy and negate */
586 0 : for (k = 0; k < 4; k++)
587 0 : dG[k] = -grad[k];
588 : /* back substitution */
589 0 : dG[1] -= dG[0] * H[0][1];
590 0 : dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
591 0 : dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
592 : /* scale */
593 0 : for (k = 0; k < 4; k++)
594 0 : dG[k] /= H[k][k];
595 : /* back substitution */
596 0 : dG[2] -= dG[3] * H[2][3];
597 0 : dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
598 0 : dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
599 :
600 : /* update gains and check range */
601 0 : for (k = 0; k < 4; k++) {
602 0 : gains[k] += dG[k];
603 0 : if (gains[k] > PITCH_MAX_GAIN)
604 0 : gains[k] = PITCH_MAX_GAIN;
605 0 : else if (gains[k] < 0.0)
606 0 : gains[k] = 0.0;
607 : }
608 : }
609 :
610 : /* update state for next frame */
611 0 : WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
612 :
613 : /* concatenate previous input's end and current input */
614 0 : memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
615 0 : memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
616 :
617 : /* lookahead pitch filtering for masking analysis */
618 0 : WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
619 :
620 : /* store last part of input */
621 0 : for (k = 0; k < QLOOKAHEAD; k++)
622 0 : State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
623 0 : }
|