Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <math.h>
12 : #include <string.h>
13 : #include <stdlib.h>
14 :
15 : #include "webrtc/base/checks.h"
16 : #include "webrtc/common_audio/fft4g.h"
17 : #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
18 : #include "webrtc/modules/audio_processing/ns/noise_suppression.h"
19 : #include "webrtc/modules/audio_processing/ns/ns_core.h"
20 : #include "webrtc/modules/audio_processing/ns/windows_private.h"
21 :
22 : // Set Feature Extraction Parameters.
23 0 : static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
24 : // Bin size of histogram.
25 0 : self->featureExtractionParams.binSizeLrt = 0.1f;
26 0 : self->featureExtractionParams.binSizeSpecFlat = 0.05f;
27 0 : self->featureExtractionParams.binSizeSpecDiff = 0.1f;
28 :
29 : // Range of histogram over which LRT threshold is computed.
30 0 : self->featureExtractionParams.rangeAvgHistLrt = 1.f;
31 :
32 : // Scale parameters: multiply dominant peaks of the histograms by scale factor
33 : // to obtain thresholds for prior model.
34 : // For LRT and spectral difference.
35 0 : self->featureExtractionParams.factor1ModelPars = 1.2f;
36 : // For spectral_flatness: used when noise is flatter than speech.
37 0 : self->featureExtractionParams.factor2ModelPars = 0.9f;
38 :
39 : // Peak limit for spectral flatness (varies between 0 and 1).
40 0 : self->featureExtractionParams.thresPosSpecFlat = 0.6f;
41 :
42 : // Limit on spacing of two highest peaks in histogram: spacing determined by
43 : // bin size.
44 0 : self->featureExtractionParams.limitPeakSpacingSpecFlat =
45 0 : 2 * self->featureExtractionParams.binSizeSpecFlat;
46 0 : self->featureExtractionParams.limitPeakSpacingSpecDiff =
47 0 : 2 * self->featureExtractionParams.binSizeSpecDiff;
48 :
49 : // Limit on relevance of second peak.
50 0 : self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;
51 0 : self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;
52 :
53 : // Fluctuation limit of LRT feature.
54 0 : self->featureExtractionParams.thresFluctLrt = 0.05f;
55 :
56 : // Limit on the max and min values for the feature thresholds.
57 0 : self->featureExtractionParams.maxLrt = 1.f;
58 0 : self->featureExtractionParams.minLrt = 0.2f;
59 :
60 0 : self->featureExtractionParams.maxSpecFlat = 0.95f;
61 0 : self->featureExtractionParams.minSpecFlat = 0.1f;
62 :
63 0 : self->featureExtractionParams.maxSpecDiff = 1.f;
64 0 : self->featureExtractionParams.minSpecDiff = 0.16f;
65 :
66 : // Criteria of weight of histogram peak to accept/reject feature.
67 0 : self->featureExtractionParams.thresWeightSpecFlat =
68 0 : (int)(0.3 * (self->modelUpdatePars[1])); // For spectral flatness.
69 0 : self->featureExtractionParams.thresWeightSpecDiff =
70 0 : (int)(0.3 * (self->modelUpdatePars[1])); // For spectral difference.
71 0 : }
72 :
73 : // Initialize state.
74 0 : int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) {
75 : int i;
76 : // Check for valid pointer.
77 0 : if (self == NULL) {
78 0 : return -1;
79 : }
80 :
81 : // Initialization of struct.
82 0 : if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
83 0 : self->fs = fs;
84 : } else {
85 0 : return -1;
86 : }
87 0 : self->windShift = 0;
88 : // We only support 10ms frames.
89 0 : if (fs == 8000) {
90 0 : self->blockLen = 80;
91 0 : self->anaLen = 128;
92 0 : self->window = kBlocks80w128;
93 : } else {
94 0 : self->blockLen = 160;
95 0 : self->anaLen = 256;
96 0 : self->window = kBlocks160w256;
97 : }
98 0 : self->magnLen = self->anaLen / 2 + 1; // Number of frequency bins.
99 :
100 : // Initialize FFT work arrays.
101 0 : self->ip[0] = 0; // Setting this triggers initialization.
102 0 : memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
103 0 : WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft);
104 :
105 0 : memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
106 0 : memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
107 0 : memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
108 :
109 : // For HB processing.
110 0 : memset(self->dataBufHB,
111 : 0,
112 : sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
113 :
114 : // For quantile noise estimation.
115 0 : memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
116 0 : for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
117 0 : self->lquantile[i] = 8.f;
118 0 : self->density[i] = 0.3f;
119 : }
120 :
121 0 : for (i = 0; i < SIMULT; i++) {
122 0 : self->counter[i] =
123 0 : (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
124 : }
125 :
126 0 : self->updates = 0;
127 :
128 : // Wiener filter initialization.
129 0 : for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
130 0 : self->smooth[i] = 1.f;
131 : }
132 :
133 : // Set the aggressiveness: default.
134 0 : self->aggrMode = 0;
135 :
136 : // Initialize variables for new method.
137 0 : self->priorSpeechProb = 0.5f; // Prior prob for speech/noise.
138 : // Previous analyze mag spectrum.
139 0 : memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL);
140 : // Previous process mag spectrum.
141 0 : memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL);
142 : // Current noise-spectrum.
143 0 : memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
144 : // Previous noise-spectrum.
145 0 : memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL);
146 : // Conservative noise spectrum estimate.
147 0 : memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL);
148 : // For estimation of HB in second pass.
149 0 : memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL);
150 : // Initial average magnitude spectrum.
151 0 : memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL);
152 0 : for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
153 : // Smooth LR (same as threshold).
154 0 : self->logLrtTimeAvg[i] = LRT_FEATURE_THR;
155 : }
156 :
157 : // Feature quantities.
158 : // Spectral flatness (start on threshold).
159 0 : self->featureData[0] = SF_FEATURE_THR;
160 0 : self->featureData[1] = 0.f; // Spectral entropy: not used in this version.
161 0 : self->featureData[2] = 0.f; // Spectral variance: not used in this version.
162 : // Average LRT factor (start on threshold).
163 0 : self->featureData[3] = LRT_FEATURE_THR;
164 : // Spectral template diff (start on threshold).
165 0 : self->featureData[4] = SF_FEATURE_THR;
166 0 : self->featureData[5] = 0.f; // Normalization for spectral difference.
167 : // Window time-average of input magnitude spectrum.
168 0 : self->featureData[6] = 0.f;
169 :
170 : // Histogram quantities: used to estimate/update thresholds for features.
171 0 : memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST);
172 0 : memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST);
173 0 : memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST);
174 :
175 :
176 0 : self->blockInd = -1; // Frame counter.
177 : // Default threshold for LRT feature.
178 0 : self->priorModelPars[0] = LRT_FEATURE_THR;
179 : // Threshold for spectral flatness: determined on-line.
180 0 : self->priorModelPars[1] = 0.5f;
181 : // sgn_map par for spectral measure: 1 for flatness measure.
182 0 : self->priorModelPars[2] = 1.f;
183 : // Threshold for template-difference feature: determined on-line.
184 0 : self->priorModelPars[3] = 0.5f;
185 : // Default weighting parameter for LRT feature.
186 0 : self->priorModelPars[4] = 1.f;
187 : // Default weighting parameter for spectral flatness feature.
188 0 : self->priorModelPars[5] = 0.f;
189 : // Default weighting parameter for spectral difference feature.
190 0 : self->priorModelPars[6] = 0.f;
191 :
192 : // Update flag for parameters:
193 : // 0 no update, 1 = update once, 2 = update every window.
194 0 : self->modelUpdatePars[0] = 2;
195 0 : self->modelUpdatePars[1] = 500; // Window for update.
196 : // Counter for update of conservative noise spectrum.
197 0 : self->modelUpdatePars[2] = 0;
198 : // Counter if the feature thresholds are updated during the sequence.
199 0 : self->modelUpdatePars[3] = self->modelUpdatePars[1];
200 :
201 0 : self->signalEnergy = 0.0;
202 0 : self->sumMagn = 0.0;
203 0 : self->whiteNoiseLevel = 0.0;
204 0 : self->pinkNoiseNumerator = 0.0;
205 0 : self->pinkNoiseExp = 0.0;
206 :
207 0 : set_feature_extraction_parameters(self);
208 :
209 : // Default mode.
210 0 : WebRtcNs_set_policy_core(self, 0);
211 :
212 0 : self->initFlag = 1;
213 0 : return 0;
214 : }
215 :
216 : // Estimate noise.
217 0 : static void NoiseEstimation(NoiseSuppressionC* self,
218 : float* magn,
219 : float* noise) {
220 : size_t i, s, offset;
221 : float lmagn[HALF_ANAL_BLOCKL], delta;
222 :
223 0 : if (self->updates < END_STARTUP_LONG) {
224 0 : self->updates++;
225 : }
226 :
227 0 : for (i = 0; i < self->magnLen; i++) {
228 0 : lmagn[i] = (float)log(magn[i]);
229 : }
230 :
231 : // Loop over simultaneous estimates.
232 0 : for (s = 0; s < SIMULT; s++) {
233 0 : offset = s * self->magnLen;
234 :
235 : // newquantest(...)
236 0 : for (i = 0; i < self->magnLen; i++) {
237 : // Compute delta.
238 0 : if (self->density[offset + i] > 1.0) {
239 0 : delta = FACTOR * 1.f / self->density[offset + i];
240 : } else {
241 0 : delta = FACTOR;
242 : }
243 :
244 : // Update log quantile estimate.
245 0 : if (lmagn[i] > self->lquantile[offset + i]) {
246 0 : self->lquantile[offset + i] +=
247 0 : QUANTILE * delta / (float)(self->counter[s] + 1);
248 : } else {
249 0 : self->lquantile[offset + i] -=
250 0 : (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1);
251 : }
252 :
253 : // Update density estimate.
254 0 : if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) {
255 0 : self->density[offset + i] =
256 0 : ((float)self->counter[s] * self->density[offset + i] +
257 0 : 1.f / (2.f * WIDTH)) /
258 0 : (float)(self->counter[s] + 1);
259 : }
260 : } // End loop over magnitude spectrum.
261 :
262 0 : if (self->counter[s] >= END_STARTUP_LONG) {
263 0 : self->counter[s] = 0;
264 0 : if (self->updates >= END_STARTUP_LONG) {
265 0 : for (i = 0; i < self->magnLen; i++) {
266 0 : self->quantile[i] = (float)exp(self->lquantile[offset + i]);
267 : }
268 : }
269 : }
270 :
271 0 : self->counter[s]++;
272 : } // End loop over simultaneous estimates.
273 :
274 : // Sequentially update the noise during startup.
275 0 : if (self->updates < END_STARTUP_LONG) {
276 : // Use the last "s" to get noise during startup that differ from zero.
277 0 : for (i = 0; i < self->magnLen; i++) {
278 0 : self->quantile[i] = (float)exp(self->lquantile[offset + i]);
279 : }
280 : }
281 :
282 0 : for (i = 0; i < self->magnLen; i++) {
283 0 : noise[i] = self->quantile[i];
284 : }
285 0 : }
286 :
287 : // Extract thresholds for feature parameters.
288 : // Histograms are computed over some window size (given by
289 : // self->modelUpdatePars[1]).
290 : // Thresholds and weights are extracted every window.
291 : // |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights.
292 : // Threshold and weights are returned in: self->priorModelPars.
293 0 : static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) {
294 : int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
295 : int maxPeak1, maxPeak2;
296 : int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
297 : weightPeak2SpecDiff;
298 :
299 : float binMid, featureSum;
300 : float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
301 : float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
302 :
303 : // 3 features: LRT, flatness, difference.
304 : // lrt_feature = self->featureData[3];
305 : // flat_feature = self->featureData[0];
306 : // diff_feature = self->featureData[4];
307 :
308 : // Update histograms.
309 0 : if (flag == 0) {
310 : // LRT
311 0 : if ((self->featureData[3] <
312 0 : HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) &&
313 0 : (self->featureData[3] >= 0.0)) {
314 0 : i = (int)(self->featureData[3] /
315 0 : self->featureExtractionParams.binSizeLrt);
316 0 : self->histLrt[i]++;
317 : }
318 : // Spectral flatness.
319 0 : if ((self->featureData[0] <
320 0 : HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) &&
321 0 : (self->featureData[0] >= 0.0)) {
322 0 : i = (int)(self->featureData[0] /
323 0 : self->featureExtractionParams.binSizeSpecFlat);
324 0 : self->histSpecFlat[i]++;
325 : }
326 : // Spectral difference.
327 0 : if ((self->featureData[4] <
328 0 : HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) &&
329 0 : (self->featureData[4] >= 0.0)) {
330 0 : i = (int)(self->featureData[4] /
331 0 : self->featureExtractionParams.binSizeSpecDiff);
332 0 : self->histSpecDiff[i]++;
333 : }
334 : }
335 :
336 : // Extract parameters for speech/noise probability.
337 0 : if (flag == 1) {
338 : // LRT feature: compute the average over
339 : // self->featureExtractionParams.rangeAvgHistLrt.
340 0 : avgHistLrt = 0.0;
341 0 : avgHistLrtCompl = 0.0;
342 0 : avgSquareHistLrt = 0.0;
343 0 : numHistLrt = 0;
344 0 : for (i = 0; i < HIST_PAR_EST; i++) {
345 0 : binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt;
346 0 : if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) {
347 0 : avgHistLrt += self->histLrt[i] * binMid;
348 0 : numHistLrt += self->histLrt[i];
349 : }
350 0 : avgSquareHistLrt += self->histLrt[i] * binMid * binMid;
351 0 : avgHistLrtCompl += self->histLrt[i] * binMid;
352 : }
353 0 : if (numHistLrt > 0) {
354 0 : avgHistLrt = avgHistLrt / ((float)numHistLrt);
355 : }
356 0 : avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]);
357 0 : avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]);
358 0 : fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
359 : // Get threshold for LRT feature.
360 0 : if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
361 : // Very low fluctuation, so likely noise.
362 0 : self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
363 : } else {
364 0 : self->priorModelPars[0] =
365 0 : self->featureExtractionParams.factor1ModelPars * avgHistLrt;
366 : // Check if value is within min/max range.
367 0 : if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) {
368 0 : self->priorModelPars[0] = self->featureExtractionParams.minLrt;
369 : }
370 0 : if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) {
371 0 : self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
372 : }
373 : }
374 : // Done with LRT feature.
375 :
376 : // For spectral flatness and spectral difference: compute the main peaks of
377 : // histogram.
378 0 : maxPeak1 = 0;
379 0 : maxPeak2 = 0;
380 0 : posPeak1SpecFlat = 0.0;
381 0 : posPeak2SpecFlat = 0.0;
382 0 : weightPeak1SpecFlat = 0;
383 0 : weightPeak2SpecFlat = 0;
384 :
385 : // Peaks for flatness.
386 0 : for (i = 0; i < HIST_PAR_EST; i++) {
387 0 : binMid =
388 0 : (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
389 0 : if (self->histSpecFlat[i] > maxPeak1) {
390 : // Found new "first" peak.
391 0 : maxPeak2 = maxPeak1;
392 0 : weightPeak2SpecFlat = weightPeak1SpecFlat;
393 0 : posPeak2SpecFlat = posPeak1SpecFlat;
394 :
395 0 : maxPeak1 = self->histSpecFlat[i];
396 0 : weightPeak1SpecFlat = self->histSpecFlat[i];
397 0 : posPeak1SpecFlat = binMid;
398 0 : } else if (self->histSpecFlat[i] > maxPeak2) {
399 : // Found new "second" peak.
400 0 : maxPeak2 = self->histSpecFlat[i];
401 0 : weightPeak2SpecFlat = self->histSpecFlat[i];
402 0 : posPeak2SpecFlat = binMid;
403 : }
404 : }
405 :
406 : // Compute two peaks for spectral difference.
407 0 : maxPeak1 = 0;
408 0 : maxPeak2 = 0;
409 0 : posPeak1SpecDiff = 0.0;
410 0 : posPeak2SpecDiff = 0.0;
411 0 : weightPeak1SpecDiff = 0;
412 0 : weightPeak2SpecDiff = 0;
413 : // Peaks for spectral difference.
414 0 : for (i = 0; i < HIST_PAR_EST; i++) {
415 0 : binMid =
416 0 : ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff;
417 0 : if (self->histSpecDiff[i] > maxPeak1) {
418 : // Found new "first" peak.
419 0 : maxPeak2 = maxPeak1;
420 0 : weightPeak2SpecDiff = weightPeak1SpecDiff;
421 0 : posPeak2SpecDiff = posPeak1SpecDiff;
422 :
423 0 : maxPeak1 = self->histSpecDiff[i];
424 0 : weightPeak1SpecDiff = self->histSpecDiff[i];
425 0 : posPeak1SpecDiff = binMid;
426 0 : } else if (self->histSpecDiff[i] > maxPeak2) {
427 : // Found new "second" peak.
428 0 : maxPeak2 = self->histSpecDiff[i];
429 0 : weightPeak2SpecDiff = self->histSpecDiff[i];
430 0 : posPeak2SpecDiff = binMid;
431 : }
432 : }
433 :
434 : // For spectrum flatness feature.
435 0 : useFeatureSpecFlat = 1;
436 : // Merge the two peaks if they are close.
437 0 : if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
438 0 : self->featureExtractionParams.limitPeakSpacingSpecFlat) &&
439 0 : (weightPeak2SpecFlat >
440 0 : self->featureExtractionParams.limitPeakWeightsSpecFlat *
441 : weightPeak1SpecFlat)) {
442 0 : weightPeak1SpecFlat += weightPeak2SpecFlat;
443 0 : posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat);
444 : }
445 : // Reject if weight of peaks is not large enough, or peak value too small.
446 0 : if (weightPeak1SpecFlat <
447 0 : self->featureExtractionParams.thresWeightSpecFlat ||
448 0 : posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) {
449 0 : useFeatureSpecFlat = 0;
450 : }
451 : // If selected, get the threshold.
452 0 : if (useFeatureSpecFlat == 1) {
453 : // Compute the threshold.
454 0 : self->priorModelPars[1] =
455 0 : self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
456 : // Check if value is within min/max range.
457 0 : if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) {
458 0 : self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat;
459 : }
460 0 : if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) {
461 0 : self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat;
462 : }
463 : }
464 : // Done with flatness feature.
465 :
466 : // For template feature.
467 0 : useFeatureSpecDiff = 1;
468 : // Merge the two peaks if they are close.
469 0 : if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
470 0 : self->featureExtractionParams.limitPeakSpacingSpecDiff) &&
471 0 : (weightPeak2SpecDiff >
472 0 : self->featureExtractionParams.limitPeakWeightsSpecDiff *
473 : weightPeak1SpecDiff)) {
474 0 : weightPeak1SpecDiff += weightPeak2SpecDiff;
475 0 : posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff);
476 : }
477 : // Get the threshold value.
478 0 : self->priorModelPars[3] =
479 0 : self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
480 : // Reject if weight of peaks is not large enough.
481 0 : if (weightPeak1SpecDiff <
482 0 : self->featureExtractionParams.thresWeightSpecDiff) {
483 0 : useFeatureSpecDiff = 0;
484 : }
485 : // Check if value is within min/max range.
486 0 : if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) {
487 0 : self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff;
488 : }
489 0 : if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) {
490 0 : self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff;
491 : }
492 : // Done with spectral difference feature.
493 :
494 : // Don't use template feature if fluctuation of LRT feature is very low:
495 : // most likely just noise state.
496 0 : if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
497 0 : useFeatureSpecDiff = 0;
498 : }
499 :
500 : // Select the weights between the features.
501 : // self->priorModelPars[4] is weight for LRT: always selected.
502 : // self->priorModelPars[5] is weight for spectral flatness.
503 : // self->priorModelPars[6] is weight for spectral difference.
504 0 : featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
505 0 : self->priorModelPars[4] = 1.f / featureSum;
506 0 : self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
507 0 : self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
508 :
509 : // Set hists to zero for next update.
510 0 : if (self->modelUpdatePars[0] >= 1) {
511 0 : for (i = 0; i < HIST_PAR_EST; i++) {
512 0 : self->histLrt[i] = 0;
513 0 : self->histSpecFlat[i] = 0;
514 0 : self->histSpecDiff[i] = 0;
515 : }
516 : }
517 : } // End of flag == 1.
518 0 : }
519 :
520 : // Compute spectral flatness on input spectrum.
521 : // |magnIn| is the magnitude spectrum.
522 : // Spectral flatness is returned in self->featureData[0].
523 0 : static void ComputeSpectralFlatness(NoiseSuppressionC* self,
524 : const float* magnIn) {
525 : size_t i;
526 0 : size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures.
527 : float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;
528 :
529 : // Compute spectral measures.
530 : // For flatness.
531 0 : avgSpectralFlatnessNum = 0.0;
532 0 : avgSpectralFlatnessDen = self->sumMagn;
533 0 : for (i = 0; i < shiftLP; i++) {
534 0 : avgSpectralFlatnessDen -= magnIn[i];
535 : }
536 : // Compute log of ratio of the geometric to arithmetic mean: check for log(0)
537 : // case.
538 0 : for (i = shiftLP; i < self->magnLen; i++) {
539 0 : if (magnIn[i] > 0.0) {
540 0 : avgSpectralFlatnessNum += (float)log(magnIn[i]);
541 : } else {
542 0 : self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0];
543 0 : return;
544 : }
545 : }
546 : // Normalize.
547 0 : avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen;
548 0 : avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen;
549 :
550 : // Ratio and inverse log: check for case of log(0).
551 0 : spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;
552 :
553 : // Time-avg update of spectral flatness feature.
554 0 : self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]);
555 : // Done with flatness feature.
556 : }
557 :
558 : // Compute prior and post SNR based on quantile noise estimation.
559 : // Compute DD estimate of prior SNR.
560 : // Inputs:
561 : // * |magn| is the signal magnitude spectrum estimate.
562 : // * |noise| is the magnitude noise spectrum estimate.
563 : // Outputs:
564 : // * |snrLocPrior| is the computed prior SNR.
565 : // * |snrLocPost| is the computed post SNR.
566 0 : static void ComputeSnr(const NoiseSuppressionC* self,
567 : const float* magn,
568 : const float* noise,
569 : float* snrLocPrior,
570 : float* snrLocPost) {
571 : size_t i;
572 :
573 0 : for (i = 0; i < self->magnLen; i++) {
574 : // Previous post SNR.
575 : // Previous estimate: based on previous frame with gain filter.
576 0 : float previousEstimateStsa = self->magnPrevAnalyze[i] /
577 0 : (self->noisePrev[i] + 0.0001f) * self->smooth[i];
578 : // Post SNR.
579 0 : snrLocPost[i] = 0.f;
580 0 : if (magn[i] > noise[i]) {
581 0 : snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f;
582 : }
583 : // DD estimate is sum of two terms: current estimate and previous estimate.
584 : // Directed decision update of snrPrior.
585 0 : snrLocPrior[i] =
586 0 : DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i];
587 : } // End of loop over frequencies.
588 0 : }
589 :
590 : // Compute the difference measure between input spectrum and a template/learned
591 : // noise spectrum.
592 : // |magnIn| is the input spectrum.
593 : // The reference/template spectrum is self->magnAvgPause[i].
594 : // Returns (normalized) spectral difference in self->featureData[4].
595 0 : static void ComputeSpectralDifference(NoiseSuppressionC* self,
596 : const float* magnIn) {
597 : // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
598 : // var(magnAvgPause)
599 : size_t i;
600 : float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
601 :
602 0 : avgPause = 0.0;
603 0 : avgMagn = self->sumMagn;
604 : // Compute average quantities.
605 0 : for (i = 0; i < self->magnLen; i++) {
606 : // Conservative smooth noise spectrum from pause frames.
607 0 : avgPause += self->magnAvgPause[i];
608 : }
609 0 : avgPause /= self->magnLen;
610 0 : avgMagn /= self->magnLen;
611 :
612 0 : covMagnPause = 0.0;
613 0 : varPause = 0.0;
614 0 : varMagn = 0.0;
615 : // Compute variance and covariance quantities.
616 0 : for (i = 0; i < self->magnLen; i++) {
617 0 : covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause);
618 0 : varPause +=
619 0 : (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause);
620 0 : varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
621 : }
622 0 : covMagnPause /= self->magnLen;
623 0 : varPause /= self->magnLen;
624 0 : varMagn /= self->magnLen;
625 : // Update of average magnitude spectrum.
626 0 : self->featureData[6] += self->signalEnergy;
627 :
628 0 : avgDiffNormMagn =
629 0 : varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f);
630 : // Normalize and compute time-avg update of difference feature.
631 0 : avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f));
632 0 : self->featureData[4] +=
633 0 : SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]);
634 0 : }
635 :
636 : // Compute speech/noise probability.
637 : // Speech/noise probability is returned in |probSpeechFinal|.
638 : // |magn| is the input magnitude spectrum.
639 : // |noise| is the noise spectrum.
640 : // |snrLocPrior| is the prior SNR for each frequency.
641 : // |snrLocPost| is the post SNR for each frequency.
642 0 : static void SpeechNoiseProb(NoiseSuppressionC* self,
643 : float* probSpeechFinal,
644 : const float* snrLocPrior,
645 : const float* snrLocPost) {
646 : size_t i;
647 : int sgnMap;
648 : float invLrt, gainPrior, indPrior;
649 : float logLrtTimeAvgKsum, besselTmp;
650 : float indicator0, indicator1, indicator2;
651 : float tmpFloat1, tmpFloat2;
652 : float weightIndPrior0, weightIndPrior1, weightIndPrior2;
653 : float threshPrior0, threshPrior1, threshPrior2;
654 : float widthPrior, widthPrior0, widthPrior1, widthPrior2;
655 :
656 0 : widthPrior0 = WIDTH_PR_MAP;
657 : // Width for pause region: lower range, so increase width in tanh map.
658 0 : widthPrior1 = 2.f * WIDTH_PR_MAP;
659 0 : widthPrior2 = 2.f * WIDTH_PR_MAP; // For spectral-difference measure.
660 :
661 : // Threshold parameters for features.
662 0 : threshPrior0 = self->priorModelPars[0];
663 0 : threshPrior1 = self->priorModelPars[1];
664 0 : threshPrior2 = self->priorModelPars[3];
665 :
666 : // Sign for flatness feature.
667 0 : sgnMap = (int)(self->priorModelPars[2]);
668 :
669 : // Weight parameters for features.
670 0 : weightIndPrior0 = self->priorModelPars[4];
671 0 : weightIndPrior1 = self->priorModelPars[5];
672 0 : weightIndPrior2 = self->priorModelPars[6];
673 :
674 : // Compute feature based on average LR factor.
675 : // This is the average over all frequencies of the smooth log LRT.
676 0 : logLrtTimeAvgKsum = 0.0;
677 0 : for (i = 0; i < self->magnLen; i++) {
678 0 : tmpFloat1 = 1.f + 2.f * snrLocPrior[i];
679 0 : tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f);
680 0 : besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2;
681 0 : self->logLrtTimeAvg[i] +=
682 0 : LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]);
683 0 : logLrtTimeAvgKsum += self->logLrtTimeAvg[i];
684 : }
685 0 : logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen);
686 0 : self->featureData[3] = logLrtTimeAvgKsum;
687 : // Done with computation of LR factor.
688 :
689 : // Compute the indicator functions.
690 : // Average LRT feature.
691 0 : widthPrior = widthPrior0;
692 : // Use larger width in tanh map for pause regions.
693 0 : if (logLrtTimeAvgKsum < threshPrior0) {
694 0 : widthPrior = widthPrior1;
695 : }
696 : // Compute indicator function: sigmoid map.
697 0 : indicator0 =
698 : 0.5f *
699 0 : ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f);
700 :
701 : // Spectral flatness feature.
702 0 : tmpFloat1 = self->featureData[0];
703 0 : widthPrior = widthPrior0;
704 : // Use larger width in tanh map for pause regions.
705 0 : if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
706 0 : widthPrior = widthPrior1;
707 : }
708 0 : if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
709 0 : widthPrior = widthPrior1;
710 : }
711 : // Compute indicator function: sigmoid map.
712 0 : indicator1 =
713 : 0.5f *
714 0 : ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
715 : 1.f);
716 :
717 : // For template spectrum-difference.
718 0 : tmpFloat1 = self->featureData[4];
719 0 : widthPrior = widthPrior0;
720 : // Use larger width in tanh map for pause regions.
721 0 : if (tmpFloat1 < threshPrior2) {
722 0 : widthPrior = widthPrior2;
723 : }
724 : // Compute indicator function: sigmoid map.
725 0 : indicator2 =
726 0 : 0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f);
727 :
728 : // Combine the indicator function with the feature weights.
729 0 : indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
730 0 : weightIndPrior2 * indicator2;
731 : // Done with computing indicator function.
732 :
733 : // Compute the prior probability.
734 0 : self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb);
735 : // Make sure probabilities are within range: keep floor to 0.01.
736 0 : if (self->priorSpeechProb > 1.f) {
737 0 : self->priorSpeechProb = 1.f;
738 : }
739 0 : if (self->priorSpeechProb < 0.01f) {
740 0 : self->priorSpeechProb = 0.01f;
741 : }
742 :
743 : // Final speech probability: combine prior model with LR factor:.
744 0 : gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f);
745 0 : for (i = 0; i < self->magnLen; i++) {
746 0 : invLrt = (float)exp(-self->logLrtTimeAvg[i]);
747 0 : invLrt = (float)gainPrior * invLrt;
748 0 : probSpeechFinal[i] = 1.f / (1.f + invLrt);
749 : }
750 0 : }
751 :
752 : // Update the noise features.
753 : // Inputs:
754 : // * |magn| is the signal magnitude spectrum estimate.
755 : // * |updateParsFlag| is an update flag for parameters.
756 0 : static void FeatureUpdate(NoiseSuppressionC* self,
757 : const float* magn,
758 : int updateParsFlag) {
759 : // Compute spectral flatness on input spectrum.
760 0 : ComputeSpectralFlatness(self, magn);
761 : // Compute difference of input spectrum with learned/estimated noise spectrum.
762 0 : ComputeSpectralDifference(self, magn);
763 : // Compute histograms for parameter decisions (thresholds and weights for
764 : // features).
765 : // Parameters are extracted once every window time.
766 : // (=self->modelUpdatePars[1])
767 0 : if (updateParsFlag >= 1) {
768 : // Counter update.
769 0 : self->modelUpdatePars[3]--;
770 : // Update histogram.
771 0 : if (self->modelUpdatePars[3] > 0) {
772 0 : FeatureParameterExtraction(self, 0);
773 : }
774 : // Compute model parameters.
775 0 : if (self->modelUpdatePars[3] == 0) {
776 0 : FeatureParameterExtraction(self, 1);
777 0 : self->modelUpdatePars[3] = self->modelUpdatePars[1];
778 : // If wish to update only once, set flag to zero.
779 0 : if (updateParsFlag == 1) {
780 0 : self->modelUpdatePars[0] = 0;
781 : } else {
782 : // Update every window:
783 : // Get normalization for spectral difference for next window estimate.
784 0 : self->featureData[6] =
785 0 : self->featureData[6] / ((float)self->modelUpdatePars[1]);
786 0 : self->featureData[5] =
787 0 : 0.5f * (self->featureData[6] + self->featureData[5]);
788 0 : self->featureData[6] = 0.f;
789 : }
790 : }
791 : }
792 0 : }
793 :
794 : // Update the noise estimate.
795 : // Inputs:
796 : // * |magn| is the signal magnitude spectrum estimate.
797 : // * |snrLocPrior| is the prior SNR.
798 : // * |snrLocPost| is the post SNR.
799 : // Output:
800 : // * |noise| is the updated noise magnitude spectrum estimate.
801 0 : static void UpdateNoiseEstimate(NoiseSuppressionC* self,
802 : const float* magn,
803 : const float* snrLocPrior,
804 : const float* snrLocPost,
805 : float* noise) {
806 : size_t i;
807 : float probSpeech, probNonSpeech;
808 : // Time-avg parameter for noise update.
809 0 : float gammaNoiseTmp = NOISE_UPDATE;
810 : float gammaNoiseOld;
811 : float noiseUpdateTmp;
812 :
813 0 : for (i = 0; i < self->magnLen; i++) {
814 0 : probSpeech = self->speechProb[i];
815 0 : probNonSpeech = 1.f - probSpeech;
816 : // Temporary noise update:
817 : // Use it for speech frames if update value is less than previous.
818 0 : noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] +
819 0 : (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
820 0 : probSpeech * self->noisePrev[i]);
821 : // Time-constant based on speech/noise state.
822 0 : gammaNoiseOld = gammaNoiseTmp;
823 0 : gammaNoiseTmp = NOISE_UPDATE;
824 : // Increase gamma (i.e., less noise update) for frame likely to be speech.
825 0 : if (probSpeech > PROB_RANGE) {
826 0 : gammaNoiseTmp = SPEECH_UPDATE;
827 : }
828 : // Conservative noise update.
829 0 : if (probSpeech < PROB_RANGE) {
830 0 : self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]);
831 : }
832 : // Noise update.
833 0 : if (gammaNoiseTmp == gammaNoiseOld) {
834 0 : noise[i] = noiseUpdateTmp;
835 : } else {
836 0 : noise[i] = gammaNoiseTmp * self->noisePrev[i] +
837 0 : (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
838 0 : probSpeech * self->noisePrev[i]);
839 : // Allow for noise update downwards:
840 : // If noise update decreases the noise, it is safe, so allow it to
841 : // happen.
842 0 : if (noiseUpdateTmp < noise[i]) {
843 0 : noise[i] = noiseUpdateTmp;
844 : }
845 : }
846 : } // End of freq loop.
847 0 : }
848 :
849 : // Updates |buffer| with a new |frame|.
850 : // Inputs:
851 : // * |frame| is a new speech frame or NULL for setting to zero.
852 : // * |frame_length| is the length of the new frame.
853 : // * |buffer_length| is the length of the buffer.
854 : // Output:
855 : // * |buffer| is the updated buffer.
856 0 : static void UpdateBuffer(const float* frame,
857 : size_t frame_length,
858 : size_t buffer_length,
859 : float* buffer) {
860 0 : RTC_DCHECK_LT(buffer_length, 2 * frame_length);
861 :
862 0 : memcpy(buffer,
863 0 : buffer + frame_length,
864 0 : sizeof(*buffer) * (buffer_length - frame_length));
865 0 : if (frame) {
866 0 : memcpy(buffer + buffer_length - frame_length,
867 : frame,
868 : sizeof(*buffer) * frame_length);
869 : } else {
870 0 : memset(buffer + buffer_length - frame_length,
871 : 0,
872 : sizeof(*buffer) * frame_length);
873 : }
874 0 : }
875 :
876 : // Transforms the signal from time to frequency domain.
877 : // Inputs:
878 : // * |time_data| is the signal in the time domain.
879 : // * |time_data_length| is the length of the analysis buffer.
880 : // * |magnitude_length| is the length of the spectrum magnitude, which equals
881 : // the length of both |real| and |imag| (time_data_length / 2 + 1).
882 : // Outputs:
883 : // * |time_data| is the signal in the frequency domain.
884 : // * |real| is the real part of the frequency domain.
885 : // * |imag| is the imaginary part of the frequency domain.
886 : // * |magn| is the calculated signal magnitude in the frequency domain.
887 0 : static void FFT(NoiseSuppressionC* self,
888 : float* time_data,
889 : size_t time_data_length,
890 : size_t magnitude_length,
891 : float* real,
892 : float* imag,
893 : float* magn) {
894 : size_t i;
895 :
896 0 : RTC_DCHECK_EQ(magnitude_length, time_data_length / 2 + 1);
897 :
898 0 : WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft);
899 :
900 0 : imag[0] = 0;
901 0 : real[0] = time_data[0];
902 0 : magn[0] = fabsf(real[0]) + 1.f;
903 0 : imag[magnitude_length - 1] = 0;
904 0 : real[magnitude_length - 1] = time_data[1];
905 0 : magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f;
906 0 : for (i = 1; i < magnitude_length - 1; ++i) {
907 0 : real[i] = time_data[2 * i];
908 0 : imag[i] = time_data[2 * i + 1];
909 : // Magnitude spectrum.
910 0 : magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
911 : }
912 0 : }
913 :
914 : // Transforms the signal from frequency to time domain.
915 : // Inputs:
916 : // * |real| is the real part of the frequency domain.
917 : // * |imag| is the imaginary part of the frequency domain.
918 : // * |magnitude_length| is the length of the spectrum magnitude, which equals
919 : // the length of both |real| and |imag|.
920 : // * |time_data_length| is the length of the analysis buffer
921 : // (2 * (magnitude_length - 1)).
922 : // Output:
923 : // * |time_data| is the signal in the time domain.
924 0 : static void IFFT(NoiseSuppressionC* self,
925 : const float* real,
926 : const float* imag,
927 : size_t magnitude_length,
928 : size_t time_data_length,
929 : float* time_data) {
930 : size_t i;
931 :
932 0 : RTC_DCHECK_EQ(time_data_length, 2 * (magnitude_length - 1));
933 :
934 0 : time_data[0] = real[0];
935 0 : time_data[1] = real[magnitude_length - 1];
936 0 : for (i = 1; i < magnitude_length - 1; ++i) {
937 0 : time_data[2 * i] = real[i];
938 0 : time_data[2 * i + 1] = imag[i];
939 : }
940 0 : WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft);
941 :
942 0 : for (i = 0; i < time_data_length; ++i) {
943 0 : time_data[i] *= 2.f / time_data_length; // FFT scaling.
944 : }
945 0 : }
946 :
947 : // Calculates the energy of a buffer.
948 : // Inputs:
949 : // * |buffer| is the buffer over which the energy is calculated.
950 : // * |length| is the length of the buffer.
951 : // Returns the calculated energy.
952 0 : static float Energy(const float* buffer, size_t length) {
953 : size_t i;
954 0 : float energy = 0.f;
955 :
956 0 : for (i = 0; i < length; ++i) {
957 0 : energy += buffer[i] * buffer[i];
958 : }
959 :
960 0 : return energy;
961 : }
962 :
963 : // Windows a buffer.
964 : // Inputs:
965 : // * |window| is the window by which to multiply.
966 : // * |data| is the data without windowing.
967 : // * |length| is the length of the window and data.
968 : // Output:
969 : // * |data_windowed| is the windowed data.
970 0 : static void Windowing(const float* window,
971 : const float* data,
972 : size_t length,
973 : float* data_windowed) {
974 : size_t i;
975 :
976 0 : for (i = 0; i < length; ++i) {
977 0 : data_windowed[i] = window[i] * data[i];
978 : }
979 0 : }
980 :
981 : // Estimate prior SNR decision-directed and compute DD based Wiener Filter.
982 : // Input:
983 : // * |magn| is the signal magnitude spectrum estimate.
984 : // Output:
985 : // * |theFilter| is the frequency response of the computed Wiener filter.
986 0 : static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self,
987 : const float* magn,
988 : float* theFilter) {
989 : size_t i;
990 : float snrPrior, previousEstimateStsa, currentEstimateStsa;
991 :
992 0 : for (i = 0; i < self->magnLen; i++) {
993 : // Previous estimate: based on previous frame with gain filter.
994 0 : previousEstimateStsa = self->magnPrevProcess[i] /
995 0 : (self->noisePrev[i] + 0.0001f) * self->smooth[i];
996 : // Post and prior SNR.
997 0 : currentEstimateStsa = 0.f;
998 0 : if (magn[i] > self->noise[i]) {
999 0 : currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f;
1000 : }
1001 : // DD estimate is sum of two terms: current estimate and previous estimate.
1002 : // Directed decision update of |snrPrior|.
1003 0 : snrPrior = DD_PR_SNR * previousEstimateStsa +
1004 0 : (1.f - DD_PR_SNR) * currentEstimateStsa;
1005 : // Gain filter.
1006 0 : theFilter[i] = snrPrior / (self->overdrive + snrPrior);
1007 : } // End of loop over frequencies.
1008 0 : }
1009 :
1010 : // Changes the aggressiveness of the noise suppression method.
1011 : // |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is
1012 : // aggressive (15dB).
1013 : // Returns 0 on success and -1 otherwise.
1014 0 : int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) {
1015 : // Allow for modes: 0, 1, 2, 3.
1016 0 : if (mode < 0 || mode > 3) {
1017 0 : return (-1);
1018 : }
1019 :
1020 0 : self->aggrMode = mode;
1021 0 : if (mode == 0) {
1022 0 : self->overdrive = 1.f;
1023 0 : self->denoiseBound = 0.5f;
1024 0 : self->gainmap = 0;
1025 0 : } else if (mode == 1) {
1026 : // self->overdrive = 1.25f;
1027 0 : self->overdrive = 1.f;
1028 0 : self->denoiseBound = 0.25f;
1029 0 : self->gainmap = 1;
1030 0 : } else if (mode == 2) {
1031 : // self->overdrive = 1.25f;
1032 0 : self->overdrive = 1.1f;
1033 0 : self->denoiseBound = 0.125f;
1034 0 : self->gainmap = 1;
1035 0 : } else if (mode == 3) {
1036 : // self->overdrive = 1.3f;
1037 0 : self->overdrive = 1.25f;
1038 0 : self->denoiseBound = 0.09f;
1039 0 : self->gainmap = 1;
1040 : }
1041 0 : return 0;
1042 : }
1043 :
1044 0 : void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) {
1045 : size_t i;
1046 0 : const size_t kStartBand = 5; // Skip first frequency bins during estimation.
1047 : int updateParsFlag;
1048 : float energy;
1049 0 : float signalEnergy = 0.f;
1050 0 : float sumMagn = 0.f;
1051 : float tmpFloat1, tmpFloat2, tmpFloat3;
1052 : float winData[ANAL_BLOCKL_MAX];
1053 : float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
1054 : float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
1055 : float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
1056 : // Variables during startup.
1057 0 : float sum_log_i = 0.0;
1058 0 : float sum_log_i_square = 0.0;
1059 0 : float sum_log_magn = 0.0;
1060 0 : float sum_log_i_log_magn = 0.0;
1061 0 : float parametric_exp = 0.0;
1062 0 : float parametric_num = 0.0;
1063 :
1064 : // Check that initiation has been done.
1065 0 : RTC_DCHECK_EQ(1, self->initFlag);
1066 0 : updateParsFlag = self->modelUpdatePars[0];
1067 :
1068 : // Update analysis buffer for L band.
1069 0 : UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf);
1070 :
1071 0 : Windowing(self->window, self->analyzeBuf, self->anaLen, winData);
1072 0 : energy = Energy(winData, self->anaLen);
1073 0 : if (energy == 0.0) {
1074 : // We want to avoid updating statistics in this case:
1075 : // Updating feature statistics when we have zeros only will cause
1076 : // thresholds to move towards zero signal situations. This in turn has the
1077 : // effect that once the signal is "turned on" (non-zero values) everything
1078 : // will be treated as speech and there is no noise suppression effect.
1079 : // Depending on the duration of the inactive signal it takes a
1080 : // considerable amount of time for the system to learn what is noise and
1081 : // what is speech.
1082 0 : return;
1083 : }
1084 :
1085 0 : self->blockInd++; // Update the block index only when we process a block.
1086 :
1087 0 : FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
1088 :
1089 0 : for (i = 0; i < self->magnLen; i++) {
1090 0 : signalEnergy += real[i] * real[i] + imag[i] * imag[i];
1091 0 : sumMagn += magn[i];
1092 0 : if (self->blockInd < END_STARTUP_SHORT) {
1093 0 : if (i >= kStartBand) {
1094 0 : tmpFloat2 = logf((float)i);
1095 0 : sum_log_i += tmpFloat2;
1096 0 : sum_log_i_square += tmpFloat2 * tmpFloat2;
1097 0 : tmpFloat1 = logf(magn[i]);
1098 0 : sum_log_magn += tmpFloat1;
1099 0 : sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
1100 : }
1101 : }
1102 : }
1103 0 : signalEnergy /= self->magnLen;
1104 0 : self->signalEnergy = signalEnergy;
1105 0 : self->sumMagn = sumMagn;
1106 :
1107 : // Quantile noise estimate.
1108 0 : NoiseEstimation(self, magn, noise);
1109 : // Compute simplified noise model during startup.
1110 0 : if (self->blockInd < END_STARTUP_SHORT) {
1111 : // Estimate White noise.
1112 0 : self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive;
1113 : // Estimate Pink noise parameters.
1114 0 : tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand);
1115 0 : tmpFloat1 -= (sum_log_i * sum_log_i);
1116 0 : tmpFloat2 =
1117 0 : (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
1118 0 : tmpFloat3 = tmpFloat2 / tmpFloat1;
1119 : // Constrain the estimated spectrum to be positive.
1120 0 : if (tmpFloat3 < 0.f) {
1121 0 : tmpFloat3 = 0.f;
1122 : }
1123 0 : self->pinkNoiseNumerator += tmpFloat3;
1124 0 : tmpFloat2 = (sum_log_i * sum_log_magn);
1125 0 : tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn;
1126 0 : tmpFloat3 = tmpFloat2 / tmpFloat1;
1127 : // Constrain the pink noise power to be in the interval [0, 1].
1128 0 : if (tmpFloat3 < 0.f) {
1129 0 : tmpFloat3 = 0.f;
1130 : }
1131 0 : if (tmpFloat3 > 1.f) {
1132 0 : tmpFloat3 = 1.f;
1133 : }
1134 0 : self->pinkNoiseExp += tmpFloat3;
1135 :
1136 : // Calculate frequency independent parts of parametric noise estimate.
1137 0 : if (self->pinkNoiseExp > 0.f) {
1138 : // Use pink noise estimate.
1139 0 : parametric_num =
1140 0 : expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1));
1141 0 : parametric_num *= (float)(self->blockInd + 1);
1142 0 : parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1);
1143 : }
1144 0 : for (i = 0; i < self->magnLen; i++) {
1145 : // Estimate the background noise using the white and pink noise
1146 : // parameters.
1147 0 : if (self->pinkNoiseExp == 0.f) {
1148 : // Use white noise estimate.
1149 0 : self->parametricNoise[i] = self->whiteNoiseLevel;
1150 : } else {
1151 : // Use pink noise estimate.
1152 0 : float use_band = (float)(i < kStartBand ? kStartBand : i);
1153 0 : self->parametricNoise[i] =
1154 0 : parametric_num / powf(use_band, parametric_exp);
1155 : }
1156 : // Weight quantile noise with modeled noise.
1157 0 : noise[i] *= (self->blockInd);
1158 0 : tmpFloat2 =
1159 0 : self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd);
1160 0 : noise[i] += (tmpFloat2 / (float)(self->blockInd + 1));
1161 0 : noise[i] /= END_STARTUP_SHORT;
1162 : }
1163 : }
1164 : // Compute average signal during END_STARTUP_LONG time:
1165 : // used to normalize spectral difference measure.
1166 0 : if (self->blockInd < END_STARTUP_LONG) {
1167 0 : self->featureData[5] *= self->blockInd;
1168 0 : self->featureData[5] += signalEnergy;
1169 0 : self->featureData[5] /= (self->blockInd + 1);
1170 : }
1171 :
1172 : // Post and prior SNR needed for SpeechNoiseProb.
1173 0 : ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost);
1174 :
1175 0 : FeatureUpdate(self, magn, updateParsFlag);
1176 0 : SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost);
1177 0 : UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise);
1178 :
1179 : // Keep track of noise spectrum for next frame.
1180 0 : memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
1181 0 : memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
1182 : }
1183 :
1184 0 : void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
1185 : const float* const* speechFrame,
1186 : size_t num_bands,
1187 : float* const* outFrame) {
1188 : // Main routine for noise reduction.
1189 0 : int flagHB = 0;
1190 : size_t i, j;
1191 :
1192 : float energy1, energy2, gain, factor, factor1, factor2;
1193 : float fout[BLOCKL_MAX];
1194 : float winData[ANAL_BLOCKL_MAX];
1195 : float magn[HALF_ANAL_BLOCKL];
1196 : float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
1197 : float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
1198 :
1199 : // SWB variables.
1200 0 : int deltaBweHB = 1;
1201 0 : int deltaGainHB = 1;
1202 0 : float decayBweHB = 1.0;
1203 0 : float gainMapParHB = 1.0;
1204 0 : float gainTimeDomainHB = 1.0;
1205 : float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
1206 : float sumMagnAnalyze, sumMagnProcess;
1207 :
1208 : // Check that initiation has been done.
1209 0 : RTC_DCHECK_EQ(1, self->initFlag);
1210 0 : RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX);
1211 :
1212 0 : const float* const* speechFrameHB = NULL;
1213 0 : float* const* outFrameHB = NULL;
1214 0 : size_t num_high_bands = 0;
1215 0 : if (num_bands > 1) {
1216 0 : speechFrameHB = &speechFrame[1];
1217 0 : outFrameHB = &outFrame[1];
1218 0 : num_high_bands = num_bands - 1;
1219 0 : flagHB = 1;
1220 : // Range for averaging low band quantities for H band gain.
1221 0 : deltaBweHB = (int)self->magnLen / 4;
1222 0 : deltaGainHB = deltaBweHB;
1223 : }
1224 :
1225 : // Update analysis buffer for L band.
1226 0 : UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
1227 :
1228 0 : if (flagHB == 1) {
1229 : // Update analysis buffer for H bands.
1230 0 : for (i = 0; i < num_high_bands; ++i) {
1231 0 : UpdateBuffer(speechFrameHB[i],
1232 : self->blockLen,
1233 : self->anaLen,
1234 0 : self->dataBufHB[i]);
1235 : }
1236 : }
1237 :
1238 0 : Windowing(self->window, self->dataBuf, self->anaLen, winData);
1239 0 : energy1 = Energy(winData, self->anaLen);
1240 0 : if (energy1 == 0.0) {
1241 : // Synthesize the special case of zero input.
1242 : // Read out fully processed segment.
1243 0 : for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
1244 0 : fout[i - self->windShift] = self->syntBuf[i];
1245 : }
1246 : // Update synthesis buffer.
1247 0 : UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
1248 :
1249 0 : for (i = 0; i < self->blockLen; ++i)
1250 0 : outFrame[0][i] =
1251 0 : WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
1252 :
1253 : // For time-domain gain of HB.
1254 0 : if (flagHB == 1) {
1255 0 : for (i = 0; i < num_high_bands; ++i) {
1256 0 : for (j = 0; j < self->blockLen; ++j) {
1257 0 : outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
1258 : self->dataBufHB[i][j],
1259 : WEBRTC_SPL_WORD16_MIN);
1260 : }
1261 : }
1262 : }
1263 :
1264 0 : return;
1265 : }
1266 :
1267 0 : FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
1268 :
1269 0 : if (self->blockInd < END_STARTUP_SHORT) {
1270 0 : for (i = 0; i < self->magnLen; i++) {
1271 0 : self->initMagnEst[i] += magn[i];
1272 : }
1273 : }
1274 :
1275 0 : ComputeDdBasedWienerFilter(self, magn, theFilter);
1276 :
1277 0 : for (i = 0; i < self->magnLen; i++) {
1278 : // Flooring bottom.
1279 0 : if (theFilter[i] < self->denoiseBound) {
1280 0 : theFilter[i] = self->denoiseBound;
1281 : }
1282 : // Flooring top.
1283 0 : if (theFilter[i] > 1.f) {
1284 0 : theFilter[i] = 1.f;
1285 : }
1286 0 : if (self->blockInd < END_STARTUP_SHORT) {
1287 0 : theFilterTmp[i] =
1288 0 : (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]);
1289 0 : theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f);
1290 : // Flooring bottom.
1291 0 : if (theFilterTmp[i] < self->denoiseBound) {
1292 0 : theFilterTmp[i] = self->denoiseBound;
1293 : }
1294 : // Flooring top.
1295 0 : if (theFilterTmp[i] > 1.f) {
1296 0 : theFilterTmp[i] = 1.f;
1297 : }
1298 : // Weight the two suppression filters.
1299 0 : theFilter[i] *= (self->blockInd);
1300 0 : theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd);
1301 0 : theFilter[i] += theFilterTmp[i];
1302 0 : theFilter[i] /= (END_STARTUP_SHORT);
1303 : }
1304 :
1305 0 : self->smooth[i] = theFilter[i];
1306 0 : real[i] *= self->smooth[i];
1307 0 : imag[i] *= self->smooth[i];
1308 : }
1309 : // Keep track of |magn| spectrum for next frame.
1310 0 : memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen);
1311 0 : memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen);
1312 : // Back to time domain.
1313 0 : IFFT(self, real, imag, self->magnLen, self->anaLen, winData);
1314 :
1315 : // Scale factor: only do it after END_STARTUP_LONG time.
1316 0 : factor = 1.f;
1317 0 : if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) {
1318 0 : factor1 = 1.f;
1319 0 : factor2 = 1.f;
1320 :
1321 0 : energy2 = Energy(winData, self->anaLen);
1322 0 : gain = (float)sqrt(energy2 / (energy1 + 1.f));
1323 :
1324 : // Scaling for new version.
1325 0 : if (gain > B_LIM) {
1326 0 : factor1 = 1.f + 1.3f * (gain - B_LIM);
1327 0 : if (gain * factor1 > 1.f) {
1328 0 : factor1 = 1.f / gain;
1329 : }
1330 : }
1331 0 : if (gain < B_LIM) {
1332 : // Don't reduce scale too much for pause regions:
1333 : // attenuation here should be controlled by flooring.
1334 0 : if (gain <= self->denoiseBound) {
1335 0 : gain = self->denoiseBound;
1336 : }
1337 0 : factor2 = 1.f - 0.3f * (B_LIM - gain);
1338 : }
1339 : // Combine both scales with speech/noise prob:
1340 : // note prior (priorSpeechProb) is not frequency dependent.
1341 0 : factor = self->priorSpeechProb * factor1 +
1342 0 : (1.f - self->priorSpeechProb) * factor2;
1343 : } // Out of self->gainmap == 1.
1344 :
1345 0 : Windowing(self->window, winData, self->anaLen, winData);
1346 :
1347 : // Synthesis.
1348 0 : for (i = 0; i < self->anaLen; i++) {
1349 0 : self->syntBuf[i] += factor * winData[i];
1350 : }
1351 : // Read out fully processed segment.
1352 0 : for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
1353 0 : fout[i - self->windShift] = self->syntBuf[i];
1354 : }
1355 : // Update synthesis buffer.
1356 0 : UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
1357 :
1358 0 : for (i = 0; i < self->blockLen; ++i)
1359 0 : outFrame[0][i] =
1360 0 : WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
1361 :
1362 : // For time-domain gain of HB.
1363 0 : if (flagHB == 1) {
1364 : // Average speech prob from low band.
1365 : // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
1366 0 : avgProbSpeechHB = 0.0;
1367 0 : for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) {
1368 0 : avgProbSpeechHB += self->speechProb[i];
1369 : }
1370 0 : avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
1371 : // If the speech was suppressed by a component between Analyze and
1372 : // Process, for example the AEC, then it should not be considered speech
1373 : // for high band suppression purposes.
1374 0 : sumMagnAnalyze = 0;
1375 0 : sumMagnProcess = 0;
1376 0 : for (i = 0; i < self->magnLen; ++i) {
1377 0 : sumMagnAnalyze += self->magnPrevAnalyze[i];
1378 0 : sumMagnProcess += self->magnPrevProcess[i];
1379 : }
1380 0 : avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze;
1381 : // Average filter gain from low band.
1382 : // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
1383 0 : avgFilterGainHB = 0.0;
1384 0 : for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) {
1385 0 : avgFilterGainHB += self->smooth[i];
1386 : }
1387 0 : avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
1388 0 : avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f;
1389 : // Gain based on speech probability.
1390 0 : gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
1391 : // Combine gain with low band gain.
1392 0 : gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB;
1393 0 : if (avgProbSpeechHB >= 0.5f) {
1394 0 : gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB;
1395 : }
1396 0 : gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
1397 : // Make sure gain is within flooring range.
1398 : // Flooring bottom.
1399 0 : if (gainTimeDomainHB < self->denoiseBound) {
1400 0 : gainTimeDomainHB = self->denoiseBound;
1401 : }
1402 : // Flooring top.
1403 0 : if (gainTimeDomainHB > 1.f) {
1404 0 : gainTimeDomainHB = 1.f;
1405 : }
1406 : // Apply gain.
1407 0 : for (i = 0; i < num_high_bands; ++i) {
1408 0 : for (j = 0; j < self->blockLen; j++) {
1409 0 : outFrameHB[i][j] =
1410 0 : WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
1411 : gainTimeDomainHB * self->dataBufHB[i][j],
1412 : WEBRTC_SPL_WORD16_MIN);
1413 : }
1414 : }
1415 : } // End of H band gain computation.
1416 : }
|