Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_coding/codecs/cng/webrtc_cng.h"
12 :
13 : #include <algorithm>
14 :
15 : #include "webrtc/base/safe_conversions.h"
16 : #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
17 :
18 : namespace webrtc {
19 :
20 : namespace {
21 :
// Upper bound, in samples, on the amount of audio handled by one call to
// ComfortNoiseDecoder::Generate() or ComfortNoiseEncoder::Encode(). It sizes
// the on-stack work buffers used by both functions.
constexpr size_t kCngMaxOutsizeOrder = 640;

// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
// Converts reflection coefficients |k| into polynomial coefficients |a|.
// Declared here because it is used before its definition later in this file.
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);
26 :
// Energy lookup table shared by the decoder and the encoder. The first byte
// of a SID payload is an index into this table: UpdateSid() reads the entry
// directly, and Encode() searches for the first entry smaller than the
// measured energy. Consecutive entries shrink by a factor of roughly 0.794,
// so ten entries (two rows below) correspond to one decade.
constexpr int32_t WebRtcCng_kDbov[94] = {
    1081109975, 858756178, 682134279, 541838517, 430397633,
    341876992,  271562548, 215709799, 171344384, 136103682,
    108110997,  85875618,  68213428,  54183852,  43039763,
    34187699,   27156255,  21570980,  17134438,  13610368,
    10811100,   8587562,   6821343,   5418385,   4303976,
    3418770,    2715625,   2157098,   1713444,   1361037,
    1081110,    858756,    682134,    541839,    430398,
    341877,     271563,    215710,    171344,    136104,
    108111,     85876,     68213,     54184,     43040,
    34188,      27156,     21571,     17134,     13610,
    10811,      8588,      6821,      5418,      4304,
    3419,       2716,      2157,      1713,      1361,
    1081,       859,       682,       542,       430,
    342,        272,       216,       171,       136,
    108,        86,        68,        54,        43,
    34,         27,        22,        17,        14,
    11,         9,         7,         5,         4,
    3,          3,         2,         2,         1,
    1,          1,         1,         1
};
45 :
46 : const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
47 : 32702, 32636, 32570, 32505, 32439, 32374,
48 : 32309, 32244, 32179, 32114, 32049, 31985
49 : };
50 :
51 : } // namespace
52 :
53 0 : ComfortNoiseDecoder::ComfortNoiseDecoder() {
54 : /* Needed to get the right function pointers in SPLIB. */
55 0 : WebRtcSpl_Init();
56 0 : Reset();
57 0 : }
58 :
59 0 : void ComfortNoiseDecoder::Reset() {
60 0 : dec_seed_ = 7777; /* For debugging only. */
61 0 : dec_target_energy_ = 0;
62 0 : dec_used_energy_ = 0;
63 0 : for (auto& c : dec_target_reflCoefs_)
64 0 : c = 0;
65 0 : for (auto& c : dec_used_reflCoefs_)
66 0 : c = 0;
67 0 : for (auto& c : dec_filtstate_)
68 0 : c = 0;
69 0 : for (auto& c : dec_filtstateLow_)
70 0 : c = 0;
71 0 : dec_order_ = 5;
72 0 : dec_target_scale_factor_ = 0;
73 0 : dec_used_scale_factor_ = 0;
74 0 : }
75 :
76 0 : void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
77 : int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
78 : int32_t targetEnergy;
79 0 : size_t length = sid.size();
80 : /* Throw away reflection coefficients of higher order than we can handle. */
81 0 : if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
82 0 : length = WEBRTC_CNG_MAX_LPC_ORDER + 1;
83 :
84 0 : dec_order_ = static_cast<uint16_t>(length - 1);
85 :
86 0 : uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
87 0 : targetEnergy = WebRtcCng_kDbov[sid0];
88 : /* Take down target energy to 75%. */
89 0 : targetEnergy = targetEnergy >> 1;
90 0 : targetEnergy += targetEnergy >> 2;
91 :
92 0 : dec_target_energy_ = targetEnergy;
93 :
94 : /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
95 0 : if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
96 0 : for (size_t i = 0; i < (dec_order_); i++) {
97 0 : refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/
98 0 : dec_target_reflCoefs_[i] = refCs[i];
99 : }
100 : } else {
101 0 : for (size_t i = 0; i < (dec_order_); i++) {
102 0 : refCs[i] = (sid[i + 1] - 127) << 8; /* Q7 to Q15. */
103 0 : dec_target_reflCoefs_[i] = refCs[i];
104 : }
105 : }
106 :
107 0 : for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
108 0 : refCs[i] = 0;
109 0 : dec_target_reflCoefs_[i] = refCs[i];
110 : }
111 0 : }
112 :
113 0 : bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
114 : bool new_period) {
115 : int16_t excitation[kCngMaxOutsizeOrder];
116 : int16_t low[kCngMaxOutsizeOrder];
117 : int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
118 0 : int16_t ReflBetaStd = 26214; /* 0.8 in q15. */
119 0 : int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */
120 0 : int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */
121 0 : int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
122 : int16_t Beta, BetaC; /* These are in Q15. */
123 : int32_t targetEnergy;
124 : int16_t En;
125 : int16_t temp16;
126 0 : const size_t num_samples = out_data.size();
127 :
128 0 : if (num_samples > kCngMaxOutsizeOrder) {
129 0 : return false;
130 : }
131 :
132 0 : if (new_period) {
133 0 : dec_used_scale_factor_ = dec_target_scale_factor_;
134 0 : Beta = ReflBetaNewP;
135 0 : BetaC = ReflBetaCompNewP;
136 : } else {
137 0 : Beta = ReflBetaStd;
138 0 : BetaC = ReflBetaCompStd;
139 : }
140 :
141 : /* Calculate new scale factor in Q13 */
142 0 : dec_used_scale_factor_ =
143 0 : rtc::checked_cast<int16_t>(
144 0 : WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
145 0 : WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
146 :
147 0 : dec_used_energy_ = dec_used_energy_ >> 1;
148 0 : dec_used_energy_ += dec_target_energy_ >> 1;
149 :
150 : /* Do the same for the reflection coeffs, albeit in Q15. */
151 0 : for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
152 0 : dec_used_reflCoefs_[i] = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
153 0 : dec_used_reflCoefs_[i], Beta, 15);
154 0 : dec_used_reflCoefs_[i] += (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
155 : dec_target_reflCoefs_[i], BetaC, 15);
156 : }
157 :
158 : /* Compute the polynomial coefficients. */
159 0 : WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
160 :
161 :
162 0 : targetEnergy = dec_used_energy_;
163 :
164 : /* Calculate scaling factor based on filter energy. */
165 0 : En = 8192; /* 1.0 in Q13. */
166 0 : for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
167 : /* Floating point value for reference.
168 : E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
169 : (dec_used_reflCoefs_[i] / 32768.0);
170 : */
171 :
172 : /* Same in fixed point. */
173 : /* K(i).^2 in Q15. */
174 0 : temp16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
175 : dec_used_reflCoefs_[i], dec_used_reflCoefs_[i], 15);
176 : /* 1 - K(i).^2 in Q15. */
177 0 : temp16 = 0x7fff - temp16;
178 0 : En = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
179 : }
180 :
181 : /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
182 :
183 : /* Calculate sqrt(En * target_energy / excitation energy) */
184 0 : targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
185 :
186 0 : En = (int16_t) WebRtcSpl_Sqrt(En) << 6;
187 0 : En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
188 0 : dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
189 :
190 : /* Generate excitation. */
191 : /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
192 0 : for (size_t i = 0; i < num_samples; i++) {
193 0 : excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
194 : }
195 :
196 : /* Scale to correct energy. */
197 0 : WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
198 0 : num_samples, 13);
199 :
200 : /* |lpPoly| - Coefficients in Q12.
201 : * |excitation| - Speech samples.
202 : * |nst->dec_filtstate| - State preservation.
203 : * |out_data| - Filtered speech samples. */
204 0 : WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
205 : num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
206 : dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
207 0 : out_data.data(), low, num_samples);
208 :
209 0 : return true;
210 : }
211 :
212 0 : ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
213 : : enc_nrOfCoefs_(quality),
214 : enc_sampfreq_(fs),
215 : enc_interval_(interval),
216 : enc_msSinceSid_(0),
217 : enc_Energy_(0),
218 : enc_reflCoefs_{0},
219 : enc_corrVector_{0},
220 0 : enc_seed_(7777) /* For debugging only. */ {
221 0 : RTC_CHECK(quality <= WEBRTC_CNG_MAX_LPC_ORDER && quality > 0);
222 : /* Needed to get the right function pointers in SPLIB. */
223 0 : WebRtcSpl_Init();
224 0 : }
225 :
226 0 : void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
227 0 : RTC_CHECK(quality <= WEBRTC_CNG_MAX_LPC_ORDER && quality > 0);
228 0 : enc_nrOfCoefs_ = quality;
229 0 : enc_sampfreq_ = fs;
230 0 : enc_interval_ = interval;
231 0 : enc_msSinceSid_ = 0;
232 0 : enc_Energy_ = 0;
233 0 : for (auto& c : enc_reflCoefs_)
234 0 : c = 0;
235 0 : for (auto& c : enc_corrVector_)
236 0 : c = 0;
237 0 : enc_seed_ = 7777; /* For debugging only. */
238 0 : }
239 :
240 0 : size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
241 : bool force_sid,
242 : rtc::Buffer* output) {
243 : int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
244 : int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
245 : int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
246 : int16_t hanningW[kCngMaxOutsizeOrder];
247 0 : int16_t ReflBeta = 19661; /* 0.6 in q15. */
248 0 : int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
249 : int32_t outEnergy;
250 : int outShifts;
251 : size_t i;
252 : int stab;
253 : int acorrScale;
254 : size_t index;
255 : size_t ind, factor;
256 : int32_t* bptr;
257 : int32_t blo, bhi;
258 : int16_t negate;
259 : const int16_t* aptr;
260 : int16_t speechBuf[kCngMaxOutsizeOrder];
261 :
262 0 : const size_t num_samples = speech.size();
263 0 : RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
264 :
265 0 : for (i = 0; i < num_samples; i++) {
266 0 : speechBuf[i] = speech[i];
267 : }
268 :
269 0 : factor = num_samples;
270 :
271 : /* Calculate energy and a coefficients. */
272 0 : outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
273 0 : while (outShifts > 0) {
274 : /* We can only do 5 shifts without destroying accuracy in
275 : * division factor. */
276 0 : if (outShifts > 5) {
277 0 : outEnergy <<= (outShifts - 5);
278 0 : outShifts = 5;
279 : } else {
280 0 : factor /= 2;
281 0 : outShifts--;
282 : }
283 : }
284 0 : outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
285 :
286 0 : if (outEnergy > 1) {
287 : /* Create Hanning Window. */
288 0 : WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
289 0 : for (i = 0; i < (num_samples / 2); i++)
290 0 : hanningW[num_samples - i - 1] = hanningW[i];
291 :
292 0 : WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
293 0 : 14);
294 :
295 0 : WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
296 0 : corrVector, &acorrScale);
297 :
298 0 : if (*corrVector == 0)
299 0 : *corrVector = WEBRTC_SPL_WORD16_MAX;
300 :
301 : /* Adds the bandwidth expansion. */
302 0 : aptr = WebRtcCng_kCorrWindow;
303 0 : bptr = corrVector;
304 :
305 : /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
306 0 : for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
307 : /* The below code multiplies the 16 b corrWindow values (Q15) with
308 : * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
309 0 : negate = *bptr < 0;
310 0 : if (negate)
311 0 : *bptr = -*bptr;
312 :
313 0 : blo = (int32_t) * aptr * (*bptr & 0xffff);
314 0 : bhi = ((blo >> 16) & 0xffff)
315 0 : + ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
316 0 : blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
317 :
318 0 : *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t) blo >> 15);
319 0 : if (negate)
320 0 : *bptr = -*bptr;
321 0 : bptr++;
322 : }
323 : /* End of bandwidth expansion. */
324 :
325 0 : stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs,
326 : enc_nrOfCoefs_);
327 :
328 0 : if (!stab) {
329 : /* Disregard from this frame */
330 0 : return 0;
331 : }
332 :
333 : } else {
334 0 : for (i = 0; i < enc_nrOfCoefs_; i++)
335 0 : refCs[i] = 0;
336 : }
337 :
338 0 : if (force_sid) {
339 : /* Read instantaneous values instead of averaged. */
340 0 : for (i = 0; i < enc_nrOfCoefs_; i++)
341 0 : enc_reflCoefs_[i] = refCs[i];
342 0 : enc_Energy_ = outEnergy;
343 : } else {
344 : /* Average history with new values. */
345 0 : for (i = 0; i < enc_nrOfCoefs_; i++) {
346 0 : enc_reflCoefs_[i] = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(
347 0 : enc_reflCoefs_[i], ReflBeta, 15);
348 0 : enc_reflCoefs_[i] +=
349 0 : (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
350 : }
351 0 : enc_Energy_ =
352 0 : (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
353 : }
354 :
355 0 : if (enc_Energy_ < 1) {
356 0 : enc_Energy_ = 1;
357 : }
358 :
359 0 : if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
360 : /* Search for best dbov value. */
361 0 : index = 0;
362 0 : for (i = 1; i < 93; i++) {
363 : /* Always round downwards. */
364 0 : if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
365 0 : index = i;
366 0 : break;
367 : }
368 : }
369 0 : if ((i == 93) && (index == 0))
370 0 : index = 94;
371 :
372 0 : const size_t output_coefs = enc_nrOfCoefs_ + 1;
373 0 : output->AppendData(output_coefs, [&] (rtc::ArrayView<uint8_t> output) {
374 0 : output[0] = (uint8_t)index;
375 :
376 : /* Quantize coefficients with tweak for WebRtc implementation of
377 : * RFC3389. */
378 0 : if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
379 0 : for (i = 0; i < enc_nrOfCoefs_; i++) {
380 : /* Q15 to Q7 with rounding. */
381 0 : output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
382 : }
383 : } else {
384 0 : for (i = 0; i < enc_nrOfCoefs_; i++) {
385 : /* Q15 to Q7 with rounding. */
386 0 : output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
387 : }
388 : }
389 :
390 0 : return output_coefs;
391 0 : });
392 :
393 0 : enc_msSinceSid_ =
394 0 : static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
395 0 : return output_coefs;
396 : } else {
397 0 : enc_msSinceSid_ +=
398 0 : static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
399 0 : return 0;
400 : }
401 : }
402 :
403 : namespace {
404 : /* Values in |k| are Q15, and |a| Q12. */
405 0 : void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
406 : int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
407 : int16_t* aptr;
408 : int16_t* aptr2;
409 : int16_t* anyptr;
410 : const int16_t* kptr;
411 : int m, i;
412 :
413 0 : kptr = k;
414 0 : *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
415 0 : *any = *a;
416 0 : a[1] = (*k + 4) >> 3;
417 0 : for (m = 1; m < useOrder; m++) {
418 0 : kptr++;
419 0 : aptr = a;
420 0 : aptr++;
421 0 : aptr2 = &a[m];
422 0 : anyptr = any;
423 0 : anyptr++;
424 :
425 0 : any[m + 1] = (*kptr + 4) >> 3;
426 0 : for (i = 0; i < m; i++) {
427 0 : *anyptr++ =
428 0 : (*aptr++) +
429 0 : (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15);
430 : }
431 :
432 0 : aptr = a;
433 0 : anyptr = any;
434 0 : for (i = 0; i < (m + 2); i++) {
435 0 : *aptr++ = *anyptr++;
436 : }
437 : }
438 0 : }
439 :
440 : } // namespace
441 :
442 : } // namespace webrtc
|