Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "webrtc/modules/audio_coding/codecs/opus/opus_interface.h"
12 :
13 : #include "webrtc/base/checks.h"
14 : #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
15 :
16 : #include <stdlib.h>
17 : #include <string.h>
18 :
enum {
  /* Longest frame, in milliseconds, that WebRTC supports. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,

  /* Longest frame, in milliseconds, that must be accepted when decoding.
   * The format permits frames of up to 120 ms and the remote side is not
   * under our control, even though NetEq is currently limited to 60 ms on
   * the receive side. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Upper bound on samples per channel: the longest decodable frame at
   * 48 kHz (48 samples per millisecond). */
  kWebRtcOpusMaxFrameSizePerChannel = kWebRtcOpusMaxDecodeFrameSizeMs * 48,

  /* Default frame size in samples for one channel: 20 ms at 48 kHz. */
  kWebRtcOpusDefaultFrameSize = 20 * 48,
};
35 :
36 0 : int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
37 : size_t channels,
38 : int32_t application) {
39 : int opus_app;
40 0 : if (!inst)
41 0 : return -1;
42 :
43 0 : switch (application) {
44 : case 0:
45 0 : opus_app = OPUS_APPLICATION_VOIP;
46 0 : break;
47 : case 1:
48 0 : opus_app = OPUS_APPLICATION_AUDIO;
49 0 : break;
50 : default:
51 0 : return -1;
52 : }
53 :
54 0 : OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
55 0 : RTC_DCHECK(state);
56 :
57 : int error;
58 0 : state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
59 : &error);
60 0 : if (error != OPUS_OK || !state->encoder) {
61 0 : WebRtcOpus_EncoderFree(state);
62 0 : return -1;
63 : }
64 :
65 0 : state->in_dtx_mode = 0;
66 0 : state->channels = channels;
67 :
68 0 : *inst = state;
69 0 : return 0;
70 : }
71 :
72 0 : int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
73 0 : if (inst) {
74 0 : opus_encoder_destroy(inst->encoder);
75 0 : free(inst);
76 0 : return 0;
77 : } else {
78 0 : return -1;
79 : }
80 : }
81 :
82 0 : int WebRtcOpus_Encode(OpusEncInst* inst,
83 : const int16_t* audio_in,
84 : size_t samples,
85 : size_t length_encoded_buffer,
86 : uint8_t* encoded) {
87 : int res;
88 :
89 0 : if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
90 0 : return -1;
91 : }
92 :
93 0 : res = opus_encode(inst->encoder,
94 : (const opus_int16*)audio_in,
95 : (int)samples,
96 : encoded,
97 : (opus_int32)length_encoded_buffer);
98 :
99 0 : if (res <= 0) {
100 0 : return -1;
101 : }
102 :
103 0 : if (res <= 2) {
104 : // Indicates DTX since the packet has nothing but a header. In principle,
105 : // there is no need to send this packet. However, we do transmit the first
106 : // occurrence to let the decoder know that the encoder enters DTX mode.
107 0 : if (inst->in_dtx_mode) {
108 0 : return 0;
109 : } else {
110 0 : inst->in_dtx_mode = 1;
111 0 : return 1;
112 : }
113 : }
114 :
115 0 : inst->in_dtx_mode = 0;
116 0 : return res;
117 : }
118 :
119 0 : int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
120 0 : if (inst) {
121 : #if defined(OPUS_COMPLEXITY) && (OPUS_COMPLEXITY != 0)
122 : opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(OPUS_COMPLEXITY));
123 : #endif
124 0 : return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
125 : } else {
126 0 : return -1;
127 : }
128 : }
129 :
130 0 : int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
131 0 : if (inst) {
132 0 : return opus_encoder_ctl(inst->encoder,
133 : OPUS_SET_PACKET_LOSS_PERC(loss_rate));
134 : } else {
135 0 : return -1;
136 : }
137 : }
138 :
139 0 : int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
140 : opus_int32 set_bandwidth;
141 :
142 0 : if (!inst)
143 0 : return -1;
144 :
145 0 : if (frequency_hz <= 8000) {
146 0 : set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
147 0 : } else if (frequency_hz <= 12000) {
148 0 : set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
149 0 : } else if (frequency_hz <= 16000) {
150 0 : set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
151 0 : } else if (frequency_hz <= 24000) {
152 0 : set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
153 : } else {
154 0 : set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
155 : }
156 0 : return opus_encoder_ctl(inst->encoder,
157 : OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
158 : }
159 :
160 0 : int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
161 0 : if (inst) {
162 0 : return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
163 : } else {
164 0 : return -1;
165 : }
166 : }
167 :
168 0 : int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
169 0 : if (inst) {
170 0 : return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
171 : } else {
172 0 : return -1;
173 : }
174 : }
175 :
176 0 : int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
177 0 : if (!inst) {
178 0 : return -1;
179 : }
180 :
181 : // To prevent Opus from entering CELT-only mode by forcing signal type to
182 : // voice to make sure that DTX behaves correctly. Currently, DTX does not
183 : // last long during a pure silence, if the signal type is not forced.
184 : // TODO(minyue): Remove the signal type forcing when Opus DTX works properly
185 : // without it.
186 0 : int ret = opus_encoder_ctl(inst->encoder,
187 : OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
188 0 : if (ret != OPUS_OK)
189 0 : return ret;
190 :
191 0 : return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
192 : }
193 :
194 0 : int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
195 0 : if (inst) {
196 0 : int ret = opus_encoder_ctl(inst->encoder,
197 : OPUS_SET_SIGNAL(OPUS_AUTO));
198 0 : if (ret != OPUS_OK)
199 0 : return ret;
200 0 : return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
201 : } else {
202 0 : return -1;
203 : }
204 : }
205 :
206 0 : int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
207 0 : if (inst) {
208 0 : return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
209 : } else {
210 0 : return -1;
211 : }
212 : }
213 :
214 0 : int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
215 0 : if (!inst)
216 0 : return -1;
217 0 : if (num_channels == 0) {
218 0 : return opus_encoder_ctl(inst->encoder,
219 : OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
220 0 : } else if (num_channels == 1 || num_channels == 2) {
221 0 : return opus_encoder_ctl(inst->encoder,
222 : OPUS_SET_FORCE_CHANNELS(num_channels));
223 : } else {
224 0 : return -1;
225 : }
226 : }
227 :
228 0 : int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) {
229 : int error;
230 : OpusDecInst* state;
231 :
232 0 : if (inst != NULL) {
233 : /* Create Opus decoder state. */
234 0 : state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
235 0 : if (state == NULL) {
236 0 : return -1;
237 : }
238 :
239 : /* Create new memory, always at 48000 Hz. */
240 0 : state->decoder = opus_decoder_create(48000, (int)channels, &error);
241 0 : if (error == OPUS_OK && state->decoder != NULL) {
242 : /* Creation of memory all ok. */
243 0 : state->channels = channels;
244 0 : state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
245 0 : state->in_dtx_mode = 0;
246 0 : *inst = state;
247 0 : return 0;
248 : }
249 :
250 : /* If memory allocation was unsuccessful, free the entire state. */
251 0 : if (state->decoder) {
252 0 : opus_decoder_destroy(state->decoder);
253 : }
254 0 : free(state);
255 : }
256 0 : return -1;
257 : }
258 :
259 0 : int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
260 0 : if (inst) {
261 0 : opus_decoder_destroy(inst->decoder);
262 0 : free(inst);
263 0 : return 0;
264 : } else {
265 0 : return -1;
266 : }
267 : }
268 :
269 0 : size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
270 0 : return inst->channels;
271 : }
272 :
273 0 : void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
274 0 : opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
275 0 : inst->in_dtx_mode = 0;
276 0 : }
277 :
278 : /* For decoder to determine if it is to output speech or comfort noise. */
279 0 : static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
280 : // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
281 : // to be so if the following |encoded_byte| are 0 or 1.
282 0 : if (encoded_bytes == 0 && inst->in_dtx_mode) {
283 0 : return 2; // Comfort noise.
284 0 : } else if (encoded_bytes == 1) {
285 0 : inst->in_dtx_mode = 1;
286 0 : return 2; // Comfort noise.
287 : } else {
288 0 : inst->in_dtx_mode = 0;
289 0 : return 0; // Speech.
290 : }
291 : }
292 :
293 : /* |frame_size| is set to maximum Opus frame size in the normal case, and
294 : * is set to the number of samples needed for PLC in case of losses.
295 : * It is up to the caller to make sure the value is correct. */
296 0 : static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
297 : size_t encoded_bytes, int frame_size,
298 : int16_t* decoded, int16_t* audio_type, int decode_fec) {
299 0 : int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
300 : (opus_int16*)decoded, frame_size, decode_fec);
301 :
302 0 : if (res <= 0)
303 0 : return -1;
304 :
305 0 : *audio_type = DetermineAudioType(inst, encoded_bytes);
306 :
307 0 : return res;
308 : }
309 :
310 0 : int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
311 : size_t encoded_bytes, int16_t* decoded,
312 : int16_t* audio_type) {
313 : int decoded_samples;
314 :
315 0 : if (encoded_bytes == 0) {
316 0 : *audio_type = DetermineAudioType(inst, encoded_bytes);
317 0 : decoded_samples = WebRtcOpus_DecodePlc(inst, decoded, 1);
318 : } else {
319 0 : decoded_samples = DecodeNative(inst,
320 : encoded,
321 : encoded_bytes,
322 : kWebRtcOpusMaxFrameSizePerChannel,
323 : decoded,
324 : audio_type,
325 : 0);
326 : }
327 0 : if (decoded_samples < 0) {
328 0 : return -1;
329 : }
330 :
331 : /* Update decoded sample memory, to be used by the PLC in case of losses. */
332 0 : inst->prev_decoded_samples = decoded_samples;
333 :
334 0 : return decoded_samples;
335 : }
336 :
337 0 : int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
338 : int number_of_lost_frames) {
339 0 : int16_t audio_type = 0;
340 : int decoded_samples;
341 : int plc_samples;
342 :
343 : /* The number of samples we ask for is |number_of_lost_frames| times
344 : * |prev_decoded_samples_|. Limit the number of samples to maximum
345 : * |kWebRtcOpusMaxFrameSizePerChannel|. */
346 0 : plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
347 0 : plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
348 : plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
349 0 : decoded_samples = DecodeNative(inst, NULL, 0, plc_samples,
350 : decoded, &audio_type, 0);
351 0 : if (decoded_samples < 0) {
352 0 : return -1;
353 : }
354 :
355 0 : return decoded_samples;
356 : }
357 :
358 0 : int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
359 : size_t encoded_bytes, int16_t* decoded,
360 : int16_t* audio_type) {
361 : int decoded_samples;
362 : int fec_samples;
363 :
364 0 : if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
365 0 : return 0;
366 : }
367 :
368 0 : fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
369 :
370 0 : decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
371 : fec_samples, decoded, audio_type, 1);
372 0 : if (decoded_samples < 0) {
373 0 : return -1;
374 : }
375 :
376 0 : return decoded_samples;
377 : }
378 :
379 0 : int WebRtcOpus_DurationEst(OpusDecInst* inst,
380 : const uint8_t* payload,
381 : size_t payload_length_bytes) {
382 0 : if (payload_length_bytes == 0) {
383 : // WebRtcOpus_Decode calls PLC when payload length is zero. So we return
384 : // PLC duration correspondingly.
385 0 : return WebRtcOpus_PlcDuration(inst);
386 : }
387 :
388 : int frames, samples;
389 0 : frames = opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
390 0 : if (frames < 0) {
391 : /* Invalid payload data. */
392 0 : return 0;
393 : }
394 0 : samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
395 0 : if (samples < 120 || samples > 5760) {
396 : /* Invalid payload duration. */
397 0 : return 0;
398 : }
399 0 : return samples;
400 : }
401 :
402 0 : int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
403 : /* The number of samples we ask for is |number_of_lost_frames| times
404 : * |prev_decoded_samples_|. Limit the number of samples to maximum
405 : * |kWebRtcOpusMaxFrameSizePerChannel|. */
406 0 : const int plc_samples = inst->prev_decoded_samples;
407 : return (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
408 0 : plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
409 : }
410 :
/* Estimates the duration, in samples at 48 kHz, of the FEC data in an Opus
 * payload.
 *
 * Returns 0 when WebRtcOpus_PacketHasFec() does not report FEC for the
 * payload or when the per-frame sample count lies outside [480, 5760]. */
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              size_t payload_length_bytes) {
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  const int frame_samples = opus_packet_get_samples_per_frame(payload, 48000);
  if (frame_samples < 480 || frame_samples > 5760) {
    /* Invalid payload duration. */
    return 0;
  }
  return frame_samples;
}
425 :
426 0 : int WebRtcOpus_PacketHasFec(const uint8_t* payload,
427 : size_t payload_length_bytes) {
428 : int frames, channels, payload_length_ms;
429 : int n;
430 : opus_int16 frame_sizes[48];
431 : const unsigned char *frame_data[48];
432 :
433 0 : if (payload == NULL || payload_length_bytes == 0)
434 0 : return 0;
435 :
436 : /* In CELT_ONLY mode, packets should not have FEC. */
437 0 : if (payload[0] & 0x80)
438 0 : return 0;
439 :
440 0 : payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
441 0 : if (10 > payload_length_ms)
442 0 : payload_length_ms = 10;
443 :
444 0 : channels = opus_packet_get_nb_channels(payload);
445 :
446 0 : switch (payload_length_ms) {
447 : case 10:
448 : case 20: {
449 0 : frames = 1;
450 0 : break;
451 : }
452 : case 40: {
453 0 : frames = 2;
454 0 : break;
455 : }
456 : case 60: {
457 0 : frames = 3;
458 0 : break;
459 : }
460 : default: {
461 0 : return 0; // It is actually even an invalid packet.
462 : }
463 : }
464 :
465 : /* The following is to parse the LBRR flags. */
466 0 : if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL,
467 : frame_data, frame_sizes, NULL) < 0) {
468 0 : return 0;
469 : }
470 :
471 0 : if (frame_sizes[0] <= 1) {
472 0 : return 0;
473 : }
474 :
475 0 : for (n = 0; n < channels; n++) {
476 0 : if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
477 0 : return 1;
478 : }
479 :
480 0 : return 0;
481 : }
|