Line data Source code
1 : /***********************************************************************
2 : Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 : Redistribution and use in source and binary forms, with or without
4 : modification, are permitted provided that the following conditions
5 : are met:
6 : - Redistributions of source code must retain the above copyright notice,
7 : this list of conditions and the following disclaimer.
8 : - Redistributions in binary form must reproduce the above copyright
9 : notice, this list of conditions and the following disclaimer in the
10 : documentation and/or other materials provided with the distribution.
11 : - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 : names of specific contributors, may be used to endorse or promote
13 : products derived from this software without specific prior written
14 : permission.
15 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 : POSSIBILITY OF SUCH DAMAGE.
26 : ***********************************************************************/
27 :
28 : #ifdef HAVE_CONFIG_H
29 : #include "config.h"
30 : #endif
31 : #include "API.h"
32 : #include "main.h"
33 : #include "stack_alloc.h"
34 : #include "os_support.h"
35 :
36 : /************************/
37 : /* Decoder Super Struct */
38 : /************************/
/* Complete decoder state handed around as an opaque void* by the API functions in this file.
   silk_Get_Decoder_Size() reports sizeof() of this struct. */
39 : typedef struct {
40 : silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; /* One decoder state per internal channel; index 1 is only decoded when the stream has two channels */
41 : stereo_dec_state sStereo; /* Stereo memory (pred_prev_Q13, sSide, sMid) used by the mid/side conversion and mono buffering in silk_Decode() */
42 : opus_int nChannelsAPI; /* decControl->nChannelsAPI as seen by the previous silk_Decode() call */
43 : opus_int nChannelsInternal; /* decControl->nChannelsInternal as seen by the previous silk_Decode() call */
44 : opus_int prev_decode_only_middle; /* decode_only_middle of the last frame that was not a lost packet; cleared by silk_InitDecoder() */
45 : } silk_decoder;
46 :
47 : /*********************/
48 : /* Decoder functions */
49 : /*********************/
50 :
51 0 : opus_int silk_Get_Decoder_Size( /* O Returns error code */
52 : opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
53 : )
54 : {
55 0 : opus_int ret = SILK_NO_ERROR;
56 :
57 0 : *decSizeBytes = sizeof( silk_decoder );
58 :
59 0 : return ret;
60 : }
61 :
62 : /* Reset decoder state */
63 0 : opus_int silk_InitDecoder( /* O Returns error code */
64 : void *decState /* I/O State */
65 : )
66 : {
67 0 : opus_int n, ret = SILK_NO_ERROR;
68 0 : silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
69 :
70 0 : for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
71 0 : ret = silk_init_decoder( &channel_state[ n ] );
72 : }
73 0 : silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
74 : /* Not strictly needed, but it's cleaner that way */
75 0 : ((silk_decoder *)decState)->prev_decode_only_middle = 0;
76 :
77 0 : return ret;
78 : }
79 :
80 : /* Decode a frame */
/*
 * silk_Decode: decode one frame of the current packet.
 *
 * Each call handles one frame for every internal channel, passes the result through
 * silk_resampler() and writes it to samplesOut (interleaved L/R when
 * decControl->nChannelsAPI == 2). *nSamplesOut receives the number of samples per
 * channel at decControl->API_sampleRate. decControl->prevPitchLag is updated on exit.
 *
 * On the first frame of a packet (channel 0's nFramesDecoded == 0) the packet layout is
 * derived from decControl->payloadSize_ms and, unless the packet is lost, the VAD/LBRR
 * header flags are read from the range decoder.
 *
 * Returns SILK_DEC_INVALID_FRAME_SIZE for an unsupported payloadSize_ms,
 * SILK_DEC_INVALID_SAMPLING_FREQUENCY for an unsupported internal or API sample rate,
 * and otherwise the sum of the return codes of the helper calls (SILK_NO_ERROR when
 * all of them succeed).
 */
81 0 : opus_int silk_Decode( /* O Returns error code */
82 : void* decState, /* I/O State */
83 : silk_DecControlStruct* decControl, /* I/O Control Structure */
84 : opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
85 : opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
86 : ec_dec *psRangeDec, /* I/O Compressor data structure */
87 : opus_int16 *samplesOut, /* O Decoded output speech vector */
88 : opus_int32 *nSamplesOut, /* O Number of samples decoded */
89 : int arch /* I Run-time architecture */
90 : )
91 : {
92 0 : opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
93 : opus_int32 nSamplesOutDec, LBRR_symbol;
94 : opus_int16 *samplesOut1_tmp[ 2 ];
95 : VARDECL( opus_int16, samplesOut1_tmp_storage1 );
96 : VARDECL( opus_int16, samplesOut1_tmp_storage2 );
97 : VARDECL( opus_int16, samplesOut2_tmp );
98 0 : opus_int32 MS_pred_Q13[ 2 ] = { 0 };
99 : opus_int16 *resample_out_ptr;
100 0 : silk_decoder *psDec = ( silk_decoder * )decState;
101 0 : silk_decoder_state *channel_state = psDec->channel_state;
102 : opus_int has_side;
103 : opus_int stereo_to_mono;
104 : int delay_stack_alloc;
105 : SAVE_STACK;
106 :
107 0 : silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
108 :
109 : /**********************************/
110 : /* Test if first frame in payload */
111 : /**********************************/
112 0 : if( newPacketFlag ) {
113 0 : for( n = 0; n < decControl->nChannelsInternal; n++ ) {
114 0 : channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */
115 : }
116 : }
117 :
118 : /* If Mono -> Stereo transition in bitstream: init state of second channel */
119 0 : if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
120 0 : ret += silk_init_decoder( &channel_state[ 1 ] );
121 : }
122 :
/* Stereo -> mono transition in the bitstream with an unchanged internal rate.
   Evaluated here because psDec->nChannelsInternal and channel 0's fs_kHz are
   updated further down; the flag is consumed in the mono-to-stereo output path. */
123 0 : stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
124 0 : ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
125 :
/* First frame of the packet: derive frames per packet and subframes per frame from
   the payload duration, then configure the sampling rate of every channel */
126 0 : if( channel_state[ 0 ].nFramesDecoded == 0 ) {
127 0 : for( n = 0; n < decControl->nChannelsInternal; n++ ) {
128 : opus_int fs_kHz_dec;
129 0 : if( decControl->payloadSize_ms == 0 ) {
130 : /* Assuming packet loss, use 10 ms */
131 0 : channel_state[ n ].nFramesPerPacket = 1;
132 0 : channel_state[ n ].nb_subfr = 2;
133 0 : } else if( decControl->payloadSize_ms == 10 ) {
134 0 : channel_state[ n ].nFramesPerPacket = 1;
135 0 : channel_state[ n ].nb_subfr = 2;
136 0 : } else if( decControl->payloadSize_ms == 20 ) {
137 0 : channel_state[ n ].nFramesPerPacket = 1;
138 0 : channel_state[ n ].nb_subfr = 4;
139 0 : } else if( decControl->payloadSize_ms == 40 ) {
140 0 : channel_state[ n ].nFramesPerPacket = 2;
141 0 : channel_state[ n ].nb_subfr = 4;
142 0 : } else if( decControl->payloadSize_ms == 60 ) {
143 0 : channel_state[ n ].nFramesPerPacket = 3;
144 0 : channel_state[ n ].nb_subfr = 4;
145 : } else {
146 0 : silk_assert( 0 );
147 : RESTORE_STACK;
148 : return SILK_DEC_INVALID_FRAME_SIZE;
149 : }
/* Hz -> kHz without a division: 8000, 12000 and 16000 map to 8, 12 and 16 */
150 0 : fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
151 0 : if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
152 0 : silk_assert( 0 );
153 : RESTORE_STACK;
154 : return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
155 : }
156 0 : ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
157 : }
158 : }
159 :
/* Output and stream are both stereo now, but the previous call had a mono output or a
   mono stream: clear the stereo predictor and side-channel memory and start channel 1's
   resampler from a copy of channel 0's resampler state */
160 0 : if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
161 0 : silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
162 0 : silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
163 0 : silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
164 : }
/* Remember the channel configuration for the transition checks of the next call */
165 0 : psDec->nChannelsAPI = decControl->nChannelsAPI;
166 0 : psDec->nChannelsInternal = decControl->nChannelsInternal;
167 :
/* NOTE(review): this range check runs after API_sampleRate has already been handed to
   silk_decoder_set_fs() above and after the state updates; confirm that is intended.
   Note that ret is overwritten here, not accumulated. */
168 0 : if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
169 0 : ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
170 : RESTORE_STACK;
171 0 : return( ret );
172 : }
173 :
174 0 : if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
175 : /* First decoder call for this payload */
176 : /* Decode VAD flags and LBRR flag */
177 0 : for( n = 0; n < decControl->nChannelsInternal; n++ ) {
178 0 : for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
179 0 : channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
180 : }
181 0 : channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
182 : }
183 : /* Decode LBRR flags */
184 0 : for( n = 0; n < decControl->nChannelsInternal; n++ ) {
185 0 : silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
186 0 : if( channel_state[ n ].LBRR_flag ) {
187 0 : if( channel_state[ n ].nFramesPerPacket == 1 ) {
/* Single-frame packet: the packet-level LBRR flag is the per-frame flag */
188 0 : channel_state[ n ].LBRR_flags[ 0 ] = 1;
189 : } else {
/* Multi-frame packet: the decoded symbol is offset by 1, and bit i of the result is
   the LBRR flag of frame i. The iCDF table is chosen by frame count (2 or 3 frames). */
190 0 : LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
191 0 : for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
192 0 : channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
193 : }
194 : }
195 : }
196 : }
197 :
198 0 : if( lostFlag == FLAG_DECODE_NORMAL ) {
199 : /* Regular decoding: skip all LBRR data */
/* The LBRR frames are read from the range decoder in the same order as below, but
   pulses is a local scratch buffer that is not used afterwards */
200 0 : for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
201 0 : for( n = 0; n < decControl->nChannelsInternal; n++ ) {
202 0 : if( channel_state[ n ].LBRR_flags[ i ] ) {
203 : opus_int16 pulses[ MAX_FRAME_LENGTH ];
204 : opus_int condCoding;
205 :
206 0 : if( decControl->nChannelsInternal == 2 && n == 0 ) {
207 0 : silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
208 0 : if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
209 0 : silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
210 : }
211 : }
212 : /* Use conditional coding if previous frame available */
213 0 : if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
214 0 : condCoding = CODE_CONDITIONALLY;
215 : } else {
216 0 : condCoding = CODE_INDEPENDENTLY;
217 : }
218 0 : silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
219 0 : silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
220 0 : channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
221 : }
222 : }
223 : }
224 : }
225 : }
226 :
227 : /* Get MS predictor index */
228 0 : if( decControl->nChannelsInternal == 2 ) {
229 0 : if( lostFlag == FLAG_DECODE_NORMAL ||
230 0 : ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
231 : {
232 0 : silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
233 : /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
234 0 : if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
235 0 : ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
236 : {
237 0 : silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
238 : } else {
239 0 : decode_only_middle = 0;
240 : }
241 : } else {
/* Nothing to read from the bitstream for this frame: reuse the previous predictor */
242 0 : for( n = 0; n < 2; n++ ) {
243 0 : MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
244 : }
245 : }
246 : }
247 :
248 : /* Reset side channel decoder prediction memory for first frame with side coding */
249 0 : if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
250 0 : silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
251 0 : silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
252 0 : psDec->channel_state[ 1 ].lagPrev = 100;
253 0 : psDec->channel_state[ 1 ].LastGainIndex = 10;
254 0 : psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
255 0 : psDec->channel_state[ 1 ].first_frame_after_reset = 1;
256 : }
257 :
258 : /* Check if the temp buffer fits into the output PCM buffer. If it fits,
259 : we can delay allocating the temp buffer until after the SILK peak stack
260 : usage. We need to use a < and not a <= because of the two extra samples. */
261 0 : delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
262 0 : < decControl->API_sampleRate*decControl->nChannelsAPI;
263 0 : ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
264 : : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
265 : opus_int16 );
/* Each channel gets frame_length + 2 samples: decoded data is written from index 2 on,
   the first two slots are filled in later by the stereo conversion or the mono buffering */
266 0 : if ( delay_stack_alloc )
267 : {
268 0 : samplesOut1_tmp[ 0 ] = samplesOut;
269 0 : samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
270 : } else {
271 0 : samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
272 0 : samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
273 : }
274 :
/* Normal decoding: the side channel is decoded unless mid-only was signalled.
   Otherwise: follow what the previous frame did, or decode it when this is an LBRR
   frame and the side channel's LBRR flag is set */
275 0 : if( lostFlag == FLAG_DECODE_NORMAL ) {
276 0 : has_side = !decode_only_middle;
277 : } else {
278 0 : has_side = !psDec->prev_decode_only_middle
279 0 : || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
280 : }
281 : /* Call decoder for one frame */
282 0 : for( n = 0; n < decControl->nChannelsInternal; n++ ) {
283 0 : if( n == 0 || has_side ) {
284 : opus_int FrameIndex;
285 : opus_int condCoding;
286 :
/* Channel 0's counter has already been incremented by the time n == 1 is processed,
   so subtracting n yields the index of the current frame for both channels */
287 0 : FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
288 : /* Use independent coding if no previous frame available */
289 0 : if( FrameIndex <= 0 ) {
290 0 : condCoding = CODE_INDEPENDENTLY;
291 0 : } else if( lostFlag == FLAG_DECODE_LBRR ) {
292 0 : condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
293 0 : } else if( n > 0 && psDec->prev_decode_only_middle ) {
294 : /* If we skipped a side frame in this packet, we don't
295 : need LTP scaling; the LTP state is well-defined. */
296 0 : condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
297 : } else {
298 0 : condCoding = CODE_CONDITIONALLY;
299 : }
300 0 : ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch);
301 : } else {
/* Side channel not decoded: output silence. This relies on the n == 0 iteration
   having filled in nSamplesOutDec through silk_decode_frame() */
302 0 : silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
303 : }
304 0 : channel_state[ n ].nFramesDecoded++;
305 : }
306 :
307 0 : if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
308 : /* Convert Mid/Side to Left/Right */
309 0 : silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
310 : } else {
311 : /* Buffering */
/* Put the two samples saved from the previous frame in front of this frame, then save
   the last two decoded samples of this frame for the next call */
312 0 : silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
313 0 : silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
314 : }
315 :
316 : /* Number of output samples */
/* Decoded sample count scaled from the internal rate (fs_kHz * 1000) to the API rate */
317 0 : *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
318 :
319 : /* Set up pointers to temp buffers */
/* Stereo output is resampled into a scratch buffer and interleaved afterwards;
   mono output is resampled straight into samplesOut */
320 0 : ALLOC( samplesOut2_tmp,
321 : decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
322 0 : if( decControl->nChannelsAPI == 2 ) {
323 0 : resample_out_ptr = samplesOut2_tmp;
324 : } else {
325 0 : resample_out_ptr = samplesOut;
326 : }
327 :
328 0 : ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
329 : ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
330 : : ALLOC_NONE,
331 : opus_int16 );
/* Delayed allocation: the decoded samples currently sit in samplesOut, which the
   resampler is about to overwrite, so move them to the temp buffer allocated just now */
332 0 : if ( delay_stack_alloc ) {
333 0 : OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
334 0 : samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
335 0 : samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
336 : }
337 0 : for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
338 :
339 : /* Resample decoded signal to API_sampleRate */
340 0 : ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
341 :
342 : /* Interleave if stereo output and stereo stream */
343 0 : if( decControl->nChannelsAPI == 2 ) {
344 0 : for( i = 0; i < *nSamplesOut; i++ ) {
345 0 : samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
346 : }
347 : }
348 : }
349 :
350 : /* Create two channel output from mono stream */
351 0 : if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
352 0 : if ( stereo_to_mono ){
353 : /* Resample right channel for newly collapsed stereo just in case
354 : we weren't doing collapsing when switching to mono */
355 0 : ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
356 :
357 0 : for( i = 0; i < *nSamplesOut; i++ ) {
358 0 : samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
359 : }
360 : } else {
/* Plain mono stream: duplicate the left samples into the right slots */
361 0 : for( i = 0; i < *nSamplesOut; i++ ) {
362 0 : samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
363 : }
364 : }
365 : }
366 :
367 : /* Export pitch lag, measured at 48 kHz sampling rate */
368 0 : if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
/* fs_kHz of 8, 12 or 16 selects index 0, 1 or 2, i.e. a factor of 6, 4 or 3 (48 / fs_kHz) */
369 0 : int mult_tab[ 3 ] = { 6, 4, 3 };
370 0 : decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
371 : } else {
372 0 : decControl->prevPitchLag = 0;
373 : }
374 :
375 0 : if( lostFlag == FLAG_PACKET_LOST ) {
376 : /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
377 : if we lose packets when the energy is going down */
378 0 : for ( i = 0; i < psDec->nChannelsInternal; i++ )
379 0 : psDec->channel_state[ i ].LastGainIndex = 10;
380 : } else {
/* Only frames that were actually received update the mid-only history */
381 0 : psDec->prev_decode_only_middle = decode_only_middle;
382 : }
383 : RESTORE_STACK;
384 0 : return ret;
385 : }
386 :
387 : #if 0
388 : /* Getting table of contents for a packet */
389 : opus_int silk_get_TOC(
390 : const opus_uint8 *payload, /* I Payload data */
391 : const opus_int nBytesIn, /* I Number of input bytes */
392 : const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */
393 : silk_TOC_struct *Silk_TOC /* O Type of content */
394 : )
395 : {
396 : opus_int i, flags, ret = SILK_NO_ERROR;
397 :
398 : if( nBytesIn < 1 ) {
399 : return -1;
400 : }
401 : if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
402 : return -1;
403 : }
404 :
405 : silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
406 :
407 : /* For stereo, extract the flags for the mid channel */
408 : flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
409 :
410 : Silk_TOC->inbandFECFlag = flags & 1;
411 : for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
412 : flags = silk_RSHIFT( flags, 1 );
413 : Silk_TOC->VADFlags[ i ] = flags & 1;
414 : Silk_TOC->VADFlag |= flags & 1;
415 : }
416 :
417 : return ret;
418 : }
419 : #endif
|