Line data Source code
1 : /***********************************************************************
2 : Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 : Redistribution and use in source and binary forms, with or without
4 : modification, are permitted provided that the following conditions
5 : are met:
6 : - Redistributions of source code must retain the above copyright notice,
7 : this list of conditions and the following disclaimer.
8 : - Redistributions in binary form must reproduce the above copyright
9 : notice, this list of conditions and the following disclaimer in the
10 : documentation and/or other materials provided with the distribution.
11 : - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 : names of specific contributors, may be used to endorse or promote
13 : products derived from this software without specific prior written
14 : permission.
15 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 : POSSIBILITY OF SUCH DAMAGE.
26 : ***********************************************************************/
27 :
28 : #ifdef HAVE_CONFIG_H
29 : #include "config.h"
30 : #endif
31 : #include "define.h"
32 : #include "API.h"
33 : #include "control.h"
34 : #include "typedef.h"
35 : #include "stack_alloc.h"
36 : #include "structs.h"
37 : #include "tuning_parameters.h"
38 : #ifdef FIXED_POINT
39 : #include "main_FIX.h"
40 : #else
41 : #include "main_FLP.h"
42 : #endif
43 :
44 : /***************************************/
45 : /* Forward declaration: read control structure from encoder. */
45 : /* The definition follows silk_InitEncoder(), which calls it. */
46 : /***************************************/
47 : static opus_int silk_QueryEncoder( /* O Returns error code */
48 : const void *encState, /* I Encoder state to read the settings from */
49 : silk_EncControlStruct *encStatus /* O Encoder status, filled in from the state */
50 : );
51 :
52 : /****************************************/
53 : /* Encoder functions */
54 : /****************************************/
55 :
56 0 : opus_int silk_Get_Encoder_Size( /* O Returns error code */
57 : opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */
58 : )
59 : {
60 0 : opus_int ret = SILK_NO_ERROR;
61 :
62 0 : *encSizeBytes = sizeof( silk_encoder );
63 :
64 0 : return ret;
65 : }
66 :
67 : /*************************/
68 : /* Init or Reset encoder */
69 : /*************************/
70 0 : opus_int silk_InitEncoder( /* O Returns error code */
71 : void *encState, /* I/O State */
72 : int arch, /* I Run-time architecture */
73 : silk_EncControlStruct *encStatus /* O Encoder Status */
74 : )
75 : {
76 : silk_encoder *psEnc;
77 0 : opus_int n, ret = SILK_NO_ERROR;
78 :
79 0 : psEnc = (silk_encoder *)encState;
80 :
81 : /* Reset encoder */
82 0 : silk_memset( psEnc, 0, sizeof( silk_encoder ) );
83 0 : for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
84 0 : if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
85 0 : silk_assert( 0 );
86 : }
87 : }
88 :
89 0 : psEnc->nChannelsAPI = 1;
90 0 : psEnc->nChannelsInternal = 1;
91 :
92 : /* Read control structure */
93 0 : if( ret += silk_QueryEncoder( encState, encStatus ) ) {
94 0 : silk_assert( 0 );
95 : }
96 :
97 0 : return ret;
98 : }
99 :
100 : /***************************************/
101 : /* Read control structure from encoder */
102 : /***************************************/
103 0 : static opus_int silk_QueryEncoder( /* O Returns error code */
104 : const void *encState, /* I State */
105 : silk_EncControlStruct *encStatus /* O Encoder Status */
106 : )
107 : {
108 0 : opus_int ret = SILK_NO_ERROR;
109 : silk_encoder_state_Fxx *state_Fxx;
110 0 : silk_encoder *psEnc = (silk_encoder *)encState;
111 :
112 0 : state_Fxx = psEnc->state_Fxx;
113 :
114 0 : encStatus->nChannelsAPI = psEnc->nChannelsAPI;
115 0 : encStatus->nChannelsInternal = psEnc->nChannelsInternal;
116 0 : encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz;
117 0 : encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
118 0 : encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
119 0 : encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
120 0 : encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms;
121 0 : encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps;
122 0 : encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
123 0 : encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity;
124 0 : encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC;
125 0 : encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX;
126 0 : encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR;
127 0 : encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
128 0 : encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
129 0 : encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
130 :
131 0 : return ret;
132 : }
133 :
134 :
135 : /**************************/
136 : /* Encode frame with Silk */
137 : /**************************/
138 : /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */
139 : /* encControl->payloadSize_ms is set to */
140 0 : opus_int silk_Encode( /* O Returns error code */
141 : void *encState, /* I/O State */
142 : silk_EncControlStruct *encControl, /* I Control status */
143 : const opus_int16 *samplesIn, /* I Speech sample input vector */
144 : opus_int nSamplesIn, /* I Number of samples in input vector */
145 : ec_enc *psRangeEnc, /* I/O Compressor data structure */
146 : opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
147 : const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
148 : )
149 : {
150 0 : opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
151 : opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
152 0 : opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
153 : opus_int speech_act_thr_for_switch_Q8;
154 : opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
155 0 : silk_encoder *psEnc = ( silk_encoder * )encState;
156 : VARDECL( opus_int16, buf );
157 : opus_int transition, curr_block, tot_blocks;
158 : SAVE_STACK;
159 :
160 0 : if (encControl->reducedDependency)
161 : {
162 0 : psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
163 0 : psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
164 : }
165 0 : psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
166 :
167 : /* Check values in encoder control structure */
168 0 : if( ( ret = check_control_input( encControl ) ) != 0 ) {
169 0 : silk_assert( 0 );
170 : RESTORE_STACK;
171 : return ret;
172 : }
173 :
174 0 : encControl->switchReady = 0;
175 :
176 0 : if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
177 : /* Mono -> Stereo transition: init state of second channel and stereo state */
178 0 : ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
179 0 : silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
180 0 : silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
181 0 : psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
182 0 : psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
183 0 : psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
184 0 : psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
185 0 : psEnc->sStereo.width_prev_Q14 = 0;
186 0 : psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
187 0 : if( psEnc->nChannelsAPI == 2 ) {
188 0 : silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
189 0 : silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
190 : }
191 : }
192 :
193 0 : transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
194 :
195 0 : psEnc->nChannelsAPI = encControl->nChannelsAPI;
196 0 : psEnc->nChannelsInternal = encControl->nChannelsInternal;
197 :
198 0 : nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
199 0 : tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
200 0 : curr_block = 0;
201 0 : if( prefillFlag ) {
202 : /* Only accept input length of 10 ms */
203 0 : if( nBlocksOf10ms != 1 ) {
204 0 : silk_assert( 0 );
205 : RESTORE_STACK;
206 : return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
207 : }
208 : /* Reset Encoder */
209 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
210 0 : ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
211 0 : silk_assert( !ret );
212 : }
213 0 : tmp_payloadSize_ms = encControl->payloadSize_ms;
214 0 : encControl->payloadSize_ms = 10;
215 0 : tmp_complexity = encControl->complexity;
216 0 : encControl->complexity = 0;
217 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
218 0 : psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
219 0 : psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
220 : }
221 : } else {
222 : /* Only accept input lengths that are a multiple of 10 ms */
223 0 : if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
224 0 : silk_assert( 0 );
225 : RESTORE_STACK;
226 : return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
227 : }
228 : /* Make sure no more than one packet can be produced */
229 0 : if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
230 0 : silk_assert( 0 );
231 : RESTORE_STACK;
232 : return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
233 : }
234 : }
235 :
236 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
237 : /* Force the side channel to the same rate as the mid */
238 0 : opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
239 0 : if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
240 0 : silk_assert( 0 );
241 : RESTORE_STACK;
242 : return ret;
243 : }
244 0 : if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
245 0 : for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
246 0 : psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
247 : }
248 : }
249 0 : psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
250 : }
251 0 : silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
252 :
253 : /* Input buffering/resampling and encoding */
254 0 : nSamplesToBufferMax =
255 0 : 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
256 0 : nSamplesFromInputMax =
257 0 : silk_DIV32_16( nSamplesToBufferMax *
258 : psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
259 : psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
260 0 : ALLOC( buf, nSamplesFromInputMax, opus_int16 );
261 : while( 1 ) {
262 0 : nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
263 0 : nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
264 0 : nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
265 : /* Resample and write to buffer */
266 0 : if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
267 0 : opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
268 0 : for( n = 0; n < nSamplesFromInput; n++ ) {
269 0 : buf[ n ] = samplesIn[ 2 * n ];
270 : }
271 : /* Making sure to start both resamplers from the same state when switching from mono to stereo */
272 0 : if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
273 0 : silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
274 : }
275 :
276 0 : ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
277 0 : &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
278 0 : psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
279 :
280 0 : nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
281 0 : nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
282 0 : for( n = 0; n < nSamplesFromInput; n++ ) {
283 0 : buf[ n ] = samplesIn[ 2 * n + 1 ];
284 : }
285 0 : ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
286 0 : &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
287 :
288 0 : psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
289 0 : } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
290 : /* Combine left and right channels before resampling */
291 0 : for( n = 0; n < nSamplesFromInput; n++ ) {
292 0 : sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
293 0 : buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
294 : }
295 0 : ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
296 0 : &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
297 : /* On the first mono frame, average the results for the two resampler states */
298 0 : if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
299 0 : ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
300 0 : &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
301 0 : for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
302 0 : psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
303 0 : silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
304 : + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
305 : }
306 : }
307 0 : psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
308 : } else {
309 0 : silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
310 0 : silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
311 0 : ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
312 0 : &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
313 0 : psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
314 : }
315 :
316 0 : samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
317 0 : nSamplesIn -= nSamplesFromInput;
318 :
319 : /* Default */
320 0 : psEnc->allowBandwidthSwitch = 0;
321 :
322 : /* Silk encoder */
323 0 : if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
324 : /* Enough data in input buffer, so encode */
325 0 : silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
326 0 : silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
327 :
328 : /* Deal with LBRR data */
329 0 : if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
330 : /* Create space at start of payload for VAD and FEC flags */
331 0 : opus_uint8 iCDF[ 2 ] = { 0, 0 };
332 0 : iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
333 0 : ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
334 :
335 : /* Encode any LBRR data from previous packet */
336 : /* Encode LBRR flags */
337 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
338 0 : LBRR_symbol = 0;
339 0 : for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
340 0 : LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
341 : }
342 0 : psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
343 0 : if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
344 0 : ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
345 : }
346 : }
347 :
348 : /* Code LBRR indices and excitation signals */
349 0 : for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
350 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
351 0 : if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
352 : opus_int condCoding;
353 :
354 0 : if( encControl->nChannelsInternal == 2 && n == 0 ) {
355 0 : silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
356 : /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
357 0 : if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
358 0 : silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
359 : }
360 : }
361 : /* Use conditional coding if previous frame available */
362 0 : if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
363 0 : condCoding = CODE_CONDITIONALLY;
364 : } else {
365 0 : condCoding = CODE_INDEPENDENTLY;
366 : }
367 0 : silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
368 0 : silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
369 0 : psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
370 : }
371 : }
372 : }
373 :
374 : /* Reset LBRR flags */
375 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
376 0 : silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
377 : }
378 :
379 0 : psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc );
380 : }
381 :
382 0 : silk_HP_variable_cutoff( psEnc->state_Fxx );
383 :
384 : /* Total target bits for packet */
385 0 : nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
386 : /* Subtract bits used for LBRR */
387 0 : if( !prefillFlag ) {
388 0 : nBits -= psEnc->nBitsUsedLBRR;
389 : }
390 : /* Divide by number of uncoded frames left in packet */
391 0 : nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket );
392 : /* Convert to bits/second */
393 0 : if( encControl->payloadSize_ms == 10 ) {
394 0 : TargetRate_bps = silk_SMULBB( nBits, 100 );
395 : } else {
396 0 : TargetRate_bps = silk_SMULBB( nBits, 50 );
397 : }
398 : /* Subtract fraction of bits in excess of target in previous frames and packets */
399 0 : TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
400 0 : if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) {
401 : /* Compare actual vs target bits so far in this packet */
402 0 : opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
403 0 : TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
404 : }
405 : /* Never exceed input bitrate */
406 0 : TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
407 :
408 : /* Convert Left/Right to Mid/Side */
409 0 : if( encControl->nChannelsInternal == 2 ) {
410 0 : silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
411 0 : psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
412 : MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
413 : psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
414 0 : if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
415 : /* Reset side channel encoder memory for first frame with side coding */
416 0 : if( psEnc->prev_decode_only_middle == 1 ) {
417 0 : silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
418 0 : silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
419 0 : silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
420 0 : silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
421 0 : psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
422 0 : psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
423 0 : psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
424 0 : psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
425 0 : psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
426 0 : psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
427 : }
428 0 : silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
429 : } else {
430 0 : psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
431 : }
432 0 : if( !prefillFlag ) {
433 0 : silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
434 0 : if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
435 0 : silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
436 : }
437 : }
438 : } else {
439 : /* Buffering */
440 0 : silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
441 0 : silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
442 : }
443 0 : silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
444 :
445 : /* Encode */
446 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
447 : opus_int maxBits, useCBR;
448 :
449 : /* Handling rate constraints */
450 0 : maxBits = encControl->maxBits;
451 0 : if( tot_blocks == 2 && curr_block == 0 ) {
452 0 : maxBits = maxBits * 3 / 5;
453 0 : } else if( tot_blocks == 3 ) {
454 0 : if( curr_block == 0 ) {
455 0 : maxBits = maxBits * 2 / 5;
456 0 : } else if( curr_block == 1 ) {
457 0 : maxBits = maxBits * 3 / 4;
458 : }
459 : }
460 0 : useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
461 :
462 0 : if( encControl->nChannelsInternal == 1 ) {
463 0 : channelRate_bps = TargetRate_bps;
464 : } else {
465 0 : channelRate_bps = MStargetRates_bps[ n ];
466 0 : if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
467 0 : useCBR = 0;
468 : /* Give mid up to 1/2 of the max bits for that frame */
469 0 : maxBits -= encControl->maxBits / ( tot_blocks * 2 );
470 : }
471 : }
472 :
473 0 : if( channelRate_bps > 0 ) {
474 : opus_int condCoding;
475 :
476 0 : silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
477 :
478 : /* Use independent coding if no previous frame available */
479 0 : if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
480 0 : condCoding = CODE_INDEPENDENTLY;
481 0 : } else if( n > 0 && psEnc->prev_decode_only_middle ) {
482 : /* If we skipped a side frame in this packet, we don't
483 : need LTP scaling; the LTP state is well-defined. */
484 0 : condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
485 : } else {
486 0 : condCoding = CODE_CONDITIONALLY;
487 : }
488 0 : if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
489 0 : silk_assert( 0 );
490 : }
491 : }
492 0 : psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
493 0 : psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
494 0 : psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
495 : }
496 0 : psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
497 :
498 : /* Insert VAD and FEC flags at beginning of bitstream */
499 0 : if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
500 0 : flags = 0;
501 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
502 0 : for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
503 0 : flags = silk_LSHIFT( flags, 1 );
504 0 : flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
505 : }
506 0 : flags = silk_LSHIFT( flags, 1 );
507 0 : flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
508 : }
509 0 : if( !prefillFlag ) {
510 0 : ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
511 : }
512 :
513 : /* Return zero bytes if all channels DTXed */
514 0 : if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
515 0 : *nBytesOut = 0;
516 : }
517 :
518 0 : psEnc->nBitsExceeded += *nBytesOut * 8;
519 0 : psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
520 0 : psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
521 :
522 : /* Update flag indicating if bandwidth switching is allowed */
523 0 : speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
524 : SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
525 0 : if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
526 0 : psEnc->allowBandwidthSwitch = 1;
527 0 : psEnc->timeSinceSwitchAllowed_ms = 0;
528 : } else {
529 0 : psEnc->allowBandwidthSwitch = 0;
530 0 : psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
531 : }
532 : }
533 :
534 0 : if( nSamplesIn == 0 ) {
535 0 : break;
536 : }
537 : } else {
538 0 : break;
539 : }
540 0 : curr_block++;
541 : }
542 :
543 0 : psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
544 :
545 0 : encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
546 0 : encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
547 0 : encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
548 0 : encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
549 0 : if( prefillFlag ) {
550 0 : encControl->payloadSize_ms = tmp_payloadSize_ms;
551 0 : encControl->complexity = tmp_complexity;
552 0 : for( n = 0; n < encControl->nChannelsInternal; n++ ) {
553 0 : psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
554 0 : psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
555 : }
556 : }
557 :
558 0 : encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType;
559 0 : encControl->offset = silk_Quantization_Offsets_Q10
560 0 : [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ]
561 0 : [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ];
562 : RESTORE_STACK;
563 0 : return ret;
564 : }
565 :
|