Line data Source code
1 : /***********************************************************************
2 : Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 : Redistribution and use in source and binary forms, with or without
4 : modification, are permitted provided that the following conditions
5 : are met:
6 : - Redistributions of source code must retain the above copyright notice,
7 : this list of conditions and the following disclaimer.
8 : - Redistributions in binary form must reproduce the above copyright
9 : notice, this list of conditions and the following disclaimer in the
10 : documentation and/or other materials provided with the distribution.
11 : - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 : names of specific contributors, may be used to endorse or promote
13 : products derived from this software without specific prior written
14 : permission.
15 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 : POSSIBILITY OF SUCH DAMAGE.
26 : ***********************************************************************/
27 :
28 : #ifdef HAVE_CONFIG_H
29 : #include "config.h"
30 : #endif
31 :
32 : #include "main.h"
33 : #include "stack_alloc.h"
34 :
35 : /* Convert Left/Right stereo signal to adaptive Mid/Side representation */
/*
 * Works in place: x1 is overwritten with the mid signal and x2 with the side
 * signal after the mid-based prediction has been subtracted.
 *
 * Indexing: mid aliases &x1[ -2 ], so x1[ -2 ] .. x1[ frame_length - 1 ] and
 * x2[ -2 ] .. x2[ frame_length - 1 ] are read, x1[ -2 ] .. x1[ frame_length - 1 ]
 * are rewritten, and x2[ -1 ] .. x2[ frame_length - 2 ] are written. Both
 * buffers therefore need two accessible samples in front of index 0.
 *
 * Besides the signals, the function writes *mid_only_flag and
 * mid_side_rates_bps[ 0..1 ], hands ix to silk_stereo_quant_pred() in every
 * branch, and updates these fields of *state: sMid, sSide, smth_width_Q14,
 * silent_side_len, pred_prev_Q13 and width_prev_Q14 (mid_side_amp_Q0 is passed
 * by pointer to silk_stereo_find_predictor()).
 *
 * NOTE(review): the silk_* arithmetic macros and the VARDECL / ALLOC /
 * SAVE_STACK / RESTORE_STACK helpers are defined outside this file; remarks
 * below about what they compute are assumptions to confirm against their
 * definitions.
 */
36 0 : void silk_stereo_LR_to_MS(
37 : stereo_enc_state *state, /* I/O State */
38 : opus_int16 x1[], /* I/O Left input signal, becomes mid signal */
39 : opus_int16 x2[], /* I/O Right input signal, becomes side signal */
40 : opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */
41 : opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */
42 : opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
43 : opus_int32 total_rate_bps, /* I Total bitrate */
44 : opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
45 : opus_int toMono, /* I Last frame before a stereo->mono transition */
46 : opus_int fs_kHz, /* I Sample rate (kHz) */
47 : opus_int frame_length /* I Number of samples */
48 : )
49 : {
50 : opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
51 : opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
52 : opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
53 : VARDECL( opus_int16, side );
54 : VARDECL( opus_int16, LP_mid );
55 : VARDECL( opus_int16, HP_mid );
56 : VARDECL( opus_int16, LP_side );
57 : VARDECL( opus_int16, HP_side );
/* mid is stored in the caller's x1 buffer, shifted back by two samples so that mid[ 0..1 ] can hold history */
58 0 : opus_int16 *mid = &x1[ -2 ];
59 : SAVE_STACK;
60 :
/* side gets a scratch buffer with the same two-sample lead-in as mid */
61 0 : ALLOC( side, frame_length + 2, opus_int16 );
62 : /* Convert to basic mid/side signals */
63 0 : for( n = 0; n < frame_length + 2; n++ ) {
/* Operands are widened to 32 bits before adding/subtracting; only the halved difference is saturated to 16 bits */
64 0 : sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
65 0 : diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ];
66 0 : mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
67 0 : side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) );
68 : }
69 :
70 : /* Buffering */
/* Replace the first two samples with the history saved by the previous call, then save the last two samples of this frame for the next call */
71 0 : silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) );
72 0 : silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
73 0 : silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof( opus_int16 ) );
74 0 : silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
75 :
76 : /* LP and HP filter mid signal */
77 0 : ALLOC( LP_mid, frame_length, opus_int16 );
78 0 : ALLOC( HP_mid, frame_length, opus_int16 );
79 0 : for( n = 0; n < frame_length; n++ ) {
/* Three-tap low-pass over mid[ n .. n + 2 ]; presumably ( a + 2 * b + c ) / 4 with rounding -- confirm silk_ADD_LSHIFT */
80 0 : sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
81 0 : LP_mid[ n ] = sum;
/* High-pass part is the centre sample minus the low-pass output */
82 0 : HP_mid[ n ] = mid[ n + 1 ] - sum;
83 : }
84 :
85 : /* LP and HP filter side signal */
86 0 : ALLOC( LP_side, frame_length, opus_int16 );
87 0 : ALLOC( HP_side, frame_length, opus_int16 );
88 0 : for( n = 0; n < frame_length; n++ ) {
/* Same filter pair as applied to the mid signal above */
89 0 : sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
90 0 : LP_side[ n ] = sum;
91 0 : HP_side[ n ] = side[ n + 1 ] - sum;
92 : }
93 :
94 : /* Find energies and predictors */
95 0 : is10msFrame = frame_length == 10 * fs_kHz;
/* The smoothing coefficient is halved for 10 ms frames, then scaled by the squared speech activity of the previous frame */
96 0 : smooth_coef_Q16 = is10msFrame ?
97 0 : SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
98 : SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 );
99 0 : smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
100 :
/* One predictor per band: index 0 from the low-pass signals, index 1 from the high-pass signals */
101 0 : pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
102 0 : pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
103 : /* Ratio of the norms of residual and mid signals */
/* Presumably HP_ratio + 3 * LP_ratio (confirm silk_SMLABB); the result is capped at 1.0 in Q16 */
104 0 : frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
105 0 : frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
106 :
107 : /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
108 0 : total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */
/* Keep the remaining budget strictly positive */
109 0 : if( total_rate_bps < 1 ) {
110 0 : total_rate_bps = 1;
111 : }
/* Minimum mid rate depends on the sample rate; presumably 2000 + 900 * fs_kHz -- confirm silk_SMLABB */
112 0 : min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
113 0 : silk_assert( min_mid_rate_bps < 32767 );
114 : /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side, so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_rate */
115 0 : frac_3_Q16 = silk_MUL( 3, frac_Q16 );
116 0 : mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
117 : /* If Mid bitrate below minimum, reduce stereo width */
118 0 : if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
/* Mid is pinned to the minimum and side receives whatever is left (may be negative here; a floor is applied further down) */
119 0 : mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
120 0 : mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
121 : /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
122 0 : width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps,
123 0 : silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 );
/* Clamp the width to the range [ 0, 1.0 ] in Q14 */
124 0 : width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) );
125 : } else {
/* Enough bits for mid: side gets the remainder and the target width is full (1.0 in Q14) */
126 0 : mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
127 0 : width_Q14 = SILK_FIX_CONST( 1, 14 );
128 : }
129 :
130 : /* Smoother */
/* Move the stored smoothed width towards this frame's target width, weighted by smooth_coef_Q16 */
131 0 : state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 );
132 :
133 : /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
/* The branches below are mutually exclusive and tested in order; only the second one sets *mid_only_flag */
134 0 : *mid_only_flag = 0;
135 0 : if( toMono ) {
136 : /* Last frame before stereo->mono transition; collapse stereo width */
137 0 : width_Q14 = 0;
138 0 : pred_Q13[ 0 ] = 0;
139 0 : pred_Q13[ 1 ] = 0;
140 0 : silk_stereo_quant_pred( pred_Q13, ix );
141 0 : } else if( state->width_prev_Q14 == 0 &&
142 0 : ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
143 : {
144 : /* Code as panned-mono; previous frame already had zero width */
145 : /* Scale down and quantize predictors */
146 0 : pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
147 0 : pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
148 0 : silk_stereo_quant_pred( pred_Q13, ix );
149 : /* Collapse stereo width */
/* The predictors are zeroed only after quantization, so ix is still derived from the scaled values */
150 0 : width_Q14 = 0;
151 0 : pred_Q13[ 0 ] = 0;
152 0 : pred_Q13[ 1 ] = 0;
/* The whole budget goes to the mid channel */
153 0 : mid_side_rates_bps[ 0 ] = total_rate_bps;
154 0 : mid_side_rates_bps[ 1 ] = 0;
155 0 : *mid_only_flag = 1;
156 0 : } else if( state->width_prev_Q14 != 0 &&
157 0 : ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) )
158 : {
/* Tighter thresholds (11 and 0.02) than the zero-width case above (13 and 0.05) */
159 : /* Transition to zero-width stereo */
160 : /* Scale down and quantize predictors */
161 0 : pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
162 0 : pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
163 0 : silk_stereo_quant_pred( pred_Q13, ix );
164 : /* Collapse stereo width */
165 0 : width_Q14 = 0;
166 0 : pred_Q13[ 0 ] = 0;
167 0 : pred_Q13[ 1 ] = 0;
168 0 : } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) {
169 : /* Full-width stereo coding */
170 0 : silk_stereo_quant_pred( pred_Q13, ix );
171 0 : width_Q14 = SILK_FIX_CONST( 1, 14 );
172 : } else {
173 : /* Reduced-width stereo coding; scale down and quantize predictors */
174 0 : pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
175 0 : pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
176 0 : silk_stereo_quant_pred( pred_Q13, ix );
177 0 : width_Q14 = state->smth_width_Q14;
178 : }
179 :
180 : /* Make sure to keep on encoding until the tapered output has been transmitted */
181 0 : if( *mid_only_flag == 1 ) {
/* Accumulate the number of samples beyond the interpolation ramp; the flag is withdrawn until LA_SHAPE_MS worth of samples has accumulated */
182 0 : state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
183 0 : if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
184 0 : *mid_only_flag = 0;
185 : } else {
186 : /* Limit to avoid wrapping around */
187 0 : state->silent_side_len = 10000;
188 : }
189 : } else {
190 0 : state->silent_side_len = 0;
191 : }
192 :
/* If the side channel is still coded, give it at least 1 bps and hand mid the remainder, also floored at 1 bps */
193 0 : if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {
194 0 : mid_side_rates_bps[ 1 ] = 1;
195 0 : mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
196 : }
197 :
198 : /* Interpolate predictors and subtract prediction from side channel */
/* Start from the previous frame's (negated) predictors and width, and step linearly towards this frame's values over STEREO_INTERP_LEN_MS * fs_kHz samples */
199 0 : pred0_Q13 = -state->pred_prev_Q13[ 0 ];
200 0 : pred1_Q13 = -state->pred_prev_Q13[ 1 ];
201 0 : w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 );
/* Reciprocal of the ramp length in Q16, used to derive the per-sample steps */
202 0 : denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
203 0 : delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
204 0 : delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
205 0 : deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
206 0 : for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
207 0 : pred0_Q13 += delta0_Q13;
208 0 : pred1_Q13 += delta1_Q13;
209 0 : w_Q24 += deltaw_Q24;
/* Output = width-scaled side sample plus the negated predictors applied to the low-passed mid (pred0) and to the centre mid sample (pred1) */
210 0 : sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
211 0 : sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
212 0 : sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
/* The result for centre sample n + 1 is stored at x2[ n - 1 ], i.e. two positions earlier, matching the mid = &x1[ -2 ] offset */
213 0 : x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
214 : }
215 :
/* Remainder of the frame: same computation with this frame's final predictors and width held constant */
216 0 : pred0_Q13 = -pred_Q13[ 0 ];
217 0 : pred1_Q13 = -pred_Q13[ 1 ];
218 0 : w_Q24 = silk_LSHIFT( width_Q14, 10 );
219 0 : for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
220 0 : sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
221 0 : sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
222 0 : sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
223 0 : x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
224 : }
/* Remember this frame's predictors and width as the starting point of the next frame's interpolation */
225 0 : state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
226 0 : state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
227 0 : state->width_prev_Q14 = (opus_int16)width_Q14;
228 : RESTORE_STACK;
229 0 : }
|