Line data Source code
1 : /********************************************************************
2 : * *
3 : * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 : * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 : * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 : * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 : * *
8 : * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 : * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 : * *
11 : ********************************************************************
12 :
13 : function:
14 : last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $
15 :
16 : ********************************************************************/
17 :
18 : #include <string.h>
19 : #include "internal.h"
20 : #include "dct.h"
21 :
22 : /*Performs an inverse 8 point Type-II DCT transform.
23 : The output is scaled by a factor of 2 relative to the orthonormal version of
24 : the transform.
25 : _y: The buffer to store the result in.
26 : Data will be placed in every 8th entry (e.g., in a column of an 8x8
27 : block).
28 : _x: The input coefficients.
29 : The first 8 entries are used (e.g., from a row of an 8x8 block).*/
30 0 : static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
31 : ogg_int32_t t[8];
32 : ogg_int32_t r;
33 : /*Stage 1:*/
34 : /*0-1 butterfly.*/
35 0 : t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
36 0 : t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
37 : /*2-3 rotation by 6pi/16.*/
38 0 : t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
39 0 : t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
40 : /*4-7 rotation by 7pi/16.*/
41 0 : t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
42 : /*5-6 rotation by 3pi/16.*/
43 0 : t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
44 0 : t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
45 0 : t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
46 : /*Stage 2:*/
47 : /*4-5 butterfly.*/
48 0 : r=t[4]+t[5];
49 0 : t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
50 0 : t[4]=r;
51 : /*7-6 butterfly.*/
52 0 : r=t[7]+t[6];
53 0 : t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
54 0 : t[7]=r;
55 : /*Stage 3:*/
56 : /*0-3 butterfly.*/
57 0 : r=t[0]+t[3];
58 0 : t[3]=t[0]-t[3];
59 0 : t[0]=r;
60 : /*1-2 butterfly.*/
61 0 : r=t[1]+t[2];
62 0 : t[2]=t[1]-t[2];
63 0 : t[1]=r;
64 : /*6-5 butterfly.*/
65 0 : r=t[6]+t[5];
66 0 : t[5]=t[6]-t[5];
67 0 : t[6]=r;
68 : /*Stage 4:*/
69 : /*0-7 butterfly.*/
70 0 : _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
71 : /*1-6 butterfly.*/
72 0 : _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
73 : /*2-5 butterfly.*/
74 0 : _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
75 : /*3-4 butterfly.*/
76 0 : _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
77 0 : _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
78 0 : _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
79 0 : _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
80 0 : _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
81 0 : }
82 :
83 : /*Performs an inverse 8 point Type-II DCT transform.
84 : The output is scaled by a factor of 2 relative to the orthonormal version of
85 : the transform.
86 : _y: The buffer to store the result in.
87 : Data will be placed in every 8th entry (e.g., in a column of an 8x8
88 : block).
89 : _x: The input coefficients.
90 : Only the first 4 entries are used.
91 : The other 4 are assumed to be 0.*/
92 0 : static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
93 : ogg_int32_t t[8];
94 : ogg_int32_t r;
95 : /*Stage 1:*/
96 0 : t[0]=OC_C4S4*_x[0]>>16;
97 0 : t[2]=OC_C6S2*_x[2]>>16;
98 0 : t[3]=OC_C2S6*_x[2]>>16;
99 0 : t[4]=OC_C7S1*_x[1]>>16;
100 0 : t[5]=-(OC_C5S3*_x[3]>>16);
101 0 : t[6]=OC_C3S5*_x[3]>>16;
102 0 : t[7]=OC_C1S7*_x[1]>>16;
103 : /*Stage 2:*/
104 0 : r=t[4]+t[5];
105 0 : t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
106 0 : t[4]=r;
107 0 : r=t[7]+t[6];
108 0 : t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
109 0 : t[7]=r;
110 : /*Stage 3:*/
111 0 : t[1]=t[0]+t[2];
112 0 : t[2]=t[0]-t[2];
113 0 : r=t[0]+t[3];
114 0 : t[3]=t[0]-t[3];
115 0 : t[0]=r;
116 0 : r=t[6]+t[5];
117 0 : t[5]=t[6]-t[5];
118 0 : t[6]=r;
119 : /*Stage 4:*/
120 0 : _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
121 0 : _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
122 0 : _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
123 0 : _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
124 0 : _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
125 0 : _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
126 0 : _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
127 0 : _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
128 0 : }
129 :
130 : /*Performs an inverse 8 point Type-II DCT transform.
131 : The output is scaled by a factor of 2 relative to the orthonormal version of
132 : the transform.
133 : _y: The buffer to store the result in.
134 : Data will be placed in every 8th entry (e.g., in a column of an 8x8
135 : block).
136 : _x: The input coefficients.
137 : Only the first 3 entries are used.
138 : The other 5 are assumed to be 0.*/
139 0 : static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
140 : ogg_int32_t t[8];
141 : ogg_int32_t r;
142 : /*Stage 1:*/
143 0 : t[0]=OC_C4S4*_x[0]>>16;
144 0 : t[2]=OC_C6S2*_x[2]>>16;
145 0 : t[3]=OC_C2S6*_x[2]>>16;
146 0 : t[4]=OC_C7S1*_x[1]>>16;
147 0 : t[7]=OC_C1S7*_x[1]>>16;
148 : /*Stage 2:*/
149 0 : t[5]=OC_C4S4*t[4]>>16;
150 0 : t[6]=OC_C4S4*t[7]>>16;
151 : /*Stage 3:*/
152 0 : t[1]=t[0]+t[2];
153 0 : t[2]=t[0]-t[2];
154 0 : r=t[0]+t[3];
155 0 : t[3]=t[0]-t[3];
156 0 : t[0]=r;
157 0 : r=t[6]+t[5];
158 0 : t[5]=t[6]-t[5];
159 0 : t[6]=r;
160 : /*Stage 4:*/
161 0 : _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
162 0 : _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
163 0 : _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
164 0 : _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
165 0 : _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
166 0 : _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
167 0 : _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
168 0 : _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
169 0 : }
170 :
171 : /*Performs an inverse 8 point Type-II DCT transform.
172 : The output is scaled by a factor of 2 relative to the orthonormal version of
173 : the transform.
174 : _y: The buffer to store the result in.
175 : Data will be placed in every 8th entry (e.g., in a column of an 8x8
176 : block).
177 : _x: The input coefficients.
178 : Only the first 2 entries are used.
179 : The other 6 are assumed to be 0.*/
180 0 : static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
181 : ogg_int32_t t[8];
182 : ogg_int32_t r;
183 : /*Stage 1:*/
184 0 : t[0]=OC_C4S4*_x[0]>>16;
185 0 : t[4]=OC_C7S1*_x[1]>>16;
186 0 : t[7]=OC_C1S7*_x[1]>>16;
187 : /*Stage 2:*/
188 0 : t[5]=OC_C4S4*t[4]>>16;
189 0 : t[6]=OC_C4S4*t[7]>>16;
190 : /*Stage 3:*/
191 0 : r=t[6]+t[5];
192 0 : t[5]=t[6]-t[5];
193 0 : t[6]=r;
194 : /*Stage 4:*/
195 0 : _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
196 0 : _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
197 0 : _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
198 0 : _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
199 0 : _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
200 0 : _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
201 0 : _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
202 0 : _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
203 0 : }
204 :
205 : /*Performs an inverse 8 point Type-II DCT transform.
206 : The output is scaled by a factor of 2 relative to the orthonormal version of
207 : the transform.
208 : _y: The buffer to store the result in.
209 : Data will be placed in every 8th entry (e.g., in a column of an 8x8
210 : block).
211 : _x: The input coefficients.
212 : Only the first entry is used.
213 : The other 7 are assumed to be 0.*/
214 0 : static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
215 0 : _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
216 0 : _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
217 0 : }
218 :
219 : /*Performs an inverse 8x8 Type-II DCT transform.
220 : The input is assumed to be scaled by a factor of 4 relative to orthonormal
221 : version of the transform.
222 : All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
223 : x x 0 0 0 0 0 0
224 : x 0 0 0 0 0 0 0
225 : 0 0 0 0 0 0 0 0
226 : 0 0 0 0 0 0 0 0
227 : 0 0 0 0 0 0 0 0
228 : 0 0 0 0 0 0 0 0
229 : 0 0 0 0 0 0 0 0
230 : 0 0 0 0 0 0 0 0
231 : _y: The buffer to store the result in.
232 : This may be the same as _x.
233 : _x: The input coefficients.*/
234 0 : static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){
235 : ogg_int16_t w[64];
236 : int i;
237 : /*Transform rows of x into columns of w.*/
238 0 : idct8_2(w,_x);
239 0 : idct8_1(w+1,_x+8);
240 : /*Transform rows of w into columns of y.*/
241 0 : for(i=0;i<8;i++)idct8_2(_y+i,w+i*8);
242 : /*Adjust for the scale factor.*/
243 0 : for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
244 : /*Clear input data for next block (decoder only).*/
245 0 : if(_x!=_y)_x[0]=_x[1]=_x[8]=0;
246 0 : }
247 :
248 : /*Performs an inverse 8x8 Type-II DCT transform.
249 : The input is assumed to be scaled by a factor of 4 relative to orthonormal
250 : version of the transform.
251 : All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
252 : x x x x 0 0 0 0
253 : x x x 0 0 0 0 0
254 : x x 0 0 0 0 0 0
255 : x 0 0 0 0 0 0 0
256 : 0 0 0 0 0 0 0 0
257 : 0 0 0 0 0 0 0 0
258 : 0 0 0 0 0 0 0 0
259 : 0 0 0 0 0 0 0 0
260 : _y: The buffer to store the result in.
261 : This may be the same as _x.
262 : _x: The input coefficients.*/
263 0 : static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
264 : ogg_int16_t w[64];
265 : int i;
266 : /*Transform rows of x into columns of w.*/
267 0 : idct8_4(w,_x);
268 0 : idct8_3(w+1,_x+8);
269 0 : idct8_2(w+2,_x+16);
270 0 : idct8_1(w+3,_x+24);
271 : /*Transform rows of w into columns of y.*/
272 0 : for(i=0;i<8;i++)idct8_4(_y+i,w+i*8);
273 : /*Adjust for the scale factor.*/
274 0 : for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
275 : /*Clear input data for next block (decoder only).*/
276 0 : if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0;
277 0 : }
278 :
279 : /*Performs an inverse 8x8 Type-II DCT transform.
280 : The input is assumed to be scaled by a factor of 4 relative to orthonormal
281 : version of the transform.
282 : _y: The buffer to store the result in.
283 : This may be the same as _x.
284 : _x: The input coefficients.*/
285 0 : static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
286 : ogg_int16_t w[64];
287 : int i;
288 : /*Transform rows of x into columns of w.*/
289 0 : for(i=0;i<8;i++)idct8(w+i,_x+i*8);
290 : /*Transform rows of w into columns of y.*/
291 0 : for(i=0;i<8;i++)idct8(_y+i,w+i*8);
292 : /*Adjust for the scale factor.*/
293 0 : for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
294 0 : if(_x!=_y)for(i=0;i<64;i++)_x[i]=0;
295 0 : }
296 :
297 : /*Performs an inverse 8x8 Type-II DCT transform.
298 : The input is assumed to be scaled by a factor of 4 relative to orthonormal
299 : version of the transform.*/
300 0 : void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
301 : /*_last_zzi is subtly different from an actual count of the number of
302 : coefficients we decoded for this block.
303 : It contains the value of zzi BEFORE the final token in the block was
304 : decoded.
305 : In most cases this is an EOB token (the continuation of an EOB run from a
306 : previous block counts), and so this is the same as the coefficient count.
307 : However, in the case that the last token was NOT an EOB token, but filled
308 : the block up with exactly 64 coefficients, _last_zzi will be less than 64.
309 : Provided the last token was not a pure zero run, the minimum value it can
310 : be is 46, and so that doesn't affect any of the cases in this routine.
311 : However, if the last token WAS a pure zero run of length 63, then _last_zzi
312 : will be 1 while the number of coefficients decoded is 64.
313 : Thus, we will trigger the following special case, where the real
314 : coefficient count would not.
315 : Note also that a zero run of length 64 will give _last_zzi a value of 0,
316 : but we still process the DC coefficient, which might have a non-zero value
317 : due to DC prediction.
318 : Although convoluted, this is arguably the correct behavior: it allows us to
319 : use a smaller transform when the block ends with a long zero run instead
320 : of a normal EOB token.
321 : It could be smarter... multiple separate zero runs at the end of a block
322 : will fool it, but an encoder that generates these really deserves what it
323 : gets.
324 : Needless to say we inherited this approach from VP3.*/
325 : /*Then perform the iDCT.*/
326 0 : if(_last_zzi<=3)oc_idct8x8_3(_y,_x);
327 0 : else if(_last_zzi<=10)oc_idct8x8_10(_y,_x);
328 0 : else oc_idct8x8_slow(_y,_x);
329 0 : }
|