Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <stdlib.h>
13 : #include "aom_dsp/inv_txfm.h"
14 : #include "av1/common/av1_fwd_txfm1d.h"
// Per-stage range checking hook used by every transform in this file.
// With CONFIG_COEFFICIENT_RANGE_CHECKING enabled, range_check() forwards its
// arguments unchanged to range_check_func(), which is only declared here (its
// definition is not in this part of the file).
15 : #if CONFIG_COEFFICIENT_RANGE_CHECKING
16 :
17 : void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
18 : int32_t size, int8_t bit);
19 :
20 : #define range_check(stage, input, buf, size, bit) \
21 : range_check_func(stage, input, buf, size, bit)
22 : #else
// Checking disabled: range_check() expands to a block that only casts each
// argument to void, so it does no work.
// NOTE(review): the expansion is a bare `{ ... }` block, so `range_check(...);`
// is a block followed by an empty statement. Every use visible in this file is
// a standalone statement, where that is harmless, but it would not compile as
// the unbraced body of an `if` that has an `else`; `do { ... } while (0)` is
// the more robust form.
23 : #define range_check(stage, input, buf, size, bit) \
24 : { \
25 : (void)stage; \
26 : (void)input; \
27 : (void)buf; \
28 : (void)size; \
29 : (void)bit; \
30 : }
31 : #endif
32 :
33 : // TODO(angiebird): Make 1-d txfm functions static
// av1_fdct4_new: 4-point forward transform (a DCT, going by the name) written
// as a fixed sequence of three butterfly stages.
//
//   input        4 values to transform; only read.
//   output       receives the 4 results; also used as scratch between stages.
//   cos_bit      per-stage precision, indexed by stage number (stage 2 is the
//                only entry read here); passed to cospi_arr() and half_btf().
//   stage_range  per-stage value handed to range_check(); entries 0..3 are read.
//
// Stages alternate between `output` and the local `step` array. Stage 1 stores
// into output[] before it has finished reading input[], so `output` must not
// alias `input`.
// NOTE(review): cospi_arr() and half_btf() are defined elsewhere. half_btf(w0,
// x0, w1, x1, bit) presumably returns the rounded fixed-point value
// (w0*x0 + w1*x1) >> bit -- confirm against its definition.
34 0 : void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
35 : const int8_t *stage_range) {
36 0 : const int32_t size = 4;
37 : const int32_t *cospi;
38 :
39 0 : int32_t stage = 0;
40 : int32_t *bf0, *bf1;
41 : int32_t step[4];
42 :
43 : // stage 0: range-check the unmodified input
44 0 : range_check(stage, input, input, size, stage_range[stage]);
45 :
46 : // stage 1: sums and differences of mirrored input pairs (i, 3-i) -> output
47 0 : stage++;
48 0 : bf1 = output;
49 0 : bf1[0] = input[0] + input[3];
50 0 : bf1[1] = input[1] + input[2];
51 0 : bf1[2] = -input[2] + input[1];
52 0 : bf1[3] = -input[3] + input[0];
53 0 : range_check(stage, input, bf1, size, stage_range[stage]);
54 :
55 : // stage 2: half_btf() weighted combinations of pairs (0,1) and (2,3) -> step
56 0 : stage++;
57 0 : cospi = cospi_arr(cos_bit[stage]);
58 0 : bf0 = output;
59 0 : bf1 = step;
60 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
61 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
62 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
63 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
64 0 : range_check(stage, input, bf1, size, stage_range[stage]);
65 :
66 : // stage 3: reorder into output (entries 1 and 2 swap; bit-reversed index)
67 0 : stage++;
68 0 : bf0 = step;
69 0 : bf1 = output;
70 0 : bf1[0] = bf0[0];
71 0 : bf1[1] = bf0[2];
72 0 : bf1[2] = bf0[1];
73 0 : bf1[3] = bf0[3];
74 0 : range_check(stage, input, bf1, size, stage_range[stage]);
75 0 : }
76 :
// av1_fdct8_new: 8-point forward transform (a DCT, going by the name) written
// as a fixed sequence of five butterfly stages.
//
//   input        8 values to transform; only read.
//   output       receives the 8 results; also used as scratch between stages.
//   cos_bit      per-stage precision, indexed by stage number (entries 2..4 are
//                read); passed to cospi_arr() and half_btf().
//   stage_range  per-stage value handed to range_check(); entries 0..5 are read.
//
// Stages alternate between `output` and the local `step` array. Stage 1 stores
// into output[] before it has finished reading input[], so `output` must not
// alias `input`.
// NOTE(review): cospi_arr() and half_btf() are defined elsewhere. half_btf(w0,
// x0, w1, x1, bit) presumably returns the rounded fixed-point value
// (w0*x0 + w1*x1) >> bit -- confirm against its definition.
77 0 : void av1_fdct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
78 : const int8_t *stage_range) {
79 0 : const int32_t size = 8;
80 : const int32_t *cospi;
81 :
82 0 : int32_t stage = 0;
83 : int32_t *bf0, *bf1;
84 : int32_t step[8];
85 :
86 : // stage 0: range-check the unmodified input
87 0 : range_check(stage, input, input, size, stage_range[stage]);
88 :
89 : // stage 1: sums and differences of mirrored input pairs (i, 7-i) -> output
90 0 : stage++;
91 0 : bf1 = output;
92 0 : bf1[0] = input[0] + input[7];
93 0 : bf1[1] = input[1] + input[6];
94 0 : bf1[2] = input[2] + input[5];
95 0 : bf1[3] = input[3] + input[4];
96 0 : bf1[4] = -input[4] + input[3];
97 0 : bf1[5] = -input[5] + input[2];
98 0 : bf1[6] = -input[6] + input[1];
99 0 : bf1[7] = -input[7] + input[0];
100 0 : range_check(stage, input, bf1, size, stage_range[stage]);
101 :
102 : // stage 2: add/sub on 0..3; half_btf() on 5,6; 4 and 7 copied -> step
103 0 : stage++;
104 0 : cospi = cospi_arr(cos_bit[stage]);
105 0 : bf0 = output;
106 0 : bf1 = step;
107 0 : bf1[0] = bf0[0] + bf0[3];
108 0 : bf1[1] = bf0[1] + bf0[2];
109 0 : bf1[2] = -bf0[2] + bf0[1];
110 0 : bf1[3] = -bf0[3] + bf0[0];
111 0 : bf1[4] = bf0[4];
112 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
113 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
114 0 : bf1[7] = bf0[7];
115 0 : range_check(stage, input, bf1, size, stage_range[stage]);
116 :
117 : // stage 3: half_btf() on 0..3; add/sub on 4..7 -> output
118 0 : stage++;
119 0 : cospi = cospi_arr(cos_bit[stage]);
120 0 : bf0 = step;
121 0 : bf1 = output;
122 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
123 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
124 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
125 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
126 0 : bf1[4] = bf0[4] + bf0[5];
127 0 : bf1[5] = -bf0[5] + bf0[4];
128 0 : bf1[6] = -bf0[6] + bf0[7];
129 0 : bf1[7] = bf0[7] + bf0[6];
130 0 : range_check(stage, input, bf1, size, stage_range[stage]);
131 :
132 : // stage 4: 0..3 copied; half_btf() on 4..7 -> step
133 0 : stage++;
134 0 : cospi = cospi_arr(cos_bit[stage]);
135 0 : bf0 = output;
136 0 : bf1 = step;
137 0 : bf1[0] = bf0[0];
138 0 : bf1[1] = bf0[1];
139 0 : bf1[2] = bf0[2];
140 0 : bf1[3] = bf0[3];
141 0 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
142 0 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
143 0 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
144 0 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
145 0 : range_check(stage, input, bf1, size, stage_range[stage]);
146 :
147 : // stage 5: reorder into output; output[i] takes step[bit-reversed i]
148 0 : stage++;
149 0 : bf0 = step;
150 0 : bf1 = output;
151 0 : bf1[0] = bf0[0];
152 0 : bf1[1] = bf0[4];
153 0 : bf1[2] = bf0[2];
154 0 : bf1[3] = bf0[6];
155 0 : bf1[4] = bf0[1];
156 0 : bf1[5] = bf0[5];
157 0 : bf1[6] = bf0[3];
158 0 : bf1[7] = bf0[7];
159 0 : range_check(stage, input, bf1, size, stage_range[stage]);
160 0 : }
161 :
// av1_fdct16_new: 16-point forward transform (a DCT, going by the name) written
// as a fixed sequence of seven butterfly stages.
//
//   input        16 values to transform; only read.
//   output       receives the 16 results; also used as scratch between stages.
//   cos_bit      per-stage precision, indexed by stage number (entries 2..6 are
//                read); passed to cospi_arr() and half_btf().
//   stage_range  per-stage value handed to range_check(); entries 0..7 are read.
//
// Stages alternate between `output` and the local `step` array. Stage 1 stores
// into output[] before it has finished reading input[], so `output` must not
// alias `input`.
// NOTE(review): cospi_arr() and half_btf() are defined elsewhere. half_btf(w0,
// x0, w1, x1, bit) presumably returns the rounded fixed-point value
// (w0*x0 + w1*x1) >> bit -- confirm against its definition.
162 0 : void av1_fdct16_new(const int32_t *input, int32_t *output,
163 : const int8_t *cos_bit, const int8_t *stage_range) {
164 0 : const int32_t size = 16;
165 : const int32_t *cospi;
166 :
167 0 : int32_t stage = 0;
168 : int32_t *bf0, *bf1;
169 : int32_t step[16];
170 :
171 : // stage 0: range-check the unmodified input
172 0 : range_check(stage, input, input, size, stage_range[stage]);
173 :
174 : // stage 1: sums and differences of mirrored input pairs (i, 15-i) -> output
175 0 : stage++;
176 0 : bf1 = output;
177 0 : bf1[0] = input[0] + input[15];
178 0 : bf1[1] = input[1] + input[14];
179 0 : bf1[2] = input[2] + input[13];
180 0 : bf1[3] = input[3] + input[12];
181 0 : bf1[4] = input[4] + input[11];
182 0 : bf1[5] = input[5] + input[10];
183 0 : bf1[6] = input[6] + input[9];
184 0 : bf1[7] = input[7] + input[8];
185 0 : bf1[8] = -input[8] + input[7];
186 0 : bf1[9] = -input[9] + input[6];
187 0 : bf1[10] = -input[10] + input[5];
188 0 : bf1[11] = -input[11] + input[4];
189 0 : bf1[12] = -input[12] + input[3];
190 0 : bf1[13] = -input[13] + input[2];
191 0 : bf1[14] = -input[14] + input[1];
192 0 : bf1[15] = -input[15] + input[0];
193 0 : range_check(stage, input, bf1, size, stage_range[stage]);
194 :
195 : // stage 2: add/sub on 0..7; half_btf() on 10..13; 8,9,14,15 copied -> step
196 0 : stage++;
197 0 : cospi = cospi_arr(cos_bit[stage]);
198 0 : bf0 = output;
199 0 : bf1 = step;
200 0 : bf1[0] = bf0[0] + bf0[7];
201 0 : bf1[1] = bf0[1] + bf0[6];
202 0 : bf1[2] = bf0[2] + bf0[5];
203 0 : bf1[3] = bf0[3] + bf0[4];
204 0 : bf1[4] = -bf0[4] + bf0[3];
205 0 : bf1[5] = -bf0[5] + bf0[2];
206 0 : bf1[6] = -bf0[6] + bf0[1];
207 0 : bf1[7] = -bf0[7] + bf0[0];
208 0 : bf1[8] = bf0[8];
209 0 : bf1[9] = bf0[9];
210 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
211 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
212 0 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
213 0 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
214 0 : bf1[14] = bf0[14];
215 0 : bf1[15] = bf0[15];
216 0 : range_check(stage, input, bf1, size, stage_range[stage]);
217 :
218 : // stage 3: add/sub on 0..3 and 8..15; half_btf() on 5,6; 4,7 copied -> output
219 0 : stage++;
220 0 : cospi = cospi_arr(cos_bit[stage]);
221 0 : bf0 = step;
222 0 : bf1 = output;
223 0 : bf1[0] = bf0[0] + bf0[3];
224 0 : bf1[1] = bf0[1] + bf0[2];
225 0 : bf1[2] = -bf0[2] + bf0[1];
226 0 : bf1[3] = -bf0[3] + bf0[0];
227 0 : bf1[4] = bf0[4];
228 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
229 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
230 0 : bf1[7] = bf0[7];
231 0 : bf1[8] = bf0[8] + bf0[11];
232 0 : bf1[9] = bf0[9] + bf0[10];
233 0 : bf1[10] = -bf0[10] + bf0[9];
234 0 : bf1[11] = -bf0[11] + bf0[8];
235 0 : bf1[12] = -bf0[12] + bf0[15];
236 0 : bf1[13] = -bf0[13] + bf0[14];
237 0 : bf1[14] = bf0[14] + bf0[13];
238 0 : bf1[15] = bf0[15] + bf0[12];
239 0 : range_check(stage, input, bf1, size, stage_range[stage]);
240 :
241 : // stage 4: half_btf() on 0..3,9,10,13,14; add/sub on 4..7; rest copied -> step
242 0 : stage++;
243 0 : cospi = cospi_arr(cos_bit[stage]);
244 0 : bf0 = output;
245 0 : bf1 = step;
246 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
247 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
248 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
249 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
250 0 : bf1[4] = bf0[4] + bf0[5];
251 0 : bf1[5] = -bf0[5] + bf0[4];
252 0 : bf1[6] = -bf0[6] + bf0[7];
253 0 : bf1[7] = bf0[7] + bf0[6];
254 0 : bf1[8] = bf0[8];
255 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
256 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
257 0 : bf1[11] = bf0[11];
258 0 : bf1[12] = bf0[12];
259 0 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
260 0 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
261 0 : bf1[15] = bf0[15];
262 0 : range_check(stage, input, bf1, size, stage_range[stage]);
263 :
264 : // stage 5: 0..3 copied; half_btf() on 4..7; add/sub on 8..15 -> output
265 0 : stage++;
266 0 : cospi = cospi_arr(cos_bit[stage]);
267 0 : bf0 = step;
268 0 : bf1 = output;
269 0 : bf1[0] = bf0[0];
270 0 : bf1[1] = bf0[1];
271 0 : bf1[2] = bf0[2];
272 0 : bf1[3] = bf0[3];
273 0 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
274 0 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
275 0 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
276 0 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
277 0 : bf1[8] = bf0[8] + bf0[9];
278 0 : bf1[9] = -bf0[9] + bf0[8];
279 0 : bf1[10] = -bf0[10] + bf0[11];
280 0 : bf1[11] = bf0[11] + bf0[10];
281 0 : bf1[12] = bf0[12] + bf0[13];
282 0 : bf1[13] = -bf0[13] + bf0[12];
283 0 : bf1[14] = -bf0[14] + bf0[15];
284 0 : bf1[15] = bf0[15] + bf0[14];
285 0 : range_check(stage, input, bf1, size, stage_range[stage]);
286 :
287 : // stage 6: 0..7 copied; half_btf() on 8..15 -> step
288 0 : stage++;
289 0 : cospi = cospi_arr(cos_bit[stage]);
290 0 : bf0 = output;
291 0 : bf1 = step;
292 0 : bf1[0] = bf0[0];
293 0 : bf1[1] = bf0[1];
294 0 : bf1[2] = bf0[2];
295 0 : bf1[3] = bf0[3];
296 0 : bf1[4] = bf0[4];
297 0 : bf1[5] = bf0[5];
298 0 : bf1[6] = bf0[6];
299 0 : bf1[7] = bf0[7];
300 0 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
301 0 : bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
302 0 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
303 0 : bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
304 0 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
305 0 : bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
306 0 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
307 0 : bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
308 0 : range_check(stage, input, bf1, size, stage_range[stage]);
309 :
310 : // stage 7: reorder into output; output[i] takes step[bit-reversed i]
311 0 : stage++;
312 0 : bf0 = step;
313 0 : bf1 = output;
314 0 : bf1[0] = bf0[0];
315 0 : bf1[1] = bf0[8];
316 0 : bf1[2] = bf0[4];
317 0 : bf1[3] = bf0[12];
318 0 : bf1[4] = bf0[2];
319 0 : bf1[5] = bf0[10];
320 0 : bf1[6] = bf0[6];
321 0 : bf1[7] = bf0[14];
322 0 : bf1[8] = bf0[1];
323 0 : bf1[9] = bf0[9];
324 0 : bf1[10] = bf0[5];
325 0 : bf1[11] = bf0[13];
326 0 : bf1[12] = bf0[3];
327 0 : bf1[13] = bf0[11];
328 0 : bf1[14] = bf0[7];
329 0 : bf1[15] = bf0[15];
330 0 : range_check(stage, input, bf1, size, stage_range[stage]);
331 0 : }
332 :
// av1_fdct32_new: 32-point forward transform (a DCT, going by the name) written
// as a fixed sequence of nine butterfly stages.
//
//   input        32 values to transform; only read.
//   output       receives the 32 results; also used as scratch between stages.
//   cos_bit      per-stage precision, indexed by stage number (entries 2..8 are
//                read); passed to cospi_arr() and half_btf().
//   stage_range  per-stage value handed to range_check(); entries 0..9 are read.
//
// Stages alternate between `output` and the local `step` array. Stage 1 stores
// into output[] before it has finished reading input[], so `output` must not
// alias `input`.
// NOTE(review): cospi_arr() and half_btf() are defined elsewhere. half_btf(w0,
// x0, w1, x1, bit) presumably returns the rounded fixed-point value
// (w0*x0 + w1*x1) >> bit -- confirm against its definition.
333 0 : void av1_fdct32_new(const int32_t *input, int32_t *output,
334 : const int8_t *cos_bit, const int8_t *stage_range) {
335 0 : const int32_t size = 32;
336 : const int32_t *cospi;
337 :
338 0 : int32_t stage = 0;
339 : int32_t *bf0, *bf1;
340 : int32_t step[32];
341 :
342 : // stage 0: range-check the unmodified input
343 0 : range_check(stage, input, input, size, stage_range[stage]);
344 :
345 : // stage 1: sums and differences of mirrored input pairs (i, 31-i) -> output
346 0 : stage++;
347 0 : bf1 = output;
348 0 : bf1[0] = input[0] + input[31];
349 0 : bf1[1] = input[1] + input[30];
350 0 : bf1[2] = input[2] + input[29];
351 0 : bf1[3] = input[3] + input[28];
352 0 : bf1[4] = input[4] + input[27];
353 0 : bf1[5] = input[5] + input[26];
354 0 : bf1[6] = input[6] + input[25];
355 0 : bf1[7] = input[7] + input[24];
356 0 : bf1[8] = input[8] + input[23];
357 0 : bf1[9] = input[9] + input[22];
358 0 : bf1[10] = input[10] + input[21];
359 0 : bf1[11] = input[11] + input[20];
360 0 : bf1[12] = input[12] + input[19];
361 0 : bf1[13] = input[13] + input[18];
362 0 : bf1[14] = input[14] + input[17];
363 0 : bf1[15] = input[15] + input[16];
364 0 : bf1[16] = -input[16] + input[15];
365 0 : bf1[17] = -input[17] + input[14];
366 0 : bf1[18] = -input[18] + input[13];
367 0 : bf1[19] = -input[19] + input[12];
368 0 : bf1[20] = -input[20] + input[11];
369 0 : bf1[21] = -input[21] + input[10];
370 0 : bf1[22] = -input[22] + input[9];
371 0 : bf1[23] = -input[23] + input[8];
372 0 : bf1[24] = -input[24] + input[7];
373 0 : bf1[25] = -input[25] + input[6];
374 0 : bf1[26] = -input[26] + input[5];
375 0 : bf1[27] = -input[27] + input[4];
376 0 : bf1[28] = -input[28] + input[3];
377 0 : bf1[29] = -input[29] + input[2];
378 0 : bf1[30] = -input[30] + input[1];
379 0 : bf1[31] = -input[31] + input[0];
380 0 : range_check(stage, input, bf1, size, stage_range[stage]);
381 :
382 : // stage 2: add/sub on 0..15; half_btf() on 20..27; 16..19,28..31 copied -> step
383 0 : stage++;
384 0 : cospi = cospi_arr(cos_bit[stage]);
385 0 : bf0 = output;
386 0 : bf1 = step;
387 0 : bf1[0] = bf0[0] + bf0[15];
388 0 : bf1[1] = bf0[1] + bf0[14];
389 0 : bf1[2] = bf0[2] + bf0[13];
390 0 : bf1[3] = bf0[3] + bf0[12];
391 0 : bf1[4] = bf0[4] + bf0[11];
392 0 : bf1[5] = bf0[5] + bf0[10];
393 0 : bf1[6] = bf0[6] + bf0[9];
394 0 : bf1[7] = bf0[7] + bf0[8];
395 0 : bf1[8] = -bf0[8] + bf0[7];
396 0 : bf1[9] = -bf0[9] + bf0[6];
397 0 : bf1[10] = -bf0[10] + bf0[5];
398 0 : bf1[11] = -bf0[11] + bf0[4];
399 0 : bf1[12] = -bf0[12] + bf0[3];
400 0 : bf1[13] = -bf0[13] + bf0[2];
401 0 : bf1[14] = -bf0[14] + bf0[1];
402 0 : bf1[15] = -bf0[15] + bf0[0];
403 0 : bf1[16] = bf0[16];
404 0 : bf1[17] = bf0[17];
405 0 : bf1[18] = bf0[18];
406 0 : bf1[19] = bf0[19];
407 0 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
408 0 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
409 0 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
410 0 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
411 0 : bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
412 0 : bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
413 0 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
414 0 : bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
415 0 : bf1[28] = bf0[28];
416 0 : bf1[29] = bf0[29];
417 0 : bf1[30] = bf0[30];
418 0 : bf1[31] = bf0[31];
419 0 : range_check(stage, input, bf1, size, stage_range[stage]);
420 :
421 : // stage 3: add/sub on 0..7 and 16..31; half_btf() on 10..13; 8,9,14,15 copied -> output
422 0 : stage++;
423 0 : cospi = cospi_arr(cos_bit[stage]);
424 0 : bf0 = step;
425 0 : bf1 = output;
426 0 : bf1[0] = bf0[0] + bf0[7];
427 0 : bf1[1] = bf0[1] + bf0[6];
428 0 : bf1[2] = bf0[2] + bf0[5];
429 0 : bf1[3] = bf0[3] + bf0[4];
430 0 : bf1[4] = -bf0[4] + bf0[3];
431 0 : bf1[5] = -bf0[5] + bf0[2];
432 0 : bf1[6] = -bf0[6] + bf0[1];
433 0 : bf1[7] = -bf0[7] + bf0[0];
434 0 : bf1[8] = bf0[8];
435 0 : bf1[9] = bf0[9];
436 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
437 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
438 0 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
439 0 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
440 0 : bf1[14] = bf0[14];
441 0 : bf1[15] = bf0[15];
442 0 : bf1[16] = bf0[16] + bf0[23];
443 0 : bf1[17] = bf0[17] + bf0[22];
444 0 : bf1[18] = bf0[18] + bf0[21];
445 0 : bf1[19] = bf0[19] + bf0[20];
446 0 : bf1[20] = -bf0[20] + bf0[19];
447 0 : bf1[21] = -bf0[21] + bf0[18];
448 0 : bf1[22] = -bf0[22] + bf0[17];
449 0 : bf1[23] = -bf0[23] + bf0[16];
450 0 : bf1[24] = -bf0[24] + bf0[31];
451 0 : bf1[25] = -bf0[25] + bf0[30];
452 0 : bf1[26] = -bf0[26] + bf0[29];
453 0 : bf1[27] = -bf0[27] + bf0[28];
454 0 : bf1[28] = bf0[28] + bf0[27];
455 0 : bf1[29] = bf0[29] + bf0[26];
456 0 : bf1[30] = bf0[30] + bf0[25];
457 0 : bf1[31] = bf0[31] + bf0[24];
458 0 : range_check(stage, input, bf1, size, stage_range[stage]);
459 :
460 : // stage 4: add/sub on 0..3 and 8..15; half_btf() on 5,6,18..21,26..29; rest copied -> step
461 0 : stage++;
462 0 : cospi = cospi_arr(cos_bit[stage]);
463 0 : bf0 = output;
464 0 : bf1 = step;
465 0 : bf1[0] = bf0[0] + bf0[3];
466 0 : bf1[1] = bf0[1] + bf0[2];
467 0 : bf1[2] = -bf0[2] + bf0[1];
468 0 : bf1[3] = -bf0[3] + bf0[0];
469 0 : bf1[4] = bf0[4];
470 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
471 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
472 0 : bf1[7] = bf0[7];
473 0 : bf1[8] = bf0[8] + bf0[11];
474 0 : bf1[9] = bf0[9] + bf0[10];
475 0 : bf1[10] = -bf0[10] + bf0[9];
476 0 : bf1[11] = -bf0[11] + bf0[8];
477 0 : bf1[12] = -bf0[12] + bf0[15];
478 0 : bf1[13] = -bf0[13] + bf0[14];
479 0 : bf1[14] = bf0[14] + bf0[13];
480 0 : bf1[15] = bf0[15] + bf0[12];
481 0 : bf1[16] = bf0[16];
482 0 : bf1[17] = bf0[17];
483 0 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
484 0 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
485 0 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
486 0 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
487 0 : bf1[22] = bf0[22];
488 0 : bf1[23] = bf0[23];
489 0 : bf1[24] = bf0[24];
490 0 : bf1[25] = bf0[25];
491 0 : bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
492 0 : bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
493 0 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
494 0 : bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
495 0 : bf1[30] = bf0[30];
496 0 : bf1[31] = bf0[31];
497 0 : range_check(stage, input, bf1, size, stage_range[stage]);
498 :
499 : // stage 5: half_btf() on 0..3,9,10,13,14; add/sub on 4..7 and 16..31; rest copied -> output
500 0 : stage++;
501 0 : cospi = cospi_arr(cos_bit[stage]);
502 0 : bf0 = step;
503 0 : bf1 = output;
504 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
505 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
506 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
507 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
508 0 : bf1[4] = bf0[4] + bf0[5];
509 0 : bf1[5] = -bf0[5] + bf0[4];
510 0 : bf1[6] = -bf0[6] + bf0[7];
511 0 : bf1[7] = bf0[7] + bf0[6];
512 0 : bf1[8] = bf0[8];
513 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
514 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
515 0 : bf1[11] = bf0[11];
516 0 : bf1[12] = bf0[12];
517 0 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
518 0 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
519 0 : bf1[15] = bf0[15];
520 0 : bf1[16] = bf0[16] + bf0[19];
521 0 : bf1[17] = bf0[17] + bf0[18];
522 0 : bf1[18] = -bf0[18] + bf0[17];
523 0 : bf1[19] = -bf0[19] + bf0[16];
524 0 : bf1[20] = -bf0[20] + bf0[23];
525 0 : bf1[21] = -bf0[21] + bf0[22];
526 0 : bf1[22] = bf0[22] + bf0[21];
527 0 : bf1[23] = bf0[23] + bf0[20];
528 0 : bf1[24] = bf0[24] + bf0[27];
529 0 : bf1[25] = bf0[25] + bf0[26];
530 0 : bf1[26] = -bf0[26] + bf0[25];
531 0 : bf1[27] = -bf0[27] + bf0[24];
532 0 : bf1[28] = -bf0[28] + bf0[31];
533 0 : bf1[29] = -bf0[29] + bf0[30];
534 0 : bf1[30] = bf0[30] + bf0[29];
535 0 : bf1[31] = bf0[31] + bf0[28];
536 0 : range_check(stage, input, bf1, size, stage_range[stage]);
537 :
538 : // stage 6: half_btf() on 4..7 and 17,18,21,22,25,26,29,30; add/sub on 8..15; rest copied -> step
539 0 : stage++;
540 0 : cospi = cospi_arr(cos_bit[stage]);
541 0 : bf0 = output;
542 0 : bf1 = step;
543 0 : bf1[0] = bf0[0];
544 0 : bf1[1] = bf0[1];
545 0 : bf1[2] = bf0[2];
546 0 : bf1[3] = bf0[3];
547 0 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
548 0 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
549 0 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
550 0 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
551 0 : bf1[8] = bf0[8] + bf0[9];
552 0 : bf1[9] = -bf0[9] + bf0[8];
553 0 : bf1[10] = -bf0[10] + bf0[11];
554 0 : bf1[11] = bf0[11] + bf0[10];
555 0 : bf1[12] = bf0[12] + bf0[13];
556 0 : bf1[13] = -bf0[13] + bf0[12];
557 0 : bf1[14] = -bf0[14] + bf0[15];
558 0 : bf1[15] = bf0[15] + bf0[14];
559 0 : bf1[16] = bf0[16];
560 0 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
561 0 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
562 0 : bf1[19] = bf0[19];
563 0 : bf1[20] = bf0[20];
564 0 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
565 0 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
566 0 : bf1[23] = bf0[23];
567 0 : bf1[24] = bf0[24];
568 0 : bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
569 0 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
570 0 : bf1[27] = bf0[27];
571 0 : bf1[28] = bf0[28];
572 0 : bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
573 0 : bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
574 0 : bf1[31] = bf0[31];
575 0 : range_check(stage, input, bf1, size, stage_range[stage]);
576 :
577 : // stage 7: 0..7 copied; half_btf() on 8..15; add/sub on 16..31 -> output
578 0 : stage++;
579 0 : cospi = cospi_arr(cos_bit[stage]);
580 0 : bf0 = step;
581 0 : bf1 = output;
582 0 : bf1[0] = bf0[0];
583 0 : bf1[1] = bf0[1];
584 0 : bf1[2] = bf0[2];
585 0 : bf1[3] = bf0[3];
586 0 : bf1[4] = bf0[4];
587 0 : bf1[5] = bf0[5];
588 0 : bf1[6] = bf0[6];
589 0 : bf1[7] = bf0[7];
590 0 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
591 0 : bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
592 0 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
593 0 : bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
594 0 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
595 0 : bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
596 0 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
597 0 : bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
598 0 : bf1[16] = bf0[16] + bf0[17];
599 0 : bf1[17] = -bf0[17] + bf0[16];
600 0 : bf1[18] = -bf0[18] + bf0[19];
601 0 : bf1[19] = bf0[19] + bf0[18];
602 0 : bf1[20] = bf0[20] + bf0[21];
603 0 : bf1[21] = -bf0[21] + bf0[20];
604 0 : bf1[22] = -bf0[22] + bf0[23];
605 0 : bf1[23] = bf0[23] + bf0[22];
606 0 : bf1[24] = bf0[24] + bf0[25];
607 0 : bf1[25] = -bf0[25] + bf0[24];
608 0 : bf1[26] = -bf0[26] + bf0[27];
609 0 : bf1[27] = bf0[27] + bf0[26];
610 0 : bf1[28] = bf0[28] + bf0[29];
611 0 : bf1[29] = -bf0[29] + bf0[28];
612 0 : bf1[30] = -bf0[30] + bf0[31];
613 0 : bf1[31] = bf0[31] + bf0[30];
614 0 : range_check(stage, input, bf1, size, stage_range[stage]);
615 :
616 : // stage 8: 0..15 copied; half_btf() on 16..31 -> step
617 0 : stage++;
618 0 : cospi = cospi_arr(cos_bit[stage]);
619 0 : bf0 = output;
620 0 : bf1 = step;
621 0 : bf1[0] = bf0[0];
622 0 : bf1[1] = bf0[1];
623 0 : bf1[2] = bf0[2];
624 0 : bf1[3] = bf0[3];
625 0 : bf1[4] = bf0[4];
626 0 : bf1[5] = bf0[5];
627 0 : bf1[6] = bf0[6];
628 0 : bf1[7] = bf0[7];
629 0 : bf1[8] = bf0[8];
630 0 : bf1[9] = bf0[9];
631 0 : bf1[10] = bf0[10];
632 0 : bf1[11] = bf0[11];
633 0 : bf1[12] = bf0[12];
634 0 : bf1[13] = bf0[13];
635 0 : bf1[14] = bf0[14];
636 0 : bf1[15] = bf0[15];
637 0 : bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
638 0 : bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
639 0 : bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
640 0 : bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
641 0 : bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
642 0 : bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
643 0 : bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
644 0 : bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
645 0 : bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
646 0 : bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
647 0 : bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
648 0 : bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
649 0 : bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
650 0 : bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
651 0 : bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
652 0 : bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
653 0 : range_check(stage, input, bf1, size, stage_range[stage]);
654 :
655 : // stage 9: reorder into output; output[i] takes step[bit-reversed i]
656 0 : stage++;
657 0 : bf0 = step;
658 0 : bf1 = output;
659 0 : bf1[0] = bf0[0];
660 0 : bf1[1] = bf0[16];
661 0 : bf1[2] = bf0[8];
662 0 : bf1[3] = bf0[24];
663 0 : bf1[4] = bf0[4];
664 0 : bf1[5] = bf0[20];
665 0 : bf1[6] = bf0[12];
666 0 : bf1[7] = bf0[28];
667 0 : bf1[8] = bf0[2];
668 0 : bf1[9] = bf0[18];
669 0 : bf1[10] = bf0[10];
670 0 : bf1[11] = bf0[26];
671 0 : bf1[12] = bf0[6];
672 0 : bf1[13] = bf0[22];
673 0 : bf1[14] = bf0[14];
674 0 : bf1[15] = bf0[30];
675 0 : bf1[16] = bf0[1];
676 0 : bf1[17] = bf0[17];
677 0 : bf1[18] = bf0[9];
678 0 : bf1[19] = bf0[25];
679 0 : bf1[20] = bf0[5];
680 0 : bf1[21] = bf0[21];
681 0 : bf1[22] = bf0[13];
682 0 : bf1[23] = bf0[29];
683 0 : bf1[24] = bf0[3];
684 0 : bf1[25] = bf0[19];
685 0 : bf1[26] = bf0[11];
686 0 : bf1[27] = bf0[27];
687 0 : bf1[28] = bf0[7];
688 0 : bf1[29] = bf0[23];
689 0 : bf1[30] = bf0[15];
690 0 : bf1[31] = bf0[31];
691 0 : range_check(stage, input, bf1, size, stage_range[stage]);
692 0 : }
693 :
// av1_fadst4_new: 4-point forward transform (an ADST, going by the name)
// written as a fixed sequence of five stages.
//
//   input        4 values to transform; only read.
//   output       receives the 4 results; also used as scratch between stages.
//   cos_bit      per-stage precision, indexed by stage number (entries 2 and 4
//                are read); passed to cospi_arr() and half_btf().
//   stage_range  per-stage value handed to range_check(); entries 0..5 are read.
//
// Stages alternate between `output` and the local `step` array. Stage 1 stores
// into output[] before it has finished reading input[], so `output` must not
// alias `input`.
// NOTE(review): cospi_arr() and half_btf() are defined elsewhere. half_btf(w0,
// x0, w1, x1, bit) presumably returns the rounded fixed-point value
// (w0*x0 + w1*x1) >> bit -- confirm against its definition.
694 0 : void av1_fadst4_new(const int32_t *input, int32_t *output,
695 : const int8_t *cos_bit, const int8_t *stage_range) {
696 0 : const int32_t size = 4;
697 : const int32_t *cospi;
698 :
699 0 : int32_t stage = 0;
700 : int32_t *bf0, *bf1;
701 : int32_t step[4];
702 :
703 : // stage 0: range-check the unmodified input
704 0 : range_check(stage, input, input, size, stage_range[stage]);
705 :
706 : // stage 1: copy the input into output in the order 3, 0, 1, 2
707 0 : stage++;
708 0 : bf1 = output;
709 0 : bf1[0] = input[3];
710 0 : bf1[1] = input[0];
711 0 : bf1[2] = input[1];
712 0 : bf1[3] = input[2];
713 0 : range_check(stage, input, bf1, size, stage_range[stage]);
714 :
715 : // stage 2: half_btf() weighted combinations of pairs (0,1) and (2,3) -> step
716 0 : stage++;
717 0 : cospi = cospi_arr(cos_bit[stage]);
718 0 : bf0 = output;
719 0 : bf1 = step;
720 0 : bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
721 0 : bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
722 0 : bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
723 0 : bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
724 0 : range_check(stage, input, bf1, size, stage_range[stage]);
725 :
726 : // stage 3: sums and differences of entries i and i+2 -> output
727 0 : stage++;
728 0 : bf0 = step;
729 0 : bf1 = output;
730 0 : bf1[0] = bf0[0] + bf0[2];
731 0 : bf1[1] = bf0[1] + bf0[3];
732 0 : bf1[2] = -bf0[2] + bf0[0];
733 0 : bf1[3] = -bf0[3] + bf0[1];
734 0 : range_check(stage, input, bf1, size, stage_range[stage]);
735 :
736 : // stage 4: 0 and 1 copied; half_btf() with cospi[32] on pair (2,3) -> step
737 0 : stage++;
738 0 : cospi = cospi_arr(cos_bit[stage]);
739 0 : bf0 = output;
740 0 : bf1 = step;
741 0 : bf1[0] = bf0[0];
742 0 : bf1[1] = bf0[1];
743 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
744 0 : bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
745 0 : range_check(stage, input, bf1, size, stage_range[stage]);
746 :
747 : // stage 5: reorder into output, negating the odd-indexed outputs
748 0 : stage++;
749 0 : bf0 = step;
750 0 : bf1 = output;
751 0 : bf1[0] = bf0[0];
752 0 : bf1[1] = -bf0[2];
753 0 : bf1[2] = bf0[3];
754 0 : bf1[3] = -bf0[1];
755 0 : range_check(stage, input, bf1, size, stage_range[stage]);
756 0 : }
757 :
// av1_fadst8_new: 8-point forward transform (an ADST, going by the name)
// written as a fixed sequence of seven stages.
//
//   input        8 values to transform; only read.
//   output       receives the 8 results; also used as scratch between stages.
//   cos_bit      per-stage precision, indexed by stage number (entries 2, 4 and
//                6 are read); passed to cospi_arr() and half_btf().
//   stage_range  per-stage value handed to range_check(); entries 0..7 are read.
//
// Stages alternate between `output` and the local `step` array. Stage 1 stores
// into output[] before it has finished reading input[], so `output` must not
// alias `input`.
// NOTE(review): cospi_arr() and half_btf() are defined elsewhere. half_btf(w0,
// x0, w1, x1, bit) presumably returns the rounded fixed-point value
// (w0*x0 + w1*x1) >> bit -- confirm against its definition.
758 0 : void av1_fadst8_new(const int32_t *input, int32_t *output,
759 : const int8_t *cos_bit, const int8_t *stage_range) {
760 0 : const int32_t size = 8;
761 : const int32_t *cospi;
762 :
763 0 : int32_t stage = 0;
764 : int32_t *bf0, *bf1;
765 : int32_t step[8];
766 :
767 : // stage 0: range-check the unmodified input
768 0 : range_check(stage, input, input, size, stage_range[stage]);
769 :
770 : // stage 1: copy the input into output in the order 7, 0, 5, 2, 3, 4, 1, 6
771 0 : stage++;
772 0 : bf1 = output;
773 0 : bf1[0] = input[7];
774 0 : bf1[1] = input[0];
775 0 : bf1[2] = input[5];
776 0 : bf1[3] = input[2];
777 0 : bf1[4] = input[3];
778 0 : bf1[5] = input[4];
779 0 : bf1[6] = input[1];
780 0 : bf1[7] = input[6];
781 0 : range_check(stage, input, bf1, size, stage_range[stage]);
782 :
783 : // stage 2: half_btf() weighted combinations of adjacent pairs (2k, 2k+1) -> step
784 0 : stage++;
785 0 : cospi = cospi_arr(cos_bit[stage]);
786 0 : bf0 = output;
787 0 : bf1 = step;
788 0 : bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
789 0 : bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
790 0 : bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
791 0 : bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
792 0 : bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
793 0 : bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
794 0 : bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
795 0 : bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
796 0 : range_check(stage, input, bf1, size, stage_range[stage]);
797 :
798 : // stage 3: sums and differences of entries i and i+4 -> output
799 0 : stage++;
800 0 : bf0 = step;
801 0 : bf1 = output;
802 0 : bf1[0] = bf0[0] + bf0[4];
803 0 : bf1[1] = bf0[1] + bf0[5];
804 0 : bf1[2] = bf0[2] + bf0[6];
805 0 : bf1[3] = bf0[3] + bf0[7];
806 0 : bf1[4] = -bf0[4] + bf0[0];
807 0 : bf1[5] = -bf0[5] + bf0[1];
808 0 : bf1[6] = -bf0[6] + bf0[2];
809 0 : bf1[7] = -bf0[7] + bf0[3];
810 0 : range_check(stage, input, bf1, size, stage_range[stage]);
811 :
812 : // stage 4: 0..3 copied; half_btf() on pairs (4,5) and (6,7) -> step
813 0 : stage++;
814 0 : cospi = cospi_arr(cos_bit[stage]);
815 0 : bf0 = output;
816 0 : bf1 = step;
817 0 : bf1[0] = bf0[0];
818 0 : bf1[1] = bf0[1];
819 0 : bf1[2] = bf0[2];
820 0 : bf1[3] = bf0[3];
821 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
822 0 : bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
823 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
824 0 : bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
825 0 : range_check(stage, input, bf1, size, stage_range[stage]);
826 :
827 : // stage 5: sums and differences of entries i and i+2 within each half -> output
828 0 : stage++;
829 0 : bf0 = step;
830 0 : bf1 = output;
831 0 : bf1[0] = bf0[0] + bf0[2];
832 0 : bf1[1] = bf0[1] + bf0[3];
833 0 : bf1[2] = -bf0[2] + bf0[0];
834 0 : bf1[3] = -bf0[3] + bf0[1];
835 0 : bf1[4] = bf0[4] + bf0[6];
836 0 : bf1[5] = bf0[5] + bf0[7];
837 0 : bf1[6] = -bf0[6] + bf0[4];
838 0 : bf1[7] = -bf0[7] + bf0[5];
839 0 : range_check(stage, input, bf1, size, stage_range[stage]);
840 :
841 : // stage 6: half_btf() with cospi[32] on pairs (2,3) and (6,7); 0,1,4,5 copied -> step
842 0 : stage++;
843 0 : cospi = cospi_arr(cos_bit[stage]);
844 0 : bf0 = output;
845 0 : bf1 = step;
846 0 : bf1[0] = bf0[0];
847 0 : bf1[1] = bf0[1];
848 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
849 0 : bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
850 0 : bf1[4] = bf0[4];
851 0 : bf1[5] = bf0[5];
852 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
853 0 : bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
854 0 : range_check(stage, input, bf1, size, stage_range[stage]);
855 :
856 : // stage 7: reorder into output, negating the odd-indexed outputs
857 0 : stage++;
858 0 : bf0 = step;
859 0 : bf1 = output;
860 0 : bf1[0] = bf0[0];
861 0 : bf1[1] = -bf0[4];
862 0 : bf1[2] = bf0[6];
863 0 : bf1[3] = -bf0[2];
864 0 : bf1[4] = bf0[3];
865 0 : bf1[5] = -bf0[7];
866 0 : bf1[6] = bf0[5];
867 0 : bf1[7] = -bf0[1];
868 0 : range_check(stage, input, bf1, size, stage_range[stage]);
869 0 : }
870 :
// 16-point 1-D transform (a forward ADST, going by the function name),
// evaluated as a fixed network of nine stages.
//
//   input       - 16 values, read only (in stage 1 and by range_check).
//   output      - receives the 16 results. It is also used as a work buffer:
//                 odd stages write into output, even stages write into a
//                 local scratch array, so each stage reads one buffer and
//                 writes the other. Stage 1 reads input[] while it is filling
//                 output[], so aliasing the two would change the values read.
//   cos_bit     - indexed by stage number; for the rotation stages (2, 4, 6, 8)
//                 cos_bit[stage] picks the table returned by cospi_arr() and
//                 is forwarded to half_btf().
//   stage_range - indexed by stage number (0..9); stage_range[stage] is passed
//                 to range_check() after that stage.
//
// half_btf() and cospi_arr() are defined outside this block; presumably
// half_btf(w0, a, w1, b, bit) is a rounded fixed-point w0*a + w1*b - confirm
// against its definition.
void av1_fadst16_new(const int32_t *input, int32_t *output,
                     const int8_t *cos_bit, const int8_t *stage_range) {
  // Closing reorder: output slot i is taken from scratch slot final_order[i];
  // odd output slots are negated as well.
  static const int8_t final_order[16] = { 0, 8,  12, 4, 6, 14, 10, 2,
                                          3, 11, 15, 7, 5, 13, 9,  1 };
  const int32_t size = 16;
  const int32_t *cospi;
  int32_t stage = 0;
  int32_t *src, *dst;
  int32_t scratch[16];
  int8_t cbit;

  // stage 0: nothing is computed, only the raw input is range-checked.
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: even slots take input[15], input[13], ..., input[1];
  // odd slots take input[0], input[2], ..., input[14].
  stage++;
  dst = output;
  for (int k = 0; k < 8; ++k) {
    dst[2 * k] = input[15 - 2 * k];
    dst[2 * k + 1] = input[2 * k];
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 2: rotate each adjacent pair with weights cospi[2 + 8k] and
  // cospi[62 - 8k].
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int k = 0; k < 8; ++k) {
    const int32_t w_lo = cospi[8 * k + 2];
    const int32_t w_hi = cospi[62 - 8 * k];
    dst[2 * k] = half_btf(w_lo, src[2 * k], w_hi, src[2 * k + 1], cbit);
    dst[2 * k + 1] = half_btf(-w_lo, src[2 * k + 1], w_hi, src[2 * k], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 3: sum / difference butterflies across a distance of 8.
  stage++;
  src = scratch;
  dst = output;
  for (int i = 0; i < 8; ++i) {
    dst[i] = src[i] + src[i + 8];
  }
  for (int i = 8; i < 16; ++i) {
    dst[i] = -src[i] + src[i - 8];
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 4: lower half is copied through, upper half is rotated pairwise.
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int i = 0; i < 8; ++i) {
    dst[i] = src[i];
  }
  dst[8] = half_btf(cospi[8], src[8], cospi[56], src[9], cbit);
  dst[9] = half_btf(-cospi[8], src[9], cospi[56], src[8], cbit);
  dst[10] = half_btf(cospi[40], src[10], cospi[24], src[11], cbit);
  dst[11] = half_btf(-cospi[40], src[11], cospi[24], src[10], cbit);
  dst[12] = half_btf(-cospi[56], src[12], cospi[8], src[13], cbit);
  dst[13] = half_btf(cospi[56], src[13], cospi[8], src[12], cbit);
  dst[14] = half_btf(-cospi[24], src[14], cospi[40], src[15], cbit);
  dst[15] = half_btf(cospi[24], src[15], cospi[40], src[14], cbit);
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 5: sum / difference butterflies across a distance of 4, applied to
  // each group of 8.
  stage++;
  src = scratch;
  dst = output;
  for (int base = 0; base < 16; base += 8) {
    for (int i = 0; i < 4; ++i) {
      dst[base + i] = src[base + i] + src[base + i + 4];
    }
    for (int i = 4; i < 8; ++i) {
      dst[base + i] = -src[base + i] + src[base + i - 4];
    }
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 6: in each group of 8 the first four values are copied through and
  // the last four are rotated pairwise with cospi[16] / cospi[48].
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int base = 0; base < 16; base += 8) {
    const int32_t *s = src + base;
    int32_t *d = dst + base;
    d[0] = s[0];
    d[1] = s[1];
    d[2] = s[2];
    d[3] = s[3];
    d[4] = half_btf(cospi[16], s[4], cospi[48], s[5], cbit);
    d[5] = half_btf(-cospi[16], s[5], cospi[48], s[4], cbit);
    d[6] = half_btf(-cospi[48], s[6], cospi[16], s[7], cbit);
    d[7] = half_btf(cospi[48], s[7], cospi[16], s[6], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 7: sum / difference butterflies across a distance of 2, applied to
  // each group of 4.
  stage++;
  src = scratch;
  dst = output;
  for (int base = 0; base < 16; base += 4) {
    dst[base] = src[base] + src[base + 2];
    dst[base + 1] = src[base + 1] + src[base + 3];
    dst[base + 2] = -src[base + 2] + src[base];
    dst[base + 3] = -src[base + 3] + src[base + 1];
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 8: in each group of 4 the first two values are copied through and
  // the last two are rotated with cospi[32].
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int base = 0; base < 16; base += 4) {
    dst[base] = src[base];
    dst[base + 1] = src[base + 1];
    dst[base + 2] =
        half_btf(cospi[32], src[base + 2], cospi[32], src[base + 3], cbit);
    dst[base + 3] =
        half_btf(-cospi[32], src[base + 3], cospi[32], src[base + 2], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 9: final reorder into output; odd output slots are negated.
  stage++;
  src = scratch;
  dst = output;
  for (int i = 0; i < 16; ++i) {
    if (i & 1) {
      dst[i] = -src[final_order[i]];
    } else {
      dst[i] = src[final_order[i]];
    }
  }
  range_check(stage, input, dst, size, stage_range[stage]);
}
1084 :
// 32-point 1-D transform (a forward ADST, going by the function name),
// evaluated as a fixed network of eleven stages.
//
//   input       - 32 values, read only (in stage 1 and by range_check).
//   output      - receives the 32 results. It is also used as a work buffer:
//                 odd stages write into output, even stages write into a
//                 local scratch array, so each stage reads one buffer and
//                 writes the other. Stage 1 reads input[] while it is filling
//                 output[], so aliasing the two would change the values read.
//   cos_bit     - indexed by stage number; for the rotation stages
//                 (2, 4, 6, 8, 10) cos_bit[stage] picks the table returned by
//                 cospi_arr() and is forwarded to half_btf().
//   stage_range - indexed by stage number (0..11); stage_range[stage] is
//                 passed to range_check() after that stage.
//
// half_btf() and cospi_arr() are defined outside this block; presumably
// half_btf(w0, a, w1, b, bit) is a rounded fixed-point w0*a + w1*b - confirm
// against its definition.
void av1_fadst32_new(const int32_t *input, int32_t *output,
                     const int8_t *cos_bit, const int8_t *stage_range) {
  // Closing reorder: output slot i is taken from scratch slot final_order[i];
  // odd output slots are negated as well.
  static const int8_t final_order[32] = {
    0, 16, 24, 8,  12, 28, 20, 4, 6, 22, 30, 14, 10, 26, 18, 2,
    3, 19, 27, 11, 15, 31, 23, 7, 5, 21, 29, 13, 9,  25, 17, 1
  };
  const int32_t size = 32;
  const int32_t *cospi;
  int32_t stage = 0;
  int32_t *src, *dst;
  int32_t scratch[32];
  int8_t cbit;

  // stage 0: nothing is computed, only the raw input is range-checked.
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: even slots take input[31], input[29], ..., input[1];
  // odd slots take input[0], input[2], ..., input[30].
  stage++;
  dst = output;
  for (int k = 0; k < 16; ++k) {
    dst[2 * k] = input[31 - 2 * k];
    dst[2 * k + 1] = input[2 * k];
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 2: rotate each adjacent pair with weights cospi[1 + 4k] and
  // cospi[63 - 4k].
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int k = 0; k < 16; ++k) {
    const int32_t w_lo = cospi[4 * k + 1];
    const int32_t w_hi = cospi[63 - 4 * k];
    dst[2 * k] = half_btf(w_lo, src[2 * k], w_hi, src[2 * k + 1], cbit);
    dst[2 * k + 1] = half_btf(-w_lo, src[2 * k + 1], w_hi, src[2 * k], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 3: sum / difference butterflies across a distance of 16.
  stage++;
  src = scratch;
  dst = output;
  for (int i = 0; i < 16; ++i) {
    dst[i] = src[i] + src[i + 16];
  }
  for (int i = 16; i < 32; ++i) {
    dst[i] = -src[i] + src[i - 16];
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 4: lower half is copied through, upper half is rotated pairwise.
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int i = 0; i < 16; ++i) {
    dst[i] = src[i];
  }
  dst[16] = half_btf(cospi[4], src[16], cospi[60], src[17], cbit);
  dst[17] = half_btf(-cospi[4], src[17], cospi[60], src[16], cbit);
  dst[18] = half_btf(cospi[20], src[18], cospi[44], src[19], cbit);
  dst[19] = half_btf(-cospi[20], src[19], cospi[44], src[18], cbit);
  dst[20] = half_btf(cospi[36], src[20], cospi[28], src[21], cbit);
  dst[21] = half_btf(-cospi[36], src[21], cospi[28], src[20], cbit);
  dst[22] = half_btf(cospi[52], src[22], cospi[12], src[23], cbit);
  dst[23] = half_btf(-cospi[52], src[23], cospi[12], src[22], cbit);
  dst[24] = half_btf(-cospi[60], src[24], cospi[4], src[25], cbit);
  dst[25] = half_btf(cospi[60], src[25], cospi[4], src[24], cbit);
  dst[26] = half_btf(-cospi[44], src[26], cospi[20], src[27], cbit);
  dst[27] = half_btf(cospi[44], src[27], cospi[20], src[26], cbit);
  dst[28] = half_btf(-cospi[28], src[28], cospi[36], src[29], cbit);
  dst[29] = half_btf(cospi[28], src[29], cospi[36], src[28], cbit);
  dst[30] = half_btf(-cospi[12], src[30], cospi[52], src[31], cbit);
  dst[31] = half_btf(cospi[12], src[31], cospi[52], src[30], cbit);
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 5: sum / difference butterflies across a distance of 8, applied to
  // each group of 16.
  stage++;
  src = scratch;
  dst = output;
  for (int base = 0; base < 32; base += 16) {
    for (int i = 0; i < 8; ++i) {
      dst[base + i] = src[base + i] + src[base + i + 8];
    }
    for (int i = 8; i < 16; ++i) {
      dst[base + i] = -src[base + i] + src[base + i - 8];
    }
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 6: in each group of 16 the first eight values are copied through
  // and the last eight are rotated pairwise.
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int base = 0; base < 32; base += 16) {
    const int32_t *s = src + base;
    int32_t *d = dst + base;
    for (int i = 0; i < 8; ++i) {
      d[i] = s[i];
    }
    d[8] = half_btf(cospi[8], s[8], cospi[56], s[9], cbit);
    d[9] = half_btf(-cospi[8], s[9], cospi[56], s[8], cbit);
    d[10] = half_btf(cospi[40], s[10], cospi[24], s[11], cbit);
    d[11] = half_btf(-cospi[40], s[11], cospi[24], s[10], cbit);
    d[12] = half_btf(-cospi[56], s[12], cospi[8], s[13], cbit);
    d[13] = half_btf(cospi[56], s[13], cospi[8], s[12], cbit);
    d[14] = half_btf(-cospi[24], s[14], cospi[40], s[15], cbit);
    d[15] = half_btf(cospi[24], s[15], cospi[40], s[14], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 7: sum / difference butterflies across a distance of 4, applied to
  // each group of 8.
  stage++;
  src = scratch;
  dst = output;
  for (int base = 0; base < 32; base += 8) {
    for (int i = 0; i < 4; ++i) {
      dst[base + i] = src[base + i] + src[base + i + 4];
    }
    for (int i = 4; i < 8; ++i) {
      dst[base + i] = -src[base + i] + src[base + i - 4];
    }
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 8: in each group of 8 the first four values are copied through and
  // the last four are rotated pairwise with cospi[16] / cospi[48].
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int base = 0; base < 32; base += 8) {
    const int32_t *s = src + base;
    int32_t *d = dst + base;
    d[0] = s[0];
    d[1] = s[1];
    d[2] = s[2];
    d[3] = s[3];
    d[4] = half_btf(cospi[16], s[4], cospi[48], s[5], cbit);
    d[5] = half_btf(-cospi[16], s[5], cospi[48], s[4], cbit);
    d[6] = half_btf(-cospi[48], s[6], cospi[16], s[7], cbit);
    d[7] = half_btf(cospi[48], s[7], cospi[16], s[6], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 9: sum / difference butterflies across a distance of 2, applied to
  // each group of 4.
  stage++;
  src = scratch;
  dst = output;
  for (int base = 0; base < 32; base += 4) {
    dst[base] = src[base] + src[base + 2];
    dst[base + 1] = src[base + 1] + src[base + 3];
    dst[base + 2] = -src[base + 2] + src[base];
    dst[base + 3] = -src[base + 3] + src[base + 1];
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 10: in each group of 4 the first two values are copied through and
  // the last two are rotated with cospi[32].
  stage++;
  cbit = cos_bit[stage];
  cospi = cospi_arr(cbit);
  src = output;
  dst = scratch;
  for (int base = 0; base < 32; base += 4) {
    dst[base] = src[base];
    dst[base + 1] = src[base + 1];
    dst[base + 2] =
        half_btf(cospi[32], src[base + 2], cospi[32], src[base + 3], cbit);
    dst[base + 3] =
        half_btf(-cospi[32], src[base + 3], cospi[32], src[base + 2], cbit);
  }
  range_check(stage, input, dst, size, stage_range[stage]);

  // stage 11: final reorder into output; odd output slots are negated.
  stage++;
  src = scratch;
  dst = output;
  for (int i = 0; i < 32; ++i) {
    if (i & 1) {
      dst[i] = -src[final_order[i]];
    } else {
      dst[i] = src[final_order[i]];
    }
  }
  range_check(stage, input, dst, size, stage_range[stage]);
}
1519 :
1520 : #if CONFIG_EXT_TX
1521 0 : void av1_fidentity4_c(const int32_t *input, int32_t *output,
1522 : const int8_t *cos_bit, const int8_t *stage_range) {
1523 : (void)cos_bit;
1524 0 : for (int i = 0; i < 4; ++i)
1525 0 : output[i] = (int32_t)dct_const_round_shift(input[i] * Sqrt2);
1526 : range_check(0, input, output, 4, stage_range[0]);
1527 0 : }
1528 :
// 8-point identity transform: each output is the matching input times 2.
//
//   input       - 8 values, read only.
//   output      - receives the 8 scaled values.
//   cos_bit     - unused; kept so the signature matches the other 1-D
//                 transforms in this file.
//   stage_range - only stage_range[0] is used, as the range_check() limit.
void av1_fidentity8_c(const int32_t *input, int32_t *output,
                      const int8_t *cos_bit, const int8_t *stage_range) {
  int idx = 0;
  (void)cos_bit;
  while (idx < 8) {
    output[idx] = input[idx] * 2;
    ++idx;
  }
  range_check(0, input, output, 8, stage_range[0]);
}
1535 :
1536 0 : void av1_fidentity16_c(const int32_t *input, int32_t *output,
1537 : const int8_t *cos_bit, const int8_t *stage_range) {
1538 : (void)cos_bit;
1539 0 : for (int i = 0; i < 16; ++i)
1540 0 : output[i] = (int32_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
1541 : range_check(0, input, output, 16, stage_range[0]);
1542 0 : }
1543 :
// 32-point identity transform: each output is the matching input times 4.
//
//   input       - 32 values, read only.
//   output      - receives the 32 scaled values.
//   cos_bit     - unused; kept so the signature matches the other 1-D
//                 transforms in this file.
//   stage_range - only stage_range[0] is used, as the range_check() limit.
void av1_fidentity32_c(const int32_t *input, int32_t *output,
                       const int8_t *cos_bit, const int8_t *stage_range) {
  int idx = 0;
  (void)cos_bit;
  while (idx < 32) {
    output[idx] = input[idx] * 4;
    ++idx;
  }
  range_check(0, input, output, 32, stage_range[0]);
}
1550 : #endif // CONFIG_EXT_TX
1551 :
1552 : #if CONFIG_TX64X64
1553 : void av1_fdct64_new(const int32_t *input, int32_t *output,
1554 : const int8_t *cos_bit, const int8_t *stage_range) {
1555 : const int32_t size = 64;
1556 : const int32_t *cospi;
1557 :
1558 : int32_t stage = 0;
1559 : int32_t *bf0, *bf1;
1560 : int32_t step[64];
1561 :
1562 : // stage 0;
1563 : range_check(stage, input, input, size, stage_range[stage]);
1564 :
1565 : // stage 1;
1566 : stage++;
1567 : cospi = cospi_arr(cos_bit[stage]);
1568 : bf1 = output;
1569 : bf1[0] = input[0] + input[63];
1570 : bf1[1] = input[1] + input[62];
1571 : bf1[2] = input[2] + input[61];
1572 : bf1[3] = input[3] + input[60];
1573 : bf1[4] = input[4] + input[59];
1574 : bf1[5] = input[5] + input[58];
1575 : bf1[6] = input[6] + input[57];
1576 : bf1[7] = input[7] + input[56];
1577 : bf1[8] = input[8] + input[55];
1578 : bf1[9] = input[9] + input[54];
1579 : bf1[10] = input[10] + input[53];
1580 : bf1[11] = input[11] + input[52];
1581 : bf1[12] = input[12] + input[51];
1582 : bf1[13] = input[13] + input[50];
1583 : bf1[14] = input[14] + input[49];
1584 : bf1[15] = input[15] + input[48];
1585 : bf1[16] = input[16] + input[47];
1586 : bf1[17] = input[17] + input[46];
1587 : bf1[18] = input[18] + input[45];
1588 : bf1[19] = input[19] + input[44];
1589 : bf1[20] = input[20] + input[43];
1590 : bf1[21] = input[21] + input[42];
1591 : bf1[22] = input[22] + input[41];
1592 : bf1[23] = input[23] + input[40];
1593 : bf1[24] = input[24] + input[39];
1594 : bf1[25] = input[25] + input[38];
1595 : bf1[26] = input[26] + input[37];
1596 : bf1[27] = input[27] + input[36];
1597 : bf1[28] = input[28] + input[35];
1598 : bf1[29] = input[29] + input[34];
1599 : bf1[30] = input[30] + input[33];
1600 : bf1[31] = input[31] + input[32];
1601 : bf1[32] = -input[32] + input[31];
1602 : bf1[33] = -input[33] + input[30];
1603 : bf1[34] = -input[34] + input[29];
1604 : bf1[35] = -input[35] + input[28];
1605 : bf1[36] = -input[36] + input[27];
1606 : bf1[37] = -input[37] + input[26];
1607 : bf1[38] = -input[38] + input[25];
1608 : bf1[39] = -input[39] + input[24];
1609 : bf1[40] = -input[40] + input[23];
1610 : bf1[41] = -input[41] + input[22];
1611 : bf1[42] = -input[42] + input[21];
1612 : bf1[43] = -input[43] + input[20];
1613 : bf1[44] = -input[44] + input[19];
1614 : bf1[45] = -input[45] + input[18];
1615 : bf1[46] = -input[46] + input[17];
1616 : bf1[47] = -input[47] + input[16];
1617 : bf1[48] = -input[48] + input[15];
1618 : bf1[49] = -input[49] + input[14];
1619 : bf1[50] = -input[50] + input[13];
1620 : bf1[51] = -input[51] + input[12];
1621 : bf1[52] = -input[52] + input[11];
1622 : bf1[53] = -input[53] + input[10];
1623 : bf1[54] = -input[54] + input[9];
1624 : bf1[55] = -input[55] + input[8];
1625 : bf1[56] = -input[56] + input[7];
1626 : bf1[57] = -input[57] + input[6];
1627 : bf1[58] = -input[58] + input[5];
1628 : bf1[59] = -input[59] + input[4];
1629 : bf1[60] = -input[60] + input[3];
1630 : bf1[61] = -input[61] + input[2];
1631 : bf1[62] = -input[62] + input[1];
1632 : bf1[63] = -input[63] + input[0];
1633 : range_check(stage, input, bf1, size, stage_range[stage]);
1634 :
1635 : // stage 2
1636 : stage++;
1637 : cospi = cospi_arr(cos_bit[stage]);
1638 : bf0 = output;
1639 : bf1 = step;
1640 : bf1[0] = bf0[0] + bf0[31];
1641 : bf1[1] = bf0[1] + bf0[30];
1642 : bf1[2] = bf0[2] + bf0[29];
1643 : bf1[3] = bf0[3] + bf0[28];
1644 : bf1[4] = bf0[4] + bf0[27];
1645 : bf1[5] = bf0[5] + bf0[26];
1646 : bf1[6] = bf0[6] + bf0[25];
1647 : bf1[7] = bf0[7] + bf0[24];
1648 : bf1[8] = bf0[8] + bf0[23];
1649 : bf1[9] = bf0[9] + bf0[22];
1650 : bf1[10] = bf0[10] + bf0[21];
1651 : bf1[11] = bf0[11] + bf0[20];
1652 : bf1[12] = bf0[12] + bf0[19];
1653 : bf1[13] = bf0[13] + bf0[18];
1654 : bf1[14] = bf0[14] + bf0[17];
1655 : bf1[15] = bf0[15] + bf0[16];
1656 : bf1[16] = -bf0[16] + bf0[15];
1657 : bf1[17] = -bf0[17] + bf0[14];
1658 : bf1[18] = -bf0[18] + bf0[13];
1659 : bf1[19] = -bf0[19] + bf0[12];
1660 : bf1[20] = -bf0[20] + bf0[11];
1661 : bf1[21] = -bf0[21] + bf0[10];
1662 : bf1[22] = -bf0[22] + bf0[9];
1663 : bf1[23] = -bf0[23] + bf0[8];
1664 : bf1[24] = -bf0[24] + bf0[7];
1665 : bf1[25] = -bf0[25] + bf0[6];
1666 : bf1[26] = -bf0[26] + bf0[5];
1667 : bf1[27] = -bf0[27] + bf0[4];
1668 : bf1[28] = -bf0[28] + bf0[3];
1669 : bf1[29] = -bf0[29] + bf0[2];
1670 : bf1[30] = -bf0[30] + bf0[1];
1671 : bf1[31] = -bf0[31] + bf0[0];
1672 : bf1[32] = bf0[32];
1673 : bf1[33] = bf0[33];
1674 : bf1[34] = bf0[34];
1675 : bf1[35] = bf0[35];
1676 : bf1[36] = bf0[36];
1677 : bf1[37] = bf0[37];
1678 : bf1[38] = bf0[38];
1679 : bf1[39] = bf0[39];
1680 : bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
1681 : bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
1682 : bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
1683 : bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
1684 : bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
1685 : bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
1686 : bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
1687 : bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
1688 : bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
1689 : bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
1690 : bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
1691 : bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
1692 : bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
1693 : bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
1694 : bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
1695 : bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
1696 : bf1[56] = bf0[56];
1697 : bf1[57] = bf0[57];
1698 : bf1[58] = bf0[58];
1699 : bf1[59] = bf0[59];
1700 : bf1[60] = bf0[60];
1701 : bf1[61] = bf0[61];
1702 : bf1[62] = bf0[62];
1703 : bf1[63] = bf0[63];
1704 : range_check(stage, input, bf1, size, stage_range[stage]);
1705 :
1706 : // stage 3
1707 : stage++;
1708 : cospi = cospi_arr(cos_bit[stage]);
1709 : bf0 = step;
1710 : bf1 = output;
1711 : bf1[0] = bf0[0] + bf0[15];
1712 : bf1[1] = bf0[1] + bf0[14];
1713 : bf1[2] = bf0[2] + bf0[13];
1714 : bf1[3] = bf0[3] + bf0[12];
1715 : bf1[4] = bf0[4] + bf0[11];
1716 : bf1[5] = bf0[5] + bf0[10];
1717 : bf1[6] = bf0[6] + bf0[9];
1718 : bf1[7] = bf0[7] + bf0[8];
1719 : bf1[8] = -bf0[8] + bf0[7];
1720 : bf1[9] = -bf0[9] + bf0[6];
1721 : bf1[10] = -bf0[10] + bf0[5];
1722 : bf1[11] = -bf0[11] + bf0[4];
1723 : bf1[12] = -bf0[12] + bf0[3];
1724 : bf1[13] = -bf0[13] + bf0[2];
1725 : bf1[14] = -bf0[14] + bf0[1];
1726 : bf1[15] = -bf0[15] + bf0[0];
1727 : bf1[16] = bf0[16];
1728 : bf1[17] = bf0[17];
1729 : bf1[18] = bf0[18];
1730 : bf1[19] = bf0[19];
1731 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
1732 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
1733 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
1734 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
1735 : bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
1736 : bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
1737 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
1738 : bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
1739 : bf1[28] = bf0[28];
1740 : bf1[29] = bf0[29];
1741 : bf1[30] = bf0[30];
1742 : bf1[31] = bf0[31];
1743 : bf1[32] = bf0[32] + bf0[47];
1744 : bf1[33] = bf0[33] + bf0[46];
1745 : bf1[34] = bf0[34] + bf0[45];
1746 : bf1[35] = bf0[35] + bf0[44];
1747 : bf1[36] = bf0[36] + bf0[43];
1748 : bf1[37] = bf0[37] + bf0[42];
1749 : bf1[38] = bf0[38] + bf0[41];
1750 : bf1[39] = bf0[39] + bf0[40];
1751 : bf1[40] = -bf0[40] + bf0[39];
1752 : bf1[41] = -bf0[41] + bf0[38];
1753 : bf1[42] = -bf0[42] + bf0[37];
1754 : bf1[43] = -bf0[43] + bf0[36];
1755 : bf1[44] = -bf0[44] + bf0[35];
1756 : bf1[45] = -bf0[45] + bf0[34];
1757 : bf1[46] = -bf0[46] + bf0[33];
1758 : bf1[47] = -bf0[47] + bf0[32];
1759 : bf1[48] = -bf0[48] + bf0[63];
1760 : bf1[49] = -bf0[49] + bf0[62];
1761 : bf1[50] = -bf0[50] + bf0[61];
1762 : bf1[51] = -bf0[51] + bf0[60];
1763 : bf1[52] = -bf0[52] + bf0[59];
1764 : bf1[53] = -bf0[53] + bf0[58];
1765 : bf1[54] = -bf0[54] + bf0[57];
1766 : bf1[55] = -bf0[55] + bf0[56];
1767 : bf1[56] = bf0[56] + bf0[55];
1768 : bf1[57] = bf0[57] + bf0[54];
1769 : bf1[58] = bf0[58] + bf0[53];
1770 : bf1[59] = bf0[59] + bf0[52];
1771 : bf1[60] = bf0[60] + bf0[51];
1772 : bf1[61] = bf0[61] + bf0[50];
1773 : bf1[62] = bf0[62] + bf0[49];
1774 : bf1[63] = bf0[63] + bf0[48];
1775 : range_check(stage, input, bf1, size, stage_range[stage]);
1776 :
1777 : // stage 4
1778 : stage++;
1779 : cospi = cospi_arr(cos_bit[stage]);
1780 : bf0 = output;
1781 : bf1 = step;
1782 : bf1[0] = bf0[0] + bf0[7];
1783 : bf1[1] = bf0[1] + bf0[6];
1784 : bf1[2] = bf0[2] + bf0[5];
1785 : bf1[3] = bf0[3] + bf0[4];
1786 : bf1[4] = -bf0[4] + bf0[3];
1787 : bf1[5] = -bf0[5] + bf0[2];
1788 : bf1[6] = -bf0[6] + bf0[1];
1789 : bf1[7] = -bf0[7] + bf0[0];
1790 : bf1[8] = bf0[8];
1791 : bf1[9] = bf0[9];
1792 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
1793 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
1794 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
1795 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
1796 : bf1[14] = bf0[14];
1797 : bf1[15] = bf0[15];
1798 : bf1[16] = bf0[16] + bf0[23];
1799 : bf1[17] = bf0[17] + bf0[22];
1800 : bf1[18] = bf0[18] + bf0[21];
1801 : bf1[19] = bf0[19] + bf0[20];
1802 : bf1[20] = -bf0[20] + bf0[19];
1803 : bf1[21] = -bf0[21] + bf0[18];
1804 : bf1[22] = -bf0[22] + bf0[17];
1805 : bf1[23] = -bf0[23] + bf0[16];
1806 : bf1[24] = -bf0[24] + bf0[31];
1807 : bf1[25] = -bf0[25] + bf0[30];
1808 : bf1[26] = -bf0[26] + bf0[29];
1809 : bf1[27] = -bf0[27] + bf0[28];
1810 : bf1[28] = bf0[28] + bf0[27];
1811 : bf1[29] = bf0[29] + bf0[26];
1812 : bf1[30] = bf0[30] + bf0[25];
1813 : bf1[31] = bf0[31] + bf0[24];
1814 : bf1[32] = bf0[32];
1815 : bf1[33] = bf0[33];
1816 : bf1[34] = bf0[34];
1817 : bf1[35] = bf0[35];
1818 : bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
1819 : bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
1820 : bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
1821 : bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
1822 : bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
1823 : bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
1824 : bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
1825 : bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
1826 : bf1[44] = bf0[44];
1827 : bf1[45] = bf0[45];
1828 : bf1[46] = bf0[46];
1829 : bf1[47] = bf0[47];
1830 : bf1[48] = bf0[48];
1831 : bf1[49] = bf0[49];
1832 : bf1[50] = bf0[50];
1833 : bf1[51] = bf0[51];
1834 : bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
1835 : bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
1836 : bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
1837 : bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
1838 : bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
1839 : bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
1840 : bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
1841 : bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
1842 : bf1[60] = bf0[60];
1843 : bf1[61] = bf0[61];
1844 : bf1[62] = bf0[62];
1845 : bf1[63] = bf0[63];
1846 : range_check(stage, input, bf1, size, stage_range[stage]);
1847 :
1848 : // stage 5
1849 : stage++;
1850 : cospi = cospi_arr(cos_bit[stage]);
1851 : bf0 = step;
1852 : bf1 = output;
1853 : bf1[0] = bf0[0] + bf0[3];
1854 : bf1[1] = bf0[1] + bf0[2];
1855 : bf1[2] = -bf0[2] + bf0[1];
1856 : bf1[3] = -bf0[3] + bf0[0];
1857 : bf1[4] = bf0[4];
1858 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
1859 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
1860 : bf1[7] = bf0[7];
1861 : bf1[8] = bf0[8] + bf0[11];
1862 : bf1[9] = bf0[9] + bf0[10];
1863 : bf1[10] = -bf0[10] + bf0[9];
1864 : bf1[11] = -bf0[11] + bf0[8];
1865 : bf1[12] = -bf0[12] + bf0[15];
1866 : bf1[13] = -bf0[13] + bf0[14];
1867 : bf1[14] = bf0[14] + bf0[13];
1868 : bf1[15] = bf0[15] + bf0[12];
1869 : bf1[16] = bf0[16];
1870 : bf1[17] = bf0[17];
1871 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
1872 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
1873 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
1874 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
1875 : bf1[22] = bf0[22];
1876 : bf1[23] = bf0[23];
1877 : bf1[24] = bf0[24];
1878 : bf1[25] = bf0[25];
1879 : bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
1880 : bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
1881 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
1882 : bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
1883 : bf1[30] = bf0[30];
1884 : bf1[31] = bf0[31];
1885 : bf1[32] = bf0[32] + bf0[39];
1886 : bf1[33] = bf0[33] + bf0[38];
1887 : bf1[34] = bf0[34] + bf0[37];
1888 : bf1[35] = bf0[35] + bf0[36];
1889 : bf1[36] = -bf0[36] + bf0[35];
1890 : bf1[37] = -bf0[37] + bf0[34];
1891 : bf1[38] = -bf0[38] + bf0[33];
1892 : bf1[39] = -bf0[39] + bf0[32];
1893 : bf1[40] = -bf0[40] + bf0[47];
1894 : bf1[41] = -bf0[41] + bf0[46];
1895 : bf1[42] = -bf0[42] + bf0[45];
1896 : bf1[43] = -bf0[43] + bf0[44];
1897 : bf1[44] = bf0[44] + bf0[43];
1898 : bf1[45] = bf0[45] + bf0[42];
1899 : bf1[46] = bf0[46] + bf0[41];
1900 : bf1[47] = bf0[47] + bf0[40];
1901 : bf1[48] = bf0[48] + bf0[55];
1902 : bf1[49] = bf0[49] + bf0[54];
1903 : bf1[50] = bf0[50] + bf0[53];
1904 : bf1[51] = bf0[51] + bf0[52];
1905 : bf1[52] = -bf0[52] + bf0[51];
1906 : bf1[53] = -bf0[53] + bf0[50];
1907 : bf1[54] = -bf0[54] + bf0[49];
1908 : bf1[55] = -bf0[55] + bf0[48];
1909 : bf1[56] = -bf0[56] + bf0[63];
1910 : bf1[57] = -bf0[57] + bf0[62];
1911 : bf1[58] = -bf0[58] + bf0[61];
1912 : bf1[59] = -bf0[59] + bf0[60];
1913 : bf1[60] = bf0[60] + bf0[59];
1914 : bf1[61] = bf0[61] + bf0[58];
1915 : bf1[62] = bf0[62] + bf0[57];
1916 : bf1[63] = bf0[63] + bf0[56];
1917 : range_check(stage, input, bf1, size, stage_range[stage]);
1918 :
1919 : // stage 6
1920 : stage++;
1921 : cospi = cospi_arr(cos_bit[stage]);
1922 : bf0 = output;
1923 : bf1 = step;
1924 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
1925 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
1926 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
1927 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
1928 : bf1[4] = bf0[4] + bf0[5];
1929 : bf1[5] = -bf0[5] + bf0[4];
1930 : bf1[6] = -bf0[6] + bf0[7];
1931 : bf1[7] = bf0[7] + bf0[6];
1932 : bf1[8] = bf0[8];
1933 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
1934 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
1935 : bf1[11] = bf0[11];
1936 : bf1[12] = bf0[12];
1937 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
1938 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
1939 : bf1[15] = bf0[15];
1940 : bf1[16] = bf0[16] + bf0[19];
1941 : bf1[17] = bf0[17] + bf0[18];
1942 : bf1[18] = -bf0[18] + bf0[17];
1943 : bf1[19] = -bf0[19] + bf0[16];
1944 : bf1[20] = -bf0[20] + bf0[23];
1945 : bf1[21] = -bf0[21] + bf0[22];
1946 : bf1[22] = bf0[22] + bf0[21];
1947 : bf1[23] = bf0[23] + bf0[20];
1948 : bf1[24] = bf0[24] + bf0[27];
1949 : bf1[25] = bf0[25] + bf0[26];
1950 : bf1[26] = -bf0[26] + bf0[25];
1951 : bf1[27] = -bf0[27] + bf0[24];
1952 : bf1[28] = -bf0[28] + bf0[31];
1953 : bf1[29] = -bf0[29] + bf0[30];
1954 : bf1[30] = bf0[30] + bf0[29];
1955 : bf1[31] = bf0[31] + bf0[28];
1956 : bf1[32] = bf0[32];
1957 : bf1[33] = bf0[33];
1958 : bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
1959 : bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
1960 : bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
1961 : bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
1962 : bf1[38] = bf0[38];
1963 : bf1[39] = bf0[39];
1964 : bf1[40] = bf0[40];
1965 : bf1[41] = bf0[41];
1966 : bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
1967 : bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
1968 : bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
1969 : bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
1970 : bf1[46] = bf0[46];
1971 : bf1[47] = bf0[47];
1972 : bf1[48] = bf0[48];
1973 : bf1[49] = bf0[49];
1974 : bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
1975 : bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
1976 : bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
1977 : bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
1978 : bf1[54] = bf0[54];
1979 : bf1[55] = bf0[55];
1980 : bf1[56] = bf0[56];
1981 : bf1[57] = bf0[57];
1982 : bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
1983 : bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
1984 : bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
1985 : bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
1986 : bf1[62] = bf0[62];
1987 : bf1[63] = bf0[63];
1988 : range_check(stage, input, bf1, size, stage_range[stage]);
1989 :
1990 : // stage 7
1991 : stage++;
1992 : cospi = cospi_arr(cos_bit[stage]);
1993 : bf0 = step;
1994 : bf1 = output;
1995 : bf1[0] = bf0[0];
1996 : bf1[1] = bf0[1];
1997 : bf1[2] = bf0[2];
1998 : bf1[3] = bf0[3];
1999 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
2000 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
2001 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
2002 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
2003 : bf1[8] = bf0[8] + bf0[9];
2004 : bf1[9] = -bf0[9] + bf0[8];
2005 : bf1[10] = -bf0[10] + bf0[11];
2006 : bf1[11] = bf0[11] + bf0[10];
2007 : bf1[12] = bf0[12] + bf0[13];
2008 : bf1[13] = -bf0[13] + bf0[12];
2009 : bf1[14] = -bf0[14] + bf0[15];
2010 : bf1[15] = bf0[15] + bf0[14];
2011 : bf1[16] = bf0[16];
2012 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
2013 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
2014 : bf1[19] = bf0[19];
2015 : bf1[20] = bf0[20];
2016 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
2017 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
2018 : bf1[23] = bf0[23];
2019 : bf1[24] = bf0[24];
2020 : bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
2021 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
2022 : bf1[27] = bf0[27];
2023 : bf1[28] = bf0[28];
2024 : bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
2025 : bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
2026 : bf1[31] = bf0[31];
2027 : bf1[32] = bf0[32] + bf0[35];
2028 : bf1[33] = bf0[33] + bf0[34];
2029 : bf1[34] = -bf0[34] + bf0[33];
2030 : bf1[35] = -bf0[35] + bf0[32];
2031 : bf1[36] = -bf0[36] + bf0[39];
2032 : bf1[37] = -bf0[37] + bf0[38];
2033 : bf1[38] = bf0[38] + bf0[37];
2034 : bf1[39] = bf0[39] + bf0[36];
2035 : bf1[40] = bf0[40] + bf0[43];
2036 : bf1[41] = bf0[41] + bf0[42];
2037 : bf1[42] = -bf0[42] + bf0[41];
2038 : bf1[43] = -bf0[43] + bf0[40];
2039 : bf1[44] = -bf0[44] + bf0[47];
2040 : bf1[45] = -bf0[45] + bf0[46];
2041 : bf1[46] = bf0[46] + bf0[45];
2042 : bf1[47] = bf0[47] + bf0[44];
2043 : bf1[48] = bf0[48] + bf0[51];
2044 : bf1[49] = bf0[49] + bf0[50];
2045 : bf1[50] = -bf0[50] + bf0[49];
2046 : bf1[51] = -bf0[51] + bf0[48];
2047 : bf1[52] = -bf0[52] + bf0[55];
2048 : bf1[53] = -bf0[53] + bf0[54];
2049 : bf1[54] = bf0[54] + bf0[53];
2050 : bf1[55] = bf0[55] + bf0[52];
2051 : bf1[56] = bf0[56] + bf0[59];
2052 : bf1[57] = bf0[57] + bf0[58];
2053 : bf1[58] = -bf0[58] + bf0[57];
2054 : bf1[59] = -bf0[59] + bf0[56];
2055 : bf1[60] = -bf0[60] + bf0[63];
2056 : bf1[61] = -bf0[61] + bf0[62];
2057 : bf1[62] = bf0[62] + bf0[61];
2058 : bf1[63] = bf0[63] + bf0[60];
2059 : range_check(stage, input, bf1, size, stage_range[stage]);
2060 :
2061 : // stage 8
2062 : stage++;
2063 : cospi = cospi_arr(cos_bit[stage]);
2064 : bf0 = output;
2065 : bf1 = step;
2066 : bf1[0] = bf0[0];
2067 : bf1[1] = bf0[1];
2068 : bf1[2] = bf0[2];
2069 : bf1[3] = bf0[3];
2070 : bf1[4] = bf0[4];
2071 : bf1[5] = bf0[5];
2072 : bf1[6] = bf0[6];
2073 : bf1[7] = bf0[7];
2074 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
2075 : bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
2076 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
2077 : bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
2078 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
2079 : bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
2080 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
2081 : bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
2082 : bf1[16] = bf0[16] + bf0[17];
2083 : bf1[17] = -bf0[17] + bf0[16];
2084 : bf1[18] = -bf0[18] + bf0[19];
2085 : bf1[19] = bf0[19] + bf0[18];
2086 : bf1[20] = bf0[20] + bf0[21];
2087 : bf1[21] = -bf0[21] + bf0[20];
2088 : bf1[22] = -bf0[22] + bf0[23];
2089 : bf1[23] = bf0[23] + bf0[22];
2090 : bf1[24] = bf0[24] + bf0[25];
2091 : bf1[25] = -bf0[25] + bf0[24];
2092 : bf1[26] = -bf0[26] + bf0[27];
2093 : bf1[27] = bf0[27] + bf0[26];
2094 : bf1[28] = bf0[28] + bf0[29];
2095 : bf1[29] = -bf0[29] + bf0[28];
2096 : bf1[30] = -bf0[30] + bf0[31];
2097 : bf1[31] = bf0[31] + bf0[30];
2098 : bf1[32] = bf0[32];
2099 : bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
2100 : bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
2101 : bf1[35] = bf0[35];
2102 : bf1[36] = bf0[36];
2103 : bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
2104 : bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
2105 : bf1[39] = bf0[39];
2106 : bf1[40] = bf0[40];
2107 : bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
2108 : bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
2109 : bf1[43] = bf0[43];
2110 : bf1[44] = bf0[44];
2111 : bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
2112 : bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
2113 : bf1[47] = bf0[47];
2114 : bf1[48] = bf0[48];
2115 : bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
2116 : bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
2117 : bf1[51] = bf0[51];
2118 : bf1[52] = bf0[52];
2119 : bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
2120 : bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
2121 : bf1[55] = bf0[55];
2122 : bf1[56] = bf0[56];
2123 : bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
2124 : bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
2125 : bf1[59] = bf0[59];
2126 : bf1[60] = bf0[60];
2127 : bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
2128 : bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
2129 : bf1[63] = bf0[63];
2130 : range_check(stage, input, bf1, size, stage_range[stage]);
2131 :
2132 : // stage 9
2133 : stage++;
2134 : cospi = cospi_arr(cos_bit[stage]);
2135 : bf0 = step;
2136 : bf1 = output;
2137 : bf1[0] = bf0[0];
2138 : bf1[1] = bf0[1];
2139 : bf1[2] = bf0[2];
2140 : bf1[3] = bf0[3];
2141 : bf1[4] = bf0[4];
2142 : bf1[5] = bf0[5];
2143 : bf1[6] = bf0[6];
2144 : bf1[7] = bf0[7];
2145 : bf1[8] = bf0[8];
2146 : bf1[9] = bf0[9];
2147 : bf1[10] = bf0[10];
2148 : bf1[11] = bf0[11];
2149 : bf1[12] = bf0[12];
2150 : bf1[13] = bf0[13];
2151 : bf1[14] = bf0[14];
2152 : bf1[15] = bf0[15];
2153 : bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
2154 : bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
2155 : bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
2156 : bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
2157 : bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
2158 : bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
2159 : bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
2160 : bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
2161 : bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
2162 : bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
2163 : bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
2164 : bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
2165 : bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
2166 : bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
2167 : bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
2168 : bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
2169 : bf1[32] = bf0[32] + bf0[33];
2170 : bf1[33] = -bf0[33] + bf0[32];
2171 : bf1[34] = -bf0[34] + bf0[35];
2172 : bf1[35] = bf0[35] + bf0[34];
2173 : bf1[36] = bf0[36] + bf0[37];
2174 : bf1[37] = -bf0[37] + bf0[36];
2175 : bf1[38] = -bf0[38] + bf0[39];
2176 : bf1[39] = bf0[39] + bf0[38];
2177 : bf1[40] = bf0[40] + bf0[41];
2178 : bf1[41] = -bf0[41] + bf0[40];
2179 : bf1[42] = -bf0[42] + bf0[43];
2180 : bf1[43] = bf0[43] + bf0[42];
2181 : bf1[44] = bf0[44] + bf0[45];
2182 : bf1[45] = -bf0[45] + bf0[44];
2183 : bf1[46] = -bf0[46] + bf0[47];
2184 : bf1[47] = bf0[47] + bf0[46];
2185 : bf1[48] = bf0[48] + bf0[49];
2186 : bf1[49] = -bf0[49] + bf0[48];
2187 : bf1[50] = -bf0[50] + bf0[51];
2188 : bf1[51] = bf0[51] + bf0[50];
2189 : bf1[52] = bf0[52] + bf0[53];
2190 : bf1[53] = -bf0[53] + bf0[52];
2191 : bf1[54] = -bf0[54] + bf0[55];
2192 : bf1[55] = bf0[55] + bf0[54];
2193 : bf1[56] = bf0[56] + bf0[57];
2194 : bf1[57] = -bf0[57] + bf0[56];
2195 : bf1[58] = -bf0[58] + bf0[59];
2196 : bf1[59] = bf0[59] + bf0[58];
2197 : bf1[60] = bf0[60] + bf0[61];
2198 : bf1[61] = -bf0[61] + bf0[60];
2199 : bf1[62] = -bf0[62] + bf0[63];
2200 : bf1[63] = bf0[63] + bf0[62];
2201 : range_check(stage, input, bf1, size, stage_range[stage]);
2202 :
2203 : // stage 10
2204 : stage++;
2205 : cospi = cospi_arr(cos_bit[stage]);
2206 : bf0 = output;
2207 : bf1 = step;
2208 : bf1[0] = bf0[0];
2209 : bf1[1] = bf0[1];
2210 : bf1[2] = bf0[2];
2211 : bf1[3] = bf0[3];
2212 : bf1[4] = bf0[4];
2213 : bf1[5] = bf0[5];
2214 : bf1[6] = bf0[6];
2215 : bf1[7] = bf0[7];
2216 : bf1[8] = bf0[8];
2217 : bf1[9] = bf0[9];
2218 : bf1[10] = bf0[10];
2219 : bf1[11] = bf0[11];
2220 : bf1[12] = bf0[12];
2221 : bf1[13] = bf0[13];
2222 : bf1[14] = bf0[14];
2223 : bf1[15] = bf0[15];
2224 : bf1[16] = bf0[16];
2225 : bf1[17] = bf0[17];
2226 : bf1[18] = bf0[18];
2227 : bf1[19] = bf0[19];
2228 : bf1[20] = bf0[20];
2229 : bf1[21] = bf0[21];
2230 : bf1[22] = bf0[22];
2231 : bf1[23] = bf0[23];
2232 : bf1[24] = bf0[24];
2233 : bf1[25] = bf0[25];
2234 : bf1[26] = bf0[26];
2235 : bf1[27] = bf0[27];
2236 : bf1[28] = bf0[28];
2237 : bf1[29] = bf0[29];
2238 : bf1[30] = bf0[30];
2239 : bf1[31] = bf0[31];
2240 : bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
2241 : bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
2242 : bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
2243 : bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
2244 : bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
2245 : bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
2246 : bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
2247 : bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
2248 : bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
2249 : bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
2250 : bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
2251 : bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
2252 : bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
2253 : bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
2254 : bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
2255 : bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
2256 : bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
2257 : bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
2258 : bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
2259 : bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
2260 : bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
2261 : bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
2262 : bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
2263 : bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
2264 : bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
2265 : bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
2266 : bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
2267 : bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
2268 : bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
2269 : bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
2270 : bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
2271 : bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
2272 : range_check(stage, input, bf1, size, stage_range[stage]);
2273 :
2274 : // stage 11
2275 : stage++;
2276 : cospi = cospi_arr(cos_bit[stage]);
2277 : bf0 = step;
2278 : bf1 = output;
2279 : bf1[0] = bf0[0];
2280 : bf1[1] = bf0[32];
2281 : bf1[2] = bf0[16];
2282 : bf1[3] = bf0[48];
2283 : bf1[4] = bf0[8];
2284 : bf1[5] = bf0[40];
2285 : bf1[6] = bf0[24];
2286 : bf1[7] = bf0[56];
2287 : bf1[8] = bf0[4];
2288 : bf1[9] = bf0[36];
2289 : bf1[10] = bf0[20];
2290 : bf1[11] = bf0[52];
2291 : bf1[12] = bf0[12];
2292 : bf1[13] = bf0[44];
2293 : bf1[14] = bf0[28];
2294 : bf1[15] = bf0[60];
2295 : bf1[16] = bf0[2];
2296 : bf1[17] = bf0[34];
2297 : bf1[18] = bf0[18];
2298 : bf1[19] = bf0[50];
2299 : bf1[20] = bf0[10];
2300 : bf1[21] = bf0[42];
2301 : bf1[22] = bf0[26];
2302 : bf1[23] = bf0[58];
2303 : bf1[24] = bf0[6];
2304 : bf1[25] = bf0[38];
2305 : bf1[26] = bf0[22];
2306 : bf1[27] = bf0[54];
2307 : bf1[28] = bf0[14];
2308 : bf1[29] = bf0[46];
2309 : bf1[30] = bf0[30];
2310 : bf1[31] = bf0[62];
2311 : bf1[32] = bf0[1];
2312 : bf1[33] = bf0[33];
2313 : bf1[34] = bf0[17];
2314 : bf1[35] = bf0[49];
2315 : bf1[36] = bf0[9];
2316 : bf1[37] = bf0[41];
2317 : bf1[38] = bf0[25];
2318 : bf1[39] = bf0[57];
2319 : bf1[40] = bf0[5];
2320 : bf1[41] = bf0[37];
2321 : bf1[42] = bf0[21];
2322 : bf1[43] = bf0[53];
2323 : bf1[44] = bf0[13];
2324 : bf1[45] = bf0[45];
2325 : bf1[46] = bf0[29];
2326 : bf1[47] = bf0[61];
2327 : bf1[48] = bf0[3];
2328 : bf1[49] = bf0[35];
2329 : bf1[50] = bf0[19];
2330 : bf1[51] = bf0[51];
2331 : bf1[52] = bf0[11];
2332 : bf1[53] = bf0[43];
2333 : bf1[54] = bf0[27];
2334 : bf1[55] = bf0[59];
2335 : bf1[56] = bf0[7];
2336 : bf1[57] = bf0[39];
2337 : bf1[58] = bf0[23];
2338 : bf1[59] = bf0[55];
2339 : bf1[60] = bf0[15];
2340 : bf1[61] = bf0[47];
2341 : bf1[62] = bf0[31];
2342 : bf1[63] = bf0[63];
2343 : range_check(stage, input, bf1, size, stage_range[stage]);
2344 : }
2345 : #endif // CONFIG_TX64X64
|