Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <stdlib.h>
13 : #include "aom_dsp/inv_txfm.h"
14 : #include "av1/common/av1_inv_txfm1d.h"
15 : #if CONFIG_COEFFICIENT_RANGE_CHECKING
16 :
// Debug helper: checks that every element of `buf` fits in a signed `bit`-bit
// integer, i.e. lies in [-(2^(bit-1)), 2^(bit-1) - 1].
//
// For an offending element it writes the stage, the element index, the allowed
// range and the full contents of `input` (not `buf`) to stderr, then hits
// assert(0).
//
//   stage: stage number; only used in the diagnostic output.
//   input: `size` values dumped in the diagnostic.
//   buf:   `size` values that are actually checked.
//   size:  number of elements in `input` and `buf`.
//   bit:   width of the allowed signed range. The limits are built by
//          shifting by (bit - 1), so `bit` must be at least 1.
void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
                      int32_t size, int8_t bit) {
  const int64_t maxValue = (1LL << (bit - 1)) - 1;
  const int64_t minValue = -(1LL << (bit - 1));

  for (int i = 0; i < size; ++i) {
    if (buf[i] < minValue || buf[i] > maxValue) {
      fprintf(stderr, "Error: coeffs contain out-of-range values\n");
      // int32_t is not guaranteed to be `int`; use the matching PRId32.
      fprintf(stderr, "stage: %" PRId32 "\n", stage);
      fprintf(stderr, "node: %d\n", i);
      fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", minValue,
              maxValue);
      fprintf(stderr, "coeffs: ");

      fprintf(stderr, "[");
      for (int j = 0; j < size; j++) {
        if (j > 0) fprintf(stderr, ", ");
        fprintf(stderr, "%" PRId32, input[j]);
      }
      fprintf(stderr, "]\n");
      assert(0);
    }
  }
}
41 :
// Range checking compiled in: every range_check() is a real call to
// range_check_func().
#define range_check(stage_, input_, buf_, size_, bit_) \
  range_check_func((stage_), (input_), (buf_), (size_), (bit_))
44 : #else
// No-op stand-in for range_check(): it only marks its arguments as used.
// Wrapped in do { } while (0) so that `range_check(...);` is a single
// statement and stays valid inside an unbraced if/else.
#define range_check(stage, input, buf, size, bit) \
  do {                                            \
    (void)(stage);                                \
    (void)(input);                                \
    (void)(buf);                                  \
    (void)(size);                                 \
    (void)(bit);                                  \
  } while (0)
53 : #endif
54 :
55 : // TODO(angiebird): Make 1-d txfm functions static
// 1-D 4-point inverse DCT (going by the function name), evaluated as a fixed
// chain of stages that alternate between `output` and a local buffer.
//
//   input:       4 coefficients; must not be the same pointer as `output`.
//   output:      receives the 4 results and doubles as working storage.
//   cos_bit:     per-stage value; cos_bit[2] selects the cosine table through
//                cospi_arr() and is forwarded to half_btf().
//   stage_range: per-stage bit width handed to range_check(); entries 0..3
//                are indexed.
void av1_idct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
                   const int8_t *stage_range) {
  // Source index feeding each output slot in stage 1.
  static const int8_t kReorder[4] = { 0, 2, 1, 3 };
  const int32_t size = 4;
  int32_t stage = 0;
  int32_t tmp[4];

  // stage 0: check the incoming coefficients
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: reorder, input -> output
  ++stage;
  assert(output != input);
  for (int i = 0; i < size; ++i) output[i] = input[kReorder[i]];
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 2: rotations, output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    tmp[0] = half_btf(cospi[32], output[0], cospi[32], output[1], cbit);
    tmp[1] = half_btf(cospi[32], output[0], -cospi[32], output[1], cbit);
    tmp[2] = half_btf(cospi[48], output[2], -cospi[16], output[3], cbit);
    tmp[3] = half_btf(cospi[16], output[2], cospi[48], output[3], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 3: mirrored add/sub, tmp -> output
  ++stage;
  for (int i = 0; i < 2; ++i) {
    output[i] = tmp[i] + tmp[3 - i];
    output[3 - i] = tmp[i] - tmp[3 - i];
  }
  range_check(stage, input, output, size, stage_range[stage]);
}
99 :
// 1-D 8-point inverse DCT (going by the function name), evaluated as a fixed
// chain of stages that alternate between `output` and a local buffer.
//
//   input:       8 coefficients; must not be the same pointer as `output`.
//   output:      receives the 8 results and doubles as working storage.
//   cos_bit:     per-stage value; entries 2..4 select the cosine table through
//                cospi_arr() and are forwarded to half_btf().
//   stage_range: per-stage bit width handed to range_check(); entries 0..5
//                are indexed.
void av1_idct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
                   const int8_t *stage_range) {
  // Source index feeding each output slot in stage 1.
  static const int8_t kReorder[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
  const int32_t size = 8;
  int32_t stage = 0;
  int32_t tmp[8];

  // stage 0: check the incoming coefficients
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: reorder, input -> output
  ++stage;
  assert(output != input);
  for (int i = 0; i < size; ++i) output[i] = input[kReorder[i]];
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 2: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 4; ++i) tmp[i] = output[i];
    tmp[4] = half_btf(cospi[56], output[4], -cospi[8], output[7], cbit);
    tmp[5] = half_btf(cospi[24], output[5], -cospi[40], output[6], cbit);
    tmp[6] = half_btf(cospi[40], output[5], cospi[24], output[6], cbit);
    tmp[7] = half_btf(cospi[8], output[4], cospi[56], output[7], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 3: tmp -> output
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    output[0] = half_btf(cospi[32], tmp[0], cospi[32], tmp[1], cbit);
    output[1] = half_btf(cospi[32], tmp[0], -cospi[32], tmp[1], cbit);
    output[2] = half_btf(cospi[48], tmp[2], -cospi[16], tmp[3], cbit);
    output[3] = half_btf(cospi[16], tmp[2], cospi[48], tmp[3], cbit);
    output[4] = tmp[4] + tmp[5];
    output[5] = tmp[4] - tmp[5];
    output[6] = -tmp[6] + tmp[7];
    output[7] = tmp[6] + tmp[7];
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 4: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 2; ++i) {
      tmp[i] = output[i] + output[3 - i];
      tmp[3 - i] = output[i] - output[3 - i];
    }
    tmp[4] = output[4];
    tmp[5] = half_btf(-cospi[32], output[5], cospi[32], output[6], cbit);
    tmp[6] = half_btf(cospi[32], output[5], cospi[32], output[6], cbit);
    tmp[7] = output[7];
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 5: mirrored add/sub, tmp -> output
  ++stage;
  for (int i = 0; i < 4; ++i) {
    output[i] = tmp[i] + tmp[7 - i];
    output[7 - i] = tmp[i] - tmp[7 - i];
  }
  range_check(stage, input, output, size, stage_range[stage]);
}
185 :
// 1-D 16-point inverse DCT (going by the function name), evaluated as a fixed
// chain of stages that alternate between `output` and a local buffer.
//
//   input:       16 coefficients; must not be the same pointer as `output`.
//   output:      receives the 16 results and doubles as working storage.
//   cos_bit:     per-stage value; entries 2..6 select the cosine table through
//                cospi_arr() and are forwarded to half_btf().
//   stage_range: per-stage bit width handed to range_check(); entries 0..7
//                are indexed.
void av1_idct16_new(const int32_t *input, int32_t *output,
                    const int8_t *cos_bit, const int8_t *stage_range) {
  // Source index feeding each output slot in stage 1.
  static const int8_t kReorder[16] = { 0, 8, 4, 12, 2, 10, 6, 14,
                                       1, 9, 5, 13, 3, 11, 7, 15 };
  const int32_t size = 16;
  int32_t stage = 0;
  int32_t tmp[16];

  // stage 0: check the incoming coefficients
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: reorder, input -> output
  ++stage;
  assert(output != input);
  for (int i = 0; i < size; ++i) output[i] = input[kReorder[i]];
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 2: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 8; ++i) tmp[i] = output[i];
    tmp[8] = half_btf(cospi[60], output[8], -cospi[4], output[15], cbit);
    tmp[9] = half_btf(cospi[28], output[9], -cospi[36], output[14], cbit);
    tmp[10] = half_btf(cospi[44], output[10], -cospi[20], output[13], cbit);
    tmp[11] = half_btf(cospi[12], output[11], -cospi[52], output[12], cbit);
    tmp[12] = half_btf(cospi[52], output[11], cospi[12], output[12], cbit);
    tmp[13] = half_btf(cospi[20], output[10], cospi[44], output[13], cbit);
    tmp[14] = half_btf(cospi[36], output[9], cospi[28], output[14], cbit);
    tmp[15] = half_btf(cospi[4], output[8], cospi[60], output[15], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 3: tmp -> output
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 4; ++i) output[i] = tmp[i];
    output[4] = half_btf(cospi[56], tmp[4], -cospi[8], tmp[7], cbit);
    output[5] = half_btf(cospi[24], tmp[5], -cospi[40], tmp[6], cbit);
    output[6] = half_btf(cospi[40], tmp[5], cospi[24], tmp[6], cbit);
    output[7] = half_btf(cospi[8], tmp[4], cospi[56], tmp[7], cbit);
    // Slots 8..15 in groups of four: (sum, diff), then (negated diff, sum).
    for (int k = 8; k < 16; k += 4) {
      output[k] = tmp[k] + tmp[k + 1];
      output[k + 1] = tmp[k] - tmp[k + 1];
      output[k + 2] = -tmp[k + 2] + tmp[k + 3];
      output[k + 3] = tmp[k + 2] + tmp[k + 3];
    }
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 4: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    tmp[0] = half_btf(cospi[32], output[0], cospi[32], output[1], cbit);
    tmp[1] = half_btf(cospi[32], output[0], -cospi[32], output[1], cbit);
    tmp[2] = half_btf(cospi[48], output[2], -cospi[16], output[3], cbit);
    tmp[3] = half_btf(cospi[16], output[2], cospi[48], output[3], cbit);
    tmp[4] = output[4] + output[5];
    tmp[5] = output[4] - output[5];
    tmp[6] = -output[6] + output[7];
    tmp[7] = output[6] + output[7];
    tmp[8] = output[8];
    tmp[9] = half_btf(-cospi[16], output[9], cospi[48], output[14], cbit);
    tmp[10] = half_btf(-cospi[48], output[10], -cospi[16], output[13], cbit);
    tmp[11] = output[11];
    tmp[12] = output[12];
    tmp[13] = half_btf(-cospi[16], output[10], cospi[48], output[13], cbit);
    tmp[14] = half_btf(cospi[48], output[9], cospi[16], output[14], cbit);
    tmp[15] = output[15];
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 5: tmp -> output
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 2; ++i) {
      output[i] = tmp[i] + tmp[3 - i];
      output[3 - i] = tmp[i] - tmp[3 - i];
    }
    output[4] = tmp[4];
    output[5] = half_btf(-cospi[32], tmp[5], cospi[32], tmp[6], cbit);
    output[6] = half_btf(cospi[32], tmp[5], cospi[32], tmp[6], cbit);
    output[7] = tmp[7];
    for (int i = 0; i < 2; ++i) {
      output[8 + i] = tmp[8 + i] + tmp[11 - i];
      output[11 - i] = tmp[8 + i] - tmp[11 - i];
      output[12 + i] = -tmp[12 + i] + tmp[15 - i];
      output[15 - i] = tmp[12 + i] + tmp[15 - i];
    }
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 6: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 4; ++i) {
      tmp[i] = output[i] + output[7 - i];
      tmp[7 - i] = output[i] - output[7 - i];
    }
    tmp[8] = output[8];
    tmp[9] = output[9];
    tmp[10] = half_btf(-cospi[32], output[10], cospi[32], output[13], cbit);
    tmp[11] = half_btf(-cospi[32], output[11], cospi[32], output[12], cbit);
    tmp[12] = half_btf(cospi[32], output[11], cospi[32], output[12], cbit);
    tmp[13] = half_btf(cospi[32], output[10], cospi[32], output[13], cbit);
    tmp[14] = output[14];
    tmp[15] = output[15];
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 7: mirrored add/sub, tmp -> output
  ++stage;
  for (int i = 0; i < 8; ++i) {
    output[i] = tmp[i] + tmp[15 - i];
    output[15 - i] = tmp[i] - tmp[15 - i];
  }
  range_check(stage, input, output, size, stage_range[stage]);
}
357 :
// 1-D 32-point inverse DCT (going by the function name), evaluated as a fixed
// chain of stages that alternate between `output` and a local buffer.
//
//   input:       32 coefficients; must not be the same pointer as `output`.
//   output:      receives the 32 results and doubles as working storage.
//   cos_bit:     per-stage value; entries 2..8 select the cosine table through
//                cospi_arr() and are forwarded to half_btf().
//   stage_range: per-stage bit width handed to range_check(); entries 0..9
//                are indexed.
void av1_idct32_new(const int32_t *input, int32_t *output,
                    const int8_t *cos_bit, const int8_t *stage_range) {
  // Source index feeding each output slot in stage 1.
  static const int8_t kReorder[32] = {
    0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
    1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31
  };
  const int32_t size = 32;
  int32_t stage = 0;
  int32_t tmp[32];

  // stage 0: check the incoming coefficients
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: reorder, input -> output
  ++stage;
  assert(output != input);
  for (int i = 0; i < size; ++i) output[i] = input[kReorder[i]];
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 2: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 16; ++i) tmp[i] = output[i];
    tmp[16] = half_btf(cospi[62], output[16], -cospi[2], output[31], cbit);
    tmp[17] = half_btf(cospi[30], output[17], -cospi[34], output[30], cbit);
    tmp[18] = half_btf(cospi[46], output[18], -cospi[18], output[29], cbit);
    tmp[19] = half_btf(cospi[14], output[19], -cospi[50], output[28], cbit);
    tmp[20] = half_btf(cospi[54], output[20], -cospi[10], output[27], cbit);
    tmp[21] = half_btf(cospi[22], output[21], -cospi[42], output[26], cbit);
    tmp[22] = half_btf(cospi[38], output[22], -cospi[26], output[25], cbit);
    tmp[23] = half_btf(cospi[6], output[23], -cospi[58], output[24], cbit);
    tmp[24] = half_btf(cospi[58], output[23], cospi[6], output[24], cbit);
    tmp[25] = half_btf(cospi[26], output[22], cospi[38], output[25], cbit);
    tmp[26] = half_btf(cospi[42], output[21], cospi[22], output[26], cbit);
    tmp[27] = half_btf(cospi[10], output[20], cospi[54], output[27], cbit);
    tmp[28] = half_btf(cospi[50], output[19], cospi[14], output[28], cbit);
    tmp[29] = half_btf(cospi[18], output[18], cospi[46], output[29], cbit);
    tmp[30] = half_btf(cospi[34], output[17], cospi[30], output[30], cbit);
    tmp[31] = half_btf(cospi[2], output[16], cospi[62], output[31], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 3: tmp -> output
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 8; ++i) output[i] = tmp[i];
    output[8] = half_btf(cospi[60], tmp[8], -cospi[4], tmp[15], cbit);
    output[9] = half_btf(cospi[28], tmp[9], -cospi[36], tmp[14], cbit);
    output[10] = half_btf(cospi[44], tmp[10], -cospi[20], tmp[13], cbit);
    output[11] = half_btf(cospi[12], tmp[11], -cospi[52], tmp[12], cbit);
    output[12] = half_btf(cospi[52], tmp[11], cospi[12], tmp[12], cbit);
    output[13] = half_btf(cospi[20], tmp[10], cospi[44], tmp[13], cbit);
    output[14] = half_btf(cospi[36], tmp[9], cospi[28], tmp[14], cbit);
    output[15] = half_btf(cospi[4], tmp[8], cospi[60], tmp[15], cbit);
    // Slots 16..31 in groups of four: (sum, diff), then (negated diff, sum).
    for (int k = 16; k < 32; k += 4) {
      output[k] = tmp[k] + tmp[k + 1];
      output[k + 1] = tmp[k] - tmp[k + 1];
      output[k + 2] = -tmp[k + 2] + tmp[k + 3];
      output[k + 3] = tmp[k + 2] + tmp[k + 3];
    }
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 4: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 4; ++i) tmp[i] = output[i];
    tmp[4] = half_btf(cospi[56], output[4], -cospi[8], output[7], cbit);
    tmp[5] = half_btf(cospi[24], output[5], -cospi[40], output[6], cbit);
    tmp[6] = half_btf(cospi[40], output[5], cospi[24], output[6], cbit);
    tmp[7] = half_btf(cospi[8], output[4], cospi[56], output[7], cbit);
    for (int k = 8; k < 16; k += 4) {
      tmp[k] = output[k] + output[k + 1];
      tmp[k + 1] = output[k] - output[k + 1];
      tmp[k + 2] = -output[k + 2] + output[k + 3];
      tmp[k + 3] = output[k + 2] + output[k + 3];
    }
    tmp[16] = output[16];
    tmp[17] = half_btf(-cospi[8], output[17], cospi[56], output[30], cbit);
    tmp[18] = half_btf(-cospi[56], output[18], -cospi[8], output[29], cbit);
    tmp[19] = output[19];
    tmp[20] = output[20];
    tmp[21] = half_btf(-cospi[40], output[21], cospi[24], output[26], cbit);
    tmp[22] = half_btf(-cospi[24], output[22], -cospi[40], output[25], cbit);
    tmp[23] = output[23];
    tmp[24] = output[24];
    tmp[25] = half_btf(-cospi[40], output[22], cospi[24], output[25], cbit);
    tmp[26] = half_btf(cospi[24], output[21], cospi[40], output[26], cbit);
    tmp[27] = output[27];
    tmp[28] = output[28];
    tmp[29] = half_btf(-cospi[8], output[18], cospi[56], output[29], cbit);
    tmp[30] = half_btf(cospi[56], output[17], cospi[8], output[30], cbit);
    tmp[31] = output[31];
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 5: tmp -> output
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    output[0] = half_btf(cospi[32], tmp[0], cospi[32], tmp[1], cbit);
    output[1] = half_btf(cospi[32], tmp[0], -cospi[32], tmp[1], cbit);
    output[2] = half_btf(cospi[48], tmp[2], -cospi[16], tmp[3], cbit);
    output[3] = half_btf(cospi[16], tmp[2], cospi[48], tmp[3], cbit);
    output[4] = tmp[4] + tmp[5];
    output[5] = tmp[4] - tmp[5];
    output[6] = -tmp[6] + tmp[7];
    output[7] = tmp[6] + tmp[7];
    output[8] = tmp[8];
    output[9] = half_btf(-cospi[16], tmp[9], cospi[48], tmp[14], cbit);
    output[10] = half_btf(-cospi[48], tmp[10], -cospi[16], tmp[13], cbit);
    output[11] = tmp[11];
    output[12] = tmp[12];
    output[13] = half_btf(-cospi[16], tmp[10], cospi[48], tmp[13], cbit);
    output[14] = half_btf(cospi[48], tmp[9], cospi[16], tmp[14], cbit);
    output[15] = tmp[15];
    // Slots 16..31 in groups of eight: a mirrored add/sub over the first four,
    // then the negated-difference variant over the last four.
    for (int k = 16; k < 32; k += 8) {
      output[k] = tmp[k] + tmp[k + 3];
      output[k + 1] = tmp[k + 1] + tmp[k + 2];
      output[k + 2] = tmp[k + 1] - tmp[k + 2];
      output[k + 3] = tmp[k] - tmp[k + 3];
      output[k + 4] = -tmp[k + 4] + tmp[k + 7];
      output[k + 5] = -tmp[k + 5] + tmp[k + 6];
      output[k + 6] = tmp[k + 5] + tmp[k + 6];
      output[k + 7] = tmp[k + 4] + tmp[k + 7];
    }
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 6: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 2; ++i) {
      tmp[i] = output[i] + output[3 - i];
      tmp[3 - i] = output[i] - output[3 - i];
    }
    tmp[4] = output[4];
    tmp[5] = half_btf(-cospi[32], output[5], cospi[32], output[6], cbit);
    tmp[6] = half_btf(cospi[32], output[5], cospi[32], output[6], cbit);
    tmp[7] = output[7];
    for (int i = 0; i < 2; ++i) {
      tmp[8 + i] = output[8 + i] + output[11 - i];
      tmp[11 - i] = output[8 + i] - output[11 - i];
      tmp[12 + i] = -output[12 + i] + output[15 - i];
      tmp[15 - i] = output[12 + i] + output[15 - i];
    }
    tmp[16] = output[16];
    tmp[17] = output[17];
    tmp[18] = half_btf(-cospi[16], output[18], cospi[48], output[29], cbit);
    tmp[19] = half_btf(-cospi[16], output[19], cospi[48], output[28], cbit);
    tmp[20] = half_btf(-cospi[48], output[20], -cospi[16], output[27], cbit);
    tmp[21] = half_btf(-cospi[48], output[21], -cospi[16], output[26], cbit);
    tmp[22] = output[22];
    tmp[23] = output[23];
    tmp[24] = output[24];
    tmp[25] = output[25];
    tmp[26] = half_btf(-cospi[16], output[21], cospi[48], output[26], cbit);
    tmp[27] = half_btf(-cospi[16], output[20], cospi[48], output[27], cbit);
    tmp[28] = half_btf(cospi[48], output[19], cospi[16], output[28], cbit);
    tmp[29] = half_btf(cospi[48], output[18], cospi[16], output[29], cbit);
    tmp[30] = output[30];
    tmp[31] = output[31];
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 7: tmp -> output
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 4; ++i) {
      output[i] = tmp[i] + tmp[7 - i];
      output[7 - i] = tmp[i] - tmp[7 - i];
    }
    output[8] = tmp[8];
    output[9] = tmp[9];
    output[10] = half_btf(-cospi[32], tmp[10], cospi[32], tmp[13], cbit);
    output[11] = half_btf(-cospi[32], tmp[11], cospi[32], tmp[12], cbit);
    output[12] = half_btf(cospi[32], tmp[11], cospi[32], tmp[12], cbit);
    output[13] = half_btf(cospi[32], tmp[10], cospi[32], tmp[13], cbit);
    output[14] = tmp[14];
    output[15] = tmp[15];
    for (int i = 0; i < 4; ++i) {
      output[16 + i] = tmp[16 + i] + tmp[23 - i];
      output[23 - i] = tmp[16 + i] - tmp[23 - i];
      output[24 + i] = -tmp[24 + i] + tmp[31 - i];
      output[31 - i] = tmp[24 + i] + tmp[31 - i];
    }
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 8: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 8; ++i) {
      tmp[i] = output[i] + output[15 - i];
      tmp[15 - i] = output[i] - output[15 - i];
    }
    for (int i = 16; i < 20; ++i) tmp[i] = output[i];
    tmp[20] = half_btf(-cospi[32], output[20], cospi[32], output[27], cbit);
    tmp[21] = half_btf(-cospi[32], output[21], cospi[32], output[26], cbit);
    tmp[22] = half_btf(-cospi[32], output[22], cospi[32], output[25], cbit);
    tmp[23] = half_btf(-cospi[32], output[23], cospi[32], output[24], cbit);
    tmp[24] = half_btf(cospi[32], output[23], cospi[32], output[24], cbit);
    tmp[25] = half_btf(cospi[32], output[22], cospi[32], output[25], cbit);
    tmp[26] = half_btf(cospi[32], output[21], cospi[32], output[26], cbit);
    tmp[27] = half_btf(cospi[32], output[20], cospi[32], output[27], cbit);
    for (int i = 28; i < 32; ++i) tmp[i] = output[i];
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 9: mirrored add/sub, tmp -> output
  ++stage;
  for (int i = 0; i < 16; ++i) {
    output[i] = tmp[i] + tmp[31 - i];
    output[31 - i] = tmp[i] - tmp[31 - i];
  }
  range_check(stage, input, output, size, stage_range[stage]);
}
719 :
// 1-D 4-point inverse ADST (going by the function name), evaluated as a fixed
// chain of stages that alternate between `output` and a local buffer.
//
//   input:       4 coefficients; must not be the same pointer as `output`.
//   output:      receives the 4 results and doubles as working storage.
//   cos_bit:     per-stage value; entries 2 and 4 select the cosine table
//                through cospi_arr() and are forwarded to half_btf().
//   stage_range: per-stage bit width handed to range_check(); entries 0..5
//                are indexed.
void av1_iadst4_new(const int32_t *input, int32_t *output,
                    const int8_t *cos_bit, const int8_t *stage_range) {
  // tmp index feeding each output slot in the final stage.
  static const int8_t kFinalOrder[4] = { 1, 2, 3, 0 };
  const int32_t size = 4;
  int32_t stage = 0;
  int32_t tmp[4];

  // stage 0: check the incoming coefficients
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: reorder with sign flips, input -> output
  ++stage;
  assert(output != input);
  output[0] = input[0];
  output[1] = -input[3];
  output[2] = -input[1];
  output[3] = input[2];
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 2: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    tmp[0] = output[0];
    tmp[1] = output[1];
    tmp[2] = half_btf(cospi[32], output[2], cospi[32], output[3], cbit);
    tmp[3] = half_btf(cospi[32], output[2], -cospi[32], output[3], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 3: add/sub of elements two apart, tmp -> output
  ++stage;
  for (int i = 0; i < 2; ++i) {
    output[i] = tmp[i] + tmp[i + 2];
    output[i + 2] = tmp[i] - tmp[i + 2];
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 4: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    tmp[0] = half_btf(cospi[8], output[0], cospi[56], output[1], cbit);
    tmp[1] = half_btf(cospi[56], output[0], -cospi[8], output[1], cbit);
    tmp[2] = half_btf(cospi[40], output[2], cospi[24], output[3], cbit);
    tmp[3] = half_btf(cospi[24], output[2], -cospi[40], output[3], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 5: final reordering, tmp -> output
  ++stage;
  for (int i = 0; i < size; ++i) output[i] = tmp[kFinalOrder[i]];
  range_check(stage, input, output, size, stage_range[stage]);
}
784 :
// 1-D 8-point inverse ADST (going by the function name), evaluated as a fixed
// chain of stages that alternate between `output` and a local buffer.
//
//   input:       8 coefficients; must not be the same pointer as `output`.
//   output:      receives the 8 results and doubles as working storage.
//   cos_bit:     per-stage value; entries 2, 4 and 6 select the cosine table
//                through cospi_arr() and are forwarded to half_btf().
//   stage_range: per-stage bit width handed to range_check(); entries 0..7
//                are indexed.
void av1_iadst8_new(const int32_t *input, int32_t *output,
                    const int8_t *cos_bit, const int8_t *stage_range) {
  // tmp index feeding each output slot in the final stage.
  static const int8_t kFinalOrder[8] = { 1, 6, 3, 4, 5, 2, 7, 0 };
  const int32_t size = 8;
  int32_t stage = 0;
  int32_t tmp[8];

  // stage 0: check the incoming coefficients
  range_check(stage, input, input, size, stage_range[stage]);

  // stage 1: reorder with sign flips, input -> output
  ++stage;
  assert(output != input);
  output[0] = input[0];
  output[1] = -input[7];
  output[2] = -input[3];
  output[3] = input[4];
  output[4] = -input[1];
  output[5] = input[6];
  output[6] = input[2];
  output[7] = -input[5];
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 2: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    tmp[0] = output[0];
    tmp[1] = output[1];
    tmp[2] = half_btf(cospi[32], output[2], cospi[32], output[3], cbit);
    tmp[3] = half_btf(cospi[32], output[2], -cospi[32], output[3], cbit);
    tmp[4] = output[4];
    tmp[5] = output[5];
    tmp[6] = half_btf(cospi[32], output[6], cospi[32], output[7], cbit);
    tmp[7] = half_btf(cospi[32], output[6], -cospi[32], output[7], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 3: within each group of four, add/sub of elements two apart,
  // tmp -> output
  ++stage;
  for (int k = 0; k < 8; k += 4) {
    output[k] = tmp[k] + tmp[k + 2];
    output[k + 1] = tmp[k + 1] + tmp[k + 3];
    output[k + 2] = tmp[k] - tmp[k + 2];
    output[k + 3] = tmp[k + 1] - tmp[k + 3];
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 4: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    for (int i = 0; i < 4; ++i) tmp[i] = output[i];
    tmp[4] = half_btf(cospi[16], output[4], cospi[48], output[5], cbit);
    tmp[5] = half_btf(cospi[48], output[4], -cospi[16], output[5], cbit);
    tmp[6] = half_btf(-cospi[48], output[6], cospi[16], output[7], cbit);
    tmp[7] = half_btf(cospi[16], output[6], cospi[48], output[7], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 5: add/sub of elements four apart, tmp -> output
  ++stage;
  for (int i = 0; i < 4; ++i) {
    output[i] = tmp[i] + tmp[i + 4];
    output[i + 4] = tmp[i] - tmp[i + 4];
  }
  range_check(stage, input, output, size, stage_range[stage]);

  // stage 6: output -> tmp
  ++stage;
  {
    const int8_t cbit = cos_bit[stage];
    const int32_t *const cospi = cospi_arr(cbit);
    tmp[0] = half_btf(cospi[4], output[0], cospi[60], output[1], cbit);
    tmp[1] = half_btf(cospi[60], output[0], -cospi[4], output[1], cbit);
    tmp[2] = half_btf(cospi[20], output[2], cospi[44], output[3], cbit);
    tmp[3] = half_btf(cospi[44], output[2], -cospi[20], output[3], cbit);
    tmp[4] = half_btf(cospi[36], output[4], cospi[28], output[5], cbit);
    tmp[5] = half_btf(cospi[28], output[4], -cospi[36], output[5], cbit);
    tmp[6] = half_btf(cospi[52], output[6], cospi[12], output[7], cbit);
    tmp[7] = half_btf(cospi[12], output[6], -cospi[52], output[7], cbit);
  }
  range_check(stage, input, tmp, size, stage_range[stage]);

  // stage 7: final reordering, tmp -> output
  ++stage;
  for (int i = 0; i < size; ++i) output[i] = tmp[kFinalOrder[i]];
  range_check(stage, input, output, size, stage_range[stage]);
}
898 :
// av1_iadst16_new: 16-point 1-D transform built from nine butterfly stages.
// NOTE(review): presumably the inverse ADST, judging by the name only; confirm
// against the matching forward transform.
//
//   input       16 coefficients, read-only. Must not alias output (asserted).
//   output      16 results. Also used as working storage: odd stages write
//               output, even stages write the local step[] buffer.
//   cos_bit     indexed by stage number; read at stages 2, 4, 6 and 8 to pick
//               the cosine table via cospi_arr() and passed on to half_btf().
//   stage_range indexed by stage number (0..9); passed to range_check() as the
//               bit argument after every stage.
//
// range_check() expands to plain (void) casts unless
// CONFIG_COEFFICIENT_RANGE_CHECKING is set.
899 0 : void av1_iadst16_new(const int32_t *input, int32_t *output,
900 : const int8_t *cos_bit, const int8_t *stage_range) {
901 0 : const int32_t size = 16;
902 : const int32_t *cospi;
903 :
904 0 : int32_t stage = 0;
905 : int32_t *bf0, *bf1;
906 : int32_t step[16];
907 :
908 : // stage 0: range-check the untouched input
909 0 : range_check(stage, input, input, size, stage_range[stage]);
910 :
911 : // stage 1: permute input into output, negating selected elements
912 0 : stage++;
// output is overwritten while input is still being read, so the two buffers
// have to be distinct.
913 0 : assert(output != input);
914 0 : bf1 = output;
915 0 : bf1[0] = input[0];
916 0 : bf1[1] = -input[15];
917 0 : bf1[2] = -input[7];
918 0 : bf1[3] = input[8];
919 0 : bf1[4] = -input[3];
920 0 : bf1[5] = input[12];
921 0 : bf1[6] = input[4];
922 0 : bf1[7] = -input[11];
923 0 : bf1[8] = -input[1];
924 0 : bf1[9] = input[14];
925 0 : bf1[10] = input[6];
926 0 : bf1[11] = -input[9];
927 0 : bf1[12] = input[2];
928 0 : bf1[13] = -input[13];
929 0 : bf1[14] = -input[5];
930 0 : bf1[15] = input[10];
931 0 : range_check(stage, input, bf1, size, stage_range[stage]);
932 :
933 : // stage 2: half_btf with cospi[32] on pairs (4k+2, 4k+3); other elements copied
934 0 : stage++;
935 0 : cospi = cospi_arr(cos_bit[stage]);
936 0 : bf0 = output;
937 0 : bf1 = step;
938 0 : bf1[0] = bf0[0];
939 0 : bf1[1] = bf0[1];
940 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
941 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
942 0 : bf1[4] = bf0[4];
943 0 : bf1[5] = bf0[5];
944 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
945 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
946 0 : bf1[8] = bf0[8];
947 0 : bf1[9] = bf0[9];
948 0 : bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
949 0 : bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
950 0 : bf1[12] = bf0[12];
951 0 : bf1[13] = bf0[13];
952 0 : bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
953 0 : bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
954 0 : range_check(stage, input, bf1, size, stage_range[stage]);
955 :
956 : // stage 3: sums and differences of elements two apart, per group of four
957 0 : stage++;
958 0 : bf0 = step;
959 0 : bf1 = output;
960 0 : bf1[0] = bf0[0] + bf0[2];
961 0 : bf1[1] = bf0[1] + bf0[3];
962 0 : bf1[2] = bf0[0] - bf0[2];
963 0 : bf1[3] = bf0[1] - bf0[3];
964 0 : bf1[4] = bf0[4] + bf0[6];
965 0 : bf1[5] = bf0[5] + bf0[7];
966 0 : bf1[6] = bf0[4] - bf0[6];
967 0 : bf1[7] = bf0[5] - bf0[7];
968 0 : bf1[8] = bf0[8] + bf0[10];
969 0 : bf1[9] = bf0[9] + bf0[11];
970 0 : bf1[10] = bf0[8] - bf0[10];
971 0 : bf1[11] = bf0[9] - bf0[11];
972 0 : bf1[12] = bf0[12] + bf0[14];
973 0 : bf1[13] = bf0[13] + bf0[15];
974 0 : bf1[14] = bf0[12] - bf0[14];
975 0 : bf1[15] = bf0[13] - bf0[15];
976 0 : range_check(stage, input, bf1, size, stage_range[stage]);
977 :
978 : // stage 4: half_btf with cospi[16]/cospi[48] on the upper four of each group of eight
979 0 : stage++;
980 0 : cospi = cospi_arr(cos_bit[stage]);
981 0 : bf0 = output;
982 0 : bf1 = step;
983 0 : bf1[0] = bf0[0];
984 0 : bf1[1] = bf0[1];
985 0 : bf1[2] = bf0[2];
986 0 : bf1[3] = bf0[3];
987 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
988 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
989 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
990 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
991 0 : bf1[8] = bf0[8];
992 0 : bf1[9] = bf0[9];
993 0 : bf1[10] = bf0[10];
994 0 : bf1[11] = bf0[11];
995 0 : bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
996 0 : bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
997 0 : bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
998 0 : bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
999 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1000 :
1001 : // stage 5: sums and differences of elements four apart, per group of eight
1002 0 : stage++;
1003 0 : bf0 = step;
1004 0 : bf1 = output;
1005 0 : bf1[0] = bf0[0] + bf0[4];
1006 0 : bf1[1] = bf0[1] + bf0[5];
1007 0 : bf1[2] = bf0[2] + bf0[6];
1008 0 : bf1[3] = bf0[3] + bf0[7];
1009 0 : bf1[4] = bf0[0] - bf0[4];
1010 0 : bf1[5] = bf0[1] - bf0[5];
1011 0 : bf1[6] = bf0[2] - bf0[6];
1012 0 : bf1[7] = bf0[3] - bf0[7];
1013 0 : bf1[8] = bf0[8] + bf0[12];
1014 0 : bf1[9] = bf0[9] + bf0[13];
1015 0 : bf1[10] = bf0[10] + bf0[14];
1016 0 : bf1[11] = bf0[11] + bf0[15];
1017 0 : bf1[12] = bf0[8] - bf0[12];
1018 0 : bf1[13] = bf0[9] - bf0[13];
1019 0 : bf1[14] = bf0[10] - bf0[14];
1020 0 : bf1[15] = bf0[11] - bf0[15];
1021 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1022 :
1023 : // stage 6: half_btf on elements 8..15 (cospi[8]/[56] and cospi[40]/[24]); 0..7 copied
1024 0 : stage++;
1025 0 : cospi = cospi_arr(cos_bit[stage]);
1026 0 : bf0 = output;
1027 0 : bf1 = step;
1028 0 : bf1[0] = bf0[0];
1029 0 : bf1[1] = bf0[1];
1030 0 : bf1[2] = bf0[2];
1031 0 : bf1[3] = bf0[3];
1032 0 : bf1[4] = bf0[4];
1033 0 : bf1[5] = bf0[5];
1034 0 : bf1[6] = bf0[6];
1035 0 : bf1[7] = bf0[7];
1036 0 : bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
1037 0 : bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
1038 0 : bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
1039 0 : bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
1040 0 : bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
1041 0 : bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
1042 0 : bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
1043 0 : bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
1044 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1045 :
1046 : // stage 7: sums and differences of elements eight apart
1047 0 : stage++;
1048 0 : bf0 = step;
1049 0 : bf1 = output;
1050 0 : bf1[0] = bf0[0] + bf0[8];
1051 0 : bf1[1] = bf0[1] + bf0[9];
1052 0 : bf1[2] = bf0[2] + bf0[10];
1053 0 : bf1[3] = bf0[3] + bf0[11];
1054 0 : bf1[4] = bf0[4] + bf0[12];
1055 0 : bf1[5] = bf0[5] + bf0[13];
1056 0 : bf1[6] = bf0[6] + bf0[14];
1057 0 : bf1[7] = bf0[7] + bf0[15];
1058 0 : bf1[8] = bf0[0] - bf0[8];
1059 0 : bf1[9] = bf0[1] - bf0[9];
1060 0 : bf1[10] = bf0[2] - bf0[10];
1061 0 : bf1[11] = bf0[3] - bf0[11];
1062 0 : bf1[12] = bf0[4] - bf0[12];
1063 0 : bf1[13] = bf0[5] - bf0[13];
1064 0 : bf1[14] = bf0[6] - bf0[14];
1065 0 : bf1[15] = bf0[7] - bf0[15];
1066 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1067 :
1068 : // stage 8: half_btf on every adjacent pair (2k, 2k+1), each with its own cospi pair
1069 0 : stage++;
1070 0 : cospi = cospi_arr(cos_bit[stage]);
1071 0 : bf0 = output;
1072 0 : bf1 = step;
1073 0 : bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
1074 0 : bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]);
1075 0 : bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
1076 0 : bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]);
1077 0 : bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
1078 0 : bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]);
1079 0 : bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
1080 0 : bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]);
1081 0 : bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
1082 0 : bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]);
1083 0 : bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
1084 0 : bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]);
1085 0 : bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
1086 0 : bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]);
1087 0 : bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
1088 0 : bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]);
1089 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1090 :
1091 : // stage 9: final reordering of step[] into output (no arithmetic)
1092 0 : stage++;
1093 0 : bf0 = step;
1094 0 : bf1 = output;
1095 0 : bf1[0] = bf0[1];
1096 0 : bf1[1] = bf0[14];
1097 0 : bf1[2] = bf0[3];
1098 0 : bf1[3] = bf0[12];
1099 0 : bf1[4] = bf0[5];
1100 0 : bf1[5] = bf0[10];
1101 0 : bf1[6] = bf0[7];
1102 0 : bf1[7] = bf0[8];
1103 0 : bf1[8] = bf0[9];
1104 0 : bf1[9] = bf0[6];
1105 0 : bf1[10] = bf0[11];
1106 0 : bf1[11] = bf0[4];
1107 0 : bf1[12] = bf0[13];
1108 0 : bf1[13] = bf0[2];
1109 0 : bf1[14] = bf0[15];
1110 0 : bf1[15] = bf0[0];
1111 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1112 0 : }
1113 :
// av1_iadst32_new: 32-point 1-D transform built from eleven butterfly stages.
// NOTE(review): presumably the inverse ADST, judging by the name only; confirm
// against the matching forward transform.
//
//   input       32 coefficients, read-only. Must not alias output (asserted).
//   output      32 results. Also used as working storage: odd stages write
//               output, even stages write the local step[] buffer.
//   cos_bit     indexed by stage number; read at stages 2, 4, 6, 8 and 10 to
//               pick the cosine table via cospi_arr() and passed on to
//               half_btf().
//   stage_range indexed by stage number (0..11); passed to range_check() as
//               the bit argument after every stage.
//
// range_check() expands to plain (void) casts unless
// CONFIG_COEFFICIENT_RANGE_CHECKING is set.
1114 0 : void av1_iadst32_new(const int32_t *input, int32_t *output,
1115 : const int8_t *cos_bit, const int8_t *stage_range) {
1116 0 : const int32_t size = 32;
1117 : const int32_t *cospi;
1118 :
1119 0 : int32_t stage = 0;
1120 : int32_t *bf0, *bf1;
1121 : int32_t step[32];
1122 :
1123 : // stage 0: range-check the untouched input
1124 0 : range_check(stage, input, input, size, stage_range[stage]);
1125 :
1126 : // stage 1: permute input into output, negating selected elements
1127 0 : stage++;
// output is overwritten while input is still being read, so the two buffers
// have to be distinct.
1128 0 : assert(output != input);
1129 0 : bf1 = output;
1130 0 : bf1[0] = input[0];
1131 0 : bf1[1] = -input[31];
1132 0 : bf1[2] = -input[15];
1133 0 : bf1[3] = input[16];
1134 0 : bf1[4] = -input[7];
1135 0 : bf1[5] = input[24];
1136 0 : bf1[6] = input[8];
1137 0 : bf1[7] = -input[23];
1138 0 : bf1[8] = -input[3];
1139 0 : bf1[9] = input[28];
1140 0 : bf1[10] = input[12];
1141 0 : bf1[11] = -input[19];
1142 0 : bf1[12] = input[4];
1143 0 : bf1[13] = -input[27];
1144 0 : bf1[14] = -input[11];
1145 0 : bf1[15] = input[20];
1146 0 : bf1[16] = -input[1];
1147 0 : bf1[17] = input[30];
1148 0 : bf1[18] = input[14];
1149 0 : bf1[19] = -input[17];
1150 0 : bf1[20] = input[6];
1151 0 : bf1[21] = -input[25];
1152 0 : bf1[22] = -input[9];
1153 0 : bf1[23] = input[22];
1154 0 : bf1[24] = input[2];
1155 0 : bf1[25] = -input[29];
1156 0 : bf1[26] = -input[13];
1157 0 : bf1[27] = input[18];
1158 0 : bf1[28] = -input[5];
1159 0 : bf1[29] = input[26];
1160 0 : bf1[30] = input[10];
1161 0 : bf1[31] = -input[21];
1162 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1163 :
1164 : // stage 2: half_btf with cospi[32] on pairs (4k+2, 4k+3); other elements copied
1165 0 : stage++;
1166 0 : cospi = cospi_arr(cos_bit[stage]);
1167 0 : bf0 = output;
1168 0 : bf1 = step;
1169 0 : bf1[0] = bf0[0];
1170 0 : bf1[1] = bf0[1];
1171 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
1172 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
1173 0 : bf1[4] = bf0[4];
1174 0 : bf1[5] = bf0[5];
1175 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
1176 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
1177 0 : bf1[8] = bf0[8];
1178 0 : bf1[9] = bf0[9];
1179 0 : bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
1180 0 : bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
1181 0 : bf1[12] = bf0[12];
1182 0 : bf1[13] = bf0[13];
1183 0 : bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
1184 0 : bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
1185 0 : bf1[16] = bf0[16];
1186 0 : bf1[17] = bf0[17];
1187 0 : bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
1188 0 : bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit[stage]);
1189 0 : bf1[20] = bf0[20];
1190 0 : bf1[21] = bf0[21];
1191 0 : bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
1192 0 : bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]);
1193 0 : bf1[24] = bf0[24];
1194 0 : bf1[25] = bf0[25];
1195 0 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
1196 0 : bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]);
1197 0 : bf1[28] = bf0[28];
1198 0 : bf1[29] = bf0[29];
1199 0 : bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
1200 0 : bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]);
1201 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1202 :
1203 : // stage 3: sums and differences of elements two apart, per group of four
1204 0 : stage++;
1205 0 : bf0 = step;
1206 0 : bf1 = output;
1207 0 : bf1[0] = bf0[0] + bf0[2];
1208 0 : bf1[1] = bf0[1] + bf0[3];
1209 0 : bf1[2] = bf0[0] - bf0[2];
1210 0 : bf1[3] = bf0[1] - bf0[3];
1211 0 : bf1[4] = bf0[4] + bf0[6];
1212 0 : bf1[5] = bf0[5] + bf0[7];
1213 0 : bf1[6] = bf0[4] - bf0[6];
1214 0 : bf1[7] = bf0[5] - bf0[7];
1215 0 : bf1[8] = bf0[8] + bf0[10];
1216 0 : bf1[9] = bf0[9] + bf0[11];
1217 0 : bf1[10] = bf0[8] - bf0[10];
1218 0 : bf1[11] = bf0[9] - bf0[11];
1219 0 : bf1[12] = bf0[12] + bf0[14];
1220 0 : bf1[13] = bf0[13] + bf0[15];
1221 0 : bf1[14] = bf0[12] - bf0[14];
1222 0 : bf1[15] = bf0[13] - bf0[15];
1223 0 : bf1[16] = bf0[16] + bf0[18];
1224 0 : bf1[17] = bf0[17] + bf0[19];
1225 0 : bf1[18] = bf0[16] - bf0[18];
1226 0 : bf1[19] = bf0[17] - bf0[19];
1227 0 : bf1[20] = bf0[20] + bf0[22];
1228 0 : bf1[21] = bf0[21] + bf0[23];
1229 0 : bf1[22] = bf0[20] - bf0[22];
1230 0 : bf1[23] = bf0[21] - bf0[23];
1231 0 : bf1[24] = bf0[24] + bf0[26];
1232 0 : bf1[25] = bf0[25] + bf0[27];
1233 0 : bf1[26] = bf0[24] - bf0[26];
1234 0 : bf1[27] = bf0[25] - bf0[27];
1235 0 : bf1[28] = bf0[28] + bf0[30];
1236 0 : bf1[29] = bf0[29] + bf0[31];
1237 0 : bf1[30] = bf0[28] - bf0[30];
1238 0 : bf1[31] = bf0[29] - bf0[31];
1239 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1240 :
1241 : // stage 4: half_btf with cospi[16]/cospi[48] on the upper four of each group of eight
1242 0 : stage++;
1243 0 : cospi = cospi_arr(cos_bit[stage]);
1244 0 : bf0 = output;
1245 0 : bf1 = step;
1246 0 : bf1[0] = bf0[0];
1247 0 : bf1[1] = bf0[1];
1248 0 : bf1[2] = bf0[2];
1249 0 : bf1[3] = bf0[3];
1250 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
1251 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
1252 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
1253 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
1254 0 : bf1[8] = bf0[8];
1255 0 : bf1[9] = bf0[9];
1256 0 : bf1[10] = bf0[10];
1257 0 : bf1[11] = bf0[11];
1258 0 : bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
1259 0 : bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
1260 0 : bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
1261 0 : bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
1262 0 : bf1[16] = bf0[16];
1263 0 : bf1[17] = bf0[17];
1264 0 : bf1[18] = bf0[18];
1265 0 : bf1[19] = bf0[19];
1266 0 : bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
1267 0 : bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]);
1268 0 : bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
1269 0 : bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]);
1270 0 : bf1[24] = bf0[24];
1271 0 : bf1[25] = bf0[25];
1272 0 : bf1[26] = bf0[26];
1273 0 : bf1[27] = bf0[27];
1274 0 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
1275 0 : bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]);
1276 0 : bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
1277 0 : bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]);
1278 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1279 :
1280 : // stage 5: sums and differences of elements four apart, per group of eight
1281 0 : stage++;
1282 0 : bf0 = step;
1283 0 : bf1 = output;
1284 0 : bf1[0] = bf0[0] + bf0[4];
1285 0 : bf1[1] = bf0[1] + bf0[5];
1286 0 : bf1[2] = bf0[2] + bf0[6];
1287 0 : bf1[3] = bf0[3] + bf0[7];
1288 0 : bf1[4] = bf0[0] - bf0[4];
1289 0 : bf1[5] = bf0[1] - bf0[5];
1290 0 : bf1[6] = bf0[2] - bf0[6];
1291 0 : bf1[7] = bf0[3] - bf0[7];
1292 0 : bf1[8] = bf0[8] + bf0[12];
1293 0 : bf1[9] = bf0[9] + bf0[13];
1294 0 : bf1[10] = bf0[10] + bf0[14];
1295 0 : bf1[11] = bf0[11] + bf0[15];
1296 0 : bf1[12] = bf0[8] - bf0[12];
1297 0 : bf1[13] = bf0[9] - bf0[13];
1298 0 : bf1[14] = bf0[10] - bf0[14];
1299 0 : bf1[15] = bf0[11] - bf0[15];
1300 0 : bf1[16] = bf0[16] + bf0[20];
1301 0 : bf1[17] = bf0[17] + bf0[21];
1302 0 : bf1[18] = bf0[18] + bf0[22];
1303 0 : bf1[19] = bf0[19] + bf0[23];
1304 0 : bf1[20] = bf0[16] - bf0[20];
1305 0 : bf1[21] = bf0[17] - bf0[21];
1306 0 : bf1[22] = bf0[18] - bf0[22];
1307 0 : bf1[23] = bf0[19] - bf0[23];
1308 0 : bf1[24] = bf0[24] + bf0[28];
1309 0 : bf1[25] = bf0[25] + bf0[29];
1310 0 : bf1[26] = bf0[26] + bf0[30];
1311 0 : bf1[27] = bf0[27] + bf0[31];
1312 0 : bf1[28] = bf0[24] - bf0[28];
1313 0 : bf1[29] = bf0[25] - bf0[29];
1314 0 : bf1[30] = bf0[26] - bf0[30];
1315 0 : bf1[31] = bf0[27] - bf0[31];
1316 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1317 :
1318 : // stage 6: half_btf on the upper eight of each group of sixteen (8..15, 24..31); rest copied
1319 0 : stage++;
1320 0 : cospi = cospi_arr(cos_bit[stage]);
1321 0 : bf0 = output;
1322 0 : bf1 = step;
1323 0 : bf1[0] = bf0[0];
1324 0 : bf1[1] = bf0[1];
1325 0 : bf1[2] = bf0[2];
1326 0 : bf1[3] = bf0[3];
1327 0 : bf1[4] = bf0[4];
1328 0 : bf1[5] = bf0[5];
1329 0 : bf1[6] = bf0[6];
1330 0 : bf1[7] = bf0[7];
1331 0 : bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
1332 0 : bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
1333 0 : bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
1334 0 : bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
1335 0 : bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
1336 0 : bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
1337 0 : bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
1338 0 : bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
1339 0 : bf1[16] = bf0[16];
1340 0 : bf1[17] = bf0[17];
1341 0 : bf1[18] = bf0[18];
1342 0 : bf1[19] = bf0[19];
1343 0 : bf1[20] = bf0[20];
1344 0 : bf1[21] = bf0[21];
1345 0 : bf1[22] = bf0[22];
1346 0 : bf1[23] = bf0[23];
1347 0 : bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
1348 0 : bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]);
1349 0 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
1350 0 : bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]);
1351 0 : bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
1352 0 : bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]);
1353 0 : bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
1354 0 : bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]);
1355 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1356 :
1357 : // stage 7: sums and differences of elements eight apart, per group of sixteen
1358 0 : stage++;
1359 0 : bf0 = step;
1360 0 : bf1 = output;
1361 0 : bf1[0] = bf0[0] + bf0[8];
1362 0 : bf1[1] = bf0[1] + bf0[9];
1363 0 : bf1[2] = bf0[2] + bf0[10];
1364 0 : bf1[3] = bf0[3] + bf0[11];
1365 0 : bf1[4] = bf0[4] + bf0[12];
1366 0 : bf1[5] = bf0[5] + bf0[13];
1367 0 : bf1[6] = bf0[6] + bf0[14];
1368 0 : bf1[7] = bf0[7] + bf0[15];
1369 0 : bf1[8] = bf0[0] - bf0[8];
1370 0 : bf1[9] = bf0[1] - bf0[9];
1371 0 : bf1[10] = bf0[2] - bf0[10];
1372 0 : bf1[11] = bf0[3] - bf0[11];
1373 0 : bf1[12] = bf0[4] - bf0[12];
1374 0 : bf1[13] = bf0[5] - bf0[13];
1375 0 : bf1[14] = bf0[6] - bf0[14];
1376 0 : bf1[15] = bf0[7] - bf0[15];
1377 0 : bf1[16] = bf0[16] + bf0[24];
1378 0 : bf1[17] = bf0[17] + bf0[25];
1379 0 : bf1[18] = bf0[18] + bf0[26];
1380 0 : bf1[19] = bf0[19] + bf0[27];
1381 0 : bf1[20] = bf0[20] + bf0[28];
1382 0 : bf1[21] = bf0[21] + bf0[29];
1383 0 : bf1[22] = bf0[22] + bf0[30];
1384 0 : bf1[23] = bf0[23] + bf0[31];
1385 0 : bf1[24] = bf0[16] - bf0[24];
1386 0 : bf1[25] = bf0[17] - bf0[25];
1387 0 : bf1[26] = bf0[18] - bf0[26];
1388 0 : bf1[27] = bf0[19] - bf0[27];
1389 0 : bf1[28] = bf0[20] - bf0[28];
1390 0 : bf1[29] = bf0[21] - bf0[29];
1391 0 : bf1[30] = bf0[22] - bf0[30];
1392 0 : bf1[31] = bf0[23] - bf0[31];
1393 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1394 :
1395 : // stage 8: half_btf on elements 16..31; 0..15 copied
1396 0 : stage++;
1397 0 : cospi = cospi_arr(cos_bit[stage]);
1398 0 : bf0 = output;
1399 0 : bf1 = step;
1400 0 : bf1[0] = bf0[0];
1401 0 : bf1[1] = bf0[1];
1402 0 : bf1[2] = bf0[2];
1403 0 : bf1[3] = bf0[3];
1404 0 : bf1[4] = bf0[4];
1405 0 : bf1[5] = bf0[5];
1406 0 : bf1[6] = bf0[6];
1407 0 : bf1[7] = bf0[7];
1408 0 : bf1[8] = bf0[8];
1409 0 : bf1[9] = bf0[9];
1410 0 : bf1[10] = bf0[10];
1411 0 : bf1[11] = bf0[11];
1412 0 : bf1[12] = bf0[12];
1413 0 : bf1[13] = bf0[13];
1414 0 : bf1[14] = bf0[14];
1415 0 : bf1[15] = bf0[15];
1416 0 : bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
1417 0 : bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]);
1418 0 : bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
1419 0 : bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]);
1420 0 : bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
1421 0 : bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]);
1422 0 : bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
1423 0 : bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]);
1424 0 : bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
1425 0 : bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]);
1426 0 : bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
1427 0 : bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]);
1428 0 : bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
1429 0 : bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]);
1430 0 : bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
1431 0 : bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]);
1432 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1433 :
1434 : // stage 9: sums and differences of elements sixteen apart
1435 0 : stage++;
1436 0 : bf0 = step;
1437 0 : bf1 = output;
1438 0 : bf1[0] = bf0[0] + bf0[16];
1439 0 : bf1[1] = bf0[1] + bf0[17];
1440 0 : bf1[2] = bf0[2] + bf0[18];
1441 0 : bf1[3] = bf0[3] + bf0[19];
1442 0 : bf1[4] = bf0[4] + bf0[20];
1443 0 : bf1[5] = bf0[5] + bf0[21];
1444 0 : bf1[6] = bf0[6] + bf0[22];
1445 0 : bf1[7] = bf0[7] + bf0[23];
1446 0 : bf1[8] = bf0[8] + bf0[24];
1447 0 : bf1[9] = bf0[9] + bf0[25];
1448 0 : bf1[10] = bf0[10] + bf0[26];
1449 0 : bf1[11] = bf0[11] + bf0[27];
1450 0 : bf1[12] = bf0[12] + bf0[28];
1451 0 : bf1[13] = bf0[13] + bf0[29];
1452 0 : bf1[14] = bf0[14] + bf0[30];
1453 0 : bf1[15] = bf0[15] + bf0[31];
1454 0 : bf1[16] = bf0[0] - bf0[16];
1455 0 : bf1[17] = bf0[1] - bf0[17];
1456 0 : bf1[18] = bf0[2] - bf0[18];
1457 0 : bf1[19] = bf0[3] - bf0[19];
1458 0 : bf1[20] = bf0[4] - bf0[20];
1459 0 : bf1[21] = bf0[5] - bf0[21];
1460 0 : bf1[22] = bf0[6] - bf0[22];
1461 0 : bf1[23] = bf0[7] - bf0[23];
1462 0 : bf1[24] = bf0[8] - bf0[24];
1463 0 : bf1[25] = bf0[9] - bf0[25];
1464 0 : bf1[26] = bf0[10] - bf0[26];
1465 0 : bf1[27] = bf0[11] - bf0[27];
1466 0 : bf1[28] = bf0[12] - bf0[28];
1467 0 : bf1[29] = bf0[13] - bf0[29];
1468 0 : bf1[30] = bf0[14] - bf0[30];
1469 0 : bf1[31] = bf0[15] - bf0[31];
1470 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1471 :
1472 : // stage 10: half_btf on every adjacent pair (2k, 2k+1), each with its own odd-indexed cospi pair
1473 0 : stage++;
1474 0 : cospi = cospi_arr(cos_bit[stage]);
1475 0 : bf0 = output;
1476 0 : bf1 = step;
1477 0 : bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
1478 0 : bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]);
1479 0 : bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
1480 0 : bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]);
1481 0 : bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
1482 0 : bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]);
1483 0 : bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
1484 0 : bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]);
1485 0 : bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
1486 0 : bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]);
1487 0 : bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
1488 0 : bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]);
1489 0 : bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
1490 0 : bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]);
1491 0 : bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
1492 0 : bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]);
1493 0 : bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
1494 0 : bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]);
1495 0 : bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
1496 0 : bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]);
1497 0 : bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
1498 0 : bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]);
1499 0 : bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
1500 0 : bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]);
1501 0 : bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
1502 0 : bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]);
1503 0 : bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
1504 0 : bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]);
1505 0 : bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
1506 0 : bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]);
1507 0 : bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
1508 0 : bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]);
1509 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1510 :
1511 : // stage 11: final reordering of step[] into output (no arithmetic)
1512 0 : stage++;
1513 0 : bf0 = step;
1514 0 : bf1 = output;
1515 0 : bf1[0] = bf0[1];
1516 0 : bf1[1] = bf0[30];
1517 0 : bf1[2] = bf0[3];
1518 0 : bf1[3] = bf0[28];
1519 0 : bf1[4] = bf0[5];
1520 0 : bf1[5] = bf0[26];
1521 0 : bf1[6] = bf0[7];
1522 0 : bf1[7] = bf0[24];
1523 0 : bf1[8] = bf0[9];
1524 0 : bf1[9] = bf0[22];
1525 0 : bf1[10] = bf0[11];
1526 0 : bf1[11] = bf0[20];
1527 0 : bf1[12] = bf0[13];
1528 0 : bf1[13] = bf0[18];
1529 0 : bf1[14] = bf0[15];
1530 0 : bf1[15] = bf0[16];
1531 0 : bf1[16] = bf0[17];
1532 0 : bf1[17] = bf0[14];
1533 0 : bf1[18] = bf0[19];
1534 0 : bf1[19] = bf0[12];
1535 0 : bf1[20] = bf0[21];
1536 0 : bf1[21] = bf0[10];
1537 0 : bf1[22] = bf0[23];
1538 0 : bf1[23] = bf0[8];
1539 0 : bf1[24] = bf0[25];
1540 0 : bf1[25] = bf0[6];
1541 0 : bf1[26] = bf0[27];
1542 0 : bf1[27] = bf0[4];
1543 0 : bf1[28] = bf0[29];
1544 0 : bf1[29] = bf0[2];
1545 0 : bf1[30] = bf0[31];
1546 0 : bf1[31] = bf0[0];
1547 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1548 0 : }
1549 :
1550 : #if CONFIG_EXT_TX
// av1_iidentity4_c: writes output[i] = dct_const_round_shift(input[i] * Sqrt2)
// for the 4 elements, then range-checks output against stage_range[0].
// cos_bit is accepted only to match the other 1-D transforms' signature and
// is unused.
// NOTE(review): the product input[i] * Sqrt2 is formed before the call; if
// Sqrt2 is no wider than int32_t it can overflow for large inputs. Confirm
// Sqrt2's declared type.
1551 0 : void av1_iidentity4_c(const int32_t *input, int32_t *output,
1552 : const int8_t *cos_bit, const int8_t *stage_range) {
1553 : (void)cos_bit;
1554 0 : for (int i = 0; i < 4; ++i)
1555 0 : output[i] = (int32_t)dct_const_round_shift(input[i] * Sqrt2);
1556 : range_check(0, input, output, 4, stage_range[0]);
1557 0 : }
1558 :
// av1_iidentity8_c: writes output[i] = input[i] * 2 for the 8 elements, then
// range-checks output against stage_range[0].
// cos_bit is accepted only to match the other 1-D transforms' signature and
// is unused.
1559 0 : void av1_iidentity8_c(const int32_t *input, int32_t *output,
1560 : const int8_t *cos_bit, const int8_t *stage_range) {
1561 : (void)cos_bit;
1562 0 : for (int i = 0; i < 8; ++i) output[i] = input[i] * 2;
1563 : range_check(0, input, output, 8, stage_range[0]);
1564 0 : }
1565 :
// av1_iidentity16_c: writes
// output[i] = dct_const_round_shift(input[i] * 2 * Sqrt2) for the 16
// elements, then range-checks output against stage_range[0].
// cos_bit is accepted only to match the other 1-D transforms' signature and
// is unused.
// NOTE(review): input[i] * 2 is evaluated in int32_t/int before Sqrt2 is
// applied, so the intermediate can overflow for large inputs regardless of
// Sqrt2's type. Confirm the expected input range.
1566 0 : void av1_iidentity16_c(const int32_t *input, int32_t *output,
1567 : const int8_t *cos_bit, const int8_t *stage_range) {
1568 : (void)cos_bit;
1569 0 : for (int i = 0; i < 16; ++i)
1570 0 : output[i] = (int32_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
1571 : range_check(0, input, output, 16, stage_range[0]);
1572 0 : }
1573 :
// av1_iidentity32_c: writes output[i] = input[i] * 4 for the 32 elements,
// then range-checks output against stage_range[0].
// cos_bit is accepted only to match the other 1-D transforms' signature and
// is unused.
1574 0 : void av1_iidentity32_c(const int32_t *input, int32_t *output,
1575 : const int8_t *cos_bit, const int8_t *stage_range) {
1576 : (void)cos_bit;
1577 0 : for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
1578 : range_check(0, input, output, 32, stage_range[0]);
1579 0 : }
1580 : #endif // CONFIG_EXT_TX
1581 :
1582 : #if CONFIG_TX64X64
// av1_idct64_new: 64-point 1-D inverse DCT (per the function name), computed
// as a fixed 11-stage butterfly network over 64 int32_t values.
//
// input       - 64 coefficients; only read. Must not be the same buffer as
//               `output` (asserted in stage 1).
// output      - 64-entry buffer. Receives the final result (stage 11 writes
//               it) and is also the working buffer of the odd-numbered stages.
// cos_bit     - indexed by stage for stages 1..11; each entry selects the
//               cosine table through cospi_arr() and is passed to half_btf().
// stage_range - indexed by stage for stages 0..11; handed to range_check()
//               as the allowed bit width of that stage's values.
//
// Stages alternate between `output` and the local `step` buffer, so each stage
// reads the complete result of the previous one and never updates in place.
// range_check() only casts its arguments to void unless
// CONFIG_COEFFICIENT_RANGE_CHECKING is set, in which case it reports and
// asserts on any value outside the signed `bit`-wide range.
// NOTE(review): half_btf() and cospi_arr() are defined outside this chunk;
// half_btf(w0, in0, w1, in1, bit) presumably returns a rounded
// (w0 * in0 + w1 * in1) >> bit -- confirm against its definition.
1583 : void av1_idct64_new(const int32_t *input, int32_t *output,
1584 : const int8_t *cos_bit, const int8_t *stage_range) {
1585 : const int32_t size = 64;
1586 : const int32_t *cospi;
1587 :
1588 : int32_t stage = 0;
1589 : int32_t *bf0, *bf1;  // bf0: buffer a stage reads (from stage 2 on); bf1: buffer it writes
1590 : int32_t step[64];  // scratch buffer written by the even-numbered stages
1591 :
1592 : // stage 0: range-check the untouched input coefficients.
1593 : range_check(stage, input, input, size, stage_range[stage]);
1594 :
1595 : // stage 1: input -> output. Pure permutation: bf1[i] takes input[j] where j
// is i with its 6 index bits reversed (1 <-> 32, 2 <-> 16, 3 <-> 48, ...).
1596 : stage++;
1597 : cospi = cospi_arr(cos_bit[stage]);  // loaded but not referenced in this stage
1598 : assert(output != input);  // this stage writes output while still reading input
1599 : bf1 = output;
1600 : bf1[0] = input[0];
1601 : bf1[1] = input[32];
1602 : bf1[2] = input[16];
1603 : bf1[3] = input[48];
1604 : bf1[4] = input[8];
1605 : bf1[5] = input[40];
1606 : bf1[6] = input[24];
1607 : bf1[7] = input[56];
1608 : bf1[8] = input[4];
1609 : bf1[9] = input[36];
1610 : bf1[10] = input[20];
1611 : bf1[11] = input[52];
1612 : bf1[12] = input[12];
1613 : bf1[13] = input[44];
1614 : bf1[14] = input[28];
1615 : bf1[15] = input[60];
1616 : bf1[16] = input[2];
1617 : bf1[17] = input[34];
1618 : bf1[18] = input[18];
1619 : bf1[19] = input[50];
1620 : bf1[20] = input[10];
1621 : bf1[21] = input[42];
1622 : bf1[22] = input[26];
1623 : bf1[23] = input[58];
1624 : bf1[24] = input[6];
1625 : bf1[25] = input[38];
1626 : bf1[26] = input[22];
1627 : bf1[27] = input[54];
1628 : bf1[28] = input[14];
1629 : bf1[29] = input[46];
1630 : bf1[30] = input[30];
1631 : bf1[31] = input[62];
1632 : bf1[32] = input[1];
1633 : bf1[33] = input[33];
1634 : bf1[34] = input[17];
1635 : bf1[35] = input[49];
1636 : bf1[36] = input[9];
1637 : bf1[37] = input[41];
1638 : bf1[38] = input[25];
1639 : bf1[39] = input[57];
1640 : bf1[40] = input[5];
1641 : bf1[41] = input[37];
1642 : bf1[42] = input[21];
1643 : bf1[43] = input[53];
1644 : bf1[44] = input[13];
1645 : bf1[45] = input[45];
1646 : bf1[46] = input[29];
1647 : bf1[47] = input[61];
1648 : bf1[48] = input[3];
1649 : bf1[49] = input[35];
1650 : bf1[50] = input[19];
1651 : bf1[51] = input[51];
1652 : bf1[52] = input[11];
1653 : bf1[53] = input[43];
1654 : bf1[54] = input[27];
1655 : bf1[55] = input[59];
1656 : bf1[56] = input[7];
1657 : bf1[57] = input[39];
1658 : bf1[58] = input[23];
1659 : bf1[59] = input[55];
1660 : bf1[60] = input[15];
1661 : bf1[61] = input[47];
1662 : bf1[62] = input[31];
1663 : bf1[63] = input[63];
1664 : range_check(stage, input, bf1, size, stage_range[stage]);
1665 :
1666 : // stage 2: output -> step. Entries 0..31 are copied unchanged; entries
// 32..63 are combined in mirrored pairs (32+k, 63-k) through half_btf(),
// using the odd-indexed cospi weights.
1667 : stage++;
1668 : cospi = cospi_arr(cos_bit[stage]);
1669 : bf0 = output;
1670 : bf1 = step;
1671 : bf1[0] = bf0[0];
1672 : bf1[1] = bf0[1];
1673 : bf1[2] = bf0[2];
1674 : bf1[3] = bf0[3];
1675 : bf1[4] = bf0[4];
1676 : bf1[5] = bf0[5];
1677 : bf1[6] = bf0[6];
1678 : bf1[7] = bf0[7];
1679 : bf1[8] = bf0[8];
1680 : bf1[9] = bf0[9];
1681 : bf1[10] = bf0[10];
1682 : bf1[11] = bf0[11];
1683 : bf1[12] = bf0[12];
1684 : bf1[13] = bf0[13];
1685 : bf1[14] = bf0[14];
1686 : bf1[15] = bf0[15];
1687 : bf1[16] = bf0[16];
1688 : bf1[17] = bf0[17];
1689 : bf1[18] = bf0[18];
1690 : bf1[19] = bf0[19];
1691 : bf1[20] = bf0[20];
1692 : bf1[21] = bf0[21];
1693 : bf1[22] = bf0[22];
1694 : bf1[23] = bf0[23];
1695 : bf1[24] = bf0[24];
1696 : bf1[25] = bf0[25];
1697 : bf1[26] = bf0[26];
1698 : bf1[27] = bf0[27];
1699 : bf1[28] = bf0[28];
1700 : bf1[29] = bf0[29];
1701 : bf1[30] = bf0[30];
1702 : bf1[31] = bf0[31];
1703 : bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit[stage]);
1704 : bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit[stage]);
1705 : bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit[stage]);
1706 : bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit[stage]);
1707 : bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit[stage]);
1708 : bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit[stage]);
1709 : bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit[stage]);
1710 : bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit[stage]);
1711 : bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit[stage]);
1712 : bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit[stage]);
1713 : bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit[stage]);
1714 : bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit[stage]);
1715 : bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit[stage]);
1716 : bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit[stage]);
1717 : bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit[stage]);
1718 : bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit[stage]);
1719 : bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit[stage]);
1720 : bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit[stage]);
1721 : bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit[stage]);
1722 : bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit[stage]);
1723 : bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit[stage]);
1724 : bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit[stage]);
1725 : bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit[stage]);
1726 : bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit[stage]);
1727 : bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit[stage]);
1728 : bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit[stage]);
1729 : bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit[stage]);
1730 : bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit[stage]);
1731 : bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit[stage]);
1732 : bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit[stage]);
1733 : bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit[stage]);
1734 : bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit[stage]);
1735 : range_check(stage, input, bf1, size, stage_range[stage]);
1736 :
1737 : // stage 3: step -> output. 0..15 copied; 16..31 combined in mirrored pairs
// (16+k, 31-k) through half_btf(); 32..63 are sum/difference butterflies on
// adjacent pairs.
1738 : stage++;
1739 : cospi = cospi_arr(cos_bit[stage]);
1740 : bf0 = step;
1741 : bf1 = output;
1742 : bf1[0] = bf0[0];
1743 : bf1[1] = bf0[1];
1744 : bf1[2] = bf0[2];
1745 : bf1[3] = bf0[3];
1746 : bf1[4] = bf0[4];
1747 : bf1[5] = bf0[5];
1748 : bf1[6] = bf0[6];
1749 : bf1[7] = bf0[7];
1750 : bf1[8] = bf0[8];
1751 : bf1[9] = bf0[9];
1752 : bf1[10] = bf0[10];
1753 : bf1[11] = bf0[11];
1754 : bf1[12] = bf0[12];
1755 : bf1[13] = bf0[13];
1756 : bf1[14] = bf0[14];
1757 : bf1[15] = bf0[15];
1758 : bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
1759 : bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
1760 : bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
1761 : bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
1762 : bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
1763 : bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
1764 : bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
1765 : bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
1766 : bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
1767 : bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
1768 : bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
1769 : bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
1770 : bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
1771 : bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
1772 : bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
1773 : bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
1774 : bf1[32] = bf0[32] + bf0[33];
1775 : bf1[33] = bf0[32] - bf0[33];
1776 : bf1[34] = -bf0[34] + bf0[35];
1777 : bf1[35] = bf0[34] + bf0[35];
1778 : bf1[36] = bf0[36] + bf0[37];
1779 : bf1[37] = bf0[36] - bf0[37];
1780 : bf1[38] = -bf0[38] + bf0[39];
1781 : bf1[39] = bf0[38] + bf0[39];
1782 : bf1[40] = bf0[40] + bf0[41];
1783 : bf1[41] = bf0[40] - bf0[41];
1784 : bf1[42] = -bf0[42] + bf0[43];
1785 : bf1[43] = bf0[42] + bf0[43];
1786 : bf1[44] = bf0[44] + bf0[45];
1787 : bf1[45] = bf0[44] - bf0[45];
1788 : bf1[46] = -bf0[46] + bf0[47];
1789 : bf1[47] = bf0[46] + bf0[47];
1790 : bf1[48] = bf0[48] + bf0[49];
1791 : bf1[49] = bf0[48] - bf0[49];
1792 : bf1[50] = -bf0[50] + bf0[51];
1793 : bf1[51] = bf0[50] + bf0[51];
1794 : bf1[52] = bf0[52] + bf0[53];
1795 : bf1[53] = bf0[52] - bf0[53];
1796 : bf1[54] = -bf0[54] + bf0[55];
1797 : bf1[55] = bf0[54] + bf0[55];
1798 : bf1[56] = bf0[56] + bf0[57];
1799 : bf1[57] = bf0[56] - bf0[57];
1800 : bf1[58] = -bf0[58] + bf0[59];
1801 : bf1[59] = bf0[58] + bf0[59];
1802 : bf1[60] = bf0[60] + bf0[61];
1803 : bf1[61] = bf0[60] - bf0[61];
1804 : bf1[62] = -bf0[62] + bf0[63];
1805 : bf1[63] = bf0[62] + bf0[63];
1806 : range_check(stage, input, bf1, size, stage_range[stage]);
1807 :
1808 : // stage 4: output -> step. 0..7 copied; 8..15 combined in mirrored pairs
// (8+k, 15-k) through half_btf(); 16..31 are adjacent-pair sum/difference
// butterflies; in 32..63 the entries with index % 4 == 1 or 2 are combined
// with their partner at (95 - index) through half_btf(), the rest are copied.
1809 : stage++;
1810 : cospi = cospi_arr(cos_bit[stage]);
1811 : bf0 = output;
1812 : bf1 = step;
1813 : bf1[0] = bf0[0];
1814 : bf1[1] = bf0[1];
1815 : bf1[2] = bf0[2];
1816 : bf1[3] = bf0[3];
1817 : bf1[4] = bf0[4];
1818 : bf1[5] = bf0[5];
1819 : bf1[6] = bf0[6];
1820 : bf1[7] = bf0[7];
1821 : bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
1822 : bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
1823 : bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
1824 : bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
1825 : bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
1826 : bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
1827 : bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
1828 : bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
1829 : bf1[16] = bf0[16] + bf0[17];
1830 : bf1[17] = bf0[16] - bf0[17];
1831 : bf1[18] = -bf0[18] + bf0[19];
1832 : bf1[19] = bf0[18] + bf0[19];
1833 : bf1[20] = bf0[20] + bf0[21];
1834 : bf1[21] = bf0[20] - bf0[21];
1835 : bf1[22] = -bf0[22] + bf0[23];
1836 : bf1[23] = bf0[22] + bf0[23];
1837 : bf1[24] = bf0[24] + bf0[25];
1838 : bf1[25] = bf0[24] - bf0[25];
1839 : bf1[26] = -bf0[26] + bf0[27];
1840 : bf1[27] = bf0[26] + bf0[27];
1841 : bf1[28] = bf0[28] + bf0[29];
1842 : bf1[29] = bf0[28] - bf0[29];
1843 : bf1[30] = -bf0[30] + bf0[31];
1844 : bf1[31] = bf0[30] + bf0[31];
1845 : bf1[32] = bf0[32];
1846 : bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
1847 : bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
1848 : bf1[35] = bf0[35];
1849 : bf1[36] = bf0[36];
1850 : bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
1851 : bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
1852 : bf1[39] = bf0[39];
1853 : bf1[40] = bf0[40];
1854 : bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
1855 : bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
1856 : bf1[43] = bf0[43];
1857 : bf1[44] = bf0[44];
1858 : bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
1859 : bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
1860 : bf1[47] = bf0[47];
1861 : bf1[48] = bf0[48];
1862 : bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit[stage]);
1863 : bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit[stage]);
1864 : bf1[51] = bf0[51];
1865 : bf1[52] = bf0[52];
1866 : bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit[stage]);
1867 : bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit[stage]);
1868 : bf1[55] = bf0[55];
1869 : bf1[56] = bf0[56];
1870 : bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit[stage]);
1871 : bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit[stage]);
1872 : bf1[59] = bf0[59];
1873 : bf1[60] = bf0[60];
1874 : bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit[stage]);
1875 : bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit[stage]);
1876 : bf1[63] = bf0[63];
1877 : range_check(stage, input, bf1, size, stage_range[stage]);
1878 :
1879 : // stage 5: step -> output. 0..3 copied; (4,7) and (5,6) combined through
// half_btf(); 8..15 adjacent-pair sum/difference; in 16..31 the entries with
// index % 4 == 1 or 2 are combined with their partner at (47 - index) through
// half_btf(), the rest copied; 32..63 sum/difference within groups of four.
1880 : stage++;
1881 : cospi = cospi_arr(cos_bit[stage]);
1882 : bf0 = step;
1883 : bf1 = output;
1884 : bf1[0] = bf0[0];
1885 : bf1[1] = bf0[1];
1886 : bf1[2] = bf0[2];
1887 : bf1[3] = bf0[3];
1888 : bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
1889 : bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
1890 : bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
1891 : bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
1892 : bf1[8] = bf0[8] + bf0[9];
1893 : bf1[9] = bf0[8] - bf0[9];
1894 : bf1[10] = -bf0[10] + bf0[11];
1895 : bf1[11] = bf0[10] + bf0[11];
1896 : bf1[12] = bf0[12] + bf0[13];
1897 : bf1[13] = bf0[12] - bf0[13];
1898 : bf1[14] = -bf0[14] + bf0[15];
1899 : bf1[15] = bf0[14] + bf0[15];
1900 : bf1[16] = bf0[16];
1901 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
1902 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
1903 : bf1[19] = bf0[19];
1904 : bf1[20] = bf0[20];
1905 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
1906 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
1907 : bf1[23] = bf0[23];
1908 : bf1[24] = bf0[24];
1909 : bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
1910 : bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
1911 : bf1[27] = bf0[27];
1912 : bf1[28] = bf0[28];
1913 : bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
1914 : bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
1915 : bf1[31] = bf0[31];
1916 : bf1[32] = bf0[32] + bf0[35];
1917 : bf1[33] = bf0[33] + bf0[34];
1918 : bf1[34] = bf0[33] - bf0[34];
1919 : bf1[35] = bf0[32] - bf0[35];
1920 : bf1[36] = -bf0[36] + bf0[39];
1921 : bf1[37] = -bf0[37] + bf0[38];
1922 : bf1[38] = bf0[37] + bf0[38];
1923 : bf1[39] = bf0[36] + bf0[39];
1924 : bf1[40] = bf0[40] + bf0[43];
1925 : bf1[41] = bf0[41] + bf0[42];
1926 : bf1[42] = bf0[41] - bf0[42];
1927 : bf1[43] = bf0[40] - bf0[43];
1928 : bf1[44] = -bf0[44] + bf0[47];
1929 : bf1[45] = -bf0[45] + bf0[46];
1930 : bf1[46] = bf0[45] + bf0[46];
1931 : bf1[47] = bf0[44] + bf0[47];
1932 : bf1[48] = bf0[48] + bf0[51];
1933 : bf1[49] = bf0[49] + bf0[50];
1934 : bf1[50] = bf0[49] - bf0[50];
1935 : bf1[51] = bf0[48] - bf0[51];
1936 : bf1[52] = -bf0[52] + bf0[55];
1937 : bf1[53] = -bf0[53] + bf0[54];
1938 : bf1[54] = bf0[53] + bf0[54];
1939 : bf1[55] = bf0[52] + bf0[55];
1940 : bf1[56] = bf0[56] + bf0[59];
1941 : bf1[57] = bf0[57] + bf0[58];
1942 : bf1[58] = bf0[57] - bf0[58];
1943 : bf1[59] = bf0[56] - bf0[59];
1944 : bf1[60] = -bf0[60] + bf0[63];
1945 : bf1[61] = -bf0[61] + bf0[62];
1946 : bf1[62] = bf0[61] + bf0[62];
1947 : bf1[63] = bf0[60] + bf0[63];
1948 : range_check(stage, input, bf1, size, stage_range[stage]);
1949 :
1950 : // stage 6: output -> step. (0,1) and (2,3) combined through half_btf();
// 4..7 adjacent-pair sum/difference; (9,14) and (10,13) through half_btf()
// with the rest of 8..15 copied; 16..31 sum/difference within groups of four;
// in 32..63 indices 34..37 and 42..45 are combined with their partner at
// (95 - index) through half_btf(), the rest copied.
1951 : stage++;
1952 : cospi = cospi_arr(cos_bit[stage]);
1953 : bf0 = output;
1954 : bf1 = step;
1955 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
1956 : bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
1957 : bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
1958 : bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
1959 : bf1[4] = bf0[4] + bf0[5];
1960 : bf1[5] = bf0[4] - bf0[5];
1961 : bf1[6] = -bf0[6] + bf0[7];
1962 : bf1[7] = bf0[6] + bf0[7];
1963 : bf1[8] = bf0[8];
1964 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
1965 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
1966 : bf1[11] = bf0[11];
1967 : bf1[12] = bf0[12];
1968 : bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
1969 : bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
1970 : bf1[15] = bf0[15];
1971 : bf1[16] = bf0[16] + bf0[19];
1972 : bf1[17] = bf0[17] + bf0[18];
1973 : bf1[18] = bf0[17] - bf0[18];
1974 : bf1[19] = bf0[16] - bf0[19];
1975 : bf1[20] = -bf0[20] + bf0[23];
1976 : bf1[21] = -bf0[21] + bf0[22];
1977 : bf1[22] = bf0[21] + bf0[22];
1978 : bf1[23] = bf0[20] + bf0[23];
1979 : bf1[24] = bf0[24] + bf0[27];
1980 : bf1[25] = bf0[25] + bf0[26];
1981 : bf1[26] = bf0[25] - bf0[26];
1982 : bf1[27] = bf0[24] - bf0[27];
1983 : bf1[28] = -bf0[28] + bf0[31];
1984 : bf1[29] = -bf0[29] + bf0[30];
1985 : bf1[30] = bf0[29] + bf0[30];
1986 : bf1[31] = bf0[28] + bf0[31];
1987 : bf1[32] = bf0[32];
1988 : bf1[33] = bf0[33];
1989 : bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
1990 : bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
1991 : bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
1992 : bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
1993 : bf1[38] = bf0[38];
1994 : bf1[39] = bf0[39];
1995 : bf1[40] = bf0[40];
1996 : bf1[41] = bf0[41];
1997 : bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
1998 : bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
1999 : bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
2000 : bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
2001 : bf1[46] = bf0[46];
2002 : bf1[47] = bf0[47];
2003 : bf1[48] = bf0[48];
2004 : bf1[49] = bf0[49];
2005 : bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit[stage]);
2006 : bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit[stage]);
2007 : bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit[stage]);
2008 : bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit[stage]);
2009 : bf1[54] = bf0[54];
2010 : bf1[55] = bf0[55];
2011 : bf1[56] = bf0[56];
2012 : bf1[57] = bf0[57];
2013 : bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit[stage]);
2014 : bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit[stage]);
2015 : bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit[stage]);
2016 : bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit[stage]);
2017 : bf1[62] = bf0[62];
2018 : bf1[63] = bf0[63];
2019 : range_check(stage, input, bf1, size, stage_range[stage]);
2020 :
2021 : // stage 7: step -> output. 0..3 sum/difference of (0,3) and (1,2); (5,6)
// combined through half_btf() with cospi[32], 4 and 7 copied; 8..15
// sum/difference within groups of four; in 16..31 indices 18..21 are combined
// with their partner at (47 - index) through half_btf(), the rest copied;
// 32..63 sum/difference within groups of eight.
2022 : stage++;
2023 : cospi = cospi_arr(cos_bit[stage]);
2024 : bf0 = step;
2025 : bf1 = output;
2026 : bf1[0] = bf0[0] + bf0[3];
2027 : bf1[1] = bf0[1] + bf0[2];
2028 : bf1[2] = bf0[1] - bf0[2];
2029 : bf1[3] = bf0[0] - bf0[3];
2030 : bf1[4] = bf0[4];
2031 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
2032 : bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
2033 : bf1[7] = bf0[7];
2034 : bf1[8] = bf0[8] + bf0[11];
2035 : bf1[9] = bf0[9] + bf0[10];
2036 : bf1[10] = bf0[9] - bf0[10];
2037 : bf1[11] = bf0[8] - bf0[11];
2038 : bf1[12] = -bf0[12] + bf0[15];
2039 : bf1[13] = -bf0[13] + bf0[14];
2040 : bf1[14] = bf0[13] + bf0[14];
2041 : bf1[15] = bf0[12] + bf0[15];
2042 : bf1[16] = bf0[16];
2043 : bf1[17] = bf0[17];
2044 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
2045 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
2046 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
2047 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
2048 : bf1[22] = bf0[22];
2049 : bf1[23] = bf0[23];
2050 : bf1[24] = bf0[24];
2051 : bf1[25] = bf0[25];
2052 : bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
2053 : bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
2054 : bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
2055 : bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
2056 : bf1[30] = bf0[30];
2057 : bf1[31] = bf0[31];
2058 : bf1[32] = bf0[32] + bf0[39];
2059 : bf1[33] = bf0[33] + bf0[38];
2060 : bf1[34] = bf0[34] + bf0[37];
2061 : bf1[35] = bf0[35] + bf0[36];
2062 : bf1[36] = bf0[35] - bf0[36];
2063 : bf1[37] = bf0[34] - bf0[37];
2064 : bf1[38] = bf0[33] - bf0[38];
2065 : bf1[39] = bf0[32] - bf0[39];
2066 : bf1[40] = -bf0[40] + bf0[47];
2067 : bf1[41] = -bf0[41] + bf0[46];
2068 : bf1[42] = -bf0[42] + bf0[45];
2069 : bf1[43] = -bf0[43] + bf0[44];
2070 : bf1[44] = bf0[43] + bf0[44];
2071 : bf1[45] = bf0[42] + bf0[45];
2072 : bf1[46] = bf0[41] + bf0[46];
2073 : bf1[47] = bf0[40] + bf0[47];
2074 : bf1[48] = bf0[48] + bf0[55];
2075 : bf1[49] = bf0[49] + bf0[54];
2076 : bf1[50] = bf0[50] + bf0[53];
2077 : bf1[51] = bf0[51] + bf0[52];
2078 : bf1[52] = bf0[51] - bf0[52];
2079 : bf1[53] = bf0[50] - bf0[53];
2080 : bf1[54] = bf0[49] - bf0[54];
2081 : bf1[55] = bf0[48] - bf0[55];
2082 : bf1[56] = -bf0[56] + bf0[63];
2083 : bf1[57] = -bf0[57] + bf0[62];
2084 : bf1[58] = -bf0[58] + bf0[61];
2085 : bf1[59] = -bf0[59] + bf0[60];
2086 : bf1[60] = bf0[59] + bf0[60];
2087 : bf1[61] = bf0[58] + bf0[61];
2088 : bf1[62] = bf0[57] + bf0[62];
2089 : bf1[63] = bf0[56] + bf0[63];
2090 : range_check(stage, input, bf1, size, stage_range[stage]);
2091 :
2092 : // stage 8: output -> step. 0..7 sum/difference of mirrored pairs (k, 7-k);
// (10,13) and (11,12) combined through half_btf() with cospi[32], the rest of
// 8..15 copied; 16..31 sum/difference within groups of eight; in 32..63
// indices 36..43 are combined with their partner at (95 - index) through
// half_btf(), the rest copied.
2093 : stage++;
2094 : cospi = cospi_arr(cos_bit[stage]);
2095 : bf0 = output;
2096 : bf1 = step;
2097 : bf1[0] = bf0[0] + bf0[7];
2098 : bf1[1] = bf0[1] + bf0[6];
2099 : bf1[2] = bf0[2] + bf0[5];
2100 : bf1[3] = bf0[3] + bf0[4];
2101 : bf1[4] = bf0[3] - bf0[4];
2102 : bf1[5] = bf0[2] - bf0[5];
2103 : bf1[6] = bf0[1] - bf0[6];
2104 : bf1[7] = bf0[0] - bf0[7];
2105 : bf1[8] = bf0[8];
2106 : bf1[9] = bf0[9];
2107 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
2108 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
2109 : bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
2110 : bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
2111 : bf1[14] = bf0[14];
2112 : bf1[15] = bf0[15];
2113 : bf1[16] = bf0[16] + bf0[23];
2114 : bf1[17] = bf0[17] + bf0[22];
2115 : bf1[18] = bf0[18] + bf0[21];
2116 : bf1[19] = bf0[19] + bf0[20];
2117 : bf1[20] = bf0[19] - bf0[20];
2118 : bf1[21] = bf0[18] - bf0[21];
2119 : bf1[22] = bf0[17] - bf0[22];
2120 : bf1[23] = bf0[16] - bf0[23];
2121 : bf1[24] = -bf0[24] + bf0[31];
2122 : bf1[25] = -bf0[25] + bf0[30];
2123 : bf1[26] = -bf0[26] + bf0[29];
2124 : bf1[27] = -bf0[27] + bf0[28];
2125 : bf1[28] = bf0[27] + bf0[28];
2126 : bf1[29] = bf0[26] + bf0[29];
2127 : bf1[30] = bf0[25] + bf0[30];
2128 : bf1[31] = bf0[24] + bf0[31];
2129 : bf1[32] = bf0[32];
2130 : bf1[33] = bf0[33];
2131 : bf1[34] = bf0[34];
2132 : bf1[35] = bf0[35];
2133 : bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
2134 : bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
2135 : bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
2136 : bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
2137 : bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
2138 : bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
2139 : bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
2140 : bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
2141 : bf1[44] = bf0[44];
2142 : bf1[45] = bf0[45];
2143 : bf1[46] = bf0[46];
2144 : bf1[47] = bf0[47];
2145 : bf1[48] = bf0[48];
2146 : bf1[49] = bf0[49];
2147 : bf1[50] = bf0[50];
2148 : bf1[51] = bf0[51];
2149 : bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit[stage]);
2150 : bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit[stage]);
2151 : bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit[stage]);
2152 : bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit[stage]);
2153 : bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit[stage]);
2154 : bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit[stage]);
2155 : bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit[stage]);
2156 : bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit[stage]);
2157 : bf1[60] = bf0[60];
2158 : bf1[61] = bf0[61];
2159 : bf1[62] = bf0[62];
2160 : bf1[63] = bf0[63];
2161 : range_check(stage, input, bf1, size, stage_range[stage]);
2162 :
2163 : // stage 9: step -> output. 0..15 sum/difference of mirrored pairs
// (k, 15-k); in 16..31 indices 20..23 are combined with their partner at
// (47 - index) through half_btf() with cospi[32], the rest copied; 32..63
// sum/difference within groups of sixteen.
2164 : stage++;
2165 : cospi = cospi_arr(cos_bit[stage]);
2166 : bf0 = step;
2167 : bf1 = output;
2168 : bf1[0] = bf0[0] + bf0[15];
2169 : bf1[1] = bf0[1] + bf0[14];
2170 : bf1[2] = bf0[2] + bf0[13];
2171 : bf1[3] = bf0[3] + bf0[12];
2172 : bf1[4] = bf0[4] + bf0[11];
2173 : bf1[5] = bf0[5] + bf0[10];
2174 : bf1[6] = bf0[6] + bf0[9];
2175 : bf1[7] = bf0[7] + bf0[8];
2176 : bf1[8] = bf0[7] - bf0[8];
2177 : bf1[9] = bf0[6] - bf0[9];
2178 : bf1[10] = bf0[5] - bf0[10];
2179 : bf1[11] = bf0[4] - bf0[11];
2180 : bf1[12] = bf0[3] - bf0[12];
2181 : bf1[13] = bf0[2] - bf0[13];
2182 : bf1[14] = bf0[1] - bf0[14];
2183 : bf1[15] = bf0[0] - bf0[15];
2184 : bf1[16] = bf0[16];
2185 : bf1[17] = bf0[17];
2186 : bf1[18] = bf0[18];
2187 : bf1[19] = bf0[19];
2188 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
2189 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
2190 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
2191 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
2192 : bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
2193 : bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
2194 : bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
2195 : bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
2196 : bf1[28] = bf0[28];
2197 : bf1[29] = bf0[29];
2198 : bf1[30] = bf0[30];
2199 : bf1[31] = bf0[31];
2200 : bf1[32] = bf0[32] + bf0[47];
2201 : bf1[33] = bf0[33] + bf0[46];
2202 : bf1[34] = bf0[34] + bf0[45];
2203 : bf1[35] = bf0[35] + bf0[44];
2204 : bf1[36] = bf0[36] + bf0[43];
2205 : bf1[37] = bf0[37] + bf0[42];
2206 : bf1[38] = bf0[38] + bf0[41];
2207 : bf1[39] = bf0[39] + bf0[40];
2208 : bf1[40] = bf0[39] - bf0[40];
2209 : bf1[41] = bf0[38] - bf0[41];
2210 : bf1[42] = bf0[37] - bf0[42];
2211 : bf1[43] = bf0[36] - bf0[43];
2212 : bf1[44] = bf0[35] - bf0[44];
2213 : bf1[45] = bf0[34] - bf0[45];
2214 : bf1[46] = bf0[33] - bf0[46];
2215 : bf1[47] = bf0[32] - bf0[47];
2216 : bf1[48] = -bf0[48] + bf0[63];
2217 : bf1[49] = -bf0[49] + bf0[62];
2218 : bf1[50] = -bf0[50] + bf0[61];
2219 : bf1[51] = -bf0[51] + bf0[60];
2220 : bf1[52] = -bf0[52] + bf0[59];
2221 : bf1[53] = -bf0[53] + bf0[58];
2222 : bf1[54] = -bf0[54] + bf0[57];
2223 : bf1[55] = -bf0[55] + bf0[56];
2224 : bf1[56] = bf0[55] + bf0[56];
2225 : bf1[57] = bf0[54] + bf0[57];
2226 : bf1[58] = bf0[53] + bf0[58];
2227 : bf1[59] = bf0[52] + bf0[59];
2228 : bf1[60] = bf0[51] + bf0[60];
2229 : bf1[61] = bf0[50] + bf0[61];
2230 : bf1[62] = bf0[49] + bf0[62];
2231 : bf1[63] = bf0[48] + bf0[63];
2232 : range_check(stage, input, bf1, size, stage_range[stage]);
2233 :
2234 : // stage 10: output -> step. 0..31 sum/difference of mirrored pairs
// (k, 31-k); in 32..63 indices 40..47 are combined with their partner at
// (95 - index) through half_btf() with cospi[32], the rest copied.
2235 : stage++;
2236 : cospi = cospi_arr(cos_bit[stage]);
2237 : bf0 = output;
2238 : bf1 = step;
2239 : bf1[0] = bf0[0] + bf0[31];
2240 : bf1[1] = bf0[1] + bf0[30];
2241 : bf1[2] = bf0[2] + bf0[29];
2242 : bf1[3] = bf0[3] + bf0[28];
2243 : bf1[4] = bf0[4] + bf0[27];
2244 : bf1[5] = bf0[5] + bf0[26];
2245 : bf1[6] = bf0[6] + bf0[25];
2246 : bf1[7] = bf0[7] + bf0[24];
2247 : bf1[8] = bf0[8] + bf0[23];
2248 : bf1[9] = bf0[9] + bf0[22];
2249 : bf1[10] = bf0[10] + bf0[21];
2250 : bf1[11] = bf0[11] + bf0[20];
2251 : bf1[12] = bf0[12] + bf0[19];
2252 : bf1[13] = bf0[13] + bf0[18];
2253 : bf1[14] = bf0[14] + bf0[17];
2254 : bf1[15] = bf0[15] + bf0[16];
2255 : bf1[16] = bf0[15] - bf0[16];
2256 : bf1[17] = bf0[14] - bf0[17];
2257 : bf1[18] = bf0[13] - bf0[18];
2258 : bf1[19] = bf0[12] - bf0[19];
2259 : bf1[20] = bf0[11] - bf0[20];
2260 : bf1[21] = bf0[10] - bf0[21];
2261 : bf1[22] = bf0[9] - bf0[22];
2262 : bf1[23] = bf0[8] - bf0[23];
2263 : bf1[24] = bf0[7] - bf0[24];
2264 : bf1[25] = bf0[6] - bf0[25];
2265 : bf1[26] = bf0[5] - bf0[26];
2266 : bf1[27] = bf0[4] - bf0[27];
2267 : bf1[28] = bf0[3] - bf0[28];
2268 : bf1[29] = bf0[2] - bf0[29];
2269 : bf1[30] = bf0[1] - bf0[30];
2270 : bf1[31] = bf0[0] - bf0[31];
2271 : bf1[32] = bf0[32];
2272 : bf1[33] = bf0[33];
2273 : bf1[34] = bf0[34];
2274 : bf1[35] = bf0[35];
2275 : bf1[36] = bf0[36];
2276 : bf1[37] = bf0[37];
2277 : bf1[38] = bf0[38];
2278 : bf1[39] = bf0[39];
2279 : bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
2280 : bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
2281 : bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
2282 : bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
2283 : bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
2284 : bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
2285 : bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
2286 : bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
2287 : bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
2288 : bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
2289 : bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
2290 : bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
2291 : bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
2292 : bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
2293 : bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
2294 : bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
2295 : bf1[56] = bf0[56];
2296 : bf1[57] = bf0[57];
2297 : bf1[58] = bf0[58];
2298 : bf1[59] = bf0[59];
2299 : bf1[60] = bf0[60];
2300 : bf1[61] = bf0[61];
2301 : bf1[62] = bf0[62];
2302 : bf1[63] = bf0[63];
2303 : range_check(stage, input, bf1, size, stage_range[stage]);
2304 :
2305 : // stage 11: step -> output (final result). For k in 0..31:
// output[k] = step[k] + step[63-k] and output[63-k] = step[k] - step[63-k].
2306 : stage++;
2307 : cospi = cospi_arr(cos_bit[stage]);  // loaded but not referenced in this stage
2308 : bf0 = step;
2309 : bf1 = output;
2310 : bf1[0] = bf0[0] + bf0[63];
2311 : bf1[1] = bf0[1] + bf0[62];
2312 : bf1[2] = bf0[2] + bf0[61];
2313 : bf1[3] = bf0[3] + bf0[60];
2314 : bf1[4] = bf0[4] + bf0[59];
2315 : bf1[5] = bf0[5] + bf0[58];
2316 : bf1[6] = bf0[6] + bf0[57];
2317 : bf1[7] = bf0[7] + bf0[56];
2318 : bf1[8] = bf0[8] + bf0[55];
2319 : bf1[9] = bf0[9] + bf0[54];
2320 : bf1[10] = bf0[10] + bf0[53];
2321 : bf1[11] = bf0[11] + bf0[52];
2322 : bf1[12] = bf0[12] + bf0[51];
2323 : bf1[13] = bf0[13] + bf0[50];
2324 : bf1[14] = bf0[14] + bf0[49];
2325 : bf1[15] = bf0[15] + bf0[48];
2326 : bf1[16] = bf0[16] + bf0[47];
2327 : bf1[17] = bf0[17] + bf0[46];
2328 : bf1[18] = bf0[18] + bf0[45];
2329 : bf1[19] = bf0[19] + bf0[44];
2330 : bf1[20] = bf0[20] + bf0[43];
2331 : bf1[21] = bf0[21] + bf0[42];
2332 : bf1[22] = bf0[22] + bf0[41];
2333 : bf1[23] = bf0[23] + bf0[40];
2334 : bf1[24] = bf0[24] + bf0[39];
2335 : bf1[25] = bf0[25] + bf0[38];
2336 : bf1[26] = bf0[26] + bf0[37];
2337 : bf1[27] = bf0[27] + bf0[36];
2338 : bf1[28] = bf0[28] + bf0[35];
2339 : bf1[29] = bf0[29] + bf0[34];
2340 : bf1[30] = bf0[30] + bf0[33];
2341 : bf1[31] = bf0[31] + bf0[32];
2342 : bf1[32] = bf0[31] - bf0[32];
2343 : bf1[33] = bf0[30] - bf0[33];
2344 : bf1[34] = bf0[29] - bf0[34];
2345 : bf1[35] = bf0[28] - bf0[35];
2346 : bf1[36] = bf0[27] - bf0[36];
2347 : bf1[37] = bf0[26] - bf0[37];
2348 : bf1[38] = bf0[25] - bf0[38];
2349 : bf1[39] = bf0[24] - bf0[39];
2350 : bf1[40] = bf0[23] - bf0[40];
2351 : bf1[41] = bf0[22] - bf0[41];
2352 : bf1[42] = bf0[21] - bf0[42];
2353 : bf1[43] = bf0[20] - bf0[43];
2354 : bf1[44] = bf0[19] - bf0[44];
2355 : bf1[45] = bf0[18] - bf0[45];
2356 : bf1[46] = bf0[17] - bf0[46];
2357 : bf1[47] = bf0[16] - bf0[47];
2358 : bf1[48] = bf0[15] - bf0[48];
2359 : bf1[49] = bf0[14] - bf0[49];
2360 : bf1[50] = bf0[13] - bf0[50];
2361 : bf1[51] = bf0[12] - bf0[51];
2362 : bf1[52] = bf0[11] - bf0[52];
2363 : bf1[53] = bf0[10] - bf0[53];
2364 : bf1[54] = bf0[9] - bf0[54];
2365 : bf1[55] = bf0[8] - bf0[55];
2366 : bf1[56] = bf0[7] - bf0[56];
2367 : bf1[57] = bf0[6] - bf0[57];
2368 : bf1[58] = bf0[5] - bf0[58];
2369 : bf1[59] = bf0[4] - bf0[59];
2370 : bf1[60] = bf0[3] - bf0[60];
2371 : bf1[61] = bf0[2] - bf0[61];
2372 : bf1[62] = bf0[1] - bf0[62];
2373 : bf1[63] = bf0[0] - bf0[63];
2374 : range_check(stage, input, bf1, size, stage_range[stage]);
2375 : }
2376 : #endif // CONFIG_TX64X64
|