LCOV - code coverage report
Current view: top level - third_party/aom/av1/common - av1_fwd_txfm1d.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 1324 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 12 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <stdlib.h>
      13             : #include "aom_dsp/inv_txfm.h"
      14             : #include "av1/common/av1_fwd_txfm1d.h"
      15             : #if CONFIG_COEFFICIENT_RANGE_CHECKING
      16             : 
      17             : void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
      18             :                       int32_t size, int8_t bit);
      19             : 
      20             : #define range_check(stage, input, buf, size, bit) \
      21             :   range_check_func(stage, input, buf, size, bit)
      22             : #else
      23             : #define range_check(stage, input, buf, size, bit) \
      24             :   {                                               \
      25             :     (void)stage;                                  \
      26             :     (void)input;                                  \
      27             :     (void)buf;                                    \
      28             :     (void)size;                                   \
      29             :     (void)bit;                                    \
      30             :   }
      31             : #endif
      32             : 
      33             : // TODO(angiebird): Make 1-d txfm functions static
      34           0 : void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
      35             :                    const int8_t *stage_range) {
      36           0 :   const int32_t size = 4;
      37             :   const int32_t *cospi;
      38             : 
      39           0 :   int32_t stage = 0;
      40             :   int32_t *bf0, *bf1;
      41             :   int32_t step[4];
      42             : 
      43             :   // stage 0;
      44           0 :   range_check(stage, input, input, size, stage_range[stage]);
      45             : 
      46             :   // stage 1;
      47           0 :   stage++;
      48           0 :   bf1 = output;
      49           0 :   bf1[0] = input[0] + input[3];
      50           0 :   bf1[1] = input[1] + input[2];
      51           0 :   bf1[2] = -input[2] + input[1];
      52           0 :   bf1[3] = -input[3] + input[0];
      53           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
      54             : 
      55             :   // stage 2
      56           0 :   stage++;
      57           0 :   cospi = cospi_arr(cos_bit[stage]);
      58           0 :   bf0 = output;
      59           0 :   bf1 = step;
      60           0 :   bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
      61           0 :   bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
      62           0 :   bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
      63           0 :   bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
      64           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
      65             : 
      66             :   // stage 3
      67           0 :   stage++;
      68           0 :   bf0 = step;
      69           0 :   bf1 = output;
      70           0 :   bf1[0] = bf0[0];
      71           0 :   bf1[1] = bf0[2];
      72           0 :   bf1[2] = bf0[1];
      73           0 :   bf1[3] = bf0[3];
      74           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
      75           0 : }
      76             : 
      77           0 : void av1_fdct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
      78             :                    const int8_t *stage_range) {
      79           0 :   const int32_t size = 8;
      80             :   const int32_t *cospi;
      81             : 
      82           0 :   int32_t stage = 0;
      83             :   int32_t *bf0, *bf1;
      84             :   int32_t step[8];
      85             : 
      86             :   // stage 0;
      87           0 :   range_check(stage, input, input, size, stage_range[stage]);
      88             : 
      89             :   // stage 1;
      90           0 :   stage++;
      91           0 :   bf1 = output;
      92           0 :   bf1[0] = input[0] + input[7];
      93           0 :   bf1[1] = input[1] + input[6];
      94           0 :   bf1[2] = input[2] + input[5];
      95           0 :   bf1[3] = input[3] + input[4];
      96           0 :   bf1[4] = -input[4] + input[3];
      97           0 :   bf1[5] = -input[5] + input[2];
      98           0 :   bf1[6] = -input[6] + input[1];
      99           0 :   bf1[7] = -input[7] + input[0];
     100           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     101             : 
     102             :   // stage 2
     103           0 :   stage++;
     104           0 :   cospi = cospi_arr(cos_bit[stage]);
     105           0 :   bf0 = output;
     106           0 :   bf1 = step;
     107           0 :   bf1[0] = bf0[0] + bf0[3];
     108           0 :   bf1[1] = bf0[1] + bf0[2];
     109           0 :   bf1[2] = -bf0[2] + bf0[1];
     110           0 :   bf1[3] = -bf0[3] + bf0[0];
     111           0 :   bf1[4] = bf0[4];
     112           0 :   bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
     113           0 :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
     114           0 :   bf1[7] = bf0[7];
     115           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     116             : 
     117             :   // stage 3
     118           0 :   stage++;
     119           0 :   cospi = cospi_arr(cos_bit[stage]);
     120           0 :   bf0 = step;
     121           0 :   bf1 = output;
     122           0 :   bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
     123           0 :   bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
     124           0 :   bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
     125           0 :   bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
     126           0 :   bf1[4] = bf0[4] + bf0[5];
     127           0 :   bf1[5] = -bf0[5] + bf0[4];
     128           0 :   bf1[6] = -bf0[6] + bf0[7];
     129           0 :   bf1[7] = bf0[7] + bf0[6];
     130           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     131             : 
     132             :   // stage 4
     133           0 :   stage++;
     134           0 :   cospi = cospi_arr(cos_bit[stage]);
     135           0 :   bf0 = output;
     136           0 :   bf1 = step;
     137           0 :   bf1[0] = bf0[0];
     138           0 :   bf1[1] = bf0[1];
     139           0 :   bf1[2] = bf0[2];
     140           0 :   bf1[3] = bf0[3];
     141           0 :   bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
     142           0 :   bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
     143           0 :   bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
     144           0 :   bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
     145           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     146             : 
     147             :   // stage 5
     148           0 :   stage++;
     149           0 :   bf0 = step;
     150           0 :   bf1 = output;
     151           0 :   bf1[0] = bf0[0];
     152           0 :   bf1[1] = bf0[4];
     153           0 :   bf1[2] = bf0[2];
     154           0 :   bf1[3] = bf0[6];
     155           0 :   bf1[4] = bf0[1];
     156           0 :   bf1[5] = bf0[5];
     157           0 :   bf1[6] = bf0[3];
     158           0 :   bf1[7] = bf0[7];
     159           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     160           0 : }
     161             : 
     162           0 : void av1_fdct16_new(const int32_t *input, int32_t *output,
     163             :                     const int8_t *cos_bit, const int8_t *stage_range) {
     164           0 :   const int32_t size = 16;
     165             :   const int32_t *cospi;
     166             : 
     167           0 :   int32_t stage = 0;
     168             :   int32_t *bf0, *bf1;
     169             :   int32_t step[16];
     170             : 
     171             :   // stage 0;
     172           0 :   range_check(stage, input, input, size, stage_range[stage]);
     173             : 
     174             :   // stage 1;
     175           0 :   stage++;
     176           0 :   bf1 = output;
     177           0 :   bf1[0] = input[0] + input[15];
     178           0 :   bf1[1] = input[1] + input[14];
     179           0 :   bf1[2] = input[2] + input[13];
     180           0 :   bf1[3] = input[3] + input[12];
     181           0 :   bf1[4] = input[4] + input[11];
     182           0 :   bf1[5] = input[5] + input[10];
     183           0 :   bf1[6] = input[6] + input[9];
     184           0 :   bf1[7] = input[7] + input[8];
     185           0 :   bf1[8] = -input[8] + input[7];
     186           0 :   bf1[9] = -input[9] + input[6];
     187           0 :   bf1[10] = -input[10] + input[5];
     188           0 :   bf1[11] = -input[11] + input[4];
     189           0 :   bf1[12] = -input[12] + input[3];
     190           0 :   bf1[13] = -input[13] + input[2];
     191           0 :   bf1[14] = -input[14] + input[1];
     192           0 :   bf1[15] = -input[15] + input[0];
     193           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     194             : 
     195             :   // stage 2
     196           0 :   stage++;
     197           0 :   cospi = cospi_arr(cos_bit[stage]);
     198           0 :   bf0 = output;
     199           0 :   bf1 = step;
     200           0 :   bf1[0] = bf0[0] + bf0[7];
     201           0 :   bf1[1] = bf0[1] + bf0[6];
     202           0 :   bf1[2] = bf0[2] + bf0[5];
     203           0 :   bf1[3] = bf0[3] + bf0[4];
     204           0 :   bf1[4] = -bf0[4] + bf0[3];
     205           0 :   bf1[5] = -bf0[5] + bf0[2];
     206           0 :   bf1[6] = -bf0[6] + bf0[1];
     207           0 :   bf1[7] = -bf0[7] + bf0[0];
     208           0 :   bf1[8] = bf0[8];
     209           0 :   bf1[9] = bf0[9];
     210           0 :   bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
     211           0 :   bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
     212           0 :   bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
     213           0 :   bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
     214           0 :   bf1[14] = bf0[14];
     215           0 :   bf1[15] = bf0[15];
     216           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     217             : 
     218             :   // stage 3
     219           0 :   stage++;
     220           0 :   cospi = cospi_arr(cos_bit[stage]);
     221           0 :   bf0 = step;
     222           0 :   bf1 = output;
     223           0 :   bf1[0] = bf0[0] + bf0[3];
     224           0 :   bf1[1] = bf0[1] + bf0[2];
     225           0 :   bf1[2] = -bf0[2] + bf0[1];
     226           0 :   bf1[3] = -bf0[3] + bf0[0];
     227           0 :   bf1[4] = bf0[4];
     228           0 :   bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
     229           0 :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
     230           0 :   bf1[7] = bf0[7];
     231           0 :   bf1[8] = bf0[8] + bf0[11];
     232           0 :   bf1[9] = bf0[9] + bf0[10];
     233           0 :   bf1[10] = -bf0[10] + bf0[9];
     234           0 :   bf1[11] = -bf0[11] + bf0[8];
     235           0 :   bf1[12] = -bf0[12] + bf0[15];
     236           0 :   bf1[13] = -bf0[13] + bf0[14];
     237           0 :   bf1[14] = bf0[14] + bf0[13];
     238           0 :   bf1[15] = bf0[15] + bf0[12];
     239           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     240             : 
     241             :   // stage 4
     242           0 :   stage++;
     243           0 :   cospi = cospi_arr(cos_bit[stage]);
     244           0 :   bf0 = output;
     245           0 :   bf1 = step;
     246           0 :   bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
     247           0 :   bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
     248           0 :   bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
     249           0 :   bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
     250           0 :   bf1[4] = bf0[4] + bf0[5];
     251           0 :   bf1[5] = -bf0[5] + bf0[4];
     252           0 :   bf1[6] = -bf0[6] + bf0[7];
     253           0 :   bf1[7] = bf0[7] + bf0[6];
     254           0 :   bf1[8] = bf0[8];
     255           0 :   bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
     256           0 :   bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
     257           0 :   bf1[11] = bf0[11];
     258           0 :   bf1[12] = bf0[12];
     259           0 :   bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
     260           0 :   bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
     261           0 :   bf1[15] = bf0[15];
     262           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     263             : 
     264             :   // stage 5
     265           0 :   stage++;
     266           0 :   cospi = cospi_arr(cos_bit[stage]);
     267           0 :   bf0 = step;
     268           0 :   bf1 = output;
     269           0 :   bf1[0] = bf0[0];
     270           0 :   bf1[1] = bf0[1];
     271           0 :   bf1[2] = bf0[2];
     272           0 :   bf1[3] = bf0[3];
     273           0 :   bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
     274           0 :   bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
     275           0 :   bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
     276           0 :   bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
     277           0 :   bf1[8] = bf0[8] + bf0[9];
     278           0 :   bf1[9] = -bf0[9] + bf0[8];
     279           0 :   bf1[10] = -bf0[10] + bf0[11];
     280           0 :   bf1[11] = bf0[11] + bf0[10];
     281           0 :   bf1[12] = bf0[12] + bf0[13];
     282           0 :   bf1[13] = -bf0[13] + bf0[12];
     283           0 :   bf1[14] = -bf0[14] + bf0[15];
     284           0 :   bf1[15] = bf0[15] + bf0[14];
     285           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     286             : 
     287             :   // stage 6
     288           0 :   stage++;
     289           0 :   cospi = cospi_arr(cos_bit[stage]);
     290           0 :   bf0 = output;
     291           0 :   bf1 = step;
     292           0 :   bf1[0] = bf0[0];
     293           0 :   bf1[1] = bf0[1];
     294           0 :   bf1[2] = bf0[2];
     295           0 :   bf1[3] = bf0[3];
     296           0 :   bf1[4] = bf0[4];
     297           0 :   bf1[5] = bf0[5];
     298           0 :   bf1[6] = bf0[6];
     299           0 :   bf1[7] = bf0[7];
     300           0 :   bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
     301           0 :   bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
     302           0 :   bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
     303           0 :   bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
     304           0 :   bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
     305           0 :   bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
     306           0 :   bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
     307           0 :   bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
     308           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     309             : 
     310             :   // stage 7
     311           0 :   stage++;
     312           0 :   bf0 = step;
     313           0 :   bf1 = output;
     314           0 :   bf1[0] = bf0[0];
     315           0 :   bf1[1] = bf0[8];
     316           0 :   bf1[2] = bf0[4];
     317           0 :   bf1[3] = bf0[12];
     318           0 :   bf1[4] = bf0[2];
     319           0 :   bf1[5] = bf0[10];
     320           0 :   bf1[6] = bf0[6];
     321           0 :   bf1[7] = bf0[14];
     322           0 :   bf1[8] = bf0[1];
     323           0 :   bf1[9] = bf0[9];
     324           0 :   bf1[10] = bf0[5];
     325           0 :   bf1[11] = bf0[13];
     326           0 :   bf1[12] = bf0[3];
     327           0 :   bf1[13] = bf0[11];
     328           0 :   bf1[14] = bf0[7];
     329           0 :   bf1[15] = bf0[15];
     330           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     331           0 : }
     332             : 
     333           0 : void av1_fdct32_new(const int32_t *input, int32_t *output,
     334             :                     const int8_t *cos_bit, const int8_t *stage_range) {
     335           0 :   const int32_t size = 32;
     336             :   const int32_t *cospi;
     337             : 
     338           0 :   int32_t stage = 0;
     339             :   int32_t *bf0, *bf1;
     340             :   int32_t step[32];
     341             : 
     342             :   // stage 0;
     343           0 :   range_check(stage, input, input, size, stage_range[stage]);
     344             : 
     345             :   // stage 1;
     346           0 :   stage++;
     347           0 :   bf1 = output;
     348           0 :   bf1[0] = input[0] + input[31];
     349           0 :   bf1[1] = input[1] + input[30];
     350           0 :   bf1[2] = input[2] + input[29];
     351           0 :   bf1[3] = input[3] + input[28];
     352           0 :   bf1[4] = input[4] + input[27];
     353           0 :   bf1[5] = input[5] + input[26];
     354           0 :   bf1[6] = input[6] + input[25];
     355           0 :   bf1[7] = input[7] + input[24];
     356           0 :   bf1[8] = input[8] + input[23];
     357           0 :   bf1[9] = input[9] + input[22];
     358           0 :   bf1[10] = input[10] + input[21];
     359           0 :   bf1[11] = input[11] + input[20];
     360           0 :   bf1[12] = input[12] + input[19];
     361           0 :   bf1[13] = input[13] + input[18];
     362           0 :   bf1[14] = input[14] + input[17];
     363           0 :   bf1[15] = input[15] + input[16];
     364           0 :   bf1[16] = -input[16] + input[15];
     365           0 :   bf1[17] = -input[17] + input[14];
     366           0 :   bf1[18] = -input[18] + input[13];
     367           0 :   bf1[19] = -input[19] + input[12];
     368           0 :   bf1[20] = -input[20] + input[11];
     369           0 :   bf1[21] = -input[21] + input[10];
     370           0 :   bf1[22] = -input[22] + input[9];
     371           0 :   bf1[23] = -input[23] + input[8];
     372           0 :   bf1[24] = -input[24] + input[7];
     373           0 :   bf1[25] = -input[25] + input[6];
     374           0 :   bf1[26] = -input[26] + input[5];
     375           0 :   bf1[27] = -input[27] + input[4];
     376           0 :   bf1[28] = -input[28] + input[3];
     377           0 :   bf1[29] = -input[29] + input[2];
     378           0 :   bf1[30] = -input[30] + input[1];
     379           0 :   bf1[31] = -input[31] + input[0];
     380           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     381             : 
     382             :   // stage 2
     383           0 :   stage++;
     384           0 :   cospi = cospi_arr(cos_bit[stage]);
     385           0 :   bf0 = output;
     386           0 :   bf1 = step;
     387           0 :   bf1[0] = bf0[0] + bf0[15];
     388           0 :   bf1[1] = bf0[1] + bf0[14];
     389           0 :   bf1[2] = bf0[2] + bf0[13];
     390           0 :   bf1[3] = bf0[3] + bf0[12];
     391           0 :   bf1[4] = bf0[4] + bf0[11];
     392           0 :   bf1[5] = bf0[5] + bf0[10];
     393           0 :   bf1[6] = bf0[6] + bf0[9];
     394           0 :   bf1[7] = bf0[7] + bf0[8];
     395           0 :   bf1[8] = -bf0[8] + bf0[7];
     396           0 :   bf1[9] = -bf0[9] + bf0[6];
     397           0 :   bf1[10] = -bf0[10] + bf0[5];
     398           0 :   bf1[11] = -bf0[11] + bf0[4];
     399           0 :   bf1[12] = -bf0[12] + bf0[3];
     400           0 :   bf1[13] = -bf0[13] + bf0[2];
     401           0 :   bf1[14] = -bf0[14] + bf0[1];
     402           0 :   bf1[15] = -bf0[15] + bf0[0];
     403           0 :   bf1[16] = bf0[16];
     404           0 :   bf1[17] = bf0[17];
     405           0 :   bf1[18] = bf0[18];
     406           0 :   bf1[19] = bf0[19];
     407           0 :   bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
     408           0 :   bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
     409           0 :   bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
     410           0 :   bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
     411           0 :   bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
     412           0 :   bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
     413           0 :   bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
     414           0 :   bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
     415           0 :   bf1[28] = bf0[28];
     416           0 :   bf1[29] = bf0[29];
     417           0 :   bf1[30] = bf0[30];
     418           0 :   bf1[31] = bf0[31];
     419           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     420             : 
     421             :   // stage 3
     422           0 :   stage++;
     423           0 :   cospi = cospi_arr(cos_bit[stage]);
     424           0 :   bf0 = step;
     425           0 :   bf1 = output;
     426           0 :   bf1[0] = bf0[0] + bf0[7];
     427           0 :   bf1[1] = bf0[1] + bf0[6];
     428           0 :   bf1[2] = bf0[2] + bf0[5];
     429           0 :   bf1[3] = bf0[3] + bf0[4];
     430           0 :   bf1[4] = -bf0[4] + bf0[3];
     431           0 :   bf1[5] = -bf0[5] + bf0[2];
     432           0 :   bf1[6] = -bf0[6] + bf0[1];
     433           0 :   bf1[7] = -bf0[7] + bf0[0];
     434           0 :   bf1[8] = bf0[8];
     435           0 :   bf1[9] = bf0[9];
     436           0 :   bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
     437           0 :   bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
     438           0 :   bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
     439           0 :   bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
     440           0 :   bf1[14] = bf0[14];
     441           0 :   bf1[15] = bf0[15];
     442           0 :   bf1[16] = bf0[16] + bf0[23];
     443           0 :   bf1[17] = bf0[17] + bf0[22];
     444           0 :   bf1[18] = bf0[18] + bf0[21];
     445           0 :   bf1[19] = bf0[19] + bf0[20];
     446           0 :   bf1[20] = -bf0[20] + bf0[19];
     447           0 :   bf1[21] = -bf0[21] + bf0[18];
     448           0 :   bf1[22] = -bf0[22] + bf0[17];
     449           0 :   bf1[23] = -bf0[23] + bf0[16];
     450           0 :   bf1[24] = -bf0[24] + bf0[31];
     451           0 :   bf1[25] = -bf0[25] + bf0[30];
     452           0 :   bf1[26] = -bf0[26] + bf0[29];
     453           0 :   bf1[27] = -bf0[27] + bf0[28];
     454           0 :   bf1[28] = bf0[28] + bf0[27];
     455           0 :   bf1[29] = bf0[29] + bf0[26];
     456           0 :   bf1[30] = bf0[30] + bf0[25];
     457           0 :   bf1[31] = bf0[31] + bf0[24];
     458           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     459             : 
     460             :   // stage 4
     461           0 :   stage++;
     462           0 :   cospi = cospi_arr(cos_bit[stage]);
     463           0 :   bf0 = output;
     464           0 :   bf1 = step;
     465           0 :   bf1[0] = bf0[0] + bf0[3];
     466           0 :   bf1[1] = bf0[1] + bf0[2];
     467           0 :   bf1[2] = -bf0[2] + bf0[1];
     468           0 :   bf1[3] = -bf0[3] + bf0[0];
     469           0 :   bf1[4] = bf0[4];
     470           0 :   bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
     471           0 :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
     472           0 :   bf1[7] = bf0[7];
     473           0 :   bf1[8] = bf0[8] + bf0[11];
     474           0 :   bf1[9] = bf0[9] + bf0[10];
     475           0 :   bf1[10] = -bf0[10] + bf0[9];
     476           0 :   bf1[11] = -bf0[11] + bf0[8];
     477           0 :   bf1[12] = -bf0[12] + bf0[15];
     478           0 :   bf1[13] = -bf0[13] + bf0[14];
     479           0 :   bf1[14] = bf0[14] + bf0[13];
     480           0 :   bf1[15] = bf0[15] + bf0[12];
     481           0 :   bf1[16] = bf0[16];
     482           0 :   bf1[17] = bf0[17];
     483           0 :   bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
     484           0 :   bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
     485           0 :   bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
     486           0 :   bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
     487           0 :   bf1[22] = bf0[22];
     488           0 :   bf1[23] = bf0[23];
     489           0 :   bf1[24] = bf0[24];
     490           0 :   bf1[25] = bf0[25];
     491           0 :   bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
     492           0 :   bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
     493           0 :   bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
     494           0 :   bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
     495           0 :   bf1[30] = bf0[30];
     496           0 :   bf1[31] = bf0[31];
     497           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     498             : 
     499             :   // stage 5
     500           0 :   stage++;
     501           0 :   cospi = cospi_arr(cos_bit[stage]);
     502           0 :   bf0 = step;
     503           0 :   bf1 = output;
     504           0 :   bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
     505           0 :   bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
     506           0 :   bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
     507           0 :   bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
     508           0 :   bf1[4] = bf0[4] + bf0[5];
     509           0 :   bf1[5] = -bf0[5] + bf0[4];
     510           0 :   bf1[6] = -bf0[6] + bf0[7];
     511           0 :   bf1[7] = bf0[7] + bf0[6];
     512           0 :   bf1[8] = bf0[8];
     513           0 :   bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
     514           0 :   bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
     515           0 :   bf1[11] = bf0[11];
     516           0 :   bf1[12] = bf0[12];
     517           0 :   bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
     518           0 :   bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
     519           0 :   bf1[15] = bf0[15];
     520           0 :   bf1[16] = bf0[16] + bf0[19];
     521           0 :   bf1[17] = bf0[17] + bf0[18];
     522           0 :   bf1[18] = -bf0[18] + bf0[17];
     523           0 :   bf1[19] = -bf0[19] + bf0[16];
     524           0 :   bf1[20] = -bf0[20] + bf0[23];
     525           0 :   bf1[21] = -bf0[21] + bf0[22];
     526           0 :   bf1[22] = bf0[22] + bf0[21];
     527           0 :   bf1[23] = bf0[23] + bf0[20];
     528           0 :   bf1[24] = bf0[24] + bf0[27];
     529           0 :   bf1[25] = bf0[25] + bf0[26];
     530           0 :   bf1[26] = -bf0[26] + bf0[25];
     531           0 :   bf1[27] = -bf0[27] + bf0[24];
     532           0 :   bf1[28] = -bf0[28] + bf0[31];
     533           0 :   bf1[29] = -bf0[29] + bf0[30];
     534           0 :   bf1[30] = bf0[30] + bf0[29];
     535           0 :   bf1[31] = bf0[31] + bf0[28];
     536           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     537             : 
     538             :   // stage 6
     539           0 :   stage++;
     540           0 :   cospi = cospi_arr(cos_bit[stage]);
     541           0 :   bf0 = output;
     542           0 :   bf1 = step;
     543           0 :   bf1[0] = bf0[0];
     544           0 :   bf1[1] = bf0[1];
     545           0 :   bf1[2] = bf0[2];
     546           0 :   bf1[3] = bf0[3];
     547           0 :   bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
     548           0 :   bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
     549           0 :   bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
     550           0 :   bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
     551           0 :   bf1[8] = bf0[8] + bf0[9];
     552           0 :   bf1[9] = -bf0[9] + bf0[8];
     553           0 :   bf1[10] = -bf0[10] + bf0[11];
     554           0 :   bf1[11] = bf0[11] + bf0[10];
     555           0 :   bf1[12] = bf0[12] + bf0[13];
     556           0 :   bf1[13] = -bf0[13] + bf0[12];
     557           0 :   bf1[14] = -bf0[14] + bf0[15];
     558           0 :   bf1[15] = bf0[15] + bf0[14];
     559           0 :   bf1[16] = bf0[16];
     560           0 :   bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
     561           0 :   bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
     562           0 :   bf1[19] = bf0[19];
     563           0 :   bf1[20] = bf0[20];
     564           0 :   bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
     565           0 :   bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
     566           0 :   bf1[23] = bf0[23];
     567           0 :   bf1[24] = bf0[24];
     568           0 :   bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
     569           0 :   bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
     570           0 :   bf1[27] = bf0[27];
     571           0 :   bf1[28] = bf0[28];
     572           0 :   bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
     573           0 :   bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
     574           0 :   bf1[31] = bf0[31];
     575           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     576             : 
     577             :   // stage 7
     578           0 :   stage++;
     579           0 :   cospi = cospi_arr(cos_bit[stage]);
     580           0 :   bf0 = step;
     581           0 :   bf1 = output;
     582           0 :   bf1[0] = bf0[0];
     583           0 :   bf1[1] = bf0[1];
     584           0 :   bf1[2] = bf0[2];
     585           0 :   bf1[3] = bf0[3];
     586           0 :   bf1[4] = bf0[4];
     587           0 :   bf1[5] = bf0[5];
     588           0 :   bf1[6] = bf0[6];
     589           0 :   bf1[7] = bf0[7];
     590           0 :   bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
     591           0 :   bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
     592           0 :   bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
     593           0 :   bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
     594           0 :   bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
     595           0 :   bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
     596           0 :   bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
     597           0 :   bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
     598           0 :   bf1[16] = bf0[16] + bf0[17];
     599           0 :   bf1[17] = -bf0[17] + bf0[16];
     600           0 :   bf1[18] = -bf0[18] + bf0[19];
     601           0 :   bf1[19] = bf0[19] + bf0[18];
     602           0 :   bf1[20] = bf0[20] + bf0[21];
     603           0 :   bf1[21] = -bf0[21] + bf0[20];
     604           0 :   bf1[22] = -bf0[22] + bf0[23];
     605           0 :   bf1[23] = bf0[23] + bf0[22];
     606           0 :   bf1[24] = bf0[24] + bf0[25];
     607           0 :   bf1[25] = -bf0[25] + bf0[24];
     608           0 :   bf1[26] = -bf0[26] + bf0[27];
     609           0 :   bf1[27] = bf0[27] + bf0[26];
     610           0 :   bf1[28] = bf0[28] + bf0[29];
     611           0 :   bf1[29] = -bf0[29] + bf0[28];
     612           0 :   bf1[30] = -bf0[30] + bf0[31];
     613           0 :   bf1[31] = bf0[31] + bf0[30];
     614           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     615             : 
     616             :   // stage 8
     617           0 :   stage++;
     618           0 :   cospi = cospi_arr(cos_bit[stage]);
     619           0 :   bf0 = output;
     620           0 :   bf1 = step;
     621           0 :   bf1[0] = bf0[0];
     622           0 :   bf1[1] = bf0[1];
     623           0 :   bf1[2] = bf0[2];
     624           0 :   bf1[3] = bf0[3];
     625           0 :   bf1[4] = bf0[4];
     626           0 :   bf1[5] = bf0[5];
     627           0 :   bf1[6] = bf0[6];
     628           0 :   bf1[7] = bf0[7];
     629           0 :   bf1[8] = bf0[8];
     630           0 :   bf1[9] = bf0[9];
     631           0 :   bf1[10] = bf0[10];
     632           0 :   bf1[11] = bf0[11];
     633           0 :   bf1[12] = bf0[12];
     634           0 :   bf1[13] = bf0[13];
     635           0 :   bf1[14] = bf0[14];
     636           0 :   bf1[15] = bf0[15];
     637           0 :   bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
     638           0 :   bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
     639           0 :   bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
     640           0 :   bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
     641           0 :   bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
     642           0 :   bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
     643           0 :   bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
     644           0 :   bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
     645           0 :   bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
     646           0 :   bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
     647           0 :   bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
     648           0 :   bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
     649           0 :   bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
     650           0 :   bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
     651           0 :   bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
     652           0 :   bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
     653           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     654             : 
     655             :   // stage 9
     656           0 :   stage++;
     657           0 :   bf0 = step;
     658           0 :   bf1 = output;
     659           0 :   bf1[0] = bf0[0];
     660           0 :   bf1[1] = bf0[16];
     661           0 :   bf1[2] = bf0[8];
     662           0 :   bf1[3] = bf0[24];
     663           0 :   bf1[4] = bf0[4];
     664           0 :   bf1[5] = bf0[20];
     665           0 :   bf1[6] = bf0[12];
     666           0 :   bf1[7] = bf0[28];
     667           0 :   bf1[8] = bf0[2];
     668           0 :   bf1[9] = bf0[18];
     669           0 :   bf1[10] = bf0[10];
     670           0 :   bf1[11] = bf0[26];
     671           0 :   bf1[12] = bf0[6];
     672           0 :   bf1[13] = bf0[22];
     673           0 :   bf1[14] = bf0[14];
     674           0 :   bf1[15] = bf0[30];
     675           0 :   bf1[16] = bf0[1];
     676           0 :   bf1[17] = bf0[17];
     677           0 :   bf1[18] = bf0[9];
     678           0 :   bf1[19] = bf0[25];
     679           0 :   bf1[20] = bf0[5];
     680           0 :   bf1[21] = bf0[21];
     681           0 :   bf1[22] = bf0[13];
     682           0 :   bf1[23] = bf0[29];
     683           0 :   bf1[24] = bf0[3];
     684           0 :   bf1[25] = bf0[19];
     685           0 :   bf1[26] = bf0[11];
     686           0 :   bf1[27] = bf0[27];
     687           0 :   bf1[28] = bf0[7];
     688           0 :   bf1[29] = bf0[23];
     689           0 :   bf1[30] = bf0[15];
     690           0 :   bf1[31] = bf0[31];
     691           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     692           0 : }
     693             : 
     694           0 : void av1_fadst4_new(const int32_t *input, int32_t *output,
     695             :                     const int8_t *cos_bit, const int8_t *stage_range) {
                         // Presumably the 4-point forward ADST (inferred from the name -- confirm
                         // against the header). Runs five fixed stages, alternating between
                         // `output` and the local `step` scratch so the last stage lands in
                         // `output`.
                         //   input       - 4 values, read only in stages 0 and 1
                         //   output      - receives the 4 results; also holds intermediates
                         //   cos_bit     - per-stage value handed to cospi_arr() and half_btf();
                         //                 entries 2 and 4 are read here
                         //   stage_range - per-stage value forwarded to range_check(); entries
                         //                 0..5 are referenced
                         // NOTE: stage 1 writes output[0] before it reads input[0], so calling
                         // this with input == output would corrupt the data.
     696           0 :   const int32_t size = 4;
     697             :   const int32_t *cospi;
     698             : 
     699           0 :   int32_t stage = 0;
     700             :   int32_t *bf0, *bf1;
     701             :   int32_t step[4];
     702             : 
     703             :   // stage 0: hand the untouched input to range_check, which is an empty
                         // macro unless CONFIG_COEFFICIENT_RANGE_CHECKING is set.
     704           0 :   range_check(stage, input, input, size, stage_range[stage]);
     705             : 
     706             :   // stage 1: copy the input into output in the order 3, 0, 1, 2.
     707           0 :   stage++;
     708           0 :   bf1 = output;
     709           0 :   bf1[0] = input[3];
     710           0 :   bf1[1] = input[0];
     711           0 :   bf1[2] = input[1];
     712           0 :   bf1[3] = input[2];
     713           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     714             : 
     715             :   // stage 2: combine pairs (0,1) and (2,3) through half_btf with the
                         // cospi[8]/cospi[56] and cospi[40]/cospi[24] weights; result in step.
     716           0 :   stage++;
     717           0 :   cospi = cospi_arr(cos_bit[stage]);
     718           0 :   bf0 = output;
     719           0 :   bf1 = step;
     720           0 :   bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
     721           0 :   bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
     722           0 :   bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
     723           0 :   bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
     724           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     725             : 
     726             :   // stage 3: sums and differences of elements two apart; result in output.
     727           0 :   stage++;
     728           0 :   bf0 = step;
     729           0 :   bf1 = output;
     730           0 :   bf1[0] = bf0[0] + bf0[2];
     731           0 :   bf1[1] = bf0[1] + bf0[3];
     732           0 :   bf1[2] = -bf0[2] + bf0[0];
     733           0 :   bf1[3] = -bf0[3] + bf0[1];
     734           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     735             : 
     736             :   // stage 4: elements 0 and 1 pass through; pair (2,3) goes through
                         // half_btf with cospi[32] weights; result in step.
     737           0 :   stage++;
     738           0 :   cospi = cospi_arr(cos_bit[stage]);
     739           0 :   bf0 = output;
     740           0 :   bf1 = step;
     741           0 :   bf1[0] = bf0[0];
     742           0 :   bf1[1] = bf0[1];
     743           0 :   bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
     744           0 :   bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
     745           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     746             : 
     747             :   // stage 5: final reordering into output; the odd-indexed outputs are
                         // negated.
     748           0 :   stage++;
     749           0 :   bf0 = step;
     750           0 :   bf1 = output;
     751           0 :   bf1[0] = bf0[0];
     752           0 :   bf1[1] = -bf0[2];
     753           0 :   bf1[2] = bf0[3];
     754           0 :   bf1[3] = -bf0[1];
     755           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     756           0 : }
     757             : 
     758           0 : void av1_fadst8_new(const int32_t *input, int32_t *output,
     759             :                     const int8_t *cos_bit, const int8_t *stage_range) {
                         // Presumably the 8-point forward ADST (inferred from the name -- confirm
                         // against the header). Runs seven fixed stages, alternating between
                         // `output` and the local `step` scratch so the last stage lands in
                         // `output`.
                         //   input       - 8 values, read only in stages 0 and 1
                         //   output      - receives the 8 results; also holds intermediates
                         //   cos_bit     - per-stage value handed to cospi_arr() and half_btf();
                         //                 entries 2, 4 and 6 are read here
                         //   stage_range - per-stage value forwarded to range_check(); entries
                         //                 0..7 are referenced
                         // NOTE: stage 1 writes output[0] before it reads input[0], so calling
                         // this with input == output would corrupt the data.
     760           0 :   const int32_t size = 8;
     761             :   const int32_t *cospi;
     762             : 
     763           0 :   int32_t stage = 0;
     764             :   int32_t *bf0, *bf1;
     765             :   int32_t step[8];
     766             : 
     767             :   // stage 0: hand the untouched input to range_check, which is an empty
                         // macro unless CONFIG_COEFFICIENT_RANGE_CHECKING is set.
     768           0 :   range_check(stage, input, input, size, stage_range[stage]);
     769             : 
     770             :   // stage 1: copy the input into output in the order 7,0,5,2,3,4,1,6.
     771           0 :   stage++;
     772           0 :   bf1 = output;
     773           0 :   bf1[0] = input[7];
     774           0 :   bf1[1] = input[0];
     775           0 :   bf1[2] = input[5];
     776           0 :   bf1[3] = input[2];
     777           0 :   bf1[4] = input[3];
     778           0 :   bf1[5] = input[4];
     779           0 :   bf1[6] = input[1];
     780           0 :   bf1[7] = input[6];
     781           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     782             : 
     783             :   // stage 2: each adjacent pair (2k, 2k+1) goes through half_btf with its
                         // own cospi weight pair (4/60, 20/44, 36/28, 52/12); result in step.
     784           0 :   stage++;
     785           0 :   cospi = cospi_arr(cos_bit[stage]);
     786           0 :   bf0 = output;
     787           0 :   bf1 = step;
     788           0 :   bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
     789           0 :   bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
     790           0 :   bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
     791           0 :   bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
     792           0 :   bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
     793           0 :   bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
     794           0 :   bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
     795           0 :   bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
     796           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     797             : 
     798             :   // stage 3: sums (0..3) and differences (4..7) of elements four apart.
     799           0 :   stage++;
     800           0 :   bf0 = step;
     801           0 :   bf1 = output;
     802           0 :   bf1[0] = bf0[0] + bf0[4];
     803           0 :   bf1[1] = bf0[1] + bf0[5];
     804           0 :   bf1[2] = bf0[2] + bf0[6];
     805           0 :   bf1[3] = bf0[3] + bf0[7];
     806           0 :   bf1[4] = -bf0[4] + bf0[0];
     807           0 :   bf1[5] = -bf0[5] + bf0[1];
     808           0 :   bf1[6] = -bf0[6] + bf0[2];
     809           0 :   bf1[7] = -bf0[7] + bf0[3];
     810           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     811             : 
     812             :   // stage 4: elements 0..3 pass through; pairs (4,5) and (6,7) go through
                         // half_btf with cospi[16]/cospi[48] weights; result in step.
     813           0 :   stage++;
     814           0 :   cospi = cospi_arr(cos_bit[stage]);
     815           0 :   bf0 = output;
     816           0 :   bf1 = step;
     817           0 :   bf1[0] = bf0[0];
     818           0 :   bf1[1] = bf0[1];
     819           0 :   bf1[2] = bf0[2];
     820           0 :   bf1[3] = bf0[3];
     821           0 :   bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
     822           0 :   bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
     823           0 :   bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
     824           0 :   bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
     825           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     826             : 
     827             :   // stage 5: within each group of four, sums and differences of elements
                         // two apart; result in output.
     828           0 :   stage++;
     829           0 :   bf0 = step;
     830           0 :   bf1 = output;
     831           0 :   bf1[0] = bf0[0] + bf0[2];
     832           0 :   bf1[1] = bf0[1] + bf0[3];
     833           0 :   bf1[2] = -bf0[2] + bf0[0];
     834           0 :   bf1[3] = -bf0[3] + bf0[1];
     835           0 :   bf1[4] = bf0[4] + bf0[6];
     836           0 :   bf1[5] = bf0[5] + bf0[7];
     837           0 :   bf1[6] = -bf0[6] + bf0[4];
     838           0 :   bf1[7] = -bf0[7] + bf0[5];
     839           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     840             : 
     841             :   // stage 6: elements 0,1,4,5 pass through; pairs (2,3) and (6,7) go
                         // through half_btf with cospi[32] weights; result in step.
     842           0 :   stage++;
     843           0 :   cospi = cospi_arr(cos_bit[stage]);
     844           0 :   bf0 = output;
     845           0 :   bf1 = step;
     846           0 :   bf1[0] = bf0[0];
     847           0 :   bf1[1] = bf0[1];
     848           0 :   bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
     849           0 :   bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
     850           0 :   bf1[4] = bf0[4];
     851           0 :   bf1[5] = bf0[5];
     852           0 :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
     853           0 :   bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
     854           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     855             : 
     856             :   // stage 7: final reordering into output; the odd-indexed outputs are
                         // negated.
     857           0 :   stage++;
     858           0 :   bf0 = step;
     859           0 :   bf1 = output;
     860           0 :   bf1[0] = bf0[0];
     861           0 :   bf1[1] = -bf0[4];
     862           0 :   bf1[2] = bf0[6];
     863           0 :   bf1[3] = -bf0[2];
     864           0 :   bf1[4] = bf0[3];
     865           0 :   bf1[5] = -bf0[7];
     866           0 :   bf1[6] = bf0[5];
     867           0 :   bf1[7] = -bf0[1];
     868           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     869           0 : }
     870             : 
     871           0 : void av1_fadst16_new(const int32_t *input, int32_t *output,
     872             :                      const int8_t *cos_bit, const int8_t *stage_range) {
                         // Presumably the 16-point forward ADST (inferred from the name --
                         // confirm against the header). Runs nine fixed stages, alternating
                         // between `output` and the local `step` scratch so the last stage lands
                         // in `output`.
                         //   input       - 16 values, read only in stages 0 and 1
                         //   output      - receives the 16 results; also holds intermediates
                         //   cos_bit     - per-stage value handed to cospi_arr() and half_btf();
                         //                 entries 2, 4, 6 and 8 are read here
                         //   stage_range - per-stage value forwarded to range_check(); entries
                         //                 0..9 are referenced
                         // NOTE: stage 1 writes output[0] before it reads input[0], so calling
                         // this with input == output would corrupt the data.
     873           0 :   const int32_t size = 16;
     874             :   const int32_t *cospi;
     875             : 
     876           0 :   int32_t stage = 0;
     877             :   int32_t *bf0, *bf1;
     878             :   int32_t step[16];
     879             : 
     880             :   // stage 0: hand the untouched input to range_check, which is an empty
                         // macro unless CONFIG_COEFFICIENT_RANGE_CHECKING is set.
     881           0 :   range_check(stage, input, input, size, stage_range[stage]);
     882             : 
     883             :   // stage 1: copy the input into output; even slots take input[15],
                         // input[13], ... input[1] and odd slots take input[0], input[2], ...
                         // input[14].
     884           0 :   stage++;
     885           0 :   bf1 = output;
     886           0 :   bf1[0] = input[15];
     887           0 :   bf1[1] = input[0];
     888           0 :   bf1[2] = input[13];
     889           0 :   bf1[3] = input[2];
     890           0 :   bf1[4] = input[11];
     891           0 :   bf1[5] = input[4];
     892           0 :   bf1[6] = input[9];
     893           0 :   bf1[7] = input[6];
     894           0 :   bf1[8] = input[7];
     895           0 :   bf1[9] = input[8];
     896           0 :   bf1[10] = input[5];
     897           0 :   bf1[11] = input[10];
     898           0 :   bf1[12] = input[3];
     899           0 :   bf1[13] = input[12];
     900           0 :   bf1[14] = input[1];
     901           0 :   bf1[15] = input[14];
     902           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     903             : 
     904             :   // stage 2: each adjacent pair (2k, 2k+1) goes through half_btf with its
                         // own cospi weight pair (2/62, 10/54, ... 58/6); result in step.
     905           0 :   stage++;
     906           0 :   cospi = cospi_arr(cos_bit[stage]);
     907           0 :   bf0 = output;
     908           0 :   bf1 = step;
     909           0 :   bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
     910           0 :   bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]);
     911           0 :   bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
     912           0 :   bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]);
     913           0 :   bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
     914           0 :   bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], cos_bit[stage]);
     915           0 :   bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
     916           0 :   bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]);
     917           0 :   bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
     918           0 :   bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]);
     919           0 :   bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
     920           0 :   bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]);
     921           0 :   bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
     922           0 :   bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]);
     923           0 :   bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
     924           0 :   bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]);
     925           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     926             : 
     927             :   // stage 3: sums (0..7) and differences (8..15) of elements eight apart.
     928           0 :   stage++;
     929           0 :   bf0 = step;
     930           0 :   bf1 = output;
     931           0 :   bf1[0] = bf0[0] + bf0[8];
     932           0 :   bf1[1] = bf0[1] + bf0[9];
     933           0 :   bf1[2] = bf0[2] + bf0[10];
     934           0 :   bf1[3] = bf0[3] + bf0[11];
     935           0 :   bf1[4] = bf0[4] + bf0[12];
     936           0 :   bf1[5] = bf0[5] + bf0[13];
     937           0 :   bf1[6] = bf0[6] + bf0[14];
     938           0 :   bf1[7] = bf0[7] + bf0[15];
     939           0 :   bf1[8] = -bf0[8] + bf0[0];
     940           0 :   bf1[9] = -bf0[9] + bf0[1];
     941           0 :   bf1[10] = -bf0[10] + bf0[2];
     942           0 :   bf1[11] = -bf0[11] + bf0[3];
     943           0 :   bf1[12] = -bf0[12] + bf0[4];
     944           0 :   bf1[13] = -bf0[13] + bf0[5];
     945           0 :   bf1[14] = -bf0[14] + bf0[6];
     946           0 :   bf1[15] = -bf0[15] + bf0[7];
     947           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     948             : 
     949             :   // stage 4: elements 0..7 pass through; the pairs in 8..15 go through
                         // half_btf with cospi[8]/cospi[56] and cospi[40]/cospi[24] weights.
     950           0 :   stage++;
     951           0 :   cospi = cospi_arr(cos_bit[stage]);
     952           0 :   bf0 = output;
     953           0 :   bf1 = step;
     954           0 :   bf1[0] = bf0[0];
     955           0 :   bf1[1] = bf0[1];
     956           0 :   bf1[2] = bf0[2];
     957           0 :   bf1[3] = bf0[3];
     958           0 :   bf1[4] = bf0[4];
     959           0 :   bf1[5] = bf0[5];
     960           0 :   bf1[6] = bf0[6];
     961           0 :   bf1[7] = bf0[7];
     962           0 :   bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
     963           0 :   bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
     964           0 :   bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
     965           0 :   bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
     966           0 :   bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
     967           0 :   bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
     968           0 :   bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
     969           0 :   bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
     970           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     971             : 
     972             :   // stage 5: within each half of eight, sums and differences of elements
                         // four apart; result in output.
     973           0 :   stage++;
     974           0 :   bf0 = step;
     975           0 :   bf1 = output;
     976           0 :   bf1[0] = bf0[0] + bf0[4];
     977           0 :   bf1[1] = bf0[1] + bf0[5];
     978           0 :   bf1[2] = bf0[2] + bf0[6];
     979           0 :   bf1[3] = bf0[3] + bf0[7];
     980           0 :   bf1[4] = -bf0[4] + bf0[0];
     981           0 :   bf1[5] = -bf0[5] + bf0[1];
     982           0 :   bf1[6] = -bf0[6] + bf0[2];
     983           0 :   bf1[7] = -bf0[7] + bf0[3];
     984           0 :   bf1[8] = bf0[8] + bf0[12];
     985           0 :   bf1[9] = bf0[9] + bf0[13];
     986           0 :   bf1[10] = bf0[10] + bf0[14];
     987           0 :   bf1[11] = bf0[11] + bf0[15];
     988           0 :   bf1[12] = -bf0[12] + bf0[8];
     989           0 :   bf1[13] = -bf0[13] + bf0[9];
     990           0 :   bf1[14] = -bf0[14] + bf0[10];
     991           0 :   bf1[15] = -bf0[15] + bf0[11];
     992           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
     993             : 
     994             :   // stage 6: elements 0..3 and 8..11 pass through; the pairs in 4..7 and
                         // 12..15 go through half_btf with cospi[16]/cospi[48] weights.
     995           0 :   stage++;
     996           0 :   cospi = cospi_arr(cos_bit[stage]);
     997           0 :   bf0 = output;
     998           0 :   bf1 = step;
     999           0 :   bf1[0] = bf0[0];
    1000           0 :   bf1[1] = bf0[1];
    1001           0 :   bf1[2] = bf0[2];
    1002           0 :   bf1[3] = bf0[3];
    1003           0 :   bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
    1004           0 :   bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
    1005           0 :   bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
    1006           0 :   bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
    1007           0 :   bf1[8] = bf0[8];
    1008           0 :   bf1[9] = bf0[9];
    1009           0 :   bf1[10] = bf0[10];
    1010           0 :   bf1[11] = bf0[11];
    1011           0 :   bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
    1012           0 :   bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
    1013           0 :   bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
    1014           0 :   bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
    1015           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1016             : 
    1017             :   // stage 7: within each group of four, sums and differences of elements
                         // two apart; result in output.
    1018           0 :   stage++;
    1019           0 :   bf0 = step;
    1020           0 :   bf1 = output;
    1021           0 :   bf1[0] = bf0[0] + bf0[2];
    1022           0 :   bf1[1] = bf0[1] + bf0[3];
    1023           0 :   bf1[2] = -bf0[2] + bf0[0];
    1024           0 :   bf1[3] = -bf0[3] + bf0[1];
    1025           0 :   bf1[4] = bf0[4] + bf0[6];
    1026           0 :   bf1[5] = bf0[5] + bf0[7];
    1027           0 :   bf1[6] = -bf0[6] + bf0[4];
    1028           0 :   bf1[7] = -bf0[7] + bf0[5];
    1029           0 :   bf1[8] = bf0[8] + bf0[10];
    1030           0 :   bf1[9] = bf0[9] + bf0[11];
    1031           0 :   bf1[10] = -bf0[10] + bf0[8];
    1032           0 :   bf1[11] = -bf0[11] + bf0[9];
    1033           0 :   bf1[12] = bf0[12] + bf0[14];
    1034           0 :   bf1[13] = bf0[13] + bf0[15];
    1035           0 :   bf1[14] = -bf0[14] + bf0[12];
    1036           0 :   bf1[15] = -bf0[15] + bf0[13];
    1037           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1038             : 
    1039             :   // stage 8: the first two elements of each group of four pass through;
                         // the last two go through half_btf with cospi[32] weights.
    1040           0 :   stage++;
    1041           0 :   cospi = cospi_arr(cos_bit[stage]);
    1042           0 :   bf0 = output;
    1043           0 :   bf1 = step;
    1044           0 :   bf1[0] = bf0[0];
    1045           0 :   bf1[1] = bf0[1];
    1046           0 :   bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
    1047           0 :   bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
    1048           0 :   bf1[4] = bf0[4];
    1049           0 :   bf1[5] = bf0[5];
    1050           0 :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
    1051           0 :   bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
    1052           0 :   bf1[8] = bf0[8];
    1053           0 :   bf1[9] = bf0[9];
    1054           0 :   bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
    1055           0 :   bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
    1056           0 :   bf1[12] = bf0[12];
    1057           0 :   bf1[13] = bf0[13];
    1058           0 :   bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
    1059           0 :   bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
    1060           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1061             : 
    1062             :   // stage 9: final reordering into output; the odd-indexed outputs are
                         // negated.
    1063           0 :   stage++;
    1064           0 :   bf0 = step;
    1065           0 :   bf1 = output;
    1066           0 :   bf1[0] = bf0[0];
    1067           0 :   bf1[1] = -bf0[8];
    1068           0 :   bf1[2] = bf0[12];
    1069           0 :   bf1[3] = -bf0[4];
    1070           0 :   bf1[4] = bf0[6];
    1071           0 :   bf1[5] = -bf0[14];
    1072           0 :   bf1[6] = bf0[10];
    1073           0 :   bf1[7] = -bf0[2];
    1074           0 :   bf1[8] = bf0[3];
    1075           0 :   bf1[9] = -bf0[11];
    1076           0 :   bf1[10] = bf0[15];
    1077           0 :   bf1[11] = -bf0[7];
    1078           0 :   bf1[12] = bf0[5];
    1079           0 :   bf1[13] = -bf0[13];
    1080           0 :   bf1[14] = bf0[9];
    1081           0 :   bf1[15] = -bf0[1];
    1082           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1083           0 : }
    1084             : 
    1085           0 : void av1_fadst32_new(const int32_t *input, int32_t *output,
    1086             :                      const int8_t *cos_bit, const int8_t *stage_range) {
    1087           0 :   const int32_t size = 32;
    1088             :   const int32_t *cospi;
    1089             : 
    1090           0 :   int32_t stage = 0;
    1091             :   int32_t *bf0, *bf1;
    1092             :   int32_t step[32];
    1093             : 
    1094             :   // stage 0;
    1095           0 :   range_check(stage, input, input, size, stage_range[stage]);
    1096             : 
    1097             :   // stage 1;
    1098           0 :   stage++;
    1099           0 :   bf1 = output;
    1100           0 :   bf1[0] = input[31];
    1101           0 :   bf1[1] = input[0];
    1102           0 :   bf1[2] = input[29];
    1103           0 :   bf1[3] = input[2];
    1104           0 :   bf1[4] = input[27];
    1105           0 :   bf1[5] = input[4];
    1106           0 :   bf1[6] = input[25];
    1107           0 :   bf1[7] = input[6];
    1108           0 :   bf1[8] = input[23];
    1109           0 :   bf1[9] = input[8];
    1110           0 :   bf1[10] = input[21];
    1111           0 :   bf1[11] = input[10];
    1112           0 :   bf1[12] = input[19];
    1113           0 :   bf1[13] = input[12];
    1114           0 :   bf1[14] = input[17];
    1115           0 :   bf1[15] = input[14];
    1116           0 :   bf1[16] = input[15];
    1117           0 :   bf1[17] = input[16];
    1118           0 :   bf1[18] = input[13];
    1119           0 :   bf1[19] = input[18];
    1120           0 :   bf1[20] = input[11];
    1121           0 :   bf1[21] = input[20];
    1122           0 :   bf1[22] = input[9];
    1123           0 :   bf1[23] = input[22];
    1124           0 :   bf1[24] = input[7];
    1125           0 :   bf1[25] = input[24];
    1126           0 :   bf1[26] = input[5];
    1127           0 :   bf1[27] = input[26];
    1128           0 :   bf1[28] = input[3];
    1129           0 :   bf1[29] = input[28];
    1130           0 :   bf1[30] = input[1];
    1131           0 :   bf1[31] = input[30];
    1132           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1133             : 
    1134             :   // stage 2
    1135           0 :   stage++;
    1136           0 :   cospi = cospi_arr(cos_bit[stage]);
    1137           0 :   bf0 = output;
    1138           0 :   bf1 = step;
    1139           0 :   bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
    1140           0 :   bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]);
    1141           0 :   bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
    1142           0 :   bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]);
    1143           0 :   bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
    1144           0 :   bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]);
    1145           0 :   bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
    1146           0 :   bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]);
    1147           0 :   bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
    1148           0 :   bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]);
    1149           0 :   bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
    1150           0 :   bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit[stage]);
    1151           0 :   bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
    1152           0 :   bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]);
    1153           0 :   bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
    1154           0 :   bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]);
    1155           0 :   bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
    1156           0 :   bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]);
    1157           0 :   bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
    1158           0 :   bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]);
    1159           0 :   bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
    1160           0 :   bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]);
    1161           0 :   bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
    1162           0 :   bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]);
    1163           0 :   bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
    1164           0 :   bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]);
    1165           0 :   bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
    1166           0 :   bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]);
    1167           0 :   bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
    1168           0 :   bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]);
    1169           0 :   bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
    1170           0 :   bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]);
    1171           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1172             : 
    1173             :   // stage 3
    1174           0 :   stage++;
    1175           0 :   bf0 = step;
    1176           0 :   bf1 = output;
    1177           0 :   bf1[0] = bf0[0] + bf0[16];
    1178           0 :   bf1[1] = bf0[1] + bf0[17];
    1179           0 :   bf1[2] = bf0[2] + bf0[18];
    1180           0 :   bf1[3] = bf0[3] + bf0[19];
    1181           0 :   bf1[4] = bf0[4] + bf0[20];
    1182           0 :   bf1[5] = bf0[5] + bf0[21];
    1183           0 :   bf1[6] = bf0[6] + bf0[22];
    1184           0 :   bf1[7] = bf0[7] + bf0[23];
    1185           0 :   bf1[8] = bf0[8] + bf0[24];
    1186           0 :   bf1[9] = bf0[9] + bf0[25];
    1187           0 :   bf1[10] = bf0[10] + bf0[26];
    1188           0 :   bf1[11] = bf0[11] + bf0[27];
    1189           0 :   bf1[12] = bf0[12] + bf0[28];
    1190           0 :   bf1[13] = bf0[13] + bf0[29];
    1191           0 :   bf1[14] = bf0[14] + bf0[30];
    1192           0 :   bf1[15] = bf0[15] + bf0[31];
    1193           0 :   bf1[16] = -bf0[16] + bf0[0];
    1194           0 :   bf1[17] = -bf0[17] + bf0[1];
    1195           0 :   bf1[18] = -bf0[18] + bf0[2];
    1196           0 :   bf1[19] = -bf0[19] + bf0[3];
    1197           0 :   bf1[20] = -bf0[20] + bf0[4];
    1198           0 :   bf1[21] = -bf0[21] + bf0[5];
    1199           0 :   bf1[22] = -bf0[22] + bf0[6];
    1200           0 :   bf1[23] = -bf0[23] + bf0[7];
    1201           0 :   bf1[24] = -bf0[24] + bf0[8];
    1202           0 :   bf1[25] = -bf0[25] + bf0[9];
    1203           0 :   bf1[26] = -bf0[26] + bf0[10];
    1204           0 :   bf1[27] = -bf0[27] + bf0[11];
    1205           0 :   bf1[28] = -bf0[28] + bf0[12];
    1206           0 :   bf1[29] = -bf0[29] + bf0[13];
    1207           0 :   bf1[30] = -bf0[30] + bf0[14];
    1208           0 :   bf1[31] = -bf0[31] + bf0[15];
    1209           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1210             : 
    1211             :   // stage 4
    1212           0 :   stage++;
    1213           0 :   cospi = cospi_arr(cos_bit[stage]);
    1214           0 :   bf0 = output;
    1215           0 :   bf1 = step;
    1216           0 :   bf1[0] = bf0[0];
    1217           0 :   bf1[1] = bf0[1];
    1218           0 :   bf1[2] = bf0[2];
    1219           0 :   bf1[3] = bf0[3];
    1220           0 :   bf1[4] = bf0[4];
    1221           0 :   bf1[5] = bf0[5];
    1222           0 :   bf1[6] = bf0[6];
    1223           0 :   bf1[7] = bf0[7];
    1224           0 :   bf1[8] = bf0[8];
    1225           0 :   bf1[9] = bf0[9];
    1226           0 :   bf1[10] = bf0[10];
    1227           0 :   bf1[11] = bf0[11];
    1228           0 :   bf1[12] = bf0[12];
    1229           0 :   bf1[13] = bf0[13];
    1230           0 :   bf1[14] = bf0[14];
    1231           0 :   bf1[15] = bf0[15];
    1232           0 :   bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
    1233           0 :   bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]);
    1234           0 :   bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
    1235           0 :   bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]);
    1236           0 :   bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
    1237           0 :   bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]);
    1238           0 :   bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
    1239           0 :   bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]);
    1240           0 :   bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
    1241           0 :   bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]);
    1242           0 :   bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
    1243           0 :   bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]);
    1244           0 :   bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
    1245           0 :   bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]);
    1246           0 :   bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
    1247           0 :   bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]);
    1248           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1249             : 
    1250             :   // stage 5
    1251           0 :   stage++;
    1252           0 :   bf0 = step;
    1253           0 :   bf1 = output;
    1254           0 :   bf1[0] = bf0[0] + bf0[8];
    1255           0 :   bf1[1] = bf0[1] + bf0[9];
    1256           0 :   bf1[2] = bf0[2] + bf0[10];
    1257           0 :   bf1[3] = bf0[3] + bf0[11];
    1258           0 :   bf1[4] = bf0[4] + bf0[12];
    1259           0 :   bf1[5] = bf0[5] + bf0[13];
    1260           0 :   bf1[6] = bf0[6] + bf0[14];
    1261           0 :   bf1[7] = bf0[7] + bf0[15];
    1262           0 :   bf1[8] = -bf0[8] + bf0[0];
    1263           0 :   bf1[9] = -bf0[9] + bf0[1];
    1264           0 :   bf1[10] = -bf0[10] + bf0[2];
    1265           0 :   bf1[11] = -bf0[11] + bf0[3];
    1266           0 :   bf1[12] = -bf0[12] + bf0[4];
    1267           0 :   bf1[13] = -bf0[13] + bf0[5];
    1268           0 :   bf1[14] = -bf0[14] + bf0[6];
    1269           0 :   bf1[15] = -bf0[15] + bf0[7];
    1270           0 :   bf1[16] = bf0[16] + bf0[24];
    1271           0 :   bf1[17] = bf0[17] + bf0[25];
    1272           0 :   bf1[18] = bf0[18] + bf0[26];
    1273           0 :   bf1[19] = bf0[19] + bf0[27];
    1274           0 :   bf1[20] = bf0[20] + bf0[28];
    1275           0 :   bf1[21] = bf0[21] + bf0[29];
    1276           0 :   bf1[22] = bf0[22] + bf0[30];
    1277           0 :   bf1[23] = bf0[23] + bf0[31];
    1278           0 :   bf1[24] = -bf0[24] + bf0[16];
    1279           0 :   bf1[25] = -bf0[25] + bf0[17];
    1280           0 :   bf1[26] = -bf0[26] + bf0[18];
    1281           0 :   bf1[27] = -bf0[27] + bf0[19];
    1282           0 :   bf1[28] = -bf0[28] + bf0[20];
    1283           0 :   bf1[29] = -bf0[29] + bf0[21];
    1284           0 :   bf1[30] = -bf0[30] + bf0[22];
    1285           0 :   bf1[31] = -bf0[31] + bf0[23];
    1286           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1287             : 
    1288             :   // stage 6
    1289           0 :   stage++;
    1290           0 :   cospi = cospi_arr(cos_bit[stage]);
    1291           0 :   bf0 = output;
    1292           0 :   bf1 = step;
    1293           0 :   bf1[0] = bf0[0];
    1294           0 :   bf1[1] = bf0[1];
    1295           0 :   bf1[2] = bf0[2];
    1296           0 :   bf1[3] = bf0[3];
    1297           0 :   bf1[4] = bf0[4];
    1298           0 :   bf1[5] = bf0[5];
    1299           0 :   bf1[6] = bf0[6];
    1300           0 :   bf1[7] = bf0[7];
    1301           0 :   bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
    1302           0 :   bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
    1303           0 :   bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
    1304           0 :   bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
    1305           0 :   bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
    1306           0 :   bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
    1307           0 :   bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
    1308           0 :   bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
    1309           0 :   bf1[16] = bf0[16];
    1310           0 :   bf1[17] = bf0[17];
    1311           0 :   bf1[18] = bf0[18];
    1312           0 :   bf1[19] = bf0[19];
    1313           0 :   bf1[20] = bf0[20];
    1314           0 :   bf1[21] = bf0[21];
    1315           0 :   bf1[22] = bf0[22];
    1316           0 :   bf1[23] = bf0[23];
    1317           0 :   bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
    1318           0 :   bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]);
    1319           0 :   bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
    1320           0 :   bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]);
    1321           0 :   bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
    1322           0 :   bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]);
    1323           0 :   bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
    1324           0 :   bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]);
    1325           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1326             : 
    1327             :   // stage 7
    1328           0 :   stage++;
    1329           0 :   bf0 = step;
    1330           0 :   bf1 = output;
    1331           0 :   bf1[0] = bf0[0] + bf0[4];
    1332           0 :   bf1[1] = bf0[1] + bf0[5];
    1333           0 :   bf1[2] = bf0[2] + bf0[6];
    1334           0 :   bf1[3] = bf0[3] + bf0[7];
    1335           0 :   bf1[4] = -bf0[4] + bf0[0];
    1336           0 :   bf1[5] = -bf0[5] + bf0[1];
    1337           0 :   bf1[6] = -bf0[6] + bf0[2];
    1338           0 :   bf1[7] = -bf0[7] + bf0[3];
    1339           0 :   bf1[8] = bf0[8] + bf0[12];
    1340           0 :   bf1[9] = bf0[9] + bf0[13];
    1341           0 :   bf1[10] = bf0[10] + bf0[14];
    1342           0 :   bf1[11] = bf0[11] + bf0[15];
    1343           0 :   bf1[12] = -bf0[12] + bf0[8];
    1344           0 :   bf1[13] = -bf0[13] + bf0[9];
    1345           0 :   bf1[14] = -bf0[14] + bf0[10];
    1346           0 :   bf1[15] = -bf0[15] + bf0[11];
    1347           0 :   bf1[16] = bf0[16] + bf0[20];
    1348           0 :   bf1[17] = bf0[17] + bf0[21];
    1349           0 :   bf1[18] = bf0[18] + bf0[22];
    1350           0 :   bf1[19] = bf0[19] + bf0[23];
    1351           0 :   bf1[20] = -bf0[20] + bf0[16];
    1352           0 :   bf1[21] = -bf0[21] + bf0[17];
    1353           0 :   bf1[22] = -bf0[22] + bf0[18];
    1354           0 :   bf1[23] = -bf0[23] + bf0[19];
    1355           0 :   bf1[24] = bf0[24] + bf0[28];
    1356           0 :   bf1[25] = bf0[25] + bf0[29];
    1357           0 :   bf1[26] = bf0[26] + bf0[30];
    1358           0 :   bf1[27] = bf0[27] + bf0[31];
    1359           0 :   bf1[28] = -bf0[28] + bf0[24];
    1360           0 :   bf1[29] = -bf0[29] + bf0[25];
    1361           0 :   bf1[30] = -bf0[30] + bf0[26];
    1362           0 :   bf1[31] = -bf0[31] + bf0[27];
    1363           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1364             : 
    1365             :   // stage 8
    1366           0 :   stage++;
    1367           0 :   cospi = cospi_arr(cos_bit[stage]);
    1368           0 :   bf0 = output;
    1369           0 :   bf1 = step;
    1370           0 :   bf1[0] = bf0[0];
    1371           0 :   bf1[1] = bf0[1];
    1372           0 :   bf1[2] = bf0[2];
    1373           0 :   bf1[3] = bf0[3];
    1374           0 :   bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
    1375           0 :   bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
    1376           0 :   bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
    1377           0 :   bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
    1378           0 :   bf1[8] = bf0[8];
    1379           0 :   bf1[9] = bf0[9];
    1380           0 :   bf1[10] = bf0[10];
    1381           0 :   bf1[11] = bf0[11];
    1382           0 :   bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
    1383           0 :   bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
    1384           0 :   bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
    1385           0 :   bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
    1386           0 :   bf1[16] = bf0[16];
    1387           0 :   bf1[17] = bf0[17];
    1388           0 :   bf1[18] = bf0[18];
    1389           0 :   bf1[19] = bf0[19];
    1390           0 :   bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
    1391           0 :   bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]);
    1392           0 :   bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
    1393           0 :   bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]);
    1394           0 :   bf1[24] = bf0[24];
    1395           0 :   bf1[25] = bf0[25];
    1396           0 :   bf1[26] = bf0[26];
    1397           0 :   bf1[27] = bf0[27];
    1398           0 :   bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
    1399           0 :   bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]);
    1400           0 :   bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
    1401           0 :   bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]);
    1402           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1403             : 
    1404             :   // stage 9
    1405           0 :   stage++;
    1406           0 :   bf0 = step;
    1407           0 :   bf1 = output;
    1408           0 :   bf1[0] = bf0[0] + bf0[2];
    1409           0 :   bf1[1] = bf0[1] + bf0[3];
    1410           0 :   bf1[2] = -bf0[2] + bf0[0];
    1411           0 :   bf1[3] = -bf0[3] + bf0[1];
    1412           0 :   bf1[4] = bf0[4] + bf0[6];
    1413           0 :   bf1[5] = bf0[5] + bf0[7];
    1414           0 :   bf1[6] = -bf0[6] + bf0[4];
    1415           0 :   bf1[7] = -bf0[7] + bf0[5];
    1416           0 :   bf1[8] = bf0[8] + bf0[10];
    1417           0 :   bf1[9] = bf0[9] + bf0[11];
    1418           0 :   bf1[10] = -bf0[10] + bf0[8];
    1419           0 :   bf1[11] = -bf0[11] + bf0[9];
    1420           0 :   bf1[12] = bf0[12] + bf0[14];
    1421           0 :   bf1[13] = bf0[13] + bf0[15];
    1422           0 :   bf1[14] = -bf0[14] + bf0[12];
    1423           0 :   bf1[15] = -bf0[15] + bf0[13];
    1424           0 :   bf1[16] = bf0[16] + bf0[18];
    1425           0 :   bf1[17] = bf0[17] + bf0[19];
    1426           0 :   bf1[18] = -bf0[18] + bf0[16];
    1427           0 :   bf1[19] = -bf0[19] + bf0[17];
    1428           0 :   bf1[20] = bf0[20] + bf0[22];
    1429           0 :   bf1[21] = bf0[21] + bf0[23];
    1430           0 :   bf1[22] = -bf0[22] + bf0[20];
    1431           0 :   bf1[23] = -bf0[23] + bf0[21];
    1432           0 :   bf1[24] = bf0[24] + bf0[26];
    1433           0 :   bf1[25] = bf0[25] + bf0[27];
    1434           0 :   bf1[26] = -bf0[26] + bf0[24];
    1435           0 :   bf1[27] = -bf0[27] + bf0[25];
    1436           0 :   bf1[28] = bf0[28] + bf0[30];
    1437           0 :   bf1[29] = bf0[29] + bf0[31];
    1438           0 :   bf1[30] = -bf0[30] + bf0[28];
    1439           0 :   bf1[31] = -bf0[31] + bf0[29];
    1440           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1441             : 
    1442             :   // stage 10
    1443           0 :   stage++;
    1444           0 :   cospi = cospi_arr(cos_bit[stage]);
    1445           0 :   bf0 = output;
    1446           0 :   bf1 = step;
    1447           0 :   bf1[0] = bf0[0];
    1448           0 :   bf1[1] = bf0[1];
    1449           0 :   bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
    1450           0 :   bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
    1451           0 :   bf1[4] = bf0[4];
    1452           0 :   bf1[5] = bf0[5];
    1453           0 :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
    1454           0 :   bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
    1455           0 :   bf1[8] = bf0[8];
    1456           0 :   bf1[9] = bf0[9];
    1457           0 :   bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
    1458           0 :   bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
    1459           0 :   bf1[12] = bf0[12];
    1460           0 :   bf1[13] = bf0[13];
    1461           0 :   bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
    1462           0 :   bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
    1463           0 :   bf1[16] = bf0[16];
    1464           0 :   bf1[17] = bf0[17];
    1465           0 :   bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
    1466           0 :   bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]);
    1467           0 :   bf1[20] = bf0[20];
    1468           0 :   bf1[21] = bf0[21];
    1469           0 :   bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
    1470           0 :   bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]);
    1471           0 :   bf1[24] = bf0[24];
    1472           0 :   bf1[25] = bf0[25];
    1473           0 :   bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
    1474           0 :   bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]);
    1475           0 :   bf1[28] = bf0[28];
    1476           0 :   bf1[29] = bf0[29];
    1477           0 :   bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
    1478           0 :   bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]);
    1479           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1480             : 
    1481             :   // stage 11
    1482           0 :   stage++;
    1483           0 :   bf0 = step;
    1484           0 :   bf1 = output;
    1485           0 :   bf1[0] = bf0[0];
    1486           0 :   bf1[1] = -bf0[16];
    1487           0 :   bf1[2] = bf0[24];
    1488           0 :   bf1[3] = -bf0[8];
    1489           0 :   bf1[4] = bf0[12];
    1490           0 :   bf1[5] = -bf0[28];
    1491           0 :   bf1[6] = bf0[20];
    1492           0 :   bf1[7] = -bf0[4];
    1493           0 :   bf1[8] = bf0[6];
    1494           0 :   bf1[9] = -bf0[22];
    1495           0 :   bf1[10] = bf0[30];
    1496           0 :   bf1[11] = -bf0[14];
    1497           0 :   bf1[12] = bf0[10];
    1498           0 :   bf1[13] = -bf0[26];
    1499           0 :   bf1[14] = bf0[18];
    1500           0 :   bf1[15] = -bf0[2];
    1501           0 :   bf1[16] = bf0[3];
    1502           0 :   bf1[17] = -bf0[19];
    1503           0 :   bf1[18] = bf0[27];
    1504           0 :   bf1[19] = -bf0[11];
    1505           0 :   bf1[20] = bf0[15];
    1506           0 :   bf1[21] = -bf0[31];
    1507           0 :   bf1[22] = bf0[23];
    1508           0 :   bf1[23] = -bf0[7];
    1509           0 :   bf1[24] = bf0[5];
    1510           0 :   bf1[25] = -bf0[21];
    1511           0 :   bf1[26] = bf0[29];
    1512           0 :   bf1[27] = -bf0[13];
    1513           0 :   bf1[28] = bf0[9];
    1514           0 :   bf1[29] = -bf0[25];
    1515           0 :   bf1[30] = bf0[17];
    1516           0 :   bf1[31] = -bf0[1];
    1517           0 :   range_check(stage, input, bf1, size, stage_range[stage]);
    1518           0 : }
    1519             : 
    1520             : #if CONFIG_EXT_TX
    1521           0 : void av1_fidentity4_c(const int32_t *input, int32_t *output,
    1522             :                       const int8_t *cos_bit, const int8_t *stage_range) {
    1523             :   (void)cos_bit;
    1524           0 :   for (int i = 0; i < 4; ++i)
    1525           0 :     output[i] = (int32_t)dct_const_round_shift(input[i] * Sqrt2);
    1526             :   range_check(0, input, output, 4, stage_range[0]);
    1527           0 : }
    1528             : 
    1529           0 : void av1_fidentity8_c(const int32_t *input, int32_t *output,
    1530             :                       const int8_t *cos_bit, const int8_t *stage_range) {
    1531             :   (void)cos_bit;
    1532           0 :   for (int i = 0; i < 8; ++i) output[i] = input[i] * 2;
    1533             :   range_check(0, input, output, 8, stage_range[0]);
    1534           0 : }
    1535             : 
    1536           0 : void av1_fidentity16_c(const int32_t *input, int32_t *output,
    1537             :                        const int8_t *cos_bit, const int8_t *stage_range) {
    1538             :   (void)cos_bit;
    1539           0 :   for (int i = 0; i < 16; ++i)
    1540           0 :     output[i] = (int32_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
    1541             :   range_check(0, input, output, 16, stage_range[0]);
    1542           0 : }
    1543             : 
    1544           0 : void av1_fidentity32_c(const int32_t *input, int32_t *output,
    1545             :                        const int8_t *cos_bit, const int8_t *stage_range) {
    1546             :   (void)cos_bit;
    1547           0 :   for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
    1548             :   range_check(0, input, output, 32, stage_range[0]);
    1549           0 : }
    1550             : #endif  // CONFIG_EXT_TX
    1551             : 
    1552             : #if CONFIG_TX64X64
    1553             : void av1_fdct64_new(const int32_t *input, int32_t *output,
    1554             :                     const int8_t *cos_bit, const int8_t *stage_range) {
    1555             :   const int32_t size = 64;
    1556             :   const int32_t *cospi;
    1557             : 
    1558             :   int32_t stage = 0;
    1559             :   int32_t *bf0, *bf1;
    1560             :   int32_t step[64];
    1561             : 
    1562             :   // stage 0;
    1563             :   range_check(stage, input, input, size, stage_range[stage]);
    1564             : 
    1565             :   // stage 1;
    1566             :   stage++;
    1567             :   cospi = cospi_arr(cos_bit[stage]);
    1568             :   bf1 = output;
    1569             :   bf1[0] = input[0] + input[63];
    1570             :   bf1[1] = input[1] + input[62];
    1571             :   bf1[2] = input[2] + input[61];
    1572             :   bf1[3] = input[3] + input[60];
    1573             :   bf1[4] = input[4] + input[59];
    1574             :   bf1[5] = input[5] + input[58];
    1575             :   bf1[6] = input[6] + input[57];
    1576             :   bf1[7] = input[7] + input[56];
    1577             :   bf1[8] = input[8] + input[55];
    1578             :   bf1[9] = input[9] + input[54];
    1579             :   bf1[10] = input[10] + input[53];
    1580             :   bf1[11] = input[11] + input[52];
    1581             :   bf1[12] = input[12] + input[51];
    1582             :   bf1[13] = input[13] + input[50];
    1583             :   bf1[14] = input[14] + input[49];
    1584             :   bf1[15] = input[15] + input[48];
    1585             :   bf1[16] = input[16] + input[47];
    1586             :   bf1[17] = input[17] + input[46];
    1587             :   bf1[18] = input[18] + input[45];
    1588             :   bf1[19] = input[19] + input[44];
    1589             :   bf1[20] = input[20] + input[43];
    1590             :   bf1[21] = input[21] + input[42];
    1591             :   bf1[22] = input[22] + input[41];
    1592             :   bf1[23] = input[23] + input[40];
    1593             :   bf1[24] = input[24] + input[39];
    1594             :   bf1[25] = input[25] + input[38];
    1595             :   bf1[26] = input[26] + input[37];
    1596             :   bf1[27] = input[27] + input[36];
    1597             :   bf1[28] = input[28] + input[35];
    1598             :   bf1[29] = input[29] + input[34];
    1599             :   bf1[30] = input[30] + input[33];
    1600             :   bf1[31] = input[31] + input[32];
    1601             :   bf1[32] = -input[32] + input[31];
    1602             :   bf1[33] = -input[33] + input[30];
    1603             :   bf1[34] = -input[34] + input[29];
    1604             :   bf1[35] = -input[35] + input[28];
    1605             :   bf1[36] = -input[36] + input[27];
    1606             :   bf1[37] = -input[37] + input[26];
    1607             :   bf1[38] = -input[38] + input[25];
    1608             :   bf1[39] = -input[39] + input[24];
    1609             :   bf1[40] = -input[40] + input[23];
    1610             :   bf1[41] = -input[41] + input[22];
    1611             :   bf1[42] = -input[42] + input[21];
    1612             :   bf1[43] = -input[43] + input[20];
    1613             :   bf1[44] = -input[44] + input[19];
    1614             :   bf1[45] = -input[45] + input[18];
    1615             :   bf1[46] = -input[46] + input[17];
    1616             :   bf1[47] = -input[47] + input[16];
    1617             :   bf1[48] = -input[48] + input[15];
    1618             :   bf1[49] = -input[49] + input[14];
    1619             :   bf1[50] = -input[50] + input[13];
    1620             :   bf1[51] = -input[51] + input[12];
    1621             :   bf1[52] = -input[52] + input[11];
    1622             :   bf1[53] = -input[53] + input[10];
    1623             :   bf1[54] = -input[54] + input[9];
    1624             :   bf1[55] = -input[55] + input[8];
    1625             :   bf1[56] = -input[56] + input[7];
    1626             :   bf1[57] = -input[57] + input[6];
    1627             :   bf1[58] = -input[58] + input[5];
    1628             :   bf1[59] = -input[59] + input[4];
    1629             :   bf1[60] = -input[60] + input[3];
    1630             :   bf1[61] = -input[61] + input[2];
    1631             :   bf1[62] = -input[62] + input[1];
    1632             :   bf1[63] = -input[63] + input[0];
    1633             :   range_check(stage, input, bf1, size, stage_range[stage]);
    1634             : 
    1635             :   // stage 2
    1636             :   stage++;
    1637             :   cospi = cospi_arr(cos_bit[stage]);
    1638             :   bf0 = output;
    1639             :   bf1 = step;
    1640             :   bf1[0] = bf0[0] + bf0[31];
    1641             :   bf1[1] = bf0[1] + bf0[30];
    1642             :   bf1[2] = bf0[2] + bf0[29];
    1643             :   bf1[3] = bf0[3] + bf0[28];
    1644             :   bf1[4] = bf0[4] + bf0[27];
    1645             :   bf1[5] = bf0[5] + bf0[26];
    1646             :   bf1[6] = bf0[6] + bf0[25];
    1647             :   bf1[7] = bf0[7] + bf0[24];
    1648             :   bf1[8] = bf0[8] + bf0[23];
    1649             :   bf1[9] = bf0[9] + bf0[22];
    1650             :   bf1[10] = bf0[10] + bf0[21];
    1651             :   bf1[11] = bf0[11] + bf0[20];
    1652             :   bf1[12] = bf0[12] + bf0[19];
    1653             :   bf1[13] = bf0[13] + bf0[18];
    1654             :   bf1[14] = bf0[14] + bf0[17];
    1655             :   bf1[15] = bf0[15] + bf0[16];
    1656             :   bf1[16] = -bf0[16] + bf0[15];
    1657             :   bf1[17] = -bf0[17] + bf0[14];
    1658             :   bf1[18] = -bf0[18] + bf0[13];
    1659             :   bf1[19] = -bf0[19] + bf0[12];
    1660             :   bf1[20] = -bf0[20] + bf0[11];
    1661             :   bf1[21] = -bf0[21] + bf0[10];
    1662             :   bf1[22] = -bf0[22] + bf0[9];
    1663             :   bf1[23] = -bf0[23] + bf0[8];
    1664             :   bf1[24] = -bf0[24] + bf0[7];
    1665             :   bf1[25] = -bf0[25] + bf0[6];
    1666             :   bf1[26] = -bf0[26] + bf0[5];
    1667             :   bf1[27] = -bf0[27] + bf0[4];
    1668             :   bf1[28] = -bf0[28] + bf0[3];
    1669             :   bf1[29] = -bf0[29] + bf0[2];
    1670             :   bf1[30] = -bf0[30] + bf0[1];
    1671             :   bf1[31] = -bf0[31] + bf0[0];
    1672             :   bf1[32] = bf0[32];
    1673             :   bf1[33] = bf0[33];
    1674             :   bf1[34] = bf0[34];
    1675             :   bf1[35] = bf0[35];
    1676             :   bf1[36] = bf0[36];
    1677             :   bf1[37] = bf0[37];
    1678             :   bf1[38] = bf0[38];
    1679             :   bf1[39] = bf0[39];
    1680             :   bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
    1681             :   bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
    1682             :   bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
    1683             :   bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
    1684             :   bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
    1685             :   bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
    1686             :   bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
    1687             :   bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
    1688             :   bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
    1689             :   bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
    1690             :   bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
    1691             :   bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
    1692             :   bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
    1693             :   bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
    1694             :   bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
    1695             :   bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
    1696             :   bf1[56] = bf0[56];
    1697             :   bf1[57] = bf0[57];
    1698             :   bf1[58] = bf0[58];
    1699             :   bf1[59] = bf0[59];
    1700             :   bf1[60] = bf0[60];
    1701             :   bf1[61] = bf0[61];
    1702             :   bf1[62] = bf0[62];
    1703             :   bf1[63] = bf0[63];
    1704             :   range_check(stage, input, bf1, size, stage_range[stage]);
    1705             : 
    1706             :   // stage 3
    1707             :   stage++;
    1708             :   cospi = cospi_arr(cos_bit[stage]);
    1709             :   bf0 = step;
    1710             :   bf1 = output;
    1711             :   bf1[0] = bf0[0] + bf0[15];
    1712             :   bf1[1] = bf0[1] + bf0[14];
    1713             :   bf1[2] = bf0[2] + bf0[13];
    1714             :   bf1[3] = bf0[3] + bf0[12];
    1715             :   bf1[4] = bf0[4] + bf0[11];
    1716             :   bf1[5] = bf0[5] + bf0[10];
    1717             :   bf1[6] = bf0[6] + bf0[9];
    1718             :   bf1[7] = bf0[7] + bf0[8];
    1719             :   bf1[8] = -bf0[8] + bf0[7];
    1720             :   bf1[9] = -bf0[9] + bf0[6];
    1721             :   bf1[10] = -bf0[10] + bf0[5];
    1722             :   bf1[11] = -bf0[11] + bf0[4];
    1723             :   bf1[12] = -bf0[12] + bf0[3];
    1724             :   bf1[13] = -bf0[13] + bf0[2];
    1725             :   bf1[14] = -bf0[14] + bf0[1];
    1726             :   bf1[15] = -bf0[15] + bf0[0];
    1727             :   bf1[16] = bf0[16];
    1728             :   bf1[17] = bf0[17];
    1729             :   bf1[18] = bf0[18];
    1730             :   bf1[19] = bf0[19];
    1731             :   bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
    1732             :   bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
    1733             :   bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
    1734             :   bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
    1735             :   bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
    1736             :   bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
    1737             :   bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
    1738             :   bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
    1739             :   bf1[28] = bf0[28];
    1740             :   bf1[29] = bf0[29];
    1741             :   bf1[30] = bf0[30];
    1742             :   bf1[31] = bf0[31];
    1743             :   bf1[32] = bf0[32] + bf0[47];
    1744             :   bf1[33] = bf0[33] + bf0[46];
    1745             :   bf1[34] = bf0[34] + bf0[45];
    1746             :   bf1[35] = bf0[35] + bf0[44];
    1747             :   bf1[36] = bf0[36] + bf0[43];
    1748             :   bf1[37] = bf0[37] + bf0[42];
    1749             :   bf1[38] = bf0[38] + bf0[41];
    1750             :   bf1[39] = bf0[39] + bf0[40];
    1751             :   bf1[40] = -bf0[40] + bf0[39];
    1752             :   bf1[41] = -bf0[41] + bf0[38];
    1753             :   bf1[42] = -bf0[42] + bf0[37];
    1754             :   bf1[43] = -bf0[43] + bf0[36];
    1755             :   bf1[44] = -bf0[44] + bf0[35];
    1756             :   bf1[45] = -bf0[45] + bf0[34];
    1757             :   bf1[46] = -bf0[46] + bf0[33];
    1758             :   bf1[47] = -bf0[47] + bf0[32];
    1759             :   bf1[48] = -bf0[48] + bf0[63];
    1760             :   bf1[49] = -bf0[49] + bf0[62];
    1761             :   bf1[50] = -bf0[50] + bf0[61];
    1762             :   bf1[51] = -bf0[51] + bf0[60];
    1763             :   bf1[52] = -bf0[52] + bf0[59];
    1764             :   bf1[53] = -bf0[53] + bf0[58];
    1765             :   bf1[54] = -bf0[54] + bf0[57];
    1766             :   bf1[55] = -bf0[55] + bf0[56];
    1767             :   bf1[56] = bf0[56] + bf0[55];
    1768             :   bf1[57] = bf0[57] + bf0[54];
    1769             :   bf1[58] = bf0[58] + bf0[53];
    1770             :   bf1[59] = bf0[59] + bf0[52];
    1771             :   bf1[60] = bf0[60] + bf0[51];
    1772             :   bf1[61] = bf0[61] + bf0[50];
    1773             :   bf1[62] = bf0[62] + bf0[49];
    1774             :   bf1[63] = bf0[63] + bf0[48];
    1775             :   range_check(stage, input, bf1, size, stage_range[stage]);
    1776             : 
    1777             :   // stage 4
    1778             :   stage++;
    1779             :   cospi = cospi_arr(cos_bit[stage]);
    1780             :   bf0 = output;
    1781             :   bf1 = step;
    1782             :   bf1[0] = bf0[0] + bf0[7];
    1783             :   bf1[1] = bf0[1] + bf0[6];
    1784             :   bf1[2] = bf0[2] + bf0[5];
    1785             :   bf1[3] = bf0[3] + bf0[4];
    1786             :   bf1[4] = -bf0[4] + bf0[3];
    1787             :   bf1[5] = -bf0[5] + bf0[2];
    1788             :   bf1[6] = -bf0[6] + bf0[1];
    1789             :   bf1[7] = -bf0[7] + bf0[0];
    1790             :   bf1[8] = bf0[8];
    1791             :   bf1[9] = bf0[9];
    1792             :   bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
    1793             :   bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
    1794             :   bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
    1795             :   bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
    1796             :   bf1[14] = bf0[14];
    1797             :   bf1[15] = bf0[15];
    1798             :   bf1[16] = bf0[16] + bf0[23];
    1799             :   bf1[17] = bf0[17] + bf0[22];
    1800             :   bf1[18] = bf0[18] + bf0[21];
    1801             :   bf1[19] = bf0[19] + bf0[20];
    1802             :   bf1[20] = -bf0[20] + bf0[19];
    1803             :   bf1[21] = -bf0[21] + bf0[18];
    1804             :   bf1[22] = -bf0[22] + bf0[17];
    1805             :   bf1[23] = -bf0[23] + bf0[16];
    1806             :   bf1[24] = -bf0[24] + bf0[31];
    1807             :   bf1[25] = -bf0[25] + bf0[30];
    1808             :   bf1[26] = -bf0[26] + bf0[29];
    1809             :   bf1[27] = -bf0[27] + bf0[28];
    1810             :   bf1[28] = bf0[28] + bf0[27];
    1811             :   bf1[29] = bf0[29] + bf0[26];
    1812             :   bf1[30] = bf0[30] + bf0[25];
    1813             :   bf1[31] = bf0[31] + bf0[24];
    1814             :   bf1[32] = bf0[32];
    1815             :   bf1[33] = bf0[33];
    1816             :   bf1[34] = bf0[34];
    1817             :   bf1[35] = bf0[35];
    1818             :   bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
    1819             :   bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
    1820             :   bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
    1821             :   bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
    1822             :   bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
    1823             :   bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
    1824             :   bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
    1825             :   bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
    1826             :   bf1[44] = bf0[44];
    1827             :   bf1[45] = bf0[45];
    1828             :   bf1[46] = bf0[46];
    1829             :   bf1[47] = bf0[47];
    1830             :   bf1[48] = bf0[48];
    1831             :   bf1[49] = bf0[49];
    1832             :   bf1[50] = bf0[50];
    1833             :   bf1[51] = bf0[51];
    1834             :   bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
    1835             :   bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
    1836             :   bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
    1837             :   bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
    1838             :   bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
    1839             :   bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
    1840             :   bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
    1841             :   bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
    1842             :   bf1[60] = bf0[60];
    1843             :   bf1[61] = bf0[61];
    1844             :   bf1[62] = bf0[62];
    1845             :   bf1[63] = bf0[63];
    1846             :   range_check(stage, input, bf1, size, stage_range[stage]);
    1847             : 
    1848             :   // stage 5
    1849             :   stage++;
    1850             :   cospi = cospi_arr(cos_bit[stage]);
    1851             :   bf0 = step;
    1852             :   bf1 = output;
    1853             :   bf1[0] = bf0[0] + bf0[3];
    1854             :   bf1[1] = bf0[1] + bf0[2];
    1855             :   bf1[2] = -bf0[2] + bf0[1];
    1856             :   bf1[3] = -bf0[3] + bf0[0];
    1857             :   bf1[4] = bf0[4];
    1858             :   bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
    1859             :   bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
    1860             :   bf1[7] = bf0[7];
    1861             :   bf1[8] = bf0[8] + bf0[11];
    1862             :   bf1[9] = bf0[9] + bf0[10];
    1863             :   bf1[10] = -bf0[10] + bf0[9];
    1864             :   bf1[11] = -bf0[11] + bf0[8];
    1865             :   bf1[12] = -bf0[12] + bf0[15];
    1866             :   bf1[13] = -bf0[13] + bf0[14];
    1867             :   bf1[14] = bf0[14] + bf0[13];
    1868             :   bf1[15] = bf0[15] + bf0[12];
    1869             :   bf1[16] = bf0[16];
    1870             :   bf1[17] = bf0[17];
    1871             :   bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
    1872             :   bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
    1873             :   bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
    1874             :   bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
    1875             :   bf1[22] = bf0[22];
    1876             :   bf1[23] = bf0[23];
    1877             :   bf1[24] = bf0[24];
    1878             :   bf1[25] = bf0[25];
    1879             :   bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
    1880             :   bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
    1881             :   bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
    1882             :   bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
    1883             :   bf1[30] = bf0[30];
    1884             :   bf1[31] = bf0[31];
    1885             :   bf1[32] = bf0[32] + bf0[39];
    1886             :   bf1[33] = bf0[33] + bf0[38];
    1887             :   bf1[34] = bf0[34] + bf0[37];
    1888             :   bf1[35] = bf0[35] + bf0[36];
    1889             :   bf1[36] = -bf0[36] + bf0[35];
    1890             :   bf1[37] = -bf0[37] + bf0[34];
    1891             :   bf1[38] = -bf0[38] + bf0[33];
    1892             :   bf1[39] = -bf0[39] + bf0[32];
    1893             :   bf1[40] = -bf0[40] + bf0[47];
    1894             :   bf1[41] = -bf0[41] + bf0[46];
    1895             :   bf1[42] = -bf0[42] + bf0[45];
    1896             :   bf1[43] = -bf0[43] + bf0[44];
    1897             :   bf1[44] = bf0[44] + bf0[43];
    1898             :   bf1[45] = bf0[45] + bf0[42];
    1899             :   bf1[46] = bf0[46] + bf0[41];
    1900             :   bf1[47] = bf0[47] + bf0[40];
    1901             :   bf1[48] = bf0[48] + bf0[55];
    1902             :   bf1[49] = bf0[49] + bf0[54];
    1903             :   bf1[50] = bf0[50] + bf0[53];
    1904             :   bf1[51] = bf0[51] + bf0[52];
    1905             :   bf1[52] = -bf0[52] + bf0[51];
    1906             :   bf1[53] = -bf0[53] + bf0[50];
    1907             :   bf1[54] = -bf0[54] + bf0[49];
    1908             :   bf1[55] = -bf0[55] + bf0[48];
    1909             :   bf1[56] = -bf0[56] + bf0[63];
    1910             :   bf1[57] = -bf0[57] + bf0[62];
    1911             :   bf1[58] = -bf0[58] + bf0[61];
    1912             :   bf1[59] = -bf0[59] + bf0[60];
    1913             :   bf1[60] = bf0[60] + bf0[59];
    1914             :   bf1[61] = bf0[61] + bf0[58];
    1915             :   bf1[62] = bf0[62] + bf0[57];
    1916             :   bf1[63] = bf0[63] + bf0[56];
    1917             :   range_check(stage, input, bf1, size, stage_range[stage]);
    1918             : 
    1919             :   // stage 6
    1920             :   stage++;
    1921             :   cospi = cospi_arr(cos_bit[stage]);
    1922             :   bf0 = output;
    1923             :   bf1 = step;
    1924             :   bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
    1925             :   bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
    1926             :   bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
    1927             :   bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
    1928             :   bf1[4] = bf0[4] + bf0[5];
    1929             :   bf1[5] = -bf0[5] + bf0[4];
    1930             :   bf1[6] = -bf0[6] + bf0[7];
    1931             :   bf1[7] = bf0[7] + bf0[6];
    1932             :   bf1[8] = bf0[8];
    1933             :   bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
    1934             :   bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
    1935             :   bf1[11] = bf0[11];
    1936             :   bf1[12] = bf0[12];
    1937             :   bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
    1938             :   bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
    1939             :   bf1[15] = bf0[15];
    1940             :   bf1[16] = bf0[16] + bf0[19];
    1941             :   bf1[17] = bf0[17] + bf0[18];
    1942             :   bf1[18] = -bf0[18] + bf0[17];
    1943             :   bf1[19] = -bf0[19] + bf0[16];
    1944             :   bf1[20] = -bf0[20] + bf0[23];
    1945             :   bf1[21] = -bf0[21] + bf0[22];
    1946             :   bf1[22] = bf0[22] + bf0[21];
    1947             :   bf1[23] = bf0[23] + bf0[20];
    1948             :   bf1[24] = bf0[24] + bf0[27];
    1949             :   bf1[25] = bf0[25] + bf0[26];
    1950             :   bf1[26] = -bf0[26] + bf0[25];
    1951             :   bf1[27] = -bf0[27] + bf0[24];
    1952             :   bf1[28] = -bf0[28] + bf0[31];
    1953             :   bf1[29] = -bf0[29] + bf0[30];
    1954             :   bf1[30] = bf0[30] + bf0[29];
    1955             :   bf1[31] = bf0[31] + bf0[28];
    1956             :   bf1[32] = bf0[32];
    1957             :   bf1[33] = bf0[33];
    1958             :   bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
    1959             :   bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
    1960             :   bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
    1961             :   bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
    1962             :   bf1[38] = bf0[38];
    1963             :   bf1[39] = bf0[39];
    1964             :   bf1[40] = bf0[40];
    1965             :   bf1[41] = bf0[41];
    1966             :   bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
    1967             :   bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
    1968             :   bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
    1969             :   bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
    1970             :   bf1[46] = bf0[46];
    1971             :   bf1[47] = bf0[47];
    1972             :   bf1[48] = bf0[48];
    1973             :   bf1[49] = bf0[49];
    1974             :   bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
    1975             :   bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
    1976             :   bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
    1977             :   bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
    1978             :   bf1[54] = bf0[54];
    1979             :   bf1[55] = bf0[55];
    1980             :   bf1[56] = bf0[56];
    1981             :   bf1[57] = bf0[57];
    1982             :   bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
    1983             :   bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
    1984             :   bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
    1985             :   bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
    1986             :   bf1[62] = bf0[62];
    1987             :   bf1[63] = bf0[63];
    1988             :   range_check(stage, input, bf1, size, stage_range[stage]);
    1989             : 
    1990             :   // stage 7
    1991             :   stage++;
    1992             :   cospi = cospi_arr(cos_bit[stage]);
    1993             :   bf0 = step;
    1994             :   bf1 = output;
    1995             :   bf1[0] = bf0[0];
    1996             :   bf1[1] = bf0[1];
    1997             :   bf1[2] = bf0[2];
    1998             :   bf1[3] = bf0[3];
    1999             :   bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
    2000             :   bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
    2001             :   bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
    2002             :   bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
    2003             :   bf1[8] = bf0[8] + bf0[9];
    2004             :   bf1[9] = -bf0[9] + bf0[8];
    2005             :   bf1[10] = -bf0[10] + bf0[11];
    2006             :   bf1[11] = bf0[11] + bf0[10];
    2007             :   bf1[12] = bf0[12] + bf0[13];
    2008             :   bf1[13] = -bf0[13] + bf0[12];
    2009             :   bf1[14] = -bf0[14] + bf0[15];
    2010             :   bf1[15] = bf0[15] + bf0[14];
    2011             :   bf1[16] = bf0[16];
    2012             :   bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
    2013             :   bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
    2014             :   bf1[19] = bf0[19];
    2015             :   bf1[20] = bf0[20];
    2016             :   bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
    2017             :   bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
    2018             :   bf1[23] = bf0[23];
    2019             :   bf1[24] = bf0[24];
    2020             :   bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
    2021             :   bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
    2022             :   bf1[27] = bf0[27];
    2023             :   bf1[28] = bf0[28];
    2024             :   bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
    2025             :   bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
    2026             :   bf1[31] = bf0[31];
    2027             :   bf1[32] = bf0[32] + bf0[35];
    2028             :   bf1[33] = bf0[33] + bf0[34];
    2029             :   bf1[34] = -bf0[34] + bf0[33];
    2030             :   bf1[35] = -bf0[35] + bf0[32];
    2031             :   bf1[36] = -bf0[36] + bf0[39];
    2032             :   bf1[37] = -bf0[37] + bf0[38];
    2033             :   bf1[38] = bf0[38] + bf0[37];
    2034             :   bf1[39] = bf0[39] + bf0[36];
    2035             :   bf1[40] = bf0[40] + bf0[43];
    2036             :   bf1[41] = bf0[41] + bf0[42];
    2037             :   bf1[42] = -bf0[42] + bf0[41];
    2038             :   bf1[43] = -bf0[43] + bf0[40];
    2039             :   bf1[44] = -bf0[44] + bf0[47];
    2040             :   bf1[45] = -bf0[45] + bf0[46];
    2041             :   bf1[46] = bf0[46] + bf0[45];
    2042             :   bf1[47] = bf0[47] + bf0[44];
    2043             :   bf1[48] = bf0[48] + bf0[51];
    2044             :   bf1[49] = bf0[49] + bf0[50];
    2045             :   bf1[50] = -bf0[50] + bf0[49];
    2046             :   bf1[51] = -bf0[51] + bf0[48];
    2047             :   bf1[52] = -bf0[52] + bf0[55];
    2048             :   bf1[53] = -bf0[53] + bf0[54];
    2049             :   bf1[54] = bf0[54] + bf0[53];
    2050             :   bf1[55] = bf0[55] + bf0[52];
    2051             :   bf1[56] = bf0[56] + bf0[59];
    2052             :   bf1[57] = bf0[57] + bf0[58];
    2053             :   bf1[58] = -bf0[58] + bf0[57];
    2054             :   bf1[59] = -bf0[59] + bf0[56];
    2055             :   bf1[60] = -bf0[60] + bf0[63];
    2056             :   bf1[61] = -bf0[61] + bf0[62];
    2057             :   bf1[62] = bf0[62] + bf0[61];
    2058             :   bf1[63] = bf0[63] + bf0[60];
    2059             :   range_check(stage, input, bf1, size, stage_range[stage]);
    2060             : 
    2061             :   // stage 8
    2062             :   stage++;
    2063             :   cospi = cospi_arr(cos_bit[stage]);
    2064             :   bf0 = output;
    2065             :   bf1 = step;
    2066             :   bf1[0] = bf0[0];
    2067             :   bf1[1] = bf0[1];
    2068             :   bf1[2] = bf0[2];
    2069             :   bf1[3] = bf0[3];
    2070             :   bf1[4] = bf0[4];
    2071             :   bf1[5] = bf0[5];
    2072             :   bf1[6] = bf0[6];
    2073             :   bf1[7] = bf0[7];
    2074             :   bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
    2075             :   bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
    2076             :   bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
    2077             :   bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
    2078             :   bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
    2079             :   bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
    2080             :   bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
    2081             :   bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
    2082             :   bf1[16] = bf0[16] + bf0[17];
    2083             :   bf1[17] = -bf0[17] + bf0[16];
    2084             :   bf1[18] = -bf0[18] + bf0[19];
    2085             :   bf1[19] = bf0[19] + bf0[18];
    2086             :   bf1[20] = bf0[20] + bf0[21];
    2087             :   bf1[21] = -bf0[21] + bf0[20];
    2088             :   bf1[22] = -bf0[22] + bf0[23];
    2089             :   bf1[23] = bf0[23] + bf0[22];
    2090             :   bf1[24] = bf0[24] + bf0[25];
    2091             :   bf1[25] = -bf0[25] + bf0[24];
    2092             :   bf1[26] = -bf0[26] + bf0[27];
    2093             :   bf1[27] = bf0[27] + bf0[26];
    2094             :   bf1[28] = bf0[28] + bf0[29];
    2095             :   bf1[29] = -bf0[29] + bf0[28];
    2096             :   bf1[30] = -bf0[30] + bf0[31];
    2097             :   bf1[31] = bf0[31] + bf0[30];
    2098             :   bf1[32] = bf0[32];
    2099             :   bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
    2100             :   bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
    2101             :   bf1[35] = bf0[35];
    2102             :   bf1[36] = bf0[36];
    2103             :   bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
    2104             :   bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
    2105             :   bf1[39] = bf0[39];
    2106             :   bf1[40] = bf0[40];
    2107             :   bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
    2108             :   bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
    2109             :   bf1[43] = bf0[43];
    2110             :   bf1[44] = bf0[44];
    2111             :   bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
    2112             :   bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
    2113             :   bf1[47] = bf0[47];
    2114             :   bf1[48] = bf0[48];
    2115             :   bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
    2116             :   bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
    2117             :   bf1[51] = bf0[51];
    2118             :   bf1[52] = bf0[52];
    2119             :   bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
    2120             :   bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
    2121             :   bf1[55] = bf0[55];
    2122             :   bf1[56] = bf0[56];
    2123             :   bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
    2124             :   bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
    2125             :   bf1[59] = bf0[59];
    2126             :   bf1[60] = bf0[60];
    2127             :   bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
    2128             :   bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
    2129             :   bf1[63] = bf0[63];
    2130             :   range_check(stage, input, bf1, size, stage_range[stage]);
    2131             : 
    2132             :   // stage 9
    2133             :   stage++;
    2134             :   cospi = cospi_arr(cos_bit[stage]);
    2135             :   bf0 = step;
    2136             :   bf1 = output;
    2137             :   bf1[0] = bf0[0];
    2138             :   bf1[1] = bf0[1];
    2139             :   bf1[2] = bf0[2];
    2140             :   bf1[3] = bf0[3];
    2141             :   bf1[4] = bf0[4];
    2142             :   bf1[5] = bf0[5];
    2143             :   bf1[6] = bf0[6];
    2144             :   bf1[7] = bf0[7];
    2145             :   bf1[8] = bf0[8];
    2146             :   bf1[9] = bf0[9];
    2147             :   bf1[10] = bf0[10];
    2148             :   bf1[11] = bf0[11];
    2149             :   bf1[12] = bf0[12];
    2150             :   bf1[13] = bf0[13];
    2151             :   bf1[14] = bf0[14];
    2152             :   bf1[15] = bf0[15];
    2153             :   bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
    2154             :   bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
    2155             :   bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
    2156             :   bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
    2157             :   bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
    2158             :   bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
    2159             :   bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
    2160             :   bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
    2161             :   bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
    2162             :   bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
    2163             :   bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
    2164             :   bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
    2165             :   bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
    2166             :   bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
    2167             :   bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
    2168             :   bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
    2169             :   bf1[32] = bf0[32] + bf0[33];
    2170             :   bf1[33] = -bf0[33] + bf0[32];
    2171             :   bf1[34] = -bf0[34] + bf0[35];
    2172             :   bf1[35] = bf0[35] + bf0[34];
    2173             :   bf1[36] = bf0[36] + bf0[37];
    2174             :   bf1[37] = -bf0[37] + bf0[36];
    2175             :   bf1[38] = -bf0[38] + bf0[39];
    2176             :   bf1[39] = bf0[39] + bf0[38];
    2177             :   bf1[40] = bf0[40] + bf0[41];
    2178             :   bf1[41] = -bf0[41] + bf0[40];
    2179             :   bf1[42] = -bf0[42] + bf0[43];
    2180             :   bf1[43] = bf0[43] + bf0[42];
    2181             :   bf1[44] = bf0[44] + bf0[45];
    2182             :   bf1[45] = -bf0[45] + bf0[44];
    2183             :   bf1[46] = -bf0[46] + bf0[47];
    2184             :   bf1[47] = bf0[47] + bf0[46];
    2185             :   bf1[48] = bf0[48] + bf0[49];
    2186             :   bf1[49] = -bf0[49] + bf0[48];
    2187             :   bf1[50] = -bf0[50] + bf0[51];
    2188             :   bf1[51] = bf0[51] + bf0[50];
    2189             :   bf1[52] = bf0[52] + bf0[53];
    2190             :   bf1[53] = -bf0[53] + bf0[52];
    2191             :   bf1[54] = -bf0[54] + bf0[55];
    2192             :   bf1[55] = bf0[55] + bf0[54];
    2193             :   bf1[56] = bf0[56] + bf0[57];
    2194             :   bf1[57] = -bf0[57] + bf0[56];
    2195             :   bf1[58] = -bf0[58] + bf0[59];
    2196             :   bf1[59] = bf0[59] + bf0[58];
    2197             :   bf1[60] = bf0[60] + bf0[61];
    2198             :   bf1[61] = -bf0[61] + bf0[60];
    2199             :   bf1[62] = -bf0[62] + bf0[63];
    2200             :   bf1[63] = bf0[63] + bf0[62];
    2201             :   range_check(stage, input, bf1, size, stage_range[stage]);
    2202             : 
    2203             :   // stage 10
    2204             :   stage++;
    2205             :   cospi = cospi_arr(cos_bit[stage]);
    2206             :   bf0 = output;
    2207             :   bf1 = step;
    2208             :   bf1[0] = bf0[0];
    2209             :   bf1[1] = bf0[1];
    2210             :   bf1[2] = bf0[2];
    2211             :   bf1[3] = bf0[3];
    2212             :   bf1[4] = bf0[4];
    2213             :   bf1[5] = bf0[5];
    2214             :   bf1[6] = bf0[6];
    2215             :   bf1[7] = bf0[7];
    2216             :   bf1[8] = bf0[8];
    2217             :   bf1[9] = bf0[9];
    2218             :   bf1[10] = bf0[10];
    2219             :   bf1[11] = bf0[11];
    2220             :   bf1[12] = bf0[12];
    2221             :   bf1[13] = bf0[13];
    2222             :   bf1[14] = bf0[14];
    2223             :   bf1[15] = bf0[15];
    2224             :   bf1[16] = bf0[16];
    2225             :   bf1[17] = bf0[17];
    2226             :   bf1[18] = bf0[18];
    2227             :   bf1[19] = bf0[19];
    2228             :   bf1[20] = bf0[20];
    2229             :   bf1[21] = bf0[21];
    2230             :   bf1[22] = bf0[22];
    2231             :   bf1[23] = bf0[23];
    2232             :   bf1[24] = bf0[24];
    2233             :   bf1[25] = bf0[25];
    2234             :   bf1[26] = bf0[26];
    2235             :   bf1[27] = bf0[27];
    2236             :   bf1[28] = bf0[28];
    2237             :   bf1[29] = bf0[29];
    2238             :   bf1[30] = bf0[30];
    2239             :   bf1[31] = bf0[31];
    2240             :   bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
    2241             :   bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
    2242             :   bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
    2243             :   bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
    2244             :   bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
    2245             :   bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
    2246             :   bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
    2247             :   bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
    2248             :   bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
    2249             :   bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
    2250             :   bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
    2251             :   bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
    2252             :   bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
    2253             :   bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
    2254             :   bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
    2255             :   bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
    2256             :   bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
    2257             :   bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
    2258             :   bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
    2259             :   bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
    2260             :   bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
    2261             :   bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
    2262             :   bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
    2263             :   bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
    2264             :   bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
    2265             :   bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
    2266             :   bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
    2267             :   bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
    2268             :   bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
    2269             :   bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
    2270             :   bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
    2271             :   bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
    2272             :   range_check(stage, input, bf1, size, stage_range[stage]);
    2273             : 
    2274             :   // stage 11
    2275             :   stage++;
    2276             :   cospi = cospi_arr(cos_bit[stage]);
    2277             :   bf0 = step;
    2278             :   bf1 = output;
    2279             :   bf1[0] = bf0[0];
    2280             :   bf1[1] = bf0[32];
    2281             :   bf1[2] = bf0[16];
    2282             :   bf1[3] = bf0[48];
    2283             :   bf1[4] = bf0[8];
    2284             :   bf1[5] = bf0[40];
    2285             :   bf1[6] = bf0[24];
    2286             :   bf1[7] = bf0[56];
    2287             :   bf1[8] = bf0[4];
    2288             :   bf1[9] = bf0[36];
    2289             :   bf1[10] = bf0[20];
    2290             :   bf1[11] = bf0[52];
    2291             :   bf1[12] = bf0[12];
    2292             :   bf1[13] = bf0[44];
    2293             :   bf1[14] = bf0[28];
    2294             :   bf1[15] = bf0[60];
    2295             :   bf1[16] = bf0[2];
    2296             :   bf1[17] = bf0[34];
    2297             :   bf1[18] = bf0[18];
    2298             :   bf1[19] = bf0[50];
    2299             :   bf1[20] = bf0[10];
    2300             :   bf1[21] = bf0[42];
    2301             :   bf1[22] = bf0[26];
    2302             :   bf1[23] = bf0[58];
    2303             :   bf1[24] = bf0[6];
    2304             :   bf1[25] = bf0[38];
    2305             :   bf1[26] = bf0[22];
    2306             :   bf1[27] = bf0[54];
    2307             :   bf1[28] = bf0[14];
    2308             :   bf1[29] = bf0[46];
    2309             :   bf1[30] = bf0[30];
    2310             :   bf1[31] = bf0[62];
    2311             :   bf1[32] = bf0[1];
    2312             :   bf1[33] = bf0[33];
    2313             :   bf1[34] = bf0[17];
    2314             :   bf1[35] = bf0[49];
    2315             :   bf1[36] = bf0[9];
    2316             :   bf1[37] = bf0[41];
    2317             :   bf1[38] = bf0[25];
    2318             :   bf1[39] = bf0[57];
    2319             :   bf1[40] = bf0[5];
    2320             :   bf1[41] = bf0[37];
    2321             :   bf1[42] = bf0[21];
    2322             :   bf1[43] = bf0[53];
    2323             :   bf1[44] = bf0[13];
    2324             :   bf1[45] = bf0[45];
    2325             :   bf1[46] = bf0[29];
    2326             :   bf1[47] = bf0[61];
    2327             :   bf1[48] = bf0[3];
    2328             :   bf1[49] = bf0[35];
    2329             :   bf1[50] = bf0[19];
    2330             :   bf1[51] = bf0[51];
    2331             :   bf1[52] = bf0[11];
    2332             :   bf1[53] = bf0[43];
    2333             :   bf1[54] = bf0[27];
    2334             :   bf1[55] = bf0[59];
    2335             :   bf1[56] = bf0[7];
    2336             :   bf1[57] = bf0[39];
    2337             :   bf1[58] = bf0[23];
    2338             :   bf1[59] = bf0[55];
    2339             :   bf1[60] = bf0[15];
    2340             :   bf1[61] = bf0[47];
    2341             :   bf1[62] = bf0[31];
    2342             :   bf1[63] = bf0[63];
    2343             :   range_check(stage, input, bf1, size, stage_range[stage]);
    2344             : }
    2345             : #endif  // CONFIG_TX64X64

Generated by: LCOV version 1.13